From ee96df51e34eb996f960653129374c5554d45a0d Mon Sep 17 00:00:00 2001 From: jacob Date: Tue, 27 May 2025 03:51:09 -0500 Subject: [PATCH] readd grid shader w/ instancing --- res/shaders/common.hlsl | 3 +- res/shaders/grid.hlsl | 59 +++++++++++------- res/shaders/triangle.hlsl | 1 - src/gpu_dx11.c | 128 ++++++++++++++++++++++---------------- src/user.c | 4 +- 5 files changed, 116 insertions(+), 79 deletions(-) diff --git a/res/shaders/common.hlsl b/res/shaders/common.hlsl index 3b145d8b..769bba61 100644 --- a/res/shaders/common.hlsl +++ b/res/shaders/common.hlsl @@ -26,6 +26,5 @@ float4 linear_from_srgb32(uint srgb32) float2 xform_mul(struct xform xf, float2 v) { - // return xf.bx * v.x + xf.by * v.y + xf.og; - return float2(xf.bx.x * v.x + xf.by.x * v.y, xf.bx.y * v.x + xf.by.y * v.y) + xf.og; + return xf.bx * v.x + xf.by * v.y + xf.og; } \ No newline at end of file diff --git a/res/shaders/grid.hlsl b/res/shaders/grid.hlsl index 68fbaad0..c01e7509 100644 --- a/res/shaders/grid.hlsl +++ b/res/shaders/grid.hlsl @@ -1,15 +1,15 @@ #include "shaders/common.hlsl" -struct vs_input { - DECL(float4, pos); - DECL(float, line_thickness); - DECL(float, line_spacing); - DECL(float2, offset); - DECL(float4, bg0_srgb); - DECL(float4, bg1_srgb); - DECL(float4, line_srgb); - DECL(float4, x_srgb); - DECL(float4, y_srgb); +struct vs_instance { + struct xform xf; + float line_thickness; + float line_spacing; + float2 offset; + uint bg0_srgb; + uint bg1_srgb; + uint line_srgb; + uint x_srgb; + uint y_srgb; }; struct ps_input { @@ -28,28 +28,43 @@ struct ps_input { * Globals * ========================== */ + StructuredBuffer G_instance_buffer : register(t0); + cbuffer constants : register(b0) { float4x4 G_projection; + uint G_instance_offset; }; /* ========================== * * Vertex shader * ========================== */ -ps_input vs_main(vs_input input) -{ - ps_input output; + static const float2 G_quad_verts[4] = { + float2(-0.5f, -0.5f), + float2( 0.5f, -0.5f), + float2( 0.5f, 0.5f), + float2(-0.5f, 0.5f) +}; - output.screen_pos = mul(G_projection, float4(input.pos.xy, 0.f, 1.f)); - output.line_thickness = input.line_thickness; - output.line_spacing = input.line_spacing; - output.offset = input.offset; - output.bg0_lin = linear_from_srgb(input.bg0_srgb); - output.bg1_lin = linear_from_srgb(input.bg1_srgb); - output.line_lin = linear_from_srgb(input.line_srgb); - output.x_lin = linear_from_srgb(input.x_srgb); - output.y_lin = linear_from_srgb(input.y_srgb); +ps_input vs_main(uint instance_id : SV_InstanceID, uint vertex_id : SV_VertexID) +{ + vs_instance instance = G_instance_buffer[G_instance_offset + instance_id]; + float2 vert = G_quad_verts[vertex_id]; + + float2 world_pos = xform_mul(instance.xf, vert); + float4 screen_pos = mul(G_projection, float4(world_pos, 0, 1)); + + ps_input output; + output.screen_pos = screen_pos; + output.line_thickness = instance.line_thickness; + output.line_spacing = instance.line_spacing; + output.offset = instance.offset; + output.bg0_lin = linear_from_srgb32(instance.bg0_srgb); + output.bg1_lin = linear_from_srgb32(instance.bg1_srgb); + output.line_lin = linear_from_srgb32(instance.line_srgb); + output.x_lin = linear_from_srgb32(instance.x_srgb); + output.y_lin = linear_from_srgb32(instance.y_srgb); return output; } diff --git a/res/shaders/triangle.hlsl b/res/shaders/triangle.hlsl index 89ea1000..801b5c5d 100644 --- a/res/shaders/triangle.hlsl +++ b/res/shaders/triangle.hlsl @@ -32,7 +32,6 @@ cbuffer constants : register(b0) ps_input vs_main(vs_input input) { ps_input output; - output.screen_pos = mul(G_projection, float4(input.pos.xy, 0.f, 1.f)); output.uv = input.uv; output.tint_lin = linear_from_srgb(input.tint_srgb); diff --git a/src/gpu_dx11.c b/src/gpu_dx11.c index 19a5a4c9..1863cd1f 100644 --- a/src/gpu_dx11.c +++ b/src/gpu_dx11.c @@ -525,10 +525,11 @@ PACK(struct dx11_texture_instance { PACK(struct dx11_grid_uniform { struct mat4x4 vp; + u32 instance_offset; }); -PACK(struct dx11_grid_vertex { - struct v2 pos; +PACK(struct dx11_grid_instance { + struct xform xf; f32 line_thickness; f32 line_spacing; struct v2 offset; @@ -563,18 +564,7 @@ INTERNAL void init_shader_table(void) /* Grid shader layout */ G.shader_info[DX11_SHADER_KIND_GRID] = (struct dx11_shader_desc) { .kind = DX11_SHADER_KIND_GRID, - .name_cstr = "shaders/grid.hlsl", - .input_layout_desc = { - { "pos", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "line_thickness", 0, DXGI_FORMAT_R32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "line_spacing", 0, DXGI_FORMAT_R32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "offset", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "bg0_srgb", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "bg1_srgb", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "line_srgb", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "x_srgb", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, - { "y_srgb", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 } - } + .name_cstr = "shaders/grid.hlsl" }; #if RESOURCE_RELOADING @@ -1097,11 +1087,9 @@ struct gpu_cmd_store gpu_cmd_store_alloc(void) /* Grid buffers */ { -#if 0 struct D3D11_BUFFER_DESC desc = structured_buffer_desc; - desc.StructureByteStride = sizeof(struct dx11_grid_instance), - store->buffers.texture.instance_buffer = dx11_buffer_alloc(D3D11_BIND_SHADER_RESOURCE); -#endif + desc.StructureByteStride = sizeof(struct dx11_grid_instance); + store->buffers.grid.instance_buffer = dx11_buffer_alloc(desc, NULL); } } @@ -1146,13 +1134,11 @@ void gpu_push_cmd(struct gpu_cmd_store gpu_cmd_store, struct gpu_cmd_params para /* Start new cmd */ if (!cmd) { /* TODO: Better count method */ - u32 instance_offset = (store->buffers.texture.instance_buffer->cpu_buffer_arena.pos / sizeof(struct dx11_texture_instance)); - cmd = arena_push(&store->cpu_cmds_arena, struct dx11_cmd); cmd->kind = params.kind; cmd->texture.sprite = params.texture.sprite; cmd->texture.texture = params.texture.texture; - cmd->texture.instance_offset = instance_offset; + cmd->texture.instance_offset = (store->buffers.texture.instance_buffer->cpu_buffer_arena.pos / sizeof(struct dx11_texture_instance)); if (store->cpu_last_cmd) { store->cpu_last_cmd->next = cmd; } else { @@ -1172,7 +1158,38 @@ void gpu_push_cmd(struct gpu_cmd_store gpu_cmd_store, struct gpu_cmd_params para case GPU_CMD_KIND_DRAW_GRID: { - /* TODO */ + struct dx11_cmd *cmd = store->cpu_last_cmd; + if (cmd && cmd->kind != params.kind) { + /* Cannot batch */ + cmd = NULL; + } + + /* Start new cmd */ + if (!cmd) { + /* TODO: Better count method */ + cmd = arena_push(&store->cpu_cmds_arena, struct dx11_cmd); + cmd->kind = params.kind; + cmd->grid.instance_offset = (store->buffers.grid.instance_buffer->cpu_buffer_arena.pos / sizeof(struct dx11_grid_instance)); + if (store->cpu_last_cmd) { + store->cpu_last_cmd->next = cmd; + } else { + store->cpu_first_cmd = cmd; + } + store->cpu_last_cmd = cmd; + } + + /* Push instance data */ + ++cmd->grid.instance_count; + struct dx11_grid_instance *instance = dx11_buffer_push(store->buffers.grid.instance_buffer, sizeof(struct dx11_grid_instance)); + instance->xf = params.grid.xf; + instance->line_thickness = params.grid.line_thickness; + instance->line_spacing = params.grid.line_spacing; + instance->offset = params.grid.offset; + instance->bg0_srgb = params.grid.bg0_color; + instance->bg1_srgb = params.grid.bg1_color; + instance->line_srgb = params.grid.line_color; + instance->x_srgb = params.grid.x_color; + instance->y_srgb = params.grid.y_color; } break; } } @@ -1202,7 +1219,7 @@ void gpu_submit_cmds(struct gpu_cmd_store gpu_cmd_store) dx11_buffer_submit(store->buffers.texture.instance_buffer); /* Submit grid buffers */ - //dx11_buffer_submit(store->buffers.grid.instance_buffer); + dx11_buffer_submit(store->buffers.grid.instance_buffer); } /* TODO: Lock resources during run */ @@ -1325,19 +1342,6 @@ void gpu_run_cmds(struct gpu_cmd_store gpu_cmd_store, struct gpu_texture target, ID3D11DeviceContext_VSSetShader(G.devcon, shader->vs, 0, 0); ID3D11DeviceContext_PSSetShader(G.devcon, shader->ps, 0, 0); -#if 0 - /* Build & bind constant buffer */ - struct texture_shader_uniforms uniforms = ZI; - uniforms.vp = vp_matrix; - { - D3D11_MAPPED_SUBRESOURCE subres = ZI; - ID3D11DeviceContext_Map(G.devcon, (ID3D11Resource *)constant_buffer->gpu_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &subres); - struct texture_shader_uniforms *subres_ptr = (struct texture_shader_uniforms *)subres.pData; - MEMCPY(subres_ptr, &uniforms, sizeof(uniforms)); - ID3D11DeviceContext_Unmap(G.devcon, (ID3D11Resource *)constant_buffer->gpu_buffer, 0); - } - ID3D11DeviceContext_SetConstantBuffers(G.devcon, 0, 1, &constant_buffer->srv); -#else /* Fill & bind constant buffer */ { struct dx11_texture_uniform *uniform = dx11_buffer_push(constant_buffer, sizeof(struct dx11_texture_uniform)); @@ -1347,7 +1351,6 @@ void gpu_run_cmds(struct gpu_cmd_store gpu_cmd_store, struct gpu_texture target, } ID3D11DeviceContext_VSSetConstantBuffers(G.devcon, 0, 1, &constant_buffer->gpu_buffer); ID3D11DeviceContext_PSSetConstantBuffers(G.devcon, 0, 1, &constant_buffer->gpu_buffer); -#endif /* Bind dummy vertex buffer */ u32 zero = 0; @@ -1371,28 +1374,49 @@ void gpu_run_cmds(struct gpu_cmd_store gpu_cmd_store, struct gpu_texture target, ID3D11DeviceContext_PSSetShaderResources(G.devcon, 0, 8, null_srvs); } } - } break; case GPU_CMD_KIND_DRAW_GRID: { - /* TODO */ -#if 0 __profscope_dx11(G.profiling_ctx, Grid, RGB_F(0.2, 0.5, 0.2)); - ID3D11DeviceContext_IASetPrimitiveTopology(G.devcon, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + struct dx11_shader *shader = &G.shaders[DX11_SHADER_KIND_GRID]; + if (shader->valid) { + struct dx11_buffer *constant_buffer = store->buffers.constant_buffer; + struct dx11_buffer *instance_buffer = store->buffers.grid.instance_buffer; + u32 instance_offset = cmd->grid.instance_offset; + u32 instance_count = cmd->grid.instance_count; - /* Activate buffer */ - u32 zero = 0; - UINT vertex_stride = shader->vertex_size; - ID3D11DeviceContext_IASetVertexBuffers(G.devcon, 0, 1, &buffer->gpu_vertex_buffer, &vertex_stride, &zero); - ID3D11DeviceContext_IASetIndexBuffer(G.devcon, buffer->gpu_index_buffer, DXGI_FORMAT_R32_UINT, zero); + /* Bind shader */ + ID3D11DeviceContext_VSSetShader(G.devcon, shader->vs, 0, 0); + ID3D11DeviceContext_PSSetShader(G.devcon, shader->ps, 0, 0); - /* Draw */ - u32 vertex_offset = cmd->vertex_offset; - u32 index_offset = cmd->index_offset; - u32 index_count = cmd->index_count; - ID3D11DeviceContext_DrawIndexed(G.devcon, index_count, index_offset, vertex_offset); -#endif + /* Fill & bind constant buffer */ + { + struct dx11_grid_uniform *uniform = dx11_buffer_push(constant_buffer, sizeof(struct dx11_grid_uniform)); + uniform->vp = vp_matrix; + uniform->instance_offset = instance_offset; + dx11_buffer_submit(constant_buffer); + } + ID3D11DeviceContext_VSSetConstantBuffers(G.devcon, 0, 1, &constant_buffer->gpu_buffer); + ID3D11DeviceContext_PSSetConstantBuffers(G.devcon, 0, 1, &constant_buffer->gpu_buffer); + + /* Bind dummy vertex buffer */ + u32 zero = 0; + ID3D11DeviceContext_IASetVertexBuffers(G.devcon, 0, 1, &G.dummy_vertex_buffer->gpu_buffer, &zero, &zero); + ID3D11DeviceContext_IASetIndexBuffer(G.devcon, G.quad_index_buffer->gpu_buffer, DXGI_FORMAT_R16_UINT, zero); + + /* Bind instance buffer */ + ID3D11DeviceContext_VSSetShaderResources(G.devcon, 0, 1, &instance_buffer->srv); + ID3D11DeviceContext_PSSetShaderResources(G.devcon, 0, 1, &instance_buffer->srv); + + /* Draw */ + ID3D11DeviceContext_IASetPrimitiveTopology(G.devcon, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + ID3D11DeviceContext_DrawIndexedInstanced(G.devcon, 6, instance_count, 0, 0, 0); + + /* Unbind SRVs */ + ID3D11DeviceContext_VSSetShaderResources(G.devcon, 0, 8, null_srvs); + ID3D11DeviceContext_PSSetShaderResources(G.devcon, 0, 8, null_srvs); + } } break; } } diff --git a/src/user.c b/src/user.c index cc9f3987..dad51d82 100644 --- a/src/user.c +++ b/src/user.c @@ -1045,7 +1045,7 @@ INTERNAL void user_update(void) struct v2 size = xform_basis_invert_mul_v2(G.world_to_user_xf, G.user_size); u32 color0 = RGBA_F(0.17f, 0.17f, 0.17f, 1.f); u32 color1 = RGBA_F(0.15f, 0.15f, 0.15f, 1.f); - draw_grid(G.world_gpu_cmd_store, XFORM_TRS(.t = pos, .s = size), color0, color1, RGBA(0x3f, 0x3f, 0x3f, 0xFF), COLOR_RED, COLOR_GREEN, thickness, spacing, offset); + draw_grid(G.world_gpu_cmd_store, xform_from_rect(RECT_FROM_V2(pos, size)), color0, color1, RGBA(0x3f, 0x3f, 0x3f, 0xFF), COLOR_RED, COLOR_GREEN, thickness, spacing, offset); } #if 0 @@ -1267,7 +1267,7 @@ INTERNAL void user_update(void) if (tile == SIM_TILE_KIND_WALL) { struct v2i32 world_tile_index = sim_world_tile_index_from_local_tile_index(chunk_index, local_tile_index); struct v2 pos = sim_pos_from_world_tile_index(world_tile_index); - struct xform tile_xf = XFORM_TRS(.t = pos, .s = V2(tile_size, tile_size)); + struct xform tile_xf = xform_from_rect(RECT_FROM_V2(pos, V2(tile_size, tile_size))); struct draw_texture_params params = DRAW_TEXTURE_PARAMS(.xf = tile_xf, .sprite = tile_sprite); draw_texture(G.world_gpu_cmd_store, params); }