From e040c002221c4831f7bae957c4203e6881ccf54f Mon Sep 17 00:00:00 2001 From: jacob Date: Tue, 27 May 2025 03:31:52 -0500 Subject: [PATCH] use indexed instancing for quads --- res/shaders/texture.hlsl | 52 +++++++++---------------- src/gpu_dx11.c | 82 +++++++++++++++++++++++++++++----------- 2 files changed, 78 insertions(+), 56 deletions(-) diff --git a/res/shaders/texture.hlsl b/res/shaders/texture.hlsl index 50e2534c..1a65e7a4 100644 --- a/res/shaders/texture.hlsl +++ b/res/shaders/texture.hlsl @@ -33,48 +33,32 @@ cbuffer constants : register(b0) * Vertex shader * ========================== */ +static const float2 G_quad_verts[4] = { + float2(-0.5f, -0.5f), + float2( 0.5f, -0.5f), + float2( 0.5f, 0.5f), + float2(-0.5f, 0.5f) +}; + +static const int2 G_uv_factors[4] = { + int2(0, 0), + int2(1, 0), + int2(1, 1), + int2(0, 1) +}; + ps_input vs_main(uint instance_id : SV_InstanceID, uint vertex_id : SV_VertexID) { - // static float2 quad[4] = { - // float2(-0.5f, -0.5f), - // float2( 0.5f, -0.5f), - // float2( 0.5f, 0.5f), - // float2(-0.5f, 0.5f) - // }; + vs_instance instance = G_instance_buffer[G_instance_offset + instance_id]; + float2 vert = G_quad_verts[vertex_id]; + float2 uv_factor = G_uv_factors[vertex_id]; - static const uint indices[6] = { 0, 1, 2, 2, 3, 0 }; - static const float2 vertices[4] = { - float2(-0.5f, -0.5f), - float2( 0.5f, -0.5f), - float2( 0.5f, 0.5f), - float2(-0.5f, 0.5f) - }; - - vs_instance instance = G_instance_buffer[instance_id + G_instance_offset]; - int index = indices[vertex_id]; - - int top = index == 0 || index == 1; - int right = index == 1 || index == 2; - int bottom = index == 2 || index == 3; - int left = index == 3 || index == 0; - //int top = vertex_id == 0 || vertex_id == 2; - //int left = vertex_id == 0 || vertex_id == 1; - //int bottom = vertex_id == 1 || vertex_id == 3; - //int right = vertex_id == 2 || vertex_id == 3; - - float2 vert = vertices[index]; float2 world_pos = xform_mul(instance.xf, vert); float4 screen_pos = mul(G_projection, float4(world_pos, 0, 1)); - float uv_x = instance.uv0.x * left; - uv_x += instance.uv1.x * right; - float uv_y = instance.uv0.y * top; - uv_y += instance.uv1.y * bottom; - - ps_input output; output.screen_pos = screen_pos; - output.uv = float2(uv_x, uv_y); + output.uv = instance.uv0 + uv_factor * (instance.uv1 - instance.uv0); output.tint_lin = linear_from_srgb32(instance.tint_srgb); return output; diff --git a/src/gpu_dx11.c b/src/gpu_dx11.c index 19a23307..19a5a4c9 100644 --- a/src/gpu_dx11.c +++ b/src/gpu_dx11.c @@ -54,7 +54,6 @@ enum dx11_shader_kind { struct dx11_shader { enum dx11_shader_kind kind; b32 valid; /* Is this shader allocated */ - u32 vertex_size; ID3D11InputLayout *input_layout; ID3D11VertexShader *vs; ID3D11PixelShader *ps; @@ -176,7 +175,6 @@ struct handle_store { struct dx11_shader_desc { enum dx11_shader_kind kind; char *name_cstr; - u32 vertex_size; D3D11_INPUT_ELEMENT_DESC input_layout_desc[64]; /* NULL terminated array */ /* Internal */ @@ -221,12 +219,14 @@ GLOBAL struct { struct arena textures_arena; struct dx11_texture *first_free_texture; - /* Sparse array (store.valid) */ - struct gpu_cmd_store stores[MAX_CMD_STORES]; - + /* Shaders */ struct dx11_shader shaders[NUM_DX11_SHADER_KINDS]; struct dx11_shader_desc shader_info[NUM_DX11_SHADER_KINDS]; + /* Dummy buffers */ + struct dx11_buffer *dummy_vertex_buffer; + struct dx11_buffer *quad_index_buffer; + } G = ZI, DEBUG_ALIAS(G, G_gpu_dx11); /* ========================== * @@ -247,6 +247,7 @@ INLINE struct mat4x4 calculate_vp(struct xform view, f32 viewport_width, f32 vie INTERNAL void init_shader_table(void); INTERNAL void reload_shader(struct dx11_shader *shader, struct dx11_shader_desc *desc); +INTERNAL struct dx11_buffer *dx11_buffer_alloc(struct D3D11_BUFFER_DESC desc, D3D11_SUBRESOURCE_DATA *initial_data); #if RESOURCE_RELOADING INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(shader_resource_watch_callback, name); @@ -459,6 +460,29 @@ struct gpu_startup_receipt gpu_startup(struct sys_window *window) } logf_info("Finished compiling shaders"); + /* Init dummy buffers */ + { + /* Dummy vertex buffer */ + u8 dummy_data[16] = ZI; + D3D11_BUFFER_DESC vdesc = ZI; + vdesc.Usage = D3D11_USAGE_IMMUTABLE; + vdesc.ByteWidth = sizeof(dummy_data); + vdesc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + D3D11_SUBRESOURCE_DATA dummy_data_subres = ZI; + dummy_data_subres.pSysMem = dummy_data; + G.dummy_vertex_buffer = dx11_buffer_alloc(vdesc, &dummy_data_subres); + + /* Quad index buffer */ + LOCAL_PERSIST u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 }; + D3D11_BUFFER_DESC idesc = ZI; + idesc.Usage = D3D11_USAGE_IMMUTABLE; + idesc.ByteWidth = sizeof(quad_indices); + idesc.BindFlags = D3D11_BIND_INDEX_BUFFER; + D3D11_SUBRESOURCE_DATA idata = ZI; + idata.pSysMem = quad_indices; + G.quad_index_buffer = dx11_buffer_alloc(idesc, &idata); + } + /* Setup file change callbacks */ #if RESOURCE_RELOADING resource_register_watch_callback(shader_resource_watch_callback); @@ -483,10 +507,6 @@ PACK(struct dx11_triangle_vertex { u32 tint_srgb; }); -PACK(struct dx11_texture_vertex { - struct v2 pos; -}); - /* Texture structs */ PACK(struct dx11_texture_uniform { @@ -527,7 +547,6 @@ INTERNAL void init_shader_table(void) G.shader_info[DX11_SHADER_KIND_TRIANGLE] = (struct dx11_shader_desc) { .kind = DX11_SHADER_KIND_TRIANGLE, .name_cstr = "shaders/triangle.hlsl", - .vertex_size = sizeof(struct dx11_triangle_vertex), .input_layout_desc = { { "pos", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "uv", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, @@ -538,18 +557,13 @@ INTERNAL void init_shader_table(void) /* Texture shader layout */ G.shader_info[DX11_SHADER_KIND_TEXTURE] = (struct dx11_shader_desc) { .kind = DX11_SHADER_KIND_TEXTURE, - .name_cstr = "shaders/texture.hlsl", - .vertex_size = sizeof(struct dx11_texture_vertex), - .input_layout_desc = { - { "pos", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 } - } + .name_cstr = "shaders/texture.hlsl" }; /* Grid shader layout */ G.shader_info[DX11_SHADER_KIND_GRID] = (struct dx11_shader_desc) { .kind = DX11_SHADER_KIND_GRID, .name_cstr = "shaders/grid.hlsl", - .vertex_size = sizeof(struct dx11_grid_vertex), .input_layout_desc = { { "pos", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, { "line_thickness", 0, DXGI_FORMAT_R32_FLOAT, 0, D3D11_APPEND_ALIGNED_ELEMENT, D3D11_INPUT_PER_VERTEX_DATA, 0 }, @@ -723,7 +737,6 @@ INTERNAL struct string shader_alloc(struct arena *arena, struct dx11_shader *sha struct string shader_name = string_from_cstr_no_limit(shader_desc->name_cstr); shader->kind = shader_desc->kind; - shader->vertex_size = shader_desc->vertex_size; #if RESOURCE_RELOADING shader_reset_includes(shader_desc); #endif @@ -892,8 +905,8 @@ INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(shader_resource_watch_callback, name) /* TODO: Buffer caching based on size */ -/* NOTE: desc ByteWidth will be ignored (set dynamically when buffer grows) */ -INTERNAL struct dx11_buffer *dx11_buffer_alloc(struct D3D11_BUFFER_DESC desc) +/* NOTE: If initial_data is not provided, then ByteWidth will be ignored (set dynamically when buffer grows) */ +INTERNAL struct dx11_buffer *dx11_buffer_alloc(struct D3D11_BUFFER_DESC desc, D3D11_SUBRESOURCE_DATA *initial_data) { __prof; struct dx11_buffer *buffer = NULL; @@ -918,6 +931,26 @@ INTERNAL struct dx11_buffer *dx11_buffer_alloc(struct D3D11_BUFFER_DESC desc) } buffer->desc = desc; buffer->cpu_buffer = arena_dry_push(&buffer->cpu_buffer_arena, u8); + + if (desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) { + ASSERT(desc.StructureByteStride != 0); /* Must provide stride for shader resource buffers */ + } + + if (initial_data) { + ASSERT(desc.ByteWidth > 0); /* Must provide size of subresource in desc */ + buffer->gpu_buffer_capacity = desc.ByteWidth; + ID3D11Device_CreateBuffer(G.dev, &buffer->desc, initial_data, &buffer->gpu_buffer); + + /* Create SRV */ + if (desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) { + D3D11_SHADER_RESOURCE_VIEW_DESC srv_desc = ZI; + srv_desc.Format = DXGI_FORMAT_UNKNOWN; + srv_desc.ViewDimension = D3D11_SRV_DIMENSION_BUFFER; + srv_desc.Buffer.NumElements = buffer->gpu_buffer_capacity / buffer->desc.StructureByteStride; + ID3D11Device_CreateShaderResourceView(G.dev, (ID3D11Resource *)buffer->gpu_buffer, &srv_desc, &buffer->srv); + } + } + return buffer; } @@ -1044,7 +1077,7 @@ struct gpu_cmd_store gpu_cmd_store_alloc(void) { /* Constant buffer */ { - store->buffers.constant_buffer = dx11_buffer_alloc(constant_buffer_desc); + store->buffers.constant_buffer = dx11_buffer_alloc(constant_buffer_desc, NULL); } /* Triangle buffers */ @@ -1059,7 +1092,7 @@ struct gpu_cmd_store gpu_cmd_store_alloc(void) { struct D3D11_BUFFER_DESC desc = structured_buffer_desc; desc.StructureByteStride = sizeof(struct dx11_texture_instance); - store->buffers.texture.instance_buffer = dx11_buffer_alloc(desc); + store->buffers.texture.instance_buffer = dx11_buffer_alloc(desc, NULL); } /* Grid buffers */ @@ -1316,6 +1349,11 @@ void gpu_run_cmds(struct gpu_cmd_store gpu_cmd_store, struct gpu_texture target, ID3D11DeviceContext_PSSetConstantBuffers(G.devcon, 0, 1, &constant_buffer->gpu_buffer); #endif + /* Bind dummy vertex buffer */ + u32 zero = 0; + ID3D11DeviceContext_IASetVertexBuffers(G.devcon, 0, 1, &G.dummy_vertex_buffer->gpu_buffer, &zero, &zero); + ID3D11DeviceContext_IASetIndexBuffer(G.devcon, G.quad_index_buffer->gpu_buffer, DXGI_FORMAT_R16_UINT, zero); + /* Bind texture */ ID3D11DeviceContext_VSSetShaderResources(G.devcon, 0, 1, &texture->srv); ID3D11DeviceContext_PSSetShaderResources(G.devcon, 0, 1, &texture->srv); @@ -1326,7 +1364,7 @@ void gpu_run_cmds(struct gpu_cmd_store gpu_cmd_store, struct gpu_texture target, /* Draw */ ID3D11DeviceContext_IASetPrimitiveTopology(G.devcon, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - ID3D11DeviceContext_DrawInstanced(G.devcon, 6, instance_count, 0, 0); + ID3D11DeviceContext_DrawIndexedInstanced(G.devcon, 6, instance_count, 0, 0, 0); /* Unbind SRVs */ ID3D11DeviceContext_VSSetShaderResources(G.devcon, 0, 8, null_srvs);