diff --git a/src/config.h b/src/config.h index 5f508802..d802de64 100644 --- a/src/config.h +++ b/src/config.h @@ -17,6 +17,7 @@ #define SIM_CLIENT_INTERP_RATIO 2.0 +#define GPU_NAMES IsRtcEnabled #define GPU_DEBUG 0 #define GPU_DEBUG_VALIDATION 0 diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index b5941bda..822c1d59 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -153,9 +153,9 @@ void G_Bootstrap(void) { G_D12_CommandQueueDesc descs[] = { - { .type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH }, - { .type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL }, - { .type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL }, + { .type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .name = Lit("Direct Queue") }, + { .type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .name = Lit("Compute Queue") }, + { .type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .name = Lit("Copy Queue") }, }; for (u32 i = 0; i < MinU32(countof(descs), countof(G_D12.queues)); ++i) { @@ -167,6 +167,7 @@ void G_Bootstrap(void) if (SUCCEEDED(hr)) { hr = ID3D12Device_CreateFence(G_D12.device, 0, 0, &IID_ID3D12Fence, (void **)&queue->commit_fence); + G_D12_SetObjectName((ID3D12Object *)queue->d3d_queue, desc.name); } if (FAILED(hr)) { @@ -179,22 +180,25 @@ void G_Bootstrap(void) //- Initialize descriptor heaps { - Struct(Dx12HeapDesc) { D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; u64 max; }; + Struct(Dx12HeapDesc) { D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; u64 max; String name; }; Dx12HeapDesc descs[G_D12_DescriptorHeapKind_COUNT] = { [G_D12_DescriptorHeapKind_CbvSrvUav] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = G_D12_MaxCbvSrvUavDescriptors, + .name = Lit("Primary Resource Descriptor Heap"), }, [G_D12_DescriptorHeapKind_Rtv] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE, .max = G_D12_MaxRtvDescriptors, + .name = Lit("Primary RTV Descriptor Heap"), }, [G_D12_DescriptorHeapKind_Sampler] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = G_D12_MaxSamplerDescriptors, + .name = Lit("Primary Sampler Descriptor Heap"), }, }; for (G_D12_DescriptorHeapKind kind = 0; kind < countof(descs); ++kind) @@ -309,13 +313,15 @@ void G_Bootstrap(void) gpu_perm, cl, u8, queue->print_buffer_size, - .flags = G_ResourceFlag_AllowShaderReadWrite + .flags = G_ResourceFlag_AllowShaderReadWrite, + .name = Lit("Debug print gpu buffer"), ); queue->print_readback_buffer = G_PushBuffer( gpu_perm, cl, u8, queue->print_buffer_size, - .flags = G_ResourceFlag_HostMemory + .flags = G_ResourceFlag_HostMemory, + .name = Lit("Debug print readback buffer") ); queue->print_buffer_ref = G_PushRWByteAddressBufferRef(gpu_perm, queue->print_buffer); } @@ -536,7 +542,7 @@ void G_D12_SetObjectName(ID3D12Object *object, String name) TempArena scratch = BeginScratchNoConflict(); { wchar_t *name_wstr = WstrFromString(scratch.arena, name); - ID3D12Resource_SetName(object, name_wstr); + ID3D12Object_SetName(object, name_wstr); } EndScratch(scratch); } @@ -617,20 +623,20 @@ G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc) { pipeline_name = StringF( scratch.arena, - "%F%F", - FmtHandle(desc.cs.resource.v), - FmtString(NameFromResource(desc.cs.resource)) + "%F %F", + FmtString(NameFromResource(desc.cs.resource)), + FmtHandle(desc.cs.resource.v) ); } else { pipeline_name = StringF( scratch.arena, - "%F%F-%F%F", - FmtHandle(desc.vs.resource.v), + "%F %F - %F %F", FmtString(NameFromResource(desc.vs.resource)), - FmtHandle(desc.ps.resource.v), - FmtString(NameFromResource(desc.ps.resource)) + FmtHandle(desc.vs.resource.v), + FmtString(NameFromResource(desc.ps.resource)), + FmtHandle(desc.ps.resource.v) ); } @@ -791,7 +797,7 @@ G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc) if (ok) { - if (GPU_DEBUG) + if (GPU_NAMES) { G_D12_SetObjectName((ID3D12Object *)pso, pipeline_name); } @@ -1204,7 +1210,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle ZeroStruct(release); SllQueuePush(cl->releases.first, cl->releases.last, release); release->d3d_resource = resource->d3d_resource; - if (GPU_DEBUG) + if (GPU_NAMES) { StaticAssert(countof(release->name_text) == countof(resource->name_text)); release->name_len = resource->name_len; @@ -1315,7 +1321,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle { resource->name_len = new_name.len; CopyBytes(resource->name_text, new_name.text, new_name.len); - if (GPU_DEBUG) + if (GPU_NAMES) { G_D12_SetObjectName((ID3D12Object *)resource->d3d_resource, new_name); } diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index 87515748..72d7d6fe 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -222,6 +222,7 @@ Struct(G_D12_CommandQueueDesc) { D3D12_COMMAND_LIST_TYPE type; D3D12_COMMAND_QUEUE_PRIORITY priority; + String name; }; Struct(G_D12_Queue) diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 3da7af3e..ac271cc5 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -5210,6 +5210,8 @@ void V_TickForever(WaveLaneCtx *lane) { G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); + + G_DumbGlobalMemorySync(frame->cl); } ////////////////////////////// diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index 17d28ba0..7b300512 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -56,7 +56,7 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density) //////////////////////////////////////////////////////////// //~ Prepare frame -ComputeShader2D(V_PrepareShadeCS, 8, 8) +ComputeShader2D(V_PrepareShadeCS, 16, 16) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; RWTexture2D shade = G_Dereference(frame.shade_rw); @@ -69,7 +69,7 @@ ComputeShader2D(V_PrepareShadeCS, 8, 8) } //- Prepare cells -ComputeShader2D(V_PrepareCellsCS, 8, 8) +ComputeShader2D(V_PrepareCellsCS, 16, 16) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; Texture2D tiles = G_Dereference(frame.tiles); @@ -158,7 +158,7 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8) } //- Clear particles -ComputeShader(V_ClearParticlesCS, 64) +ComputeShader(V_ClearParticlesCS, 256) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; RWStructuredBuffer particles = G_Dereference(frame.particles); @@ -236,7 +236,7 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) ////////////////////////////// //- Particle emitter shader -ComputeShader(V_EmitParticlesCS, 64) +ComputeShader(V_EmitParticlesCS, 256) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer emitters = G_Dereference(frame.emitters); @@ -267,7 +267,7 @@ ComputeShader(V_EmitParticlesCS, 64) ////////////////////////////// //- Particle sim shader -ComputeShader(V_SimParticlesCS, 64) +ComputeShader(V_SimParticlesCS, 256) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; Texture2D tiles = G_Dereference(frame.tiles); @@ -544,7 +544,7 @@ ComputeShader(V_SimParticlesCS, 64) // TODO: Remove this -ComputeShader2D(V_ShadeCS, 8, 8) +ComputeShader2D(V_ShadeCS, 16, 16) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); @@ -580,7 +580,7 @@ ComputeShader2D(V_ShadeCS, 8, 8) //////////////////////////////////////////////////////////// //~ Composite -ComputeShader2D(V_CompositeCS, 8, 8) +ComputeShader2D(V_CompositeCS, 16, 16) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; // Texture2D shade_tex = G_Dereference(frame.shade_ro); @@ -959,7 +959,7 @@ ComputeShader2D(V_CompositeCS, 8, 8) ////////////////////////////// //- Downsample -ComputeShader2D(V_BloomDownCS, 8, 8) +ComputeShader2D(V_BloomDownCS, 16, 16) { i32 mips_count = V_GpuConst_MipsCount; i32 mip_idx = V_GpuConst_MipIdx; @@ -1035,7 +1035,7 @@ ComputeShader2D(V_BloomDownCS, 8, 8) ////////////////////////////// //- Upsample -ComputeShader2D(V_BloomUpCS, 8, 8) +ComputeShader2D(V_BloomUpCS, 16, 16) { i32 mips_count = V_GpuConst_MipsCount; i32 mip_idx = V_GpuConst_MipIdx; @@ -1103,7 +1103,7 @@ ComputeShader2D(V_BloomUpCS, 8, 8) //////////////////////////////////////////////////////////// //~ Finalize -ComputeShader2D(V_FinalizeCS, 8, 8) +ComputeShader2D(V_FinalizeCS, 16, 16) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); diff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh index f176f2f8..971053b9 100644 --- a/src/pp/pp_vis/pp_vis_gpu.gh +++ b/src/pp/pp_vis/pp_vis_gpu.gh @@ -51,29 +51,29 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density); //~ Shaders //- Utility shaders -ComputeShader2D(V_PrepareCellsCS, 8, 8); -ComputeShader(V_ClearParticlesCS, 64); +ComputeShader2D(V_PrepareCellsCS, 16, 16); +ComputeShader(V_ClearParticlesCS, 256); //- Quads VertexShader(V_QuadVS, V_QuadPSInput); PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input); //- Particle simulation -ComputeShader(V_EmitParticlesCS, 64); -ComputeShader(V_SimParticlesCS, 64); +ComputeShader(V_EmitParticlesCS, 256); +ComputeShader(V_SimParticlesCS, 256); //- Shade -ComputeShader2D(V_ShadeCS, 8, 8); +ComputeShader2D(V_ShadeCS, 16, 16); //- Composite -ComputeShader2D(V_CompositeCS, 8, 8); +ComputeShader2D(V_CompositeCS, 16, 16); //- Bloom -ComputeShader2D(V_BloomDownCS, 8, 8); -ComputeShader2D(V_BloomUpCS, 8, 8); +ComputeShader2D(V_BloomDownCS, 16, 16); +ComputeShader2D(V_BloomUpCS, 16, 16); //- Finalize -ComputeShader2D(V_FinalizeCS, 8, 8); +ComputeShader2D(V_FinalizeCS, 16, 16); //- Debug shapes VertexShader(V_DVertVS, V_DVertPSInput); diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh index 43ea0eff..288ae9cc 100644 --- a/src/pp/pp_vis/pp_vis_shared.cgh +++ b/src/pp/pp_vis/pp_vis_shared.cgh @@ -373,7 +373,7 @@ Struct(V_SharedFrame) //////////////////////////////////////////////////////////// //~ Helpers -#define V_ThreadGroupSizeFromBufferSize(buffer_size) VEC3I32((((buffer_size) + 63) / 64), 1, 1) -#define V_ThreadGroupSizeFromTexSize(tex_size) VEC3I32(((tex_size).x + 7) / 8, ((tex_size).y + 7) / 8, 1) +#define V_ThreadGroupSizeFromBufferSize(buffer_size) VEC3I32((((buffer_size) + 255) / 256), 1, 1) +#define V_ThreadGroupSizeFromTexSize(tex_size) VEC3I32(((tex_size).x + 15) / 16, ((tex_size).y + 15) / 16, 1) V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind);