256 threads per vis compute group
This commit is contained in:
parent
3b7b324369
commit
8a87ec2f6b
@ -17,6 +17,7 @@
|
||||
#define SIM_CLIENT_INTERP_RATIO 2.0
|
||||
|
||||
|
||||
#define GPU_NAMES IsRtcEnabled
|
||||
#define GPU_DEBUG 0
|
||||
#define GPU_DEBUG_VALIDATION 0
|
||||
|
||||
|
||||
@ -153,9 +153,9 @@ void G_Bootstrap(void)
|
||||
|
||||
{
|
||||
G_D12_CommandQueueDesc descs[] = {
|
||||
{ .type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH },
|
||||
{ .type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL },
|
||||
{ .type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL },
|
||||
{ .type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .name = Lit("Direct Queue") },
|
||||
{ .type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .name = Lit("Compute Queue") },
|
||||
{ .type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .name = Lit("Copy Queue") },
|
||||
};
|
||||
for (u32 i = 0; i < MinU32(countof(descs), countof(G_D12.queues)); ++i)
|
||||
{
|
||||
@ -167,6 +167,7 @@ void G_Bootstrap(void)
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
hr = ID3D12Device_CreateFence(G_D12.device, 0, 0, &IID_ID3D12Fence, (void **)&queue->commit_fence);
|
||||
G_D12_SetObjectName((ID3D12Object *)queue->d3d_queue, desc.name);
|
||||
}
|
||||
if (FAILED(hr))
|
||||
{
|
||||
@ -179,22 +180,25 @@ void G_Bootstrap(void)
|
||||
//- Initialize descriptor heaps
|
||||
|
||||
{
|
||||
Struct(Dx12HeapDesc) { D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; u64 max; };
|
||||
Struct(Dx12HeapDesc) { D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; u64 max; String name; };
|
||||
Dx12HeapDesc descs[G_D12_DescriptorHeapKind_COUNT] = {
|
||||
[G_D12_DescriptorHeapKind_CbvSrvUav] = {
|
||||
.type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
|
||||
.flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
|
||||
.max = G_D12_MaxCbvSrvUavDescriptors,
|
||||
.name = Lit("Primary Resource Descriptor Heap"),
|
||||
},
|
||||
[G_D12_DescriptorHeapKind_Rtv] = {
|
||||
.type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
|
||||
.flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
|
||||
.max = G_D12_MaxRtvDescriptors,
|
||||
.name = Lit("Primary RTV Descriptor Heap"),
|
||||
},
|
||||
[G_D12_DescriptorHeapKind_Sampler] = {
|
||||
.type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
|
||||
.flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
|
||||
.max = G_D12_MaxSamplerDescriptors,
|
||||
.name = Lit("Primary Sampler Descriptor Heap"),
|
||||
},
|
||||
};
|
||||
for (G_D12_DescriptorHeapKind kind = 0; kind < countof(descs); ++kind)
|
||||
@ -309,13 +313,15 @@ void G_Bootstrap(void)
|
||||
gpu_perm, cl,
|
||||
u8,
|
||||
queue->print_buffer_size,
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = Lit("Debug print gpu buffer"),
|
||||
);
|
||||
queue->print_readback_buffer = G_PushBuffer(
|
||||
gpu_perm, cl,
|
||||
u8,
|
||||
queue->print_buffer_size,
|
||||
.flags = G_ResourceFlag_HostMemory
|
||||
.flags = G_ResourceFlag_HostMemory,
|
||||
.name = Lit("Debug print readback buffer")
|
||||
);
|
||||
queue->print_buffer_ref = G_PushRWByteAddressBufferRef(gpu_perm, queue->print_buffer);
|
||||
}
|
||||
@ -536,7 +542,7 @@ void G_D12_SetObjectName(ID3D12Object *object, String name)
|
||||
TempArena scratch = BeginScratchNoConflict();
|
||||
{
|
||||
wchar_t *name_wstr = WstrFromString(scratch.arena, name);
|
||||
ID3D12Resource_SetName(object, name_wstr);
|
||||
ID3D12Object_SetName(object, name_wstr);
|
||||
}
|
||||
EndScratch(scratch);
|
||||
}
|
||||
@ -618,8 +624,8 @@ G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc)
|
||||
pipeline_name = StringF(
|
||||
scratch.arena,
|
||||
"%F %F",
|
||||
FmtHandle(desc.cs.resource.v),
|
||||
FmtString(NameFromResource(desc.cs.resource))
|
||||
FmtString(NameFromResource(desc.cs.resource)),
|
||||
FmtHandle(desc.cs.resource.v)
|
||||
);
|
||||
}
|
||||
else
|
||||
@ -627,10 +633,10 @@ G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc)
|
||||
pipeline_name = StringF(
|
||||
scratch.arena,
|
||||
"%F %F - %F %F",
|
||||
FmtHandle(desc.vs.resource.v),
|
||||
FmtString(NameFromResource(desc.vs.resource)),
|
||||
FmtHandle(desc.ps.resource.v),
|
||||
FmtString(NameFromResource(desc.ps.resource))
|
||||
FmtHandle(desc.vs.resource.v),
|
||||
FmtString(NameFromResource(desc.ps.resource)),
|
||||
FmtHandle(desc.ps.resource.v)
|
||||
);
|
||||
}
|
||||
|
||||
@ -791,7 +797,7 @@ G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc)
|
||||
|
||||
if (ok)
|
||||
{
|
||||
if (GPU_DEBUG)
|
||||
if (GPU_NAMES)
|
||||
{
|
||||
G_D12_SetObjectName((ID3D12Object *)pso, pipeline_name);
|
||||
}
|
||||
@ -1204,7 +1210,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
|
||||
ZeroStruct(release);
|
||||
SllQueuePush(cl->releases.first, cl->releases.last, release);
|
||||
release->d3d_resource = resource->d3d_resource;
|
||||
if (GPU_DEBUG)
|
||||
if (GPU_NAMES)
|
||||
{
|
||||
StaticAssert(countof(release->name_text) == countof(resource->name_text));
|
||||
release->name_len = resource->name_len;
|
||||
@ -1315,7 +1321,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
|
||||
{
|
||||
resource->name_len = new_name.len;
|
||||
CopyBytes(resource->name_text, new_name.text, new_name.len);
|
||||
if (GPU_DEBUG)
|
||||
if (GPU_NAMES)
|
||||
{
|
||||
G_D12_SetObjectName((ID3D12Object *)resource->d3d_resource, new_name);
|
||||
}
|
||||
|
||||
@ -222,6 +222,7 @@ Struct(G_D12_CommandQueueDesc)
|
||||
{
|
||||
D3D12_COMMAND_LIST_TYPE type;
|
||||
D3D12_COMMAND_QUEUE_PRIORITY priority;
|
||||
String name;
|
||||
};
|
||||
|
||||
Struct(G_D12_Queue)
|
||||
|
||||
@ -5210,6 +5210,8 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
|
||||
{
|
||||
G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
|
||||
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
|
||||
@ -56,7 +56,7 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Prepare frame
|
||||
|
||||
ComputeShader2D(V_PrepareShadeCS, 8, 8)
|
||||
ComputeShader2D(V_PrepareShadeCS, 16, 16)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
RWTexture2D<Vec4> shade = G_Dereference<Vec4>(frame.shade_rw);
|
||||
@ -69,7 +69,7 @@ ComputeShader2D(V_PrepareShadeCS, 8, 8)
|
||||
}
|
||||
|
||||
//- Prepare cells
|
||||
ComputeShader2D(V_PrepareCellsCS, 8, 8)
|
||||
ComputeShader2D(V_PrepareCellsCS, 16, 16)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
|
||||
@ -158,7 +158,7 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8)
|
||||
}
|
||||
|
||||
//- Clear particles
|
||||
ComputeShader(V_ClearParticlesCS, 64)
|
||||
ComputeShader(V_ClearParticlesCS, 256)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
|
||||
@ -236,7 +236,7 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
|
||||
//////////////////////////////
|
||||
//- Particle emitter shader
|
||||
|
||||
ComputeShader(V_EmitParticlesCS, 64)
|
||||
ComputeShader(V_EmitParticlesCS, 256)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
StructuredBuffer<V_Emitter> emitters = G_Dereference<V_Emitter>(frame.emitters);
|
||||
@ -267,7 +267,7 @@ ComputeShader(V_EmitParticlesCS, 64)
|
||||
//////////////////////////////
|
||||
//- Particle sim shader
|
||||
|
||||
ComputeShader(V_SimParticlesCS, 64)
|
||||
ComputeShader(V_SimParticlesCS, 256)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
|
||||
@ -544,7 +544,7 @@ ComputeShader(V_SimParticlesCS, 64)
|
||||
|
||||
// TODO: Remove this
|
||||
|
||||
ComputeShader2D(V_ShadeCS, 8, 8)
|
||||
ComputeShader2D(V_ShadeCS, 16, 16)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
||||
@ -580,7 +580,7 @@ ComputeShader2D(V_ShadeCS, 8, 8)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Composite
|
||||
|
||||
ComputeShader2D(V_CompositeCS, 8, 8)
|
||||
ComputeShader2D(V_CompositeCS, 16, 16)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
|
||||
@ -959,7 +959,7 @@ ComputeShader2D(V_CompositeCS, 8, 8)
|
||||
//////////////////////////////
|
||||
//- Downsample
|
||||
|
||||
ComputeShader2D(V_BloomDownCS, 8, 8)
|
||||
ComputeShader2D(V_BloomDownCS, 16, 16)
|
||||
{
|
||||
i32 mips_count = V_GpuConst_MipsCount;
|
||||
i32 mip_idx = V_GpuConst_MipIdx;
|
||||
@ -1035,7 +1035,7 @@ ComputeShader2D(V_BloomDownCS, 8, 8)
|
||||
//////////////////////////////
|
||||
//- Upsample
|
||||
|
||||
ComputeShader2D(V_BloomUpCS, 8, 8)
|
||||
ComputeShader2D(V_BloomUpCS, 16, 16)
|
||||
{
|
||||
i32 mips_count = V_GpuConst_MipsCount;
|
||||
i32 mip_idx = V_GpuConst_MipIdx;
|
||||
@ -1103,7 +1103,7 @@ ComputeShader2D(V_BloomUpCS, 8, 8)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Finalize
|
||||
|
||||
ComputeShader2D(V_FinalizeCS, 8, 8)
|
||||
ComputeShader2D(V_FinalizeCS, 16, 16)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
|
||||
@ -51,29 +51,29 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density);
|
||||
//~ Shaders
|
||||
|
||||
//- Utility shaders
|
||||
ComputeShader2D(V_PrepareCellsCS, 8, 8);
|
||||
ComputeShader(V_ClearParticlesCS, 64);
|
||||
ComputeShader2D(V_PrepareCellsCS, 16, 16);
|
||||
ComputeShader(V_ClearParticlesCS, 256);
|
||||
|
||||
//- Quads
|
||||
VertexShader(V_QuadVS, V_QuadPSInput);
|
||||
PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input);
|
||||
|
||||
//- Particle simulation
|
||||
ComputeShader(V_EmitParticlesCS, 64);
|
||||
ComputeShader(V_SimParticlesCS, 64);
|
||||
ComputeShader(V_EmitParticlesCS, 256);
|
||||
ComputeShader(V_SimParticlesCS, 256);
|
||||
|
||||
//- Shade
|
||||
ComputeShader2D(V_ShadeCS, 8, 8);
|
||||
ComputeShader2D(V_ShadeCS, 16, 16);
|
||||
|
||||
//- Composite
|
||||
ComputeShader2D(V_CompositeCS, 8, 8);
|
||||
ComputeShader2D(V_CompositeCS, 16, 16);
|
||||
|
||||
//- Bloom
|
||||
ComputeShader2D(V_BloomDownCS, 8, 8);
|
||||
ComputeShader2D(V_BloomUpCS, 8, 8);
|
||||
ComputeShader2D(V_BloomDownCS, 16, 16);
|
||||
ComputeShader2D(V_BloomUpCS, 16, 16);
|
||||
|
||||
//- Finalize
|
||||
ComputeShader2D(V_FinalizeCS, 8, 8);
|
||||
ComputeShader2D(V_FinalizeCS, 16, 16);
|
||||
|
||||
//- Debug shapes
|
||||
VertexShader(V_DVertVS, V_DVertPSInput);
|
||||
|
||||
@ -373,7 +373,7 @@ Struct(V_SharedFrame)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Helpers
|
||||
|
||||
#define V_ThreadGroupSizeFromBufferSize(buffer_size) VEC3I32((((buffer_size) + 63) / 64), 1, 1)
|
||||
#define V_ThreadGroupSizeFromTexSize(tex_size) VEC3I32(((tex_size).x + 7) / 8, ((tex_size).y + 7) / 8, 1)
|
||||
#define V_ThreadGroupSizeFromBufferSize(buffer_size) VEC3I32((((buffer_size) + 255) / 256), 1, 1)
|
||||
#define V_ThreadGroupSizeFromTexSize(tex_size) VEC3I32(((tex_size).x + 15) / 16, ((tex_size).y + 15) / 16, 1)
|
||||
|
||||
V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user