use dimension-specific vector types for compute shader parameters

This commit is contained in:
jacob 2026-03-19 17:01:55 -05:00
parent b63b6197a6
commit cbcec3639f
3 changed files with 310 additions and 258 deletions

View File

@ -744,32 +744,34 @@ Struct(VertexShaderDesc) { ResourceKey resource; u32 x, y, z; };
Struct(PixelShaderDesc) { ResourceKey resource; u32 x, y, z; }; Struct(PixelShaderDesc) { ResourceKey resource; u32 x, y, z; };
Struct(ComputeShaderDesc) { ResourceKey resource; u32 x, y, z; }; Struct(ComputeShaderDesc) { ResourceKey resource; u32 x, y, z; };
#define GroupSize(name) VEC3U32(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z)) #define GroupSize(name) VEC3U32(CAT(name,__GroupSize_X), CAT(name,__GroupSize_Y), CAT(name,__GroupSize_Z))
#if IsGpu #if IsGpu
#define Semantic(name) name : name #define Semantic(name) name : name
#define VertexShader(name, return_type) return_type name(u32 Semantic(SV_InstanceID), u32 Semantic(SV_VertexID)) #define VertexShader(name, return_type) return_type name(u32 Semantic(SV_InstanceID), u32 Semantic(SV_VertexID))
#define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__) #define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__)
#define ComputeShader(name) \ #define ComputeShader(name) \
[numthreads(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z))] \ [numthreads(CAT(name,__GroupSize_X), CAT(name,__GroupSize_Y), CAT(name,__GroupSize_Z))] \
void name( \ void name( \
u32 Semantic(SV_GroupIndex), \ u32 Semantic(SV_GroupIndex), \
Vec3U32 Semantic(SV_GroupID), \ CAT(name,__ThreadDimsType) Semantic(SV_GroupID), \
Vec3U32 Semantic(SV_GroupThreadID), \ CAT(name,__ThreadDimsType) Semantic(SV_GroupThreadID), \
Vec3U32 Semantic(SV_DispatchThreadID) \ CAT(name,__ThreadDimsType) Semantic(SV_DispatchThreadID) \
) ) \
/* ----------------------------------------------------------------------------------- */
#endif #endif
#if IsCpu #if IsCpu
#define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z }; static ComputeShaderDesc name = { resource_hash, x, y, z } #define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name,__GroupSize_X) = x, CAT(name,__GroupSize_Y) = y, CAT(name,__GroupSize_Z) = z }; static ComputeShaderDesc name = { resource_hash, x, y, z }
#define DeclVertexShader(name, resource_hash) static VertexShaderDesc name = { resource_hash, 1, 1, 1 } #define DeclVertexShader(name, resource_hash) static VertexShaderDesc name = { resource_hash, 1, 1, 1 }
#define DeclPixelShader(name, resource_hash) static PixelShaderDesc name = { resource_hash, 1, 1, 1 } #define DeclPixelShader(name, resource_hash) static PixelShaderDesc name = { resource_hash, 1, 1, 1 }
#elif IsGpu #elif IsGpu
#define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z }; #define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name,__GroupSize_X) = x, CAT(name,__GroupSize_Y) = y, CAT(name,__GroupSize_Z) = z };
#define DeclVertexShader(name, resource_hash) #define DeclVertexShader(name, resource_hash)
#define DeclPixelShader(name, resource_hash) #define DeclPixelShader(name, resource_hash)
#endif #endif
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Dynamic api linkage //~ Dynamic api linkage

View File

@ -669,6 +669,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
//- Generate C file //- Generate C file
StringList shader_lines = Zi; StringList shader_lines = Zi;
StringList shader_thread_dim_type_lines = Zi;
{ {
StringList c_store_lines = Zi; StringList c_store_lines = Zi;
StringList c_include_lines = Zi; StringList c_include_lines = Zi;
@ -715,14 +716,9 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
{ {
if (arg0_tok->valid) if (arg0_tok->valid)
{ {
String decl_type = (
kind == M_EntryKind_VertexShader ? Lit("DeclVertexShader") :
kind == M_EntryKind_PixelShader ? Lit("DeclPixelShader") :
kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") :
Lit("")
);
String shader_name = arg0_tok->s; String shader_name = arg0_tok->s;
Vec3U32 thread_count = Zi; Vec3U32 thread_dims = Zi;
i32 thread_dims_count = 1;
{ {
StringList thread_count_args = Zi; StringList thread_count_args = Zi;
for (i32 arg_idx = 1; arg_idx < countof(entry->arg_tokens); ++arg_idx) for (i32 arg_idx = 1; arg_idx < countof(entry->arg_tokens); ++arg_idx)
@ -739,28 +735,61 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
} }
String thread_count_str = StringFromList(perm, thread_count_args, Lit(" ")); String thread_count_str = StringFromList(perm, thread_count_args, Lit(" "));
Vec3 tmp = CR_Vec3FromString(thread_count_str); Vec3 tmp = CR_Vec3FromString(thread_count_str);
thread_count.x = MaxI32(tmp.x, 1); thread_dims.x = MaxI32(tmp.x, 1);
thread_count.y = MaxI32(tmp.y, 1); thread_dims.y = MaxI32(tmp.y, 1);
thread_count.z = MaxI32(tmp.z, 1); thread_dims.z = MaxI32(tmp.z, 1);
// Determine compute shader dimensions by counting comma-separated values in dimensions string
for (u64 char_idx = 0; char_idx < thread_count_str.len; ++char_idx)
{
u8 c = thread_count_str.text[char_idx];
if (c == ',')
{
thread_dims_count += 1;
} }
}
thread_dims_count = ClampI32(thread_dims_count, 1, 3);
}
String decl_type = (
kind == M_EntryKind_VertexShader ? Lit("DeclVertexShader") :
kind == M_EntryKind_PixelShader ? Lit("DeclPixelShader") :
kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") :
Lit("")
);
u64 shader_resource_hash = HashStringEx(shader_store_hash, StringF(perm, "%F.dxil", FmtString(shader_name))); u64 shader_resource_hash = HashStringEx(shader_store_hash, StringF(perm, "%F.dxil", FmtString(shader_name)));
String lines = Zi; // Dims type line
if (kind == M_EntryKind_ComputeShader) if (kind == M_EntryKind_ComputeShader)
{ {
lines = StringF( String line = StringF(
perm,
"#define %F__ThreadDimsType %F",
FmtString(shader_name),
FmtString(
thread_dims_count == 1 ? Lit("u32") :
thread_dims_count == 2 ? Lit("Vec2U32") :
Lit("Vec3U32")
)
);
PushStringToList(perm, &shader_thread_dim_type_lines, line);
}
// Shader line
{
String line = Zi;
if (kind == M_EntryKind_ComputeShader)
{
line = StringF(
perm, perm,
"%F(%F, 0x%F, %F, %F, %F);", "%F(%F, 0x%F, %F, %F, %F);",
FmtString(decl_type), FmtString(decl_type),
FmtString(shader_name), FmtString(shader_name),
FmtHex(shader_resource_hash), FmtHex(shader_resource_hash),
FmtUint(thread_count.x), FmtUint(thread_dims.x),
FmtUint(thread_count.y), FmtUint(thread_dims.y),
FmtUint(thread_count.z) FmtUint(thread_dims.z)
); );
} }
else else
{ {
lines = StringF( line = StringF(
perm, perm,
"%F(%F, 0x%F);", "%F(%F, 0x%F);",
FmtString(decl_type), FmtString(decl_type),
@ -768,7 +797,8 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
FmtHex(shader_resource_hash) FmtHex(shader_resource_hash)
); );
} }
PushStringToList(perm, &shader_lines, lines); PushStringToList(perm, &shader_lines, line);
}
} }
else else
{ {
@ -836,6 +866,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &c_out_lines, n->s); PushStringToList(perm, &c_out_lines, n->s);
} }
} }
// Define shader dimension types
if (shader_thread_dim_type_lines.count > 0)
{
PushStringToList(perm, &c_out_lines, Lit(""));
PushStringToList(perm, &c_out_lines, Lit("//- Shader thread dimension types"));
for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next)
{
PushStringToList(perm, &c_out_lines, n->s);
}
}
// Define shaders // Define shaders
if (shader_lines.count > 0) if (shader_lines.count > 0)
{ {
@ -975,6 +1015,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &gpu_out_lines, Lit("//- Base layer includes")); PushStringToList(perm, &gpu_out_lines, Lit("//- Base layer includes"));
PushStringToList(perm, &gpu_out_lines, StringF(perm, "#include \"%F\"", FmtString(base_inc_path))); PushStringToList(perm, &gpu_out_lines, StringF(perm, "#include \"%F\"", FmtString(base_inc_path)));
} }
// Define shader dimension types
if (shader_thread_dim_type_lines.count > 0)
{
PushStringToList(perm, &gpu_out_lines, Lit(""));
PushStringToList(perm, &gpu_out_lines, Lit("//- Shader thread dimension types"));
for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next)
{
PushStringToList(perm, &gpu_out_lines, n->s);
}
}
// Define shaders // Define shaders
if (shader_lines.count > 0) if (shader_lines.count > 0)
{ {

View File

@ -373,10 +373,10 @@ ComputeShader(V_EmitParticlesCS)
{ {
u32 particle_idx = (emitter.first_particle_seq + emitter_particle_idx) % (u32)V_ParticlesCap; u32 particle_idx = (emitter.first_particle_seq + emitter_particle_idx) % (u32)V_ParticlesCap;
// InterlockedMin guarantees that the highest emitter index (reflected // Using InterlockedMin guarantees that the highest emitter index
// as negative particle kind) will be used to initialize the particle // (reflected as negative particle kind) will be used to initialize the
// this frame, in case multiple emitters target the same particle (e.g. // particle this frame, in case multiple emitters target the same particle
// more particles pushed this frame than are available in the buffer) // (e.g. more particles were pushed this frame than are available in the buffer)
InterlockedMin(particles[particle_idx].kind, semantic_particle_kind); InterlockedMin(particles[particle_idx].kind, semantic_particle_kind);
} }
} }
@ -393,16 +393,11 @@ ComputeShader(V_SimParticlesCS)
Texture2D<u32> occluders = G_Deref(frame.occluders, Texture2D<u32>); Texture2D<u32> occluders = G_Deref(frame.occluders, Texture2D<u32>);
u32 particle_idx = SV_DispatchThreadID; u32 particle_idx = SV_DispatchThreadID;
if (particle_idx < V_ParticlesCap) if (particle_idx < V_ParticlesCap && particles[particle_idx].kind != V_ParticleKind_None)
{ {
V_Particle particle = particles[particle_idx]; V_Particle particle = particles[particle_idx];
b32 prune = 0; b32 prune = 0;
//////////////////////////////
//- Initialize particle
if (particle.kind != V_ParticleKind_None)
{
u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx); u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx);
f32 rand_offset = Norm16(seed0 >> 0); f32 rand_offset = Norm16(seed0 >> 0);
f32 rand_angle = Norm16(seed0 >> 16); f32 rand_angle = Norm16(seed0 >> 16);
@ -410,7 +405,7 @@ ComputeShader(V_SimParticlesCS)
f32 rand_falloff = Norm16(seed0 >> 48); f32 rand_falloff = Norm16(seed0 >> 48);
////////////////////////////// //////////////////////////////
//- Init //- Init particle
if (particle.kind < 0) if (particle.kind < 0)
{ {
@ -427,6 +422,9 @@ ComputeShader(V_SimParticlesCS)
particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed; particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed;
} }
//////////////////////////////
//- Simulate
if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune) if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
{ {
V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind); V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
@ -647,6 +645,9 @@ ComputeShader(V_SimParticlesCS)
particle.life += frame.dt; particle.life += frame.dt;
} }
//////////////////////////////
//- Commit
if (prune) if (prune)
{ {
particle.kind = V_ParticleKind_None; particle.kind = V_ParticleKind_None;
@ -654,7 +655,6 @@ ComputeShader(V_SimParticlesCS)
particles[particle_idx] = particle; particles[particle_idx] = particle;
} }
}
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////