use dimension-specific vector types for compute shader parameters

This commit is contained in:
jacob 2026-03-19 17:01:55 -05:00
parent b63b6197a6
commit cbcec3639f
3 changed files with 310 additions and 258 deletions

View File

@ -754,10 +754,11 @@ Struct(ComputeShaderDesc) { ResourceKey resource; u32 x, y, z; };
[numthreads(CAT(name,__GroupSize_X), CAT(name,__GroupSize_Y), CAT(name,__GroupSize_Z))] \
void name( \
u32 Semantic(SV_GroupIndex), \
Vec3U32 Semantic(SV_GroupID), \
Vec3U32 Semantic(SV_GroupThreadID), \
Vec3U32 Semantic(SV_DispatchThreadID) \
)
CAT(name,__ThreadDimsType) Semantic(SV_GroupID), \
CAT(name,__ThreadDimsType) Semantic(SV_GroupThreadID), \
CAT(name,__ThreadDimsType) Semantic(SV_DispatchThreadID) \
) \
/* ----------------------------------------------------------------------------------- */
#endif
#if IsCpu
@ -770,6 +771,7 @@ Struct(ComputeShaderDesc) { ResourceKey resource; u32 x, y, z; };
#define DeclPixelShader(name, resource_hash)
#endif
////////////////////////////////////////////////////////////
//~ Dynamic api linkage

View File

@ -669,6 +669,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
//- Generate C file
StringList shader_lines = Zi;
StringList shader_thread_dim_type_lines = Zi;
{
StringList c_store_lines = Zi;
StringList c_include_lines = Zi;
@ -715,14 +716,9 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
{
if (arg0_tok->valid)
{
String decl_type = (
kind == M_EntryKind_VertexShader ? Lit("DeclVertexShader") :
kind == M_EntryKind_PixelShader ? Lit("DeclPixelShader") :
kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") :
Lit("")
);
String shader_name = arg0_tok->s;
Vec3U32 thread_count = Zi;
Vec3U32 thread_dims = Zi;
i32 thread_dims_count = 1;
{
StringList thread_count_args = Zi;
for (i32 arg_idx = 1; arg_idx < countof(entry->arg_tokens); ++arg_idx)
@ -739,28 +735,61 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
}
String thread_count_str = StringFromList(perm, thread_count_args, Lit(" "));
Vec3 tmp = CR_Vec3FromString(thread_count_str);
thread_count.x = MaxI32(tmp.x, 1);
thread_count.y = MaxI32(tmp.y, 1);
thread_count.z = MaxI32(tmp.z, 1);
thread_dims.x = MaxI32(tmp.x, 1);
thread_dims.y = MaxI32(tmp.y, 1);
thread_dims.z = MaxI32(tmp.z, 1);
// Determine the compute shader's dimensionality (1 to 3) by counting the commas in the thread count string (N commas => N + 1 dimensions)
for (u64 char_idx = 0; char_idx < thread_count_str.len; ++char_idx)
{
u8 c = thread_count_str.text[char_idx];
if (c == ',')
{
thread_dims_count += 1;
}
}
thread_dims_count = ClampI32(thread_dims_count, 1, 3);
}
String decl_type = (
kind == M_EntryKind_VertexShader ? Lit("DeclVertexShader") :
kind == M_EntryKind_PixelShader ? Lit("DeclPixelShader") :
kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") :
Lit("")
);
u64 shader_resource_hash = HashStringEx(shader_store_hash, StringF(perm, "%F.dxil", FmtString(shader_name)));
String lines = Zi;
// Dims type line
if (kind == M_EntryKind_ComputeShader)
{
lines = StringF(
String line = StringF(
perm,
"#define %F__ThreadDimsType %F",
FmtString(shader_name),
FmtString(
thread_dims_count == 1 ? Lit("u32") :
thread_dims_count == 2 ? Lit("Vec2U32") :
Lit("Vec3U32")
)
);
PushStringToList(perm, &shader_thread_dim_type_lines, line);
}
// Shader line
{
String line = Zi;
if (kind == M_EntryKind_ComputeShader)
{
line = StringF(
perm,
"%F(%F, 0x%F, %F, %F, %F);",
FmtString(decl_type),
FmtString(shader_name),
FmtHex(shader_resource_hash),
FmtUint(thread_count.x),
FmtUint(thread_count.y),
FmtUint(thread_count.z)
FmtUint(thread_dims.x),
FmtUint(thread_dims.y),
FmtUint(thread_dims.z)
);
}
else
{
lines = StringF(
line = StringF(
perm,
"%F(%F, 0x%F);",
FmtString(decl_type),
@ -768,7 +797,8 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
FmtHex(shader_resource_hash)
);
}
PushStringToList(perm, &shader_lines, lines);
PushStringToList(perm, &shader_lines, line);
}
}
else
{
@ -836,6 +866,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &c_out_lines, n->s);
}
}
// Define shader dimension types
if (shader_thread_dim_type_lines.count > 0)
{
PushStringToList(perm, &c_out_lines, Lit(""));
PushStringToList(perm, &c_out_lines, Lit("//- Shader thread dimension types"));
for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next)
{
PushStringToList(perm, &c_out_lines, n->s);
}
}
// Define shaders
if (shader_lines.count > 0)
{
@ -975,6 +1015,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &gpu_out_lines, Lit("//- Base layer includes"));
PushStringToList(perm, &gpu_out_lines, StringF(perm, "#include \"%F\"", FmtString(base_inc_path)));
}
// Define shader dimension types
if (shader_thread_dim_type_lines.count > 0)
{
PushStringToList(perm, &gpu_out_lines, Lit(""));
PushStringToList(perm, &gpu_out_lines, Lit("//- Shader thread dimension types"));
for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next)
{
PushStringToList(perm, &gpu_out_lines, n->s);
}
}
// Define shaders
if (shader_lines.count > 0)
{

View File

@ -373,10 +373,10 @@ ComputeShader(V_EmitParticlesCS)
{
u32 particle_idx = (emitter.first_particle_seq + emitter_particle_idx) % (u32)V_ParticlesCap;
// InterlockedMin guarantees that the highest emitter index (reflected
// as negative particle kind) will be used to initialize the particle
// this frame, in case multiple emitters target the same particle (e.g.
// more particles pushed this frame than are available in the buffer)
// Using InterlockedMin guarantees that the highest emitter index
// (reflected as negative particle kind) will be used to initialize the
// particle this frame, in case multiple emitters target the same particle
// (e.g. more particles were pushed this frame than are available in the buffer)
InterlockedMin(particles[particle_idx].kind, semantic_particle_kind);
}
}
@ -393,16 +393,11 @@ ComputeShader(V_SimParticlesCS)
Texture2D<u32> occluders = G_Deref(frame.occluders, Texture2D<u32>);
u32 particle_idx = SV_DispatchThreadID;
if (particle_idx < V_ParticlesCap)
if (particle_idx < V_ParticlesCap && particles[particle_idx].kind != V_ParticleKind_None)
{
V_Particle particle = particles[particle_idx];
b32 prune = 0;
//////////////////////////////
//- Initialize particle
if (particle.kind != V_ParticleKind_None)
{
u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx);
f32 rand_offset = Norm16(seed0 >> 0);
f32 rand_angle = Norm16(seed0 >> 16);
@ -410,7 +405,7 @@ ComputeShader(V_SimParticlesCS)
f32 rand_falloff = Norm16(seed0 >> 48);
//////////////////////////////
//- Init
//- Init particle
if (particle.kind < 0)
{
@ -427,6 +422,9 @@ ComputeShader(V_SimParticlesCS)
particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed;
}
//////////////////////////////
//- Simulate
if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
{
V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
@ -647,6 +645,9 @@ ComputeShader(V_SimParticlesCS)
particle.life += frame.dt;
}
//////////////////////////////
//- Commit
if (prune)
{
particle.kind = V_ParticleKind_None;
@ -655,7 +656,6 @@ ComputeShader(V_SimParticlesCS)
particles[particle_idx] = particle;
}
}
}
////////////////////////////////////////////////////////////
//~ Shade