use dimension-specific vector types for compute shader parameters

2026-03-19 17:01:55 -05:00 · 2026-03-19 17:01:55 -05:00 · cbcec3639f
commit cbcec3639f
parent b63b6197a6
3 changed files with 310 additions and 258 deletions
--- a/src/base/base.cgh
+++ b/src/base/base.cgh
@ -744,32 +744,34 @@ Struct(VertexShaderDesc)    { ResourceKey resource; u32 x, y, z; };
 Struct(PixelShaderDesc)     { ResourceKey resource; u32 x, y, z; };
 Struct(ComputeShaderDesc)   { ResourceKey resource; u32 x, y, z; };
-#define GroupSize(name) VEC3U32(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z))
+#define GroupSize(name) VEC3U32(CAT(name,__GroupSize_X), CAT(name,__GroupSize_Y), CAT(name,__GroupSize_Z))
 #if IsGpu
  #define Semantic(name) name : name
  #define VertexShader(name, return_type)       return_type name(u32 Semantic(SV_InstanceID), u32 Semantic(SV_VertexID))
  #define PixelShader(name, return_type, ...)   return_type name(__VA_ARGS__)
  #define ComputeShader(name)                                                               \
-    [numthreads(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z))]          \
+    [numthreads(CAT(name,__GroupSize_X), CAT(name,__GroupSize_Y), CAT(name,__GroupSize_Z))] \
    void name(                                                                              \
      u32 Semantic(SV_GroupIndex),                                                          \
-      Vec3U32 Semantic(SV_GroupID),                                                                     \
+      CAT(name,__ThreadDimsType) Semantic(SV_GroupID),                                      \
-      Vec3U32 Semantic(SV_GroupThreadID),                                                               \
+      CAT(name,__ThreadDimsType) Semantic(SV_GroupThreadID),                                \
-      Vec3U32 Semantic(SV_DispatchThreadID)                                                             \
+      CAT(name,__ThreadDimsType) Semantic(SV_DispatchThreadID)                              \
-    )
+    )                                                                                       \
    /* ----------------------------------------------------------------------------------- */
 #endif
 #if IsCpu
-  #define DeclComputeShader(name, resource_hash, x, y, z)   enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z }; static ComputeShaderDesc name = { resource_hash, x, y, z }
+  #define DeclComputeShader(name, resource_hash, x, y, z)   enum { CAT(name,__GroupSize_X) = x, CAT(name,__GroupSize_Y) = y, CAT(name,__GroupSize_Z) = z }; static ComputeShaderDesc name = { resource_hash, x, y, z }
  #define DeclVertexShader(name, resource_hash)             static VertexShaderDesc name = { resource_hash, 1, 1, 1 }
  #define DeclPixelShader(name, resource_hash)              static PixelShaderDesc name = { resource_hash, 1, 1, 1 }
 #elif IsGpu
-  #define DeclComputeShader(name, resource_hash, x, y, z)   enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z };
+  #define DeclComputeShader(name, resource_hash, x, y, z)   enum { CAT(name,__GroupSize_X) = x, CAT(name,__GroupSize_Y) = y, CAT(name,__GroupSize_Z) = z };
  #define DeclVertexShader(name, resource_hash)
  #define DeclPixelShader(name, resource_hash)
 #endif
 ////////////////////////////////////////////////////////////
 //~ Dynamic api linkage
--- a/src/meta/meta.c
+++ b/src/meta/meta.c
@ -669,6 +669,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
    //- Generate C file
    StringList shader_lines = Zi;
    StringList shader_thread_dim_type_lines = Zi;
    {
      StringList c_store_lines = Zi;
      StringList c_include_lines = Zi;
@ -715,14 +716,9 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
            {
              if (arg0_tok->valid)
              {
                String decl_type = (
                  kind == M_EntryKind_VertexShader  ? Lit("DeclVertexShader") :
                  kind == M_EntryKind_PixelShader   ? Lit("DeclPixelShader") :
                  kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") :
                  Lit("")
                );
                String shader_name = arg0_tok->s;
-                Vec3U32 thread_count = Zi;
+                Vec3U32 thread_dims = Zi;
                i32 thread_dims_count = 1;
                {
                  StringList thread_count_args = Zi;
                  for (i32 arg_idx = 1; arg_idx < countof(entry->arg_tokens); ++arg_idx)
@ -739,28 +735,61 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
                  }
                  String thread_count_str = StringFromList(perm, thread_count_args, Lit(" "));
                  Vec3 tmp = CR_Vec3FromString(thread_count_str);
-                  thread_count.x = MaxI32(tmp.x, 1);
+                  thread_dims.x = MaxI32(tmp.x, 1);
-                  thread_count.y = MaxI32(tmp.y, 1);
+                  thread_dims.y = MaxI32(tmp.y, 1);
-                  thread_count.z = MaxI32(tmp.z, 1);
+                  thread_dims.z = MaxI32(tmp.z, 1);
                  // Determine the compute shader's dimensionality (1D, 2D or 3D) by counting commas in the thread count string
                  for (u64 char_idx = 0; char_idx < thread_count_str.len; ++char_idx)
                  {
                    u8 c = thread_count_str.text[char_idx];
                    if (c == ',')
                    {
                      thread_dims_count += 1;
                    }
                  }
                  thread_dims_count = ClampI32(thread_dims_count, 1, 3);
                }
                String decl_type = (
                  kind == M_EntryKind_VertexShader  ? Lit("DeclVertexShader") :
                  kind == M_EntryKind_PixelShader   ? Lit("DeclPixelShader") :
                  kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") :
                  Lit("")
                );
                u64 shader_resource_hash = HashStringEx(shader_store_hash, StringF(perm, "%F.dxil", FmtString(shader_name)));
-                String lines = Zi;
+                // Thread dims type line (emits `#define <shader>__ThreadDimsType <type>` for compute shaders)
                if (kind == M_EntryKind_ComputeShader)
                {
-                  lines = StringF(
+                  String line = StringF(
                    perm,
                    "#define %F__ThreadDimsType %F",
                    FmtString(shader_name),
                    FmtString(
                      thread_dims_count == 1 ? Lit("u32") :
                      thread_dims_count == 2 ? Lit("Vec2U32") :
                      Lit("Vec3U32")
                    )
                  );
                  PushStringToList(perm, &shader_thread_dim_type_lines, line);
                }
                // Shader line
                {
                  String line = Zi;
                  if (kind == M_EntryKind_ComputeShader)
                  {
                    line = StringF(
                      perm,
                      "%F(%F, 0x%F, %F, %F, %F);",
                      FmtString(decl_type),
                      FmtString(shader_name),
                      FmtHex(shader_resource_hash),
-                    FmtUint(thread_count.x),
+                      FmtUint(thread_dims.x),
-                    FmtUint(thread_count.y),
+                      FmtUint(thread_dims.y),
-                    FmtUint(thread_count.z)
+                      FmtUint(thread_dims.z)
                    );
                  }
                  else
                  {
-                  lines = StringF(
+                    line = StringF(
                      perm,
                      "%F(%F, 0x%F);",
                      FmtString(decl_type),
@ -768,7 +797,8 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
                      FmtHex(shader_resource_hash)
                    );
                  }
-                PushStringToList(perm, &shader_lines, lines);
+                  PushStringToList(perm, &shader_lines, line);
                }
              }
              else
              {
@ -836,6 +866,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
            PushStringToList(perm, &c_out_lines, n->s);
          }
        }
        // Define shader dimension types
        if (shader_thread_dim_type_lines.count > 0)
        {
          PushStringToList(perm, &c_out_lines, Lit(""));
          PushStringToList(perm, &c_out_lines, Lit("//- Shader thread dimension types"));
          for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next)
          {
            PushStringToList(perm, &c_out_lines, n->s);
          }
        }
        // Define shaders
        if (shader_lines.count > 0)
        {
@ -975,6 +1015,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
            PushStringToList(perm, &gpu_out_lines, Lit("//- Base layer includes"));
            PushStringToList(perm, &gpu_out_lines, StringF(perm, "#include \"%F\"", FmtString(base_inc_path)));
          }
          // Define shader dimension types
          if (shader_thread_dim_type_lines.count > 0)
          {
            PushStringToList(perm, &gpu_out_lines, Lit(""));
            PushStringToList(perm, &gpu_out_lines, Lit("//- Shader thread dimension types"));
            for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next)
            {
              PushStringToList(perm, &gpu_out_lines, n->s);
            }
          }
          // Define shaders
          if (shader_lines.count > 0)
          {
--- a/src/pp/pp_vis/pp_vis_gpu.g
+++ b/src/pp/pp_vis/pp_vis_gpu.g
@ -373,10 +373,10 @@ ComputeShader(V_EmitParticlesCS)
    {
      u32 particle_idx = (emitter.first_particle_seq + emitter_particle_idx) % (u32)V_ParticlesCap;
-      // InterlockedMin guarantees that the highest emitter index (reflected
+      // Using InterlockedMin guarantees that the highest emitter index
-      // as negative particle kind) will be used to initialize the particle
+      // (reflected as negative particle kind) will be used to initialize the
-      // this frame, in case multiple emitters target the same particle (e.g.
+      // particle this frame, in case multiple emitters target the same particle
-      // more particles pushed this frame than are available in the buffer)
+      // (e.g. more particles were pushed this frame than are available in the buffer)
      InterlockedMin(particles[particle_idx].kind, semantic_particle_kind);
    }
  }
@ -393,16 +393,11 @@ ComputeShader(V_SimParticlesCS)
  Texture2D<u32> occluders = G_Deref(frame.occluders, Texture2D<u32>);
  u32 particle_idx = SV_DispatchThreadID;
-  if (particle_idx < V_ParticlesCap)
+  if (particle_idx < V_ParticlesCap && particles[particle_idx].kind != V_ParticleKind_None)
  {
    V_Particle particle = particles[particle_idx];
    b32 prune = 0;
    //////////////////////////////
    //- Initialize particle
    if (particle.kind != V_ParticleKind_None)
    {
    u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx);
    f32 rand_offset = Norm16(seed0 >> 0);
    f32 rand_angle = Norm16(seed0 >> 16);
@ -410,7 +405,7 @@ ComputeShader(V_SimParticlesCS)
    f32 rand_falloff = Norm16(seed0 >> 48);
    //////////////////////////////
-      //- Init
+    //- Init particle
    if (particle.kind < 0)
    {
@ -427,6 +422,9 @@ ComputeShader(V_SimParticlesCS)
      particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed;
    }
    //////////////////////////////
    //- Simulate
    if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
    {
      V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
@ -647,6 +645,9 @@ ComputeShader(V_SimParticlesCS)
      particle.life += frame.dt;
    }
    //////////////////////////////
    //- Commit
    if (prune)
    {
      particle.kind = V_ParticleKind_None;
@ -654,7 +655,6 @@ ComputeShader(V_SimParticlesCS)
    particles[particle_idx] = particle;
  }
  }
 }
 ////////////////////////////////////////////////////////////