From cbcec3639ff05dd47790cdc427dc64d6cec57713 Mon Sep 17 00:00:00 2001 From: jacob Date: Thu, 19 Mar 2026 17:01:55 -0500 Subject: [PATCH] use dimension-specific vector types for compute shader parameters --- src/base/base.cgh | 28 +-- src/meta/meta.c | 104 ++++++--- src/pp/pp_vis/pp_vis_gpu.g | 436 ++++++++++++++++++------------------- 3 files changed, 310 insertions(+), 258 deletions(-) diff --git a/src/base/base.cgh b/src/base/base.cgh index 62e7a64d..3fc7cd7e 100644 --- a/src/base/base.cgh +++ b/src/base/base.cgh @@ -744,32 +744,34 @@ Struct(VertexShaderDesc) { ResourceKey resource; u32 x, y, z; }; Struct(PixelShaderDesc) { ResourceKey resource; u32 x, y, z; }; Struct(ComputeShaderDesc) { ResourceKey resource; u32 x, y, z; }; -#define GroupSize(name) VEC3U32(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z)) +#define GroupSize(name) VEC3U32(CAT(name,__GroupSize_X), CAT(name,__GroupSize_Y), CAT(name,__GroupSize_Z)) #if IsGpu #define Semantic(name) name : name - #define VertexShader(name, return_type) return_type name(u32 Semantic(SV_InstanceID), u32 Semantic(SV_VertexID)) - #define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__) - #define ComputeShader(name) \ - [numthreads(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z))] \ - void name( \ - u32 Semantic(SV_GroupIndex), \ - Vec3U32 Semantic(SV_GroupID), \ - Vec3U32 Semantic(SV_GroupThreadID), \ - Vec3U32 Semantic(SV_DispatchThreadID) \ - ) + #define VertexShader(name, return_type) return_type name(u32 Semantic(SV_InstanceID), u32 Semantic(SV_VertexID)) + #define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__) + #define ComputeShader(name) \ + [numthreads(CAT(name,__GroupSize_X), CAT(name,__GroupSize_Y), CAT(name,__GroupSize_Z))] \ + void name( \ + u32 Semantic(SV_GroupIndex), \ + CAT(name,__ThreadDimsType) Semantic(SV_GroupID), \ + CAT(name,__ThreadDimsType) Semantic(SV_GroupThreadID), \ + CAT(name,__ThreadDimsType) Semantic(SV_DispatchThreadID) \ + ) \ + /* ----------------------------------------------------------------------------------- */ #endif #if IsCpu - #define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z }; static ComputeShaderDesc name = { resource_hash, x, y, z } + #define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name,__GroupSize_X) = x, CAT(name,__GroupSize_Y) = y, CAT(name,__GroupSize_Z) = z }; static ComputeShaderDesc name = { resource_hash, x, y, z } #define DeclVertexShader(name, resource_hash) static VertexShaderDesc name = { resource_hash, 1, 1, 1 } #define DeclPixelShader(name, resource_hash) static PixelShaderDesc name = { resource_hash, 1, 1, 1 } #elif IsGpu - #define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z }; + #define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name,__GroupSize_X) = x, CAT(name,__GroupSize_Y) = y, CAT(name,__GroupSize_Z) = z }; #define DeclVertexShader(name, resource_hash) #define DeclPixelShader(name, resource_hash) #endif + //////////////////////////////////////////////////////////// //~ Dynamic api linkage diff --git a/src/meta/meta.c b/src/meta/meta.c index c29dc51c..0edf71ea 100644 --- a/src/meta/meta.c +++ b/src/meta/meta.c @@ -669,6 +669,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane) //- Generate C file StringList shader_lines = Zi; + StringList shader_thread_dim_type_lines = Zi; { StringList c_store_lines = Zi; StringList c_include_lines = Zi; @@ -715,14 +716,9 @@ void M_BuildEntryPoint(WaveLaneCtx *lane) { if (arg0_tok->valid) { - String decl_type = ( - kind == M_EntryKind_VertexShader ? Lit("DeclVertexShader") : - kind == M_EntryKind_PixelShader ? Lit("DeclPixelShader") : - kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") : - Lit("") - ); String shader_name = arg0_tok->s; - Vec3U32 thread_count = Zi; + Vec3U32 thread_dims = Zi; + i32 thread_dims_count = 1; { StringList thread_count_args = Zi; for (i32 arg_idx = 1; arg_idx < countof(entry->arg_tokens); ++arg_idx) @@ -739,36 +735,70 @@ void M_BuildEntryPoint(WaveLaneCtx *lane) } String thread_count_str = StringFromList(perm, thread_count_args, Lit(" ")); Vec3 tmp = CR_Vec3FromString(thread_count_str); - thread_count.x = MaxI32(tmp.x, 1); - thread_count.y = MaxI32(tmp.y, 1); - thread_count.z = MaxI32(tmp.z, 1); + thread_dims.x = MaxI32(tmp.x, 1); + thread_dims.y = MaxI32(tmp.y, 1); + thread_dims.z = MaxI32(tmp.z, 1); + // Determine compute shader dimensions by counting comma-separated values in dimensions string + for (u64 char_idx = 0; char_idx < thread_count_str.len; ++char_idx) + { + u8 c = thread_count_str.text[char_idx]; + if (c == ',') + { + thread_dims_count += 1; + } + } + thread_dims_count = ClampI32(thread_dims_count, 1, 3); } + String decl_type = ( + kind == M_EntryKind_VertexShader ? Lit("DeclVertexShader") : + kind == M_EntryKind_PixelShader ? Lit("DeclPixelShader") : + kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") : + Lit("") + ); u64 shader_resource_hash = HashStringEx(shader_store_hash, StringF(perm, "%F.dxil", FmtString(shader_name))); - String lines = Zi; + // Dims type line if (kind == M_EntryKind_ComputeShader) { - lines = StringF( + String line = StringF( perm, - "%F(%F, 0x%F, %F, %F, %F);", - FmtString(decl_type), + "#define %F__ThreadDimsType %F", FmtString(shader_name), - FmtHex(shader_resource_hash), - FmtUint(thread_count.x), - FmtUint(thread_count.y), - FmtUint(thread_count.z) + FmtString( + thread_dims_count == 1 ? Lit("u32") : + thread_dims_count == 2 ? Lit("Vec2U32") : + Lit("Vec3U32") + ) ); + PushStringToList(perm, &shader_thread_dim_type_lines, line); } - else + // Shader line { - lines = StringF( - perm, - "%F(%F, 0x%F);", - FmtString(decl_type), - FmtString(shader_name), - FmtHex(shader_resource_hash) - ); + String line = Zi; + if (kind == M_EntryKind_ComputeShader) + { + line = StringF( + perm, + "%F(%F, 0x%F, %F, %F, %F);", + FmtString(decl_type), + FmtString(shader_name), + FmtHex(shader_resource_hash), + FmtUint(thread_dims.x), + FmtUint(thread_dims.y), + FmtUint(thread_dims.z) + ); + } + else + { + line = StringF( + perm, + "%F(%F, 0x%F);", + FmtString(decl_type), + FmtString(shader_name), + FmtHex(shader_resource_hash) + ); + } + PushStringToList(perm, &shader_lines, line); } - PushStringToList(perm, &shader_lines, lines); } else { @@ -836,6 +866,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane) PushStringToList(perm, &c_out_lines, n->s); } } + // Define shader dimension types + if (shader_thread_dim_type_lines.count > 0) + { + PushStringToList(perm, &c_out_lines, Lit("")); + PushStringToList(perm, &c_out_lines, Lit("//- Shader thread dimension types")); + for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next) + { + PushStringToList(perm, &c_out_lines, n->s); + } + } // Define shaders if (shader_lines.count > 0) { @@ -975,6 +1015,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane) PushStringToList(perm, &gpu_out_lines, Lit("//- Base layer includes")); PushStringToList(perm, &gpu_out_lines, StringF(perm, "#include \"%F\"", FmtString(base_inc_path))); } + // Define shader dimension types + if (shader_thread_dim_type_lines.count > 0) + { + PushStringToList(perm, &gpu_out_lines, Lit("")); + PushStringToList(perm, &gpu_out_lines, Lit("//- Shader thread dimension types")); + for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next) + { + PushStringToList(perm, &gpu_out_lines, n->s); + } + } // Define shaders if (shader_lines.count > 0) { diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index 55d37a3a..e9a44035 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -373,10 +373,10 @@ ComputeShader(V_EmitParticlesCS) { u32 particle_idx = (emitter.first_particle_seq + emitter_particle_idx) % (u32)V_ParticlesCap; - // InterlockedMin guarantees that the highest emitter index (reflected - // as negative particle kind) will be used to initialize the particle - // this frame, in case multiple emitters target the same particle (e.g. - // more particles pushed this frame than are available in the buffer) + // Using InterlockedMin guarantees that the highest emitter index + // (reflected as negative particle kind) will be used to initialize the + // particle this frame, in case multiple emitters target the same particle + // (e.g. more particles were pushed this frame than are available in the buffer) InterlockedMin(particles[particle_idx].kind, semantic_particle_kind); } } @@ -393,267 +393,267 @@ ComputeShader(V_SimParticlesCS) Texture2D occluders = G_Deref(frame.occluders, Texture2D); u32 particle_idx = SV_DispatchThreadID; - if (particle_idx < V_ParticlesCap) + if (particle_idx < V_ParticlesCap && particles[particle_idx].kind != V_ParticleKind_None) { V_Particle particle = particles[particle_idx]; b32 prune = 0; + u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx); + f32 rand_offset = Norm16(seed0 >> 0); + f32 rand_angle = Norm16(seed0 >> 16); + f32 rand_speed = Norm16(seed0 >> 32); + f32 rand_falloff = Norm16(seed0 >> 48); + ////////////////////////////// - //- Initialize particle + //- Init particle - if (particle.kind != V_ParticleKind_None) + if (particle.kind < 0) { - u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx); - f32 rand_offset = Norm16(seed0 >> 0); - f32 rand_angle = Norm16(seed0 >> 16); - f32 rand_speed = Norm16(seed0 >> 32); - f32 rand_falloff = Norm16(seed0 >> 48); + u32 emitter_idx = -particle.kind - 1; + V_Emitter emitter = G_Deref(frame.emitters, StructuredBuffer)[emitter_idx]; - ////////////////////////////// - //- Init + f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle); + f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed); - if (particle.kind < 0) + particle = (V_Particle)0; + particle.kind = emitter.kind; + particle.life = 0; + particle.pos = lerp(emitter.pos.p0, emitter.pos.p1, rand_offset); + particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed; + } + + ////////////////////////////// + //- Simulate + + if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune) + { + V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind); + RWTexture2D cells = G_Deref(frame.particle_cells[desc.layer], RWTexture2D); + RWTexture2D densities = G_Deref(frame.particle_densities[desc.layer], RWTexture2D); + + u32 packed = 0; + packed |= (particle_idx & ((1 >> 24) - 1)) << 0; + packed |= (particle.kind & 0xFF) << 24; + StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits + StaticAssert(V_ParticleKind_COUNT <= 0x7F); // particle kind must fit in 7 bits + + if (particle.life == 0) { - u32 emitter_idx = -particle.kind - 1; - V_Emitter emitter = G_Deref(frame.emitters, StructuredBuffer)[emitter_idx]; - - f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle); - f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed); - - particle = (V_Particle)0; - particle.kind = emitter.kind; - particle.life = 0; - particle.pos = lerp(emitter.pos.p0, emitter.pos.p1, rand_offset); - particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed; - } - - if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune) - { - V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind); - RWTexture2D cells = G_Deref(frame.particle_cells[desc.layer], RWTexture2D); - RWTexture2D densities = G_Deref(frame.particle_densities[desc.layer], RWTexture2D); - - u32 packed = 0; - packed |= (particle_idx & ((1 >> 24) - 1)) << 0; - packed |= (particle.kind & 0xFF) << 24; - StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits - StaticAssert(V_ParticleKind_COUNT <= 0x7F); // particle kind must fit in 7 bits - - if (particle.life == 0) + Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1)); + if (IsInside(cell_pos, P_WorldCellsDims)) { - Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1)); - if (IsInside(cell_pos, P_WorldCellsDims)) + u32 occluder = occluders[cell_pos]; + b32 occluder_is_wall = occluder == 0xFFFFFFFF; + if (!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall)) { - u32 occluder = occluders[cell_pos]; - b32 occluder_is_wall = occluder == 0xFFFFFFFF; - if (!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall)) - { - particle.origin_occluder = occluders[cell_pos]; - particle.prev_occluder = particle.origin_occluder; - } - } - else - { - prune = 1; + particle.origin_occluder = occluders[cell_pos]; + particle.prev_occluder = particle.origin_occluder; } } - - ////////////////////////////// - //- Move - - b32 collision = 0; - - // TODO: Clip to avoid unnecessary iterations outside of world bounds - if (!prune) + else { - Vec2 p0 = particle.pos; - Vec2 p1 = particle.pos + particle.velocity * frame.dt; - f32 t = 1; + prune = 1; + } + } + + ////////////////////////////// + //- Move + + b32 collision = 0; + + // TODO: Clip to avoid unnecessary iterations outside of world bounds + if (!prune) + { + Vec2 p0 = particle.pos; + Vec2 p1 = particle.pos + particle.velocity * frame.dt; + f32 t = 1; + { + Vec2 occluder_p0 = mul(frame.af.world_to_cell, Vec3(p0, 1)); + Vec2 occluder_p1 = mul(frame.af.world_to_cell, Vec3(p1, 1)); + Vec2I32 cell_p0 = floor(occluder_p0); + Vec2I32 cell_p1 = floor(occluder_p1); + + Vec2 delta = occluder_p1 - occluder_p0; + Vec2 inv_delta = 1.0 / delta; + Vec2 dda_step_dir = Vec2((delta.x > 0) - (delta.x < 0), (delta.y > 0) - (delta.y < 0)); + Vec2 t_delta = abs(inv_delta); + Vec2 t_max = cell_p0 - occluder_p0; + t_max.x += dda_step_dir.x > 0; + t_max.y += dda_step_dir.y > 0; + t_max *= inv_delta; + t_max = abs(t_max); + + Vec2 t_hit = 0; + + Vec2I32 cell_pos = cell_p0; + + b32 stepped_x = 0; + b32 stepped_y = 0; + + // TODO: Tune this + u32 max_iterations = 128; + + b32 done = 0; + f32 t_diff = 0; + u32 iteration_idx = 0; + for (; iteration_idx < max_iterations && !done; ++iteration_idx) { - Vec2 occluder_p0 = mul(frame.af.world_to_cell, Vec3(p0, 1)); - Vec2 occluder_p1 = mul(frame.af.world_to_cell, Vec3(p1, 1)); - Vec2I32 cell_p0 = floor(occluder_p0); - Vec2I32 cell_p1 = floor(occluder_p1); - - Vec2 delta = occluder_p1 - occluder_p0; - Vec2 inv_delta = 1.0 / delta; - Vec2 dda_step_dir = Vec2((delta.x > 0) - (delta.x < 0), (delta.y > 0) - (delta.y < 0)); - Vec2 t_delta = abs(inv_delta); - Vec2 t_max = cell_p0 - occluder_p0; - t_max.x += dda_step_dir.x > 0; - t_max.y += dda_step_dir.y > 0; - t_max *= inv_delta; - t_max = abs(t_max); - - Vec2 t_hit = 0; - - Vec2I32 cell_pos = cell_p0; - - b32 stepped_x = 0; - b32 stepped_y = 0; - - // TODO: Tune this - u32 max_iterations = 128; - - b32 done = 0; - f32 t_diff = 0; - u32 iteration_idx = 0; - for (; iteration_idx < max_iterations && !done; ++iteration_idx) + if (cell_pos.x == cell_p1.x && cell_pos.y == cell_p1.y) { - if (cell_pos.x == cell_p1.x && cell_pos.y == cell_p1.y) - { - done = 1; - } - else if (t_max.x < t_max.y) - { - cell_pos.x += dda_step_dir.x; - f32 old = t_hit.x; - t_hit.x = t_max.x - t_delta.x; - t_diff = t_hit.x - old; - t_max.x += t_delta.x; - stepped_x = 1; - stepped_y = 0; - } - else - { - cell_pos.y += dda_step_dir.y; - f32 old = t_hit.y; - t_hit.y = t_max.y - t_delta.y; - t_diff = t_hit.y - old; - t_max.y += t_delta.y; - stepped_x = 0; - stepped_y = 1; - } + done = 1; + } + else if (t_max.x < t_max.y) + { + cell_pos.x += dda_step_dir.x; + f32 old = t_hit.x; + t_hit.x = t_max.x - t_delta.x; + t_diff = t_hit.x - old; + t_max.x += t_delta.x; + stepped_x = 1; + stepped_y = 0; + } + else + { + cell_pos.y += dda_step_dir.y; + f32 old = t_hit.y; + t_hit.y = t_max.y - t_delta.y; + t_diff = t_hit.y - old; + t_max.y += t_delta.y; + stepped_x = 0; + stepped_y = 1; + } - Vec2 cell_screen_pos_p0 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(floor(cell_pos), 1)), 1)); - Vec2 cell_screen_pos_p1 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(ceil(cell_pos), 1)), 1)); - cell_screen_pos_p1 = max(cell_screen_pos_p1, cell_screen_pos_p0 + 1); + Vec2 cell_screen_pos_p0 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(floor(cell_pos), 1)), 1)); + Vec2 cell_screen_pos_p1 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(ceil(cell_pos), 1)), 1)); + cell_screen_pos_p1 = max(cell_screen_pos_p1, cell_screen_pos_p0 + 1); - b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims); - b32 is_visible = all(cell_screen_pos_p1 >= 0) && all(cell_screen_pos_p0 < frame.screen_dims); + b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims); + b32 is_visible = all(cell_screen_pos_p1 >= 0) && all(cell_screen_pos_p0 < frame.screen_dims); - if (is_in_world) + if (is_in_world) + { + f32 stain_delta = abs(t_diff) * desc.stain_rate * frame.dt; + particle.stain_accum += stain_delta; + + //- Handle collision { - f32 stain_delta = abs(t_diff) * desc.stain_rate * frame.dt; - particle.stain_accum += stain_delta; - - //- Handle collision + u32 occluder = occluders[cell_pos]; + b32 occluder_is_wall = occluder == 0xFFFFFFFF; + if (occluder != particle.origin_occluder) { - u32 occluder = occluders[cell_pos]; - b32 occluder_is_wall = occluder == 0xFFFFFFFF; - if (occluder != particle.origin_occluder) + particle.origin_occluder = 0; + } + if ( + occluder != 0 && + !(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall) && + occluder != particle.origin_occluder + ) + { + u64 collision_seed = MixU64(V_ParticleCellBasis ^ seed0 ^ particle.cells_count); + f32 rand_collision_angle = Norm16(collision_seed >> 0); + f32 rand_collision_velocity = Norm16(collision_seed >> 16); + f32 rand_collision_penetration = Norm16(collision_seed >> 32); + if (rand_collision_penetration >= desc.pen_rate) { - particle.origin_occluder = 0; - } - if ( - occluder != 0 && - !(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall) && - occluder != particle.origin_occluder - ) - { - u64 collision_seed = MixU64(V_ParticleCellBasis ^ seed0 ^ particle.cells_count); - f32 rand_collision_angle = Norm16(collision_seed >> 0); - f32 rand_collision_velocity = Norm16(collision_seed >> 16); - f32 rand_collision_penetration = Norm16(collision_seed >> 32); - if (rand_collision_penetration >= desc.pen_rate) + collision = 1; + done = 1; { - collision = 1; - done = 1; + if (stepped_x) { - if (stepped_x) + if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) { - if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) - { - particle.velocity.x *= -1; - } - t = saturate(t_hit.x); + particle.velocity.x *= -1; } - else if (stepped_y) + t = saturate(t_hit.x); + } + else if (stepped_y) + { + if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) { - if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) - { - particle.velocity.y *= -1; - } - t = saturate(t_hit.y); + particle.velocity.y *= -1; } - { - f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle); - // f32 collision_angle = 0; + t = saturate(t_hit.y); + } + { + f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle); + // f32 collision_angle = 0; - // f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity); - // f32 collision_velocity_falloff = lerp(5000, 10000, rand_collision_velocity); - // f32 collision_velocity_falloff = lerp(500, 10000, rand_collision_velocity); - f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity); - // f32 collision_velocity_falloff = 0; + // f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity); + // f32 collision_velocity_falloff = lerp(5000, 10000, rand_collision_velocity); + // f32 collision_velocity_falloff = lerp(500, 10000, rand_collision_velocity); + f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity); + // f32 collision_velocity_falloff = 0; - particle.velocity = RotateVec2Angle(particle.velocity, collision_angle); - particle.velocity *= 1.0f - saturate(collision_velocity_falloff * frame.dt); - } + particle.velocity = RotateVec2Angle(particle.velocity, collision_angle); + particle.velocity *= 1.0f - saturate(collision_velocity_falloff * frame.dt); } } } - particle.prev_occluder = occluder; - } - - if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold)) - { - prune = 1; - } - - if (prune) - { - done = 1; - if (AnyBit(desc.flags, V_ParticleFlag_StainWhenPruned)) - { - // particle.stain_accum = max(particle.stain_accum, 1); - particle.stain_accum += 1; - packed |= 1 << 31; - } - } - - if (!collision && particle.origin_occluder != 0xFFFFFFFF) - { - u32 stain_count = floor(particle.stain_accum); - u32 density = 1 + stain_count; - - u32 commit = packed; - if (stain_count > 0) - { - commit |= (1 << 31); - } - - InterlockedMax(cells[cell_pos], commit); - InterlockedAdd(densities[cell_pos], density); - particle.stain_accum -= stain_count; } + particle.prev_occluder = occluder; } - else + + if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold)) { - done = 1; prune = 1; } - particle.cells_count += 1; + if (prune) + { + done = 1; + if (AnyBit(desc.flags, V_ParticleFlag_StainWhenPruned)) + { + // particle.stain_accum = max(particle.stain_accum, 1); + particle.stain_accum += 1; + packed |= 1 << 31; + } + } + + if (!collision && particle.origin_occluder != 0xFFFFFFFF) + { + u32 stain_count = floor(particle.stain_accum); + u32 density = 1 + stain_count; + + u32 commit = packed; + if (stain_count > 0) + { + commit |= (1 << 31); + } + + InterlockedMax(cells[cell_pos], commit); + InterlockedAdd(densities[cell_pos], density); + particle.stain_accum -= stain_count; + } } + else + { + done = 1; + prune = 1; + } + + particle.cells_count += 1; } - - f32 falloff = saturate(lerp(10, 20, rand_falloff) * frame.dt); - // f32 falloff = saturate(lerp(1, 2, rand_falloff) * frame.dt); - particle.velocity *= 1.0f - falloff; - - particle.pos = p0 + (p1 - p0) * t; } - particle.life += frame.dt; + f32 falloff = saturate(lerp(10, 20, rand_falloff) * frame.dt); + // f32 falloff = saturate(lerp(1, 2, rand_falloff) * frame.dt); + particle.velocity *= 1.0f - falloff; + + particle.pos = p0 + (p1 - p0) * t; } - if (prune) - { - particle.kind = V_ParticleKind_None; - } - - particles[particle_idx] = particle; + particle.life += frame.dt; } + + ////////////////////////////// + //- Commit + + if (prune) + { + particle.kind = V_ParticleKind_None; + } + + particles[particle_idx] = particle; } }