use dimension-specific vector types for compute shader parameters

This commit is contained in:
jacob 2026-03-19 17:01:55 -05:00
parent b63b6197a6
commit cbcec3639f
3 changed files with 310 additions and 258 deletions

View File

@ -744,32 +744,34 @@ Struct(VertexShaderDesc) { ResourceKey resource; u32 x, y, z; };
Struct(PixelShaderDesc) { ResourceKey resource; u32 x, y, z; };
Struct(ComputeShaderDesc) { ResourceKey resource; u32 x, y, z; };
#define GroupSize(name) VEC3U32(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z))
#define GroupSize(name) VEC3U32(CAT(name,__GroupSize_X), CAT(name,__GroupSize_Y), CAT(name,__GroupSize_Z))
#if IsGpu
#define Semantic(name) name : name
#define VertexShader(name, return_type) return_type name(u32 Semantic(SV_InstanceID), u32 Semantic(SV_VertexID))
#define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__)
#define ComputeShader(name) \
[numthreads(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z))] \
void name( \
u32 Semantic(SV_GroupIndex), \
Vec3U32 Semantic(SV_GroupID), \
Vec3U32 Semantic(SV_GroupThreadID), \
Vec3U32 Semantic(SV_DispatchThreadID) \
)
#define VertexShader(name, return_type) return_type name(u32 Semantic(SV_InstanceID), u32 Semantic(SV_VertexID))
#define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__)
// Declares the entry point of the compute shader `name`.
// - The [numthreads] arguments are the name__GroupSize_X/Y/Z constants (the enum emitted by DeclComputeShader).
// - Each parameter is named after its semantic, since Semantic(x) expands to `x : x`.
// - SV_GroupID, SV_GroupThreadID and SV_DispatchThreadID are typed as name__ThreadDimsType,
//   so that macro must already be defined for `name` wherever this macro is expanded.
// The trailing comment line only ends the line-continuation run.
#define ComputeShader(name) \
[numthreads(CAT(name,__GroupSize_X), CAT(name,__GroupSize_Y), CAT(name,__GroupSize_Z))] \
void name( \
u32 Semantic(SV_GroupIndex), \
CAT(name,__ThreadDimsType) Semantic(SV_GroupID), \
CAT(name,__ThreadDimsType) Semantic(SV_GroupThreadID), \
CAT(name,__ThreadDimsType) Semantic(SV_DispatchThreadID) \
) \
/* ----------------------------------------------------------------------------------- */
#endif
#if IsCpu
#define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z }; static ComputeShaderDesc name = { resource_hash, x, y, z }
#define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name,__GroupSize_X) = x, CAT(name,__GroupSize_Y) = y, CAT(name,__GroupSize_Z) = z }; static ComputeShaderDesc name = { resource_hash, x, y, z }
#define DeclVertexShader(name, resource_hash) static VertexShaderDesc name = { resource_hash, 1, 1, 1 }
#define DeclPixelShader(name, resource_hash) static PixelShaderDesc name = { resource_hash, 1, 1, 1 }
#elif IsGpu
#define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z };
#define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name,__GroupSize_X) = x, CAT(name,__GroupSize_Y) = y, CAT(name,__GroupSize_Z) = z };
#define DeclVertexShader(name, resource_hash)
#define DeclPixelShader(name, resource_hash)
#endif
////////////////////////////////////////////////////////////
//~ Dynamic api linkage

View File

@ -669,6 +669,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
//- Generate C file
StringList shader_lines = Zi;
StringList shader_thread_dim_type_lines = Zi;
{
StringList c_store_lines = Zi;
StringList c_include_lines = Zi;
@ -715,14 +716,9 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
{
if (arg0_tok->valid)
{
String decl_type = (
kind == M_EntryKind_VertexShader ? Lit("DeclVertexShader") :
kind == M_EntryKind_PixelShader ? Lit("DeclPixelShader") :
kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") :
Lit("")
);
String shader_name = arg0_tok->s;
Vec3U32 thread_count = Zi;
Vec3U32 thread_dims = Zi;
i32 thread_dims_count = 1;
{
StringList thread_count_args = Zi;
for (i32 arg_idx = 1; arg_idx < countof(entry->arg_tokens); ++arg_idx)
@ -739,36 +735,70 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
}
String thread_count_str = StringFromList(perm, thread_count_args, Lit(" "));
Vec3 tmp = CR_Vec3FromString(thread_count_str);
thread_count.x = MaxI32(tmp.x, 1);
thread_count.y = MaxI32(tmp.y, 1);
thread_count.z = MaxI32(tmp.z, 1);
thread_dims.x = MaxI32(tmp.x, 1);
thread_dims.y = MaxI32(tmp.y, 1);
thread_dims.z = MaxI32(tmp.z, 1);
// Determine compute shader dimensions by counting comma-separated values in dimensions string
for (u64 char_idx = 0; char_idx < thread_count_str.len; ++char_idx)
{
u8 c = thread_count_str.text[char_idx];
if (c == ',')
{
thread_dims_count += 1;
}
}
thread_dims_count = ClampI32(thread_dims_count, 1, 3);
}
String decl_type = (
kind == M_EntryKind_VertexShader ? Lit("DeclVertexShader") :
kind == M_EntryKind_PixelShader ? Lit("DeclPixelShader") :
kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") :
Lit("")
);
u64 shader_resource_hash = HashStringEx(shader_store_hash, StringF(perm, "%F.dxil", FmtString(shader_name)));
String lines = Zi;
// Dims type line
if (kind == M_EntryKind_ComputeShader)
{
lines = StringF(
String line = StringF(
perm,
"%F(%F, 0x%F, %F, %F, %F);",
FmtString(decl_type),
"#define %F__ThreadDimsType %F",
FmtString(shader_name),
FmtHex(shader_resource_hash),
FmtUint(thread_count.x),
FmtUint(thread_count.y),
FmtUint(thread_count.z)
FmtString(
thread_dims_count == 1 ? Lit("u32") :
thread_dims_count == 2 ? Lit("Vec2U32") :
Lit("Vec3U32")
)
);
PushStringToList(perm, &shader_thread_dim_type_lines, line);
}
else
// Shader line
{
lines = StringF(
perm,
"%F(%F, 0x%F);",
FmtString(decl_type),
FmtString(shader_name),
FmtHex(shader_resource_hash)
);
String line = Zi;
if (kind == M_EntryKind_ComputeShader)
{
line = StringF(
perm,
"%F(%F, 0x%F, %F, %F, %F);",
FmtString(decl_type),
FmtString(shader_name),
FmtHex(shader_resource_hash),
FmtUint(thread_dims.x),
FmtUint(thread_dims.y),
FmtUint(thread_dims.z)
);
}
else
{
line = StringF(
perm,
"%F(%F, 0x%F);",
FmtString(decl_type),
FmtString(shader_name),
FmtHex(shader_resource_hash)
);
}
PushStringToList(perm, &shader_lines, line);
}
PushStringToList(perm, &shader_lines, lines);
}
else
{
@ -836,6 +866,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &c_out_lines, n->s);
}
}
// Define shader dimension types
if (shader_thread_dim_type_lines.count > 0)
{
PushStringToList(perm, &c_out_lines, Lit(""));
PushStringToList(perm, &c_out_lines, Lit("//- Shader thread dimension types"));
for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next)
{
PushStringToList(perm, &c_out_lines, n->s);
}
}
// Define shaders
if (shader_lines.count > 0)
{
@ -975,6 +1015,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &gpu_out_lines, Lit("//- Base layer includes"));
PushStringToList(perm, &gpu_out_lines, StringF(perm, "#include \"%F\"", FmtString(base_inc_path)));
}
// Define shader dimension types
if (shader_thread_dim_type_lines.count > 0)
{
PushStringToList(perm, &gpu_out_lines, Lit(""));
PushStringToList(perm, &gpu_out_lines, Lit("//- Shader thread dimension types"));
for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next)
{
PushStringToList(perm, &gpu_out_lines, n->s);
}
}
// Define shaders
if (shader_lines.count > 0)
{

View File

@ -373,10 +373,10 @@ ComputeShader(V_EmitParticlesCS)
{
u32 particle_idx = (emitter.first_particle_seq + emitter_particle_idx) % (u32)V_ParticlesCap;
// InterlockedMin guarantees that the highest emitter index (reflected
// as negative particle kind) will be used to initialize the particle
// this frame, in case multiple emitters target the same particle (e.g.
// more particles pushed this frame than are available in the buffer)
// Using InterlockedMin guarantees that the highest emitter index
// (reflected as negative particle kind) will be used to initialize the
// particle this frame, in case multiple emitters target the same particle
// (e.g. more particles were pushed this frame than are available in the buffer)
InterlockedMin(particles[particle_idx].kind, semantic_particle_kind);
}
}
@ -393,267 +393,267 @@ ComputeShader(V_SimParticlesCS)
Texture2D<u32> occluders = G_Deref(frame.occluders, Texture2D<u32>);
u32 particle_idx = SV_DispatchThreadID;
if (particle_idx < V_ParticlesCap)
if (particle_idx < V_ParticlesCap && particles[particle_idx].kind != V_ParticleKind_None)
{
V_Particle particle = particles[particle_idx];
b32 prune = 0;
u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx);
f32 rand_offset = Norm16(seed0 >> 0);
f32 rand_angle = Norm16(seed0 >> 16);
f32 rand_speed = Norm16(seed0 >> 32);
f32 rand_falloff = Norm16(seed0 >> 48);
//////////////////////////////
//- Initialize particle
//- Init particle
if (particle.kind != V_ParticleKind_None)
if (particle.kind < 0)
{
u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx);
f32 rand_offset = Norm16(seed0 >> 0);
f32 rand_angle = Norm16(seed0 >> 16);
f32 rand_speed = Norm16(seed0 >> 32);
f32 rand_falloff = Norm16(seed0 >> 48);
u32 emitter_idx = -particle.kind - 1;
V_Emitter emitter = G_Deref(frame.emitters, StructuredBuffer<V_Emitter>)[emitter_idx];
//////////////////////////////
//- Init
f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle);
f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed);
if (particle.kind < 0)
particle = (V_Particle)0;
particle.kind = emitter.kind;
particle.life = 0;
particle.pos = lerp(emitter.pos.p0, emitter.pos.p1, rand_offset);
particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed;
}
//////////////////////////////
//- Simulate
if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
{
V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
RWTexture2D<u32> cells = G_Deref(frame.particle_cells[desc.layer], RWTexture2D<u32>);
RWTexture2D<u32> densities = G_Deref(frame.particle_densities[desc.layer], RWTexture2D<u32>);
// Pack the particle index into the low 24 bits and the particle kind into bits 24 and up.
// Bit 31 is OR'd in separately by the stain handling further down.
u32 packed = 0;
// The mask must be (1 << 24) - 1. The previous (1 >> 24) - 1 evaluated to 0 - 1 (all ones) and masked nothing.
packed |= (particle_idx & ((1 << 24) - 1)) << 0;
packed |= (particle.kind & 0xFF) << 24;
StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits
StaticAssert(V_ParticleKind_COUNT <= 0x7F); // particle kind must fit in 7 bits
if (particle.life == 0)
{
u32 emitter_idx = -particle.kind - 1;
V_Emitter emitter = G_Deref(frame.emitters, StructuredBuffer<V_Emitter>)[emitter_idx];
f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle);
f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed);
particle = (V_Particle)0;
particle.kind = emitter.kind;
particle.life = 0;
particle.pos = lerp(emitter.pos.p0, emitter.pos.p1, rand_offset);
particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed;
}
if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
{
V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
RWTexture2D<u32> cells = G_Deref(frame.particle_cells[desc.layer], RWTexture2D<u32>);
RWTexture2D<u32> densities = G_Deref(frame.particle_densities[desc.layer], RWTexture2D<u32>);
// Pack the particle index into the low 24 bits and the particle kind into bits 24 and up.
// Bit 31 is OR'd in separately by the stain handling further down.
u32 packed = 0;
// The mask must be (1 << 24) - 1. The previous (1 >> 24) - 1 evaluated to 0 - 1 (all ones) and masked nothing.
packed |= (particle_idx & ((1 << 24) - 1)) << 0;
packed |= (particle.kind & 0xFF) << 24;
StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits
StaticAssert(V_ParticleKind_COUNT <= 0x7F); // particle kind must fit in 7 bits
if (particle.life == 0)
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1));
if (IsInside(cell_pos, P_WorldCellsDims))
{
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1));
if (IsInside(cell_pos, P_WorldCellsDims))
u32 occluder = occluders[cell_pos];
b32 occluder_is_wall = occluder == 0xFFFFFFFF;
if (!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall))
{
u32 occluder = occluders[cell_pos];
b32 occluder_is_wall = occluder == 0xFFFFFFFF;
if (!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall))
{
particle.origin_occluder = occluders[cell_pos];
particle.prev_occluder = particle.origin_occluder;
}
}
else
{
prune = 1;
particle.origin_occluder = occluders[cell_pos];
particle.prev_occluder = particle.origin_occluder;
}
}
//////////////////////////////
//- Move
b32 collision = 0;
// TODO: Clip to avoid unnecessary iterations outside of world bounds
if (!prune)
else
{
Vec2 p0 = particle.pos;
Vec2 p1 = particle.pos + particle.velocity * frame.dt;
f32 t = 1;
prune = 1;
}
}
//////////////////////////////
//- Move
b32 collision = 0;
// TODO: Clip to avoid unnecessary iterations outside of world bounds
if (!prune)
{
Vec2 p0 = particle.pos;
Vec2 p1 = particle.pos + particle.velocity * frame.dt;
f32 t = 1;
{
Vec2 occluder_p0 = mul(frame.af.world_to_cell, Vec3(p0, 1));
Vec2 occluder_p1 = mul(frame.af.world_to_cell, Vec3(p1, 1));
Vec2I32 cell_p0 = floor(occluder_p0);
Vec2I32 cell_p1 = floor(occluder_p1);
Vec2 delta = occluder_p1 - occluder_p0;
Vec2 inv_delta = 1.0 / delta;
Vec2 dda_step_dir = Vec2((delta.x > 0) - (delta.x < 0), (delta.y > 0) - (delta.y < 0));
Vec2 t_delta = abs(inv_delta);
Vec2 t_max = cell_p0 - occluder_p0;
t_max.x += dda_step_dir.x > 0;
t_max.y += dda_step_dir.y > 0;
t_max *= inv_delta;
t_max = abs(t_max);
Vec2 t_hit = 0;
Vec2I32 cell_pos = cell_p0;
b32 stepped_x = 0;
b32 stepped_y = 0;
// TODO: Tune this
u32 max_iterations = 128;
b32 done = 0;
f32 t_diff = 0;
u32 iteration_idx = 0;
for (; iteration_idx < max_iterations && !done; ++iteration_idx)
{
Vec2 occluder_p0 = mul(frame.af.world_to_cell, Vec3(p0, 1));
Vec2 occluder_p1 = mul(frame.af.world_to_cell, Vec3(p1, 1));
Vec2I32 cell_p0 = floor(occluder_p0);
Vec2I32 cell_p1 = floor(occluder_p1);
Vec2 delta = occluder_p1 - occluder_p0;
Vec2 inv_delta = 1.0 / delta;
Vec2 dda_step_dir = Vec2((delta.x > 0) - (delta.x < 0), (delta.y > 0) - (delta.y < 0));
Vec2 t_delta = abs(inv_delta);
Vec2 t_max = cell_p0 - occluder_p0;
t_max.x += dda_step_dir.x > 0;
t_max.y += dda_step_dir.y > 0;
t_max *= inv_delta;
t_max = abs(t_max);
Vec2 t_hit = 0;
Vec2I32 cell_pos = cell_p0;
b32 stepped_x = 0;
b32 stepped_y = 0;
// TODO: Tune this
u32 max_iterations = 128;
b32 done = 0;
f32 t_diff = 0;
u32 iteration_idx = 0;
for (; iteration_idx < max_iterations && !done; ++iteration_idx)
if (cell_pos.x == cell_p1.x && cell_pos.y == cell_p1.y)
{
if (cell_pos.x == cell_p1.x && cell_pos.y == cell_p1.y)
{
done = 1;
}
else if (t_max.x < t_max.y)
{
cell_pos.x += dda_step_dir.x;
f32 old = t_hit.x;
t_hit.x = t_max.x - t_delta.x;
t_diff = t_hit.x - old;
t_max.x += t_delta.x;
stepped_x = 1;
stepped_y = 0;
}
else
{
cell_pos.y += dda_step_dir.y;
f32 old = t_hit.y;
t_hit.y = t_max.y - t_delta.y;
t_diff = t_hit.y - old;
t_max.y += t_delta.y;
stepped_x = 0;
stepped_y = 1;
}
done = 1;
}
else if (t_max.x < t_max.y)
{
cell_pos.x += dda_step_dir.x;
f32 old = t_hit.x;
t_hit.x = t_max.x - t_delta.x;
t_diff = t_hit.x - old;
t_max.x += t_delta.x;
stepped_x = 1;
stepped_y = 0;
}
else
{
cell_pos.y += dda_step_dir.y;
f32 old = t_hit.y;
t_hit.y = t_max.y - t_delta.y;
t_diff = t_hit.y - old;
t_max.y += t_delta.y;
stepped_x = 0;
stepped_y = 1;
}
Vec2 cell_screen_pos_p0 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(floor(cell_pos), 1)), 1));
Vec2 cell_screen_pos_p1 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(ceil(cell_pos), 1)), 1));
cell_screen_pos_p1 = max(cell_screen_pos_p1, cell_screen_pos_p0 + 1);
Vec2 cell_screen_pos_p0 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(floor(cell_pos), 1)), 1));
Vec2 cell_screen_pos_p1 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(ceil(cell_pos), 1)), 1));
cell_screen_pos_p1 = max(cell_screen_pos_p1, cell_screen_pos_p0 + 1);
b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims);
b32 is_visible = all(cell_screen_pos_p1 >= 0) && all(cell_screen_pos_p0 < frame.screen_dims);
b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims);
b32 is_visible = all(cell_screen_pos_p1 >= 0) && all(cell_screen_pos_p0 < frame.screen_dims);
if (is_in_world)
if (is_in_world)
{
f32 stain_delta = abs(t_diff) * desc.stain_rate * frame.dt;
particle.stain_accum += stain_delta;
//- Handle collision
{
f32 stain_delta = abs(t_diff) * desc.stain_rate * frame.dt;
particle.stain_accum += stain_delta;
//- Handle collision
u32 occluder = occluders[cell_pos];
b32 occluder_is_wall = occluder == 0xFFFFFFFF;
if (occluder != particle.origin_occluder)
{
u32 occluder = occluders[cell_pos];
b32 occluder_is_wall = occluder == 0xFFFFFFFF;
if (occluder != particle.origin_occluder)
particle.origin_occluder = 0;
}
if (
occluder != 0 &&
!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall) &&
occluder != particle.origin_occluder
)
{
u64 collision_seed = MixU64(V_ParticleCellBasis ^ seed0 ^ particle.cells_count);
f32 rand_collision_angle = Norm16(collision_seed >> 0);
f32 rand_collision_velocity = Norm16(collision_seed >> 16);
f32 rand_collision_penetration = Norm16(collision_seed >> 32);
if (rand_collision_penetration >= desc.pen_rate)
{
particle.origin_occluder = 0;
}
if (
occluder != 0 &&
!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall) &&
occluder != particle.origin_occluder
)
{
u64 collision_seed = MixU64(V_ParticleCellBasis ^ seed0 ^ particle.cells_count);
f32 rand_collision_angle = Norm16(collision_seed >> 0);
f32 rand_collision_velocity = Norm16(collision_seed >> 16);
f32 rand_collision_penetration = Norm16(collision_seed >> 32);
if (rand_collision_penetration >= desc.pen_rate)
collision = 1;
done = 1;
{
collision = 1;
done = 1;
if (stepped_x)
{
if (stepped_x)
if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect))
{
if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect))
{
particle.velocity.x *= -1;
}
t = saturate(t_hit.x);
particle.velocity.x *= -1;
}
else if (stepped_y)
t = saturate(t_hit.x);
}
else if (stepped_y)
{
if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect))
{
if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect))
{
particle.velocity.y *= -1;
}
t = saturate(t_hit.y);
particle.velocity.y *= -1;
}
{
f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle);
// f32 collision_angle = 0;
t = saturate(t_hit.y);
}
{
f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle);
// f32 collision_angle = 0;
// f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity);
// f32 collision_velocity_falloff = lerp(5000, 10000, rand_collision_velocity);
// f32 collision_velocity_falloff = lerp(500, 10000, rand_collision_velocity);
f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity);
// f32 collision_velocity_falloff = 0;
// f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity);
// f32 collision_velocity_falloff = lerp(5000, 10000, rand_collision_velocity);
// f32 collision_velocity_falloff = lerp(500, 10000, rand_collision_velocity);
f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity);
// f32 collision_velocity_falloff = 0;
particle.velocity = RotateVec2Angle(particle.velocity, collision_angle);
particle.velocity *= 1.0f - saturate(collision_velocity_falloff * frame.dt);
}
particle.velocity = RotateVec2Angle(particle.velocity, collision_angle);
particle.velocity *= 1.0f - saturate(collision_velocity_falloff * frame.dt);
}
}
}
particle.prev_occluder = occluder;
}
if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))
{
prune = 1;
}
if (prune)
{
done = 1;
if (AnyBit(desc.flags, V_ParticleFlag_StainWhenPruned))
{
// particle.stain_accum = max(particle.stain_accum, 1);
particle.stain_accum += 1;
packed |= 1 << 31;
}
}
if (!collision && particle.origin_occluder != 0xFFFFFFFF)
{
u32 stain_count = floor(particle.stain_accum);
u32 density = 1 + stain_count;
u32 commit = packed;
if (stain_count > 0)
{
commit |= (1 << 31);
}
InterlockedMax(cells[cell_pos], commit);
InterlockedAdd(densities[cell_pos], density);
particle.stain_accum -= stain_count;
}
particle.prev_occluder = occluder;
}
else
if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))
{
done = 1;
prune = 1;
}
particle.cells_count += 1;
if (prune)
{
done = 1;
if (AnyBit(desc.flags, V_ParticleFlag_StainWhenPruned))
{
// particle.stain_accum = max(particle.stain_accum, 1);
particle.stain_accum += 1;
packed |= 1 << 31;
}
}
if (!collision && particle.origin_occluder != 0xFFFFFFFF)
{
u32 stain_count = floor(particle.stain_accum);
u32 density = 1 + stain_count;
u32 commit = packed;
if (stain_count > 0)
{
commit |= (1 << 31);
}
InterlockedMax(cells[cell_pos], commit);
InterlockedAdd(densities[cell_pos], density);
particle.stain_accum -= stain_count;
}
}
else
{
done = 1;
prune = 1;
}
particle.cells_count += 1;
}
f32 falloff = saturate(lerp(10, 20, rand_falloff) * frame.dt);
// f32 falloff = saturate(lerp(1, 2, rand_falloff) * frame.dt);
particle.velocity *= 1.0f - falloff;
particle.pos = p0 + (p1 - p0) * t;
}
particle.life += frame.dt;
f32 falloff = saturate(lerp(10, 20, rand_falloff) * frame.dt);
// f32 falloff = saturate(lerp(1, 2, rand_falloff) * frame.dt);
particle.velocity *= 1.0f - falloff;
particle.pos = p0 + (p1 - p0) * t;
}
if (prune)
{
particle.kind = V_ParticleKind_None;
}
particles[particle_idx] = particle;
particle.life += frame.dt;
}
//////////////////////////////
//- Commit
if (prune)
{
particle.kind = V_ParticleKind_None;
}
particles[particle_idx] = particle;
}
}