use dimension-specific vector types for compute shader parameters

This commit is contained in:
jacob 2026-03-19 17:01:55 -05:00
parent b63b6197a6
commit cbcec3639f
3 changed files with 310 additions and 258 deletions

View File

@ -744,32 +744,34 @@ Struct(VertexShaderDesc) { ResourceKey resource; u32 x, y, z; };
Struct(PixelShaderDesc) { ResourceKey resource; u32 x, y, z; }; Struct(PixelShaderDesc) { ResourceKey resource; u32 x, y, z; };
Struct(ComputeShaderDesc) { ResourceKey resource; u32 x, y, z; }; Struct(ComputeShaderDesc) { ResourceKey resource; u32 x, y, z; };
#define GroupSize(name) VEC3U32(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z)) #define GroupSize(name) VEC3U32(CAT(name,__GroupSize_X), CAT(name,__GroupSize_Y), CAT(name,__GroupSize_Z))
#if IsGpu #if IsGpu
#define Semantic(name) name : name #define Semantic(name) name : name
#define VertexShader(name, return_type) return_type name(u32 Semantic(SV_InstanceID), u32 Semantic(SV_VertexID)) #define VertexShader(name, return_type) return_type name(u32 Semantic(SV_InstanceID), u32 Semantic(SV_VertexID))
#define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__) #define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__)
#define ComputeShader(name) \ #define ComputeShader(name) \
[numthreads(CAT(name, __GroupSize_X), CAT(name, __GroupSize_Y), CAT(name, __GroupSize_Z))] \ [numthreads(CAT(name,__GroupSize_X), CAT(name,__GroupSize_Y), CAT(name,__GroupSize_Z))] \
void name( \ void name( \
u32 Semantic(SV_GroupIndex), \ u32 Semantic(SV_GroupIndex), \
Vec3U32 Semantic(SV_GroupID), \ CAT(name,__ThreadDimsType) Semantic(SV_GroupID), \
Vec3U32 Semantic(SV_GroupThreadID), \ CAT(name,__ThreadDimsType) Semantic(SV_GroupThreadID), \
Vec3U32 Semantic(SV_DispatchThreadID) \ CAT(name,__ThreadDimsType) Semantic(SV_DispatchThreadID) \
) ) \
/* ----------------------------------------------------------------------------------- */
#endif #endif
#if IsCpu #if IsCpu
#define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z }; static ComputeShaderDesc name = { resource_hash, x, y, z } #define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name,__GroupSize_X) = x, CAT(name,__GroupSize_Y) = y, CAT(name,__GroupSize_Z) = z }; static ComputeShaderDesc name = { resource_hash, x, y, z }
#define DeclVertexShader(name, resource_hash) static VertexShaderDesc name = { resource_hash, 1, 1, 1 } #define DeclVertexShader(name, resource_hash) static VertexShaderDesc name = { resource_hash, 1, 1, 1 }
#define DeclPixelShader(name, resource_hash) static PixelShaderDesc name = { resource_hash, 1, 1, 1 } #define DeclPixelShader(name, resource_hash) static PixelShaderDesc name = { resource_hash, 1, 1, 1 }
#elif IsGpu #elif IsGpu
#define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name, __GroupSize_X) = x, CAT(name, __GroupSize_Y) = y, CAT(name, __GroupSize_Z) = z }; #define DeclComputeShader(name, resource_hash, x, y, z) enum { CAT(name,__GroupSize_X) = x, CAT(name,__GroupSize_Y) = y, CAT(name,__GroupSize_Z) = z };
#define DeclVertexShader(name, resource_hash) #define DeclVertexShader(name, resource_hash)
#define DeclPixelShader(name, resource_hash) #define DeclPixelShader(name, resource_hash)
#endif #endif
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Dynamic api linkage //~ Dynamic api linkage

View File

@ -669,6 +669,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
//- Generate C file //- Generate C file
StringList shader_lines = Zi; StringList shader_lines = Zi;
StringList shader_thread_dim_type_lines = Zi;
{ {
StringList c_store_lines = Zi; StringList c_store_lines = Zi;
StringList c_include_lines = Zi; StringList c_include_lines = Zi;
@ -715,14 +716,9 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
{ {
if (arg0_tok->valid) if (arg0_tok->valid)
{ {
String decl_type = (
kind == M_EntryKind_VertexShader ? Lit("DeclVertexShader") :
kind == M_EntryKind_PixelShader ? Lit("DeclPixelShader") :
kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") :
Lit("")
);
String shader_name = arg0_tok->s; String shader_name = arg0_tok->s;
Vec3U32 thread_count = Zi; Vec3U32 thread_dims = Zi;
i32 thread_dims_count = 1;
{ {
StringList thread_count_args = Zi; StringList thread_count_args = Zi;
for (i32 arg_idx = 1; arg_idx < countof(entry->arg_tokens); ++arg_idx) for (i32 arg_idx = 1; arg_idx < countof(entry->arg_tokens); ++arg_idx)
@ -739,36 +735,70 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
} }
String thread_count_str = StringFromList(perm, thread_count_args, Lit(" ")); String thread_count_str = StringFromList(perm, thread_count_args, Lit(" "));
Vec3 tmp = CR_Vec3FromString(thread_count_str); Vec3 tmp = CR_Vec3FromString(thread_count_str);
thread_count.x = MaxI32(tmp.x, 1); thread_dims.x = MaxI32(tmp.x, 1);
thread_count.y = MaxI32(tmp.y, 1); thread_dims.y = MaxI32(tmp.y, 1);
thread_count.z = MaxI32(tmp.z, 1); thread_dims.z = MaxI32(tmp.z, 1);
// Determine compute shader dimensions by counting comma-separated values in dimensions string.
// thread_dims_count starts at 1, so N commas in the string yield N + 1 dimensions.
// NOTE(review): thread_count_str is the argument tokens joined with a single space, so this
// relies on the commas still being present inside those tokens - confirm against the tokenizer.
for (u64 char_idx = 0; char_idx < thread_count_str.len; ++char_idx)
{
u8 c = thread_count_str.text[char_idx];
if (c == ',')
{
thread_dims_count += 1;
}
}
// Any extra commas beyond two still select the 3-dimensional case
thread_dims_count = ClampI32(thread_dims_count, 1, 3);
} }
String decl_type = (
kind == M_EntryKind_VertexShader ? Lit("DeclVertexShader") :
kind == M_EntryKind_PixelShader ? Lit("DeclPixelShader") :
kind == M_EntryKind_ComputeShader ? Lit("DeclComputeShader") :
Lit("")
);
u64 shader_resource_hash = HashStringEx(shader_store_hash, StringF(perm, "%F.dxil", FmtString(shader_name))); u64 shader_resource_hash = HashStringEx(shader_store_hash, StringF(perm, "%F.dxil", FmtString(shader_name)));
String lines = Zi; // Dims type line
if (kind == M_EntryKind_ComputeShader) if (kind == M_EntryKind_ComputeShader)
{ {
lines = StringF( String line = StringF(
perm, perm,
"%F(%F, 0x%F, %F, %F, %F);", "#define %F__ThreadDimsType %F",
FmtString(decl_type),
FmtString(shader_name), FmtString(shader_name),
FmtHex(shader_resource_hash), FmtString(
FmtUint(thread_count.x), thread_dims_count == 1 ? Lit("u32") :
FmtUint(thread_count.y), thread_dims_count == 2 ? Lit("Vec2U32") :
FmtUint(thread_count.z) Lit("Vec3U32")
)
); );
PushStringToList(perm, &shader_thread_dim_type_lines, line);
} }
else // Shader line
{ {
lines = StringF( String line = Zi;
perm, if (kind == M_EntryKind_ComputeShader)
"%F(%F, 0x%F);", {
FmtString(decl_type), line = StringF(
FmtString(shader_name), perm,
FmtHex(shader_resource_hash) "%F(%F, 0x%F, %F, %F, %F);",
); FmtString(decl_type),
FmtString(shader_name),
FmtHex(shader_resource_hash),
FmtUint(thread_dims.x),
FmtUint(thread_dims.y),
FmtUint(thread_dims.z)
);
}
else
{
line = StringF(
perm,
"%F(%F, 0x%F);",
FmtString(decl_type),
FmtString(shader_name),
FmtHex(shader_resource_hash)
);
}
PushStringToList(perm, &shader_lines, line);
} }
PushStringToList(perm, &shader_lines, lines);
} }
else else
{ {
@ -836,6 +866,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &c_out_lines, n->s); PushStringToList(perm, &c_out_lines, n->s);
} }
} }
// Define shader dimension types
if (shader_thread_dim_type_lines.count > 0)
{
PushStringToList(perm, &c_out_lines, Lit(""));
PushStringToList(perm, &c_out_lines, Lit("//- Shader thread dimension types"));
for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next)
{
PushStringToList(perm, &c_out_lines, n->s);
}
}
// Define shaders // Define shaders
if (shader_lines.count > 0) if (shader_lines.count > 0)
{ {
@ -975,6 +1015,16 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &gpu_out_lines, Lit("//- Base layer includes")); PushStringToList(perm, &gpu_out_lines, Lit("//- Base layer includes"));
PushStringToList(perm, &gpu_out_lines, StringF(perm, "#include \"%F\"", FmtString(base_inc_path))); PushStringToList(perm, &gpu_out_lines, StringF(perm, "#include \"%F\"", FmtString(base_inc_path)));
} }
// Define shader dimension types
if (shader_thread_dim_type_lines.count > 0)
{
PushStringToList(perm, &gpu_out_lines, Lit(""));
PushStringToList(perm, &gpu_out_lines, Lit("//- Shader thread dimension types"));
for (StringListNode *n = shader_thread_dim_type_lines.first; n; n = n->next)
{
PushStringToList(perm, &gpu_out_lines, n->s);
}
}
// Define shaders // Define shaders
if (shader_lines.count > 0) if (shader_lines.count > 0)
{ {

View File

@ -373,10 +373,10 @@ ComputeShader(V_EmitParticlesCS)
{ {
u32 particle_idx = (emitter.first_particle_seq + emitter_particle_idx) % (u32)V_ParticlesCap; u32 particle_idx = (emitter.first_particle_seq + emitter_particle_idx) % (u32)V_ParticlesCap;
// InterlockedMin guarantees that the highest emitter index (reflected // Using InterlockedMin guarantees that the highest emitter index
// as negative particle kind) will be used to initialize the particle // (reflected as negative particle kind) will be used to initialize the
// this frame, in case multiple emitters target the same particle (e.g. // particle this frame, in case multiple emitters target the same particle
// more particles pushed this frame than are available in the buffer) // (e.g. more particles were pushed this frame than are available in the buffer)
InterlockedMin(particles[particle_idx].kind, semantic_particle_kind); InterlockedMin(particles[particle_idx].kind, semantic_particle_kind);
} }
} }
@ -393,267 +393,267 @@ ComputeShader(V_SimParticlesCS)
Texture2D<u32> occluders = G_Deref(frame.occluders, Texture2D<u32>); Texture2D<u32> occluders = G_Deref(frame.occluders, Texture2D<u32>);
u32 particle_idx = SV_DispatchThreadID; u32 particle_idx = SV_DispatchThreadID;
if (particle_idx < V_ParticlesCap) if (particle_idx < V_ParticlesCap && particles[particle_idx].kind != V_ParticleKind_None)
{ {
V_Particle particle = particles[particle_idx]; V_Particle particle = particles[particle_idx];
b32 prune = 0; b32 prune = 0;
u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx);
f32 rand_offset = Norm16(seed0 >> 0);
f32 rand_angle = Norm16(seed0 >> 16);
f32 rand_speed = Norm16(seed0 >> 32);
f32 rand_falloff = Norm16(seed0 >> 48);
////////////////////////////// //////////////////////////////
//- Initialize particle //- Init particle
if (particle.kind != V_ParticleKind_None) if (particle.kind < 0)
{ {
u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx); u32 emitter_idx = -particle.kind - 1;
f32 rand_offset = Norm16(seed0 >> 0); V_Emitter emitter = G_Deref(frame.emitters, StructuredBuffer<V_Emitter>)[emitter_idx];
f32 rand_angle = Norm16(seed0 >> 16);
f32 rand_speed = Norm16(seed0 >> 32);
f32 rand_falloff = Norm16(seed0 >> 48);
////////////////////////////// f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle);
//- Init f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed);
if (particle.kind < 0) particle = (V_Particle)0;
particle.kind = emitter.kind;
particle.life = 0;
particle.pos = lerp(emitter.pos.p0, emitter.pos.p1, rand_offset);
particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed;
}
//////////////////////////////
//- Simulate
if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
{
V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
RWTexture2D<u32> cells = G_Deref(frame.particle_cells[desc.layer], RWTexture2D<u32>);
RWTexture2D<u32> densities = G_Deref(frame.particle_densities[desc.layer], RWTexture2D<u32>);
// Pack this particle into a single u32 cell value:
//   bits  0..23  particle index
//   bits 24..30  particle kind
//   bit  31      left clear here; OR-ed in further down when the particle stains
u32 packed = 0;
// The index mask must be (1 << 24) - 1. It was previously written as (1 >> 24) - 1,
// which evaluates to 0 - 1 (all bits set) and therefore masked nothing.
packed |= (particle_idx & ((1 << 24) - 1)) << 0;
// The 0xFF mask is wider than the 7-bit kind field; the StaticAssert below keeps bit 31 free
packed |= (particle.kind & 0xFF) << 24;
StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits
StaticAssert(V_ParticleKind_COUNT <= 0x7F); // particle kind must fit in 7 bits
if (particle.life == 0)
{ {
u32 emitter_idx = -particle.kind - 1; Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1));
V_Emitter emitter = G_Deref(frame.emitters, StructuredBuffer<V_Emitter>)[emitter_idx]; if (IsInside(cell_pos, P_WorldCellsDims))
f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle);
f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed);
particle = (V_Particle)0;
particle.kind = emitter.kind;
particle.life = 0;
particle.pos = lerp(emitter.pos.p0, emitter.pos.p1, rand_offset);
particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed;
}
if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
{
V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
RWTexture2D<u32> cells = G_Deref(frame.particle_cells[desc.layer], RWTexture2D<u32>);
RWTexture2D<u32> densities = G_Deref(frame.particle_densities[desc.layer], RWTexture2D<u32>);
u32 packed = 0;
packed |= (particle_idx & ((1 >> 24) - 1)) << 0;
packed |= (particle.kind & 0xFF) << 24;
StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits
StaticAssert(V_ParticleKind_COUNT <= 0x7F); // particle kind must fit in 7 bits
if (particle.life == 0)
{ {
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1)); u32 occluder = occluders[cell_pos];
if (IsInside(cell_pos, P_WorldCellsDims)) b32 occluder_is_wall = occluder == 0xFFFFFFFF;
if (!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall))
{ {
u32 occluder = occluders[cell_pos]; particle.origin_occluder = occluders[cell_pos];
b32 occluder_is_wall = occluder == 0xFFFFFFFF; particle.prev_occluder = particle.origin_occluder;
if (!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall))
{
particle.origin_occluder = occluders[cell_pos];
particle.prev_occluder = particle.origin_occluder;
}
}
else
{
prune = 1;
} }
} }
else
//////////////////////////////
//- Move
b32 collision = 0;
// TODO: Clip to avoid unnecessary iterations outside of world bounds
if (!prune)
{ {
Vec2 p0 = particle.pos; prune = 1;
Vec2 p1 = particle.pos + particle.velocity * frame.dt; }
f32 t = 1; }
//////////////////////////////
//- Move
b32 collision = 0;
// TODO: Clip to avoid unnecessary iterations outside of world bounds
if (!prune)
{
Vec2 p0 = particle.pos;
Vec2 p1 = particle.pos + particle.velocity * frame.dt;
f32 t = 1;
{
Vec2 occluder_p0 = mul(frame.af.world_to_cell, Vec3(p0, 1));
Vec2 occluder_p1 = mul(frame.af.world_to_cell, Vec3(p1, 1));
Vec2I32 cell_p0 = floor(occluder_p0);
Vec2I32 cell_p1 = floor(occluder_p1);
Vec2 delta = occluder_p1 - occluder_p0;
Vec2 inv_delta = 1.0 / delta;
Vec2 dda_step_dir = Vec2((delta.x > 0) - (delta.x < 0), (delta.y > 0) - (delta.y < 0));
Vec2 t_delta = abs(inv_delta);
Vec2 t_max = cell_p0 - occluder_p0;
t_max.x += dda_step_dir.x > 0;
t_max.y += dda_step_dir.y > 0;
t_max *= inv_delta;
t_max = abs(t_max);
Vec2 t_hit = 0;
Vec2I32 cell_pos = cell_p0;
b32 stepped_x = 0;
b32 stepped_y = 0;
// TODO: Tune this
u32 max_iterations = 128;
b32 done = 0;
f32 t_diff = 0;
u32 iteration_idx = 0;
for (; iteration_idx < max_iterations && !done; ++iteration_idx)
{ {
Vec2 occluder_p0 = mul(frame.af.world_to_cell, Vec3(p0, 1)); if (cell_pos.x == cell_p1.x && cell_pos.y == cell_p1.y)
Vec2 occluder_p1 = mul(frame.af.world_to_cell, Vec3(p1, 1));
Vec2I32 cell_p0 = floor(occluder_p0);
Vec2I32 cell_p1 = floor(occluder_p1);
Vec2 delta = occluder_p1 - occluder_p0;
Vec2 inv_delta = 1.0 / delta;
Vec2 dda_step_dir = Vec2((delta.x > 0) - (delta.x < 0), (delta.y > 0) - (delta.y < 0));
Vec2 t_delta = abs(inv_delta);
Vec2 t_max = cell_p0 - occluder_p0;
t_max.x += dda_step_dir.x > 0;
t_max.y += dda_step_dir.y > 0;
t_max *= inv_delta;
t_max = abs(t_max);
Vec2 t_hit = 0;
Vec2I32 cell_pos = cell_p0;
b32 stepped_x = 0;
b32 stepped_y = 0;
// TODO: Tune this
u32 max_iterations = 128;
b32 done = 0;
f32 t_diff = 0;
u32 iteration_idx = 0;
for (; iteration_idx < max_iterations && !done; ++iteration_idx)
{ {
if (cell_pos.x == cell_p1.x && cell_pos.y == cell_p1.y) done = 1;
{ }
done = 1; else if (t_max.x < t_max.y)
} {
else if (t_max.x < t_max.y) cell_pos.x += dda_step_dir.x;
{ f32 old = t_hit.x;
cell_pos.x += dda_step_dir.x; t_hit.x = t_max.x - t_delta.x;
f32 old = t_hit.x; t_diff = t_hit.x - old;
t_hit.x = t_max.x - t_delta.x; t_max.x += t_delta.x;
t_diff = t_hit.x - old; stepped_x = 1;
t_max.x += t_delta.x; stepped_y = 0;
stepped_x = 1; }
stepped_y = 0; else
} {
else cell_pos.y += dda_step_dir.y;
{ f32 old = t_hit.y;
cell_pos.y += dda_step_dir.y; t_hit.y = t_max.y - t_delta.y;
f32 old = t_hit.y; t_diff = t_hit.y - old;
t_hit.y = t_max.y - t_delta.y; t_max.y += t_delta.y;
t_diff = t_hit.y - old; stepped_x = 0;
t_max.y += t_delta.y; stepped_y = 1;
stepped_x = 0; }
stepped_y = 1;
}
Vec2 cell_screen_pos_p0 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(floor(cell_pos), 1)), 1)); Vec2 cell_screen_pos_p0 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(floor(cell_pos), 1)), 1));
Vec2 cell_screen_pos_p1 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(ceil(cell_pos), 1)), 1)); Vec2 cell_screen_pos_p1 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(ceil(cell_pos), 1)), 1));
cell_screen_pos_p1 = max(cell_screen_pos_p1, cell_screen_pos_p0 + 1); cell_screen_pos_p1 = max(cell_screen_pos_p1, cell_screen_pos_p0 + 1);
b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims); b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims);
b32 is_visible = all(cell_screen_pos_p1 >= 0) && all(cell_screen_pos_p0 < frame.screen_dims); b32 is_visible = all(cell_screen_pos_p1 >= 0) && all(cell_screen_pos_p0 < frame.screen_dims);
if (is_in_world) if (is_in_world)
{
f32 stain_delta = abs(t_diff) * desc.stain_rate * frame.dt;
particle.stain_accum += stain_delta;
//- Handle collision
{ {
f32 stain_delta = abs(t_diff) * desc.stain_rate * frame.dt; u32 occluder = occluders[cell_pos];
particle.stain_accum += stain_delta; b32 occluder_is_wall = occluder == 0xFFFFFFFF;
if (occluder != particle.origin_occluder)
//- Handle collision
{ {
u32 occluder = occluders[cell_pos]; particle.origin_occluder = 0;
b32 occluder_is_wall = occluder == 0xFFFFFFFF; }
if (occluder != particle.origin_occluder) if (
occluder != 0 &&
!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall) &&
occluder != particle.origin_occluder
)
{
u64 collision_seed = MixU64(V_ParticleCellBasis ^ seed0 ^ particle.cells_count);
f32 rand_collision_angle = Norm16(collision_seed >> 0);
f32 rand_collision_velocity = Norm16(collision_seed >> 16);
f32 rand_collision_penetration = Norm16(collision_seed >> 32);
if (rand_collision_penetration >= desc.pen_rate)
{ {
particle.origin_occluder = 0; collision = 1;
} done = 1;
if (
occluder != 0 &&
!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall) &&
occluder != particle.origin_occluder
)
{
u64 collision_seed = MixU64(V_ParticleCellBasis ^ seed0 ^ particle.cells_count);
f32 rand_collision_angle = Norm16(collision_seed >> 0);
f32 rand_collision_velocity = Norm16(collision_seed >> 16);
f32 rand_collision_penetration = Norm16(collision_seed >> 32);
if (rand_collision_penetration >= desc.pen_rate)
{ {
collision = 1; if (stepped_x)
done = 1;
{ {
if (stepped_x) if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect))
{ {
if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) particle.velocity.x *= -1;
{
particle.velocity.x *= -1;
}
t = saturate(t_hit.x);
} }
else if (stepped_y) t = saturate(t_hit.x);
}
else if (stepped_y)
{
if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect))
{ {
if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) particle.velocity.y *= -1;
{
particle.velocity.y *= -1;
}
t = saturate(t_hit.y);
} }
{ t = saturate(t_hit.y);
f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle); }
// f32 collision_angle = 0; {
f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle);
// f32 collision_angle = 0;
// f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity); // f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity);
// f32 collision_velocity_falloff = lerp(5000, 10000, rand_collision_velocity); // f32 collision_velocity_falloff = lerp(5000, 10000, rand_collision_velocity);
// f32 collision_velocity_falloff = lerp(500, 10000, rand_collision_velocity); // f32 collision_velocity_falloff = lerp(500, 10000, rand_collision_velocity);
f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity); f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity);
// f32 collision_velocity_falloff = 0; // f32 collision_velocity_falloff = 0;
particle.velocity = RotateVec2Angle(particle.velocity, collision_angle); particle.velocity = RotateVec2Angle(particle.velocity, collision_angle);
particle.velocity *= 1.0f - saturate(collision_velocity_falloff * frame.dt); particle.velocity *= 1.0f - saturate(collision_velocity_falloff * frame.dt);
}
} }
} }
} }
particle.prev_occluder = occluder;
}
if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))
{
prune = 1;
}
if (prune)
{
done = 1;
if (AnyBit(desc.flags, V_ParticleFlag_StainWhenPruned))
{
// particle.stain_accum = max(particle.stain_accum, 1);
particle.stain_accum += 1;
packed |= 1 << 31;
}
}
if (!collision && particle.origin_occluder != 0xFFFFFFFF)
{
u32 stain_count = floor(particle.stain_accum);
u32 density = 1 + stain_count;
u32 commit = packed;
if (stain_count > 0)
{
commit |= (1 << 31);
}
InterlockedMax(cells[cell_pos], commit);
InterlockedAdd(densities[cell_pos], density);
particle.stain_accum -= stain_count;
} }
particle.prev_occluder = occluder;
} }
else
if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))
{ {
done = 1;
prune = 1; prune = 1;
} }
particle.cells_count += 1; if (prune)
{
done = 1;
if (AnyBit(desc.flags, V_ParticleFlag_StainWhenPruned))
{
// particle.stain_accum = max(particle.stain_accum, 1);
particle.stain_accum += 1;
packed |= 1 << 31;
}
}
if (!collision && particle.origin_occluder != 0xFFFFFFFF)
{
u32 stain_count = floor(particle.stain_accum);
u32 density = 1 + stain_count;
u32 commit = packed;
if (stain_count > 0)
{
commit |= (1 << 31);
}
InterlockedMax(cells[cell_pos], commit);
InterlockedAdd(densities[cell_pos], density);
particle.stain_accum -= stain_count;
}
} }
else
{
done = 1;
prune = 1;
}
particle.cells_count += 1;
} }
f32 falloff = saturate(lerp(10, 20, rand_falloff) * frame.dt);
// f32 falloff = saturate(lerp(1, 2, rand_falloff) * frame.dt);
particle.velocity *= 1.0f - falloff;
particle.pos = p0 + (p1 - p0) * t;
} }
particle.life += frame.dt; f32 falloff = saturate(lerp(10, 20, rand_falloff) * frame.dt);
// f32 falloff = saturate(lerp(1, 2, rand_falloff) * frame.dt);
particle.velocity *= 1.0f - falloff;
particle.pos = p0 + (p1 - p0) * t;
} }
if (prune) particle.life += frame.dt;
{
particle.kind = V_ParticleKind_None;
}
particles[particle_idx] = particle;
} }
//////////////////////////////
//- Commit
if (prune)
{
particle.kind = V_ParticleKind_None;
}
particles[particle_idx] = particle;
} }
} }