particle cell densities w/ atomic writes

This commit is contained in:
jacob 2026-02-13 04:25:39 -06:00
parent 5332e1bdd9
commit d9228b78a3
4 changed files with 115 additions and 54 deletions

View File

@ -99,16 +99,16 @@ G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 10
//~ Resource countof //~ Resource countof
#if IsGpu #if IsGpu
// countof(resource): size of a GPU resource as reported by its GetDimensions method.
//   - Structured buffers:   number of elements
//   - Byte address buffers: size in bytes
//   - Textures:             width / (width, height) / (width, height, depth) in texels
//
// Both StructuredBuffer and RWStructuredBuffer only provide the two-argument
// GetDimensions(numStructs, stride), so the stride is queried and discarded.
template<typename T> u32 countof(StructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }
template<typename T> u32 countof(RWStructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }
u32 countof(ByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; }
u32 countof(RWByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> u32 countof(Texture1D<T> obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> u32 countof(RWTexture1D<T> obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> Vec2U32 countof(Texture2D<T> obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; }
template<typename T> Vec2U32 countof(RWTexture2D<T> obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; }
template<typename T> Vec3U32 countof(Texture3D<T> obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; }
template<typename T> Vec3U32 countof(RWTexture3D<T> obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; }
#endif #endif
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////

View File

@ -395,12 +395,14 @@ void V_TickForever(WaveLaneCtx *lane)
G_ResourceHandle gpu_cells_res = Zi; G_ResourceHandle gpu_cells_res = Zi;
G_ResourceHandle gpu_stains_res = Zi; G_ResourceHandle gpu_stains_res = Zi;
G_ResourceHandle gpu_drynesses_res = Zi; G_ResourceHandle gpu_drynesses_res = Zi;
G_ResourceHandle gpu_densities_res = Zi;
G_Texture2DRef gpu_tiles = Zi; G_Texture2DRef gpu_tiles = Zi;
G_RWStructuredBufferRef gpu_particles = Zi; G_RWStructuredBufferRef gpu_particles = Zi;
G_RWTexture2DRef gpu_cells = Zi; G_RWTexture2DRef gpu_cells = Zi;
G_RWTexture2DRef gpu_stains = Zi; G_RWTexture2DRef gpu_stains = Zi;
G_RWTexture2DRef gpu_drynesses = Zi; G_RWTexture2DRef gpu_drynesses = Zi;
G_RWTexture2DRef gpu_densities = Zi;
{ {
G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct); G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct);
{ {
@ -432,9 +434,10 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_cells_res = G_PushTexture2D( gpu_cells_res = G_PushTexture2D(
gpu_perm, cl, gpu_perm, cl,
// G_Format_R8_Uint, // G_Format_R8_Uint,
G_Format_R32_Uint,
// G_Format_R11G11B10_Float, // G_Format_R11G11B10_Float,
// G_Format_R10G10B10A2_Unorm, // G_Format_R10G10B10A2_Unorm,
G_Format_R16G16B16A16_Float, // G_Format_R16G16B16A16_Float,
cells_dims, cells_dims,
G_Layout_DirectQueue_ShaderReadWrite, G_Layout_DirectQueue_ShaderReadWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
@ -473,6 +476,22 @@ void V_TickForever(WaveLaneCtx *lane)
); );
gpu_drynesses = G_PushRWTexture2DRef(gpu_perm, gpu_drynesses_res); gpu_drynesses = G_PushRWTexture2DRef(gpu_perm, gpu_drynesses_res);
} }
// Init densities texture
// Allocates the per-cell density texture with the same dimensions as the cells
// texture (cells_dims), zero-initialized and writable from shaders, then creates
// the read-write reference that is later handed to the shared frame.
// NOTE(review): presumably R32_Uint is required because the particle sim shader
// accumulates into this texture with InterlockedAdd -- confirm before changing the format.
{
gpu_densities_res = G_PushTexture2D(
gpu_perm, cl,
// Formats tried previously (kept for reference):
// G_Format_R8_Uint,
// G_Format_R11G11B10_Float,
// G_Format_R10G10B10A2_Unorm,
// G_Format_R16_Float,
G_Format_R32_Uint,
cells_dims,
G_Layout_DirectQueue_ShaderReadWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Densities")
);
// Read-write view used by the compute / pixel shaders
gpu_densities = G_PushRWTexture2DRef(gpu_perm, gpu_densities_res);
}
} }
G_CommitCommandList(cl); G_CommitCommandList(cl);
} }
@ -594,6 +613,7 @@ void V_TickForever(WaveLaneCtx *lane)
frame->cells = gpu_cells; frame->cells = gpu_cells;
frame->stains = gpu_stains; frame->stains = gpu_stains;
frame->drynesses = gpu_drynesses; frame->drynesses = gpu_drynesses;
frame->densities = gpu_densities;
} }
////////////////////////////// //////////////////////////////
@ -2503,7 +2523,8 @@ void V_TickForever(WaveLaneCtx *lane)
////////////////////////////// //////////////////////////////
//- Push test emitter //- Push test emitter
if (frame->held_buttons[Button_G] && !prev_frame->held_buttons[Button_G]) if (frame->held_buttons[Button_G])
// if (frame->held_buttons[Button_G] && !prev_frame->held_buttons[Button_G])
{ {
V_Emitter emitter = Zi; V_Emitter emitter = Zi;
@ -2532,8 +2553,8 @@ void V_TickForever(WaveLaneCtx *lane)
// emitter.count = Mebi(16); // emitter.count = Mebi(16);
// emitter.count = Mebi(2); // emitter.count = Mebi(2);
// emitter.count = Kibi(32); // emitter.count = Kibi(32);
emitter.count = Kibi(8); // emitter.count = Kibi(8);
// emitter.count = Kibi(1); emitter.count = Kibi(1);
// emitter.count = 128; // emitter.count = 128;
// emitter.count = 32; // emitter.count = 32;
// emitter.count = 1; // emitter.count = 1;

View File

@ -36,8 +36,9 @@ ComputeShader2D(V_PrepareShadeCS, 8, 8)
ComputeShader2D(V_PrepareCellsCS, 8, 8) ComputeShader2D(V_PrepareCellsCS, 8, 8)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
RWTexture2D<Vec4> cells = G_Dereference<Vec4>(frame.cells); RWTexture2D<u32> cells = G_Dereference<u32>(frame.cells);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses); RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
RWTexture2D<u32> densities = G_Dereference<u32>(frame.densities);
Vec2 cells_pos = SV_DispatchThreadID + 0.5; Vec2 cells_pos = SV_DispatchThreadID + 0.5;
if (all(cells_pos < countof(cells))) if (all(cells_pos < countof(cells)))
@ -45,6 +46,9 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8)
// Clear cell // Clear cell
cells[cells_pos] = 0; cells[cells_pos] = 0;
// Clear density
densities[cells_pos] = 0;
// Increase dryness // Increase dryness
// TODO: Use simulation dt // TODO: Use simulation dt
f32 dry_rate = frame.dt * 0.1; f32 dry_rate = frame.dt * 0.1;
@ -156,9 +160,10 @@ ComputeShader(V_SimParticlesCS, 64)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles); RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
RWTexture2D<Vec4> cells = G_Dereference<Vec4>(frame.cells); RWTexture2D<u32> cells = G_Dereference<u32>(frame.cells);
RWTexture2D<Vec4> stains = G_Dereference<Vec4>(frame.stains); RWTexture2D<Vec4> stains = G_Dereference<Vec4>(frame.stains);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses); RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
RWTexture2D<u32> densities = G_Dereference<u32>(frame.densities);
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles); Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
u32 particle_idx = SV_DispatchThreadID; u32 particle_idx = SV_DispatchThreadID;
@ -171,11 +176,11 @@ ComputeShader(V_SimParticlesCS, 64)
if (particle.kind != 0) if (particle.kind != 0)
{ {
u64 seed0 = MixU64(particle_idx); u64 seed = MixU64(P_ParticleSimBasis ^ particle_idx);
f32 rand_offset = Norm16(seed0 >> 0); f32 rand_offset = Norm16(seed >> 0);
f32 rand_angle = Norm16(seed0 >> 16); f32 rand_angle = Norm16(seed >> 16);
f32 rand_speed = Norm16(seed0 >> 32); f32 rand_speed = Norm16(seed >> 32);
f32 rand_falloff = Norm16(seed0 >> 48); f32 rand_falloff = Norm16(seed >> 48);
////////////////////////////// //////////////////////////////
//- Init //- Init
@ -283,7 +288,7 @@ ComputeShader(V_SimParticlesCS, 64)
particle.velocity.y *= -1; particle.velocity.y *= -1;
} }
{ {
u64 collision_seed = MixU64s(particle_idx, particle.collisions_count); u64 collision_seed = MixU64s(seed, particle.collisions_count);
f32 rand_collision_angle = Norm16(collision_seed >> 0); f32 rand_collision_angle = Norm16(collision_seed >> 0);
f32 rand_collision_velocity = Norm16(collision_seed >> 16); f32 rand_collision_velocity = Norm16(collision_seed >> 16);
f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle); f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle);
@ -311,8 +316,6 @@ ComputeShader(V_SimParticlesCS, 64)
////////////////////////////// //////////////////////////////
//- Commit //- Commit
// FIXME: Atomic writes
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1)); Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1));
Vec2 screen_pos = mul(frame.af.world_to_screen, Vec3(particle.pos, 1)); Vec2 screen_pos = mul(frame.af.world_to_screen, Vec3(particle.pos, 1));
b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(cells)); b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(cells));
@ -321,17 +324,6 @@ ComputeShader(V_SimParticlesCS, 64)
b32 should_draw = is_in_world && is_in_screen; b32 should_draw = is_in_world && is_in_screen;
b32 should_stain = 0; b32 should_stain = 0;
// TODO: Remove this
Vec4 color = Color_Purple;
switch (particle.kind)
{
case V_ParticleKind_Test:
{
color = Color_Yellow;
} break;
}
// // Stain // // Stain
// if (is_in_world) // if (is_in_world)
// { // {
@ -360,7 +352,14 @@ ComputeShader(V_SimParticlesCS, 64)
// Draw // Draw
if (should_draw)
{
  // Pack the particle into a single u32 so the cell can be written with one atomic:
  //   bits  0..23 = particle index
  //   bits 24..31 = particle kind
  // The kind occupies the most significant bits, so InterlockedMax keeps the
  // particle with the highest kind (ties resolved by the highest index) when
  // several particles land in the same cell.
  StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits
  StaticAssert(V_ParticleKind_COUNT <= 0xFF); // particle kind must fit in 8 bits

  u32 packed = 0;
  // Mask is (1 << 24) - 1 == 0x00FFFFFF; a right shift here would yield an
  // all-ones mask and let the index spill into the kind bits.
  packed |= (particle_idx & ((1 << 24) - 1)) << 0;
  packed |= (particle.kind & 0xFF) << 24;
  InterlockedMax(cells[cell_pos], packed);

  // Count every drawn particle that lands in this cell
  InterlockedAdd(densities[cell_pos], 1);
}
// { // {
@ -444,7 +443,7 @@ ComputeShader2D(V_ShadeCS, 8, 8)
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1)); Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1));
Vec2 tile_pos = mul(frame.af.world_to_tile, Vec3(world_pos, 1)); Vec2 tile_pos = mul(frame.af.world_to_tile, Vec3(world_pos, 1));
P_TileKind tile = tiles.Load(Vec3(tile_pos, 0)); P_TileKind tile = tiles[tile_pos];
Vec2 half_world_dims = Vec2(P_WorldPitch, P_WorldPitch) * 0.5; Vec2 half_world_dims = Vec2(P_WorldPitch, P_WorldPitch) * 0.5;
b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(stains)); b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(stains));
@ -486,10 +485,12 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro); // Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro); Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
RWTexture2D<Vec4> stains = G_Dereference<Vec4>(frame.stains); RWTexture2D<Vec4> stains = G_Dereference<Vec4>(frame.stains);
RWTexture2D<Vec4> cells = G_Dereference<Vec4>(frame.cells); RWTexture2D<u32> cells = G_Dereference<u32>(frame.cells);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses); RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles); Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
SamplerState clamp_sampler = G_Dereference(frame.pt_clamp_sampler); SamplerState clamp_sampler = G_Dereference(frame.pt_clamp_sampler);
RWTexture2D<u32> densities = G_Dereference<u32>(frame.densities);
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
Vec2 screen_pos = input.sv_position.xy; Vec2 screen_pos = input.sv_position.xy;
Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1)); Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1));
@ -502,7 +503,7 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
Vec2 world_bounds_screen_p1 = mul(frame.af.world_to_screen, Vec3(half_world_dims.xy, 1)); Vec2 world_bounds_screen_p1 = mul(frame.af.world_to_screen, Vec3(half_world_dims.xy, 1));
b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(cells)); b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(cells));
P_TileKind tile = tiles.Load(Vec3(tile_pos, 0)); P_TileKind tile = tiles[tile_pos];
P_TileKind equipped_tile = frame.equipped_tile; P_TileKind equipped_tile = frame.equipped_tile;
////////////////////////////// //////////////////////////////
@ -534,14 +535,14 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
b32 tile_is_wall = 0; b32 tile_is_wall = 0;
Vec4 tile_color = 0; Vec4 tile_color = 0;
{ {
P_TileKind tile_tl = tiles.Load(Vec3(tile_pos.x - 0.99, tile_pos.y - 0.99, 0)); P_TileKind tile_tl = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y - 0.99)];
P_TileKind tile_tr = tiles.Load(Vec3(tile_pos.x + 0.99, tile_pos.y - 0.99, 0)); P_TileKind tile_tr = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y - 0.99)];
P_TileKind tile_br = tiles.Load(Vec3(tile_pos.x + 0.99, tile_pos.y + 0.99, 0)); P_TileKind tile_br = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y + 0.99)];
P_TileKind tile_bl = tiles.Load(Vec3(tile_pos.x - 0.99, tile_pos.y + 0.99, 0)); P_TileKind tile_bl = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y + 0.99)];
P_TileKind tile_t = tiles.Load(Vec3(tile_pos.x, tile_pos.y - 0.99, 0)); P_TileKind tile_t = tiles[Vec2(tile_pos.x, tile_pos.y - 0.99)];
P_TileKind tile_r = tiles.Load(Vec3(tile_pos.x + 0.99, tile_pos.y, 0)); P_TileKind tile_r = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y)];
P_TileKind tile_b = tiles.Load(Vec3(tile_pos.x, tile_pos.y + 0.99, 0)); P_TileKind tile_b = tiles[Vec2(tile_pos.x, tile_pos.y + 0.99)];
P_TileKind tile_l = tiles.Load(Vec3(tile_pos.x - 0.99, tile_pos.y, 0)); P_TileKind tile_l = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y)];
f32 tile_edge_dist = Inf; f32 tile_edge_dist = Inf;
P_TileKind edge_tile = tile; P_TileKind edge_tile = tile;
@ -603,8 +604,8 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
Vec4 stain_color = 0; Vec4 stain_color = 0;
{ {
f32 dryness = drynesses.Load(cell_pos); f32 dryness = drynesses[cell_pos];
Vec4 stain = stains.Load(cell_pos); Vec4 stain = stains[cell_pos];
stain_color = V_DryColor(stain, dryness); stain_color = V_DryColor(stain, dryness);
stain_color.rgb *= 1.0 - (0.75 * tile_is_wall); // Darken wall stains stain_color.rgb *= 1.0 - (0.75 * tile_is_wall); // Darken wall stains
} }
@ -612,7 +613,7 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
////////////////////////////// //////////////////////////////
//- Albedo tex //- Albedo tex
Vec4 albedo_tex_color = albedo_tex.Load(Vec3(screen_pos, 0)); Vec4 albedo_tex_color = albedo_tex[screen_pos];
////////////////////////////// //////////////////////////////
//- Compose albedo //- Compose albedo
@ -635,8 +636,43 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
// TODO: Remove this // TODO: Remove this
Vec4 particle_color = cells.Load(cell_pos); // Vec4 particle_color = cells[cell_pos];
particle_color.rgb *= particle_color.a; // particle_color.rgb *= particle_color.a;
// Resolve the particle stored in this cell (if any) into a premultiplied color
Vec4 particle_color = 0;
{
  // Cell layout: bits 24..31 = particle kind, bits 0..23 = particle index
  u32 cell_bits = cells[cell_pos];
  V_ParticleKind cell_kind = (V_ParticleKind)((cell_bits >> 24) & 0xFF);
  u32 cell_particle_idx = cell_bits & ((1 << 24) - 1);

  b32 has_particle = (cell_kind != V_ParticleKind_None) && (cell_particle_idx < V_ParticlesCap);
  if (has_particle && cell_kind == V_ParticleKind_Test)
  {
    // Dark gray base with a small per-particle brightness variation
    u64 color_seed = MixU64(P_ParticleCompositeBasis ^ cell_particle_idx);
    f32 color_jitter = Norm16(color_seed >> 0);
    Vec4 test_color = Vec4(0.15, 0.15, 0.15, 1);
    test_color.rgb += (color_jitter - 0.5) * 0.025;

    // Opacity grows with the number of particles counted in this cell
    f32 cell_density = densities[cell_pos];
    f32 density_t = smoothstep(-10, 32, cell_density);
    test_color.a = lerp(0, 0.85, density_t);

    particle_color = test_color;
  }

  // Premultiply alpha
  particle_color.rgb *= particle_color.a;
}
////////////////////////////// //////////////////////////////
//- Compose world //- Compose world

View File

@ -1,7 +1,7 @@
// #define V_ParticlesCap Kibi(128) // #define V_ParticlesCap Kibi(128)
// #define V_ParticlesCap Mebi(1) // #define V_ParticlesCap Mebi(1)
#define V_ParticlesCap Mebi(2) // #define V_ParticlesCap Mebi(2)
// #define V_ParticlesCap Mebi(16) #define V_ParticlesCap Mebi(16)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ State types //~ State types
@ -149,6 +149,7 @@ Struct(V_SharedFrame)
G_RWTexture2DRef cells; G_RWTexture2DRef cells;
G_RWTexture2DRef stains; G_RWTexture2DRef stains;
G_RWTexture2DRef drynesses; G_RWTexture2DRef drynesses;
G_RWTexture2DRef densities;
G_StructuredBufferRef dverts; G_StructuredBufferRef dverts;
G_StructuredBufferRef quads; G_StructuredBufferRef quads;
@ -157,6 +158,9 @@ Struct(V_SharedFrame)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Particle types //~ Particle types
#define P_ParticleSimBasis 0xb49f2d9e406873b9ull
#define P_ParticleCompositeBasis 0x569aa8341ecc0ea3ull
// NOTE: Higher particle kinds draw over lower ones // NOTE: Higher particle kinds draw over lower ones
Enum(V_ParticleKind) Enum(V_ParticleKind)
{ {