particle cell densities w/ atomic writes

This commit is contained in:
jacob 2026-02-13 04:25:39 -06:00
parent 5332e1bdd9
commit d9228b78a3
4 changed files with 115 additions and 54 deletions

View File

@ -99,16 +99,16 @@ G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 10
//~ Resource countof
#if IsGpu
template<typename T> u32 countof(StructuredBuffer<T> buff) { u32 result; buff.GetDimensions(result); return result; }
template<typename T> u32 countof(RWStructuredBuffer<T> buff) { u32 result; buff.GetDimensions(result); return result; }
u32 countof(ByteAddressBuffer buff) { u32 result; buff.GetDimensions(result); return result; }
u32 countof(RWByteAddressBuffer buff) { u32 result; buff.GetDimensions(result); return result; }
template<typename T> u32 countof(Texture1D<T> tex) { u32 result; tex.GetDimensions(result); return result; }
template<typename T> u32 countof(RWTexture1D<T> tex) { u32 result; tex.GetDimensions(result); return result; }
template<typename T> Vec2U32 countof(Texture2D<T> tex) { Vec2U32 result; tex.GetDimensions(result.x, result.y); return result; }
template<typename T> Vec2U32 countof(RWTexture2D<T> tex) { Vec2U32 result; tex.GetDimensions(result.x, result.y); return result; }
template<typename T> Vec3U32 countof(Texture3D<T> tex) { Vec3U32 result; tex.GetDimensions(result.x, result.y, result.z); return result; }
template<typename T> Vec3U32 countof(RWTexture3D<T> tex) { Vec3U32 result; tex.GetDimensions(result.x, result.y, result.z); return result; }
// Element count of a structured buffer. GetDimensions on structured buffers reports (count, stride); the stride is discarded.
template<typename T> u32 countof(StructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }
template<typename T> u32 countof(RWStructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }
u32 countof(ByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; }
u32 countof(RWByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> u32 countof(Texture1D<T> obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> u32 countof(RWTexture1D<T> obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> Vec2U32 countof(Texture2D<T> obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; }
template<typename T> Vec2U32 countof(RWTexture2D<T> obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; }
template<typename T> Vec3U32 countof(Texture3D<T> obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; }
template<typename T> Vec3U32 countof(RWTexture3D<T> obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; }
#endif
////////////////////////////////////////////////////////////

View File

@ -395,12 +395,14 @@ void V_TickForever(WaveLaneCtx *lane)
G_ResourceHandle gpu_cells_res = Zi;
G_ResourceHandle gpu_stains_res = Zi;
G_ResourceHandle gpu_drynesses_res = Zi;
G_ResourceHandle gpu_densities_res = Zi;
G_Texture2DRef gpu_tiles = Zi;
G_RWStructuredBufferRef gpu_particles = Zi;
G_RWTexture2DRef gpu_cells = Zi;
G_RWTexture2DRef gpu_stains = Zi;
G_RWTexture2DRef gpu_drynesses = Zi;
G_RWTexture2DRef gpu_densities = Zi;
{
G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct);
{
@ -432,9 +434,10 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_cells_res = G_PushTexture2D(
gpu_perm, cl,
// G_Format_R8_Uint,
G_Format_R32_Uint,
// G_Format_R11G11B10_Float,
// G_Format_R10G10B10A2_Unorm,
G_Format_R16G16B16A16_Float,
// G_Format_R16G16B16A16_Float,
cells_dims,
G_Layout_DirectQueue_ShaderReadWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
@ -473,6 +476,22 @@ void V_TickForever(WaveLaneCtx *lane)
);
gpu_drynesses = G_PushRWTexture2DRef(gpu_perm, gpu_drynesses_res);
}
// Init densities texture
// Creates a 2D texture with the same dimensions as the cells texture (cells_dims), one u32 per cell,
// and stores a read/write reference to it in gpu_densities.
// NOTE(review): presumably R32_Uint is required because shaders update this texture with InterlockedAdd — confirm
{
gpu_densities_res = G_PushTexture2D(
gpu_perm, cl,
// G_Format_R8_Uint,
// G_Format_R11G11B10_Float,
// G_Format_R10G10B10A2_Unorm,
// G_Format_R16_Float,
G_Format_R32_Uint,
cells_dims,
G_Layout_DirectQueue_ShaderReadWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Densities")
);
gpu_densities = G_PushRWTexture2DRef(gpu_perm, gpu_densities_res);
}
}
G_CommitCommandList(cl);
}
@ -594,6 +613,7 @@ void V_TickForever(WaveLaneCtx *lane)
frame->cells = gpu_cells;
frame->stains = gpu_stains;
frame->drynesses = gpu_drynesses;
frame->densities = gpu_densities;
}
//////////////////////////////
@ -2503,7 +2523,8 @@ void V_TickForever(WaveLaneCtx *lane)
//////////////////////////////
//- Push test emitter
if (frame->held_buttons[Button_G] && !prev_frame->held_buttons[Button_G])
if (frame->held_buttons[Button_G])
// if (frame->held_buttons[Button_G] && !prev_frame->held_buttons[Button_G])
{
V_Emitter emitter = Zi;
@ -2532,8 +2553,8 @@ void V_TickForever(WaveLaneCtx *lane)
// emitter.count = Mebi(16);
// emitter.count = Mebi(2);
// emitter.count = Kibi(32);
emitter.count = Kibi(8);
// emitter.count = Kibi(1);
// emitter.count = Kibi(8);
emitter.count = Kibi(1);
// emitter.count = 128;
// emitter.count = 32;
// emitter.count = 1;

View File

@ -36,8 +36,9 @@ ComputeShader2D(V_PrepareShadeCS, 8, 8)
ComputeShader2D(V_PrepareCellsCS, 8, 8)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
RWTexture2D<Vec4> cells = G_Dereference<Vec4>(frame.cells);
RWTexture2D<u32> cells = G_Dereference<u32>(frame.cells);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
RWTexture2D<u32> densities = G_Dereference<u32>(frame.densities);
Vec2 cells_pos = SV_DispatchThreadID + 0.5;
if (all(cells_pos < countof(cells)))
@ -45,6 +46,9 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8)
// Clear cell
cells[cells_pos] = 0;
// Clear density
densities[cells_pos] = 0;
// Increase dryness
// TODO: Use simulation dt
f32 dry_rate = frame.dt * 0.1;
@ -156,9 +160,10 @@ ComputeShader(V_SimParticlesCS, 64)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
RWTexture2D<Vec4> cells = G_Dereference<Vec4>(frame.cells);
RWTexture2D<u32> cells = G_Dereference<u32>(frame.cells);
RWTexture2D<Vec4> stains = G_Dereference<Vec4>(frame.stains);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
RWTexture2D<u32> densities = G_Dereference<u32>(frame.densities);
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
u32 particle_idx = SV_DispatchThreadID;
@ -171,11 +176,11 @@ ComputeShader(V_SimParticlesCS, 64)
if (particle.kind != 0)
{
u64 seed0 = MixU64(particle_idx);
f32 rand_offset = Norm16(seed0 >> 0);
f32 rand_angle = Norm16(seed0 >> 16);
f32 rand_speed = Norm16(seed0 >> 32);
f32 rand_falloff = Norm16(seed0 >> 48);
u64 seed = MixU64(P_ParticleSimBasis ^ particle_idx);
f32 rand_offset = Norm16(seed >> 0);
f32 rand_angle = Norm16(seed >> 16);
f32 rand_speed = Norm16(seed >> 32);
f32 rand_falloff = Norm16(seed >> 48);
//////////////////////////////
//- Init
@ -283,7 +288,7 @@ ComputeShader(V_SimParticlesCS, 64)
particle.velocity.y *= -1;
}
{
u64 collision_seed = MixU64s(particle_idx, particle.collisions_count);
u64 collision_seed = MixU64s(seed, particle.collisions_count);
f32 rand_collision_angle = Norm16(collision_seed >> 0);
f32 rand_collision_velocity = Norm16(collision_seed >> 16);
f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle);
@ -311,8 +316,6 @@ ComputeShader(V_SimParticlesCS, 64)
//////////////////////////////
//- Commit
// FIXME: Atomic writes
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1));
Vec2 screen_pos = mul(frame.af.world_to_screen, Vec3(particle.pos, 1));
b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(cells));
@ -321,17 +324,6 @@ ComputeShader(V_SimParticlesCS, 64)
b32 should_draw = is_in_world && is_in_screen;
b32 should_stain = 0;
// TODO: Remove this
Vec4 color = Color_Purple;
switch (particle.kind)
{
case V_ParticleKind_Test:
{
color = Color_Yellow;
} break;
}
// // Stain
// if (is_in_world)
// {
@ -360,7 +352,14 @@ ComputeShader(V_SimParticlesCS, 64)
// Draw
if (should_draw)
{
    // Cell payload layout: bits 0..23 = particle index, bits 24..31 = particle kind.
    // Kind occupies the high bits so that InterlockedMax keeps the highest kind in
    // each cell (ties between equal kinds resolve to the highest particle index).
    u32 packed = 0;
    packed |= (particle_idx & ((1 << 24) - 1)) << 0; // 24-bit mask, matching the unpack in V_CompositePS
    packed |= (particle.kind & 0xFF) << 24;
    StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits
    StaticAssert(V_ParticleKind_COUNT <= 0xFF); // particle kind must fit in 8 bits

    // Several particles can land in the same cell within one dispatch, so both
    // the winner selection and the per-cell particle count use atomics.
    InterlockedMax(cells[cell_pos], packed);
    InterlockedAdd(densities[cell_pos], 1);
}
// {
@ -444,7 +443,7 @@ ComputeShader2D(V_ShadeCS, 8, 8)
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1));
Vec2 tile_pos = mul(frame.af.world_to_tile, Vec3(world_pos, 1));
P_TileKind tile = tiles.Load(Vec3(tile_pos, 0));
P_TileKind tile = tiles[tile_pos];
Vec2 half_world_dims = Vec2(P_WorldPitch, P_WorldPitch) * 0.5;
b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(stains));
@ -486,10 +485,12 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
RWTexture2D<Vec4> stains = G_Dereference<Vec4>(frame.stains);
RWTexture2D<Vec4> cells = G_Dereference<Vec4>(frame.cells);
RWTexture2D<u32> cells = G_Dereference<u32>(frame.cells);
RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
SamplerState clamp_sampler = G_Dereference(frame.pt_clamp_sampler);
RWTexture2D<u32> densities = G_Dereference<u32>(frame.densities);
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
Vec2 screen_pos = input.sv_position.xy;
Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1));
@ -502,7 +503,7 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
Vec2 world_bounds_screen_p1 = mul(frame.af.world_to_screen, Vec3(half_world_dims.xy, 1));
b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(cells));
P_TileKind tile = tiles.Load(Vec3(tile_pos, 0));
P_TileKind tile = tiles[tile_pos];
P_TileKind equipped_tile = frame.equipped_tile;
//////////////////////////////
@ -534,14 +535,14 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
b32 tile_is_wall = 0;
Vec4 tile_color = 0;
{
P_TileKind tile_tl = tiles.Load(Vec3(tile_pos.x - 0.99, tile_pos.y - 0.99, 0));
P_TileKind tile_tr = tiles.Load(Vec3(tile_pos.x + 0.99, tile_pos.y - 0.99, 0));
P_TileKind tile_br = tiles.Load(Vec3(tile_pos.x + 0.99, tile_pos.y + 0.99, 0));
P_TileKind tile_bl = tiles.Load(Vec3(tile_pos.x - 0.99, tile_pos.y + 0.99, 0));
P_TileKind tile_t = tiles.Load(Vec3(tile_pos.x, tile_pos.y - 0.99, 0));
P_TileKind tile_r = tiles.Load(Vec3(tile_pos.x + 0.99, tile_pos.y, 0));
P_TileKind tile_b = tiles.Load(Vec3(tile_pos.x, tile_pos.y + 0.99, 0));
P_TileKind tile_l = tiles.Load(Vec3(tile_pos.x - 0.99, tile_pos.y, 0));
P_TileKind tile_tl = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y - 0.99)];
P_TileKind tile_tr = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y - 0.99)];
P_TileKind tile_br = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y + 0.99)];
P_TileKind tile_bl = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y + 0.99)];
P_TileKind tile_t = tiles[Vec2(tile_pos.x, tile_pos.y - 0.99)];
P_TileKind tile_r = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y)];
P_TileKind tile_b = tiles[Vec2(tile_pos.x, tile_pos.y + 0.99)];
P_TileKind tile_l = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y)];
f32 tile_edge_dist = Inf;
P_TileKind edge_tile = tile;
@ -603,8 +604,8 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
Vec4 stain_color = 0;
{
f32 dryness = drynesses.Load(cell_pos);
Vec4 stain = stains.Load(cell_pos);
f32 dryness = drynesses[cell_pos];
Vec4 stain = stains[cell_pos];
stain_color = V_DryColor(stain, dryness);
stain_color.rgb *= 1.0 - (0.75 * tile_is_wall); // Darken wall stains
}
@ -612,7 +613,7 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
//////////////////////////////
//- Albedo tex
Vec4 albedo_tex_color = albedo_tex.Load(Vec3(screen_pos, 0));
Vec4 albedo_tex_color = albedo_tex[screen_pos];
//////////////////////////////
//- Compose albedo
@ -635,8 +636,43 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
// TODO: Remove this
Vec4 particle_color = cells.Load(cell_pos);
// Vec4 particle_color = cells[cell_pos];
// particle_color.rgb *= particle_color.a;
// Resolve the particle stored in this cell (if any) into an alpha-premultiplied color.
// Stays zero when the cell holds no particle or a kind that is not handled below.
Vec4 particle_color = 0;
{
// Cell payload layout: bits 24..31 = particle kind, bits 0..23 = particle index
u32 packed = cells[cell_pos];
V_ParticleKind kind = (V_ParticleKind)((packed >> 24) & 0xFF);
if (kind != V_ParticleKind_None)
{
u32 particle_idx = packed & ((1 << 24) - 1);
if (particle_idx < V_ParticlesCap)
{
if (kind == V_ParticleKind_Test)
{
// Seed depends only on the particle index (and a fixed basis), so the variation is stable per particle.
// NOTE(review): presumably Norm16 maps the low 16 bits to [0, 1] — confirm
u64 seed = MixU64(P_ParticleCompositeBasis ^ particle_idx);
f32 rand_color = Norm16(seed >> 0);
Vec4 color = Vec4(0.15, 0.15, 0.15, 1);
color.rgb += (rand_color - 0.5) * 0.025;
{
// Alpha scales with the per-cell density value; the negative lower edge
// keeps alpha above zero even at very low densities.
f32 density = densities[cell_pos];
// f32 t = saturate(density / 10.0);
f32 t = smoothstep(-10, 32, density);
color.a = lerp(0, 0.85, t);
}
particle_color = color;
}
}
}
// Premultiply alpha
particle_color.rgb *= particle_color.a;
}
//////////////////////////////
//- Compose world

View File

@ -1,7 +1,7 @@
// #define V_ParticlesCap Kibi(128)
// #define V_ParticlesCap Mebi(1)
#define V_ParticlesCap Mebi(2)
// #define V_ParticlesCap Mebi(16)
// #define V_ParticlesCap Mebi(2)
#define V_ParticlesCap Mebi(16)
////////////////////////////////////////////////////////////
//~ State types
@ -149,6 +149,7 @@ Struct(V_SharedFrame)
G_RWTexture2DRef cells;
G_RWTexture2DRef stains;
G_RWTexture2DRef drynesses;
G_RWTexture2DRef densities;
G_StructuredBufferRef dverts;
G_StructuredBufferRef quads;
@ -157,6 +158,9 @@ Struct(V_SharedFrame)
////////////////////////////////////////////////////////////
//~ Particle types
#define P_ParticleSimBasis 0xb49f2d9e406873b9ull
#define P_ParticleCompositeBasis 0x569aa8341ecc0ea3ull
// NOTE: Higher particle kinds draw over lower ones
Enum(V_ParticleKind)
{