From d9228b78a3346e0a9999dfd0eb879e1592565cb4 Mon Sep 17 00:00:00 2001 From: jacob Date: Fri, 13 Feb 2026 04:25:39 -0600 Subject: [PATCH] particle cell densities w/ atomic writes --- src/gpu/gpu_shader_core.cgh | 20 +++--- src/pp/pp_vis/pp_vis_core.c | 29 +++++++-- src/pp/pp_vis/pp_vis_gpu.g | 112 +++++++++++++++++++++----------- src/pp/pp_vis/pp_vis_shared.cgh | 8 ++- 4 files changed, 115 insertions(+), 54 deletions(-) diff --git a/src/gpu/gpu_shader_core.cgh b/src/gpu/gpu_shader_core.cgh index d67443ce..df3660fb 100644 --- a/src/gpu/gpu_shader_core.cgh +++ b/src/gpu/gpu_shader_core.cgh @@ -99,16 +99,16 @@ G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 10 //~ Resource countof #if IsGpu - template u32 countof(StructuredBuffer buff) { u32 result; buff.GetDimensions(result); return result; } - template u32 countof(RWStructuredBuffer buff) { u32 result; buff.GetDimensions(result); return result; } - u32 countof(ByteAddressBuffer buff) { u32 result; buff.GetDimensions(result); return result; } - u32 countof(RWByteAddressBuffer buff) { u32 result; buff.GetDimensions(result); return result; } - template u32 countof(Texture1D tex) { u32 result; tex.GetDimensions(result); return result; } - template u32 countof(RWTexture1D tex) { u32 result; tex.GetDimensions(result); return result; } - template Vec2U32 countof(Texture2D tex) { Vec2U32 result; tex.GetDimensions(result.x, result.y); return result; } - template Vec2U32 countof(RWTexture2D tex) { Vec2U32 result; tex.GetDimensions(result.x, result.y); return result; } - template Vec3U32 countof(Texture3D tex) { Vec3U32 result; tex.GetDimensions(result.x, result.y, result.z); return result; } - template Vec3U32 countof(RWTexture3D tex) { Vec3U32 result; tex.GetDimensions(result.x, result.y, result.z); return result; } + template u32 countof(StructuredBuffer obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; } // stride is a required out-param; only the element count is returned + template u32 countof(RWStructuredBuffer obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); 
return result; } + u32 countof(ByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; } + u32 countof(RWByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; } + template u32 countof(Texture1D obj) { u32 result; obj.GetDimensions(result); return result; } + template u32 countof(RWTexture1D obj) { u32 result; obj.GetDimensions(result); return result; } + template Vec2U32 countof(Texture2D obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; } + template Vec2U32 countof(RWTexture2D obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; } + template Vec3U32 countof(Texture3D obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; } + template Vec3U32 countof(RWTexture3D obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; } #endif //////////////////////////////////////////////////////////// diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index a6321bb3..834c8016 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -395,12 +395,14 @@ void V_TickForever(WaveLaneCtx *lane) G_ResourceHandle gpu_cells_res = Zi; G_ResourceHandle gpu_stains_res = Zi; G_ResourceHandle gpu_drynesses_res = Zi; + G_ResourceHandle gpu_densities_res = Zi; G_Texture2DRef gpu_tiles = Zi; G_RWStructuredBufferRef gpu_particles = Zi; G_RWTexture2DRef gpu_cells = Zi; G_RWTexture2DRef gpu_stains = Zi; G_RWTexture2DRef gpu_drynesses = Zi; + G_RWTexture2DRef gpu_densities = Zi; { G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct); { @@ -432,9 +434,10 @@ void V_TickForever(WaveLaneCtx *lane) gpu_cells_res = G_PushTexture2D( gpu_perm, cl, // G_Format_R8_Uint, + G_Format_R32_Uint, // G_Format_R11G11B10_Float, // G_Format_R10G10B10A2_Unorm, - G_Format_R16G16B16A16_Float, + // G_Format_R16G16B16A16_Float, cells_dims, G_Layout_DirectQueue_ShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | 
G_ResourceFlag_AllowShaderReadWrite, @@ -473,6 +476,22 @@ void V_TickForever(WaveLaneCtx *lane) ); gpu_drynesses = G_PushRWTexture2DRef(gpu_perm, gpu_drynesses_res); } + // Init densities texture + { + gpu_densities_res = G_PushTexture2D( + gpu_perm, cl, + // G_Format_R8_Uint, + // G_Format_R11G11B10_Float, + // G_Format_R10G10B10A2_Unorm, + // G_Format_R16_Float, + G_Format_R32_Uint, + cells_dims, + G_Layout_DirectQueue_ShaderReadWrite, + .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, + .name = Lit("Densities") + ); + gpu_densities = G_PushRWTexture2DRef(gpu_perm, gpu_densities_res); + } } G_CommitCommandList(cl); } @@ -594,6 +613,7 @@ void V_TickForever(WaveLaneCtx *lane) frame->cells = gpu_cells; frame->stains = gpu_stains; frame->drynesses = gpu_drynesses; + frame->densities = gpu_densities; } ////////////////////////////// @@ -2503,7 +2523,8 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Push test emitter - if (frame->held_buttons[Button_G] && !prev_frame->held_buttons[Button_G]) + if (frame->held_buttons[Button_G]) + // if (frame->held_buttons[Button_G] && !prev_frame->held_buttons[Button_G]) { V_Emitter emitter = Zi; @@ -2532,8 +2553,8 @@ void V_TickForever(WaveLaneCtx *lane) // emitter.count = Mebi(16); // emitter.count = Mebi(2); // emitter.count = Kibi(32); - emitter.count = Kibi(8); - // emitter.count = Kibi(1); + // emitter.count = Kibi(8); + emitter.count = Kibi(1); // emitter.count = 128; // emitter.count = 32; // emitter.count = 1; diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index b49b6c4b..38ff5209 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -36,8 +36,9 @@ ComputeShader2D(V_PrepareShadeCS, 8, 8) ComputeShader2D(V_PrepareCellsCS, 8, 8) { V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; - RWTexture2D cells = G_Dereference(frame.cells); + RWTexture2D cells = G_Dereference(frame.cells); RWTexture2D drynesses = 
G_Dereference(frame.drynesses); + RWTexture2D densities = G_Dereference(frame.densities); Vec2 cells_pos = SV_DispatchThreadID + 0.5; if (all(cells_pos < countof(cells))) @@ -45,6 +46,9 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8) // Clear cell cells[cells_pos] = 0; + // Clear density + densities[cells_pos] = 0; + // Increase dryness // TODO: Use simulation dt f32 dry_rate = frame.dt * 0.1; @@ -156,9 +160,10 @@ ComputeShader(V_SimParticlesCS, 64) { V_SharedFrame frame = G_Dereference(V_ShaderConst_Frame)[0]; RWStructuredBuffer particles = G_Dereference(frame.particles); - RWTexture2D cells = G_Dereference(frame.cells); + RWTexture2D cells = G_Dereference(frame.cells); RWTexture2D stains = G_Dereference(frame.stains); RWTexture2D drynesses = G_Dereference(frame.drynesses); + RWTexture2D densities = G_Dereference(frame.densities); Texture2D tiles = G_Dereference(frame.tiles); u32 particle_idx = SV_DispatchThreadID; @@ -171,11 +176,11 @@ ComputeShader(V_SimParticlesCS, 64) if (particle.kind != 0) { - u64 seed0 = MixU64(particle_idx); - f32 rand_offset = Norm16(seed0 >> 0); - f32 rand_angle = Norm16(seed0 >> 16); - f32 rand_speed = Norm16(seed0 >> 32); - f32 rand_falloff = Norm16(seed0 >> 48); + u64 seed = MixU64(P_ParticleSimBasis ^ particle_idx); + f32 rand_offset = Norm16(seed >> 0); + f32 rand_angle = Norm16(seed >> 16); + f32 rand_speed = Norm16(seed >> 32); + f32 rand_falloff = Norm16(seed >> 48); ////////////////////////////// //- Init @@ -283,7 +288,7 @@ ComputeShader(V_SimParticlesCS, 64) particle.velocity.y *= -1; } { - u64 collision_seed = MixU64s(particle_idx, particle.collisions_count); + u64 collision_seed = MixU64s(seed, particle.collisions_count); f32 rand_collision_angle = Norm16(collision_seed >> 0); f32 rand_collision_velocity = Norm16(collision_seed >> 16); f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle); @@ -311,8 +316,6 @@ ComputeShader(V_SimParticlesCS, 64) ////////////////////////////// //- Commit - // FIXME: Atomic 
writes - Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1)); Vec2 screen_pos = mul(frame.af.world_to_screen, Vec3(particle.pos, 1)); b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(cells)); @@ -321,17 +324,6 @@ ComputeShader(V_SimParticlesCS, 64) b32 should_draw = is_in_world && is_in_screen; b32 should_stain = 0; - // TODO: Remove this - Vec4 color = Color_Purple; - switch (particle.kind) - { - case V_ParticleKind_Test: - { - color = Color_Yellow; - } break; - } - - // // Stain // if (is_in_world) // { @@ -360,7 +352,14 @@ ComputeShader(V_SimParticlesCS, 64) // Draw if (should_draw) { - cells[cell_pos] = color; + u32 packed = 0; + packed |= (particle_idx & ((1 << 24) - 1)) << 0; // bits 0-23: particle idx (same mask the composite pass decodes with) + packed |= (particle.kind & 0xFF) << 24; // bits 24-31: particle kind, in the high bits so InterlockedMax keeps the highest kind + StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits + StaticAssert(V_ParticleKind_COUNT <= 0xFF); // particle kind must fit in 8 bits + + InterlockedMax(cells[cell_pos], packed); + InterlockedAdd(densities[cell_pos], 1); } // { @@ -444,7 +443,7 @@ ComputeShader2D(V_ShadeCS, 8, 8) Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1)); Vec2 tile_pos = mul(frame.af.world_to_tile, Vec3(world_pos, 1)); - P_TileKind tile = tiles.Load(Vec3(tile_pos, 0)); + P_TileKind tile = tiles[tile_pos]; Vec2 half_world_dims = Vec2(P_WorldPitch, P_WorldPitch) * 0.5; b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(stains)); @@ -486,10 +485,12 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input) // Texture2D shade_tex = G_Dereference(frame.shade_ro); Texture2D albedo_tex = G_Dereference(frame.albedo_ro); RWTexture2D stains = G_Dereference(frame.stains); - RWTexture2D cells = G_Dereference(frame.cells); + RWTexture2D cells = G_Dereference(frame.cells); RWTexture2D drynesses = G_Dereference(frame.drynesses); Texture2D tiles = G_Dereference(frame.tiles); SamplerState clamp_sampler = G_Dereference(frame.pt_clamp_sampler); + RWTexture2D densities = 
G_Dereference(frame.densities); + RWStructuredBuffer particles = G_Dereference(frame.particles); Vec2 screen_pos = input.sv_position.xy; Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1)); @@ -502,7 +503,7 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input) Vec2 world_bounds_screen_p1 = mul(frame.af.world_to_screen, Vec3(half_world_dims.xy, 1)); b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(cells)); - P_TileKind tile = tiles.Load(Vec3(tile_pos, 0)); + P_TileKind tile = tiles[tile_pos]; P_TileKind equipped_tile = frame.equipped_tile; ////////////////////////////// @@ -534,14 +535,14 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input) b32 tile_is_wall = 0; Vec4 tile_color = 0; { - P_TileKind tile_tl = tiles.Load(Vec3(tile_pos.x - 0.99, tile_pos.y - 0.99, 0)); - P_TileKind tile_tr = tiles.Load(Vec3(tile_pos.x + 0.99, tile_pos.y - 0.99, 0)); - P_TileKind tile_br = tiles.Load(Vec3(tile_pos.x + 0.99, tile_pos.y + 0.99, 0)); - P_TileKind tile_bl = tiles.Load(Vec3(tile_pos.x - 0.99, tile_pos.y + 0.99, 0)); - P_TileKind tile_t = tiles.Load(Vec3(tile_pos.x, tile_pos.y - 0.99, 0)); - P_TileKind tile_r = tiles.Load(Vec3(tile_pos.x + 0.99, tile_pos.y, 0)); - P_TileKind tile_b = tiles.Load(Vec3(tile_pos.x, tile_pos.y + 0.99, 0)); - P_TileKind tile_l = tiles.Load(Vec3(tile_pos.x - 0.99, tile_pos.y, 0)); + P_TileKind tile_tl = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y - 0.99)]; + P_TileKind tile_tr = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y - 0.99)]; + P_TileKind tile_br = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y + 0.99)]; + P_TileKind tile_bl = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y + 0.99)]; + P_TileKind tile_t = tiles[Vec2(tile_pos.x, tile_pos.y - 0.99)]; + P_TileKind tile_r = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y)]; + P_TileKind tile_b = tiles[Vec2(tile_pos.x, tile_pos.y + 0.99)]; + P_TileKind tile_l = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y)]; f32 tile_edge_dist = Inf; P_TileKind 
edge_tile = tile; @@ -603,8 +604,8 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input) Vec4 stain_color = 0; { - f32 dryness = drynesses.Load(cell_pos); - Vec4 stain = stains.Load(cell_pos); + f32 dryness = drynesses[cell_pos]; + Vec4 stain = stains[cell_pos]; stain_color = V_DryColor(stain, dryness); stain_color.rgb *= 1.0 - (0.75 * tile_is_wall); // Darken wall stains } @@ -612,7 +613,7 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input) ////////////////////////////// //- Albedo tex - Vec4 albedo_tex_color = albedo_tex.Load(Vec3(screen_pos, 0)); + Vec4 albedo_tex_color = albedo_tex[screen_pos]; ////////////////////////////// //- Compose albedo @@ -635,8 +636,43 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input) // TODO: Remove this - Vec4 particle_color = cells.Load(cell_pos); - particle_color.rgb *= particle_color.a; + // Vec4 particle_color = cells[cell_pos]; + // particle_color.rgb *= particle_color.a; + + Vec4 particle_color = 0; + { + u32 packed = cells[cell_pos]; + + V_ParticleKind kind = (V_ParticleKind)((packed >> 24) & 0xFF); + if (kind != V_ParticleKind_None) + { + u32 particle_idx = packed & ((1 << 24) - 1); + + if (particle_idx < V_ParticlesCap) + { + if (kind == V_ParticleKind_Test) + { + u64 seed = MixU64(P_ParticleCompositeBasis ^ particle_idx); + f32 rand_color = Norm16(seed >> 0); + + Vec4 color = Vec4(0.15, 0.15, 0.15, 1); + color.rgb += (rand_color - 0.5) * 0.025; + + { + f32 density = densities[cell_pos]; + // f32 t = saturate(density / 10.0); + f32 t = smoothstep(-10, 32, density); + color.a = lerp(0, 0.85, t); + } + + particle_color = color; + } + } + } + + particle_color.rgb *= particle_color.a; + } + ////////////////////////////// //- Compose world diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh index da890795..4bdd4b64 100644 --- a/src/pp/pp_vis/pp_vis_shared.cgh +++ b/src/pp/pp_vis/pp_vis_shared.cgh @@ -1,7 +1,7 @@ // #define 
V_ParticlesCap Kibi(128) // #define V_ParticlesCap Mebi(1) -#define V_ParticlesCap Mebi(2) -// #define V_ParticlesCap Mebi(16) +// #define V_ParticlesCap Mebi(2) +#define V_ParticlesCap Mebi(16) //////////////////////////////////////////////////////////// //~ State types @@ -149,6 +149,7 @@ Struct(V_SharedFrame) G_RWTexture2DRef cells; G_RWTexture2DRef stains; G_RWTexture2DRef drynesses; + G_RWTexture2DRef densities; G_StructuredBufferRef dverts; G_StructuredBufferRef quads; @@ -157,6 +158,9 @@ Struct(V_SharedFrame) //////////////////////////////////////////////////////////// //~ Particle types +#define P_ParticleSimBasis 0xb49f2d9e406873b9ull +#define P_ParticleCompositeBasis 0x569aa8341ecc0ea3ull + // NOTE: Higher particle kinds draw over lower ones Enum(V_ParticleKind) {