diff --git a/src/base/base_shader.gh b/src/base/base_shader.gh index c4d50b15..9e025a45 100644 --- a/src/base/base_shader.gh +++ b/src/base/base_shader.gh @@ -142,9 +142,12 @@ Inline f64 Norm53(u64 v) return (v & 0x1FFFFFFFFFFFFFull) / (f64)0x20000000000000ull; } -//- Match floor +//////////////////////////////////////////////////////////// +//~ Comparison #define MatchFloor(a, b) all(floor(a) == floor(b)) +// IsInside: half-open bounds test, true when 0 <= pos < dims on every component (pos == dims is out of range, matching the inline checks this macro replaces) +#define IsInside(pos, dims) (all((pos) >= 0) && all((pos) < (dims))) //////////////////////////////////////////////////////////// //~ Rotation diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 851f899f..3193ebcf 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -2168,7 +2168,7 @@ void V_TickForever(WaveLaneCtx *lane) quad->quad_uv_to_world_af = body_uv_to_world_af; quad->tex = body.tex; quad->tex_slice_uv = DivRng2Vec2(body.tex_rect, body.tex_dims); - quad->occluder = V_OccluderKind_Guy; + quad->occluder_id = ent->key.v & 0xFFFFFFFF; } } } @@ -2674,8 +2674,8 @@ void V_TickForever(WaveLaneCtx *lane) f32 angle = AngleFromVec2(frame->look); // f32 angle = 0; - f32 angle_spread = Tau * 0.25; - // f32 angle_spread = Tau; + // f32 angle_spread = Tau * 0.25; + f32 angle_spread = Tau; // f32 angle_spread = 0; // f32 speed = 5; diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index b3bc5827..2a0755b6 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -95,10 +95,10 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8) //- Reset occluders { - V_OccluderKind occluder = V_OccluderKind_None; + u32 occluder = 0; if (tile == P_TileKind_Wall) { - occluder = V_OccluderKind_Wall; + occluder = 0xFFFFFFFF; } occluders[cell_pos] = occluder; } @@ -202,16 +202,16 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) Vec2 world_pos = input.world_pos; Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1)); - b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < P_WorldCellsDims); + b32 
is_in_world = IsInside(cell_pos, P_WorldCellsDims); Vec4 albedo = tex.Sample(sampler, input.samp_uv); if (is_in_world) { // TODO: Don't write occluders using screen space result. Do separate draw pass instead. - if (albedo.a > 0 && quad.occluder != V_OccluderKind_None && is_in_world) + if (quad.occluder_id > 0 && albedo.a > 0) { - InterlockedMax(occluders[cell_pos], quad.occluder); + InterlockedMax(occluders[cell_pos], quad.occluder_id); } } @@ -297,15 +297,20 @@ ComputeShader(V_SimParticlesCS, 64) particle.life = 0; particle.pos = lerp(emitter.pos.p0, emitter.pos.p1, rand_offset); particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed; + + Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1)); + if (IsInside(cell_pos, P_WorldCellsDims)) + { + particle.origin_occluder = occluders[cell_pos]; + particle.prev_occluder = particle.origin_occluder; + } + else + { + prune = 1; + } } - - - - - - - if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT) + if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune) { V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind); RWTexture2D cells = G_Dereference(frame.particle_cells[desc.layer]); @@ -317,6 +322,19 @@ ComputeShader(V_SimParticlesCS, 64) StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits StaticAssert(V_ParticleKind_COUNT <= 0x7F); // particle kind must fit in 7 bits + u32 start_occluder = 0; + { + Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1)); + if (IsInside(cell_pos, P_WorldCellsDims)) + { + start_occluder = occluders[cell_pos]; + if (particle.life == 0) + { + particle.origin_occluder = start_occluder; + } + } + } + ////////////////////////////// //- Move @@ -386,7 +404,7 @@ ComputeShader(V_SimParticlesCS, 64) Vec2 cell_screen_pos_p1 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(ceil(cell_pos), 1)), 1)); cell_screen_pos_p1 = 
max(cell_screen_pos_p1, cell_screen_pos_p0 + 1); - b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < P_WorldCellsDims); + b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims); b32 is_visible = all(cell_screen_pos_p1 >= 0) && all(cell_screen_pos_p0 < frame.screen_dims); if (is_in_world) @@ -395,49 +413,56 @@ ComputeShader(V_SimParticlesCS, 64) particle.stain_accum += stain_delta; //- Handle collision - V_OccluderKind occluder = (V_OccluderKind)occluders[cell_pos]; - if (occluder != V_OccluderKind_None) { - u64 collision_seed = MixU64(V_ParticleCellBasis ^ seed0 ^ particle.cells_count); - f32 rand_collision_angle = Norm16(collision_seed >> 0); - f32 rand_collision_velocity = Norm16(collision_seed >> 16); - f32 rand_collision_penetration = Norm16(collision_seed >> 32); - if (rand_collision_penetration >= desc.pen_rate) + u32 occluder = occluders[cell_pos]; + if (occluder != particle.origin_occluder) { - collision = 1; - done = 1; + particle.origin_occluder = 0; + } + if (occluder != 0 && occluder != particle.origin_occluder) + { + u64 collision_seed = MixU64(V_ParticleCellBasis ^ seed0 ^ particle.cells_count); + f32 rand_collision_angle = Norm16(collision_seed >> 0); + f32 rand_collision_velocity = Norm16(collision_seed >> 16); + f32 rand_collision_penetration = Norm16(collision_seed >> 32); + if (rand_collision_penetration >= desc.pen_rate) { - if (stepped_x) + collision = 1; + done = 1; { - if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) + if (stepped_x) { - particle.velocity.x *= -1; + if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) + { + particle.velocity.x *= -1; + } + t = saturate(t_hit.x); } - t = saturate(t_hit.x); - } - else if (stepped_y) - { - if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) + else if (stepped_y) { - particle.velocity.y *= -1; + if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) + { + particle.velocity.y *= -1; + } + t = saturate(t_hit.y); } - t = saturate(t_hit.y); - } - { - f32 collision_angle = lerp(-0.05 * Tau, 0.05 * 
Tau, rand_collision_angle); + { + f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle); + // f32 collision_angle = 0; - // f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity); - // f32 collision_velocity_falloff = lerp(5000, 10000, rand_collision_velocity); - // f32 collision_velocity_falloff = lerp(500, 10000, rand_collision_velocity); - f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity); - // f32 collision_velocity_falloff = 0; + // f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity); + // f32 collision_velocity_falloff = lerp(5000, 10000, rand_collision_velocity); + // f32 collision_velocity_falloff = lerp(500, 10000, rand_collision_velocity); + f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity); + // f32 collision_velocity_falloff = 0; - particle.velocity = RotateVec2Angle(particle.velocity, collision_angle); - particle.velocity *= 1.0f - saturate(collision_velocity_falloff * frame.dt); + particle.velocity = RotateVec2Angle(particle.velocity, collision_angle); + particle.velocity *= 1.0f - saturate(collision_velocity_falloff * frame.dt); + } } } } - + particle.prev_occluder = occluder; } if (!AnyBit(desc.flags, V_ParticleFlag_NoPruneWhenStill) && dot(particle.velocity, particle.velocity) < 0.0001) @@ -459,7 +484,7 @@ ComputeShader(V_SimParticlesCS, 64) if (!collision) { u32 stain_count = floor(particle.stain_accum); - u32 density = 1; + u32 density = 1 + stain_count; u32 commit = packed; if (stain_count > 0) @@ -490,234 +515,9 @@ ComputeShader(V_SimParticlesCS, 64) particle.pos = p0 + (p1 - p0) * t; } - // Increment life particle.life += frame.dt; } - - - - - - - - - - - - - // if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT) - // { - // V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind); - - // u32 packed = 0; - // packed |= (particle_idx & ((1 >> 24) - 1)) << 0; - // packed |= (particle.kind & 0xFF) << 
24; - // packed |= 1 << 31; - // StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits - // StaticAssert(V_ParticleKind_COUNT <= 0x7F); // particle kind must fit in 6 bits - - // ////////////////////////////// - // //- Move - - // b32 collision = 0; - - // // TODO: Clip to avoid unnecessary iterations outside of world bounds - // { - // Vec2 p0 = particle.pos; - // Vec2 p1 = particle.pos + particle.velocity * frame.dt; - // f32 t = 1; - // { - // Vec2 occluder_p0 = mul(frame.af.world_to_cell, Vec3(p0, 1)); - // Vec2 occluder_p1 = mul(frame.af.world_to_cell, Vec3(p1, 1)); - // Vec2I32 cell_p0 = floor(occluder_p0); - // Vec2I32 cell_p1 = floor(occluder_p1); - - // Vec2 delta = occluder_p1 - occluder_p0; - // Vec2 inv_delta = 1.0 / delta; - // Vec2 dda_step_dir = Vec2((delta.x > 0) - (delta.x < 0), (delta.y > 0) - (delta.y < 0)); - // Vec2 t_delta = abs(inv_delta); - // Vec2 t_max = cell_p0 - occluder_p0; - // t_max.x += dda_step_dir.x > 0; - // t_max.y += dda_step_dir.y > 0; - // t_max *= inv_delta; - // t_max = abs(t_max); - - // Vec2 t_hit = 0; - - // Vec2I32 cell_pos = cell_p0; - - // b32 stepped_x = 0; - // b32 stepped_y = 0; - - // // TODO: Tune this - // u32 max_iterations = 128; - - // b32 done = 0; - // f32 t_diff = 0; - // for (u32 iteration_idx = 0; iteration_idx < max_iterations && !done; ++iteration_idx) - // { - // if (cell_pos.x == cell_p1.x && cell_pos.y == cell_p1.y) - // { - // done = 1; - // } - // else if (t_max.x < t_max.y) - // { - // cell_pos.x += dda_step_dir.x; - // f32 old = t_hit.x; - // t_hit.x = t_max.x - t_delta.x; - // t_diff = t_hit.x - old; - // t_max.x += t_delta.x; - // stepped_x = 1; - // stepped_y = 0; - // } - // else - // { - // cell_pos.y += dda_step_dir.y; - // f32 old = t_hit.y; - // t_hit.y = t_max.y - t_delta.y; - // t_diff = t_hit.y - old; - // t_max.y += t_delta.y; - // stepped_x = 0; - // stepped_y = 1; - // } - - // Vec2 cell_screen_pos_p0 = mul(frame.af.world_to_screen, 
Vec3(mul(frame.af.cell_to_world, Vec3(floor(cell_pos), 1)), 1)); - // Vec2 cell_screen_pos_p1 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(ceil(cell_pos), 1)), 1)); - // cell_screen_pos_p1 = max(cell_screen_pos_p1, cell_screen_pos_p0 + 1); - - // b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < P_WorldCellsDims); - // b32 is_visible = all(cell_screen_pos_p1 >= 0) && all(cell_screen_pos_p0 < frame.screen_dims); - - // if (is_in_world) - // { - // f32 stain_delta = abs(t_diff) * desc.stain_rate * frame.dt; - // particle.stain_accum += stain_delta; - - // //- Handle collision - // V_OccluderKind occluder = (V_OccluderKind)occluders[cell_pos]; - // if (occluder != V_OccluderKind_None) - // { - // u64 collision_seed = MixU64(V_ParticleCellBasis ^ seed0 ^ particle.cells_count); - // f32 rand_collision_angle = Norm16(collision_seed >> 0); - // f32 rand_collision_velocity = Norm16(collision_seed >> 16); - // f32 rand_collision_penetration = Norm16(collision_seed >> 32); - // if (rand_collision_penetration >= desc.pen_rate) - // { - // collision = 1; - // done = 1; - // { - // if (stepped_x) - // { - // if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) - // { - // particle.velocity.x *= -1; - // } - // t = saturate(t_hit.x); - // } - // else if (stepped_y) - // { - // if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect)) - // { - // particle.velocity.y *= -1; - // } - // t = saturate(t_hit.y); - // } - // { - // f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle); - - // f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity); - // // f32 collision_velocity_falloff = lerp(5000, 10000, rand_collision_velocity); - // // f32 collision_velocity_falloff = lerp(500, 10000, rand_collision_velocity); - // // f32 collision_velocity_falloff = 0; - - // particle.velocity = RotateVec2Angle(particle.velocity, collision_angle); - // particle.velocity *= 1.0f - saturate(collision_velocity_falloff * frame.dt); - // } - // } 
- // } - - // } - - // if (AnyBit(desc.flags, V_ParticleFlag_PruneWhenStill)) - // { - // if (dot(particle.velocity, particle.velocity) < 0.0001) - // { - // prune = 1; - // } - // } - - // if (prune && AnyBit(desc.flags, V_ParticleFlag_StainWhenPruned)) - // { - // particle.stain_accum += 1; - // } - - // if (!collision) - // { - // //- Stain - // u32 stains_count = floor(particle.stain_accum); - // if (stains_count > 0) - // { - // // TODO: Fixed point - // u32 density = round(stains_count * rand_density); - // InterlockedMax(stain_cells[cell_pos], packed); - // InterlockedAdd(stain_densities[cell_pos], density); - // drynesses[cell_pos] = 0; - // particle.stain_accum -= stains_count; - // } - - // //- Draw - // { - // b32 should_draw_ground = is_visible && AnyBit(desc.flags, V_ParticleFlag_Ground); - // b32 should_draw_air = is_visible && AnyBit(desc.flags, V_ParticleFlag_Air); - - // if (should_draw_ground) - // { - // InterlockedMax(ground_cells[cell_pos], packed); - // InterlockedAdd(ground_densities[cell_pos], 1); - // } - - // if (should_draw_air) - // { - // InterlockedMax(air_cells[cell_pos], packed); - // InterlockedAdd(air_densities[cell_pos], 1); - // } - // } - // } - // } - // else - // { - // done = 1; - // prune = 1; - // } - - // particle.cells_count += 1; - // iteration_idx += 1; - // } - // } - - // f32 falloff = saturate(lerp(10, 20, rand_falloff) * frame.dt); - // // f32 falloff = saturate(lerp(1, 2, rand_falloff) * frame.dt); - // particle.velocity *= 1.0f - falloff; - - // particle.pos = p0 + (p1 - p0) * t; - // } - - // // Increment life - // particle.life += frame.dt; - // } - - - - - - - - - - - - if (prune) { particle.kind = V_ParticleKind_None; @@ -750,7 +550,7 @@ ComputeShader2D(V_ShadeCS, 8, 8) P_TileKind tile = tiles[tile_pos]; Vec2 half_world_dims = Vec2(P_WorldPitch, P_WorldPitch) * 0.5; - b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < P_WorldCellsDims); + b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims); 
////////////////////////////// //- Compute result @@ -790,8 +590,8 @@ ComputeShader2D(V_CompositeCS, 8, 8) Vec2 half_world_dims = Vec2(P_WorldPitch, P_WorldPitch) * 0.5; Vec2 world_bounds_screen_p0 = mul(frame.af.world_to_screen, Vec3(-half_world_dims.xy, 1)); Vec2 world_bounds_screen_p1 = mul(frame.af.world_to_screen, Vec3(half_world_dims.xy, 1)); - b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < P_WorldCellsDims); - b32 is_in_screen = all(screen_pos >= 0) && all(screen_pos < countof(screen_tex)); + b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims); + b32 is_in_screen = IsInside(screen_pos, frame.screen_dims); P_TileKind tile = tiles[tile_pos]; P_TileKind equipped_tile = frame.equipped_tile; @@ -902,7 +702,6 @@ ComputeShader2D(V_CompositeCS, 8, 8) stain_color = orig_stain; } - Vec4 particle_color = 0; @@ -923,6 +722,13 @@ ComputeShader2D(V_CompositeCS, 8, 8) } } + // Darken wall particles / stains + if (tile == P_TileKind_Wall) + { + particle_color *= 0.25; + stain_color *= 0.25; + } + @@ -1243,7 +1049,7 @@ ComputeShader2D(V_BloomDownCS, 8, 8) result += src * desc.weight * knee_weight; } - if (all(bloom_pos >= 0) && all(bloom_pos < down_dims)) + if (IsInside(bloom_pos, down_dims)) { bloom_down[bloom_pos] = result; } @@ -1285,7 +1091,7 @@ ComputeShader2D(V_BloomUpCS, 8, 8) result /= 16; } - if (all(bloom_pos >= 0) && all(bloom_pos < up_dims)) + if (IsInside(bloom_pos, up_dims)) { bloom_up[bloom_pos] += result; } @@ -1303,7 +1109,7 @@ ComputeShader2D(V_PostProcessCS, 8, 8) Vec2 screen_pos = SV_DispatchThreadID + 0.5; Vec2 screen_uv = screen_pos / frame.screen_dims; - b32 is_in_screen = all(screen_pos >= 0) && all(screen_pos < frame.screen_dims); + b32 is_in_screen = IsInside(screen_pos, frame.screen_dims); ////////////////////////////// //- Original diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh index 6737ceaf..1e0a23e6 100644 --- a/src/pp/pp_vis/pp_vis_shared.cgh +++ b/src/pp/pp_vis/pp_vis_shared.cgh @@ -18,16 +18,6 @@ 
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2); G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3); G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4); -//////////////////////////////////////////////////////////// -//~ Occluder types - -Enum(V_OccluderKind) -{ - V_OccluderKind_None, - V_OccluderKind_Guy, - V_OccluderKind_Wall, -}; - //////////////////////////////////////////////////////////// //~ Particle types @@ -68,7 +58,7 @@ Enum(V_ParticleLayer) /* Name */ BloodTrail, \ /* Flags */ V_ParticleFlag_NoReflect | V_ParticleFlag_StainWhenPruned, \ /* Layer */ V_ParticleLayer_Ground, \ - /* Stain rate, pen chance */ 30, 0.25, \ + /* Stain rate, pen chance */ 100, 0.25, \ /* Base color */ 0.5, 0.1, 0.1, 0.05 \ ) \ X( \ @@ -136,6 +126,8 @@ Struct(V_Emitter) Struct(V_Particle) { i32 kind; // If >= 0, then map to V_ParticleKind. Otherwize initialize particle using emitter at index [abs(kind) - 1] + u32 origin_occluder; + u32 prev_occluder; // TODO: Remove this f32 life; f32 stain_accum; u32 cells_count; @@ -172,7 +164,7 @@ Enum(V_QuadFlag) Struct(V_Quad) { V_QuadFlag flags; - V_OccluderKind occluder; + u32 occluder_id; Affine quad_uv_to_world_af; G_Texture2DRef tex; Rng2 tex_slice_uv;