////////////////////////////////////////////////////////////
//~ Helpers

// Looks up the noise texture at `pos` (cast to unsigned integer coordinates and
// wrapped to the texture dimensions) and returns the texel passed through Norm16.
// NOTE(review): Norm16 presumably maps the low 16 bits to a normalized float - confirm.
f32 V_RandFromPos(Vec3 pos)
{
  Texture3D noise3d = G_Dereference(V_GpuConst_NoiseTex);
  // TODO: Compile-time noise dims
  u32 noise = noise3d[(Vec3U32)pos % countof(noise3d)];
  f32 rand = Norm16(noise);
  return rand;
}

// Computes the (non-premultiplied) display color of a particle cell.
//
// desc:         description of the particle kind (base color, flags, kind)
// particle_idx: index of the particle, used to seed a small per-particle color jitter
// density:      accumulated density of the cell, used to raise alpha
//
// Starts from desc.base_color, pushes alpha towards 1 as density grows (for
// gas-blended kinds and for blood trail / debris kinds), then offsets rgb by a
// per-particle random amount in the range of +-0.025.
Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
{
  Vec4 result = 0;
  u64 seed = MixU64(V_ParticleColorBasis ^ particle_idx);
  f32 rand_color = Norm16(seed >> 0);

  result = desc.base_color;

  // Apply density
  {
    if (AnyBit(desc.flags, V_ParticleFlag_GasBlend))
    {
      // f32 t = saturate(density / 10.0);
      // f32 t = smoothstep(-10, 32, density);
      f32 t = smoothstep(-10, 50, density);
      // f32 t = smoothstep(0, 2, (f32)density);
      result.a += (1.0 - result.a) * (t);
    }
    else if (desc.kind == V_ParticleKind_BloodTrail || desc.kind == V_ParticleKind_BloodDebris)
    {
      // f32 t = (f32)density / 5;
      // t = pow(t, 2);
      // t = saturate(t);
      // result.rgb *= 1.0 - (t * 0.9);

      f32 t = (f32)density / 10;
      // t = smoothstep(-10, 10, t);
      // t = smoothstep(-5, 5, t);
      t = smoothstep(-50, 50, t);
      // result.rgb *= 1.0 - (t * 0.9);
      // result.a = t;
      result.a += (1.0 - result.a) * (t);
    }
  }

  result.rgb = result.rgb + (rand_color - 0.5) * 0.05;

  return result;
}

////////////////////////////////////////////////////////////
//~ Prepare frame

//- Prepare shade

// Clears every texel of the shade target to 0. One thread per shade texel.
ImplComputeShader2D(V_PrepareShadeCS)
{
  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  RWTexture2D shade = G_Dereference(frame.shade_rw);
  Vec2 shade_pos = SV_DispatchThreadID + 0.5;
  if (all(shade_pos < countof(shade)))
  {
    // Clear shade
    shade[shade_pos] = 0;
  }
}

//- Prepare cells

// Per-cell frame preparation. One thread per world cell:
//  1. Resets the occluder of the cell (0xFFFFFFFF for wall tiles, 0 otherwise).
//  2. Folds the particle cells written last frame that carry the stain flag
//     (bit 31) into wet/dry stain colors, then zeroes the particle cell and
//     density of every layer.
//  3. Updates the stain textures: clears them on request, re-wets them when a
//     new stain landed this frame, or otherwise advances dryness towards 1.
ImplComputeShader2D(V_PrepareCellsCS)
{
  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  Texture2D tiles = G_Dereference(frame.tiles);
  RWTexture2D stains = G_Dereference(frame.stains);
  RWTexture2D dry_stains = G_Dereference(frame.dry_stains);
  RWTexture2D drynesses = G_Dereference(frame.drynesses);
  RWTexture2D occluders = G_Dereference(frame.occluders);

  Vec2 cell_pos = SV_DispatchThreadID + 0.5;
  if (all(cell_pos < P_WorldCellsDims))
  {
    Vec2 world_pos = mul(frame.af.cell_to_world, Vec3(cell_pos, 1));
    Vec2 tile_pos = mul(frame.af.world_to_tile, Vec3(world_pos, 1));
    P_TileKind tile = tiles[tile_pos];

    //- Reset occluders
    {
      u32 occluder = 0;
      if (tile == P_TileKind_Wall)
      {
        occluder = 0xFFFFFFFF;
      }
      occluders[cell_pos] = occluder;
    }

    //- Reset particle layers
    Vec4 over_stain = 0;
    Vec4 over_dry_stain = 0;
    for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
    {
      RWTexture2D cells = G_Dereference(frame.particle_cells[layer]);
      RWTexture2D densities = G_Dereference(frame.particle_densities[layer]);
      u32 packed = cells[cell_pos];
      // Bit 31 marks a cell whose particle should leave a stain
      if (packed & (1 << 31))
      {
        V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F);
        V_ParticleDesc desc = V_DescFromParticleKind(particle_kind);
        u32 density = densities[cell_pos];
        u32 particle_idx = packed & ((1 << 24) - 1);
        Vec4 base_color = V_ColorFromParticle(desc, particle_idx, density);
        Vec4 dry_color = base_color * desc.dry_factor;

        // Premultiply before blending
        base_color.rgb *= base_color.a;
        dry_color.rgb *= dry_color.a;

        over_stain = BlendPremul(base_color, over_stain);
        over_dry_stain = BlendPremul(dry_color, over_dry_stain);
      }
      cells[cell_pos] = 0;
      densities[cell_pos] = 0;
    }

    //- Update stains
    if (frame.should_clear_particles)
    {
      stains[cell_pos] = 0;
      dry_stains[cell_pos] = 0;
      drynesses[cell_pos] = 0;
    }
    else if (over_stain.a > 0)
    {
      // A new stain landed: both the wet and the dry result are layered on top
      // of the existing dry stain, and dryness restarts from 0.
      Vec4 dry_stain = max(dry_stains[cell_pos], 0);
      Vec4 stain = dry_stain;

      dry_stain = BlendPremul(over_dry_stain, dry_stain);
      stain = BlendPremul(over_stain, stain);

      stains[cell_pos] = stain;
      dry_stains[cell_pos] = dry_stain;
      drynesses[cell_pos] = 0;
    }
    else
    {
      f32 dry_rate = saturate(frame.dt * 0.1);
      // NOTE(review): before_stain / before_dry_stain are currently unused
      Vec4 before_stain = stains[cell_pos];
      Vec4 before_dry_stain = dry_stains[cell_pos];
      drynesses[cell_pos] = lerp(drynesses[cell_pos], 1, dry_rate);
    }
  }
}

//- Clear particles

// Marks every particle slot as unused by setting its kind to V_ParticleKind_None.
ImplComputeShader(V_ClearParticlesCS)
{
  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  RWStructuredBuffer particles = G_Dereference(frame.particles);
  u32 particle_idx = SV_DispatchThreadID;
  if (particle_idx < V_ParticlesCap)
  {
    particles[particle_idx].kind = V_ParticleKind_None;
  }
}

////////////////////////////////////////////////////////////
//~ Quads

//////////////////////////////
//- Vertex shader

// Expands one quad instance into a vertex: maps the rect uv of the vertex to
// world space, then to screen space / NDC, and interpolates the texture slice uv.
ImplVertexShader(V_QuadVS, V_QuadPSInput)
{
  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  StructuredBuffer quads = G_Dereference(frame.quads);

  V_Quad quad = quads[SV_InstanceID];

  Vec2 rect_uv = RectUvFromIdx(SV_VertexID);
  Vec2 world_pos = mul(quad.quad_uv_to_world_af, Vec3(rect_uv, 1));
  Vec2 screen_pos = mul(frame.af.world_to_screen, Vec3(world_pos, 1));
  Vec2 samp_uv = lerp(quad.tex_slice_uv.p0, quad.tex_slice_uv.p1, rect_uv);

  V_QuadPSInput result;
  result.sv_position = Vec4(NdcFromPos(screen_pos, frame.screen_dims).xy, 0, 1);
  result.quad_idx = SV_InstanceID;
  result.world_pos = world_pos;
  result.samp_uv = samp_uv;
  return result;
}

//////////////////////////////
//- Pixel shader

// Samples the quad texture and outputs it as albedo. For non-transparent pixels
// of quads with a non-zero occluder id, also raises the occluder id stored in
// the world cell under the pixel (InterlockedMax).
ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
{
  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  StructuredBuffer quads = G_Dereference(frame.quads);
  SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
  RWTexture2D occluders = G_Dereference(frame.occluders);

  V_Quad quad = quads[input.quad_idx];
  Texture2D tex = G_Dereference(quad.tex);

  Vec2 world_pos = input.world_pos;
  Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1));
  b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims);

  Vec4 albedo = tex.Sample(sampler, input.samp_uv);

  if (is_in_world)
  {
    // TODO: Don't write occluders using screen space result. Do separate draw pass instead.
    if (quad.occluder_id > 0 && albedo.a > 0)
    {
      InterlockedMax(occluders[cell_pos], quad.occluder_id);
    }
  }

  V_QuadPSOutput output;
  output.sv_target0 = albedo;
  return output;
}

////////////////////////////////////////////////////////////
//~ Particle simulation

//////////////////////////////
//- Particle emitter shader

// One thread per emitter. Tags every particle slot targeted by the emitter with
// a negative "semantic" kind of -(emitter_idx + 1); the sim shader later decodes
// this to find the emitter that should initialize the particle.
ImplComputeShader(V_EmitParticlesCS)
{
  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  StructuredBuffer emitters = G_Dereference(frame.emitters);
  RWStructuredBuffer particles = G_Dereference(frame.particles);

  u32 emitter_idx = SV_DispatchThreadID;
  if (emitter_idx < frame.emitters_count)
  {
    V_Emitter emitter = emitters[emitter_idx];
    i32 semantic_particle_kind = V_ParticleKind_None;
    if (emitter.kind > V_ParticleKind_None)
    {
      semantic_particle_kind = (i32)(emitter_idx + 1) * -1;
    }

    for (u32 emitter_particle_idx = 0; emitter_particle_idx < emitter.count; ++emitter_particle_idx)
    {
      // Particle slots are allocated as a ring over the particle buffer
      u32 particle_idx = (emitter.first_particle_seq + emitter_particle_idx) % (u32)V_ParticlesCap;
      // InterlockedMin guarantees that the highest emitter index (reflected
      // as negative particle kind) will be used to initialize the particle
      // this frame, in case multiple emitters target the same particle (e.g.
      // more particles pushed this frame than are available in the buffer)
      InterlockedMin(particles[particle_idx].kind, semantic_particle_kind);
    }
  }
}

//////////////////////////////
//- Particle sim shader

// One thread per particle slot. For each live particle:
//  - If its kind is negative, initializes it from the emitter encoded in the kind.
//  - Steps it along its velocity for this frame, walking the world cells it
//    crosses (DDA), accumulating stain, testing occluders for collision, and
//    committing a packed value + density into each crossed cell.
//  - Applies velocity falloff, advances life, and frees the slot when pruned.
//
// Packed cell layout (must match the decoders in V_PrepareCellsCS / V_CompositeCS):
//   bits  0..23  particle index
//   bits 24..30  particle kind
//   bit  31      stain flag
ImplComputeShader(V_SimParticlesCS)
{
  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  Texture2D tiles = G_Dereference(frame.tiles);
  RWStructuredBuffer particles = G_Dereference(frame.particles);
  RWTexture2D occluders = G_Dereference(frame.occluders);

  u32 particle_idx = SV_DispatchThreadID;
  if (particle_idx < V_ParticlesCap)
  {
    V_Particle particle = particles[particle_idx];
    b32 prune = 0;

    //////////////////////////////
    //- Initialize particle

    if (particle.kind != V_ParticleKind_None)
    {
      // Four independent 16-bit random values derived from the particle index
      u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx);
      f32 rand_offset = Norm16(seed0 >> 0);
      f32 rand_angle = Norm16(seed0 >> 16);
      f32 rand_speed = Norm16(seed0 >> 32);
      f32 rand_falloff = Norm16(seed0 >> 48);

      //////////////////////////////
      //- Init

      // Negative kind == -(emitter_idx + 1), written by V_EmitParticlesCS
      if (particle.kind < 0)
      {
        u32 emitter_idx = -particle.kind - 1;
        V_Emitter emitter = G_Dereference(frame.emitters)[emitter_idx];
        f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle);
        f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed);

        particle = (V_Particle)0;
        particle.kind = emitter.kind;
        particle.life = 0;
        particle.pos = lerp(emitter.pos.p0, emitter.pos.p1, rand_offset);
        particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed;
      }

      if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
      {
        V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
        RWTexture2D cells = G_Dereference(frame.particle_cells[desc.layer]);
        RWTexture2D densities = G_Dereference(frame.particle_densities[desc.layer]);

        // Masks mirror the decoders: 24-bit index, 7-bit kind, bit 31 left free
        // for the stain flag.
        u32 packed = 0;
        packed |= (particle_idx & ((1 << 24) - 1)) << 0;
        packed |= (particle.kind & 0x7F) << 24;
        StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits
        StaticAssert(V_ParticleKind_COUNT <= 0x7F); // particle kind must fit in 7 bits

        // First frame of the particle: remember the occluder it spawned inside
        // so that it does not immediately collide with it.
        if (particle.life == 0)
        {
          Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1));
          if (IsInside(cell_pos, P_WorldCellsDims))
          {
            u32 occluder = occluders[cell_pos];
            b32 occluder_is_wall = occluder == 0xFFFFFFFF;
            if (!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall))
            {
              particle.origin_occluder = occluders[cell_pos];
              particle.prev_occluder = particle.origin_occluder;
            }
          }
          else
          {
            prune = 1;
          }
        }

        //////////////////////////////
        //- Move

        b32 collision = 0;

        // TODO: Clip to avoid unnecessary iterations outside of world bounds
        if (!prune)
        {
          Vec2 p0 = particle.pos;
          Vec2 p1 = particle.pos + particle.velocity * frame.dt;
          // Fraction of the p0 -> p1 segment actually travelled (shortened on collision)
          f32 t = 1;
          {
            Vec2 occluder_p0 = mul(frame.af.world_to_cell, Vec3(p0, 1));
            Vec2 occluder_p1 = mul(frame.af.world_to_cell, Vec3(p1, 1));
            Vec2I32 cell_p0 = floor(occluder_p0);
            Vec2I32 cell_p1 = floor(occluder_p1);

            // DDA setup in cell space: t_delta is the parametric distance between
            // consecutive cell boundaries per axis, t_max the parametric distance
            // to the next boundary per axis.
            Vec2 delta = occluder_p1 - occluder_p0;
            Vec2 inv_delta = 1.0 / delta;
            Vec2 dda_step_dir = Vec2((delta.x > 0) - (delta.x < 0), (delta.y > 0) - (delta.y < 0));
            Vec2 t_delta = abs(inv_delta);
            Vec2 t_max = cell_p0 - occluder_p0;
            t_max.x += dda_step_dir.x > 0;
            t_max.y += dda_step_dir.y > 0;
            t_max *= inv_delta;
            t_max = abs(t_max);

            Vec2 t_hit = 0;

            Vec2I32 cell_pos = cell_p0;

            b32 stepped_x = 0;
            b32 stepped_y = 0;

            // TODO: Tune this
            u32 max_iterations = 128;

            b32 done = 0;
            f32 t_diff = 0;
            u32 iteration_idx = 0;
            for (; iteration_idx < max_iterations && !done; ++iteration_idx)
            {
              if (cell_pos.x == cell_p1.x && cell_pos.y == cell_p1.y)
              {
                done = 1;
              }
              else if (t_max.x < t_max.y)
              {
                cell_pos.x += dda_step_dir.x;
                f32 old = t_hit.x;
                t_hit.x = t_max.x - t_delta.x;
                t_diff = t_hit.x - old;
                t_max.x += t_delta.x;
                stepped_x = 1;
                stepped_y = 0;
              }
              else
              {
                cell_pos.y += dda_step_dir.y;
                f32 old = t_hit.y;
                t_hit.y = t_max.y - t_delta.y;
                t_diff = t_hit.y - old;
                t_max.y += t_delta.y;
                stepped_x = 0;
                stepped_y = 1;
              }

              // NOTE(review): is_visible (and the screen-space cell bounds feeding it) is currently unused
              Vec2 cell_screen_pos_p0 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(floor(cell_pos), 1)), 1));
              Vec2 cell_screen_pos_p1 = mul(frame.af.world_to_screen, Vec3(mul(frame.af.cell_to_world, Vec3(ceil(cell_pos), 1)), 1));
              cell_screen_pos_p1 = max(cell_screen_pos_p1, cell_screen_pos_p0 + 1);
              b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims);
              b32 is_visible = all(cell_screen_pos_p1 >= 0) && all(cell_screen_pos_p0 < frame.screen_dims);

              if (is_in_world)
              {
                f32 stain_delta = abs(t_diff) * desc.stain_rate * frame.dt;
                particle.stain_accum += stain_delta;

                //- Handle collision
                {
                  u32 occluder = occluders[cell_pos];
                  b32 occluder_is_wall = occluder == 0xFFFFFFFF;

                  // Once the particle leaves the occluder it spawned in, it may
                  // collide with that occluder again.
                  if (occluder != particle.origin_occluder)
                  {
                    particle.origin_occluder = 0;
                  }

                  if (
                    occluder != 0 &&
                    !(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall) &&
                    occluder != particle.origin_occluder
                  )
                  {
                    u64 collision_seed = MixU64(V_ParticleCellBasis ^ seed0 ^ particle.cells_count);
                    f32 rand_collision_angle = Norm16(collision_seed >> 0);
                    f32 rand_collision_velocity = Norm16(collision_seed >> 16);
                    f32 rand_collision_penetration = Norm16(collision_seed >> 32);

                    // Particles randomly pass through the occluder at a rate of desc.pen_rate
                    if (rand_collision_penetration >= desc.pen_rate)
                    {
                      collision = 1;
                      done = 1;
                      {
                        // Reflect across the axis that was stepped last and stop at the hit
                        if (stepped_x)
                        {
                          if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect))
                          {
                            particle.velocity.x *= -1;
                          }
                          t = saturate(t_hit.x);
                        }
                        else if (stepped_y)
                        {
                          if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect))
                          {
                            particle.velocity.y *= -1;
                          }
                          t = saturate(t_hit.y);
                        }
                        {
                          f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle);
                          // f32 collision_angle = 0;

                          // f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity);
                          // f32 collision_velocity_falloff = lerp(5000, 10000, rand_collision_velocity);
                          // f32 collision_velocity_falloff = lerp(500, 10000, rand_collision_velocity);
                          f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity);
                          // f32 collision_velocity_falloff = 0;

                          particle.velocity = RotateVec2Angle(particle.velocity, collision_angle);
                          particle.velocity *= 1.0f - saturate(collision_velocity_falloff * frame.dt);
                        }
                      }
                    }
                  }
                  particle.prev_occluder = occluder;
                }

                if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))
                {
                  prune = 1;
                }

                if (prune)
                {
                  done = 1;
                  if (AnyBit(desc.flags, V_ParticleFlag_StainWhenPruned))
                  {
                    // particle.stain_accum = max(particle.stain_accum, 1);
                    particle.stain_accum += 1;
                    packed |= 1 << 31;
                  }
                }

                // Commit the particle into the cell unless it collided here or is
                // still inside the wall it spawned in.
                if (!collision && particle.origin_occluder != 0xFFFFFFFF)
                {
                  u32 stain_count = floor(particle.stain_accum);
                  u32 density = 1 + stain_count;
                  u32 commit = packed;
                  if (stain_count > 0)
                  {
                    commit |= (1 << 31);
                  }
                  InterlockedMax(cells[cell_pos], commit);
                  InterlockedAdd(densities[cell_pos], density);
                  particle.stain_accum -= stain_count;
                }
              }
              else
              {
                // Left the world: stop walking and free the particle
                done = 1;
                prune = 1;
              }
              particle.cells_count += 1;
            }
          }

          f32 falloff = saturate(lerp(10, 20, rand_falloff) * frame.dt);
          // f32 falloff = saturate(lerp(1, 2, rand_falloff) * frame.dt);
          particle.velocity *= 1.0f - falloff;
          particle.pos = p0 + (p1 - p0) * t;
        }

        particle.life += frame.dt;
      }

      if (prune)
      {
        particle.kind = V_ParticleKind_None;
      }

      particles[particle_idx] = particle;
    }
  }
}

////////////////////////////////////////////////////////////
//~ Shade

// TODO: Remove this

// Currently writes 0 to every texel of the shade target; the values computed
// before the write are not used.
ImplComputeShader2D(V_ShadeCS)
{
  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
  Texture2D tiles = G_Dereference(frame.tiles);
  Texture2D albedo_tex = G_Dereference(frame.albedo_ro);
  RWTexture2D shade_tex = G_Dereference(frame.shade_rw);
  RWTexture2D drynesses = G_Dereference(frame.drynesses);

  Vec2 shade_pos = SV_DispatchThreadID + 0.5;
  Vec2 world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1));
  Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1));
  Vec2 tile_pos = mul(frame.af.world_to_tile, Vec3(world_pos, 1));

  P_TileKind tile = tiles[tile_pos];

  Vec2 half_world_dims = Vec2(P_WorldPitch, P_WorldPitch) * 0.5;
  b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims);

  //////////////////////////////
  //- Compute result

  Vec4 result = 0;

  //////////////////////////////
  //- Write result

  if (all(shade_pos < countof(shade_tex)))
  {
    shade_tex[shade_pos] = result;
  }
}

////////////////////////////////////////////////////////////
//~ Composite

// One thread per screen pixel. Builds the world color (tile, stains, ground
// particles, albedo, air particles) and the overlay color (tile selection,
// debug grid / axes / world bounds, crosshair), blends overlay over world, and
// writes the un-premultiplied result to the screen target.
ImplComputeShader2D(V_CompositeCS)
{
  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  // Texture2D shade_tex = G_Dereference(frame.shade_ro);
  SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
  Texture2D albedo_tex = G_Dereference(frame.albedo_ro);
  RWTexture2D screen_tex = G_Dereference(frame.screen_rw);
  RWTexture2D stains = G_Dereference(frame.stains);
  RWTexture2D dry_stains = G_Dereference(frame.dry_stains);
  RWTexture2D drynesses = G_Dereference(frame.drynesses);
  Texture2D tiles = G_Dereference(frame.tiles);
  RWStructuredBuffer particles = G_Dereference(frame.particles);

  Vec2 screen_pos = SV_DispatchThreadID.xy + 0.5;
  Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1));
  Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1));
  Vec2 shade_pos = mul(frame.af.screen_to_shade, Vec3(screen_pos.xy, 1));
  Vec2 tile_pos = mul(frame.af.world_to_tile, Vec3(world_pos, 1));

  Vec2 half_world_dims = Vec2(P_WorldPitch, P_WorldPitch) * 0.5;
  Vec2 world_bounds_screen_p0 = mul(frame.af.world_to_screen, Vec3(-half_world_dims.xy, 1));
  Vec2 world_bounds_screen_p1 = mul(frame.af.world_to_screen, Vec3(half_world_dims.xy, 1));
  b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims);
  b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);

  P_TileKind tile = tiles[tile_pos];
  P_TileKind equipped_tile = frame.equipped_tile;

  //////////////////////////////
  //- World color

  Vec4 world_color = Vec4(0.025, 0.025, 0.025, 1);
  if (is_in_world)
  {
    //////////////////////////////
    //- Shade color

    Vec4 shade_color = 0;
    // if (all(shade_pos >= Vec2(0, 0)) && all(shade_pos < countof(shade_tex)))
    // {
    //   Vec2 shade_uv = shade_pos / countof(shade_tex);
    //   shade_color = shade_tex.SampleLevel(sampler, shade_uv, 0);
    // }

    //////////////////////////////
    //- Tile

    // TODO: Remove this

    b32 tile_is_wall = 0;
    Vec4 tile_color = 0;
    {
      // Probe the 8 neighbors just under one tile away to find the distance
      // to the nearest edge shared with a different tile kind.
      P_TileKind tile_tl = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y - 0.99)];
      P_TileKind tile_tr = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y - 0.99)];
      P_TileKind tile_br = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y + 0.99)];
      P_TileKind tile_bl = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y + 0.99)];
      P_TileKind tile_t = tiles[Vec2(tile_pos.x, tile_pos.y - 0.99)];
      P_TileKind tile_r = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y)];
      P_TileKind tile_b = tiles[Vec2(tile_pos.x, tile_pos.y + 0.99)];
      P_TileKind tile_l = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y)];

      f32 tile_edge_dist = Inf;
      P_TileKind edge_tile = tile;
      if (tile_tl != tile) { edge_tile = tile_tl; tile_edge_dist = min(tile_edge_dist, length(tile_pos - Vec2(floor(tile_pos.x), floor(tile_pos.y)))); }
      if (tile_tr != tile) { edge_tile = tile_tr; tile_edge_dist = min(tile_edge_dist, length(tile_pos - Vec2(ceil(tile_pos.x), floor(tile_pos.y)))); }
      if (tile_br != tile) { edge_tile = tile_br; tile_edge_dist = min(tile_edge_dist, length(tile_pos - Vec2(ceil(tile_pos.x), ceil(tile_pos.y)))); }
      if (tile_bl != tile) { edge_tile = tile_bl; tile_edge_dist = min(tile_edge_dist, length(tile_pos - Vec2(floor(tile_pos.x), ceil(tile_pos.y)))); }
      if (tile_l != tile) { edge_tile = tile_l; tile_edge_dist = min(tile_edge_dist, frac(tile_pos.x)); }
      if (tile_r != tile) { edge_tile = tile_r; tile_edge_dist = min(tile_edge_dist, 1.0 - frac(tile_pos.x)); }
      if (tile_t != tile) { edge_tile = tile_t; tile_edge_dist = min(tile_edge_dist, frac(tile_pos.y)); }
      if (tile_b != tile) { edge_tile = tile_b; tile_edge_dist = min(tile_edge_dist, 1.0 - frac(tile_pos.y)); }

      if (tile == P_TileKind_Wall)
      {
        // Walls fade from a darker outer rim to a lighter interior
        Vec4 outer = LinearFromSrgb(Vec4(0.05, 0.05, 0.05, 1));
        Vec4 inner = LinearFromSrgb(Vec4(0.15, 0.15, 0.15, 1));
        tile_color = lerp(outer, inner, smoothstep(0, 1, tile_edge_dist / 0.375));
        tile_is_wall = 1;
      }
      else if (tile != P_TileKind_Empty)
      {
        V_TileDesc tile_desc = frame.tile_descs[tile];
        Texture2D tile_tex = G_Dereference(tile_desc.tex);
        // Clamp slightly inside the slice to avoid sampling across its border
        Vec2 samp_t = clamp(frac(world_pos), 0.00001, 1.0 - 0.00001);
        Vec2 tile_samp_uv = lerp(tile_desc.tex_slice_uv.p0, tile_desc.tex_slice_uv.p1, samp_t);
        tile_color = tile_tex.SampleLevel(sampler, tile_samp_uv, 0);
      }
      else if (tile == P_TileKind_Empty)
      {
        // Checkered grid
        i32 color_idx = 0;
        Vec4 colors[2] = {
          LinearFromSrgb(Vec4(0.30, 0.30, 0.30, 1)),
          LinearFromSrgb(Vec4(0.15, 0.15, 0.15, 1))
        };
        Vec2 tile_pos_mod = fmod(abs(tile_pos), Vec2(2, 2));
        if (tile_pos_mod.x < 1)
        {
          color_idx = !color_idx;
        }
        if (tile_pos_mod.y < 1)
        {
          color_idx = !color_idx;
        }
        tile_color = colors[color_idx];
      }
    }

    //////////////////////////////
    //- Albedo tex

    Vec4 albedo_tex_color = albedo_tex[screen_pos];

    //////////////////////////////
    //- Particles

    // FIXME: Stain
    Vec4 stain_color = 0;
    {
      Vec4 wet_stain = stains[cell_pos];
      Vec4 dry_stain = dry_stains[cell_pos];
      f32 dryness = drynesses[cell_pos];
      stain_color = max(lerp(wet_stain, dry_stain, dryness), 0);
    }

    Vec4 ground_particle_color = 0;
    Vec4 air_particle_color = 0;

    for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
    {
      RWTexture2D cells = G_Dereference(frame.particle_cells[layer]);
      RWTexture2D densities = G_Dereference(frame.particle_densities[layer]);
      u32 packed = cells[cell_pos];
      V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F);
      if (particle_kind != V_ParticleKind_None)
      {
        u32 density = densities[cell_pos];
        V_ParticleDesc desc = V_DescFromParticleKind(particle_kind);
        u32 particle_idx = packed & ((1 << 24) - 1);
        Vec4 cell_color = V_ColorFromParticle(desc, particle_idx, density);
        cell_color.rgb *= cell_color.a;

        if (layer == V_ParticleLayer_Ground)
        {
          ground_particle_color = BlendPremul(cell_color, ground_particle_color);
        }
        else
        {
          air_particle_color = BlendPremul(cell_color, air_particle_color);
        }
      }
    }

    // Darken wall particles / stains
    if (tile == P_TileKind_Wall)
    {
      ground_particle_color *= 0.5;
      air_particle_color *= 0.5;
      stain_color *= 0.5;
    }

    //////////////////////////////
    //- Compose world

    // Ground tiles go below the albedo (sprites); wall tiles go above it.

    // world_color = BlendPremul(shade_color, world_color);

    if (!tile_is_wall)
    {
      world_color = BlendPremul(tile_color, world_color); // Blend ground tile
      world_color = BlendPremul(stain_color, world_color); // Blend ground stain
      world_color = BlendPremul(ground_particle_color, world_color); // Blend ground particle
    }
    world_color = BlendPremul(albedo_tex_color, world_color);
    if (tile_is_wall)
    {
      world_color = BlendPremul(tile_color, world_color); // Blend wall tile
      world_color = BlendPremul(stain_color, world_color); // Blend wall stain
      world_color = BlendPremul(ground_particle_color, world_color); // Blend wall particle
    }
    world_color = BlendPremul(air_particle_color, world_color); // Blend air particle

    //
    // world_color = BlendPremul(shade_color, world_color);
    // world_color = BlendPremul(stain_particle_color, world_color);
    // world_color = BlendPremul(ground_particle_color, world_color);

    // if (!tile_is_wall)
    // {
    //   world_color = BlendPremul(tile_color, world_color); // Blend ground tile
    //   world_color = BlendPremul(stain_particle_color, world_color); // Blend ground stain
    //   world_color = BlendPremul(ground_particle_color, world_color); // Blend ground particle
    // }
    // world_color = BlendPremul(albedo_tex_color, world_color);
    // if (tile_is_wall)
    // {
    //   world_color = BlendPremul(tile_color, world_color); // Blend wall tile
    //   world_color = BlendPremul(stain_particle_color, world_color); // Blend wall stain
    //   world_color = BlendPremul(ground_particle_color, world_color); // Blend wall particle
    // }
    // world_color = BlendPremul(air_particle_color, world_color);
  }

  //////////////////////////////
  //- Overlay color

  Vec4 overlay_color = 0;
  {
    f32 half_thickness = 1;

    //////////////////////////////
    //- Tile selection overlay

    Vec4 selection_color = 0;
    if (
      frame.is_editing &&
      frame.edit_mode == V_EditMode_Tile &&
      frame.has_mouse_focus &&
      is_in_world
    )
    {
      Vec4 border_color = LinearFromSrgb(Vec4(1, 1, 1, 1));
      // Vec4 inner_color = LinearFromSrgb(Vec4(0.4, 0.4, 0.4, 0.25));
      Vec4 inner_color = LinearFromSrgb(Vec4(0.4, 0.8, 0.4, 0.6));

      Rng2 screen_selection = frame.screen_selection;
      Rng2 world_selection = frame.world_selection;

      // Snap the selection outwards to whole tiles, at least one tile wide
      Rng2 tile_selection;
      tile_selection.p0 = floor(mul(frame.af.world_to_tile, Vec3(world_selection.p0, 1)));
      tile_selection.p1 = ceil(mul(frame.af.world_to_tile, Vec3(world_selection.p1, 1)));
      tile_selection.p1 = max(tile_selection.p1, tile_selection.p0 + 1);

      f32 dist = 100000000;
      dist = min(dist, screen_pos.x - screen_selection.p0.x);
      dist = min(dist, screen_pos.y - screen_selection.p0.y);
      dist = min(dist, screen_selection.p1.x - screen_pos.x);
      dist = min(dist, screen_selection.p1.y - screen_pos.y);
      dist = -dist;

      // if (dist >= -half_thickness && dist <= half_thickness)
      // {
      //   selection_color = border_color;
      // }
      // else
      {
        if (
          tile_pos.x >= tile_selection.p0.x &&
          tile_pos.x <= tile_selection.p1.x &&
          tile_pos.y >= tile_selection.p0.y &&
          tile_pos.y <= tile_selection.p1.y
        )
        {
          selection_color = inner_color;
        }
      }

      selection_color.rgb *= selection_color.a;
    }

    //////////////////////////////
    //- Grid

    Vec4 grid_color = 0;
    if (is_in_world)
    {
      b32 debug_draw = !!frame.show_console;

      // Grid outline
      if (frame.show_console)
      {
        const Vec4 line_color = LinearFromSrgb(Vec4(1, 1, 1, 0.1));
        Vec2 line_screen_p0 = mul(frame.af.world_to_screen, Vec3(floor(world_pos), 1));
        Vec2 line_screen_p1 = mul(frame.af.world_to_screen, Vec3(ceil(world_pos), 1));
        f32 line_dist = 100000;
        line_dist = min(line_dist, abs(screen_pos.x - line_screen_p0.x));
        line_dist = min(line_dist, abs(screen_pos.x - line_screen_p1.x));
        line_dist = min(line_dist, abs(screen_pos.y - line_screen_p0.y));
        line_dist = min(line_dist, abs(screen_pos.y - line_screen_p1.y));
        if (line_dist <= half_thickness * 0.5)
        {
          grid_color = line_color;
        }
      }

      // Axis
      if (frame.show_console)
      {
        const Vec4 x_axis_color = LinearFromSrgb(Vec4(0.75, 0, 0, 1));
        const Vec4 y_axis_color = LinearFromSrgb(Vec4(0, 0.75, 0, 1));
        Vec2 zero_screen = mul(frame.af.world_to_screen, Vec3(0, 0, 1));
        f32 x_dist = abs(screen_pos.x - zero_screen.x);
        f32 y_dist = abs(screen_pos.y - zero_screen.y);
        if (y_dist <= half_thickness)
        {
          grid_color = x_axis_color;
        }
        else if (x_dist <= half_thickness)
        {
          grid_color = y_axis_color;
        }
      }

      // World bounds
      {
        const Vec4 bounds_color = LinearFromSrgb(Vec4(0.75, 0.75, 0, 1));
        f32 bounds_dist = 100000;
        bounds_dist = min(bounds_dist, abs(screen_pos.x - world_bounds_screen_p0.x));
        bounds_dist = min(bounds_dist, abs(screen_pos.x - world_bounds_screen_p1.x));
        bounds_dist = min(bounds_dist, abs(screen_pos.y - world_bounds_screen_p0.y));
        bounds_dist = min(bounds_dist, abs(screen_pos.y - world_bounds_screen_p1.y));
        if (bounds_dist <= half_thickness)
        {
          grid_color = bounds_color;
        }
      }

      grid_color.rgb *= grid_color.a;
    }

    //////////////////////////////
    //- Crosshair

    // TODO: Remove this
    // TODO: Move to final step after post-processing pass

    Vec4 crosshair_color = 0;
    if (!frame.is_editing)
    {
      f32 dist = length(frame.screen_crosshair - screen_pos);
      if (dist < 4)
      {
        // Adaptive crosshair color based on underlying luminance
        f32 world_luminance = LuminanceFromColor(world_color);
        f32 adaptive_threshold = 0.5;
        Vec4 adapted_crosshair_color = crosshair_color;
        if (world_luminance <= adaptive_threshold)
        {
          crosshair_color = Color_White;
        }
        else
        {
          crosshair_color = InvertColor(Color_White);
        }
        crosshair_color.rgb *= crosshair_color.a;
      }
    }

    //////////////////////////////
    //- Compose overlay

    overlay_color = BlendPremul(selection_color, overlay_color);
    overlay_color = BlendPremul(grid_color, overlay_color);
    overlay_color = BlendPremul(crosshair_color, overlay_color);
  }

  //////////////////////////////
  //- Compose result

  Vec4 result = 0;
  result = BlendPremul(world_color, result);
  result = BlendPremul(overlay_color, result);

  result = Unpremul(result);

  if (is_in_screen)
  {
    screen_tex[screen_pos] = result;
  }
}

////////////////////////////////////////////////////////////
//~
//~ Bloom

//////////////////////////////
//- Downsample

// Writes bloom mip (mip_idx - 1) from a weighted 5-tap filter of the level
// above it. The first pass (mip_idx == 1) reads the screen texture and scales
// each tap by a soft-knee brightness weight; later passes read the previous
// bloom mip unweighted.
ImplComputeShader2D(V_BloomDownCS)
{
  i32 mips_count = V_GpuConst_MipsCount;
  i32 mip_idx = V_GpuConst_MipIdx;

  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
  RWTexture2D dst_mip = G_Dereference(frame.bloom_mips_rw[mip_idx - 1]);

  // Source is the screen on the first pass, otherwise the previous bloom mip
  b32 is_first_pass = mip_idx == 1;
  Texture2D src_tex;
  if (is_first_pass)
  {
    src_tex = G_Dereference(frame.screen_ro);
  }
  else
  {
    src_tex = G_Dereference(frame.bloom_mips_ro[mip_idx - 2]);
  }

  Vec2 dst_dims = countof(dst_mip);
  Vec2 dst_pos = SV_DispatchThreadID + 0.5;
  Vec2 center_uv = dst_pos / dst_dims;
  Vec2 half_texel_uv = 0.5 / dst_dims;

  f32 threshold = 1;
  f32 knee = 0.75;

  // 5-tap sample: center plus four diagonal half-texel taps
  Struct(SampleDesc) { Vec2 uv; f32 weight; };
  SampleDesc taps[] = {
    { center_uv + Vec2(0, 0), 0.5 },
    { center_uv + Vec2(-half_texel_uv.x, -half_texel_uv.y), 0.125 },
    { center_uv + Vec2(half_texel_uv.x, -half_texel_uv.y), 0.125 },
    { center_uv + Vec2(half_texel_uv.x, half_texel_uv.y), 0.125 },
    { center_uv + Vec2(-half_texel_uv.x, half_texel_uv.y), 0.125 },
  };

  Vec4 accum = 0;
  for (u32 tap_idx = 0; tap_idx < countof(taps); ++tap_idx)
  {
    SampleDesc tap = taps[tap_idx];
    Vec4 tap_color = src_tex.SampleLevel(sampler, tap.uv, 0);

    f32 tap_scale = 1;
    if (is_first_pass)
    {
      f32 luminance = LuminanceFromColor(tap_color);
      f32 max_rgb = max(max(tap_color.r, tap_color.g), tap_color.b);
      // So that we can get bloom on colors with high rgb, not just high luminance
      f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);

      // Non-bright taps contribute nothing; bright taps get the knee ramp
      tap_scale = 0;
      if (bright > 0)
      {
        f32 over_threshold = max(bright - threshold, 0.0);
        f32 ramp = saturate(over_threshold / knee);
        tap_scale = (over_threshold * ramp * ramp) / bright;
      }
    }

    accum += tap_color * tap.weight * tap_scale;
  }

  if (IsInside(dst_pos, dst_dims))
  {
    dst_mip[dst_pos] = accum;
  }
}

//////////////////////////////
//- Upsample

// Adds a 13-tap filtered sample of bloom mip `mip_idx`, scaled by 0.75, onto the
// next larger target: the screen when mip_idx == 0, otherwise bloom mip
// (mip_idx - 1).
ImplComputeShader2D(V_BloomUpCS)
{
  i32 mips_count = V_GpuConst_MipsCount;
  i32 mip_idx = V_GpuConst_MipIdx;

  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
  Texture2D src_mip = G_Dereference(frame.bloom_mips_ro[mip_idx]);

  b32 is_last_pass = mip_idx == 0;
  RWTexture2D dst_tex;
  if (is_last_pass)
  {
    dst_tex = G_Dereference(frame.screen_rw);
  }
  else
  {
    dst_tex = G_Dereference(frame.bloom_mips_rw[mip_idx - 1]);
  }

  Vec2 src_dims = countof(src_mip);
  Vec2 dst_dims = countof(dst_tex);

  Vec2 dst_pos = SV_DispatchThreadID + 0.5;
  Vec2 center_uv = dst_pos / dst_dims;
  Vec2 inner_uv = 1 / src_dims;
  Vec2 outer_uv = inner_uv * 2;

  // 13-tap sample, grouped by ring. Weights: 9 + 4*3 + 4*4 + 4*1 = 41.
  Vec4 center_tap = src_mip.SampleLevel(sampler, center_uv, 0);

  Vec4 outer_edge_taps = (
    src_mip.SampleLevel(sampler, center_uv + Vec2(0, -outer_uv.y), 0) +
    src_mip.SampleLevel(sampler, center_uv + Vec2(outer_uv.x, 0), 0) +
    src_mip.SampleLevel(sampler, center_uv + Vec2(0, outer_uv.y), 0) +
    src_mip.SampleLevel(sampler, center_uv + Vec2(-outer_uv.x, 0), 0)
  );

  Vec4 inner_corner_taps = (
    src_mip.SampleLevel(sampler, center_uv + Vec2(-inner_uv.x, -inner_uv.y), 0) +
    src_mip.SampleLevel(sampler, center_uv + Vec2(inner_uv.x, -inner_uv.y), 0) +
    src_mip.SampleLevel(sampler, center_uv + Vec2(inner_uv.x, inner_uv.y), 0) +
    src_mip.SampleLevel(sampler, center_uv + Vec2(-inner_uv.x, inner_uv.y), 0)
  );

  Vec4 outer_corner_taps = (
    src_mip.SampleLevel(sampler, center_uv + Vec2(-outer_uv.x, -outer_uv.y), 0) +
    src_mip.SampleLevel(sampler, center_uv + Vec2(outer_uv.x, -outer_uv.y), 0) +
    src_mip.SampleLevel(sampler, center_uv + Vec2(outer_uv.x, outer_uv.y), 0) +
    src_mip.SampleLevel(sampler, center_uv + Vec2(-outer_uv.x, outer_uv.y), 0)
  );

  Vec4 accum = 0;
  accum += center_tap * 9.0f / 41.0f;
  accum += outer_edge_taps * 3.0f / 41.0f;
  accum += inner_corner_taps * 4.0f / 41.0f;
  accum += outer_corner_taps * 1.0f / 41.0f;

  if (IsInside(dst_pos, dst_dims))
  {
    dst_tex[dst_pos] += accum * 0.75;
  }
}
////////////////////////////////////////////////////////////
//~ Finalize

// One thread per screen pixel. Optionally tone maps the screen color in place,
// then applies Unpremul and writes it back.
ImplComputeShader2D(V_FinalizeCS)
{
  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
  Texture2D bloom_tex = G_Dereference(frame.bloom_mips_ro[0]);
  RWTexture2D screen_tex = G_Dereference(frame.screen_rw);

  Vec2 screen_pos = SV_DispatchThreadID + 0.5;

  if (IsInside(screen_pos, frame.screen_dims))
  {
    Vec4 color = screen_tex[screen_pos];

    //- Tone map
    if (frame.should_tone_map)
    {
      // ACES approximation by Krzysztof Narkowicz
      // https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
      Vec3 numer = (color.rgb * (2.51f * color.rgb + 0.03f));
      Vec3 denom = (color.rgb * (2.43f * color.rgb + 0.59f) + 0.14f);
      color.rgb = saturate(numer / denom);
    }

    color = Unpremul(color);

    screen_tex[screen_pos] = color;
  }
}

////////////////////////////////////////////////////////////
//~ Debug shapes

//////////////////////////////
//- Vertex shader

// Passes a debug vertex through: its position is already in screen space and is
// only converted to NDC; its linear color is forwarded to the pixel shader.
ImplVertexShader(V_DVertVS, V_DVertPSInput)
{
  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
  StructuredBuffer dverts = G_Dereference(frame.dverts);

  V_DVert dvert = dverts[SV_VertexID];

  V_DVertPSInput vs_out;
  vs_out.sv_position = Vec4(NdcFromPos(dvert.pos, frame.screen_dims).xy, 0, 1);
  vs_out.color_lin = dvert.color_lin;
  return vs_out;
}

//////////////////////////////
//- Pixel shader

// Outputs the interpolated linear vertex color unchanged.
ImplPixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input)
{
  V_DVertPSOutput ps_out;
  ps_out.sv_target0 = input.color_lin;
  return ps_out;
}