1171 lines
39 KiB
HLSL
1171 lines
39 KiB
HLSL
////////////////////////////////////////////////////////////
|
|
//~ Helpers
|
|
|
|
// Looks up the global 3D noise texture at `pos` (cast to unsigned integer
// texel coordinates and wrapped to the texture dimensions) and returns the
// fetched value passed through Norm16.
f32 V_RandFromPos(Vec3 pos)
{
  Texture3D<u32> noise_tex = G_Dereference<u32>(V_GpuConst_NoiseTex);
  // TODO: Compile-time noise dims
  u32 texel = noise_tex[(Vec3U32)pos % countof(noise_tex)];
  return Norm16(texel);
}
|
|
|
|
// Computes the (non-premultiplied) display color of a particle cell.
//   desc         - descriptor of the particle kind (base color, flags, kind)
//   particle_idx - index of the particle, used to seed a small color jitter
//   density      - accumulated density of the cell, used to raise alpha
// Returns the base color with density-driven alpha and jittered rgb.
Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
{
  // Stable per-particle jitter derived from a hash of the particle index
  u64 color_seed = MixU64(V_ParticleColorBasis ^ particle_idx);
  f32 jitter = Norm16(color_seed >> 0);

  Vec4 color = desc.base_color;

  // Apply density: move alpha toward 1 by a smoothstepped coverage factor
  b32 is_gas = AnyBit(desc.flags, V_ParticleFlag_GasBlend);
  b32 is_blood = desc.kind == V_ParticleKind_BloodTrail || desc.kind == V_ParticleKind_BloodDebris;
  if (is_gas)
  {
    f32 coverage = smoothstep(-10, 50, density);
    color.a += (1.0 - color.a) * (coverage);
  }
  else if (is_blood)
  {
    f32 coverage = (f32)density / 10;
    coverage = smoothstep(-50, 50, coverage);
    color.a += (1.0 - color.a) * (coverage);
  }

  // Offset rgb by the jitter, centered on zero and scaled to +-0.025
  // (assuming Norm16 yields values in [0, 1] - TODO confirm)
  color.rgb = color.rgb + (jitter - 0.5) * 0.05;

  return color;
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Prepare frame
|
|
|
|
// Clears the shade target. Each thread handles one texel and writes zero to
// it; threads that fall outside the texture dimensions do nothing.
ComputeShader2D(V_PrepareShadeCS, 8, 8)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  RWTexture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_rw);

  // Texel center addressed by this thread
  Vec2 texel_pos = SV_DispatchThreadID + 0.5;
  b32 is_in_bounds = all(texel_pos < countof(shade_tex));
  if (is_in_bounds)
  {
    // Clear shade
    shade_tex[texel_pos] = 0;
  }
}
|
|
|
|
//- Prepare cells
|
|
// Per-cell frame preparation. One thread per world cell:
//   1. Resets the cell's occluder to 0xFFFFFFFF for wall tiles, 0 otherwise.
//   2. Walks every particle layer, folds cells whose stain bit (bit 31) is
//      set into a wet and a dry stain color, then zeroes the layer's cell
//      and density entries.
//   3. Updates the persistent stain / dry-stain / dryness textures: cleared
//      when the frame requests it, re-stained when new stain color landed
//      this frame, otherwise dryness is eased toward 1.
ComputeShader2D(V_PrepareCellsCS, 8, 8)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
  RWTexture2D<Vec4> stains = G_Dereference<Vec4>(frame.stains);
  RWTexture2D<Vec4> dry_stains = G_Dereference<Vec4>(frame.dry_stains);
  RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
  RWTexture2D<u32> occluders = G_Dereference<u32>(frame.occluders);

  Vec2 cell_pos = SV_DispatchThreadID + 0.5;
  if (all(cell_pos < P_WorldCellsDims))
  {
    Vec2 world_pos = mul(frame.af.cell_to_world, Vec3(cell_pos, 1));
    Vec2 tile_pos = mul(frame.af.world_to_tile, Vec3(world_pos, 1));
    P_TileKind tile = tiles[tile_pos];

    //- Reset occluders

    {
      // 0xFFFFFFFF is the sentinel other shaders compare against for walls
      u32 occluder = 0;
      if (tile == P_TileKind_Wall)
      {
        occluder = 0xFFFFFFFF;
      }
      occluders[cell_pos] = occluder;
    }

    //- Reset particle layers

    Vec4 over_stain = 0;
    Vec4 over_dry_stain = 0;
    for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
    {
      RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);
      RWTexture2D<u32> densities = G_Dereference<u32>(frame.particle_densities[layer]);
      u32 packed = cells[cell_pos];

      // Packed cell layout: bit 31 = stain flag, bits 24..30 = particle
      // kind, bits 0..23 = particle index
      if (packed & (1 << 31))
      {
        V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F);
        V_ParticleDesc desc = V_DescFromParticleKind(particle_kind);
        u32 density = densities[cell_pos];
        u32 particle_idx = packed & ((1 << 24) - 1);

        Vec4 base_color = V_ColorFromParticle(desc, particle_idx, density);
        Vec4 dry_color = base_color * desc.dry_factor;

        // Premultiply before blending
        base_color.rgb *= base_color.a;
        dry_color.rgb *= dry_color.a;

        over_stain = BlendPremul(base_color, over_stain);
        over_dry_stain = BlendPremul(dry_color, over_dry_stain);
      }

      // Layers are rebuilt from scratch by the particle sim every frame
      cells[cell_pos] = 0;
      densities[cell_pos] = 0;
    }

    //- Update stains

    if (frame.should_clear_particles)
    {
      stains[cell_pos] = 0;
      dry_stains[cell_pos] = 0;
      drynesses[cell_pos] = 0;
    }
    else if (over_stain.a > 0)
    {
      // New stain landed: both results are layered on top of the existing
      // dry stain, and the cell becomes fully wet again
      Vec4 dry_stain = max(dry_stains[cell_pos], 0);
      Vec4 stain = dry_stain;

      dry_stain = BlendPremul(over_dry_stain, dry_stain);
      stain = BlendPremul(over_stain, stain);

      stains[cell_pos] = stain;
      dry_stains[cell_pos] = dry_stain;
      drynesses[cell_pos] = 0;
    }
    else
    {
      // Nothing new landed here: ease dryness toward 1
      f32 dry_rate = saturate(frame.dt * 0.1);
      drynesses[cell_pos] = lerp(drynesses[cell_pos], 1, dry_rate);
    }
  }
}
|
|
|
|
//- Clear particles
|
|
// Marks every slot of the particle buffer as unused by setting its kind to
// V_ParticleKind_None. One thread per slot; excess threads do nothing.
ComputeShader(V_ClearParticlesCS, 64)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  RWStructuredBuffer<V_Particle> particle_buf = G_Dereference<V_Particle>(frame.particles);

  u32 slot = SV_DispatchThreadID;
  b32 is_valid_slot = slot < V_ParticlesCap;
  if (is_valid_slot)
  {
    particle_buf[slot].kind = V_ParticleKind_None;
  }
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Quads
|
|
|
|
//////////////////////////////
|
|
//- Vertex shader
|
|
|
|
// Expands one quad instance into its corner vertices. The corner uv derived
// from the vertex id is mapped to world space by the quad's own transform,
// then to screen space and NDC. The quad index, world position and texture
// slice uv are forwarded to the pixel shader.
VertexShader(V_QuadVS, V_QuadPSInput)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  StructuredBuffer<V_Quad> quad_buf = G_Dereference<V_Quad>(frame.quads);
  V_Quad quad = quad_buf[SV_InstanceID];

  // Corner of the quad this vertex represents
  Vec2 corner_uv = RectUvFromIdx(SV_VertexID);
  Vec2 vert_world = mul(quad.quad_uv_to_world_af, Vec3(corner_uv, 1));
  Vec2 vert_screen = mul(frame.af.world_to_screen, Vec3(vert_world, 1));

  V_QuadPSInput vs_out;
  vs_out.sv_position = Vec4(NdcFromPos(vert_screen, frame.screen_dims).xy, 0, 1);
  vs_out.quad_idx = SV_InstanceID;
  vs_out.world_pos = vert_world;
  vs_out.samp_uv = lerp(quad.tex_slice_uv.p0, quad.tex_slice_uv.p1, corner_uv);
  return vs_out;
}
|
|
|
|
//////////////////////////////
|
|
//- Pixel shader
|
|
|
|
// Samples the quad's texture and outputs it as albedo. As a side effect,
// quads with a non-zero occluder id stamp that id (via atomic max) into the
// occluder cell under every pixel with non-zero alpha that lies inside the
// world.
PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  StructuredBuffer<V_Quad> quad_buf = G_Dereference<V_Quad>(frame.quads);
  SamplerState point_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
  RWTexture2D<u32> occluders = G_Dereference<u32>(frame.occluders);

  V_Quad quad = quad_buf[input.quad_idx];
  Texture2D<Vec4> quad_tex = G_Dereference<Vec4>(quad.tex);

  Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(input.world_pos, 1));
  b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims);

  Vec4 albedo = quad_tex.Sample(point_sampler, input.samp_uv);

  // TODO: Don't write occluders using screen space result. Do separate draw pass instead.
  b32 writes_occluder = quad.occluder_id > 0 && albedo.a > 0;
  if (is_in_world && writes_occluder)
  {
    InterlockedMax(occluders[cell_pos], quad.occluder_id);
  }

  V_QuadPSOutput ps_out;
  ps_out.sv_target0 = albedo;
  return ps_out;
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Particle simulation
|
|
|
|
//////////////////////////////
|
|
//- Particle emitter shader
|
|
|
|
// Tags the particle slots claimed by each emitter. One thread per emitter.
// A claimed slot gets a negative kind, -(emitter_idx + 1), which the sim
// shader later decodes to find the emitter to initialize the particle from.
ComputeShader(V_EmitParticlesCS, 64)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  StructuredBuffer<V_Emitter> emitter_buf = G_Dereference<V_Emitter>(frame.emitters);
  RWStructuredBuffer<V_Particle> particle_buf = G_Dereference<V_Particle>(frame.particles);

  u32 emitter_idx = SV_DispatchThreadID;
  if (emitter_idx < frame.emitters_count)
  {
    V_Emitter emitter = emitter_buf[emitter_idx];

    // Emitters without a valid kind write V_ParticleKind_None instead
    i32 pending_kind = V_ParticleKind_None;
    if (emitter.kind > V_ParticleKind_None)
    {
      pending_kind = -(i32)(emitter_idx + 1);
    }

    for (u32 i = 0; i < emitter.count; ++i)
    {
      // Slots are handed out sequentially and wrap around the buffer
      u32 slot = (emitter.first_particle_seq + i) % (u32)V_ParticlesCap;

      // InterlockedMin guarantees that the highest emitter index (reflected
      // as negative particle kind) will be used to initialize the particle
      // this frame, in case multiple emitters target the same particle (e.g.
      // more particles pushed this frame than are available in the buffer)
      InterlockedMin(particle_buf[slot].kind, pending_kind);
    }
  }
}
|
|
|
|
//////////////////////////////
|
|
//- Particle sim shader
|
|
|
|
// Simulates one particle per thread:
//   - Particles tagged with a negative kind are initialized from the emitter
//     encoded in that kind (see the emitter shader).
//   - Live particles are moved along their velocity with a cell-by-cell DDA
//     walk over the occluder grid. Each visited cell accumulates stain,
//     may trigger a collision, and receives the particle's packed id and
//     density in the particle layer textures.
//   - Particles that leave the world or fall below their prune speed are
//     released (kind reset to V_ParticleKind_None).
//
// Packed cell layout: bit 31 = stain flag, bits 24..30 = particle kind,
// bits 0..23 = particle index.
ComputeShader(V_SimParticlesCS, 64)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
  RWTexture2D<u32> occluders = G_Dereference<u32>(frame.occluders);

  u32 particle_idx = SV_DispatchThreadID;
  if (particle_idx < V_ParticlesCap)
  {
    V_Particle particle = particles[particle_idx];
    b32 prune = 0;

    //////////////////////////////
    //- Initialize particle

    if (particle.kind != V_ParticleKind_None)
    {
      // Four independent 16-bit random values from one hash of the index
      u64 seed0 = MixU64(V_ParticleSimBasis ^ particle_idx);
      f32 rand_offset = Norm16(seed0 >> 0);
      f32 rand_angle = Norm16(seed0 >> 16);
      f32 rand_speed = Norm16(seed0 >> 32);
      f32 rand_falloff = Norm16(seed0 >> 48);

      //////////////////////////////
      //- Init

      if (particle.kind < 0)
      {
        // Negative kind encodes -(emitter_idx + 1)
        u32 emitter_idx = -particle.kind - 1;
        V_Emitter emitter = G_Dereference<V_Emitter>(frame.emitters)[emitter_idx];

        f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle);
        f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed);

        particle = (V_Particle)0;
        particle.kind = emitter.kind;
        particle.life = 0;
        particle.pos = lerp(emitter.pos.p0, emitter.pos.p1, rand_offset);
        particle.velocity = Vec2(cos(initial_angle), sin(initial_angle)) * initial_speed;
      }

      if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
      {
        V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
        RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[desc.layer]);
        RWTexture2D<u32> densities = G_Dereference<u32>(frame.particle_densities[desc.layer]);

        // Masks match the layout the readers decode with: a 24-bit index and
        // a 7-bit kind, leaving bit 31 free for the stain flag
        u32 packed = 0;
        packed |= (particle_idx & ((1 << 24) - 1)) << 0;
        packed |= (particle.kind & 0x7F) << 24;
        StaticAssert(V_ParticlesCap <= (1 << 24)); // particle idx must fit in 24 bits
        StaticAssert(V_ParticleKind_COUNT <= 0x7F); // particle kind must fit in 7 bits

        // First simulated frame: remember the occluder the particle spawned
        // inside so that it does not immediately collide with it
        if (particle.life == 0)
        {
          Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(particle.pos, 1));
          if (IsInside(cell_pos, P_WorldCellsDims))
          {
            u32 occluder = occluders[cell_pos];
            b32 occluder_is_wall = occluder == 0xFFFFFFFF;
            if (!(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall))
            {
              particle.origin_occluder = occluder;
              particle.prev_occluder = particle.origin_occluder;
            }
          }
          else
          {
            prune = 1;
          }
        }

        //////////////////////////////
        //- Move

        b32 collision = 0;

        // TODO: Clip to avoid unnecessary iterations outside of world bounds
        if (!prune)
        {
          Vec2 p0 = particle.pos;
          Vec2 p1 = particle.pos + particle.velocity * frame.dt;
          f32 t = 1; // Fraction of p0 -> p1 actually travelled
          {
            Vec2 occluder_p0 = mul(frame.af.world_to_cell, Vec3(p0, 1));
            Vec2 occluder_p1 = mul(frame.af.world_to_cell, Vec3(p1, 1));
            Vec2I32 cell_p0 = floor(occluder_p0);
            Vec2I32 cell_p1 = floor(occluder_p1);

            // DDA setup: t_max holds the ray parameter of the next cell
            // boundary on each axis, t_delta the parameter span of one cell
            Vec2 delta = occluder_p1 - occluder_p0;
            Vec2 inv_delta = 1.0 / delta;
            Vec2 dda_step_dir = Vec2((delta.x > 0) - (delta.x < 0), (delta.y > 0) - (delta.y < 0));
            Vec2 t_delta = abs(inv_delta);
            Vec2 t_max = cell_p0 - occluder_p0;
            t_max.x += dda_step_dir.x > 0;
            t_max.y += dda_step_dir.y > 0;
            t_max *= inv_delta;
            t_max = abs(t_max);

            Vec2 t_hit = 0;

            Vec2I32 cell_pos = cell_p0;

            b32 stepped_x = 0;
            b32 stepped_y = 0;

            // TODO: Tune this
            u32 max_iterations = 128;

            b32 done = 0;
            f32 t_diff = 0;
            for (u32 iteration_idx = 0; iteration_idx < max_iterations && !done; ++iteration_idx)
            {
              // Step to the next cell unless the destination cell is reached
              if (cell_pos.x == cell_p1.x && cell_pos.y == cell_p1.y)
              {
                done = 1;
              }
              else if (t_max.x < t_max.y)
              {
                cell_pos.x += dda_step_dir.x;
                f32 old = t_hit.x;
                t_hit.x = t_max.x - t_delta.x;
                t_diff = t_hit.x - old;
                t_max.x += t_delta.x;
                stepped_x = 1;
                stepped_y = 0;
              }
              else
              {
                cell_pos.y += dda_step_dir.y;
                f32 old = t_hit.y;
                t_hit.y = t_max.y - t_delta.y;
                t_diff = t_hit.y - old;
                t_max.y += t_delta.y;
                stepped_x = 0;
                stepped_y = 1;
              }

              b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims);

              if (is_in_world)
              {
                f32 stain_delta = abs(t_diff) * desc.stain_rate * frame.dt;
                particle.stain_accum += stain_delta;

                //- Handle collision
                {
                  u32 occluder = occluders[cell_pos];
                  b32 occluder_is_wall = occluder == 0xFFFFFFFF;

                  // Once the particle has left its spawn occluder, that
                  // occluder can be collided with like any other
                  if (occluder != particle.origin_occluder)
                  {
                    particle.origin_occluder = 0;
                  }
                  if (
                    occluder != 0 &&
                    !(AnyBit(desc.flags, V_ParticleFlag_OnlyCollideWithWalls) && !occluder_is_wall) &&
                    occluder != particle.origin_occluder
                  )
                  {
                    u64 collision_seed = MixU64(V_ParticleCellBasis ^ seed0 ^ particle.cells_count);
                    f32 rand_collision_angle = Norm16(collision_seed >> 0);
                    f32 rand_collision_velocity = Norm16(collision_seed >> 16);
                    f32 rand_collision_penetration = Norm16(collision_seed >> 32);

                    // Below pen_rate the particle passes through the occluder
                    if (rand_collision_penetration >= desc.pen_rate)
                    {
                      collision = 1;
                      done = 1;
                      {
                        // Reflect on the axis that was stepped last and stop
                        // the move at the boundary that was crossed
                        if (stepped_x)
                        {
                          if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect))
                          {
                            particle.velocity.x *= -1;
                          }
                          t = saturate(t_hit.x);
                        }
                        else if (stepped_y)
                        {
                          if (!AnyBit(desc.flags, V_ParticleFlag_NoReflect))
                          {
                            particle.velocity.y *= -1;
                          }
                          t = saturate(t_hit.y);
                        }
                        {
                          // Scatter the bounce slightly and bleed off speed
                          f32 collision_angle = lerp(-0.05 * Tau, 0.05 * Tau, rand_collision_angle);
                          f32 collision_velocity_falloff = lerp(50, 100, rand_collision_velocity);

                          particle.velocity = RotateVec2Angle(particle.velocity, collision_angle);
                          particle.velocity *= 1.0f - saturate(collision_velocity_falloff * frame.dt);
                        }
                      }
                    }
                  }
                  particle.prev_occluder = occluder;
                }

                if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))
                {
                  prune = 1;
                }

                if (prune)
                {
                  done = 1;
                  if (AnyBit(desc.flags, V_ParticleFlag_StainWhenPruned))
                  {
                    particle.stain_accum += 1;
                    packed |= 1 << 31;
                  }
                }

                // Commit the particle to the cell unless it bounced off it or
                // is still inside the wall it spawned in
                if (!collision && particle.origin_occluder != 0xFFFFFFFF)
                {
                  u32 stain_count = floor(particle.stain_accum);
                  u32 density = 1 + stain_count;

                  u32 commit = packed;
                  if (stain_count > 0)
                  {
                    commit |= (1 << 31);
                  }

                  InterlockedMax(cells[cell_pos], commit);
                  InterlockedAdd(densities[cell_pos], density);
                  particle.stain_accum -= stain_count;
                }
              }
              else
              {
                // Left the world
                done = 1;
                prune = 1;
              }

              particle.cells_count += 1;
              // NOTE(review): together with the ++iteration_idx in the loop
              // header this advances the counter twice per iteration, so at
              // most max_iterations / 2 cells are visited. Left unchanged to
              // keep current behavior - confirm whether this is intended.
              iteration_idx += 1;
            }
          }

          // Per-frame drag
          f32 falloff = saturate(lerp(10, 20, rand_falloff) * frame.dt);
          particle.velocity *= 1.0f - falloff;

          particle.pos = p0 + (p1 - p0) * t;
        }

        particle.life += frame.dt;
      }

      if (prune)
      {
        particle.kind = V_ParticleKind_None;
      }

      particles[particle_idx] = particle;
    }
  }
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Shade
|
|
|
|
// TODO: Remove this
|
|
|
|
// Placeholder shade pass: writes a zero result to every texel of the shade
// target. Threads outside the texture dimensions do nothing.
ComputeShader2D(V_ShadeCS, 8, 8)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  RWTexture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_rw);

  Vec2 shade_pos = SV_DispatchThreadID + 0.5;

  //////////////////////////////
  //- Compute result

  Vec4 result = 0;

  //////////////////////////////
  //- Write result

  if (all(shade_pos < countof(shade_tex)))
  {
    shade_tex[shade_pos] = result;
  }
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Composite
|
|
|
|
// Composites the final screen image, one thread per screen pixel:
//   - World color: tile (wall gradient, textured tile or checkerboard),
//     stains, ground/air particle layers and the rasterized albedo, blended
//     in an order that depends on whether the pixel is over a wall tile.
//   - Overlay color: tile selection fill, debug grid / axes, world bounds
//     and the crosshair.
// The blended result is un-premultiplied and written to the screen target.
// The shade texture is not composited at the moment.
ComputeShader2D(V_CompositeCS, 8, 8)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
  Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
  RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
  RWTexture2D<Vec4> stains = G_Dereference<Vec4>(frame.stains);
  RWTexture2D<Vec4> dry_stains = G_Dereference<Vec4>(frame.dry_stains);
  RWTexture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
  Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);

  Vec2 screen_pos = SV_DispatchThreadID.xy + 0.5;
  Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1));
  Vec2 tile_pos = mul(frame.af.world_to_tile, Vec3(world_pos, 1));
  Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1));

  Vec2 half_world_dims = Vec2(P_WorldPitch, P_WorldPitch) * 0.5;
  Vec2 world_bounds_screen_p0 = mul(frame.af.world_to_screen, Vec3(-half_world_dims.xy, 1));
  Vec2 world_bounds_screen_p1 = mul(frame.af.world_to_screen, Vec3(half_world_dims.xy, 1));
  b32 is_in_world = IsInside(cell_pos, P_WorldCellsDims);
  b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);

  P_TileKind tile = tiles[tile_pos];

  //////////////////////////////
  //- World color

  // Background color used outside of the world
  Vec4 world_color = Vec4(0.025, 0.025, 0.025, 1);
  if (is_in_world)
  {
    //////////////////////////////
    //- Tile

    // TODO: Remove this

    b32 tile_is_wall = 0;
    Vec4 tile_color = 0;
    {
      // Neighboring tiles, probed just under one tile away in each direction
      P_TileKind tile_tl = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y - 0.99)];
      P_TileKind tile_tr = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y - 0.99)];
      P_TileKind tile_br = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y + 0.99)];
      P_TileKind tile_bl = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y + 0.99)];
      P_TileKind tile_t = tiles[Vec2(tile_pos.x, tile_pos.y - 0.99)];
      P_TileKind tile_r = tiles[Vec2(tile_pos.x + 0.99, tile_pos.y)];
      P_TileKind tile_b = tiles[Vec2(tile_pos.x, tile_pos.y + 0.99)];
      P_TileKind tile_l = tiles[Vec2(tile_pos.x - 0.99, tile_pos.y)];

      // Distance (in tiles) to the nearest corner / edge shared with a
      // neighbor of a different kind; stays Inf when all neighbors match
      f32 tile_edge_dist = Inf;
      if (tile_tl != tile) { tile_edge_dist = min(tile_edge_dist, length(tile_pos - Vec2(floor(tile_pos.x), floor(tile_pos.y)))); }
      if (tile_tr != tile) { tile_edge_dist = min(tile_edge_dist, length(tile_pos - Vec2(ceil(tile_pos.x), floor(tile_pos.y)))); }
      if (tile_br != tile) { tile_edge_dist = min(tile_edge_dist, length(tile_pos - Vec2(ceil(tile_pos.x), ceil(tile_pos.y)))); }
      if (tile_bl != tile) { tile_edge_dist = min(tile_edge_dist, length(tile_pos - Vec2(floor(tile_pos.x), ceil(tile_pos.y)))); }
      if (tile_l != tile) { tile_edge_dist = min(tile_edge_dist, frac(tile_pos.x)); }
      if (tile_r != tile) { tile_edge_dist = min(tile_edge_dist, 1.0 - frac(tile_pos.x)); }
      if (tile_t != tile) { tile_edge_dist = min(tile_edge_dist, frac(tile_pos.y)); }
      if (tile_b != tile) { tile_edge_dist = min(tile_edge_dist, 1.0 - frac(tile_pos.y)); }

      if (tile == P_TileKind_Wall)
      {
        // Walls fade from a darker outline at the edge to a lighter interior
        Vec4 outer = LinearFromSrgb(Vec4(0.05, 0.05, 0.05, 1));
        Vec4 inner = LinearFromSrgb(Vec4(0.15, 0.15, 0.15, 1));
        tile_color = lerp(outer, inner, smoothstep(0, 1, tile_edge_dist / 0.375));
        tile_is_wall = 1;
      }
      else if (tile != P_TileKind_Empty)
      {
        // Textured tile; the texture slice repeats once per world unit
        V_TileDesc tile_desc = frame.tile_descs[tile];
        Texture2D<Vec4> tile_tex = G_Dereference<Vec4>(tile_desc.tex);
        Vec2 tile_samp_uv = lerp(tile_desc.tex_slice_uv.p0, tile_desc.tex_slice_uv.p1, frac(world_pos));
        tile_color = tile_tex.SampleLevel(sampler, tile_samp_uv, 0);
      }
      else
      {
        // Checkered grid for empty tiles. The sign flips keep the pattern
        // continuous across the world axes since the position is mirrored
        // by abs() before the modulo.
        i32 color_idx = 0;
        Vec4 colors[2] = {
          LinearFromSrgb(Vec4(0.30, 0.30, 0.30, 1)),
          LinearFromSrgb(Vec4(0.15, 0.15, 0.15, 1))
        };
        const f32 checker_size = 0.5;
        Vec2 world_pos_modded = fmod(abs(world_pos), Vec2(checker_size * 2, checker_size * 2));
        if (world_pos_modded.x < checker_size)
        {
          color_idx = !color_idx;
        }
        if (world_pos_modded.y < checker_size)
        {
          color_idx = !color_idx;
        }
        if (world_pos.x < 0)
        {
          color_idx = !color_idx;
        }
        if (world_pos.y < 0)
        {
          color_idx = !color_idx;
        }
        tile_color = colors[color_idx];
      }
    }

    //////////////////////////////
    //- Albedo tex

    Vec4 albedo_tex_color = albedo_tex[screen_pos];

    //////////////////////////////
    //- Particles

    // FIXME: Stain
    Vec4 stain_color = 0;
    {
      // Blend between the wet and dry stain by the cell's dryness
      Vec4 wet_stain = stains[cell_pos];
      Vec4 dry_stain = dry_stains[cell_pos];
      f32 dryness = drynesses[cell_pos];
      stain_color = max(lerp(wet_stain, dry_stain, dryness), 0);
    }

    Vec4 ground_particle_color = 0;
    Vec4 air_particle_color = 0;

    for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
    {
      RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);
      RWTexture2D<u32> densities = G_Dereference<u32>(frame.particle_densities[layer]);

      // Packed cell layout: bits 24..30 = particle kind, bits 0..23 = index
      u32 packed = cells[cell_pos];
      V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F);
      if (particle_kind != V_ParticleKind_None)
      {
        u32 density = densities[cell_pos];
        V_ParticleDesc desc = V_DescFromParticleKind(particle_kind);
        u32 particle_idx = packed & ((1 << 24) - 1);
        Vec4 cell_color = V_ColorFromParticle(desc, particle_idx, density);
        cell_color.rgb *= cell_color.a;

        // Every layer other than the ground layer is treated as airborne
        if (layer == V_ParticleLayer_Ground)
        {
          ground_particle_color = BlendPremul(cell_color, ground_particle_color);
        }
        else
        {
          air_particle_color = BlendPremul(cell_color, air_particle_color);
        }
      }
    }

    // Darken wall particles / stains
    if (tile == P_TileKind_Wall)
    {
      ground_particle_color *= 0.5;
      air_particle_color *= 0.5;
      stain_color *= 0.5;
    }

    //////////////////////////////
    //- Compose world

    // Ground tiles go underneath the albedo, wall tiles on top of it
    if (!tile_is_wall)
    {
      world_color = BlendPremul(tile_color, world_color); // Blend ground tile
      world_color = BlendPremul(stain_color, world_color); // Blend ground stain
      world_color = BlendPremul(ground_particle_color, world_color); // Blend ground particle
    }
    world_color = BlendPremul(albedo_tex_color, world_color);
    if (tile_is_wall)
    {
      world_color = BlendPremul(tile_color, world_color); // Blend wall tile
      world_color = BlendPremul(stain_color, world_color); // Blend wall stain
      world_color = BlendPremul(ground_particle_color, world_color); // Blend wall particle
    }
    world_color = BlendPremul(air_particle_color, world_color); // Blend air particle
  }

  //////////////////////////////
  //- Overlay color

  Vec4 overlay_color = 0;
  {
    // Half line thickness in screen pixels
    f32 half_thickness = 1;

    //////////////////////////////
    //- Tile selection overlay

    // Only the inner fill is drawn; there is no selection border
    Vec4 selection_color = 0;
    if (
      frame.is_editing &&
      frame.edit_mode == V_EditMode_Tile &&
      frame.has_mouse_focus &&
      is_in_world
    )
    {
      Vec4 inner_color = LinearFromSrgb(Vec4(0.4, 0.8, 0.4, 0.6));

      Rng2 world_selection = frame.world_selection;

      // Snap the selection outward to whole tiles, at least one tile wide
      Rng2 tile_selection;
      tile_selection.p0 = floor(mul(frame.af.world_to_tile, Vec3(world_selection.p0, 1)));
      tile_selection.p1 = ceil(mul(frame.af.world_to_tile, Vec3(world_selection.p1, 1)));
      tile_selection.p1 = max(tile_selection.p1, tile_selection.p0 + 1);

      if (
        tile_pos.x >= tile_selection.p0.x &&
        tile_pos.x <= tile_selection.p1.x &&
        tile_pos.y >= tile_selection.p0.y &&
        tile_pos.y <= tile_selection.p1.y
      )
      {
        selection_color = inner_color;
      }

      selection_color.rgb *= selection_color.a;
    }

    //////////////////////////////
    //- Grid

    // Later assignments win: world bounds over axes over grid lines
    Vec4 grid_color = 0;
    if (is_in_world)
    {
      if (frame.show_console)
      {
        // Grid outline
        {
          const Vec4 line_color = LinearFromSrgb(Vec4(1, 1, 1, 0.1));
          Vec2 line_screen_p0 = mul(frame.af.world_to_screen, Vec3(floor(world_pos), 1));
          Vec2 line_screen_p1 = mul(frame.af.world_to_screen, Vec3(ceil(world_pos), 1));
          f32 line_dist = 100000;
          line_dist = min(line_dist, abs(screen_pos.x - line_screen_p0.x));
          line_dist = min(line_dist, abs(screen_pos.x - line_screen_p1.x));
          line_dist = min(line_dist, abs(screen_pos.y - line_screen_p0.y));
          line_dist = min(line_dist, abs(screen_pos.y - line_screen_p1.y));
          if (line_dist <= half_thickness * 0.5)
          {
            grid_color = line_color;
          }
        }

        // Axis
        {
          const Vec4 x_axis_color = LinearFromSrgb(Vec4(0.75, 0, 0, 1));
          const Vec4 y_axis_color = LinearFromSrgb(Vec4(0, 0.75, 0, 1));

          Vec2 zero_screen = mul(frame.af.world_to_screen, Vec3(0, 0, 1));
          f32 x_dist = abs(screen_pos.x - zero_screen.x);
          f32 y_dist = abs(screen_pos.y - zero_screen.y);
          if (y_dist <= half_thickness)
          {
            grid_color = x_axis_color;
          }
          else if (x_dist <= half_thickness)
          {
            grid_color = y_axis_color;
          }
        }
      }

      // World bounds
      {
        const Vec4 bounds_color = LinearFromSrgb(Vec4(0.75, 0.75, 0, 1));
        f32 bounds_dist = 100000;
        bounds_dist = min(bounds_dist, abs(screen_pos.x - world_bounds_screen_p0.x));
        bounds_dist = min(bounds_dist, abs(screen_pos.x - world_bounds_screen_p1.x));
        bounds_dist = min(bounds_dist, abs(screen_pos.y - world_bounds_screen_p0.y));
        bounds_dist = min(bounds_dist, abs(screen_pos.y - world_bounds_screen_p1.y));
        if (bounds_dist <= half_thickness)
        {
          grid_color = bounds_color;
        }
      }

      grid_color.rgb *= grid_color.a;
    }

    //////////////////////////////
    //- Crosshair

    // TODO: Remove this
    // TODO: Move to final step after post-processing pass

    Vec4 crosshair_color = 0;
    if (!frame.is_editing)
    {
      f32 dist = length(frame.screen_crosshair - screen_pos);
      if (dist < 4)
      {
        // Adaptive crosshair color based on underlying luminance
        f32 world_luminance = LuminanceFromColor(world_color);
        f32 adaptive_threshold = 0.5;
        if (world_luminance <= adaptive_threshold)
        {
          crosshair_color = Color_White;
        }
        else
        {
          crosshair_color = InvertColor(Color_White);
        }

        crosshair_color.rgb *= crosshair_color.a;
      }
    }

    //////////////////////////////
    //- Compose overlay

    overlay_color = BlendPremul(selection_color, overlay_color);
    overlay_color = BlendPremul(grid_color, overlay_color);
    overlay_color = BlendPremul(crosshair_color, overlay_color);
  }

  //////////////////////////////
  //- Compose result

  Vec4 result = 0;
  result = BlendPremul(world_color, result);
  result = BlendPremul(overlay_color, result);

  result = Unpremul(result);

  if (is_in_screen)
  {
    screen_tex[screen_pos] = result;
  }
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Bloom
|
|
|
|
//////////////////////////////
|
|
//- Downsample
|
|
|
|
// Bloom downsample pass.
//
// Writes bloom mip (mip_idx - 1). On the first pass (mip_idx == 1) the source
// is the screen texture and every tap is scaled by a soft-knee brightness
// weight; on later passes the source is bloom mip (mip_idx - 2) and the taps
// are only filtered.
//
// NOTE(review): bloom_mips_rw[mip_idx - 1] is indexed without a range check, so
// this presumably relies on the host only dispatching with mip_idx >= 1 - confirm.
ComputeShader2D(V_BloomDownCS, 8, 8)
{
  i32 mip_idx = V_GpuConst_MipIdx;

  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
  RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);

  // Pick the source texture for this pass
  Texture2D<Vec4> bloom_up;
  b32 is_first_pass = mip_idx == 1;
  if (is_first_pass)
  {
    bloom_up = G_Dereference<Vec4>(frame.screen_ro);
  }
  else
  {
    bloom_up = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx - 2]);
  }

  Vec2 down_dims = countof(bloom_down);
  Vec2 bloom_pos = SV_DispatchThreadID + 0.5;

  // Threads that fall outside the destination mip have nothing to write, so
  // all of the sampling work is skipped for them.
  if (IsInside(bloom_pos, down_dims))
  {
    Vec2 bloom_uv = bloom_pos / down_dims;
    Vec2 off_uv = 0.5 / down_dims; // Half a destination texel in uv space

    // Soft-knee parameters, only used on the first pass
    f32 threshold = 0.25;
    f32 knee = 0.75;

    Vec4 result = 0;
    {
      // 5-tap sample: center (0.5) + four diagonal taps (0.125 each), weights sum to 1
      Struct(SampleDesc) { Vec2 uv; f32 weight; };
      SampleDesc samples[] = {
        { bloom_uv + Vec2(0, 0), 0.5 },
        { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
        { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
        { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
        { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
      };
      for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
      {
        SampleDesc desc = samples[sample_idx];
        Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);

        // Later passes keep every tap at full weight
        f32 knee_weight = 1;
        if (is_first_pass)
        {
          f32 luminance = LuminanceFromColor(src);
          f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance
          f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);
          if (bright > 0)
          {
            // Weight ramps up quadratically over `knee` once brightness passes `threshold`
            f32 over_threshold = max(bright - threshold, 0.0);
            f32 ramp = saturate(over_threshold / knee);
            knee_weight = (over_threshold * ramp * ramp) / bright;
          }
          else
          {
            // Avoids dividing by a non-positive brightness
            knee_weight = 0;
          }
        }

        result += src * desc.weight * knee_weight;
      }
    }

    bloom_down[bloom_pos] = result;
  }
}
|
|
|
|
//////////////////////////////
|
|
//- Upsample
|
|
|
|
// Bloom upsample pass.
//
// Filters bloom mip `mip_idx` with a 13-tap kernel and adds the result, scaled
// by 0.75, onto the destination: bloom mip (mip_idx - 1), or the screen
// texture on the last pass (mip_idx == 0).
ComputeShader2D(V_BloomUpCS, 8, 8)
{
  i32 mip_idx = V_GpuConst_MipIdx;

  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
  Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx]);

  // Pick the destination texture for this pass
  b32 is_last_pass = mip_idx == 0;
  RWTexture2D<Vec4> bloom_up;
  if (is_last_pass)
  {
    bloom_up = G_Dereference<Vec4>(frame.screen_rw);
  }
  else
  {
    bloom_up = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);
  }

  Vec2 down_dims = countof(bloom_down);
  Vec2 up_dims = countof(bloom_up);

  Vec2 bloom_pos = SV_DispatchThreadID + 0.5;

  // Threads that fall outside the destination have nothing to write, so all
  // of the sampling work is skipped for them.
  if (IsInside(bloom_pos, up_dims))
  {
    Vec2 bloom_uv = bloom_pos / up_dims;
    Vec2 off_inner_uv = 1 / down_dims;      // One source texel in uv space
    Vec2 off_outer_uv = off_inner_uv * 2;   // Two source texels in uv space

    // 13-tap sample, weights are 9 + 4*3 + 4*4 + 4*1 = 41 before normalization
    Vec4 result = 0;
    {
      // Center
      result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 9.0f / 41.0f;

      // Outer Edges
      result += (
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_outer_uv.y), 0) +
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_outer_uv.x, 0), 0) +
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_outer_uv.y), 0) +
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_outer_uv.x, 0), 0)
      ) * 3.0f / 41.0f;

      // Inner corners
      result += (
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_inner_uv.x, -off_inner_uv.y), 0) +
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_inner_uv.x, -off_inner_uv.y), 0) +
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_inner_uv.x, off_inner_uv.y), 0) +
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_inner_uv.x, off_inner_uv.y), 0)
      ) * 4.0f / 41.0f;

      // Outer corners
      result += (
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_outer_uv.x, -off_outer_uv.y), 0) +
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_outer_uv.x, -off_outer_uv.y), 0) +
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_outer_uv.x, off_outer_uv.y), 0) +
        bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_outer_uv.x, off_outer_uv.y), 0)
      ) * 1.0f / 41.0f;
    }

    // Accumulate onto whatever the destination already holds
    bloom_up[bloom_pos] += result * 0.75;
  }
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Finalize
|
|
|
|
// Final screen pass.
//
// For every texel inside frame.screen_dims: reads the screen texture, applies
// the ACES tone-map approximation when frame.should_tone_map is set, runs the
// color through Unpremul and writes it back in place.
ComputeShader2D(V_FinalizeCS, 8, 8)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);

  Vec2 screen_pos = SV_DispatchThreadID + 0.5;
  b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);
  if (is_in_screen)
  {
    Vec4 result = screen_tex[screen_pos];

    //- Tone map
    if (frame.should_tone_map)
    {
      // ACES approximation by Krzysztof Narkowicz
      // https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
      result.rgb = saturate((result.rgb * (2.51f * result.rgb + 0.03f)) / (result.rgb * (2.43f * result.rgb + 0.59f) + 0.14f));
    }

    result = Unpremul(result);

    screen_tex[screen_pos] = result;
  }
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Debug shapes
|
|
|
|
//////////////////////////////
|
|
//- Vertex shader
|
|
|
|
// Debug-shape vertex shader.
//
// Fetches the V_DVert addressed by SV_VertexID from frame.dverts, converts its
// position with NdcFromPos against frame.screen_dims, and forwards its
// color_lin unchanged.
VertexShader(V_DVertVS, V_DVertPSInput)
{
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  StructuredBuffer<V_DVert> dverts = G_Dereference<V_DVert>(frame.dverts);
  V_DVert src_vert = dverts[SV_VertexID];

  Vec2 vert_screen_pos = src_vert.pos;

  // Only the xy of the converted position is used; z is fixed at 0 and w at 1
  V_DVertPSInput vs_out;
  vs_out.sv_position = Vec4(NdcFromPos(vert_screen_pos, frame.screen_dims).xy, 0, 1);
  vs_out.color_lin = src_vert.color_lin;
  return vs_out;
}
|
|
|
|
//////////////////////////////
|
|
//- Pixel shader
|
|
|
|
// Debug-shape pixel shader: passes the incoming color_lin straight through to sv_target0.
PixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input)
{
  V_DVertPSOutput result;
  result.sv_target0 = input.color_lin;
  return result;
}
|