fix bloom shimmer. use common layouts for vis textures

This commit is contained in:
jacob 2026-02-18 14:21:16 -06:00
parent 88f37a4bbb
commit e9bad68135
10 changed files with 1163 additions and 204 deletions

View File

@ -25,7 +25,7 @@ void G_BootstrapCommon(void)
gpu_perm, cl, gpu_perm, cl,
G_Format_R8G8B8A8_Uint, G_Format_R8G8B8A8_Uint,
VEC2I32(8, 8), VEC2I32(8, 8),
G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present, G_Layout_Simultaneous,
.flags = G_ResourceFlag_ZeroMemory .flags = G_ResourceFlag_ZeroMemory
); );
G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex); G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex);
@ -44,7 +44,7 @@ void G_BootstrapCommon(void)
gpu_perm, cl, gpu_perm, cl,
G_Format_R16_Uint, G_Format_R16_Uint,
noise_dims, noise_dims,
G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present G_Layout_Simultaneous
); );
G_CopyCpuToTexture( G_CopyCpuToTexture(
cl, cl,
@ -143,30 +143,54 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList
//- Mip //- Mip
// Returns the extent of mip level `mip` for a 1D texture whose mip 0 extent
// is `mip0_dims`.
//
// `mip` is clamped to [-31, 31]. Non-negative mips shift the extent right
// (halving per level); negative mips shift it left (doubling per level).
// The result is never smaller than 1.
i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip)
{
  mip = ClampI32(mip, -31, 31);
  // Seed from the mip 0 extent. This used to start at 0 without ever reading
  // the parameter, so both branches shifted 0 and every call returned 1.
  i32 result = mip0_dims;
  if (mip >= 0)
  {
    result = MaxI32(result >> mip, 1);
  }
  else
  {
    // NOTE(review): a large extent combined with a strongly negative mip can
    // overflow i32 in this shift - confirm callers stay within range.
    result = MaxI32(result << -mip, 1);
  }
  return result;
}
// Returns the per-axis extent of mip level `mip` for a 2D texture whose mip 0
// extent is `mip0_dims`.
//
// `mip` is clamped to [-31, 31]. Levels >= 0 shift each axis right by the
// level; levels < 0 shift each axis left by the negated level. Every axis is
// floored at 1.
Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip)
{
  i32 level = ClampI32(mip, -31, 31);
  Vec2I32 dims = Zi;
  if (level < 0)
  {
    // Negative level: scale up from mip 0
    i32 grow = -level;
    dims.x = MaxI32(mip0_dims.x << grow, 1);
    dims.y = MaxI32(mip0_dims.y << grow, 1);
    return dims;
  }
  // Non-negative level: scale down from mip 0
  dims.x = MaxI32(mip0_dims.x >> level, 1);
  dims.y = MaxI32(mip0_dims.y >> level, 1);
  return dims;
}
// Returns the per-axis extent of mip level `mip` for a 3D texture whose mip 0
// extent is `mip0_dims`.
//
// `mip` is clamped to [-31, 31]. Levels >= 0 shift each axis right by the
// level; levels < 0 shift each axis left by the negated level. Every axis is
// floored at 1.
Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip)
{
  i32 level = ClampI32(mip, -31, 31);
  Vec3I32 dims = Zi;
  if (level < 0)
  {
    // Negative level: scale up from mip 0
    i32 grow = -level;
    dims.x = MaxI32(mip0_dims.x << grow, 1);
    dims.y = MaxI32(mip0_dims.y << grow, 1);
    dims.z = MaxI32(mip0_dims.z << grow, 1);
    return dims;
  }
  // Non-negative level: scale down from mip 0
  dims.x = MaxI32(mip0_dims.x >> level, 1);
  dims.y = MaxI32(mip0_dims.y >> level, 1);
  dims.z = MaxI32(mip0_dims.z >> level, 1);
  return dims;
}

View File

@ -35,9 +35,9 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList
G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ }) G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ })
//- Mip //- Mip
i32 G_DimsFromMip1D(i32 texture_dims, i32 mip); i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip);
Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip); Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip);
Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip); Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip);
//- Viewport / scissor //- Viewport / scissor
Rng3 G_ViewportFromTexture(G_ResourceHandle texture); Rng3 G_ViewportFromTexture(G_ResourceHandle texture);

View File

@ -242,18 +242,16 @@ Enum(G_Access)
G_Access_IndexBuffer = (1 << 8), G_Access_IndexBuffer = (1 << 8),
G_Access_IndirectArgument = (1 << 9), G_Access_IndirectArgument = (1 << 9),
G_Access_All = 0xFFFFFFFF G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the specified sync stage
}; };
Enum(G_Layout) Enum(G_Layout)
{ {
G_Layout_NoChange, G_Layout_NoChange,
// "Simultaneous" allows a resource to be used on any queue with any access // Simultaneous layout allows a resource to be used on any queue with any
// type, as long as there is only one writer at a time, and the writer is not // access type (except depth-stencil). Resources cannot transition to/from
// writing to any texels currently being read. // this layout, they must be created with it.
// Resources cannot transition to/from this layout. They must be created
// with it and are locked to it.
G_Layout_Simultaneous, // D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS G_Layout_Simultaneous, // D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS
G_Layout_Undefined, // D3D12_BARRIER_LAYOUT_UNDEFINED G_Layout_Undefined, // D3D12_BARRIER_LAYOUT_UNDEFINED

View File

@ -26,7 +26,7 @@
@ComputeShader V_CompositeCS @ComputeShader V_CompositeCS
@ComputeShader V_BloomDownCS @ComputeShader V_BloomDownCS
@ComputeShader V_BloomUpCS @ComputeShader V_BloomUpCS
@ComputeShader V_PostProcessCS @ComputeShader V_FinalizeCS
@VertexShader V_DVertVS @VertexShader V_DVertVS
@PixelShader V_DVertPS @PixelShader V_DVertPS

View File

@ -416,7 +416,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R8_Uint, G_Format_R8_Uint,
tiles_dims, tiles_dims,
G_Layout_DirectQueue_ShaderRead, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory, .flags = G_ResourceFlag_ZeroMemory,
.name = Lit("Tiles") .name = Lit("Tiles")
); );
@ -441,7 +441,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R32_Uint, G_Format_R32_Uint,
cells_dims, cells_dims,
G_Layout_DirectQueue_ShaderReadWrite, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(perm, "Particle cells - layer %F", FmtSint(layer)) .name = StringF(perm, "Particle cells - layer %F", FmtSint(layer))
); );
@ -454,7 +454,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R32_Uint, G_Format_R32_Uint,
cells_dims, cells_dims,
G_Layout_DirectQueue_ShaderReadWrite, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(perm, "Particle densities - layer %F", FmtSint(layer)) .name = StringF(perm, "Particle densities - layer %F", FmtSint(layer))
); );
@ -469,7 +469,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
cells_dims, cells_dims,
G_Layout_DirectQueue_ShaderReadWrite, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Stains") .name = Lit("Stains")
); );
@ -481,7 +481,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
cells_dims, cells_dims,
G_Layout_DirectQueue_ShaderReadWrite, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Dry stains") .name = Lit("Dry stains")
); );
@ -493,7 +493,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R32_Float, G_Format_R32_Float,
cells_dims, cells_dims,
G_Layout_DirectQueue_ShaderReadWrite, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Drynesses") .name = Lit("Drynesses")
); );
@ -505,7 +505,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R32_Uint, G_Format_R32_Uint,
cells_dims, cells_dims,
G_Layout_DirectQueue_ShaderReadWrite, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Occluders cells") .name = Lit("Occluders cells")
); );
@ -614,6 +614,8 @@ void V_TickForever(WaveLaneCtx *lane)
frame->dt = SecondsFromNs(frame->dt_ns); frame->dt = SecondsFromNs(frame->dt_ns);
frame->rand = prev_frame->rand; frame->rand = prev_frame->rand;
frame->should_tone_map = TweakBool("Tone mapping enabled", 1);
if (P_IsEntKeyNil(V.player_key)) if (P_IsEntKeyNil(V.player_key))
{ {
TrueRand(StringFromStruct(&V.player_key)); TrueRand(StringFromStruct(&V.player_key));
@ -4918,18 +4920,17 @@ void V_TickForever(WaveLaneCtx *lane)
frame->tile_descs[tile_kind] = tile_desc; frame->tile_descs[tile_kind] = tile_desc;
} }
} }
// Upload tiles // Upload tiles
if (frame->tiles_dirty) if (frame->tiles_dirty)
{ {
// LogDebugF("Uploading tiles to gpu"); // LogDebugF("Uploading tiles to gpu");
G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_CopyWrite);
G_CopyCpuToTexture( G_CopyCpuToTexture(
frame->cl, frame->cl,
gpu_tiles_res, VEC3I32(0, 0, 0), gpu_tiles_res, VEC3I32(0, 0, 0),
local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1), local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1),
RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1)) RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1))
); );
G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_ShaderRead);
} }
// Screen texture // Screen texture
@ -4937,7 +4938,7 @@ void V_TickForever(WaveLaneCtx *lane)
frame->gpu_arena, frame->cl, frame->gpu_arena, frame->cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
frame->screen_dims, frame->screen_dims,
G_Layout_DirectQueue_ShaderReadWrite, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)) .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
); );
@ -4951,11 +4952,10 @@ void V_TickForever(WaveLaneCtx *lane)
frame->gpu_arena, frame->cl, frame->gpu_arena, frame->cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(screen_target), 1), G_DimsFromMip2D(G_Count2D(screen_target), 1),
G_Layout_DirectQueue_ShaderReadWrite, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)), .name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),
// .max_mips = 4 .max_mips = 64
.max_mips = 8
); );
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx) for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)
{ {
@ -4979,7 +4979,7 @@ void V_TickForever(WaveLaneCtx *lane)
frame->gpu_arena, frame->cl, frame->gpu_arena, frame->cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
frame->shade_dims, frame->shade_dims,
G_Layout_DirectQueue_ShaderReadWrite, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick)) .name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))
); );
@ -5091,6 +5091,9 @@ void V_TickForever(WaveLaneCtx *lane)
// Sync particles & occluders // Sync particles & occluders
G_DumbGlobalMemorySync(frame->cl); G_DumbGlobalMemorySync(frame->cl);
// Transition albedo
G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);
} }
////////////////////////////// //////////////////////////////
@ -5113,83 +5116,63 @@ void V_TickForever(WaveLaneCtx *lane)
G_Compute(frame->cl, V_ShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims)); G_Compute(frame->cl, V_ShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims));
} }
//////////////////////////////
//- Transition G-buffers to readonly
{
G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead);
G_DumbMemoryLayoutSync(frame->cl, shade_target, G_Layout_DirectQueue_ShaderRead);
}
////////////////////////////// //////////////////////////////
//- Composite pass //- Composite pass
{ {
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead); // Sync screen tex
G_DumbGlobalMemorySync(frame->cl);
} }
////////////////////////////// //////////////////////////////
//- Bloom passes //- Bloom passes
{ {
i32 mips_count = G_CountMips(bloom_target); i32 mips_count = G_CountMips(bloom_target) + 1;
G_SetConstant(frame->cl, V_GpuConst_MipsCount, mips_count);
// NOTE: Because the bloom mip chain starts at half screen size, mip_idx 0
// represents the screen texture itself, and mip_idx N (N >= 1) corresponds
// to index N - 1 in the bloom mip chain
//- Downsample + blur passes //- Downsample + blur passes
for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx) for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx)
{ {
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx); Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
if (mip_idx == 0)
{ G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
// Init bloom pyramid from screen target on first pass (prefilter) G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(down_dims));
gpu_flags |= V_GpuFlag_InitBloom;
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags); G_DumbGlobalMemorySync(frame->cl);
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro);
}
else
{
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]);
}
G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
{
G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims));
}
gpu_flags &= ~V_GpuFlag_InitBloom;
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
} }
//- Upsample passes //- Upsample passes
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx) for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)
{ {
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx); Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx)); G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1)); G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(up_dims));
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]); G_DumbGlobalMemorySync(frame->cl);
G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]); }
G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims));
}
} }
////////////////////////////// //////////////////////////////
//- Post process pass //- Finalization pass
{ {
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite); G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));
G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
} }
////////////////////////////// //////////////////////////////
//- Debug shapes pass //- Debug shapes pass
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
{ {
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
G_Rasterize( G_Rasterize(
frame->cl, frame->cl,
V_DVertVS, V_DVertPS, V_DVertVS, V_DVertPS,
@ -5198,12 +5181,13 @@ void V_TickForever(WaveLaneCtx *lane)
screen_viewport, screen_scissor, screen_viewport, screen_scissor,
G_RasterMode_TriangleList G_RasterMode_TriangleList
); );
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);
} }
////////////////////////////// //////////////////////////////
//- Finalize screen target //- Finalize screen target
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
{ {
Rng2 uv = Zi; Rng2 uv = Zi;
uv.p0 = Vec2FromVec(screen_viewport.p0); uv.p0 = Vec2FromVec(screen_viewport.p0);

View File

@ -53,13 +53,6 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
return result; return result;
} }
// ACES approximation by Krzysztof Narkowicz
// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
Vec3 V_ToneMap(Vec3 v)
{
return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f));
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Prepare frame //~ Prepare frame
@ -142,11 +135,11 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8)
} }
else if (over_stain.a > 0) else if (over_stain.a > 0)
{ {
Vec4 stain = dry_stains[cell_pos];
Vec4 dry_stain = max(dry_stains[cell_pos], 0); Vec4 dry_stain = max(dry_stains[cell_pos], 0);
Vec4 stain = dry_stain;
stain = BlendPremul(over_stain, stain);
dry_stain = BlendPremul(over_dry_stain, dry_stain); dry_stain = BlendPremul(over_dry_stain, dry_stain);
stain = BlendPremul(over_stain, stain);
stains[cell_pos] = stain; stains[cell_pos] = stain;
dry_stains[cell_pos] = dry_stain; dry_stains[cell_pos] = dry_stain;
@ -483,7 +476,7 @@ ComputeShader(V_SimParticlesCS, 64)
particle.prev_occluder = occluder; particle.prev_occluder = occluder;
} }
if (!AnyBit(desc.flags, V_ParticleFlag_NoPruneWhenStill) && dot(particle.velocity, particle.velocity) < 0.0001) if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))
{ {
prune = 1; prune = 1;
} }
@ -723,7 +716,6 @@ ComputeShader2D(V_CompositeCS, 8, 8)
Vec4 ground_particle_color = 0; Vec4 ground_particle_color = 0;
Vec4 air_particle_color = 0; Vec4 air_particle_color = 0;
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1) for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
{ {
RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]); RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);
@ -752,9 +744,9 @@ ComputeShader2D(V_CompositeCS, 8, 8)
// Darken wall particles / stains // Darken wall particles / stains
if (tile == P_TileKind_Wall) if (tile == P_TileKind_Wall)
{ {
ground_particle_color *= 0.25; ground_particle_color *= 0.5;
air_particle_color *= 0.25; air_particle_color *= 0.5;
stain_color *= 0.25; stain_color *= 0.5;
} }
////////////////////////////// //////////////////////////////
@ -972,57 +964,74 @@ ComputeShader2D(V_CompositeCS, 8, 8)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Bloom //~ Bloom
//////////////////////////////
//- Downsample
ComputeShader2D(V_BloomDownCS, 8, 8) ComputeShader2D(V_BloomDownCS, 8, 8)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; i32 mips_count = V_GpuConst_MipsCount;
Texture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomRead); i32 mip_idx = V_GpuConst_MipIdx;
RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);
Texture2D<Vec4> bloom_up;
b32 is_first_pass = mip_idx == 1;
if (is_first_pass)
{
bloom_up = G_Dereference<Vec4>(frame.screen_ro);
}
else
{
bloom_up = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx - 2]);
}
Vec2 up_dims = countof(bloom_up);
Vec2 down_dims = countof(bloom_down); Vec2 down_dims = countof(bloom_down);
Vec2 bloom_pos = SV_DispatchThreadID + 0.5; Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
Vec2 bloom_uv = bloom_pos / down_dims; Vec2 bloom_uv = bloom_pos / down_dims;
Vec2 off_uv = 0.5 / down_dims; Vec2 off_uv = 0.5 / down_dims;
b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom);
Struct(SampleDesc) { Vec2 uv; f32 weight; }; f32 threshold = 0.25;
SampleDesc samples[] = { f32 knee = 0.75;
{ bloom_uv + Vec2(0, 0), 0.5 },
{ bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
{ bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
{ bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
{ bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
};
Vec4 result = 0; Vec4 result = 0;
for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
{ {
SampleDesc desc = samples[sample_idx]; // 5-tap sample
Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0); Struct(SampleDesc) { Vec2 uv; f32 weight; };
SampleDesc samples[] = {
f32 knee_weight = 1; { bloom_uv + Vec2(0, 0), 0.5 },
if (is_first_pass) { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
{ bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
{ bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
{ bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
};
for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
{ {
f32 luminance = LuminanceFromColor(src); SampleDesc desc = samples[sample_idx];
f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);
f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);
if (bright > 0)
{
f32 threshold = 1.0;
f32 knee = 0.5;
f32 over_threshold = max(bright - threshold, 0.0);
f32 ramp = saturate(over_threshold / knee);
knee_weight = (over_threshold * ramp * ramp) / bright;
}
else
{
knee_weight = 0;
}
}
result += src * desc.weight * knee_weight; f32 knee_weight = 1;
if (is_first_pass)
{
f32 luminance = LuminanceFromColor(src);
f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance
f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);
if (bright > 0)
{
f32 over_threshold = max(bright - threshold, 0.0);
f32 ramp = saturate(over_threshold / knee);
knee_weight = (over_threshold * ramp * ramp) / bright;
}
else
{
knee_weight = 0;
}
}
result += src * desc.weight * knee_weight;
}
} }
if (IsInside(bloom_pos, down_dims)) if (IsInside(bloom_pos, down_dims))
@ -1031,52 +1040,78 @@ ComputeShader2D(V_BloomDownCS, 8, 8)
} }
} }
//////////////////////////////
//- Upsample
ComputeShader2D(V_BloomUpCS, 8, 8) ComputeShader2D(V_BloomUpCS, 8, 8)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; i32 mips_count = V_GpuConst_MipsCount;
Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomRead); i32 mip_idx = V_GpuConst_MipIdx;
RWTexture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx]);
b32 is_last_pass = mip_idx == 0;
RWTexture2D<Vec4> bloom_up;
if (is_last_pass)
{
bloom_up = G_Dereference<Vec4>(frame.screen_rw);
}
else
{
bloom_up = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);
}
Vec2 up_dims = countof(bloom_up);
Vec2 down_dims = countof(bloom_down); Vec2 down_dims = countof(bloom_down);
Vec2 up_dims = countof(bloom_up);
Vec2 bloom_pos = SV_DispatchThreadID + 0.5; Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
Vec2 bloom_uv = bloom_pos / up_dims; Vec2 bloom_uv = bloom_pos / up_dims;
Vec2 off_uv = 1 / up_dims; Vec2 off_inner_uv = 1 / down_dims;
Vec2 off_outer_uv = off_inner_uv * 2;
// 13-tap sample
Vec4 result = 0; Vec4 result = 0;
{ {
// Center // Center
result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4; result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 9.0f / 41.0f;
// Edges
// Outer Edges
result += ( result += (
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_outer_uv.y), 0) +
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_outer_uv.x, 0), 0) +
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_outer_uv.y), 0) +
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0) bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_outer_uv.x, 0), 0)
) * 2; ) * 3.0f / 41.0f;
// Corners
// Inner corners
result += ( result += (
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_inner_uv.x, -off_inner_uv.y), 0) +
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_inner_uv.x, -off_inner_uv.y), 0) +
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_inner_uv.x, off_inner_uv.y), 0) +
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0) bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_inner_uv.x, off_inner_uv.y), 0)
); ) * 4.0f / 41.0f;
// Normalize
result /= 16; // Outer corners
result += (
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_outer_uv.x, -off_outer_uv.y), 0) +
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_outer_uv.x, -off_outer_uv.y), 0) +
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_outer_uv.x, off_outer_uv.y), 0) +
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_outer_uv.x, off_outer_uv.y), 0)
) * 1.0f / 41.0f;
} }
if (IsInside(bloom_pos, up_dims)) if (IsInside(bloom_pos, up_dims))
{ {
bloom_up[bloom_pos] += result; bloom_up[bloom_pos] += result * 0.75;
} }
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Post process //~ Finalize
ComputeShader2D(V_PostProcessCS, 8, 8) ComputeShader2D(V_FinalizeCS, 8, 8)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
@ -1084,42 +1119,21 @@ ComputeShader2D(V_PostProcessCS, 8, 8)
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw); RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
Vec2 screen_pos = SV_DispatchThreadID + 0.5; Vec2 screen_pos = SV_DispatchThreadID + 0.5;
Vec2 screen_uv = screen_pos / frame.screen_dims;
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims); b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);
//////////////////////////////
//- Original
Vec4 original = 0;
if (is_in_screen) if (is_in_screen)
{ {
original = screen_tex[screen_pos]; Vec4 result = screen_tex[screen_pos];
original.rgb *= original.a;
}
//- Tone map
if (frame.should_tone_map)
{
// ACES approximation by Krzysztof Narkowicz
// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
result.rgb = saturate((result.rgb * (2.51f * result.rgb + 0.03f)) / (result.rgb * (2.43f * result.rgb + 0.59f) + 0.14f));
}
////////////////////////////// result = Unpremul(result);
//- Bloom
Vec4 bloom = 0;
if (is_in_screen)
{
bloom = bloom_tex.SampleLevel(bilinear_sampler, screen_uv, 0);
// bloom.rgb *= bloom.a;
}
//////////////////////////////
//- Compose
Vec4 result = Vec4(0, 0, 0, 1);
result = BlendPremul(original, result);
result += bloom;
// result.rgb = V_ToneMap(result);
result = Unpremul(result);
if (is_in_screen)
{
screen_tex[screen_pos] = result; screen_tex[screen_pos] = result;
} }
} }

View File

@ -46,7 +46,6 @@ Struct(V_DVertPSOutput)
f32 V_RandFromPos(Vec3 pos); f32 V_RandFromPos(Vec3 pos);
Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density); Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density);
Vec3 V_ToneMap(Vec3 v);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Shaders //~ Shaders
@ -73,8 +72,8 @@ ComputeShader2D(V_CompositeCS, 8, 8);
ComputeShader2D(V_BloomDownCS, 8, 8); ComputeShader2D(V_BloomDownCS, 8, 8);
ComputeShader2D(V_BloomUpCS, 8, 8); ComputeShader2D(V_BloomUpCS, 8, 8);
//- Post process //- Finalize
ComputeShader2D(V_PostProcessCS, 8, 8); ComputeShader2D(V_FinalizeCS, 8, 8);
//- Debug shapes //- Debug shapes
VertexShader(V_DVertVS, V_DVertPSInput); VertexShader(V_DVertVS, V_DVertPSInput);

View File

@ -11,37 +11,42 @@ V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)
V_ParticleDesc result; V_ParticleDesc result;
{ {
PERSIST Readonly V_ParticleFlag flags[V_ParticleKind_COUNT] = { PERSIST Readonly V_ParticleFlag flags[V_ParticleKind_COUNT] = {
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) flags, #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) flags,
V_ParticlesXList(X) V_ParticlesXList(X)
#undef X #undef X
}; };
PERSIST Readonly V_ParticleLayer layers[V_ParticleKind_COUNT] = { PERSIST Readonly V_ParticleLayer layers[V_ParticleKind_COUNT] = {
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) layer, #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) layer,
V_ParticlesXList(X) V_ParticlesXList(X)
#undef X #undef X
}; };
PERSIST Readonly f32 stain_rates[V_ParticleKind_COUNT] = { PERSIST Readonly f32 stain_rates[V_ParticleKind_COUNT] = {
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) stain_rate, #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) stain_rate,
V_ParticlesXList(X) V_ParticlesXList(X)
#undef X #undef X
}; };
PERSIST Readonly f32 pen_rates[V_ParticleKind_COUNT] = { PERSIST Readonly f32 pen_rates[V_ParticleKind_COUNT] = {
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) pen_rate, #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) pen_rate,
V_ParticlesXList(X) V_ParticlesXList(X)
#undef X #undef X
}; };
PERSIST Readonly f32 lifetimes[V_ParticleKind_COUNT] = { PERSIST Readonly f32 lifetimes[V_ParticleKind_COUNT] = {
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) lifetime, #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) lifetime,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly f32 prune_speed_thresholds[V_ParticleKind_COUNT] = {
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) prune_speed_threshold,
V_ParticlesXList(X) V_ParticlesXList(X)
#undef X #undef X
}; };
PERSIST Readonly Vec4 base_colors[V_ParticleKind_COUNT] = { PERSIST Readonly Vec4 base_colors[V_ParticleKind_COUNT] = {
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) base_color, #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) base_color,
V_ParticlesXList(X) V_ParticlesXList(X)
#undef X #undef X
}; };
PERSIST Readonly Vec4 dry_factor[V_ParticleKind_COUNT] = { PERSIST Readonly Vec4 dry_factor[V_ParticleKind_COUNT] = {
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) dry_factor, #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) dry_factor,
V_ParticlesXList(X) V_ParticlesXList(X)
#undef X #undef X
}; };
@ -51,6 +56,7 @@ V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)
result.stain_rate = stain_rates[kind]; result.stain_rate = stain_rates[kind];
result.pen_rate = pen_rates[kind]; result.pen_rate = pen_rates[kind];
result.lifetime = lifetimes[kind]; result.lifetime = lifetimes[kind];
result.prune_speed_threshold = prune_speed_thresholds[kind];
result.base_color = LinearFromSrgb(base_colors[kind]); result.base_color = LinearFromSrgb(base_colors[kind]);
result.dry_factor = LinearFromSrgb(dry_factor[kind]); result.dry_factor = LinearFromSrgb(dry_factor[kind]);
} }

View File

@ -9,14 +9,13 @@
Enum(V_GpuFlag) Enum(V_GpuFlag)
{ {
V_GpuFlag_None = 0, V_GpuFlag_None = 0,
V_GpuFlag_InitBloom = (1 << 0),
}; };
G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0); G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0);
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1); G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1);
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2); G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2);
G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3); G_DeclConstant(i32, V_GpuConst_MipsCount, 3);
G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4); G_DeclConstant(i32, V_GpuConst_MipIdx, 4);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Particle types //~ Particle types
@ -29,7 +28,6 @@ G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);
Enum(V_ParticleFlag) Enum(V_ParticleFlag)
{ {
V_ParticleFlag_None = 0, V_ParticleFlag_None = 0,
V_ParticleFlag_NoPruneWhenStill = (1 << 0),
V_ParticleFlag_StainWhenPruned = (1 << 1), V_ParticleFlag_StainWhenPruned = (1 << 1),
V_ParticleFlag_NoReflect = (1 << 2), V_ParticleFlag_NoReflect = (1 << 2),
V_ParticleFlag_OnlyCollideWithWalls = (1 << 3), V_ParticleFlag_OnlyCollideWithWalls = (1 << 3),
@ -53,6 +51,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Ground, \ /* Layer */ V_ParticleLayer_Ground, \
/* Stain rate, pen chance */ 30, 0, \ /* Stain rate, pen chance */ 30, 0, \
/* Lifetime */ Inf, \ /* Lifetime */ Inf, \
/* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0, 0, 0, 0), \ /* Base color */ CompVec4(0, 0, 0, 0), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \ /* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \ ) \
@ -64,8 +63,9 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Ground, \ /* Layer */ V_ParticleLayer_Ground, \
/* Stain rate, pen chance */ 100, 0.25, \ /* Stain rate, pen chance */ 100, 0.25, \
/* Lifetime */ Inf, \ /* Lifetime */ Inf, \
/* Base color */ CompVec4(0.5, 0.1, 0.1, 0.05), \ /* Prune speed threshold */ 0.5, \
/* Dry color factor */ CompVec4(0.5, 0.5, 0.5, 1) \ /* Base color */ CompVec4(0.6, 0.1, 0.1, 0.05), \
/* Dry color factor */ CompVec4(0.4, 0.4, 0.4, 1) \
) \ ) \
X( \ X( \
/* Name */ BloodDebris, \ /* Name */ BloodDebris, \
@ -73,6 +73,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \ /* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 30, 0, \ /* Stain rate, pen chance */ 30, 0, \
/* Lifetime */ Inf, \ /* Lifetime */ Inf, \
/* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.5, 0.1, 0.1, 0.8), \ /* Base color */ CompVec4(0.5, 0.1, 0.1, 0.8), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \ /* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \ ) \
@ -82,6 +83,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \ /* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \ /* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \ /* Lifetime */ Inf, \
/* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.4, 0.3, 0.2, 1), \ /* Base color */ CompVec4(0.4, 0.3, 0.2, 1), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \ /* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \ ) \
@ -91,6 +93,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \ /* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \ /* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \ /* Lifetime */ Inf, \
/* Prune speed threshold */ 0.1, \
/* Base color */ CompVec4(2, 0.5, 0, 1), \ /* Base color */ CompVec4(2, 0.5, 0, 1), \
/* Dry color factor */ CompVec4(0.2, 0.1, 0.0, 1) \ /* Dry color factor */ CompVec4(0.2, 0.1, 0.0, 1) \
) \ ) \
@ -102,6 +105,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \ /* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \ /* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ 0.075, \ /* Lifetime */ 0.075, \
/* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.8, 0.6, 0.2, 0.25), \ /* Base color */ CompVec4(0.8, 0.6, 0.2, 0.25), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \ /* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \ ) \
@ -111,6 +115,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Air, \ /* Layer */ V_ParticleLayer_Air, \
/* Stain rate, pen chance */ 0, 0, \ /* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \ /* Lifetime */ Inf, \
/* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.25, 0.25, 0.25, 0.75), \ /* Base color */ CompVec4(0.25, 0.25, 0.25, 0.75), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \ /* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \ ) \
@ -122,6 +127,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \ /* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \ /* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \ /* Lifetime */ Inf, \
/* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(1, 1, 0, 1), \ /* Base color */ CompVec4(1, 1, 0, 1), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \ /* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \ ) \
@ -168,6 +174,7 @@ Struct(V_ParticleDesc)
f32 stain_rate; f32 stain_rate;
f32 pen_rate; f32 pen_rate;
f32 lifetime; f32 lifetime;
f32 prune_speed_threshold;
Vec4 base_color; Vec4 base_color;
Vec4 dry_factor; Vec4 dry_factor;
}; };
@ -264,6 +271,7 @@ Struct(V_SharedFrame)
b32 tiles_dirty; b32 tiles_dirty;
b32 should_clear_particles; b32 should_clear_particles;
b32 should_tone_map;
b32 is_looking; b32 is_looking;
b32 is_moving; b32 is_moving;

926
tatus Normal file
View File

@ -0,0 +1,926 @@
diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c
index a9686d87..43835793 100644
--- a/src/gpu/gpu_common.c
+++ b/src/gpu/gpu_common.c
@@ -25,7 +25,7 @@ void G_BootstrapCommon(void)
gpu_perm, cl,
G_Format_R8G8B8A8_Uint,
VEC2I32(8, 8),
- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present,
+ G_Layout_Simultaneous,
.flags = G_ResourceFlag_ZeroMemory
);
G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex);
@@ -44,7 +44,7 @@ void G_BootstrapCommon(void)
gpu_perm, cl,
G_Format_R16_Uint,
noise_dims,
- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present
+ G_Layout_Simultaneous
);
G_CopyCpuToTexture(
cl,
@@ -143,30 +143,54 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList

//- Mip

-i32 G_DimsFromMip1D(i32 texture_dims, i32 mip)
+i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip) // Size of mip level `mip` given the size of mip 0; result is floored at 1 via MaxI32
{
- mip = ClampI32(mip, 0, 31);
+ mip = ClampI32(mip, -31, 31); // Negative mip selects a size larger than mip 0 (left-shift branch below)
i32 result = 0;
- result = MaxI32(result >> mip, 1);
+ if (mip >= 0)
+ {
+ result = MaxI32(mip0_dims >> mip, 1); // Shift the mip-0 size (not `result`, which is still 0 here): halves once per mip level
+ }
+ else
+ {
+ result = MaxI32(mip0_dims << -mip, 1); // Doubles once per negative mip level; NOTE(review): large shifts can overflow i32 - confirm callers keep this in range
+ }
return result;
}

-Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip)
+Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip) // Per-axis size of mip level `mip` given the size of mip 0; each axis is floored at 1 via MaxI32
{
- mip = ClampI32(mip, 0, 31);
+ mip = ClampI32(mip, -31, 31); // Negative mip selects a size larger than mip 0 (left-shift branch below)
Vec2I32 result = Zi;
- result.x = MaxI32(texture_dims.x >> mip, 1);
- result.y = MaxI32(texture_dims.y >> mip, 1);
+ if (mip >= 0)
+ {
+ result.x = MaxI32(mip0_dims.x >> mip, 1); // Halves once per mip level
+ result.y = MaxI32(mip0_dims.y >> mip, 1);
+ }
+ else
+ {
+ result.x = MaxI32(mip0_dims.x << -mip, 1); // Doubles once per negative mip level; NOTE(review): large shifts can overflow i32 - confirm callers keep this in range
+ result.y = MaxI32(mip0_dims.y << -mip, 1);
+ }
return result;
}

-Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip)
+Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip) // Per-axis size of mip level `mip` given the size of mip 0; each axis is floored at 1 via MaxI32
{
- mip = ClampI32(mip, 0, 31);
+ mip = ClampI32(mip, -31, 31); // Negative mip selects a size larger than mip 0 (left-shift branch below)
Vec3I32 result = Zi;
- result.x = MaxI32(texture_dims.x >> mip, 1);
- result.y = MaxI32(texture_dims.y >> mip, 1);
- result.z = MaxI32(texture_dims.z >> mip, 1);
+ if (mip >= 0)
+ {
+ result.x = MaxI32(mip0_dims.x >> mip, 1); // Halves once per mip level
+ result.y = MaxI32(mip0_dims.y >> mip, 1);
+ result.z = MaxI32(mip0_dims.z >> mip, 1);
+ }
+ else
+ {
+ result.x = MaxI32(mip0_dims.x << -mip, 1); // Doubles once per negative mip level; NOTE(review): large shifts can overflow i32 - confirm callers keep this in range
+ result.y = MaxI32(mip0_dims.y << -mip, 1);
+ result.z = MaxI32(mip0_dims.z << -mip, 1);
+ }
return result;
}

diff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h
index eb3ee6d2..03927040 100644
--- a/src/gpu/gpu_common.h
+++ b/src/gpu/gpu_common.h
@@ -35,9 +35,9 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList
G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ })

//- Mip
-i32 G_DimsFromMip1D(i32 texture_dims, i32 mip);
-Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip);
-Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip);
+i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip);
+Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip);
+Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip);

//- Viewport / scissor
Rng3 G_ViewportFromTexture(G_ResourceHandle texture);
diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h
index 7e1b329a..bed18c93 100644
--- a/src/gpu/gpu_core.h
+++ b/src/gpu/gpu_core.h
@@ -242,18 +242,16 @@ Enum(G_Access)
G_Access_IndexBuffer = (1 << 8),
G_Access_IndirectArgument = (1 << 9),

- G_Access_All = 0xFFFFFFFF
+ G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the specified sync stage
};

Enum(G_Layout)
{
G_Layout_NoChange,

- // "Simultaneous" allows a resource to be used on any queue with any access
- // type, as long as there is only one writer at a time, and the writer is not
- // writing to any texels currently being read.
- // Resources cannot transition to/from this layout. They must be created
- // with it and are locked to it.
+ // Simultaneous layout allows a resource to be used on any queue with any
+ // access type (except depth-stencil). Resources cannot transition to/from
+ // this layout, they must be created with it.
G_Layout_Simultaneous, // D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS

G_Layout_Undefined, // D3D12_BARRIER_LAYOUT_UNDEFINED
diff --git a/src/pp/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay
index f72dc528..2d916376 100644
--- a/src/pp/pp_vis/pp_vis.lay
+++ b/src/pp/pp_vis/pp_vis.lay
@@ -26,7 +26,7 @@
@ComputeShader V_CompositeCS
@ComputeShader V_BloomDownCS
@ComputeShader V_BloomUpCS
-@ComputeShader V_PostProcessCS
+@ComputeShader V_FinalizeCS
@VertexShader V_DVertVS
@PixelShader V_DVertPS

diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c
index f2f5e6b5..338036ba 100644
--- a/src/pp/pp_vis/pp_vis_core.c
+++ b/src/pp/pp_vis/pp_vis_core.c
@@ -416,7 +416,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R8_Uint,
tiles_dims,
- G_Layout_DirectQueue_ShaderRead,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory,
.name = Lit("Tiles")
);
@@ -441,7 +441,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R32_Uint,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(perm, "Particle cells - layer %F", FmtSint(layer))
);
@@ -454,7 +454,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R32_Uint,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(perm, "Particle densities - layer %F", FmtSint(layer))
);
@@ -469,7 +469,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R16G16B16A16_Float,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Stains")
);
@@ -481,7 +481,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R16G16B16A16_Float,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Dry stains")
);
@@ -493,7 +493,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R32_Float,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Drynesses")
);
@@ -505,7 +505,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R32_Uint,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Occluders cells")
);
@@ -614,6 +614,8 @@ void V_TickForever(WaveLaneCtx *lane)
frame->dt = SecondsFromNs(frame->dt_ns);
frame->rand = prev_frame->rand;

+ frame->should_tone_map = TweakBool("Tone mapping enabled", 1);
+
if (P_IsEntKeyNil(V.player_key))
{
TrueRand(StringFromStruct(&V.player_key));
@@ -4918,18 +4920,17 @@ void V_TickForever(WaveLaneCtx *lane)
frame->tile_descs[tile_kind] = tile_desc;
}
}
+
// Upload tiles
if (frame->tiles_dirty)
{
// LogDebugF("Uploading tiles to gpu");
- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_CopyWrite);
G_CopyCpuToTexture(
frame->cl,
gpu_tiles_res, VEC3I32(0, 0, 0),
local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1),
RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1))
);
- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_ShaderRead);
}

// Screen texture
@@ -4937,7 +4938,7 @@ void V_TickForever(WaveLaneCtx *lane)
frame->gpu_arena, frame->cl,
G_Format_R16G16B16A16_Float,
frame->screen_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
);
@@ -4951,11 +4952,10 @@ void V_TickForever(WaveLaneCtx *lane)
frame->gpu_arena, frame->cl,
G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(screen_target), 1),
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),
- // .max_mips = 4
- .max_mips = 8
+ .max_mips = 64
);
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)
{
@@ -4979,7 +4979,7 @@ void V_TickForever(WaveLaneCtx *lane)
frame->gpu_arena, frame->cl,
G_Format_R16G16B16A16_Float,
frame->shade_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))
);
@@ -5091,6 +5091,9 @@ void V_TickForever(WaveLaneCtx *lane)

// Sync particles & occluders
G_DumbGlobalMemorySync(frame->cl);
+
+ // Transition albedo
+ G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);
}

//////////////////////////////
@@ -5113,83 +5116,63 @@ void V_TickForever(WaveLaneCtx *lane)
G_Compute(frame->cl, V_ShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims));
}

- //////////////////////////////
- //- Transition G-buffers to readonly
-
- {
- G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead);
- G_DumbMemoryLayoutSync(frame->cl, shade_target, G_Layout_DirectQueue_ShaderRead);
- }
-
//////////////////////////////
//- Composite pass

{
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));

- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
+ // Sync screen tex
+ G_DumbGlobalMemorySync(frame->cl);
}

//////////////////////////////
//- Bloom passes

{
- i32 mips_count = G_CountMips(bloom_target);
+ i32 mips_count = G_CountMips(bloom_target) + 1;
+ G_SetConstant(frame->cl, V_GpuConst_MipsCount, mips_count);
+
+ // NOTE: Because the bloom mip chain starts at half screen size, mip_idx 0
+ // actually represents the screen texture, while mip_idx N (N >= 1) maps to
+ // mip N - 1 of the bloom mip chain

//- Downsample + blur passes
- for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx)
+ for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx)
{
- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
- if (mip_idx == 0)
- {
- // Init bloom pyramid from screen target on first pass (prefilter)
- gpu_flags |= V_GpuFlag_InitBloom;
- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro);
- }
- else
- {
- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));
- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]);
- }
- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
- {
- G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims));
- }
- gpu_flags &= ~V_GpuFlag_InitBloom;
- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
+ Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
+
+ G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
+ G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(down_dims));
+
+ G_DumbGlobalMemorySync(frame->cl);
}

//- Upsample passes
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)
{
- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
-
- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));
+ Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);

- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]);
- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
+ G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
+ G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(up_dims));

- G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims));
- }
+ G_DumbGlobalMemorySync(frame->cl);
+ }
}

//////////////////////////////
- //- Post process pass
+ //- Finalization pass

{
- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite);
- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));
- G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
+ G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
}

//////////////////////////////
//- Debug shapes pass

- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
-
{
+ G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
+
G_Rasterize(
frame->cl,
V_DVertVS, V_DVertPS,
@@ -5198,12 +5181,13 @@ void V_TickForever(WaveLaneCtx *lane)
screen_viewport, screen_scissor,
G_RasterMode_TriangleList
);
+
+ G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);
}

//////////////////////////////
//- Finalize screen target

- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
{
Rng2 uv = Zi;
uv.p0 = Vec2FromVec(screen_viewport.p0);
diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g
index f8a254de..c0a9e47d 100644
--- a/src/pp/pp_vis/pp_vis_gpu.g
+++ b/src/pp/pp_vis/pp_vis_gpu.g
@@ -53,13 +53,6 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
return result;
}

-// ACES approximation by Krzysztof Narkowicz
-// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
-Vec3 V_ToneMap(Vec3 v)
-{
- return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f));
-}
-
////////////////////////////////////////////////////////////
//~ Prepare frame

@@ -142,11 +135,11 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8)
}
else if (over_stain.a > 0)
{
- Vec4 stain = dry_stains[cell_pos];
Vec4 dry_stain = max(dry_stains[cell_pos], 0);
+ Vec4 stain = dry_stain;

- stain = BlendPremul(over_stain, stain);
dry_stain = BlendPremul(over_dry_stain, dry_stain);
+ stain = BlendPremul(over_stain, stain);

stains[cell_pos] = stain;
dry_stains[cell_pos] = dry_stain;
@@ -483,7 +476,7 @@ ComputeShader(V_SimParticlesCS, 64)
particle.prev_occluder = occluder;
}

- if (!AnyBit(desc.flags, V_ParticleFlag_NoPruneWhenStill) && dot(particle.velocity, particle.velocity) < 0.0001)
+ if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))
{
prune = 1;
}
@@ -723,7 +716,6 @@ ComputeShader2D(V_CompositeCS, 8, 8)
Vec4 ground_particle_color = 0;
Vec4 air_particle_color = 0;

-
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
{
RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);
@@ -752,9 +744,9 @@ ComputeShader2D(V_CompositeCS, 8, 8)
// Darken wall particles / stains
if (tile == P_TileKind_Wall)
{
- ground_particle_color *= 0.25;
- air_particle_color *= 0.25;
- stain_color *= 0.25;
+ ground_particle_color *= 0.5;
+ air_particle_color *= 0.5;
+ stain_color *= 0.5;
}

//////////////////////////////
@@ -972,57 +964,73 @@ ComputeShader2D(V_CompositeCS, 8, 8)
////////////////////////////////////////////////////////////
//~ Bloom

+//////////////////////////////
+//- Downsample
+
ComputeShader2D(V_BloomDownCS, 8, 8)
{
+ i32 mips_count = V_GpuConst_MipsCount;
+ i32 mip_idx = V_GpuConst_MipIdx;
+
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
- Texture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomRead);
- RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
+ RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);
+
+ Texture2D<Vec4> bloom_up;
+ b32 is_first_pass = mip_idx == 1;
+ if (is_first_pass)
+ {
+ bloom_up = G_Dereference<Vec4>(frame.screen_ro);
+ }
+ else
+ {
+ bloom_up = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx - 2]);
+ }

- Vec2 up_dims = countof(bloom_up);
Vec2 down_dims = countof(bloom_down);

Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
Vec2 bloom_uv = bloom_pos / down_dims;
Vec2 off_uv = 0.5 / down_dims;
- b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom);

- Struct(SampleDesc) { Vec2 uv; f32 weight; };
- SampleDesc samples[] = {
- { bloom_uv + Vec2(0, 0), 0.5 },
- { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
- { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
- { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
- { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
- };
+ f32 threshold = 0.25;
+ f32 knee = 0.75;

Vec4 result = 0;
- for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
{
- SampleDesc desc = samples[sample_idx];
- Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);
-
- f32 knee_weight = 1;
- if (is_first_pass)
+ Struct(SampleDesc) { Vec2 uv; f32 weight; };
+ SampleDesc samples[] = {
+ { bloom_uv + Vec2(0, 0), 0.5 },
+ { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
+ { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
+ { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
+ { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
+ };
+ for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
{
- f32 luminance = LuminanceFromColor(src);
- f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance
- f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);
- if (bright > 0)
- {
- f32 threshold = 1.0;
- f32 knee = 0.5;
- f32 over_threshold = max(bright - threshold, 0.0);
- f32 ramp = saturate(over_threshold / knee);
- knee_weight = (over_threshold * ramp * ramp) / bright;
- }
- else
+ SampleDesc desc = samples[sample_idx];
+ Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);
+
+ f32 knee_weight = 1;
+ if (is_first_pass)
{
- knee_weight = 0;
+ f32 luminance = LuminanceFromColor(src);
+ f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance
+ f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);
+ if (bright > 0)
+ {
+ f32 over_threshold = max(bright - threshold, 0.0);
+ f32 ramp = saturate(over_threshold / knee);
+ knee_weight = (over_threshold * ramp * ramp) / bright;
+ }
+ else
+ {
+ knee_weight = 0;
+ }
}
- }

- result += src * desc.weight * knee_weight;
+ result += src * desc.weight * knee_weight;
+ }
}

if (IsInside(bloom_pos, down_dims))
@@ -1031,52 +1039,77 @@ ComputeShader2D(V_BloomDownCS, 8, 8)
}
}

+//////////////////////////////
+//- Upsample
+
ComputeShader2D(V_BloomUpCS, 8, 8)
{
+ i32 mips_count = V_GpuConst_MipsCount; // Read from the constant, but not referenced further in this shader
+ i32 mip_idx = V_GpuConst_MipIdx; // 0 targets the screen texture; N >= 1 targets bloom mip N - 1 (see bloom_up selection below)
+
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
- Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomRead);
- RWTexture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
+ Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx]); // Source: bloom mip `mip_idx`
+
+ b32 is_last_pass = mip_idx == 0; // The last upsample pass accumulates into the screen texture instead of a bloom mip
+ RWTexture2D<Vec4> bloom_up;
+ if (is_last_pass)
+ {
+ bloom_up = G_Dereference<Vec4>(frame.screen_rw);
+ }
+ else
+ {
+ bloom_up = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]); // Destination: the bloom mip one index below the source
+ }

- Vec2 up_dims = countof(bloom_up);
Vec2 down_dims = countof(bloom_down);
+ Vec2 up_dims = countof(bloom_up);

Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
Vec2 bloom_uv = bloom_pos / up_dims;
- Vec2 off_uv = 1 / up_dims;
+ Vec2 off_uv0 = 1 / down_dims; // UV size of one texel of the source (bloom_down)
+ Vec2 off_uv1 = off_uv0 * 2; // UV size of two source texels

Vec4 result = 0;
{
// Center
- result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4;
- // Edges
+ result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 9.0f / 41.0f; // Weights below total 9 + 4*3 + 4*4 + 4*1 = 41, so the kernel sums to 1
+
+ // Outer edges (two source texels from center along one axis), weight 3/41 each
result += (
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0)
- ) * 2;
- // Corners
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv1.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, 0), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv1.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, 0), 0)
+ ) * 3.0f / 41.0f;
+
+ // Inner corners (one source texel diagonally from center), weight 4/41 each
+ result += (
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, -off_uv0.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, -off_uv0.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, off_uv0.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, off_uv0.y), 0)
+ ) * 4.0f / 41.0f;
+
+ // Outer corners (two source texels diagonally from center), weight 1/41 each
result += (
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0)
- );
- // Normalize
- result /= 16;
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, -off_uv1.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, -off_uv1.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, off_uv1.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, off_uv1.y), 0)
+ ) * 1.0f / 41.0f;
}

if (IsInside(bloom_pos, up_dims))
{
- bloom_up[bloom_pos] += result;
+ bloom_up[bloom_pos] += result * 0.75; // Additive: the filtered source is added onto the destination's existing value, scaled by 0.75
}
}

////////////////////////////////////////////////////////////
-//~ Post process
+//~ Finalize

-ComputeShader2D(V_PostProcessCS, 8, 8)
+ComputeShader2D(V_FinalizeCS, 8, 8)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
@@ -1084,42 +1117,21 @@ ComputeShader2D(V_PostProcessCS, 8, 8)
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);

Vec2 screen_pos = SV_DispatchThreadID + 0.5;
- Vec2 screen_uv = screen_pos / frame.screen_dims;
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);
-
- //////////////////////////////
- //- Original
-
- Vec4 original = 0;
if (is_in_screen)
{
- original = screen_tex[screen_pos];
- original.rgb *= original.a;
- }
+ Vec4 result = screen_tex[screen_pos];

+ //- Tone map
+ if (frame.should_tone_map)
+ {
+ // ACES approximation by Krzysztof Narkowicz
+ // https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
+ result.rgb = saturate((result.rgb * (2.51f * result.rgb + 0.03f)) / (result.rgb * (2.43f * result.rgb + 0.59f) + 0.14f));
+ }

- //////////////////////////////
- //- Bloom
-
- Vec4 bloom = 0;
- if (is_in_screen)
- {
- bloom = bloom_tex.SampleLevel(bilinear_sampler, screen_uv, 0);
- // bloom.rgb *= bloom.a;
- }
-
- //////////////////////////////
- //- Compose
-
- Vec4 result = Vec4(0, 0, 0, 1);
- result = BlendPremul(original, result);
- result += bloom;
- // result.rgb = V_ToneMap(result);
+ result = Unpremul(result);

- result = Unpremul(result);
-
- if (is_in_screen)
- {
screen_tex[screen_pos] = result;
}
}
diff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh
index a47a2335..f176f2f8 100644
--- a/src/pp/pp_vis/pp_vis_gpu.gh
+++ b/src/pp/pp_vis/pp_vis_gpu.gh
@@ -46,7 +46,6 @@ Struct(V_DVertPSOutput)

f32 V_RandFromPos(Vec3 pos);
Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density);
-Vec3 V_ToneMap(Vec3 v);

////////////////////////////////////////////////////////////
//~ Shaders
@@ -73,8 +72,8 @@ ComputeShader2D(V_CompositeCS, 8, 8);
ComputeShader2D(V_BloomDownCS, 8, 8);
ComputeShader2D(V_BloomUpCS, 8, 8);

-//- Post process
-ComputeShader2D(V_PostProcessCS, 8, 8);
+//- Finalize
+ComputeShader2D(V_FinalizeCS, 8, 8);

//- Debug shapes
VertexShader(V_DVertVS, V_DVertPSInput);
diff --git a/src/pp/pp_vis/pp_vis_shared.cg b/src/pp/pp_vis/pp_vis_shared.cg
index 2419a6f2..72f6ae8d 100644
--- a/src/pp/pp_vis/pp_vis_shared.cg
+++ b/src/pp/pp_vis/pp_vis_shared.cg
@@ -11,37 +11,42 @@ V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)
V_ParticleDesc result;
{
PERSIST Readonly V_ParticleFlag flags[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) flags,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) flags,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly V_ParticleLayer layers[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) layer,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) layer,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly f32 stain_rates[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) stain_rate,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) stain_rate,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly f32 pen_rates[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) pen_rate,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) pen_rate,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly f32 lifetimes[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) lifetime,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) lifetime,
+ V_ParticlesXList(X)
+ #undef X
+ };
+ PERSIST Readonly f32 prune_speed_thresholds[V_ParticleKind_COUNT] = {
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) prune_speed_threshold,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly Vec4 base_colors[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) base_color,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) base_color,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly Vec4 dry_factor[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) dry_factor,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) dry_factor,
V_ParticlesXList(X)
#undef X
};
@@ -51,6 +56,7 @@ V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)
result.stain_rate = stain_rates[kind];
result.pen_rate = pen_rates[kind];
result.lifetime = lifetimes[kind];
+ result.prune_speed_threshold = prune_speed_thresholds[kind];
result.base_color = LinearFromSrgb(base_colors[kind]);
result.dry_factor = LinearFromSrgb(dry_factor[kind]);
}
diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh
index 16ca6419..71d88ea5 100644
--- a/src/pp/pp_vis/pp_vis_shared.cgh
+++ b/src/pp/pp_vis/pp_vis_shared.cgh
@@ -9,14 +9,13 @@
// Flag bits carried by the V_GpuConst_Flags constant (declared just below).
// V_GpuFlag_None (0) is the empty value, meaning no flags are set.
Enum(V_GpuFlag)
{
V_GpuFlag_None = 0,
- V_GpuFlag_InitBloom = (1 << 0),
};

// Constants declared via G_DeclConstant(type, name, index).
// NOTE(review): the trailing integer looks like a per-constant slot index
// (each value is used by exactly one live declaration) - confirm against the
// G_DeclConstant definition before reusing or renumbering a slot.
G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0);
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1);
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2);
-G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3);
-G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);
+G_DeclConstant(i32, V_GpuConst_MipsCount, 3);
+G_DeclConstant(i32, V_GpuConst_MipIdx, 4);

////////////////////////////////////////////////////////////
//~ Particle types
@@ -29,7 +28,6 @@ G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);
Enum(V_ParticleFlag)
{
V_ParticleFlag_None = 0,
- V_ParticleFlag_NoPruneWhenStill = (1 << 0),
V_ParticleFlag_StainWhenPruned = (1 << 1),
V_ParticleFlag_NoReflect = (1 << 2),
V_ParticleFlag_OnlyCollideWithWalls = (1 << 3),
@@ -53,6 +51,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Ground, \
/* Stain rate, pen chance */ 30, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0, 0, 0, 0), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -64,8 +63,9 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Ground, \
/* Stain rate, pen chance */ 100, 0.25, \
/* Lifetime */ Inf, \
- /* Base color */ CompVec4(0.5, 0.1, 0.1, 0.05), \
- /* Dry color factor */ CompVec4(0.5, 0.5, 0.5, 1) \
+ /* Prune speed threshold */ 0.5, \
+ /* Base color */ CompVec4(0.6, 0.1, 0.1, 0.05), \
+ /* Dry color factor */ CompVec4(0.4, 0.4, 0.4, 1) \
) \
X( \
/* Name */ BloodDebris, \
@@ -73,6 +73,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 30, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.5, 0.1, 0.1, 0.8), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -82,6 +83,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.4, 0.3, 0.2, 1), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -91,6 +93,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.1, \
/* Base color */ CompVec4(2, 0.5, 0, 1), \
/* Dry color factor */ CompVec4(0.2, 0.1, 0.0, 1) \
) \
@@ -102,6 +105,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ 0.075, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.8, 0.6, 0.2, 0.25), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -111,6 +115,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Air, \
/* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.25, 0.25, 0.25, 0.75), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -122,6 +127,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(1, 1, 0, 1), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -168,6 +174,7 @@ Struct(V_ParticleDesc)
f32 stain_rate;
f32 pen_rate;
f32 lifetime;
+ f32 prune_speed_threshold;
Vec4 base_color;
Vec4 dry_factor;
};
@@ -264,6 +271,7 @@ Struct(V_SharedFrame)

b32 tiles_dirty;
b32 should_clear_particles;
+ b32 should_tone_map;

b32 is_looking;
b32 is_moving;