power_play/tatus

48 KiB

diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c
index a9686d87..43835793 100644
--- a/src/gpu/gpu_common.c
+++ b/src/gpu/gpu_common.c
@@ -25,7 +25,7 @@ void G_BootstrapCommon(void)
gpu_perm, cl,
G_Format_R8G8B8A8_Uint,
VEC2I32(8, 8),
- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present,
+ G_Layout_Simultaneous,
.flags = G_ResourceFlag_ZeroMemory
);
G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex);
@@ -44,7 +44,7 @@ void G_BootstrapCommon(void)
gpu_perm, cl,
G_Format_R16_Uint,
noise_dims,
- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present
+ G_Layout_Simultaneous
);
G_CopyCpuToTexture(
cl,
@@ -143,30 +143,54 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList
 
//- Mip
 
-i32 G_DimsFromMip1D(i32 texture_dims, i32 mip)
+i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip) // Size of level `mip` given the mip-0 size, floored at 1 via MaxI32
{
- mip = ClampI32(mip, 0, 31);
+ mip = ClampI32(mip, -31, 31); // Negative mips are accepted and scale the size up instead of down
i32 result = 0;
- result = MaxI32(result >> mip, 1);
+ if (mip >= 0)
+ {
+ result = MaxI32(mip0_dims >> mip, 1); // Shift the input size, not the zero-initialized result
+ }
+ else
+ {
+ result = MaxI32(mip0_dims << -mip, 1); // NOTE(review): signed left shift can overflow for large -mip; confirm callers keep it small
+ }
return result;
}
 
-Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip)
+Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip) // Per-axis size of level `mip` given the mip-0 size, each axis floored at 1 via MaxI32
{
- mip = ClampI32(mip, 0, 31);
+ mip = ClampI32(mip, -31, 31); // Negative mips are accepted and scale the size up instead of down
Vec2I32 result = Zi;
- result.x = MaxI32(texture_dims.x >> mip, 1);
- result.y = MaxI32(texture_dims.y >> mip, 1);
+ if (mip >= 0)
+ {
+ result.x = MaxI32(mip0_dims.x >> mip, 1);
+ result.y = MaxI32(mip0_dims.y >> mip, 1);
+ }
+ else
+ {
+ result.x = MaxI32(mip0_dims.x << -mip, 1); // NOTE(review): signed left shift can overflow for large -mip; confirm callers keep it small
+ result.y = MaxI32(mip0_dims.y << -mip, 1);
+ }
return result;
}
 
-Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip)
+Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip) // Per-axis size of level `mip` given the mip-0 size, each axis floored at 1 via MaxI32
{
- mip = ClampI32(mip, 0, 31);
+ mip = ClampI32(mip, -31, 31); // Negative mips are accepted and scale the size up instead of down
Vec3I32 result = Zi;
- result.x = MaxI32(texture_dims.x >> mip, 1);
- result.y = MaxI32(texture_dims.y >> mip, 1);
- result.z = MaxI32(texture_dims.z >> mip, 1);
+ if (mip >= 0)
+ {
+ result.x = MaxI32(mip0_dims.x >> mip, 1);
+ result.y = MaxI32(mip0_dims.y >> mip, 1);
+ result.z = MaxI32(mip0_dims.z >> mip, 1);
+ }
+ else
+ {
+ result.x = MaxI32(mip0_dims.x << -mip, 1); // NOTE(review): signed left shift can overflow for large -mip; confirm callers keep it small
+ result.y = MaxI32(mip0_dims.y << -mip, 1);
+ result.z = MaxI32(mip0_dims.z << -mip, 1);
+ }
return result;
}
 
diff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h
index eb3ee6d2..03927040 100644
--- a/src/gpu/gpu_common.h
+++ b/src/gpu/gpu_common.h
@@ -35,9 +35,9 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList
G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ })
 
//- Mip
-i32 G_DimsFromMip1D(i32 texture_dims, i32 mip);
-Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip);
-Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip);
+i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip);
+Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip);
+Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip);
 
//- Viewport / scissor
Rng3 G_ViewportFromTexture(G_ResourceHandle texture);
diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h
index 7e1b329a..bed18c93 100644
--- a/src/gpu/gpu_core.h
+++ b/src/gpu/gpu_core.h
@@ -242,18 +242,16 @@ Enum(G_Access)
G_Access_IndexBuffer = (1 << 8),
G_Access_IndirectArgument = (1 << 9),
 
- G_Access_All = 0xFFFFFFFF
+ G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the specified sync stage
};
 
Enum(G_Layout)
{
G_Layout_NoChange,
 
- // "Simultaneous" allows a resource to be used on any queue with any access
- // type, as long as there is only one writer at a time, and the writer is not
- // writing to any texels currently being read.
- // Resources cannot transition to/from this layout. They must be created
- // with it and are locked to it.
+ // Simultaneous layout allows a resource to be used on any queue with any
+ // access type (except depth-stencil). Resources cannot transition to/from
+ // this layout, they must be created with it.
G_Layout_Simultaneous, // D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS
 
G_Layout_Undefined, // D3D12_BARRIER_LAYOUT_UNDEFINED
diff --git a/src/pp/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay
index f72dc528..2d916376 100644
--- a/src/pp/pp_vis/pp_vis.lay
+++ b/src/pp/pp_vis/pp_vis.lay
@@ -26,7 +26,7 @@
@ComputeShader V_CompositeCS
@ComputeShader V_BloomDownCS
@ComputeShader V_BloomUpCS
-@ComputeShader V_PostProcessCS
+@ComputeShader V_FinalizeCS
@VertexShader V_DVertVS
@PixelShader V_DVertPS
 
diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c
index f2f5e6b5..338036ba 100644
--- a/src/pp/pp_vis/pp_vis_core.c
+++ b/src/pp/pp_vis/pp_vis_core.c
@@ -416,7 +416,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R8_Uint,
tiles_dims,
- G_Layout_DirectQueue_ShaderRead,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory,
.name = Lit("Tiles")
);
@@ -441,7 +441,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R32_Uint,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(perm, "Particle cells - layer %F", FmtSint(layer))
);
@@ -454,7 +454,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R32_Uint,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(perm, "Particle densities - layer %F", FmtSint(layer))
);
@@ -469,7 +469,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R16G16B16A16_Float,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Stains")
);
@@ -481,7 +481,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R16G16B16A16_Float,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Dry stains")
);
@@ -493,7 +493,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R32_Float,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Drynesses")
);
@@ -505,7 +505,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl,
G_Format_R32_Uint,
cells_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Occluders cells")
);
@@ -614,6 +614,8 @@ void V_TickForever(WaveLaneCtx *lane)
frame->dt = SecondsFromNs(frame->dt_ns);
frame->rand = prev_frame->rand;
 
+ frame->should_tone_map = TweakBool("Tone mapping enabled", 1);
+
if (P_IsEntKeyNil(V.player_key))
{
TrueRand(StringFromStruct(&V.player_key));
@@ -4918,18 +4920,17 @@ void V_TickForever(WaveLaneCtx *lane)
frame->tile_descs[tile_kind] = tile_desc;
}
}
+
// Upload tiles
if (frame->tiles_dirty)
{
// LogDebugF("Uploading tiles to gpu");
- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_CopyWrite);
G_CopyCpuToTexture(
frame->cl,
gpu_tiles_res, VEC3I32(0, 0, 0),
local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1),
RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1))
);
- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_ShaderRead);
}
 
// Screen texture
@@ -4937,7 +4938,7 @@ void V_TickForever(WaveLaneCtx *lane)
frame->gpu_arena, frame->cl,
G_Format_R16G16B16A16_Float,
frame->screen_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
);
@@ -4951,11 +4952,10 @@ void V_TickForever(WaveLaneCtx *lane)
frame->gpu_arena, frame->cl,
G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(screen_target), 1),
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),
- // .max_mips = 4
- .max_mips = 8
+ .max_mips = 64
);
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)
{
@@ -4979,7 +4979,7 @@ void V_TickForever(WaveLaneCtx *lane)
frame->gpu_arena, frame->cl,
G_Format_R16G16B16A16_Float,
frame->shade_dims,
- G_Layout_DirectQueue_ShaderReadWrite,
+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))
);
@@ -5091,6 +5091,9 @@ void V_TickForever(WaveLaneCtx *lane)
 
// Sync particles & occluders
G_DumbGlobalMemorySync(frame->cl);
+
+ // Transition albedo
+ G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);
}
 
//////////////////////////////
@@ -5113,83 +5116,63 @@ void V_TickForever(WaveLaneCtx *lane)
G_Compute(frame->cl, V_ShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims));
}
 
- //////////////////////////////
- //- Transition G-buffers to readonly
-
- {
- G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead);
- G_DumbMemoryLayoutSync(frame->cl, shade_target, G_Layout_DirectQueue_ShaderRead);
- }
-
//////////////////////////////
//- Composite pass
 
{
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
 
- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
+ // Sync screen tex
+ G_DumbGlobalMemorySync(frame->cl);
}
 
//////////////////////////////
//- Bloom passes
 
{
- i32 mips_count = G_CountMips(bloom_target);
+ i32 mips_count = G_CountMips(bloom_target) + 1;
+ G_SetConstant(frame->cl, V_GpuConst_MipsCount, mips_count);
+
+ // NOTE: Because the bloom mip chain starts at half screen size, mip_idx 0
+ // refers to the screen texture itself, while mip_idx N (N >= 1) refers to
+ // bloom mip N - 1 (so mip_idx 1 is the first mip in the bloom mip chain)
 
//- Downsample + blur passes
- for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx)
+ for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx)
{
- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
- if (mip_idx == 0)
- {
- // Init bloom pyramid from screen target on first pass (prefilter)
- gpu_flags |= V_GpuFlag_InitBloom;
- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro);
- }
- else
- {
- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));
- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]);
- }
- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
- {
- G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims));
- }
- gpu_flags &= ~V_GpuFlag_InitBloom;
- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
+ Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
+
+ G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
+ G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(down_dims));
+
+ G_DumbGlobalMemorySync(frame->cl);
}
 
//- Upsample passes
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)
{
- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
-
- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));
+ Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
 
- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]);
- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
+ G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
+ G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(up_dims));
 
- G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims));
- }
+ G_DumbGlobalMemorySync(frame->cl);
+ }
}
 
//////////////////////////////
- //- Post process pass
+ //- Finalization pass
 
{
- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite);
- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));
- G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
+ G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
}
 
//////////////////////////////
//- Debug shapes pass
 
- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
-
{
+ G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
+
G_Rasterize(
frame->cl,
V_DVertVS, V_DVertPS,
@@ -5198,12 +5181,13 @@ void V_TickForever(WaveLaneCtx *lane)
screen_viewport, screen_scissor,
G_RasterMode_TriangleList
);
+
+ G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);
}
 
//////////////////////////////
//- Finalize screen target
 
- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
{
Rng2 uv = Zi;
uv.p0 = Vec2FromVec(screen_viewport.p0);
diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g
index f8a254de..c0a9e47d 100644
--- a/src/pp/pp_vis/pp_vis_gpu.g
+++ b/src/pp/pp_vis/pp_vis_gpu.g
@@ -53,13 +53,6 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
return result;
}
 
-// ACES approximation by Krzysztof Narkowicz
-// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
-Vec3 V_ToneMap(Vec3 v)
-{
- return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f));
-}
-
////////////////////////////////////////////////////////////
//~ Prepare frame
 
@@ -142,11 +135,11 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8)
}
else if (over_stain.a > 0)
{
- Vec4 stain = dry_stains[cell_pos];
Vec4 dry_stain = max(dry_stains[cell_pos], 0);
+ Vec4 stain = dry_stain;
 
- stain = BlendPremul(over_stain, stain);
dry_stain = BlendPremul(over_dry_stain, dry_stain);
+ stain = BlendPremul(over_stain, stain);
 
stains[cell_pos] = stain;
dry_stains[cell_pos] = dry_stain;
@@ -483,7 +476,7 @@ ComputeShader(V_SimParticlesCS, 64)
particle.prev_occluder = occluder;
}
 
- if (!AnyBit(desc.flags, V_ParticleFlag_NoPruneWhenStill) && dot(particle.velocity, particle.velocity) < 0.0001)
+ if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))
{
prune = 1;
}
@@ -723,7 +716,6 @@ ComputeShader2D(V_CompositeCS, 8, 8)
Vec4 ground_particle_color = 0;
Vec4 air_particle_color = 0;
 
-
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
{
RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);
@@ -752,9 +744,9 @@ ComputeShader2D(V_CompositeCS, 8, 8)
// Darken wall particles / stains
if (tile == P_TileKind_Wall)
{
- ground_particle_color *= 0.25;
- air_particle_color *= 0.25;
- stain_color *= 0.25;
+ ground_particle_color *= 0.5;
+ air_particle_color *= 0.5;
+ stain_color *= 0.5;
}
 
//////////////////////////////
@@ -972,57 +964,73 @@ ComputeShader2D(V_CompositeCS, 8, 8)
////////////////////////////////////////////////////////////
//~ Bloom
 
+//////////////////////////////
+//- Downsample
+
ComputeShader2D(V_BloomDownCS, 8, 8)
{
+ i32 mips_count = V_GpuConst_MipsCount;
+ i32 mip_idx = V_GpuConst_MipIdx;
+
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
- Texture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomRead);
- RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
+ RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);
+
+ Texture2D<Vec4> bloom_up;
+ b32 is_first_pass = mip_idx == 1;
+ if (is_first_pass)
+ {
+ bloom_up = G_Dereference<Vec4>(frame.screen_ro);
+ }
+ else
+ {
+ bloom_up = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx - 2]);
+ }
 
- Vec2 up_dims = countof(bloom_up);
Vec2 down_dims = countof(bloom_down);
 
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
Vec2 bloom_uv = bloom_pos / down_dims;
Vec2 off_uv = 0.5 / down_dims;
- b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom);
 
- Struct(SampleDesc) { Vec2 uv; f32 weight; };
- SampleDesc samples[] = {
- { bloom_uv + Vec2(0, 0), 0.5 },
- { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
- { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
- { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
- { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
- };
+ f32 threshold = 0.25;
+ f32 knee = 0.75;
 
Vec4 result = 0;
- for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
{
- SampleDesc desc = samples[sample_idx];
- Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);
-
- f32 knee_weight = 1;
- if (is_first_pass)
+ Struct(SampleDesc) { Vec2 uv; f32 weight; };
+ SampleDesc samples[] = {
+ { bloom_uv + Vec2(0, 0), 0.5 },
+ { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
+ { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
+ { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
+ { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
+ };
+ for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
{
- f32 luminance = LuminanceFromColor(src);
- f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance
- f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);
- if (bright > 0)
- {
- f32 threshold = 1.0;
- f32 knee = 0.5;
- f32 over_threshold = max(bright - threshold, 0.0);
- f32 ramp = saturate(over_threshold / knee);
- knee_weight = (over_threshold * ramp * ramp) / bright;
- }
- else
+ SampleDesc desc = samples[sample_idx];
+ Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);
+
+ f32 knee_weight = 1;
+ if (is_first_pass)
{
- knee_weight = 0;
+ f32 luminance = LuminanceFromColor(src);
+ f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance
+ f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);
+ if (bright > 0)
+ {
+ f32 over_threshold = max(bright - threshold, 0.0);
+ f32 ramp = saturate(over_threshold / knee);
+ knee_weight = (over_threshold * ramp * ramp) / bright;
+ }
+ else
+ {
+ knee_weight = 0;
+ }
}
- }
 
- result += src * desc.weight * knee_weight;
+ result += src * desc.weight * knee_weight;
+ }
}
 
if (IsInside(bloom_pos, down_dims))
@@ -1031,52 +1039,77 @@ ComputeShader2D(V_BloomDownCS, 8, 8)
}
}
 
+//////////////////////////////
+//- Upsample
+
ComputeShader2D(V_BloomUpCS, 8, 8)
{
+ i32 mips_count = V_GpuConst_MipsCount; // NOTE(review): not referenced again in this shader
+ i32 mip_idx = V_GpuConst_MipIdx;
+
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
- Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomRead);
- RWTexture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
+ Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx]); // Source: bloom mip `mip_idx`
+
+ b32 is_last_pass = mip_idx == 0; // At mip_idx 0 the destination is the screen texture instead of a bloom mip
+ RWTexture2D<Vec4> bloom_up;
+ if (is_last_pass)
+ {
+ bloom_up = G_Dereference<Vec4>(frame.screen_rw);
+ }
+ else
+ {
+ bloom_up = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]); // Destination: bloom mip `mip_idx - 1`
+ }
 
- Vec2 up_dims = countof(bloom_up);
Vec2 down_dims = countof(bloom_down);
+ Vec2 up_dims = countof(bloom_up);
 
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
Vec2 bloom_uv = bloom_pos / up_dims;
- Vec2 off_uv = 1 / up_dims;
+ Vec2 off_uv0 = 1 / down_dims; // One texel of bloom_down, expressed in UV space
+ Vec2 off_uv1 = off_uv0 * 2; // Two texels of bloom_down, expressed in UV space
 
Vec4 result = 0;
{
// Center
- result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4;
- // Edges
+ result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 9.0f / 41.0f; // Kernel weights total 9 + 4*3 + 4*4 + 4*1 = 41
+
+ // Outer Edges
result += (
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0)
- ) * 2;
- // Corners
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv1.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, 0), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv1.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, 0), 0)
+ ) * 3.0f / 41.0f;
+
+ // Inner corners
+ result += (
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, -off_uv0.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, -off_uv0.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, off_uv0.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, off_uv0.y), 0)
+ ) * 4.0f / 41.0f;
+
+ // Outer corners
result += (
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) +
- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0)
- );
- // Normalize
- result /= 16;
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, -off_uv1.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, -off_uv1.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, off_uv1.y), 0) +
+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, off_uv1.y), 0)
+ ) * 1.0f / 41.0f;
}
 
if (IsInside(bloom_pos, up_dims))
{
- bloom_up[bloom_pos] += result;
+ bloom_up[bloom_pos] += result * 0.75; // Adds onto the destination's existing contents, scaled by 0.75
}
}
 
////////////////////////////////////////////////////////////
-//~ Post process
+//~ Finalize
 
-ComputeShader2D(V_PostProcessCS, 8, 8)
+ComputeShader2D(V_FinalizeCS, 8, 8)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
@@ -1084,42 +1117,21 @@ ComputeShader2D(V_PostProcessCS, 8, 8)
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
 
Vec2 screen_pos = SV_DispatchThreadID + 0.5;
- Vec2 screen_uv = screen_pos / frame.screen_dims;
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);
-
- //////////////////////////////
- //- Original
-
- Vec4 original = 0;
if (is_in_screen)
{
- original = screen_tex[screen_pos];
- original.rgb *= original.a;
- }
+ Vec4 result = screen_tex[screen_pos];
 
+ //- Tone map
+ if (frame.should_tone_map)
+ {
+ // ACES approximation by Krzysztof Narkowicz
+ // https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
+ result.rgb = saturate((result.rgb * (2.51f * result.rgb + 0.03f)) / (result.rgb * (2.43f * result.rgb + 0.59f) + 0.14f));
+ }
 
- //////////////////////////////
- //- Bloom
-
- Vec4 bloom = 0;
- if (is_in_screen)
- {
- bloom = bloom_tex.SampleLevel(bilinear_sampler, screen_uv, 0);
- // bloom.rgb *= bloom.a;
- }
-
- //////////////////////////////
- //- Compose
-
- Vec4 result = Vec4(0, 0, 0, 1);
- result = BlendPremul(original, result);
- result += bloom;
- // result.rgb = V_ToneMap(result);
+ result = Unpremul(result);
 
- result = Unpremul(result);
-
- if (is_in_screen)
- {
screen_tex[screen_pos] = result;
}
}
diff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh
index a47a2335..f176f2f8 100644
--- a/src/pp/pp_vis/pp_vis_gpu.gh
+++ b/src/pp/pp_vis/pp_vis_gpu.gh
@@ -46,7 +46,6 @@ Struct(V_DVertPSOutput)
 
f32 V_RandFromPos(Vec3 pos);
Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density);
-Vec3 V_ToneMap(Vec3 v);
 
////////////////////////////////////////////////////////////
//~ Shaders
@@ -73,8 +72,8 @@ ComputeShader2D(V_CompositeCS, 8, 8);
ComputeShader2D(V_BloomDownCS, 8, 8);
ComputeShader2D(V_BloomUpCS, 8, 8);
 
-//- Post process
-ComputeShader2D(V_PostProcessCS, 8, 8);
+//- Finalize
+ComputeShader2D(V_FinalizeCS, 8, 8);
 
//- Debug shapes
VertexShader(V_DVertVS, V_DVertPSInput);
diff --git a/src/pp/pp_vis/pp_vis_shared.cg b/src/pp/pp_vis/pp_vis_shared.cg
index 2419a6f2..72f6ae8d 100644
--- a/src/pp/pp_vis/pp_vis_shared.cg
+++ b/src/pp/pp_vis/pp_vis_shared.cg
@@ -11,37 +11,42 @@ V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)
V_ParticleDesc result;
{
PERSIST Readonly V_ParticleFlag flags[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) flags,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) flags,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly V_ParticleLayer layers[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) layer,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) layer,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly f32 stain_rates[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) stain_rate,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) stain_rate,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly f32 pen_rates[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) pen_rate,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) pen_rate,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly f32 lifetimes[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) lifetime,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) lifetime,
+ V_ParticlesXList(X)
+ #undef X
+ };
+ PERSIST Readonly f32 prune_speed_thresholds[V_ParticleKind_COUNT] = {
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) prune_speed_threshold,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly Vec4 base_colors[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) base_color,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) base_color,
V_ParticlesXList(X)
#undef X
};
PERSIST Readonly Vec4 dry_factor[V_ParticleKind_COUNT] = {
- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) dry_factor,
+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) dry_factor,
V_ParticlesXList(X)
#undef X
};
@@ -51,6 +56,7 @@ V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)
result.stain_rate = stain_rates[kind];
result.pen_rate = pen_rates[kind];
result.lifetime = lifetimes[kind];
+ result.prune_speed_threshold = prune_speed_thresholds[kind];
result.base_color = LinearFromSrgb(base_colors[kind]);
result.dry_factor = LinearFromSrgb(dry_factor[kind]);
}
diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh
index 16ca6419..71d88ea5 100644
--- a/src/pp/pp_vis/pp_vis_shared.cgh
+++ b/src/pp/pp_vis/pp_vis_shared.cgh
@@ -9,14 +9,13 @@
Enum(V_GpuFlag)
{
V_GpuFlag_None = 0,
- V_GpuFlag_InitBloom = (1 << 0),
};
 
G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0);
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1);
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2);
-G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3);
-G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);
+G_DeclConstant(i32, V_GpuConst_MipsCount, 3);
+G_DeclConstant(i32, V_GpuConst_MipIdx, 4);
 
////////////////////////////////////////////////////////////
//~ Particle types
@@ -29,7 +28,6 @@ G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);
Enum(V_ParticleFlag)
{
V_ParticleFlag_None = 0,
- V_ParticleFlag_NoPruneWhenStill = (1 << 0),
V_ParticleFlag_StainWhenPruned = (1 << 1),
V_ParticleFlag_NoReflect = (1 << 2),
V_ParticleFlag_OnlyCollideWithWalls = (1 << 3),
@@ -53,6 +51,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Ground, \
/* Stain rate, pen chance */ 30, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0, 0, 0, 0), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -64,8 +63,9 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Ground, \
/* Stain rate, pen chance */ 100, 0.25, \
/* Lifetime */ Inf, \
- /* Base color */ CompVec4(0.5, 0.1, 0.1, 0.05), \
- /* Dry color factor */ CompVec4(0.5, 0.5, 0.5, 1) \
+ /* Prune speed threshold */ 0.5, \
+ /* Base color */ CompVec4(0.6, 0.1, 0.1, 0.05), \
+ /* Dry color factor */ CompVec4(0.4, 0.4, 0.4, 1) \
) \
X( \
/* Name */ BloodDebris, \
@@ -73,6 +73,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 30, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.5, 0.1, 0.1, 0.8), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -82,6 +83,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.4, 0.3, 0.2, 1), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -91,6 +93,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.1, \
/* Base color */ CompVec4(2, 0.5, 0, 1), \
/* Dry color factor */ CompVec4(0.2, 0.1, 0.0, 1) \
) \
@@ -102,6 +105,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ 0.075, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.8, 0.6, 0.2, 0.25), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -111,6 +115,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Air, \
/* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(0.25, 0.25, 0.25, 0.75), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -122,6 +127,7 @@ Enum(V_ParticleLayer)
/* Layer */ V_ParticleLayer_Mid, \
/* Stain rate, pen chance */ 0, 0, \
/* Lifetime */ Inf, \
+ /* Prune speed threshold */ 0.01, \
/* Base color */ CompVec4(1, 1, 0, 1), \
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
) \
@@ -168,6 +174,7 @@ Struct(V_ParticleDesc)
f32 stain_rate;
f32 pen_rate;
f32 lifetime;
+ f32 prune_speed_threshold;
Vec4 base_color;
Vec4 dry_factor;
};
@@ -264,6 +271,7 @@ Struct(V_SharedFrame)
 
b32 tiles_dirty;
b32 should_clear_particles;
+ b32 should_tone_map;
 
b32 is_looking;
b32 is_moving;