From 9ca01a920df9c751885e9dacf13e9f1936d94e4d Mon Sep 17 00:00:00 2001 From: jacob Date: Sun, 15 Feb 2026 15:20:46 -0600 Subject: [PATCH] thresholded & smoothed bloom --- src/gpu/gpu.lay | 4 +- src/gpu/gpu_core.h | 2 - src/gpu/gpu_dx12/gpu_dx12_core.c | 2 +- .../{gpu_shader_core.cgh => gpu_shared.cgh} | 3 + src/meta/meta.c | 2 + src/pp/pp_vis/pp_vis.lay | 5 +- src/pp/pp_vis/pp_vis_core.c | 118 +++++++---- src/pp/pp_vis/pp_vis_gpu.g | 187 +++++++++++++----- src/pp/pp_vis/pp_vis_gpu.gh | 10 +- src/pp/pp_vis/pp_vis_shared.cgh | 28 ++- src/ui/ui_core.c | 2 +- 11 files changed, 255 insertions(+), 108 deletions(-) rename src/gpu/{gpu_shader_core.cgh => gpu_shared.cgh} (99%) diff --git a/src/gpu/gpu.lay b/src/gpu/gpu.lay index a769ae19..f7ed7d2e 100644 --- a/src/gpu/gpu.lay +++ b/src/gpu/gpu.lay @@ -13,11 +13,11 @@ ////////////////////////////// //- Api -@IncludeC gpu_shader_core.cgh +@IncludeC gpu_shared.cgh @IncludeC gpu_core.h @IncludeC gpu_common.h -@IncludeG gpu_shader_core.cgh +@IncludeG gpu_shared.cgh @Bootstrap G_Bootstrap @Bootstrap G_BootstrapCommon diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index cc3d741e..7e1b329a 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -475,8 +475,6 @@ Struct(G_RefDesc) //////////////////////////////////////////////////////////// //~ Rasterization types -#define G_MaxRenderTargets 8 - Enum(G_RasterMode) { G_RasterMode_None, diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index c2b6ed4d..785efbfe 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -1129,7 +1129,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle else if (is_texture) { i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z); - i32 max_mips = FloorF32(Log2F32(largest_dim)) + 1; + i32 max_mips = MinI32(FloorF32(Log2F32(largest_dim)) + 1, G_MaxMips); d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout); d3d_desc.Dimension = ( desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : diff --git a/src/gpu/gpu_shader_core.cgh b/src/gpu/gpu_shared.cgh similarity index 99% rename from src/gpu/gpu_shader_core.cgh rename to src/gpu/gpu_shared.cgh index 111e1fbc..ce528aaa 100644 --- a/src/gpu/gpu_shader_core.cgh +++ b/src/gpu/gpu_shared.cgh @@ -113,6 +113,9 @@ Enum(G_BasicSamplerKind) //////////////////////////////////////////////////////////// //~ Resource countof +#define G_MaxMips 16 +#define G_MaxRenderTargets 8 + #if IsGpu template u32 countof(StructuredBuffer obj) { u32 result; obj.GetDimensions(result); return result; } template u32 countof(RWStructuredBuffer obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; } diff --git a/src/meta/meta.c b/src/meta/meta.c index 2cfd69df..6704df9b 100644 --- a/src/meta/meta.c +++ b/src/meta/meta.c @@ -544,7 +544,9 @@ void M_BuildEntryPoint(WaveLaneCtx *lane) PushStringToList(perm, &cp.warnings_dxc, Lit("-Wshadow")); // Disable warnings + PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-local-type-template-args")); PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-variable")); + PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-local-typedef")); PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-conversion")); PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-switch")); } diff --git a/src/pp/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay index 4eab5318..f72dc528 100644 --- a/src/pp/pp_vis/pp_vis.lay +++ b/src/pp/pp_vis/pp_vis.lay @@ -24,8 +24,9 @@ @ComputeShader V_SimParticlesCS @ComputeShader V_ShadeCS @ComputeShader V_CompositeCS -@ComputeShader V_BlurDownCS -@ComputeShader V_BlurUpCS +@ComputeShader V_BloomDownCS +@ComputeShader V_BloomUpCS +@ComputeShader V_PostProcessCS @VertexShader V_DVertVS @PixelShader V_DVertPS diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 714ffa42..c8f15763 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -2620,8 +2620,6 @@ void V_TickForever(WaveLaneCtx *lane) } } - - ////////////////////////////// //- Push test explosion @@ -2654,6 +2652,7 @@ void V_TickForever(WaveLaneCtx *lane) // emitter.falloff.min = emitter.falloff.max = 0; // emitter.count = CeilF32(Kibi(64) * frame->dt); + // emitter.count = CeilF32(Mebi(32) * frame->dt); // emitter.count = Mebi(16); // emitter.count = Mebi(2); // emitter.count = Kibi(32); @@ -4849,15 +4848,28 @@ void V_TickForever(WaveLaneCtx *lane) frame->screen_dims, G_Layout_DirectQueue_ShaderReadWrite, .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, - .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)), - .max_mips = 4 // For bloom pyramid + .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)) ); Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1)); Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y)); - for (i32 mip_idx = 0; mip_idx < G_CountMips(screen_target); ++mip_idx) + frame->screen_ro = G_PushTexture2DRef(frame->gpu_arena, screen_target); + frame->screen_rw = G_PushRWTexture2DRef(frame->gpu_arena, screen_target); + + // Bloom texture + G_ResourceHandle bloom_target = G_PushTexture2D( + frame->gpu_arena, frame->cl, + G_Format_R16G16B16A16_Float, + G_DimsFromMip2D(G_Count2D(screen_target), 1), + G_Layout_DirectQueue_ShaderReadWrite, + .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, + .name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)), + // .max_mips = 4 + .max_mips = 8 + ); + for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx) { - frame->screen_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx)); - frame->screen_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx)); + frame->bloom_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx)); + frame->bloom_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx)); } // Albedo texture @@ -4929,19 +4941,18 @@ void V_TickForever(WaveLaneCtx *lane) frame->emitters = G_PushStructuredBufferRef(frame->gpu_arena, gpu_emitters, V_Emitter); // Upload gpu frame - { - // Gpu frame - G_ResourceHandle gpu_frame_res = G_PushBufferFromCpuCopy( - frame->gpu_arena, frame->cl, - StringFromStruct(&frame->shared_frame), - .name = StringF(frame->arena, "Gpu frame [%F]", FmtSint(frame->tick)) - ); - G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame); + G_ResourceHandle gpu_frame_res = G_PushBufferFromCpuCopy( + frame->gpu_arena, frame->cl, + StringFromStruct(&frame->shared_frame), + .name = StringF(frame->arena, "Gpu frame [%F]", FmtSint(frame->tick)) + ); + G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame); - // Set constants - G_SetConstant(frame->cl, V_GpuConst_Frame, gpu_frame); - G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture()); - } + // Set initial constants + V_GpuFlag gpu_flags = V_GpuFlag_None; + G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags); + G_SetConstant(frame->cl, V_GpuConst_Frame, gpu_frame); + G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture()); // Sync G_DumbGlobalMemorySync(frame->cl); @@ -5024,45 +5035,70 @@ void V_TickForever(WaveLaneCtx *lane) { G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); + + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead); } ////////////////////////////// - //- Blur passes + //- Bloom passes { - // TODO: Limit passes - i32 mips_count = G_CountMips(screen_target); + i32 mips_count = G_CountMips(bloom_target); - // Downsample + blur passes - G_LogResource(frame->cl, screen_target); - for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx) + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead); + + //- Downsample + blur passes + for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx) { - Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); - - G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1)); - // G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx)); - - G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); - G_Compute(frame->cl, V_BlurDownCS, V_ThreadGroupSizeFromTexSize(dims)); + Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx); + if (mip_idx == 0) + { + // Init bloom pyramid from screen target on first pass (prefilter) + gpu_flags |= V_GpuFlag_InitBloom; + G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags); + G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro); + } + else + { + G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1)); + G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]); + } + G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]); + { + G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims)); + } + gpu_flags &= ~V_GpuFlag_InitBloom; + G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags); } - // Upsample passes + //- Upsample passes for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx) { - Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); + Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx); - G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx)); - G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1)); + G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx)); + G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1)); - G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); - G_Compute(frame->cl, V_BlurUpCS, V_ThreadGroupSizeFromTexSize(dims)); + G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]); + G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]); + + G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims)); } } + ////////////////////////////// + //- Post process pass + + { + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite); + G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0)); + G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); + } + ////////////////////////////// //- Debug shapes pass - G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite, .mips = RNGI32(0, 0)); + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite); { G_Rasterize( @@ -5078,13 +5114,13 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Finalize screen target - G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0)); + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead); { Rng2 uv = Zi; uv.p0 = Vec2FromVec(screen_viewport.p0); uv.p1 = Vec2FromVec(screen_viewport.p1); uv = DivRng2Vec2(uv, Vec2FromVec(frame->screen_dims)); - UI_SetRawTexture(vis_box, frame->screen_mips_ro[0], uv); + UI_SetRawTexture(vis_box, frame->screen_ro, uv); } } diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index 8b453bba..5083d3f9 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -62,7 +62,7 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den } } - result.rgb = saturate(result.rgb + (rand_color - 0.5) * 0.05); + result.rgb = result.rgb + (rand_color - 0.5) * 0.05; // result.a += (rand_alpha - 0.5) * 0.025; // result.a *= rand_alpha; @@ -72,6 +72,13 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den return result; } +// ACES approximation by Krzysztof Narkowicz +// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/ +Vec3 V_ToneMap(Vec3 v) +{ + return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f)); +} + //////////////////////////////////////////////////////////// //~ Prepare frame @@ -569,7 +576,7 @@ ComputeShader2D(V_CompositeCS, 8, 8) // Texture2D shade_tex = G_Dereference(frame.shade_ro); SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); Texture2D albedo_tex = G_Dereference(frame.albedo_ro); - RWTexture2D screen_tex = G_Dereference(frame.screen_mips_rw[0]); + RWTexture2D screen_tex = G_Dereference(frame.screen_rw); RWTexture2D stain_cells = G_Dereference(frame.stain_cells); RWTexture2D ground_cells = G_Dereference(frame.ground_cells); RWTexture2D stain_densities = G_Dereference(frame.stain_densities); @@ -922,7 +929,7 @@ ComputeShader2D(V_CompositeCS, 8, 8) ////////////////////////////// //- Compose result - Vec4 result = Vec4(0, 0, 0, 1); + Vec4 result = 0; result = BlendPremul(world_color, result); result = BlendPremul(overlay_color, result); @@ -935,75 +942,157 @@ ComputeShader2D(V_CompositeCS, 8, 8) } //////////////////////////////////////////////////////////// -//~ Blur +//~ Bloom -ComputeShader2D(V_BlurDownCS, 8, 8) +ComputeShader2D(V_BloomDownCS, 8, 8) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - Texture2D screen_up = G_Dereference(frame.screen_mips_ro[V_GpuConst_MipIdx - 1]); - RWTexture2D screen_down = G_Dereference(frame.screen_mips_rw[V_GpuConst_MipIdx]); + Texture2D bloom_up = G_Dereference(V_GpuConst_BloomRead); + RWTexture2D bloom_down = G_Dereference(V_GpuConst_BloomWrite); SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - Vec2 up_dims = countof(screen_up); - Vec2 down_dims = countof(screen_down); + Vec2 up_dims = countof(bloom_up); + Vec2 down_dims = countof(bloom_down); - Vec2 blur_pos = SV_DispatchThreadID + 0.5; - Vec2 blur_uv = blur_pos / down_dims; - f32 offset_uv = 0.5 / up_dims; + Vec2 bloom_pos = SV_DispatchThreadID + 0.5; + Vec2 bloom_uv = bloom_pos / down_dims; + Vec2 off_uv = 0.5 / down_dims; + b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom); + + Struct(SampleDesc) { Vec2 uv; f32 weight; }; + SampleDesc samples[] = { + { bloom_uv + Vec2(0, 0), 0.5 }, + { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 }, + { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 }, + { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 }, + { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 }, + }; Vec4 result = 0; + for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx) { - Vec4 accum = 0; - accum += screen_up.Sample(sampler, blur_uv + Vec2(-offset_uv, -offset_uv)); - accum += screen_up.Sample(sampler, blur_uv + Vec2(offset_uv, -offset_uv)); - accum += screen_up.Sample(sampler, blur_uv + Vec2(offset_uv, offset_uv)); - accum += screen_up.Sample(sampler, blur_uv + Vec2(-offset_uv, offset_uv)); - result = accum / 4.0f; + SampleDesc desc = samples[sample_idx]; + Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0); + + f32 knee_weight = 1; + if (is_first_pass) + { + f32 luminance = LuminanceFromColor(src); + f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance + f32 bright = max(luminance, max_rgb * 0.5); + if (bright > 0) + { + f32 threshold = 1.0; + f32 knee = 0.5; + f32 over_threshold = max(bright - threshold, 0.0); + f32 ramp = saturate(over_threshold / knee); + knee_weight = (over_threshold * ramp * ramp) / bright; + } + else + { + knee_weight = 0; + } + } + + result += src * desc.weight * knee_weight; } - if (all(blur_pos >= 0) && all(blur_pos < countof(screen_down))) + if (all(bloom_pos >= 0) && all(bloom_pos < down_dims)) { - screen_down[blur_pos] = result; + bloom_down[bloom_pos] = result; } } -ComputeShader2D(V_BlurUpCS, 8, 8) +ComputeShader2D(V_BloomUpCS, 8, 8) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - Texture2D screen_down = G_Dereference(frame.screen_mips_ro[V_GpuConst_MipIdx + 1]); - RWTexture2D screen_up = G_Dereference(frame.screen_mips_rw[V_GpuConst_MipIdx]); + Texture2D bloom_down = G_Dereference(V_GpuConst_BloomRead); + RWTexture2D bloom_up = G_Dereference(V_GpuConst_BloomWrite); SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - Vec2 up_dims = countof(screen_up); - Vec2 down_dims = countof(screen_down); + Vec2 up_dims = countof(bloom_up); + Vec2 down_dims = countof(bloom_down); - Vec2 blur_pos = SV_DispatchThreadID + 0.5; - Vec2 blur_uv = blur_pos / up_dims; - f32 offset_uv = 1 / down_dims; + Vec2 bloom_pos = SV_DispatchThreadID + 0.5; + Vec2 bloom_uv = bloom_pos / up_dims; + Vec2 off_uv = 1 / up_dims; Vec4 result = 0; - - // Center - result += screen_down.Sample(sampler, blur_uv) * 4; - - // Edges - result += screen_down.Sample(sampler, blur_uv + Vec2(0, -offset_uv)) * 2; - result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, 0)) * 2; - result += screen_down.Sample(sampler, blur_uv + Vec2(0, offset_uv)) * 2; - result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, 0)) * 2; - - // Corners - result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, -offset_uv)); - result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, -offset_uv)); - result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, offset_uv)); - result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, offset_uv)); - - // Normalize - result /= 16; - - if (all(blur_pos >= 0) && all(blur_pos < countof(screen_up))) { - screen_up[blur_pos] += result; + // Center + result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4; + // Edges + result += ( + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) + + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) + + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) + + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0) + ) * 2; + // Corners + result += ( + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) + + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) + + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) + + bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0) + ); + // Normalize + result /= 16; + } + + if (all(bloom_pos >= 0) && all(bloom_pos < up_dims)) + { + bloom_up[bloom_pos] += result; + } +} + +//////////////////////////////////////////////////////////// +//~ Post process + +ComputeShader2D(V_PostProcessCS, 8, 8) +{ + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; + SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); + Texture2D bloom_tex = G_Dereference(frame.bloom_mips_ro[0]); + RWTexture2D screen_tex = G_Dereference(frame.screen_rw); + + Vec2 screen_pos = SV_DispatchThreadID + 0.5; + Vec2 screen_uv = screen_pos / frame.screen_dims; + b32 is_in_screen = all(screen_pos >= 0) && all(screen_pos < frame.screen_dims); + + ////////////////////////////// + //- Original + + Vec4 original = 0; + if (is_in_screen) + { + original = screen_tex[screen_pos]; + original.rgb *= original.a; + } + + + ////////////////////////////// + //- Bloom + + Vec4 bloom = 0; + if (is_in_screen) + { + bloom = bloom_tex.SampleLevel(bilinear_sampler, screen_uv, 0); + // bloom.rgb *= bloom.a; + } + + ////////////////////////////// + //- Compose + + Vec4 result = Vec4(0, 0, 0, 1); + result = BlendPremul(original, result); + result += bloom; + // result.rgb = V_ToneMap(result); + + result = Unpremul(result); + + if (is_in_screen) + { + screen_tex[screen_pos] = result; } } diff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh index ac57fa17..feb4f08e 100644 --- a/src/pp/pp_vis/pp_vis_gpu.gh +++ b/src/pp/pp_vis/pp_vis_gpu.gh @@ -46,6 +46,7 @@ Struct(V_DVertPSOutput) f32 V_RandFromPos(Vec3 pos); Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 density, f32 dryness); +Vec3 V_ToneMap(Vec3 v); //////////////////////////////////////////////////////////// //~ Shaders @@ -68,9 +69,12 @@ ComputeShader2D(V_ShadeCS, 8, 8); //- Composite ComputeShader2D(V_CompositeCS, 8, 8); -//- Blur -ComputeShader2D(V_BlurDownCS, 8, 8); -ComputeShader2D(V_BlurUpCS, 8, 8); +//- Bloom +ComputeShader2D(V_BloomDownCS, 8, 8); +ComputeShader2D(V_BloomUpCS, 8, 8); + +//- Post process +ComputeShader2D(V_PostProcessCS, 8, 8); //- Debug shapes VertexShader(V_DVertVS, V_DVertPSInput); diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh index c9c1d40f..6acdf82f 100644 --- a/src/pp/pp_vis/pp_vis_shared.cgh +++ b/src/pp/pp_vis/pp_vis_shared.cgh @@ -4,11 +4,22 @@ // #define V_ParticlesCap Mebi(16) //////////////////////////////////////////////////////////// -//~ State types +//~ Constant types -G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 0); -G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 1); -G_DeclConstant(u32, V_GpuConst_MipIdx, 2); +Enum(V_GpuFlag) +{ + V_GpuFlag_None = 0, + V_GpuFlag_InitBloom = (1 << 0), +}; + +G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0); +G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1); +G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2); +G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3); +G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4); + +//////////////////////////////////////////////////////////// +//~ State types Struct(V_TileDesc) { @@ -136,13 +147,16 @@ Struct(V_SharedFrame) V_TileDesc tile_descs[P_TileKind_COUNT]; G_Texture2DRef tiles; - G_Texture2DRef screen_mips_ro[16]; - G_RWTexture2DRef screen_mips_rw[16]; + G_Texture2DRef screen_ro; + G_RWTexture2DRef screen_rw; G_Texture2DRef shade_ro; G_RWTexture2DRef shade_rw; G_Texture2DRef albedo_ro; G_RWTexture2DRef albedo_rw; + G_Texture2DRef bloom_mips_ro[G_MaxMips]; + G_RWTexture2DRef bloom_mips_rw[G_MaxMips]; + u32 emitters_count; G_StructuredBufferRef emitters; G_RWStructuredBufferRef particles; @@ -214,7 +228,7 @@ Enum(V_ParticleFlag) /* Name */ Debris, \ /* Flags */ V_ParticleFlag_Ground | V_ParticleFlag_PruneWhenStill | V_ParticleFlag_StainWhenPruned, \ /* Stain rate, pen chance */ 0, 0, \ - /* Base color */ 1, 0.5, 0, 1 \ + /* Base color */ 2.0, 0.5, 0, 1 \ ) \ \ /* Air particles */ \ diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index d2f6767b..1643a3af 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -1735,7 +1735,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) ); G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, UI_GpuParams); - // Constants + // Initial constants G_SetConstant(frame->cl, UI_GpuConst_Params, params_ro); // Sync