diff --git a/src/pp/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay index 0477727d..4eab5318 100644 --- a/src/pp/pp_vis/pp_vis.lay +++ b/src/pp/pp_vis/pp_vis.lay @@ -25,6 +25,7 @@ @ComputeShader V_ShadeCS @ComputeShader V_CompositeCS @ComputeShader V_BlurDownCS +@ComputeShader V_BlurUpCS @VertexShader V_DVertVS @PixelShader V_DVertPS diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 999bdde2..714ffa42 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -4850,7 +4850,7 @@ void V_TickForever(WaveLaneCtx *lane) G_Layout_DirectQueue_ShaderReadWrite, .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)), - .max_mips = countof(frame->screen_mips_ro) // For blur pyramid + .max_mips = 4 // For bloom pyramid ); Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1)); Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y)); @@ -5047,16 +5047,16 @@ void V_TickForever(WaveLaneCtx *lane) } // Upsample passes - // for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx) - // { - // Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); + for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx) + { + Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); - // G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx)); - // G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1)); + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx)); + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1)); - // G_SetConstant(frame->cl, 
V_GpuConst_Mip, mip_idx); - // G_Compute(frame->cl, V_BlurDownCS, V_ThreadGroupSizeFromTexSize(dims)); - // } + G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); + G_Compute(frame->cl, V_BlurUpCS, V_ThreadGroupSizeFromTexSize(dims)); + } } ////////////////////////////// diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index 7aaefdea..8b453bba 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -194,7 +194,7 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; StructuredBuffer quads = G_Dereference(frame.quads); - SamplerState clamp_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); + SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); RWTexture2D occluders = G_Dereference(frame.occluders); V_Quad quad = quads[input.quad_idx]; @@ -205,7 +205,7 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(occluders)); - Vec4 albedo = tex.Sample(clamp_sampler, input.samp_uv); + Vec4 albedo = tex.Sample(sampler, input.samp_uv); if (is_in_world) { @@ -529,7 +529,7 @@ ComputeShader(V_SimParticlesCS, 64) ComputeShader2D(V_ShadeCS, 8, 8) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - SamplerState clamp_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); + SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); Texture2D tiles = G_Dereference(frame.tiles); Texture2D albedo_tex = G_Dereference(frame.albedo_ro); RWTexture2D shade_tex = G_Dereference(frame.shade_rw); @@ -567,7 +567,7 @@ ComputeShader2D(V_CompositeCS, 8, 8) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; // Texture2D shade_tex = G_Dereference(frame.shade_ro); - SamplerState clamp_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); + SamplerState sampler = 
G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); Texture2D albedo_tex = G_Dereference(frame.albedo_ro); RWTexture2D screen_tex = G_Dereference(frame.screen_mips_rw[0]); RWTexture2D stain_cells = G_Dereference(frame.stain_cells); @@ -608,7 +608,7 @@ ComputeShader2D(V_CompositeCS, 8, 8) // if (all(shade_pos >= Vec2(0, 0)) && all(shade_pos < countof(shade_tex))) // { // Vec2 shade_uv = shade_pos / countof(shade_tex); - // shade_color = shade_tex.SampleLevel(clamp_sampler, shade_uv, 0); + // shade_color = shade_tex.SampleLevel(sampler, shade_uv, 0); // } ////////////////////////////// @@ -651,7 +651,7 @@ ComputeShader2D(V_CompositeCS, 8, 8) V_TileDesc tile_desc = frame.tile_descs[tile]; Texture2D tile_tex = G_Dereference(tile_desc.tex); Vec2 tile_samp_uv = lerp(tile_desc.tex_slice_uv.p0, tile_desc.tex_slice_uv.p1, frac(world_pos)); - tile_color = tile_tex.SampleLevel(clamp_sampler, tile_samp_uv, 0); + tile_color = tile_tex.SampleLevel(sampler, tile_samp_uv, 0); } // Checkered grid else if (tile == P_TileKind_Empty) @@ -942,20 +942,71 @@ ComputeShader2D(V_BlurDownCS, 8, 8) V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; Texture2D screen_up = G_Dereference(frame.screen_mips_ro[V_GpuConst_MipIdx - 1]); RWTexture2D screen_down = G_Dereference(frame.screen_mips_rw[V_GpuConst_MipIdx]); + SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); - // V_GpuConst_MipIdx + Vec2 up_dims = countof(screen_up); + Vec2 down_dims = countof(screen_down); Vec2 blur_pos = SV_DispatchThreadID + 0.5; - - + Vec2 blur_uv = blur_pos / down_dims; + f32 offset_uv = 0.5 / up_dims; Vec4 result = 0; + { + Vec4 accum = 0; + accum += screen_up.Sample(sampler, blur_uv + Vec2(-offset_uv, -offset_uv)); + accum += screen_up.Sample(sampler, blur_uv + Vec2(offset_uv, -offset_uv)); + accum += screen_up.Sample(sampler, blur_uv + Vec2(offset_uv, offset_uv)); + accum += screen_up.Sample(sampler, blur_uv + Vec2(-offset_uv, offset_uv)); + result = 
accum / 4.0f; + } + if (all(blur_pos >= 0) && all(blur_pos < countof(screen_down))) { screen_down[blur_pos] = result; } }
+// Upsample step of the blur pyramid.
+//
+// Reads mip (V_GpuConst_MipIdx + 1) through a bilinear clamp sampler, filters it
+// with a 3x3 tent kernel (weights 1-2-1 / 2-4-2 / 1-2-1, normalized by 16) and
+// accumulates the filtered value into mip V_GpuConst_MipIdx.
+// One thread handles one texel of the destination mip.
+ComputeShader2D(V_BlurUpCS, 8, 8)
+{
+  V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0];
+  Texture2D screen_down = G_Dereference(frame.screen_mips_ro[V_GpuConst_MipIdx + 1]);
+  RWTexture2D screen_up = G_Dereference(frame.screen_mips_rw[V_GpuConst_MipIdx]);
+  SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
+
+  Vec2 up_dims = countof(screen_up);
+  Vec2 down_dims = countof(screen_down);
+
+  Vec2 blur_pos = SV_DispatchThreadID + 0.5;
+  Vec2 blur_uv = blur_pos / up_dims;
+
+  // Size of one source texel in UV space, kept per axis. Storing this in a
+  // scalar keeps only the X component, which makes the vertical taps use the
+  // horizontal texel size whenever the target is not square.
+  Vec2 offset_uv = 1 / down_dims;
+
+  Vec4 result = 0;
+
+  // Every tap uses an explicit LOD of 0, matching the SampleLevel calls in
+  // V_CompositeCS, so the pass does not depend on implicit derivatives in a
+  // compute shader.
+
+  // Center
+  result += screen_down.SampleLevel(sampler, blur_uv, 0) * 4;
+
+  // Edges
+  result += screen_down.SampleLevel(sampler, blur_uv + offset_uv * Vec2(0, -1), 0) * 2;
+  result += screen_down.SampleLevel(sampler, blur_uv + offset_uv * Vec2(1, 0), 0) * 2;
+  result += screen_down.SampleLevel(sampler, blur_uv + offset_uv * Vec2(0, 1), 0) * 2;
+  result += screen_down.SampleLevel(sampler, blur_uv + offset_uv * Vec2(-1, 0), 0) * 2;
+
+  // Corners
+  result += screen_down.SampleLevel(sampler, blur_uv + offset_uv * Vec2(-1, -1), 0);
+  result += screen_down.SampleLevel(sampler, blur_uv + offset_uv * Vec2(1, -1), 0);
+  result += screen_down.SampleLevel(sampler, blur_uv + offset_uv * Vec2(1, 1), 0);
+  result += screen_down.SampleLevel(sampler, blur_uv + offset_uv * Vec2(-1, 1), 0);
+
+  // Normalize (4 + 4 * 2 + 4 * 1 = 16)
+  result /= 16;
+
+  // Threads in partially covered groups fall outside the destination mip.
+  if (all(blur_pos >= 0) && all(blur_pos < countof(screen_up)))
+  {
+    // Additive: the filtered coarser mip is blended on top of what this mip already holds.
+    screen_up[blur_pos] += result;
+  }
+}
+ //////////////////////////////////////////////////////////// //~ Debug shapes diff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh index 7b6bbf64..ac57fa17 100644 --- a/src/pp/pp_vis/pp_vis_gpu.gh +++ b/src/pp/pp_vis/pp_vis_gpu.gh @@ -70,6 +70,7 @@ ComputeShader2D(V_CompositeCS, 8, 8); //- Blur ComputeShader2D(V_BlurDownCS, 8, 8); +ComputeShader2D(V_BlurUpCS, 8, 8); //- Debug shapes VertexShader(V_DVertVS, 
V_DVertPSInput);