diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index d00464de..09c18440 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -2695,6 +2695,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); if (rt) { + Assert(AnyBit(rt->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)); /* a resource bound as a render target must have been created with the ALLOW_RENDER_TARGET flag */ if (bound_render_target_uids[i] != rt->uid + desc.mip) { G_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i]; diff --git a/src/pp/pp_res/backdrop.ase b/src/pp/pp_res/backdrop.ase index 77aaaa9b..3c0607a2 100644 --- a/src/pp/pp_res/backdrop.ase +++ b/src/pp/pp_res/backdrop.ase @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ceffe48c980f3b984e489de79f4c58e7900295ce3f0c5884b5dca7ea7f530912 -size 4710 +oid sha256:05919b6a3d8b11b2c7699e7daa2187e334bb98f2d8c49c662aa6a8749d9b0b1f +size 7005 diff --git a/src/pp/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay index 2d916376..66c899c4 100644 --- a/src/pp/pp_vis/pp_vis.lay +++ b/src/pp/pp_vis/pp_vis.lay @@ -17,6 +17,8 @@ @ComputeShader V_PrepareShadeCS @ComputeShader V_PrepareCellsCS +@ComputeShader V_BackdropDownCS +@ComputeShader V_BackdropUpCS @ComputeShader V_ClearParticlesCS @VertexShader V_QuadVS @PixelShader V_QuadPS diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 26bd9254..328393fe 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -1061,7 +1061,7 @@ void V_TickForever(WaveLaneCtx *lane) frame->af.world_to_screen = TranslateAffine(frame->af.world_to_screen, MulVec2(frame->screen_dims, 0.5)); frame->af.world_to_screen = ScaleAffine(frame->af.world_to_screen, VEC2(camera_scale, camera_scale)); frame->af.world_to_screen = TranslateAffine(frame->af.world_to_screen, NegVec2(frame->camera_pos)); - frame->af.world_to_screen.og = RoundVec2(frame->af.world_to_screen.og); + // frame->af.world_to_screen.og =
RoundVec2(frame->af.world_to_screen.og); frame->af.screen_to_world = InvertAffine(frame->af.world_to_screen); } @@ -1073,7 +1073,7 @@ void V_TickForever(WaveLaneCtx *lane) frame->af.world_to_shade = TranslateAffine(frame->af.world_to_shade, MulVec2(frame->shade_dims, 0.5)); frame->af.world_to_shade = ScaleAffine(frame->af.world_to_shade, VEC2(camera_scale, camera_scale)); frame->af.world_to_shade = TranslateAffine(frame->af.world_to_shade, NegVec2(frame->camera_pos)); - frame->af.world_to_shade.og = RoundVec2(frame->af.world_to_shade.og); + // frame->af.world_to_shade.og = RoundVec2(frame->af.world_to_shade.og); frame->af.shade_to_world = InvertAffine(frame->af.world_to_shade); } @@ -1082,7 +1082,7 @@ void V_TickForever(WaveLaneCtx *lane) frame->af.screen_to_shade = AffineIdentity; { frame->af.shade_to_screen = MulAffine(frame->af.world_to_screen, frame->af.shade_to_world); - frame->af.shade_to_screen.og = RoundVec2(frame->af.shade_to_screen.og); + // frame->af.shade_to_screen.og = RoundVec2(frame->af.shade_to_screen.og); frame->af.screen_to_shade = InvertAffine(frame->af.shade_to_screen); } @@ -1093,7 +1093,7 @@ void V_TickForever(WaveLaneCtx *lane) { frame->af.world_to_cell = ScaleAffine(frame->af.world_to_cell, VEC2(P_CellsPerMeter, P_CellsPerMeter)); frame->af.world_to_cell = TranslateAffine(frame->af.world_to_cell, VEC2((P_WorldPitch / 2.0), (P_WorldPitch / 2.0))); - frame->af.world_to_cell.og = RoundVec2(frame->af.world_to_cell.og); + // frame->af.world_to_cell.og = RoundVec2(frame->af.world_to_cell.og); frame->af.cell_to_world = InvertAffine(frame->af.world_to_cell); } @@ -1104,7 +1104,6 @@ void V_TickForever(WaveLaneCtx *lane) { frame->af.world_to_tile = ScaleAffine(frame->af.world_to_tile, VEC2(P_TilesPerMeter, P_TilesPerMeter)); frame->af.world_to_tile = TranslateAffine(frame->af.world_to_tile, VEC2((P_WorldPitch / 2.0), (P_WorldPitch / 2.0))); - // frame->af.world_to_tile.og = RoundVec2(frame->af.world_to_tile.og); frame->af.tile_to_world =
InvertAffine(frame->af.world_to_tile); } @@ -4754,11 +4753,12 @@ void V_TickForever(WaveLaneCtx *lane) //- Build gpu data // Backdrop + frame->backdrop_parallax = TweakFloat("Backdrop parallax", 10, 0, 20); { SPR_SheetKey sheet = SPR_SheetKeyFromResource(ResourceKeyFromStore(&P_Resources, Lit("backdrop.ase"))); SPR_Sprite sprite = SPR_SpriteFromSheet(sheet, SPR_NilSpanKey, 0); - frame->backdrop = sprite.tex; - frame->backdrop_slice_uv = DivRng2Vec2(sprite.tex_rect, sprite.tex_dims); + frame->backdrop_src = sprite.tex; + frame->backdrop_src_slice_uv = DivRng2Vec2(sprite.tex_rect, sprite.tex_dims); } // Tiles @@ -4814,7 +4814,7 @@ void V_TickForever(WaveLaneCtx *lane) G_Format_R16G16B16A16_Float, G_DimsFromMip2D(G_Count2D(screen_target), 1), G_Layout_DirectQueue_General, - .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, + .flags = G_ResourceFlag_AllowShaderReadWrite, .name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)), .max_mips = 64 ); @@ -4835,6 +4835,22 @@ void V_TickForever(WaveLaneCtx *lane) ); frame->albedo_ro = G_PushTexture2DRef(frame->gpu_arena, albedo_target); + // Backdrop texture (dims from screen_target mip 0, up to 4 mips; one RO/RW ref per mip of backdrop_target) + G_ResourceHandle backdrop_target = G_PushTexture2D( + frame->gpu_arena, frame->cl, + G_Format_R16G16B16A16_Float, + G_DimsFromMip2D(G_Count2D(screen_target), 0), + G_Layout_DirectQueue_General, + .flags = G_ResourceFlag_AllowShaderReadWrite, + .name = StringF(frame->arena, "Backdrop target [%F]", FmtSint(frame->tick)), + .max_mips = 4 + ); + for (i32 mip_idx = 0; mip_idx < G_CountMips(backdrop_target); ++mip_idx) + { + frame->backdrop_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx)); + frame->backdrop_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx)); + } + // Shade texture G_ResourceHandle shade_target = G_PushTexture2D( frame->gpu_arena, frame->cl, @@ -4926,13 +4942,37 @@ void V_TickForever(WaveLaneCtx *lane)
V.particle_seq = 0; } + // Backdrop passes: one compute dispatch per mip, each followed by G_DumbGlobalMemorySync + { + i32 mips_count = G_CountMips(backdrop_target); + G_SetConstant(frame->cl, V_GpuConst_MipsCount, mips_count); + + //- Downsample (mip 0 through mips_count - 1) + for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx) + { + Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(backdrop_target), mip_idx); + + G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); + G_Compute(frame->cl, V_BackdropDownCS, V_ThreadGroupSizeFromTexSize(down_dims)); + + G_DumbGlobalMemorySync(frame->cl); + } + + //- Upsample passes (mips_count - 2 back down to mip 0) + for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx) + { + Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(backdrop_target), mip_idx); + + G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); + G_Compute(frame->cl, V_BackdropUpCS, V_ThreadGroupSizeFromTexSize(up_dims)); + + G_DumbGlobalMemorySync(frame->cl); + } + } + // Prepare RTs G_DiscardRenderTarget(frame->cl, screen_target, 0); G_ClearRenderTarget(frame->cl, albedo_target, VEC4(0, 0, 0, 0), 0); - - // Sync - G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_General); - G_DumbGlobalMemorySync(frame->cl); } ////////////////////////////// @@ -5000,7 +5040,7 @@ void V_TickForever(WaveLaneCtx *lane) // actually represents the screen texture, while mip_idx - 1 represents // the first mip index in the bloom mip chain - //- Downsample + blur passes + //- Downsample for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx) { Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index d8bc3a43..95593b99 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -27,7 +27,6 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density) f32 t = smoothstep(-10, 50, density); // f32 t = smoothstep(0, 2, (f32)density); - result.a += (1.0 - result.a) * (t); } else if (desc.kind == V_ParticleKind_BloodTrail || desc.kind == V_ParticleKind_BloodDebris) @@ -170,6 +169,143 @@
ImplComputeShader(V_ClearParticlesCS) } } +//////////////////////////////////////////////////////////// +//~ Backdrop + +////////////////////////////// +//- Downsample (pass 0 reads the source sprite with parallax; later passes filter the previous mip) + +ImplComputeShader2D(V_BackdropDownCS) +{ + i32 mip_idx = V_GpuConst_MipIdx; + b32 is_first_pass = mip_idx == 0; + + + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; + SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); + + Texture2D bd_up; + if (is_first_pass) + { + bd_up = G_Dereference(frame.backdrop_src); + } + else + { + bd_up = G_Dereference(frame.backdrop_mips_ro[mip_idx - 1]); + } + RWTexture2D bd_down = G_Dereference(frame.backdrop_mips_rw[mip_idx]); + + + Vec2 down_dims = countof(bd_down); + + Vec2 bd_pos = SV_DispatchThreadID + 0.5; + Vec2 bd_uv = bd_pos / down_dims; + Vec2 off_uv = 0.5 / down_dims; + + Vec2 screen_pos = bd_uv * frame.screen_dims; + Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1)); + Rng2 world_bounds = { Vec2(-P_WorldPitch, -P_WorldPitch) * 0.5, Vec2(P_WorldPitch, P_WorldPitch) * 0.5 }; + + Vec4 result = 0; + if (is_first_pass) + { + f32 parallax = frame.backdrop_parallax; + + Vec2 cam_center = frame.camera_pos; + Vec2 backdrop_pos = lerp(cam_center, world_pos, parallax); + + Vec2 samp_t = frac(abs(backdrop_pos - world_bounds.p0) / (world_bounds.p1 - world_bounds.p0)); + Vec2 samp_uv = lerp(frame.backdrop_src_slice_uv.p0, frame.backdrop_src_slice_uv.p1, samp_t); + + result = bd_up[samp_uv * countof(bd_up)]; + } + else + { + // 5-tap sample: center (weight 0.5) plus four diagonal taps half a destination texel away (0.125 each) + Struct(SampleDesc) { Vec2 uv; f32 weight; }; + SampleDesc samples[] = { + { bd_uv + Vec2(0, 0), 0.5 }, + { bd_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 }, + { bd_uv + Vec2(off_uv.x, -off_uv.y), 0.125 }, + { bd_uv + Vec2(off_uv.x, off_uv.y), 0.125 }, + { bd_uv + Vec2(-off_uv.x, off_uv.y), 0.125 }, + }; + for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx) + { + SampleDesc desc = samples[sample_idx]; + result += bd_up.SampleLevel(sampler, desc.uv,
0) * desc.weight; + } + } + + if (IsInside(bd_pos, down_dims)) + { + bd_down[bd_pos] = result; + } +} + +////////////////////////////// +//- Upsample (filters mip_idx + 1 into mip_idx) + +ImplComputeShader2D(V_BackdropUpCS) +{ + i32 mip_idx = V_GpuConst_MipIdx; + b32 is_last_pass = mip_idx == 0; + + V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; + Texture2D bd_down = G_Dereference(frame.backdrop_mips_ro[mip_idx + 1]); + RWTexture2D bd_up = G_Dereference(frame.backdrop_mips_rw[mip_idx]); + SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); + + Vec2 down_dims = countof(bd_down); + Vec2 up_dims = countof(bd_up); + + Vec2 bd_pos = SV_DispatchThreadID + 0.5; + Vec2 bd_uv = bd_pos / up_dims; + Vec2 off_inner_uv = 1 / down_dims; + Vec2 off_outer_uv = off_inner_uv * 2; + + // Last pass (mip 0): single bilinear tap; other passes: 13-tap sample (weights sum to 41/41) + Vec4 result = 0; + if (is_last_pass) + { + result = bd_down.SampleLevel(sampler, bd_uv, 0); + } + else + { + // Center + result += bd_down.SampleLevel(sampler, bd_uv, 0) * 9.0f / 41.0f; + + // Outer Edges + result += ( + bd_down.SampleLevel(sampler, bd_uv + Vec2(0, -off_outer_uv.y), 0) + + bd_down.SampleLevel(sampler, bd_uv + Vec2(off_outer_uv.x, 0), 0) + + bd_down.SampleLevel(sampler, bd_uv + Vec2(0, off_outer_uv.y), 0) + + bd_down.SampleLevel(sampler, bd_uv + Vec2(-off_outer_uv.x, 0), 0) + ) * 3.0f / 41.0f; + + // Inner corners + result += ( + bd_down.SampleLevel(sampler, bd_uv + Vec2(-off_inner_uv.x, -off_inner_uv.y), 0) + + bd_down.SampleLevel(sampler, bd_uv + Vec2(off_inner_uv.x, -off_inner_uv.y), 0) + + bd_down.SampleLevel(sampler, bd_uv + Vec2(off_inner_uv.x, off_inner_uv.y), 0) + + bd_down.SampleLevel(sampler, bd_uv + Vec2(-off_inner_uv.x, off_inner_uv.y), 0) + ) * 4.0f / 41.0f; + + // Outer corners + result += ( + bd_down.SampleLevel(sampler, bd_uv + Vec2(-off_outer_uv.x, -off_outer_uv.y), 0) + + bd_down.SampleLevel(sampler, bd_uv + Vec2(off_outer_uv.x, -off_outer_uv.y), 0) + + bd_down.SampleLevel(sampler, bd_uv + Vec2(off_outer_uv.x, off_outer_uv.y), 0) + +
bd_down.SampleLevel(sampler, bd_uv + Vec2(-off_outer_uv.x, off_outer_uv.y), 0) + ) * 1.0f / 41.0f; + } + + if (IsInside(bd_pos, up_dims)) + { + bd_up[bd_pos] = result; + } +} + //////////////////////////////////////////////////////////// //~ Quads @@ -585,13 +721,15 @@ ImplComputeShader2D(V_CompositeCS) { V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; // Texture2D shade_tex = G_Dereference(frame.shade_ro); - SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); + SamplerState point_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); + SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); Texture2D albedo_tex = G_Dereference(frame.albedo_ro); RWTexture2D screen_tex = G_Dereference(frame.screen_rw); RWTexture2D stains = G_Dereference(frame.stains); RWTexture2D dry_stains = G_Dereference(frame.dry_stains); RWTexture2D drynesses = G_Dereference(frame.drynesses); Texture2D tiles = G_Dereference(frame.tiles); + Texture2D backdrop = G_Dereference(frame.backdrop_mips_ro[0]); RWStructuredBuffer particles = G_Dereference(frame.particles); Vec2 screen_pos = SV_DispatchThreadID.xy + 0.5; @@ -600,6 +738,8 @@ ImplComputeShader2D(V_CompositeCS) Vec2 shade_pos = mul(frame.af.screen_to_shade, Vec3(screen_pos.xy, 1)); Vec2 tile_pos = mul(frame.af.world_to_tile, Vec3(world_pos, 1)); + Vec2 screen_uv = screen_pos / frame.screen_dims; + Rng2 world_bounds = { Vec2(-P_WorldPitch, -P_WorldPitch) * 0.5, Vec2(P_WorldPitch, P_WorldPitch) * 0.5 }; Vec2 world_bounds_screen_p0 = mul(frame.af.world_to_screen, Vec3(world_bounds.p0, 1)); Vec2 world_bounds_screen_p1 = mul(frame.af.world_to_screen, Vec3(world_bounds.p1, 1)); @@ -612,22 +752,11 @@ ImplComputeShader2D(V_CompositeCS) ////////////////////////////// //- Backdrop color - Vec4 backdrop_color = Vec4(0.025, 0.025, 0.025, 1); + Vec4 backdrop_color = 0; { - // if (!frame.is_editing) - if (1) + if (!frame.is_editing) { -
Texture2D backdrop_tex = G_Dereference(frame.backdrop); - - f32 parallax = 2; - Vec2 cam_center = frame.camera_pos + frame.screen_dims * 0.5; - Vec2 backdrop_pos = lerp(cam_center, world_pos, parallax); - - Vec2 samp_t = frac(abs(backdrop_pos - world_bounds.p0) / (world_bounds.p1 - world_bounds.p0)); - samp_t = clamp(samp_t, 0.00001, 1.0 - 0.00001); - - Vec2 samp_uv = lerp(frame.backdrop_slice_uv.p0, frame.backdrop_slice_uv.p1, samp_t); - backdrop_color = backdrop_tex.SampleLevel(sampler, samp_uv, 0); + backdrop_color = backdrop.SampleLevel(bilinear_sampler, screen_uv, 0); /* backdrop is backdrop_mips_ro[0], sampled at this pixel's screen UV */ } else if (is_in_world) { @@ -648,7 +777,10 @@ ImplComputeShader2D(V_CompositeCS) } backdrop_color = colors[color_idx]; } - + else + { + backdrop_color = Vec4(0.025, 0.025, 0.025, 1); + } backdrop_color.rgb *= backdrop_color.a; } @@ -709,7 +841,7 @@ ImplComputeShader2D(V_CompositeCS) Texture2D tile_tex = G_Dereference(tile_desc.tex); Vec2 samp_t = clamp(frac(world_pos), 0.00001, 1.0 - 0.00001); Vec2 samp_uv = lerp(tile_desc.tex_slice_uv.p0, tile_desc.tex_slice_uv.p1, samp_t); - tile_color = tile_tex.SampleLevel(sampler, samp_uv, 0); + tile_color = tile_tex.SampleLevel(point_sampler, samp_uv, 0); } } @@ -987,7 +1119,6 @@ ImplComputeShader2D(V_CompositeCS) ImplComputeShader2D(V_BloomDownCS) { - i32 mips_count = V_GpuConst_MipsCount; i32 mip_idx = V_GpuConst_MipIdx; V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; @@ -1063,7 +1194,6 @@ ImplComputeShader2D(V_BloomDownCS) ImplComputeShader2D(V_BloomUpCS) { - i32 mips_count = V_GpuConst_MipsCount; i32 mip_idx = V_GpuConst_MipIdx; V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; diff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh index c0afd030..db690ca8 100644 --- a/src/pp/pp_vis/pp_vis_gpu.gh +++ b/src/pp/pp_vis/pp_vis_gpu.gh @@ -55,6 +55,10 @@ DeclComputeShader2D(V_PrepareShadeCS, 16, 16); DeclComputeShader2D(V_PrepareCellsCS, 16, 16); DeclComputeShader(V_ClearParticlesCS, 256); +//- Backdrop
+DeclComputeShader2D(V_BackdropDownCS, 16, 16); +DeclComputeShader2D(V_BackdropUpCS, 16, 16); + //- Quads DeclVertexShader(V_QuadVS, V_QuadPSInput); DeclPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input); diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh index 6429d2d6..67c625d9 100644 --- a/src/pp/pp_vis/pp_vis_shared.cgh +++ b/src/pp/pp_vis/pp_vis_shared.cgh @@ -341,12 +341,16 @@ Struct(V_SharedFrame) G_SamplerStateRef basic_samplers[G_BasicSamplerKind_COUNT]; - G_Texture2DRef backdrop; - Rng2 backdrop_slice_uv; - V_TileDesc tile_descs[P_TileKind_COUNT]; G_Texture2DRef tiles; + G_Texture2DRef backdrop_src; /* backdrop sprite texture, set from the backdrop.ase sprite */ + Rng2 backdrop_src_slice_uv; /* sprite rect divided by the texture dims */ + + f32 backdrop_parallax; /* lerp factor between camera_pos and world_pos in V_BackdropDownCS */ + G_Texture2DRef backdrop_mips_ro[G_MaxMips]; /* read-only ref per backdrop target mip */ + G_RWTexture2DRef backdrop_mips_rw[G_MaxMips]; /* read-write ref per backdrop target mip */ + G_Texture2DRef screen_ro; G_RWTexture2DRef screen_rw; G_Texture2DRef shade_ro;