thresholded & smoothed bloom

This commit is contained in:
jacob 2026-02-15 15:20:46 -06:00
parent 78e9635840
commit 9ca01a920d
11 changed files with 255 additions and 108 deletions

4
src/gpu/gpu.lay generated
View File

@ -13,11 +13,11 @@
////////////////////////////// //////////////////////////////
//- Api //- Api
@IncludeC gpu_shader_core.cgh @IncludeC gpu_shared.cgh
@IncludeC gpu_core.h @IncludeC gpu_core.h
@IncludeC gpu_common.h @IncludeC gpu_common.h
@IncludeG gpu_shader_core.cgh @IncludeG gpu_shared.cgh
@Bootstrap G_Bootstrap @Bootstrap G_Bootstrap
@Bootstrap G_BootstrapCommon @Bootstrap G_BootstrapCommon

View File

@ -475,8 +475,6 @@ Struct(G_RefDesc)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Rasterization types //~ Rasterization types
#define G_MaxRenderTargets 8
Enum(G_RasterMode) Enum(G_RasterMode)
{ {
G_RasterMode_None, G_RasterMode_None,

View File

@ -1129,7 +1129,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
else if (is_texture) else if (is_texture)
{ {
i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z); i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z);
i32 max_mips = FloorF32(Log2F32(largest_dim)) + 1; i32 max_mips = MinI32(FloorF32(Log2F32(largest_dim)) + 1, G_MaxMips);
d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout); d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout);
d3d_desc.Dimension = ( d3d_desc.Dimension = (
desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :

View File

@ -113,6 +113,9 @@ Enum(G_BasicSamplerKind)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Resource countof //~ Resource countof
#define G_MaxMips 16
#define G_MaxRenderTargets 8
#if IsGpu #if IsGpu
template<typename T> u32 countof(StructuredBuffer<T> obj) { u32 result; obj.GetDimensions(result); return result; } template<typename T> u32 countof(StructuredBuffer<T> obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> u32 countof(RWStructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; } template<typename T> u32 countof(RWStructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }

View File

@ -544,7 +544,9 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wshadow")); PushStringToList(perm, &cp.warnings_dxc, Lit("-Wshadow"));
// Disable warnings // Disable warnings
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-local-type-template-args"));
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-variable")); PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-variable"));
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-local-typedef"));
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-conversion")); PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-conversion"));
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-switch")); PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-switch"));
} }

View File

@ -24,8 +24,9 @@
@ComputeShader V_SimParticlesCS @ComputeShader V_SimParticlesCS
@ComputeShader V_ShadeCS @ComputeShader V_ShadeCS
@ComputeShader V_CompositeCS @ComputeShader V_CompositeCS
@ComputeShader V_BlurDownCS @ComputeShader V_BloomDownCS
@ComputeShader V_BlurUpCS @ComputeShader V_BloomUpCS
@ComputeShader V_PostProcessCS
@VertexShader V_DVertVS @VertexShader V_DVertVS
@PixelShader V_DVertPS @PixelShader V_DVertPS

View File

@ -2620,8 +2620,6 @@ void V_TickForever(WaveLaneCtx *lane)
} }
} }
////////////////////////////// //////////////////////////////
//- Push test explosion //- Push test explosion
@ -2654,6 +2652,7 @@ void V_TickForever(WaveLaneCtx *lane)
// emitter.falloff.min = emitter.falloff.max = 0; // emitter.falloff.min = emitter.falloff.max = 0;
// emitter.count = CeilF32(Kibi(64) * frame->dt); // emitter.count = CeilF32(Kibi(64) * frame->dt);
// emitter.count = CeilF32(Mebi(32) * frame->dt);
// emitter.count = Mebi(16); // emitter.count = Mebi(16);
// emitter.count = Mebi(2); // emitter.count = Mebi(2);
// emitter.count = Kibi(32); // emitter.count = Kibi(32);
@ -4849,15 +4848,28 @@ void V_TickForever(WaveLaneCtx *lane)
frame->screen_dims, frame->screen_dims,
G_Layout_DirectQueue_ShaderReadWrite, G_Layout_DirectQueue_ShaderReadWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)), .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
.max_mips = 4 // For bloom pyramid
); );
Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1)); Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1));
Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y)); Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y));
for (i32 mip_idx = 0; mip_idx < G_CountMips(screen_target); ++mip_idx) frame->screen_ro = G_PushTexture2DRef(frame->gpu_arena, screen_target);
frame->screen_rw = G_PushRWTexture2DRef(frame->gpu_arena, screen_target);
// Bloom texture
G_ResourceHandle bloom_target = G_PushTexture2D(
frame->gpu_arena, frame->cl,
G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(screen_target), 1),
G_Layout_DirectQueue_ShaderReadWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),
// .max_mips = 4
.max_mips = 8
);
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)
{ {
frame->screen_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx)); frame->bloom_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
frame->screen_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx)); frame->bloom_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
} }
// Albedo texture // Albedo texture
@ -4929,8 +4941,6 @@ void V_TickForever(WaveLaneCtx *lane)
frame->emitters = G_PushStructuredBufferRef(frame->gpu_arena, gpu_emitters, V_Emitter); frame->emitters = G_PushStructuredBufferRef(frame->gpu_arena, gpu_emitters, V_Emitter);
// Upload gpu frame // Upload gpu frame
{
// Gpu frame
G_ResourceHandle gpu_frame_res = G_PushBufferFromCpuCopy( G_ResourceHandle gpu_frame_res = G_PushBufferFromCpuCopy(
frame->gpu_arena, frame->cl, frame->gpu_arena, frame->cl,
StringFromStruct(&frame->shared_frame), StringFromStruct(&frame->shared_frame),
@ -4938,10 +4948,11 @@ void V_TickForever(WaveLaneCtx *lane)
); );
G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame); G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame);
// Set constants // Set initial constants
V_GpuFlag gpu_flags = V_GpuFlag_None;
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
G_SetConstant(frame->cl, V_GpuConst_Frame, gpu_frame); G_SetConstant(frame->cl, V_GpuConst_Frame, gpu_frame);
G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture()); G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture());
}
// Sync // Sync
G_DumbGlobalMemorySync(frame->cl); G_DumbGlobalMemorySync(frame->cl);
@ -5024,45 +5035,70 @@ void V_TickForever(WaveLaneCtx *lane)
{ {
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
} }
////////////////////////////// //////////////////////////////
//- Blur passes //- Bloom passes
{ {
// TODO: Limit passes i32 mips_count = G_CountMips(bloom_target);
i32 mips_count = G_CountMips(screen_target);
// Downsample + blur passes G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
G_LogResource(frame->cl, screen_target);
for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx) //- Downsample + blur passes
for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx)
{ {
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
if (mip_idx == 0)
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1)); {
// G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx)); // Init bloom pyramid from screen target on first pass (prefilter)
gpu_flags |= V_GpuFlag_InitBloom;
G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
G_Compute(frame->cl, V_BlurDownCS, V_ThreadGroupSizeFromTexSize(dims)); G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro);
}
else
{
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]);
}
G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
{
G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims));
}
gpu_flags &= ~V_GpuFlag_InitBloom;
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
} }
// Upsample passes //- Upsample passes
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx) for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)
{ {
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx)); G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1)); G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));
G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]);
G_Compute(frame->cl, V_BlurUpCS, V_ThreadGroupSizeFromTexSize(dims)); G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims));
} }
} }
//////////////////////////////
//- Post process pass
{
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite);
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));
G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
}
////////////////////////////// //////////////////////////////
//- Debug shapes pass //- Debug shapes pass
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite, .mips = RNGI32(0, 0)); G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
{ {
G_Rasterize( G_Rasterize(
@ -5078,13 +5114,13 @@ void V_TickForever(WaveLaneCtx *lane)
////////////////////////////// //////////////////////////////
//- Finalize screen target //- Finalize screen target
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0)); G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
{ {
Rng2 uv = Zi; Rng2 uv = Zi;
uv.p0 = Vec2FromVec(screen_viewport.p0); uv.p0 = Vec2FromVec(screen_viewport.p0);
uv.p1 = Vec2FromVec(screen_viewport.p1); uv.p1 = Vec2FromVec(screen_viewport.p1);
uv = DivRng2Vec2(uv, Vec2FromVec(frame->screen_dims)); uv = DivRng2Vec2(uv, Vec2FromVec(frame->screen_dims));
UI_SetRawTexture(vis_box, frame->screen_mips_ro[0], uv); UI_SetRawTexture(vis_box, frame->screen_ro, uv);
} }
} }

View File

@ -62,7 +62,7 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den
} }
} }
result.rgb = saturate(result.rgb + (rand_color - 0.5) * 0.05); result.rgb = result.rgb + (rand_color - 0.5) * 0.05;
// result.a += (rand_alpha - 0.5) * 0.025; // result.a += (rand_alpha - 0.5) * 0.025;
// result.a *= rand_alpha; // result.a *= rand_alpha;
@ -72,6 +72,13 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den
return result; return result;
} }
// Filmic tone-mapping curve: the fitted ACES approximation published by Krzysztof Narkowicz.
// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
// Evaluates the rational curve on `v` and clamps the result with saturate().
Vec3 V_ToneMap(Vec3 v)
{
  Vec3 numerator = v * (2.51f * v + 0.03f);
  Vec3 denominator = v * (2.43f * v + 0.59f) + 0.14f;
  return saturate(numerator / denominator);
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Prepare frame //~ Prepare frame
@ -569,7 +576,7 @@ ComputeShader2D(V_CompositeCS, 8, 8)
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro); // Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro); Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_mips_rw[0]); RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
RWTexture2D<u32> stain_cells = G_Dereference<u32>(frame.stain_cells); RWTexture2D<u32> stain_cells = G_Dereference<u32>(frame.stain_cells);
RWTexture2D<u32> ground_cells = G_Dereference<u32>(frame.ground_cells); RWTexture2D<u32> ground_cells = G_Dereference<u32>(frame.ground_cells);
RWTexture2D<u32> stain_densities = G_Dereference<u32>(frame.stain_densities); RWTexture2D<u32> stain_densities = G_Dereference<u32>(frame.stain_densities);
@ -922,7 +929,7 @@ ComputeShader2D(V_CompositeCS, 8, 8)
////////////////////////////// //////////////////////////////
//- Compose result //- Compose result
Vec4 result = Vec4(0, 0, 0, 1); Vec4 result = 0;
result = BlendPremul(world_color, result); result = BlendPremul(world_color, result);
result = BlendPremul(overlay_color, result); result = BlendPremul(overlay_color, result);
@ -935,75 +942,157 @@ ComputeShader2D(V_CompositeCS, 8, 8)
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Blur //~ Bloom
ComputeShader2D(V_BlurDownCS, 8, 8) ComputeShader2D(V_BloomDownCS, 8, 8)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<Vec4> screen_up = G_Dereference<Vec4>(frame.screen_mips_ro[V_GpuConst_MipIdx - 1]); Texture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomRead);
RWTexture2D<Vec4> screen_down = G_Dereference<Vec4>(frame.screen_mips_rw[V_GpuConst_MipIdx]); RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
Vec2 up_dims = countof(screen_up); Vec2 up_dims = countof(bloom_up);
Vec2 down_dims = countof(screen_down); Vec2 down_dims = countof(bloom_down);
Vec2 blur_pos = SV_DispatchThreadID + 0.5; Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
Vec2 blur_uv = blur_pos / down_dims; Vec2 bloom_uv = bloom_pos / down_dims;
f32 offset_uv = 0.5 / up_dims; Vec2 off_uv = 0.5 / down_dims;
b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom);
Struct(SampleDesc) { Vec2 uv; f32 weight; };
SampleDesc samples[] = {
{ bloom_uv + Vec2(0, 0), 0.5 },
{ bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
{ bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
{ bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
{ bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
};
Vec4 result = 0;
for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
{
SampleDesc desc = samples[sample_idx];
Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);
f32 knee_weight = 1;
if (is_first_pass)
{
f32 luminance = LuminanceFromColor(src);
f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance
f32 bright = max(luminance, max_rgb * 0.5);
if (bright > 0)
{
f32 threshold = 1.0;
f32 knee = 0.5;
f32 over_threshold = max(bright - threshold, 0.0);
f32 ramp = saturate(over_threshold / knee);
knee_weight = (over_threshold * ramp * ramp) / bright;
}
else
{
knee_weight = 0;
}
}
result += src * desc.weight * knee_weight;
}
if (all(bloom_pos >= 0) && all(bloom_pos < down_dims))
{
bloom_down[bloom_pos] = result;
}
}
ComputeShader2D(V_BloomUpCS, 8, 8)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomRead);
RWTexture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
Vec2 up_dims = countof(bloom_up);
Vec2 down_dims = countof(bloom_down);
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
Vec2 bloom_uv = bloom_pos / up_dims;
Vec2 off_uv = 1 / up_dims;
Vec4 result = 0; Vec4 result = 0;
{ {
Vec4 accum = 0;
accum += screen_up.Sample(sampler, blur_uv + Vec2(-offset_uv, -offset_uv));
accum += screen_up.Sample(sampler, blur_uv + Vec2(offset_uv, -offset_uv));
accum += screen_up.Sample(sampler, blur_uv + Vec2(offset_uv, offset_uv));
accum += screen_up.Sample(sampler, blur_uv + Vec2(-offset_uv, offset_uv));
result = accum / 4.0f;
}
if (all(blur_pos >= 0) && all(blur_pos < countof(screen_down)))
{
screen_down[blur_pos] = result;
}
}
ComputeShader2D(V_BlurUpCS, 8, 8)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<Vec4> screen_down = G_Dereference<Vec4>(frame.screen_mips_ro[V_GpuConst_MipIdx + 1]);
RWTexture2D<Vec4> screen_up = G_Dereference<Vec4>(frame.screen_mips_rw[V_GpuConst_MipIdx]);
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
Vec2 up_dims = countof(screen_up);
Vec2 down_dims = countof(screen_down);
Vec2 blur_pos = SV_DispatchThreadID + 0.5;
Vec2 blur_uv = blur_pos / up_dims;
f32 offset_uv = 1 / down_dims;
Vec4 result = 0;
// Center // Center
result += screen_down.Sample(sampler, blur_uv) * 4; result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4;
// Edges // Edges
result += screen_down.Sample(sampler, blur_uv + Vec2(0, -offset_uv)) * 2; result += (
result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, 0)) * 2; bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) +
result += screen_down.Sample(sampler, blur_uv + Vec2(0, offset_uv)) * 2; bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) +
result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, 0)) * 2; bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) +
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0)
) * 2;
// Corners // Corners
result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, -offset_uv)); result += (
result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, -offset_uv)); bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) +
result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, offset_uv)); bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) +
result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, offset_uv)); bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) +
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0)
);
// Normalize // Normalize
result /= 16; result /= 16;
}
if (all(blur_pos >= 0) && all(blur_pos < countof(screen_up))) if (all(bloom_pos >= 0) && all(bloom_pos < up_dims))
{ {
screen_up[blur_pos] += result; bloom_up[bloom_pos] += result;
}
}
////////////////////////////////////////////////////////////
//~ Post process
ComputeShader2D(V_PostProcessCS, 8, 8)
{
  // Adds mip 0 of the bloom pyramid on top of the screen target, reading and
  // writing the screen texture in place.
  V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
  RWTexture2D<Vec4> screen_out = G_Dereference<Vec4>(frame.screen_rw);
  Texture2D<Vec4> bloom_mip0 = G_Dereference<Vec4>(frame.bloom_mips_ro[0]);
  SamplerState linear_clamp = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);

  // Texel center handled by this thread, and whether it lies inside the screen
  Vec2 texel_pos = SV_DispatchThreadID + 0.5;
  Vec2 texel_uv = texel_pos / frame.screen_dims;
  b32 texel_is_visible = all(texel_pos >= 0) && all(texel_pos < frame.screen_dims);

  //////////////////////////////
  //- Fetch inputs

  // Both inputs stay zero for threads that fall outside the screen
  Vec4 screen_color = 0;
  Vec4 bloom_color = 0;
  if (texel_is_visible)
  {
    // Stored screen color is multiplied by its own alpha before blending
    screen_color = screen_out[texel_pos];
    screen_color.rgb *= screen_color.a;

    // Bloom is sampled bilinearly at the screen uv; its rgb is used as-is
    // (multiplying it by bloom alpha is intentionally left disabled)
    bloom_color = bloom_mip0.SampleLevel(linear_clamp, texel_uv, 0);
  }

  //////////////////////////////
  //- Compose

  // Blend the screen color over opaque black, then add the bloom on top.
  // NOTE(review): the addition includes bloom alpha, so alpha may differ from 1
  // going into Unpremul - confirm this is intended.
  // Tone mapping via V_ToneMap is currently not applied here.
  Vec4 composed = BlendPremul(screen_color, Vec4(0, 0, 0, 1));
  composed += bloom_color;
  composed = Unpremul(composed);

  if (texel_is_visible)
  {
    screen_out[texel_pos] = composed;
  }
}

View File

@ -46,6 +46,7 @@ Struct(V_DVertPSOutput)
f32 V_RandFromPos(Vec3 pos); f32 V_RandFromPos(Vec3 pos);
Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 density, f32 dryness); Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 density, f32 dryness);
Vec3 V_ToneMap(Vec3 v);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Shaders //~ Shaders
@ -68,9 +69,12 @@ ComputeShader2D(V_ShadeCS, 8, 8);
//- Composite //- Composite
ComputeShader2D(V_CompositeCS, 8, 8); ComputeShader2D(V_CompositeCS, 8, 8);
//- Blur //- Bloom
ComputeShader2D(V_BlurDownCS, 8, 8); ComputeShader2D(V_BloomDownCS, 8, 8);
ComputeShader2D(V_BlurUpCS, 8, 8); ComputeShader2D(V_BloomUpCS, 8, 8);
//- Post process
ComputeShader2D(V_PostProcessCS, 8, 8);
//- Debug shapes //- Debug shapes
VertexShader(V_DVertVS, V_DVertPSInput); VertexShader(V_DVertVS, V_DVertPSInput);

View File

@ -4,11 +4,22 @@
// #define V_ParticlesCap Mebi(16) // #define V_ParticlesCap Mebi(16)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ State types //~ Constant types
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 0); Enum(V_GpuFlag)
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 1); {
G_DeclConstant(u32, V_GpuConst_MipIdx, 2); V_GpuFlag_None = 0,
V_GpuFlag_InitBloom = (1 << 0),
};
G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0);
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1);
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2);
G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3);
G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);
////////////////////////////////////////////////////////////
//~ State types
Struct(V_TileDesc) Struct(V_TileDesc)
{ {
@ -136,13 +147,16 @@ Struct(V_SharedFrame)
V_TileDesc tile_descs[P_TileKind_COUNT]; V_TileDesc tile_descs[P_TileKind_COUNT];
G_Texture2DRef tiles; G_Texture2DRef tiles;
G_Texture2DRef screen_mips_ro[16]; G_Texture2DRef screen_ro;
G_RWTexture2DRef screen_mips_rw[16]; G_RWTexture2DRef screen_rw;
G_Texture2DRef shade_ro; G_Texture2DRef shade_ro;
G_RWTexture2DRef shade_rw; G_RWTexture2DRef shade_rw;
G_Texture2DRef albedo_ro; G_Texture2DRef albedo_ro;
G_RWTexture2DRef albedo_rw; G_RWTexture2DRef albedo_rw;
G_Texture2DRef bloom_mips_ro[G_MaxMips];
G_RWTexture2DRef bloom_mips_rw[G_MaxMips];
u32 emitters_count; u32 emitters_count;
G_StructuredBufferRef emitters; G_StructuredBufferRef emitters;
G_RWStructuredBufferRef particles; G_RWStructuredBufferRef particles;
@ -214,7 +228,7 @@ Enum(V_ParticleFlag)
/* Name */ Debris, \ /* Name */ Debris, \
/* Flags */ V_ParticleFlag_Ground | V_ParticleFlag_PruneWhenStill | V_ParticleFlag_StainWhenPruned, \ /* Flags */ V_ParticleFlag_Ground | V_ParticleFlag_PruneWhenStill | V_ParticleFlag_StainWhenPruned, \
/* Stain rate, pen chance */ 0, 0, \ /* Stain rate, pen chance */ 0, 0, \
/* Base color */ 1, 0.5, 0, 1 \ /* Base color */ 2.0, 0.5, 0, 1 \
) \ ) \
\ \
/* Air particles */ \ /* Air particles */ \

View File

@ -1735,7 +1735,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
); );
G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, UI_GpuParams); G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, UI_GpuParams);
// Constants // Initial constants
G_SetConstant(frame->cl, UI_GpuConst_Params, params_ro); G_SetConstant(frame->cl, UI_GpuConst_Params, params_ro);
// Sync // Sync