thresholded & smoothed bloom
This commit is contained in:
parent
78e9635840
commit
9ca01a920d
4
src/gpu/gpu.lay
generated
4
src/gpu/gpu.lay
generated
@ -13,11 +13,11 @@
|
||||
//////////////////////////////
|
||||
//- Api
|
||||
|
||||
@IncludeC gpu_shader_core.cgh
|
||||
@IncludeC gpu_shared.cgh
|
||||
@IncludeC gpu_core.h
|
||||
@IncludeC gpu_common.h
|
||||
|
||||
@IncludeG gpu_shader_core.cgh
|
||||
@IncludeG gpu_shared.cgh
|
||||
|
||||
@Bootstrap G_Bootstrap
|
||||
@Bootstrap G_BootstrapCommon
|
||||
|
||||
@ -475,8 +475,6 @@ Struct(G_RefDesc)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Rasterization types
|
||||
|
||||
#define G_MaxRenderTargets 8
|
||||
|
||||
Enum(G_RasterMode)
|
||||
{
|
||||
G_RasterMode_None,
|
||||
|
||||
@ -1129,7 +1129,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
|
||||
else if (is_texture)
|
||||
{
|
||||
i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z);
|
||||
i32 max_mips = FloorF32(Log2F32(largest_dim)) + 1;
|
||||
i32 max_mips = MinI32(FloorF32(Log2F32(largest_dim)) + 1, G_MaxMips);
|
||||
d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout);
|
||||
d3d_desc.Dimension = (
|
||||
desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :
|
||||
|
||||
@ -113,6 +113,9 @@ Enum(G_BasicSamplerKind)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Resource countof
|
||||
|
||||
#define G_MaxMips 16
|
||||
#define G_MaxRenderTargets 8
|
||||
|
||||
#if IsGpu
|
||||
// Returns the first value reported by GetDimensions for a read-only structured buffer.
// NOTE(review): HLSL documents StructuredBuffer::GetDimensions with two out parameters
// (numStructs, stride), which is how the RWStructuredBuffer overload below calls it -
// confirm that this one-argument call compiles once the template is instantiated.
template<typename T> u32 countof(StructuredBuffer<T> obj) { u32 result; obj.GetDimensions(result); return result; }
|
||||
// Returns the first value reported by GetDimensions for a read-write structured buffer.
// The second out value (stride) is required by the call but discarded.
template<typename T> u32 countof(RWStructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }
|
||||
@ -544,7 +544,9 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
|
||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wshadow"));
|
||||
|
||||
// Disable warnings
|
||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-local-type-template-args"));
|
||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-variable"));
|
||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-local-typedef"));
|
||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-conversion"));
|
||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-switch"));
|
||||
}
|
||||
|
||||
5
src/pp/pp_vis/pp_vis.lay
generated
5
src/pp/pp_vis/pp_vis.lay
generated
@ -24,8 +24,9 @@
|
||||
@ComputeShader V_SimParticlesCS
|
||||
@ComputeShader V_ShadeCS
|
||||
@ComputeShader V_CompositeCS
|
||||
@ComputeShader V_BlurDownCS
|
||||
@ComputeShader V_BlurUpCS
|
||||
@ComputeShader V_BloomDownCS
|
||||
@ComputeShader V_BloomUpCS
|
||||
@ComputeShader V_PostProcessCS
|
||||
@VertexShader V_DVertVS
|
||||
@PixelShader V_DVertPS
|
||||
|
||||
|
||||
@ -2620,8 +2620,6 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//////////////////////////////
|
||||
//- Push test explosion
|
||||
|
||||
@ -2654,6 +2652,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
// emitter.falloff.min = emitter.falloff.max = 0;
|
||||
|
||||
// emitter.count = CeilF32(Kibi(64) * frame->dt);
|
||||
// emitter.count = CeilF32(Mebi(32) * frame->dt);
|
||||
// emitter.count = Mebi(16);
|
||||
// emitter.count = Mebi(2);
|
||||
// emitter.count = Kibi(32);
|
||||
@ -4849,15 +4848,28 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
frame->screen_dims,
|
||||
G_Layout_DirectQueue_ShaderReadWrite,
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
|
||||
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)),
|
||||
.max_mips = 4 // For bloom pyramid
|
||||
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
|
||||
);
|
||||
Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1));
|
||||
Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y));
|
||||
for (i32 mip_idx = 0; mip_idx < G_CountMips(screen_target); ++mip_idx)
|
||||
frame->screen_ro = G_PushTexture2DRef(frame->gpu_arena, screen_target);
|
||||
frame->screen_rw = G_PushRWTexture2DRef(frame->gpu_arena, screen_target);
|
||||
|
||||
// Bloom texture
|
||||
G_ResourceHandle bloom_target = G_PushTexture2D(
|
||||
frame->gpu_arena, frame->cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
G_DimsFromMip2D(G_Count2D(screen_target), 1),
|
||||
G_Layout_DirectQueue_ShaderReadWrite,
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
|
||||
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),
|
||||
// .max_mips = 4
|
||||
.max_mips = 8
|
||||
);
|
||||
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)
|
||||
{
|
||||
frame->screen_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx));
|
||||
frame->screen_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx));
|
||||
frame->bloom_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
|
||||
frame->bloom_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
|
||||
}
|
||||
|
||||
// Albedo texture
|
||||
@ -4929,19 +4941,18 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
frame->emitters = G_PushStructuredBufferRef(frame->gpu_arena, gpu_emitters, V_Emitter);
|
||||
|
||||
// Upload gpu frame
|
||||
{
|
||||
// Gpu frame
|
||||
G_ResourceHandle gpu_frame_res = G_PushBufferFromCpuCopy(
|
||||
frame->gpu_arena, frame->cl,
|
||||
StringFromStruct(&frame->shared_frame),
|
||||
.name = StringF(frame->arena, "Gpu frame [%F]", FmtSint(frame->tick))
|
||||
);
|
||||
G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame);
|
||||
G_ResourceHandle gpu_frame_res = G_PushBufferFromCpuCopy(
|
||||
frame->gpu_arena, frame->cl,
|
||||
StringFromStruct(&frame->shared_frame),
|
||||
.name = StringF(frame->arena, "Gpu frame [%F]", FmtSint(frame->tick))
|
||||
);
|
||||
G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame);
|
||||
|
||||
// Set constants
|
||||
G_SetConstant(frame->cl, V_GpuConst_Frame, gpu_frame);
|
||||
G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture());
|
||||
}
|
||||
// Set initial constants
|
||||
V_GpuFlag gpu_flags = V_GpuFlag_None;
|
||||
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
|
||||
G_SetConstant(frame->cl, V_GpuConst_Frame, gpu_frame);
|
||||
G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture());
|
||||
|
||||
// Sync
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
@ -5024,45 +5035,70 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
|
||||
{
|
||||
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Blur passes
|
||||
//- Bloom passes
|
||||
|
||||
{
|
||||
// TODO: Limit passes
|
||||
i32 mips_count = G_CountMips(screen_target);
|
||||
i32 mips_count = G_CountMips(bloom_target);
|
||||
|
||||
// Downsample + blur passes
|
||||
G_LogResource(frame->cl, screen_target);
|
||||
for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx)
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
|
||||
|
||||
//- Downsample + blur passes
|
||||
for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx)
|
||||
{
|
||||
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));
|
||||
// G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
|
||||
|
||||
G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
|
||||
G_Compute(frame->cl, V_BlurDownCS, V_ThreadGroupSizeFromTexSize(dims));
|
||||
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
|
||||
if (mip_idx == 0)
|
||||
{
|
||||
// Init bloom pyramid from screen target on first pass (prefilter)
|
||||
gpu_flags |= V_GpuFlag_InitBloom;
|
||||
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
|
||||
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro);
|
||||
}
|
||||
else
|
||||
{
|
||||
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));
|
||||
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]);
|
||||
}
|
||||
G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
|
||||
{
|
||||
G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims));
|
||||
}
|
||||
gpu_flags &= ~V_GpuFlag_InitBloom;
|
||||
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
|
||||
}
|
||||
|
||||
// Upsample passes
|
||||
//- Upsample passes
|
||||
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)
|
||||
{
|
||||
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
|
||||
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));
|
||||
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
|
||||
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));
|
||||
|
||||
G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
|
||||
G_Compute(frame->cl, V_BlurUpCS, V_ThreadGroupSizeFromTexSize(dims));
|
||||
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]);
|
||||
G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
|
||||
|
||||
G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims));
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Post process pass
|
||||
|
||||
{
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite);
|
||||
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));
|
||||
G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Debug shapes pass
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite, .mips = RNGI32(0, 0));
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
|
||||
|
||||
{
|
||||
G_Rasterize(
|
||||
@ -5078,13 +5114,13 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
//////////////////////////////
|
||||
//- Finalize screen target
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
|
||||
{
|
||||
Rng2 uv = Zi;
|
||||
uv.p0 = Vec2FromVec(screen_viewport.p0);
|
||||
uv.p1 = Vec2FromVec(screen_viewport.p1);
|
||||
uv = DivRng2Vec2(uv, Vec2FromVec(frame->screen_dims));
|
||||
UI_SetRawTexture(vis_box, frame->screen_mips_ro[0], uv);
|
||||
UI_SetRawTexture(vis_box, frame->screen_ro, uv);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -62,7 +62,7 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den
|
||||
}
|
||||
}
|
||||
|
||||
result.rgb = saturate(result.rgb + (rand_color - 0.5) * 0.05);
|
||||
result.rgb = result.rgb + (rand_color - 0.5) * 0.05;
|
||||
// result.a += (rand_alpha - 0.5) * 0.025;
|
||||
// result.a *= rand_alpha;
|
||||
|
||||
@ -72,6 +72,13 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den
|
||||
return result;
|
||||
}
|
||||
|
||||
// ACES approximation by Krzysztof Narkowicz
|
||||
// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
|
||||
// Evaluates the rational curve (v * (2.51v + 0.03)) / (v * (2.43v + 0.59) + 0.14) on each
// component of `v` and clamps the result to [0, 1] with saturate().
// NOTE(review): presumably `v` is a linear HDR color - confirm against the call site.
Vec3 V_ToneMap(Vec3 v)
|
||||
{
|
||||
return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Prepare frame
|
||||
|
||||
@ -569,7 +576,7 @@ ComputeShader2D(V_CompositeCS, 8, 8)
|
||||
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
||||
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
|
||||
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_mips_rw[0]);
|
||||
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
|
||||
RWTexture2D<u32> stain_cells = G_Dereference<u32>(frame.stain_cells);
|
||||
RWTexture2D<u32> ground_cells = G_Dereference<u32>(frame.ground_cells);
|
||||
RWTexture2D<u32> stain_densities = G_Dereference<u32>(frame.stain_densities);
|
||||
@ -922,7 +929,7 @@ ComputeShader2D(V_CompositeCS, 8, 8)
|
||||
//////////////////////////////
|
||||
//- Compose result
|
||||
|
||||
Vec4 result = Vec4(0, 0, 0, 1);
|
||||
Vec4 result = 0;
|
||||
result = BlendPremul(world_color, result);
|
||||
result = BlendPremul(overlay_color, result);
|
||||
|
||||
@ -935,75 +942,157 @@ ComputeShader2D(V_CompositeCS, 8, 8)
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Blur
|
||||
//~ Bloom
|
||||
|
||||
ComputeShader2D(V_BlurDownCS, 8, 8)
|
||||
ComputeShader2D(V_BloomDownCS, 8, 8)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
Texture2D<Vec4> screen_up = G_Dereference<Vec4>(frame.screen_mips_ro[V_GpuConst_MipIdx - 1]);
|
||||
RWTexture2D<Vec4> screen_down = G_Dereference<Vec4>(frame.screen_mips_rw[V_GpuConst_MipIdx]);
|
||||
Texture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomRead);
|
||||
RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
|
||||
Vec2 up_dims = countof(screen_up);
|
||||
Vec2 down_dims = countof(screen_down);
|
||||
Vec2 up_dims = countof(bloom_up);
|
||||
Vec2 down_dims = countof(bloom_down);
|
||||
|
||||
Vec2 blur_pos = SV_DispatchThreadID + 0.5;
|
||||
Vec2 blur_uv = blur_pos / down_dims;
|
||||
f32 offset_uv = 0.5 / up_dims;
|
||||
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
|
||||
Vec2 bloom_uv = bloom_pos / down_dims;
|
||||
Vec2 off_uv = 0.5 / down_dims;
|
||||
b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom);
|
||||
|
||||
Struct(SampleDesc) { Vec2 uv; f32 weight; };
|
||||
SampleDesc samples[] = {
|
||||
{ bloom_uv + Vec2(0, 0), 0.5 },
|
||||
{ bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
|
||||
{ bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
|
||||
{ bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
|
||||
{ bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
|
||||
};
|
||||
|
||||
Vec4 result = 0;
|
||||
for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
|
||||
{
|
||||
Vec4 accum = 0;
|
||||
accum += screen_up.Sample(sampler, blur_uv + Vec2(-offset_uv, -offset_uv));
|
||||
accum += screen_up.Sample(sampler, blur_uv + Vec2(offset_uv, -offset_uv));
|
||||
accum += screen_up.Sample(sampler, blur_uv + Vec2(offset_uv, offset_uv));
|
||||
accum += screen_up.Sample(sampler, blur_uv + Vec2(-offset_uv, offset_uv));
|
||||
result = accum / 4.0f;
|
||||
SampleDesc desc = samples[sample_idx];
|
||||
Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);
|
||||
|
||||
f32 knee_weight = 1;
|
||||
if (is_first_pass)
|
||||
{
|
||||
f32 luminance = LuminanceFromColor(src);
|
||||
f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance
|
||||
f32 bright = max(luminance, max_rgb * 0.5);
|
||||
if (bright > 0)
|
||||
{
|
||||
f32 threshold = 1.0;
|
||||
f32 knee = 0.5;
|
||||
f32 over_threshold = max(bright - threshold, 0.0);
|
||||
f32 ramp = saturate(over_threshold / knee);
|
||||
knee_weight = (over_threshold * ramp * ramp) / bright;
|
||||
}
|
||||
else
|
||||
{
|
||||
knee_weight = 0;
|
||||
}
|
||||
}
|
||||
|
||||
result += src * desc.weight * knee_weight;
|
||||
}
|
||||
|
||||
if (all(blur_pos >= 0) && all(blur_pos < countof(screen_down)))
|
||||
if (all(bloom_pos >= 0) && all(bloom_pos < down_dims))
|
||||
{
|
||||
screen_down[blur_pos] = result;
|
||||
bloom_down[bloom_pos] = result;
|
||||
}
|
||||
}
|
||||
|
||||
ComputeShader2D(V_BlurUpCS, 8, 8)
|
||||
ComputeShader2D(V_BloomUpCS, 8, 8)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
Texture2D<Vec4> screen_down = G_Dereference<Vec4>(frame.screen_mips_ro[V_GpuConst_MipIdx + 1]);
|
||||
RWTexture2D<Vec4> screen_up = G_Dereference<Vec4>(frame.screen_mips_rw[V_GpuConst_MipIdx]);
|
||||
Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomRead);
|
||||
RWTexture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
|
||||
Vec2 up_dims = countof(screen_up);
|
||||
Vec2 down_dims = countof(screen_down);
|
||||
Vec2 up_dims = countof(bloom_up);
|
||||
Vec2 down_dims = countof(bloom_down);
|
||||
|
||||
Vec2 blur_pos = SV_DispatchThreadID + 0.5;
|
||||
Vec2 blur_uv = blur_pos / up_dims;
|
||||
f32 offset_uv = 1 / down_dims;
|
||||
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
|
||||
Vec2 bloom_uv = bloom_pos / up_dims;
|
||||
Vec2 off_uv = 1 / up_dims;
|
||||
|
||||
Vec4 result = 0;
|
||||
|
||||
// Center
|
||||
result += screen_down.Sample(sampler, blur_uv) * 4;
|
||||
|
||||
// Edges
|
||||
result += screen_down.Sample(sampler, blur_uv + Vec2(0, -offset_uv)) * 2;
|
||||
result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, 0)) * 2;
|
||||
result += screen_down.Sample(sampler, blur_uv + Vec2(0, offset_uv)) * 2;
|
||||
result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, 0)) * 2;
|
||||
|
||||
// Corners
|
||||
result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, -offset_uv));
|
||||
result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, -offset_uv));
|
||||
result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, offset_uv));
|
||||
result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, offset_uv));
|
||||
|
||||
// Normalize
|
||||
result /= 16;
|
||||
|
||||
if (all(blur_pos >= 0) && all(blur_pos < countof(screen_up)))
|
||||
{
|
||||
screen_up[blur_pos] += result;
|
||||
// Center
|
||||
result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4;
|
||||
// Edges
|
||||
result += (
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0)
|
||||
) * 2;
|
||||
// Corners
|
||||
result += (
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0)
|
||||
);
|
||||
// Normalize
|
||||
result /= 16;
|
||||
}
|
||||
|
||||
if (all(bloom_pos >= 0) && all(bloom_pos < up_dims))
|
||||
{
|
||||
bloom_up[bloom_pos] += result;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Post process
|
||||
|
||||
// Post-process pass. For every texel inside frame.screen_dims this shader reads the color
// already stored in frame.screen_rw, adds a bilinear sample of bloom mip 0
// (frame.bloom_mips_ro[0]) and writes the combined color back to the same screen texel.
// Threads outside the screen bounds read nothing and write nothing.
ComputeShader2D(V_PostProcessCS, 8, 8)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
Texture2D<Vec4> bloom_tex = G_Dereference<Vec4>(frame.bloom_mips_ro[0]);
|
||||
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
|
||||
|
||||
// The +0.5 offset puts screen_pos at the middle of the texel, so screen_uv is a
// normalized coordinate that can be handed straight to the sampler.
Vec2 screen_pos = SV_DispatchThreadID + 0.5;
|
||||
Vec2 screen_uv = screen_pos / frame.screen_dims;
|
||||
// Guards every texture read/write below against threads dispatched past the screen edge.
b32 is_in_screen = all(screen_pos >= 0) && all(screen_pos < frame.screen_dims);
|
||||
|
||||
//////////////////////////////
|
||||
//- Original
|
||||
|
||||
Vec4 original = 0;
|
||||
if (is_in_screen)
|
||||
{
|
||||
original = screen_tex[screen_pos];
|
||||
// Multiply rgb by alpha before blending; the Unpremul call at the end is the counterpart.
// NOTE(review): presumably the screen texture holds non-premultiplied color at this point - confirm.
original.rgb *= original.a;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////
|
||||
//- Bloom
|
||||
|
||||
Vec4 bloom = 0;
|
||||
if (is_in_screen)
|
||||
{
|
||||
// Sampled by uv (explicit mip level 0 of the bound view) rather than indexed by texel.
// NOTE(review): the bloom texture may have different dimensions than the screen - verify against the host-side allocation.
bloom = bloom_tex.SampleLevel(bilinear_sampler, screen_uv, 0);
|
||||
// bloom.rgb *= bloom.a;
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Compose
|
||||
|
||||
// Start from opaque black, blend the original color with it, then add bloom to all four
// channels (alpha included).
// NOTE(review): presumably BlendPremul takes (source, destination) - confirm the argument order.
Vec4 result = Vec4(0, 0, 0, 1);
|
||||
result = BlendPremul(original, result);
|
||||
result += bloom;
|
||||
// Tone mapping is currently disabled.
// result.rgb = V_ToneMap(result);
|
||||
|
||||
result = Unpremul(result);
|
||||
|
||||
if (is_in_screen)
|
||||
{
|
||||
screen_tex[screen_pos] = result;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -46,6 +46,7 @@ Struct(V_DVertPSOutput)
|
||||
|
||||
f32 V_RandFromPos(Vec3 pos);
|
||||
Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 density, f32 dryness);
|
||||
Vec3 V_ToneMap(Vec3 v);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Shaders
|
||||
@ -68,9 +69,12 @@ ComputeShader2D(V_ShadeCS, 8, 8);
|
||||
//- Composite
|
||||
ComputeShader2D(V_CompositeCS, 8, 8);
|
||||
|
||||
//- Blur
|
||||
ComputeShader2D(V_BlurDownCS, 8, 8);
|
||||
ComputeShader2D(V_BlurUpCS, 8, 8);
|
||||
//- Bloom
|
||||
ComputeShader2D(V_BloomDownCS, 8, 8);
|
||||
ComputeShader2D(V_BloomUpCS, 8, 8);
|
||||
|
||||
//- Post process
|
||||
ComputeShader2D(V_PostProcessCS, 8, 8);
|
||||
|
||||
//- Debug shapes
|
||||
VertexShader(V_DVertVS, V_DVertPSInput);
|
||||
|
||||
@ -4,11 +4,22 @@
|
||||
// #define V_ParticlesCap Mebi(16)
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ State types
|
||||
//~ Constant types
|
||||
|
||||
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 0);
|
||||
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 1);
|
||||
G_DeclConstant(u32, V_GpuConst_MipIdx, 2);
|
||||
// Bit flags handed to shaders through the V_GpuConst_Flags constant.
Enum(V_GpuFlag)
|
||||
{
|
||||
// No flags set.
V_GpuFlag_None = 0,
|
||||
// Marks the first bloom downsample dispatch (mip 0). When set, V_BloomDownCS applies its
// brightness threshold/knee weighting to each source sample; the host clears the flag
// again after each bloom downsample dispatch.
V_GpuFlag_InitBloom = (1 << 0),
|
||||
};
|
||||
|
||||
G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0);
|
||||
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1);
|
||||
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2);
|
||||
G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3);
|
||||
G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ State types
|
||||
|
||||
Struct(V_TileDesc)
|
||||
{
|
||||
@ -136,13 +147,16 @@ Struct(V_SharedFrame)
|
||||
V_TileDesc tile_descs[P_TileKind_COUNT];
|
||||
G_Texture2DRef tiles;
|
||||
|
||||
G_Texture2DRef screen_mips_ro[16];
|
||||
G_RWTexture2DRef screen_mips_rw[16];
|
||||
G_Texture2DRef screen_ro;
|
||||
G_RWTexture2DRef screen_rw;
|
||||
G_Texture2DRef shade_ro;
|
||||
G_RWTexture2DRef shade_rw;
|
||||
G_Texture2DRef albedo_ro;
|
||||
G_RWTexture2DRef albedo_rw;
|
||||
|
||||
G_Texture2DRef bloom_mips_ro[G_MaxMips];
|
||||
G_RWTexture2DRef bloom_mips_rw[G_MaxMips];
|
||||
|
||||
u32 emitters_count;
|
||||
G_StructuredBufferRef emitters;
|
||||
G_RWStructuredBufferRef particles;
|
||||
@ -214,7 +228,7 @@ Enum(V_ParticleFlag)
|
||||
/* Name */ Debris, \
|
||||
/* Flags */ V_ParticleFlag_Ground | V_ParticleFlag_PruneWhenStill | V_ParticleFlag_StainWhenPruned, \
|
||||
/* Stain rate, pen chance */ 0, 0, \
|
||||
/* Base color */ 1, 0.5, 0, 1 \
|
||||
/* Base color */ 2.0, 0.5, 0, 1 \
|
||||
) \
|
||||
\
|
||||
/* Air particles */ \
|
||||
|
||||
@ -1735,7 +1735,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
|
||||
);
|
||||
G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, UI_GpuParams);
|
||||
|
||||
// Constants
|
||||
// Initial constants
|
||||
G_SetConstant(frame->cl, UI_GpuConst_Params, params_ro);
|
||||
|
||||
// Sync
|
||||
|
||||
Loading…
Reference in New Issue
Block a user