thresholded & smoothed bloom
This commit is contained in:
parent
78e9635840
commit
9ca01a920d
4
src/gpu/gpu.lay
generated
4
src/gpu/gpu.lay
generated
@ -13,11 +13,11 @@
|
|||||||
//////////////////////////////
|
//////////////////////////////
|
||||||
//- Api
|
//- Api
|
||||||
|
|
||||||
@IncludeC gpu_shader_core.cgh
|
@IncludeC gpu_shared.cgh
|
||||||
@IncludeC gpu_core.h
|
@IncludeC gpu_core.h
|
||||||
@IncludeC gpu_common.h
|
@IncludeC gpu_common.h
|
||||||
|
|
||||||
@IncludeG gpu_shader_core.cgh
|
@IncludeG gpu_shared.cgh
|
||||||
|
|
||||||
@Bootstrap G_Bootstrap
|
@Bootstrap G_Bootstrap
|
||||||
@Bootstrap G_BootstrapCommon
|
@Bootstrap G_BootstrapCommon
|
||||||
|
|||||||
@ -475,8 +475,6 @@ Struct(G_RefDesc)
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Rasterization types
|
//~ Rasterization types
|
||||||
|
|
||||||
#define G_MaxRenderTargets 8
|
|
||||||
|
|
||||||
Enum(G_RasterMode)
|
Enum(G_RasterMode)
|
||||||
{
|
{
|
||||||
G_RasterMode_None,
|
G_RasterMode_None,
|
||||||
|
|||||||
@ -1129,7 +1129,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
|
|||||||
else if (is_texture)
|
else if (is_texture)
|
||||||
{
|
{
|
||||||
i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z);
|
i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z);
|
||||||
i32 max_mips = FloorF32(Log2F32(largest_dim)) + 1;
|
i32 max_mips = MinI32(FloorF32(Log2F32(largest_dim)) + 1, G_MaxMips);
|
||||||
d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout);
|
d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout);
|
||||||
d3d_desc.Dimension = (
|
d3d_desc.Dimension = (
|
||||||
desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :
|
desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :
|
||||||
|
|||||||
@ -113,6 +113,9 @@ Enum(G_BasicSamplerKind)
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Resource countof
|
//~ Resource countof
|
||||||
|
|
||||||
|
#define G_MaxMips 16
|
||||||
|
#define G_MaxRenderTargets 8
|
||||||
|
|
||||||
#if IsGpu
|
#if IsGpu
|
||||||
template<typename T> u32 countof(StructuredBuffer<T> obj) { u32 result; obj.GetDimensions(result); return result; }
|
template<typename T> u32 countof(StructuredBuffer<T> obj) { u32 result; obj.GetDimensions(result); return result; }
|
||||||
template<typename T> u32 countof(RWStructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }
|
template<typename T> u32 countof(RWStructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }
|
||||||
@ -544,7 +544,9 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
|
|||||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wshadow"));
|
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wshadow"));
|
||||||
|
|
||||||
// Disable warnings
|
// Disable warnings
|
||||||
|
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-local-type-template-args"));
|
||||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-variable"));
|
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-variable"));
|
||||||
|
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-local-typedef"));
|
||||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-conversion"));
|
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-conversion"));
|
||||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-switch"));
|
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-switch"));
|
||||||
}
|
}
|
||||||
|
|||||||
5
src/pp/pp_vis/pp_vis.lay
generated
5
src/pp/pp_vis/pp_vis.lay
generated
@ -24,8 +24,9 @@
|
|||||||
@ComputeShader V_SimParticlesCS
|
@ComputeShader V_SimParticlesCS
|
||||||
@ComputeShader V_ShadeCS
|
@ComputeShader V_ShadeCS
|
||||||
@ComputeShader V_CompositeCS
|
@ComputeShader V_CompositeCS
|
||||||
@ComputeShader V_BlurDownCS
|
@ComputeShader V_BloomDownCS
|
||||||
@ComputeShader V_BlurUpCS
|
@ComputeShader V_BloomUpCS
|
||||||
|
@ComputeShader V_PostProcessCS
|
||||||
@VertexShader V_DVertVS
|
@VertexShader V_DVertVS
|
||||||
@PixelShader V_DVertPS
|
@PixelShader V_DVertPS
|
||||||
|
|
||||||
|
|||||||
@ -2620,8 +2620,6 @@ void V_TickForever(WaveLaneCtx *lane)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////
|
//////////////////////////////
|
||||||
//- Push test explosion
|
//- Push test explosion
|
||||||
|
|
||||||
@ -2654,6 +2652,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
|||||||
// emitter.falloff.min = emitter.falloff.max = 0;
|
// emitter.falloff.min = emitter.falloff.max = 0;
|
||||||
|
|
||||||
// emitter.count = CeilF32(Kibi(64) * frame->dt);
|
// emitter.count = CeilF32(Kibi(64) * frame->dt);
|
||||||
|
// emitter.count = CeilF32(Mebi(32) * frame->dt);
|
||||||
// emitter.count = Mebi(16);
|
// emitter.count = Mebi(16);
|
||||||
// emitter.count = Mebi(2);
|
// emitter.count = Mebi(2);
|
||||||
// emitter.count = Kibi(32);
|
// emitter.count = Kibi(32);
|
||||||
@ -4849,15 +4848,28 @@ void V_TickForever(WaveLaneCtx *lane)
|
|||||||
frame->screen_dims,
|
frame->screen_dims,
|
||||||
G_Layout_DirectQueue_ShaderReadWrite,
|
G_Layout_DirectQueue_ShaderReadWrite,
|
||||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
|
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
|
||||||
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)),
|
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
|
||||||
.max_mips = 4 // For bloom pyramid
|
|
||||||
);
|
);
|
||||||
Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1));
|
Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1));
|
||||||
Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y));
|
Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y));
|
||||||
for (i32 mip_idx = 0; mip_idx < G_CountMips(screen_target); ++mip_idx)
|
frame->screen_ro = G_PushTexture2DRef(frame->gpu_arena, screen_target);
|
||||||
|
frame->screen_rw = G_PushRWTexture2DRef(frame->gpu_arena, screen_target);
|
||||||
|
|
||||||
|
// Bloom texture
|
||||||
|
G_ResourceHandle bloom_target = G_PushTexture2D(
|
||||||
|
frame->gpu_arena, frame->cl,
|
||||||
|
G_Format_R16G16B16A16_Float,
|
||||||
|
G_DimsFromMip2D(G_Count2D(screen_target), 1),
|
||||||
|
G_Layout_DirectQueue_ShaderReadWrite,
|
||||||
|
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
|
||||||
|
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),
|
||||||
|
// .max_mips = 4
|
||||||
|
.max_mips = 8
|
||||||
|
);
|
||||||
|
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)
|
||||||
{
|
{
|
||||||
frame->screen_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx));
|
frame->bloom_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
|
||||||
frame->screen_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, screen_target, .mips = RNGI32(mip_idx, mip_idx));
|
frame->bloom_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Albedo texture
|
// Albedo texture
|
||||||
@ -4929,8 +4941,6 @@ void V_TickForever(WaveLaneCtx *lane)
|
|||||||
frame->emitters = G_PushStructuredBufferRef(frame->gpu_arena, gpu_emitters, V_Emitter);
|
frame->emitters = G_PushStructuredBufferRef(frame->gpu_arena, gpu_emitters, V_Emitter);
|
||||||
|
|
||||||
// Upload gpu frame
|
// Upload gpu frame
|
||||||
{
|
|
||||||
// Gpu frame
|
|
||||||
G_ResourceHandle gpu_frame_res = G_PushBufferFromCpuCopy(
|
G_ResourceHandle gpu_frame_res = G_PushBufferFromCpuCopy(
|
||||||
frame->gpu_arena, frame->cl,
|
frame->gpu_arena, frame->cl,
|
||||||
StringFromStruct(&frame->shared_frame),
|
StringFromStruct(&frame->shared_frame),
|
||||||
@ -4938,10 +4948,11 @@ void V_TickForever(WaveLaneCtx *lane)
|
|||||||
);
|
);
|
||||||
G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame);
|
G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame);
|
||||||
|
|
||||||
// Set constants
|
// Set initial constants
|
||||||
|
V_GpuFlag gpu_flags = V_GpuFlag_None;
|
||||||
|
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
|
||||||
G_SetConstant(frame->cl, V_GpuConst_Frame, gpu_frame);
|
G_SetConstant(frame->cl, V_GpuConst_Frame, gpu_frame);
|
||||||
G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture());
|
G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture());
|
||||||
}
|
|
||||||
|
|
||||||
// Sync
|
// Sync
|
||||||
G_DumbGlobalMemorySync(frame->cl);
|
G_DumbGlobalMemorySync(frame->cl);
|
||||||
@ -5024,45 +5035,70 @@ void V_TickForever(WaveLaneCtx *lane)
|
|||||||
|
|
||||||
{
|
{
|
||||||
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
|
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
|
||||||
|
|
||||||
|
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////
|
//////////////////////////////
|
||||||
//- Blur passes
|
//- Bloom passes
|
||||||
|
|
||||||
{
|
{
|
||||||
// TODO: Limit passes
|
i32 mips_count = G_CountMips(bloom_target);
|
||||||
i32 mips_count = G_CountMips(screen_target);
|
|
||||||
|
|
||||||
// Downsample + blur passes
|
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
|
||||||
G_LogResource(frame->cl, screen_target);
|
|
||||||
for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx)
|
//- Downsample + blur passes
|
||||||
|
for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx)
|
||||||
{
|
{
|
||||||
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
|
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
|
||||||
|
if (mip_idx == 0)
|
||||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));
|
{
|
||||||
// G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
|
// Init bloom pyramid from screen target on first pass (prefilter)
|
||||||
|
gpu_flags |= V_GpuFlag_InitBloom;
|
||||||
G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
|
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
|
||||||
G_Compute(frame->cl, V_BlurDownCS, V_ThreadGroupSizeFromTexSize(dims));
|
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));
|
||||||
|
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]);
|
||||||
|
}
|
||||||
|
G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
|
||||||
|
{
|
||||||
|
G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims));
|
||||||
|
}
|
||||||
|
gpu_flags &= ~V_GpuFlag_InitBloom;
|
||||||
|
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Upsample passes
|
//- Upsample passes
|
||||||
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)
|
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)
|
||||||
{
|
{
|
||||||
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
|
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
|
||||||
|
|
||||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
|
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
|
||||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));
|
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));
|
||||||
|
|
||||||
G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
|
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]);
|
||||||
G_Compute(frame->cl, V_BlurUpCS, V_ThreadGroupSizeFromTexSize(dims));
|
G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
|
||||||
|
|
||||||
|
G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////
|
||||||
|
//- Post process pass
|
||||||
|
|
||||||
|
{
|
||||||
|
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite);
|
||||||
|
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));
|
||||||
|
G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
|
||||||
|
}
|
||||||
|
|
||||||
//////////////////////////////
|
//////////////////////////////
|
||||||
//- Debug shapes pass
|
//- Debug shapes pass
|
||||||
|
|
||||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite, .mips = RNGI32(0, 0));
|
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
|
||||||
|
|
||||||
{
|
{
|
||||||
G_Rasterize(
|
G_Rasterize(
|
||||||
@ -5078,13 +5114,13 @@ void V_TickForever(WaveLaneCtx *lane)
|
|||||||
//////////////////////////////
|
//////////////////////////////
|
||||||
//- Finalize screen target
|
//- Finalize screen target
|
||||||
|
|
||||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));
|
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
|
||||||
{
|
{
|
||||||
Rng2 uv = Zi;
|
Rng2 uv = Zi;
|
||||||
uv.p0 = Vec2FromVec(screen_viewport.p0);
|
uv.p0 = Vec2FromVec(screen_viewport.p0);
|
||||||
uv.p1 = Vec2FromVec(screen_viewport.p1);
|
uv.p1 = Vec2FromVec(screen_viewport.p1);
|
||||||
uv = DivRng2Vec2(uv, Vec2FromVec(frame->screen_dims));
|
uv = DivRng2Vec2(uv, Vec2FromVec(frame->screen_dims));
|
||||||
UI_SetRawTexture(vis_box, frame->screen_mips_ro[0], uv);
|
UI_SetRawTexture(vis_box, frame->screen_ro, uv);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -62,7 +62,7 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
result.rgb = saturate(result.rgb + (rand_color - 0.5) * 0.05);
|
result.rgb = result.rgb + (rand_color - 0.5) * 0.05;
|
||||||
// result.a += (rand_alpha - 0.5) * 0.025;
|
// result.a += (rand_alpha - 0.5) * 0.025;
|
||||||
// result.a *= rand_alpha;
|
// result.a *= rand_alpha;
|
||||||
|
|
||||||
@ -72,6 +72,13 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ACES approximation by Krzysztof Narkowicz
|
||||||
|
// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
|
||||||
|
Vec3 V_ToneMap(Vec3 v)
|
||||||
|
{
|
||||||
|
return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f));
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Prepare frame
|
//~ Prepare frame
|
||||||
|
|
||||||
@ -569,7 +576,7 @@ ComputeShader2D(V_CompositeCS, 8, 8)
|
|||||||
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
|
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
|
||||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
||||||
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
|
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
|
||||||
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_mips_rw[0]);
|
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
|
||||||
RWTexture2D<u32> stain_cells = G_Dereference<u32>(frame.stain_cells);
|
RWTexture2D<u32> stain_cells = G_Dereference<u32>(frame.stain_cells);
|
||||||
RWTexture2D<u32> ground_cells = G_Dereference<u32>(frame.ground_cells);
|
RWTexture2D<u32> ground_cells = G_Dereference<u32>(frame.ground_cells);
|
||||||
RWTexture2D<u32> stain_densities = G_Dereference<u32>(frame.stain_densities);
|
RWTexture2D<u32> stain_densities = G_Dereference<u32>(frame.stain_densities);
|
||||||
@ -922,7 +929,7 @@ ComputeShader2D(V_CompositeCS, 8, 8)
|
|||||||
//////////////////////////////
|
//////////////////////////////
|
||||||
//- Compose result
|
//- Compose result
|
||||||
|
|
||||||
Vec4 result = Vec4(0, 0, 0, 1);
|
Vec4 result = 0;
|
||||||
result = BlendPremul(world_color, result);
|
result = BlendPremul(world_color, result);
|
||||||
result = BlendPremul(overlay_color, result);
|
result = BlendPremul(overlay_color, result);
|
||||||
|
|
||||||
@ -935,75 +942,157 @@ ComputeShader2D(V_CompositeCS, 8, 8)
|
|||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Blur
|
//~ Bloom
|
||||||
|
|
||||||
ComputeShader2D(V_BlurDownCS, 8, 8)
|
ComputeShader2D(V_BloomDownCS, 8, 8)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
Texture2D<Vec4> screen_up = G_Dereference<Vec4>(frame.screen_mips_ro[V_GpuConst_MipIdx - 1]);
|
Texture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomRead);
|
||||||
RWTexture2D<Vec4> screen_down = G_Dereference<Vec4>(frame.screen_mips_rw[V_GpuConst_MipIdx]);
|
RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
|
||||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||||
|
|
||||||
Vec2 up_dims = countof(screen_up);
|
Vec2 up_dims = countof(bloom_up);
|
||||||
Vec2 down_dims = countof(screen_down);
|
Vec2 down_dims = countof(bloom_down);
|
||||||
|
|
||||||
Vec2 blur_pos = SV_DispatchThreadID + 0.5;
|
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
|
||||||
Vec2 blur_uv = blur_pos / down_dims;
|
Vec2 bloom_uv = bloom_pos / down_dims;
|
||||||
f32 offset_uv = 0.5 / up_dims;
|
Vec2 off_uv = 0.5 / down_dims;
|
||||||
|
b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom);
|
||||||
|
|
||||||
|
Struct(SampleDesc) { Vec2 uv; f32 weight; };
|
||||||
|
SampleDesc samples[] = {
|
||||||
|
{ bloom_uv + Vec2(0, 0), 0.5 },
|
||||||
|
{ bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
|
||||||
|
{ bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
|
||||||
|
{ bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
|
||||||
|
{ bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
|
||||||
|
};
|
||||||
|
|
||||||
Vec4 result = 0;
|
Vec4 result = 0;
|
||||||
|
for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
|
||||||
{
|
{
|
||||||
Vec4 accum = 0;
|
SampleDesc desc = samples[sample_idx];
|
||||||
accum += screen_up.Sample(sampler, blur_uv + Vec2(-offset_uv, -offset_uv));
|
Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);
|
||||||
accum += screen_up.Sample(sampler, blur_uv + Vec2(offset_uv, -offset_uv));
|
|
||||||
accum += screen_up.Sample(sampler, blur_uv + Vec2(offset_uv, offset_uv));
|
f32 knee_weight = 1;
|
||||||
accum += screen_up.Sample(sampler, blur_uv + Vec2(-offset_uv, offset_uv));
|
if (is_first_pass)
|
||||||
result = accum / 4.0f;
|
{
|
||||||
|
f32 luminance = LuminanceFromColor(src);
|
||||||
|
f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance
|
||||||
|
f32 bright = max(luminance, max_rgb * 0.5);
|
||||||
|
if (bright > 0)
|
||||||
|
{
|
||||||
|
f32 threshold = 1.0;
|
||||||
|
f32 knee = 0.5;
|
||||||
|
f32 over_threshold = max(bright - threshold, 0.0);
|
||||||
|
f32 ramp = saturate(over_threshold / knee);
|
||||||
|
knee_weight = (over_threshold * ramp * ramp) / bright;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
knee_weight = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (all(blur_pos >= 0) && all(blur_pos < countof(screen_down)))
|
result += src * desc.weight * knee_weight;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (all(bloom_pos >= 0) && all(bloom_pos < down_dims))
|
||||||
{
|
{
|
||||||
screen_down[blur_pos] = result;
|
bloom_down[bloom_pos] = result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ComputeShader2D(V_BlurUpCS, 8, 8)
|
ComputeShader2D(V_BloomUpCS, 8, 8)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
Texture2D<Vec4> screen_down = G_Dereference<Vec4>(frame.screen_mips_ro[V_GpuConst_MipIdx + 1]);
|
Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomRead);
|
||||||
RWTexture2D<Vec4> screen_up = G_Dereference<Vec4>(frame.screen_mips_rw[V_GpuConst_MipIdx]);
|
RWTexture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
|
||||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||||
|
|
||||||
Vec2 up_dims = countof(screen_up);
|
Vec2 up_dims = countof(bloom_up);
|
||||||
Vec2 down_dims = countof(screen_down);
|
Vec2 down_dims = countof(bloom_down);
|
||||||
|
|
||||||
Vec2 blur_pos = SV_DispatchThreadID + 0.5;
|
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
|
||||||
Vec2 blur_uv = blur_pos / up_dims;
|
Vec2 bloom_uv = bloom_pos / up_dims;
|
||||||
f32 offset_uv = 1 / down_dims;
|
Vec2 off_uv = 1 / up_dims;
|
||||||
|
|
||||||
Vec4 result = 0;
|
Vec4 result = 0;
|
||||||
|
{
|
||||||
// Center
|
// Center
|
||||||
result += screen_down.Sample(sampler, blur_uv) * 4;
|
result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4;
|
||||||
|
|
||||||
// Edges
|
// Edges
|
||||||
result += screen_down.Sample(sampler, blur_uv + Vec2(0, -offset_uv)) * 2;
|
result += (
|
||||||
result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, 0)) * 2;
|
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) +
|
||||||
result += screen_down.Sample(sampler, blur_uv + Vec2(0, offset_uv)) * 2;
|
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) +
|
||||||
result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, 0)) * 2;
|
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) +
|
||||||
|
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0)
|
||||||
|
) * 2;
|
||||||
// Corners
|
// Corners
|
||||||
result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, -offset_uv));
|
result += (
|
||||||
result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, -offset_uv));
|
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) +
|
||||||
result += screen_down.Sample(sampler, blur_uv + Vec2(offset_uv, offset_uv));
|
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) +
|
||||||
result += screen_down.Sample(sampler, blur_uv + Vec2(-offset_uv, offset_uv));
|
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) +
|
||||||
|
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0)
|
||||||
|
);
|
||||||
// Normalize
|
// Normalize
|
||||||
result /= 16;
|
result /= 16;
|
||||||
|
}
|
||||||
|
|
||||||
if (all(blur_pos >= 0) && all(blur_pos < countof(screen_up)))
|
if (all(bloom_pos >= 0) && all(bloom_pos < up_dims))
|
||||||
{
|
{
|
||||||
screen_up[blur_pos] += result;
|
bloom_up[bloom_pos] += result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
//~ Post process
|
||||||
|
|
||||||
|
ComputeShader2D(V_PostProcessCS, 8, 8)
|
||||||
|
{
|
||||||
|
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
|
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||||
|
Texture2D<Vec4> bloom_tex = G_Dereference<Vec4>(frame.bloom_mips_ro[0]);
|
||||||
|
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
|
||||||
|
|
||||||
|
Vec2 screen_pos = SV_DispatchThreadID + 0.5;
|
||||||
|
Vec2 screen_uv = screen_pos / frame.screen_dims;
|
||||||
|
b32 is_in_screen = all(screen_pos >= 0) && all(screen_pos < frame.screen_dims);
|
||||||
|
|
||||||
|
//////////////////////////////
|
||||||
|
//- Original
|
||||||
|
|
||||||
|
Vec4 original = 0;
|
||||||
|
if (is_in_screen)
|
||||||
|
{
|
||||||
|
original = screen_tex[screen_pos];
|
||||||
|
original.rgb *= original.a;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//////////////////////////////
|
||||||
|
//- Bloom
|
||||||
|
|
||||||
|
Vec4 bloom = 0;
|
||||||
|
if (is_in_screen)
|
||||||
|
{
|
||||||
|
bloom = bloom_tex.SampleLevel(bilinear_sampler, screen_uv, 0);
|
||||||
|
// bloom.rgb *= bloom.a;
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////
|
||||||
|
//- Compose
|
||||||
|
|
||||||
|
Vec4 result = Vec4(0, 0, 0, 1);
|
||||||
|
result = BlendPremul(original, result);
|
||||||
|
result += bloom;
|
||||||
|
// result.rgb = V_ToneMap(result);
|
||||||
|
|
||||||
|
result = Unpremul(result);
|
||||||
|
|
||||||
|
if (is_in_screen)
|
||||||
|
{
|
||||||
|
screen_tex[screen_pos] = result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -46,6 +46,7 @@ Struct(V_DVertPSOutput)
|
|||||||
|
|
||||||
f32 V_RandFromPos(Vec3 pos);
|
f32 V_RandFromPos(Vec3 pos);
|
||||||
Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 density, f32 dryness);
|
Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 density, f32 dryness);
|
||||||
|
Vec3 V_ToneMap(Vec3 v);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Shaders
|
//~ Shaders
|
||||||
@ -68,9 +69,12 @@ ComputeShader2D(V_ShadeCS, 8, 8);
|
|||||||
//- Composite
|
//- Composite
|
||||||
ComputeShader2D(V_CompositeCS, 8, 8);
|
ComputeShader2D(V_CompositeCS, 8, 8);
|
||||||
|
|
||||||
//- Blur
|
//- Bloom
|
||||||
ComputeShader2D(V_BlurDownCS, 8, 8);
|
ComputeShader2D(V_BloomDownCS, 8, 8);
|
||||||
ComputeShader2D(V_BlurUpCS, 8, 8);
|
ComputeShader2D(V_BloomUpCS, 8, 8);
|
||||||
|
|
||||||
|
//- Post process
|
||||||
|
ComputeShader2D(V_PostProcessCS, 8, 8);
|
||||||
|
|
||||||
//- Debug shapes
|
//- Debug shapes
|
||||||
VertexShader(V_DVertVS, V_DVertPSInput);
|
VertexShader(V_DVertVS, V_DVertPSInput);
|
||||||
|
|||||||
@ -4,11 +4,22 @@
|
|||||||
// #define V_ParticlesCap Mebi(16)
|
// #define V_ParticlesCap Mebi(16)
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ State types
|
//~ Constant types
|
||||||
|
|
||||||
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 0);
|
Enum(V_GpuFlag)
|
||||||
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 1);
|
{
|
||||||
G_DeclConstant(u32, V_GpuConst_MipIdx, 2);
|
V_GpuFlag_None = 0,
|
||||||
|
V_GpuFlag_InitBloom = (1 << 0),
|
||||||
|
};
|
||||||
|
|
||||||
|
G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0);
|
||||||
|
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1);
|
||||||
|
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2);
|
||||||
|
G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3);
|
||||||
|
G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////
|
||||||
|
//~ State types
|
||||||
|
|
||||||
Struct(V_TileDesc)
|
Struct(V_TileDesc)
|
||||||
{
|
{
|
||||||
@ -136,13 +147,16 @@ Struct(V_SharedFrame)
|
|||||||
V_TileDesc tile_descs[P_TileKind_COUNT];
|
V_TileDesc tile_descs[P_TileKind_COUNT];
|
||||||
G_Texture2DRef tiles;
|
G_Texture2DRef tiles;
|
||||||
|
|
||||||
G_Texture2DRef screen_mips_ro[16];
|
G_Texture2DRef screen_ro;
|
||||||
G_RWTexture2DRef screen_mips_rw[16];
|
G_RWTexture2DRef screen_rw;
|
||||||
G_Texture2DRef shade_ro;
|
G_Texture2DRef shade_ro;
|
||||||
G_RWTexture2DRef shade_rw;
|
G_RWTexture2DRef shade_rw;
|
||||||
G_Texture2DRef albedo_ro;
|
G_Texture2DRef albedo_ro;
|
||||||
G_RWTexture2DRef albedo_rw;
|
G_RWTexture2DRef albedo_rw;
|
||||||
|
|
||||||
|
G_Texture2DRef bloom_mips_ro[G_MaxMips];
|
||||||
|
G_RWTexture2DRef bloom_mips_rw[G_MaxMips];
|
||||||
|
|
||||||
u32 emitters_count;
|
u32 emitters_count;
|
||||||
G_StructuredBufferRef emitters;
|
G_StructuredBufferRef emitters;
|
||||||
G_RWStructuredBufferRef particles;
|
G_RWStructuredBufferRef particles;
|
||||||
@ -214,7 +228,7 @@ Enum(V_ParticleFlag)
|
|||||||
/* Name */ Debris, \
|
/* Name */ Debris, \
|
||||||
/* Flags */ V_ParticleFlag_Ground | V_ParticleFlag_PruneWhenStill | V_ParticleFlag_StainWhenPruned, \
|
/* Flags */ V_ParticleFlag_Ground | V_ParticleFlag_PruneWhenStill | V_ParticleFlag_StainWhenPruned, \
|
||||||
/* Stain rate, pen chance */ 0, 0, \
|
/* Stain rate, pen chance */ 0, 0, \
|
||||||
/* Base color */ 1, 0.5, 0, 1 \
|
/* Base color */ 2.0, 0.5, 0, 1 \
|
||||||
) \
|
) \
|
||||||
\
|
\
|
||||||
/* Air particles */ \
|
/* Air particles */ \
|
||||||
|
|||||||
@ -1735,7 +1735,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
|
|||||||
);
|
);
|
||||||
G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, UI_GpuParams);
|
G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, UI_GpuParams);
|
||||||
|
|
||||||
// Constants
|
// Initial constants
|
||||||
G_SetConstant(frame->cl, UI_GpuConst_Params, params_ro);
|
G_SetConstant(frame->cl, UI_GpuConst_Params, params_ro);
|
||||||
|
|
||||||
// Sync
|
// Sync
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user