927 lines
48 KiB
Plaintext
927 lines
48 KiB
Plaintext
[1mdiff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c[m
|
||
[1mindex a9686d87..43835793 100644[m
|
||
[1m--- a/src/gpu/gpu_common.c[m
|
||
[1m+++ b/src/gpu/gpu_common.c[m
|
||
[36m@@ -25,7 +25,7 @@[m [mvoid G_BootstrapCommon(void)[m
|
||
gpu_perm, cl,[m
|
||
G_Format_R8G8B8A8_Uint,[m
|
||
VEC2I32(8, 8),[m
|
||
[31m- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present,[m
|
||
[32m+[m[32m G_Layout_Simultaneous,[m
|
||
.flags = G_ResourceFlag_ZeroMemory[m
|
||
);[m
|
||
G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex);[m
|
||
[36m@@ -44,7 +44,7 @@[m [mvoid G_BootstrapCommon(void)[m
|
||
gpu_perm, cl,[m
|
||
G_Format_R16_Uint,[m
|
||
noise_dims,[m
|
||
[31m- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present[m
|
||
[32m+[m[32m G_Layout_Simultaneous[m
|
||
);[m
|
||
G_CopyCpuToTexture([m
|
||
cl,[m
|
||
[36m@@ -143,30 +143,54 @@[m [mG_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList[m
|
||
[m
|
||
//- Mip[m
|
||
[m
|
||
[31m-i32 G_DimsFromMip1D(i32 texture_dims, i32 mip)[m
|
||
[32m+[m[32mi32 G_DimsFromMip1D(i32 mip0_dims, i32 mip)[m
|
||
{[m
|
||
[31m- mip = ClampI32(mip, 0, 31);[m
|
||
[32m+[m[32m mip = ClampI32(mip, -31, 31);[m
|
||
i32 result = 0;[m
|
||
[31m- result = MaxI32(result >> mip, 1);[m
|
||
[32m+[m[32m if (mip >= 0)[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m result = MaxI32(result >> mip, 1);[m
|
||
[32m+[m[32m }[m
|
||
[32m+[m[32m else[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m result = MaxI32(result << -mip, 1);[m
|
||
[32m+[m[32m }[m
|
||
return result;[m
|
||
}[m
|
||
[m
|
||
[31m-Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip)[m
|
||
[32m+[m[32mVec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip)[m
|
||
{[m
|
||
[31m- mip = ClampI32(mip, 0, 31);[m
|
||
[32m+[m[32m mip = ClampI32(mip, -31, 31);[m
|
||
Vec2I32 result = Zi;[m
|
||
[31m- result.x = MaxI32(texture_dims.x >> mip, 1);[m
|
||
[31m- result.y = MaxI32(texture_dims.y >> mip, 1);[m
|
||
[32m+[m[32m if (mip >= 0)[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m result.x = MaxI32(mip0_dims.x >> mip, 1);[m
|
||
[32m+[m[32m result.y = MaxI32(mip0_dims.y >> mip, 1);[m
|
||
[32m+[m[32m }[m
|
||
[32m+[m[32m else[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m result.x = MaxI32(mip0_dims.x << -mip, 1);[m
|
||
[32m+[m[32m result.y = MaxI32(mip0_dims.y << -mip, 1);[m
|
||
[32m+[m[32m }[m
|
||
return result;[m
|
||
}[m
|
||
[m
|
||
[31m-Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip)[m
|
||
[32m+[m[32mVec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip)[m
|
||
{[m
|
||
[31m- mip = ClampI32(mip, 0, 31);[m
|
||
[32m+[m[32m mip = ClampI32(mip, -31, 31);[m
|
||
Vec3I32 result = Zi;[m
|
||
[31m- result.x = MaxI32(texture_dims.x >> mip, 1);[m
|
||
[31m- result.y = MaxI32(texture_dims.y >> mip, 1);[m
|
||
[31m- result.z = MaxI32(texture_dims.z >> mip, 1);[m
|
||
[32m+[m[32m if (mip >= 0)[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m result.x = MaxI32(mip0_dims.x >> mip, 1);[m
|
||
[32m+[m[32m result.y = MaxI32(mip0_dims.y >> mip, 1);[m
|
||
[32m+[m[32m result.z = MaxI32(mip0_dims.z >> mip, 1);[m
|
||
[32m+[m[32m }[m
|
||
[32m+[m[32m else[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m result.x = MaxI32(mip0_dims.x << -mip, 1);[m
|
||
[32m+[m[32m result.y = MaxI32(mip0_dims.y << -mip, 1);[m
|
||
[32m+[m[32m result.z = MaxI32(mip0_dims.z << -mip, 1);[m
|
||
[32m+[m[32m }[m
|
||
return result;[m
|
||
}[m
|
||
[m
|
||
[1mdiff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h[m
|
||
[1mindex eb3ee6d2..03927040 100644[m
|
||
[1m--- a/src/gpu/gpu_common.h[m
|
||
[1m+++ b/src/gpu/gpu_common.h[m
|
||
[36m@@ -35,9 +35,9 @@[m [mG_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList[m
|
||
G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ })[m
|
||
[m
|
||
//- Mip[m
|
||
[31m-i32 G_DimsFromMip1D(i32 texture_dims, i32 mip);[m
|
||
[31m-Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip);[m
|
||
[31m-Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip);[m
|
||
[32m+[m[32mi32 G_DimsFromMip1D(i32 mip0_dims, i32 mip);[m
|
||
[32m+[m[32mVec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip);[m
|
||
[32m+[m[32mVec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip);[m
|
||
[m
|
||
//- Viewport / scissor[m
|
||
Rng3 G_ViewportFromTexture(G_ResourceHandle texture);[m
|
||
[1mdiff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h[m
|
||
[1mindex 7e1b329a..bed18c93 100644[m
|
||
[1m--- a/src/gpu/gpu_core.h[m
|
||
[1m+++ b/src/gpu/gpu_core.h[m
|
||
[36m@@ -242,18 +242,16 @@[m [mEnum(G_Access)[m
|
||
G_Access_IndexBuffer = (1 << 8),[m
|
||
G_Access_IndirectArgument = (1 << 9),[m
|
||
[m
|
||
[31m- G_Access_All = 0xFFFFFFFF[m
|
||
[32m+[m[32m G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the specified sync stage[m
|
||
};[m
|
||
[m
|
||
Enum(G_Layout)[m
|
||
{[m
|
||
G_Layout_NoChange,[m
|
||
[m
|
||
[31m- // "Simultaneous" allows a resource to be used on any queue with any access[m
|
||
[31m- // type, as long as there is only one writer at a time, and the writer is not[m
|
||
[31m- // writing to any texels currently being read.[m
|
||
[31m- // Resources cannot transition to/from this layout. They must be created[m
|
||
[31m- // with it and are locked to it.[m
|
||
[32m+[m[32m // Simultaneous layout allows a resource to be used on any queue with any[m
|
||
[32m+[m[32m // access type (except depth-stencil). Resources cannot transition to/from[m
|
||
[32m+[m[32m // this layout, they must be created with it.[m
|
||
G_Layout_Simultaneous, // D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS[m
|
||
[m
|
||
G_Layout_Undefined, // D3D12_BARRIER_LAYOUT_UNDEFINED[m
|
||
[1mdiff --git a/src/pp/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay[m
|
||
[1mindex f72dc528..2d916376 100644[m
|
||
[1m--- a/src/pp/pp_vis/pp_vis.lay[m
|
||
[1m+++ b/src/pp/pp_vis/pp_vis.lay[m
|
||
[36m@@ -26,7 +26,7 @@[m
|
||
@ComputeShader V_CompositeCS[m
|
||
@ComputeShader V_BloomDownCS[m
|
||
@ComputeShader V_BloomUpCS[m
|
||
[31m-@ComputeShader V_PostProcessCS[m
|
||
[32m+[m[32m@ComputeShader V_FinalizeCS[m
|
||
@VertexShader V_DVertVS[m
|
||
@PixelShader V_DVertPS[m
|
||
[m
|
||
[1mdiff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c[m
|
||
[1mindex f2f5e6b5..338036ba 100644[m
|
||
[1m--- a/src/pp/pp_vis/pp_vis_core.c[m
|
||
[1m+++ b/src/pp/pp_vis/pp_vis_core.c[m
|
||
[36m@@ -416,7 +416,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
gpu_perm, cl,[m
|
||
G_Format_R8_Uint,[m
|
||
tiles_dims,[m
|
||
[31m- G_Layout_DirectQueue_ShaderRead,[m
|
||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||
.flags = G_ResourceFlag_ZeroMemory,[m
|
||
.name = Lit("Tiles")[m
|
||
);[m
|
||
[36m@@ -441,7 +441,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
gpu_perm, cl,[m
|
||
G_Format_R32_Uint,[m
|
||
cells_dims,[m
|
||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||
.name = StringF(perm, "Particle cells - layer %F", FmtSint(layer))[m
|
||
);[m
|
||
[36m@@ -454,7 +454,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
gpu_perm, cl,[m
|
||
G_Format_R32_Uint,[m
|
||
cells_dims,[m
|
||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||
.name = StringF(perm, "Particle densities - layer %F", FmtSint(layer))[m
|
||
);[m
|
||
[36m@@ -469,7 +469,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
gpu_perm, cl,[m
|
||
G_Format_R16G16B16A16_Float,[m
|
||
cells_dims,[m
|
||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||
.name = Lit("Stains")[m
|
||
);[m
|
||
[36m@@ -481,7 +481,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
gpu_perm, cl,[m
|
||
G_Format_R16G16B16A16_Float,[m
|
||
cells_dims,[m
|
||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||
.name = Lit("Dry stains")[m
|
||
);[m
|
||
[36m@@ -493,7 +493,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
gpu_perm, cl,[m
|
||
G_Format_R32_Float,[m
|
||
cells_dims,[m
|
||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||
.name = Lit("Drynesses")[m
|
||
);[m
|
||
[36m@@ -505,7 +505,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
gpu_perm, cl,[m
|
||
G_Format_R32_Uint,[m
|
||
cells_dims,[m
|
||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||
.name = Lit("Occluders cells")[m
|
||
);[m
|
||
[36m@@ -614,6 +614,8 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
frame->dt = SecondsFromNs(frame->dt_ns);[m
|
||
frame->rand = prev_frame->rand;[m
|
||
[m
|
||
[32m+[m[32m frame->should_tone_map = TweakBool("Tone mapping enabled", 1);[m
|
||
[32m+[m
|
||
if (P_IsEntKeyNil(V.player_key))[m
|
||
{[m
|
||
TrueRand(StringFromStruct(&V.player_key));[m
|
||
[36m@@ -4918,18 +4920,17 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
frame->tile_descs[tile_kind] = tile_desc;[m
|
||
}[m
|
||
}[m
|
||
[32m+[m
|
||
// Upload tiles[m
|
||
if (frame->tiles_dirty)[m
|
||
{[m
|
||
// LogDebugF("Uploading tiles to gpu");[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_CopyWrite);[m
|
||
G_CopyCpuToTexture([m
|
||
frame->cl,[m
|
||
gpu_tiles_res, VEC3I32(0, 0, 0),[m
|
||
local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1),[m
|
||
RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1))[m
|
||
);[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_ShaderRead);[m
|
||
}[m
|
||
[m
|
||
// Screen texture[m
|
||
[36m@@ -4937,7 +4938,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
frame->gpu_arena, frame->cl,[m
|
||
G_Format_R16G16B16A16_Float,[m
|
||
frame->screen_dims,[m
|
||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,[m
|
||
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))[m
|
||
);[m
|
||
[36m@@ -4951,11 +4952,10 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
frame->gpu_arena, frame->cl,[m
|
||
G_Format_R16G16B16A16_Float,[m
|
||
G_DimsFromMip2D(G_Count2D(screen_target), 1),[m
|
||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,[m
|
||
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),[m
|
||
[31m- // .max_mips = 4[m
|
||
[31m- .max_mips = 8[m
|
||
[32m+[m[32m .max_mips = 64[m
|
||
);[m
|
||
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)[m
|
||
{[m
|
||
[36m@@ -4979,7 +4979,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
frame->gpu_arena, frame->cl,[m
|
||
G_Format_R16G16B16A16_Float,[m
|
||
frame->shade_dims,[m
|
||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||
.flags = G_ResourceFlag_AllowShaderReadWrite,[m
|
||
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))[m
|
||
);[m
|
||
[36m@@ -5091,6 +5091,9 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
[m
|
||
// Sync particles & occluders[m
|
||
G_DumbGlobalMemorySync(frame->cl);[m
|
||
[32m+[m
|
||
[32m+[m[32m // Transition albedo[m
|
||
[32m+[m[32m G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);[m
|
||
}[m
|
||
[m
|
||
//////////////////////////////[m
|
||
[36m@@ -5113,83 +5116,63 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
G_Compute(frame->cl, V_ShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims));[m
|
||
}[m
|
||
[m
|
||
[31m- //////////////////////////////[m
|
||
[31m- //- Transition G-buffers to readonly[m
|
||
[31m-[m
|
||
[31m- {[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead);[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, shade_target, G_Layout_DirectQueue_ShaderRead);[m
|
||
[31m- }[m
|
||
[31m-[m
|
||
//////////////////////////////[m
|
||
//- Composite pass[m
|
||
[m
|
||
{[m
|
||
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));[m
|
||
[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);[m
|
||
[32m+[m[32m // Sync screen tex[m
|
||
[32m+[m[32m G_DumbGlobalMemorySync(frame->cl);[m
|
||
}[m
|
||
[m
|
||
//////////////////////////////[m
|
||
//- Bloom passes[m
|
||
[m
|
||
{[m
|
||
[31m- i32 mips_count = G_CountMips(bloom_target);[m
|
||
[32m+[m[32m i32 mips_count = G_CountMips(bloom_target) + 1;[m
|
||
[32m+[m[32m G_SetConstant(frame->cl, V_GpuConst_MipsCount, mips_count);[m
|
||
[32m+[m
|
||
[32m+[m[32m // NOTE: Because bloom mip chain starts at half screen size, mip_idx 0[m
|
||
[32m+[m[32m // actually represents the screen texture, while mip_idx - 1 represents[m
|
||
[32m+[m[32m // the first mip index in the bloom mip chain[m
|
||
[m
|
||
//- Downsample + blur passes[m
|
||
[31m- for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx)[m
|
||
[32m+[m[32m for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx)[m
|
||
{[m
|
||
[31m- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);[m
|
||
[31m- if (mip_idx == 0)[m
|
||
[31m- {[m
|
||
[31m- // Init bloom pyramid from screen target on first pass (prefilter)[m
|
||
[31m- gpu_flags |= V_GpuFlag_InitBloom;[m
|
||
[31m- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);[m
|
||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro);[m
|
||
[31m- }[m
|
||
[31m- else[m
|
||
[31m- {[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));[m
|
||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]);[m
|
||
[31m- }[m
|
||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);[m
|
||
[31m- {[m
|
||
[31m- G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims));[m
|
||
[31m- }[m
|
||
[31m- gpu_flags &= ~V_GpuFlag_InitBloom;[m
|
||
[31m- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);[m
|
||
[32m+[m[32m Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);[m
|
||
[32m+[m
|
||
[32m+[m[32m G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);[m
|
||
[32m+[m[32m G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(down_dims));[m
|
||
[32m+[m
|
||
[32m+[m[32m G_DumbGlobalMemorySync(frame->cl);[m
|
||
}[m
|
||
[m
|
||
//- Upsample passes[m
|
||
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)[m
|
||
{[m
|
||
[31m- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);[m
|
||
[31m-[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));[m
|
||
[32m+[m[32m Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);[m
|
||
[m
|
||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]);[m
|
||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);[m
|
||
[32m+[m[32m G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);[m
|
||
[32m+[m[32m G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(up_dims));[m
|
||
[m
|
||
[31m- G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims));[m
|
||
[31m- }[m
|
||
[32m+[m[32m G_DumbGlobalMemorySync(frame->cl);[m
|
||
[32m+[m[32m }[m
|
||
}[m
|
||
[m
|
||
//////////////////////////////[m
|
||
[31m- //- Post process pass[m
|
||
[32m+[m[32m //- Finalization pass[m
|
||
[m
|
||
{[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite);[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));[m
|
||
[31m- G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));[m
|
||
[32m+[m[32m G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));[m
|
||
}[m
|
||
[m
|
||
//////////////////////////////[m
|
||
//- Debug shapes pass[m
|
||
[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);[m
|
||
[31m-[m
|
||
{[m
|
||
[32m+[m[32m G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);[m
|
||
[32m+[m
|
||
G_Rasterize([m
|
||
frame->cl,[m
|
||
V_DVertVS, V_DVertPS,[m
|
||
[36m@@ -5198,12 +5181,13 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||
screen_viewport, screen_scissor,[m
|
||
G_RasterMode_TriangleList[m
|
||
);[m
|
||
[32m+[m
|
||
[32m+[m[32m G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);[m
|
||
}[m
|
||
[m
|
||
//////////////////////////////[m
|
||
//- Finalize screen target[m
|
||
[m
|
||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);[m
|
||
{[m
|
||
Rng2 uv = Zi;[m
|
||
uv.p0 = Vec2FromVec(screen_viewport.p0);[m
|
||
[1mdiff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g[m
|
||
[1mindex f8a254de..c0a9e47d 100644[m
|
||
[1m--- a/src/pp/pp_vis/pp_vis_gpu.g[m
|
||
[1m+++ b/src/pp/pp_vis/pp_vis_gpu.g[m
|
||
[36m@@ -53,13 +53,6 @@[m [mVec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)[m
|
||
return result;[m
|
||
}[m
|
||
[m
|
||
[31m-// ACES approximation by Krzysztof Narkowicz[m
|
||
[31m-// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/[m
|
||
[31m-Vec3 V_ToneMap(Vec3 v)[m
|
||
[31m-{[m
|
||
[31m- return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f));[m
|
||
[31m-}[m
|
||
[31m-[m
|
||
////////////////////////////////////////////////////////////[m
|
||
//~ Prepare frame[m
|
||
[m
|
||
[36m@@ -142,11 +135,11 @@[m [mComputeShader2D(V_PrepareCellsCS, 8, 8)[m
|
||
}[m
|
||
else if (over_stain.a > 0)[m
|
||
{[m
|
||
[31m- Vec4 stain = dry_stains[cell_pos];[m
|
||
Vec4 dry_stain = max(dry_stains[cell_pos], 0);[m
|
||
[32m+[m[32m Vec4 stain = dry_stain;[m
|
||
[m
|
||
[31m- stain = BlendPremul(over_stain, stain);[m
|
||
dry_stain = BlendPremul(over_dry_stain, dry_stain);[m
|
||
[32m+[m[32m stain = BlendPremul(over_stain, stain);[m
|
||
[m
|
||
stains[cell_pos] = stain;[m
|
||
dry_stains[cell_pos] = dry_stain;[m
|
||
[36m@@ -483,7 +476,7 @@[m [mComputeShader(V_SimParticlesCS, 64)[m
|
||
particle.prev_occluder = occluder;[m
|
||
}[m
|
||
[m
|
||
[31m- if (!AnyBit(desc.flags, V_ParticleFlag_NoPruneWhenStill) && dot(particle.velocity, particle.velocity) < 0.0001)[m
|
||
[32m+[m[32m if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))[m
|
||
{[m
|
||
prune = 1;[m
|
||
}[m
|
||
[36m@@ -723,7 +716,6 @@[m [mComputeShader2D(V_CompositeCS, 8, 8)[m
|
||
Vec4 ground_particle_color = 0;[m
|
||
Vec4 air_particle_color = 0;[m
|
||
[m
|
||
[31m-[m
|
||
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)[m
|
||
{[m
|
||
RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);[m
|
||
[36m@@ -752,9 +744,9 @@[m [mComputeShader2D(V_CompositeCS, 8, 8)[m
|
||
// Darken wall particles / stains[m
|
||
if (tile == P_TileKind_Wall)[m
|
||
{[m
|
||
[31m- ground_particle_color *= 0.25;[m
|
||
[31m- air_particle_color *= 0.25;[m
|
||
[31m- stain_color *= 0.25;[m
|
||
[32m+[m[32m ground_particle_color *= 0.5;[m
|
||
[32m+[m[32m air_particle_color *= 0.5;[m
|
||
[32m+[m[32m stain_color *= 0.5;[m
|
||
}[m
|
||
[m
|
||
//////////////////////////////[m
|
||
[36m@@ -972,57 +964,73 @@[m [mComputeShader2D(V_CompositeCS, 8, 8)[m
|
||
////////////////////////////////////////////////////////////[m
|
||
//~ Bloom[m
|
||
[m
|
||
[32m+[m[32m//////////////////////////////[m
|
||
[32m+[m[32m//- Downsample[m
|
||
[32m+[m
|
||
ComputeShader2D(V_BloomDownCS, 8, 8)[m
|
||
{[m
|
||
[32m+[m[32m i32 mips_count = V_GpuConst_MipsCount;[m
|
||
[32m+[m[32m i32 mip_idx = V_GpuConst_MipIdx;[m
|
||
[32m+[m
|
||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];[m
|
||
[31m- Texture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomRead);[m
|
||
[31m- RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomWrite);[m
|
||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);[m
|
||
[32m+[m[32m RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);[m
|
||
[32m+[m
|
||
[32m+[m[32m Texture2D<Vec4> bloom_up;[m
|
||
[32m+[m[32m b32 is_first_pass = mip_idx == 1;[m
|
||
[32m+[m[32m if (is_first_pass)[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.screen_ro);[m
|
||
[32m+[m[32m }[m
|
||
[32m+[m[32m else[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx - 2]);[m
|
||
[32m+[m[32m }[m
|
||
[m
|
||
[31m- Vec2 up_dims = countof(bloom_up);[m
|
||
Vec2 down_dims = countof(bloom_down);[m
|
||
[m
|
||
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;[m
|
||
Vec2 bloom_uv = bloom_pos / down_dims;[m
|
||
Vec2 off_uv = 0.5 / down_dims;[m
|
||
[31m- b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom);[m
|
||
[m
|
||
[31m- Struct(SampleDesc) { Vec2 uv; f32 weight; };[m
|
||
[31m- SampleDesc samples[] = {[m
|
||
[31m- { bloom_uv + Vec2(0, 0), 0.5 },[m
|
||
[31m- { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },[m
|
||
[31m- { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },[m
|
||
[31m- { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },[m
|
||
[31m- { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },[m
|
||
[31m- };[m
|
||
[32m+[m[32m f32 threshold = 0.25;[m
|
||
[32m+[m[32m f32 knee = 0.75;[m
|
||
[m
|
||
Vec4 result = 0;[m
|
||
[31m- for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)[m
|
||
{[m
|
||
[31m- SampleDesc desc = samples[sample_idx];[m
|
||
[31m- Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);[m
|
||
[31m-[m
|
||
[31m- f32 knee_weight = 1;[m
|
||
[31m- if (is_first_pass)[m
|
||
[32m+[m[32m Struct(SampleDesc) { Vec2 uv; f32 weight; };[m
|
||
[32m+[m[32m SampleDesc samples[] = {[m
|
||
[32m+[m[32m { bloom_uv + Vec2(0, 0), 0.5 },[m
|
||
[32m+[m[32m { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },[m
|
||
[32m+[m[32m { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },[m
|
||
[32m+[m[32m { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },[m
|
||
[32m+[m[32m { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },[m
|
||
[32m+[m[32m };[m
|
||
[32m+[m[32m for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)[m
|
||
{[m
|
||
[31m- f32 luminance = LuminanceFromColor(src);[m
|
||
[31m- f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance[m
|
||
[31m- f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);[m
|
||
[31m- if (bright > 0)[m
|
||
[31m- {[m
|
||
[31m- f32 threshold = 1.0;[m
|
||
[31m- f32 knee = 0.5;[m
|
||
[31m- f32 over_threshold = max(bright - threshold, 0.0);[m
|
||
[31m- f32 ramp = saturate(over_threshold / knee);[m
|
||
[31m- knee_weight = (over_threshold * ramp * ramp) / bright;[m
|
||
[31m- }[m
|
||
[31m- else[m
|
||
[32m+[m[32m SampleDesc desc = samples[sample_idx];[m
|
||
[32m+[m[32m Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);[m
|
||
[32m+[m
|
||
[32m+[m[32m f32 knee_weight = 1;[m
|
||
[32m+[m[32m if (is_first_pass)[m
|
||
{[m
|
||
[31m- knee_weight = 0;[m
|
||
[32m+[m[32m f32 luminance = LuminanceFromColor(src);[m
|
||
[32m+[m[32m f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance[m
|
||
[32m+[m[32m f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);[m
|
||
[32m+[m[32m if (bright > 0)[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m f32 over_threshold = max(bright - threshold, 0.0);[m
|
||
[32m+[m[32m f32 ramp = saturate(over_threshold / knee);[m
|
||
[32m+[m[32m knee_weight = (over_threshold * ramp * ramp) / bright;[m
|
||
[32m+[m[32m }[m
|
||
[32m+[m[32m else[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m knee_weight = 0;[m
|
||
[32m+[m[32m }[m
|
||
}[m
|
||
[31m- }[m
|
||
[m
|
||
[31m- result += src * desc.weight * knee_weight;[m
|
||
[32m+[m[32m result += src * desc.weight * knee_weight;[m
|
||
[32m+[m[32m }[m
|
||
}[m
|
||
[m
|
||
if (IsInside(bloom_pos, down_dims))[m
|
||
[36m@@ -1031,52 +1039,77 @@[m [mComputeShader2D(V_BloomDownCS, 8, 8)[m
|
||
}[m
|
||
}[m
|
||
[m
|
||
[32m+[m[32m//////////////////////////////[m
|
||
[32m+[m[32m//- Upsample[m
|
||
[32m+[m
|
||
ComputeShader2D(V_BloomUpCS, 8, 8)[m
|
||
{[m
|
||
[32m+[m[32m i32 mips_count = V_GpuConst_MipsCount;[m
|
||
[32m+[m[32m i32 mip_idx = V_GpuConst_MipIdx;[m
|
||
[32m+[m
|
||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];[m
|
||
[31m- Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomRead);[m
|
||
[31m- RWTexture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomWrite);[m
|
||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);[m
|
||
[32m+[m[32m Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx]);[m
|
||
[32m+[m
|
||
[32m+[m[32m b32 is_last_pass = mip_idx == 0;[m
|
||
[32m+[m[32m RWTexture2D<Vec4> bloom_up;[m
|
||
[32m+[m[32m if (is_last_pass)[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.screen_rw);[m
|
||
[32m+[m[32m }[m
|
||
[32m+[m[32m else[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);[m
|
||
[32m+[m[32m }[m
|
||
[m
|
||
[31m- Vec2 up_dims = countof(bloom_up);[m
|
||
Vec2 down_dims = countof(bloom_down);[m
|
||
[32m+[m[32m Vec2 up_dims = countof(bloom_up);[m
|
||
[m
|
||
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;[m
|
||
Vec2 bloom_uv = bloom_pos / up_dims;[m
|
||
[31m- Vec2 off_uv = 1 / up_dims;[m
|
||
[32m+[m[32m Vec2 off_uv0 = 1 / down_dims;[m
|
||
[32m+[m[32m Vec2 off_uv1 = off_uv0 * 2;[m
|
||
[m
|
||
Vec4 result = 0;[m
|
||
{[m
|
||
// Center[m
|
||
[31m- result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4;[m
|
||
[31m- // Edges[m
|
||
[32m+[m[32m result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 9.0f / 41.0f;[m
|
||
[32m+[m
|
||
[32m+[m[32m // Outer Edges[m
|
||
result += ([m
|
||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) +[m
|
||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) +[m
|
||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) +[m
|
||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0)[m
|
||
[31m- ) * 2;[m
|
||
[31m- // Corners[m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv1.y), 0) +[m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, 0), 0) +[m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv1.y), 0) +[m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, 0), 0)[m
|
||
[32m+[m[32m ) * 3.0f / 41.0f;[m
|
||
[32m+[m
|
||
[32m+[m[32m // Inner corners[m
|
||
[32m+[m[32m result += ([m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, -off_uv0.y), 0) +[m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, -off_uv0.y), 0) +[m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, off_uv0.y), 0) +[m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, off_uv0.y), 0)[m
|
||
[32m+[m[32m ) * 4.0f / 41.0f;[m
|
||
[32m+[m
|
||
[32m+[m[32m // Outer corners[m
|
||
result += ([m
|
||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) +[m
|
||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) +[m
|
||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) +[m
|
||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0)[m
|
||
[31m- );[m
|
||
[31m- // Normalize[m
|
||
[31m- result /= 16;[m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, -off_uv1.y), 0) +[m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, -off_uv1.y), 0) +[m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, off_uv1.y), 0) +[m
|
||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, off_uv1.y), 0)[m
|
||
[32m+[m[32m ) * 1.0f / 41.0f;[m
|
||
}[m
|
||
[m
|
||
if (IsInside(bloom_pos, up_dims))[m
|
||
{[m
|
||
[31m- bloom_up[bloom_pos] += result;[m
|
||
[32m+[m[32m bloom_up[bloom_pos] += result * 0.75;[m
|
||
}[m
|
||
}[m
|
||
[m
|
||
////////////////////////////////////////////////////////////[m
|
||
[31m-//~ Post process[m
|
||
[32m+[m[32m//~ Finalize[m
|
||
[m
|
||
[31m-ComputeShader2D(V_PostProcessCS, 8, 8)[m
|
||
[32m+[m[32mComputeShader2D(V_FinalizeCS, 8, 8)[m
|
||
{[m
|
||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];[m
|
||
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);[m
|
||
[36m@@ -1084,42 +1117,21 @@[m [mComputeShader2D(V_PostProcessCS, 8, 8)[m
|
||
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);[m
|
||
[m
|
||
Vec2 screen_pos = SV_DispatchThreadID + 0.5;[m
|
||
[31m- Vec2 screen_uv = screen_pos / frame.screen_dims;[m
|
||
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);[m
|
||
[31m-[m
|
||
[31m- //////////////////////////////[m
|
||
[31m- //- Original[m
|
||
[31m-[m
|
||
[31m- Vec4 original = 0;[m
|
||
if (is_in_screen)[m
|
||
{[m
|
||
[31m- original = screen_tex[screen_pos];[m
|
||
[31m- original.rgb *= original.a;[m
|
||
[31m- }[m
|
||
[32m+[m[32m Vec4 result = screen_tex[screen_pos];[m
|
||
[m
|
||
[32m+[m[32m //- Tone map[m
|
||
[32m+[m[32m if (frame.should_tone_map)[m
|
||
[32m+[m[32m {[m
|
||
[32m+[m[32m // ACES approximation by Krzysztof Narkowicz[m
|
||
[32m+[m[32m // https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/[m
|
||
[32m+[m[32m result.rgb = saturate((result.rgb * (2.51f * result.rgb + 0.03f)) / (result.rgb * (2.43f * result.rgb + 0.59f) + 0.14f));[m
|
||
[32m+[m[32m }[m
|
||
[m
|
||
[31m- //////////////////////////////[m
|
||
[31m- //- Bloom[m
|
||
[31m-[m
|
||
[31m- Vec4 bloom = 0;[m
|
||
[31m- if (is_in_screen)[m
|
||
[31m- {[m
|
||
[31m- bloom = bloom_tex.SampleLevel(bilinear_sampler, screen_uv, 0);[m
|
||
[31m- // bloom.rgb *= bloom.a;[m
|
||
[31m- }[m
|
||
[31m-[m
|
||
[31m- //////////////////////////////[m
|
||
[31m- //- Compose[m
|
||
[31m-[m
|
||
[31m- Vec4 result = Vec4(0, 0, 0, 1);[m
|
||
[31m- result = BlendPremul(original, result);[m
|
||
[31m- result += bloom;[m
|
||
[31m- // result.rgb = V_ToneMap(result);[m
|
||
[32m+[m[32m result = Unpremul(result);[m
|
||
[m
|
||
[31m- result = Unpremul(result);[m
|
||
[31m-[m
|
||
[31m- if (is_in_screen)[m
|
||
[31m- {[m
|
||
screen_tex[screen_pos] = result;[m
|
||
}[m
|
||
}[m
|
||
[1mdiff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh[m
|
||
[1mindex a47a2335..f176f2f8 100644[m
|
||
[1m--- a/src/pp/pp_vis/pp_vis_gpu.gh[m
|
||
[1m+++ b/src/pp/pp_vis/pp_vis_gpu.gh[m
|
||
[36m@@ -46,7 +46,6 @@[m [mStruct(V_DVertPSOutput)[m
|
||
[m
|
||
f32 V_RandFromPos(Vec3 pos);[m
|
||
Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density);[m
|
||
[31m-Vec3 V_ToneMap(Vec3 v);[m
|
||
[m
|
||
////////////////////////////////////////////////////////////[m
|
||
//~ Shaders[m
|
||
[36m@@ -73,8 +72,8 @@[m [mComputeShader2D(V_CompositeCS, 8, 8);[m
|
||
ComputeShader2D(V_BloomDownCS, 8, 8);[m
|
||
ComputeShader2D(V_BloomUpCS, 8, 8);[m
|
||
[m
|
||
[31m-//- Post process[m
|
||
[31m-ComputeShader2D(V_PostProcessCS, 8, 8);[m
|
||
[32m+[m[32m//- Finalize[m
|
||
[32m+[m[32mComputeShader2D(V_FinalizeCS, 8, 8);[m
|
||
[m
|
||
//- Debug shapes[m
|
||
VertexShader(V_DVertVS, V_DVertPSInput);[m
|
||
[1mdiff --git a/src/pp/pp_vis/pp_vis_shared.cg b/src/pp/pp_vis/pp_vis_shared.cg[m
|
||
[1mindex 2419a6f2..72f6ae8d 100644[m
|
||
[1m--- a/src/pp/pp_vis/pp_vis_shared.cg[m
|
||
[1m+++ b/src/pp/pp_vis/pp_vis_shared.cg[m
|
||
[36m@@ -11,37 +11,42 @@[m [mV_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)[m
|
||
V_ParticleDesc result;[m
|
||
{[m
|
||
PERSIST Readonly V_ParticleFlag flags[V_ParticleKind_COUNT] = {[m
|
||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) flags,[m
|
||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) flags,[m
|
||
V_ParticlesXList(X)[m
|
||
#undef X[m
|
||
};[m
|
||
PERSIST Readonly V_ParticleLayer layers[V_ParticleKind_COUNT] = {[m
|
||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) layer,[m
|
||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) layer,[m
|
||
V_ParticlesXList(X)[m
|
||
#undef X[m
|
||
};[m
|
||
PERSIST Readonly f32 stain_rates[V_ParticleKind_COUNT] = {[m
|
||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) stain_rate,[m
|
||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) stain_rate,[m
|
||
V_ParticlesXList(X)[m
|
||
#undef X[m
|
||
};[m
|
||
PERSIST Readonly f32 pen_rates[V_ParticleKind_COUNT] = {[m
|
||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) pen_rate,[m
|
||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) pen_rate,[m
|
||
V_ParticlesXList(X)[m
|
||
#undef X[m
|
||
};[m
|
||
PERSIST Readonly f32 lifetimes[V_ParticleKind_COUNT] = {[m
|
||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) lifetime,[m
|
||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) lifetime,[m
|
||
[32m+[m[32m V_ParticlesXList(X)[m
|
||
[32m+[m[32m #undef X[m
|
||
[32m+[m[32m };[m
|
||
[32m+[m[32m PERSIST Readonly f32 prune_speed_thresholds[V_ParticleKind_COUNT] = {[m
|
||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) prune_speed_threshold,[m
|
||
V_ParticlesXList(X)[m
|
||
#undef X[m
|
||
};[m
|
||
PERSIST Readonly Vec4 base_colors[V_ParticleKind_COUNT] = {[m
|
||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) base_color,[m
|
||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) base_color,[m
|
||
V_ParticlesXList(X)[m
|
||
#undef X[m
|
||
};[m
|
||
PERSIST Readonly Vec4 dry_factor[V_ParticleKind_COUNT] = {[m
|
||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) dry_factor,[m
|
||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) dry_factor,[m
|
||
V_ParticlesXList(X)[m
|
||
#undef X[m
|
||
};[m
|
||
[36m@@ -51,6 +56,7 @@[m [mV_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)[m
|
||
result.stain_rate = stain_rates[kind];[m
|
||
result.pen_rate = pen_rates[kind];[m
|
||
result.lifetime = lifetimes[kind];[m
|
||
[32m+[m[32m result.prune_speed_threshold = prune_speed_thresholds[kind];[m
|
||
result.base_color = LinearFromSrgb(base_colors[kind]);[m
|
||
result.dry_factor = LinearFromSrgb(dry_factor[kind]);[m
|
||
}[m
|
||
[1mdiff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh[m
|
||
[1mindex 16ca6419..71d88ea5 100644[m
|
||
[1m--- a/src/pp/pp_vis/pp_vis_shared.cgh[m
|
||
[1m+++ b/src/pp/pp_vis/pp_vis_shared.cgh[m
|
||
[36m@@ -9,14 +9,13 @@[m
|
||
Enum(V_GpuFlag)[m
|
||
{[m
|
||
V_GpuFlag_None = 0,[m
|
||
[31m- V_GpuFlag_InitBloom = (1 << 0),[m
|
||
};[m
|
||
[m
|
||
G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0);[m
|
||
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1);[m
|
||
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2);[m
|
||
[31m-G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3);[m
|
||
[31m-G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);[m
|
||
[32m+[m[32mG_DeclConstant(i32, V_GpuConst_MipsCount, 3);[m
|
||
[32m+[m[32mG_DeclConstant(i32, V_GpuConst_MipIdx, 4);[m
|
||
[m
|
||
////////////////////////////////////////////////////////////[m
|
||
//~ Particle types[m
|
||
[36m@@ -29,7 +28,6 @@[m [mG_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);[m
|
||
Enum(V_ParticleFlag)[m
|
||
{[m
|
||
V_ParticleFlag_None = 0,[m
|
||
[31m- V_ParticleFlag_NoPruneWhenStill = (1 << 0),[m
|
||
V_ParticleFlag_StainWhenPruned = (1 << 1),[m
|
||
V_ParticleFlag_NoReflect = (1 << 2),[m
|
||
V_ParticleFlag_OnlyCollideWithWalls = (1 << 3),[m
|
||
[36m@@ -53,6 +51,7 @@[m [mEnum(V_ParticleLayer)[m
|
||
/* Layer */ V_ParticleLayer_Ground, \[m
|
||
/* Stain rate, pen chance */ 30, 0, \[m
|
||
/* Lifetime */ Inf, \[m
|
||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||
/* Base color */ CompVec4(0, 0, 0, 0), \[m
|
||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||
) \[m
|
||
[36m@@ -64,8 +63,9 @@[m [mEnum(V_ParticleLayer)[m
|
||
/* Layer */ V_ParticleLayer_Ground, \[m
|
||
/* Stain rate, pen chance */ 100, 0.25, \[m
|
||
/* Lifetime */ Inf, \[m
|
||
[31m- /* Base color */ CompVec4(0.5, 0.1, 0.1, 0.05), \[m
|
||
[31m- /* Dry color factor */ CompVec4(0.5, 0.5, 0.5, 1) \[m
|
||
[32m+[m[32m /* Prune speed threshold */ 0.5, \[m
|
||
[32m+[m[32m /* Base color */ CompVec4(0.6, 0.1, 0.1, 0.05), \[m
|
||
[32m+[m[32m /* Dry color factor */ CompVec4(0.4, 0.4, 0.4, 1) \[m
|
||
) \[m
|
||
X( \[m
|
||
/* Name */ BloodDebris, \[m
|
||
[36m@@ -73,6 +73,7 @@[m [mEnum(V_ParticleLayer)[m
|
||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||
/* Stain rate, pen chance */ 30, 0, \[m
|
||
/* Lifetime */ Inf, \[m
|
||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||
/* Base color */ CompVec4(0.5, 0.1, 0.1, 0.8), \[m
|
||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||
) \[m
|
||
[36m@@ -82,6 +83,7 @@[m [mEnum(V_ParticleLayer)[m
|
||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||
/* Stain rate, pen chance */ 0, 0, \[m
|
||
/* Lifetime */ Inf, \[m
|
||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||
/* Base color */ CompVec4(0.4, 0.3, 0.2, 1), \[m
|
||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||
) \[m
|
||
[36m@@ -91,6 +93,7 @@[m [mEnum(V_ParticleLayer)[m
|
||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||
/* Stain rate, pen chance */ 0, 0, \[m
|
||
/* Lifetime */ Inf, \[m
|
||
[32m+[m[32m /* Prune speed threshold */ 0.1, \[m
|
||
/* Base color */ CompVec4(2, 0.5, 0, 1), \[m
|
||
/* Dry color factor */ CompVec4(0.2, 0.1, 0.0, 1) \[m
|
||
) \[m
|
||
[36m@@ -102,6 +105,7 @@[m [mEnum(V_ParticleLayer)[m
|
||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||
/* Stain rate, pen chance */ 0, 0, \[m
|
||
/* Lifetime */ 0.075, \[m
|
||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||
/* Base color */ CompVec4(0.8, 0.6, 0.2, 0.25), \[m
|
||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||
) \[m
|
||
[36m@@ -111,6 +115,7 @@[m [mEnum(V_ParticleLayer)[m
|
||
/* Layer */ V_ParticleLayer_Air, \[m
|
||
/* Stain rate, pen chance */ 0, 0, \[m
|
||
/* Lifetime */ Inf, \[m
|
||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||
/* Base color */ CompVec4(0.25, 0.25, 0.25, 0.75), \[m
|
||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||
) \[m
|
||
[36m@@ -122,6 +127,7 @@[m [mEnum(V_ParticleLayer)[m
|
||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||
/* Stain rate, pen chance */ 0, 0, \[m
|
||
/* Lifetime */ Inf, \[m
|
||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||
/* Base color */ CompVec4(1, 1, 0, 1), \[m
|
||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||
) \[m
|
||
[36m@@ -168,6 +174,7 @@[m [mStruct(V_ParticleDesc)[m
|
||
f32 stain_rate;[m
|
||
f32 pen_rate;[m
|
||
f32 lifetime;[m
|
||
[32m+[m[32m f32 prune_speed_threshold;[m
|
||
Vec4 base_color;[m
|
||
Vec4 dry_factor;[m
|
||
};[m
|
||
[36m@@ -264,6 +271,7 @@[m [mStruct(V_SharedFrame)[m
|
||
[m
|
||
b32 tiles_dirty;[m
|
||
b32 should_clear_particles;[m
|
||
[32m+[m[32m b32 should_tone_map;[m
|
||
[m
|
||
b32 is_looking;[m
|
||
b32 is_moving;[m
|