fix bloom shimmer. use common layouts for vis textures
This commit is contained in:
parent
88f37a4bbb
commit
e9bad68135
@ -25,7 +25,7 @@ void G_BootstrapCommon(void)
|
||||
gpu_perm, cl,
|
||||
G_Format_R8G8B8A8_Uint,
|
||||
VEC2I32(8, 8),
|
||||
G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present,
|
||||
G_Layout_Simultaneous,
|
||||
.flags = G_ResourceFlag_ZeroMemory
|
||||
);
|
||||
G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex);
|
||||
@ -44,7 +44,7 @@ void G_BootstrapCommon(void)
|
||||
gpu_perm, cl,
|
||||
G_Format_R16_Uint,
|
||||
noise_dims,
|
||||
G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present
|
||||
G_Layout_Simultaneous
|
||||
);
|
||||
G_CopyCpuToTexture(
|
||||
cl,
|
||||
@ -143,30 +143,54 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList
|
||||
|
||||
//- Mip
|
||||
|
||||
// Returns the extent of mip level `mip` for a 1D texture whose mip 0 extent
// is `mip0_dims`.
//
// `mip` is clamped to [-31, 31]. A non-negative mip halves the extent once per
// level; a negative mip doubles it once per level (i.e. it walks "up" from
// mip0_dims towards a larger parent extent). The result is never below 1.
//
// NOTE(review): like the 2D/3D variants, the left shift used for negative mips
// is not guarded against overflowing i32 - callers are presumably expected to
// pass small negative values; confirm.
i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip)
{
  mip = ClampI32(mip, -31, 31);
  i32 result = 0;
  if (mip >= 0)
  {
    // Shift the caller's extent, not the zero-initialized result: shifting
    // `result` here ignored mip0_dims and made the function always return 1.
    result = MaxI32(mip0_dims >> mip, 1);
  }
  else
  {
    result = MaxI32(mip0_dims << -mip, 1);
  }
  return result;
}
|
||||
|
||||
// Returns the per-axis extent of mip level `mip` for a 2D texture whose mip 0
// extent is `mip0_dims`.
//
// `mip` is clamped to [-31, 31]. Non-negative levels shift each axis right
// (shrink), negative levels shift each axis left (grow). Every axis is floored
// at 1.
//
// NOTE(review): the left shift for negative levels is not guarded against i32
// overflow - presumably only small negative values are passed; confirm.
Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip)
{
  i32 level = ClampI32(mip, -31, 31);
  Vec2I32 dims = Zi;
  if (level < 0)
  {
    // Negative level: scale up by 2^(-level)
    i32 grow = -level;
    dims.x = MaxI32(mip0_dims.x << grow, 1);
    dims.y = MaxI32(mip0_dims.y << grow, 1);
  }
  else
  {
    // Regular mip: scale down by 2^level
    dims.x = MaxI32(mip0_dims.x >> level, 1);
    dims.y = MaxI32(mip0_dims.y >> level, 1);
  }
  return dims;
}
|
||||
|
||||
// Returns the per-axis extent of mip level `mip` for a 3D texture whose mip 0
// extent is `mip0_dims`.
//
// `mip` is clamped to [-31, 31]. Non-negative levels shift each axis right
// (shrink), negative levels shift each axis left (grow). Every axis is floored
// at 1.
//
// NOTE(review): the left shift for negative levels is not guarded against i32
// overflow - presumably only small negative values are passed; confirm.
Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip)
{
  i32 level = ClampI32(mip, -31, 31);
  Vec3I32 dims = Zi;
  if (level < 0)
  {
    // Negative level: scale up by 2^(-level)
    i32 grow = -level;
    dims.x = MaxI32(mip0_dims.x << grow, 1);
    dims.y = MaxI32(mip0_dims.y << grow, 1);
    dims.z = MaxI32(mip0_dims.z << grow, 1);
  }
  else
  {
    // Regular mip: scale down by 2^level
    dims.x = MaxI32(mip0_dims.x >> level, 1);
    dims.y = MaxI32(mip0_dims.y >> level, 1);
    dims.z = MaxI32(mip0_dims.z >> level, 1);
  }
  return dims;
}
|
||||
|
||||
|
||||
@ -35,9 +35,9 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList
|
||||
G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ })
|
||||
|
||||
//- Mip
|
||||
i32 G_DimsFromMip1D(i32 texture_dims, i32 mip);
|
||||
Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip);
|
||||
Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip);
|
||||
i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip);
|
||||
Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip);
|
||||
Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip);
|
||||
|
||||
//- Viewport / scissor
|
||||
Rng3 G_ViewportFromTexture(G_ResourceHandle texture);
|
||||
|
||||
@ -242,18 +242,16 @@ Enum(G_Access)
|
||||
G_Access_IndexBuffer = (1 << 8),
|
||||
G_Access_IndirectArgument = (1 << 9),
|
||||
|
||||
G_Access_All = 0xFFFFFFFF
|
||||
G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the specified sync stage
|
||||
};
|
||||
|
||||
Enum(G_Layout)
|
||||
{
|
||||
G_Layout_NoChange,
|
||||
|
||||
// "Simultaneous" allows a resource to be used on any queue with any access
|
||||
// type, as long as there is only one writer at a time, and the writer is not
|
||||
// writing to any texels currently being read.
|
||||
// Resources cannot transition to/from this layout. They must be created
|
||||
// with it and are locked to it.
|
||||
// Simultaneous layout allows a resource to be used on any queue with any
|
||||
// access type (except depth-stencil). Resources cannot transition to/from
|
||||
// this layout, they must be created with it.
|
||||
G_Layout_Simultaneous, // D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS
|
||||
|
||||
G_Layout_Undefined, // D3D12_BARRIER_LAYOUT_UNDEFINED
|
||||
|
||||
2
src/pp/pp_vis/pp_vis.lay
generated
2
src/pp/pp_vis/pp_vis.lay
generated
@ -26,7 +26,7 @@
|
||||
@ComputeShader V_CompositeCS
|
||||
@ComputeShader V_BloomDownCS
|
||||
@ComputeShader V_BloomUpCS
|
||||
@ComputeShader V_PostProcessCS
|
||||
@ComputeShader V_FinalizeCS
|
||||
@VertexShader V_DVertVS
|
||||
@PixelShader V_DVertPS
|
||||
|
||||
|
||||
@ -416,7 +416,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R8_Uint,
|
||||
tiles_dims,
|
||||
G_Layout_DirectQueue_ShaderRead,
|
||||
G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
|
||||
.flags = G_ResourceFlag_ZeroMemory,
|
||||
.name = Lit("Tiles")
|
||||
);
|
||||
@ -441,7 +441,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R32_Uint,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_ShaderReadWrite,
|
||||
G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = StringF(perm, "Particle cells - layer %F", FmtSint(layer))
|
||||
);
|
||||
@ -454,7 +454,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R32_Uint,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_ShaderReadWrite,
|
||||
G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = StringF(perm, "Particle densities - layer %F", FmtSint(layer))
|
||||
);
|
||||
@ -469,7 +469,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_ShaderReadWrite,
|
||||
G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = Lit("Stains")
|
||||
);
|
||||
@ -481,7 +481,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_ShaderReadWrite,
|
||||
G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = Lit("Dry stains")
|
||||
);
|
||||
@ -493,7 +493,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R32_Float,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_ShaderReadWrite,
|
||||
G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = Lit("Drynesses")
|
||||
);
|
||||
@ -505,7 +505,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
gpu_perm, cl,
|
||||
G_Format_R32_Uint,
|
||||
cells_dims,
|
||||
G_Layout_DirectQueue_ShaderReadWrite,
|
||||
G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = Lit("Occluders cells")
|
||||
);
|
||||
@ -614,6 +614,8 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
frame->dt = SecondsFromNs(frame->dt_ns);
|
||||
frame->rand = prev_frame->rand;
|
||||
|
||||
frame->should_tone_map = TweakBool("Tone mapping enabled", 1);
|
||||
|
||||
if (P_IsEntKeyNil(V.player_key))
|
||||
{
|
||||
TrueRand(StringFromStruct(&V.player_key));
|
||||
@ -4918,18 +4920,17 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
frame->tile_descs[tile_kind] = tile_desc;
|
||||
}
|
||||
}
|
||||
|
||||
// Upload tiles
|
||||
if (frame->tiles_dirty)
|
||||
{
|
||||
// LogDebugF("Uploading tiles to gpu");
|
||||
G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_CopyWrite);
|
||||
G_CopyCpuToTexture(
|
||||
frame->cl,
|
||||
gpu_tiles_res, VEC3I32(0, 0, 0),
|
||||
local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1),
|
||||
RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1))
|
||||
);
|
||||
G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_ShaderRead);
|
||||
}
|
||||
|
||||
// Screen texture
|
||||
@ -4937,7 +4938,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
frame->gpu_arena, frame->cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
frame->screen_dims,
|
||||
G_Layout_DirectQueue_ShaderReadWrite,
|
||||
G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
|
||||
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
|
||||
);
|
||||
@ -4951,11 +4952,10 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
frame->gpu_arena, frame->cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
G_DimsFromMip2D(G_Count2D(screen_target), 1),
|
||||
G_Layout_DirectQueue_ShaderReadWrite,
|
||||
G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
|
||||
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),
|
||||
// .max_mips = 4
|
||||
.max_mips = 8
|
||||
.max_mips = 64
|
||||
);
|
||||
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)
|
||||
{
|
||||
@ -4979,7 +4979,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
frame->gpu_arena, frame->cl,
|
||||
G_Format_R16G16B16A16_Float,
|
||||
frame->shade_dims,
|
||||
G_Layout_DirectQueue_ShaderReadWrite,
|
||||
G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite,
|
||||
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))
|
||||
);
|
||||
@ -5091,6 +5091,9 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
|
||||
// Sync particles & occluders
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
|
||||
// Transition albedo
|
||||
G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
@ -5113,83 +5116,63 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
G_Compute(frame->cl, V_ShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims));
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Transition G-buffers to readonly
|
||||
|
||||
{
|
||||
G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead);
|
||||
G_DumbMemoryLayoutSync(frame->cl, shade_target, G_Layout_DirectQueue_ShaderRead);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Composite pass
|
||||
|
||||
{
|
||||
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
|
||||
// Sync screen tex
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Bloom passes
|
||||
|
||||
{
|
||||
i32 mips_count = G_CountMips(bloom_target);
|
||||
i32 mips_count = G_CountMips(bloom_target) + 1;
|
||||
G_SetConstant(frame->cl, V_GpuConst_MipsCount, mips_count);
|
||||
|
||||
// NOTE: Because bloom mip chain starts at half screen size, mip_idx 0
|
||||
// actually represents the screen texture, while mip_idx - 1 represents
|
||||
// the first mip index in the bloom mip chain
|
||||
|
||||
//- Downsample + blur passes
|
||||
for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx)
|
||||
for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx)
|
||||
{
|
||||
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
|
||||
if (mip_idx == 0)
|
||||
{
|
||||
// Init bloom pyramid from screen target on first pass (prefilter)
|
||||
gpu_flags |= V_GpuFlag_InitBloom;
|
||||
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
|
||||
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro);
|
||||
}
|
||||
else
|
||||
{
|
||||
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));
|
||||
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]);
|
||||
}
|
||||
G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
|
||||
{
|
||||
G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims));
|
||||
}
|
||||
gpu_flags &= ~V_GpuFlag_InitBloom;
|
||||
G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);
|
||||
Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
|
||||
|
||||
G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
|
||||
G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(down_dims));
|
||||
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
}
|
||||
|
||||
//- Upsample passes
|
||||
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)
|
||||
{
|
||||
Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);
|
||||
Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));
|
||||
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));
|
||||
G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);
|
||||
G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(up_dims));
|
||||
|
||||
G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]);
|
||||
G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);
|
||||
|
||||
G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims));
|
||||
}
|
||||
G_DumbGlobalMemorySync(frame->cl);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Post process pass
|
||||
//- Finalization pass
|
||||
|
||||
{
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite);
|
||||
G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));
|
||||
G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
|
||||
G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Debug shapes pass
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
|
||||
|
||||
{
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
|
||||
|
||||
G_Rasterize(
|
||||
frame->cl,
|
||||
V_DVertVS, V_DVertPS,
|
||||
@ -5198,12 +5181,13 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
screen_viewport, screen_scissor,
|
||||
G_RasterMode_TriangleList
|
||||
);
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Finalize screen target
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);
|
||||
{
|
||||
Rng2 uv = Zi;
|
||||
uv.p0 = Vec2FromVec(screen_viewport.p0);
|
||||
|
||||
@ -53,13 +53,6 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
|
||||
return result;
|
||||
}
|
||||
|
||||
// ACES approximation by Krzysztof Narkowicz
|
||||
// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
|
||||
Vec3 V_ToneMap(Vec3 v)
|
||||
{
|
||||
return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Prepare frame
|
||||
|
||||
@ -142,11 +135,11 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8)
|
||||
}
|
||||
else if (over_stain.a > 0)
|
||||
{
|
||||
Vec4 stain = dry_stains[cell_pos];
|
||||
Vec4 dry_stain = max(dry_stains[cell_pos], 0);
|
||||
Vec4 stain = dry_stain;
|
||||
|
||||
stain = BlendPremul(over_stain, stain);
|
||||
dry_stain = BlendPremul(over_dry_stain, dry_stain);
|
||||
stain = BlendPremul(over_stain, stain);
|
||||
|
||||
stains[cell_pos] = stain;
|
||||
dry_stains[cell_pos] = dry_stain;
|
||||
@ -483,7 +476,7 @@ ComputeShader(V_SimParticlesCS, 64)
|
||||
particle.prev_occluder = occluder;
|
||||
}
|
||||
|
||||
if (!AnyBit(desc.flags, V_ParticleFlag_NoPruneWhenStill) && dot(particle.velocity, particle.velocity) < 0.0001)
|
||||
if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))
|
||||
{
|
||||
prune = 1;
|
||||
}
|
||||
@ -723,7 +716,6 @@ ComputeShader2D(V_CompositeCS, 8, 8)
|
||||
Vec4 ground_particle_color = 0;
|
||||
Vec4 air_particle_color = 0;
|
||||
|
||||
|
||||
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
|
||||
{
|
||||
RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);
|
||||
@ -752,9 +744,9 @@ ComputeShader2D(V_CompositeCS, 8, 8)
|
||||
// Darken wall particles / stains
|
||||
if (tile == P_TileKind_Wall)
|
||||
{
|
||||
ground_particle_color *= 0.25;
|
||||
air_particle_color *= 0.25;
|
||||
stain_color *= 0.25;
|
||||
ground_particle_color *= 0.5;
|
||||
air_particle_color *= 0.5;
|
||||
stain_color *= 0.5;
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
@ -972,57 +964,74 @@ ComputeShader2D(V_CompositeCS, 8, 8)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Bloom
|
||||
|
||||
//////////////////////////////
|
||||
//- Downsample
|
||||
|
||||
ComputeShader2D(V_BloomDownCS, 8, 8)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
Texture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomRead);
|
||||
RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
i32 mips_count = V_GpuConst_MipsCount;
|
||||
i32 mip_idx = V_GpuConst_MipIdx;
|
||||
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);
|
||||
|
||||
Texture2D<Vec4> bloom_up;
|
||||
b32 is_first_pass = mip_idx == 1;
|
||||
if (is_first_pass)
|
||||
{
|
||||
bloom_up = G_Dereference<Vec4>(frame.screen_ro);
|
||||
}
|
||||
else
|
||||
{
|
||||
bloom_up = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx - 2]);
|
||||
}
|
||||
|
||||
Vec2 up_dims = countof(bloom_up);
|
||||
Vec2 down_dims = countof(bloom_down);
|
||||
|
||||
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
|
||||
Vec2 bloom_uv = bloom_pos / down_dims;
|
||||
Vec2 off_uv = 0.5 / down_dims;
|
||||
b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom);
|
||||
|
||||
Struct(SampleDesc) { Vec2 uv; f32 weight; };
|
||||
SampleDesc samples[] = {
|
||||
{ bloom_uv + Vec2(0, 0), 0.5 },
|
||||
{ bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
|
||||
{ bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
|
||||
{ bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
|
||||
{ bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
|
||||
};
|
||||
f32 threshold = 0.25;
|
||||
f32 knee = 0.75;
|
||||
|
||||
Vec4 result = 0;
|
||||
for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
|
||||
{
|
||||
SampleDesc desc = samples[sample_idx];
|
||||
Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);
|
||||
|
||||
f32 knee_weight = 1;
|
||||
if (is_first_pass)
|
||||
// 5-tap sample
|
||||
Struct(SampleDesc) { Vec2 uv; f32 weight; };
|
||||
SampleDesc samples[] = {
|
||||
{ bloom_uv + Vec2(0, 0), 0.5 },
|
||||
{ bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },
|
||||
{ bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },
|
||||
{ bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },
|
||||
{ bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },
|
||||
};
|
||||
for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)
|
||||
{
|
||||
f32 luminance = LuminanceFromColor(src);
|
||||
f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance
|
||||
f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);
|
||||
if (bright > 0)
|
||||
{
|
||||
f32 threshold = 1.0;
|
||||
f32 knee = 0.5;
|
||||
f32 over_threshold = max(bright - threshold, 0.0);
|
||||
f32 ramp = saturate(over_threshold / knee);
|
||||
knee_weight = (over_threshold * ramp * ramp) / bright;
|
||||
}
|
||||
else
|
||||
{
|
||||
knee_weight = 0;
|
||||
}
|
||||
}
|
||||
SampleDesc desc = samples[sample_idx];
|
||||
Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);
|
||||
|
||||
result += src * desc.weight * knee_weight;
|
||||
f32 knee_weight = 1;
|
||||
if (is_first_pass)
|
||||
{
|
||||
f32 luminance = LuminanceFromColor(src);
|
||||
f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance
|
||||
f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);
|
||||
if (bright > 0)
|
||||
{
|
||||
f32 over_threshold = max(bright - threshold, 0.0);
|
||||
f32 ramp = saturate(over_threshold / knee);
|
||||
knee_weight = (over_threshold * ramp * ramp) / bright;
|
||||
}
|
||||
else
|
||||
{
|
||||
knee_weight = 0;
|
||||
}
|
||||
}
|
||||
|
||||
result += src * desc.weight * knee_weight;
|
||||
}
|
||||
}
|
||||
|
||||
if (IsInside(bloom_pos, down_dims))
|
||||
@ -1031,52 +1040,78 @@ ComputeShader2D(V_BloomDownCS, 8, 8)
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Upsample
|
||||
|
||||
ComputeShader2D(V_BloomUpCS, 8, 8)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomRead);
|
||||
RWTexture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomWrite);
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
i32 mips_count = V_GpuConst_MipsCount;
|
||||
i32 mip_idx = V_GpuConst_MipIdx;
|
||||
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx]);
|
||||
|
||||
b32 is_last_pass = mip_idx == 0;
|
||||
RWTexture2D<Vec4> bloom_up;
|
||||
if (is_last_pass)
|
||||
{
|
||||
bloom_up = G_Dereference<Vec4>(frame.screen_rw);
|
||||
}
|
||||
else
|
||||
{
|
||||
bloom_up = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);
|
||||
}
|
||||
|
||||
Vec2 up_dims = countof(bloom_up);
|
||||
Vec2 down_dims = countof(bloom_down);
|
||||
Vec2 up_dims = countof(bloom_up);
|
||||
|
||||
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;
|
||||
Vec2 bloom_uv = bloom_pos / up_dims;
|
||||
Vec2 off_uv = 1 / up_dims;
|
||||
Vec2 off_inner_uv = 1 / down_dims;
|
||||
Vec2 off_outer_uv = off_inner_uv * 2;
|
||||
|
||||
// 13-tap sample
|
||||
Vec4 result = 0;
|
||||
{
|
||||
// Center
|
||||
result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4;
|
||||
// Edges
|
||||
result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 9.0f / 41.0f;
|
||||
|
||||
// Outer Edges
|
||||
result += (
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0)
|
||||
) * 2;
|
||||
// Corners
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_outer_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_outer_uv.x, 0), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_outer_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_outer_uv.x, 0), 0)
|
||||
) * 3.0f / 41.0f;
|
||||
|
||||
// Inner corners
|
||||
result += (
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0)
|
||||
);
|
||||
// Normalize
|
||||
result /= 16;
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_inner_uv.x, -off_inner_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_inner_uv.x, -off_inner_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_inner_uv.x, off_inner_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_inner_uv.x, off_inner_uv.y), 0)
|
||||
) * 4.0f / 41.0f;
|
||||
|
||||
// Outer corners
|
||||
result += (
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_outer_uv.x, -off_outer_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_outer_uv.x, -off_outer_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_outer_uv.x, off_outer_uv.y), 0) +
|
||||
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_outer_uv.x, off_outer_uv.y), 0)
|
||||
) * 1.0f / 41.0f;
|
||||
}
|
||||
|
||||
if (IsInside(bloom_pos, up_dims))
|
||||
{
|
||||
bloom_up[bloom_pos] += result;
|
||||
bloom_up[bloom_pos] += result * 0.75;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Post process
|
||||
//~ Finalize
|
||||
|
||||
ComputeShader2D(V_PostProcessCS, 8, 8)
|
||||
ComputeShader2D(V_FinalizeCS, 8, 8)
|
||||
{
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
@ -1084,42 +1119,21 @@ ComputeShader2D(V_PostProcessCS, 8, 8)
|
||||
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
|
||||
|
||||
Vec2 screen_pos = SV_DispatchThreadID + 0.5;
|
||||
Vec2 screen_uv = screen_pos / frame.screen_dims;
|
||||
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);
|
||||
|
||||
//////////////////////////////
|
||||
//- Original
|
||||
|
||||
Vec4 original = 0;
|
||||
if (is_in_screen)
|
||||
{
|
||||
original = screen_tex[screen_pos];
|
||||
original.rgb *= original.a;
|
||||
}
|
||||
Vec4 result = screen_tex[screen_pos];
|
||||
|
||||
//- Tone map
|
||||
if (frame.should_tone_map)
|
||||
{
|
||||
// ACES approximation by Krzysztof Narkowicz
|
||||
// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/
|
||||
result.rgb = saturate((result.rgb * (2.51f * result.rgb + 0.03f)) / (result.rgb * (2.43f * result.rgb + 0.59f) + 0.14f));
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Bloom
|
||||
result = Unpremul(result);
|
||||
|
||||
Vec4 bloom = 0;
|
||||
if (is_in_screen)
|
||||
{
|
||||
bloom = bloom_tex.SampleLevel(bilinear_sampler, screen_uv, 0);
|
||||
// bloom.rgb *= bloom.a;
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Compose
|
||||
|
||||
Vec4 result = Vec4(0, 0, 0, 1);
|
||||
result = BlendPremul(original, result);
|
||||
result += bloom;
|
||||
// result.rgb = V_ToneMap(result);
|
||||
|
||||
result = Unpremul(result);
|
||||
|
||||
if (is_in_screen)
|
||||
{
|
||||
screen_tex[screen_pos] = result;
|
||||
}
|
||||
}
|
||||
|
||||
@ -46,7 +46,6 @@ Struct(V_DVertPSOutput)
|
||||
|
||||
f32 V_RandFromPos(Vec3 pos);
|
||||
Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density);
|
||||
Vec3 V_ToneMap(Vec3 v);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Shaders
|
||||
@ -73,8 +72,8 @@ ComputeShader2D(V_CompositeCS, 8, 8);
|
||||
ComputeShader2D(V_BloomDownCS, 8, 8);
|
||||
ComputeShader2D(V_BloomUpCS, 8, 8);
|
||||
|
||||
//- Post process
|
||||
ComputeShader2D(V_PostProcessCS, 8, 8);
|
||||
//- Finalize
|
||||
ComputeShader2D(V_FinalizeCS, 8, 8);
|
||||
|
||||
//- Debug shapes
|
||||
VertexShader(V_DVertVS, V_DVertPSInput);
|
||||
|
||||
@ -11,37 +11,42 @@ V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)
|
||||
V_ParticleDesc result;
|
||||
{
|
||||
PERSIST Readonly V_ParticleFlag flags[V_ParticleKind_COUNT] = {
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) flags,
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) flags,
|
||||
V_ParticlesXList(X)
|
||||
#undef X
|
||||
};
|
||||
PERSIST Readonly V_ParticleLayer layers[V_ParticleKind_COUNT] = {
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) layer,
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) layer,
|
||||
V_ParticlesXList(X)
|
||||
#undef X
|
||||
};
|
||||
PERSIST Readonly f32 stain_rates[V_ParticleKind_COUNT] = {
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) stain_rate,
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) stain_rate,
|
||||
V_ParticlesXList(X)
|
||||
#undef X
|
||||
};
|
||||
PERSIST Readonly f32 pen_rates[V_ParticleKind_COUNT] = {
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) pen_rate,
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) pen_rate,
|
||||
V_ParticlesXList(X)
|
||||
#undef X
|
||||
};
|
||||
PERSIST Readonly f32 lifetimes[V_ParticleKind_COUNT] = {
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) lifetime,
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) lifetime,
|
||||
V_ParticlesXList(X)
|
||||
#undef X
|
||||
};
|
||||
PERSIST Readonly f32 prune_speed_thresholds[V_ParticleKind_COUNT] = {
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) prune_speed_threshold,
|
||||
V_ParticlesXList(X)
|
||||
#undef X
|
||||
};
|
||||
PERSIST Readonly Vec4 base_colors[V_ParticleKind_COUNT] = {
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) base_color,
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) base_color,
|
||||
V_ParticlesXList(X)
|
||||
#undef X
|
||||
};
|
||||
PERSIST Readonly Vec4 dry_factor[V_ParticleKind_COUNT] = {
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) dry_factor,
|
||||
#define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) dry_factor,
|
||||
V_ParticlesXList(X)
|
||||
#undef X
|
||||
};
|
||||
@ -51,6 +56,7 @@ V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)
|
||||
result.stain_rate = stain_rates[kind];
|
||||
result.pen_rate = pen_rates[kind];
|
||||
result.lifetime = lifetimes[kind];
|
||||
result.prune_speed_threshold = prune_speed_thresholds[kind];
|
||||
result.base_color = LinearFromSrgb(base_colors[kind]);
|
||||
result.dry_factor = LinearFromSrgb(dry_factor[kind]);
|
||||
}
|
||||
|
||||
@ -9,14 +9,13 @@
|
||||
Enum(V_GpuFlag)
|
||||
{
|
||||
V_GpuFlag_None = 0,
|
||||
V_GpuFlag_InitBloom = (1 << 0),
|
||||
};
|
||||
|
||||
G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0);
|
||||
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1);
|
||||
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2);
|
||||
G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3);
|
||||
G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);
|
||||
G_DeclConstant(i32, V_GpuConst_MipsCount, 3);
|
||||
G_DeclConstant(i32, V_GpuConst_MipIdx, 4);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Particle types
|
||||
@ -29,7 +28,6 @@ G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);
|
||||
Enum(V_ParticleFlag)
|
||||
{
|
||||
V_ParticleFlag_None = 0,
|
||||
V_ParticleFlag_NoPruneWhenStill = (1 << 0),
|
||||
V_ParticleFlag_StainWhenPruned = (1 << 1),
|
||||
V_ParticleFlag_NoReflect = (1 << 2),
|
||||
V_ParticleFlag_OnlyCollideWithWalls = (1 << 3),
|
||||
@ -53,6 +51,7 @@ Enum(V_ParticleLayer)
|
||||
/* Layer */ V_ParticleLayer_Ground, \
|
||||
/* Stain rate, pen chance */ 30, 0, \
|
||||
/* Lifetime */ Inf, \
|
||||
/* Prune speed threshold */ 0.01, \
|
||||
/* Base color */ CompVec4(0, 0, 0, 0), \
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
|
||||
) \
|
||||
@ -64,8 +63,9 @@ Enum(V_ParticleLayer)
|
||||
/* Layer */ V_ParticleLayer_Ground, \
|
||||
/* Stain rate, pen chance */ 100, 0.25, \
|
||||
/* Lifetime */ Inf, \
|
||||
/* Base color */ CompVec4(0.5, 0.1, 0.1, 0.05), \
|
||||
/* Dry color factor */ CompVec4(0.5, 0.5, 0.5, 1) \
|
||||
/* Prune speed threshold */ 0.5, \
|
||||
/* Base color */ CompVec4(0.6, 0.1, 0.1, 0.05), \
|
||||
/* Dry color factor */ CompVec4(0.4, 0.4, 0.4, 1) \
|
||||
) \
|
||||
X( \
|
||||
/* Name */ BloodDebris, \
|
||||
@ -73,6 +73,7 @@ Enum(V_ParticleLayer)
|
||||
/* Layer */ V_ParticleLayer_Mid, \
|
||||
/* Stain rate, pen chance */ 30, 0, \
|
||||
/* Lifetime */ Inf, \
|
||||
/* Prune speed threshold */ 0.01, \
|
||||
/* Base color */ CompVec4(0.5, 0.1, 0.1, 0.8), \
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
|
||||
) \
|
||||
@ -82,6 +83,7 @@ Enum(V_ParticleLayer)
|
||||
/* Layer */ V_ParticleLayer_Mid, \
|
||||
/* Stain rate, pen chance */ 0, 0, \
|
||||
/* Lifetime */ Inf, \
|
||||
/* Prune speed threshold */ 0.01, \
|
||||
/* Base color */ CompVec4(0.4, 0.3, 0.2, 1), \
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
|
||||
) \
|
||||
@ -91,6 +93,7 @@ Enum(V_ParticleLayer)
|
||||
/* Layer */ V_ParticleLayer_Mid, \
|
||||
/* Stain rate, pen chance */ 0, 0, \
|
||||
/* Lifetime */ Inf, \
|
||||
/* Prune speed threshold */ 0.1, \
|
||||
/* Base color */ CompVec4(2, 0.5, 0, 1), \
|
||||
/* Dry color factor */ CompVec4(0.2, 0.1, 0.0, 1) \
|
||||
) \
|
||||
@ -102,6 +105,7 @@ Enum(V_ParticleLayer)
|
||||
/* Layer */ V_ParticleLayer_Mid, \
|
||||
/* Stain rate, pen chance */ 0, 0, \
|
||||
/* Lifetime */ 0.075, \
|
||||
/* Prune speed threshold */ 0.01, \
|
||||
/* Base color */ CompVec4(0.8, 0.6, 0.2, 0.25), \
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
|
||||
) \
|
||||
@ -111,6 +115,7 @@ Enum(V_ParticleLayer)
|
||||
/* Layer */ V_ParticleLayer_Air, \
|
||||
/* Stain rate, pen chance */ 0, 0, \
|
||||
/* Lifetime */ Inf, \
|
||||
/* Prune speed threshold */ 0.01, \
|
||||
/* Base color */ CompVec4(0.25, 0.25, 0.25, 0.75), \
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
|
||||
) \
|
||||
@ -122,6 +127,7 @@ Enum(V_ParticleLayer)
|
||||
/* Layer */ V_ParticleLayer_Mid, \
|
||||
/* Stain rate, pen chance */ 0, 0, \
|
||||
/* Lifetime */ Inf, \
|
||||
/* Prune speed threshold */ 0.01, \
|
||||
/* Base color */ CompVec4(1, 1, 0, 1), \
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \
|
||||
) \
|
||||
@ -168,6 +174,7 @@ Struct(V_ParticleDesc)
|
||||
f32 stain_rate;
|
||||
f32 pen_rate;
|
||||
f32 lifetime;
|
||||
f32 prune_speed_threshold;
|
||||
Vec4 base_color;
|
||||
Vec4 dry_factor;
|
||||
};
|
||||
@ -264,6 +271,7 @@ Struct(V_SharedFrame)
|
||||
|
||||
b32 tiles_dirty;
|
||||
b32 should_clear_particles;
|
||||
b32 should_tone_map;
|
||||
|
||||
b32 is_looking;
|
||||
b32 is_moving;
|
||||
|
||||
926
tatus
Normal file
926
tatus
Normal file
@ -0,0 +1,926 @@
|
||||
[1mdiff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c[m
|
||||
[1mindex a9686d87..43835793 100644[m
|
||||
[1m--- a/src/gpu/gpu_common.c[m
|
||||
[1m+++ b/src/gpu/gpu_common.c[m
|
||||
[36m@@ -25,7 +25,7 @@[m [mvoid G_BootstrapCommon(void)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R8G8B8A8_Uint,[m
|
||||
VEC2I32(8, 8),[m
|
||||
[31m- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present,[m
|
||||
[32m+[m[32m G_Layout_Simultaneous,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory[m
|
||||
);[m
|
||||
G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex);[m
|
||||
[36m@@ -44,7 +44,7 @@[m [mvoid G_BootstrapCommon(void)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R16_Uint,[m
|
||||
noise_dims,[m
|
||||
[31m- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present[m
|
||||
[32m+[m[32m G_Layout_Simultaneous[m
|
||||
);[m
|
||||
G_CopyCpuToTexture([m
|
||||
cl,[m
|
||||
[36m@@ -143,30 +143,54 @@[m [mG_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList[m
|
||||
[m
|
||||
//- Mip[m
|
||||
[m
|
||||
[31m-i32 G_DimsFromMip1D(i32 texture_dims, i32 mip)[m
|
||||
[32m+[m[32mi32 G_DimsFromMip1D(i32 mip0_dims, i32 mip)[m
|
||||
{[m
|
||||
[31m- mip = ClampI32(mip, 0, 31);[m
|
||||
[32m+[m[32m mip = ClampI32(mip, -31, 31);[m
|
||||
i32 result = 0;[m
|
||||
[31m- result = MaxI32(result >> mip, 1);[m
|
||||
[32m+[m[32m if (mip >= 0)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result = MaxI32(result >> mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result = MaxI32(result << -mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
return result;[m
|
||||
}[m
|
||||
[m
|
||||
[31m-Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip)[m
|
||||
[32m+[m[32mVec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip)[m
|
||||
{[m
|
||||
[31m- mip = ClampI32(mip, 0, 31);[m
|
||||
[32m+[m[32m mip = ClampI32(mip, -31, 31);[m
|
||||
Vec2I32 result = Zi;[m
|
||||
[31m- result.x = MaxI32(texture_dims.x >> mip, 1);[m
|
||||
[31m- result.y = MaxI32(texture_dims.y >> mip, 1);[m
|
||||
[32m+[m[32m if (mip >= 0)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result.x = MaxI32(mip0_dims.x >> mip, 1);[m
|
||||
[32m+[m[32m result.y = MaxI32(mip0_dims.y >> mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result.x = MaxI32(mip0_dims.x << -mip, 1);[m
|
||||
[32m+[m[32m result.y = MaxI32(mip0_dims.y << -mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
return result;[m
|
||||
}[m
|
||||
[m
|
||||
[31m-Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip)[m
|
||||
[32m+[m[32mVec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip)[m
|
||||
{[m
|
||||
[31m- mip = ClampI32(mip, 0, 31);[m
|
||||
[32m+[m[32m mip = ClampI32(mip, -31, 31);[m
|
||||
Vec3I32 result = Zi;[m
|
||||
[31m- result.x = MaxI32(texture_dims.x >> mip, 1);[m
|
||||
[31m- result.y = MaxI32(texture_dims.y >> mip, 1);[m
|
||||
[31m- result.z = MaxI32(texture_dims.z >> mip, 1);[m
|
||||
[32m+[m[32m if (mip >= 0)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result.x = MaxI32(mip0_dims.x >> mip, 1);[m
|
||||
[32m+[m[32m result.y = MaxI32(mip0_dims.y >> mip, 1);[m
|
||||
[32m+[m[32m result.z = MaxI32(mip0_dims.z >> mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m result.x = MaxI32(mip0_dims.x << -mip, 1);[m
|
||||
[32m+[m[32m result.y = MaxI32(mip0_dims.y << -mip, 1);[m
|
||||
[32m+[m[32m result.z = MaxI32(mip0_dims.z << -mip, 1);[m
|
||||
[32m+[m[32m }[m
|
||||
return result;[m
|
||||
}[m
|
||||
[m
|
||||
[1mdiff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h[m
|
||||
[1mindex eb3ee6d2..03927040 100644[m
|
||||
[1m--- a/src/gpu/gpu_common.h[m
|
||||
[1m+++ b/src/gpu/gpu_common.h[m
|
||||
[36m@@ -35,9 +35,9 @@[m [mG_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList[m
|
||||
G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ })[m
|
||||
[m
|
||||
//- Mip[m
|
||||
[31m-i32 G_DimsFromMip1D(i32 texture_dims, i32 mip);[m
|
||||
[31m-Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip);[m
|
||||
[31m-Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip);[m
|
||||
[32m+[m[32mi32 G_DimsFromMip1D(i32 mip0_dims, i32 mip);[m
|
||||
[32m+[m[32mVec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip);[m
|
||||
[32m+[m[32mVec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip);[m
|
||||
[m
|
||||
//- Viewport / scissor[m
|
||||
Rng3 G_ViewportFromTexture(G_ResourceHandle texture);[m
|
||||
[1mdiff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h[m
|
||||
[1mindex 7e1b329a..bed18c93 100644[m
|
||||
[1m--- a/src/gpu/gpu_core.h[m
|
||||
[1m+++ b/src/gpu/gpu_core.h[m
|
||||
[36m@@ -242,18 +242,16 @@[m [mEnum(G_Access)[m
|
||||
G_Access_IndexBuffer = (1 << 8),[m
|
||||
G_Access_IndirectArgument = (1 << 9),[m
|
||||
[m
|
||||
[31m- G_Access_All = 0xFFFFFFFF[m
|
||||
[32m+[m[32m G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the specified sync stage[m
|
||||
};[m
|
||||
[m
|
||||
Enum(G_Layout)[m
|
||||
{[m
|
||||
G_Layout_NoChange,[m
|
||||
[m
|
||||
[31m- // "Simultaneous" allows a resource to be used on any queue with any access[m
|
||||
[31m- // type, as long as there is only one writer at a time, and the writer is not[m
|
||||
[31m- // writing to any texels currently being read.[m
|
||||
[31m- // Resources cannot transition to/from this layout. They must be created[m
|
||||
[31m- // with it and are locked to it.[m
|
||||
[32m+[m[32m // Simultaneous layout allows a resource to be used on any queue with any[m
|
||||
[32m+[m[32m // access type (except depth-stencil). Resources cannot transition to/from[m
|
||||
[32m+[m[32m // this layout, they must be created with it.[m
|
||||
G_Layout_Simultaneous, // D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS[m
|
||||
[m
|
||||
G_Layout_Undefined, // D3D12_BARRIER_LAYOUT_UNDEFINED[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay[m
|
||||
[1mindex f72dc528..2d916376 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis.lay[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis.lay[m
|
||||
[36m@@ -26,7 +26,7 @@[m
|
||||
@ComputeShader V_CompositeCS[m
|
||||
@ComputeShader V_BloomDownCS[m
|
||||
@ComputeShader V_BloomUpCS[m
|
||||
[31m-@ComputeShader V_PostProcessCS[m
|
||||
[32m+[m[32m@ComputeShader V_FinalizeCS[m
|
||||
@VertexShader V_DVertVS[m
|
||||
@PixelShader V_DVertPS[m
|
||||
[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c[m
|
||||
[1mindex f2f5e6b5..338036ba 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis_core.c[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis_core.c[m
|
||||
[36m@@ -416,7 +416,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R8_Uint,[m
|
||||
tiles_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderRead,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory,[m
|
||||
.name = Lit("Tiles")[m
|
||||
);[m
|
||||
[36m@@ -441,7 +441,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R32_Uint,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = StringF(perm, "Particle cells - layer %F", FmtSint(layer))[m
|
||||
);[m
|
||||
[36m@@ -454,7 +454,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R32_Uint,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = StringF(perm, "Particle densities - layer %F", FmtSint(layer))[m
|
||||
);[m
|
||||
[36m@@ -469,7 +469,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R16G16B16A16_Float,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = Lit("Stains")[m
|
||||
);[m
|
||||
[36m@@ -481,7 +481,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R16G16B16A16_Float,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = Lit("Dry stains")[m
|
||||
);[m
|
||||
[36m@@ -493,7 +493,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R32_Float,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = Lit("Drynesses")[m
|
||||
);[m
|
||||
[36m@@ -505,7 +505,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
gpu_perm, cl,[m
|
||||
G_Format_R32_Uint,[m
|
||||
cells_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = Lit("Occluders cells")[m
|
||||
);[m
|
||||
[36m@@ -614,6 +614,8 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
frame->dt = SecondsFromNs(frame->dt_ns);[m
|
||||
frame->rand = prev_frame->rand;[m
|
||||
[m
|
||||
[32m+[m[32m frame->should_tone_map = TweakBool("Tone mapping enabled", 1);[m
|
||||
[32m+[m
|
||||
if (P_IsEntKeyNil(V.player_key))[m
|
||||
{[m
|
||||
TrueRand(StringFromStruct(&V.player_key));[m
|
||||
[36m@@ -4918,18 +4920,17 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
frame->tile_descs[tile_kind] = tile_desc;[m
|
||||
}[m
|
||||
}[m
|
||||
[32m+[m
|
||||
// Upload tiles[m
|
||||
if (frame->tiles_dirty)[m
|
||||
{[m
|
||||
// LogDebugF("Uploading tiles to gpu");[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_CopyWrite);[m
|
||||
G_CopyCpuToTexture([m
|
||||
frame->cl,[m
|
||||
gpu_tiles_res, VEC3I32(0, 0, 0),[m
|
||||
local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1),[m
|
||||
RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1))[m
|
||||
);[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_ShaderRead);[m
|
||||
}[m
|
||||
[m
|
||||
// Screen texture[m
|
||||
[36m@@ -4937,7 +4938,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
frame->gpu_arena, frame->cl,[m
|
||||
G_Format_R16G16B16A16_Float,[m
|
||||
frame->screen_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,[m
|
||||
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))[m
|
||||
);[m
|
||||
[36m@@ -4951,11 +4952,10 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
frame->gpu_arena, frame->cl,[m
|
||||
G_Format_R16G16B16A16_Float,[m
|
||||
G_DimsFromMip2D(G_Count2D(screen_target), 1),[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,[m
|
||||
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),[m
|
||||
[31m- // .max_mips = 4[m
|
||||
[31m- .max_mips = 8[m
|
||||
[32m+[m[32m .max_mips = 64[m
|
||||
);[m
|
||||
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)[m
|
||||
{[m
|
||||
[36m@@ -4979,7 +4979,7 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
frame->gpu_arena, frame->cl,[m
|
||||
G_Format_R16G16B16A16_Float,[m
|
||||
frame->shade_dims,[m
|
||||
[31m- G_Layout_DirectQueue_ShaderReadWrite,[m
|
||||
[32m+[m[32m G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite,[m
|
||||
.flags = G_ResourceFlag_AllowShaderReadWrite,[m
|
||||
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))[m
|
||||
);[m
|
||||
[36m@@ -5091,6 +5091,9 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
[m
|
||||
// Sync particles & occluders[m
|
||||
G_DumbGlobalMemorySync(frame->cl);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m // Transition albedo[m
|
||||
[32m+[m[32m G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
[36m@@ -5113,83 +5116,63 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
G_Compute(frame->cl, V_ShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims));[m
|
||||
}[m
|
||||
[m
|
||||
[31m- //////////////////////////////[m
|
||||
[31m- //- Transition G-buffers to readonly[m
|
||||
[31m-[m
|
||||
[31m- {[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead);[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, shade_target, G_Layout_DirectQueue_ShaderRead);[m
|
||||
[31m- }[m
|
||||
[31m-[m
|
||||
//////////////////////////////[m
|
||||
//- Composite pass[m
|
||||
[m
|
||||
{[m
|
||||
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));[m
|
||||
[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);[m
|
||||
[32m+[m[32m // Sync screen tex[m
|
||||
[32m+[m[32m G_DumbGlobalMemorySync(frame->cl);[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
//- Bloom passes[m
|
||||
[m
|
||||
{[m
|
||||
[31m- i32 mips_count = G_CountMips(bloom_target);[m
|
||||
[32m+[m[32m i32 mips_count = G_CountMips(bloom_target) + 1;[m
|
||||
[32m+[m[32m G_SetConstant(frame->cl, V_GpuConst_MipsCount, mips_count);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m // NOTE: Because bloom mip chain starts at half screen size, mip_idx 0[m
|
||||
[32m+[m[32m // actually represents the screen texture, while mip_idx - 1 represents[m
|
||||
[32m+[m[32m // the first mip index in the bloom mip chain[m
|
||||
[m
|
||||
//- Downsample + blur passes[m
|
||||
[31m- for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx)[m
|
||||
[32m+[m[32m for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx)[m
|
||||
{[m
|
||||
[31m- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);[m
|
||||
[31m- if (mip_idx == 0)[m
|
||||
[31m- {[m
|
||||
[31m- // Init bloom pyramid from screen target on first pass (prefilter)[m
|
||||
[31m- gpu_flags |= V_GpuFlag_InitBloom;[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro);[m
|
||||
[31m- }[m
|
||||
[31m- else[m
|
||||
[31m- {[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1));[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]);[m
|
||||
[31m- }[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);[m
|
||||
[31m- {[m
|
||||
[31m- G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims));[m
|
||||
[31m- }[m
|
||||
[31m- gpu_flags &= ~V_GpuFlag_InitBloom;[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags);[m
|
||||
[32m+[m[32m Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);[m
|
||||
[32m+[m[32m G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(down_dims));[m
|
||||
[32m+[m
|
||||
[32m+[m[32m G_DumbGlobalMemorySync(frame->cl);[m
|
||||
}[m
|
||||
[m
|
||||
//- Upsample passes[m
|
||||
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)[m
|
||||
{[m
|
||||
[31m- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx);[m
|
||||
[31m-[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx));[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1));[m
|
||||
[32m+[m[32m Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);[m
|
||||
[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]);[m
|
||||
[31m- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]);[m
|
||||
[32m+[m[32m G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx);[m
|
||||
[32m+[m[32m G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(up_dims));[m
|
||||
[m
|
||||
[31m- G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims));[m
|
||||
[31m- }[m
|
||||
[32m+[m[32m G_DumbGlobalMemorySync(frame->cl);[m
|
||||
[32m+[m[32m }[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
[31m- //- Post process pass[m
|
||||
[32m+[m[32m //- Finalization pass[m
|
||||
[m
|
||||
{[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite);[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0));[m
|
||||
[31m- G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));[m
|
||||
[32m+[m[32m G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
//- Debug shapes pass[m
|
||||
[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);[m
|
||||
[31m-[m
|
||||
{[m
|
||||
[32m+[m[32m G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);[m
|
||||
[32m+[m
|
||||
G_Rasterize([m
|
||||
frame->cl,[m
|
||||
V_DVertVS, V_DVertPS,[m
|
||||
[36m@@ -5198,12 +5181,13 @@[m [mvoid V_TickForever(WaveLaneCtx *lane)[m
|
||||
screen_viewport, screen_scissor,[m
|
||||
G_RasterMode_TriangleList[m
|
||||
);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite);[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
//- Finalize screen target[m
|
||||
[m
|
||||
[31m- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead);[m
|
||||
{[m
|
||||
Rng2 uv = Zi;[m
|
||||
uv.p0 = Vec2FromVec(screen_viewport.p0);[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g[m
|
||||
[1mindex f8a254de..c0a9e47d 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis_gpu.g[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis_gpu.g[m
|
||||
[36m@@ -53,13 +53,6 @@[m [mVec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)[m
|
||||
return result;[m
|
||||
}[m
|
||||
[m
|
||||
[31m-// ACES approximation by Krzysztof Narkowicz[m
|
||||
[31m-// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/[m
|
||||
[31m-Vec3 V_ToneMap(Vec3 v)[m
|
||||
[31m-{[m
|
||||
[31m- return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f));[m
|
||||
[31m-}[m
|
||||
[31m-[m
|
||||
////////////////////////////////////////////////////////////[m
|
||||
//~ Prepare frame[m
|
||||
[m
|
||||
[36m@@ -142,11 +135,11 @@[m [mComputeShader2D(V_PrepareCellsCS, 8, 8)[m
|
||||
}[m
|
||||
else if (over_stain.a > 0)[m
|
||||
{[m
|
||||
[31m- Vec4 stain = dry_stains[cell_pos];[m
|
||||
Vec4 dry_stain = max(dry_stains[cell_pos], 0);[m
|
||||
[32m+[m[32m Vec4 stain = dry_stain;[m
|
||||
[m
|
||||
[31m- stain = BlendPremul(over_stain, stain);[m
|
||||
dry_stain = BlendPremul(over_dry_stain, dry_stain);[m
|
||||
[32m+[m[32m stain = BlendPremul(over_stain, stain);[m
|
||||
[m
|
||||
stains[cell_pos] = stain;[m
|
||||
dry_stains[cell_pos] = dry_stain;[m
|
||||
[36m@@ -483,7 +476,7 @@[m [mComputeShader(V_SimParticlesCS, 64)[m
|
||||
particle.prev_occluder = occluder;[m
|
||||
}[m
|
||||
[m
|
||||
[31m- if (!AnyBit(desc.flags, V_ParticleFlag_NoPruneWhenStill) && dot(particle.velocity, particle.velocity) < 0.0001)[m
|
||||
[32m+[m[32m if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold))[m
|
||||
{[m
|
||||
prune = 1;[m
|
||||
}[m
|
||||
[36m@@ -723,7 +716,6 @@[m [mComputeShader2D(V_CompositeCS, 8, 8)[m
|
||||
Vec4 ground_particle_color = 0;[m
|
||||
Vec4 air_particle_color = 0;[m
|
||||
[m
|
||||
[31m-[m
|
||||
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)[m
|
||||
{[m
|
||||
RWTexture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);[m
|
||||
[36m@@ -752,9 +744,9 @@[m [mComputeShader2D(V_CompositeCS, 8, 8)[m
|
||||
// Darken wall particles / stains[m
|
||||
if (tile == P_TileKind_Wall)[m
|
||||
{[m
|
||||
[31m- ground_particle_color *= 0.25;[m
|
||||
[31m- air_particle_color *= 0.25;[m
|
||||
[31m- stain_color *= 0.25;[m
|
||||
[32m+[m[32m ground_particle_color *= 0.5;[m
|
||||
[32m+[m[32m air_particle_color *= 0.5;[m
|
||||
[32m+[m[32m stain_color *= 0.5;[m
|
||||
}[m
|
||||
[m
|
||||
//////////////////////////////[m
|
||||
[36m@@ -972,57 +964,73 @@[m [mComputeShader2D(V_CompositeCS, 8, 8)[m
|
||||
////////////////////////////////////////////////////////////[m
|
||||
//~ Bloom[m
|
||||
[m
|
||||
[32m+[m[32m//////////////////////////////[m
|
||||
[32m+[m[32m//- Downsample[m
|
||||
[32m+[m
|
||||
ComputeShader2D(V_BloomDownCS, 8, 8)[m
|
||||
{[m
|
||||
[32m+[m[32m i32 mips_count = V_GpuConst_MipsCount;[m
|
||||
[32m+[m[32m i32 mip_idx = V_GpuConst_MipIdx;[m
|
||||
[32m+[m
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];[m
|
||||
[31m- Texture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomRead);[m
|
||||
[31m- RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomWrite);[m
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);[m
|
||||
[32m+[m[32m RWTexture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m Texture2D<Vec4> bloom_up;[m
|
||||
[32m+[m[32m b32 is_first_pass = mip_idx == 1;[m
|
||||
[32m+[m[32m if (is_first_pass)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.screen_ro);[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx - 2]);[m
|
||||
[32m+[m[32m }[m
|
||||
[m
|
||||
[31m- Vec2 up_dims = countof(bloom_up);[m
|
||||
Vec2 down_dims = countof(bloom_down);[m
|
||||
[m
|
||||
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;[m
|
||||
Vec2 bloom_uv = bloom_pos / down_dims;[m
|
||||
Vec2 off_uv = 0.5 / down_dims;[m
|
||||
[31m- b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom);[m
|
||||
[m
|
||||
[31m- Struct(SampleDesc) { Vec2 uv; f32 weight; };[m
|
||||
[31m- SampleDesc samples[] = {[m
|
||||
[31m- { bloom_uv + Vec2(0, 0), 0.5 },[m
|
||||
[31m- { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },[m
|
||||
[31m- { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },[m
|
||||
[31m- { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },[m
|
||||
[31m- { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },[m
|
||||
[31m- };[m
|
||||
[32m+[m[32m f32 threshold = 0.25;[m
|
||||
[32m+[m[32m f32 knee = 0.75;[m
|
||||
[m
|
||||
Vec4 result = 0;[m
|
||||
[31m- for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)[m
|
||||
{[m
|
||||
[31m- SampleDesc desc = samples[sample_idx];[m
|
||||
[31m- Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);[m
|
||||
[31m-[m
|
||||
[31m- f32 knee_weight = 1;[m
|
||||
[31m- if (is_first_pass)[m
|
||||
[32m+[m[32m Struct(SampleDesc) { Vec2 uv; f32 weight; };[m
|
||||
[32m+[m[32m SampleDesc samples[] = {[m
|
||||
[32m+[m[32m { bloom_uv + Vec2(0, 0), 0.5 },[m
|
||||
[32m+[m[32m { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 },[m
|
||||
[32m+[m[32m { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 },[m
|
||||
[32m+[m[32m { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 },[m
|
||||
[32m+[m[32m { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 },[m
|
||||
[32m+[m[32m };[m
|
||||
[32m+[m[32m for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx)[m
|
||||
{[m
|
||||
[31m- f32 luminance = LuminanceFromColor(src);[m
|
||||
[31m- f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance[m
|
||||
[31m- f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);[m
|
||||
[31m- if (bright > 0)[m
|
||||
[31m- {[m
|
||||
[31m- f32 threshold = 1.0;[m
|
||||
[31m- f32 knee = 0.5;[m
|
||||
[31m- f32 over_threshold = max(bright - threshold, 0.0);[m
|
||||
[31m- f32 ramp = saturate(over_threshold / knee);[m
|
||||
[31m- knee_weight = (over_threshold * ramp * ramp) / bright;[m
|
||||
[31m- }[m
|
||||
[31m- else[m
|
||||
[32m+[m[32m SampleDesc desc = samples[sample_idx];[m
|
||||
[32m+[m[32m Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m f32 knee_weight = 1;[m
|
||||
[32m+[m[32m if (is_first_pass)[m
|
||||
{[m
|
||||
[31m- knee_weight = 0;[m
|
||||
[32m+[m[32m f32 luminance = LuminanceFromColor(src);[m
|
||||
[32m+[m[32m f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance[m
|
||||
[32m+[m[32m f32 bright = max(luminance, (max_rgb - 1.0) * 0.5);[m
|
||||
[32m+[m[32m if (bright > 0)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m f32 over_threshold = max(bright - threshold, 0.0);[m
|
||||
[32m+[m[32m f32 ramp = saturate(over_threshold / knee);[m
|
||||
[32m+[m[32m knee_weight = (over_threshold * ramp * ramp) / bright;[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m knee_weight = 0;[m
|
||||
[32m+[m[32m }[m
|
||||
}[m
|
||||
[31m- }[m
|
||||
[m
|
||||
[31m- result += src * desc.weight * knee_weight;[m
|
||||
[32m+[m[32m result += src * desc.weight * knee_weight;[m
|
||||
[32m+[m[32m }[m
|
||||
}[m
|
||||
[m
|
||||
if (IsInside(bloom_pos, down_dims))[m
|
||||
[36m@@ -1031,52 +1039,77 @@[m [mComputeShader2D(V_BloomDownCS, 8, 8)[m
|
||||
}[m
|
||||
}[m
|
||||
[m
|
||||
[32m+[m[32m//////////////////////////////[m
|
||||
[32m+[m[32m//- Upsample[m
|
||||
[32m+[m
|
||||
ComputeShader2D(V_BloomUpCS, 8, 8)[m
|
||||
{[m
|
||||
[32m+[m[32m i32 mips_count = V_GpuConst_MipsCount;[m
|
||||
[32m+[m[32m i32 mip_idx = V_GpuConst_MipIdx;[m
|
||||
[32m+[m
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];[m
|
||||
[31m- Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(V_GpuConst_BloomRead);[m
|
||||
[31m- RWTexture2D<Vec4> bloom_up = G_Dereference<Vec4>(V_GpuConst_BloomWrite);[m
|
||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);[m
|
||||
[32m+[m[32m Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips_ro[mip_idx]);[m
|
||||
[32m+[m
|
||||
[32m+[m[32m b32 is_last_pass = mip_idx == 0;[m
|
||||
[32m+[m[32m RWTexture2D<Vec4> bloom_up;[m
|
||||
[32m+[m[32m if (is_last_pass)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.screen_rw);[m
|
||||
[32m+[m[32m }[m
|
||||
[32m+[m[32m else[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m bloom_up = G_Dereference<Vec4>(frame.bloom_mips_rw[mip_idx - 1]);[m
|
||||
[32m+[m[32m }[m
|
||||
[m
|
||||
[31m- Vec2 up_dims = countof(bloom_up);[m
|
||||
Vec2 down_dims = countof(bloom_down);[m
|
||||
[32m+[m[32m Vec2 up_dims = countof(bloom_up);[m
|
||||
[m
|
||||
Vec2 bloom_pos = SV_DispatchThreadID + 0.5;[m
|
||||
Vec2 bloom_uv = bloom_pos / up_dims;[m
|
||||
[31m- Vec2 off_uv = 1 / up_dims;[m
|
||||
[32m+[m[32m Vec2 off_uv0 = 1 / down_dims;[m
|
||||
[32m+[m[32m Vec2 off_uv1 = off_uv0 * 2;[m
|
||||
[m
|
||||
Vec4 result = 0;[m
|
||||
{[m
|
||||
// Center[m
|
||||
[31m- result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4;[m
|
||||
[31m- // Edges[m
|
||||
[32m+[m[32m result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 9.0f / 41.0f;[m
|
||||
[32m+[m
|
||||
[32m+[m[32m // Outer Edges[m
|
||||
result += ([m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0)[m
|
||||
[31m- ) * 2;[m
|
||||
[31m- // Corners[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv1.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, 0), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv1.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, 0), 0)[m
|
||||
[32m+[m[32m ) * 3.0f / 41.0f;[m
|
||||
[32m+[m
|
||||
[32m+[m[32m // Inner corners[m
|
||||
[32m+[m[32m result += ([m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, -off_uv0.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, -off_uv0.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, off_uv0.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, off_uv0.y), 0)[m
|
||||
[32m+[m[32m ) * 4.0f / 41.0f;[m
|
||||
[32m+[m
|
||||
[32m+[m[32m // Outer corners[m
|
||||
result += ([m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) +[m
|
||||
[31m- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0)[m
|
||||
[31m- );[m
|
||||
[31m- // Normalize[m
|
||||
[31m- result /= 16;[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, -off_uv1.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, -off_uv1.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, off_uv1.y), 0) +[m
|
||||
[32m+[m[32m bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, off_uv1.y), 0)[m
|
||||
[32m+[m[32m ) * 1.0f / 41.0f;[m
|
||||
}[m
|
||||
[m
|
||||
if (IsInside(bloom_pos, up_dims))[m
|
||||
{[m
|
||||
[31m- bloom_up[bloom_pos] += result;[m
|
||||
[32m+[m[32m bloom_up[bloom_pos] += result * 0.75;[m
|
||||
}[m
|
||||
}[m
|
||||
[m
|
||||
////////////////////////////////////////////////////////////[m
|
||||
[31m-//~ Post process[m
|
||||
[32m+[m[32m//~ Finalize[m
|
||||
[m
|
||||
[31m-ComputeShader2D(V_PostProcessCS, 8, 8)[m
|
||||
[32m+[m[32mComputeShader2D(V_FinalizeCS, 8, 8)[m
|
||||
{[m
|
||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];[m
|
||||
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);[m
|
||||
[36m@@ -1084,42 +1117,21 @@[m [mComputeShader2D(V_PostProcessCS, 8, 8)[m
|
||||
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);[m
|
||||
[m
|
||||
Vec2 screen_pos = SV_DispatchThreadID + 0.5;[m
|
||||
[31m- Vec2 screen_uv = screen_pos / frame.screen_dims;[m
|
||||
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);[m
|
||||
[31m-[m
|
||||
[31m- //////////////////////////////[m
|
||||
[31m- //- Original[m
|
||||
[31m-[m
|
||||
[31m- Vec4 original = 0;[m
|
||||
if (is_in_screen)[m
|
||||
{[m
|
||||
[31m- original = screen_tex[screen_pos];[m
|
||||
[31m- original.rgb *= original.a;[m
|
||||
[31m- }[m
|
||||
[32m+[m[32m Vec4 result = screen_tex[screen_pos];[m
|
||||
[m
|
||||
[32m+[m[32m //- Tone map[m
|
||||
[32m+[m[32m if (frame.should_tone_map)[m
|
||||
[32m+[m[32m {[m
|
||||
[32m+[m[32m // ACES approximation by Krzysztof Narkowicz[m
|
||||
[32m+[m[32m // https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/[m
|
||||
[32m+[m[32m result.rgb = saturate((result.rgb * (2.51f * result.rgb + 0.03f)) / (result.rgb * (2.43f * result.rgb + 0.59f) + 0.14f));[m
|
||||
[32m+[m[32m }[m
|
||||
[m
|
||||
[31m- //////////////////////////////[m
|
||||
[31m- //- Bloom[m
|
||||
[31m-[m
|
||||
[31m- Vec4 bloom = 0;[m
|
||||
[31m- if (is_in_screen)[m
|
||||
[31m- {[m
|
||||
[31m- bloom = bloom_tex.SampleLevel(bilinear_sampler, screen_uv, 0);[m
|
||||
[31m- // bloom.rgb *= bloom.a;[m
|
||||
[31m- }[m
|
||||
[31m-[m
|
||||
[31m- //////////////////////////////[m
|
||||
[31m- //- Compose[m
|
||||
[31m-[m
|
||||
[31m- Vec4 result = Vec4(0, 0, 0, 1);[m
|
||||
[31m- result = BlendPremul(original, result);[m
|
||||
[31m- result += bloom;[m
|
||||
[31m- // result.rgb = V_ToneMap(result);[m
|
||||
[32m+[m[32m result = Unpremul(result);[m
|
||||
[m
|
||||
[31m- result = Unpremul(result);[m
|
||||
[31m-[m
|
||||
[31m- if (is_in_screen)[m
|
||||
[31m- {[m
|
||||
screen_tex[screen_pos] = result;[m
|
||||
}[m
|
||||
}[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh[m
|
||||
[1mindex a47a2335..f176f2f8 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis_gpu.gh[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis_gpu.gh[m
|
||||
[36m@@ -46,7 +46,6 @@[m [mStruct(V_DVertPSOutput)[m
|
||||
[m
|
||||
f32 V_RandFromPos(Vec3 pos);[m
|
||||
Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density);[m
|
||||
[31m-Vec3 V_ToneMap(Vec3 v);[m
|
||||
[m
|
||||
////////////////////////////////////////////////////////////[m
|
||||
//~ Shaders[m
|
||||
[36m@@ -73,8 +72,8 @@[m [mComputeShader2D(V_CompositeCS, 8, 8);[m
|
||||
ComputeShader2D(V_BloomDownCS, 8, 8);[m
|
||||
ComputeShader2D(V_BloomUpCS, 8, 8);[m
|
||||
[m
|
||||
[31m-//- Post process[m
|
||||
[31m-ComputeShader2D(V_PostProcessCS, 8, 8);[m
|
||||
[32m+[m[32m//- Finalize[m
|
||||
[32m+[m[32mComputeShader2D(V_FinalizeCS, 8, 8);[m
|
||||
[m
|
||||
//- Debug shapes[m
|
||||
VertexShader(V_DVertVS, V_DVertPSInput);[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis_shared.cg b/src/pp/pp_vis/pp_vis_shared.cg[m
|
||||
[1mindex 2419a6f2..72f6ae8d 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis_shared.cg[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis_shared.cg[m
|
||||
[36m@@ -11,37 +11,42 @@[m [mV_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)[m
|
||||
V_ParticleDesc result;[m
|
||||
{[m
|
||||
PERSIST Readonly V_ParticleFlag flags[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) flags,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) flags,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly V_ParticleLayer layers[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) layer,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) layer,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly f32 stain_rates[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) stain_rate,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) stain_rate,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly f32 pen_rates[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) pen_rate,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) pen_rate,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly f32 lifetimes[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) lifetime,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) lifetime,[m
|
||||
[32m+[m[32m V_ParticlesXList(X)[m
|
||||
[32m+[m[32m #undef X[m
|
||||
[32m+[m[32m };[m
|
||||
[32m+[m[32m PERSIST Readonly f32 prune_speed_thresholds[V_ParticleKind_COUNT] = {[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) prune_speed_threshold,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly Vec4 base_colors[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) base_color,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) base_color,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
PERSIST Readonly Vec4 dry_factor[V_ParticleKind_COUNT] = {[m
|
||||
[31m- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) dry_factor,[m
|
||||
[32m+[m[32m #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) dry_factor,[m
|
||||
V_ParticlesXList(X)[m
|
||||
#undef X[m
|
||||
};[m
|
||||
[36m@@ -51,6 +56,7 @@[m [mV_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind)[m
|
||||
result.stain_rate = stain_rates[kind];[m
|
||||
result.pen_rate = pen_rates[kind];[m
|
||||
result.lifetime = lifetimes[kind];[m
|
||||
[32m+[m[32m result.prune_speed_threshold = prune_speed_thresholds[kind];[m
|
||||
result.base_color = LinearFromSrgb(base_colors[kind]);[m
|
||||
result.dry_factor = LinearFromSrgb(dry_factor[kind]);[m
|
||||
}[m
|
||||
[1mdiff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh[m
|
||||
[1mindex 16ca6419..71d88ea5 100644[m
|
||||
[1m--- a/src/pp/pp_vis/pp_vis_shared.cgh[m
|
||||
[1m+++ b/src/pp/pp_vis/pp_vis_shared.cgh[m
|
||||
[36m@@ -9,14 +9,13 @@[m
|
||||
Enum(V_GpuFlag)[m
|
||||
{[m
|
||||
V_GpuFlag_None = 0,[m
|
||||
[31m- V_GpuFlag_InitBloom = (1 << 0),[m
|
||||
};[m
|
||||
[m
|
||||
G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0);[m
|
||||
G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1);[m
|
||||
G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2);[m
|
||||
[31m-G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 3);[m
|
||||
[31m-G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);[m
|
||||
[32m+[m[32mG_DeclConstant(i32, V_GpuConst_MipsCount, 3);[m
|
||||
[32m+[m[32mG_DeclConstant(i32, V_GpuConst_MipIdx, 4);[m
|
||||
[m
|
||||
////////////////////////////////////////////////////////////[m
|
||||
//~ Particle types[m
|
||||
[36m@@ -29,7 +28,6 @@[m [mG_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4);[m
|
||||
Enum(V_ParticleFlag)[m
|
||||
{[m
|
||||
V_ParticleFlag_None = 0,[m
|
||||
[31m- V_ParticleFlag_NoPruneWhenStill = (1 << 0),[m
|
||||
V_ParticleFlag_StainWhenPruned = (1 << 1),[m
|
||||
V_ParticleFlag_NoReflect = (1 << 2),[m
|
||||
V_ParticleFlag_OnlyCollideWithWalls = (1 << 3),[m
|
||||
[36m@@ -53,6 +51,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Ground, \[m
|
||||
/* Stain rate, pen chance */ 30, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(0, 0, 0, 0), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -64,8 +63,9 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Ground, \[m
|
||||
/* Stain rate, pen chance */ 100, 0.25, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[31m- /* Base color */ CompVec4(0.5, 0.1, 0.1, 0.05), \[m
|
||||
[31m- /* Dry color factor */ CompVec4(0.5, 0.5, 0.5, 1) \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.5, \[m
|
||||
[32m+[m[32m /* Base color */ CompVec4(0.6, 0.1, 0.1, 0.05), \[m
|
||||
[32m+[m[32m /* Dry color factor */ CompVec4(0.4, 0.4, 0.4, 1) \[m
|
||||
) \[m
|
||||
X( \[m
|
||||
/* Name */ BloodDebris, \[m
|
||||
[36m@@ -73,6 +73,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||||
/* Stain rate, pen chance */ 30, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(0.5, 0.1, 0.1, 0.8), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -82,6 +83,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||||
/* Stain rate, pen chance */ 0, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(0.4, 0.3, 0.2, 1), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -91,6 +93,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||||
/* Stain rate, pen chance */ 0, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.1, \[m
|
||||
/* Base color */ CompVec4(2, 0.5, 0, 1), \[m
|
||||
/* Dry color factor */ CompVec4(0.2, 0.1, 0.0, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -102,6 +105,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||||
/* Stain rate, pen chance */ 0, 0, \[m
|
||||
/* Lifetime */ 0.075, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(0.8, 0.6, 0.2, 0.25), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -111,6 +115,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Air, \[m
|
||||
/* Stain rate, pen chance */ 0, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(0.25, 0.25, 0.25, 0.75), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -122,6 +127,7 @@[m [mEnum(V_ParticleLayer)[m
|
||||
/* Layer */ V_ParticleLayer_Mid, \[m
|
||||
/* Stain rate, pen chance */ 0, 0, \[m
|
||||
/* Lifetime */ Inf, \[m
|
||||
[32m+[m[32m /* Prune speed threshold */ 0.01, \[m
|
||||
/* Base color */ CompVec4(1, 1, 0, 1), \[m
|
||||
/* Dry color factor */ CompVec4(1, 1, 1, 1) \[m
|
||||
) \[m
|
||||
[36m@@ -168,6 +174,7 @@[m [mStruct(V_ParticleDesc)[m
|
||||
f32 stain_rate;[m
|
||||
f32 pen_rate;[m
|
||||
f32 lifetime;[m
|
||||
[32m+[m[32m f32 prune_speed_threshold;[m
|
||||
Vec4 base_color;[m
|
||||
Vec4 dry_factor;[m
|
||||
};[m
|
||||
[36m@@ -264,6 +271,7 @@[m [mStruct(V_SharedFrame)[m
|
||||
[m
|
||||
b32 tiles_dirty;[m
|
||||
b32 should_clear_particles;[m
|
||||
[32m+[m[32m b32 should_tone_map;[m
|
||||
[m
|
||||
b32 is_looking;[m
|
||||
b32 is_moving;[m
|
||||
Loading…
Reference in New Issue
Block a user