turn composite pass into compute shader

This commit is contained in:
jacob 2026-02-15 04:11:08 -06:00
parent fff1b69eff
commit 83a41fc289
8 changed files with 97 additions and 71 deletions

View File

@ -61,12 +61,11 @@ void G_Bootstrap(void)
String error = Lit("Could not initialize GPU device.");
String first_gpu_name = Zi;
u32 adapter_index = 0;
b32 skip = 0; // For iGPU testing
for (;;)
{
b32 done = 0;
i32 skips = 0; // For iGPU testing
while (!done)
{
hr = IDXGIFactory6_EnumAdapterByGpuPreference(G_D12.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter3, (void **)&adapter);
}
if (SUCCEEDED(hr))
{
DXGI_ADAPTER_DESC1 desc;
@ -80,24 +79,29 @@ void G_Bootstrap(void)
// - HighestShaderModel >= D3D_SHADER_MODEL_6_6
// - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3
// - EnhancedBarriersSupported == 1
// - AtomicInt64OnDescriptorHeapResourceSupported == 1
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device);
}
if (SUCCEEDED(hr) && !skip)
if (SUCCEEDED(hr) && skips <= 0)
{
break;
done = 1;
}
skip = 0;
else
{
skips -= 1;
adapter_index += 1;
ID3D12Device_Release(device);
IDXGIAdapter3_Release(adapter);
adapter = 0;
device = 0;
++adapter_index;
}
}
else
{
break;
done = 1;
}
}
if (!device)
{
if (first_gpu_name.len > 0)

View File

@ -23,8 +23,7 @@
@ComputeShader V_EmitParticlesCS
@ComputeShader V_SimParticlesCS
@ComputeShader V_ShadeCS
@VertexShader V_CompositeVS
@PixelShader V_CompositePS
@ComputeShader V_CompositeCS
@VertexShader V_DVertVS
@PixelShader V_DVertPS

View File

@ -2566,7 +2566,8 @@ void V_TickForever(WaveLaneCtx *lane)
{
V_Emitter emitter = Zi;
emitter.kind = V_ParticleKind_Blood;
emitter.kind = V_ParticleKind_BloodTrail;
// emitter.kind = V_ParticleKind_BloodDebris;
f32 angle = AngleFromVec2(frame->look);
// f32 angle = 0;
@ -2585,19 +2586,34 @@ void V_TickForever(WaveLaneCtx *lane)
emitter.speed.max = speed + speed_spread * 0.5;
emitter.angle.min = angle - angle_spread * 0.5;
emitter.angle.max = angle + angle_spread * 0.5;
emitter.count = Kibi(32) * frame->dt;
V_PushParticles(emitter);
}
// emitter.falloff.min = emitter.falloff.max = 0;
{
V_Emitter emitter = Zi;
// emitter.count = CeilF32(Kibi(64) * frame->dt);
// emitter.count = Mebi(16);
// emitter.count = Mebi(2);
// emitter.count = Kibi(32);
// emitter.count = Kibi(8);
emitter.count = 128;
// emitter.count = 128;
// emitter.count = 32;
// emitter.count = 1;
// emitter.kind = V_ParticleKind_BloodTrail;
emitter.kind = V_ParticleKind_BloodDebris;
f32 angle = AngleFromVec2(frame->look);
// f32 angle = 0;
f32 angle_spread = Tau * 0.25;
// f32 angle_spread = Tau;
// f32 angle_spread = 0;
// f32 speed = 5;
f32 speed = 10;
// f32 speed = 50;
// f32 speed = 100;
f32 speed_spread = speed * 2;
emitter.pos.p0 = emitter.pos.p1 = frame->world_cursor;
emitter.speed.min = speed - speed_spread * 0.5;
emitter.speed.max = speed + speed_spread * 0.5;
emitter.angle.min = angle - angle_spread * 0.5;
emitter.angle.max = angle + angle_spread * 0.5;
emitter.count = Kibi(32) * frame->dt;
V_PushParticles(emitter);
}
}
@ -4829,11 +4845,12 @@ void V_TickForever(WaveLaneCtx *lane)
frame->gpu_arena, frame->cl,
G_Format_R16G16B16A16_Float,
frame->screen_dims,
G_Layout_DirectQueue_RenderTargetWrite,
.flags = G_ResourceFlag_AllowRenderTarget,
G_Layout_DirectQueue_ShaderReadWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
);
frame->screen_ro = G_PushTexture2DRef(frame->gpu_arena, screen_target);
frame->screen_rw = G_PushRWTexture2DRef(frame->gpu_arena, screen_target);
Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1));
Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y));
@ -4855,8 +4872,6 @@ void V_TickForever(WaveLaneCtx *lane)
frame->shade_dims,
G_Layout_DirectQueue_ShaderReadWrite,
.flags = G_ResourceFlag_AllowShaderReadWrite,
// FIXME: Remove this
// .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_ForceNoReuse,
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))
);
frame->shade_ro = G_PushTexture2DRef(frame->gpu_arena, shade_target);
@ -5015,14 +5030,9 @@ void V_TickForever(WaveLaneCtx *lane)
if (!disable_vis_draw)
{
G_Rasterize(
frame->cl,
V_CompositeVS, V_CompositePS,
1, G_QuadIndices(),
1, &G_Rt(screen_target, G_BlendMode_CompositeStraightAlpha),
screen_viewport, screen_scissor,
G_RasterMode_TriangleList
);
G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims));
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite);
}
//////////////////////////////

View File

@ -44,7 +44,7 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den
// f32 t = smoothstep(0, 2, (f32)density);
result.a = lerp(0, 0.85, t);
}
else if (particle_kind == V_ParticleKind_Blood)
else if (particle_kind == V_ParticleKind_BloodTrail || particle_kind == V_ParticleKind_BloodDebris)
{
// f32 t = (f32)density / 5;
// t = pow(t, 2);
@ -53,13 +53,18 @@ Vec4 V_ColorFromParticle(V_ParticleKind particle_kind, u32 particle_idx, u32 den
f32 t = (f32)density / 5;
// t = smoothstep(-10, 10, t);
t = smoothstep(-5, 5, t);
// t = smoothstep(-5, 5, t);
t = smoothstep(0, 50, t);
// result.rgb *= 1.0 - (t * 0.9);
result.a = t;
// result.a = t;
result.a += (1.0 - result.a) * (t);
}
}
result.rgb += (rand_color - 0.5) * 0.025;
result.rgb = saturate(result.rgb + (rand_color - 0.5) * 0.05);
// result.a += (rand_alpha - 0.5) * 0.025;
// result.a *= rand_alpha;
// Apply dryness
result.rgb *= 1.0 - (dryness * 0.75);
@ -284,6 +289,9 @@ ComputeShader(V_SimParticlesCS, 64)
f32 rand_speed = Norm16(seed0 >> 32);
f32 rand_falloff = Norm16(seed0 >> 48);
u64 seed1 = MixU64(seed0);
f32 rand_density = Norm16(seed1 >> 0);
//////////////////////////////
//- Init
@ -454,8 +462,10 @@ ComputeShader(V_SimParticlesCS, 64)
u32 stains_count = floor(particle.stain_accum);
if (stains_count > 0)
{
// TODO: Fixed point
u32 density = round(stains_count * rand_density);
InterlockedMax(stain_cells[cell_pos], packed);
InterlockedAdd(stain_densities[cell_pos], stains_count);
InterlockedAdd(stain_densities[cell_pos], density);
drynesses[cell_pos] = 0;
particle.stain_accum -= stains_count;
}
@ -553,25 +563,12 @@ ComputeShader2D(V_ShadeCS, 8, 8)
////////////////////////////////////////////////////////////
//~ Composite
//////////////////////////////
//- Vertex shader
// Vertex shader for the composite pass.
// Derives a UV from the vertex index and emits the matching clip-space position.
// Only sv_position is written; no other V_CompositePSInput field is set in this block.
// NOTE(review): presumably draws a screen-covering rect — confirm against RectUvFromIdx and the draw call.
VertexShader(V_CompositeVS, V_CompositePSInput)
{
// Look up the rect UV for this vertex index.
Vec2 uv = RectUvFromIdx(SV_VertexID);
V_CompositePSInput result;
// xy comes from the UV converted to NDC; z is fixed at 0 and w at 1.
result.sv_position = Vec4(NdcFromUv(uv).xy, 0, 1);
return result;
}
//////////////////////////////
//- Pixel shader
PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
ComputeShader2D(V_CompositeCS, 8, 8)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_ShaderConst_Frame)[0];
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo_ro);
RWTexture2D<Vec4> screen_tex = G_Dereference<Vec4>(frame.screen_rw);
RWTexture2D<u32> stain_cells = G_Dereference<u32>(frame.stain_cells);
RWTexture2D<u32> ground_cells = G_Dereference<u32>(frame.ground_cells);
RWTexture2D<u32> stain_densities = G_Dereference<u32>(frame.stain_densities);
@ -583,7 +580,7 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
SamplerState clamp_sampler = G_Dereference(frame.pt_clamp_sampler);
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
Vec2 screen_pos = input.sv_position.xy;
Vec2 screen_pos = SV_DispatchThreadID.xy + 0.5;
Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1));
Vec2 tile_pos = mul(frame.af.world_to_tile, Vec3(world_pos, 1));
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1));
@ -593,6 +590,7 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
Vec2 world_bounds_screen_p0 = mul(frame.af.world_to_screen, Vec3(-half_world_dims.xy, 1));
Vec2 world_bounds_screen_p1 = mul(frame.af.world_to_screen, Vec3(half_world_dims.xy, 1));
b32 is_in_world = all(cell_pos >= 0) && all(cell_pos < countof(ground_cells));
b32 is_in_screen = all(screen_pos >= 0) && all(screen_pos < countof(screen_tex));
P_TileKind tile = tiles[tile_pos];
P_TileKind equipped_tile = frame.equipped_tile;
@ -929,9 +927,10 @@ PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input)
result = Unpremul(result);
V_CompositePSOutput output;
output.sv_target0 = result;
return output;
if (is_in_screen)
{
screen_tex[screen_pos] = result;
}
}
////////////////////////////////////////////////////////////

View File

@ -68,6 +68,7 @@ ComputeShader2D(V_ShadeCS, 8, 8);
//- Composite
VertexShader(V_CompositeVS, V_CompositePSInput);
PixelShader(V_CompositePS, V_CompositePSOutput, V_CompositePSInput input);
ComputeShader2D(V_CompositeCS, 8, 8);
//- Debug shapes
VertexShader(V_DVertVS, V_DVertPSInput);

View File

@ -137,6 +137,7 @@ Struct(V_SharedFrame)
G_Texture2DRef tiles;
G_Texture2DRef screen_ro;
G_RWTexture2DRef screen_rw;
G_Texture2DRef shade_ro;
G_RWTexture2DRef shade_rw;
G_Texture2DRef albedo_ro;
@ -198,10 +199,16 @@ Enum(V_ParticleFlag)
\
/* Ground particles */ \
X( \
/* Name */ Blood, \
/* Flags */ V_ParticleFlag_None | V_ParticleFlag_NoReflect, \
/* Name */ BloodTrail, \
/* Flags */ V_ParticleFlag_NoReflect, \
/* Stain rate, pen chance */ 500, 0.25, \
/* Base color */ 0.5, 0.1, 0.1, 1 \
/* Base color */ 0.5, 0.1, 0.1, 0.1 \
) \
X( \
/* Name */ BloodDebris, \
/* Flags */ V_ParticleFlag_Ground | V_ParticleFlag_PruneWhenStill | V_ParticleFlag_StainWhenPruned, \
/* Stain rate, pen chance */ 1, 0, \
/* Base color */ 0.5, 0.1, 0.1, 0.8 \
) \
X( \
/* Name */ Debris, \
@ -231,7 +238,7 @@ Enum(V_ParticleFlag)
/* Stain rate, pen chance */ 0, 0, \
/* Base color */ 1, 1, 0, 1 \
) \
/* -------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------------------------------------------------------------------- */
Enum(V_ParticleKind)
{

View File

@ -39,7 +39,7 @@ Struct(PT_BlitPSOutput)
VertexShader(PT_BlitVS, PT_BlitPSInput)
{
Vec2 uv = RectUvFromVertexId(SV_VertexID);
Vec2 uv = RectUvFromIdx(SV_VertexID);
PT_BlitPSInput result;
result.sv_position = Vec4(NdcFromUv(uv).xy, 0, 1);
result.src_uv = uv;

View File

@ -1,5 +1,10 @@
WND_W32_Ctx WND_W32 = Zi;
////////////////////////////////////////////////////////////
//~ Win32 libs
#pragma comment(lib, "gdi32")
////////////////////////////////////////////////////////////
//~ @hookimpl Bootstrap
@ -130,6 +135,7 @@ void WND_W32_ProcessMessagesForever(WaveLaneCtx *lane)
{
WND_W32_Window *window = &WND_W32.window;
window->w2u_events_arena = AcquireArena(Gibi(64));
Atomic64Set(&window->desired_cursor, (i64)WND_W32.cursors[WND_CursorKind_Default]);
//- Initialize hwnd
{
@ -231,7 +237,7 @@ LRESULT CALLBACK WND_W32_WindowProc(HWND hwnd, UINT msg, WPARAM wparam, LPARAM l
if ((HWND)wparam == hwnd && LOWORD(lparam) == HTCLIENT)
{
HCURSOR desired_cursor = (HCURSOR)Atomic64Fetch(&window->desired_cursor);
b32 desired_cursor_hidden = !Atomic64Fetch(&window->desired_cursor_hidden);
b32 desired_cursor_hidden = !!Atomic64Fetch(&window->desired_cursor_hidden);
if (desired_cursor != window->active_cursor)
{
SetCursor(desired_cursor);
@ -241,13 +247,13 @@ LRESULT CALLBACK WND_W32_WindowProc(HWND hwnd, UINT msg, WPARAM wparam, LPARAM l
{
if (desired_cursor_hidden)
{
while (ShowCursor(1) < 0)
while (ShowCursor(0) >= 0)
{
}
}
else
{
while (ShowCursor(0) >= 0)
while (ShowCursor(1) < 0)
{
}
}