un-double-buffer gpu frame arena. use unrounded affine for backdrop

This commit is contained in:
jacob 2026-02-24 01:34:05 -06:00
parent 879491753e
commit 3fd910702f
7 changed files with 60 additions and 49 deletions

View File

@ -726,7 +726,7 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
Struct(PixelShader) { ResourceKey resource; };
Struct(ComputeShader) { ResourceKey resource; };
#elif IsGpu
#define Semantic(t, n) t n : n
#define Semantic(type, name) type name : name
#define DeclComputeShader(name, x) [numthreads(x, 1, 1)] void name(Semantic(u32, SV_DispatchThreadID))
#define DeclComputeShader2D(name, x, y) [numthreads(x, y, 1)] void name(Semantic(Vec2U32, SV_DispatchThreadID))
#define DeclComputeShader3D(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID))

View File

@ -284,7 +284,7 @@ Enum(G_Layout)
// Barrier will execute after stages specified by `stage_prev`, and before stages specified by `stage_next`.
// When barrier executes:
// - Necessary resource flushes will occur based on `access_prev` & `access_next`
// - Texture layout will transition based on `layout` (if specified)
// - Resource layout will transition based on `layout` (if specified)
Struct(G_MemoryBarrierDesc)
{
G_ResourceHandle resource;
@ -477,9 +477,9 @@ Enum(G_BlendMode)
Struct(G_IndexBufferDesc)
{
u32 count;
u32 stride; // Either 2 for u16 indices, or 4 for u32 indices
G_ResourceHandle resource;
u32 index_size; // Either 2 for u16 indices, or 4 for u32 indices
u32 index_count;
};
Struct(G_RenderTargetDesc)
@ -582,8 +582,8 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena, G_CommandListHandle cl, G_R
//- Index buffer helpers
#define G_IdxBuff16(_res) ((G_IndexBufferDesc) { .resource = (_res), .index_size = 2, .index_count = (G_CountBuffer((_res), i16)) })
#define G_IdxBuff32(_res) ((G_IndexBufferDesc) { .resource = (_res), .index_size = 4, .index_count = (G_CountBuffer((_res), i32)) })
// Builds a G_IndexBufferDesc compound literal for `_res` with a 2-byte stride
// (16-bit indices). `count` is G_CountBuffer(_res, i16).
// NOTE(review): element type here is signed i16 while the struct comment says u16 - presumably only the size matters; confirm.
#define G_IdxBuff16(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 2, .count = (G_CountBuffer((_res), i16)) })
// Builds a G_IndexBufferDesc compound literal for `_res` with a 4-byte stride
// (32-bit indices). `count` is G_CountBuffer(_res, i32).
#define G_IdxBuff32(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 4, .count = (G_CountBuffer((_res), i32)) })
//- Render target helpers

View File

@ -2562,17 +2562,17 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
D3D12_INDEX_BUFFER_VIEW ibv = Zi;
{
G_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc;
if (desc.index_count > 0)
if (desc.count > 0)
{
G_D12_Resource *index_buffer_resource = G_D12_ResourceFromHandle(desc.resource);
ibv.BufferLocation = index_buffer_resource->buffer_gpu_address;
ibv.SizeInBytes = desc.index_size * desc.index_count;
if (desc.index_size == 2)
ibv.SizeInBytes = desc.stride * desc.count;
if (desc.stride == 2)
{
ibv.Format = DXGI_FORMAT_R16_UINT;
indices_count = ibv.SizeInBytes / 2;
}
else if (desc.index_size == 4)
else if (desc.stride == 4)
{
ibv.Format = DXGI_FORMAT_R32_UINT;
indices_count = ibv.SizeInBytes / 4;
@ -3134,7 +3134,7 @@ void G_Rasterize(
G_RasterMode raster_mode
)
{
if (instances_count > 0 && index_buffer.index_count > 0)
if (instances_count > 0 && index_buffer.count > 0)
{
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
G_D12_Cmd *cmd = G_D12_PushCmd(cl);

View File

@ -335,6 +335,8 @@ void V_TickForever(WaveLaneCtx *lane)
{
Arena *perm = PermArena();
G_ArenaHandle gpu_perm = G_PermArena();
G_ArenaHandle gpu_frame_arena = G_AcquireArena();
P_tl.debug_arena = AcquireArena(Gibi(64));
P_tl.debug_tint = VEC4(0, 0.4, 0.9, 0.75);
P_tl.out_msgs_arena = AcquireArena(Gibi(64));
@ -522,7 +524,6 @@ void V_TickForever(WaveLaneCtx *lane)
frame->quads_arena = AcquireArena(Gibi(64));
frame->dverts_arena = AcquireArena(Gibi(64));
frame->dvert_idxs_arena = AcquireArena(Gibi(64));
frame->gpu_arena = G_AcquireArena();
}
//////////////////////////////
@ -576,20 +577,18 @@ void V_TickForever(WaveLaneCtx *lane)
Arena *old_quads_arena = frame->quads_arena;
Arena *old_dverts_arena = frame->dverts_arena;
Arena *old_dvert_idxs_arena = frame->dvert_idxs_arena;
G_ArenaHandle old_gpu_arena = frame->gpu_arena;
ZeroStruct(frame);
frame->arena = old_arena;
frame->quads_arena = old_quads_arena;
frame->dverts_arena = old_dverts_arena;
frame->dvert_idxs_arena = old_dvert_idxs_arena;
frame->gpu_arena = old_gpu_arena;
}
frame->cl = G_PrepareCommandList(G_QueueKind_Direct);
ResetArena(frame->arena);
ResetArena(frame->quads_arena);
ResetArena(frame->dverts_arena);
ResetArena(frame->dvert_idxs_arena);
G_ResetArena(frame->cl, frame->gpu_arena);
G_ResetArena(frame->cl, gpu_frame_arena);
// Persist state
CopyBytes(frame->held_buttons, prev_frame->held_buttons, sizeof(frame->held_buttons));
@ -1053,15 +1052,23 @@ void V_TickForever(WaveLaneCtx *lane)
//////////////////////////////
//- Compute frame affines
// World <-> screen (raw)
frame->af.world_to_screen_raw = AffineIdentity;
frame->af.screen_to_world_raw = AffineIdentity;
{
f32 camera_scale = frame->screen_dims.x / (meters_per_camera_width * frame->camera_zoom);
frame->af.world_to_screen_raw = TranslateAffine(frame->af.world_to_screen_raw, MulVec2(frame->screen_dims, 0.5));
frame->af.world_to_screen_raw = ScaleAffine(frame->af.world_to_screen_raw, VEC2(camera_scale, camera_scale));
frame->af.world_to_screen_raw = TranslateAffine(frame->af.world_to_screen_raw, NegVec2(frame->camera_pos));
frame->af.screen_to_world_raw = InvertAffine(frame->af.world_to_screen_raw);
}
// World <-> screen
frame->af.world_to_screen = AffineIdentity;
frame->af.screen_to_world = AffineIdentity;
{
f32 camera_scale = frame->screen_dims.x / (meters_per_camera_width * frame->camera_zoom);
frame->af.world_to_screen = TranslateAffine(frame->af.world_to_screen, MulVec2(frame->screen_dims, 0.5));
frame->af.world_to_screen = ScaleAffine(frame->af.world_to_screen, VEC2(camera_scale, camera_scale));
frame->af.world_to_screen = TranslateAffine(frame->af.world_to_screen, NegVec2(frame->camera_pos));
// frame->af.world_to_screen.og = RoundVec2(frame->af.world_to_screen.og);
// Start from the unrounded (raw) world->screen affine; its origin is rounded on the next line.
frame->af.world_to_screen = frame->af.world_to_screen_raw;
frame->af.world_to_screen.og = RoundVec2(frame->af.world_to_screen.og);
frame->af.screen_to_world = InvertAffine(frame->af.world_to_screen);
}
@ -1073,7 +1080,7 @@ void V_TickForever(WaveLaneCtx *lane)
frame->af.world_to_shade = TranslateAffine(frame->af.world_to_shade, MulVec2(frame->shade_dims, 0.5));
frame->af.world_to_shade = ScaleAffine(frame->af.world_to_shade, VEC2(camera_scale, camera_scale));
frame->af.world_to_shade = TranslateAffine(frame->af.world_to_shade, NegVec2(frame->camera_pos));
// frame->af.world_to_shade.og = RoundVec2(frame->af.world_to_shade.og);
frame->af.world_to_shade.og = RoundVec2(frame->af.world_to_shade.og);
frame->af.shade_to_world = InvertAffine(frame->af.world_to_shade);
}
@ -1082,7 +1089,7 @@ void V_TickForever(WaveLaneCtx *lane)
frame->af.screen_to_shade = AffineIdentity;
{
frame->af.shade_to_screen = MulAffine(frame->af.world_to_screen, frame->af.shade_to_world);
// frame->af.shade_to_screen.og = RoundVec2(frame->af.shade_to_screen.og);
frame->af.shade_to_screen.og = RoundVec2(frame->af.shade_to_screen.og);
frame->af.screen_to_shade = InvertAffine(frame->af.shade_to_screen);
}
@ -1093,7 +1100,7 @@ void V_TickForever(WaveLaneCtx *lane)
{
frame->af.world_to_cell = ScaleAffine(frame->af.world_to_cell, VEC2(P_CellsPerMeter, P_CellsPerMeter));
frame->af.world_to_cell = TranslateAffine(frame->af.world_to_cell, VEC2((P_WorldPitch / 2.0), (P_WorldPitch / 2.0)));
// frame->af.world_to_cell.og = RoundVec2(frame->af.world_to_cell.og);
frame->af.world_to_cell.og = RoundVec2(frame->af.world_to_cell.og);
frame->af.cell_to_world = InvertAffine(frame->af.world_to_cell);
}
@ -4796,7 +4803,7 @@ void V_TickForever(WaveLaneCtx *lane)
// Screen texture
G_ResourceHandle screen_target = G_PushTexture2D(
frame->gpu_arena, frame->cl,
gpu_frame_arena, frame->cl,
G_Format_R16G16B16A16_Float,
frame->screen_dims,
G_Layout_DirectQueue_RenderTarget,
@ -4805,12 +4812,12 @@ void V_TickForever(WaveLaneCtx *lane)
);
Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1));
Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y));
frame->screen_ro = G_PushTexture2DRef(frame->gpu_arena, screen_target);
frame->screen_rw = G_PushRWTexture2DRef(frame->gpu_arena, screen_target);
frame->screen_ro = G_PushTexture2DRef(gpu_frame_arena, screen_target);
frame->screen_rw = G_PushRWTexture2DRef(gpu_frame_arena, screen_target);
// Bloom texture
G_ResourceHandle bloom_target = G_PushTexture2D(
frame->gpu_arena, frame->cl,
gpu_frame_arena, frame->cl,
G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(screen_target), 1),
G_Layout_DirectQueue_General,
@ -4820,24 +4827,24 @@ void V_TickForever(WaveLaneCtx *lane)
);
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)
{
frame->bloom_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
frame->bloom_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
frame->bloom_mips_ro[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
frame->bloom_mips_rw[mip_idx] = G_PushRWTexture2DRef(gpu_frame_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx));
}
// Albedo texture
G_ResourceHandle albedo_target = G_PushTexture2D(
frame->gpu_arena, frame->cl,
gpu_frame_arena, frame->cl,
G_Format_R16G16B16A16_Float,
frame->screen_dims,
G_Layout_DirectQueue_RenderTarget,
.flags = G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick))
);
frame->albedo_ro = G_PushTexture2DRef(frame->gpu_arena, albedo_target);
frame->albedo_ro = G_PushTexture2DRef(gpu_frame_arena, albedo_target);
// Backdrop texture
G_ResourceHandle backdrop_target = G_PushTexture2D(
frame->gpu_arena, frame->cl,
gpu_frame_arena, frame->cl,
G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(screen_target), 0),
G_Layout_DirectQueue_General,
@ -4847,13 +4854,13 @@ void V_TickForever(WaveLaneCtx *lane)
);
// NOTE(review): this loop populates the backdrop mip refs but is bounded by
// G_CountMips(bloom_target), not backdrop_target. The two textures are created
// from different mip dimensions, so their mip counts may differ - confirm this
// bound is intentional and not a copy-paste from the bloom loop.
for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx)
{
frame->backdrop_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx));
frame->backdrop_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx));
frame->backdrop_mips_ro[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx));
frame->backdrop_mips_rw[mip_idx] = G_PushRWTexture2DRef(gpu_frame_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx));
}
// Shade texture
G_ResourceHandle shade_target = G_PushTexture2D(
frame->gpu_arena, frame->cl,
gpu_frame_arena, frame->cl,
G_Format_R16G16B16A16_Float,
frame->shade_dims,
G_Layout_DirectQueue_General,
@ -4862,29 +4869,29 @@ void V_TickForever(WaveLaneCtx *lane)
);
Rng3 shade_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->shade_dims.x, frame->shade_dims.y, 1));
Rng2 shade_scissor = RNG2(VEC2(shade_viewport.p0.x, shade_viewport.p0.y), VEC2(shade_viewport.p1.x, shade_viewport.p1.y));
frame->shade_ro = G_PushTexture2DRef(frame->gpu_arena, shade_target);
frame->shade_rw = G_PushRWTexture2DRef(frame->gpu_arena, shade_target);
frame->shade_ro = G_PushTexture2DRef(gpu_frame_arena, shade_target);
frame->shade_rw = G_PushRWTexture2DRef(gpu_frame_arena, shade_target);
// Quad buffers
G_ResourceHandle quads_buff = G_PushBufferFromCpuCopy(
frame->gpu_arena, frame->cl,
gpu_frame_arena, frame->cl,
StringFromArena(frame->quads_arena),
.name = StringF(frame->arena, "quads [%F]", FmtSint(frame->tick))
);
frame->quads = G_PushStructuredBufferRef(frame->gpu_arena, quads_buff, V_Quad);
frame->quads = G_PushStructuredBufferRef(gpu_frame_arena, quads_buff, V_Quad);
// Debug shape buffers
G_ResourceHandle dverts_buff = G_PushBufferFromCpuCopy(
frame->gpu_arena, frame->cl,
gpu_frame_arena, frame->cl,
StringFromArena(frame->dverts_arena),
.name = StringF(frame->arena, "dverts [%F]", FmtSint(frame->tick))
);
G_ResourceHandle dvert_idxs_buff = G_PushBufferFromCpuCopy(
frame->gpu_arena, frame->cl,
gpu_frame_arena, frame->cl,
StringFromArena(frame->dvert_idxs_arena),
.name = StringF(frame->arena, "dvert idxs [%F]", FmtSint(frame->tick))
);
frame->dverts = G_PushStructuredBufferRef(frame->gpu_arena, dverts_buff, V_DVert);
frame->dverts = G_PushStructuredBufferRef(gpu_frame_arena, dverts_buff, V_DVert);
G_IndexBufferDesc dvert_idxs_ib = G_IdxBuff32(dvert_idxs_buff);
// Particles
@ -4901,20 +4908,20 @@ void V_TickForever(WaveLaneCtx *lane)
}
}
gpu_emitters = G_PushBufferFromCpuCopy(
frame->gpu_arena, frame->cl,
gpu_frame_arena, frame->cl,
StringFromStructs(flattened_emitters, frame->emitters_count),
.name = StringF(frame->arena, "emitters [%F]", FmtSint(frame->tick))
);
}
frame->emitters = G_PushStructuredBufferRef(frame->gpu_arena, gpu_emitters, V_Emitter);
frame->emitters = G_PushStructuredBufferRef(gpu_frame_arena, gpu_emitters, V_Emitter);
// Upload gpu frame
G_ResourceHandle gpu_frame_res = G_PushBufferFromCpuCopy(
frame->gpu_arena, frame->cl,
gpu_frame_arena, frame->cl,
StringFromStruct(&frame->shared_frame),
.name = StringF(frame->arena, "Gpu frame [%F]", FmtSint(frame->tick))
);
G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame);
G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(gpu_frame_arena, gpu_frame_res, V_SharedFrame);
// Set initial constants
V_GpuFlag gpu_flags = V_GpuFlag_None;
@ -5075,6 +5082,7 @@ void V_TickForever(WaveLaneCtx *lane)
//////////////////////////////
//- Debug shapes pass
if (dvert_idxs_ib.count > 0)
{
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTarget);

View File

@ -224,7 +224,6 @@ Struct(V_Frame)
Arena *quads_arena;
Arena *dverts_arena;
Arena *dvert_idxs_arena;
G_ArenaHandle gpu_arena;
G_CommandListHandle cl;
Embed(V_SharedFrame, shared_frame);

View File

@ -203,7 +203,7 @@ ImplComputeShader2D(V_BackdropDownCS)
Vec2 off_uv = 0.5 / down_dims;
Vec2 screen_pos = bd_uv * frame.screen_dims;
Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1));
Vec2 world_pos = mul(frame.af.screen_to_world_raw, Vec3(screen_pos, 1));
Rng2 world_bounds = { Vec2(-P_WorldPitch, -P_WorldPitch) * 0.5, Vec2(P_WorldPitch, P_WorldPitch) * 0.5 };
Vec4 result = 0;

View File

@ -235,6 +235,10 @@ Enum(V_EditMode)
Struct(V_Affines)
{
// World <-> screen (raw)
Affine world_to_screen_raw;
Affine screen_to_world_raw;
// World <-> screen
Affine world_to_screen;
Affine screen_to_world;