From 3fd910702f7494e113c95f282eaf1647c5b0160b Mon Sep 17 00:00:00 2001 From: jacob Date: Tue, 24 Feb 2026 01:34:05 -0600 Subject: [PATCH] un-double-buffer gpu frame arena. use unrounded affine for backdrop --- src/base/base.cgh | 2 +- src/gpu/gpu_core.h | 12 ++--- src/gpu/gpu_dx12/gpu_dx12_core.c | 10 ++-- src/pp/pp_vis/pp_vis_core.c | 78 ++++++++++++++++++-------------- src/pp/pp_vis/pp_vis_core.h | 1 - src/pp/pp_vis/pp_vis_gpu.g | 2 +- src/pp/pp_vis/pp_vis_shared.cgh | 4 ++ 7 files changed, 60 insertions(+), 49 deletions(-) diff --git a/src/base/base.cgh b/src/base/base.cgh index 1b0e4065..3ff55d03 100644 --- a/src/base/base.cgh +++ b/src/base/base.cgh @@ -726,7 +726,7 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; } Struct(PixelShader) { ResourceKey resource; }; Struct(ComputeShader) { ResourceKey resource; }; #elif IsGpu - #define Semantic(t, n) t n : n + #define Semantic(type, name) type name : name #define DeclComputeShader(name, x) [numthreads(x, 1, 1)] void name(Semantic(u32, SV_DispatchThreadID)) #define DeclComputeShader2D(name, x, y) [numthreads(x, y, 1)] void name(Semantic(Vec2U32, SV_DispatchThreadID)) #define DeclComputeShader3D(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID)) diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 3216eff6..8186f548 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -284,7 +284,7 @@ Enum(G_Layout) // Barrier will execute after stages specified by `stage_prev`, and before stages specified by `stage_next`. // When barrier executes: // - Necessary resource flushes will occur based on `access_prev` & `access_next` -// - Texture layout will transition based on `layout` (if specified) +// - Resource layout will transition based on `layout` (if specified) Struct(G_MemoryBarrierDesc) { G_ResourceHandle resource; @@ -294,7 +294,7 @@ Struct(G_MemoryBarrierDesc) G_Access access_prev; G_Access access_next; G_Layout layout; - RngI32 mips; // Inclusive range of texture mip levels to sync + RngI32 mips; // Inclusive range of texture mip levels to sync }; //////////////////////////////////////////////////////////// @@ -477,9 +477,9 @@ Enum(G_BlendMode) Struct(G_IndexBufferDesc) { + u32 count; + u32 stride; // Either 2 for u16 indices, or 4 for u32 indices G_ResourceHandle resource; - u32 index_size; // Either 2 for u16 indices, or 4 for u32 indices - u32 index_count; }; Struct(G_RenderTargetDesc) @@ -582,8 +582,8 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena, G_CommandListHandle cl, G_R //- Index buffer helpers -#define G_IdxBuff16(_res) ((G_IndexBufferDesc) { .resource = (_res), .index_size = 2, .index_count = (G_CountBuffer((_res), i16)) }) -#define G_IdxBuff32(_res) ((G_IndexBufferDesc) { .resource = (_res), .index_size = 4, .index_count = (G_CountBuffer((_res), i32)) }) +#define G_IdxBuff16(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 2, .count = (G_CountBuffer((_res), i16)) }) +#define G_IdxBuff32(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 4, .count = (G_CountBuffer((_res), i32)) }) //- Render target helpers diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index 09c18440..73c7af4f 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -2562,17 +2562,17 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) D3D12_INDEX_BUFFER_VIEW ibv = Zi; { G_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc; - if (desc.index_count > 0) + if (desc.count > 0) { G_D12_Resource *index_buffer_resource = G_D12_ResourceFromHandle(desc.resource); ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; - ibv.SizeInBytes = desc.index_size * desc.index_count; - if (desc.index_size == 2) + ibv.SizeInBytes = desc.stride * desc.count; + if (desc.stride == 2) { ibv.Format = DXGI_FORMAT_R16_UINT; indices_count = ibv.SizeInBytes / 2; } - else if (desc.index_size == 4) + else if (desc.stride == 4) { ibv.Format = DXGI_FORMAT_R32_UINT; indices_count = ibv.SizeInBytes / 4; @@ -3134,7 +3134,7 @@ void G_Rasterize( G_RasterMode raster_mode ) { - if (instances_count > 0 && index_buffer.index_count > 0) + if (instances_count > 0 && index_buffer.count > 0) { G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_Cmd *cmd = G_D12_PushCmd(cl); diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 6500983f..be135f2e 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -335,6 +335,8 @@ void V_TickForever(WaveLaneCtx *lane) { Arena *perm = PermArena(); G_ArenaHandle gpu_perm = G_PermArena(); + G_ArenaHandle gpu_frame_arena = G_AcquireArena(); + P_tl.debug_arena = AcquireArena(Gibi(64)); P_tl.debug_tint = VEC4(0, 0.4, 0.9, 0.75); P_tl.out_msgs_arena = AcquireArena(Gibi(64)); @@ -522,7 +524,6 @@ void V_TickForever(WaveLaneCtx *lane) frame->quads_arena = AcquireArena(Gibi(64)); frame->dverts_arena = AcquireArena(Gibi(64)); frame->dvert_idxs_arena = AcquireArena(Gibi(64)); - frame->gpu_arena = G_AcquireArena(); } ////////////////////////////// @@ -576,20 +577,18 @@ void V_TickForever(WaveLaneCtx *lane) Arena *old_quads_arena = frame->quads_arena; Arena *old_dverts_arena = frame->dverts_arena; Arena *old_dvert_idxs_arena = frame->dvert_idxs_arena; - G_ArenaHandle old_gpu_arena = frame->gpu_arena; ZeroStruct(frame); frame->arena = old_arena; frame->quads_arena = old_quads_arena; frame->dverts_arena = old_dverts_arena; frame->dvert_idxs_arena = old_dvert_idxs_arena; - frame->gpu_arena = old_gpu_arena; } frame->cl = G_PrepareCommandList(G_QueueKind_Direct); ResetArena(frame->arena); ResetArena(frame->quads_arena); ResetArena(frame->dverts_arena); ResetArena(frame->dvert_idxs_arena); - G_ResetArena(frame->cl, frame->gpu_arena); + G_ResetArena(frame->cl, gpu_frame_arena); // Persist state CopyBytes(frame->held_buttons, prev_frame->held_buttons, sizeof(frame->held_buttons)); @@ -1053,15 +1052,23 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Compute frame affines + // World <-> screen (raw) + frame->af.world_to_screen_raw = AffineIdentity; + frame->af.screen_to_world_raw = AffineIdentity; + { + f32 camera_scale = frame->screen_dims.x / (meters_per_camera_width * frame->camera_zoom); + frame->af.world_to_screen_raw = TranslateAffine(frame->af.world_to_screen_raw, MulVec2(frame->screen_dims, 0.5)); + frame->af.world_to_screen_raw = ScaleAffine(frame->af.world_to_screen_raw, VEC2(camera_scale, camera_scale)); + frame->af.world_to_screen_raw = TranslateAffine(frame->af.world_to_screen_raw, NegVec2(frame->camera_pos)); + frame->af.screen_to_world_raw = InvertAffine(frame->af.world_to_screen_raw); + } + // World <-> screen frame->af.world_to_screen = AffineIdentity; frame->af.screen_to_world = AffineIdentity; { - f32 camera_scale = frame->screen_dims.x / (meters_per_camera_width * frame->camera_zoom); - frame->af.world_to_screen = TranslateAffine(frame->af.world_to_screen, MulVec2(frame->screen_dims, 0.5)); - frame->af.world_to_screen = ScaleAffine(frame->af.world_to_screen, VEC2(camera_scale, camera_scale)); - frame->af.world_to_screen = TranslateAffine(frame->af.world_to_screen, NegVec2(frame->camera_pos)); - // frame->af.world_to_screen.og = RoundVec2(frame->af.world_to_screen.og); + frame->af.world_to_screen = frame->af.world_to_screen_raw;; + frame->af.world_to_screen.og = RoundVec2(frame->af.world_to_screen.og); frame->af.screen_to_world = InvertAffine(frame->af.world_to_screen); } @@ -1073,7 +1080,7 @@ void V_TickForever(WaveLaneCtx *lane) frame->af.world_to_shade = TranslateAffine(frame->af.world_to_shade, MulVec2(frame->shade_dims, 0.5)); frame->af.world_to_shade = ScaleAffine(frame->af.world_to_shade, VEC2(camera_scale, camera_scale)); frame->af.world_to_shade = TranslateAffine(frame->af.world_to_shade, NegVec2(frame->camera_pos)); - // frame->af.world_to_shade.og = RoundVec2(frame->af.world_to_shade.og); + frame->af.world_to_shade.og = RoundVec2(frame->af.world_to_shade.og); frame->af.shade_to_world = InvertAffine(frame->af.world_to_shade); } @@ -1082,7 +1089,7 @@ void V_TickForever(WaveLaneCtx *lane) frame->af.screen_to_shade = AffineIdentity; { frame->af.shade_to_screen = MulAffine(frame->af.world_to_screen, frame->af.shade_to_world); - // frame->af.shade_to_screen.og = RoundVec2(frame->af.shade_to_screen.og); + frame->af.shade_to_screen.og = RoundVec2(frame->af.shade_to_screen.og); frame->af.screen_to_shade = InvertAffine(frame->af.shade_to_screen); } @@ -1093,7 +1100,7 @@ void V_TickForever(WaveLaneCtx *lane) { frame->af.world_to_cell = ScaleAffine(frame->af.world_to_cell, VEC2(P_CellsPerMeter, P_CellsPerMeter)); frame->af.world_to_cell = TranslateAffine(frame->af.world_to_cell, VEC2((P_WorldPitch / 2.0), (P_WorldPitch / 2.0))); - // frame->af.world_to_cell.og = RoundVec2(frame->af.world_to_cell.og); + frame->af.world_to_cell.og = RoundVec2(frame->af.world_to_cell.og); frame->af.cell_to_world = InvertAffine(frame->af.world_to_cell); } @@ -4796,7 +4803,7 @@ void V_TickForever(WaveLaneCtx *lane) // Screen texture G_ResourceHandle screen_target = G_PushTexture2D( - frame->gpu_arena, frame->cl, + gpu_frame_arena, frame->cl, G_Format_R16G16B16A16_Float, frame->screen_dims, G_Layout_DirectQueue_RenderTarget, @@ -4805,12 +4812,12 @@ void V_TickForever(WaveLaneCtx *lane) ); Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1)); Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y)); - frame->screen_ro = G_PushTexture2DRef(frame->gpu_arena, screen_target); - frame->screen_rw = G_PushRWTexture2DRef(frame->gpu_arena, screen_target); + frame->screen_ro = G_PushTexture2DRef(gpu_frame_arena, screen_target); + frame->screen_rw = G_PushRWTexture2DRef(gpu_frame_arena, screen_target); // Bloom texture G_ResourceHandle bloom_target = G_PushTexture2D( - frame->gpu_arena, frame->cl, + gpu_frame_arena, frame->cl, G_Format_R16G16B16A16_Float, G_DimsFromMip2D(G_Count2D(screen_target), 1), G_Layout_DirectQueue_General, @@ -4820,24 +4827,24 @@ void V_TickForever(WaveLaneCtx *lane) ); for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx) { - frame->bloom_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx)); - frame->bloom_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx)); + frame->bloom_mips_ro[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx)); + frame->bloom_mips_rw[mip_idx] = G_PushRWTexture2DRef(gpu_frame_arena, bloom_target, .mips = RNGI32(mip_idx, mip_idx)); } // Albedo texture G_ResourceHandle albedo_target = G_PushTexture2D( - frame->gpu_arena, frame->cl, + gpu_frame_arena, frame->cl, G_Format_R16G16B16A16_Float, frame->screen_dims, G_Layout_DirectQueue_RenderTarget, .flags = G_ResourceFlag_AllowRenderTarget, .name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick)) ); - frame->albedo_ro = G_PushTexture2DRef(frame->gpu_arena, albedo_target); + frame->albedo_ro = G_PushTexture2DRef(gpu_frame_arena, albedo_target); // Backdrop texture G_ResourceHandle backdrop_target = G_PushTexture2D( - frame->gpu_arena, frame->cl, + gpu_frame_arena, frame->cl, G_Format_R16G16B16A16_Float, G_DimsFromMip2D(G_Count2D(screen_target), 0), G_Layout_DirectQueue_General, @@ -4847,13 +4854,13 @@ void V_TickForever(WaveLaneCtx *lane) ); for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx) { - frame->backdrop_mips_ro[mip_idx] = G_PushTexture2DRef(frame->gpu_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx)); - frame->backdrop_mips_rw[mip_idx] = G_PushRWTexture2DRef(frame->gpu_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx)); + frame->backdrop_mips_ro[mip_idx] = G_PushTexture2DRef(gpu_frame_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx)); + frame->backdrop_mips_rw[mip_idx] = G_PushRWTexture2DRef(gpu_frame_arena, backdrop_target, .mips = RNGI32(mip_idx, mip_idx)); } // Shade texture G_ResourceHandle shade_target = G_PushTexture2D( - frame->gpu_arena, frame->cl, + gpu_frame_arena, frame->cl, G_Format_R16G16B16A16_Float, frame->shade_dims, G_Layout_DirectQueue_General, @@ -4862,29 +4869,29 @@ void V_TickForever(WaveLaneCtx *lane) ); Rng3 shade_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->shade_dims.x, frame->shade_dims.y, 1)); Rng2 shade_scissor = RNG2(VEC2(shade_viewport.p0.x, shade_viewport.p0.y), VEC2(shade_viewport.p1.x, shade_viewport.p1.y)); - frame->shade_ro = G_PushTexture2DRef(frame->gpu_arena, shade_target); - frame->shade_rw = G_PushRWTexture2DRef(frame->gpu_arena, shade_target); + frame->shade_ro = G_PushTexture2DRef(gpu_frame_arena, shade_target); + frame->shade_rw = G_PushRWTexture2DRef(gpu_frame_arena, shade_target); // Quad buffers G_ResourceHandle quads_buff = G_PushBufferFromCpuCopy( - frame->gpu_arena, frame->cl, + gpu_frame_arena, frame->cl, StringFromArena(frame->quads_arena), .name = StringF(frame->arena, "quads [%F]", FmtSint(frame->tick)) ); - frame->quads = G_PushStructuredBufferRef(frame->gpu_arena, quads_buff, V_Quad); + frame->quads = G_PushStructuredBufferRef(gpu_frame_arena, quads_buff, V_Quad); // Debug shape buffers G_ResourceHandle dverts_buff = G_PushBufferFromCpuCopy( - frame->gpu_arena, frame->cl, + gpu_frame_arena, frame->cl, StringFromArena(frame->dverts_arena), .name = StringF(frame->arena, "dverts [%F]", FmtSint(frame->tick)) ); G_ResourceHandle dvert_idxs_buff = G_PushBufferFromCpuCopy( - frame->gpu_arena, frame->cl, + gpu_frame_arena, frame->cl, StringFromArena(frame->dvert_idxs_arena), .name = StringF(frame->arena, "dvert idxs [%F]", FmtSint(frame->tick)) ); - frame->dverts = G_PushStructuredBufferRef(frame->gpu_arena, dverts_buff, V_DVert); + frame->dverts = G_PushStructuredBufferRef(gpu_frame_arena, dverts_buff, V_DVert); G_IndexBufferDesc dvert_idxs_ib = G_IdxBuff32(dvert_idxs_buff); // Particles @@ -4901,20 +4908,20 @@ void V_TickForever(WaveLaneCtx *lane) } } gpu_emitters = G_PushBufferFromCpuCopy( - frame->gpu_arena, frame->cl, + gpu_frame_arena, frame->cl, StringFromStructs(flattened_emitters, frame->emitters_count), .name = StringF(frame->arena, "emitters [%F]", FmtSint(frame->tick)) ); } - frame->emitters = G_PushStructuredBufferRef(frame->gpu_arena, gpu_emitters, V_Emitter); + frame->emitters = G_PushStructuredBufferRef(gpu_frame_arena, gpu_emitters, V_Emitter); // Upload gpu frame G_ResourceHandle gpu_frame_res = G_PushBufferFromCpuCopy( - frame->gpu_arena, frame->cl, + gpu_frame_arena, frame->cl, StringFromStruct(&frame->shared_frame), .name = StringF(frame->arena, "Gpu frame [%F]", FmtSint(frame->tick)) ); - G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(frame->gpu_arena, gpu_frame_res, V_SharedFrame); + G_StructuredBufferRef gpu_frame = G_PushStructuredBufferRef(gpu_frame_arena, gpu_frame_res, V_SharedFrame); // Set initial constants V_GpuFlag gpu_flags = V_GpuFlag_None; @@ -5075,6 +5082,7 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Debug shapes pass + if (dvert_idxs_ib.count > 0) { G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTarget); diff --git a/src/pp/pp_vis/pp_vis_core.h b/src/pp/pp_vis/pp_vis_core.h index 618b262d..b57e67cd 100644 --- a/src/pp/pp_vis/pp_vis_core.h +++ b/src/pp/pp_vis/pp_vis_core.h @@ -224,7 +224,6 @@ Struct(V_Frame) Arena *quads_arena; Arena *dverts_arena; Arena *dvert_idxs_arena; - G_ArenaHandle gpu_arena; G_CommandListHandle cl; Embed(V_SharedFrame, shared_frame); diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g index 95593b99..464308c3 100644 --- a/src/pp/pp_vis/pp_vis_gpu.g +++ b/src/pp/pp_vis/pp_vis_gpu.g @@ -203,7 +203,7 @@ ImplComputeShader2D(V_BackdropDownCS) Vec2 off_uv = 0.5 / down_dims; Vec2 screen_pos = bd_uv * frame.screen_dims; - Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1)); + Vec2 world_pos = mul(frame.af.screen_to_world_raw, Vec3(screen_pos, 1)); Rng2 world_bounds = { Vec2(-P_WorldPitch, -P_WorldPitch) * 0.5, Vec2(P_WorldPitch, P_WorldPitch) * 0.5 }; Vec4 result = 0; diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh index 67c625d9..45548b60 100644 --- a/src/pp/pp_vis/pp_vis_shared.cgh +++ b/src/pp/pp_vis/pp_vis_shared.cgh @@ -235,6 +235,10 @@ Enum(V_EditMode) Struct(V_Affines) { + // World <-> screen (raw) + Affine world_to_screen_raw; + Affine screen_to_world_raw; + // World <-> screen Affine world_to_screen; Affine screen_to_world;