re-enable ui & vis rendering

This commit is contained in:
jacob 2025-12-11 15:41:31 -06:00
parent eeec7e7191
commit ad359c8495
11 changed files with 228 additions and 206 deletions

View File

@ -73,7 +73,7 @@
#define GPU_DEBUG_VALIDATION 0
#define GPU_SHADER_PRINT 1
#define GPU_SHADER_PRINT_BUFFER_SIZE Kibi(1);
/* Size in bytes of each queue's shader print buffer.
 * NOTE: No trailing semicolon, so the macro can be used inside expressions. */
#define GPU_SHADER_PRINT_BUFFER_SIZE Kibi(64)
#define GPU_SHADER_PRINT_LOG 1
/* If enabled, bitbuffs will insert/verify magic numbers & length for each read & write */

View File

@ -307,6 +307,12 @@ void G_Bootstrap(void)
queue->print_buffer_size,
.flags = G_ResourceFlag_AllowShaderReadWrite
);
queue->print_readback_buffer = G_PushBuffer(
gpu_perm,
u8,
queue->print_buffer_size,
.flags = G_ResourceFlag_HostMemory
);
queue->print_buffer_ref = G_PushRWByteAddressBufferRef(gpu_perm, queue->print_buffer);
}
}
@ -2837,197 +2843,196 @@ void G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync)
void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
{
G_QueueKind queue_kind = G_QueueKind_Direct;
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
// if (queue->print_buffer_size > 0)
G_ArenaHandle gpu_perm = G_PermArena();
G_ResourceHandle readback_buff = G_PushBuffer(
gpu_perm,
u8,
queue->print_buffer_size,
.flags = G_ResourceFlag_HostMemory
);
for (;;)
{
/* FIXME: Remove this */
P_SleepSeconds(0.100);
Sleep(100);
G_CommandListHandle cl = G_PrepareCommandList(queue_kind);
/* Copy print-buffers to readback */
for (G_QueueKind queue_kind = 0; queue_kind < G_NumQueues; ++queue_kind)
{
/* Copy print buffer to readback buffer */
G_CopyBufferToBuffer(cl, readback_buff, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size));
/* Reset counters to 0 */
G_MemorySync(cl, queue->print_buffer,
G_Stage_Copy, G_Access_CopyRead,
G_Stage_Copy, G_Access_CopyWrite
);
u8 zero[12] = ZI;
G_CopyCpuToBuffer(cl, queue->print_buffer, 0, zero, RNGU64(0, sizeof(zero)));
}
G_CommitCommandList(cl);
G_SyncCpu(G_MaskFromQueue(queue_kind));
u32 attempted_print_bytes_count = *(G_StructFromResource(readback_buff, u32) + 0); /* The number of bytes shaders attempted to write */
u32 prints_count = *(G_StructFromResource(readback_buff, u32) + 1); /* The number of shader prints that are in the buffer */
u32 overflows_count = *(G_StructFromResource(readback_buff, u32) + 2); /* The number of shader prints that could not fit in the buffer */
u8 *start = G_StructFromResource(readback_buff, u8) + 12;
/* Deserialize */
if (GPU_SHADER_PRINT_LOG)
{
if (prints_count > 0)
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
if (!G_IsResourceNil(queue->print_buffer))
{
LogDebugF(
"Forwarding logs collected from GPU - Resident prints: %F, Total attempted prints: %F, Total attempted bytes: %F",
FmtUint(prints_count),
FmtUint(prints_count + overflows_count),
FmtUint(attempted_print_bytes_count)
);
}
/* FIXME: Remove this */
TempArena scratch = BeginScratchNoConflict();
u8 *at = start;
{
for (u32 print_num = 1; print_num <= prints_count; ++print_num)
G_CommandListHandle cl = G_PrepareCommandList(queue_kind);
{
u32 chars_count = 0;
u32 args_count = 0;
b32 internal_overflow = 0;
/* Copy print buffer to readback buffer */
G_CopyBufferToBuffer(cl, queue->print_readback_buffer, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size));
/* Reset counters to 0 */
G_MemorySync(cl, queue->print_buffer,
G_Stage_Copy, G_Access_CopyRead,
G_Stage_Copy, G_Access_CopyWrite
);
u8 zero[12] = ZI;
G_CopyCpuToBuffer(cl, queue->print_buffer, 0, zero, RNGU64(0, sizeof(zero)));
}
G_CommitCommandList(cl);
}
}
/* TODO: Collect asynchronously */
G_SyncCpu(G_QueueMask_Direct | G_QueueMask_AsyncCompute);
for (G_QueueKind queue_kind = 0; queue_kind < G_NumQueues; ++queue_kind)
{
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
if (!G_IsResourceNil(queue->print_buffer))
{
u32 attempted_print_bytes_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 0); /* The number of bytes shaders attempted to write */
u32 prints_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 1); /* The number of shader prints that are in the buffer */
u32 overflows_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 2); /* The number of shader prints that could not fit in the buffer */
u8 *start = G_StructFromResource(queue->print_readback_buffer, u8) + 12;
/* Deserialize */
if (GPU_SHADER_PRINT_LOG)
{
if (prints_count > 0)
{
u32 header = *(u32 *)at;
chars_count = (header & 0x0000FFFF) >> 0;
args_count = (header & 0x7FFF0000) >> 16;
internal_overflow = (header & 0xF0000000) >> 31;
at += 4;
LogDebugF(
"Forwarding logs collected from GPU - Resident prints: %F, Total attempted prints: %F, Total attempted bytes: %F",
FmtUint(prints_count),
FmtUint(prints_count + overflows_count),
FmtUint(attempted_print_bytes_count)
);
}
String fmt = ZI;
/* FIXME: Remove this */
TempArena scratch = BeginScratchNoConflict();
u8 *at = start;
{
fmt.len = chars_count;
fmt.text = at;
at += chars_count;
}
FmtArgArray args = ZI;
args.count = args_count;
{
if (args_count > 0)
for (u32 print_num = 1; print_num <= prints_count; ++print_num)
{
args.args = PushStructs(scratch.arena, FmtArg, args_count);
for (u32 arg_idx = 0; arg_idx < args_count; ++arg_idx)
u32 chars_count = 0;
u32 args_count = 0;
b32 internal_overflow = 0;
{
G_FmtArgKind gpu_kind = (G_FmtArgKind)(*at);
at += 1;
u32 header = *(u32 *)at;
chars_count = (header & 0x0000FFFF) >> 0;
args_count = (header & 0x7FFF0000) >> 16;
internal_overflow = (header & 0xF0000000) >> 31;
at += 4;
}
FmtArg *dst = &args.args[arg_idx];
switch (gpu_kind)
String fmt = ZI;
{
fmt.len = chars_count;
fmt.text = at;
at += chars_count;
}
FmtArgArray args = ZI;
args.count = args_count;
{
if (args_count > 0)
{
args.args = PushStructs(scratch.arena, FmtArg, args_count);
for (u32 arg_idx = 0; arg_idx < args_count; ++arg_idx)
{
G_FmtArgKind gpu_kind = (G_FmtArgKind)(*at);
at += 1;
/* Translate unsigned args */
case G_FmtArgKind_Uint:
{
u32 gpu_value = *(u32 *)at;
*dst = FmtUint(gpu_value);
at += 4;
} break;
case G_FmtArgKind_Uint2:
{
Vec2U32 gpu_value = *(Vec2U32 *)at;
*dst = FmtUint2(gpu_value);
at += 8;
} break;
case G_FmtArgKind_Uint3:
{
Vec3U32 gpu_value = *(Vec3U32 *)at;
*dst = FmtUint3(gpu_value);
at += 12;
} break;
case G_FmtArgKind_Uint4:
{
Vec4U32 gpu_value = *(Vec4U32 *)at;
*dst = FmtUint4(gpu_value);
at += 16;
} break;
FmtArg *dst = &args.args[arg_idx];
switch (gpu_kind)
{
/* Translate signed args */
case G_FmtArgKind_Sint:
{
i32 gpu_value = *(i32 *)at;
*dst = FmtSint(gpu_value);
at += 4;
} break;
case G_FmtArgKind_Sint2:
{
Vec2I32 gpu_value = *(Vec2I32 *)at;
*dst = FmtSint2(gpu_value);
at += 8;
} break;
case G_FmtArgKind_Sint3:
{
Vec3I32 gpu_value = *(Vec3I32 *)at;
*dst = FmtSint3(gpu_value);
at += 12;
} break;
case G_FmtArgKind_Sint4:
{
Vec4I32 gpu_value = *(Vec4I32 *)at;
*dst = FmtSint4(gpu_value);
at += 16;
} break;
/* Translate unsigned args */
case G_FmtArgKind_Uint:
{
u32 gpu_value = *(u32 *)at;
*dst = FmtUint(gpu_value);
at += 4;
} break;
case G_FmtArgKind_Uint2:
{
Vec2U32 gpu_value = *(Vec2U32 *)at;
*dst = FmtUint2(gpu_value);
at += 8;
} break;
case G_FmtArgKind_Uint3:
{
Vec3U32 gpu_value = *(Vec3U32 *)at;
*dst = FmtUint3(gpu_value);
at += 12;
} break;
case G_FmtArgKind_Uint4:
{
Vec4U32 gpu_value = *(Vec4U32 *)at;
*dst = FmtUint4(gpu_value);
at += 16;
} break;
/* Translate float args */
case G_FmtArgKind_Float:
{
f32 gpu_value = *(f32 *)at;
*dst = FmtFloat(gpu_value);
at += 4;
} break;
case G_FmtArgKind_Float2:
{
Vec2 gpu_value = *(Vec2 *)at;
*dst = FmtFloat2(gpu_value);
at += 8;
} break;
case G_FmtArgKind_Float3:
{
Vec3 gpu_value = *(Vec3 *)at;
*dst = FmtFloat3(gpu_value);
at += 12;
} break;
case G_FmtArgKind_Float4:
{
Vec4 gpu_value = *(Vec4 *)at;
*dst = FmtFloat4(gpu_value);
at += 16;
} break;
/* Translate signed args */
case G_FmtArgKind_Sint:
{
i32 gpu_value = *(i32 *)at;
*dst = FmtSint(gpu_value);
at += 4;
} break;
case G_FmtArgKind_Sint2:
{
Vec2I32 gpu_value = *(Vec2I32 *)at;
*dst = FmtSint2(gpu_value);
at += 8;
} break;
case G_FmtArgKind_Sint3:
{
Vec3I32 gpu_value = *(Vec3I32 *)at;
*dst = FmtSint3(gpu_value);
at += 12;
} break;
case G_FmtArgKind_Sint4:
{
Vec4I32 gpu_value = *(Vec4I32 *)at;
*dst = FmtSint4(gpu_value);
at += 16;
} break;
/* Translate float args */
case G_FmtArgKind_Float:
{
f32 gpu_value = *(f32 *)at;
*dst = FmtFloat(gpu_value);
at += 4;
} break;
case G_FmtArgKind_Float2:
{
Vec2 gpu_value = *(Vec2 *)at;
*dst = FmtFloat2(gpu_value);
at += 8;
} break;
case G_FmtArgKind_Float3:
{
Vec3 gpu_value = *(Vec3 *)at;
*dst = FmtFloat3(gpu_value);
at += 12;
} break;
case G_FmtArgKind_Float4:
{
Vec4 gpu_value = *(Vec4 *)at;
*dst = FmtFloat4(gpu_value);
at += 16;
} break;
}
}
}
}
String final_str = ZI;
if (internal_overflow)
{
final_str = Lit("[Shader PrintF is too large]");
}
else
{
final_str = FormatString(scratch.arena, fmt, args);
}
LogDebug(final_str);
at = (u8 *)AlignU64((u64)at, 4);
}
}
String final_str = ZI;
if (internal_overflow)
{
final_str = Lit("[Shader PrintF is too large]");
}
else
{
final_str = FormatString(scratch.arena, fmt, args);
}
LogDebug(final_str);
at = (u8 *)AlignU64((u64)at, 4);
EndScratch(scratch);
}
}
EndScratch(scratch);
}
DEBUGBREAKABLE;
}
}

View File

@ -222,6 +222,7 @@ Struct(G_D12_Queue)
/* Global resources */
u64 print_buffer_size;
G_ResourceHandle print_buffer;
G_ResourceHandle print_readback_buffer;
G_RWByteAddressBufferRef print_buffer_ref;
/* Raw command lists */

View File

@ -137,5 +137,6 @@ i64 P_GetCurrentTimerPeriodNs(void);
////////////////////////////////////////////////////////////
//~ @hookdecl Sleep
void P_SleepSeconds(f64 seconds);
void P_SleepPrecise(i64 sleep_time_ns);
void P_SleepFrame(i64 last_frame_time_ns, i64 target_dt_ns);

View File

@ -978,6 +978,11 @@ i64 P_GetCurrentTimerPeriodNs(void)
////////////////////////////////////////////////////////////
//~ @hookimpl Sleep
/* Blocks the calling thread for roughly `seconds` seconds.
 *
 * Sleep() takes its duration in whole milliseconds, so the value is scaled
 * up by 1000 (the fractional millisecond is truncated). Non-positive
 * durations are clamped to 0, which still calls Sleep(0).
 */
void P_SleepSeconds(f64 seconds)
{
    f64 sleep_ms = seconds * 1000.0;
    if (sleep_ms < 0)
    {
        /* Converting a negative floating point value to unsigned is undefined */
        sleep_ms = 0;
    }
    Sleep((unsigned long)sleep_ms);
}
void P_SleepPrecise(i64 sleep_time_ns)
{
i64 now_ns = TimeNs();

View File

@ -580,11 +580,12 @@ void V_TickForever(WaveLaneCtx *lane)
/* Params */
V_DParams params = ZI;
{
params.target_size = draw_size;
params.target_ro = draw_target_ro;
params.target_rw = draw_target_rw;
params.shape_verts = dverts_ro;
params.target_size = draw_size;
params.target_ro = draw_target_ro;
params.target_rw = draw_target_rw;
params.shape_verts = dverts_ro;
params.world_to_draw_xf = world_to_draw_xf;
params.target_cursor_pos = draw_cursor;
}
G_ResourceHandle params_buff = G_PushBufferFromString(frame->gpu_arena, frame->cl, StringFromStruct(&params));
G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, V_DParams);
@ -598,7 +599,6 @@ void V_TickForever(WaveLaneCtx *lane)
//////////////////////////////
//- Backdrop pass
/* Backdrop pass */
{
G_Compute(frame->cl, V_BackdropCS, V_BackdropCSThreadSizeFromTexSize(draw_size));
@ -623,7 +623,11 @@ void V_TickForever(WaveLaneCtx *lane)
//- Finalize draw target
G_DumbMemoryLayoutSync(frame->cl, draw_target, G_Layout_DirectQueue_ShaderRead);
UI_SetRawTexture(vis_box, draw_target_ro, VEC2(0, 0), VEC2(1, 1));
{
Vec2 uv0 = DivVec2Vec2(Vec2FromVec(viewport.p0), Vec2FromVec(window_frame.monitor_size));
Vec2 uv1 = DivVec2Vec2(Vec2FromVec(viewport.p1), Vec2FromVec(window_frame.monitor_size));
UI_SetRawTexture(vis_box, draw_target_ro, uv0, uv1);
}
}
//////////////////////////////

View File

@ -5,12 +5,15 @@ G_DeclConstant(G_StructuredBufferRef, V_ShaderConst_Params, 0);
Struct(V_DParams)
{
Vec2I32 target_size;
G_Texture2DRef target_ro;
G_RWTexture2DRef target_rw;
G_StructuredBufferRef quads;
G_StructuredBufferRef shape_verts;
Xform world_to_draw_xf;
Vec2I32 target_size;
G_Texture2DRef target_ro;
G_RWTexture2DRef target_rw;
G_StructuredBufferRef quads;
G_StructuredBufferRef shape_verts;
Vec2 target_cursor_pos;
Xform world_to_draw_xf;
};
////////////////////////////////////////////////////////////

View File

@ -10,7 +10,8 @@ ComputeShader2D(V_BackdropCS, 8, 8)
Vec2I32 target_size = params.target_size;
if (target_pos.x < target_size.x && target_pos.y < target_size.y)
{
Vec4 result = Color_Blue;
Vec4 result = Vec4(0.05, 0.05, 0.05, 1);
target[target_pos] = result;
}
}

View File

@ -483,7 +483,7 @@ UI_Frame *UI_BeginFrame(UI_FrameFlag frame_flags, Vec4 swapchain_color)
{
Arena *old_arena = frame->arena;
Arena *old_rects_arena = frame->arena;
Arena *old_rects_arena = frame->rects_arena;
G_ArenaHandle old_gpu_arena = frame->gpu_arena;
ZeroStruct(frame);
frame->arena = old_arena;
@ -691,6 +691,8 @@ void UI_EndFrame(UI_Frame *frame)
UI_State *g = &UI_state;
Vec2I32 monitor_size = frame->window_frame.monitor_size;
Rng3 monitor_viewport = RNG3(VEC3(0, 0, 0), VEC3(monitor_size.x, monitor_size.y, 1));
Rng2 monitor_scissor = RNG2(VEC2(0, 0), VEC2(monitor_size.x, monitor_size.y));
Vec2I32 draw_size = frame->window_frame.draw_size;
Rng3 draw_viewport = RNG3(VEC3(0, 0, 0), VEC3(draw_size.x, draw_size.y, 1));
@ -1386,6 +1388,7 @@ void UI_EndFrame(UI_Frame *frame)
G_Texture2DRef draw_target_ro = G_PushTexture2DRef(frame->gpu_arena, draw_target);
/* Rects */
u64 rects_count = ArenaCount(frame->rects_arena, UI_DRect);
G_ResourceHandle rects_buff = G_PushBufferFromString(frame->gpu_arena, frame->cl, StringFromArena(frame->rects_arena));
G_StructuredBufferRef rects_ro = G_PushStructuredBufferRef(frame->gpu_arena, rects_buff, UI_DRect);
@ -1396,6 +1399,7 @@ void UI_EndFrame(UI_Frame *frame)
params.target_ro = draw_target_ro;
params.rects = rects_ro;
params.sampler = G_BasicSampler();
params.cursor_pos = frame->cursor_pos;
}
G_ResourceHandle params_buff = G_PushBufferFromString(frame->gpu_arena, frame->cl, StringFromStruct(&params));
G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(frame->gpu_arena, params_buff, UI_DParams);
@ -1410,33 +1414,33 @@ void UI_EndFrame(UI_Frame *frame)
//- Clear pass
{
G_ClearRenderTarget(frame->cl, draw_target, VEC4(1, 0, 0, 1));
G_ClearRenderTarget(frame->cl, draw_target, VEC4(0, 0, 0, 1));
}
//- Rect pass
G_DumbMemoryLayoutSync(frame->cl, draw_target, G_Layout_DirectQueue_RenderTargetWrite);
if (G_CountBufferBytes(rects_buff) > 0)
if (rects_count > 0)
{
/* Render rects */
G_Rasterize(frame->cl,
UI_DRectVS, UI_DRectPS,
1, G_QuadIndices(),
1, &draw_target,
draw_viewport, draw_scissor,
G_RasterMode_TriangleList);
UI_DRectVS, UI_DRectPS,
rects_count, G_QuadIndices(),
1, &draw_target,
draw_viewport, draw_scissor,
G_RasterMode_TriangleList);
/* Render rect wireframes */
if (AnyBit(frame->frame_flags, UI_FrameFlag_Debug))
{
G_SetConstant(frame->cl, UI_ShaderConst_DebugDraw, 1);
G_Rasterize(frame->cl,
UI_DRectVS, UI_DRectPS,
1, G_QuadIndices(),
1, &draw_target,
draw_viewport, draw_scissor,
G_RasterMode_WireTriangleList);
UI_DRectVS, UI_DRectPS,
rects_count, G_QuadIndices(),
1, &draw_target,
draw_viewport, draw_scissor,
G_RasterMode_WireTriangleList);
}
}
@ -1447,11 +1451,11 @@ void UI_EndFrame(UI_Frame *frame)
{
G_Rasterize(frame->cl,
UI_BlitVS, UI_BlitPS,
1, G_QuadIndices(),
1, &backbuffer,
draw_viewport, draw_scissor,
G_RasterMode_TriangleList);
UI_BlitVS, UI_BlitPS,
1, G_QuadIndices(),
1, &backbuffer,
monitor_viewport, monitor_scissor,
G_RasterMode_TriangleList);
}
G_DumbMemoryLayoutSync(frame->cl, backbuffer, G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present);

View File

@ -8,6 +8,9 @@ Struct(UI_DParams)
{
Vec2I32 target_size;
G_Texture2DRef target_ro;
Vec2 cursor_pos;
G_StructuredBufferRef rects;
G_SamplerStateRef sampler;
};

View File

@ -130,6 +130,7 @@ VertexShader(UI_BlitVS, UI_BlitPSInput)
UI_BlitPSInput result;
result.sv_position = Vec4(NdcFromUv(uv).xy, 0, 1);
result.src_uv = uv;
return result;
}
@ -145,12 +146,6 @@ PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
Vec2 uv = input.src_uv;
Vec4 result = tex.Sample(sampler, uv);
G_PrintF(
"Bla: %F %F",
G_Fmt(input.sv_position),
G_Fmt(uv)
);
UI_BlitPSOutput output;
output.SV_Target0 = result;
return output;