reset print buffer size in collection worker

This commit is contained in:
jacob 2025-12-10 17:23:51 -06:00
parent bc17e94758
commit f911e98c98
7 changed files with 108 additions and 88 deletions

View File

@ -71,7 +71,7 @@ G_ArenaHandle G_PermArena(void)
return perm;
}
//- Cpu -> Gpu copy
//- Cpu -> Gpu upload
G_ResourceHandle G_PushBufferFromString_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferResourceDesc desc)
{

View File

@ -23,7 +23,7 @@ void G_BootstrapCommon(void);
G_ArenaHandle G_PermArena(void);
//- Cpu -> Gpu copy
//- Cpu -> Gpu upload
G_ResourceHandle G_PushBufferFromString_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferResourceDesc desc);
#define G_PushBufferFromString(_arena, _cl, _src, ...) \

View File

@ -299,7 +299,7 @@ Enum(G_Layout)
G_Layout_ComputeQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE */
};
/* Barrier will execute after previous stages specified by `sync_prev`, and before next stages specified by `sync_next`.
/* Barrier will execute after previous stages specified by `stage_prev`, and before next stages specified by `stage_next`.
* When barrier executes:
* - Necessary resource flushes will occur based on `access_prev` & `access_next`
* - Texture layout will transition based on `layout` (if specified)
@ -308,8 +308,8 @@ Struct(G_MemoryBarrierDesc)
{
G_ResourceHandle resource;
b32 is_global;
G_Stage sync_prev;
G_Stage sync_next;
G_Stage stage_prev;
G_Stage stage_next;
G_Access access_prev;
G_Access access_next;
G_Layout layout;
@ -324,8 +324,8 @@ Enum(G_ResourceFlag)
G_ResourceFlag_AllowShaderReadWrite = (1 << 0),
G_ResourceFlag_AllowRenderTarget = (1 << 1),
G_ResourceFlag_AllowDepthStencil = (1 << 2),
G_ResourceFlag_HostMemory = (1 << 3),
G_ResourceFlag_WriteCombinedHostMemory = (1 << 4),
G_ResourceFlag_HostMemory = (1 << 3), /* Resource will automatically be mapped into the cpu's address space */
G_ResourceFlag_WriteCombined = (1 << 4), /* Writes into the mapped resource will be combined. Fast for linear memcpy, slow for everything else */
};
////////////////////////////////////////////////////////////
@ -680,31 +680,31 @@ void G_SetConstant_(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size)
void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc);
/* Convenience wrapper around G_MemorySyncEx for a single resource.
 * Builds a G_MemoryBarrierDesc from the given previous/next stage and access values and
 * forwards it together with the command list. `layout` is not set by this macro. */
#define G_MemorySync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \
#define G_MemorySync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.resource = (_resource), \
.sync_prev = _sync_prev, \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.sync_next = _sync_next, \
.stage_next = _stage_next, \
.access_next = _access_next, \
})
/* Same as a single-resource memory sync, but additionally fills in the `layout` field of the
 * G_MemoryBarrierDesc that is forwarded to G_MemorySyncEx. */
#define G_MemoryLayoutSync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
#define G_MemoryLayoutSync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, _layout) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.resource = (_resource), \
.sync_prev = _sync_prev, \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.sync_next = _sync_next, \
.stage_next = _stage_next, \
.access_next = _access_next, \
.layout = _layout, \
})
/* Global variant of the memory sync helpers: sets `is_global` instead of naming a resource and
 * forwards the descriptor to G_MemorySyncEx.
 * NOTE: this must call the function G_MemorySyncEx, not the 6-parameter G_MemorySync macro;
 * passing a braced descriptor to that macro splits it at every comma and fails to expand. */
#define G_GlobalMemorySync(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
#define G_GlobalMemorySync(_cl, _stage_prev, _access_prev, _stage_next, _access_next) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.is_global = 1, \
.sync_prev = _sync_prev, \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.sync_next = _sync_next, \
.stage_next = _stage_next, \
.access_next = _access_next, \
})

View File

@ -316,14 +316,16 @@ void G_Bootstrap(void)
//////////////////////////////
//- Start workers
for (G_QueueKind kind = 0; kind < G_NumQueues; ++kind)
{
String name = ZI;
if (kind == G_QueueKind_Direct) name = Lit("Direct queue worker");
if (kind == G_QueueKind_AsyncCompute) name = Lit("Compute queue worker");
if (kind == G_QueueKind_AsyncCopy) name = Lit("Copy queue worker");
DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind);
}
// for (G_QueueKind kind = 0; kind < G_NumQueues; ++kind)
// {
// String name = ZI;
// if (kind == G_QueueKind_Direct) name = Lit("Gpu direct queue worker");
// if (kind == G_QueueKind_AsyncCompute) name = Lit("Gpu compute queue worker");
// if (kind == G_QueueKind_AsyncCopy) name = Lit("Gpu copy queue worker");
// DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind);
// }
DispatchWave(Lit("Gpu collection worker"), 1, G_D12_CollectionWorkerEntry, 0);
EndScratch(scratch);
}
@ -949,7 +951,7 @@ G_ResourceHandle G_PushBufferEx(G_ArenaHandle arena_handle, G_BufferResourceDesc
if (desc.flags & G_ResourceFlag_HostMemory)
{
heap_kind = G_D12_ResourceHeapKind_Cpu;
if (desc.flags & G_ResourceFlag_WriteCombinedHostMemory)
if (desc.flags & G_ResourceFlag_WriteCombined)
{
heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined;
}
@ -1028,7 +1030,7 @@ G_ResourceHandle G_PushTextureEx(G_ArenaHandle arena_handle, G_TextureResourceDe
if (desc.flags & G_ResourceFlag_HostMemory)
{
heap_kind = G_D12_ResourceHeapKind_Cpu;
if (desc.flags & G_ResourceFlag_WriteCombinedHostMemory)
if (desc.flags & G_ResourceFlag_WriteCombined)
{
heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined;
}
@ -1626,7 +1628,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
if (!G_IsRefNil(queue->print_buffer_ref))
{
slotted_constants[G_ShaderConst_DebugBufferRef] = queue->print_buffer_ref.v;
slotted_constants[G_ShaderConst_PrintBufferRef] = queue->print_buffer_ref.v;
}
/* Rasterizer state */
@ -1655,10 +1657,12 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
{
Lock lock = LockE(&g->free_cmd_chunks_mutex);
{
/* Push every chunk of this command list onto the global free-chunk list */
for (G_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next)
G_D12_CmdChunk *chunk = cl->first_cmd_chunk;
while (chunk)
{
/* Save the successor first: relinking the chunk onto the free list overwrites chunk->next */
G_D12_CmdChunk *next = chunk->next;
chunk->next = g->first_free_cmd_chunk;
g->first_free_cmd_chunk = chunk;
chunk = next;
}
}
Unlock(&lock);
@ -1729,6 +1733,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
} break;
//- Constant
case G_D12_CmdKind_Constant:
{
i32 slot = cmd->constant.slot;
@ -1766,8 +1771,8 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
/* Translate gpu barrier kind -> d3d barrier fields */
D3D12_BARRIER_SYNC sync_before = G_D12_BarrierSyncFromStages(desc.sync_prev);
D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.sync_next);
D3D12_BARRIER_SYNC sync_before = G_D12_BarrierSyncFromStages(desc.stage_prev);
D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.stage_next);
D3D12_BARRIER_ACCESS access_before = G_D12_BarrierAccessFromAccesses(desc.access_prev);
D3D12_BARRIER_ACCESS access_after = G_D12_BarrierAccessFromAccesses(desc.access_next);
D3D12_BARRIER_LAYOUT layout_before = resource->texture_layout;
@ -2818,16 +2823,15 @@ void G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync)
}
////////////////////////////////////////////////////////////
//~ Workers
//~ Collection worker
void G_D12_WorkerEntry(WaveLaneCtx *lane)
void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
{
G_QueueKind queue_kind = (G_QueueKind)lane->wave->udata;
G_QueueKind queue_kind = G_QueueKind_Direct;
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
// if (queue->print_buffer_size > 0)
if (queue_kind == G_QueueKind_Direct)
{
G_ArenaHandle gpu_perm = G_PermArena();
G_ResourceHandle readback_buff = G_PushBuffer(
gpu_perm,
@ -2836,15 +2840,23 @@ void G_D12_WorkerEntry(WaveLaneCtx *lane)
.flags = G_ResourceFlag_HostMemory
);
u32 zero = 0;
for (;;)
{
/* FIXME: Remove this */
Sleep(500);
Sleep(100);
G_CommandListHandle cl = G_PrepareCommandList(queue_kind);
{
/* Copy print buffer to readback buffer */
G_CopyBufferToBuffer(cl, readback_buff, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size));
/* Reset size to 0 */
G_MemorySync(cl, queue->print_buffer,
G_Stage_Copy, G_Access_CopyRead,
G_Stage_Copy, G_Access_CopyWrite
);
G_CopyCpuToBuffer(cl, queue->print_buffer, 0, &zero, RNGU64(0, 4));
}
i64 completion = G_CommitCommandList(cl);
@ -2856,5 +2868,4 @@ void G_D12_WorkerEntry(WaveLaneCtx *lane)
DEBUGBREAKABLE;
}
}
}

View File

@ -469,6 +469,6 @@ G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v);
G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size);
////////////////////////////////////////////////////////////
//~ Workers
//~ Collection worker
void G_D12_WorkerEntry(WaveLaneCtx *lane);
void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane);

View File

@ -102,7 +102,7 @@ Struct(G_SamplerStateRef) { u32 v; };
StaticAssert(G_NumGeneralPurposeConstants == 8);
StaticAssert(G_NumReservedConstants == 1);
G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_DebugBufferRef, 8);
G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 8);
////////////////////////////////////////////////////////////
//~ Debug printf
@ -110,24 +110,32 @@ G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_DebugBufferRef, 8)
/* This technique is based on MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */
#if IsLanguageG && GPU_SHADER_PRINT
/* Temporary buffer in which the bytes of one print message are accumulated before being committed */
Struct(G_DebugBuffer)
Struct(G_TempPrintBuffer)
{
u32 data_u32[256]; /* Message bytes, packed four per u32 */
u32 byte_pos; /* Number of bytes pushed so far */
};
/* Appends the low 8 bits of `c` to `buff` and advances `byte_pos` by one.
 * Bytes are packed four per u32, lowest byte first. No bounds check is performed (see TODO). */
void G_PushDebugChar(inout G_DebugBuffer buff, u32 c)
void G_PushPrintChar(inout G_TempPrintBuffer buff, u32 c)
{
/* TODO: Overflow check */
u32 u32_arr_pos = buff.byte_pos / 4; /* Index of the u32 that holds this byte */
u32 idx_in_u32 = buff.byte_pos & 0x03; /* Byte slot (0-3) within that u32 */
if (idx_in_u32 == 0)
{
/* Since buff is not zero initialized, we set the byte on first write here */
buff.data_u32[u32_arr_pos] = c & 0xFF;
}
else
{
/* Later bytes are OR'd into the already-initialized u32 at their 8-bit slot */
buff.data_u32[u32_arr_pos] |= (c & 0xFF) << (idx_in_u32 * 8);
}
buff.byte_pos += 1;
}
void G_CommitDebugBuffer(G_DebugBuffer buff)
void G_CommitPrint(G_TempPrintBuffer buff)
{
RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_DebugBufferRef);
RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_PrintBufferRef);
u32 u32s_count = (buff.byte_pos + 3) / 4;
u32 alloc_size = u32s_count * 4;
@ -145,17 +153,18 @@ G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_DebugBufferRef, 8)
}
}
/* Prints the string `fmt` from shader code.
 * Each character of `fmt` up to (not including) the first zero is pushed into a local temporary
 * buffer, a zero terminator is pushed after it, and the buffer is then handed to G_CommitPrint. */
#define G_DebugPrint(fmt) do { \
G_DebugBuffer __dbg; \
__dbg.byte_pos = 0; \
#define G_Print(fmt) do { \
G_TempPrintBuffer __tmp; \
__tmp.byte_pos = 0; \
u32 __pos = 0; \
while (U32FromChar(fmt[__pos]) != 0) \
{ \
G_PushDebugChar(__dbg, U32FromChar(fmt[__pos])); \
G_PushPrintChar(__tmp, U32FromChar(fmt[__pos])); \
++__pos; \
} \
G_CommitDebugBuffer(__dbg); \
G_PushPrintChar(__tmp, 0); \
G_CommitPrint(__tmp); \
} while (0)
#else
#define G_DebugPrint(fmt)
#define G_Print(fmt)
#endif

View File

@ -145,7 +145,7 @@ PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
Vec2 uv = input.src_uv;
Vec4 result = tex.Sample(sampler, uv);
G_DebugPrint("Hello there");
G_Print("Hello there!");
UI_BlitPSOutput output;
output.SV_Target0 = result;