reset print buffer size in collection worker

This commit is contained in:
jacob 2025-12-10 17:23:51 -06:00
parent bc17e94758
commit f911e98c98
7 changed files with 108 additions and 88 deletions

View File

@ -71,7 +71,7 @@ G_ArenaHandle G_PermArena(void)
return perm; return perm;
} }
//- Cpu -> Gpu copy //- Cpu -> Gpu upload
G_ResourceHandle G_PushBufferFromString_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferResourceDesc desc) G_ResourceHandle G_PushBufferFromString_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferResourceDesc desc)
{ {

View File

@ -23,7 +23,7 @@ void G_BootstrapCommon(void);
G_ArenaHandle G_PermArena(void); G_ArenaHandle G_PermArena(void);
//- Cpu -> Gpu copy //- Cpu -> Gpu upload
G_ResourceHandle G_PushBufferFromString_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferResourceDesc desc); G_ResourceHandle G_PushBufferFromString_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferResourceDesc desc);
#define G_PushBufferFromString(_arena, _cl, _src, ...) \ #define G_PushBufferFromString(_arena, _cl, _src, ...) \

View File

@ -299,7 +299,7 @@ Enum(G_Layout)
G_Layout_ComputeQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE */ G_Layout_ComputeQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE */
}; };
/* Barrier will execute after previous stages specified by `sync_prev`, and before next stages specified by `sync_next`. /* Barrier will execute after previous stages specified by `stage_prev`, and before next stages specified by `stage_next`.
* When barrier executes: * When barrier executes:
* - Necessary resource flushes will occur based on `access_prev` & `access_next` * - Necessary resource flushes will occur based on `access_prev` & `access_next`
* - Texture layout will transition based on `layout` (if specified) * - Texture layout will transition based on `layout` (if specified)
@ -308,8 +308,8 @@ Struct(G_MemoryBarrierDesc)
{ {
G_ResourceHandle resource; G_ResourceHandle resource;
b32 is_global; b32 is_global;
G_Stage sync_prev; G_Stage stage_prev;
G_Stage sync_next; G_Stage stage_next;
G_Access access_prev; G_Access access_prev;
G_Access access_next; G_Access access_next;
G_Layout layout; G_Layout layout;
@ -324,8 +324,8 @@ Enum(G_ResourceFlag)
G_ResourceFlag_AllowShaderReadWrite = (1 << 0), G_ResourceFlag_AllowShaderReadWrite = (1 << 0),
G_ResourceFlag_AllowRenderTarget = (1 << 1), G_ResourceFlag_AllowRenderTarget = (1 << 1),
G_ResourceFlag_AllowDepthStencil = (1 << 2), G_ResourceFlag_AllowDepthStencil = (1 << 2),
G_ResourceFlag_HostMemory = (1 << 3), G_ResourceFlag_HostMemory = (1 << 3), /* Resource will automatically be mapped into the cpu's address space */
G_ResourceFlag_WriteCombinedHostMemory = (1 << 4), G_ResourceFlag_WriteCombined = (1 << 4), /* Writes into the mapped resource will be combined. Fast for linear memcpy, slow for everything else */
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -680,32 +680,32 @@ void G_SetConstant_(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size)
void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc); void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc);
#define G_MemorySync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \ #define G_MemorySync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.resource = (_resource), \ .resource = (_resource), \
.sync_prev = _sync_prev, \ .stage_prev = _stage_prev, \
.access_prev = _access_prev, \ .access_prev = _access_prev, \
.sync_next = _sync_next, \ .stage_next = _stage_next, \
.access_next = _access_next, \ .access_next = _access_next, \
}) })
#define G_MemoryLayoutSync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \ #define G_MemoryLayoutSync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, _layout) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.resource = (_resource), \ .resource = (_resource), \
.sync_prev = _sync_prev, \ .stage_prev = _stage_prev, \
.access_prev = _access_prev, \ .access_prev = _access_prev, \
.sync_next = _sync_next, \ .stage_next = _stage_next, \
.access_next = _access_next, \ .access_next = _access_next, \
.layout = _layout, \ .layout = _layout, \
}) })
#define G_GlobalMemorySync(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \ #define G_GlobalMemorySync(_cl, _stage_prev, _access_prev, _stage_next, _access_next) \
G_MemorySync((_cl), (G_MemoryBarrierDesc) { \ G_MemorySync((_cl), (G_MemoryBarrierDesc) { \
.is_global = 1, \ .is_global = 1, \
.sync_prev = _sync_prev, \ .stage_prev = _stage_prev, \
.access_prev = _access_prev, \ .access_prev = _access_prev, \
.sync_next = _sync_next, \ .stage_next = _stage_next, \
.access_next = _access_next, \ .access_next = _access_next, \
}) })
#define G_DumbMemorySync(cl, resource) \ #define G_DumbMemorySync(cl, resource) \

View File

@ -316,14 +316,16 @@ void G_Bootstrap(void)
////////////////////////////// //////////////////////////////
//- Start workers //- Start workers
for (G_QueueKind kind = 0; kind < G_NumQueues; ++kind) // for (G_QueueKind kind = 0; kind < G_NumQueues; ++kind)
{ // {
String name = ZI; // String name = ZI;
if (kind == G_QueueKind_Direct) name = Lit("Direct queue worker"); // if (kind == G_QueueKind_Direct) name = Lit("Gpu direct queue worker");
if (kind == G_QueueKind_AsyncCompute) name = Lit("Compute queue worker"); // if (kind == G_QueueKind_AsyncCompute) name = Lit("Gpu compute queue worker");
if (kind == G_QueueKind_AsyncCopy) name = Lit("Copy queue worker"); // if (kind == G_QueueKind_AsyncCopy) name = Lit("Gpu copy queue worker");
DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind); // DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind);
} // }
DispatchWave(Lit("Gpu collection worker"), 1, G_D12_CollectionWorkerEntry, 0);
EndScratch(scratch); EndScratch(scratch);
} }
@ -949,7 +951,7 @@ G_ResourceHandle G_PushBufferEx(G_ArenaHandle arena_handle, G_BufferResourceDesc
if (desc.flags & G_ResourceFlag_HostMemory) if (desc.flags & G_ResourceFlag_HostMemory)
{ {
heap_kind = G_D12_ResourceHeapKind_Cpu; heap_kind = G_D12_ResourceHeapKind_Cpu;
if (desc.flags & G_ResourceFlag_WriteCombinedHostMemory) if (desc.flags & G_ResourceFlag_WriteCombined)
{ {
heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined; heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined;
} }
@ -1028,7 +1030,7 @@ G_ResourceHandle G_PushTextureEx(G_ArenaHandle arena_handle, G_TextureResourceDe
if (desc.flags & G_ResourceFlag_HostMemory) if (desc.flags & G_ResourceFlag_HostMemory)
{ {
heap_kind = G_D12_ResourceHeapKind_Cpu; heap_kind = G_D12_ResourceHeapKind_Cpu;
if (desc.flags & G_ResourceFlag_WriteCombinedHostMemory) if (desc.flags & G_ResourceFlag_WriteCombined)
{ {
heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined; heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined;
} }
@ -1626,7 +1628,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
if (!G_IsRefNil(queue->print_buffer_ref)) if (!G_IsRefNil(queue->print_buffer_ref))
{ {
slotted_constants[G_ShaderConst_DebugBufferRef] = queue->print_buffer_ref.v; slotted_constants[G_ShaderConst_PrintBufferRef] = queue->print_buffer_ref.v;
} }
/* Rasterizer state */ /* Rasterizer state */
@ -1655,10 +1657,12 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
{ {
Lock lock = LockE(&g->free_cmd_chunks_mutex); Lock lock = LockE(&g->free_cmd_chunks_mutex);
{ {
for (G_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next) G_D12_CmdChunk *chunk = cl->first_cmd_chunk;
while (chunk)
{ {
chunk->next = g->first_free_cmd_chunk; G_D12_CmdChunk *next = chunk->next;
g->first_free_cmd_chunk = chunk; g->first_free_cmd_chunk = chunk;
chunk = next;
} }
} }
Unlock(&lock); Unlock(&lock);
@ -1729,6 +1733,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
} break; } break;
//- Constant //- Constant
case G_D12_CmdKind_Constant: case G_D12_CmdKind_Constant:
{ {
i32 slot = cmd->constant.slot; i32 slot = cmd->constant.slot;
@ -1766,8 +1771,8 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
/* Translate gpu barrier kind -> d3d barrier fields */ /* Translate gpu barrier kind -> d3d barrier fields */
D3D12_BARRIER_SYNC sync_before = G_D12_BarrierSyncFromStages(desc.sync_prev); D3D12_BARRIER_SYNC sync_before = G_D12_BarrierSyncFromStages(desc.stage_prev);
D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.sync_next); D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.stage_next);
D3D12_BARRIER_ACCESS access_before = G_D12_BarrierAccessFromAccesses(desc.access_prev); D3D12_BARRIER_ACCESS access_before = G_D12_BarrierAccessFromAccesses(desc.access_prev);
D3D12_BARRIER_ACCESS access_after = G_D12_BarrierAccessFromAccesses(desc.access_next); D3D12_BARRIER_ACCESS access_after = G_D12_BarrierAccessFromAccesses(desc.access_next);
D3D12_BARRIER_LAYOUT layout_before = resource->texture_layout; D3D12_BARRIER_LAYOUT layout_before = resource->texture_layout;
@ -2818,43 +2823,49 @@ void G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync)
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Workers //~ Collection worker
void G_D12_WorkerEntry(WaveLaneCtx *lane) void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
{ {
G_QueueKind queue_kind = (G_QueueKind)lane->wave->udata; G_QueueKind queue_kind = G_QueueKind_Direct;
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
// if (queue->print_buffer_size > 0) // if (queue->print_buffer_size > 0)
if (queue_kind == G_QueueKind_Direct)
G_ArenaHandle gpu_perm = G_PermArena();
G_ResourceHandle readback_buff = G_PushBuffer(
gpu_perm,
u8,
queue->print_buffer_size,
.flags = G_ResourceFlag_HostMemory
);
u32 zero = 0;
for (;;)
{ {
G_ArenaHandle gpu_perm = G_PermArena(); /* FIXME: Remove this */
G_ResourceHandle readback_buff = G_PushBuffer(
gpu_perm,
u8,
queue->print_buffer_size,
.flags = G_ResourceFlag_HostMemory
);
for (;;) Sleep(100);
G_CommandListHandle cl = G_PrepareCommandList(queue_kind);
{ {
/* FIXME: Remove this */ /* Copy print buffer to readback buffer */
G_CopyBufferToBuffer(cl, readback_buff, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size));
Sleep(500); /* Reset size to 0 */
G_MemorySync(cl, queue->print_buffer,
G_CommandListHandle cl = G_PrepareCommandList(queue_kind); G_Stage_Copy, G_Access_CopyRead,
{ G_Stage_Copy, G_Access_CopyWrite
G_CopyBufferToBuffer(cl, readback_buff, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size)); );
} G_CopyCpuToBuffer(cl, queue->print_buffer, 0, &zero, RNGU64(0, 4));
i64 completion = G_CommitCommandList(cl);
G_SyncCpu(G_MaskFromQueue(queue_kind));
u32 size = *G_StructFromResource(readback_buff, u32);
u8 *text = G_StructFromResource(readback_buff, u8) + 4;
String s = STRING(size, text);
DEBUGBREAKABLE;
} }
i64 completion = G_CommitCommandList(cl);
G_SyncCpu(G_MaskFromQueue(queue_kind));
u32 size = *G_StructFromResource(readback_buff, u32);
u8 *text = G_StructFromResource(readback_buff, u8) + 4;
String s = STRING(size, text);
DEBUGBREAKABLE;
} }
} }

View File

@ -469,6 +469,6 @@ G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v);
G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size); G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Workers //~ Collection worker
void G_D12_WorkerEntry(WaveLaneCtx *lane); void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane);

View File

@ -102,7 +102,7 @@ Struct(G_SamplerStateRef) { u32 v; };
StaticAssert(G_NumGeneralPurposeConstants == 8); StaticAssert(G_NumGeneralPurposeConstants == 8);
StaticAssert(G_NumReservedConstants == 1); StaticAssert(G_NumReservedConstants == 1);
G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_DebugBufferRef, 8); G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 8);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Debug printf //~ Debug printf
@ -110,24 +110,32 @@ G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_DebugBufferRef, 8)
/* This technique is based on MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */ /* This technique is based on MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */
#if IsLanguageG && GPU_SHADER_PRINT #if IsLanguageG && GPU_SHADER_PRINT
Struct(G_DebugBuffer) Struct(G_TempPrintBuffer)
{ {
u32 data_u32[256]; u32 data_u32[256];
u32 byte_pos; u32 byte_pos;
}; };
void G_PushDebugChar(inout G_DebugBuffer buff, u32 c) void G_PushPrintChar(inout G_TempPrintBuffer buff, u32 c)
{ {
/* TODO: Overflow check */ /* TODO: Overflow check */
u32 u32_arr_pos = buff.byte_pos / 4; u32 u32_arr_pos = buff.byte_pos / 4;
u32 idx_in_u32 = buff.byte_pos & 0x03; u32 idx_in_u32 = buff.byte_pos & 0x03;
buff.data_u32[u32_arr_pos] |= (c & 0xFF) << (idx_in_u32 * 8); if (idx_in_u32 == 0)
{
/* Since buff is not zero initialized, we set the byte on first write here */
buff.data_u32[u32_arr_pos] = c & 0xFF;
}
else
{
buff.data_u32[u32_arr_pos] |= (c & 0xFF) << (idx_in_u32 * 8);
}
buff.byte_pos += 1; buff.byte_pos += 1;
} }
void G_CommitDebugBuffer(G_DebugBuffer buff) void G_CommitPrint(G_TempPrintBuffer buff)
{ {
RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_DebugBufferRef); RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_PrintBufferRef);
u32 u32s_count = (buff.byte_pos + 3) / 4; u32 u32s_count = (buff.byte_pos + 3) / 4;
u32 alloc_size = u32s_count * 4; u32 alloc_size = u32s_count * 4;
@ -145,17 +153,18 @@ G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_DebugBufferRef, 8)
} }
} }
#define G_DebugPrint(fmt) do { \ #define G_Print(fmt) do { \
G_DebugBuffer __dbg; \ G_TempPrintBuffer __tmp; \
__dbg.byte_pos = 0; \ __tmp.byte_pos = 0; \
u32 __pos = 0; \ u32 __pos = 0; \
while (U32FromChar(fmt[__pos]) != 0) \ while (U32FromChar(fmt[__pos]) != 0) \
{ \ { \
G_PushDebugChar(__dbg, U32FromChar(fmt[__pos])); \ G_PushPrintChar(__tmp, U32FromChar(fmt[__pos])); \
++__pos; \ ++__pos; \
} \ } \
G_CommitDebugBuffer(__dbg); \ G_PushPrintChar(__tmp, 0); \
G_CommitPrint(__tmp); \
} while (0) } while (0)
#else #else
#define G_DebugPrint(fmt) #define G_Print(fmt)
#endif #endif

View File

@ -145,7 +145,7 @@ PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
Vec2 uv = input.src_uv; Vec2 uv = input.src_uv;
Vec4 result = tex.Sample(sampler, uv); Vec4 result = tex.Sample(sampler, uv);
G_DebugPrint("Hello there"); G_Print("Hello there!");
UI_BlitPSOutput output; UI_BlitPSOutput output;
output.SV_Target0 = result; output.SV_Target0 = result;