reset print buffer size in collection worker
This commit is contained in:
parent
bc17e94758
commit
f911e98c98
@ -71,7 +71,7 @@ G_ArenaHandle G_PermArena(void)
|
||||
return perm;
|
||||
}
|
||||
|
||||
//- Cpu -> Gpu copy
|
||||
//- Cpu -> Gpu upload
|
||||
|
||||
G_ResourceHandle G_PushBufferFromString_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferResourceDesc desc)
|
||||
{
|
||||
|
||||
@ -23,7 +23,7 @@ void G_BootstrapCommon(void);
|
||||
|
||||
G_ArenaHandle G_PermArena(void);
|
||||
|
||||
//- Cpu -> Gpu copy
|
||||
//- Cpu -> Gpu upload
|
||||
|
||||
G_ResourceHandle G_PushBufferFromString_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferResourceDesc desc);
|
||||
#define G_PushBufferFromString(_arena, _cl, _src, ...) \
|
||||
|
||||
@ -299,7 +299,7 @@ Enum(G_Layout)
|
||||
G_Layout_ComputeQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE */
|
||||
};
|
||||
|
||||
/* Barrier will execute after previous stages specified by `sync_prev`, and before next stages specified by `sync_next`.
|
||||
/* Barrier will execute after previous stages specified by `stage_prev`, and before next stages specified by `stage_next`.
|
||||
* When barrier executes:
|
||||
* - Necessary resource flushes will occur based on `access_prev` & `access_next`
|
||||
* - Texture layout will transition based on `layout` (if specified)
|
||||
@ -308,8 +308,8 @@ Struct(G_MemoryBarrierDesc)
|
||||
{
|
||||
G_ResourceHandle resource;
|
||||
b32 is_global;
|
||||
G_Stage sync_prev;
|
||||
G_Stage sync_next;
|
||||
G_Stage stage_prev;
|
||||
G_Stage stage_next;
|
||||
G_Access access_prev;
|
||||
G_Access access_next;
|
||||
G_Layout layout;
|
||||
@ -324,8 +324,8 @@ Enum(G_ResourceFlag)
|
||||
G_ResourceFlag_AllowShaderReadWrite = (1 << 0),
|
||||
G_ResourceFlag_AllowRenderTarget = (1 << 1),
|
||||
G_ResourceFlag_AllowDepthStencil = (1 << 2),
|
||||
G_ResourceFlag_HostMemory = (1 << 3),
|
||||
G_ResourceFlag_WriteCombinedHostMemory = (1 << 4),
|
||||
G_ResourceFlag_HostMemory = (1 << 3), /* Resource will automatically be mapped into the cpu's address space */
|
||||
G_ResourceFlag_WriteCombined = (1 << 4), /* Writes into the mapped resource will be combined. Fast for linear memcpy, slow for everything else */
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -680,31 +680,31 @@ void G_SetConstant_(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size)
|
||||
|
||||
void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc);
|
||||
|
||||
#define G_MemorySync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \
|
||||
/* Records a barrier for `_resource` on command list `_cl`.
 *
 * Expands to a G_MemorySyncEx call whose G_MemoryBarrierDesc carries the given
 * previous/next stage and access values. Fields not named here (`is_global`,
 * `layout`) are left zero by the designated initializer.
 *
 * Every argument is parenthesized so an arbitrary expression can be passed
 * without interacting with the surrounding initializer. */
#define G_MemorySync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next) \
  G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
    .resource = (_resource), \
    .stage_prev = (_stage_prev), \
    .access_prev = (_access_prev), \
    .stage_next = (_stage_next), \
    .access_next = (_access_next), \
  })
|
||||
|
||||
#define G_MemoryLayoutSync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
|
||||
/* Records a barrier for `_resource` on command list `_cl` and additionally
 * fills the descriptor's `layout` field with `_layout`.
 *
 * Expands to a G_MemorySyncEx call; `is_global` is left zero by the designated
 * initializer.
 *
 * Every argument is parenthesized so an arbitrary expression can be passed
 * without interacting with the surrounding initializer. */
#define G_MemoryLayoutSync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, _layout) \
  G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
    .resource = (_resource), \
    .stage_prev = (_stage_prev), \
    .access_prev = (_access_prev), \
    .stage_next = (_stage_next), \
    .access_next = (_access_next), \
    .layout = (_layout), \
  })
|
||||
|
||||
#define G_GlobalMemorySync(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
|
||||
/* Records a barrier on command list `_cl` with `is_global` set and no resource
 * named in the descriptor.
 *
 * This must call G_MemorySyncEx directly. G_MemorySync is itself a
 * six-parameter macro, and the preprocessor splits arguments on every comma
 * that is not inside parentheses, so passing it a braced descriptor literal
 * yields the wrong argument count and fails to expand. */
#define G_GlobalMemorySync(_cl, _stage_prev, _access_prev, _stage_next, _access_next) \
  G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
    .is_global = 1, \
    .stage_prev = (_stage_prev), \
    .access_prev = (_access_prev), \
    .stage_next = (_stage_next), \
    .access_next = (_access_next), \
  })
|
||||
|
||||
|
||||
@ -316,14 +316,16 @@ void G_Bootstrap(void)
|
||||
//////////////////////////////
|
||||
//- Start workers
|
||||
|
||||
for (G_QueueKind kind = 0; kind < G_NumQueues; ++kind)
|
||||
{
|
||||
String name = ZI;
|
||||
if (kind == G_QueueKind_Direct) name = Lit("Direct queue worker");
|
||||
if (kind == G_QueueKind_AsyncCompute) name = Lit("Compute queue worker");
|
||||
if (kind == G_QueueKind_AsyncCopy) name = Lit("Copy queue worker");
|
||||
DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind);
|
||||
}
|
||||
// for (G_QueueKind kind = 0; kind < G_NumQueues; ++kind)
|
||||
// {
|
||||
// String name = ZI;
|
||||
// if (kind == G_QueueKind_Direct) name = Lit("Gpu direct queue worker");
|
||||
// if (kind == G_QueueKind_AsyncCompute) name = Lit("Gpu compute queue worker");
|
||||
// if (kind == G_QueueKind_AsyncCopy) name = Lit("Gpu copy queue worker");
|
||||
// DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind);
|
||||
// }
|
||||
|
||||
DispatchWave(Lit("Gpu collection worker"), 1, G_D12_CollectionWorkerEntry, 0);
|
||||
|
||||
EndScratch(scratch);
|
||||
}
|
||||
@ -949,7 +951,7 @@ G_ResourceHandle G_PushBufferEx(G_ArenaHandle arena_handle, G_BufferResourceDesc
|
||||
if (desc.flags & G_ResourceFlag_HostMemory)
|
||||
{
|
||||
heap_kind = G_D12_ResourceHeapKind_Cpu;
|
||||
if (desc.flags & G_ResourceFlag_WriteCombinedHostMemory)
|
||||
if (desc.flags & G_ResourceFlag_WriteCombined)
|
||||
{
|
||||
heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined;
|
||||
}
|
||||
@ -1028,7 +1030,7 @@ G_ResourceHandle G_PushTextureEx(G_ArenaHandle arena_handle, G_TextureResourceDe
|
||||
if (desc.flags & G_ResourceFlag_HostMemory)
|
||||
{
|
||||
heap_kind = G_D12_ResourceHeapKind_Cpu;
|
||||
if (desc.flags & G_ResourceFlag_WriteCombinedHostMemory)
|
||||
if (desc.flags & G_ResourceFlag_WriteCombined)
|
||||
{
|
||||
heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined;
|
||||
}
|
||||
@ -1626,7 +1628,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
||||
|
||||
if (!G_IsRefNil(queue->print_buffer_ref))
|
||||
{
|
||||
slotted_constants[G_ShaderConst_DebugBufferRef] = queue->print_buffer_ref.v;
|
||||
slotted_constants[G_ShaderConst_PrintBufferRef] = queue->print_buffer_ref.v;
|
||||
}
|
||||
|
||||
/* Rasterizer state */
|
||||
@ -1655,10 +1657,12 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
||||
{
|
||||
Lock lock = LockE(&g->free_cmd_chunks_mutex);
|
||||
{
|
||||
for (G_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next)
|
||||
G_D12_CmdChunk *chunk = cl->first_cmd_chunk;
|
||||
while (chunk)
|
||||
{
|
||||
chunk->next = g->first_free_cmd_chunk;
|
||||
G_D12_CmdChunk *next = chunk->next;
|
||||
g->first_free_cmd_chunk = chunk;
|
||||
chunk = next;
|
||||
}
|
||||
}
|
||||
Unlock(&lock);
|
||||
@ -1729,6 +1733,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
||||
} break;
|
||||
|
||||
//- Constant
|
||||
|
||||
case G_D12_CmdKind_Constant:
|
||||
{
|
||||
i32 slot = cmd->constant.slot;
|
||||
@ -1766,8 +1771,8 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
||||
D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
|
||||
|
||||
/* Translate gpu barrier kind -> d3d barrier fields */
|
||||
D3D12_BARRIER_SYNC sync_before = G_D12_BarrierSyncFromStages(desc.sync_prev);
|
||||
D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.sync_next);
|
||||
D3D12_BARRIER_SYNC sync_before = G_D12_BarrierSyncFromStages(desc.stage_prev);
|
||||
D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.stage_next);
|
||||
D3D12_BARRIER_ACCESS access_before = G_D12_BarrierAccessFromAccesses(desc.access_prev);
|
||||
D3D12_BARRIER_ACCESS access_after = G_D12_BarrierAccessFromAccesses(desc.access_next);
|
||||
D3D12_BARRIER_LAYOUT layout_before = resource->texture_layout;
|
||||
@ -2818,16 +2823,15 @@ void G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync)
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Workers
|
||||
//~ Collection worker
|
||||
|
||||
void G_D12_WorkerEntry(WaveLaneCtx *lane)
|
||||
void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
|
||||
{
|
||||
G_QueueKind queue_kind = (G_QueueKind)lane->wave->udata;
|
||||
G_QueueKind queue_kind = G_QueueKind_Direct;
|
||||
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
|
||||
|
||||
// if (queue->print_buffer_size > 0)
|
||||
if (queue_kind == G_QueueKind_Direct)
|
||||
{
|
||||
|
||||
G_ArenaHandle gpu_perm = G_PermArena();
|
||||
G_ResourceHandle readback_buff = G_PushBuffer(
|
||||
gpu_perm,
|
||||
@ -2836,15 +2840,23 @@ void G_D12_WorkerEntry(WaveLaneCtx *lane)
|
||||
.flags = G_ResourceFlag_HostMemory
|
||||
);
|
||||
|
||||
u32 zero = 0;
|
||||
for (;;)
|
||||
{
|
||||
/* FIXME: Remove this */
|
||||
|
||||
Sleep(500);
|
||||
Sleep(100);
|
||||
|
||||
G_CommandListHandle cl = G_PrepareCommandList(queue_kind);
|
||||
{
|
||||
/* Copy print buffer to readback buffer */
|
||||
G_CopyBufferToBuffer(cl, readback_buff, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size));
|
||||
/* Reset size to 0 */
|
||||
G_MemorySync(cl, queue->print_buffer,
|
||||
G_Stage_Copy, G_Access_CopyRead,
|
||||
G_Stage_Copy, G_Access_CopyWrite
|
||||
);
|
||||
G_CopyCpuToBuffer(cl, queue->print_buffer, 0, &zero, RNGU64(0, 4));
|
||||
}
|
||||
i64 completion = G_CommitCommandList(cl);
|
||||
|
||||
@ -2857,4 +2869,3 @@ void G_D12_WorkerEntry(WaveLaneCtx *lane)
|
||||
DEBUGBREAKABLE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -469,6 +469,6 @@ G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v);
|
||||
G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Workers
|
||||
//~ Collection worker
|
||||
|
||||
void G_D12_WorkerEntry(WaveLaneCtx *lane);
|
||||
void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane);
|
||||
|
||||
@ -102,7 +102,7 @@ Struct(G_SamplerStateRef) { u32 v; };
|
||||
StaticAssert(G_NumGeneralPurposeConstants == 8);
|
||||
StaticAssert(G_NumReservedConstants == 1);
|
||||
|
||||
G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_DebugBufferRef, 8);
|
||||
G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 8);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Debug printf
|
||||
@ -110,24 +110,32 @@ G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_DebugBufferRef, 8)
|
||||
/* This technique is based on MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */
|
||||
|
||||
#if IsLanguageG && GPU_SHADER_PRINT
|
||||
Struct(G_DebugBuffer)
|
||||
/* Local staging buffer for one shader print: G_Print fills it one character at
 * a time through G_PushPrintChar and then hands it to G_CommitPrint. */
Struct(G_TempPrintBuffer)
|
||||
{
|
||||
u32 data_u32[256]; /* Characters packed four per u32, lowest byte first (see G_PushPrintChar) */
|
||||
u32 byte_pos; /* Number of bytes pushed so far; also the next write position */
|
||||
};
|
||||
|
||||
void G_PushDebugChar(inout G_DebugBuffer buff, u32 c)
|
||||
/* Appends the low 8 bits of `c` to `buff` and advances `buff.byte_pos` by one.
 * Bytes are packed four per u32, lowest byte first. */
void G_PushPrintChar(inout G_TempPrintBuffer buff, u32 c)
{
  /* TODO: Overflow check */
  u32 word_idx = buff.byte_pos >> 2;
  u32 byte_in_word = buff.byte_pos & 0x03;
  u32 packed = (c & 0xFF) << (byte_in_word * 8);
  if (byte_in_word != 0)
  {
    buff.data_u32[word_idx] |= packed;
  }
  else
  {
    /* buff is not zero initialized, so the first byte of each word overwrites instead of OR-ing */
    buff.data_u32[word_idx] = packed;
  }
  buff.byte_pos += 1;
}
|
||||
|
||||
void G_CommitDebugBuffer(G_DebugBuffer buff)
|
||||
void G_CommitPrint(G_TempPrintBuffer buff)
|
||||
{
|
||||
RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_DebugBufferRef);
|
||||
RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_PrintBufferRef);
|
||||
u32 u32s_count = (buff.byte_pos + 3) / 4;
|
||||
u32 alloc_size = u32s_count * 4;
|
||||
|
||||
@ -145,17 +153,18 @@ G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_DebugBufferRef, 8)
|
||||
}
|
||||
}
|
||||
|
||||
#define G_DebugPrint(fmt) do { \
|
||||
G_DebugBuffer __dbg; \
|
||||
__dbg.byte_pos = 0; \
|
||||
/* Shader-side print of the string `fmt`.
 * Pushes each character of `fmt` into a local G_TempPrintBuffer until a zero
 * character is reached, pushes a terminating 0, then passes the buffer to
 * G_CommitPrint. Wrapped in do { } while (0) so it behaves as one statement.
 * NOTE(review): `fmt` is indexed directly, so it presumably has to be a string
 * literal - confirm against the shader compiler in use. */
#define G_Print(fmt) do { \
|
||||
G_TempPrintBuffer __tmp; \
|
||||
__tmp.byte_pos = 0; \
|
||||
u32 __pos = 0; \
|
||||
while (U32FromChar(fmt[__pos]) != 0) \
|
||||
{ \
|
||||
G_PushDebugChar(__dbg, U32FromChar(fmt[__pos])); \
|
||||
G_PushPrintChar(__tmp, U32FromChar(fmt[__pos])); \
|
||||
++__pos; \
|
||||
} \
|
||||
G_CommitDebugBuffer(__dbg); \
|
||||
G_PushPrintChar(__tmp, 0); \
|
||||
G_CommitPrint(__tmp); \
|
||||
} while (0)
|
||||
#else
|
||||
#define G_DebugPrint(fmt)
|
||||
#define G_Print(fmt)
|
||||
#endif
|
||||
|
||||
@ -145,7 +145,7 @@ PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
|
||||
Vec2 uv = input.src_uv;
|
||||
Vec4 result = tex.Sample(sampler, uv);
|
||||
|
||||
G_DebugPrint("Hello there");
|
||||
G_Print("Hello there!");
|
||||
|
||||
UI_BlitPSOutput output;
|
||||
output.SV_Target0 = result;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user