working shader print w/o formatting

This commit is contained in:
jacob 2025-12-10 10:24:01 -06:00
parent 93e9c4b78a
commit 4727e5786b
5 changed files with 71 additions and 41 deletions

View File

@ -70,6 +70,7 @@
#define FLOOD_DEBUG 0 #define FLOOD_DEBUG 0
#define GPU_DEBUG 1 #define GPU_DEBUG 1
#define GPU_SHADER_PRINT 1
#define GPU_DEBUG_VALIDATION 1 #define GPU_DEBUG_VALIDATION 1
/* If enabled, bitbuffs will insert/verify magic numbers & length for each read & write */ /* If enabled, bitbuffs will insert/verify magic numbers & length for each read & write */

View File

@ -299,7 +299,7 @@ Enum(G_ResourceFlag)
G_ResourceFlag_AllowRenderTarget = (1 << 1), G_ResourceFlag_AllowRenderTarget = (1 << 1),
G_ResourceFlag_AllowDepthStencil = (1 << 2), G_ResourceFlag_AllowDepthStencil = (1 << 2),
G_ResourceFlag_HostMemory = (1 << 3), G_ResourceFlag_HostMemory = (1 << 3),
G_ResourceFlag_WriteCombineHostMemory = (1 << 4), G_ResourceFlag_WriteCombinedHostMemory = (1 << 4),
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////

View File

@ -743,14 +743,14 @@ G_D12_ResourceHeap *G_D12_ResourceHeapFromArena(G_D12_Arena *gpu_arena, G_D12_Re
{ {
D3D12_HEAP_DESC d3d_desc = ZI; D3D12_HEAP_DESC d3d_desc = ZI;
d3d_desc.SizeInBytes = Mebi(512); d3d_desc.SizeInBytes = Mebi(512);
if (kind == G_D12_ResourceHeapKind_CpuWriteBack) if (kind == G_D12_ResourceHeapKind_Cpu)
{ {
d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM; d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM;
d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
d3d_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; d3d_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
is_mappable = 1; is_mappable = 1;
} }
else if (kind == G_D12_ResourceHeapKind_CpuWriteCombine) else if (kind == G_D12_ResourceHeapKind_CpuWriteCombined)
{ {
d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM; d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM;
d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE;
@ -942,13 +942,13 @@ G_ResourceHandle G_PushBufferEx(G_ArenaHandle arena_handle, G_BufferResourceDesc
G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle);
/* Fetch heap */ /* Fetch heap */
G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Default; G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu;
if (desc.flags & G_ResourceFlag_HostMemory) if (desc.flags & G_ResourceFlag_HostMemory)
{ {
heap_kind = G_D12_ResourceHeapKind_CpuWriteBack; heap_kind = G_D12_ResourceHeapKind_Cpu;
if (desc.flags & G_ResourceFlag_WriteCombineHostMemory) if (desc.flags & G_ResourceFlag_WriteCombinedHostMemory)
{ {
heap_kind = G_D12_ResourceHeapKind_CpuWriteCombine; heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined;
} }
} }
G_D12_ResourceHeap *heap = G_D12_ResourceHeapFromArena(gpu_arena, heap_kind); G_D12_ResourceHeap *heap = G_D12_ResourceHeapFromArena(gpu_arena, heap_kind);
@ -1021,13 +1021,13 @@ G_ResourceHandle G_PushTextureEx(G_ArenaHandle arena_handle, G_TextureResourceDe
D3D12_BARRIER_LAYOUT initial_layout = G_D12_BarrierLayoutFromLayout(desc.initial_layout); D3D12_BARRIER_LAYOUT initial_layout = G_D12_BarrierLayoutFromLayout(desc.initial_layout);
/* Fetch heap */ /* Fetch heap */
G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Default; G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu;
if (desc.flags & G_ResourceFlag_HostMemory) if (desc.flags & G_ResourceFlag_HostMemory)
{ {
heap_kind = G_D12_ResourceHeapKind_CpuWriteBack; heap_kind = G_D12_ResourceHeapKind_Cpu;
if (desc.flags & G_ResourceFlag_WriteCombineHostMemory) if (desc.flags & G_ResourceFlag_WriteCombinedHostMemory)
{ {
heap_kind = G_D12_ResourceHeapKind_CpuWriteCombine; heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined;
} }
} }
G_D12_ResourceHeap *heap = G_D12_ResourceHeapFromArena(gpu_arena, heap_kind); G_D12_ResourceHeap *heap = G_D12_ResourceHeapFromArena(gpu_arena, heap_kind);
@ -1621,9 +1621,9 @@ void G_CommitCommandListEx(G_CommandListHandle cl_handle, u64 fence_ops_count, G
for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; } for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; }
for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; }
if (GPU_DEBUG) if (!G_IsRefNil(queue->debug_print_buffer_ref))
{ {
slotted_constants[G_DebugPrintBufferConstantSlot] = queue->debug_print_buffer_ref.v; slotted_constants[G_ShaderConst_DebugBufferRef] = queue->debug_print_buffer_ref.v;
} }
/* Rasterizer state */ /* Rasterizer state */

View File

@ -109,9 +109,9 @@ Struct(G_D12_DescriptorList)
*/ */
Enum(G_D12_ResourceHeapKind) Enum(G_D12_ResourceHeapKind)
{ {
G_D12_ResourceHeapKind_Default, G_D12_ResourceHeapKind_Gpu,
G_D12_ResourceHeapKind_CpuWriteBack, G_D12_ResourceHeapKind_Cpu,
G_D12_ResourceHeapKind_CpuWriteCombine, G_D12_ResourceHeapKind_CpuWriteCombined,
G_D12_ResourceHeapKind_Count, G_D12_ResourceHeapKind_Count,
}; };

View File

@ -34,11 +34,13 @@ Struct(G_SamplerStateRef) { u32 v; };
//~ Constant types //~ Constant types
/* /*
* NOTE: D3d12 exposes 64 root constants, and vulkan 32 push constants. * D3D12 exposes 64 root constants and Vulkan exposes 32 push constants.
* Supposedly AMD hardware will spill constants to scratch memory once there
* are more than 13: https://gpuopen.com/learn/rdna-performance-guide/
*/ */
#define G_NumGeneralPurposeConstants (8) /* Constants available for any usage */ #define G_NumGeneralPurposeConstants (8) /* Constants available for any usage */
#define G_NumReservedConstants (1) /* Constants reserved for usage by the GPU layer */ #define G_NumReservedConstants (1) /* Constants reserved for usage by the GPU layer */
#define G_NumBackendReservedConstants (1) /* Constants reserved for usage by the implementation backend layer */ #define G_NumBackendReservedConstants (1) /* Constants reserved for usage by the GPU backend layer */
#define G_NumConstants (G_NumGeneralPurposeConstants + G_NumReservedConstants + G_NumBackendReservedConstants) #define G_NumConstants (G_NumGeneralPurposeConstants + G_NumReservedConstants + G_NumBackendReservedConstants)
#if IsLanguageC #if IsLanguageC
@ -101,33 +103,60 @@ Struct(G_SamplerStateRef) { u32 v; };
StaticAssert(G_NumGeneralPurposeConstants == 8); StaticAssert(G_NumGeneralPurposeConstants == 8);
StaticAssert(G_NumReservedConstants == 1); StaticAssert(G_NumReservedConstants == 1);
#define G_DebugPrintBufferConstantSlot 8 G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_DebugBufferRef, 8);
G_ForceDeclConstant(G_RWByteAddressBufferRef, G_DebugPrintBuffer, 8);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Debug printf //~ Debug printf
#if IsLanguageG /* This technique is based on MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */
/* This technique comes from MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */ #if IsLanguageG && GPU_SHADER_PRINT
#if GPU_DEBUG Struct(G_DebugBuffer)
#define G_DebugPrint(fmt_cstr) do { \ {
u32 __strlen = 0; \ u32 data_u32[256];
for (;;) { if (U32FromChar(fmt_cstr[__strlen]) == 0) { break; } ++__strlen; } \ u32 byte_pos;
RWByteAddressBuffer __print_buff; \
__print_buff = G_RWByteAddressBufferFromRef(G_DebugPrintBuffer); \ };
u32 __pos; \
/*
 * Appends the low 8 bits of `byte` to `buff` and advances buff.byte_pos by one.
 * Bytes are packed four per u32, with the first byte of each group in the
 * lowest 8 bits.
 *
 * The first byte written into a u32 overwrites the whole word instead of
 * OR-ing into it. G_DebugPrint only resets byte_pos before pushing, so the
 * words of data_u32 start out uninitialized and must not be read before they
 * have been written once.
 *
 * Bytes pushed after the buffer is full are dropped instead of indexing past
 * the end of data_u32.
 */
void G_PushByteToDebug(inout G_DebugBuffer buff, u32 byte)
{
  /* Capacity in bytes. Must match the length of G_DebugBuffer.data_u32 (256 u32s) */
  const u32 max_bytes = 256 * 4;
  if (buff.byte_pos < max_bytes)
  {
    u32 u32_arr_pos = buff.byte_pos / 4;
    u32 idx_in_u32 = buff.byte_pos & 0x03;
    u32 shifted = (byte & 0xFF) << (idx_in_u32 * 8);
    if (idx_in_u32 == 0)
    {
      /* Starting a new word: assign so that stale contents are discarded */
      buff.data_u32[u32_arr_pos] = shifted;
    }
    else
    {
      buff.data_u32[u32_arr_pos] |= shifted;
    }
    buff.byte_pos += 1;
  }
}
/*
 * Copies the bytes accumulated in `buff` into the buffer referenced by
 * G_ShaderConst_DebugBufferRef.
 *
 * The payload is rounded up to a whole number of u32s. That size is atomically
 * added to the u32 counter stored at byte offset 0 of the destination, and the
 * counter's previous value (plus 4, to skip the counter itself) is used as the
 * write offset. Nothing is stored if the reserved range fails the bounds
 * check, but the counter has already been advanced by then.
 */
void G_CommitDebugBuffer(G_DebugBuffer buff)
{
  RWByteAddressBuffer dst = G_RWByteAddressBufferFromRef(G_ShaderConst_DebugBufferRef);

  /* Round the payload up to whole u32s */
  u32 word_count = (buff.byte_pos + 3) / 4;
  u32 reserve_size = word_count * 4;

  /* Reserve space by bumping the counter at the start of the destination */
  u32 write_offset;
  dst.InterlockedAdd(0, reserve_size, write_offset);
  write_offset += 4; /* Skip past the counter itself */

  /* NOTE(review): assumes countof() yields the destination's size in bytes; if so, `<` rejects an exact fit - confirm */
  if ((write_offset + reserve_size) < countof(dst))
  {
    for (u32 word_idx = 0; word_idx < word_count; ++word_idx)
    {
      dst.Store(write_offset + (word_idx * 4), buff.data_u32[word_idx]);
    }
  }
}
#define G_DebugPrint(fmt) do { \
G_DebugBuffer __dbg; \
__dbg.byte_pos = 0; \
u32 __pos = 0; \
while (U32FromChar(fmt[__pos]) != 0) \
{ \ { \
for (u32 char_idx = 0; char_idx < __strlen; ++char_idx) \ G_PushByteToDebug(__dbg, U32FromChar(fmt[__pos])); \
{ \ ++__pos; \
__print_buff.Store(__pos + char_idx, U32FromChar(fmt_cstr[char_idx])); \
} \
} \ } \
G_CommitDebugBuffer(__dbg); \
} while (0) } while (0)
#else #else
#define G_DebugPrint(fmt_cstr) #define G_DebugPrint(fmt)
#endif
#endif #endif