diff --git a/src/config.h b/src/config.h index 36009283..f40de1e6 100644 --- a/src/config.h +++ b/src/config.h @@ -70,6 +70,7 @@ #define FLOOD_DEBUG 0 #define GPU_DEBUG 1 +#define GPU_SHADER_PRINT 1 #define GPU_DEBUG_VALIDATION 1 /* If enabled, bitbuffs will insert/verify magic numbers & length for each read & write */ diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 8165d5af..c419079d 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -299,7 +299,7 @@ Enum(G_ResourceFlag) G_ResourceFlag_AllowRenderTarget = (1 << 1), G_ResourceFlag_AllowDepthStencil = (1 << 2), G_ResourceFlag_HostMemory = (1 << 3), - G_ResourceFlag_WriteCombineHostMemory = (1 << 4), + G_ResourceFlag_WriteCombinedHostMemory = (1 << 4), }; //////////////////////////////////////////////////////////// diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index bc7df499..df0aece4 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -743,14 +743,14 @@ G_D12_ResourceHeap *G_D12_ResourceHeapFromArena(G_D12_Arena *gpu_arena, G_D12_Re { D3D12_HEAP_DESC d3d_desc = ZI; d3d_desc.SizeInBytes = Mebi(512); - if (kind == G_D12_ResourceHeapKind_CpuWriteBack) + if (kind == G_D12_ResourceHeapKind_Cpu) { d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM; d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; d3d_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; is_mappable = 1; } - else if (kind == G_D12_ResourceHeapKind_CpuWriteCombine) + else if (kind == G_D12_ResourceHeapKind_CpuWriteCombined) { d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM; d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; @@ -942,13 +942,13 @@ G_ResourceHandle G_PushBufferEx(G_ArenaHandle arena_handle, G_BufferResourceDesc G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); /* Fetch heap */ - G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Default; + G_D12_ResourceHeapKind heap_kind = 
G_D12_ResourceHeapKind_Gpu; if (desc.flags & G_ResourceFlag_HostMemory) { - heap_kind = G_D12_ResourceHeapKind_CpuWriteBack; - if (desc.flags & G_ResourceFlag_WriteCombineHostMemory) + heap_kind = G_D12_ResourceHeapKind_Cpu; + if (desc.flags & G_ResourceFlag_WriteCombinedHostMemory) { - heap_kind = G_D12_ResourceHeapKind_CpuWriteCombine; + heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined; } } G_D12_ResourceHeap *heap = G_D12_ResourceHeapFromArena(gpu_arena, heap_kind); @@ -1021,13 +1021,13 @@ G_ResourceHandle G_PushTextureEx(G_ArenaHandle arena_handle, G_TextureResourceDe D3D12_BARRIER_LAYOUT initial_layout = G_D12_BarrierLayoutFromLayout(desc.initial_layout); /* Fetch heap */ - G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Default; + G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu; if (desc.flags & G_ResourceFlag_HostMemory) { - heap_kind = G_D12_ResourceHeapKind_CpuWriteBack; - if (desc.flags & G_ResourceFlag_WriteCombineHostMemory) + heap_kind = G_D12_ResourceHeapKind_Cpu; + if (desc.flags & G_ResourceFlag_WriteCombinedHostMemory) { - heap_kind = G_D12_ResourceHeapKind_CpuWriteCombine; + heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined; } } G_D12_ResourceHeap *heap = G_D12_ResourceHeapFromArena(gpu_arena, heap_kind); @@ -1621,9 +1621,9 @@ void G_CommitCommandListEx(G_CommandListHandle cl_handle, u64 fence_ops_count, G for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; } for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } - if (GPU_DEBUG) + if (!G_IsRefNil(queue->debug_print_buffer_ref)) { - slotted_constants[G_DebugPrintBufferConstantSlot] = queue->debug_print_buffer_ref.v; + slotted_constants[G_ShaderConst_DebugBufferRef] = queue->debug_print_buffer_ref.v; } /* Rasterizer state */ diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index 199a7524..cbfba434 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ 
b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -109,9 +109,9 @@ Struct(G_D12_DescriptorList) */ Enum(G_D12_ResourceHeapKind) { - G_D12_ResourceHeapKind_Default, - G_D12_ResourceHeapKind_CpuWriteBack, - G_D12_ResourceHeapKind_CpuWriteCombine, + G_D12_ResourceHeapKind_Gpu, + G_D12_ResourceHeapKind_Cpu, + G_D12_ResourceHeapKind_CpuWriteCombined, G_D12_ResourceHeapKind_Count, }; diff --git a/src/gpu/gpu_shader_core.cgh b/src/gpu/gpu_shader_core.cgh index 3ad09f94..a91b1b12 100644 --- a/src/gpu/gpu_shader_core.cgh +++ b/src/gpu/gpu_shader_core.cgh @@ -34,11 +34,13 @@ Struct(G_SamplerStateRef) { u32 v; }; //~ Constant types /* - * NOTE: D3d12 exposes 64 root constants, and vulkan 32 push constants. + * D3D12 exposes 64 root constants and Vulkan exposes 32 push constants. + * Supposedly AMD hardware will spill constants to scratch memory once there + * are more than 13: https://gpuopen.com/learn/rdna-performance-guide/ */ #define G_NumGeneralPurposeConstants (8) /* Constants available for any usage */ #define G_NumReservedConstants (1) /* Constants reserved for usage by the GPU layer */ -#define G_NumBackendReservedConstants (1) /* Constants reserved for usage by the implementation backend layer */ +#define G_NumBackendReservedConstants (1) /* Constants reserved for usage by the GPU backend layer */ #define G_NumConstants (G_NumGeneralPurposeConstants + G_NumReservedConstants + G_NumBackendReservedConstants) #if IsLanguageC @@ -101,33 +103,60 @@ Struct(G_SamplerStateRef) { u32 v; }; StaticAssert(G_NumGeneralPurposeConstants == 8); StaticAssert(G_NumReservedConstants == 1); -#define G_DebugPrintBufferConstantSlot 8 -G_ForceDeclConstant(G_RWByteAddressBufferRef, G_DebugPrintBuffer, 8); +G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_DebugBufferRef, 8); //////////////////////////////////////////////////////////// //~ Debug printf -#if IsLanguageG +/* This technique is based on MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */ - /* This technique comes 
from MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */ - #if GPU_DEBUG - #define G_DebugPrint(fmt_cstr) do { \ - u32 __strlen = 0; \ - for (;;) { if (U32FromChar(fmt_cstr[__strlen]) == 0) { break; } ++__strlen; } \ - RWByteAddressBuffer __print_buff; \ - __print_buff = G_RWByteAddressBufferFromRef(G_DebugPrintBuffer); \ - u32 __pos; \ - __print_buff.InterlockedAdd(0, __strlen, __pos); \ - if (__pos < countof(__print_buff)) \ - { \ - for (u32 char_idx = 0; char_idx < __strlen; ++char_idx) \ - { \ - __print_buff.Store(__pos + char_idx, U32FromChar(fmt_cstr[char_idx])); \ - } \ - } \ - } while (0) - #else - #define G_DebugPrint(fmt_cstr) - #endif +#if IsLanguageG && GPU_SHADER_PRINT + Struct(G_DebugBuffer) + { + u32 data_u32[256]; + u32 byte_pos; + }; + + void G_PushByteToDebug(inout G_DebugBuffer buff, u32 byte) + { + u32 u32_arr_pos = buff.byte_pos / 4; + u32 idx_in_u32 = buff.byte_pos & 0x03; + buff.data_u32[u32_arr_pos] |= (byte & 0xFF) << (idx_in_u32 * 8); + buff.byte_pos += 1; + } + + void G_CommitDebugBuffer(G_DebugBuffer buff) + { + RWByteAddressBuffer rw = G_RWByteAddressBufferFromRef(G_ShaderConst_DebugBufferRef); + u32 u32s_count = (buff.byte_pos + 3) / 4; + u32 alloc_size = u32s_count * 4; + + u32 base; + rw.InterlockedAdd(0, alloc_size, base); + base += 4; /* Account for counter at beginning of buff */ + + if ((base + alloc_size) < countof(rw)) + { + for (u32 u32_idx = 0; u32_idx < u32s_count; ++u32_idx) + { + u32 data = buff.data_u32[u32_idx]; + rw.Store(base + (u32_idx * 4), data); + } + } + } + + #define G_DebugPrint(fmt) do { \ + G_DebugBuffer __dbg = (G_DebugBuffer)0; /* Zero-init: G_PushByteToDebug ORs bytes into data_u32 */ \ + __dbg.byte_pos = 0; \ + u32 __pos = 0; \ + while (U32FromChar(fmt[__pos]) != 0) \ + { \ + G_PushByteToDebug(__dbg, U32FromChar(fmt[__pos])); \ + ++__pos; \ + } \ + G_CommitDebugBuffer(__dbg); \ + } while (0) +#else + #define G_DebugPrint(fmt) #endif