From 3197f1849577d8bf2837ceaa14e21f38e82a1f92 Mon Sep 17 00:00:00 2001 From: jacob Date: Thu, 5 Mar 2026 07:07:00 -0600 Subject: [PATCH] replace granular gpu sync, access, & texture-layouts with queue-exclusive/shared layouts & tracked sync --- .gitattributes | 5 - src/base/base.cgh | 1 + src/base/base_win32/base_win32.c | 28 + src/gpu/gpu.lay | 3 +- src/gpu/gpu_core.h | 206 +- src/gpu/gpu_dx12/gpu_dx12_core.c | 1707 +++++++++-------- src/gpu/gpu_dx12/gpu_dx12_core.h | 124 +- .../AgilitySDK/1.618.5/D3D12Core.dat | 3 - .../AgilitySDK/1.618.5/d3d12SDKLayers.dat | 3 - .../AgilitySDK/1.619.0/D3D12Core.dat | 3 + .../AgilitySDK/1.619.0/d3d12SDKLayers.dat | 3 + src/pp/pp_vis/pp_vis_core.c | 59 +- src/proto/proto.c | 104 +- src/ui/ui_core.c | 25 +- 14 files changed, 1124 insertions(+), 1150 deletions(-) delete mode 100644 src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat delete mode 100644 src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat create mode 100644 src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.619.0/D3D12Core.dat create mode 100644 src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.619.0/d3d12SDKLayers.dat diff --git a/.gitattributes b/.gitattributes index ee9b721b..827724dd 100644 --- a/.gitattributes +++ b/.gitattributes @@ -23,8 +23,3 @@ *.ttf filter=lfs diff=lfs merge=lfs -text *.mp3 filter=lfs diff=lfs merge=lfs -text *.dat filter=lfs diff=lfs merge=lfs -text - -############################## -#- Libraries - -# vulkan/* filter=lfs diff=lfs merge=lfs -text diff --git a/src/base/base.cgh b/src/base/base.cgh index 31ebdf3f..362529f3 100644 --- a/src/base/base.cgh +++ b/src/base/base.cgh @@ -798,6 +798,7 @@ Inline u64 MixU64s(u64 seed_a, u64 seed_b) String GetAppDirectory(void); void Echo(String msg); b32 Panic(String msg); + b32 DebugBreakPrompt(String title, String msg); Callstack CaptureCallstack(u64 skip_frames); b32 IsRunningInDebugger(void); b32 IsRunningInWine(void); diff --git a/src/base/base_win32/base_win32.c b/src/base/base_win32/base_win32.c index 963c0d08..1e6584d7 100644 --- a/src/base/base_win32/base_win32.c +++ b/src/base/base_win32/base_win32.c @@ -96,6 +96,34 @@ b32 Panic(String msg) return 0; } +b32 DebugBreakPrompt(String title, String msg) +{ + LogInfoF("[DEBUG BREAK PROMPT]: %F", FmtString(msg)); + TempArena scratch = BeginScratchNoConflict(); + i32 result = 0; + { + wchar_t *title_wstr = WstrFromString(scratch.arena, title); + wchar_t *msg_wstr = WstrFromString(scratch.arena, msg); + b32 is_debug = IsRunningInDebugger(); + i32 mb_result = 0; + { + u32 mb_flags = MB_SETFOREGROUND | MB_ICONWARNING; + if (is_debug) + { + mb_flags |= MB_CANCELTRYCONTINUE; + } + mb_result = MessageBoxExW(0, msg_wstr, title_wstr, mb_flags, 0); + } + if (mb_result == IDCANCEL) + { + ExitProcess(1); + } + result = is_debug && mb_result != IDCONTINUE; + } + EndScratch(scratch); + return result; +} + Callstack CaptureCallstack(u64 skip_frames) { Callstack result; diff --git a/src/gpu/gpu.lay b/src/gpu/gpu.lay index 54213fe1..f7ed7d2e 100644 --- a/src/gpu/gpu.lay +++ b/src/gpu/gpu.lay @@ -27,5 +27,4 @@ @IncludeC gpu_common.c -// @DefaultDownstream Win32 gpu_dx12 -@DefaultDownstream Win32 gpu_vk +@DefaultDownstream Win32 gpu_dx12 diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index ccee6c07..af16ca8e 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -189,112 +189,22 @@ Enum(G_Format) }; //////////////////////////////////////////////////////////// -//~ Memory sync types - -Enum(G_Stage) -{ - G_Stage_None = 0, - - // Compute stages - G_Stage_ComputeShading = (1 << 1), - - // Draw stages - G_Stage_IndexAssembly = (1 << 2), - G_Stage_VertexShading = (1 << 3), - G_Stage_PixelShading = (1 << 4), - G_Stage_DepthStencil = (1 << 5), - G_Stage_RenderTarget = (1 << 6), - - // Copy stages - G_Stage_Copy = (1 << 7), - - // Indirect stages - G_Stage_Indirect = (1 << 8), - - // Aggregate stages - G_Stage_Drawing = G_Stage_IndexAssembly | - G_Stage_VertexShading | - G_Stage_PixelShading | - G_Stage_DepthStencil | - G_Stage_RenderTarget, - - G_Stage_Shading = G_Stage_ComputeShading | - G_Stage_VertexShading | - G_Stage_PixelShading, - - G_Stage_All = 0xFFFFFFFF -}; - -Enum(G_Access) -{ - G_Access_None = 0, - - G_Access_ShaderReadWrite = (1 << 1), - G_Access_ShaderRead = (1 << 2), - - G_Access_CopyWrite = (1 << 3), - G_Access_CopyRead = (1 << 4), - - G_Access_DepthStencilRead = (1 << 5), - G_Access_DepthStencilWrite = (1 << 6), - G_Access_RenderTargetWrite = (1 << 7), - - G_Access_IndexBuffer = (1 << 8), - G_Access_IndirectArgument = (1 << 9), - - G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the stage specified in the barrier -}; +//~ Texture layout types Enum(G_Layout) { - G_Layout_NoChange, - G_Layout_Undefined, + // Supports any read access with up to 1 write access to non overlapping regions from any queue. + // Cannot be transitioned to/from. + // Depth-stencil textures cannot use this layout. + G_Layout_Simultaneous, - ////////////////////////////// - //- Queue-agnostic + // Supports present, shader-read, and copy-read/write in any queue kind. + // Transitionable from `G_Layout_Exclusive` in non-copy queue. + G_Layout_Common, - // Simultaneous layout allows a resource to be used on any queue with any - // access type (except depth-stencil). Resources cannot transition to/from - // this layout, they must be created with it. Allows concurrent reads - // with up to 1 write to non-overlapping regions. - G_Layout_Simultaneous, // Any access except depth-stencil <-- D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS - G_Layout_Common, // ShaderRead/CopyRead/CopyWrite/Present <-- D3D12_BARRIER_LAYOUT_COMMON - - ////////////////////////////// - //- Direct queue - - G_Layout_DirectQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON - G_Layout_DirectQueue_Read, // ShaderRead/CopyRead/DepthStencilRead <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ - G_Layout_DirectQueue_DepthStencil, // DepthStencilRead/DepthStencilWrite <-- D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE - G_Layout_DirectQueue_RenderTarget, // RenderTargetWrite <-- D3D12_BARRIER_LAYOUT_RENDER_TARGET - - ////////////////////////////// - //- Compute queue - - G_Layout_ComputeQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON - - ////////////////////////////// - //- Direct & Compute queue - - G_Layout_DirectComputeQueue_Read, // ShaderRead/CopyRead <-- D3D12_BARRIER_LAYOUT_GENERIC_READ - G_Layout_DirectComputeQueue_ShaderReadWrite, // ShaderReadWrite <-- D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS - G_Layout_DirectComputeQueue_CopyWrite, // CopyWrite <-- D3D12_BARRIER_LAYOUT_COPY_DEST -}; - -// Barrier will execute after stages specified by `stage_prev`, and before stages specified by `stage_next`. -// When barrier executes: -// - Necessary resource flushes will occur based on `access_prev` & `access_next` -// - Texture layout will transition based on `layout` (if specified) -Struct(G_MemoryBarrierDesc) -{ - G_ResourceHandle resource; - b32 is_global; - G_Stage stage_prev; - G_Stage stage_next; - G_Access access_prev; - G_Access access_next; - G_Layout layout; - RngI32 mips; // Inclusive range of texture mip levels to sync + // Supports any access in the current queue kind. + // Transitionable from `G_Layout_Common` in non-copy queue. + G_Layout_Exclusive, }; //////////////////////////////////////////////////////////// @@ -390,14 +300,14 @@ Enum(G_ResourceKind) Enum(G_ResourceFlag) { - G_ResourceFlag_None = 0, - G_ResourceFlag_AllowShaderReadWrite = (1 << 0), - G_ResourceFlag_AllowRenderTarget = (1 << 1), - G_ResourceFlag_AllowDepthStencil = (1 << 2), - G_ResourceFlag_ZeroMemory = (1 << 3), - G_ResourceFlag_HostMemory = (1 << 4), // Resource will be mapped into the cpu's address space - G_ResourceFlag_Uncached = (1 << 5), // Cpu writes will be combined & reads will be uncached - G_ResourceFlag_ForceNoReuse = (1 << 6), + G_ResourceFlag_None = 0, + G_ResourceFlag_AllowShaderReadWrite = (1 << 1), + G_ResourceFlag_AllowRenderTarget = (1 << 2), + G_ResourceFlag_AllowDepthStencil = (1 << 3), + G_ResourceFlag_ZeroMemory = (1 << 4), + G_ResourceFlag_HostMemory = (1 << 5), // Resource will be mapped into the cpu's address space + G_ResourceFlag_Uncached = (1 << 6), // Cpu writes will be combined & reads will be uncached + G_ResourceFlag_ForceNoReuse = (1 << 7), }; Struct(G_BufferDesc) @@ -454,18 +364,18 @@ Struct(G_RefDesc) }; //////////////////////////////////////////////////////////// -//~ Rasterization types +//~ Draw types -Enum(G_RasterMode) +Enum(G_DrawMode) { - G_RasterMode_None, - G_RasterMode_PointList, - G_RasterMode_LineList, - G_RasterMode_LineStrip, - G_RasterMode_TriangleList, - G_RasterMode_TriangleStrip, - G_RasterMode_WireTriangleList, - G_RasterMode_WireTriangleStrip, + G_DrawMode_None, + G_DrawMode_PointList, + G_DrawMode_LineList, + G_DrawMode_LineStrip, + G_DrawMode_TriangleList, + G_DrawMode_TriangleStrip, + G_DrawMode_WireTriangleList, + G_DrawMode_WireTriangleStrip, }; Enum(G_BlendMode) @@ -684,52 +594,10 @@ void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size G_SetConstantEx((cl), (name), &__src, sizeof(__src)); \ } while (0) -//- Memory sync +//- Sync -void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc); - -#define G_MemorySync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, ...) \ - G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ - .resource = (_resource), \ - .stage_prev = _stage_prev, \ - .access_prev = _access_prev, \ - .stage_next = _stage_next, \ - .access_next = _access_next, \ - .mips.max = G_MaxMips, \ - __VA_ARGS__ \ - }) - -#define G_MemoryLayoutSync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, _layout, ...) \ - G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ - .resource = (_resource), \ - .stage_prev = _stage_prev, \ - .access_prev = _access_prev, \ - .stage_next = _stage_next, \ - .access_next = _access_next, \ - .layout = _layout, \ - .mips.max = G_MaxMips, \ - __VA_ARGS__ \ - }) - -#define G_GlobalMemorySync(_cl, _stage_prev, _access_prev, _stage_next, _access_next, ...) \ - G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ - .is_global = 1, \ - .stage_prev = _stage_prev, \ - .access_prev = _access_prev, \ - .stage_next = _stage_next, \ - .access_next = _access_next, \ - .mips.max = G_MaxMips, \ - __VA_ARGS__ \ - }) - -#define G_DumbMemorySync(cl, resource, ...) \ - G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__) - -#define G_DumbMemoryLayoutSync(cl, resource, layout, ...) \ - G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout), __VA_ARGS__) - -#define G_DumbGlobalMemorySync(cl, ...) \ - G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__) +void G_Sync(G_CommandListHandle cl); +void G_SyncLayout(G_CommandListHandle cl, G_ResourceHandle resource, G_Layout layout); //- Compute @@ -739,25 +607,21 @@ void G_ComputeEx(G_CommandListHandle cl, ComputeShaderDesc cs, Vec3I32 threads); #define G_Compute2D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, 1)) #define G_Compute3D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, (threads).z)) -//- Rasterize +//- Draw -void G_Rasterize( +void G_Draw( G_CommandListHandle cl, VertexShaderDesc vs, PixelShaderDesc ps, u32 instances_count, G_IndexBufferDesc index_buffer, u32 render_targets_count, G_RenderTargetDesc *render_targets, Rng3 viewport, Rng2 scissor, - G_RasterMode raster_mode + G_DrawMode draw_mode ); //- Clear void G_ClearRenderTarget(G_CommandListHandle cl, G_ResourceHandle render_target, Vec4 color, i32 mip); -//- Log - -void G_LogResource(G_CommandListHandle cl, G_ResourceHandle resource); - //////////////////////////////////////////////////////////// //~ @hookdecl Queue synchronization diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index 385a47a6..09ecaefa 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -34,8 +34,8 @@ void G_Bootstrap(void) String appdir = GetAppDirectory(); - u32 sdk_ver_num = 618; - String sdk_ver_str = Lit("1.618.5"); + u32 sdk_ver_num = 619; + String sdk_ver_str = Lit("1.619.0"); String sdk_dir_path = StringF(scratch.arena, "%Fd3d12/%F/", FmtString(appdir), FmtString(sdk_ver_str)); { LogInfoF("D3D12 agility sdk path: \"%F\"", FmtString(sdk_dir_path)); @@ -44,8 +44,8 @@ void G_Bootstrap(void) if (!PLT_IsFile(core_path) || !PLT_IsFile(layers_path)) { LogInfoF("Unpacking D3D12 Agility SDK to %F", FmtString(sdk_dir_path)); - ResourceKey core_key = ResourceKeyFromStore(&G_D12_Resources, Lit("AgilitySDK/1.618.5/D3D12Core.dat")); - ResourceKey layers_key = ResourceKeyFromStore(&G_D12_Resources, Lit("AgilitySDK/1.618.5/d3d12SDKLayers.dat")); + ResourceKey core_key = ResourceKeyFromStore(&G_D12_Resources, Lit("AgilitySDK/1.619.0/D3D12Core.dat")); + ResourceKey layers_key = ResourceKeyFromStore(&G_D12_Resources, Lit("AgilitySDK/1.619.0/d3d12SDKLayers.dat")); String core_data = PLT_Decompress(scratch.arena, DataFromResource(core_key), PLT_CompressionLevel_3); String layers_data = PLT_Decompress(scratch.arena, DataFromResource(layers_key), PLT_CompressionLevel_3); PLT_MkDir(StringF(scratch.arena, "%Fd3d12/", FmtString(appdir))); @@ -270,10 +270,10 @@ void G_Bootstrap(void) G_D12.device = device; } - // Enable debug layer breaks + // Enable debug layer callbacks if (G_D12.debug_layer_enabled) { - // Enable D3D12 Debug break + // D3D12 debug { ID3D12InfoQueue1 *info = 0; hr = ID3D12Device_QueryInterface(G_D12.device, &IID_ID3D12InfoQueue1, (void **)&info); @@ -281,11 +281,20 @@ void G_Bootstrap(void) { Panic(Lit("Failed to query ID3D12Device interface")); } - ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1); - ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1); - ID3D12InfoQueue_Release(info); + // ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1); + // ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1); + { + DWORD cookie = 0; + ID3D12InfoQueue1_RegisterMessageCallback( + info, + G_D12_DebugCallback, + D3D12_MESSAGE_CALLBACK_FLAG_NONE, // D3D12_MESSAGE_CALLBACK_IGNORE_FILTERS + 0, + &cookie + ); + } } - // Enable DXGI Debug break + // DXGI Debug { IDXGIInfoQueue *dxgi_info = 0; hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info); @@ -293,9 +302,8 @@ void G_Bootstrap(void) { Panic(Lit("Failed to retrieve DXGI debug interface")); } - IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1); - IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1); - IDXGIInfoQueue_Release(dxgi_info); + // IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1); + // IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1); } } @@ -580,120 +588,26 @@ DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format) return (DXGI_FORMAT)format; } -D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages) +D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromUsageKind(G_QueueKind queue_kind, G_D12_TrackedUsageKind usage_kind) { - D3D12_BARRIER_SYNC result = 0; - if (stages == G_Stage_All) + D3D12_BARRIER_LAYOUT untracked_layout; + switch (queue_kind) { - result = D3D12_BARRIER_SYNC_ALL; + default: untracked_layout = D3D12_BARRIER_LAYOUT_COMMON; break; + case G_QueueKind_Direct: untracked_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON; break; + case G_QueueKind_AsyncCompute: untracked_layout = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON; break; } - else - { - result |= D3D12_BARRIER_SYNC_COMPUTE_SHADING * AnyBit(stages, G_Stage_ComputeShading); - result |= D3D12_BARRIER_SYNC_INDEX_INPUT * AnyBit(stages, G_Stage_IndexAssembly); - result |= D3D12_BARRIER_SYNC_VERTEX_SHADING * AnyBit(stages, G_Stage_VertexShading); - result |= D3D12_BARRIER_SYNC_PIXEL_SHADING * AnyBit(stages, G_Stage_PixelShading); - result |= D3D12_BARRIER_SYNC_DEPTH_STENCIL * AnyBit(stages, G_Stage_DepthStencil); - result |= D3D12_BARRIER_SYNC_RENDER_TARGET * AnyBit(stages, G_Stage_RenderTarget); - result |= D3D12_BARRIER_SYNC_COPY * AnyBit(stages, G_Stage_Copy); - result |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT * AnyBit(stages, G_Stage_Indirect); - } - return result; -} -D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses) -{ - D3D12_BARRIER_ACCESS result = 0; - if (accesses == 0) + D3D12_BARRIER_LAYOUT result; + switch(usage_kind) { - result = D3D12_BARRIER_ACCESS_NO_ACCESS; - } - else if (accesses == G_Access_All) - { - result = D3D12_BARRIER_ACCESS_COMMON; - } - else - { - result |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS * AnyBit(accesses, G_Access_ShaderReadWrite); - result |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE * AnyBit(accesses, G_Access_ShaderRead); - result |= D3D12_BARRIER_ACCESS_COPY_DEST * AnyBit(accesses, G_Access_CopyWrite); - result |= D3D12_BARRIER_ACCESS_COPY_SOURCE * AnyBit(accesses, G_Access_CopyRead); - result |= D3D12_BARRIER_ACCESS_INDEX_BUFFER * AnyBit(accesses, G_Access_IndexBuffer); - result |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT * AnyBit(accesses, G_Access_IndirectArgument); - result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ * AnyBit(accesses, G_Access_DepthStencilRead); - result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE * AnyBit(accesses, G_Access_DepthStencilWrite); - result |= D3D12_BARRIER_ACCESS_RENDER_TARGET * AnyBit(accesses, G_Access_RenderTargetWrite); - } - return result; -} - -D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout) -{ - PERSIST Readonly D3D12_BARRIER_LAYOUT translate[] = { - [G_Layout_Undefined] = D3D12_BARRIER_LAYOUT_UNDEFINED, - [G_Layout_Simultaneous] = D3D12_BARRIER_LAYOUT_COMMON, - [G_Layout_Common] = D3D12_BARRIER_LAYOUT_COMMON, - [G_Layout_DirectQueue_General] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON, - [G_Layout_DirectQueue_Read] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, - [G_Layout_DirectQueue_DepthStencil] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, - [G_Layout_DirectQueue_RenderTarget] = D3D12_BARRIER_LAYOUT_RENDER_TARGET, - [G_Layout_ComputeQueue_General] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON, - [G_Layout_DirectComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS, - [G_Layout_DirectComputeQueue_Read] = D3D12_BARRIER_LAYOUT_GENERIC_READ, - [G_Layout_DirectComputeQueue_CopyWrite] = D3D12_BARRIER_LAYOUT_COPY_DEST, - }; - D3D12_BARRIER_LAYOUT result = D3D12_BARRIER_LAYOUT_UNDEFINED; - if (layout >= 0 && layout < countof(translate)) - { - result = translate[layout]; - } - return result; -}; - -String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout) -{ - PERSIST Readonly String names[] = { - [D3D12_BARRIER_LAYOUT_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_COMMON"), - [D3D12_BARRIER_LAYOUT_PRESENT] = CompLit("D3D12_BARRIER_LAYOUT_PRESENT"), - [D3D12_BARRIER_LAYOUT_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_GENERIC_READ"), - [D3D12_BARRIER_LAYOUT_RENDER_TARGET] = CompLit("D3D12_BARRIER_LAYOUT_RENDER_TARGET"), - [D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS"), - [D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE"), - [D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ] = CompLit("D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ"), - [D3D12_BARRIER_LAYOUT_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_SHADER_RESOURCE"), - [D3D12_BARRIER_LAYOUT_COPY_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_COPY_SOURCE"), - [D3D12_BARRIER_LAYOUT_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_COPY_DEST"), - [D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE"), - [D3D12_BARRIER_LAYOUT_RESOLVE_DEST] = CompLit("D3D12_BARRIER_LAYOUT_RESOLVE_DEST"), - [D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE"), - [D3D12_BARRIER_LAYOUT_VIDEO_DECODE_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_DECODE_READ"), - [D3D12_BARRIER_LAYOUT_VIDEO_DECODE_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_DECODE_WRITE"), - [D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_READ"), - [D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_WRITE"), - [D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_READ"), - [D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_WRITE"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST"), - [D3D12_BARRIER_LAYOUT_VIDEO_QUEUE_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_QUEUE_COMMON") - }; - String result = Zi; - if (layout >= 0 && layout < countof(names)) - { - result = names[layout]; - } - else if (layout == D3D12_BARRIER_LAYOUT_UNDEFINED) - { - result = Lit("D3D12_BARRIER_LAYOUT_UNDEFINED"); + default: + case G_D12_TrackedUsageKind_Untracked: result = untracked_layout; break; + case G_D12_TrackedUsageKind_RenderTarget: result = D3D12_BARRIER_LAYOUT_RENDER_TARGET; break; + case G_D12_TrackedUsageKind_DepthStencilRead: result = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ; break; + case G_D12_TrackedUsageKind_DepthStencilReadWrite: result = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE; break; + case G_D12_TrackedUsageKind_MakeCommon: result = D3D12_BARRIER_LAYOUT_COMMON; break; + case G_D12_TrackedUsageKind_MakeExclusive: result = untracked_layout; break; } return result; } @@ -1237,6 +1151,9 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) gpu_arena->descriptors.first = 0; gpu_arena->descriptors.last = 0; } + + // Full sync + G_Sync(G_D12_MakeHandle(G_CommandListHandle, cl)); } //////////////////////////////////////////////////////////// @@ -1247,6 +1164,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle Arena *perm = PermArena(); G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_QueueKind queue_kind = cl->queue_kind; G_D12_Resource *resource = 0; b32 is_buffer = desc.kind == G_ResourceKind_Buffer; @@ -1320,7 +1238,6 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle ////////////////////////////// //- Initialize d3d resource desc - D3D12_BARRIER_LAYOUT d3d_begin_layout = D3D12_BARRIER_LAYOUT_UNDEFINED; D3D12_CLEAR_VALUE clear_value = Zi; D3D12_RESOURCE_DESC1 d3d_desc = Zi; if (is_buffer) @@ -1363,19 +1280,6 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle clear_value.Color[2] = desc.texture.clear_color.z, clear_value.Color[3] = desc.texture.clear_color.w, clear_value.Format = d3d_desc.Format; - - d3d_begin_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout); - if (!AnyBit(flags, G_ResourceFlag_ZeroMemory) && !AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)) - { - if (AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) - { - d3d_begin_layout = D3D12_BARRIER_LAYOUT_RENDER_TARGET; - } - else if (AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) - { - d3d_begin_layout = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE; - } - } } ////////////////////////////// @@ -1452,7 +1356,6 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle //- Init resource resource->flags = flags; - resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, d3d_desc.MipLevels) + 1; if (is_buffer) { @@ -1486,12 +1389,27 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle { clear_value_arg = &clear_value; } + + D3D12_BARRIER_LAYOUT d3d_initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED; + if (is_texture) + { + d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMMON; + if (desc.texture.initial_layout == G_Layout_Exclusive) + { + switch (queue_kind) + { + case G_QueueKind_Direct: d3d_initial_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON; break; + case G_QueueKind_AsyncCompute: d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON; break; + } + } + } + HRESULT hr = ID3D12Device10_CreateCommittedResource3( G_D12.device, &heap_props, heap_flags, &resource->d3d_desc, - d3d_begin_layout, + d3d_initial_layout, clear_value_arg, 0, // pProtectedSession 0, // NumCastableFormats @@ -1500,39 +1418,17 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle (void **)&resource->d3d_resource ); Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1); - for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) - { - resource->cmdlist_texture_layouts[mip_idx] = d3d_begin_layout; - } + resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, d3d_desc.MipLevels); // Queue initial Rtv/Dsv discard - if (!AnyBit(flags, G_ResourceFlag_ZeroMemory)) + if ( + !AnyBit(flags, G_ResourceFlag_ZeroMemory) && + AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) + ) { - if (AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) - { - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Discard; - cmd->discard.resource = resource; - } - - if (d3d_begin_layout == D3D12_BARRIER_LAYOUT_RENDER_TARGET) - { - G_MemoryLayoutSync( - cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), - G_Stage_RenderTarget, G_Access_RenderTargetWrite, - G_Stage_All, G_Access_All, - desc.texture.initial_layout - ); - } - else if (d3d_begin_layout == D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE) - { - G_MemoryLayoutSync( - cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), - G_Stage_DepthStencil, G_Access_DepthStencilWrite, - G_Stage_All, G_Access_All, - desc.texture.initial_layout - ); - } + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Discard; + cmd->discard.resource = resource; } if (!SUCCEEDED(hr)) @@ -1574,28 +1470,19 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle } ////////////////////////////// - //- Barrier reused resource + //- Transition reused resources to common - // TODO: These barriers are overly cautious. It's unlikely that anything - // other than an activation-layout transition is needed for textures, since - // arenas are rarely reset in the middle of a command list. In the case that - // a resource is reused within the same command list, we should insert - // barriers as described in the spec: - // https://microsoft.github.io/DirectX-Specs/d3d/D3D12EnhancedBarriers.html#resource-aliasing - - if (can_reuse) + G_ResourceHandle resource_handle = G_D12_MakeHandle(G_ResourceHandle, resource); + if ( + can_reuse && + desc.texture.initial_layout == G_Layout_Common && + queue_kind != G_QueueKind_AsyncCopy + ) { - if (is_buffer) - { - G_DumbMemorySync(cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource)); - } - else if (is_texture) - { - G_DumbMemoryLayoutSync(cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), desc.texture.initial_layout); - } + G_SyncLayout(cl_handle, resource_handle, G_Layout_Common); } - return G_D12_MakeHandle(G_ResourceHandle, resource); + return resource_handle; } //////////////////////////////////////////////////////////// @@ -2190,6 +2077,57 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) return result; } +void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resource *resource, RngI32 mips, G_D12_TrackedUsageKind usage_kind) +{ + // b32 should_track = !AnyBit(resource->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS) && ( + // usage_kind == G_D12_TrackedUsageKind_ToCommon || + // usage_kind == G_D12_TrackedUsageKind_ToExclusive || + // ); + b32 should_track = !AnyBit(resource->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS); + + if (should_track) + { + u64 hash = MixU64(resource->uid); + + if (!batch->tracked_resource_bins) + { + batch->tracked_resource_bins = PushStructs(arena, G_D12_TrackedResourceBin, G_D12_TrackedResourceBinsCount); + } + + G_D12_TrackedResourceBin *bin = &batch->tracked_resource_bins[hash % G_D12_TrackedResourceBinsCount]; + G_D12_TrackedResourceNode *trn = bin->first; + for (; trn; trn = trn->next_in_bin) + { + if (trn->hash == hash) + { + break; + } + } + + if (!trn) + { + trn = PushStruct(arena, G_D12_TrackedResourceNode); + trn->resource = resource; + trn->hash = hash; + SllQueuePush(batch->first_tracked_resource, batch->last_tracked_resource, trn); + SllStackPushN(bin->first, trn, next_in_bin); + } + + for (i32 mip_idx = mips.min; mip_idx <= mips.max; ++mip_idx) + { + G_D12_TrackedMip *mip = &trn->mips[mip_idx]; + if (usage_kind > mip->usage) + { + mip->usage = usage_kind; + } + if (usage_kind == G_D12_TrackedUsageKind_MakeExclusive) + { + mip->prev_usage = G_D12_TrackedUsageKind_MakeCommon; + } + } + } +} + //////////////////////////////////////////////////////////// //~ @hookimpl Command @@ -2225,44 +2163,6 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); TempArena scratch = BeginScratchNoConflict(); - // Begin dx12 command list - G_D12_RawCommandList *rcl = G_D12_PrepareRawCommandList(queue_kind); - ID3D12GraphicsCommandList7 *d3d_cl = rcl->d3d_cl; - - // Pipeline state - b32 graphics_rootsig_set = 0; - b32 compute_rootsig_set = 0; - b32 descriptor_heaps_set = 0; - G_D12_Pipeline *bound_pipeline = 0; - - // Constants state - u64 slotted_constants[G_NumConstants]; - u64 bound_compute_constants[G_NumConstants]; - u64 bound_graphics_constants[G_NumConstants]; - for (i32 i = 0; i < countof(slotted_constants); ++i) { slotted_constants[i] = 0; } // Zero-initialize all slots - for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; } - for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } - - // Fill built-in constants - if (!G_IsRefNil(queue->print_buffer_ref)) - { - slotted_constants[G_ShaderConst_PrintBufferRef] = queue->print_buffer_ref.v; - } - { - b32 tweak_b32 = TweakBool("Shader tweak-bool", 1); - f32 tweak_f32 = TweakFloat("Shader tweak-float", 1, 0, 1); - slotted_constants[G_ShaderConst_TweakB32] = tweak_b32; - slotted_constants[G_ShaderConst_TweakF32] = *(u32 *)&tweak_f32; - } - - // Rasterizer state - D3D12_VIEWPORT bound_viewport = Zi; - D3D12_RECT bound_scissor = Zi; - D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; - D3D12_INDEX_BUFFER_VIEW bound_ibv = Zi; - u64 bound_render_target_uids[G_MaxRenderTargets] = Zi; - u64 bound_render_clear_target_uid = 0; - // Flatten command chunks u64 cmds_count = 0; G_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, G_D12_Cmd, cl->cmds_count); @@ -2293,641 +2193,733 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) } } - // Batch barrier cmds - i64 max_buffer_barriers = 0; - i64 max_texture_barriers = 0; - i64 max_global_barriers = 0; + ////////////////////////////// + //- Build batches + + u64 batches_count = 0; + G_D12_CmdBatch *first_batch = 0; + G_D12_CmdBatch *last_batch = 0; + { - u64 cmd_idx = 0; - u64 batch_gen = 0; - G_D12_Cmd *prev_barrier_cmd = 0; - while (cmd_idx < cmds_count) + G_D12_CmdBatch *batch = PushStruct(scratch.arena, G_D12_CmdBatch); + + for (u64 cmd_idx = 0; cmd_idx < cmds_count; ++cmd_idx) { G_D12_Cmd *cmd = &cmds[cmd_idx]; - switch (cmd->kind) + G_D12_CmdKind cmd_kind = cmd->kind; + + if (cmd_kind != G_D12_CmdKind_Barrier && cmd_kind != G_D12_CmdKind_Constant) { - // Batch-interrupting cmds - default: - { - cmd_idx += 1; - batch_gen += 1; - } break; + batch->contains_hazard = 1; + } - // Non-batch-interrupting cmds - case G_D12_CmdKind_Constant: - { - cmd_idx += 1; - } break; + if (cmd_kind == G_D12_CmdKind_Compute) + { + batch->contains_compute_shader = 1; + } + else if (cmd_kind == G_D12_CmdKind_Draw) + { + // TODO: Track depth-stencil as well, not just render targets - case G_D12_CmdKind_Barrier: + for (u32 rt_idx = 0; rt_idx < G_MaxRenderTargets; ++rt_idx) { - // Determine 'before' state from lookup - if (prev_barrier_cmd && prev_barrier_cmd->barrier.batch_gen != batch_gen) + G_RenderTargetDesc *rt_desc = &cmd->draw.render_target_descs[rt_idx]; + G_D12_Resource *resource = G_D12_ResourceFromHandle(rt_desc->resource); + if (resource) { - // This barrier is part of new batch - prev_barrier_cmd->barrier.is_end_of_batch = 1; - } - cmd->barrier.batch_gen = batch_gen; - prev_barrier_cmd = cmd; - - if (cmd->barrier.desc.is_global) - { - max_global_barriers += 1; + batch->contains_rtv = 1; + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(rt_desc->mip, rt_desc->mip), G_D12_TrackedUsageKind_RenderTarget); } else { - G_D12_Resource *resource = G_D12_ResourceFromHandle(cmd->barrier.desc.resource); - if (resource->is_texture) + break; + } + } + } + else if (cmd_kind == G_D12_CmdKind_ClearRtv) + { + batch->contains_rtv = 1; + G_D12_Resource *resource = cmd->clear_rtv.resource; + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(cmd->clear_rtv.mip, cmd->clear_rtv.mip), G_D12_TrackedUsageKind_RenderTarget); + } + else if (cmd_kind == G_D12_CmdKind_Discard) + { + G_D12_Resource *resource = cmd->discard.resource; + b32 is_rtv = AnyBit(resource->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); + b32 is_dsv = AnyBit(resource->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL); + if (is_rtv) + { + batch->contains_rtv = 1; + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(0, resource->texture_mips - 1), G_D12_TrackedUsageKind_RenderTarget); + } + else if (is_dsv) + { + batch->contains_dsv = 1; + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(0, resource->texture_mips - 1), G_D12_TrackedUsageKind_DepthStencilReadWrite); + } + } + else if (cmd_kind == G_D12_CmdKind_CopyBytes || cmd_kind == G_D12_CmdKind_CopyTexels) + { + batch->contains_copy = 1; + } + + if (cmd_kind == G_D12_CmdKind_Barrier) + { + // Start new batch + if (batch->contains_hazard) + { + ++batches_count; + SllQueuePush(first_batch, last_batch, batch); + batch = PushStruct(scratch.arena, G_D12_CmdBatch); + } + + G_D12_Resource *resource = cmd->barrier.resource; + if (resource) + { + // ++batch->transitions_count; + // G_D12_TransitionNode *tn = PushStruct(scratch.arena, G_D12_TransitionNode); + // SllQueuePush(batch->first_transition, batch->last_transition, tn); + // tn->resource = resource; + // tn->mips = RNGI32(0, resource->texture_mips - 1); + + if (cmd->barrier.to_exclusive) + { + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(0, resource->texture_mips - 1), G_D12_TrackedUsageKind_MakeExclusive); + } + else + { + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(0, resource->texture_mips - 1), G_D12_TrackedUsageKind_MakeCommon); + } + } + } + else + { + ++batch->cmds_count; + G_D12_BatchedCmdNode *bcn = PushStruct(scratch.arena, G_D12_BatchedCmdNode); + bcn->cmd = cmd; + SllQueuePush(batch->first_cmd, batch->last_cmd, bcn); + } + + // TODO: Only perform tracking on batches containing draws/clear/discard + } + + if (batch->first_cmd || batch->first_tracked_resource) + { + // Submit open batch + ++batches_count; + SllQueuePush(first_batch, last_batch, batch); + + if (batch->first_tracked_resource) + { + // Final empty batch to implicitly decay tracked resources + batch = PushStruct(scratch.arena, G_D12_CmdBatch); + ++batches_count; + SllQueuePush(first_batch, last_batch, batch); + } + } + } + + ////////////////////////////// + //- Generate transitions + + { + G_D12_CmdBatch *prev_batch = 0; + for (G_D12_CmdBatch *batch = first_batch; batch; batch = batch->next) + { + G_D12_CmdBatch *next_batch = batch->next; + + for (G_D12_TrackedResourceNode *trn = batch->first_tracked_resource; trn; trn = trn->next) + { + G_D12_Resource *resource = trn->resource; + + // Fetch next & prev resources + G_D12_TrackedResourceNode *next_trn = 0; + G_D12_TrackedResourceNode *prev_trn = 0; + { + u64 hash = MixU64(resource->uid); + if (prev_batch && prev_batch->tracked_resource_bins) + { + G_D12_TrackedResourceBin *bin = &prev_batch->tracked_resource_bins[hash % G_D12_TrackedResourceBinsCount]; + for (; prev_trn; prev_trn = prev_trn->next_in_bin) { - RngI32 mips = cmd->barrier.desc.mips; - mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); - mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); - max_texture_barriers += mips.max - mips.min + 1; + if (prev_trn->hash == hash) + { + break; + } + } + } + if (next_batch && next_batch->tracked_resource_bins) + { + G_D12_TrackedResourceBin *bin = &next_batch->tracked_resource_bins[hash % G_D12_TrackedResourceBinsCount]; + for (; next_trn; next_trn = next_trn->next_in_bin) + { + if (next_trn->hash == hash) + { + break; + } + } + } + } + + for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) + { + G_D12_TrackedMip *mip = &trn->mips[mip_idx]; + + if (mip->prev_usage == G_D12_TrackedUsageKind_Untracked && prev_trn) + { + G_D12_TrackedMip *prev_mip = &prev_trn->mips[mip_idx]; + mip->prev_usage = prev_mip->usage; + } + + G_D12_TrackedUsageKind next_usage = G_D12_TrackedUsageKind_Untracked; + if (mip->usage == G_D12_TrackedUsageKind_MakeCommon) + { + next_usage = G_D12_TrackedUsageKind_MakeCommon; + } + else if (next_trn) + { + G_D12_TrackedMip *next_mip = &next_trn->mips[mip_idx]; + next_usage = next_mip->usage; + } + + // Push promotion transition + if (mip->usage != mip->prev_usage) + { + ++batch->transitions_count; + G_D12_TransitionNode *tn = PushStruct(scratch.arena, G_D12_TransitionNode); + SllQueuePush(batch->first_transition, batch->last_transition, tn); + + tn->resource = resource; + tn->old = mip->prev_usage; + tn->new = mip->usage; + tn->mips = RNGI32(mip_idx, mip_idx); + } + + // Push decay transition to next batch + if (next_batch && mip->usage != next_usage) + { + ++next_batch->transitions_count; + G_D12_TransitionNode *tn = PushStruct(scratch.arena, G_D12_TransitionNode); + SllQueuePush(next_batch->first_transition, next_batch->last_transition, tn); + + tn->resource = resource; + tn->old = mip->usage; + tn->new = next_usage; + tn->mips = RNGI32(mip_idx, mip_idx); + } + } + } + prev_batch = batch; + } + } + + ////////////////////////////// + //- Build D3D12 command list + + G_D12_RawCommandList *rcl = G_D12_PrepareRawCommandList(queue_kind); + ID3D12GraphicsCommandList7 *d3d_cl = rcl->d3d_cl; + { + // Pipeline state + b32 graphics_rootsig_set = 0; + b32 compute_rootsig_set = 0; + b32 descriptor_heaps_set = 0; + G_D12_Pipeline *bound_pipeline = 0; + + // Constants state + u64 slotted_constants[G_NumConstants]; + u64 bound_compute_constants[G_NumConstants]; + u64 bound_graphics_constants[G_NumConstants]; + for (i32 i = 0; i < countof(slotted_constants); ++i) { slotted_constants[i] = 0; } // Zero-initialize all slots + for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; } + for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } + + // Fill built-in constants + if (!G_IsRefNil(queue->print_buffer_ref)) + { + slotted_constants[G_ShaderConst_PrintBufferRef] = queue->print_buffer_ref.v; + } + { + b32 tweak_b32 = TweakBool("Shader tweak-bool", 1); + f32 tweak_f32 = TweakFloat("Shader tweak-float", 1, 0, 1); + slotted_constants[G_ShaderConst_TweakB32] = tweak_b32; + slotted_constants[G_ShaderConst_TweakF32] = *(u32 *)&tweak_f32; + } + + // Rasterizer state + D3D12_VIEWPORT bound_viewport = Zi; + D3D12_RECT bound_scissor = Zi; + D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; + D3D12_INDEX_BUFFER_VIEW bound_ibv = Zi; + u64 bound_render_target_uids[G_MaxRenderTargets] = Zi; + u64 bound_render_clear_target_uid = 0; + + // Shader-visible heaps + ID3D12DescriptorHeap *heaps[] = { + G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, + G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, + }; + + for (G_D12_CmdBatch *batch = first_batch; batch; batch = batch->next) + { + ////////////////////////////// + //- Execute barriers + + { + u64 texture_barriers_count = batch->transitions_count; + D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, texture_barriers_count); + + // TODO: Granular + D3D12_BARRIER_SYNC sync_before = D3D12_BARRIER_SYNC_ALL; + D3D12_BARRIER_SYNC sync_after = D3D12_BARRIER_SYNC_ALL; + D3D12_BARRIER_ACCESS access_before = D3D12_BARRIER_ACCESS_COMMON; + D3D12_BARRIER_ACCESS access_after = D3D12_BARRIER_ACCESS_COMMON; + + b32 is_transition_batch = batch->cmds_count == 0; + if (batch == first_batch) + { + sync_before = D3D12_BARRIER_SYNC_NONE; + access_before = D3D12_BARRIER_ACCESS_NO_ACCESS; + } + if (batch == last_batch) + { + sync_before = D3D12_BARRIER_SYNC_NONE; + access_before = D3D12_BARRIER_ACCESS_NO_ACCESS; + } + + D3D12_GLOBAL_BARRIER global_barrier = Zi; + { + global_barrier.SyncBefore = sync_before; + global_barrier.SyncAfter = sync_after; + global_barrier.AccessBefore = access_before; + global_barrier.AccessAfter = access_after; + } + + // Push transition barriers + { + u64 barrier_idx = 0; + for (G_D12_TransitionNode *tn = batch->first_transition; tn; tn = tn->next) + { + G_D12_Resource *resource = tn->resource; + D3D12_BARRIER_LAYOUT old_layout = G_D12_BarrierLayoutFromUsageKind(queue_kind, tn->old); + D3D12_BARRIER_LAYOUT new_layout = G_D12_BarrierLayoutFromUsageKind(queue_kind, tn->new); + + D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[barrier_idx]; + barrier->SyncBefore = sync_before; + barrier->SyncAfter = sync_after; + barrier->AccessBefore = access_before; + barrier->AccessAfter = access_after; + barrier->LayoutBefore = old_layout; + barrier->LayoutAfter = new_layout; + barrier->pResource = resource->d3d_resource; + barrier->Subresources.NumArraySlices = 1; + barrier->Subresources.NumPlanes = 1; + barrier->Subresources.IndexOrFirstMipLevel = tn->mips.min; + barrier->Subresources.NumMipLevels = tn->mips.max - tn->mips.min + 1; + ++barrier_idx; + } + } + + // Dispatch barriers + { + u32 barrier_groups_count = 0; + D3D12_BARRIER_GROUP barrier_groups[2] = Zi; + if (!is_transition_batch) + { + D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; + group->Type = D3D12_BARRIER_TYPE_GLOBAL; + group->NumBarriers = 1; + group->pGlobalBarriers = &global_barrier; + } + if (texture_barriers_count > 0) + { + D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; + group->Type = D3D12_BARRIER_TYPE_TEXTURE; + group->NumBarriers = texture_barriers_count; + group->pTextureBarriers = texture_barriers; + } + if (barrier_groups_count > 0) + { + ID3D12GraphicsCommandList7_Barrier(d3d_cl, barrier_groups_count, barrier_groups); + } + } + } + + ////////////////////////////// + //- Execute commands + + for (G_D12_BatchedCmdNode *bcn = batch->first_cmd; bcn; bcn = bcn->next) + { + G_D12_Cmd *cmd = bcn->cmd; + switch (cmd->kind) + { + //- Constant + + case G_D12_CmdKind_Constant: + { + i32 slot = cmd->constant.slot; + u32 value = cmd->constant.value; + if (slot >= 0 && slot < countof(slotted_constants)) + { + slotted_constants[slot] = value; + } + } break; + + //- Copy bytes + + case G_D12_CmdKind_CopyBytes: + { + u64 src_offset = cmd->copy_bytes.src_range.min; + u64 copy_size = cmd->copy_bytes.src_range.max - cmd->copy_bytes.src_range.min; + ID3D12GraphicsCommandList_CopyBufferRegion( + d3d_cl, + cmd->copy_bytes.dst->d3d_resource, + cmd->copy_bytes.dst_offset, + cmd->copy_bytes.src->d3d_resource, + src_offset, + copy_size + ); + } break; + + //- Copy texels + + case G_D12_CmdKind_CopyTexels: + { + G_D12_Resource *dst = cmd->copy_texels.dst; + G_D12_Resource *src = cmd->copy_texels.src; + D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc; + D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc; + Vec3I32 dst_offset = cmd->copy_texels.dst_texture_offset; + Rng3I32 src_range = cmd->copy_texels.src_texture_range; + + D3D12_BOX src_box = Zi; + D3D12_BOX *src_box_ptr = 0; + { + src_box.left = src_range.p0.x; + src_box.top = src_range.p0.y; + src_box.front = src_range.p0.z; + src_box.right = src_range.p1.x; + src_box.bottom = src_range.p1.y; + src_box.back = src_range.p1.z; + if (src->is_texture) + { + src_box_ptr = &src_box; + } + } + + if (dst->flags & G_ResourceFlag_AllowDepthStencil) + { + // Depth-stencil textures must have src box & dst offset set to 0 + // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12graphicscommandlist-copytextureregion + ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, 0, 0, 0, &src_loc, 0); } else { - max_buffer_barriers += 1; + ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, src_box_ptr); } - } + } break; - cmd_idx += 1; - } break; - } - } + //- Compute - if (prev_barrier_cmd) - { - prev_barrier_cmd->barrier.is_end_of_batch = 1; - } - } - - // Build d3d commands - { - u64 batch_barrier_idx_start = 0; - u64 batch_barrier_idx_opl = 0; // One past last - - u64 cmd_idx = 0; - while (cmd_idx < cmds_count) - { - G_D12_Cmd *cmd = &cmds[cmd_idx]; - switch (cmd->kind) - { - default: - { - cmd_idx += 1; - } break; - - //- Constant - - case G_D12_CmdKind_Constant: - { - i32 slot = cmd->constant.slot; - u32 value = cmd->constant.value; - if (slot >= 0 && slot < countof(slotted_constants)) + case G_D12_CmdKind_Compute: { - slotted_constants[slot] = value; - } - cmd_idx += 1; - } break; - - //- Barrier - - case G_D12_CmdKind_Barrier: - { - batch_barrier_idx_opl = cmd_idx + 1; - - // Submit batched barriers - if (cmd->barrier.is_end_of_batch) - { - // Build barriers - u64 buffer_barriers_count = 0; - u64 texture_barriers_count = 0; - u64 global_barriers_count = 0; - D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, max_buffer_barriers); - D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, max_texture_barriers); - D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, max_global_barriers); - for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < batch_barrier_idx_opl; ++barrier_cmd_idx) + // Fetch pipeline + G_D12_Pipeline *pipeline = 0; { - G_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx]; - if (barrier_cmd->kind == G_D12_CmdKind_Barrier) + G_D12_PipelineDesc pipeline_desc; + ZeroStruct(&pipeline_desc); + pipeline_desc.cs = cmd->compute.cs; + pipeline = G_D12_PipelineFromDesc(pipeline_desc); + } + + if (pipeline) + { + // Set descriptor heaps + if (!descriptor_heaps_set) { - G_MemoryBarrierDesc desc = barrier_cmd->barrier.desc; - // Translate gpu barrier kind -> d3d barrier fields - D3D12_BARRIER_SYNC sync_before = G_D12_BarrierSyncFromStages(desc.stage_prev); - D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.stage_next); - D3D12_BARRIER_ACCESS access_before = G_D12_BarrierAccessFromAccesses(desc.access_prev); - D3D12_BARRIER_ACCESS access_after = G_D12_BarrierAccessFromAccesses(desc.access_next); - D3D12_BARRIER_TYPE barrier_type = D3D12_BARRIER_TYPE_GLOBAL; - if (!desc.is_global) + ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); + descriptor_heaps_set = 1; + } + + // Bind rootsig + if (!compute_rootsig_set) + { + ID3D12GraphicsCommandList_SetComputeRootSignature(d3d_cl, G_D12.bindless_rootsig); + compute_rootsig_set = 1; + } + + // Bind pipeline + if (pipeline != bound_pipeline) + { + ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); + bound_pipeline = pipeline; + } + + // Update root constants + for (i32 slot = 0; slot < countof(slotted_constants); ++slot) + { + if (bound_compute_constants[slot] != slotted_constants[slot]) { - G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); - barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; - } - - // Build barrier - switch (barrier_type) - { - case D3D12_BARRIER_TYPE_BUFFER: - { - G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); - D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++]; - barrier->SyncBefore = sync_before; - barrier->SyncAfter = sync_after; - barrier->AccessBefore = access_before; - barrier->AccessAfter = access_after; - barrier->pResource = resource->d3d_resource; - barrier->Offset = 0; - barrier->Size = U64Max; - } break; - - case D3D12_BARRIER_TYPE_TEXTURE: - { - G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); - RngI32 mips = barrier_cmd->barrier.desc.mips; - { - mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); - mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); - } - // Create a barrier for each contiguous span of mips with matching layout - D3D12_TEXTURE_BARRIER *barrier = 0; - for (i32 mip_idx = mips.min; mip_idx <= mips.max; ++mip_idx) - { - D3D12_BARRIER_LAYOUT layout_before = resource->cmdlist_texture_layouts[mip_idx]; - D3D12_BARRIER_LAYOUT layout_after = layout_before; - if (desc.layout != G_Layout_NoChange) - { - layout_after = G_D12_BarrierLayoutFromLayout(desc.layout); - } - if (barrier == 0 || barrier->LayoutBefore != layout_before) - { - barrier = &texture_barriers[texture_barriers_count++]; - barrier->SyncBefore = sync_before; - barrier->SyncAfter = sync_after; - barrier->AccessBefore = access_before; - barrier->AccessAfter = access_after; - barrier->LayoutBefore = layout_before; - barrier->LayoutAfter = layout_after; - barrier->pResource = resource->d3d_resource; - barrier->Subresources.IndexOrFirstMipLevel = mip_idx; - barrier->Subresources.NumArraySlices = 1; - barrier->Subresources.NumPlanes = 1; - } - barrier->Subresources.NumMipLevels += 1; - resource->cmdlist_texture_layouts[mip_idx] = layout_after; - } - } break; - - case D3D12_BARRIER_TYPE_GLOBAL: - { - D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++]; - barrier->SyncBefore = sync_before; - barrier->SyncAfter = sync_after; - barrier->AccessBefore = access_before; - barrier->AccessAfter = access_after; - } break; + ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); + bound_compute_constants[slot] = slotted_constants[slot]; } } + + // Dispatch + ID3D12GraphicsCommandList_Dispatch(d3d_cl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); } + } break; - // Dispatch barriers - { - u32 barrier_groups_count = 0; - D3D12_BARRIER_GROUP barrier_groups[3] = Zi; - if (buffer_barriers_count > 0) - { - D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; - group->Type = D3D12_BARRIER_TYPE_BUFFER; - group->NumBarriers = buffer_barriers_count; - group->pBufferBarriers = buffer_barriers; - } - if (texture_barriers_count > 0) - { - D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; - group->Type = D3D12_BARRIER_TYPE_TEXTURE; - group->NumBarriers = texture_barriers_count; - group->pTextureBarriers = texture_barriers; - } - if (global_barriers_count > 0) - { - D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; - group->Type = D3D12_BARRIER_TYPE_GLOBAL; - group->NumBarriers = global_barriers_count; - group->pGlobalBarriers = global_barriers; - } - if (barrier_groups_count > 0) - { - ID3D12GraphicsCommandList7_Barrier(d3d_cl, barrier_groups_count, barrier_groups); - } - } + //- Draw - batch_barrier_idx_start = cmd_idx + 1; - } - - cmd_idx += 1; - } break; - - //- Copy bytes - - case G_D12_CmdKind_CopyBytes: - { - u64 src_offset = cmd->copy_bytes.src_range.min; - u64 copy_size = cmd->copy_bytes.src_range.max - cmd->copy_bytes.src_range.min; - ID3D12GraphicsCommandList_CopyBufferRegion( - d3d_cl, - cmd->copy_bytes.dst->d3d_resource, - cmd->copy_bytes.dst_offset, - cmd->copy_bytes.src->d3d_resource, - src_offset, - copy_size - ); - cmd_idx += 1; - } break; - - //- Copy texels - - case G_D12_CmdKind_CopyTexels: - { - G_D12_Resource *dst = cmd->copy_texels.dst; - G_D12_Resource *src = cmd->copy_texels.src; - D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc; - D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc; - Vec3I32 dst_offset = cmd->copy_texels.dst_texture_offset; - Rng3I32 src_range = cmd->copy_texels.src_texture_range; - - D3D12_BOX src_box = Zi; - D3D12_BOX *src_box_ptr = 0; + case G_D12_CmdKind_Draw: { - src_box.left = src_range.p0.x; - src_box.top = src_range.p0.y; - src_box.front = src_range.p0.z; - src_box.right = src_range.p1.x; - src_box.bottom = src_range.p1.y; - src_box.back = src_range.p1.z; - if (src->is_texture) + // Fetch pipeline + G_D12_Pipeline *pipeline = 0; { - src_box_ptr = &src_box; - } - } - - if (dst->flags & G_ResourceFlag_AllowDepthStencil) - { - // Depth-stencil textures must have src box & dst offset set to 0 - // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12graphicscommandlist-copytextureregion - ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, 0, 0, 0, &src_loc, 0); - } - else - { - ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, src_box_ptr); - } - - cmd_idx += 1; - } break; - - //- Compute - - case G_D12_CmdKind_Compute: - { - // Fetch pipeline - G_D12_Pipeline *pipeline = 0; - { - G_D12_PipelineDesc pipeline_desc; - ZeroStruct(&pipeline_desc); - pipeline_desc.cs = cmd->compute.cs; - pipeline = G_D12_PipelineFromDesc(pipeline_desc); - } - - if (pipeline) - { - // Set descriptor heaps - if (!descriptor_heaps_set) - { - ID3D12DescriptorHeap *heaps[] = { - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, - }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); - descriptor_heaps_set = 1; - } - - // Bind rootsig - if (!compute_rootsig_set) - { - ID3D12GraphicsCommandList_SetComputeRootSignature(d3d_cl, G_D12.bindless_rootsig); - compute_rootsig_set = 1; - } - - // Bind pipeline - if (pipeline != bound_pipeline) - { - ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); - bound_pipeline = pipeline; - } - - // Update root constants - for (i32 slot = 0; slot < countof(slotted_constants); ++slot) - { - if (bound_compute_constants[slot] != slotted_constants[slot]) + G_D12_PipelineDesc pipeline_desc; + ZeroStruct(&pipeline_desc); + pipeline_desc.vs = cmd->draw.vs; + pipeline_desc.ps = cmd->draw.ps; { - ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); - bound_compute_constants[slot] = slotted_constants[slot]; + pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; + switch (cmd->draw.draw_mode) + { + default: Assert(0); break; + case G_DrawMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; + case G_DrawMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case G_DrawMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case G_DrawMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case G_DrawMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case G_DrawMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case G_DrawMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + } } - } - - // Dispatch - ID3D12GraphicsCommandList_Dispatch(d3d_cl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); - } - - cmd_idx += 1; - } break; - - //- Rasterize - - case G_D12_CmdKind_Rasterize: - { - // Fetch pipeline - G_D12_Pipeline *pipeline = 0; - { - G_D12_PipelineDesc pipeline_desc; - ZeroStruct(&pipeline_desc); - pipeline_desc.vs = cmd->rasterize.vs; - pipeline_desc.ps = cmd->rasterize.ps; - { - pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; - switch (cmd->rasterize.raster_mode) + if (cmd->draw.draw_mode == G_DrawMode_WireTriangleList || cmd->draw.draw_mode == G_DrawMode_WireTriangleStrip) { - default: Assert(0); break; - case G_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; - case G_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - case G_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - case G_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case G_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case G_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case G_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + pipeline_desc.is_wireframe = 1; } - } - if (cmd->rasterize.raster_mode == G_RasterMode_WireTriangleList || cmd->rasterize.raster_mode == G_RasterMode_WireTriangleStrip) - { - pipeline_desc.is_wireframe = 1; - } - for (u32 i = 0; i < countof(cmd->rasterize.render_target_descs); ++i) - { - G_RenderTargetDesc desc = cmd->rasterize.render_target_descs[i]; - G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); - if (rt) + for (u32 i = 0; i < countof(cmd->draw.render_target_descs); ++i) { - pipeline_desc.render_target_formats[i] = rt->texture_format; - pipeline_desc.render_target_blend_modes[i] = desc.blend; - } - else - { - pipeline_desc.render_target_formats[i] = G_Format_Unknown; - } - } - pipeline = G_D12_PipelineFromDesc(pipeline_desc); - } - - // Create ibv - u32 indices_count = 0; - D3D12_INDEX_BUFFER_VIEW ibv = Zi; - { - G_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc; - if (desc.count > 0) - { - G_D12_Resource *index_buffer_resource = G_D12_ResourceFromHandle(desc.resource); - ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; - ibv.SizeInBytes = desc.stride * desc.count; - if (desc.stride == 2) - { - ibv.Format = DXGI_FORMAT_R16_UINT; - indices_count = ibv.SizeInBytes / 2; - } - else if (desc.stride == 4) - { - ibv.Format = DXGI_FORMAT_R32_UINT; - indices_count = ibv.SizeInBytes / 4; - } - else - { - Assert(0); // Invalid index size - } - } - } - - // Prepare & dispatch - if (pipeline && indices_count > 0) - { - // Set descriptor heaps - if (!descriptor_heaps_set) - { - ID3D12DescriptorHeap *heaps[] = { - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, - }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); - descriptor_heaps_set = 1; - } - - // Bind rootsig - if (!graphics_rootsig_set) - { - ID3D12GraphicsCommandList_SetGraphicsRootSignature(d3d_cl, G_D12.bindless_rootsig); - graphics_rootsig_set = 1; - } - - // Bind pipeline - if (pipeline != bound_pipeline) - { - ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); - bound_pipeline = pipeline; - } - - // Update root constants - for (i32 slot = 0; slot < countof(slotted_constants); ++slot) - { - if (bound_graphics_constants[slot] != slotted_constants[slot]) - { - ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); - bound_graphics_constants[slot] = slotted_constants[slot]; - } - } - - // Set viewport - { - D3D12_VIEWPORT viewport = Zi; - { - Rng3 range = cmd->rasterize.viewport; - viewport.TopLeftX = range.p0.x; - viewport.TopLeftY = range.p0.y; - viewport.Width = range.p1.x - range.p0.x; - viewport.Height = range.p1.y - range.p0.y; - viewport.MinDepth = range.p0.z; - viewport.MaxDepth = range.p1.z; - } - if (!MatchStruct(&viewport, &bound_viewport)) - { - bound_viewport = viewport; - ID3D12GraphicsCommandList_RSSetViewports(d3d_cl, 1, &viewport); - } - } - - // Set scissor - { - D3D12_RECT scissor = Zi; - { - Rng2 range = cmd->rasterize.scissor; - scissor.left = range.p0.x; - scissor.top = range.p0.y; - scissor.right = range.p1.x; - scissor.bottom = range.p1.y; - } - if (!MatchStruct(&scissor, &bound_scissor)) - { - bound_scissor = scissor; - ID3D12GraphicsCommandList_RSSetScissorRects(d3d_cl, 1, &scissor); - } - } - - // Set topology - { - D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - switch (cmd->rasterize.raster_mode) - { - default: Assert(0); break; - case G_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; - case G_RasterMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; - case G_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; - case G_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - case G_RasterMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - case G_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - case G_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - } - if (topology != bound_primitive_topology) - { - ID3D12GraphicsCommandList_IASetPrimitiveTopology(d3d_cl, topology); - } - } - - // Set index buffer - if (!MatchStruct(&ibv, &bound_ibv)) - { - ID3D12GraphicsCommandList_IASetIndexBuffer(d3d_cl, &ibv); - bound_ibv = ibv; - } - - // Bind render targets - { - b32 om_dirty = 0; - u32 rtvs_count = 0; - for (u32 i = 0; i < countof(cmd->rasterize.render_target_descs); ++i) - { - G_RenderTargetDesc desc = cmd->rasterize.render_target_descs[i]; + G_RenderTargetDesc desc = cmd->draw.render_target_descs[i]; G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); if (rt) { - Assert(AnyBit(rt->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)); - if (bound_render_target_uids[i] != rt->uid + desc.mip) - { - G_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i]; - G_D12_InitRtv(rt, rtv_descriptor->first_handle, desc.mip); - bound_render_target_uids[i] = rt->uid + desc.mip; - om_dirty = 1; - } - ++rtvs_count; + pipeline_desc.render_target_formats[i] = rt->texture_format; + pipeline_desc.render_target_blend_modes[i] = desc.blend; } else { - break; + pipeline_desc.render_target_formats[i] = G_Format_Unknown; } } - if (om_dirty) - { - D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[G_MaxRenderTargets] = Zi; - for (u32 i = 0; i < rtvs_count; ++i) - { - rtv_handles[i] = rcl->rtv_descriptors[i]->first_handle; - } - ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0); - } + pipeline = G_D12_PipelineFromDesc(pipeline_desc); } - // Dispatch - ID3D12GraphicsCommandList_DrawIndexedInstanced(d3d_cl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); - } - - cmd_idx += 1; - } break; - - //- Clear rtv - - case G_D12_CmdKind_ClearRtv: - { - G_D12_Resource *rt = cmd->clear_rtv.render_target; - f32 clear_color[4] = Zi; - { - clear_color[0] = cmd->clear_rtv.color.x; - clear_color[1] = cmd->clear_rtv.color.y; - clear_color[2] = cmd->clear_rtv.color.z; - clear_color[3] = cmd->clear_rtv.color.w; - } - D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->first_handle; - if (bound_render_clear_target_uid != rt->uid + cmd->clear_rtv.mip) - { - G_D12_InitRtv(rt, rtv_handle, cmd->clear_rtv.mip); - bound_render_clear_target_uid = rt->uid + cmd->clear_rtv.mip; - } - ID3D12GraphicsCommandList_ClearRenderTargetView(d3d_cl, rtv_handle, clear_color, 0, 0); - cmd_idx += 1; - } break; - - //- Log - - case G_D12_CmdKind_Log: - { - G_D12_Resource *resource = cmd->log.resource; - String resource_name = STRING(resource->name_len, resource->name_text); - - String layouts_str = Zi; - { - StringList layout_names = Zi; - for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) + // Create ibv + u32 indices_count = 0; + D3D12_INDEX_BUFFER_VIEW ibv = Zi; { - String layout_name = G_D12_NameFromBarrierLayout(resource->cmdlist_texture_layouts[mip_idx]); - String layout_str = StringF(scratch.arena, "[%F] %F", FmtSint(mip_idx), FmtString(layout_name)); - PushStringToList(scratch.arena, &layout_names, layout_str); + G_IndexBufferDesc desc = cmd->draw.index_buffer_desc; + if (desc.count > 0) + { + G_D12_Resource *index_buffer_resource = G_D12_ResourceFromHandle(desc.resource); + ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; + ibv.SizeInBytes = desc.stride * desc.count; + if (desc.stride == 2) + { + ibv.Format = DXGI_FORMAT_R16_UINT; + indices_count = ibv.SizeInBytes / 2; + } + else if (desc.stride == 4) + { + ibv.Format = DXGI_FORMAT_R32_UINT; + indices_count = ibv.SizeInBytes / 4; + } + else + { + Assert(0); // Invalid index size + } + } } - layouts_str = StringFromList(scratch.arena, layout_names, Lit(", ")); - } - String msg = StringF( - scratch.arena, - "[Gpu command list resource log] uid: %F, name: \"%F\", layouts: { %F }", - FmtUint(resource->uid), - FmtString(resource_name), - FmtString(layouts_str) - ); - LogDebug(msg); - cmd_idx += 1; - } break; + // Prepare & dispatch + if (pipeline && indices_count > 0) + { + // Set descriptor heaps + if (!descriptor_heaps_set) + { + ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); + descriptor_heaps_set = 1; + } - //- Discard + // Bind rootsig + if (!graphics_rootsig_set) + { + ID3D12GraphicsCommandList_SetGraphicsRootSignature(d3d_cl, G_D12.bindless_rootsig); + graphics_rootsig_set = 1; + } - case G_D12_CmdKind_Discard: - { - G_D12_Resource *resource = cmd->discard.resource; - D3D12_DISCARD_REGION region = Zi; - region.FirstSubresource = 0; - region.NumSubresources = resource->texture_mips; - ID3D12GraphicsCommandList_DiscardResource(d3d_cl, resource->d3d_resource, 0); - cmd_idx += 1; - } break; + // Bind pipeline + if (pipeline != bound_pipeline) + { + ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); + bound_pipeline = pipeline; + } + + // Update root constants + for (i32 slot = 0; slot < countof(slotted_constants); ++slot) + { + if (bound_graphics_constants[slot] != slotted_constants[slot]) + { + ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); + bound_graphics_constants[slot] = slotted_constants[slot]; + } + } + + // Set viewport + { + D3D12_VIEWPORT viewport = Zi; + { + Rng3 range = cmd->draw.viewport; + viewport.TopLeftX = range.p0.x; + viewport.TopLeftY = range.p0.y; + viewport.Width = range.p1.x - range.p0.x; + viewport.Height = range.p1.y - range.p0.y; + viewport.MinDepth = range.p0.z; + viewport.MaxDepth = range.p1.z; + } + if (!MatchStruct(&viewport, &bound_viewport)) + { + bound_viewport = viewport; + ID3D12GraphicsCommandList_RSSetViewports(d3d_cl, 1, &viewport); + } + } + + // Set scissor + { + D3D12_RECT scissor = Zi; + { + Rng2 range = cmd->draw.scissor; + scissor.left = range.p0.x; + scissor.top = range.p0.y; + scissor.right = range.p1.x; + scissor.bottom = range.p1.y; + } + if (!MatchStruct(&scissor, &bound_scissor)) + { + bound_scissor = scissor; + ID3D12GraphicsCommandList_RSSetScissorRects(d3d_cl, 1, &scissor); + } + } + + // Set topology + { + D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + switch (cmd->draw.draw_mode) + { + default: Assert(0); break; + case G_DrawMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; + case G_DrawMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; + case G_DrawMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; + case G_DrawMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; + case G_DrawMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; + case G_DrawMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; + case G_DrawMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; + } + if (topology != bound_primitive_topology) + { + ID3D12GraphicsCommandList_IASetPrimitiveTopology(d3d_cl, topology); + } + } + + // Set index buffer + if (!MatchStruct(&ibv, &bound_ibv)) + { + ID3D12GraphicsCommandList_IASetIndexBuffer(d3d_cl, &ibv); + bound_ibv = ibv; + } + + // Bind render targets + { + b32 om_dirty = 0; + u32 rtvs_count = 0; + for (u32 i = 0; i < countof(cmd->draw.render_target_descs); ++i) + { + G_RenderTargetDesc desc = cmd->draw.render_target_descs[i]; + G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); + if (rt) + { + Assert(AnyBit(rt->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)); + if (bound_render_target_uids[i] != rt->uid + desc.mip) + { + G_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i]; + G_D12_InitRtv(rt, rtv_descriptor->first_handle, desc.mip); + bound_render_target_uids[i] = rt->uid + desc.mip; + om_dirty = 1; + } + ++rtvs_count; + } + else + { + break; + } + } + if (om_dirty) + { + D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[G_MaxRenderTargets] = Zi; + for (u32 i = 0; i < rtvs_count; ++i) + { + rtv_handles[i] = rcl->rtv_descriptors[i]->first_handle; + } + ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0); + } + } + + // Dispatch + ID3D12GraphicsCommandList_DrawIndexedInstanced(d3d_cl, indices_count, cmd->draw.instances_count, 0, 0, 0); + } + } break; + + //- Clear rtv + + case G_D12_CmdKind_ClearRtv: + { + G_D12_Resource *rt = cmd->clear_rtv.resource; + f32 clear_color[4] = Zi; + { + clear_color[0] = cmd->clear_rtv.color.x; + clear_color[1] = cmd->clear_rtv.color.y; + clear_color[2] = cmd->clear_rtv.color.z; + clear_color[3] = cmd->clear_rtv.color.w; + } + D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->first_handle; + if (bound_render_clear_target_uid != rt->uid + cmd->clear_rtv.mip) + { + G_D12_InitRtv(rt, rtv_handle, cmd->clear_rtv.mip); + bound_render_clear_target_uid = rt->uid + cmd->clear_rtv.mip; + } + ID3D12GraphicsCommandList_ClearRenderTargetView(d3d_cl, rtv_handle, clear_color, 0, 0); + } break; + + //- Discard + + case G_D12_CmdKind_Discard: + { + G_D12_Resource *resource = cmd->discard.resource; + D3D12_DISCARD_REGION region = Zi; + region.FirstSubresource = 0; + region.NumSubresources = resource->texture_mips; + ID3D12GraphicsCommandList_DiscardResource(d3d_cl, resource->d3d_resource, 0); + } break; + } } } } - - // End dx12 command list i64 completion_target = G_D12_CommitRawCommandList(rcl); - // Attach completion info to staging regions + ////////////////////////////// + //- Attach completion info to staging regions + for (G_D12_StagingRegionNode *n = cl->first_staging_region; n;) { G_D12_StagingRegionNode *next = n->next_in_command_list; @@ -2938,7 +2930,9 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) n = next; } - // Attach completion info to reset descriptors + ////////////////////////////// + //- Attach completion info to reset descriptors + for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;) { G_D12_Descriptor *next = d->next; @@ -2953,7 +2947,9 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) d = next; } - // Attach completion info to releasables & submit for release + ////////////////////////////// + //- Attach completion info to releasables & submit for release + if (cl->releases.first) { // Attach completion info @@ -2998,6 +2994,9 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) // r = next; // } + ////////////////////////////// + //- Finish + // Free command list { Lock lock = LockE(&G_D12.free_cmd_lists_mutex); @@ -3228,14 +3227,22 @@ void G_SetConstantEx(G_CommandListHandle cl_handle, i32 slot, void *src_32bit, u CopyBytes(&cmd->constant.value, src_32bit, MinU32(size, 4)); } -//- Memory sync +//- Barrier -void G_MemorySyncEx(G_CommandListHandle cl_handle, G_MemoryBarrierDesc desc) +void G_Sync(G_CommandListHandle cl_handle) { G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_Cmd *cmd = G_D12_PushCmd(cl); cmd->kind = G_D12_CmdKind_Barrier; - cmd->barrier.desc = desc; +} + +void G_SyncLayout(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle, G_Layout layout) +{ + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Barrier; + cmd->barrier.resource = G_D12_ResourceFromHandle(resource_handle); + cmd->barrier.to_exclusive = layout == G_Layout_Exclusive; } //- Compute @@ -3252,33 +3259,33 @@ void G_ComputeEx(G_CommandListHandle cl_handle, ComputeShaderDesc cs, Vec3I32 th } } -//- Rasterize +//- Draw -void G_Rasterize( +void G_Draw( G_CommandListHandle cl_handle, VertexShaderDesc vs, PixelShaderDesc ps, u32 instances_count, G_IndexBufferDesc index_buffer, u32 render_targets_count, G_RenderTargetDesc *render_targets, Rng3 viewport, Rng2 scissor, - G_RasterMode raster_mode + G_DrawMode draw_mode ) { if (instances_count > 0 && index_buffer.count > 0) { G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Rasterize; - cmd->rasterize.vs = vs; - cmd->rasterize.ps = ps; - cmd->rasterize.instances_count = instances_count; - cmd->rasterize.index_buffer_desc = index_buffer; + cmd->kind = G_D12_CmdKind_Draw; + cmd->draw.vs = vs; + cmd->draw.ps = ps; + cmd->draw.instances_count = instances_count; + cmd->draw.index_buffer_desc = index_buffer; for (u32 rt_idx = 0; rt_idx < MinU32(render_targets_count, G_MaxRenderTargets); ++rt_idx) { - cmd->rasterize.render_target_descs[rt_idx] = render_targets[rt_idx]; + cmd->draw.render_target_descs[rt_idx] = render_targets[rt_idx]; } - cmd->rasterize.viewport = viewport; - cmd->rasterize.scissor = scissor; - cmd->rasterize.raster_mode = raster_mode; + cmd->draw.viewport = viewport; + cmd->draw.scissor = scissor; + cmd->draw.draw_mode = draw_mode; } } @@ -3289,21 +3296,11 @@ void G_ClearRenderTarget(G_CommandListHandle cl_handle, G_ResourceHandle resourc G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_Cmd *cmd = G_D12_PushCmd(cl); cmd->kind = G_D12_CmdKind_ClearRtv; - cmd->clear_rtv.render_target = G_D12_ResourceFromHandle(resource_handle); + cmd->clear_rtv.resource = G_D12_ResourceFromHandle(resource_handle); cmd->clear_rtv.color = color; cmd->clear_rtv.mip = mip; } -//- Log - -void G_LogResource(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle) -{ - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Log; - cmd->log.resource = G_D12_ResourceFromHandle(resource_handle); -} - //////////////////////////////////////////////////////////// //~ @hookimpl Queue synchronization @@ -3456,6 +3453,7 @@ void G_ReleaseSwapchain(G_SwapchainHandle swapchain_handle) G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size) { G_D12_Swapchain *swapchain = G_D12_SwapchainFromHandle(swapchain_handle); + TempArena scratch = BeginScratchNoConflict(); size = VEC2I32(MaxI32(size.x, 1), MaxI32(size.y, 1)); G_D12_Queue *direct_queue = G_D12_QueueFromKind(G_QueueKind_Direct); @@ -3565,13 +3563,13 @@ G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Forma // Initialize backbuffers { - for (u32 i = 0; i < countof(swapchain->backbuffers); ++i) + for (u32 backbuffer_idx = 0; backbuffer_idx < countof(swapchain->backbuffers); ++backbuffer_idx) { - G_D12_Resource *backbuffer = &swapchain->backbuffers[i]; + G_D12_Resource *backbuffer = &swapchain->backbuffers[backbuffer_idx]; if (!backbuffer->d3d_resource) { ID3D12Resource *d3d_resource = 0; - HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->d3d_swapchain, i, &IID_ID3D12Resource, (void **)&d3d_resource); + HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->d3d_swapchain, backbuffer_idx, &IID_ID3D12Resource, (void **)&d3d_resource); if (FAILED(hr)) { // TODO: Don't panic @@ -3588,8 +3586,9 @@ G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Forma backbuffer->texture_format = format; backbuffer->texture_dims = VEC3I32(size.x, size.y, 1); backbuffer->texture_mips = 1; - backbuffer->cmdlist_texture_layouts[0] = D3D12_BARRIER_LAYOUT_PRESENT; backbuffer->swapchain = swapchain; + + G_D12_SetObjectName((ID3D12Object *)backbuffer->d3d_resource, StringF(scratch.arena, "Backbuffer [%F]", FmtUint(backbuffer_idx))); } } swapchain->backbuffers_format = format; @@ -3614,6 +3613,7 @@ G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Forma cur_backbuffer = &swapchain->backbuffers[backbuffer_idx]; } + EndScratch(scratch); return G_D12_MakeHandle(G_ResourceHandle, cur_backbuffer); } @@ -3651,6 +3651,32 @@ void G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync) } } +//////////////////////////////////////////////////////////// +//~ Debug + +void G_D12_DebugCallback( + D3D12_MESSAGE_CATEGORY category, + D3D12_MESSAGE_SEVERITY severity, + D3D12_MESSAGE_ID id, + LPCSTR description_cstr, + void *context +) +{ + if (severity <= D3D12_MESSAGE_SEVERITY_ERROR) + { + TempArena scratch = BeginScratchNoConflict(); + { + String description = StringFromCstrNoLimit((char *)description_cstr); + Echo(StringF(scratch.arena, "[D3D12 DEBUG] %F\n", FmtString(description))); + if (DebugBreakPrompt(Lit("D3D12 Debug Break"), description)) + { + DEBUGBREAK; + } + } + EndScratch(scratch); + } +} + //////////////////////////////////////////////////////////// //~ Collection worker @@ -3673,12 +3699,9 @@ void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane) { // Copy print buffer to readback buffer G_CopyBufferToBuffer(cl, queue->print_readback_buffer, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size)); + G_Sync(cl); + // Reset counters to 0 - G_MemorySync( - cl, queue->print_buffer, - G_Stage_Copy, G_Access_CopyRead, - G_Stage_Copy, G_Access_CopyWrite - ); u8 zero[12] = Zi; G_CopyCpuToBuffer(cl, queue->print_buffer, 0, zero, RNGU64(0, sizeof(zero))); } diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index b2af8fb9..a77026c2 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -87,7 +87,6 @@ Struct(G_D12_Resource) G_Format texture_format; Vec3I32 texture_dims; i32 texture_mips; - D3D12_BARRIER_LAYOUT cmdlist_texture_layouts[G_D12_MaxMips]; // Sampler info G_SamplerDesc sampler_desc; @@ -306,16 +305,14 @@ Enum(G_D12_CmdKind) G_D12_CmdKind_CopyBytes, G_D12_CmdKind_CopyTexels, G_D12_CmdKind_Compute, - G_D12_CmdKind_Rasterize, + G_D12_CmdKind_Draw, G_D12_CmdKind_ClearRtv, - G_D12_CmdKind_Log, G_D12_CmdKind_Discard, }; Struct(G_D12_Cmd) { G_D12_CmdKind kind; - b32 skip; union { struct @@ -326,11 +323,8 @@ Struct(G_D12_Cmd) struct { - G_MemoryBarrierDesc desc; - - // Post-batch data - b32 is_end_of_batch; - u64 batch_gen; + G_D12_Resource *resource; + b32 to_exclusive; } barrier; struct @@ -366,20 +360,15 @@ Struct(G_D12_Cmd) G_RenderTargetDesc render_target_descs[G_MaxRenderTargets]; Rng3 viewport; Rng2 scissor; - G_RasterMode raster_mode; - } rasterize; - - struct - { - G_D12_Resource *render_target; - Vec4 color; - i32 mip; - } clear_rtv; + G_DrawMode draw_mode; + } draw; struct { G_D12_Resource *resource; - } log; + Vec4 color; + i32 mip; + } clear_rtv; struct { @@ -413,6 +402,86 @@ Struct(G_D12_CmdList) u64 cmds_count; }; +//////////////////////////////////////////////////////////// +//~ Command batching types + +// TODO: Use a dynamic bin count, since the maximum number of tracked resources in the list is known at command list creation time +#define G_D12_TrackedResourceBinsCount 64 + +Enum(G_D12_TrackedUsageKind) +{ + G_D12_TrackedUsageKind_Untracked, + G_D12_TrackedUsageKind_MakeExclusive, + G_D12_TrackedUsageKind_DepthStencilRead, + G_D12_TrackedUsageKind_DepthStencilReadWrite, + G_D12_TrackedUsageKind_RenderTarget, + G_D12_TrackedUsageKind_MakeCommon, +}; + +Struct(G_D12_TransitionNode) +{ + G_D12_TransitionNode *next; + G_D12_Resource *resource; + + G_D12_TrackedUsageKind old; + G_D12_TrackedUsageKind new; + RngI32 mips; +}; + +Struct(G_D12_TrackedMip) +{ + G_D12_TrackedUsageKind prev_usage; + G_D12_TrackedUsageKind usage; +}; + +Struct(G_D12_TrackedResourceNode) +{ + G_D12_TrackedResourceNode *next; + G_D12_TrackedResourceNode *next_in_bin; + + u64 hash; + G_D12_Resource *resource; + G_D12_TrackedMip mips[G_MaxMips]; +}; + +Struct(G_D12_TrackedResourceBin) +{ + G_D12_TrackedResourceNode *first; +}; + +Struct(G_D12_BatchedCmdNode) +{ + G_D12_BatchedCmdNode *next; + G_D12_Cmd *cmd; +}; + +Struct(G_D12_CmdBatch) +{ + G_D12_CmdBatch *next; + + u64 tracked_resources_count; + G_D12_TrackedResourceNode *first_tracked_resource; + G_D12_TrackedResourceNode *last_tracked_resource; + G_D12_TrackedResourceBin *tracked_resource_bins; + + u64 transitions_count; + G_D12_TransitionNode *first_transition; + G_D12_TransitionNode *last_transition; + + u64 cmds_count; + G_D12_BatchedCmdNode *first_cmd; + G_D12_BatchedCmdNode *last_cmd; + + b32 contains_hazard; + + b32 contains_compute_shader; + b32 contains_draw_shader; + b32 contains_rtv; + b32 contains_dsv; + b32 contains_indirect; + b32 contains_copy; +}; + //////////////////////////////////////////////////////////// //~ Swapchain types @@ -521,10 +590,7 @@ G_D12_Resource *G_D12_ResourceFromHandle(G_ResourceHandle handle); G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle); DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format); -D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages); -D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses); -D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout); -String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout); +D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromUsageKind(G_QueueKind queue_kind, G_D12_TrackedUsageKind usage_kind); void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip); @@ -565,6 +631,18 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl); G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v); G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size); +void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resource *resource, RngI32 mips, G_D12_TrackedUsageKind usage_kind); + +//////////////////////////////////////////////////////////// +//~ Debug + +void G_D12_DebugCallback( + D3D12_MESSAGE_CATEGORY category, + D3D12_MESSAGE_SEVERITY severity, + D3D12_MESSAGE_ID id, + LPCSTR description_cstr, + void *context +); //////////////////////////////////////////////////////////// //~ Collection worker diff --git a/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat b/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat deleted file mode 100644 index 9cfa41c0..00000000 --- a/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:55430c370d4f012ef7b2e7854fd194ed8abb2c94a537835be12bd38f9ff80e67 -size 1662796 diff --git a/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat b/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat deleted file mode 100644 index 222b2b8b..00000000 --- a/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd64750d758873691414f705c2fdff08ffd25437f77198d3ee00c9040f48856a -size 1775662 diff --git a/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.619.0/D3D12Core.dat b/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.619.0/D3D12Core.dat new file mode 100644 index 00000000..483e9b3b --- /dev/null +++ b/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.619.0/D3D12Core.dat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f17689c05691a55b08921cf033ac4aa0144c26918ac801727124ef7b0490b8c +size 1689100 diff --git a/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.619.0/d3d12SDKLayers.dat b/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.619.0/d3d12SDKLayers.dat new file mode 100644 index 00000000..1871728b --- /dev/null +++ b/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.619.0/d3d12SDKLayers.dat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58a81ecaeb0fe3a36d2ec1ac2150ea0a280e74a64551671fbc96a693f62b53ca +size 1784546 diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index a2a924f4..cd2b8fae 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -426,7 +426,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R8_Uint, tiles_dims, - G_Layout_DirectQueue_General, + G_Layout_Exclusive, .flags = G_ResourceFlag_ZeroMemory, .name = Lit("Tiles") ); @@ -451,7 +451,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R32_Uint, cells_dims, - G_Layout_DirectQueue_General, + G_Layout_Exclusive, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = StringF(perm, "Particle cells - layer %F", FmtSint(layer)) ); @@ -464,7 +464,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R32_Uint, cells_dims, - G_Layout_DirectQueue_General, + G_Layout_Exclusive, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = StringF(perm, "Particle densities - layer %F", FmtSint(layer)) ); @@ -479,7 +479,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R16G16B16A16_Float, cells_dims, - G_Layout_DirectQueue_General, + G_Layout_Exclusive, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Stains") ); @@ -491,7 +491,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R16G16B16A16_Float, cells_dims, - G_Layout_DirectQueue_General, + G_Layout_Exclusive, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Dry stains") ); @@ -503,7 +503,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R32_Float, cells_dims, - G_Layout_DirectQueue_General, + G_Layout_Exclusive, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Drynesses") ); @@ -515,7 +515,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R32_Uint, cells_dims, - G_Layout_DirectQueue_General, + G_Layout_Exclusive, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Occluders cells") ); @@ -5153,7 +5153,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_frame_arena, cl, G_Format_R16G16B16A16_Float, frame->screen_dims, - G_Layout_DirectQueue_General, + G_Layout_Exclusive, .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)) ); @@ -5166,7 +5166,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_frame_arena, cl, G_Format_R16G16B16A16_Float, frame->screen_dims, - G_Layout_DirectQueue_RenderTarget, + G_Layout_Exclusive, .flags = G_ResourceFlag_AllowRenderTarget, .name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick)) ); @@ -5177,7 +5177,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_frame_arena, cl, G_Format_R16G16B16A16_Float, G_DimsFromMip2D(G_Count2D(screen_target), 1), - G_Layout_DirectQueue_General, + G_Layout_Exclusive, .flags = G_ResourceFlag_AllowShaderReadWrite, .name = StringF(frame->arena, "Backdrop target [%F]", FmtSint(frame->tick)), .max_mips = 4 @@ -5193,7 +5193,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_frame_arena, cl, G_Format_R16G16B16A16_Float, G_DimsFromMip2D(G_Count2D(screen_target), 1), - G_Layout_DirectQueue_General, + G_Layout_Exclusive, .flags = G_ResourceFlag_AllowShaderReadWrite, .name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)), .max_mips = G_MaxMips @@ -5208,7 +5208,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_frame_arena, cl, G_Format_R16G16B16A16_Float, frame->shade_dims, - G_Layout_DirectQueue_General, + G_Layout_Exclusive, .flags = G_ResourceFlag_AllowShaderReadWrite, .name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick)) ); @@ -5274,7 +5274,7 @@ void V_TickForever(WaveLaneCtx *lane) G_SetConstant(cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture()); // Sync - G_DumbGlobalMemorySync(cl); + G_Sync(cl); ////////////////////////////// //- Initialization pass @@ -5306,7 +5306,7 @@ void V_TickForever(WaveLaneCtx *lane) G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); G_Compute2D(cl, V_BackdropDownCS, down_dims); - G_DumbGlobalMemorySync(cl); + G_Sync(cl); } //- Upsample passes @@ -5317,11 +5317,11 @@ void V_TickForever(WaveLaneCtx *lane) G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); G_Compute2D(cl, V_BackdropUpCS, up_dims); - G_DumbGlobalMemorySync(cl); + G_Sync(cl); } } - G_DumbGlobalMemorySync(cl); + G_Sync(cl); } ////////////////////////////// @@ -5331,21 +5331,20 @@ void V_TickForever(WaveLaneCtx *lane) G_ClearRenderTarget(cl, albedo_target, VEC4(0, 0, 0, 0), 0); // Draw quads - G_Rasterize( + G_Draw( cl, V_QuadVS, V_QuadPS, G_CountBuffer(quads_buff, V_Quad), G_QuadIndices(), 1, &G_Rt(albedo_target, G_BlendMode_CompositeStraightAlpha), screen_viewport, screen_scissor, - G_RasterMode_TriangleList + G_DrawMode_TriangleList ); // Emit particles G_Compute(cl, V_EmitParticlesCS, frame->emitters_count); // Sync particles, occluders, & albedo - G_DumbGlobalMemorySync(cl); - G_DumbMemoryLayoutSync(cl, albedo_target, G_Layout_DirectQueue_General); + G_Sync(cl); } ////////////////////////////// @@ -5356,7 +5355,7 @@ void V_TickForever(WaveLaneCtx *lane) G_Compute(cl, V_SimParticlesCS, V_ParticlesCap); // Sync cells - G_DumbGlobalMemorySync(cl); + G_Sync(cl); } ////////////////////////////// @@ -5368,7 +5367,7 @@ void V_TickForever(WaveLaneCtx *lane) { G_Compute2D(cl, V_ShadeCS, frame->shade_dims); - G_DumbGlobalMemorySync(cl); + G_Sync(cl); } ////////////////////////////// @@ -5378,7 +5377,7 @@ void V_TickForever(WaveLaneCtx *lane) G_Compute2D(cl, V_CompositeCS, frame->screen_dims); // Sync screen tex - G_DumbGlobalMemorySync(cl); + G_Sync(cl); } ////////////////////////////// @@ -5400,7 +5399,7 @@ void V_TickForever(WaveLaneCtx *lane) G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); G_Compute2D(cl, V_BloomDownCS, down_dims); - G_DumbGlobalMemorySync(cl); + G_Sync(cl); } //- Upsample passes @@ -5411,7 +5410,7 @@ void V_TickForever(WaveLaneCtx *lane) G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); G_Compute2D(cl, V_BloomUpCS, up_dims); - G_DumbGlobalMemorySync(cl); + G_Sync(cl); } } @@ -5421,7 +5420,7 @@ void V_TickForever(WaveLaneCtx *lane) { G_Compute2D(cl, V_FinalizeCS, frame->screen_dims); - G_DumbGlobalMemorySync(cl); + G_Sync(cl); } ////////////////////////////// @@ -5429,18 +5428,14 @@ void V_TickForever(WaveLaneCtx *lane) if (dvert_idxs_ib.count > 0) { - G_DumbMemoryLayoutSync(cl, screen_target, G_Layout_DirectQueue_RenderTarget); - - G_Rasterize( + G_Draw( cl, V_DVertVS, V_DVertPS, 1, dvert_idxs_ib, 1, &G_Rt(screen_target, G_BlendMode_CompositeStraightAlpha), screen_viewport, screen_scissor, - G_RasterMode_TriangleList + G_DrawMode_TriangleList ); - - G_DumbMemoryLayoutSync(cl, screen_target, G_Layout_DirectQueue_General); } ////////////////////////////// diff --git a/src/proto/proto.c b/src/proto/proto.c index a2145ba9..fa66d486 100644 --- a/src/proto/proto.c +++ b/src/proto/proto.c @@ -5,6 +5,8 @@ void PT_RunForever(WaveLaneCtx *lane) for (;;) { WND_Frame window_frame = WND_BeginFrame(G_Format_R16G16B16A16_Float, WND_BackbufferSizeMode_MatchWindow); + G_ResourceHandle backbuffer = window_frame.backbuffer; + for (u64 cev_idx = 0; cev_idx < window_frame.controller_events.count; ++cev_idx) { ControllerEvent *cev = &window_frame.controller_events.events[cev_idx]; @@ -14,65 +16,57 @@ void PT_RunForever(WaveLaneCtx *lane) } } + G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct); { + G_SyncLayout(cl, backbuffer, G_Layout_Exclusive); + + Vec2I32 final_target_size = window_frame.draw_size; + G_ResourceHandle final_target_res = G_PushTexture2D( + gpu_frame_arena, cl, + G_Format_R16G16B16A16_Float, + final_target_size, + G_Layout_Exclusive, + .flags = G_ResourceFlag_AllowShaderReadWrite + ); + G_Texture2DRef final_target = G_PushTexture2DRef(gpu_frame_arena, final_target_res); + + // Prep test pass { - G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct); - { - Vec2I32 final_target_size = window_frame.draw_size; - G_ResourceHandle final_target_res = G_PushTexture2D( - gpu_frame_arena, cl, - G_Format_R16G16B16A16_Float, - final_target_size, - G_Layout_DirectQueue_General, - .flags = G_ResourceFlag_AllowShaderReadWrite - ); - G_Texture2DRef final_target = G_PushTexture2DRef(gpu_frame_arena, final_target_res); - - // Prep test pass - { - G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target); - G_SetConstant(cl, PT_ShaderConst_TestConst, 3.123); - G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicSamplerFromKind(G_BasicSamplerKind_PointClamp)); - G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target); - G_SetConstant(cl, PT_ShaderConst_NoiseTex, G_BasicNoiseTexture()); - } - - // Test pass - { - G_Compute2D(cl, PT_TestCS, final_target_size); - } - G_DumbMemorySync(cl, final_target_res); - - // Prep blit pass - { - G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_DirectQueue_RenderTarget); - } - - // Blit pass - { - G_Rasterize( - cl, - PT_BlitVS, PT_BlitPS, - 1, G_QuadIndices(), - 1, &G_Rt(window_frame.backbuffer, G_BlendMode_CompositeStraightAlpha), - G_ViewportFromTexture(window_frame.backbuffer), G_ScissorFromTexture(window_frame.backbuffer), - G_RasterMode_TriangleList - ); - } - - // Finalize backbuffer layout - { - G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_Common); - } - - // Reset - { - G_ResetArena(cl, gpu_frame_arena); - } - } - G_CommitCommandList(cl); + G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target); + G_SetConstant(cl, PT_ShaderConst_TestConst, 3.123); + G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicSamplerFromKind(G_BasicSamplerKind_PointClamp)); + G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target); + G_SetConstant(cl, PT_ShaderConst_NoiseTex, G_BasicNoiseTexture()); } + + // Test pass + { + G_Compute2D(cl, PT_TestCS, final_target_size); + + G_Sync(cl); + } + + // Blit pass + { + G_Draw( + cl, + PT_BlitVS, PT_BlitPS, + 1, G_QuadIndices(), + 1, &G_Rt(backbuffer, G_BlendMode_CompositeStraightAlpha), + G_ViewportFromTexture(backbuffer), G_ScissorFromTexture(backbuffer), + G_DrawMode_TriangleList + ); + } + + // Reset + { + G_ResetArena(cl, gpu_frame_arena); + } + + G_SyncLayout(cl, backbuffer, G_Layout_Common); } + G_CommitCommandList(cl); + WND_EndFrame(window_frame, 1); } } diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index b622f4a5..f65f8180 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -1708,7 +1708,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) UI.gpu_frame_arena, UI.cl, G_Format_R16G16B16A16_Float, monitor_size, - G_Layout_DirectQueue_RenderTarget, + G_Layout_Exclusive, .flags = G_ResourceFlag_AllowRenderTarget, .name = Lit("UI draw target") ); @@ -1740,11 +1740,11 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) ); G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(UI.gpu_frame_arena, params_buff, UI_GpuParams); - // Initial constants + // Init constants G_SetConstant(UI.cl, UI_GpuConst_Params, params_ro); // Sync - G_DumbGlobalMemorySync(UI.cl); + G_Sync(UI.cl); ////////////////////////////// //- Dispatch shaders @@ -1760,47 +1760,44 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) if (rects_count > 0) { // Render rects - G_Rasterize( + G_Draw( UI.cl, UI_DRectVS, UI_DRectPS, rects_count, G_QuadIndices(), 1, &G_Rt(draw_target, G_BlendMode_CompositePremultipliedAlpha), draw_viewport, draw_scissor, - G_RasterMode_TriangleList + G_DrawMode_TriangleList ); // Render rect wireframes if (AnyBit(frame->frame_flags, UI_FrameFlag_Debug)) { G_SetConstant(UI.cl, UI_GpuConst_DebugDraw, 1); - G_Rasterize( + G_Draw( UI.cl, UI_DRectVS, UI_DRectPS, rects_count, G_QuadIndices(), 1, &G_Rt(draw_target, G_BlendMode_CompositePremultipliedAlpha), draw_viewport, draw_scissor, - G_RasterMode_WireTriangleList + G_DrawMode_WireTriangleList ); } } //- Backbuffer blit pass - G_DumbMemoryLayoutSync(UI.cl, draw_target, G_Layout_DirectQueue_Read); - G_DumbMemoryLayoutSync(UI.cl, backbuffer, G_Layout_DirectQueue_RenderTarget); - + G_SyncLayout(UI.cl, backbuffer, G_Layout_Exclusive); { - G_Rasterize( + G_Draw( UI.cl, UI_BlitVS, UI_BlitPS, 1, G_QuadIndices(), 1, &G_Rt(backbuffer, G_BlendMode_Opaque), monitor_viewport, monitor_scissor, - G_RasterMode_TriangleList + G_DrawMode_TriangleList ); } - - G_DumbMemoryLayoutSync(UI.cl, backbuffer, G_Layout_Common); + G_SyncLayout(UI.cl, backbuffer, G_Layout_Common); } //////////////////////////////