diff --git a/src/gpu_old/gpu.lay b/src/gpu_old/gpu.lay deleted file mode 100644 index 8250e805..00000000 --- a/src/gpu_old/gpu.lay +++ /dev/null @@ -1,30 +0,0 @@ -@Layer gpu_old - -////////////////////////////// -//- Dependencies - -@Dep platform - -////////////////////////////// -//- Resources - -@EmbedDir G_Resources gpu_res - -////////////////////////////// -//- Api - -@IncludeC gpu_shared.cgh -@IncludeC gpu_core.h -@IncludeC gpu_common.h - -@IncludeG gpu_shared.cgh - -@Bootstrap G_Bootstrap -@Bootstrap G_BootstrapCommon - -////////////////////////////// -//- Impl - -@IncludeC gpu_common.c - -@DefaultDownstream Win32 gpu_dx12 diff --git a/src/gpu_old/gpu_common.c b/src/gpu_old/gpu_common.c deleted file mode 100644 index 9c732b11..00000000 --- a/src/gpu_old/gpu_common.c +++ /dev/null @@ -1,264 +0,0 @@ -G_Ctx G = Zi; -ThreadLocal G_ThreadLocalCtx G_tl = Zi; - -//////////////////////////////////////////////////////////// -//~ Bootstrap - -void G_BootstrapCommon(void) -{ - G_ArenaHandle gpu_perm = G_PermArena(); - - G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct); - { - // Init quad index buffer - { - G_ResourceHandle quad_indices = Zi; - u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 }; - quad_indices = G_PushBuffer(gpu_perm, cl, u16, countof(quad_data)); - G_CopyCpuToBuffer(cl, quad_indices, 0, quad_data, RNGU64(0, sizeof(quad_data))); - G.quad_indices = G_IdxBuff16(quad_indices); - } - - // Init blank texture - { - G_ResourceHandle blank_tex = G_PushTexture2D( - gpu_perm, cl, - G_Format_R8G8B8A8_Unorm, - VEC2I32(8, 8), - G_Layout_Common, - .flags = G_ResourceFlag_ZeroMemory, - .name = Lit("Blank texture") - ); - G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex); - } - - // Init noise texture - { - G_ResourceHandle noise_tex = Zi; - String noise_data = DataFromResource(ResourceKeyFromStore(&G_Resources, Lit("noise_128x128x64_16.dat"))); - Vec3I32 noise_dims = VEC3I32(128, 128, 64); - if (noise_data.len != noise_dims.x * noise_dims.y * noise_dims.z * 
2) - { - Panic(Lit("Unexpected noise texture size")); - } - noise_tex = G_PushTexture3D( - gpu_perm, cl, - G_Format_R16_Uint, - noise_dims, - G_Layout_Common, - .name = Lit("Noise texture") - ); - G_CopyCpuToTexture( - cl, - noise_tex, VEC3I32(0, 0, 0), - noise_data.text, noise_dims, - RNG3I32(VEC3I32(0, 0, 0), noise_dims) - ); - G.basic_noise = G_PushTexture3DRef(gpu_perm, noise_tex); - } - - // Init basic samplers - for (G_BasicSamplerKind sampler_kind = 0; sampler_kind < countof(G.basic_samplers); ++sampler_kind) - { - G_SamplerStateRef sampler = Zi; - switch (sampler_kind) - { - default: - { - // Sampler unspecified - Assert(0); - } FALLTHROUGH; - case G_BasicSamplerKind_PointClamp: - { - G_Filter filter = G_Filter_MinMagMipPoint; - G_AddressMode address_mode = G_AddressMode_Clamp; - G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); - } break; - case G_BasicSamplerKind_PointWrap: - { - G_Filter filter = G_Filter_MinMagMipPoint; - G_AddressMode address_mode = G_AddressMode_Wrap; - G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); - } break; - case G_BasicSamplerKind_PointMirror: - { - G_Filter filter = G_Filter_MinMagMipPoint; - G_AddressMode address_mode = G_AddressMode_Mirror; - G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); - } break; - case G_BasicSamplerKind_BilinearClamp: - { - G_Filter filter = G_Filter_MinMagLinearMipPoint; - G_AddressMode address_mode = G_AddressMode_Clamp; - G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = 
G_PushSamplerStateRef(gpu_perm, sampler_res); - } break; - case G_BasicSamplerKind_BilinearWrap: - { - G_Filter filter = G_Filter_MinMagLinearMipPoint; - G_AddressMode address_mode = G_AddressMode_Wrap; - G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); - } break; - case G_BasicSamplerKind_BilinearMirror: - { - G_Filter filter = G_Filter_MinMagLinearMipPoint; - G_AddressMode address_mode = G_AddressMode_Mirror; - G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); - } break; - case G_BasicSamplerKind_TrilinearClamp: - { - G_Filter filter = G_Filter_MinMagMipLinear; - G_AddressMode address_mode = G_AddressMode_Clamp; - G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); - } break; - case G_BasicSamplerKind_TrilinearWrap: - { - G_Filter filter = G_Filter_MinMagMipLinear; - G_AddressMode address_mode = G_AddressMode_Wrap; - G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); - } break; - case G_BasicSamplerKind_TrilinearMirror: - { - G_Filter filter = G_Filter_MinMagMipLinear; - G_AddressMode address_mode = G_AddressMode_Mirror; - G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode); - sampler = G_PushSamplerStateRef(gpu_perm, sampler_res); - } break; - } - G.basic_samplers[sampler_kind] = sampler; - } - } - G_CommitCommandList(cl); - G_QueueSync(G_QueueMask_Direct, G_QueueMask_All); -} - 
-//////////////////////////////////////////////////////////// -//~ Utils - -//- Arena - -G_ArenaHandle G_PermArena(void) -{ - if (G_IsArenaNil(G_tl.gpu_perm)) - { - G_tl.gpu_perm = G_AcquireArena(); - } - return G_tl.gpu_perm; -} - -//- Push resource from cpu - -G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferDesc desc) -{ - G_ResourceHandle buffer = G_PushResource(gpu_arena, cl, (G_ResourceDesc) { .kind = G_ResourceKind_Buffer, .buffer = desc }); - G_CopyCpuToBuffer(cl, buffer, 0, src.text, RNGU64(0, src.len)); - return buffer; -} - -//- Mip - -i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip) -{ - mip = ClampI32(mip, -31, 31); - i32 result = 0; - if (mip >= 0) - { - result = MaxI32(result >> mip, 1); - } - else - { - result = MaxI32(result << -mip, 1); - } - return result; -} - -Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip) -{ - mip = ClampI32(mip, -31, 31); - Vec2I32 result = Zi; - if (mip >= 0) - { - result.x = MaxI32(mip0_dims.x >> mip, 1); - result.y = MaxI32(mip0_dims.y >> mip, 1); - } - else - { - result.x = MaxI32(mip0_dims.x << -mip, 1); - result.y = MaxI32(mip0_dims.y << -mip, 1); - } - return result; -} - -Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip) -{ - mip = ClampI32(mip, -31, 31); - Vec3I32 result = Zi; - if (mip >= 0) - { - result.x = MaxI32(mip0_dims.x >> mip, 1); - result.y = MaxI32(mip0_dims.y >> mip, 1); - result.z = MaxI32(mip0_dims.z >> mip, 1); - } - else - { - result.x = MaxI32(mip0_dims.x << -mip, 1); - result.y = MaxI32(mip0_dims.y << -mip, 1); - result.z = MaxI32(mip0_dims.z << -mip, 1); - } - return result; -} - -//- Thread count - -Vec3I32 G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads) -{ - return VEC3I32( - (threads.x + cs.x - 1) / cs.x, - (threads.y + cs.y - 1) / cs.y, - (threads.z + cs.z - 1) / cs.z - ); -} - -//- Viewport / scissor - -Rng3 G_ViewportFromTexture(G_ResourceHandle texture) -{ - Vec2I32 dims = G_Count2D(texture); - return 
RNG3(VEC3(0, 0, 0), VEC3(dims.x, dims.y, 1)); -} - -Rng2 G_ScissorFromTexture(G_ResourceHandle texture) -{ - Vec2I32 dims = G_Count2D(texture); - return RNG2(VEC2(0, 0), VEC2(dims.x, dims.y)); -} - -//- Shared resources - - -G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind) -{ - return G.basic_samplers[kind]; -} - -G_IndexBufferDesc G_QuadIndices(void) -{ - return G.quad_indices; -} - -G_Texture2DRef G_BlankTexture2D(void) -{ - return G.blank_tex; -} - -G_Texture3DRef G_BasicNoiseTexture(void) -{ - return G.basic_noise; -} diff --git a/src/gpu_old/gpu_common.h b/src/gpu_old/gpu_common.h deleted file mode 100644 index 7af854b0..00000000 --- a/src/gpu_old/gpu_common.h +++ /dev/null @@ -1,53 +0,0 @@ -//////////////////////////////////////////////////////////// -//~ State types - -Struct(G_Ctx) -{ - // Common shared resources - G_IndexBufferDesc quad_indices; - G_Texture2DRef blank_tex; - G_Texture3DRef basic_noise; - G_SamplerStateRef basic_samplers[G_BasicSamplerKind_COUNT]; -}; - -Struct(G_ThreadLocalCtx) -{ - G_ArenaHandle gpu_perm; -}; - -extern G_Ctx G; -extern ThreadLocal G_ThreadLocalCtx G_tl; - -//////////////////////////////////////////////////////////// -//~ Bootstrap - -void G_BootstrapCommon(void); - -//////////////////////////////////////////////////////////// -//~ Utils - -//- Arena -G_ArenaHandle G_PermArena(void); - -//- Push resource from cpu -G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferDesc desc); -#define G_PushBufferFromCpuCopy(_arena, _cl, _src, ...) 
\ - G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ }) - -//- Mip -i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip); -Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip); -Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip); - -//- Thread count -Vec3I32 G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads); - -//- Viewport / scissor -Rng3 G_ViewportFromTexture(G_ResourceHandle texture); -Rng2 G_ScissorFromTexture(G_ResourceHandle texture); - -//- Shared resources -G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind); -G_IndexBufferDesc G_QuadIndices(void); -G_Texture2DRef G_BlankTexture2D(void); -G_Texture3DRef G_BasicNoiseTexture(void); diff --git a/src/gpu_old/gpu_core.h b/src/gpu_old/gpu_core.h deleted file mode 100644 index ccee6c07..00000000 --- a/src/gpu_old/gpu_core.h +++ /dev/null @@ -1,795 +0,0 @@ -//////////////////////////////////////////////////////////// -//~ Handle types - -Struct(G_ArenaHandle) { u64 v; }; -Struct(G_CommandListHandle) { u64 v; }; -Struct(G_ResourceHandle) { u64 v; }; -Struct(G_SwapchainHandle) { u64 v; }; - -#define G_IsArenaNil(h) ((h).v == 0) -#define G_IsCommandListNil(h) ((h).v == 0) -#define G_IsResourceNil(h) ((h).v == 0) -#define G_IsSwapchainNil(h) ((h).v == 0) - -//////////////////////////////////////////////////////////// -//~ Queue types - -#define G_IsMultiQueueEnabled 1 - -Enum(G_QueueKind) -{ - G_QueueKind_Direct = 0, -#if G_IsMultiQueueEnabled - G_QueueKind_AsyncCompute = 1, - G_QueueKind_AsyncCopy = 2, -#else - G_QueueKind_AsyncCompute = G_QueueKind_Direct, - G_QueueKind_AsyncCopy = G_QueueKind_Direct, -#endif - G_QueueKind_COUNT -}; - -Enum(G_QueueMask) -{ - G_QueueMask_None = 0, - G_QueueMask_Direct = (1 << 0), -#if G_IsMultiQueueEnabled - G_QueueMask_AsyncCompute = (1 << 1), - G_QueueMask_AsyncCopy = (1 << 2), -#else - G_QueueMask_AsyncCompute = G_QueueMask_Direct, - G_QueueMask_AsyncCopy = G_QueueMask_Direct, -#endif - G_QueueMask_All = 
(0xFFFFFFFF >> (32 - G_QueueKind_COUNT)) -}; -#define G_QueueMaskFromKind(queue_kind) (1 << queue_kind) - -Struct(G_QueueCompletions) -{ - i64 v[G_QueueKind_COUNT]; // Array of completions indexed by queue kind -}; - -// All waiters will wait until specified queues reach their value in the `completions` array -Struct(G_QueueBarrierDesc) -{ - G_QueueCompletions completions; // Completions that waiters should wait for - G_QueueMask wait_queues; // Mask of queues that will wait for completions - b32 wait_cpu; // Will the cpu wait for completion -}; - -//////////////////////////////////////////////////////////// -//~ Format types - -// NOTE: Matches DirectX DXGI_FORMAT -Enum(G_Format) -{ - G_Format_Unknown = 0, - G_Format_R32G32B32A32_Typeless = 1, - G_Format_R32G32B32A32_Float = 2, - G_Format_R32G32B32A32_Uint = 3, - G_Format_R32G32B32A32_Sint = 4, - G_Format_R32G32B32_Typeless = 5, - G_Format_R32G32B32_Float = 6, - G_Format_R32G32B32_Uint = 7, - G_Format_R32G32B32_Sint = 8, - G_Format_R16G16B16A16_Typeless = 9, - G_Format_R16G16B16A16_Float = 10, - G_Format_R16G16B16A16_Unorm = 11, - G_Format_R16G16B16A16_Uint = 12, - G_Format_R16G16B16A16_Snorm = 13, - G_Format_R16G16B16A16_Sint = 14, - G_Format_R32G32_Typeless = 15, - G_Format_R32G32_Float = 16, - G_Format_R32G32_Uint = 17, - G_Format_R32G32_Sint = 18, - G_Format_R32G8X24_Typeless = 19, - G_Format_D32_Float_S8X24_Uint = 20, - G_Format_R32_Float_X8X24_Typeless = 21, - G_Format_X32_Typeless_G8X24_Uint = 22, - G_Format_R10G10B10A2_Typeless = 23, - G_Format_R10G10B10A2_Unorm = 24, - G_Format_R10G10B10A2_Uint = 25, - G_Format_R11G11B10_Float = 26, - G_Format_R8G8B8A8_Typeless = 27, - G_Format_R8G8B8A8_Unorm = 28, - G_Format_R8G8B8A8_Unorm_Srgb = 29, - G_Format_R8G8B8A8_Uint = 30, - G_Format_R8G8B8A8_Snorm = 31, - G_Format_R8G8B8A8_Sint = 32, - G_Format_R16G16_Typeless = 33, - G_Format_R16G16_Float = 34, - G_Format_R16G16_Unorm = 35, - G_Format_R16G16_Uint = 36, - G_Format_R16G16_Snorm = 37, - G_Format_R16G16_Sint = 38, 
- G_Format_R32_Typeless = 39, - G_Format_D32_Float = 40, - G_Format_R32_Float = 41, - G_Format_R32_Uint = 42, - G_Format_R32_Sint = 43, - G_Format_R24G8_Typeless = 44, - G_Format_D24_Unorm_S8_Uint = 45, - G_Format_R24_Unorm_X8_Typeless = 46, - G_Format_X24_Typeless_G8_Uint = 47, - G_Format_R8G8_Typeless = 48, - G_Format_R8G8_Unorm = 49, - G_Format_R8G8_Uint = 50, - G_Format_R8G8_Snorm = 51, - G_Format_R8G8_Sint = 52, - G_Format_R16_Typeless = 53, - G_Format_R16_Float = 54, - G_Format_D16_Unorm = 55, - G_Format_R16_Unorm = 56, - G_Format_R16_Uint = 57, - G_Format_R16_Snorm = 58, - G_Format_R16_Sint = 59, - G_Format_R8_Typeless = 60, - G_Format_R8_Unorm = 61, - G_Format_R8_Uint = 62, - G_Format_R8_Snorm = 63, - G_Format_R8_Sint = 64, - G_Format_A8_Unorm = 65, - G_Format_R1_Unorm = 66, - G_Format_R9G9B9E5_SharedXP = 67, - G_Format_R8G8_B8G8_Unorm = 68, - G_Format_G8R8_G8B8_Unorm = 69, - G_Format_BC1_Typeless = 70, - G_Format_BC1_Unorm = 71, - G_Format_BC1_Unorm_Srgb = 72, - G_Format_BC2_Typeless = 73, - G_Format_BC2_Unorm = 74, - G_Format_BC2_Unorm_Srgb = 75, - G_Format_BC3_Typeless = 76, - G_Format_BC3_Unorm = 77, - G_Format_BC3_Unorm_Srgb = 78, - G_Format_BC4_Typeless = 79, - G_Format_BC4_Unorm = 80, - G_Format_BC4_Snorm = 81, - G_Format_BC5_Typeless = 82, - G_Format_BC5_Unorm = 83, - G_Format_BC5_Snorm = 84, - G_Format_B5G6R5_Unorm = 85, - G_Format_B5G5R5A1_Unorm = 86, - G_Format_B8G8R8A8_Unorm = 87, - G_Format_B8G8R8X8_Unorm = 88, - G_Format_R10G10B10_XR_BIAS_A2_Unorm = 89, - G_Format_B8G8R8A8_Typeless = 90, - G_Format_B8G8R8A8_Unorm_Srgb = 91, - G_Format_B8G8R8X8_Typeless = 92, - G_Format_B8G8R8X8_Unorm_Srgb = 93, - G_Format_BC6H_Typeless = 94, - G_Format_BC6H_UF16 = 95, - G_Format_BC6H_SF16 = 96, - G_Format_BC7_Typeless = 97, - G_Format_BC7_Unorm = 98, - G_Format_BC7_Unorm_Srgb = 99, - G_Format_AYUV = 100, - G_Format_Y410 = 101, - G_Format_Y416 = 102, - G_Format_NV12 = 103, - G_Format_P010 = 104, - G_Format_P016 = 105, - G_Format_420_Opaque = 106, - 
G_Format_YUY2 = 107, - G_Format_Y210 = 108, - G_Format_Y216 = 109, - G_Format_NV11 = 110, - G_Format_AI44 = 111, - G_Format_IA44 = 112, - G_Format_P8 = 113, - G_Format_A8P8 = 114, - G_Format_B4G4R4A4_Unorm = 115, - G_Format_P208 = 130, - G_Format_V208 = 131, - G_Format_V408 = 132, - G_Format_SamplerFeedbackMinMipOpaque = 189, - G_Format_SamplerFeedbackMipRegionUsedOpaque = 190, - G_Format_A4B4G4R4_Unorm = 191, - G_Format_COUNT = 192 -}; - -//////////////////////////////////////////////////////////// -//~ Memory sync types - -Enum(G_Stage) -{ - G_Stage_None = 0, - - // Compute stages - G_Stage_ComputeShading = (1 << 1), - - // Draw stages - G_Stage_IndexAssembly = (1 << 2), - G_Stage_VertexShading = (1 << 3), - G_Stage_PixelShading = (1 << 4), - G_Stage_DepthStencil = (1 << 5), - G_Stage_RenderTarget = (1 << 6), - - // Copy stages - G_Stage_Copy = (1 << 7), - - // Indirect stages - G_Stage_Indirect = (1 << 8), - - // Aggregate stages - G_Stage_Drawing = G_Stage_IndexAssembly | - G_Stage_VertexShading | - G_Stage_PixelShading | - G_Stage_DepthStencil | - G_Stage_RenderTarget, - - G_Stage_Shading = G_Stage_ComputeShading | - G_Stage_VertexShading | - G_Stage_PixelShading, - - G_Stage_All = 0xFFFFFFFF -}; - -Enum(G_Access) -{ - G_Access_None = 0, - - G_Access_ShaderReadWrite = (1 << 1), - G_Access_ShaderRead = (1 << 2), - - G_Access_CopyWrite = (1 << 3), - G_Access_CopyRead = (1 << 4), - - G_Access_DepthStencilRead = (1 << 5), - G_Access_DepthStencilWrite = (1 << 6), - G_Access_RenderTargetWrite = (1 << 7), - - G_Access_IndexBuffer = (1 << 8), - G_Access_IndirectArgument = (1 << 9), - - G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the stage specified in the barrier -}; - -Enum(G_Layout) -{ - G_Layout_NoChange, - G_Layout_Undefined, - - ////////////////////////////// - //- Queue-agnostic - - // Simultaneous layout allows a resource to be used on any queue with any - // access type (except depth-stencil). 
Resources cannot transition to/from - // this layout, they must be created with it. Allows concurrent reads - // with up to 1 write to non-overlapping regions. - G_Layout_Simultaneous, // Any access except depth-stencil <-- D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS - G_Layout_Common, // ShaderRead/CopyRead/CopyWrite/Present <-- D3D12_BARRIER_LAYOUT_COMMON - - ////////////////////////////// - //- Direct queue - - G_Layout_DirectQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON - G_Layout_DirectQueue_Read, // ShaderRead/CopyRead/DepthStencilRead <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ - G_Layout_DirectQueue_DepthStencil, // DepthStencilRead/DepthStencilWrite <-- D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE - G_Layout_DirectQueue_RenderTarget, // RenderTargetWrite <-- D3D12_BARRIER_LAYOUT_RENDER_TARGET - - ////////////////////////////// - //- Compute queue - - G_Layout_ComputeQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON - - ////////////////////////////// - //- Direct & Compute queue - - G_Layout_DirectComputeQueue_Read, // ShaderRead/CopyRead <-- D3D12_BARRIER_LAYOUT_GENERIC_READ - G_Layout_DirectComputeQueue_ShaderReadWrite, // ShaderReadWrite <-- D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS - G_Layout_DirectComputeQueue_CopyWrite, // CopyWrite <-- D3D12_BARRIER_LAYOUT_COPY_DEST -}; - -// Barrier will execute after stages specified by `stage_prev`, and before stages specified by `stage_next`. 
-// When barrier executes: -// - Necessary resource flushes will occur based on `access_prev` & `access_next` -// - Texture layout will transition based on `layout` (if specified) -Struct(G_MemoryBarrierDesc) -{ - G_ResourceHandle resource; - b32 is_global; - G_Stage stage_prev; - G_Stage stage_next; - G_Access access_prev; - G_Access access_next; - G_Layout layout; - RngI32 mips; // Inclusive range of texture mip levels to sync -}; - -//////////////////////////////////////////////////////////// -//~ Filter types - -// NOTE: Matches DirectX D3D12_FILTER -Enum(G_Filter) -{ - // Standard filter - G_Filter_MinMagMipPoint = 0, - G_Filter_MinMagPointMipLinear = 0x1, - G_Filter_MinPointMagLinearMipPoint = 0x4, - G_Filter_MinPointMagMipLinear = 0x5, - G_Filter_MinLinearMagMipPoint = 0x10, - G_Filter_MinLinearMagPointMipLinear = 0x11, - G_Filter_MinMagLinearMipPoint = 0x14, - G_Filter_MinMagMipLinear = 0x15, - G_Filter_MinMagAnisotropicMipPoint = 0x54, - G_Filter_Anisotropic = 0x55, - - // Comparison filter - G_Filter_Comparison_MinMagMipPoint = 0x80, - G_Filter_Comparison_MinMagPointMipLinear = 0x81, - G_Filter_Comparison_MinPointMagLinearMipPoint = 0x84, - G_Filter_Comparison_MinPointMagMipLinear = 0x85, - G_Filter_Comparison_MinLinearMagMipPoint = 0x90, - G_Filter_Comparison_MinLinearMagPointMipLinear = 0x91, - G_Filter_Comparison_MinMagLinearMipPoint = 0x94, - G_Filter_Comparison_MinMagMipLinear = 0x95, - G_Filter_Comparison_MinMagAnisotropicMipPoint = 0xd4, - G_Filter_Comparison_Anisotropic = 0xd5, - - // Minimum filter - G_Filter_Minimum_MinMagMipPoint = 0x100, - G_Filter_Minimum_MinMagPointMipLinear = 0x101, - G_Filter_Minimum_MinPointMagLinearMipPoint = 0x104, - G_Filter_Minimum_MinPointMagMipLinear = 0x105, - G_Filter_Minimum_MinLinearMagMipPoint = 0x110, - G_Filter_Minimum_MinLinearMagPointMipLinear = 0x111, - G_Filter_Minimum_MinMagLinearMipPoint = 0x114, - G_Filter_Minimum_MinMagMipLinear = 0x115, - G_Filter_Minimum_MinMagAnisotropicMipPoint = 0x155, - 
G_Filter_Minimum_Anisotropic = 0x155, - - // Maximum filter - G_Filter_Maximum_MinMagMipPoint = 0x180, - G_Filter_Maximum_MinMagPointMipLinear = 0x181, - G_Filter_Maximum_MinPointMagLinearMipPoint = 0x184, - G_Filter_Maximum_MinPointMagMipLinear = 0x185, - G_Filter_Maximum_MinLinearMagMipPoint = 0x190, - G_Filter_Maximum_MinLinearMagPointMipLinear = 0x191, - G_Filter_Maximum_MinMagLinearMipPoint = 0x194, - G_Filter_Maximum_MinMagMipLinear = 0x195, - G_Filter_Maximum_MinMagAnisotropicMipPoint = 0x1d4, - G_Filter_Maximum_Anisotropic = 0x1d5 -}; - -// NOTE: Matches DirectX D3D12_TEXTURE_ADDRESS_MODE -Enum(G_AddressMode) -{ - G_AddressMode_Wrap = 1, - G_AddressMode_Mirror = 2, - G_AddressMode_Clamp = 3, // Default - G_AddressMode_Border = 4, - G_AddressMode_MirrorOnce = 5 -}; - -// NOTE: Matches DirectX D3D12_COMPARISON_FUNC -Enum(G_ComparisonFunc) -{ - G_ComparisonFunc_None = 0, - G_ComparisonFunc_Never = 1, - G_ComparisonFunc_Less = 2, - G_ComparisonFunc_Equal = 3, - G_ComparisonFunc_LessEqual = 4, - G_ComparisonFunc_Greater = 5, - G_ComparisonFunc_NotEqual = 6, - G_ComparisonFunc_GreaterEqual = 7, - G_ComparisonFunc_Always = 8 -}; - -//////////////////////////////////////////////////////////// -//~ Resource types - -Enum(G_ResourceKind) -{ - G_ResourceKind_Buffer, - G_ResourceKind_Texture1D, - G_ResourceKind_Texture2D, - G_ResourceKind_Texture3D, - G_ResourceKind_Sampler, -}; - -Enum(G_ResourceFlag) -{ - G_ResourceFlag_None = 0, - G_ResourceFlag_AllowShaderReadWrite = (1 << 0), - G_ResourceFlag_AllowRenderTarget = (1 << 1), - G_ResourceFlag_AllowDepthStencil = (1 << 2), - G_ResourceFlag_ZeroMemory = (1 << 3), - G_ResourceFlag_HostMemory = (1 << 4), // Resource will be mapped into the cpu's address space - G_ResourceFlag_Uncached = (1 << 5), // Cpu writes will be combined & reads will be uncached - G_ResourceFlag_ForceNoReuse = (1 << 6), -}; - -Struct(G_BufferDesc) -{ - G_ResourceFlag flags; - u64 size; - String name; -}; - -Struct(G_TextureDesc) -{ - G_ResourceFlag 
flags; - G_Format format; - Vec3I32 dims; - G_Layout initial_layout; - Vec4 clear_color; - i32 max_mips; // Will be clamped to range [1, max mips] - String name; -}; - -Struct(G_SamplerDesc) -{ - G_ResourceFlag flags; - G_Filter filter; - G_AddressMode x; - G_AddressMode y; - G_AddressMode z; - f32 mip_lod_bias; - u32 max_anisotropy; - G_ComparisonFunc comparison; - Vec4 border_color; - f32 min_lod; - f32 max_lod; - String name; -}; - -Struct(G_ResourceDesc) -{ - G_ResourceKind kind; - G_BufferDesc buffer; - G_TextureDesc texture; - G_SamplerDesc sampler; -}; - -//////////////////////////////////////////////////////////// -//~ Ref types - -Struct(G_RefDesc) -{ - G_RefKind kind; - u64 element_size; - u64 element_offset; - RngI32 mips; // Inclusive range of texture mip indices to reference -}; - -//////////////////////////////////////////////////////////// -//~ Rasterization types - -Enum(G_RasterMode) -{ - G_RasterMode_None, - G_RasterMode_PointList, - G_RasterMode_LineList, - G_RasterMode_LineStrip, - G_RasterMode_TriangleList, - G_RasterMode_TriangleStrip, - G_RasterMode_WireTriangleList, - G_RasterMode_WireTriangleStrip, -}; - -Enum(G_BlendMode) -{ - G_BlendMode_Opaque, - G_BlendMode_CompositeStraightAlpha, - G_BlendMode_CompositePremultipliedAlpha, -}; - -Struct(G_IndexBufferDesc) -{ - u32 count; - u32 stride; // Either 2 for u16 indices, or 4 for u32 indices - G_ResourceHandle resource; -}; - -Struct(G_RenderTargetDesc) -{ - G_ResourceHandle resource; - G_BlendMode blend; - i32 mip; -}; - -//////////////////////////////////////////////////////////// -//~ Statistic types - -Struct(G_Stats) -{ - // Memory usage - u64 device_committed; - u64 device_budget; - u64 host_committed; - u64 host_budget; - - // Other stats - u64 arenas_count; - u64 cumulative_nonreuse_count; -}; - -//////////////////////////////////////////////////////////// -//~ @hookdecl Bootstrap - -void G_Bootstrap(void); - -//////////////////////////////////////////////////////////// -//~ @hookdecl 
Arena - -G_ArenaHandle G_AcquireArena(void); -void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena); -void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle); - -//////////////////////////////////////////////////////////// -//~ @hookdecl Resource - -//- Resource creation - -G_ResourceHandle G_PushResource(G_ArenaHandle arena, G_CommandListHandle cl, G_ResourceDesc desc); - -#define G_PushBuffer(arena, cl, _type, _count, ...) G_PushResource((arena), (cl), \ - (G_ResourceDesc) { \ - .kind = G_ResourceKind_Buffer, \ - .buffer = { \ - .size = sizeof(_type) * (_count), \ - __VA_ARGS__ \ - } \ - } \ -) - -#define G_PushTexture1D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \ - (G_ResourceDesc) { \ - .kind = G_ResourceKind_Texture1D, \ - .texture = { \ - .format = (_format), \ - .dims = VEC3I32((_size), 1, 1), \ - .initial_layout = (_initial_layout), \ - __VA_ARGS__ \ - } \ - } \ -) - -#define G_PushTexture2D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \ - (G_ResourceDesc) { \ - .kind = G_ResourceKind_Texture2D, \ - .texture = { \ - .format = (_format), \ - .dims = VEC3I32((_size).x, (_size).y, 1), \ - .initial_layout = (_initial_layout), \ - __VA_ARGS__ \ - } \ - } \ -) - -#define G_PushTexture3D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \ - (G_ResourceDesc) { \ - .kind = G_ResourceKind_Texture3D, \ - .texture = { \ - .format = (_format), \ - .dims = (_size), \ - .initial_layout = (_initial_layout), \ - __VA_ARGS__ \ - } \ - } \ -) - -#define G_PushSampler(arena, cl, ...) 
G_PushResource((arena), (cl), \ - (G_ResourceDesc) { \ - .kind = G_ResourceKind_Sampler, \ - .sampler = { \ - .filter = G_Filter_MinMagMipPoint, \ - __VA_ARGS__ \ - } \ - } \ -) - -//- Index buffer helpers - -#define G_IdxBuff16(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 2, .count = (G_CountBuffer((_res), i16)) }) -#define G_IdxBuff32(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 4, .count = (G_CountBuffer((_res), i32)) }) - -//- Render target helpers - -#define G_Rt(_res, _blend_mode) ((G_RenderTargetDesc) { .resource = (_res), .blend = (_blend_mode) }) - -//- Count - -u64 G_CountBufferBytes(G_ResourceHandle buffer); -i32 G_Count1D(G_ResourceHandle texture); -Vec2I32 G_Count2D(G_ResourceHandle texture); -Vec3I32 G_Count3D(G_ResourceHandle texture); -i32 G_CountWidth(G_ResourceHandle texture); -i32 G_CountHeight(G_ResourceHandle texture); -i32 G_CountDepth(G_ResourceHandle texture); -i32 G_CountMips(G_ResourceHandle texture); - -#define G_CountBuffer(buffer, type) G_CountBufferBytes(buffer) / sizeof(type) - -//- Map - -void *G_HostPointerFromResource(G_ResourceHandle resource); -#define G_StructFromResource(resource, type) (type *)G_HostPointerFromResource(resource) - -//////////////////////////////////////////////////////////// -//~ @hookdecl Shader resource reference - -u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc); - -#define G_PushStructuredBufferRef(arena, resource, type, ...) (G_StructuredBufferRef) { \ - .v = G_PushRef( \ - (arena), (resource), \ - (G_RefDesc) { .kind = G_RefKind_StructuredBuffer, .element_size = sizeof(type), __VA_ARGS__ } \ - ) \ -} - -#define G_PushByteAddressBufferRef(arena, resource, ...) (G_ByteAddressBufferRef) { \ - .v = G_PushRef( \ - (arena), (resource), \ - (G_RefDesc) { .kind = G_RefKind_ByteAddressBuffer, __VA_ARGS__ } \ - ) \ -} - -#define G_PushTexture1DRef(arena, resource, ...) 
(G_Texture1DRef) { \ - .v = G_PushRef( \ - (arena), (resource), \ - (G_RefDesc) { .kind = G_RefKind_Texture1D, .mips.max = G_MaxMips, __VA_ARGS__ } \ - ) \ -} - -#define G_PushTexture2DRef(arena, resource, ...) (G_Texture2DRef) { \ - .v = G_PushRef( \ - (arena), (resource), \ - (G_RefDesc) { .kind = G_RefKind_Texture2D, .mips.max = G_MaxMips, __VA_ARGS__ } \ - ) \ -} - -#define G_PushTexture3DRef(arena, resource, ...) (G_Texture3DRef) { \ - .v = G_PushRef( \ - (arena), (resource), \ - (G_RefDesc) { .kind = G_RefKind_Texture3D, .mips.max = G_MaxMips, __VA_ARGS__ } \ - ) \ -} - -#define G_PushSamplerStateRef(arena, resource, ...) (G_SamplerStateRef) { \ - .v = G_PushRef( \ - (arena), (resource), \ - (G_RefDesc) { .kind = G_RefKind_SamplerState, __VA_ARGS__ } \ - ) \ -} - -//////////////////////////////////////////////////////////// -//~ @hookdecl Command - -//- Command list - -G_CommandListHandle G_PrepareCommandList(G_QueueKind queue); -i64 G_CommitCommandList(G_CommandListHandle cl); - -//- Cpu -> Gpu staged copy - -void G_CopyCpuToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range); -void G_CopyCpuToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range); - -//- Gpu <-> Gpu copy - -void G_CopyBufferToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, G_ResourceHandle src, RngU64 src_copy_range); -void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset); -void G_CopyTextureToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range); -void G_CopyTextureToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range); - -//- Constant - -void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 
size); - -#define G_SetConstant(cl, name, value) do { \ - CAT(name, __shaderconstanttype) __src; \ - __src.v = value; \ - G_SetConstantEx((cl), (name), &__src, sizeof(__src)); \ - } while (0) - -//- Memory sync - -void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc); - -#define G_MemorySync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, ...) \ - G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ - .resource = (_resource), \ - .stage_prev = _stage_prev, \ - .access_prev = _access_prev, \ - .stage_next = _stage_next, \ - .access_next = _access_next, \ - .mips.max = G_MaxMips, \ - __VA_ARGS__ \ - }) - -#define G_MemoryLayoutSync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, _layout, ...) \ - G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ - .resource = (_resource), \ - .stage_prev = _stage_prev, \ - .access_prev = _access_prev, \ - .stage_next = _stage_next, \ - .access_next = _access_next, \ - .layout = _layout, \ - .mips.max = G_MaxMips, \ - __VA_ARGS__ \ - }) - -#define G_GlobalMemorySync(_cl, _stage_prev, _access_prev, _stage_next, _access_next, ...) \ - G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ - .is_global = 1, \ - .stage_prev = _stage_prev, \ - .access_prev = _access_prev, \ - .stage_next = _stage_next, \ - .access_next = _access_next, \ - .mips.max = G_MaxMips, \ - __VA_ARGS__ \ - }) - -#define G_DumbMemorySync(cl, resource, ...) \ - G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__) - -#define G_DumbMemoryLayoutSync(cl, resource, layout, ...) \ - G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout), __VA_ARGS__) - -#define G_DumbGlobalMemorySync(cl, ...) 
\ - G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__) - -//- Compute - -void G_ComputeEx(G_CommandListHandle cl, ComputeShaderDesc cs, Vec3I32 threads); - -#define G_Compute(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads), 1, 1)) -#define G_Compute2D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, 1)) -#define G_Compute3D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, (threads).z)) - -//- Rasterize - -void G_Rasterize( - G_CommandListHandle cl, - VertexShaderDesc vs, PixelShaderDesc ps, - u32 instances_count, G_IndexBufferDesc index_buffer, - u32 render_targets_count, G_RenderTargetDesc *render_targets, - Rng3 viewport, Rng2 scissor, - G_RasterMode raster_mode -); - -//- Clear - -void G_ClearRenderTarget(G_CommandListHandle cl, G_ResourceHandle render_target, Vec4 color, i32 mip); - -//- Log - -void G_LogResource(G_CommandListHandle cl, G_ResourceHandle resource); - -//////////////////////////////////////////////////////////// -//~ @hookdecl Queue synchronization - -i64 G_CompletionValueFromQueue(G_QueueKind queue_kind); -i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind); -G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask); -G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask); - -void G_QueueSyncEx(G_QueueBarrierDesc desc); - -#define G_QueueSync(completion_mask, ...) 
\ - G_QueueSyncEx((G_QueueBarrierDesc) { \ - .completions = G_CompletionTargetsFromQueues(completion_mask), \ - __VA_ARGS__ \ - }) - -#define G_QueueSyncGpu(completion_mask, wait_mask) G_QueueSync((completion_mask), .wait_queues = (wait_mask)) -#define G_QueueSyncCpu(completion_mask) G_QueueSync((completion_mask), .wait_cpu = 1); - -//////////////////////////////////////////////////////////// -//~ @hookdecl Statistics - -G_Stats G_QueryStats(void); - -//////////////////////////////////////////////////////////// -//~ @hookdecl Swapchain - -G_SwapchainHandle G_AcquireSwapchain(u64 os_window_handle); -void G_ReleaseSwapchain(G_SwapchainHandle swapchain); - -// Waits until a new backbuffer is ready from the swapchain. -// This should be called before rendering for minimum latency. -G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size); - -void G_CommitBackbuffer(G_ResourceHandle backbuffer, i32 vsync); diff --git a/src/gpu_old/gpu_dx12/gpu_dx12.lay b/src/gpu_old/gpu_dx12/gpu_dx12.lay deleted file mode 100644 index f70d3798..00000000 --- a/src/gpu_old/gpu_dx12/gpu_dx12.lay +++ /dev/null @@ -1,16 +0,0 @@ -@Layer gpu_dx12_old - -////////////////////////////// -//- Resources - -@EmbedDir G_D12_Resources gpu_dx12_res - -////////////////////////////// -//- Api - -@IncludeC gpu_dx12_core.h - -////////////////////////////// -//- Impl - -@IncludeC gpu_dx12_core.c diff --git a/src/gpu_old/gpu_dx12/gpu_dx12_core.c b/src/gpu_old/gpu_dx12/gpu_dx12_core.c deleted file mode 100644 index 95b17f53..00000000 --- a/src/gpu_old/gpu_dx12/gpu_dx12_core.c +++ /dev/null @@ -1,3950 +0,0 @@ -G_D12_Ctx G_D12 = Zi; -ThreadLocal G_D12_ThreadLocalCtx G_D12_tl = Zi; - -//////////////////////////////////////////////////////////// -//~ @hookimpl Bootstrap - -void G_Bootstrap(void) -{ - TempArena scratch = BeginScratchNoConflict(); - Arena *perm = PermArena(); - - // NOTE: Nsight seems to have trouble attaching when independent devices are enabled - 
G_D12.independent_devices_enabled = !CommandlineArgFromName(Lit("no-d3d12-independent-devices")).exists; - G_D12.validation_layer_enabled = CommandlineArgFromName(Lit("gpu-debug-validation")).exists; - G_D12.debug_layer_enabled = G_D12.validation_layer_enabled || CommandlineArgFromName(Lit("gpu-debug")).exists; - - if (G_D12.independent_devices_enabled && IsRunningInWine()) - { - LogInfoF("Wine detected, disabling D3D12 independent devices"); - G_D12.independent_devices_enabled = 0; - } - - LogInfoF("D3D12 independent devices enabled: %F", FmtSint(G_D12.independent_devices_enabled)); - LogInfoF("D3D12 debug layer enabled: %F", FmtSint(G_D12.debug_layer_enabled)); - LogInfoF("D3D12 validation layer enabled: %F", FmtSint(G_D12.validation_layer_enabled)); - - ////////////////////////////// - //- Initialize independent device factory with Agility SDK - - - - - - - - HMODULE lib = LoadLibrary(TEXT("vulkan-1.dll")); - DEBUGBREAKABLE; - - - - - - - - if (G_D12.independent_devices_enabled) - { - ////////////////////////////// - //- Extract agility SDK - - String appdir = GetAppDirectory(); - - u32 sdk_ver_num = 618; - String sdk_ver_str = Lit("1.618.5"); - String sdk_dir_path = StringF(scratch.arena, "%Fd3d12/%F/", FmtString(appdir), FmtString(sdk_ver_str)); - { - LogInfoF("D3D12 agility sdk path: \"%F\"", FmtString(sdk_dir_path)); - String core_path = StringF(scratch.arena, "%FD3D12Core.dll", FmtString(sdk_dir_path)); - String layers_path = StringF(scratch.arena, "%Fd3d12SDKLayers.dll", FmtString(sdk_dir_path)); - if (!PLT_IsFile(core_path) || !PLT_IsFile(layers_path)) - { - LogInfoF("Unpacking D3D12 Agility SDK to %F", FmtString(sdk_dir_path)); - ResourceKey core_key = ResourceKeyFromStore(&G_D12_Resources, Lit("AgilitySDK/1.618.5/D3D12Core.dat")); - ResourceKey layers_key = ResourceKeyFromStore(&G_D12_Resources, Lit("AgilitySDK/1.618.5/d3d12SDKLayers.dat")); - String core_data = PLT_Decompress(scratch.arena, DataFromResource(core_key), PLT_CompressionLevel_3); - String 
layers_data = PLT_Decompress(scratch.arena, DataFromResource(layers_key), PLT_CompressionLevel_3); - PLT_MkDir(StringF(scratch.arena, "%Fd3d12/", FmtString(appdir))); - PLT_MkDir(StringF(scratch.arena, "%Fd3d12/", FmtString(appdir))); - PLT_MkDir(StringF(scratch.arena, "%Fd3d12/%F/", FmtString(appdir), FmtString(sdk_ver_str))); - { - PLT_File file = PLT_OpenFileWrite(core_path); - PLT_WriteFile(file, core_data); - PLT_CloseFile(file); - } - { - PLT_File file = PLT_OpenFileWrite(layers_path); - PLT_WriteFile(file, layers_data); - PLT_CloseFile(file); - } - if (!PLT_IsFile(core_path) || !PLT_IsFile(layers_path)) - { - Panic(StringF( - scratch.arena, - "Failed to extract D3D12 Agility SDK to \"%F\"", - FmtString(core_path) - )); - } - } - } - - ////////////////////////////// - //- Create device factory - - if (G_D12.independent_devices_enabled) - { - D3D12GetInterface(&CLSID_D3D12SDKConfiguration, &IID_ID3D12SDKConfiguration1, (void **)&G_D12.independent.sdk_config); - - // Create device factory - char *sdk_dir_path_cstr = CstrFromString(scratch.arena, PathFromString(scratch.arena, sdk_dir_path, '\\')); - HRESULT hr = ID3D12SDKConfiguration1_CreateDeviceFactory( - G_D12.independent.sdk_config, - sdk_ver_num, - sdk_dir_path_cstr, - &IID_ID3D12DeviceFactory, - (void **)&G_D12.independent.device_factory - ); - if (FAILED(hr)) - { - Panic(StringF(scratch.arena, "Failed to create ID3D12DeviceFactory: Error code 0x%F", FmtHex(hr))); - } - } - } - - ////////////////////////////// - //- Enable D3D12 debug layer - - // Enable debug layer - if (G_D12.debug_layer_enabled) - { - ID3D12Debug1 *debug = 0; - HRESULT hr = 0; - if (G_D12.independent_devices_enabled) - { - hr = ID3D12DeviceFactory_GetConfigurationInterface(G_D12.independent.device_factory, &CLSID_D3D12Debug, &IID_ID3D12Debug1, (void **)&debug); - } - else - { - hr = D3D12GetDebugInterface(&IID_ID3D12Debug1, (void **)&debug); - } - if (FAILED(hr)) - { - Panic(Lit("Failed to retrieve D3D12 Debug interface")); - } - 
ID3D12Debug1_EnableDebugLayer(debug); - if (G_D12.validation_layer_enabled) - { - ID3D12Debug1_SetEnableGPUBasedValidation(debug, 1); - } - } - - ////////////////////////////// - //- Initialize device - - { - HRESULT hr = 0; - - // Create dxgi factory - { - u32 dxgi_factory_flags = 0; - if (G_D12.debug_layer_enabled) - { - dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG; - } - hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G_D12.dxgi_factory); - if (FAILED(hr)) - { - Panic(Lit("Failed to initialize DXGI factory")); - } - } - - // Create device - { - IDXGIAdapter3 *adapter = 0; - ID3D12Device10 *device = 0; - String adapter_name = Zi; - String error = Lit("Failed to initialize D3D12 device"); - u32 adapter_index = 0; - b32 done = 0; - i32 skips = 0; // For iGPU testing - while (!done) - { - hr = IDXGIFactory6_EnumAdapterByGpuPreference(G_D12.dxgi_factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter3, (void **)&adapter); - if (SUCCEEDED(hr)) - { - { - DXGI_ADAPTER_DESC1 desc; - IDXGIAdapter3_GetDesc1(adapter, &desc); - adapter_name = StringFromWstrNoLimit(scratch.arena, desc.Description); - LogInfoF("D3D12 adapter name: '%F'", FmtString(adapter_name)); - } - if (skips <= 0) - { - if (G_D12.independent_devices_enabled) - { - hr = ID3D12DeviceFactory_CreateDevice(G_D12.independent.device_factory, (IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device); - } - else - { - hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device); - } - done = 1; - } - else - { - skips -= 1; - adapter_index += 1; - if (device) - { - ID3D12Device_Release(device); - } - if (adapter) - { - IDXGIAdapter3_Release(adapter); - } - adapter = 0; - device = 0; - } - } - else - { - done = 1; - } - } - - if (!device || !SUCCEEDED(hr)) - { - if (adapter_name.len > 0) - { - error = StringF( - scratch.arena, - "Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. 
Ensure that the device is capable and drivers are up to date.", - FmtString(adapter_name) - ); - } - Panic(error); - } - - if (device) - { - StringList missing = Zi; - { - D3D12_FEATURE_DATA_SHADER_MODEL shader_model = { D3D_SHADER_MODEL_6_6 }; - D3D12_FEATURE_DATA_D3D12_OPTIONS options = Zi; - D3D12_FEATURE_DATA_D3D12_OPTIONS9 options9 = Zi; - D3D12_FEATURE_DATA_D3D12_OPTIONS11 options11 = Zi; - D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = Zi; - { - ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model)); - ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); - ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS9, &options9, sizeof(options9)); - ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS11, &options11, sizeof(options11)); - ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12)); - } - - if (shader_model.HighestShaderModel < D3D_SHADER_MODEL_6_6) - { - PushStringToList(scratch.arena, &missing, Lit(" - Shader model 6.6")); - } - if (options.ResourceBindingTier < D3D12_RESOURCE_BINDING_TIER_3) - { - PushStringToList(scratch.arena, &missing, Lit(" - Resource binding tier 3")); - } - // if (!options.DoublePrecisionFloatShaderOps) - // { - // PushStringToList(scratch.arena, &missing, Lit(" - Double precision shader ops")); - // } - // if (!options9.AtomicInt64OnTypedResourceSupported) - // { - // PushStringToList(scratch.arena, &missing, Lit(" - 64-bit atomics on typed resources")); - // } - // if (!options11.AtomicInt64OnDescriptorHeapResourceSupported) - // { - // PushStringToList(scratch.arena, &missing, Lit(" - 64-bit atomics on descriptor heap resources")); - // } - if (!options12.EnhancedBarriersSupported) - { - PushStringToList(scratch.arena, &missing, Lit(" - Enhanced barriers")); - } - } - if (missing.count > 0) - { - String msg = StringF( - scratch.arena, - "Could not 
intiialize D3D12\n\n" - "The driver for device '%F' does not support the following feature(s):\n\n" - "%F\n\n" - "Ensure drivers are up to date and the device is capable.", - FmtString(adapter_name), - FmtString(StringFromList(scratch.arena, missing, Lit("\n"))) - ); - Panic(msg); - } - } - - G_D12.dxgi_adapter = adapter; - G_D12.device = device; - } - - // Enable debug layer breaks - if (G_D12.debug_layer_enabled) - { - // Enable D3D12 Debug break - { - ID3D12InfoQueue1 *info = 0; - hr = ID3D12Device_QueryInterface(G_D12.device, &IID_ID3D12InfoQueue1, (void **)&info); - if (FAILED(hr)) - { - Panic(Lit("Failed to query ID3D12Device interface")); - } - ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1); - ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1); - ID3D12InfoQueue_Release(info); - } - // Enable DXGI Debug break - { - IDXGIInfoQueue *dxgi_info = 0; - hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info); - if (FAILED(hr)) - { - Panic(Lit("Failed to retrieve DXGI debug interface")); - } - IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1); - IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1); - IDXGIInfoQueue_Release(dxgi_info); - } - } - - // Retrieve device configuration - if (G_D12.independent_devices_enabled) - { - hr = ID3D12Device_QueryInterface(G_D12.device, &IID_ID3D12DeviceConfiguration, (void **)&G_D12.independent.device_config); - if (FAILED(hr)) - { - Panic(Lit("Failed to query ID3D12DeviceConfiguration interface")); - } - - D3D12_DEVICE_CONFIGURATION_DESC desc = Zi; - ID3D12DeviceConfiguration_GetDesc(G_D12.independent.device_config, &desc); - StringList flags_list = Zi; - { - if (desc.Flags & D3D12_DEVICE_FLAG_DEBUG_LAYER_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DEBUG_LAYER_ENABLED")); - if (desc.Flags & 
D3D12_DEVICE_FLAG_GPU_BASED_VALIDATION_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_GPU_BASED_VALIDATION_ENABLED")); - if (desc.Flags & D3D12_DEVICE_FLAG_SYNCHRONIZED_COMMAND_QUEUE_VALIDATION_DISABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_SYNCHRONIZED_COMMAND_QUEUE_VALIDATION_DISABLED")); - if (desc.Flags & D3D12_DEVICE_FLAG_DRED_AUTO_BREADCRUMBS_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DRED_AUTO_BREADCRUMBS_ENABLED")); - if (desc.Flags & D3D12_DEVICE_FLAG_DRED_PAGE_FAULT_REPORTING_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DRED_PAGE_FAULT_REPORTING_ENABLED")); - if (desc.Flags & D3D12_DEVICE_FLAG_DRED_WATSON_REPORTING_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DRED_WATSON_REPORTING_ENABLED")); - if (desc.Flags & D3D12_DEVICE_FLAG_DRED_BREADCRUMB_CONTEXT_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DRED_BREADCRUMB_CONTEXT_ENABLED")); - if (desc.Flags & D3D12_DEVICE_FLAG_DRED_USE_MARKERS_ONLY_BREADCRUMBS) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_DRED_USE_MARKERS_ONLY_BREADCRUMBS")); - if (desc.Flags & D3D12_DEVICE_FLAG_SHADER_INSTRUMENTATION_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_SHADER_INSTRUMENTATION_ENABLED")); - if (desc.Flags & D3D12_DEVICE_FLAG_AUTO_DEBUG_NAME_ENABLED) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_AUTO_DEBUG_NAME_ENABLED")); - if (desc.Flags & D3D12_DEVICE_FLAG_FORCE_LEGACY_STATE_VALIDATION) PushStringToList(scratch.arena, &flags_list, Lit("D3D12_DEVICE_FLAG_FORCE_LEGACY_STATE_VALIDATION")); - } - String flags_str = flags_list.count > 0 ? 
StringFromList(scratch.arena, flags_list, Lit(", ")) : Lit("None"); - LogInfoF("D3D12 SDKVersion: %F", FmtUint(desc.SDKVersion)); - LogInfoF("D3D12 NumEnabledExperimentalFeatures: %F", FmtUint(desc.NumEnabledExperimentalFeatures)); - LogInfoF("D3D12 device configuration flags: %F", FmtString(flags_str)); - } - } - - ////////////////////////////// - //- Initialize command queues - - { - G_D12_CommandQueueDesc descs[] = { - { .type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .name = Lit("Direct Queue") }, - { .type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .name = Lit("Compute Queue") }, - { .type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .name = Lit("Copy Queue") }, - }; - for (u32 i = 0; i < MinU32(countof(descs), countof(G_D12.queues)); ++i) - { - G_D12_CommandQueueDesc desc = descs[i]; - D3D12_COMMAND_QUEUE_DESC d3d_desc = { .Type = desc.type, .Priority = desc.priority }; - G_D12_Queue *queue = &G_D12.queues[i]; - queue->desc = desc; - HRESULT hr = ID3D12Device_CreateCommandQueue(G_D12.device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->d3d_queue); - if (SUCCEEDED(hr)) - { - hr = ID3D12Device_CreateFence(G_D12.device, 0, 0, &IID_ID3D12Fence, (void **)&queue->commit_fence); - G_D12_SetObjectName((ID3D12Object *)queue->d3d_queue, desc.name); - } - if (FAILED(hr)) - { - Panic(Lit("Failed to create GPU Command Queue")); - } - } - } - - ////////////////////////////// - //- Initialize descriptor heaps - - { - Struct(Dx12HeapDesc) - { - D3D12_DESCRIPTOR_HEAP_TYPE type; - D3D12_DESCRIPTOR_HEAP_FLAGS flags; - u64 max; - u64 per_batch_count; - String name; - }; - Dx12HeapDesc descs[G_D12_DescriptorHeapKind_COUNT] = { - [G_D12_DescriptorHeapKind_CbvSrvUav] = { - .type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, - .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, - .max = G_D12_MaxCbvSrvUavDescriptors, - .per_batch_count = 2, // 1 read, 1 write per 
ref - .name = Lit("Primary Resource Descriptor Heap"), - }, - [G_D12_DescriptorHeapKind_Rtv] = { - .type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV, - .flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE, - .max = G_D12_MaxRtvDescriptors, - .per_batch_count = 1, - .name = Lit("Primary RTV Descriptor Heap"), - }, - [G_D12_DescriptorHeapKind_Sampler] = { - .type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, - .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, - .max = G_D12_MaxSamplerDescriptors, - .per_batch_count = 1, - .name = Lit("Primary Sampler Descriptor Heap"), - }, - }; - for (G_D12_DescriptorHeapKind kind = 0; kind < countof(descs); ++kind) - { - Dx12HeapDesc desc = descs[kind]; - G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[kind]; - heap->descriptors_arena = AcquireArena(Gibi(1)); - - heap->kind = kind; - heap->type = desc.type; - heap->per_batch_count = desc.per_batch_count; - heap->max_count = desc.max; - heap->descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(G_D12.device, desc.type); - - D3D12_DESCRIPTOR_HEAP_DESC d3d_desc = Zi; - d3d_desc.Type = desc.type; - d3d_desc.Flags = desc.flags; - d3d_desc.NumDescriptors = desc.max; - - HRESULT hr = 0; - - if (SUCCEEDED(hr)) - { - hr = ID3D12Device_CreateDescriptorHeap(G_D12.device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap); - } - - if (SUCCEEDED(hr)) - { - ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle); - } - - if (SUCCEEDED(hr)) - { - // Push an empty descriptor at index 0, so that a handle with a value of 0 always represents nil - G_D12_Arena *gpu_perm = G_D12_ArenaFromHandle(G_PermArena()); - G_D12_Descriptor *nil_descriptor = G_D12_PushDescriptor(gpu_perm, kind); - Assert(nil_descriptor->index == 0); - G_D12_SetObjectName((ID3D12Object *)heap->d3d_heap, desc.name); - } - - if (FAILED(hr)) - { - Panic(Lit("Failed to create descriptor heap")); - } - } - } - - ////////////////////////////// - //- Initialize global root signature - - { - HRESULT hr 
= 0; - - // Serialize root signature - ID3D10Blob *blob = 0; - if (SUCCEEDED(hr)) - { - D3D12_ROOT_PARAMETER1 params[G_NumConstants] = Zi; - for (i32 slot = 0; slot < G_NumConstants; ++slot) - { - D3D12_ROOT_PARAMETER1 *param = ¶ms[slot]; - param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; - param->ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - param->Constants.ShaderRegister = slot; - param->Constants.RegisterSpace = 0; - param->Constants.Num32BitValues = 1; - } - - D3D12_VERSIONED_ROOT_SIGNATURE_DESC desc = Zi; - desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; - desc.Desc_1_1.NumParameters = countof(params); - desc.Desc_1_1.pParameters = params; - desc.Desc_1_1.NumStaticSamplers = 0; - desc.Desc_1_1.pStaticSamplers = 0; - desc.Desc_1_1.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED; - if (G_D12.independent_devices_enabled) - { - hr = ID3D12DeviceConfiguration_SerializeVersionedRootSignature(G_D12.independent.device_config, &desc, &blob, 0); - } - else - { - hr = D3D12SerializeVersionedRootSignature(&desc, &blob, 0); - } - } - - // Create root signature - ID3D12RootSignature *rootsig = 0; - if (SUCCEEDED(hr)) - { - hr = ID3D12Device_CreateRootSignature(G_D12.device, 0, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), &IID_ID3D12RootSignature, (void **)&rootsig); - } - G_D12.bindless_rootsig = rootsig; - - if (blob) - { - ID3D10Blob_Release(blob); - } - if (FAILED(hr)) - { - Panic(Lit("Failed to create root signature")); - } - } - - ////////////////////////////// - //- Create global resources - - { - // Create debug print buffers - if (GPU_SHADER_PRINT) - { - for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) - { - G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); - if (queue_kind != G_QueueKind_AsyncCopy) - { - G_CommandListHandle cl = G_PrepareCommandList(queue_kind); - { - G_ArenaHandle gpu_perm = G_PermArena(); - 
queue->print_buffer_size = GPU_SHADER_PRINT_BUFFER_SIZE; - queue->print_buffer = G_PushBuffer( - gpu_perm, cl, - u8, - queue->print_buffer_size, - .flags = G_ResourceFlag_AllowShaderReadWrite, - .name = Lit("Debug print gpu buffer"), - ); - queue->print_readback_buffer = G_PushBuffer( - gpu_perm, cl, - u8, - queue->print_buffer_size, - .flags = G_ResourceFlag_HostMemory, - .name = Lit("Debug print readback buffer") - ); - queue->print_buffer_ref = G_PushByteAddressBufferRef(gpu_perm, queue->print_buffer); - } - G_CommitCommandList(cl); - } - } - } - } - - ////////////////////////////// - //- Start workers - - // for (G_QueueKind kind = 0; kind < G_QueueKind_COUNT; ++kind) - // { - // String name = Zi; - // if (kind == G_QueueKind_Direct) name = Lit("Gpu direct queue worker"); - // if (kind == G_QueueKind_AsyncCompute) name = Lit("Gpu compute queue worker"); - // if (kind == G_QueueKind_AsyncCopy) name = Lit("Gpu copy queue worker"); - // DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind); - // } - - OnAsyncTick(G_D12_TickAsync); - - DispatchWave(Lit("Gpu collection worker"), 1, G_D12_CollectionWorkerEntryPoint, 0); - - EndScratch(scratch); -} - -//////////////////////////////////////////////////////////// -//~ Helpers - -G_D12_Arena *G_D12_ArenaFromHandle(G_ArenaHandle handle) -{ - return (G_D12_Arena *)handle.v; -} - -G_D12_CmdList *G_D12_CmdListFromHandle(G_CommandListHandle handle) -{ - return (G_D12_CmdList *)handle.v; -} - -G_D12_Resource *G_D12_ResourceFromHandle(G_ResourceHandle handle) -{ - return (G_D12_Resource *)handle.v; -} - -G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle) -{ - return (G_D12_Swapchain *)handle.v; -} - -DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format) -{ - return (DXGI_FORMAT)format; -} - -D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages) -{ - D3D12_BARRIER_SYNC result = 0; - if (stages == G_Stage_All) - { - result = D3D12_BARRIER_SYNC_ALL; - } - else - { - result |= 
D3D12_BARRIER_SYNC_COMPUTE_SHADING * AnyBit(stages, G_Stage_ComputeShading); - result |= D3D12_BARRIER_SYNC_INDEX_INPUT * AnyBit(stages, G_Stage_IndexAssembly); - result |= D3D12_BARRIER_SYNC_VERTEX_SHADING * AnyBit(stages, G_Stage_VertexShading); - result |= D3D12_BARRIER_SYNC_PIXEL_SHADING * AnyBit(stages, G_Stage_PixelShading); - result |= D3D12_BARRIER_SYNC_DEPTH_STENCIL * AnyBit(stages, G_Stage_DepthStencil); - result |= D3D12_BARRIER_SYNC_RENDER_TARGET * AnyBit(stages, G_Stage_RenderTarget); - result |= D3D12_BARRIER_SYNC_COPY * AnyBit(stages, G_Stage_Copy); - result |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT * AnyBit(stages, G_Stage_Indirect); - } - return result; -} - -D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses) -{ - D3D12_BARRIER_ACCESS result = 0; - if (accesses == 0) - { - result = D3D12_BARRIER_ACCESS_NO_ACCESS; - } - else if (accesses == G_Access_All) - { - result = D3D12_BARRIER_ACCESS_COMMON; - } - else - { - result |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS * AnyBit(accesses, G_Access_ShaderReadWrite); - result |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE * AnyBit(accesses, G_Access_ShaderRead); - result |= D3D12_BARRIER_ACCESS_COPY_DEST * AnyBit(accesses, G_Access_CopyWrite); - result |= D3D12_BARRIER_ACCESS_COPY_SOURCE * AnyBit(accesses, G_Access_CopyRead); - result |= D3D12_BARRIER_ACCESS_INDEX_BUFFER * AnyBit(accesses, G_Access_IndexBuffer); - result |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT * AnyBit(accesses, G_Access_IndirectArgument); - result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ * AnyBit(accesses, G_Access_DepthStencilRead); - result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE * AnyBit(accesses, G_Access_DepthStencilWrite); - result |= D3D12_BARRIER_ACCESS_RENDER_TARGET * AnyBit(accesses, G_Access_RenderTargetWrite); - } - return result; -} - -D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout) -{ - PERSIST Readonly D3D12_BARRIER_LAYOUT translate[] = { - [G_Layout_Undefined] = 
D3D12_BARRIER_LAYOUT_UNDEFINED, - [G_Layout_Simultaneous] = D3D12_BARRIER_LAYOUT_COMMON, - [G_Layout_Common] = D3D12_BARRIER_LAYOUT_COMMON, - [G_Layout_DirectQueue_General] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON, - [G_Layout_DirectQueue_Read] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, - [G_Layout_DirectQueue_DepthStencil] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, - [G_Layout_DirectQueue_RenderTarget] = D3D12_BARRIER_LAYOUT_RENDER_TARGET, - [G_Layout_ComputeQueue_General] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON, - [G_Layout_DirectComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS, - [G_Layout_DirectComputeQueue_Read] = D3D12_BARRIER_LAYOUT_GENERIC_READ, - [G_Layout_DirectComputeQueue_CopyWrite] = D3D12_BARRIER_LAYOUT_COPY_DEST, - }; - D3D12_BARRIER_LAYOUT result = D3D12_BARRIER_LAYOUT_UNDEFINED; - if (layout >= 0 && layout < countof(translate)) - { - result = translate[layout]; - } - return result; -}; - -String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout) -{ - PERSIST Readonly String names[] = { - [D3D12_BARRIER_LAYOUT_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_COMMON"), - [D3D12_BARRIER_LAYOUT_PRESENT] = CompLit("D3D12_BARRIER_LAYOUT_PRESENT"), - [D3D12_BARRIER_LAYOUT_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_GENERIC_READ"), - [D3D12_BARRIER_LAYOUT_RENDER_TARGET] = CompLit("D3D12_BARRIER_LAYOUT_RENDER_TARGET"), - [D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS"), - [D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE"), - [D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ] = CompLit("D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ"), - [D3D12_BARRIER_LAYOUT_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_SHADER_RESOURCE"), - [D3D12_BARRIER_LAYOUT_COPY_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_COPY_SOURCE"), - [D3D12_BARRIER_LAYOUT_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_COPY_DEST"), - [D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE] = 
CompLit("D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE"), - [D3D12_BARRIER_LAYOUT_RESOLVE_DEST] = CompLit("D3D12_BARRIER_LAYOUT_RESOLVE_DEST"), - [D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE"), - [D3D12_BARRIER_LAYOUT_VIDEO_DECODE_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_DECODE_READ"), - [D3D12_BARRIER_LAYOUT_VIDEO_DECODE_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_DECODE_WRITE"), - [D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_READ"), - [D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_WRITE"), - [D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_READ"), - [D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_WRITE"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE"), - [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE] = 
CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE"), - [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST"), - [D3D12_BARRIER_LAYOUT_VIDEO_QUEUE_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_QUEUE_COMMON") - }; - String result = Zi; - if (layout >= 0 && layout < countof(names)) - { - result = names[layout]; - } - else if (layout == D3D12_BARRIER_LAYOUT_UNDEFINED) - { - result = Lit("D3D12_BARRIER_LAYOUT_UNDEFINED"); - } - return result; -} - -void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip) -{ - DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(resource->texture_format); - D3D12_RESOURCE_DESC res_d3d_desc = Zi; - { - ID3D12Resource_GetDesc(resource->d3d_resource, &res_d3d_desc); - } - D3D12_RENDER_TARGET_VIEW_DESC rtv_desc = Zi; - { - rtv_desc.Format = res_d3d_desc.Format; - if (res_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE1D) - { - rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE1D; - rtv_desc.Texture1D.MipSlice = mip; - } - else if (res_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D) - { - rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; - rtv_desc.Texture2D.MipSlice = mip; - } - else if (res_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) - { - rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D; - rtv_desc.Texture3D.MipSlice = mip; - } - } - ID3D12Device_CreateRenderTargetView(G_D12.device, resource->d3d_resource, &rtv_desc, rtv_handle); -} - -void G_D12_SetObjectName(ID3D12Object *object, String name) -{ - TempArena scratch = BeginScratchNoConflict(); - { - wchar_t *name_wstr = WstrFromString(scratch.arena, name); - ID3D12Object_SetName(object, name_wstr); - } - EndScratch(scratch); -} - -String G_D12_NameFromObject(Arena *arena, ID3D12Object *object) -{ - String result = Zi; - { - wchar_t name_text[G_D12_MaxNameLen] = Zi; - u32 name_text_sz = sizeof(name_text); - ID3D12Object_GetPrivateData(object, 
&WKPDID_D3DDebugObjectNameW, &name_text_sz, name_text); - if (name_text_sz > 2) - { - String16 str16 = Zi; - str16.len = (name_text_sz / 2) - 1; - str16.text = name_text; - result = StringFromString16(arena, str16); - } - } - return result; -} - -//////////////////////////////////////////////////////////// -//~ Pipeline - -G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc) -{ - u64 hash = G_D12_HashFromPipelineDesc(desc); - - // Fetch pipeline from cache - G_D12_Pipeline *pipeline = 0; - b32 is_pipeline_new = 0; - G_D12_PipelineBin *bin = &G_D12.pipeline_bins[hash % countof(G_D12.pipeline_bins)]; - { - { - Lock lock = LockS(&bin->mutex); - for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin) - { - if (pipeline->hash == hash) break; - } - Unlock(&lock); - } - if (!pipeline) - { - Lock lock = LockE(&bin->mutex); - for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin) - { - if (pipeline->hash == hash) break; - } - if (!pipeline) - { - Arena *perm = PermArena(); - PushAlign(perm, IsolationSize); - { - pipeline = PushStruct(perm, G_D12_Pipeline); - pipeline->desc = desc; - pipeline->hash = hash; - is_pipeline_new = 1; - } - PushAlign(perm, IsolationSize); - SllStackPushN(bin->first, pipeline, next_in_bin); - } - Unlock(&lock); - } - } - - // Create pipeline - if (is_pipeline_new) - { - TempArena scratch = BeginScratchNoConflict(); - HRESULT hr = 0; - b32 ok = 1; - String error_str = Zi; - b32 is_compute = IsResourceNil(desc.vs.resource) || IsResourceNil(desc.ps.resource); - - String pipeline_name = Zi; - if (is_compute) - { - pipeline_name = NameFromResource(desc.cs.resource); - if (pipeline_name.len == 0) - { - pipeline_name = StringF(scratch.arena, "%F", FmtHandle(desc.cs.resource.v)); - } - } - else - { - String vs_name = NameFromResource(desc.vs.resource); - String ps_name = NameFromResource(desc.ps.resource); - if (vs_name.len == 0) - { - vs_name = StringF(scratch.arena, "%F", FmtHandle(desc.vs.resource.v)); - } - 
if (ps_name.len == 0) - { - ps_name = StringF(scratch.arena, "%F", FmtHandle(desc.ps.resource.v)); - } - pipeline_name = StringF( - scratch.arena, - "%F - %F", - FmtString(vs_name), - FmtString(ps_name) - ); - } - - LogInfoF("Creating pipeline %F", FmtString(pipeline_name)); - - // Create PSO - ID3D12PipelineState *pso = 0; - if (ok) - { - if (!is_compute) - { - i32 rts_count = 0; - b32 has_multiple_blend_modes = 0; - { - G_BlendMode last_blend_mode = 0; - for (i32 rt_idx = 0; rt_idx < countof(desc.render_target_formats); ++rt_idx) - { - G_BlendMode blend_mode = desc.render_target_blend_modes[rt_idx]; - DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[rt_idx]); - if (format == DXGI_FORMAT_UNKNOWN) - { - break; - } - else - { - if (rt_idx > 0 && blend_mode != last_blend_mode) - { - has_multiple_blend_modes = 1; - } - last_blend_mode = blend_mode; - rts_count += 1; - } - } - } - - D3D12_RASTERIZER_DESC raster_desc = Zi; - { - if (desc.is_wireframe) - { - raster_desc.FillMode = D3D12_FILL_MODE_WIREFRAME; - } - else - { - raster_desc.FillMode = D3D12_FILL_MODE_SOLID; - } - raster_desc.CullMode = D3D12_CULL_MODE_NONE; - raster_desc.FrontCounterClockwise = 0; - raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; - raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; - raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; - raster_desc.DepthClipEnable = 0; - raster_desc.MultisampleEnable = 0; - raster_desc.AntialiasedLineEnable = 0; - raster_desc.ForcedSampleCount = 0; - raster_desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; - } - - D3D12_BLEND_DESC blend_desc = Zi; - { - blend_desc.IndependentBlendEnable = has_multiple_blend_modes; - blend_desc.AlphaToCoverageEnable = 0; - for (i32 rt_idx = 0; rt_idx < rts_count; ++rt_idx) - { - G_BlendMode blend_mode = desc.render_target_blend_modes[rt_idx]; - D3D12_RENDER_TARGET_BLEND_DESC *rt = &blend_desc.RenderTarget[rt_idx]; - switch (blend_mode) - { - 
default: - { - rt->BlendEnable = 0; - rt->RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; - } break; - - case G_BlendMode_CompositeStraightAlpha: - { - rt->BlendEnable = 1; - - rt->SrcBlend = D3D12_BLEND_SRC_ALPHA; - rt->BlendOp = D3D12_BLEND_OP_ADD; - rt->DestBlend = D3D12_BLEND_INV_SRC_ALPHA; - - rt->SrcBlendAlpha = D3D12_BLEND_ONE; - rt->BlendOpAlpha = D3D12_BLEND_OP_ADD; - rt->DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; - - rt->RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; - } break; - - case G_BlendMode_CompositePremultipliedAlpha: - { - rt->BlendEnable = 1; - - rt->SrcBlend = D3D12_BLEND_ONE; - rt->BlendOp = D3D12_BLEND_OP_ADD; - rt->DestBlend = D3D12_BLEND_INV_SRC_ALPHA; - - rt->SrcBlendAlpha = D3D12_BLEND_ONE; - rt->BlendOpAlpha = D3D12_BLEND_OP_ADD; - rt->DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; - - rt->RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; - } break; - } - } - } - - D3D12_DEPTH_STENCIL_DESC ds_desc = Zi; - { - ds_desc.DepthEnable = 0; - ds_desc.StencilEnable = 0; - } - - String vs = DataFromResource(desc.vs.resource); - String ps = DataFromResource(desc.ps.resource); - D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = Zi; - { - pso_desc.pRootSignature = G_D12.bindless_rootsig; - pso_desc.VS.pShaderBytecode = vs.text; - pso_desc.VS.BytecodeLength = vs.len; - pso_desc.PS.pShaderBytecode = ps.text; - pso_desc.PS.BytecodeLength = ps.len; - pso_desc.RasterizerState = raster_desc; - pso_desc.BlendState = blend_desc; - pso_desc.DepthStencilState = ds_desc; - pso_desc.PrimitiveTopologyType = desc.topology_type; - pso_desc.SampleMask = UINT_MAX; - pso_desc.SampleDesc.Count = 1; - pso_desc.SampleDesc.Quality = 0; - pso_desc.NumRenderTargets = rts_count; - for (i32 rt_idx = 0; rt_idx < rts_count; ++rt_idx) - { - DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[rt_idx]); - pso_desc.RTVFormats[rt_idx] = format; - } - } - hr = ID3D12Device_CreateGraphicsPipelineState(G_D12.device, &pso_desc, 
&IID_ID3D12PipelineState, (void **)&pso); - if (FAILED(hr)) - { - error_str = StringF(scratch.arena, "Failed to create graphics pipeline \"%F\"", FmtString(pipeline_name)); - ok = 0; - } - } - else - { - String cs = DataFromResource(desc.cs.resource); - D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = Zi; - { - pso_desc.pRootSignature = G_D12.bindless_rootsig; - pso_desc.CS.pShaderBytecode = cs.text; - pso_desc.CS.BytecodeLength = cs.len; - } - hr = ID3D12Device_CreateComputePipelineState(G_D12.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); - if (FAILED(hr)) - { - error_str = StringF(scratch.arena, "Failed to create compute pipeline \"%F\"", FmtString(pipeline_name)); - ok = 0; - } - } - } - - if (ok) - { - if (GPU_NAMES) - { - G_D12_SetObjectName((ID3D12Object *)pso, pipeline_name); - } - } - else - { - // TOOD: Don't panic - Panic(error_str); - } - - LogInfoF("Created pipeline %F", FmtString(pipeline_name)); - - pipeline->pso = pso; - pipeline->error = error_str; - pipeline->ok = ok; - EndScratch(scratch); - } - - return pipeline; -} - -u64 G_D12_HashFromPipelineDesc(G_D12_PipelineDesc desc) -{ - return HashString(StringFromStruct(&desc)); -} - -//////////////////////////////////////////////////////////// -//~ Queue - -G_D12_Queue *G_D12_QueueFromKind(G_QueueKind kind) -{ - return &G_D12.queues[kind]; -} - -//////////////////////////////////////////////////////////// -//~ Raw command list - -G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind) -{ - G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); - - // Try to pull first completed command list from queue - G_D12_RawCommandList *cl = Zi; - { - Lock lock = LockE(&queue->commit_mutex); - { - u64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence); - cl = queue->first_committed_cl; - if (cl && cl->commit_fence_target <= completed) - { - SllQueuePop(queue->first_committed_cl, queue->last_committed_cl); - } - else - { - cl = 0; - } - } - Unlock(&lock); - } - - // Allocate 
new command list if none are available - if (!cl) - { - Arena *perm = PermArena(); - { - PushAlign(perm, IsolationSize); - cl = PushStruct(perm, G_D12_RawCommandList); - PushAlign(perm, IsolationSize); - } - cl->queue = queue; - - HRESULT hr = 0; - { - if (SUCCEEDED(hr)) - { - hr = ID3D12Device_CreateCommandAllocator(G_D12.device, queue->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->d3d_ca); - } - - if (SUCCEEDED(hr)) - { - hr = ID3D12Device_CreateCommandList(G_D12.device, 0, queue->desc.type, cl->d3d_ca, 0, &IID_ID3D12GraphicsCommandList7, (void **)&cl->d3d_cl); - } - - if (SUCCEEDED(hr)) - { - hr = ID3D12GraphicsCommandList_Close(cl->d3d_cl); - } - - // Initialize Direct queue CPU-only descriptors - if (SUCCEEDED(hr) && queue_kind == G_QueueKind_Direct) - { - G_D12_Arena *gpu_perm = G_D12_ArenaFromHandle(G_PermArena()); - for (u32 rtv_idx = 0; rtv_idx < countof(cl->rtv_descriptors); ++rtv_idx) - { - cl->rtv_descriptors[rtv_idx] = G_D12_PushDescriptor(gpu_perm, G_D12_DescriptorHeapKind_Rtv); - } - cl->rtv_clear_descriptor = G_D12_PushDescriptor(gpu_perm, G_D12_DescriptorHeapKind_Rtv); - } - } - - if (FAILED(hr)) - { - Panic(Lit("Failed to create command list")); - } - } - - // Reset command list - { - HRESULT hr = 0; - { - if (SUCCEEDED(hr)) - { - hr = ID3D12CommandAllocator_Reset(cl->d3d_ca); - } - - if (SUCCEEDED(hr)) - { - hr = ID3D12GraphicsCommandList_Reset(cl->d3d_cl, cl->d3d_ca, 0); - } - } - - if (FAILED(hr)) - { - Panic(Lit("Failed to reset command list")); - } - } - - return cl; -} - -i64 G_D12_CommitRawCommandList(G_D12_RawCommandList *cl) -{ - G_D12_Queue *queue = cl->queue; - - // Close - { - HRESULT hr = ID3D12GraphicsCommandList_Close(cl->d3d_cl); - if (FAILED(hr)) - { - // TODO: Don't panic - Panic(Lit("Failed to close command list before execution")); - } - } - - // Commit - i64 completion_target = 0; - { - // Execute - ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->d3d_cl); - Lock lock = 
LockE(&queue->commit_mutex); - { - completion_target = ++queue->commit_fence_target; - cl->commit_fence_target = completion_target; - ID3D12CommandQueue_Signal(queue->d3d_queue, queue->commit_fence, completion_target); - - // Append - SllQueuePush(queue->first_committed_cl, queue->last_committed_cl, cl); - } - Unlock(&lock); - } - - return completion_target; -} - -//////////////////////////////////////////////////////////// -//~ @hookimpl Arena - -G_ArenaHandle G_AcquireArena(void) -{ - G_D12_Arena *gpu_arena = 0; - { - Arena *perm = PermArena(); - PushAlign(perm, IsolationSize); - gpu_arena = PushStruct(perm, G_D12_Arena); - PushAlign(perm, IsolationSize); - } - gpu_arena->arena = AcquireArena(Gibi(1)); - - Atomic64FetchAdd(&G_D12.arenas_count, 1); - - return G_D12_MakeHandle(G_ArenaHandle, gpu_arena); -} - -void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena) -{ - // TODO - - // TODO: Release resources - - // TODO: Update gstats - - // TODO: Move this to actual release - // Atomic64FetchAdd(&G_D12.arenas_count, -1); -} - -void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle) -{ - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); - G_D12_ResetArena(cl, gpu_arena); -} - -//////////////////////////////////////////////////////////// -//~ Arena - -void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) -{ - // Move resources to reset list - if (gpu_arena->resources.first) - { - if (gpu_arena->reset_resources.last) - { - gpu_arena->reset_resources.last->next = gpu_arena->resources.first; - } - else - { - gpu_arena->reset_resources.first = gpu_arena->resources.first; - } - gpu_arena->reset_resources.last = gpu_arena->resources.last; - gpu_arena->reset_resources.count += gpu_arena->resources.count; - ZeroStruct(&gpu_arena->resources); - } - - // Push descriptors to cl reset list - if (gpu_arena->descriptors.first) - { - if (cl->reset_descriptors.last) 
- { - cl->reset_descriptors.last->next = gpu_arena->descriptors.first; - } - else - { - cl->reset_descriptors.first = gpu_arena->descriptors.first; - } - cl->reset_descriptors.last = gpu_arena->descriptors.last; - cl->reset_descriptors.count += gpu_arena->descriptors.count; - gpu_arena->descriptors.count = 0; - gpu_arena->descriptors.first = 0; - gpu_arena->descriptors.last = 0; - } -} - -//////////////////////////////////////////////////////////// -//~ @hookimpl Resource - -G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle cl_handle, G_ResourceDesc desc) -{ - Arena *perm = PermArena(); - G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Resource *resource = 0; - - b32 is_buffer = desc.kind == G_ResourceKind_Buffer; - b32 is_texture = ( - desc.kind == G_ResourceKind_Texture1D || - desc.kind == G_ResourceKind_Texture2D || - desc.kind == G_ResourceKind_Texture3D - ); - b32 is_sampler = desc.kind == G_ResourceKind_Sampler; - G_ResourceFlag flags = ( - is_buffer ? desc.buffer.flags : - is_texture ? desc.texture.flags : - desc.sampler.flags - ); - String new_name = ( - is_buffer ? desc.buffer.name : - is_texture ? 
desc.texture.name : - desc.sampler.name - ); - new_name.len = MinU64(new_name.len, countof(resource->name_text)); - - ////////////////////////////// - //- Initialize heap info - - b32 can_reuse = !AnyBit(flags, G_ResourceFlag_ForceNoReuse); - - D3D12_HEAP_FLAGS heap_flags = 0; - D3D12_HEAP_PROPERTIES heap_props = Zi; - b32 should_map = 0; - if (is_buffer || is_texture) - { - G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu; - // Heap flags - if (flags & G_ResourceFlag_HostMemory) - { - heap_kind = G_D12_ResourceHeapKind_Cpu; - if (flags & G_ResourceFlag_Uncached) - { - heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined; - } - } - if (flags & G_ResourceFlag_ZeroMemory) - { - can_reuse = 0; - } - else - { - heap_flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; - } - // Heap props - if (heap_kind == G_D12_ResourceHeapKind_Cpu) - { - heap_props.Type = D3D12_HEAP_TYPE_CUSTOM; - heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; - heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; - should_map = 1; - } - else if (heap_kind == G_D12_ResourceHeapKind_CpuWriteCombined) - { - heap_props.Type = D3D12_HEAP_TYPE_CUSTOM; - heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; - heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; - should_map = 1; - } - else - { - heap_props.Type = D3D12_HEAP_TYPE_DEFAULT; - } - } - - ////////////////////////////// - //- Initialize d3d resource desc - - D3D12_BARRIER_LAYOUT d3d_begin_layout = D3D12_BARRIER_LAYOUT_UNDEFINED; - D3D12_CLEAR_VALUE clear_value = Zi; - D3D12_RESOURCE_DESC1 d3d_desc = Zi; - if (is_buffer) - { - u64 min_buffer_size = 1024; - d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - d3d_desc.Format = DXGI_FORMAT_UNKNOWN; - d3d_desc.Width = NextPow2U64(MaxU64(desc.buffer.size, min_buffer_size)); - d3d_desc.Height = 1; - d3d_desc.DepthOrArraySize = 1; - d3d_desc.MipLevels = 1; - d3d_desc.SampleDesc.Count = 1; - 
d3d_desc.SampleDesc.Quality = 0; - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); - } - else if (is_texture) - { - i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z); - i32 max_mips = MinI32(FloorF32(Log2F32(largest_dim)) + 1, G_MaxMips); - d3d_desc.Dimension = ( - desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : - desc.kind == G_ResourceKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D : - D3D12_RESOURCE_DIMENSION_TEXTURE3D - ); - d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; - d3d_desc.Format = G_D12_DxgiFormatFromGpuFormat(desc.texture.format); - d3d_desc.Width = MaxI32(desc.texture.dims.x, 1); - d3d_desc.Height = MaxI32(desc.texture.dims.y, 1); - d3d_desc.DepthOrArraySize = MaxI32(desc.texture.dims.z, 1); - d3d_desc.MipLevels = ClampF32(desc.texture.max_mips, 1, max_mips); - d3d_desc.SampleDesc.Count = 1; - d3d_desc.SampleDesc.Quality = 0; - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(flags, G_ResourceFlag_AllowRenderTarget); - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(flags, G_ResourceFlag_AllowDepthStencil); - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS * (desc.texture.initial_layout == G_Layout_Simultaneous); - clear_value.Color[0] = desc.texture.clear_color.x, - clear_value.Color[1] = desc.texture.clear_color.y, - clear_value.Color[2] = desc.texture.clear_color.z, - clear_value.Color[3] = desc.texture.clear_color.w, - clear_value.Format = d3d_desc.Format; - - d3d_begin_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout); - if (!AnyBit(flags, G_ResourceFlag_ZeroMemory) && !AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)) - { - if (AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) - 
{ - d3d_begin_layout = D3D12_BARRIER_LAYOUT_RENDER_TARGET; - } - else if (AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) - { - d3d_begin_layout = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE; - } - } - } - - ////////////////////////////// - //- Check for reset-resource reusability - - // Pop reset resource - resource = gpu_arena->reset_resources.first; - if (resource) - { - DllQueueRemove(gpu_arena->reset_resources.first, gpu_arena->reset_resources.last, resource); - --gpu_arena->reset_resources.count; - - D3D12_RESOURCE_DESC1 reset_d3d_desc = Zi; - D3D12_RESOURCE_DESC1 compare_d3d_desc = Zi; - CopyStruct(&reset_d3d_desc, &resource->d3d_desc); - CopyStruct(&compare_d3d_desc, &reset_d3d_desc); - - // Buffers can be reused if size fits - if (d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && reset_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) - { - if (reset_d3d_desc.Width >= d3d_desc.Width) - { - compare_d3d_desc.Width = d3d_desc.Width; - } - } - - // TODO: Less stringent reuse constraints. We could even create textures as placed resources and reset their underlying heaps. 
- can_reuse = can_reuse && MatchStruct(&compare_d3d_desc, &d3d_desc); - if (!can_reuse) - { - // Push releasable to command list - { - G_D12_Releasable *release = 0; - { - Lock lock = LockE(&G_D12.free_releases_mutex); - { - release = G_D12.free_releases.first; - if (release) - { - SllQueuePop(G_D12.free_releases.first, G_D12.free_releases.last); - } - else - { - release = PushStructNoZero(perm, G_D12_Releasable); - } - } - Unlock(&lock); - } - ZeroStruct(release); - SllQueuePush(cl->releases.first, cl->releases.last, release); - release->d3d_resource = resource->d3d_resource; - if (GPU_NAMES) - { - StaticAssert(countof(release->name_text) == countof(resource->name_text)); - release->name_len = resource->name_len; - CopyBytes(release->name_text, resource->name_text, resource->name_len); - } - } - ZeroStruct(resource); - } - } - else - { - can_reuse = 0; - resource = PushStruct(gpu_arena->arena, G_D12_Resource); - } - - if (!can_reuse) - { - resource->d3d_desc = d3d_desc; - } - - ////////////////////////////// - //- Init resource - - resource->flags = flags; - resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, d3d_desc.MipLevels) + 1; - - if (is_buffer) - { - resource->buffer_size = desc.buffer.size; - resource->buffer_size_actual = d3d_desc.Width; - } - - if (is_texture) - { - resource->is_texture = is_texture; - resource->texture_format = desc.texture.format; - resource->texture_dims = desc.texture.dims; - resource->texture_mips = d3d_desc.MipLevels; - } - - if (is_sampler) - { - resource->sampler_desc = desc.sampler; - } - - DllQueuePush(gpu_arena->resources.first, gpu_arena->resources.last, resource); - ++gpu_arena->resources.count; - - ////////////////////////////// - //- Allocate D3D12 resource - - if ((is_buffer || is_texture) && !resource->d3d_resource) - { - D3D12_CLEAR_VALUE *clear_value_arg = 0; - if (d3d_desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) - { - clear_value_arg = &clear_value; - 
} - HRESULT hr = ID3D12Device10_CreateCommittedResource3( - G_D12.device, - &heap_props, - heap_flags, - &resource->d3d_desc, - d3d_begin_layout, - clear_value_arg, - 0, // pProtectedSession - 0, // NumCastableFormats - 0, // pCastableFormats - &IID_ID3D12Resource, - (void **)&resource->d3d_resource - ); - Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1); - for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) - { - resource->cmdlist_texture_layouts[mip_idx] = d3d_begin_layout; - } - - // Queue initial Rtv/Dsv discard - if (!AnyBit(flags, G_ResourceFlag_ZeroMemory)) - { - if (AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) - { - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Discard; - cmd->discard.resource = resource; - } - - if (d3d_begin_layout == D3D12_BARRIER_LAYOUT_RENDER_TARGET) - { - G_MemoryLayoutSync( - cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), - G_Stage_RenderTarget, G_Access_RenderTargetWrite, - G_Stage_All, G_Access_All, - desc.texture.initial_layout - ); - } - else if (d3d_begin_layout == D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE) - { - G_MemoryLayoutSync( - cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), - G_Stage_DepthStencil, G_Access_DepthStencilWrite, - G_Stage_All, G_Access_All, - desc.texture.initial_layout - ); - } - } - - if (!SUCCEEDED(hr)) - { - // TODO: Don't panic - Panic(Lit("Failed to allocate D3D12 resource")); - } - - if (is_buffer) - { - resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(resource->d3d_resource); - } - } - - if (should_map && !resource->mapped) - { - D3D12_RANGE read_range = Zi; - HRESULT hr = ID3D12Resource_Map(resource->d3d_resource, 0, &read_range, &resource->mapped); - - if (!SUCCEEDED(hr)) - { - // TODO: Don't panic - Panic(Lit("Failed to map D3D12 resource")); - } - } - - ////////////////////////////// - //- Set debug information - - String old_name = STRING(resource->name_len, 
resource->name_text); - if (!MatchString(old_name, new_name)) - { - resource->name_len = new_name.len; - CopyBytes(resource->name_text, new_name.text, new_name.len); - if (GPU_NAMES && resource->d3d_resource) - { - G_D12_SetObjectName((ID3D12Object *)resource->d3d_resource, new_name); - } - } - - ////////////////////////////// - //- Barrier reused resource - - // TODO: These barriers are overly cautious. It's unlikely that anything - // other than an activation-layout transition is needed for textures, since - // arenas are rarely reset in the middle of a command list. In the case that - // a resource is reused within the same command list, we should insert - // barriers as described in the spec: - // https://microsoft.github.io/DirectX-Specs/d3d/D3D12EnhancedBarriers.html#resource-aliasing - - if (can_reuse) - { - if (is_buffer) - { - G_DumbMemorySync(cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource)); - } - else if (is_texture) - { - G_DumbMemoryLayoutSync(cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), desc.texture.initial_layout); - } - } - - return G_D12_MakeHandle(G_ResourceHandle, resource); -} - -//////////////////////////////////////////////////////////// -//~ Descriptor - -G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, u32 index) -{ - G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind]; - G_D12_Descriptor *descriptors = ArenaFirst(heap->descriptors_arena, G_D12_Descriptor); - return &descriptors[index]; -} - -G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind) -{ - G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[heap_kind]; - u64 per_batch_count = heap->per_batch_count; - - G_D12_Descriptor *descriptor = 0; - u32 index = 0; - - // Grab completed descriptor from arena - G_D12_DescriptorList *descriptors = &gpu_arena->reset_descriptors_by_heap[heap_kind]; - descriptor = descriptors->first; - if (descriptor) - { - G_D12_Queue *queue = 
G_D12_QueueFromKind(descriptor->completion_queue_kind); - i64 queue_commit_completion = ID3D12Fence_GetCompletedValue(queue->commit_fence); - if (queue_commit_completion >= descriptor->completion_queue_target) - { - // Descriptor no longer in use by gpu, reuse it - DllQueueRemove(descriptors->first, descriptors->last, descriptor); - descriptors->count -= 1; - index = descriptor->index; - } - else - { - // Descriptor may still be in use by gpu - descriptor = 0; - } - } - - // Allocate new descriptor from heap - if (!descriptor) - { - Lock lock = LockE(&heap->mutex); - { - if (heap->first_free) - { - descriptor = heap->first_free; - DllStackRemove(heap->first_free, descriptor); - index = descriptor->index; - } - else - { - u32 descriptors_count = ArenaCount(heap->descriptors_arena, G_D12_Descriptor); - if (descriptors_count >= heap->max_count) - { - Panic(Lit("Max descriptors reached in heap")); - } - descriptor = PushStructNoZero(heap->descriptors_arena, G_D12_Descriptor); - index = descriptors_count * per_batch_count; - } - } - Unlock(&lock); - } - - // Initialize descriptor handle - ZeroStruct(descriptor); - descriptor->gpu_arena = gpu_arena; - descriptor->index = index; - descriptor->first_handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); - descriptor->heap = heap; - - DllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor); - gpu_arena->descriptors.count += 1; - - return descriptor; -} - -//////////////////////////////////////////////////////////// -//~ @hookimpl Shader resource reference - -u32 G_PushRef(G_ArenaHandle arena_handle, G_ResourceHandle resource_handle, G_RefDesc ref_desc) -{ - G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); - G_D12_Resource *resource = G_D12_ResourceFromHandle(resource_handle); - u32 result = 0; - - G_RefKind kind = ref_desc.kind; - b32 is_buffer = ( - kind == G_RefKind_StructuredBuffer || - kind == G_RefKind_ByteAddressBuffer - ); - b32 is_sampler = kind == 
G_RefKind_SamplerState; - b32 is_texture = !is_buffer && !is_sampler; - b32 is_raw = kind == G_RefKind_ByteAddressBuffer; - b32 is_writable = resource->flags & G_ResourceFlag_AllowShaderReadWrite; - - G_D12_Descriptor *descriptor = 0; - if (is_buffer || is_texture) - { - descriptor = G_D12_PushDescriptor(gpu_arena, G_D12_DescriptorHeapKind_CbvSrvUav); - - G_D12_DescriptorHeap *heap = &G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav]; - Assert(heap->per_batch_count >= 2); - D3D12_CPU_DESCRIPTOR_HANDLE readonly_handle = descriptor->first_handle; - D3D12_CPU_DESCRIPTOR_HANDLE readwrite_handle = descriptor->first_handle; - readwrite_handle.ptr += heap->descriptor_size; - - b32 srv_ok = 0; - b32 uav_ok = 0; - - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = Zi; - D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc = Zi; - - if (is_buffer) - { - if (is_raw) - { - ref_desc.element_size = 4; - ref_desc.element_offset /= 4; - } - - u64 buffer_size_actual = resource->buffer_size_actual; - u64 num_elements_in_buffer = buffer_size_actual / ref_desc.element_size; - u64 num_elements_after_offset = num_elements_in_buffer > ref_desc.element_offset ? 
num_elements_in_buffer - ref_desc.element_offset : 0; - - //- Create buffer SRV - { - { - srv_desc.Format = DXGI_FORMAT_UNKNOWN; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Buffer.FirstElement = ref_desc.element_offset; - srv_desc.Buffer.NumElements = num_elements_after_offset; - srv_desc.Buffer.StructureByteStride = ref_desc.element_size; - srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; - } - if (is_raw) - { - srv_desc.Format = DXGI_FORMAT_R32_TYPELESS; - srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; - srv_desc.Buffer.StructureByteStride = 0; - } - srv_ok = 1; - } - //- Create buffer UAV - { - { - uav_desc.Format = DXGI_FORMAT_UNKNOWN; - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; - uav_desc.Buffer.FirstElement = ref_desc.element_offset; - uav_desc.Buffer.NumElements = num_elements_after_offset; - uav_desc.Buffer.StructureByteStride = ref_desc.element_size; - uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; - } - if (is_raw) - { - uav_desc.Format = DXGI_FORMAT_R32_TYPELESS; - uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; - uav_desc.Buffer.StructureByteStride = 0; - } - } - if (num_elements_after_offset > 0) - { - srv_ok = 1; - if (is_writable) - { - uav_ok = 1; - } - } - } - else if (is_texture) - { - // DXGI_FORMAT format = G_D12_DxgiFormatFromGpuFormat(resource->texture_format); - RngI32 mips = ref_desc.mips; - mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); - mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); - //- Create texture SRV - { - srv_desc.Format = DXGI_FORMAT_UNKNOWN; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - if (ref_desc.kind == G_RefKind_Texture1D) - { - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; - srv_desc.Texture1D.MostDetailedMip = mips.min; - srv_desc.Texture1D.MipLevels = mips.max - mips.min + 1; - } - else if (ref_desc.kind == 
G_RefKind_Texture2D) - { - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srv_desc.Texture2D.MostDetailedMip = mips.min; - srv_desc.Texture2D.MipLevels = mips.max - mips.min + 1; - } - else if (ref_desc.kind == G_RefKind_Texture3D) - { - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; - srv_desc.Texture3D.MostDetailedMip = mips.min; - srv_desc.Texture3D.MipLevels = mips.max - mips.min + 1; - } - } - //- Create texture UAV - { - uav_desc.Format = DXGI_FORMAT_UNKNOWN; - if (ref_desc.kind == G_RefKind_Texture1D) - { - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; - uav_desc.Texture1D.MipSlice = mips.min; - } - else if (ref_desc.kind == G_RefKind_Texture2D) - { - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; - uav_desc.Texture2D.MipSlice = mips.min; - } - else if (ref_desc.kind == G_RefKind_Texture3D) - { - uav_desc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; - uav_desc.Texture3D.MipSlice = mips.min; - uav_desc.Texture3D.WSize = U32Max; - } - } - - srv_ok = 1; - if (is_writable) - { - uav_ok = 1; - } - - if (!uav_ok) - { - uav_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - } - } - - if (srv_ok) - { - ID3D12Device_CreateShaderResourceView(G_D12.device, resource->d3d_resource, &srv_desc, readonly_handle); - } - else - { - ID3D12Device_CreateShaderResourceView(G_D12.device, 0, &srv_desc, readonly_handle); - } - - if (uav_ok) - { - ID3D12Device_CreateUnorderedAccessView(G_D12.device, resource->d3d_resource, 0, &uav_desc, readwrite_handle); - } - else - { - ID3D12Device_CreateUnorderedAccessView(G_D12.device, 0, 0, &uav_desc, readwrite_handle); - } - } - else if (is_sampler) - { - descriptor = G_D12_PushDescriptor(gpu_arena, G_D12_DescriptorHeapKind_Sampler); - G_SamplerDesc sampler_desc = resource->sampler_desc; - D3D12_SAMPLER_DESC d3d_desc = Zi; - { - d3d_desc.Filter = (D3D12_FILTER)sampler_desc.filter; - d3d_desc.AddressU = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.x; - d3d_desc.AddressV = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.y; 
- d3d_desc.AddressW = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.z; - d3d_desc.MipLODBias = sampler_desc.mip_lod_bias; - d3d_desc.MaxAnisotropy = MaxU32(sampler_desc.max_anisotropy, 1); - d3d_desc.ComparisonFunc = (D3D12_COMPARISON_FUNC)sampler_desc.comparison; - d3d_desc.BorderColor[0] = sampler_desc.border_color.x; - d3d_desc.BorderColor[1] = sampler_desc.border_color.y; - d3d_desc.BorderColor[2] = sampler_desc.border_color.z; - d3d_desc.BorderColor[3] = sampler_desc.border_color.w; - d3d_desc.MinLOD = sampler_desc.min_lod; - d3d_desc.MaxLOD = sampler_desc.max_lod; - } - if (d3d_desc.AddressU == 0) d3d_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - if (d3d_desc.AddressV == 0) d3d_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - if (d3d_desc.AddressW == 0) d3d_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - if (d3d_desc.MaxLOD >= Inf) - { - d3d_desc.MaxLOD = D3D12_FLOAT32_MAX; - } - ID3D12Device_CreateSampler(G_D12.device, &d3d_desc, descriptor->first_handle); - } - - return descriptor->index; -} - -//- Count - -u64 G_CountBufferBytes(G_ResourceHandle buffer) -{ - G_D12_Resource *resource = G_D12_ResourceFromHandle(buffer); - return resource->buffer_size; -} - -i32 G_Count1D(G_ResourceHandle texture) -{ - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_dims.x; -} - -Vec2I32 G_Count2D(G_ResourceHandle texture) -{ - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return VEC2I32(resource->texture_dims.x, resource->texture_dims.y); -} - -Vec3I32 G_Count3D(G_ResourceHandle texture) -{ - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_dims; -} - -i32 G_CountWidth(G_ResourceHandle texture) -{ - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_dims.x; -} - -i32 G_CountHeight(G_ResourceHandle texture) -{ - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_dims.y; -} - -i32 
G_CountDepth(G_ResourceHandle texture) -{ - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_dims.z; -} - -i32 G_CountMips(G_ResourceHandle texture) -{ - G_D12_Resource *resource = G_D12_ResourceFromHandle(texture); - return resource->texture_mips; -} - -//- Map - -void *G_HostPointerFromResource(G_ResourceHandle resource_handle) -{ - G_D12_Resource *resource = G_D12_ResourceFromHandle(resource_handle); - return resource->mapped; -} - -//////////////////////////////////////////////////////////// -//~ Command helpers - -G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl) -{ - // Grab chunk - G_D12_CmdChunk *chunk = cl->last_cmd_chunk; - { - if (chunk && chunk->cmds_count >= G_D12_CmdsPerChunk) - { - chunk = 0; - } - if (!chunk) - { - Lock lock = LockE(&G_D12.free_cmd_chunks_mutex); - { - chunk = G_D12.first_free_cmd_chunk; - if (chunk) - { - G_D12.first_free_cmd_chunk = chunk->next; - } - } - Unlock(&lock); - if (chunk) - { - G_D12_Cmd *cmds = chunk->cmds; - ZeroStruct(chunk); - chunk->cmds = cmds; - } - } - if (!chunk) - { - Arena *perm = PermArena(); - chunk = PushStruct(perm, G_D12_CmdChunk); - chunk->cmds = PushStructsNoZero(perm, G_D12_Cmd, G_D12_CmdsPerChunk); - } - if (chunk != cl->last_cmd_chunk) - { - SllQueuePush(cl->first_cmd_chunk, cl->last_cmd_chunk, chunk); - } - } - - // Push cmd to chunk - G_D12_Cmd *cmd = &chunk->cmds[chunk->cmds_count++]; - ZeroStruct(cmd); - ++cl->cmds_count; - return cmd; -} - -G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v) -{ - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Constant; - cmd->constant.slot = slot; - CopyBytes(&cmd->constant.value, v, 4); - return cmd; -} - -G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) -{ - size = AlignU64(size, MaxU64(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, 512)); - - G_QueueKind queue_kind = cl->queue_kind; - G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); - G_D12_StagingRegionNode *result 
= 0; - - Lock lock = LockE(&queue->staging_mutex); - { - G_D12_StagingRing *old_ring = 0; - G_D12_StagingRing *ring = queue->staging_ring; - i64 completion = ID3D12Fence_GetCompletedValue(queue->commit_fence); - - // Find first completed region with matching size. - // For each region in ring: - // - If region size > size, split off a smaller region & use it - // - // - If region size < size, try to merge with next completed region - // - // - If no available completed region with eligible size, queue the - // current ring for deletion & create a new ring - // with larger size - - // Find region with large enough size - G_D12_StagingRegionNode *match = 0; - if (ring && ring->size >= size) - { - G_D12_StagingRegionNode *r = ring->head_region_node; - for (;;) - { - G_D12_StagingRegionNode *next = r->next; - b32 is_completed = completion >= Atomic64Fetch(&r->completion_target); - if (is_completed) - { - u64 region_size = 0; - if (next->pos > r->pos) - { - region_size = next->pos - r->pos; - } - else - { - region_size = ring->size - r->pos; - } - - if (region_size < size) - { - b32 next_is_completed = completion >= Atomic64Fetch(&next->completion_target); - if (next_is_completed) - { - if (next->pos > r->pos) - { - // Merge with next region & retry - if (next == ring->head_region_node) - { - ring->head_region_node = r; - } - r->next = next->next; - r->next->prev = r; - SllStackPush(ring->first_free_region_node, next); - } - else - { - // Wrap to beginning - r = next; - if (r == ring->head_region_node) - { - // No large-enough completed region found - break; - } - } - } - else - { - // No large-enough completed region found - break; - } - } - else - { - // Found matching region - match = r; - break; - } - } - else - { - // Continue to next region - r = next; - if (r == ring->head_region_node) - { - // No large-enough completed region found - break; - } - } - } - } - - // Create new ring if no match found - if (!match) - { - // Queue old ring for deletion - old_ring = 
ring; - ring = 0; - u64 new_ring_size = MaxU64(NextPow2U64(size), Mebi(8)); - if (old_ring) - { - new_ring_size = MaxU64(new_ring_size, old_ring->size * 2); - } - - // Create new ring - { - Arena *arena = AcquireArena(Gibi(1)); - ring = PushStruct(arena, G_D12_StagingRing); - ring->arena = arena; - ring->size = new_ring_size; - - G_ArenaHandle gpu_arena_handle = G_AcquireArena(); - ring->gpu_arena = G_D12_ArenaFromHandle(gpu_arena_handle); - - G_ResourceHandle resource_handle = G_PushBuffer( - gpu_arena_handle, G_D12_MakeHandle(G_CommandListHandle, cl), - u8, - new_ring_size, - .flags = G_ResourceFlag_HostMemory | G_ResourceFlag_Uncached - ); - ring->resource = G_D12_ResourceFromHandle(resource_handle); - ring->base = G_StructFromResource(resource_handle, u8); - } - - // Create initial region - match = PushStruct(ring->arena, G_D12_StagingRegionNode); - match->ring = ring; - match->next = match; - match->prev = match; - - // FIXME: Remove this - queue->staging_ring = ring; - } - - // Split extra region space - { - G_D12_StagingRegionNode *next = match->next; - u64 region_size = 0; - if (next->pos > match->pos) - { - region_size = next->pos - match->pos; - } - else - { - region_size = ring->size - match->pos; - } - - if (region_size > size) - { - G_D12_StagingRegionNode *new_next = ring->first_free_region_node; - if (new_next) - { - SllStackPop(ring->first_free_region_node); - ZeroStruct(new_next); - } - else - { - new_next = PushStruct(ring->arena, G_D12_StagingRegionNode); - } - new_next->next = next; - new_next->prev = match; - next->prev = new_next; - match->next = new_next; - - new_next->ring = ring; - new_next->pos = match->pos + size; - } - } - - ring->head_region_node = match->next; - - Atomic64Set(&match->completion_target, I64Max); - result = match; - - if (old_ring) - { - // FIXME: Queue old ring for deletion with command list - } - } - Unlock(&lock); - - // Add to command list - SllQueuePushN(cl->first_staging_region, cl->last_staging_region, result, 
next_in_command_list); - - return result; -} - -//////////////////////////////////////////////////////////// -//~ @hookimpl Command - -//- Command list - -G_CommandListHandle G_PrepareCommandList(G_QueueKind queue) -{ - G_D12_CmdList *cl = 0; - Lock lock = LockE(&G_D12.free_cmd_lists_mutex); - { - cl = G_D12.first_free_cmd_list; - if (cl) - { - G_D12.first_free_cmd_list = cl->next; - ZeroStruct(cl); - } - else - { - Arena *perm = PermArena(); - cl = PushStruct(perm, G_D12_CmdList); - } - } - Unlock(&lock); - cl->queue_kind = queue; - - return G_D12_MakeHandle(G_CommandListHandle, cl); -} - -i64 G_CommitCommandList(G_CommandListHandle cl_handle) -{ - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_QueueKind queue_kind = cl->queue_kind; - G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); - TempArena scratch = BeginScratchNoConflict(); - - // Begin dx12 command list - G_D12_RawCommandList *rcl = G_D12_PrepareRawCommandList(queue_kind); - ID3D12GraphicsCommandList7 *d3d_cl = rcl->d3d_cl; - - // Pipeline state - b32 graphics_rootsig_set = 0; - b32 compute_rootsig_set = 0; - b32 descriptor_heaps_set = 0; - G_D12_Pipeline *bound_pipeline = 0; - - // Constants state - u64 slotted_constants[G_NumConstants]; - u64 bound_compute_constants[G_NumConstants]; - u64 bound_graphics_constants[G_NumConstants]; - for (i32 i = 0; i < countof(slotted_constants); ++i) { slotted_constants[i] = 0; } // Zero-initialize all slots - for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; } - for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } - - // Fill built-in constants - if (!G_IsRefNil(queue->print_buffer_ref)) - { - slotted_constants[G_ShaderConst_PrintBufferRef] = queue->print_buffer_ref.v; - } - { - b32 tweak_b32 = TweakBool("Shader tweak-bool", 1); - f32 tweak_f32 = TweakFloat("Shader tweak-float", 1, 0, 1); - slotted_constants[G_ShaderConst_TweakB32] = tweak_b32; - 
slotted_constants[G_ShaderConst_TweakF32] = *(u32 *)&tweak_f32; - } - - // Rasterizer state - D3D12_VIEWPORT bound_viewport = Zi; - D3D12_RECT bound_scissor = Zi; - D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; - D3D12_INDEX_BUFFER_VIEW bound_ibv = Zi; - u64 bound_render_target_uids[G_MaxRenderTargets] = Zi; - u64 bound_render_clear_target_uid = 0; - - // Flatten command chunks - u64 cmds_count = 0; - G_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, G_D12_Cmd, cl->cmds_count); - { - // Flatten command chunks - { - for (G_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next) - { - for (u64 cmd_chunk_idx = 0; cmd_chunk_idx < chunk->cmds_count; ++cmd_chunk_idx) - { - cmds[cmds_count++] = chunk->cmds[cmd_chunk_idx]; - } - } - } - // Free command chunks - { - Lock lock = LockE(&G_D12.free_cmd_chunks_mutex); - { - G_D12_CmdChunk *chunk = cl->first_cmd_chunk; - while (chunk) - { - G_D12_CmdChunk *next = chunk->next; - G_D12.first_free_cmd_chunk = chunk; - chunk = next; - } - } - Unlock(&lock); - } - } - - // Batch barrier cmds - i64 max_buffer_barriers = 0; - i64 max_texture_barriers = 0; - i64 max_global_barriers = 0; - { - u64 cmd_idx = 0; - u64 batch_gen = 0; - G_D12_Cmd *prev_barrier_cmd = 0; - while (cmd_idx < cmds_count) - { - G_D12_Cmd *cmd = &cmds[cmd_idx]; - switch (cmd->kind) - { - // Batch-interrupting cmds - default: - { - cmd_idx += 1; - batch_gen += 1; - } break; - - // Non-batch-interrupting cmds - case G_D12_CmdKind_Constant: - { - cmd_idx += 1; - } break; - - case G_D12_CmdKind_Barrier: - { - // Determine 'before' state from lookup - if (prev_barrier_cmd && prev_barrier_cmd->barrier.batch_gen != batch_gen) - { - // This barrier is part of new batch - prev_barrier_cmd->barrier.is_end_of_batch = 1; - } - cmd->barrier.batch_gen = batch_gen; - prev_barrier_cmd = cmd; - - if (cmd->barrier.desc.is_global) - { - max_global_barriers += 1; - } - else - { - G_D12_Resource *resource = G_D12_ResourceFromHandle(cmd->barrier.desc.resource); - 
if (resource->is_texture) - { - RngI32 mips = cmd->barrier.desc.mips; - mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); - mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); - max_texture_barriers += mips.max - mips.min + 1; - } - else - { - max_buffer_barriers += 1; - } - } - - cmd_idx += 1; - } break; - } - } - - if (prev_barrier_cmd) - { - prev_barrier_cmd->barrier.is_end_of_batch = 1; - } - } - - // Build d3d commands - { - u64 batch_barrier_idx_start = 0; - u64 batch_barrier_idx_opl = 0; // One past last - - u64 cmd_idx = 0; - while (cmd_idx < cmds_count) - { - G_D12_Cmd *cmd = &cmds[cmd_idx]; - switch (cmd->kind) - { - default: - { - cmd_idx += 1; - } break; - - //- Constant - - case G_D12_CmdKind_Constant: - { - i32 slot = cmd->constant.slot; - u32 value = cmd->constant.value; - if (slot >= 0 && slot < countof(slotted_constants)) - { - slotted_constants[slot] = value; - } - cmd_idx += 1; - } break; - - //- Barrier - - case G_D12_CmdKind_Barrier: - { - batch_barrier_idx_opl = cmd_idx + 1; - - // Submit batched barriers - if (cmd->barrier.is_end_of_batch) - { - // Build barriers - u64 buffer_barriers_count = 0; - u64 texture_barriers_count = 0; - u64 global_barriers_count = 0; - D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, max_buffer_barriers); - D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, max_texture_barriers); - D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, max_global_barriers); - for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < batch_barrier_idx_opl; ++barrier_cmd_idx) - { - G_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx]; - if (barrier_cmd->kind == G_D12_CmdKind_Barrier) - { - G_MemoryBarrierDesc desc = barrier_cmd->barrier.desc; - // Translate gpu barrier kind -> d3d barrier fields - D3D12_BARRIER_SYNC sync_before = G_D12_BarrierSyncFromStages(desc.stage_prev); - 
D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.stage_next); - D3D12_BARRIER_ACCESS access_before = G_D12_BarrierAccessFromAccesses(desc.access_prev); - D3D12_BARRIER_ACCESS access_after = G_D12_BarrierAccessFromAccesses(desc.access_next); - D3D12_BARRIER_TYPE barrier_type = D3D12_BARRIER_TYPE_GLOBAL; - if (!desc.is_global) - { - G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); - barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; - } - - // Build barrier - switch (barrier_type) - { - case D3D12_BARRIER_TYPE_BUFFER: - { - G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); - D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++]; - barrier->SyncBefore = sync_before; - barrier->SyncAfter = sync_after; - barrier->AccessBefore = access_before; - barrier->AccessAfter = access_after; - barrier->pResource = resource->d3d_resource; - barrier->Offset = 0; - barrier->Size = U64Max; - } break; - - case D3D12_BARRIER_TYPE_TEXTURE: - { - G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); - RngI32 mips = barrier_cmd->barrier.desc.mips; - { - mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); - mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); - } - // Create a barrier for each contiguous span of mips with matching layout - D3D12_TEXTURE_BARRIER *barrier = 0; - for (i32 mip_idx = mips.min; mip_idx <= mips.max; ++mip_idx) - { - D3D12_BARRIER_LAYOUT layout_before = resource->cmdlist_texture_layouts[mip_idx]; - D3D12_BARRIER_LAYOUT layout_after = layout_before; - if (desc.layout != G_Layout_NoChange) - { - layout_after = G_D12_BarrierLayoutFromLayout(desc.layout); - } - if (barrier == 0 || barrier->LayoutBefore != layout_before) - { - barrier = &texture_barriers[texture_barriers_count++]; - barrier->SyncBefore = sync_before; - barrier->SyncAfter = sync_after; - barrier->AccessBefore = access_before; - barrier->AccessAfter = 
access_after; - barrier->LayoutBefore = layout_before; - barrier->LayoutAfter = layout_after; - barrier->pResource = resource->d3d_resource; - barrier->Subresources.IndexOrFirstMipLevel = mip_idx; - barrier->Subresources.NumArraySlices = 1; - barrier->Subresources.NumPlanes = 1; - } - barrier->Subresources.NumMipLevels += 1; - resource->cmdlist_texture_layouts[mip_idx] = layout_after; - } - } break; - - case D3D12_BARRIER_TYPE_GLOBAL: - { - D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++]; - barrier->SyncBefore = sync_before; - barrier->SyncAfter = sync_after; - barrier->AccessBefore = access_before; - barrier->AccessAfter = access_after; - } break; - } - } - } - - // Dispatch barriers - { - u32 barrier_groups_count = 0; - D3D12_BARRIER_GROUP barrier_groups[3] = Zi; - if (buffer_barriers_count > 0) - { - D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; - group->Type = D3D12_BARRIER_TYPE_BUFFER; - group->NumBarriers = buffer_barriers_count; - group->pBufferBarriers = buffer_barriers; - } - if (texture_barriers_count > 0) - { - D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; - group->Type = D3D12_BARRIER_TYPE_TEXTURE; - group->NumBarriers = texture_barriers_count; - group->pTextureBarriers = texture_barriers; - } - if (global_barriers_count > 0) - { - D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; - group->Type = D3D12_BARRIER_TYPE_GLOBAL; - group->NumBarriers = global_barriers_count; - group->pGlobalBarriers = global_barriers; - } - if (barrier_groups_count > 0) - { - ID3D12GraphicsCommandList7_Barrier(d3d_cl, barrier_groups_count, barrier_groups); - } - } - - batch_barrier_idx_start = cmd_idx + 1; - } - - cmd_idx += 1; - } break; - - //- Copy bytes - - case G_D12_CmdKind_CopyBytes: - { - u64 src_offset = cmd->copy_bytes.src_range.min; - u64 copy_size = cmd->copy_bytes.src_range.max - cmd->copy_bytes.src_range.min; - ID3D12GraphicsCommandList_CopyBufferRegion( - d3d_cl, - 
cmd->copy_bytes.dst->d3d_resource, - cmd->copy_bytes.dst_offset, - cmd->copy_bytes.src->d3d_resource, - src_offset, - copy_size - ); - cmd_idx += 1; - } break; - - //- Copy texels - - case G_D12_CmdKind_CopyTexels: - { - G_D12_Resource *dst = cmd->copy_texels.dst; - G_D12_Resource *src = cmd->copy_texels.src; - D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc; - D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc; - Vec3I32 dst_offset = cmd->copy_texels.dst_texture_offset; - Rng3I32 src_range = cmd->copy_texels.src_texture_range; - - D3D12_BOX src_box = Zi; - D3D12_BOX *src_box_ptr = 0; - { - src_box.left = src_range.p0.x; - src_box.top = src_range.p0.y; - src_box.front = src_range.p0.z; - src_box.right = src_range.p1.x; - src_box.bottom = src_range.p1.y; - src_box.back = src_range.p1.z; - if (src->is_texture) - { - src_box_ptr = &src_box; - } - } - - if (dst->flags & G_ResourceFlag_AllowDepthStencil) - { - // Depth-stencil textures must have src box & dst offset set to 0 - // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12graphicscommandlist-copytextureregion - ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, 0, 0, 0, &src_loc, 0); - } - else - { - ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, src_box_ptr); - } - - cmd_idx += 1; - } break; - - //- Compute - - case G_D12_CmdKind_Compute: - { - // Fetch pipeline - G_D12_Pipeline *pipeline = 0; - { - G_D12_PipelineDesc pipeline_desc; - ZeroStruct(&pipeline_desc); - pipeline_desc.cs = cmd->compute.cs; - pipeline = G_D12_PipelineFromDesc(pipeline_desc); - } - - if (pipeline) - { - // Set descriptor heaps - if (!descriptor_heaps_set) - { - ID3D12DescriptorHeap *heaps[] = { - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, - }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), 
heaps); - descriptor_heaps_set = 1; - } - - // Bind rootsig - if (!compute_rootsig_set) - { - ID3D12GraphicsCommandList_SetComputeRootSignature(d3d_cl, G_D12.bindless_rootsig); - compute_rootsig_set = 1; - } - - // Bind pipeline - if (pipeline != bound_pipeline) - { - ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); - bound_pipeline = pipeline; - } - - // Update root constants - for (i32 slot = 0; slot < countof(slotted_constants); ++slot) - { - if (bound_compute_constants[slot] != slotted_constants[slot]) - { - ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); - bound_compute_constants[slot] = slotted_constants[slot]; - } - } - - // Dispatch - ID3D12GraphicsCommandList_Dispatch(d3d_cl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); - } - - cmd_idx += 1; - } break; - - //- Rasterize - - case G_D12_CmdKind_Rasterize: - { - // Fetch pipeline - G_D12_Pipeline *pipeline = 0; - { - G_D12_PipelineDesc pipeline_desc; - ZeroStruct(&pipeline_desc); - pipeline_desc.vs = cmd->rasterize.vs; - pipeline_desc.ps = cmd->rasterize.ps; - { - pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; - switch (cmd->rasterize.raster_mode) - { - default: Assert(0); break; - case G_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; - case G_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - case G_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - case G_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case G_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case G_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case G_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = 
D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - } - } - if (cmd->rasterize.raster_mode == G_RasterMode_WireTriangleList || cmd->rasterize.raster_mode == G_RasterMode_WireTriangleStrip) - { - pipeline_desc.is_wireframe = 1; - } - for (u32 i = 0; i < countof(cmd->rasterize.render_target_descs); ++i) - { - G_RenderTargetDesc desc = cmd->rasterize.render_target_descs[i]; - G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); - if (rt) - { - pipeline_desc.render_target_formats[i] = rt->texture_format; - pipeline_desc.render_target_blend_modes[i] = desc.blend; - } - else - { - pipeline_desc.render_target_formats[i] = G_Format_Unknown; - } - } - pipeline = G_D12_PipelineFromDesc(pipeline_desc); - } - - // Create ibv - u32 indices_count = 0; - D3D12_INDEX_BUFFER_VIEW ibv = Zi; - { - G_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc; - if (desc.count > 0) - { - G_D12_Resource *index_buffer_resource = G_D12_ResourceFromHandle(desc.resource); - ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; - ibv.SizeInBytes = desc.stride * desc.count; - if (desc.stride == 2) - { - ibv.Format = DXGI_FORMAT_R16_UINT; - indices_count = ibv.SizeInBytes / 2; - } - else if (desc.stride == 4) - { - ibv.Format = DXGI_FORMAT_R32_UINT; - indices_count = ibv.SizeInBytes / 4; - } - else - { - Assert(0); // Invalid index size - } - } - } - - // Prepare & dispatch - if (pipeline && indices_count > 0) - { - // Set descriptor heaps - if (!descriptor_heaps_set) - { - ID3D12DescriptorHeap *heaps[] = { - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, - }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); - descriptor_heaps_set = 1; - } - - // Bind rootsig - if (!graphics_rootsig_set) - { - ID3D12GraphicsCommandList_SetGraphicsRootSignature(d3d_cl, G_D12.bindless_rootsig); - graphics_rootsig_set = 1; - } - - // Bind pipeline - if (pipeline != bound_pipeline) 
- { - ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); - bound_pipeline = pipeline; - } - - // Update root constants - for (i32 slot = 0; slot < countof(slotted_constants); ++slot) - { - if (bound_graphics_constants[slot] != slotted_constants[slot]) - { - ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); - bound_graphics_constants[slot] = slotted_constants[slot]; - } - } - - // Set viewport - { - D3D12_VIEWPORT viewport = Zi; - { - Rng3 range = cmd->rasterize.viewport; - viewport.TopLeftX = range.p0.x; - viewport.TopLeftY = range.p0.y; - viewport.Width = range.p1.x - range.p0.x; - viewport.Height = range.p1.y - range.p0.y; - viewport.MinDepth = range.p0.z; - viewport.MaxDepth = range.p1.z; - } - if (!MatchStruct(&viewport, &bound_viewport)) - { - bound_viewport = viewport; - ID3D12GraphicsCommandList_RSSetViewports(d3d_cl, 1, &viewport); - } - } - - // Set scissor - { - D3D12_RECT scissor = Zi; - { - Rng2 range = cmd->rasterize.scissor; - scissor.left = range.p0.x; - scissor.top = range.p0.y; - scissor.right = range.p1.x; - scissor.bottom = range.p1.y; - } - if (!MatchStruct(&scissor, &bound_scissor)) - { - bound_scissor = scissor; - ID3D12GraphicsCommandList_RSSetScissorRects(d3d_cl, 1, &scissor); - } - } - - // Set topology - { - D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - switch (cmd->rasterize.raster_mode) - { - default: Assert(0); break; - case G_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; - case G_RasterMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; - case G_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; - case G_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - case G_RasterMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - case G_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - case 
G_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - } - if (topology != bound_primitive_topology) - { - ID3D12GraphicsCommandList_IASetPrimitiveTopology(d3d_cl, topology); - } - } - - // Set index buffer - if (!MatchStruct(&ibv, &bound_ibv)) - { - ID3D12GraphicsCommandList_IASetIndexBuffer(d3d_cl, &ibv); - bound_ibv = ibv; - } - - // Bind render targets - { - b32 om_dirty = 0; - u32 rtvs_count = 0; - for (u32 i = 0; i < countof(cmd->rasterize.render_target_descs); ++i) - { - G_RenderTargetDesc desc = cmd->rasterize.render_target_descs[i]; - G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); - if (rt) - { - Assert(AnyBit(rt->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)); - if (bound_render_target_uids[i] != rt->uid + desc.mip) - { - G_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i]; - G_D12_InitRtv(rt, rtv_descriptor->first_handle, desc.mip); - bound_render_target_uids[i] = rt->uid + desc.mip; - om_dirty = 1; - } - ++rtvs_count; - } - else - { - break; - } - } - if (om_dirty) - { - D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[G_MaxRenderTargets] = Zi; - for (u32 i = 0; i < rtvs_count; ++i) - { - rtv_handles[i] = rcl->rtv_descriptors[i]->first_handle; - } - ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0); - } - } - - // Dispatch - ID3D12GraphicsCommandList_DrawIndexedInstanced(d3d_cl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); - } - - cmd_idx += 1; - } break; - - //- Clear rtv - - case G_D12_CmdKind_ClearRtv: - { - G_D12_Resource *rt = cmd->clear_rtv.render_target; - f32 clear_color[4] = Zi; - { - clear_color[0] = cmd->clear_rtv.color.x; - clear_color[1] = cmd->clear_rtv.color.y; - clear_color[2] = cmd->clear_rtv.color.z; - clear_color[3] = cmd->clear_rtv.color.w; - } - D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->first_handle; - if (bound_render_clear_target_uid != rt->uid + cmd->clear_rtv.mip) - { - G_D12_InitRtv(rt, rtv_handle, 
cmd->clear_rtv.mip); - bound_render_clear_target_uid = rt->uid + cmd->clear_rtv.mip; - } - ID3D12GraphicsCommandList_ClearRenderTargetView(d3d_cl, rtv_handle, clear_color, 0, 0); - cmd_idx += 1; - } break; - - //- Log - - case G_D12_CmdKind_Log: - { - G_D12_Resource *resource = cmd->log.resource; - String resource_name = STRING(resource->name_len, resource->name_text); - - String layouts_str = Zi; - { - StringList layout_names = Zi; - for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) - { - String layout_name = G_D12_NameFromBarrierLayout(resource->cmdlist_texture_layouts[mip_idx]); - String layout_str = StringF(scratch.arena, "[%F] %F", FmtSint(mip_idx), FmtString(layout_name)); - PushStringToList(scratch.arena, &layout_names, layout_str); - } - layouts_str = StringFromList(scratch.arena, layout_names, Lit(", ")); - } - - String msg = StringF( - scratch.arena, - "[Gpu command list resource log] uid: %F, name: \"%F\", layouts: { %F }", - FmtUint(resource->uid), - FmtString(resource_name), - FmtString(layouts_str) - ); - LogDebug(msg); - cmd_idx += 1; - } break; - - //- Discard - - case G_D12_CmdKind_Discard: - { - G_D12_Resource *resource = cmd->discard.resource; - D3D12_DISCARD_REGION region = Zi; - region.FirstSubresource = 0; - region.NumSubresources = resource->texture_mips; - ID3D12GraphicsCommandList_DiscardResource(d3d_cl, resource->d3d_resource, 0); - cmd_idx += 1; - } break; - } - } - } - - // End dx12 command list - i64 completion_target = G_D12_CommitRawCommandList(rcl); - - // Attach completion info to staging regions - for (G_D12_StagingRegionNode *n = cl->first_staging_region; n;) - { - G_D12_StagingRegionNode *next = n->next_in_command_list; - { - Atomic64Set(&n->completion_target, completion_target); - n->next_in_command_list = 0; - } - n = next; - } - - // Attach completion info to reset descriptors - for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;) - { - G_D12_Descriptor *next = d->next; - { - G_D12_Arena *gpu_arena = 
d->gpu_arena; - d->completion_queue_kind = queue_kind; - d->completion_queue_target = completion_target; - G_D12_DescriptorList *gpu_arena_reset_descriptors_list = &gpu_arena->reset_descriptors_by_heap[d->heap->kind]; - DllQueuePush(gpu_arena_reset_descriptors_list->first, gpu_arena_reset_descriptors_list->last, d); - ++gpu_arena_reset_descriptors_list->count; - } - d = next; - } - - // Attach completion info to releasables & submit for release - if (cl->releases.first) - { - // Attach completion info - for (G_D12_Releasable *release = cl->releases.first; release; release = release->next) - { - release->completion_queue_kind = queue_kind; - release->completion_queue_target = completion_target; - } - // Submit releass - Lock lock = LockE(&G_D12.pending_releases_mutex); - { - if (G_D12.pending_releases.last) - { - G_D12.pending_releases.last->next = cl->releases.first; - } - else - { - G_D12.pending_releases.first = cl->releases.first; - } - G_D12.pending_releases.last = cl->releases.last; - } - Unlock(&lock); - } - - - - - - // // Attach completion info to resources - // for (G_D12_Resource *r = cl->reset_resources.first; r;) - // { - // G_D12_Resource *next = r->next; - // { - // G_D12_ResourceHeap *heap = r->heap; - // G_D12_Arena *gpu_arena = >heap->gpu_arena; - // r->completion_queue_kind = queue->kind; - // r->completion_queue_target = completion_target; - // G_D12_ResourceList *heap_reset_resources_list = &heap->reset_resources; - // DllQueuePush(heap_reset_resources_list->first, heap_reset_resourecs_list->last, r); - // ++heap_reset_resources_list->count; - // } - // r = next; - // } - - // Free command list - { - Lock lock = LockE(&G_D12.free_cmd_lists_mutex); - { - cl->next = G_D12.first_free_cmd_list; - G_D12.first_free_cmd_list = cl; - } - Unlock(&lock); - } - - EndScratch(scratch); - return completion_target; -} - -//- Cpu -> Gpu staged copy - -void G_CopyCpuToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, void *src, 
RngU64 src_copy_range) -{ - if (src_copy_range.max > src_copy_range.min) - { - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - u64 copy_size = src_copy_range.max - src_copy_range.min; - G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, copy_size); - CopyBytes((u8 *)region->ring->base + region->pos, (u8 *)src + src_copy_range.min, copy_size); - G_CopyBufferToBuffer( - cl_handle, - dst_handle, - dst_offset, - G_D12_MakeHandle(G_ResourceHandle, region->ring->resource), - RNGU64(region->pos, region->pos + copy_size) - ); - } -} - -void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range) -{ - Vec3I32 staged_dims = Zi; - { - staged_dims.x = src_copy_range.p1.x - src_copy_range.p0.x; - staged_dims.y = src_copy_range.p1.y - src_copy_range.p0.y; - staged_dims.z = src_copy_range.p1.z - src_copy_range.p0.z; - } - if (staged_dims.x > 0 && staged_dims.y > 0 && staged_dims.z > 0) - { - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); - Assert(dst->is_texture); - - // Grab footprint info - u64 staging_footprint_rows_count = 0; - u64 staging_footprint_row_size = 0; - u64 staging_footprint_row_pitch = 0; - u64 staging_footprint_size = 0; - D3D12_PLACED_SUBRESOURCE_FOOTPRINT staging_footprint = Zi; - { - D3D12_RESOURCE_DESC src_desc = Zi; - { - ID3D12Resource_GetDesc(dst->d3d_resource, &src_desc); - src_desc.Width = staged_dims.x; - src_desc.Height = staged_dims.y; - src_desc.DepthOrArraySize = staged_dims.z; - } - ID3D12Device_GetCopyableFootprints(G_D12.device, &src_desc, 0, 1, 0, &staging_footprint, (u32 *)&staging_footprint_rows_count, &staging_footprint_row_size, &staging_footprint_size); - staging_footprint_row_pitch = staging_footprint.Footprint.RowPitch; - } - - i32 bytes_per_texel = staging_footprint_row_size / staged_dims.x; - u64 src_row_pitch = src_dims.x * bytes_per_texel; - - 
G_D12_StagingRegionNode *staging_region = G_D12_PushStagingRegion(cl, staging_footprint_size); - G_D12_Resource *staging_resource = staging_region->ring->resource; - G_ResourceHandle staging_resource_handle = G_D12_MakeHandle(G_ResourceHandle, staging_resource); - staging_footprint.Offset = staging_region->pos; - - // Fill staging buffer - { - u8 *src_base = (u8 *)src + (src_copy_range.p0.y * src_row_pitch) + (src_copy_range.p0.x * bytes_per_texel); - u8 *staged_base = (u8 *)staging_region->ring->base + staging_footprint.Offset; - u64 src_z_pitch = src_row_pitch * src_dims.y; - u64 staged_z_pitch = staging_footprint_row_size * staging_footprint_rows_count; - for (i32 z = 0; z < src_dims.z; ++z) - { - u64 src_z_offset = z * src_z_pitch; - u64 staged_z_offset = z * staged_z_pitch; - for (i32 y = 0; y < staging_footprint_rows_count; ++y) - { - u8 *src_row = src_base + y * src_row_pitch + src_z_offset; - u8 *staged_row = staged_base + y * staging_footprint_row_pitch + staged_z_offset; - CopyBytes(staged_row, src_row, staging_footprint_row_size); - } - } - } - - Rng3I32 dst_copy_range = Zi; - dst_copy_range.p0 = dst_offset; - dst_copy_range.p1.x = dst_copy_range.p0.x + staged_dims.x; - dst_copy_range.p1.y = dst_copy_range.p0.y + staged_dims.y; - dst_copy_range.p1.z = dst_copy_range.p0.z + staged_dims.z; - G_CopyBufferToTexture( - cl_handle, - dst_handle, dst_copy_range, - staging_resource_handle, staging_footprint.Offset - ); - } -} - -//- Gpu <-> Gpu copy - -void G_CopyBufferToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, G_ResourceHandle src_handle, RngU64 src_copy_range) -{ - if (src_copy_range.max > src_copy_range.min) - { - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_CopyBytes; - cmd->copy_bytes.src = G_D12_ResourceFromHandle(src_handle); - cmd->copy_bytes.dst = G_D12_ResourceFromHandle(dst_handle); - cmd->copy_bytes.dst_offset = dst_offset; - 
cmd->copy_bytes.src_range = src_copy_range; - } -} - -void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset) -{ - Vec3I32 src_dims = Zi; - { - src_dims.x = dst_copy_range.p1.x - dst_copy_range.p0.x; - src_dims.y = dst_copy_range.p1.y - dst_copy_range.p0.y; - src_dims.z = dst_copy_range.p1.z - dst_copy_range.p0.z; - } - if (src_dims.x > 0 && src_dims.y > 0 && src_dims.z > 0) - { - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle); - G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); - Assert(!src->is_texture); - Assert(dst->is_texture); - - // Grab footprint info - D3D12_PLACED_SUBRESOURCE_FOOTPRINT src_footprint = Zi; - { - D3D12_RESOURCE_DESC src_desc = Zi; - { - ID3D12Resource_GetDesc(dst->d3d_resource, &src_desc); - src_desc.Width = src_dims.x; - src_desc.Height = src_dims.y; - src_desc.DepthOrArraySize = src_dims.z; - } - ID3D12Device_GetCopyableFootprints(G_D12.device, &src_desc, 0, 1, 0, &src_footprint, 0, 0, 0); - src_footprint.Offset = src_offset; - } - - D3D12_TEXTURE_COPY_LOCATION src_loc = Zi; - D3D12_TEXTURE_COPY_LOCATION dst_loc = Zi; - { - src_loc.pResource = src->d3d_resource; - src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src_loc.PlacedFootprint = src_footprint; - } - { - dst_loc.pResource = dst->d3d_resource; - dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst_loc.SubresourceIndex = 0; - } - - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_CopyTexels; - cmd->copy_texels.dst = dst; - cmd->copy_texels.src = src; - cmd->copy_texels.dst_loc = dst_loc; - cmd->copy_texels.src_loc = src_loc; - cmd->copy_texels.dst_texture_offset = dst_copy_range.p0; - } -} - -void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range) -{ - if ( - 
src_copy_range.p1.x > src_copy_range.p0.x && - src_copy_range.p1.y > src_copy_range.p0.y && - src_copy_range.p1.z > src_copy_range.p0.z - ) - { - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle); - G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); - Assert(src->is_texture); - Assert(dst->is_texture); - - D3D12_TEXTURE_COPY_LOCATION src_loc = Zi; - D3D12_TEXTURE_COPY_LOCATION dst_loc = Zi; - { - src_loc.pResource = dst->d3d_resource; - src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src_loc.SubresourceIndex = 0; - } - { - dst_loc.pResource = dst->d3d_resource; - dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst_loc.SubresourceIndex = 0; - } - - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_CopyTexels; - cmd->copy_texels.dst = dst; - cmd->copy_texels.src = src; - cmd->copy_texels.dst_loc = dst_loc; - cmd->copy_texels.src_loc = src_loc; - cmd->copy_texels.dst_texture_offset = dst_offset; - cmd->copy_texels.src_texture_range = src_copy_range; - } -} - -void G_CopyTextureToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range) -{ - // TODO - Assert(0); -} - -//- Constant - -void G_SetConstantEx(G_CommandListHandle cl_handle, i32 slot, void *src_32bit, u32 size) -{ - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Constant; - cmd->constant.slot = slot; - CopyBytes(&cmd->constant.value, src_32bit, MinU32(size, 4)); -} - -//- Memory sync - -void G_MemorySyncEx(G_CommandListHandle cl_handle, G_MemoryBarrierDesc desc) -{ - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Barrier; - cmd->barrier.desc = desc; -} - -//- Compute - -void G_ComputeEx(G_CommandListHandle cl_handle, ComputeShaderDesc cs, Vec3I32 threads) -{ - if 
(threads.x > 0 && threads.y > 0 && threads.z > 0) - { - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Compute; - cmd->compute.cs = cs; - cmd->compute.groups = G_GroupCountFromThreadCount(cs, threads); - } -} - -//- Rasterize - -void G_Rasterize( - G_CommandListHandle cl_handle, - VertexShaderDesc vs, PixelShaderDesc ps, - u32 instances_count, G_IndexBufferDesc index_buffer, - u32 render_targets_count, G_RenderTargetDesc *render_targets, - Rng3 viewport, Rng2 scissor, - G_RasterMode raster_mode -) -{ - if (instances_count > 0 && index_buffer.count > 0) - { - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Rasterize; - cmd->rasterize.vs = vs; - cmd->rasterize.ps = ps; - cmd->rasterize.instances_count = instances_count; - cmd->rasterize.index_buffer_desc = index_buffer; - for (u32 rt_idx = 0; rt_idx < MinU32(render_targets_count, G_MaxRenderTargets); ++rt_idx) - { - cmd->rasterize.render_target_descs[rt_idx] = render_targets[rt_idx]; - } - cmd->rasterize.viewport = viewport; - cmd->rasterize.scissor = scissor; - cmd->rasterize.raster_mode = raster_mode; - } -} - -//- Clear - -void G_ClearRenderTarget(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle, Vec4 color, i32 mip) -{ - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_ClearRtv; - cmd->clear_rtv.render_target = G_D12_ResourceFromHandle(resource_handle); - cmd->clear_rtv.color = color; - cmd->clear_rtv.mip = mip; -} - -//- Log - -void G_LogResource(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle) -{ - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Log; - cmd->log.resource = G_D12_ResourceFromHandle(resource_handle); -} - -//////////////////////////////////////////////////////////// -//~ 
@hookimpl Queue synchronization - -i64 G_CompletionValueFromQueue(G_QueueKind queue_kind) -{ - G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); - return ID3D12Fence_GetCompletedValue(queue->commit_fence); -} - -i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind) -{ - G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); - i64 target = 0; - { - Lock lock = LockS(&queue->commit_mutex); - target = queue->commit_fence_target; - Unlock(&lock); - } - return target; -} - -G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask) -{ - G_QueueCompletions completions = Zi; - for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) - { - if (queue_mask & (1 << queue_kind)) - { - completions.v[queue_kind] = G_CompletionValueFromQueue(queue_kind); - } - } - return completions; -} - -G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask) -{ - G_QueueCompletions completions = Zi; - for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) - { - if (queue_mask & (1 << queue_kind)) - { - completions.v[queue_kind] = G_CompletionTargetFromQueue(queue_kind); - } - } - return completions; -} - -void G_QueueSyncEx(G_QueueBarrierDesc desc) -{ - u64 fences_count = 0; - ID3D12Fence *fences[G_QueueKind_COUNT] = Zi; - i64 fence_targets[G_QueueKind_COUNT] = Zi; - - // Grab fences - for (G_QueueKind completion_queue_kind = 0; completion_queue_kind < G_QueueKind_COUNT; ++ completion_queue_kind) - { - G_D12_Queue *completion_queue = G_D12_QueueFromKind(completion_queue_kind); - i64 target = desc.completions.v[completion_queue_kind]; - if (target > 0) - { - i64 fence_value = ID3D12Fence_GetCompletedValue(completion_queue->commit_fence); - if (fence_value < target) - { - fences[fences_count] = completion_queue->commit_fence; - fence_targets[fences_count] = target; - fences_count += 1; - } - } - } - - // Sync Queues - for (G_QueueKind waiter_queue_kind = 0; waiter_queue_kind < G_QueueKind_COUNT; ++ 
waiter_queue_kind) - { - if (desc.wait_queues & (1 << waiter_queue_kind)) - { - G_D12_Queue *waiter_queue = G_D12_QueueFromKind(waiter_queue_kind); - for (u64 fence_idx = 0; fence_idx < fences_count; ++fence_idx) - { - ID3D12Fence *fence = fences[fence_idx]; - if (waiter_queue->commit_fence != fence) - { - i64 target = fence_targets[fence_idx]; - ID3D12CommandQueue_Wait(waiter_queue->d3d_queue, fence, target); - } - } - } - } - - // Sync Cpu - if (desc.wait_cpu && fences_count > 0) - { - if (G_D12_tl.sync_event == 0) - { - G_D12_tl.sync_event = CreateEvent(0, 0, 0, 0); - } - ID3D12Device1_SetEventOnMultipleFenceCompletion( - G_D12.device, - fences, - (u64 *)fence_targets, - fences_count, - D3D12_MULTIPLE_FENCE_WAIT_FLAG_ALL, - G_D12_tl.sync_event - ); - WaitForSingleObject(G_D12_tl.sync_event, INFINITE); - } -} - -//////////////////////////////////////////////////////////// -//~ @hookimpl Statistics - -G_Stats G_QueryStats(void) -{ - G_Stats result = Zi; - { - DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi; - IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.dxgi_adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info); - result.device_committed = info.CurrentUsage; - result.device_budget = info.Budget; - } - { - DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi; - IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.dxgi_adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info); - result.host_budget = info.Budget; - result.host_committed = info.CurrentUsage; - } - result.arenas_count = Atomic64Fetch(&G_D12.arenas_count); - result.cumulative_nonreuse_count = Atomic64Fetch(&G_D12.cumulative_nonreuse_count); - return result; -} - -//////////////////////////////////////////////////////////// -//~ @hookimpl Swapchain - -G_SwapchainHandle G_AcquireSwapchain(u64 os_window_handle) -{ - G_D12_Swapchain *swapchain = 0; - { - Arena *perm = PermArena(); - swapchain = PushStruct(perm, G_D12_Swapchain); - } - swapchain->window_hwnd = (HWND)os_window_handle; - return G_D12_MakeHandle(G_SwapchainHandle, swapchain); -} - -void 
G_ReleaseSwapchain(G_SwapchainHandle swapchain_handle) -{ - // TODO -} - -G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size) -{ - G_D12_Swapchain *swapchain = G_D12_SwapchainFromHandle(swapchain_handle); - size = VEC2I32(MaxI32(size.x, 1), MaxI32(size.y, 1)); - G_D12_Queue *direct_queue = G_D12_QueueFromKind(G_QueueKind_Direct); - - // Initialize swapchain - if (!swapchain->d3d_swapchain) - { - HRESULT hr = 0; - - // Create d3d swapchain - { - IDXGISwapChain3 *swapchain3 = 0; - { - // Create swapchain1 - IDXGISwapChain1 *swapchain1 = 0; - if (SUCCEEDED(hr)) - { - DXGI_SWAP_CHAIN_DESC1 desc = Zi; - desc.Format = G_D12_DxgiFormatFromGpuFormat(format); - desc.Width = size.x; - desc.Height = size.y; - desc.SampleDesc.Count = 1; - desc.SampleDesc.Quality = 0; - desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - desc.BufferCount = G_D12_SwapchainBufferCount; - desc.Scaling = DXGI_SCALING_NONE; - desc.Flags = G_D12_SwapchainFlags; - desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; - desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; - hr = IDXGIFactory2_CreateSwapChainForHwnd(G_D12.dxgi_factory, (IUnknown *)direct_queue->d3d_queue, swapchain->window_hwnd, &desc, 0, 0, &swapchain1); - } - - // Upgrade to swapchain3 - if (SUCCEEDED(hr)) - { - hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain3); - IDXGISwapChain1_Release(swapchain1); - } - } - swapchain->d3d_swapchain = swapchain3; - swapchain->backbuffers_format = format; - swapchain->backbuffers_resolution = size; - } - - // Create waitable object - { - HANDLE waitable = 0; - if (SUCCEEDED(hr) && G_D12_FrameLatency > 0) - { - hr = IDXGISwapChain3_SetMaximumFrameLatency(swapchain->d3d_swapchain, G_D12_FrameLatency); - waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->d3d_swapchain); - } - swapchain->waitable = waitable; - } - - // Create present fence - { - HANDLE present_event = 0; - ID3D12Fence *present_fence = 0; - if 
(SUCCEEDED(hr)) - { - present_event = CreateEvent(0, 0, 0, 0); - hr = ID3D12Device_CreateFence(G_D12.device, 0, 0, &IID_ID3D12Fence, (void **)&present_fence); - } - swapchain->present_fence = present_fence; - swapchain->present_event = present_event; - } - - // Disable Alt+Enter - IDXGIFactory_MakeWindowAssociation(G_D12.dxgi_factory, swapchain->window_hwnd, DXGI_MWA_NO_ALT_ENTER); - - if (FAILED(hr)) - { - Panic(Lit("Failed to create swapchain")); - } - } - - // Resize backbuffers - if (!MatchVec2I32(swapchain->backbuffers_resolution, size) || swapchain->backbuffers_format != format) - { - HRESULT hr = 0; - - // Wait for any previous backbuffer commands to finish - { - ID3D12Fence_SetEventOnCompletion(swapchain->present_fence, swapchain->present_fence_target, swapchain->present_event); - WaitForSingleObject(swapchain->present_event, INFINITE); - } - - // Release backbuffers - for (u32 i = 0; i < countof(swapchain->backbuffers); ++i) - { - G_D12_Resource *backbuffer = &swapchain->backbuffers[i]; - if (backbuffer->d3d_resource) - { - ID3D12Resource_Release(backbuffer->d3d_resource); - backbuffer->d3d_resource = 0; - } - } - - // Resize buffers - hr = IDXGISwapChain_ResizeBuffers(swapchain->d3d_swapchain, 0, size.x, size.y, DXGI_FORMAT_UNKNOWN, G_D12_SwapchainFlags); - if (FAILED(hr)) - { - // TODO: Don't panic - Panic(Lit("Failed to resize swapchain")); - } - } - - // Initialize backbuffers - { - for (u32 i = 0; i < countof(swapchain->backbuffers); ++i) - { - G_D12_Resource *backbuffer = &swapchain->backbuffers[i]; - if (!backbuffer->d3d_resource) - { - ID3D12Resource *d3d_resource = 0; - HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->d3d_swapchain, i, &IID_ID3D12Resource, (void **)&d3d_resource); - if (FAILED(hr)) - { - // TODO: Don't panic - Panic(Lit("Failed to retrieve swapchain buffer")); - } - ZeroStruct(backbuffer); - backbuffer->flags = G_ResourceFlag_AllowRenderTarget; - backbuffer->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1; - - 
ID3D12Resource_GetDesc(d3d_resource, (D3D12_RESOURCE_DESC *)&backbuffer->d3d_desc); - backbuffer->d3d_resource = d3d_resource; - - backbuffer->is_texture = 1; - backbuffer->texture_format = format; - backbuffer->texture_dims = VEC3I32(size.x, size.y, 1); - backbuffer->texture_mips = 1; - backbuffer->cmdlist_texture_layouts[0] = D3D12_BARRIER_LAYOUT_PRESENT; - backbuffer->swapchain = swapchain; - } - } - swapchain->backbuffers_format = format; - swapchain->backbuffers_resolution = size; - } - - // Wait for available backbuffer - if (swapchain->waitable) - { - DWORD wait_result = WaitForSingleObject(swapchain->waitable, 500); - if (wait_result == WAIT_TIMEOUT) - { - ID3D12Fence_SetEventOnCompletion(swapchain->present_fence, swapchain->present_fence_target, swapchain->present_event); - WaitForSingleObject(swapchain->present_event, INFINITE); - } - } - - // Grab current backbuffer - G_D12_Resource *cur_backbuffer = 0; - { - u32 backbuffer_idx = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->d3d_swapchain); - cur_backbuffer = &swapchain->backbuffers[backbuffer_idx]; - } - - return G_D12_MakeHandle(G_ResourceHandle, cur_backbuffer); -} - -void G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync) -{ - G_D12_Resource *backbuffer = G_D12_ResourceFromHandle(backbuffer_handle); - G_D12_Swapchain *swapchain = backbuffer->swapchain; - G_D12_Queue *direct_queue = G_D12_QueueFromKind(G_QueueKind_Direct); - - u32 present_flags = 0; - if (G_D12_TearingIsAllowed && vsync == 0) - { - present_flags |= DXGI_PRESENT_ALLOW_TEARING; - } - - // Present - { - HRESULT hr = IDXGISwapChain3_Present(swapchain->d3d_swapchain, vsync, present_flags); - if (!SUCCEEDED(hr)) - { - Assert(0); - } - } - - if (vsync != 0 && !(present_flags & DXGI_PRESENT_ALLOW_TEARING)) - { - // FIXME: Flush in windowed mode? 
- // DwmFlush(); - } - - // Increment swapchain fence - { - u64 target = ++swapchain->present_fence_target; - ID3D12CommandQueue_Signal(direct_queue->d3d_queue, swapchain->present_fence, target); - } -} - -//////////////////////////////////////////////////////////// -//~ Collection worker - -// TODO: Move this to common - -void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane) -{ - for (;;) - { - // FIXME: Remove this - SleepSeconds(0.100); - - // Copy print-buffers to readback - for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) - { - G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); - if (!G_IsResourceNil(queue->print_buffer)) - { - G_CommandListHandle cl = G_PrepareCommandList(queue_kind); - { - // Copy print buffer to readback buffer - G_CopyBufferToBuffer(cl, queue->print_readback_buffer, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size)); - // Reset counters to 0 - G_MemorySync( - cl, queue->print_buffer, - G_Stage_Copy, G_Access_CopyRead, - G_Stage_Copy, G_Access_CopyWrite - ); - u8 zero[12] = Zi; - G_CopyCpuToBuffer(cl, queue->print_buffer, 0, zero, RNGU64(0, sizeof(zero))); - } - G_CommitCommandList(cl); - } - } - - // TODO: Collect asynchronously - G_QueueSyncCpu(G_QueueMask_Direct | G_QueueMask_AsyncCompute); - - for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) - { - G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); - if (!G_IsResourceNil(queue->print_buffer)) - { - u32 attempted_print_bytes_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 0); // The number of bytes shaders attempted to write - u32 prints_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 1); // The number of shader prints that are in the buffer - u32 overflows_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 2); // The number of shader prints that could not fit in the buffer - u8 *start = G_StructFromResource(queue->print_readback_buffer, u8) + 12; - - // 
Deserialize - if (GPU_SHADER_PRINT_LOG) - { - if (prints_count > 0) - { - LogDebugF( - "Forwarding logs collected from GPU - Resident prints: %F, Total attempted prints: %F, Total attempted bytes: %F", - FmtUint(prints_count), - FmtUint(prints_count + overflows_count), - FmtUint(attempted_print_bytes_count) - ); - } - - // FIXME: Remove this - TempArena scratch = BeginScratchNoConflict(); - u8 *at = start; - { - for (u32 print_idx = 0; print_idx < prints_count; ++print_idx) - { - u32 chars_count = 0; - u32 args_count = 0; - b32 internal_overflow = 0; - { - u32 header = *(u32 *)at; - chars_count = (header & 0x0000FFFF) >> 0; - args_count = (header & 0x7FFF0000) >> 16; - internal_overflow = (header & 0xF0000000) >> 31; - at += 4; - } - - String fmt = Zi; - { - fmt.len = chars_count; - fmt.text = at; - at += chars_count; - } - - FmtArgArray args = Zi; - args.count = args_count; - { - if (args_count > 0) - { - args.args = PushStructs(scratch.arena, FmtArg, args_count); - for (u32 arg_idx = 0; arg_idx < args_count; ++arg_idx) - { - G_FmtArgKind gpu_kind = (G_FmtArgKind)(*at); - at += 1; - - FmtArg *dst = &args.args[arg_idx]; - switch (gpu_kind) - { - // Translate unsigned integer args - case G_FmtArgKind_Uint: - { - u32 gpu_value = *(u32 *)at; - *dst = FmtUint(gpu_value); - at += 4; - } break; - case G_FmtArgKind_Uint2: - { - Vec2U32 gpu_value = *(Vec2U32 *)at; - *dst = FmtUint2(gpu_value); - at += 8; - } break; - case G_FmtArgKind_Uint3: - { - Vec3U32 gpu_value = *(Vec3U32 *)at; - *dst = FmtUint3(gpu_value); - at += 12; - } break; - case G_FmtArgKind_Uint4: - { - Vec4U32 gpu_value = *(Vec4U32 *)at; - *dst = FmtUint4(gpu_value); - at += 16; - } break; - - // Translate signed integer args - case G_FmtArgKind_Sint: - { - i32 gpu_value = *(i32 *)at; - *dst = FmtSint(gpu_value); - at += 4; - } break; - case G_FmtArgKind_Sint2: - { - Vec2I32 gpu_value = *(Vec2I32 *)at; - *dst = FmtSint2(gpu_value); - at += 8; - } break; - case G_FmtArgKind_Sint3: - { - Vec3I32 gpu_value = 
*(Vec3I32 *)at; - *dst = FmtSint3(gpu_value); - at += 12; - } break; - case G_FmtArgKind_Sint4: - { - Vec4I32 gpu_value = *(Vec4I32 *)at; - *dst = FmtSint4(gpu_value); - at += 16; - } break; - - // Translate float args - case G_FmtArgKind_Float: - { - f32 gpu_value = *(f32 *)at; - *dst = FmtFloat(gpu_value); - at += 4; - } break; - case G_FmtArgKind_Float2: - { - Vec2 gpu_value = *(Vec2 *)at; - *dst = FmtFloat2(gpu_value); - at += 8; - } break; - case G_FmtArgKind_Float3: - { - Vec3 gpu_value = *(Vec3 *)at; - *dst = FmtFloat3(gpu_value); - at += 12; - } break; - case G_FmtArgKind_Float4: - { - Vec4 gpu_value = *(Vec4 *)at; - *dst = FmtFloat4(gpu_value); - at += 16; - } break; - } - dst->p = 16; - } - } - } - - String final_str = Zi; - if (internal_overflow) - { - final_str = Lit("[Shader PrintF is too large]"); - } - else - { - final_str = FormatString(scratch.arena, fmt, args); - } - LogDebug(final_str); - - at = (u8 *)AlignU64((u64)at, 4); - } - } - EndScratch(scratch); - } - } - } - } -} - -//////////////////////////////////////////////////////////// -//~ Async - -void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame) -{ - G_D12_AsyncCtx *async = &G_D12.async_ctx; - Arena *frame_arena = base_async_lane_frame->arena; - - // TODO: Investigate if we gain anything by going wide here (resource release might be exclusive driver-side) - if (lane->idx == 0) - { - // Pop pending releases - { - Lock lock = LockE(&G_D12.pending_releases_mutex); - { - if (G_D12.pending_releases.first) - { - if (async->pending_releases.last) - { - async->pending_releases.last->next = G_D12.pending_releases.first; - } - else - { - async->pending_releases.first = G_D12.pending_releases.first; - } - async->pending_releases.last = G_D12.pending_releases.last; - G_D12.pending_releases.first = 0; - G_D12.pending_releases.last = 0; - } - } - Unlock(&lock); - } - - // Release resources until we reach an uncompleted one - G_D12_Releasable *release = 
async->pending_releases.first; - if (release) - { - G_QueueCompletions completions = G_CompletionValuesFromQueues(G_QueueMask_All); - while (release) - { - G_D12_Releasable *next = release->next; - if (completions.v[release->completion_queue_kind] >= release->completion_queue_target) - { - SllQueuePop(async->pending_releases.first, async->pending_releases.last); - if (release->d3d_resource) - { - ID3D12Resource_Release(release->d3d_resource); - } - SllQueuePush(async->free_releases.first, async->free_releases.last, release); - } - else - { - break; - } - release = next; - } - } - - // Push releasable nodes to free list - if (async->pending_releases.first) - { - Lock lock = LockE(&G_D12.free_releases_mutex); - { - if (G_D12.free_releases.last) - { - G_D12.free_releases.last->next = async->free_releases.first; - } - else - { - G_D12.free_releases.first = async->free_releases.first; - } - G_D12.free_releases.last = async->free_releases.last; - async->free_releases.first = 0; - async->free_releases.last = 0; - } - Unlock(&lock); - } - } -} diff --git a/src/gpu_old/gpu_dx12/gpu_dx12_core.h b/src/gpu_old/gpu_dx12/gpu_dx12_core.h deleted file mode 100644 index b2af8fb9..00000000 --- a/src/gpu_old/gpu_dx12/gpu_dx12_core.h +++ /dev/null @@ -1,577 +0,0 @@ -//////////////////////////////////////////////////////////// -//~ DirectX12 libs - -#pragma warning(push, 0) - #include - #include - #include -#pragma warning(pop) - -#pragma comment(lib, "d3d12") -#pragma comment(lib, "dxgi") - -//////////////////////////////////////////////////////////// -//~ Tweakable definitions - -#define G_D12_TearingIsAllowed 1 -#define G_D12_FrameLatency 1 -#define G_D12_SwapchainBufferCount 2 -#define G_D12_SwapchainFlags ( \ - ((G_D12_TearingIsAllowed != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | \ - ((G_D12_FrameLatency != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT) \ - ) - -#define G_D12_MaxCbvSrvUavDescriptors (1024 * 128) -#define G_D12_MaxSamplerDescriptors (1024 * 1) -#define 
G_D12_MaxRtvDescriptors (1024 * 64) - -#define G_D12_MaxMips 16 -#define G_D12_MaxNameLen 64 - -//////////////////////////////////////////////////////////// -//~ Pipeline types - -// NOTE: Must be zero initialized (including padding bits) for hashing -Struct(G_D12_PipelineDesc) -{ - VertexShaderDesc vs; - PixelShaderDesc ps; - ComputeShaderDesc cs; - b32 is_wireframe; - D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type; - G_Format render_target_formats[G_MaxRenderTargets]; - G_BlendMode render_target_blend_modes[G_MaxRenderTargets]; -}; - -Struct(G_D12_Pipeline) -{ - G_D12_Pipeline *next_in_bin; - u64 hash; - - G_D12_PipelineDesc desc; - ID3D12PipelineState *pso; - - b32 ok; - String error; -}; - -Struct(G_D12_PipelineBin) -{ - Mutex mutex; - G_D12_Pipeline *first; -}; - -//////////////////////////////////////////////////////////// -//~ Resource types - -Struct(G_D12_Resource) -{ - G_D12_Resource *next; - G_D12_Resource *prev; - - G_ResourceFlag flags; - u64 uid; - - // D3D12 resource - D3D12_RESOURCE_DESC1 d3d_desc; - ID3D12Resource *d3d_resource; - D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address; - void *mapped; - - // Buffer info - u64 buffer_size; - u64 buffer_size_actual; - - // Texture info - b32 is_texture; - G_Format texture_format; - Vec3I32 texture_dims; - i32 texture_mips; - D3D12_BARRIER_LAYOUT cmdlist_texture_layouts[G_D12_MaxMips]; - - // Sampler info - G_SamplerDesc sampler_desc; - - // Backbuffer info - struct G_D12_Swapchain *swapchain; - - u64 name_len; - u8 name_text[G_D12_MaxNameLen]; -}; - -Struct(G_D12_ResourceList) -{ - u64 count; - G_D12_Resource *first; - G_D12_Resource *last; -}; - -//////////////////////////////////////////////////////////// -//~ Descriptor types - -Enum(G_D12_DescriptorHeapKind) -{ - G_D12_DescriptorHeapKind_CbvSrvUav, - G_D12_DescriptorHeapKind_Rtv, - G_D12_DescriptorHeapKind_Sampler, - - G_D12_DescriptorHeapKind_COUNT -}; - -Struct(G_D12_DescriptorHeap) -{ - Arena *descriptors_arena; - G_D12_DescriptorHeapKind kind; - - 
D3D12_DESCRIPTOR_HEAP_TYPE type; - u32 per_batch_count; - u32 descriptor_size; - ID3D12DescriptorHeap *d3d_heap; - D3D12_CPU_DESCRIPTOR_HANDLE start_handle; - - Mutex mutex; - struct G_D12_Descriptor *first_free; - u32 max_count; -}; - -Struct(G_D12_Descriptor) -{ - G_D12_Descriptor *next; - G_D12_Descriptor *prev; - - struct G_D12_Arena *gpu_arena; - G_QueueKind completion_queue_kind; - i64 completion_queue_target; - - G_D12_DescriptorHeap *heap; - D3D12_CPU_DESCRIPTOR_HANDLE first_handle; - u32 index; -}; - -Struct(G_D12_DescriptorList) -{ - u64 count; - G_D12_Descriptor *first; - G_D12_Descriptor *last; -}; - -//////////////////////////////////////////////////////////// -//~ Arena types - -// TODO: -// To support D3D12_RESOURCE_HEAP_TIER_1 devices, create separate heaps for: -// - Buffers -// - Non-render target & non-depth stencil textures -// - Render target or depth stencil textures -Enum(G_D12_ResourceHeapKind) -{ - G_D12_ResourceHeapKind_Gpu, - G_D12_ResourceHeapKind_Cpu, - G_D12_ResourceHeapKind_CpuWriteCombined, - - G_D12_ResourceHeapKind_COUNT -}; - -Struct(G_D12_Arena) -{ - Arena *arena; - - G_D12_DescriptorList descriptors; - G_D12_DescriptorList reset_descriptors_by_heap[G_D12_DescriptorHeapKind_COUNT]; - - G_D12_ResourceList resources; - G_D12_ResourceList reset_resources; - // G_D12_ResourceList free_resources; -}; - -//////////////////////////////////////////////////////////// -//~ Staging types - -Struct(G_D12_StagingRing) -{ - Arena *arena; - G_D12_Arena *gpu_arena; - u64 size; - - G_D12_Resource *resource; - u8 *base; - - struct G_D12_StagingRegionNode *head_region_node; - struct G_D12_StagingRegionNode *first_free_region_node; - -}; - -Struct(G_D12_StagingRegionNode) -{ - G_D12_StagingRing *ring; - - // Ring links (requires ring lock to read) - G_D12_StagingRegionNode *prev; - G_D12_StagingRegionNode *next; - - // Command list links - G_D12_StagingRegionNode *next_in_command_list; - - // Region info - Atomic64 completion_target; - u64 pos; -}; 
- -//////////////////////////////////////////////////////////// -//~ Command queue types - -Struct(G_D12_CommandQueueDesc) -{ - D3D12_COMMAND_LIST_TYPE type; - D3D12_COMMAND_QUEUE_PRIORITY priority; - String name; -}; - -Struct(G_D12_Queue) -{ - ID3D12CommandQueue *d3d_queue; - G_D12_CommandQueueDesc desc; - - Mutex commit_mutex; - ID3D12Fence *commit_fence; - u64 commit_fence_target; - - // Global resources - u64 print_buffer_size; - G_ResourceHandle print_buffer; - G_ResourceHandle print_readback_buffer; - G_ByteAddressBufferRef print_buffer_ref; - - // Raw command lists - struct G_D12_RawCommandList *first_committed_cl; - struct G_D12_RawCommandList *last_committed_cl; - - // Staging heap - Mutex staging_mutex; - G_D12_StagingRing *staging_ring; - - Fence sync_fence; -}; - -//////////////////////////////////////////////////////////// -//~ Raw command list types - -Struct(G_D12_RawCommandList) -{ - G_D12_Queue *queue; - G_D12_RawCommandList *next; - - u64 commit_fence_target; - - ID3D12CommandAllocator *d3d_ca; - ID3D12GraphicsCommandList7 *d3d_cl; - - // Direct queue command lists keep a constant list of CPU-only descriptors - G_D12_Descriptor *rtv_descriptors[G_MaxRenderTargets]; - G_D12_Descriptor *rtv_clear_descriptor; -}; - -//////////////////////////////////////////////////////////// -//~ Releasable types - -Struct(G_D12_Releasable) -{ - G_D12_Releasable *next; - - G_QueueKind completion_queue_kind; - i64 completion_queue_target; - - ID3D12Resource *d3d_resource; - - u64 name_len; - u8 name_text[G_D12_MaxNameLen]; -}; - -Struct(G_D12_ReleasableList) -{ - G_D12_Releasable *first; - G_D12_Releasable *last; -}; - -//////////////////////////////////////////////////////////// -//~ Command list types - -#define G_D12_CmdsPerChunk 256 - -Enum(G_D12_CmdKind) -{ - G_D12_CmdKind_None, - G_D12_CmdKind_Barrier, - G_D12_CmdKind_Constant, - G_D12_CmdKind_CopyBytes, - G_D12_CmdKind_CopyTexels, - G_D12_CmdKind_Compute, - G_D12_CmdKind_Rasterize, - G_D12_CmdKind_ClearRtv, - 
G_D12_CmdKind_Log, - G_D12_CmdKind_Discard, -}; - -Struct(G_D12_Cmd) -{ - G_D12_CmdKind kind; - b32 skip; - union - { - struct - { - i32 slot; - u32 value; - } constant; - - struct - { - G_MemoryBarrierDesc desc; - - // Post-batch data - b32 is_end_of_batch; - u64 batch_gen; - } barrier; - - struct - { - G_D12_Resource *dst; - G_D12_Resource *src; - u64 dst_offset; - RngU64 src_range; - } copy_bytes; - - struct - { - G_D12_Resource *dst; - G_D12_Resource *src; - D3D12_TEXTURE_COPY_LOCATION dst_loc; - D3D12_TEXTURE_COPY_LOCATION src_loc; - Vec3I32 dst_texture_offset; - Rng3I32 src_texture_range; - } copy_texels; - - struct - { - ComputeShaderDesc cs; - Vec3I32 groups; - } compute; - - struct - { - VertexShaderDesc vs; - PixelShaderDesc ps; - u32 instances_count; - G_IndexBufferDesc index_buffer_desc; - G_RenderTargetDesc render_target_descs[G_MaxRenderTargets]; - Rng3 viewport; - Rng2 scissor; - G_RasterMode raster_mode; - } rasterize; - - struct - { - G_D12_Resource *render_target; - Vec4 color; - i32 mip; - } clear_rtv; - - struct - { - G_D12_Resource *resource; - } log; - - struct - { - G_D12_Resource *resource; - } discard; - }; -}; - -Struct(G_D12_CmdChunk) -{ - G_D12_CmdChunk *next; - struct G_D12_CmdList *cl; - G_D12_Cmd *cmds; - u64 cmds_count; -}; - -Struct(G_D12_CmdList) -{ - G_D12_CmdList *next; - G_QueueKind queue_kind; - - G_D12_DescriptorList reset_descriptors; - G_D12_ReleasableList releases; - - G_D12_StagingRegionNode *first_staging_region; - G_D12_StagingRegionNode *last_staging_region; - - G_D12_CmdChunk *first_cmd_chunk; - G_D12_CmdChunk *last_cmd_chunk; - u64 chunks_count; - u64 cmds_count; -}; - -//////////////////////////////////////////////////////////// -//~ Swapchain types - -Struct(G_D12_Swapchain) -{ - - IDXGISwapChain3 *d3d_swapchain; - HWND window_hwnd; - HANDLE waitable; - - HANDLE present_event; - ID3D12Fence *present_fence; - u64 present_fence_target; - - G_Format backbuffers_format; - Vec2I32 backbuffers_resolution; - G_D12_Resource 
backbuffers[G_D12_SwapchainBufferCount]; -}; - -//////////////////////////////////////////////////////////// -//~ State types - -Struct(G_D12_AsyncCtx) -{ - G_D12_ReleasableList pending_releases; - G_D12_ReleasableList free_releases; -}; - -Struct(G_D12_Ctx) -{ - IsolatedAtomic64 resource_creation_gen; - b32 independent_devices_enabled; - b32 debug_layer_enabled; - b32 validation_layer_enabled; - - // Stats - Atomic64 arenas_count; - Atomic64 cumulative_nonreuse_count; - - Atomic64 driver_resources_allocated; - Atomic64 driver_descriptors_allocated; - - // Queues - G_D12_Queue queues[G_QueueKind_COUNT]; - - // Descriptor heaps - G_D12_DescriptorHeap descriptor_heaps[G_D12_DescriptorHeapKind_COUNT]; - - // Rootsig - ID3D12RootSignature *bindless_rootsig; - - // Pipelines - G_D12_PipelineBin pipeline_bins[1024]; - - // Command lists - Mutex free_cmd_lists_mutex; - G_D12_CmdList *first_free_cmd_list; - - // Command chunks - Mutex free_cmd_chunks_mutex; - G_D12_CmdChunk *first_free_cmd_chunk; - - // Swapchains - Mutex free_swapchains_mutex; - G_D12_Swapchain *first_free_swapchain; - - // Independent device (only valid when independent_devices_enabled = 1) - struct - { - ID3D12SDKConfiguration1 *sdk_config; - ID3D12DeviceConfiguration *device_config; - ID3D12DeviceFactory *device_factory; - } independent; - - // Device - IDXGIFactory6 *dxgi_factory; - IDXGIAdapter3 *dxgi_adapter; - ID3D12Device10 *device; - - // Release-queue - Mutex pending_releases_mutex; - Mutex free_releases_mutex; - G_D12_ReleasableList pending_releases; - G_D12_ReleasableList free_releases; - - // Async - G_D12_AsyncCtx async_ctx; -}; - -Struct(G_D12_ThreadLocalCtx) -{ - HANDLE sync_event; -}; - -extern G_D12_Ctx G_D12; -extern ThreadLocal G_D12_ThreadLocalCtx G_D12_tl; - -//////////////////////////////////////////////////////////// -//~ Helpers - -#define G_D12_MakeHandle(type, ptr) (type) { .v = (u64)(ptr) } - -G_D12_Arena *G_D12_ArenaFromHandle(G_ArenaHandle handle); -G_D12_CmdList 
*G_D12_CmdListFromHandle(G_CommandListHandle handle); -G_D12_Resource *G_D12_ResourceFromHandle(G_ResourceHandle handle); -G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle); - -DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format); -D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages); -D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses); -D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout); -String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout); - -void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip); - -void G_D12_SetObjectName(ID3D12Object *object, String name); -String G_D12_NameFromObject(Arena *arena, ID3D12Object *object); - -//////////////////////////////////////////////////////////// -//~ Pipeline - -G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc); -u64 G_D12_HashFromPipelineDesc(G_D12_PipelineDesc desc); - -//////////////////////////////////////////////////////////// -//~ Queue - -G_D12_Queue *G_D12_QueueFromKind(G_QueueKind kind); - -//////////////////////////////////////////////////////////// -//~ Raw command list - -G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind); -i64 G_D12_CommitRawCommandList(G_D12_RawCommandList *cl); - -//////////////////////////////////////////////////////////// -//~ Arena - -void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena); - -//////////////////////////////////////////////////////////// -//~ Descriptor - -G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, u32 index); -G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind); - -//////////////////////////////////////////////////////////// -//~ Command helpers - -G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl); -G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v); -G_D12_StagingRegionNode 
*G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size); - -//////////////////////////////////////////////////////////// -//~ Collection worker - -void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane); - -//////////////////////////////////////////////////////////// -//~ Async - -void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame); diff --git a/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat b/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat deleted file mode 100644 index 9cfa41c0..00000000 --- a/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:55430c370d4f012ef7b2e7854fd194ed8abb2c94a537835be12bd38f9ff80e67 -size 1662796 diff --git a/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat b/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat deleted file mode 100644 index 222b2b8b..00000000 --- a/src/gpu_old/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dd64750d758873691414f705c2fdff08ffd25437f77198d3ee00c9040f48856a -size 1775662 diff --git a/src/gpu_old/gpu_res/noise_128x128x64_16.dat b/src/gpu_old/gpu_res/noise_128x128x64_16.dat deleted file mode 100644 index bfbdffc9..00000000 --- a/src/gpu_old/gpu_res/noise_128x128x64_16.dat +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:35c141664e6879a3a336816112a8fbabe72067d5dcdd57c130d836de6dda5b2e -size 2097152 diff --git a/src/gpu_old/gpu_shared.cgh b/src/gpu_old/gpu_shared.cgh deleted file mode 100644 index c88f6d80..00000000 --- a/src/gpu_old/gpu_shared.cgh +++ /dev/null @@ -1,333 +0,0 @@ -//////////////////////////////////////////////////////////// -//~ Ref types - -Enum(G_RefKind) -{ - G_RefKind_StructuredBuffer, - G_RefKind_ByteAddressBuffer, - G_RefKind_Texture1D, - 
G_RefKind_Texture2D, - G_RefKind_Texture3D, - G_RefKind_SamplerState, -}; - -Struct(G_StructuredBufferRef) { u32 v; }; -Struct(G_ByteAddressBufferRef) { u32 v; }; -Struct(G_Texture1DRef) { u32 v; }; -Struct(G_Texture2DRef) { u32 v; }; -Struct(G_Texture3DRef) { u32 v; }; -Struct(G_SamplerStateRef) { u32 v; }; - -#define G_IsRefNil(r) ((r).v == 0) - -//////////////////////////////////////////////////////////// -//~ Constant types - -// -// D3D12 exposes 64 root constants and Vulkan exposes 32 push constants. -// Supposedly AMD hardware will start spilling constants once more than -// 12 are in use - https://gpuopen.com/learn/rdna-performance-guide/ -// -#define G_NumGeneralPurposeConstants (24) // Constants available for any usage -#define G_NumReservedConstants (4) // Constants reserved for internal usage by the GPU layer -#define G_NumConstants (G_NumGeneralPurposeConstants + G_NumReservedConstants) - -#if IsCpu - #define G_ForceDeclConstant(type, name, slot) \ - enum { name = slot }; \ - Struct(name##__shaderconstanttype) { type v; } - #define G_DeclConstant(type, name, slot) \ - StaticAssert(sizeof(type) <= 4); \ - StaticAssert(slot < G_NumGeneralPurposeConstants); \ - G_ForceDeclConstant(type, name, slot) -#else - #define G_ForceDeclConstant(type, name, slot) cbuffer name : register(b##slot) { type name; } - #define G_DeclConstant(type, name, slot) G_ForceDeclConstant(type, name, slot) -#endif - -//////////////////////////////////////////////////////////// -//~ Reserved constants - -// The constants declared below assume this configuration is accurate for slot usage -StaticAssert(G_NumGeneralPurposeConstants == 24); -StaticAssert(G_NumReservedConstants >= 3); - -G_ForceDeclConstant(G_ByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 24); -G_ForceDeclConstant(b32, G_ShaderConst_TweakB32, 25); -G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 26); - -#if IsGpu - #define G_TweakBool G_ShaderConst_TweakB32 - #define G_TweakFloat G_ShaderConst_TweakF32 -#endif - 
////////////////////////////////////////////////////////////
//~ Basic samplers

// Kinds of pre-built samplers: each of {Point, Bilinear, Trilinear} filtering
// paired with each of {Clamp, Wrap, Mirror} addressing.
// G_BasicSamplerKind_COUNT is a trailing sentinel, not a sampler kind.
Enum(G_BasicSamplerKind)
{
  G_BasicSamplerKind_PointClamp,
  G_BasicSamplerKind_PointWrap,
  G_BasicSamplerKind_PointMirror,
  G_BasicSamplerKind_BilinearClamp,
  G_BasicSamplerKind_BilinearWrap,
  G_BasicSamplerKind_BilinearMirror,
  G_BasicSamplerKind_TrilinearClamp,
  G_BasicSamplerKind_TrilinearWrap,
  G_BasicSamplerKind_TrilinearMirror,

  G_BasicSamplerKind_COUNT
};

////////////////////////////////////////////////////////////
//~ Resource dereference

#if IsGpu
  // NOTE: Uniform dereferencing is faster than Non-Uniform on AMD hardware

  // Each helper turns a ref (a descriptor heap index stored in `.v`) into the
  // matching HLSL resource object. The read-only view is fetched from slot
  // `r.v`; the *RW variants fetch the read-write view from slot `r.v + 1`.

  //- Scalar/Uniform dereference
  SamplerState G_SDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[r.v]; }
  template<typename T> StructuredBuffer<T> G_SDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v]; }
  ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v]; }
  template<typename T> Texture1D<T> G_SDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v]; }
  template<typename T> Texture2D<T> G_SDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v]; }
  template<typename T> Texture3D<T> G_SDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v]; }
  template<typename T> RWStructuredBuffer<T> G_SDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
  RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
  template<typename T> RWTexture1D<T> G_SDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
  template<typename T> RWTexture2D<T> G_SDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
  template<typename T> RWTexture3D<T> G_SDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v + 1]; }

  //- Vector/Non-Uniform dereference
  SamplerState G_VDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; }
  template<typename T> StructuredBuffer<T> G_VDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
  ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
  template<typename T> Texture1D<T> G_VDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
  template<typename T> Texture2D<T> G_VDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
  template<typename T> Texture3D<T> G_VDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
  template<typename T> RWStructuredBuffer<T> G_VDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
  RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
  template<typename T> RWTexture1D<T> G_VDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
  template<typename T> RWTexture2D<T> G_VDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
  template<typename T> RWTexture3D<T> G_VDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
#endif

////////////////////////////////////////////////////////////
//~ Resource countof

#define G_MaxMips 16
#define G_MaxRenderTargets 8

#if IsGpu
  // countof() overloads returning whatever GetDimensions reports for the
  // resource: a scalar for buffers and 1D textures, a vector for 2D/3D textures.
  // Structured buffers also report a stride, which is queried and discarded.
  template<typename T> u32 countof(StructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }
  template<typename T> u32 countof(RWStructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }
  u32 countof(ByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; }
  u32 countof(RWByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; }
  template<typename T> u32 countof(Texture1D<T> obj) { u32 result; obj.GetDimensions(result); return result; }
  template<typename T> u32 countof(RWTexture1D<T> obj) { u32 result; obj.GetDimensions(result); return result; }
  template<typename T> Vec2U32 countof(Texture2D<T> obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; }
  template<typename T> Vec2U32 countof(RWTexture2D<T> obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; }
  template<typename T> Vec3U32 countof(Texture3D<T> obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; }
  template<typename T> Vec3U32 countof(RWTexture3D<T> obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; }
#endif

////////////////////////////////////////////////////////////
//~ Debug printf

// This technique is based on MJP's article - https://therealmjp.github.io/posts/hlsl-printf/

// Tag written in front of every serialized print argument.
// The BEGINSIZEn entries are markers, not argument kinds: a kind that compares
// greater than BEGINSIZEn carries at least n 32-bit components, which is how
// G_PrintF_ decides how many components to serialize.
Enum(G_FmtArgKind)
{
  G_FmtArgKind_None,
  G_FmtArgKind_End,

  G_FmtArgKind_BEGINSIZE1,

  G_FmtArgKind_Uint,
  G_FmtArgKind_Sint,
  G_FmtArgKind_Float,

  G_FmtArgKind_BEGINSIZE2,

  G_FmtArgKind_Uint2,
  G_FmtArgKind_Sint2,
  G_FmtArgKind_Float2,

  G_FmtArgKind_BEGINSIZE3,

  G_FmtArgKind_Uint3,
  G_FmtArgKind_Sint3,
  G_FmtArgKind_Float3,

  G_FmtArgKind_BEGINSIZE4,

  G_FmtArgKind_Uint4,
  G_FmtArgKind_Sint4,
  G_FmtArgKind_Float4,
};

// One print argument: a kind tag plus up to four 32-bit components.
// Float components are stored as their raw bit patterns.
Struct(G_FmtArg)
{
  G_FmtArgKind kind;
  Vec4U32 v;
};

#if IsGpu && GPU_SHADER_PRINT
  //- Argument constructors (only the components named by `kind` are written)
  G_FmtArg G_Fmt(u32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint; result.v.x = v; return result; }
  G_FmtArg G_Fmt(Vec2U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint2; result.v.xy = v.xy; return result; }
  G_FmtArg G_Fmt(Vec3U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint3; result.v.xyz = v.xyz; return result; }
  G_FmtArg G_Fmt(Vec4U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint4; result.v.xyzw = v.xyzw; return result; }

  G_FmtArg G_Fmt(i32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint; result.v.x = v; return result; }
  G_FmtArg G_Fmt(Vec2I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint2; result.v.xy = v.xy; return result; }
  G_FmtArg G_Fmt(Vec3I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint3; result.v.xyz = v.xyz; return result; }
  G_FmtArg G_Fmt(Vec4I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint4; result.v.xyzw = v.xyzw; return result; }

  G_FmtArg G_Fmt(f32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float; result.v.x = asuint(v); return result; }
  G_FmtArg G_Fmt(Vec2 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float2; result.v.xy = asuint(v.xy); return result; }
  G_FmtArg G_Fmt(Vec3 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float3; result.v.xyz = asuint(v.xyz); return result; }
  G_FmtArg G_Fmt(Vec4 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float4; result.v.xyzw = asuint(v.xyzw); return result; }

  // Terminator appended to every argument list by G_PrintF
  G_FmtArg G_FmtEnd(void) { G_FmtArg result; result.kind = G_FmtArgKind_End; return result; }

  // Per-invocation scratch buffer that a print is serialized into before it is
  // committed to the shared print buffer. Bytes are packed four per u32 chunk.
  Struct(G_TempPrintBuffer)
  {
    // NOTE: The larger the array size, the longer the compilation time
    u32 byte_chunks[64];
    u32 bytes_count;
    u32 chars_count;
    u32 args_count;
    b32 overflowed;
  };

  // Appends the low 8 bits of `v` to `buff`. When the chunk array is full the
  // byte is dropped and `buff.overflowed` is set instead.
  void G_PushPrintByte(inout G_TempPrintBuffer buff, u32 v)
  {
    u32 chunk_idx = buff.bytes_count / 4;
    if (chunk_idx < countof(buff.byte_chunks))
    {
      u32 byte_idx_in_chunk = buff.bytes_count & 0x03;
      if (byte_idx_in_chunk == 0)
      {
        // Since buff is not zero initialized, we set the chunk on first write here
        buff.byte_chunks[chunk_idx] = v & 0xFF;
      }
      else
      {
        buff.byte_chunks[chunk_idx] |= (v & 0xFF) << (byte_idx_in_chunk * 8);
      }
      buff.bytes_count += 1;
    }
    else
    {
      buff.overflowed = 1;
    }
  }

  // Copies a serialized print into the buffer referenced by
  // G_ShaderConst_PrintBufferRef.
  //
  // Layout of that buffer as used here (byte offsets):
  //   0  - allocation counter (running total of bytes requested)
  //   4  - success counter    (prints that fit)
  //   8  - overflow counter   (prints that did not fit)
  //   12 - packed prints, each a 4 byte header followed by its chunks
  void G_CommitPrint(G_TempPrintBuffer buff)
  {
    RWByteAddressBuffer rw = G_SDerefRW(G_ShaderConst_PrintBufferRef);

    // An overflowed print is committed as a bare header with the overflow bit set
    if (buff.overflowed)
    {
      buff.bytes_count = 0;
      buff.chars_count = 0;
      buff.args_count = 0;
    }

    u32 chunks_count = (buff.bytes_count + 3) / 4;
    u32 alloc_size = 0;
    alloc_size += 4; // Header
    alloc_size += chunks_count * 4; // Chunks

    // Atomic fetch + add to base counter
    u32 base;
    rw.InterlockedAdd(0, alloc_size, base);
    base += 4; // Offset for allocation counter
    base += 4; // Offset for success counter
    base += 4; // Offset for overflow counter

    if ((base + alloc_size) < countof(rw))
    {
      // Increment success counter
      rw.InterlockedAdd(4, 1);
      u32 pos = 0;

      // Write header: bits 0-15 chars count, bits 16-30 args count, bit 31 overflow flag
      {
        u32 header = 0;
        header |= (buff.chars_count << 0) & 0x0000FFFF;
        header |= (buff.args_count << 16) & 0x7FFF0000;
        header |= (buff.overflowed << 31) & 0x80000000;
        rw.Store(base + pos, header);
        pos += 4;
      }

      // Write chunks
      for (u32 chunk_idx = 0; chunk_idx < chunks_count; ++chunk_idx)
      {
        u32 chunk = buff.byte_chunks[chunk_idx];
        rw.Store(base + pos, chunk);
        pos += 4;
      }
    }
    else
    {
      // Increment overflow counter
      rw.InterlockedAdd(8, 1);
    }
  }

  // Serializes `fmt` (up to, not including, its terminating 0) followed by each
  // argument as a kind byte plus 4 little-endian bytes per component, then
  // commits the result. G_PrintF always appends G_FmtEnd(), so __args is never
  // empty; that terminator is excluded from args_count (hence the - 1).
  #define G_PrintF_(fmt, ...) do { \
    G_TempPrintBuffer __tmp; \
    __tmp.bytes_count = 0; \
    __tmp.overflowed = 0; \
    u32 __char_idx = 0; \
    while (U32FromChar(fmt[__char_idx]) != 0) \
    { \
      G_PushPrintByte(__tmp, U32FromChar(fmt[__char_idx])); \
      ++__char_idx; \
    } \
    G_FmtArg __args[] = { __VA_ARGS__ }; \
    __tmp.chars_count = __char_idx; \
    __tmp.args_count = (countof(__args) - 1); \
    for (u32 __arg_idx = 0; __arg_idx < __tmp.args_count; ++__arg_idx) \
    { \
      G_FmtArg __arg = __args[__arg_idx]; \
      G_PushPrintByte(__tmp, __arg.kind); \
      if (__arg.kind > G_FmtArgKind_BEGINSIZE1) \
      { \
        G_PushPrintByte(__tmp, __arg.v.x >> 0); \
        G_PushPrintByte(__tmp, __arg.v.x >> 8); \
        G_PushPrintByte(__tmp, __arg.v.x >> 16); \
        G_PushPrintByte(__tmp, __arg.v.x >> 24); \
      } \
      if (__arg.kind > G_FmtArgKind_BEGINSIZE2) \
      { \
        G_PushPrintByte(__tmp, __arg.v.y >> 0); \
        G_PushPrintByte(__tmp, __arg.v.y >> 8); \
        G_PushPrintByte(__tmp, __arg.v.y >> 16); \
        G_PushPrintByte(__tmp, __arg.v.y >> 24); \
      } \
      if (__arg.kind > G_FmtArgKind_BEGINSIZE3) \
      { \
        G_PushPrintByte(__tmp, __arg.v.z >> 0); \
        G_PushPrintByte(__tmp, __arg.v.z >> 8); \
        G_PushPrintByte(__tmp, __arg.v.z >> 16); \
        G_PushPrintByte(__tmp, __arg.v.z >> 24); \
      } \
      if (__arg.kind > G_FmtArgKind_BEGINSIZE4) \
      { \
        G_PushPrintByte(__tmp, __arg.v.w >> 0); \
        G_PushPrintByte(__tmp, __arg.v.w >> 8); \
        G_PushPrintByte(__tmp, __arg.v.w >> 16); \
        G_PushPrintByte(__tmp, __arg.v.w >> 24); \
      } \
    } \
    G_CommitPrint(__tmp); \
  } while (0)

  #define G_PrintF(fmt, ...) G_PrintF_(fmt, ##__VA_ARGS__, G_FmtEnd())

#else
  // Printing disabled: expands to nothing. Variadic so that call sites passing
  // format arguments still preprocess when shader printing is compiled out.
  #define G_PrintF(fmt, ...)
#endif