/* GPU layer API: shared types, convenience macros, and @hookdecl backend entry points. */
////////////////////////////////////////////////////////////
|
|
//~ Handle types
|
|
|
|
Struct(GPU_ArenaHandle) { u64 v; };
|
|
Struct(GPU_CommandListHandle) { u64 v; };
|
|
Struct(GPU_ResourceHandle) { u64 v; };
|
|
Struct(GPU_SwapchainHandle) { u64 v; };
|
|
|
|
/* Nil checks: evaluate to non-zero when the handle's `v` field is 0.
 * `h` is a handle value (not a pointer) and is evaluated exactly once. */
#define GPU_IsArenaNil(h)       ((h).v == 0)
#define GPU_IsCommandListNil(h) ((h).v == 0)
#define GPU_IsResourceNil(h)    ((h).v == 0)
#define GPU_IsSwapchainNil(h)   ((h).v == 0)
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Queue types
|
|
|
|
/* Compile-time switch read by GPU_QueueKind: when non-zero the three queue
 * kinds get distinct indices (GPU_NumQueues == 3); when zero they all alias
 * index 0 (GPU_NumQueues == 1). */
#define GPU_IsMultiQueueEnabled 1
|
|
|
|
Enum(GPU_QueueKind)
|
|
{
|
|
#if GPU_IsMultiQueueEnabled
|
|
GPU_QueueKind_Direct = 0,
|
|
GPU_QueueKind_AsyncCompute = 1,
|
|
GPU_QueueKind_AsyncCopy = 2,
|
|
GPU_NumQueues = 3
|
|
#else
|
|
GPU_QueueKind_Direct = 0,
|
|
GPU_QueueKind_AsyncCompute = 0,
|
|
GPU_QueueKind_AsyncCopy = 0,
|
|
GPU_NumQueues = 1
|
|
#endif
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Format types
|
|
|
|
/* NOTE: Matches DirectX DXGI_FORMAT */
|
|
Enum(GPU_Format)
|
|
{
|
|
GPU_Format_Unknown = 0,
|
|
GPU_Format_R32G32B32A32_Typeless = 1,
|
|
GPU_Format_R32G32B32A32_Float = 2,
|
|
GPU_Format_R32G32B32A32_Uint = 3,
|
|
GPU_Format_R32G32B32A32_Sint = 4,
|
|
GPU_Format_R32G32B32_Typeless = 5,
|
|
GPU_Format_R32G32B32_Float = 6,
|
|
GPU_Format_R32G32B32_Uint = 7,
|
|
GPU_Format_R32G32B32_Sint = 8,
|
|
GPU_Format_R16G16B16A16_Typeless = 9,
|
|
GPU_Format_R16G16B16A16_Float = 10,
|
|
GPU_Format_R16G16B16A16_Unorm = 11,
|
|
GPU_Format_R16G16B16A16_Uint = 12,
|
|
GPU_Format_R16G16B16A16_Snorm = 13,
|
|
GPU_Format_R16G16B16A16_Sint = 14,
|
|
GPU_Format_R32G32_Typeless = 15,
|
|
GPU_Format_R32G32_Float = 16,
|
|
GPU_Format_R32G32_Uint = 17,
|
|
GPU_Format_R32G32_Sint = 18,
|
|
GPU_Format_R32G8X24_Typeless = 19,
|
|
GPU_Format_D32_Float_S8X24_Uint = 20,
|
|
GPU_Format_R32_Float_X8X24_Typeless = 21,
|
|
GPU_Format_X32_Typeless_G8X24_Uint = 22,
|
|
GPU_Format_R10G10B10A2_Typeless = 23,
|
|
GPU_Format_R10G10B10A2_Unorm = 24,
|
|
GPU_Format_R10G10B10A2_Uint = 25,
|
|
GPU_Format_R11G11B10_Float = 26,
|
|
GPU_Format_R8G8B8A8_Typeless = 27,
|
|
GPU_Format_R8G8B8A8_Unorm = 28,
|
|
GPU_Format_R8G8B8A8_Unorm_Srgb = 29,
|
|
GPU_Format_R8G8B8A8_Uint = 30,
|
|
GPU_Format_R8G8B8A8_Snorm = 31,
|
|
GPU_Format_R8G8B8A8_Sint = 32,
|
|
GPU_Format_R16G16_Typeless = 33,
|
|
GPU_Format_R16G16_Float = 34,
|
|
GPU_Format_R16G16_Unorm = 35,
|
|
GPU_Format_R16G16_Uint = 36,
|
|
GPU_Format_R16G16_Snorm = 37,
|
|
GPU_Format_R16G16_Sint = 38,
|
|
GPU_Format_R32_Typeless = 39,
|
|
GPU_Format_D32_Float = 40,
|
|
GPU_Format_R32_Float = 41,
|
|
GPU_Format_R32_Uint = 42,
|
|
GPU_Format_R32_Sint = 43,
|
|
GPU_Format_R24G8_Typeless = 44,
|
|
GPU_Format_D24_Unorm_S8_Uint = 45,
|
|
GPU_Format_R24_Unorm_X8_Typeless = 46,
|
|
GPU_Format_X24_Typeless_G8_Uint = 47,
|
|
GPU_Format_R8G8_Typeless = 48,
|
|
GPU_Format_R8G8_Unorm = 49,
|
|
GPU_Format_R8G8_Uint = 50,
|
|
GPU_Format_R8G8_Snorm = 51,
|
|
GPU_Format_R8G8_Sint = 52,
|
|
GPU_Format_R16_Typeless = 53,
|
|
GPU_Format_R16_Float = 54,
|
|
GPU_Format_D16_Unorm = 55,
|
|
GPU_Format_R16_Unorm = 56,
|
|
GPU_Format_R16_Uint = 57,
|
|
GPU_Format_R16_Snorm = 58,
|
|
GPU_Format_R16_Sint = 59,
|
|
GPU_Format_R8_Typeless = 60,
|
|
GPU_Format_R8_Unorm = 61,
|
|
GPU_Format_R8_Uint = 62,
|
|
GPU_Format_R8_Snorm = 63,
|
|
GPU_Format_R8_Sint = 64,
|
|
GPU_Format_A8_Unorm = 65,
|
|
GPU_Format_R1_Unorm = 66,
|
|
GPU_Format_R9G9B9E5_SharedXP = 67,
|
|
GPU_Format_R8G8_B8G8_Unorm = 68,
|
|
GPU_Format_G8R8_G8B8_Unorm = 69,
|
|
GPU_Format_BC1_Typeless = 70,
|
|
GPU_Format_BC1_Unorm = 71,
|
|
GPU_Format_BC1_Unorm_Srgb = 72,
|
|
GPU_Format_BC2_Typeless = 73,
|
|
GPU_Format_BC2_Unorm = 74,
|
|
GPU_Format_BC2_Unorm_Srgb = 75,
|
|
GPU_Format_BC3_Typeless = 76,
|
|
GPU_Format_BC3_Unorm = 77,
|
|
GPU_Format_BC3_Unorm_Srgb = 78,
|
|
GPU_Format_BC4_Typeless = 79,
|
|
GPU_Format_BC4_Unorm = 80,
|
|
GPU_Format_BC4_Snorm = 81,
|
|
GPU_Format_BC5_Typeless = 82,
|
|
GPU_Format_BC5_Unorm = 83,
|
|
GPU_Format_BC5_Snorm = 84,
|
|
GPU_Format_B5G6R5_Unorm = 85,
|
|
GPU_Format_B5G5R5A1_Unorm = 86,
|
|
GPU_Format_B8G8R8A8_Unorm = 87,
|
|
GPU_Format_B8G8R8X8_Unorm = 88,
|
|
GPU_Format_R10G10B10_XR_BIAS_A2_Unorm = 89,
|
|
GPU_Format_B8G8R8A8_Typeless = 90,
|
|
GPU_Format_B8G8R8A8_Unorm_Srgb = 91,
|
|
GPU_Format_B8G8R8X8_Typeless = 92,
|
|
GPU_Format_B8G8R8X8_Unorm_Srgb = 93,
|
|
GPU_Format_BC6H_Typeless = 94,
|
|
GPU_Format_BC6H_UF16 = 95,
|
|
GPU_Format_BC6H_SF16 = 96,
|
|
GPU_Format_BC7_Typeless = 97,
|
|
GPU_Format_BC7_Unorm = 98,
|
|
GPU_Format_BC7_Unorm_Srgb = 99,
|
|
GPU_Format_AYUV = 100,
|
|
GPU_Format_Y410 = 101,
|
|
GPU_Format_Y416 = 102,
|
|
GPU_Format_NV12 = 103,
|
|
GPU_Format_P010 = 104,
|
|
GPU_Format_P016 = 105,
|
|
GPU_Format_420_Opaque = 106,
|
|
GPU_Format_YUY2 = 107,
|
|
GPU_Format_Y210 = 108,
|
|
GPU_Format_Y216 = 109,
|
|
GPU_Format_NV11 = 110,
|
|
GPU_Format_AI44 = 111,
|
|
GPU_Format_IA44 = 112,
|
|
GPU_Format_P8 = 113,
|
|
GPU_Format_A8P8 = 114,
|
|
GPU_Format_B4G4R4A4_Unorm = 115,
|
|
GPU_Format_P208 = 130,
|
|
GPU_Format_V208 = 131,
|
|
GPU_Format_V408 = 132,
|
|
GPU_Format_SamplerFeedbackMinMipOpaque = 189,
|
|
GPU_Format_SamplerFeedbackMipRegionUsedOpaque = 190,
|
|
GPU_Format_A4B4G4R4_Unorm = 191,
|
|
GPU_Format_Count = 192
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Barrier types
|
|
|
|
Enum(GPU_Stage)
|
|
{
|
|
GPU_Stage_None = 0,
|
|
|
|
/* Compute stages */
|
|
GPU_Stage_ComputeShading = (1 << 1),
|
|
|
|
/* Draw stages */
|
|
GPU_Stage_IndexAssembly = (1 << 2),
|
|
GPU_Stage_VertexShading = (1 << 3),
|
|
GPU_Stage_PixelShading = (1 << 4),
|
|
GPU_Stage_DepthStencil = (1 << 5),
|
|
GPU_Stage_RenderTarget = (1 << 6),
|
|
|
|
/* Copy stages */
|
|
GPU_Stage_Copy = (1 << 7),
|
|
|
|
/* Indirect stages */
|
|
GPU_Stage_Indirect = (1 << 8),
|
|
|
|
/* Aggregate stages */
|
|
GPU_Stage_AllDraw = GPU_Stage_IndexAssembly |
|
|
GPU_Stage_VertexShading |
|
|
GPU_Stage_PixelShading |
|
|
GPU_Stage_DepthStencil |
|
|
GPU_Stage_RenderTarget,
|
|
|
|
GPU_Stage_AllShading = GPU_Stage_ComputeShading |
|
|
GPU_Stage_VertexShading |
|
|
GPU_Stage_PixelShading,
|
|
|
|
GPU_Stage_All = 0xFFFFFFFF
|
|
};
|
|
|
|
Enum(GPU_Access)
|
|
{
|
|
GPU_Access_None = 0,
|
|
|
|
GPU_Access_ShaderReadWrite = (1 << 1),
|
|
GPU_Access_ShaderRead = (1 << 2),
|
|
|
|
GPU_Access_CopyWrite = (1 << 3),
|
|
GPU_Access_CopyRead = (1 << 4),
|
|
|
|
GPU_Access_DepthStencilRead = (1 << 5),
|
|
GPU_Access_DepthStencilWrite = (1 << 6),
|
|
GPU_Access_RenderTargetWrite = (1 << 7),
|
|
|
|
GPU_Access_IndexBuffer = (1 << 8),
|
|
GPU_Access_IndirectArgument = (1 << 9),
|
|
|
|
GPU_Access_All = 0xFFFFFFFF
|
|
};
|
|
|
|
Enum(GPU_Layout)
|
|
{
|
|
GPU_Layout_NoChange,
|
|
|
|
/* Allows a resource to be used on any queue with any access type, as long
|
|
* as there is only one writer at a time, and the writer is not writing to
|
|
* any texels currently being read.
|
|
*
|
|
* Resources cannot transition to/from this layout. They must be created
|
|
* with it and are locked to it.
|
|
*/
|
|
GPU_Layout_Simultaneous, /* D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS */
|
|
|
|
GPU_Layout_Undefined, /* D3D12_BARRIER_LAYOUT_UNDEFINED */
|
|
|
|
//////////////////////////////
|
|
//- Queue-agnostic
|
|
|
|
GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present, /* D3D12_BARRIER_LAYOUT_COMMON */
|
|
|
|
//////////////////////////////
|
|
//- Direct & Compute queue
|
|
|
|
GPU_Layout_DirectComputeQueue_ShaderRead_CopyRead, /* D3D12_BARRIER_LAYOUT_GENERIC_READ */
|
|
|
|
GPU_Layout_DirectComputeQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS */
|
|
GPU_Layout_DirectComputeQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_SHADER_RESOURCE */
|
|
GPU_Layout_DirectComputeQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COPY_SOURCE */
|
|
|
|
//////////////////////////////
|
|
//- Direct queue
|
|
|
|
GPU_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON */
|
|
GPU_Layout_DirectQueue_ShaderRead_CopyRead_DepthStencilRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ */
|
|
|
|
GPU_Layout_DirectQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS */
|
|
GPU_Layout_DirectQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE */
|
|
GPU_Layout_DirectQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE */
|
|
|
|
GPU_Layout_DirectQueue_DepthStencilRead_DepthStencilWrite, /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE */
|
|
GPU_Layout_DirectQueue_DepthStencilRead, /* D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ */
|
|
GPU_Layout_DirectQueue_RenderTargetWrite, /* D3D12_BARRIER_LAYOUT_RENDER_TARGET */
|
|
|
|
//////////////////////////////
|
|
//- Compute queue
|
|
|
|
GPU_Layout_ComputeQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON */
|
|
GPU_Layout_ComputeQueue_ShaderRead_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ */
|
|
|
|
GPU_Layout_ComputeQueue_ShaderReadWrite, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS */
|
|
GPU_Layout_ComputeQueue_ShaderRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE */
|
|
GPU_Layout_ComputeQueue_CopyRead, /* D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE */
|
|
};
|
|
|
|
/* Barrier will execute after previous stages specified by `sync_prev`, and before next stages specified by `sync_next`.
|
|
* When barrier executes:
|
|
* - Necessary resource flushes will occur based on `access_prev` & `access_next`
|
|
* - Texture layout will transition based on `layout` (if specified)
|
|
*/
|
|
Struct(GPU_BarrierDesc)
|
|
{
|
|
GPU_ResourceHandle resource;
|
|
b32 is_global;
|
|
GPU_Stage sync_prev;
|
|
GPU_Stage sync_next;
|
|
GPU_Access access_prev;
|
|
GPU_Access access_next;
|
|
GPU_Layout layout;
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Resource types
|
|
|
|
Enum(GPU_ResourceFlag)
|
|
{
|
|
GPU_ResourceFlag_None = 0,
|
|
GPU_ResourceFlag_AllowShaderReadWrite = (1 << 0),
|
|
GPU_ResourceFlag_AllowRenderTarget = (1 << 1),
|
|
GPU_ResourceFlag_AllowDepthStencil = (1 << 2),
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Buffer types
|
|
|
|
Struct(GPU_BufferDesc)
|
|
{
|
|
u64 size;
|
|
GPU_ResourceFlag flags;
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Texture types
|
|
|
|
/* NOTE(review): presumably the upper bound for `render_targets_count` in
 * GPU_Rasterize; no use is visible in this header, so confirm in the backend. */
#define GPU_MaxRenderTargets 8
|
|
|
|
Enum(GPU_TextureKind)
|
|
{
|
|
GPU_TextureKind_1D,
|
|
GPU_TextureKind_2D,
|
|
GPU_TextureKind_3D,
|
|
};
|
|
|
|
Struct(GPU_TextureDesc)
|
|
{
|
|
GPU_TextureKind kind;
|
|
GPU_ResourceFlag flags;
|
|
GPU_Format format;
|
|
Vec3I32 dims;
|
|
GPU_Layout initial_layout;
|
|
i32 mip_levels; /* Will be clamped to range [1, inf) */
|
|
Vec4 clear_color;
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Sampler types
|
|
|
|
/* NOTE: Matches DirectX D3D12_FILTER */
|
|
Enum(GPU_Filter)
|
|
{
|
|
/* Standard filter */
|
|
GPU_Filter_MinMagMipPoint = 0,
|
|
GPU_Filter_MinMagPointMipLinear = 0x1,
|
|
GPU_Filter_MinPointMagLinearMipPoint = 0x4,
|
|
GPU_Filter_MinPointMagMipLinear = 0x5,
|
|
GPU_Filter_MinLinearMagMipPoint = 0x10,
|
|
GPU_Filter_MinLinearMagPointMipLinear = 0x11,
|
|
GPU_Filter_MinMagLinearMipPoint = 0x14,
|
|
GPU_Filter_MinMagMipLinear = 0x15,
|
|
GPU_Filter_MinMagAnisotropicMipPoint = 0x54,
|
|
GPU_Filter_Anisotropic = 0x55,
|
|
|
|
/* Comparison filter */
|
|
GPU_Filter_Comparison_MinMagMipPoint = 0x80,
|
|
GPU_Filter_Comparison_MinMagPointMipLinear = 0x81,
|
|
GPU_Filter_Comparison_MinPointMagLinearMipPoint = 0x84,
|
|
GPU_Filter_Comparison_MinPointMagMipLinear = 0x85,
|
|
GPU_Filter_Comparison_MinLinearMagMipPoint = 0x90,
|
|
GPU_Filter_Comparison_MinLinearMagPointMipLinear = 0x91,
|
|
GPU_Filter_Comparison_MinMagLinearMipPoint = 0x94,
|
|
GPU_Filter_Comparison_MinMagMipLinear = 0x95,
|
|
GPU_Filter_Comparison_MinMagAnisotropicMipPoint = 0xd4,
|
|
GPU_Filter_Comparison_Anisotropic = 0xd5,
|
|
|
|
/* Minimum filter */
|
|
GPU_Filter_Minimum_MinMagMipPoint = 0x100,
|
|
GPU_Filter_Minimum_MinMagPointMipLinear = 0x101,
|
|
GPU_Filter_Minimum_MinPointMagLinearMipPoint = 0x104,
|
|
GPU_Filter_Minimum_MinPointMagMipLinear = 0x105,
|
|
GPU_Filter_Minimum_MinLinearMagMipPoint = 0x110,
|
|
GPU_Filter_Minimum_MinLinearMagPointMipLinear = 0x111,
|
|
GPU_Filter_Minimum_MinMagLinearMipPoint = 0x114,
|
|
GPU_Filter_Minimum_MinMagMipLinear = 0x115,
|
|
GPU_Filter_Minimum_MinMagAnisotropicMipPoint = 0x155,
|
|
GPU_Filter_Minimum_Anisotropic = 0x155,
|
|
|
|
/* Maximum filter */
|
|
GPU_Filter_Maximum_MinMagMipPoint = 0x180,
|
|
GPU_Filter_Maximum_MinMagPointMipLinear = 0x181,
|
|
GPU_Filter_Maximum_MinPointMagLinearMipPoint = 0x184,
|
|
GPU_Filter_Maximum_MinPointMagMipLinear = 0x185,
|
|
GPU_Filter_Maximum_MinLinearMagMipPoint = 0x190,
|
|
GPU_Filter_Maximum_MinLinearMagPointMipLinear = 0x191,
|
|
GPU_Filter_Maximum_MinMagLinearMipPoint = 0x194,
|
|
GPU_Filter_Maximum_MinMagMipLinear = 0x195,
|
|
GPU_Filter_Maximum_MinMagAnisotropicMipPoint = 0x1d4,
|
|
GPU_Filter_Maximum_Anisotropic = 0x1d5
|
|
};
|
|
|
|
/* NOTE: Matches DirectX D3D12_TEXTURE_ADDRESS_MODE */
|
|
Enum(GPU_AddressMode)
|
|
{
|
|
GPU_AddressMode_Wrap = 1,
|
|
GPU_AddressMode_Mirror = 2,
|
|
GPU_AddressMode_Clamp = 3, /* Default */
|
|
GPU_AddressMode_Border = 4,
|
|
GPU_AddressMode_MirrorOnce = 5
|
|
};
|
|
|
|
/* NOTE: Matches DirectX D3D12_COMPARISON_FUNC */
|
|
Enum(GPU_ComparisonFunc)
|
|
{
|
|
GPU_ComparisonFunc_None = 0,
|
|
GPU_ComparisonFunc_Never = 1,
|
|
GPU_ComparisonFunc_Less = 2,
|
|
GPU_ComparisonFunc_Equal = 3,
|
|
GPU_ComparisonFunc_LessEqual = 4,
|
|
GPU_ComparisonFunc_Greater = 5,
|
|
GPU_ComparisonFunc_NotEqual = 6,
|
|
GPU_ComparisonFunc_GreaterEqual = 7,
|
|
GPU_ComparisonFunc_Always = 8
|
|
};
|
|
|
|
Struct(GPU_SamplerDesc)
|
|
{
|
|
GPU_Filter filter;
|
|
GPU_AddressMode x;
|
|
GPU_AddressMode y;
|
|
GPU_AddressMode z;
|
|
f32 mip_lod_bias;
|
|
u32 max_anisotropy;
|
|
GPU_ComparisonFunc comparison;
|
|
Vec4 border_color;
|
|
f32 min_lod;
|
|
f32 max_lod;
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Rasterization types
|
|
|
|
Enum(GPU_RasterMode)
|
|
{
|
|
GPU_RasterMode_None,
|
|
GPU_RasterMode_PointList,
|
|
GPU_RasterMode_LineList,
|
|
GPU_RasterMode_LineStrip,
|
|
GPU_RasterMode_TriangleList,
|
|
GPU_RasterMode_TriangleStrip,
|
|
GPU_RasterMode_WireTriangleList,
|
|
GPU_RasterMode_WireTriangleStrip,
|
|
};
|
|
|
|
Struct(GPU_IndexBufferDesc)
|
|
{
|
|
GPU_ResourceHandle resource;
|
|
u32 index_size; /* Either 2 for u16 indices, or 4 for u32 indices */
|
|
u32 index_count;
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Synchronization types
|
|
|
|
Enum(GPU_FenceOpKind)
|
|
{
|
|
GPU_FenceOpKind_Set,
|
|
GPU_FenceOpKind_Add,
|
|
};
|
|
|
|
Struct(GPU_FenceOp)
|
|
{
|
|
GPU_FenceOpKind kind;
|
|
Fence *fence;
|
|
i64 v;
|
|
};
|
|
|
|
/* Build a GPU_FenceOp value of the given kind. `_fence` is a Fence pointer
 * and `_v` the i64 operand; each argument is evaluated exactly once. */
#define GPU_SetFence(_fence, _v) ((GPU_FenceOp) { .kind = GPU_FenceOpKind_Set, .fence = (_fence), .v = (_v) })
#define GPU_AddFence(_fence, _v) ((GPU_FenceOp) { .kind = GPU_FenceOpKind_Add, .fence = (_fence), .v = (_v) })
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Statistic types
|
|
|
|
Struct(GPU_Stats)
|
|
{
|
|
/* Memory usage */
|
|
u64 local_committed;
|
|
u64 local_budget;
|
|
u64 non_local_committed;
|
|
u64 non_local_budget;
|
|
|
|
/* Resources */
|
|
u64 driver_resources_allocated;
|
|
u64 driver_descriptors_allocated;
|
|
|
|
/* TODO: Arena stats (committed, reserved, etc) */
|
|
};
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookdecl Startup
|
|
|
|
/* Backend startup hook. NOTE(review): presumably must run before any other
 * GPU_* call; the ordering requirement is not visible in this header. */
void GPU_Startup(void);
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookdecl Arena
|
|
|
|
/* Arena lifetime. Resources and shader handles are pushed onto an arena
 * (see the GPU_Push* functions, which all take one).
 * NOTE(review): presumably releasing an arena frees everything pushed
 * onto it - confirm in the backend. */
GPU_ArenaHandle GPU_AcquireArena(void);
void GPU_ReleaseArena(GPU_ArenaHandle arena);
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookdecl Resource
|
|
|
|
//- Resource creation
|
|
|
|
GPU_ResourceHandle GPU_PushBufferEx(GPU_ArenaHandle arena, GPU_BufferDesc desc);
|
|
GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena, GPU_TextureDesc desc);
|
|
GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc);
|
|
|
|
/* Push a buffer sized for `count` elements of `type`.
 * Any further arguments are spliced in as extra designated initializers of
 * the GPU_BufferDesc, e.g. `.flags = GPU_ResourceFlag_AllowShaderReadWrite`. */
#define GPU_PushBuffer(arena, type, count, ...) GPU_PushBufferEx((arena),         \
  (GPU_BufferDesc) {                                                              \
    .size = sizeof(type) * (count),                                               \
    __VA_ARGS__                                                                   \
  }                                                                               \
)
|
|
|
|
/* Push a 1D texture. `_size` is the width (scalar); height and depth are set
 * to 1. Any further arguments are spliced in as extra designated
 * initializers of the GPU_TextureDesc (e.g. `.mip_levels = 4`). */
#define GPU_PushTexture1D(arena, _format, _size, _initial_layout, ...) GPU_PushTextureEx((arena), \
  (GPU_TextureDesc) {                                                                             \
    .kind = GPU_TextureKind_1D,                                                                   \
    .format = (_format),                                                                          \
    .dims = VEC3I32((_size), 1, 1),                                                               \
    .initial_layout = (_initial_layout),                                                          \
    __VA_ARGS__                                                                                   \
  }                                                                                               \
)
|
|
|
|
/* Push a 2D texture. `_size` is a value with `.x` / `.y` members (it is
 * evaluated twice, so avoid side effects); depth is set to 1. Any further
 * arguments are spliced in as extra designated initializers of the
 * GPU_TextureDesc (e.g. `.mip_levels = 4`). */
#define GPU_PushTexture2D(arena, _format, _size, _initial_layout, ...) GPU_PushTextureEx((arena), \
  (GPU_TextureDesc) {                                                                             \
    .kind = GPU_TextureKind_2D,                                                                   \
    .format = (_format),                                                                          \
    .dims = VEC3I32((_size).x, (_size).y, 1),                                                     \
    .initial_layout = (_initial_layout),                                                          \
    __VA_ARGS__                                                                                   \
  }                                                                                               \
)
|
|
|
|
/* Push a 3D texture. `_size` is a Vec3I32 copied straight into `dims`.
 * Any further arguments are spliced in as extra designated initializers of
 * the GPU_TextureDesc (e.g. `.mip_levels = 4`). */
#define GPU_PushTexture3D(arena, _format, _size, _initial_layout, ...) GPU_PushTextureEx((arena), \
  (GPU_TextureDesc) {                                                                             \
    .kind = GPU_TextureKind_3D,                                                                   \
    .format = (_format),                                                                          \
    .dims = (_size),                                                                              \
    .initial_layout = (_initial_layout),                                                          \
    __VA_ARGS__                                                                                   \
  }                                                                                               \
)
|
|
|
|
//- Shader handle creation
|
|
|
|
StructuredBufferHandle GPU_PushStructuredBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u64 element_size, u64 element_offset);
|
|
RWStructuredBufferHandle GPU_PushRWStructuredBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u64 element_size, u64 element_offset);
|
|
ByteAddressBufferHandle GPU_PushByteAddressBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u64 u32_offset);
|
|
RWByteAddressBufferHandle GPU_PushRWByteAddressBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u64 u32_offset);
|
|
Texture1DHandle GPU_PushTexture1DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
|
|
RWTexture1DHandle GPU_PushRWTexture1DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
|
|
Texture2DHandle GPU_PushTexture2DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
|
|
RWTexture2DHandle GPU_PushRWTexture2DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
|
|
Texture3DHandle GPU_PushTexture3DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
|
|
RWTexture3DHandle GPU_PushRWTexture3DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
|
|
SamplerStateHandle GPU_PushSamplerStateHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource);
|
|
|
|
/* Whole-buffer convenience wrappers around the *Ex functions: the structured
 * variants take the element type and pass sizeof(type) with offset 0; the
 * byte-address variants pass offset 0. */
#define GPU_PushStructuredBufferHandle(arena, resource, type)   GPU_PushStructuredBufferHandleEx((arena), (resource), sizeof(type), 0)
#define GPU_PushRWStructuredBufferHandle(arena, resource, type) GPU_PushRWStructuredBufferHandleEx((arena), (resource), sizeof(type), 0)

#define GPU_PushByteAddressBufferHandle(arena, resource)   GPU_PushByteAddressBufferHandleEx((arena), (resource), 0)
#define GPU_PushRWByteAddressBufferHandle(arena, resource) GPU_PushRWByteAddressBufferHandleEx((arena), (resource), 0)
|
|
|
|
//- Index buffer creation
|
|
|
|
/* Build a GPU_IndexBufferDesc covering all of `_res`, treating it as 2-byte
 * (16) or 4-byte (32) indices. The index count is the buffer's byte size
 * divided by the index size. `_res` is evaluated twice, so pass a plain
 * handle rather than an expression with side effects. */
#define GPU_IdxBuff16(_res) ((GPU_IndexBufferDesc) { .resource = (_res), .index_size = 2, .index_count = (GPU_CountBuffer((_res), i16)) })
#define GPU_IdxBuff32(_res) ((GPU_IndexBufferDesc) { .resource = (_res), .index_size = 4, .index_count = (GPU_CountBuffer((_res), i32)) })
|
|
|
|
//- Count
|
|
|
|
u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer);
|
|
i32 GPU_Count1D(GPU_ResourceHandle texture);
|
|
Vec2I32 GPU_Count2D(GPU_ResourceHandle texture);
|
|
Vec3I32 GPU_Count3D(GPU_ResourceHandle texture);
|
|
i32 GPU_CountWidth(GPU_ResourceHandle texture);
|
|
i32 GPU_CountHeight(GPU_ResourceHandle texture);
|
|
i32 GPU_CountDepth(GPU_ResourceHandle texture);
|
|
|
|
/* Number of whole `type` elements that fit in `buffer` (byte size divided by
 * sizeof(type), rounded down). The expansion is fully parenthesized so it
 * can be used inside larger expressions such as `n / GPU_CountBuffer(b, T)`
 * without mis-associating. */
#define GPU_CountBuffer(buffer, type) (GPU_CountBufferBytes((buffer)) / sizeof(type))
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookdecl Command
|
|
|
|
//- Command list
|
|
|
|
GPU_CommandListHandle GPU_PrepareCommandList(GPU_QueueKind queue);
|
|
void GPU_CommitCommandListEx(GPU_CommandListHandle cl, u64 fence_ops_count, GPU_FenceOp *fence_ops);
|
|
|
|
#define GPU_CommitCommandList(cl) GPU_CommitCommandListEx((cl), 0, 0)
|
|
|
|
//- Arena
|
|
|
|
/* Record a reset of `arena` into command list `cl`.
 * NOTE(review): presumably the reset takes effect in command-list order
 * rather than immediately - confirm in the backend. */
void GPU_ResetArena(GPU_CommandListHandle cl, GPU_ArenaHandle arena);
|
|
|
|
//- Cpu -> Gpu copy
|
|
|
|
void GPU_CopyCpuToBuffer(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range);
|
|
void GPU_CopyCpuToTexture(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range);
|
|
|
|
//- Gpu <-> Gpu copy
|
|
|
|
void GPU_CopyBufferToBuffer(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, GPU_ResourceHandle src, RngU64 src_copy_range);
|
|
void GPU_CopyBufferToTexture(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Vec3I32 src_dims, Rng3I32 src_copy_range);
|
|
void GPU_CopyTextureToTexture(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Rng3I32 src_copy_range);
|
|
void GPU_CopyTextureToBuffer(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Rng3I32 src_copy_range);
|
|
|
|
//- Constant
|
|
|
|
/* Low-level constant setter used by the GPU_SetConstant macro: records
 * `size` bytes from `src_32bit` into constant slot `slot` on `cl`.
 * NOTE(review): the parameter name suggests the data is handled in 32-bit
 * units - confirm size/alignment requirements in the backend. */
void GPU_SetConstant_(GPU_CommandListHandle cl, i32 slot, void *src_32bit, u32 size);
|
|
|
|
/* Set shader constant `name` to `value` on command list `cl`.
 * `name` must be an identifier for which both a slot value (`name`) and a
 * wrapper type `name##__shaderconstanttype` with a `.v` member exist. The
 * value is first stored into a local of that wrapper type, so it is
 * converted to the constant's declared type before being handed to
 * GPU_SetConstant_. `value` is parenthesized so the expansion does not
 * depend on the precedence of the expression passed in. */
#define GPU_SetConstant(cl, name, value) do {                  \
  name##__shaderconstanttype __src;                            \
  __src.v = (value);                                           \
  GPU_SetConstant_((cl), (name), &__src, sizeof(__src));       \
} while (0)
|
|
|
|
//- Barrier
|
|
|
|
/* Record the barrier described by `desc` into command list `cl`. The
 * GPU_MemorySync / GPU_MemoryLayoutSync / GPU_GlobalMemorySync macros build
 * the GPU_BarrierDesc and call this. */
void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc);
|
|
|
|
/* Barrier on a single resource without a layout transition (`layout` stays
 * zero, i.e. GPU_Layout_NoChange). Every macro argument is parenthesized in
 * the expansion. */
#define GPU_MemorySync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \
  GPU_BarrierEx((_cl), (GPU_BarrierDesc) {                                                 \
    .resource = (_resource),                                                               \
    .sync_prev = (_sync_prev),                                                             \
    .access_prev = (_access_prev),                                                         \
    .sync_next = (_sync_next),                                                             \
    .access_next = (_access_next),                                                         \
  })
|
|
|
|
/* Barrier on a single resource that also transitions it to `_layout`.
 * Every macro argument is parenthesized in the expansion. */
#define GPU_MemoryLayoutSync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
  GPU_BarrierEx((_cl), (GPU_BarrierDesc) {                                                                \
    .resource = (_resource),                                                                              \
    .sync_prev = (_sync_prev),                                                                            \
    .access_prev = (_access_prev),                                                                        \
    .sync_next = (_sync_next),                                                                            \
    .access_next = (_access_next),                                                                        \
    .layout = (_layout),                                                                                  \
  })
|
|
|
|
/* Global barrier: sets `is_global` and leaves `resource` nil and `layout`
 * at GPU_Layout_NoChange. Every macro argument is parenthesized in the
 * expansion. */
#define GPU_GlobalMemorySync(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
  GPU_BarrierEx((_cl), (GPU_BarrierDesc) {                                            \
    .is_global = 1,                                                                   \
    .sync_prev = (_sync_prev),                                                        \
    .access_prev = (_access_prev),                                                    \
    .sync_next = (_sync_next),                                                        \
    .access_next = (_access_next),                                                    \
  })
|
|
|
|
/* "Dumb" barriers: the same three barrier forms with GPU_Stage_All /
 * GPU_Access_All on both sides, i.e. the most conservative synchronization
 * the barrier API can express. */
#define GPU_DumbMemorySync(cl, resource) \
  GPU_MemorySync((cl), (resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)

#define GPU_DumbMemoryLayoutSync(cl, resource, layout) \
  GPU_MemoryLayoutSync((cl), (resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (layout))

#define GPU_DumbGlobalMemorySync(cl) \
  GPU_GlobalMemorySync((cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
|
|
|
|
//- Compute
|
|
|
|
/* Record a dispatch of compute shader `cs` into `cl`.
 * NOTE(review): `groups` is presumably the thread-group count per axis -
 * confirm in the backend. */
void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups);
|
|
|
|
//- Rasterize
|
|
|
|
void GPU_Rasterize(GPU_CommandListHandle cl,
|
|
VertexShader vs, PixelShader ps,
|
|
u32 instances_count, GPU_IndexBufferDesc index_buffer,
|
|
u32 render_targets_count, GPU_ResourceHandle *render_targets,
|
|
Rng3 viewport, Rng2 scissor,
|
|
GPU_RasterMode mode);
|
|
|
|
//- Clear
|
|
|
|
/* Record a clear of `render_target` to `color` into `cl`. */
void GPU_ClearRenderTarget(GPU_CommandListHandle cl, GPU_ResourceHandle render_target, Vec4 color);
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookdecl Synchronization
|
|
|
|
/* `waiter_queue` will block until `completion_queue` completes all submitted commands */
|
|
void GPU_SyncQueue(GPU_QueueKind completion_queue, GPU_QueueKind waiter_queue);
|
|
|
|
/* All queues will block until `completion_queue` completes all submitted commands */
|
|
void GPU_SyncAllQueues(GPU_QueueKind completion_queue);
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookdecl Statistics
|
|
|
|
/* Return a GPU_Stats snapshot (memory usage and driver resource counts). */
GPU_Stats GPU_QueryStats(void);
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookdecl Swapchain
|
|
|
|
GPU_SwapchainHandle GPU_AcquireSwapchain(u64 os_window_handle);
|
|
void GPU_ReleaseSwapchain(GPU_SwapchainHandle swapchain);
|
|
|
|
/* Waits until a new backbuffer is ready from the swapchain.
|
|
* This should be called before rendering for minimum latency. */
|
|
GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, GPU_Format format, Vec2I32 size);
|
|
|
|
void GPU_CommitBackbuffer(GPU_ResourceHandle backbuffer, i32 vsync);
|