vk testing

This commit is contained in:
jacob 2026-03-03 23:49:29 -06:00
parent 38196a8eb7
commit a1cf0a46b5
15 changed files with 6325 additions and 1 deletions

3
src/gpu/gpu.lay generated
View File

@ -27,4 +27,5 @@
@IncludeC gpu_common.c
@DefaultDownstream Win32 gpu_dx12
// @DefaultDownstream Win32 gpu_dx12
@DefaultDownstream Any gpu_vk

11
src/gpu/gpu_vk/gpu_vk.lay generated Normal file
View File

@ -0,0 +1,11 @@
@Layer gpu_vk
//////////////////////////////
//- Api
@IncludeC gpu_vk_core.h
//////////////////////////////
//- Impl
@IncludeC gpu_vk_core.c

View File

@ -0,0 +1,270 @@
// Vulkan backend state: one process-wide context plus one per-thread context.
// Both are zero-initialized; declared `extern` in the matching gpu_vk header.
G_VK_Ctx G_VK = Zi;
ThreadLocal G_VK_ThreadLocalCtx G_VK_tl = Zi;
////////////////////////////////////////////////////////////
//~ @hookimpl Bootstrap
// Vulkan backend bootstrap hook. Intentionally empty for now — backend
// initialization is not yet implemented (this file is all TODO stubs).
void G_Bootstrap(void)
{
}
////////////////////////////////////////////////////////////
//~ @hookimpl Arena
G_ArenaHandle G_AcquireArena(void)
{
// TODO: Impl
return (G_ArenaHandle) Zi;
}
void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena)
{
// TODO: Impl
}
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle)
{
// TODO: Impl
}
////////////////////////////////////////////////////////////
//~ @hookimpl Resource
G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle cl_handle, G_ResourceDesc desc)
{
// TODO: Impl
return (G_ResourceHandle) Zi;
}
////////////////////////////////////////////////////////////
//~ @hookimpl Shader resource reference
u32 G_PushRef(G_ArenaHandle arena_handle, G_ResourceHandle resource_handle, G_RefDesc ref_desc)
{
// TODO: Impl
return 0;
}
//- Count
u64 G_CountBufferBytes(G_ResourceHandle buffer)
{
// TODO: Impl
return 0;
}
i32 G_Count1D(G_ResourceHandle texture)
{
// TODO: Impl
return 0;
}
Vec2I32 G_Count2D(G_ResourceHandle texture)
{
// TODO: Impl
return (Vec2I32) Zi;
}
Vec3I32 G_Count3D(G_ResourceHandle texture)
{
// TODO: Impl
return (Vec3I32) Zi;
}
i32 G_CountWidth(G_ResourceHandle texture)
{
// TODO: Impl
return 0;
}
i32 G_CountHeight(G_ResourceHandle texture)
{
// TODO: Impl
return 0;
}
i32 G_CountDepth(G_ResourceHandle texture)
{
// TODO: Impl
return 0;
}
i32 G_CountMips(G_ResourceHandle texture)
{
// TODO: Impl
return 0;
}
//- Map
void *G_HostPointerFromResource(G_ResourceHandle resource_handle)
{
// TODO: Impl
return 0;
}
////////////////////////////////////////////////////////////
//~ @hookimpl Command
//- Command list
G_CommandListHandle G_PrepareCommandList(G_QueueKind queue)
{
// TODO: Impl
return (G_CommandListHandle) Zi;
}
i64 G_CommitCommandList(G_CommandListHandle cl_handle)
{
// TODO: Impl
return (i64) Zi;
}
//- Cpu -> Gpu staged copy
void G_CopyCpuToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range)
{
// TODO: Impl
}
void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range)
{
// TODO: Impl
}
//- Gpu <-> Gpu copy
void G_CopyBufferToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, G_ResourceHandle src_handle, RngU64 src_copy_range)
{
// TODO: Impl
}
void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset)
{
// TODO: Impl
}
void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range)
{
// TODO: Impl
}
void G_CopyTextureToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range)
{
// TODO: Impl
}
//- Constant
void G_SetConstantEx(G_CommandListHandle cl_handle, i32 slot, void *src_32bit, u32 size)
{
// TODO: Impl
}
//- Memory sync
void G_MemorySyncEx(G_CommandListHandle cl_handle, G_MemoryBarrierDesc desc)
{
// TODO: Impl
}
//- Compute
void G_ComputeEx(G_CommandListHandle cl_handle, ComputeShaderDesc cs, Vec3I32 threads)
{
// TODO: Impl
}
//- Rasterize
void G_Rasterize(
G_CommandListHandle cl_handle,
VertexShaderDesc vs, PixelShaderDesc ps,
u32 instances_count, G_IndexBufferDesc index_buffer,
u32 render_targets_count, G_RenderTargetDesc *render_targets,
Rng3 viewport, Rng2 scissor,
G_RasterMode raster_mode
)
{
// TODO: Impl
}
//- Clear
void G_ClearRenderTarget(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle, Vec4 color, i32 mip)
{
// TODO: Impl
}
//- Log
void G_LogResource(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle)
{
// TODO: Impl
}
////////////////////////////////////////////////////////////
//~ @hookimpl Queue synchronization
i64 G_CompletionValueFromQueue(G_QueueKind queue_kind)
{
// TODO: Impl
return (i64) Zi;
}
i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind)
{
// TODO: Impl
return (i64) Zi;
}
G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask)
{
// TODO: Impl
return (G_QueueCompletions) Zi;
}
G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask)
{
// TODO: Impl
return (G_QueueCompletions) Zi;
}
void G_QueueSyncEx(G_QueueBarrierDesc desc)
{
// TODO: Impl
}
////////////////////////////////////////////////////////////
//~ @hookimpl Statistics
G_Stats G_QueryStats(void)
{
// TODO: Impl
return (G_Stats) Zi;
}
////////////////////////////////////////////////////////////
//~ @hookimpl Swapchain
G_SwapchainHandle G_AcquireSwapchain(u64 os_window_handle)
{
// TODO: Impl
return (G_SwapchainHandle) Zi;
}
void G_ReleaseSwapchain(G_SwapchainHandle swapchain_handle)
{
// TODO: Impl
}
G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size)
{
// TODO: Impl
return (G_ResourceHandle) Zi;
}
void G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync)
{
// TODO: Impl
}

View File

@ -0,0 +1,15 @@
////////////////////////////////////////////////////////////
//~ State types
// Process-wide Vulkan backend state. Placeholder for now; `_` only exists so
// the struct is non-empty.
Struct(G_VK_Ctx)
{
i32 _;
};
// Per-thread Vulkan backend state (placeholder).
Struct(G_VK_ThreadLocalCtx)
{
i32 _;
};
// Defined in gpu_vk_core.c. (Fixed: was `G_Vk`, which is a different
// identifier than the `G_VK` definition — C is case-sensitive, so the extern
// declared an object that is never defined.)
extern G_VK_Ctx G_VK;
extern ThreadLocal G_VK_ThreadLocalCtx G_VK_tl;

30
src/gpu_old/gpu.lay generated Normal file
View File

@ -0,0 +1,30 @@
@Layer gpu_old
//////////////////////////////
//- Dependencies
@Dep platform
//////////////////////////////
//- Resources
@EmbedDir G_Resources gpu_res
//////////////////////////////
//- Api
@IncludeC gpu_shared.cgh
@IncludeC gpu_core.h
@IncludeC gpu_common.h
@IncludeG gpu_shared.cgh
@Bootstrap G_Bootstrap
@Bootstrap G_BootstrapCommon
//////////////////////////////
//- Impl
@IncludeC gpu_common.c
@DefaultDownstream Win32 gpu_dx12

264
src/gpu_old/gpu_common.c Normal file
View File

@ -0,0 +1,264 @@
// Backend-agnostic gpu state: the shared resources created by G_BootstrapCommon.
G_Ctx G = Zi;
// Per-thread state: the lazily-acquired permanent gpu arena (see G_PermArena).
ThreadLocal G_ThreadLocalCtx G_tl = Zi;
////////////////////////////////////////////////////////////
//~ Bootstrap
// Backend-independent gpu bootstrap. Records one direct-queue command list
// that creates the shared resources stored in `G`:
//   - a quad index buffer (two triangles),
//   - an 8x8 zero-initialized "blank" RGBA8 texture,
//   - a 128x128x64 R16_Uint noise texture loaded from embedded resources,
//   - one sampler per G_BasicSamplerKind,
// then commits the list and issues a queue sync against all queues.
void G_BootstrapCommon(void)
{
G_ArenaHandle gpu_perm = G_PermArena();
G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct);
{
// Init quad index buffer
{
G_ResourceHandle quad_indices = Zi;
u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 };
quad_indices = G_PushBuffer(gpu_perm, cl, u16, countof(quad_data));
G_CopyCpuToBuffer(cl, quad_indices, 0, quad_data, RNGU64(0, sizeof(quad_data)));
G.quad_indices = G_IdxBuff16(quad_indices);
}
// Init blank texture
{
G_ResourceHandle blank_tex = G_PushTexture2D(
gpu_perm, cl,
G_Format_R8G8B8A8_Unorm,
VEC2I32(8, 8),
G_Layout_Common,
.flags = G_ResourceFlag_ZeroMemory,
.name = Lit("Blank texture")
);
G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex);
}
// Init noise texture
{
G_ResourceHandle noise_tex = Zi;
String noise_data = DataFromResource(ResourceKeyFromStore(&G_Resources, Lit("noise_128x128x64_16.dat")));
Vec3I32 noise_dims = VEC3I32(128, 128, 64);
// Expected size is 2 bytes per texel (texture is created as G_Format_R16_Uint below)
if (noise_data.len != noise_dims.x * noise_dims.y * noise_dims.z * 2)
{
Panic(Lit("Unexpected noise texture size"));
}
noise_tex = G_PushTexture3D(
gpu_perm, cl,
G_Format_R16_Uint,
noise_dims,
G_Layout_Common,
.name = Lit("Noise texture")
);
G_CopyCpuToTexture(
cl,
noise_tex, VEC3I32(0, 0, 0),
noise_data.text, noise_dims,
RNG3I32(VEC3I32(0, 0, 0), noise_dims)
);
G.basic_noise = G_PushTexture3DRef(gpu_perm, noise_tex);
}
// Init basic samplers
// Kinds without an explicit case assert in debug builds, then fall
// through to the PointClamp configuration.
for (G_BasicSamplerKind sampler_kind = 0; sampler_kind < countof(G.basic_samplers); ++sampler_kind)
{
G_SamplerStateRef sampler = Zi;
switch (sampler_kind)
{
default:
{
// Sampler unspecified
Assert(0);
} FALLTHROUGH;
case G_BasicSamplerKind_PointClamp:
{
G_Filter filter = G_Filter_MinMagMipPoint;
G_AddressMode address_mode = G_AddressMode_Clamp;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_PointWrap:
{
G_Filter filter = G_Filter_MinMagMipPoint;
G_AddressMode address_mode = G_AddressMode_Wrap;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_PointMirror:
{
G_Filter filter = G_Filter_MinMagMipPoint;
G_AddressMode address_mode = G_AddressMode_Mirror;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_BilinearClamp:
{
G_Filter filter = G_Filter_MinMagLinearMipPoint;
G_AddressMode address_mode = G_AddressMode_Clamp;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_BilinearWrap:
{
G_Filter filter = G_Filter_MinMagLinearMipPoint;
G_AddressMode address_mode = G_AddressMode_Wrap;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_BilinearMirror:
{
G_Filter filter = G_Filter_MinMagLinearMipPoint;
G_AddressMode address_mode = G_AddressMode_Mirror;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_TrilinearClamp:
{
G_Filter filter = G_Filter_MinMagMipLinear;
G_AddressMode address_mode = G_AddressMode_Clamp;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_TrilinearWrap:
{
G_Filter filter = G_Filter_MinMagMipLinear;
G_AddressMode address_mode = G_AddressMode_Wrap;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_TrilinearMirror:
{
G_Filter filter = G_Filter_MinMagMipLinear;
G_AddressMode address_mode = G_AddressMode_Mirror;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
}
G.basic_samplers[sampler_kind] = sampler;
}
}
G_CommitCommandList(cl);
// NOTE(review): presumably blocks/synchronizes so all queues observe the new
// resources before use — confirm against G_QueueSync (macro not visible here).
G_QueueSync(G_QueueMask_Direct, G_QueueMask_All);
}
////////////////////////////////////////////////////////////
//~ Utils
//- Arena
// Return the calling thread's permanent gpu arena, acquiring it on first use.
// The handle is cached in thread-local state, so only the first call on a
// given thread pays for G_AcquireArena().
G_ArenaHandle G_PermArena(void)
{
  G_ArenaHandle arena = G_tl.gpu_perm;
  if (G_IsArenaNil(arena))
  {
    arena = G_AcquireArena();
    G_tl.gpu_perm = arena;
  }
  return arena;
}
//- Push resource from cpu
// Create a buffer described by `desc` on `gpu_arena`, then record a staged
// cpu -> gpu copy of `src` into it on command list `cl`. Returns the buffer.
// NOTE(review): assumes desc.size >= src.len; the G_PushBufferFromCpuCopy
// macro ensures this by defaulting .size to src.len — confirm any direct callers.
G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferDesc desc)
{
G_ResourceHandle buffer = G_PushResource(gpu_arena, cl, (G_ResourceDesc) { .kind = G_ResourceKind_Buffer, .buffer = desc });
G_CopyCpuToBuffer(cl, buffer, 0, src.text, RNGU64(0, src.len));
return buffer;
}
//- Mip
// Size of mip level `mip` given the mip-0 size: each positive mip step halves
// the size, each negative step doubles it; the result is floored at 1.
// `mip` is clamped to [-31, 31] so the i32 shifts stay well-defined.
// Fixed: previously shifted `result` (initialized to 0) instead of
// `mip0_dims`, so the function always returned 1. The 2D/3D variants below
// shift `mip0_dims.x` etc., which this now matches.
i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip)
{
  mip = ClampI32(mip, -31, 31);
  i32 result = 0;
  if (mip >= 0)
  {
    result = MaxI32(mip0_dims >> mip, 1);
  }
  else
  {
    result = MaxI32(mip0_dims << -mip, 1);
  }
  return result;
}
// Per-axis mip-level dimensions for a 2d texture: each axis is halved per
// positive mip step, doubled per negative step, and floored at 1.
// The mip index is clamped to [-31, 31] to keep the i32 shifts defined.
Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip)
{
  i32 level = ClampI32(mip, -31, 31);
  Vec2I32 dims = Zi;
  if (level < 0)
  {
    dims.x = MaxI32(mip0_dims.x << -level, 1);
    dims.y = MaxI32(mip0_dims.y << -level, 1);
  }
  else
  {
    dims.x = MaxI32(mip0_dims.x >> level, 1);
    dims.y = MaxI32(mip0_dims.y >> level, 1);
  }
  return dims;
}
// Per-axis mip-level dimensions for a 3d texture: each axis is halved per
// positive mip step, doubled per negative step, and floored at 1.
// The mip index is clamped to [-31, 31] to keep the i32 shifts defined.
Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip)
{
  i32 level = ClampI32(mip, -31, 31);
  Vec3I32 dims = Zi;
  if (level < 0)
  {
    dims.x = MaxI32(mip0_dims.x << -level, 1);
    dims.y = MaxI32(mip0_dims.y << -level, 1);
    dims.z = MaxI32(mip0_dims.z << -level, 1);
  }
  else
  {
    dims.x = MaxI32(mip0_dims.x >> level, 1);
    dims.y = MaxI32(mip0_dims.y >> level, 1);
    dims.z = MaxI32(mip0_dims.z >> level, 1);
  }
  return dims;
}
//- Thread count
// Ceil-divide the requested thread counts by the compute shader's group size
// (cs.x/y/z) to get the number of thread groups to dispatch per axis.
// NOTE(review): assumes cs.x/y/z > 0 — a zero group size would divide by zero.
Vec3I32 G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads)
{
return VEC3I32(
(threads.x + cs.x - 1) / cs.x,
(threads.y + cs.y - 1) / cs.y,
(threads.z + cs.z - 1) / cs.z
);
}
//- Viewport / scissor
// Viewport covering the whole texture: origin (0,0,0), extent = texture
// width/height, depth range [0, 1].
Rng3 G_ViewportFromTexture(G_ResourceHandle texture)
{
  Vec2I32 size = G_Count2D(texture);
  Rng3 viewport = RNG3(VEC3(0, 0, 0), VEC3(size.x, size.y, 1));
  return viewport;
}
// Scissor rect covering the whole texture: (0,0) to (width, height).
Rng2 G_ScissorFromTexture(G_ResourceHandle texture)
{
  Vec2I32 size = G_Count2D(texture);
  Rng2 scissor = RNG2(VEC2(0, 0), VEC2(size.x, size.y));
  return scissor;
}
//- Shared resources
// Shared-resource accessor: basic sampler created by G_BootstrapCommon.
// NOTE(review): `kind` is not bounds-checked — assumes kind < G_BasicSamplerKind_COUNT.
G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind)
{
return G.basic_samplers[kind];
}
// Shared quad index buffer (two triangles: 0,1,2 / 0,2,3), created by G_BootstrapCommon.
G_IndexBufferDesc G_QuadIndices(void)
{
return G.quad_indices;
}
// Shared 8x8 zero-initialized RGBA8 texture, created by G_BootstrapCommon.
G_Texture2DRef G_BlankTexture2D(void)
{
return G.blank_tex;
}
// Shared 128x128x64 R16_Uint noise texture, created by G_BootstrapCommon.
G_Texture3DRef G_BasicNoiseTexture(void)
{
return G.basic_noise;
}

53
src/gpu_old/gpu_common.h Normal file
View File

@ -0,0 +1,53 @@
////////////////////////////////////////////////////////////
//~ State types
Struct(G_Ctx)
{
// Common shared resources
G_IndexBufferDesc quad_indices;
G_Texture2DRef blank_tex;
G_Texture3DRef basic_noise;
G_SamplerStateRef basic_samplers[G_BasicSamplerKind_COUNT];
};
Struct(G_ThreadLocalCtx)
{
G_ArenaHandle gpu_perm;
};
extern G_Ctx G;
extern ThreadLocal G_ThreadLocalCtx G_tl;
////////////////////////////////////////////////////////////
//~ Bootstrap
void G_BootstrapCommon(void);
////////////////////////////////////////////////////////////
//~ Utils
//- Arena
G_ArenaHandle G_PermArena(void);
//- Push resource from cpu
G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferDesc desc);
#define G_PushBufferFromCpuCopy(_arena, _cl, _src, ...) \
G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ })
//- Mip
i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip);
Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip);
Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip);
//- Thread count
Vec3I32 G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads);
//- Viewport / scissor
Rng3 G_ViewportFromTexture(G_ResourceHandle texture);
Rng2 G_ScissorFromTexture(G_ResourceHandle texture);
//- Shared resources
G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind);
G_IndexBufferDesc G_QuadIndices(void);
G_Texture2DRef G_BlankTexture2D(void);
G_Texture3DRef G_BasicNoiseTexture(void);

795
src/gpu_old/gpu_core.h Normal file
View File

@ -0,0 +1,795 @@
////////////////////////////////////////////////////////////
//~ Handle types
Struct(G_ArenaHandle) { u64 v; };
Struct(G_CommandListHandle) { u64 v; };
Struct(G_ResourceHandle) { u64 v; };
Struct(G_SwapchainHandle) { u64 v; };
#define G_IsArenaNil(h) ((h).v == 0)
#define G_IsCommandListNil(h) ((h).v == 0)
#define G_IsResourceNil(h) ((h).v == 0)
#define G_IsSwapchainNil(h) ((h).v == 0)
////////////////////////////////////////////////////////////
//~ Queue types
#define G_IsMultiQueueEnabled 1
Enum(G_QueueKind)
{
G_QueueKind_Direct = 0,
#if G_IsMultiQueueEnabled
G_QueueKind_AsyncCompute = 1,
G_QueueKind_AsyncCopy = 2,
#else
G_QueueKind_AsyncCompute = G_QueueKind_Direct,
G_QueueKind_AsyncCopy = G_QueueKind_Direct,
#endif
G_QueueKind_COUNT
};
Enum(G_QueueMask)
{
G_QueueMask_None = 0,
G_QueueMask_Direct = (1 << 0),
#if G_IsMultiQueueEnabled
G_QueueMask_AsyncCompute = (1 << 1),
G_QueueMask_AsyncCopy = (1 << 2),
#else
G_QueueMask_AsyncCompute = G_QueueMask_Direct,
G_QueueMask_AsyncCopy = G_QueueMask_Direct,
#endif
G_QueueMask_All = (0xFFFFFFFF >> (32 - G_QueueKind_COUNT))
};
// Convert a G_QueueKind index to its single-bit G_QueueMask.
// The argument is parenthesized so compound expressions expand correctly
// (e.g. G_QueueMaskFromKind(a | b) previously became (1 << a) | b).
#define G_QueueMaskFromKind(queue_kind) (1 << (queue_kind))
Struct(G_QueueCompletions)
{
i64 v[G_QueueKind_COUNT]; // Array of completions indexed by queue kind
};
// All waiters will wait until specified queues reach their value in the `completions` array
Struct(G_QueueBarrierDesc)
{
G_QueueCompletions completions; // Completions that waiters should wait for
G_QueueMask wait_queues; // Mask of queues that will wait for completions
b32 wait_cpu; // Will the cpu wait for completion
};
////////////////////////////////////////////////////////////
//~ Format types
// NOTE: Matches DirectX DXGI_FORMAT
Enum(G_Format)
{
G_Format_Unknown = 0,
G_Format_R32G32B32A32_Typeless = 1,
G_Format_R32G32B32A32_Float = 2,
G_Format_R32G32B32A32_Uint = 3,
G_Format_R32G32B32A32_Sint = 4,
G_Format_R32G32B32_Typeless = 5,
G_Format_R32G32B32_Float = 6,
G_Format_R32G32B32_Uint = 7,
G_Format_R32G32B32_Sint = 8,
G_Format_R16G16B16A16_Typeless = 9,
G_Format_R16G16B16A16_Float = 10,
G_Format_R16G16B16A16_Unorm = 11,
G_Format_R16G16B16A16_Uint = 12,
G_Format_R16G16B16A16_Snorm = 13,
G_Format_R16G16B16A16_Sint = 14,
G_Format_R32G32_Typeless = 15,
G_Format_R32G32_Float = 16,
G_Format_R32G32_Uint = 17,
G_Format_R32G32_Sint = 18,
G_Format_R32G8X24_Typeless = 19,
G_Format_D32_Float_S8X24_Uint = 20,
G_Format_R32_Float_X8X24_Typeless = 21,
G_Format_X32_Typeless_G8X24_Uint = 22,
G_Format_R10G10B10A2_Typeless = 23,
G_Format_R10G10B10A2_Unorm = 24,
G_Format_R10G10B10A2_Uint = 25,
G_Format_R11G11B10_Float = 26,
G_Format_R8G8B8A8_Typeless = 27,
G_Format_R8G8B8A8_Unorm = 28,
G_Format_R8G8B8A8_Unorm_Srgb = 29,
G_Format_R8G8B8A8_Uint = 30,
G_Format_R8G8B8A8_Snorm = 31,
G_Format_R8G8B8A8_Sint = 32,
G_Format_R16G16_Typeless = 33,
G_Format_R16G16_Float = 34,
G_Format_R16G16_Unorm = 35,
G_Format_R16G16_Uint = 36,
G_Format_R16G16_Snorm = 37,
G_Format_R16G16_Sint = 38,
G_Format_R32_Typeless = 39,
G_Format_D32_Float = 40,
G_Format_R32_Float = 41,
G_Format_R32_Uint = 42,
G_Format_R32_Sint = 43,
G_Format_R24G8_Typeless = 44,
G_Format_D24_Unorm_S8_Uint = 45,
G_Format_R24_Unorm_X8_Typeless = 46,
G_Format_X24_Typeless_G8_Uint = 47,
G_Format_R8G8_Typeless = 48,
G_Format_R8G8_Unorm = 49,
G_Format_R8G8_Uint = 50,
G_Format_R8G8_Snorm = 51,
G_Format_R8G8_Sint = 52,
G_Format_R16_Typeless = 53,
G_Format_R16_Float = 54,
G_Format_D16_Unorm = 55,
G_Format_R16_Unorm = 56,
G_Format_R16_Uint = 57,
G_Format_R16_Snorm = 58,
G_Format_R16_Sint = 59,
G_Format_R8_Typeless = 60,
G_Format_R8_Unorm = 61,
G_Format_R8_Uint = 62,
G_Format_R8_Snorm = 63,
G_Format_R8_Sint = 64,
G_Format_A8_Unorm = 65,
G_Format_R1_Unorm = 66,
G_Format_R9G9B9E5_SharedXP = 67,
G_Format_R8G8_B8G8_Unorm = 68,
G_Format_G8R8_G8B8_Unorm = 69,
G_Format_BC1_Typeless = 70,
G_Format_BC1_Unorm = 71,
G_Format_BC1_Unorm_Srgb = 72,
G_Format_BC2_Typeless = 73,
G_Format_BC2_Unorm = 74,
G_Format_BC2_Unorm_Srgb = 75,
G_Format_BC3_Typeless = 76,
G_Format_BC3_Unorm = 77,
G_Format_BC3_Unorm_Srgb = 78,
G_Format_BC4_Typeless = 79,
G_Format_BC4_Unorm = 80,
G_Format_BC4_Snorm = 81,
G_Format_BC5_Typeless = 82,
G_Format_BC5_Unorm = 83,
G_Format_BC5_Snorm = 84,
G_Format_B5G6R5_Unorm = 85,
G_Format_B5G5R5A1_Unorm = 86,
G_Format_B8G8R8A8_Unorm = 87,
G_Format_B8G8R8X8_Unorm = 88,
G_Format_R10G10B10_XR_BIAS_A2_Unorm = 89,
G_Format_B8G8R8A8_Typeless = 90,
G_Format_B8G8R8A8_Unorm_Srgb = 91,
G_Format_B8G8R8X8_Typeless = 92,
G_Format_B8G8R8X8_Unorm_Srgb = 93,
G_Format_BC6H_Typeless = 94,
G_Format_BC6H_UF16 = 95,
G_Format_BC6H_SF16 = 96,
G_Format_BC7_Typeless = 97,
G_Format_BC7_Unorm = 98,
G_Format_BC7_Unorm_Srgb = 99,
G_Format_AYUV = 100,
G_Format_Y410 = 101,
G_Format_Y416 = 102,
G_Format_NV12 = 103,
G_Format_P010 = 104,
G_Format_P016 = 105,
G_Format_420_Opaque = 106,
G_Format_YUY2 = 107,
G_Format_Y210 = 108,
G_Format_Y216 = 109,
G_Format_NV11 = 110,
G_Format_AI44 = 111,
G_Format_IA44 = 112,
G_Format_P8 = 113,
G_Format_A8P8 = 114,
G_Format_B4G4R4A4_Unorm = 115,
G_Format_P208 = 130,
G_Format_V208 = 131,
G_Format_V408 = 132,
G_Format_SamplerFeedbackMinMipOpaque = 189,
G_Format_SamplerFeedbackMipRegionUsedOpaque = 190,
G_Format_A4B4G4R4_Unorm = 191,
G_Format_COUNT = 192
};
////////////////////////////////////////////////////////////
//~ Memory sync types
Enum(G_Stage)
{
G_Stage_None = 0,
// Compute stages
G_Stage_ComputeShading = (1 << 1),
// Draw stages
G_Stage_IndexAssembly = (1 << 2),
G_Stage_VertexShading = (1 << 3),
G_Stage_PixelShading = (1 << 4),
G_Stage_DepthStencil = (1 << 5),
G_Stage_RenderTarget = (1 << 6),
// Copy stages
G_Stage_Copy = (1 << 7),
// Indirect stages
G_Stage_Indirect = (1 << 8),
// Aggregate stages
G_Stage_Drawing = G_Stage_IndexAssembly |
G_Stage_VertexShading |
G_Stage_PixelShading |
G_Stage_DepthStencil |
G_Stage_RenderTarget,
G_Stage_Shading = G_Stage_ComputeShading |
G_Stage_VertexShading |
G_Stage_PixelShading,
G_Stage_All = 0xFFFFFFFF
};
Enum(G_Access)
{
G_Access_None = 0,
G_Access_ShaderReadWrite = (1 << 1),
G_Access_ShaderRead = (1 << 2),
G_Access_CopyWrite = (1 << 3),
G_Access_CopyRead = (1 << 4),
G_Access_DepthStencilRead = (1 << 5),
G_Access_DepthStencilWrite = (1 << 6),
G_Access_RenderTargetWrite = (1 << 7),
G_Access_IndexBuffer = (1 << 8),
G_Access_IndirectArgument = (1 << 9),
G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the stage specified in the barrier
};
Enum(G_Layout)
{
G_Layout_NoChange,
G_Layout_Undefined,
//////////////////////////////
//- Queue-agnostic
// Simultaneous layout allows a resource to be used on any queue with any
// access type (except depth-stencil). Resources cannot transition to/from
// this layout, they must be created with it. Allows concurrent reads
// with up to 1 write to non-overlapping regions.
G_Layout_Simultaneous, // Any access except depth-stencil <-- D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS
G_Layout_Common, // ShaderRead/CopyRead/CopyWrite/Present <-- D3D12_BARRIER_LAYOUT_COMMON
//////////////////////////////
//- Direct queue
G_Layout_DirectQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON
G_Layout_DirectQueue_Read, // ShaderRead/CopyRead/DepthStencilRead <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ
G_Layout_DirectQueue_DepthStencil, // DepthStencilRead/DepthStencilWrite <-- D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE
G_Layout_DirectQueue_RenderTarget, // RenderTargetWrite <-- D3D12_BARRIER_LAYOUT_RENDER_TARGET
//////////////////////////////
//- Compute queue
G_Layout_ComputeQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON
//////////////////////////////
//- Direct & Compute queue
G_Layout_DirectComputeQueue_Read, // ShaderRead/CopyRead <-- D3D12_BARRIER_LAYOUT_GENERIC_READ
G_Layout_DirectComputeQueue_ShaderReadWrite, // ShaderReadWrite <-- D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS
G_Layout_DirectComputeQueue_CopyWrite, // CopyWrite <-- D3D12_BARRIER_LAYOUT_COPY_DEST
};
// Barrier will execute after stages specified by `stage_prev`, and before stages specified by `stage_next`.
// When barrier executes:
// - Necessary resource flushes will occur based on `access_prev` & `access_next`
// - Texture layout will transition based on `layout` (if specified)
Struct(G_MemoryBarrierDesc)
{
G_ResourceHandle resource;
b32 is_global;
G_Stage stage_prev;
G_Stage stage_next;
G_Access access_prev;
G_Access access_next;
G_Layout layout;
RngI32 mips; // Inclusive range of texture mip levels to sync
};
////////////////////////////////////////////////////////////
//~ Filter types
// NOTE: Matches DirectX D3D12_FILTER
// Sampler filter modes; values match DirectX D3D12_FILTER exactly.
// Fixed: G_Filter_Minimum_MinMagAnisotropicMipPoint was 0x155, colliding with
// G_Filter_Minimum_Anisotropic. D3D12 defines
// D3D12_FILTER_MINIMUM_MIN_MAG_ANISOTROPIC_MIP_POINT = 0x154, which also
// matches the 0x54/0x55, 0xd4/0xd5, 0x1d4/0x1d5 pattern of the other groups.
Enum(G_Filter)
{
// Standard filter
G_Filter_MinMagMipPoint = 0,
G_Filter_MinMagPointMipLinear = 0x1,
G_Filter_MinPointMagLinearMipPoint = 0x4,
G_Filter_MinPointMagMipLinear = 0x5,
G_Filter_MinLinearMagMipPoint = 0x10,
G_Filter_MinLinearMagPointMipLinear = 0x11,
G_Filter_MinMagLinearMipPoint = 0x14,
G_Filter_MinMagMipLinear = 0x15,
G_Filter_MinMagAnisotropicMipPoint = 0x54,
G_Filter_Anisotropic = 0x55,
// Comparison filter
G_Filter_Comparison_MinMagMipPoint = 0x80,
G_Filter_Comparison_MinMagPointMipLinear = 0x81,
G_Filter_Comparison_MinPointMagLinearMipPoint = 0x84,
G_Filter_Comparison_MinPointMagMipLinear = 0x85,
G_Filter_Comparison_MinLinearMagMipPoint = 0x90,
G_Filter_Comparison_MinLinearMagPointMipLinear = 0x91,
G_Filter_Comparison_MinMagLinearMipPoint = 0x94,
G_Filter_Comparison_MinMagMipLinear = 0x95,
G_Filter_Comparison_MinMagAnisotropicMipPoint = 0xd4,
G_Filter_Comparison_Anisotropic = 0xd5,
// Minimum filter
G_Filter_Minimum_MinMagMipPoint = 0x100,
G_Filter_Minimum_MinMagPointMipLinear = 0x101,
G_Filter_Minimum_MinPointMagLinearMipPoint = 0x104,
G_Filter_Minimum_MinPointMagMipLinear = 0x105,
G_Filter_Minimum_MinLinearMagMipPoint = 0x110,
G_Filter_Minimum_MinLinearMagPointMipLinear = 0x111,
G_Filter_Minimum_MinMagLinearMipPoint = 0x114,
G_Filter_Minimum_MinMagMipLinear = 0x115,
G_Filter_Minimum_MinMagAnisotropicMipPoint = 0x154,
G_Filter_Minimum_Anisotropic = 0x155,
// Maximum filter
G_Filter_Maximum_MinMagMipPoint = 0x180,
G_Filter_Maximum_MinMagPointMipLinear = 0x181,
G_Filter_Maximum_MinPointMagLinearMipPoint = 0x184,
G_Filter_Maximum_MinPointMagMipLinear = 0x185,
G_Filter_Maximum_MinLinearMagMipPoint = 0x190,
G_Filter_Maximum_MinLinearMagPointMipLinear = 0x191,
G_Filter_Maximum_MinMagLinearMipPoint = 0x194,
G_Filter_Maximum_MinMagMipLinear = 0x195,
G_Filter_Maximum_MinMagAnisotropicMipPoint = 0x1d4,
G_Filter_Maximum_Anisotropic = 0x1d5
};
// NOTE: Matches DirectX D3D12_TEXTURE_ADDRESS_MODE
Enum(G_AddressMode)
{
G_AddressMode_Wrap = 1,
G_AddressMode_Mirror = 2,
G_AddressMode_Clamp = 3, // Default
G_AddressMode_Border = 4,
G_AddressMode_MirrorOnce = 5
};
// NOTE: Matches DirectX D3D12_COMPARISON_FUNC
Enum(G_ComparisonFunc)
{
G_ComparisonFunc_None = 0,
G_ComparisonFunc_Never = 1,
G_ComparisonFunc_Less = 2,
G_ComparisonFunc_Equal = 3,
G_ComparisonFunc_LessEqual = 4,
G_ComparisonFunc_Greater = 5,
G_ComparisonFunc_NotEqual = 6,
G_ComparisonFunc_GreaterEqual = 7,
G_ComparisonFunc_Always = 8
};
////////////////////////////////////////////////////////////
//~ Resource types
Enum(G_ResourceKind)
{
G_ResourceKind_Buffer,
G_ResourceKind_Texture1D,
G_ResourceKind_Texture2D,
G_ResourceKind_Texture3D,
G_ResourceKind_Sampler,
};
Enum(G_ResourceFlag)
{
G_ResourceFlag_None = 0,
G_ResourceFlag_AllowShaderReadWrite = (1 << 0),
G_ResourceFlag_AllowRenderTarget = (1 << 1),
G_ResourceFlag_AllowDepthStencil = (1 << 2),
G_ResourceFlag_ZeroMemory = (1 << 3),
G_ResourceFlag_HostMemory = (1 << 4), // Resource will be mapped into the cpu's address space
G_ResourceFlag_Uncached = (1 << 5), // Cpu writes will be combined & reads will be uncached
G_ResourceFlag_ForceNoReuse = (1 << 6),
};
Struct(G_BufferDesc)
{
G_ResourceFlag flags;
u64 size;
String name;
};
Struct(G_TextureDesc)
{
G_ResourceFlag flags;
G_Format format;
Vec3I32 dims;
G_Layout initial_layout;
Vec4 clear_color;
i32 max_mips; // Will be clamped to range [1, max mips]
String name;
};
Struct(G_SamplerDesc)
{
G_ResourceFlag flags;
G_Filter filter;
G_AddressMode x;
G_AddressMode y;
G_AddressMode z;
f32 mip_lod_bias;
u32 max_anisotropy;
G_ComparisonFunc comparison;
Vec4 border_color;
f32 min_lod;
f32 max_lod;
String name;
};
Struct(G_ResourceDesc)
{
G_ResourceKind kind;
G_BufferDesc buffer;
G_TextureDesc texture;
G_SamplerDesc sampler;
};
////////////////////////////////////////////////////////////
//~ Ref types
Struct(G_RefDesc)
{
G_RefKind kind;
u64 element_size;
u64 element_offset;
RngI32 mips; // Inclusive range of texture mip indices to reference
};
////////////////////////////////////////////////////////////
//~ Rasterization types
Enum(G_RasterMode)
{
G_RasterMode_None,
G_RasterMode_PointList,
G_RasterMode_LineList,
G_RasterMode_LineStrip,
G_RasterMode_TriangleList,
G_RasterMode_TriangleStrip,
G_RasterMode_WireTriangleList,
G_RasterMode_WireTriangleStrip,
};
Enum(G_BlendMode)
{
G_BlendMode_Opaque,
G_BlendMode_CompositeStraightAlpha,
G_BlendMode_CompositePremultipliedAlpha,
};
Struct(G_IndexBufferDesc)
{
u32 count;
u32 stride; // Either 2 for u16 indices, or 4 for u32 indices
G_ResourceHandle resource;
};
Struct(G_RenderTargetDesc)
{
G_ResourceHandle resource;
G_BlendMode blend;
i32 mip;
};
////////////////////////////////////////////////////////////
//~ Statistic types
Struct(G_Stats)
{
// Memory usage
u64 device_committed;
u64 device_budget;
u64 host_committed;
u64 host_budget;
// Other stats
u64 arenas_count;
u64 cumulative_nonreuse_count;
};
////////////////////////////////////////////////////////////
//~ @hookdecl Bootstrap
void G_Bootstrap(void);
////////////////////////////////////////////////////////////
//~ @hookdecl Arena
G_ArenaHandle G_AcquireArena(void);
void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena);
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle);
////////////////////////////////////////////////////////////
//~ @hookdecl Resource
//- Resource creation
G_ResourceHandle G_PushResource(G_ArenaHandle arena, G_CommandListHandle cl, G_ResourceDesc desc);
#define G_PushBuffer(arena, cl, _type, _count, ...) G_PushResource((arena), (cl), \
(G_ResourceDesc) { \
.kind = G_ResourceKind_Buffer, \
.buffer = { \
.size = sizeof(_type) * (_count), \
__VA_ARGS__ \
} \
} \
)
#define G_PushTexture1D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \
(G_ResourceDesc) { \
.kind = G_ResourceKind_Texture1D, \
.texture = { \
.format = (_format), \
.dims = VEC3I32((_size), 1, 1), \
.initial_layout = (_initial_layout), \
__VA_ARGS__ \
} \
} \
)
#define G_PushTexture2D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \
(G_ResourceDesc) { \
.kind = G_ResourceKind_Texture2D, \
.texture = { \
.format = (_format), \
.dims = VEC3I32((_size).x, (_size).y, 1), \
.initial_layout = (_initial_layout), \
__VA_ARGS__ \
} \
} \
)
#define G_PushTexture3D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \
(G_ResourceDesc) { \
.kind = G_ResourceKind_Texture3D, \
.texture = { \
.format = (_format), \
.dims = (_size), \
.initial_layout = (_initial_layout), \
__VA_ARGS__ \
} \
} \
)
#define G_PushSampler(arena, cl, ...) G_PushResource((arena), (cl), \
(G_ResourceDesc) { \
.kind = G_ResourceKind_Sampler, \
.sampler = { \
.filter = G_Filter_MinMagMipPoint, \
__VA_ARGS__ \
} \
} \
)
//- Index buffer helpers
#define G_IdxBuff16(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 2, .count = (G_CountBuffer((_res), i16)) })
#define G_IdxBuff32(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 4, .count = (G_CountBuffer((_res), i32)) })
//- Render target helpers
#define G_Rt(_res, _blend_mode) ((G_RenderTargetDesc) { .resource = (_res), .blend = (_blend_mode) })
//- Count
u64 G_CountBufferBytes(G_ResourceHandle buffer);
i32 G_Count1D(G_ResourceHandle texture);
Vec2I32 G_Count2D(G_ResourceHandle texture);
Vec3I32 G_Count3D(G_ResourceHandle texture);
i32 G_CountWidth(G_ResourceHandle texture);
i32 G_CountHeight(G_ResourceHandle texture);
i32 G_CountDepth(G_ResourceHandle texture);
i32 G_CountMips(G_ResourceHandle texture);
// Number of whole `type`-sized elements stored in `buffer`.
// The expansion is fully parenthesized so the macro composes safely inside
// larger expressions (e.g. `x / G_CountBuffer(b, t)` previously
// mis-associated as `(x / G_CountBufferBytes(b)) / sizeof(t)`).
#define G_CountBuffer(buffer, type) (G_CountBufferBytes(buffer) / sizeof(type))
//- Map
void *G_HostPointerFromResource(G_ResourceHandle resource);
#define G_StructFromResource(resource, type) (type *)G_HostPointerFromResource(resource)
////////////////////////////////////////////////////////////
//~ @hookdecl Shader resource reference
// Registers a bindless shader-visible reference to `resource` on `arena` and
// returns its descriptor index (0 is the nil ref — see G_IsRefNil).
u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc);
// Typed wrappers below produce the matching *Ref struct for shader-side
// dereference. Texture wrappers default to covering every mip
// (.mips.max = G_MaxMips); override ref fields via __VA_ARGS__.
#define G_PushStructuredBufferRef(arena, resource, type, ...) (G_StructuredBufferRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_StructuredBuffer, .element_size = sizeof(type), __VA_ARGS__ } \
) \
}
#define G_PushByteAddressBufferRef(arena, resource, ...) (G_ByteAddressBufferRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_ByteAddressBuffer, __VA_ARGS__ } \
) \
}
#define G_PushTexture1DRef(arena, resource, ...) (G_Texture1DRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_Texture1D, .mips.max = G_MaxMips, __VA_ARGS__ } \
) \
}
#define G_PushTexture2DRef(arena, resource, ...) (G_Texture2DRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_Texture2D, .mips.max = G_MaxMips, __VA_ARGS__ } \
) \
}
#define G_PushTexture3DRef(arena, resource, ...) (G_Texture3DRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_Texture3D, .mips.max = G_MaxMips, __VA_ARGS__ } \
) \
}
#define G_PushSamplerStateRef(arena, resource, ...) (G_SamplerStateRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_SamplerState, __VA_ARGS__ } \
) \
}
////////////////////////////////////////////////////////////
//~ @hookdecl Command
//- Command list
// Begin recording commands targeting `queue`; commit returns the queue's
// completion target value for the submitted list.
G_CommandListHandle G_PrepareCommandList(G_QueueKind queue);
i64 G_CommitCommandList(G_CommandListHandle cl);
//- Cpu -> Gpu staged copy
void G_CopyCpuToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range);
void G_CopyCpuToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range);
//- Gpu <-> Gpu copy
void G_CopyBufferToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, G_ResourceHandle src, RngU64 src_copy_range);
void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset);
void G_CopyTextureToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range);
void G_CopyTextureToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range);
//- Constant
// `src_32bit` points at 32-bit-granular constant data of `size` bytes.
void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size);
// `name` must come from G_DeclConstant, which declares both the slot enum and
// the CAT(name, __shaderconstanttype) wrapper struct this macro instantiates.
#define G_SetConstant(cl, name, value) do { \
CAT(name, __shaderconstanttype) __src; \
__src.v = value; \
G_SetConstantEx((cl), (name), &__src, sizeof(__src)); \
} while (0)
//- Memory sync
// Record a memory/layout barrier on `cl`. All barrier macros default to
// covering every mip (.mips.max = G_MaxMips); extra G_MemoryBarrierDesc fields
// may be supplied via __VA_ARGS__.
void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc);
// Resource barrier without a layout change.
#define G_MemorySync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, ...) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.resource = (_resource), \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.stage_next = _stage_next, \
.access_next = _access_next, \
.mips.max = G_MaxMips, \
__VA_ARGS__ \
})
// Resource barrier that also transitions the texture to `_layout`.
#define G_MemoryLayoutSync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, _layout, ...) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.resource = (_resource), \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.stage_next = _stage_next, \
.access_next = _access_next, \
.layout = _layout, \
.mips.max = G_MaxMips, \
__VA_ARGS__ \
})
// Barrier over all resources (.is_global = 1) rather than a single one.
#define G_GlobalMemorySync(_cl, _stage_prev, _access_prev, _stage_next, _access_next, ...) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.is_global = 1, \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.stage_next = _stage_next, \
.access_next = _access_next, \
.mips.max = G_MaxMips, \
__VA_ARGS__ \
})
// "Dumb" variants: full-pipeline, all-access barriers — maximally safe,
// maximally serializing.
#define G_DumbMemorySync(cl, resource, ...) \
G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__)
#define G_DumbMemoryLayoutSync(cl, resource, layout, ...) \
G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout), __VA_ARGS__)
#define G_DumbGlobalMemorySync(cl, ...) \
G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__)
//- Compute
// Dispatch `cs` with the given thread counts; the 1D/2D wrappers pad the
// missing dimensions with 1.
void G_ComputeEx(G_CommandListHandle cl, ComputeShaderDesc cs, Vec3I32 threads);
#define G_Compute(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads), 1, 1))
#define G_Compute2D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, 1))
#define G_Compute3D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, (threads).z))
//- Rasterize
// Record an instanced, indexed draw into up to `render_targets_count` targets.
void G_Rasterize(
G_CommandListHandle cl,
VertexShaderDesc vs, PixelShaderDesc ps,
u32 instances_count, G_IndexBufferDesc index_buffer,
u32 render_targets_count, G_RenderTargetDesc *render_targets,
Rng3 viewport, Rng2 scissor,
G_RasterMode raster_mode
);
//- Clear
void G_ClearRenderTarget(G_CommandListHandle cl, G_ResourceHandle render_target, Vec4 color, i32 mip);
//- Log
// Record a debug dump of `resource`'s contents/state on the command list.
void G_LogResource(G_CommandListHandle cl, G_ResourceHandle resource);
////////////////////////////////////////////////////////////
//~ @hookdecl Queue synchronization
// Completion "values" are what a queue has finished; "targets" are what it has
// been asked to reach (per-queue monotonic counters).
i64 G_CompletionValueFromQueue(G_QueueKind queue_kind);
i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind);
G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask);
G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask);
void G_QueueSyncEx(G_QueueBarrierDesc desc);
// Wait until the queues in `completion_mask` reach their current targets;
// extra G_QueueBarrierDesc fields via __VA_ARGS__.
#define G_QueueSync(completion_mask, ...) \
G_QueueSyncEx((G_QueueBarrierDesc) { \
.completions = G_CompletionTargetsFromQueues(completion_mask), \
__VA_ARGS__ \
})
// GPU-side wait: queues in `wait_mask` stall until `completion_mask` catches up.
#define G_QueueSyncGpu(completion_mask, wait_mask) G_QueueSync((completion_mask), .wait_queues = (wait_mask))
// CPU-side wait.
// NOTE: trailing semicolon removed from the expansion — it previously made the
// macro expand to two statements, which breaks `if (c) G_QueueSyncCpu(m); else ...`
// and was inconsistent with G_QueueSyncGpu.
#define G_QueueSyncCpu(completion_mask) G_QueueSync((completion_mask), .wait_cpu = 1)
////////////////////////////////////////////////////////////
//~ @hookdecl Statistics
// Snapshot of backend counters (allocations, arenas, ...).
G_Stats G_QueryStats(void);
////////////////////////////////////////////////////////////
//~ @hookdecl Swapchain
// `os_window_handle` is the platform window handle (e.g. HWND) cast to u64.
G_SwapchainHandle G_AcquireSwapchain(u64 os_window_handle);
void G_ReleaseSwapchain(G_SwapchainHandle swapchain);
// Waits until a new backbuffer is ready from the swapchain.
// This should be called before rendering for minimum latency.
G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size);
// Present the backbuffer; `vsync` nonzero enables vertical sync.
void G_CommitBackbuffer(G_ResourceHandle backbuffer, i32 vsync);

16
src/gpu_old/gpu_dx12/gpu_dx12.lay generated Normal file
View File

@ -0,0 +1,16 @@
@Layer gpu_dx12_old
//////////////////////////////
//- Resources
@EmbedDir G_D12_Resources gpu_dx12_res
//////////////////////////////
//- Api
@IncludeC gpu_dx12_core.h
//////////////////////////////
//- Impl
@IncludeC gpu_dx12_core.c

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,577 @@
////////////////////////////////////////////////////////////
//~ DirectX12 libs
#pragma warning(push, 0)
#include <d3d12.h>
#include <dxgidebug.h>
#include <dxgi1_6.h>
#pragma warning(pop)
#pragma comment(lib, "d3d12")
#pragma comment(lib, "dxgi")
////////////////////////////////////////////////////////////
//~ Tweakable definitions
#define G_D12_TearingIsAllowed 1
#define G_D12_FrameLatency 1
#define G_D12_SwapchainBufferCount 2
// Swapchain creation flags derived from the toggles above; the `!= 0` multiply
// keeps each DXGI flag only when its toggle is enabled.
#define G_D12_SwapchainFlags ( \
((G_D12_TearingIsAllowed != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | \
((G_D12_FrameLatency != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT) \
)
// Descriptor heap capacities per heap kind.
#define G_D12_MaxCbvSrvUavDescriptors (1024 * 128)
#define G_D12_MaxSamplerDescriptors (1024 * 1)
#define G_D12_MaxRtvDescriptors (1024 * 64)
#define G_D12_MaxMips 16
#define G_D12_MaxNameLen 64
////////////////////////////////////////////////////////////
//~ Pipeline types
// NOTE: Must be zero initialized (including padding bits) for hashing
Struct(G_D12_PipelineDesc)
{
VertexShaderDesc vs;
PixelShaderDesc ps;
ComputeShaderDesc cs;
b32 is_wireframe;
D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type;
G_Format render_target_formats[G_MaxRenderTargets];
G_BlendMode render_target_blend_modes[G_MaxRenderTargets];
};
// Cached PSO; chained per hash bin (see G_D12_PipelineBin / pipeline_bins).
Struct(G_D12_Pipeline)
{
G_D12_Pipeline *next_in_bin;
u64 hash;
G_D12_PipelineDesc desc;
ID3D12PipelineState *pso;
b32 ok;
String error; // Compile/creation error when ok == 0
};
// One bucket of the pipeline hash table, guarded by its own mutex.
Struct(G_D12_PipelineBin)
{
Mutex mutex;
G_D12_Pipeline *first;
};
////////////////////////////////////////////////////////////
//~ Resource types
// Backend-side resource record: a D3D12 resource plus per-kind metadata
// (buffer / texture / sampler / backbuffer).
Struct(G_D12_Resource)
{
G_D12_Resource *next;
G_D12_Resource *prev;
G_ResourceFlag flags;
u64 uid;
// D3D12 resource
D3D12_RESOURCE_DESC1 d3d_desc;
ID3D12Resource *d3d_resource;
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
void *mapped; // Host pointer when the resource is mapped, else null
// Buffer info
u64 buffer_size;
u64 buffer_size_actual; // Allocated size (may exceed requested buffer_size)
// Texture info
b32 is_texture;
G_Format texture_format;
Vec3I32 texture_dims;
i32 texture_mips;
// Per-mip barrier layout tracked during command-list recording — TODO confirm
D3D12_BARRIER_LAYOUT cmdlist_texture_layouts[G_D12_MaxMips];
// Sampler info
G_SamplerDesc sampler_desc;
// Backbuffer info
struct G_D12_Swapchain *swapchain; // Owning swapchain when this is a backbuffer
u64 name_len;
u8 name_text[G_D12_MaxNameLen];
};
// Doubly-linked resource list with count.
Struct(G_D12_ResourceList)
{
u64 count;
G_D12_Resource *first;
G_D12_Resource *last;
};
////////////////////////////////////////////////////////////
//~ Descriptor types
Enum(G_D12_DescriptorHeapKind)
{
G_D12_DescriptorHeapKind_CbvSrvUav,
G_D12_DescriptorHeapKind_Rtv,
G_D12_DescriptorHeapKind_Sampler,
G_D12_DescriptorHeapKind_COUNT
};
// One D3D12 descriptor heap plus a free list of descriptor batches,
// guarded by `mutex`.
Struct(G_D12_DescriptorHeap)
{
Arena *descriptors_arena;
G_D12_DescriptorHeapKind kind;
D3D12_DESCRIPTOR_HEAP_TYPE type;
u32 per_batch_count; // Handles per G_D12_Descriptor batch
u32 descriptor_size; // Per-descriptor increment reported by the device
ID3D12DescriptorHeap *d3d_heap;
D3D12_CPU_DESCRIPTOR_HANDLE start_handle;
Mutex mutex;
struct G_D12_Descriptor *first_free;
u32 max_count;
};
// A batch of descriptors allocated from a heap; retired once its queue passes
// `completion_queue_target`.
Struct(G_D12_Descriptor)
{
G_D12_Descriptor *next;
G_D12_Descriptor *prev;
struct G_D12_Arena *gpu_arena;
G_QueueKind completion_queue_kind;
i64 completion_queue_target;
G_D12_DescriptorHeap *heap;
D3D12_CPU_DESCRIPTOR_HANDLE first_handle;
u32 index;
};
Struct(G_D12_DescriptorList)
{
u64 count;
G_D12_Descriptor *first;
G_D12_Descriptor *last;
};
////////////////////////////////////////////////////////////
//~ Arena types
// TODO:
// To support D3D12_RESOURCE_HEAP_TIER_1 devices, create separate heaps for:
// - Buffers
// - Non-render target & non-depth stencil textures
// - Render target or depth stencil textures
Enum(G_D12_ResourceHeapKind)
{
G_D12_ResourceHeapKind_Gpu,
G_D12_ResourceHeapKind_Cpu,
G_D12_ResourceHeapKind_CpuWriteCombined,
G_D12_ResourceHeapKind_COUNT
};
// GPU-side allocation arena: owns live descriptors/resources plus lists of
// items pending reset.
Struct(G_D12_Arena)
{
Arena *arena;
G_D12_DescriptorList descriptors;
G_D12_DescriptorList reset_descriptors_by_heap[G_D12_DescriptorHeapKind_COUNT];
G_D12_ResourceList resources;
G_D12_ResourceList reset_resources;
// G_D12_ResourceList free_resources;
};
////////////////////////////////////////////////////////////
//~ Staging types
// Ring buffer of upload memory; `base` is the persistently mapped pointer into
// `resource`. Regions are tracked as a list of nodes.
Struct(G_D12_StagingRing)
{
Arena *arena;
G_D12_Arena *gpu_arena;
u64 size;
G_D12_Resource *resource;
u8 *base;
struct G_D12_StagingRegionNode *head_region_node;
struct G_D12_StagingRegionNode *first_free_region_node;
};
// One reserved span of the staging ring; reclaimable once `completion_target`
// is reached by the owning queue.
Struct(G_D12_StagingRegionNode)
{
G_D12_StagingRing *ring;
// Ring links (requires ring lock to read)
G_D12_StagingRegionNode *prev;
G_D12_StagingRegionNode *next;
// Command list links
G_D12_StagingRegionNode *next_in_command_list;
// Region info
Atomic64 completion_target;
u64 pos;
};
////////////////////////////////////////////////////////////
//~ Command queue types
Struct(G_D12_CommandQueueDesc)
{
D3D12_COMMAND_LIST_TYPE type;
D3D12_COMMAND_QUEUE_PRIORITY priority;
String name;
};
// One backend queue: the D3D12 queue, its commit fence/target, the shader
// print buffers bound to it, committed raw lists, and its staging ring.
Struct(G_D12_Queue)
{
ID3D12CommandQueue *d3d_queue;
G_D12_CommandQueueDesc desc;
Mutex commit_mutex; // Guards commit_fence / commit_fence_target
ID3D12Fence *commit_fence;
u64 commit_fence_target;
// Global resources
u64 print_buffer_size;
G_ResourceHandle print_buffer;
G_ResourceHandle print_readback_buffer;
G_ByteAddressBufferRef print_buffer_ref;
// Raw command lists
struct G_D12_RawCommandList *first_committed_cl;
struct G_D12_RawCommandList *last_committed_cl;
// Staging heap
Mutex staging_mutex;
G_D12_StagingRing *staging_ring;
Fence sync_fence;
};
////////////////////////////////////////////////////////////
//~ Raw command list types
// A D3D12 allocator + command list pair, recycled once the owning queue
// passes `commit_fence_target`.
Struct(G_D12_RawCommandList)
{
G_D12_Queue *queue;
G_D12_RawCommandList *next;
u64 commit_fence_target;
ID3D12CommandAllocator *d3d_ca;
ID3D12GraphicsCommandList7 *d3d_cl;
// Direct queue command lists keep a constant list of CPU-only descriptors
G_D12_Descriptor *rtv_descriptors[G_MaxRenderTargets];
G_D12_Descriptor *rtv_clear_descriptor;
};
////////////////////////////////////////////////////////////
//~ Releasable types
// Deferred-release record: the D3D12 resource is released once
// `completion_queue_kind` reaches `completion_queue_target`.
Struct(G_D12_Releasable)
{
G_D12_Releasable *next;
G_QueueKind completion_queue_kind;
i64 completion_queue_target;
ID3D12Resource *d3d_resource;
u64 name_len;
u8 name_text[G_D12_MaxNameLen];
};
Struct(G_D12_ReleasableList)
{
G_D12_Releasable *first;
G_D12_Releasable *last;
};
////////////////////////////////////////////////////////////
//~ Command list types
#define G_D12_CmdsPerChunk 256
// Deferred command kinds recorded into G_D12_CmdList and replayed onto a raw
// D3D12 command list at commit time.
Enum(G_D12_CmdKind)
{
G_D12_CmdKind_None,
G_D12_CmdKind_Barrier,
G_D12_CmdKind_Constant,
G_D12_CmdKind_CopyBytes,
G_D12_CmdKind_CopyTexels,
G_D12_CmdKind_Compute,
G_D12_CmdKind_Rasterize,
G_D12_CmdKind_ClearRtv,
G_D12_CmdKind_Log,
G_D12_CmdKind_Discard,
};
// Tagged union: `kind` selects the active union member; `skip` marks commands
// elided during replay.
Struct(G_D12_Cmd)
{
G_D12_CmdKind kind;
b32 skip;
union
{
struct
{
i32 slot;
u32 value;
} constant;
struct
{
G_MemoryBarrierDesc desc;
// Post-batch data
b32 is_end_of_batch;
u64 batch_gen;
} barrier;
struct
{
G_D12_Resource *dst;
G_D12_Resource *src;
u64 dst_offset;
RngU64 src_range;
} copy_bytes;
struct
{
G_D12_Resource *dst;
G_D12_Resource *src;
D3D12_TEXTURE_COPY_LOCATION dst_loc;
D3D12_TEXTURE_COPY_LOCATION src_loc;
Vec3I32 dst_texture_offset;
Rng3I32 src_texture_range;
} copy_texels;
struct
{
ComputeShaderDesc cs;
Vec3I32 groups;
} compute;
struct
{
VertexShaderDesc vs;
PixelShaderDesc ps;
u32 instances_count;
G_IndexBufferDesc index_buffer_desc;
G_RenderTargetDesc render_target_descs[G_MaxRenderTargets];
Rng3 viewport;
Rng2 scissor;
G_RasterMode raster_mode;
} rasterize;
struct
{
G_D12_Resource *render_target;
Vec4 color;
i32 mip;
} clear_rtv;
struct
{
G_D12_Resource *resource;
} log;
struct
{
G_D12_Resource *resource;
} discard;
};
};
// Fixed-capacity chunk of commands (G_D12_CmdsPerChunk per chunk).
Struct(G_D12_CmdChunk)
{
G_D12_CmdChunk *next;
struct G_D12_CmdList *cl;
G_D12_Cmd *cmds;
u64 cmds_count;
};
// The user-facing command list: chunked command storage plus the descriptors,
// releasables and staging regions whose lifetime is tied to this submission.
Struct(G_D12_CmdList)
{
G_D12_CmdList *next;
G_QueueKind queue_kind;
G_D12_DescriptorList reset_descriptors;
G_D12_ReleasableList releases;
G_D12_StagingRegionNode *first_staging_region;
G_D12_StagingRegionNode *last_staging_region;
G_D12_CmdChunk *first_cmd_chunk;
G_D12_CmdChunk *last_cmd_chunk;
u64 chunks_count;
u64 cmds_count;
};
////////////////////////////////////////////////////////////
//~ Swapchain types
Struct(G_D12_Swapchain)
{
IDXGISwapChain3 *d3d_swapchain;
HWND window_hwnd;
HANDLE waitable; // Frame-latency waitable object (see G_D12_SwapchainFlags)
HANDLE present_event;
ID3D12Fence *present_fence;
u64 present_fence_target;
G_Format backbuffers_format;
Vec2I32 backbuffers_resolution;
G_D12_Resource backbuffers[G_D12_SwapchainBufferCount];
};
////////////////////////////////////////////////////////////
//~ State types
// Per-async-worker release bookkeeping.
Struct(G_D12_AsyncCtx)
{
G_D12_ReleasableList pending_releases;
G_D12_ReleasableList free_releases;
};
// Global backend state (single instance: G_D12).
Struct(G_D12_Ctx)
{
IsolatedAtomic64 resource_creation_gen;
b32 independent_devices_enabled;
b32 debug_layer_enabled;
b32 validation_layer_enabled;
// Stats
Atomic64 arenas_count;
Atomic64 cumulative_nonreuse_count;
Atomic64 driver_resources_allocated;
Atomic64 driver_descriptors_allocated;
// Queues
G_D12_Queue queues[G_QueueKind_COUNT];
// Descriptor heaps
G_D12_DescriptorHeap descriptor_heaps[G_D12_DescriptorHeapKind_COUNT];
// Rootsig
ID3D12RootSignature *bindless_rootsig;
// Pipelines
G_D12_PipelineBin pipeline_bins[1024];
// Command lists
Mutex free_cmd_lists_mutex;
G_D12_CmdList *first_free_cmd_list;
// Command chunks
Mutex free_cmd_chunks_mutex;
G_D12_CmdChunk *first_free_cmd_chunk;
// Swapchains
Mutex free_swapchains_mutex;
G_D12_Swapchain *first_free_swapchain;
// Independent device (only valid when independent_devices_enabled = 1)
struct
{
ID3D12SDKConfiguration1 *sdk_config;
ID3D12DeviceConfiguration *device_config;
ID3D12DeviceFactory *device_factory;
} independent;
// Device
IDXGIFactory6 *dxgi_factory;
IDXGIAdapter3 *dxgi_adapter;
ID3D12Device10 *device;
// Release-queue
Mutex pending_releases_mutex;
Mutex free_releases_mutex;
G_D12_ReleasableList pending_releases;
G_D12_ReleasableList free_releases;
// Async
G_D12_AsyncCtx async_ctx;
};
// Per-thread backend state.
Struct(G_D12_ThreadLocalCtx)
{
HANDLE sync_event; // Event used for CPU-side fence waits
};
extern G_D12_Ctx G_D12;
extern ThreadLocal G_D12_ThreadLocalCtx G_D12_tl;
////////////////////////////////////////////////////////////
//~ Helpers
// Handles are just the backend pointer stored in a u64.
#define G_D12_MakeHandle(type, ptr) (type) { .v = (u64)(ptr) }
G_D12_Arena *G_D12_ArenaFromHandle(G_ArenaHandle handle);
G_D12_CmdList *G_D12_CmdListFromHandle(G_CommandListHandle handle);
G_D12_Resource *G_D12_ResourceFromHandle(G_ResourceHandle handle);
G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle);
// Enum translation between the frontend abstraction and D3D12.
DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format);
D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages);
D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses);
D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout);
String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout);
void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip);
void G_D12_SetObjectName(ID3D12Object *object, String name);
String G_D12_NameFromObject(Arena *arena, ID3D12Object *object);
////////////////////////////////////////////////////////////
//~ Pipeline
// Looks up (or builds) the cached PSO for `desc` via the pipeline hash bins.
G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc);
u64 G_D12_HashFromPipelineDesc(G_D12_PipelineDesc desc);
////////////////////////////////////////////////////////////
//~ Queue
G_D12_Queue *G_D12_QueueFromKind(G_QueueKind kind);
////////////////////////////////////////////////////////////
//~ Raw command list
G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind);
i64 G_D12_CommitRawCommandList(G_D12_RawCommandList *cl);
////////////////////////////////////////////////////////////
//~ Arena
void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena);
////////////////////////////////////////////////////////////
//~ Descriptor
G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, u32 index);
G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind);
////////////////////////////////////////////////////////////
//~ Command helpers
G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl);
G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v);
G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size);
////////////////////////////////////////////////////////////
//~ Collection worker
void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane);
////////////////////////////////////////////////////////////
//~ Async
void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame);

Binary file not shown.

Binary file not shown.

BIN
src/gpu_old/gpu_res/noise_128x128x64_16.dat (Stored with Git LFS) Normal file

Binary file not shown.

333
src/gpu_old/gpu_shared.cgh Normal file
View File

@ -0,0 +1,333 @@
////////////////////////////////////////////////////////////
//~ Ref types
// Kinds of bindless shader resource references (see G_PushRef / G_RefDesc).
Enum(G_RefKind)
{
G_RefKind_StructuredBuffer,
G_RefKind_ByteAddressBuffer,
G_RefKind_Texture1D,
G_RefKind_Texture2D,
G_RefKind_Texture3D,
G_RefKind_SamplerState,
};
// Strongly-typed wrappers around a descriptor-heap index (u32).
Struct(G_StructuredBufferRef) { u32 v; };
Struct(G_ByteAddressBufferRef) { u32 v; };
Struct(G_Texture1DRef) { u32 v; };
Struct(G_Texture2DRef) { u32 v; };
Struct(G_Texture3DRef) { u32 v; };
Struct(G_SamplerStateRef) { u32 v; };
// Index 0 is reserved as the nil reference.
#define G_IsRefNil(r) ((r).v == 0)
////////////////////////////////////////////////////////////
//~ Constant types
//
// D3D12 exposes 64 root constants and Vulkan exposes 32 push constants.
// Supposedly AMD hardware will start spilling constants once more than
// 12 are in use - https://gpuopen.com/learn/rdna-performance-guide/
//
#define G_NumGeneralPurposeConstants (24) // Constants available for any usage
#define G_NumReservedConstants (4) // Constants reserved for internal usage by the GPU layer
#define G_NumConstants (G_NumGeneralPurposeConstants + G_NumReservedConstants)
#if IsCpu
// CPU side: the constant's `name` becomes its slot index, and a wrapper struct
// `name__shaderconstanttype` is declared for G_SetConstant to instantiate.
#define G_ForceDeclConstant(type, name, slot) \
enum { name = slot }; \
Struct(name##__shaderconstanttype) { type v; }
// Checked variant: values must fit in one 32-bit constant and use a
// general-purpose slot. G_ForceDeclConstant bypasses both checks.
#define G_DeclConstant(type, name, slot) \
StaticAssert(sizeof(type) <= 4); \
StaticAssert(slot < G_NumGeneralPurposeConstants); \
G_ForceDeclConstant(type, name, slot)
#else
// GPU side: the constant becomes a cbuffer bound at register b<slot>.
#define G_ForceDeclConstant(type, name, slot) cbuffer name : register(b##slot) { type name; }
#define G_DeclConstant(type, name, slot) G_ForceDeclConstant(type, name, slot)
#endif
////////////////////////////////////////////////////////////
//~ Reserved constants
// The constants declared below assume this configuration is accurate for slot usage
StaticAssert(G_NumGeneralPurposeConstants == 24);
StaticAssert(G_NumReservedConstants >= 3);
// Reserved slots 24-26: shader print buffer ref plus two live-tweak values.
G_ForceDeclConstant(G_ByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 24);
G_ForceDeclConstant(b32, G_ShaderConst_TweakB32, 25);
G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 26);
#if IsGpu
#define G_TweakBool G_ShaderConst_TweakB32
#define G_TweakFloat G_ShaderConst_TweakF32
#endif
////////////////////////////////////////////////////////////
//~ Basic samplers
// Common filter/address-mode combinations (filter x clamp/wrap/mirror).
Enum(G_BasicSamplerKind)
{
G_BasicSamplerKind_PointClamp,
G_BasicSamplerKind_PointWrap,
G_BasicSamplerKind_PointMirror,
G_BasicSamplerKind_BilinearClamp,
G_BasicSamplerKind_BilinearWrap,
G_BasicSamplerKind_BilinearMirror,
G_BasicSamplerKind_TrilinearClamp,
G_BasicSamplerKind_TrilinearWrap,
G_BasicSamplerKind_TrilinearMirror,
G_BasicSamplerKind_COUNT
};
////////////////////////////////////////////////////////////
//~ Resource dereference
#if IsGpu
// NOTE: Uniform dereferencing is faster than Non-Uniform on AMD hardware
// Read-write views live at descriptor index ref.v + 1 (see the *DerefRW
// overloads below, which index r.v + 1).
//- Scalar/Uniform dereference
SamplerState G_SDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[r.v]; }
template<typename T> StructuredBuffer<T> G_SDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v]; }
ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture1D<T> G_SDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture2D<T> G_SDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture3D<T> G_SDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> RWStructuredBuffer<T> G_SDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture1D<T> G_SDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture2D<T> G_SDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture3D<T> G_SDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
//- Vector/Non-Uniform dereference
SamplerState G_VDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> StructuredBuffer<T> G_VDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> Texture1D<T> G_VDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> Texture2D<T> G_VDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> Texture3D<T> G_VDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> RWStructuredBuffer<T> G_VDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> RWTexture1D<T> G_VDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> RWTexture2D<T> G_VDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> RWTexture3D<T> G_VDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
#endif
////////////////////////////////////////////////////////////
//~ Resource countof
#define G_MaxMips 16
#define G_MaxRenderTargets 8
#if IsGpu
// countof overloads: element count (buffers) or dimensions (textures) via
// GetDimensions.
template<typename T> u32 countof(StructuredBuffer<T> obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> u32 countof(RWStructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }
u32 countof(ByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; }
u32 countof(RWByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> u32 countof(Texture1D<T> obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> u32 countof(RWTexture1D<T> obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> Vec2U32 countof(Texture2D<T> obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; }
template<typename T> Vec2U32 countof(RWTexture2D<T> obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; }
template<typename T> Vec3U32 countof(Texture3D<T> obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; }
template<typename T> Vec3U32 countof(RWTexture3D<T> obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; }
#endif
////////////////////////////////////////////////////////////
//~ Debug printf
// This technique is based on MJP's article - https://therealmjp.github.io/posts/hlsl-printf/
// The BEGINSIZE* values are sentinels, not real kinds: a kind greater than
// BEGINSIZEn carries at least n u32 components (see the packing in G_PrintF_).
Enum(G_FmtArgKind)
{
G_FmtArgKind_None,
G_FmtArgKind_End,
G_FmtArgKind_BEGINSIZE1,
G_FmtArgKind_Uint,
G_FmtArgKind_Sint,
G_FmtArgKind_Float,
G_FmtArgKind_BEGINSIZE2,
G_FmtArgKind_Uint2,
G_FmtArgKind_Sint2,
G_FmtArgKind_Float2,
G_FmtArgKind_BEGINSIZE3,
G_FmtArgKind_Uint3,
G_FmtArgKind_Sint3,
G_FmtArgKind_Float3,
G_FmtArgKind_BEGINSIZE4,
G_FmtArgKind_Uint4,
G_FmtArgKind_Sint4,
G_FmtArgKind_Float4,
};
// One format argument: kind plus up to four 32-bit components (floats are
// stored bit-cast via asuint).
Struct(G_FmtArg)
{
G_FmtArgKind kind;
Vec4U32 v;
};
#if IsGpu && GPU_SHADER_PRINT
// G_Fmt overloads: wrap a value in a G_FmtArg with the matching kind.
// Float variants bit-cast through asuint so the CPU reader can reinterpret.
G_FmtArg G_Fmt(u32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint; result.v.x = v; return result; }
G_FmtArg G_Fmt(Vec2U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint2; result.v.xy = v.xy; return result; }
G_FmtArg G_Fmt(Vec3U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint3; result.v.xyz = v.xyz; return result; }
G_FmtArg G_Fmt(Vec4U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint4; result.v.xyzw = v.xyzw; return result; }
G_FmtArg G_Fmt(i32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint; result.v.x = v; return result; }
G_FmtArg G_Fmt(Vec2I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint2; result.v.xy = v.xy; return result; }
G_FmtArg G_Fmt(Vec3I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint3; result.v.xyz = v.xyz; return result; }
G_FmtArg G_Fmt(Vec4I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint4; result.v.xyzw = v.xyzw; return result; }
G_FmtArg G_Fmt(f32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float; result.v.x = asuint(v); return result; }
G_FmtArg G_Fmt(Vec2 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float2; result.v.xy = asuint(v.xy); return result; }
G_FmtArg G_Fmt(Vec3 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float3; result.v.xyz = asuint(v.xyz); return result; }
G_FmtArg G_Fmt(Vec4 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float4; result.v.xyzw = asuint(v.xyzw); return result; }
// Sentinel appended by G_PrintF to terminate the argument list.
G_FmtArg G_FmtEnd(void) { G_FmtArg result; result.kind = G_FmtArgKind_End; return result; }
// Thread-local staging buffer for one print call; bytes are packed four per
// u32 chunk. Callers must initialize bytes_count/overflowed before use
// (see G_PrintF_) — the chunk array itself is intentionally uninitialized.
Struct(G_TempPrintBuffer)
{
// NOTE: The larger the array size, the longer the compilation time
u32 byte_chunks[64];
u32 bytes_count; // Bytes pushed so far (chars then packed args)
u32 chars_count; // Length of the format string portion
u32 args_count; // Number of real args (excludes the End sentinel)
b32 overflowed; // Set when byte_chunks capacity was exceeded
};
// Appends the low 8 bits of `v` to the packed byte stream in `buff`.
// Bytes are packed little-endian, four per u32 chunk; when the chunk array is
// full the byte is dropped and `buff.overflowed` is raised instead.
void G_PushPrintByte(inout G_TempPrintBuffer buff, u32 v)
{
  u32 slot = buff.bytes_count >> 2;
  if (slot >= countof(buff.byte_chunks))
  {
    buff.overflowed = 1;
    return;
  }
  u32 shift = (buff.bytes_count & 0x03) * 8;
  u32 low8 = v & 0xFF;
  // buff is not zero initialized, so the first byte of each chunk assigns the
  // whole chunk; later bytes OR into it.
  if (shift == 0)
  {
    buff.byte_chunks[slot] = low8;
  }
  else
  {
    buff.byte_chunks[slot] |= low8 << shift;
  }
  buff.bytes_count += 1;
}
// Reserves space in the queue's global print buffer and copies the packed
// message into it.
// Buffer layout (all u32): [0] allocation byte counter, [4] success counter,
// [8] overflow counter, then variable-size records of {header, payload chunks}.
// Header bits: 0-15 chars_count, 16-30 args_count, 31 overflowed flag.
void G_CommitPrint(G_TempPrintBuffer buff)
{
  RWByteAddressBuffer rw = G_SDerefRW(G_ShaderConst_PrintBufferRef);
  if (buff.overflowed)
  {
    // Local staging overflowed: drop the payload but still emit a header so
    // the CPU reader can report the truncation via the overflow bit.
    buff.bytes_count = 0;
    buff.chars_count = 0;
    buff.args_count = 0;
  }
  u32 chunks_count = (buff.bytes_count + 3) / 4;
  u32 alloc_size = 0;
  alloc_size += 4; // Header
  alloc_size += chunks_count * 4; // Chunks
  // Atomic fetch + add to base counter
  u32 base;
  rw.InterlockedAdd(0, alloc_size, base);
  base += 4; // Offset for allocation counter
  base += 4; // Offset for success counter
  base += 4; // Offset for overflow counter
  if ((base + alloc_size) < countof(rw))
  {
    // Increment success counter
    rw.InterlockedAdd(4, 1);
    u32 pos = 0;
    // Write header
    {
      u32 header = 0;
      header |= (buff.chars_count << 0) & 0x0000FFFF;
      header |= (buff.args_count << 16) & 0x7FFF0000;
      // NOTE: mask fixed from 0xF0000000 to 0x80000000 — the flag lives in
      // bit 31 only; the old mask overlapped the args_count field (bits 28-30).
      header |= (buff.overflowed << 31) & 0x80000000;
      rw.Store(base + pos, header);
      pos += 4;
    }
    // Write chunks
    for (u32 chunk_idx = 0; chunk_idx < chunks_count; ++chunk_idx)
    {
      u32 chunk = buff.byte_chunks[chunk_idx];
      rw.Store(base + pos, chunk);
      pos += 4;
    }
  }
  else
  {
    // Global print buffer is full: count the dropped message.
    rw.InterlockedAdd(8, 1);
  }
}
// G_PrintF_ packs the format string, then each argument (kind byte followed by
// 1-4 little-endian u32 payloads depending on the kind's BEGINSIZE band), into
// a G_TempPrintBuffer and commits it via G_CommitPrint. args_count is
// countof(__args) - 1 because G_PrintF appends a G_FmtEnd() sentinel that is
// packed-around but not emitted.
#define G_PrintF_(fmt, ...) do { \
G_TempPrintBuffer __tmp; \
__tmp.bytes_count = 0; \
__tmp.overflowed = 0; \
u32 __char_idx = 0; \
while (U32FromChar(fmt[__char_idx]) != 0) \
{ \
G_PushPrintByte(__tmp, U32FromChar(fmt[__char_idx])); \
++__char_idx; \
} \
G_FmtArg __args[] = { __VA_ARGS__ }; \
__tmp.chars_count = __char_idx; \
__tmp.args_count = (countof(__args) - 1); \
for (u32 __arg_idx = 0; __arg_idx < __tmp.args_count; ++__arg_idx) \
{ \
G_FmtArg __arg = __args[__arg_idx]; \
G_PushPrintByte(__tmp, __arg.kind); \
if (__arg.kind > G_FmtArgKind_BEGINSIZE1) \
{ \
G_PushPrintByte(__tmp, __arg.v.x >> 0); \
G_PushPrintByte(__tmp, __arg.v.x >> 8); \
G_PushPrintByte(__tmp, __arg.v.x >> 16); \
G_PushPrintByte(__tmp, __arg.v.x >> 24); \
} \
if (__arg.kind > G_FmtArgKind_BEGINSIZE2) \
{ \
G_PushPrintByte(__tmp, __arg.v.y >> 0); \
G_PushPrintByte(__tmp, __arg.v.y >> 8); \
G_PushPrintByte(__tmp, __arg.v.y >> 16); \
G_PushPrintByte(__tmp, __arg.v.y >> 24); \
} \
if (__arg.kind > G_FmtArgKind_BEGINSIZE3) \
{ \
G_PushPrintByte(__tmp, __arg.v.z >> 0); \
G_PushPrintByte(__tmp, __arg.v.z >> 8); \
G_PushPrintByte(__tmp, __arg.v.z >> 16); \
G_PushPrintByte(__tmp, __arg.v.z >> 24); \
} \
if (__arg.kind > G_FmtArgKind_BEGINSIZE4) \
{ \
G_PushPrintByte(__tmp, __arg.v.w >> 0); \
G_PushPrintByte(__tmp, __arg.v.w >> 8); \
G_PushPrintByte(__tmp, __arg.v.w >> 16); \
G_PushPrintByte(__tmp, __arg.v.w >> 24); \
} \
} \
G_CommitPrint(__tmp); \
} while (0)
// Public entry point: appends the G_FmtEnd() sentinel expected by G_PrintF_.
#define G_PrintF(fmt, ...) G_PrintF_(fmt, ##__VA_ARGS__, G_FmtEnd())
#else
// Print disabled: compile away, still accepting (and discarding) arguments.
// NOTE: made variadic — the old stub `G_PrintF(fmt)` failed to compile for any
// call site that passed format arguments when GPU_SHADER_PRINT was off.
#define G_PrintF(fmt, ...)
#endif