vk testing

This commit is contained in:
jacob 2026-03-03 23:49:29 -06:00
parent 38196a8eb7
commit a1cf0a46b5
15 changed files with 6325 additions and 1 deletions

3
src/gpu/gpu.lay generated
View File

@ -27,4 +27,5 @@
@IncludeC gpu_common.c
@DefaultDownstream Win32 gpu_dx12
// @DefaultDownstream Win32 gpu_dx12
@DefaultDownstream Any gpu_vk

11
src/gpu/gpu_vk/gpu_vk.lay generated Normal file
View File

@ -0,0 +1,11 @@
@Layer gpu_vk
//////////////////////////////
//- Api
@IncludeC gpu_vk_core.h
//////////////////////////////
//- Impl
@IncludeC gpu_vk_core.c

View File

@ -0,0 +1,270 @@
// Vulkan backend state: one process-wide context plus one per-thread context.
// Both are zero-initialized; declared `extern` in the matching gpu_vk header.
G_VK_Ctx G_VK = Zi;
ThreadLocal G_VK_ThreadLocalCtx G_VK_tl = Zi;
////////////////////////////////////////////////////////////
//~ @hookimpl Bootstrap
// Vulkan backend bootstrap hook. Intentionally empty for now — backend
// initialization is not yet implemented (this file is all TODO stubs).
void G_Bootstrap(void)
{
}
////////////////////////////////////////////////////////////
//~ @hookimpl Arena
G_ArenaHandle G_AcquireArena(void)
{
// TODO: Impl
return (G_ArenaHandle) Zi;
}
void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena)
{
// TODO: Impl
}
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle)
{
// TODO: Impl
}
////////////////////////////////////////////////////////////
//~ @hookimpl Resource
G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle cl_handle, G_ResourceDesc desc)
{
// TODO: Impl
return (G_ResourceHandle) Zi;
}
////////////////////////////////////////////////////////////
//~ @hookimpl Shader resource reference
u32 G_PushRef(G_ArenaHandle arena_handle, G_ResourceHandle resource_handle, G_RefDesc ref_desc)
{
// TODO: Impl
return 0;
}
//- Count
u64 G_CountBufferBytes(G_ResourceHandle buffer)
{
// TODO: Impl
return 0;
}
i32 G_Count1D(G_ResourceHandle texture)
{
// TODO: Impl
return 0;
}
Vec2I32 G_Count2D(G_ResourceHandle texture)
{
// TODO: Impl
return (Vec2I32) Zi;
}
Vec3I32 G_Count3D(G_ResourceHandle texture)
{
// TODO: Impl
return (Vec3I32) Zi;
}
i32 G_CountWidth(G_ResourceHandle texture)
{
// TODO: Impl
return 0;
}
i32 G_CountHeight(G_ResourceHandle texture)
{
// TODO: Impl
return 0;
}
i32 G_CountDepth(G_ResourceHandle texture)
{
// TODO: Impl
return 0;
}
i32 G_CountMips(G_ResourceHandle texture)
{
// TODO: Impl
return 0;
}
//- Map
void *G_HostPointerFromResource(G_ResourceHandle resource_handle)
{
// TODO: Impl
return 0;
}
////////////////////////////////////////////////////////////
//~ @hookimpl Command
//- Command list
G_CommandListHandle G_PrepareCommandList(G_QueueKind queue)
{
// TODO: Impl
return (G_CommandListHandle) Zi;
}
i64 G_CommitCommandList(G_CommandListHandle cl_handle)
{
// TODO: Impl
return (i64) Zi;
}
//- Cpu -> Gpu staged copy
void G_CopyCpuToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range)
{
// TODO: Impl
}
void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range)
{
// TODO: Impl
}
//- Gpu <-> Gpu copy
void G_CopyBufferToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, G_ResourceHandle src_handle, RngU64 src_copy_range)
{
// TODO: Impl
}
void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset)
{
// TODO: Impl
}
void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range)
{
// TODO: Impl
}
void G_CopyTextureToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range)
{
// TODO: Impl
}
//- Constant
void G_SetConstantEx(G_CommandListHandle cl_handle, i32 slot, void *src_32bit, u32 size)
{
// TODO: Impl
}
//- Memory sync
void G_MemorySyncEx(G_CommandListHandle cl_handle, G_MemoryBarrierDesc desc)
{
// TODO: Impl
}
//- Compute
void G_ComputeEx(G_CommandListHandle cl_handle, ComputeShaderDesc cs, Vec3I32 threads)
{
// TODO: Impl
}
//- Rasterize
void G_Rasterize(
G_CommandListHandle cl_handle,
VertexShaderDesc vs, PixelShaderDesc ps,
u32 instances_count, G_IndexBufferDesc index_buffer,
u32 render_targets_count, G_RenderTargetDesc *render_targets,
Rng3 viewport, Rng2 scissor,
G_RasterMode raster_mode
)
{
// TODO: Impl
}
//- Clear
void G_ClearRenderTarget(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle, Vec4 color, i32 mip)
{
// TODO: Impl
}
//- Log
void G_LogResource(G_CommandListHandle cl_handle, G_ResourceHandle resource_handle)
{
// TODO: Impl
}
////////////////////////////////////////////////////////////
//~ @hookimpl Queue synchronization
i64 G_CompletionValueFromQueue(G_QueueKind queue_kind)
{
// TODO: Impl
return (i64) Zi;
}
i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind)
{
// TODO: Impl
return (i64) Zi;
}
G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask)
{
// TODO: Impl
return (G_QueueCompletions) Zi;
}
G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask)
{
// TODO: Impl
return (G_QueueCompletions) Zi;
}
void G_QueueSyncEx(G_QueueBarrierDesc desc)
{
// TODO: Impl
}
////////////////////////////////////////////////////////////
//~ @hookimpl Statistics
G_Stats G_QueryStats(void)
{
// TODO: Impl
return (G_Stats) Zi;
}
////////////////////////////////////////////////////////////
//~ @hookimpl Swapchain
G_SwapchainHandle G_AcquireSwapchain(u64 os_window_handle)
{
// TODO: Impl
return (G_SwapchainHandle) Zi;
}
void G_ReleaseSwapchain(G_SwapchainHandle swapchain_handle)
{
// TODO: Impl
}
G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size)
{
// TODO: Impl
return (G_ResourceHandle) Zi;
}
void G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync)
{
// TODO: Impl
}

View File

@ -0,0 +1,15 @@
////////////////////////////////////////////////////////////
//~ State types
// Process-wide Vulkan backend state. Placeholder for now; `_` only exists so
// the struct is non-empty.
Struct(G_VK_Ctx)
{
i32 _;
};
// Per-thread Vulkan backend state (placeholder).
Struct(G_VK_ThreadLocalCtx)
{
i32 _;
};
// Defined in gpu_vk_core.c. (Fixed: was `G_Vk`, which is a different
// identifier than the `G_VK` definition — C is case-sensitive, so the extern
// declared an object that is never defined.)
extern G_VK_Ctx G_VK;
extern ThreadLocal G_VK_ThreadLocalCtx G_VK_tl;

30
src/gpu_old/gpu.lay generated Normal file
View File

@ -0,0 +1,30 @@
@Layer gpu_old
//////////////////////////////
//- Dependencies
@Dep platform
//////////////////////////////
//- Resources
@EmbedDir G_Resources gpu_res
//////////////////////////////
//- Api
@IncludeC gpu_shared.cgh
@IncludeC gpu_core.h
@IncludeC gpu_common.h
@IncludeG gpu_shared.cgh
@Bootstrap G_Bootstrap
@Bootstrap G_BootstrapCommon
//////////////////////////////
//- Impl
@IncludeC gpu_common.c
@DefaultDownstream Win32 gpu_dx12

264
src/gpu_old/gpu_common.c Normal file
View File

@ -0,0 +1,264 @@
// Backend-agnostic gpu state: the shared resources created by G_BootstrapCommon.
G_Ctx G = Zi;
// Per-thread state: the lazily-acquired permanent gpu arena (see G_PermArena).
ThreadLocal G_ThreadLocalCtx G_tl = Zi;
////////////////////////////////////////////////////////////
//~ Bootstrap
// Backend-independent gpu bootstrap. Records one direct-queue command list
// that creates the shared resources stored in `G`:
//   - a quad index buffer (two triangles),
//   - an 8x8 zero-initialized "blank" RGBA8 texture,
//   - a 128x128x64 R16_Uint noise texture loaded from embedded resources,
//   - one sampler per G_BasicSamplerKind,
// then commits the list and issues a queue sync against all queues.
void G_BootstrapCommon(void)
{
G_ArenaHandle gpu_perm = G_PermArena();
G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct);
{
// Init quad index buffer
{
G_ResourceHandle quad_indices = Zi;
u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 };
quad_indices = G_PushBuffer(gpu_perm, cl, u16, countof(quad_data));
G_CopyCpuToBuffer(cl, quad_indices, 0, quad_data, RNGU64(0, sizeof(quad_data)));
G.quad_indices = G_IdxBuff16(quad_indices);
}
// Init blank texture
{
G_ResourceHandle blank_tex = G_PushTexture2D(
gpu_perm, cl,
G_Format_R8G8B8A8_Unorm,
VEC2I32(8, 8),
G_Layout_Common,
.flags = G_ResourceFlag_ZeroMemory,
.name = Lit("Blank texture")
);
G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex);
}
// Init noise texture
{
G_ResourceHandle noise_tex = Zi;
String noise_data = DataFromResource(ResourceKeyFromStore(&G_Resources, Lit("noise_128x128x64_16.dat")));
Vec3I32 noise_dims = VEC3I32(128, 128, 64);
// Expected size is 2 bytes per texel (texture is created as G_Format_R16_Uint below)
if (noise_data.len != noise_dims.x * noise_dims.y * noise_dims.z * 2)
{
Panic(Lit("Unexpected noise texture size"));
}
noise_tex = G_PushTexture3D(
gpu_perm, cl,
G_Format_R16_Uint,
noise_dims,
G_Layout_Common,
.name = Lit("Noise texture")
);
G_CopyCpuToTexture(
cl,
noise_tex, VEC3I32(0, 0, 0),
noise_data.text, noise_dims,
RNG3I32(VEC3I32(0, 0, 0), noise_dims)
);
G.basic_noise = G_PushTexture3DRef(gpu_perm, noise_tex);
}
// Init basic samplers
// Kinds without an explicit case assert in debug builds, then fall
// through to the PointClamp configuration.
for (G_BasicSamplerKind sampler_kind = 0; sampler_kind < countof(G.basic_samplers); ++sampler_kind)
{
G_SamplerStateRef sampler = Zi;
switch (sampler_kind)
{
default:
{
// Sampler unspecified
Assert(0);
} FALLTHROUGH;
case G_BasicSamplerKind_PointClamp:
{
G_Filter filter = G_Filter_MinMagMipPoint;
G_AddressMode address_mode = G_AddressMode_Clamp;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_PointWrap:
{
G_Filter filter = G_Filter_MinMagMipPoint;
G_AddressMode address_mode = G_AddressMode_Wrap;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_PointMirror:
{
G_Filter filter = G_Filter_MinMagMipPoint;
G_AddressMode address_mode = G_AddressMode_Mirror;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_BilinearClamp:
{
G_Filter filter = G_Filter_MinMagLinearMipPoint;
G_AddressMode address_mode = G_AddressMode_Clamp;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_BilinearWrap:
{
G_Filter filter = G_Filter_MinMagLinearMipPoint;
G_AddressMode address_mode = G_AddressMode_Wrap;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_BilinearMirror:
{
G_Filter filter = G_Filter_MinMagLinearMipPoint;
G_AddressMode address_mode = G_AddressMode_Mirror;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_TrilinearClamp:
{
G_Filter filter = G_Filter_MinMagMipLinear;
G_AddressMode address_mode = G_AddressMode_Clamp;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_TrilinearWrap:
{
G_Filter filter = G_Filter_MinMagMipLinear;
G_AddressMode address_mode = G_AddressMode_Wrap;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
case G_BasicSamplerKind_TrilinearMirror:
{
G_Filter filter = G_Filter_MinMagMipLinear;
G_AddressMode address_mode = G_AddressMode_Mirror;
G_ResourceHandle sampler_res = G_PushSampler(gpu_perm, cl, .filter = filter, .x = address_mode, .y = address_mode, .z = address_mode);
sampler = G_PushSamplerStateRef(gpu_perm, sampler_res);
} break;
}
G.basic_samplers[sampler_kind] = sampler;
}
}
G_CommitCommandList(cl);
// NOTE(review): presumably blocks/synchronizes so all queues observe the new
// resources before use — confirm against G_QueueSync (macro not visible here).
G_QueueSync(G_QueueMask_Direct, G_QueueMask_All);
}
////////////////////////////////////////////////////////////
//~ Utils
//- Arena
// Return the calling thread's permanent gpu arena, acquiring it on first use.
// The handle is cached in thread-local state, so only the first call on a
// given thread pays for G_AcquireArena().
G_ArenaHandle G_PermArena(void)
{
  G_ArenaHandle arena = G_tl.gpu_perm;
  if (G_IsArenaNil(arena))
  {
    arena = G_AcquireArena();
    G_tl.gpu_perm = arena;
  }
  return arena;
}
//- Push resource from cpu
// Create a buffer described by `desc` on `gpu_arena`, then record a staged
// cpu -> gpu copy of `src` into it on command list `cl`. Returns the buffer.
// NOTE(review): assumes desc.size >= src.len; the G_PushBufferFromCpuCopy
// macro ensures this by defaulting .size to src.len — confirm any direct callers.
G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferDesc desc)
{
G_ResourceHandle buffer = G_PushResource(gpu_arena, cl, (G_ResourceDesc) { .kind = G_ResourceKind_Buffer, .buffer = desc });
G_CopyCpuToBuffer(cl, buffer, 0, src.text, RNGU64(0, src.len));
return buffer;
}
//- Mip
// Size of mip level `mip` given the mip-0 size: each positive mip step halves
// the size, each negative step doubles it; the result is floored at 1.
// `mip` is clamped to [-31, 31] so the i32 shifts stay well-defined.
// Fixed: previously shifted `result` (initialized to 0) instead of
// `mip0_dims`, so the function always returned 1. The 2D/3D variants below
// shift `mip0_dims.x` etc., which this now matches.
i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip)
{
  mip = ClampI32(mip, -31, 31);
  i32 result = 0;
  if (mip >= 0)
  {
    result = MaxI32(mip0_dims >> mip, 1);
  }
  else
  {
    result = MaxI32(mip0_dims << -mip, 1);
  }
  return result;
}
// Per-axis mip-level dimensions for a 2d texture: each axis is halved per
// positive mip step, doubled per negative step, and floored at 1.
// The mip index is clamped to [-31, 31] to keep the i32 shifts defined.
Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip)
{
  i32 level = ClampI32(mip, -31, 31);
  Vec2I32 dims = Zi;
  if (level < 0)
  {
    dims.x = MaxI32(mip0_dims.x << -level, 1);
    dims.y = MaxI32(mip0_dims.y << -level, 1);
  }
  else
  {
    dims.x = MaxI32(mip0_dims.x >> level, 1);
    dims.y = MaxI32(mip0_dims.y >> level, 1);
  }
  return dims;
}
// Per-axis mip-level dimensions for a 3d texture: each axis is halved per
// positive mip step, doubled per negative step, and floored at 1.
// The mip index is clamped to [-31, 31] to keep the i32 shifts defined.
Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip)
{
  i32 level = ClampI32(mip, -31, 31);
  Vec3I32 dims = Zi;
  if (level < 0)
  {
    dims.x = MaxI32(mip0_dims.x << -level, 1);
    dims.y = MaxI32(mip0_dims.y << -level, 1);
    dims.z = MaxI32(mip0_dims.z << -level, 1);
  }
  else
  {
    dims.x = MaxI32(mip0_dims.x >> level, 1);
    dims.y = MaxI32(mip0_dims.y >> level, 1);
    dims.z = MaxI32(mip0_dims.z >> level, 1);
  }
  return dims;
}
//- Thread count
// Ceil-divide the requested thread counts by the compute shader's group size
// (cs.x/y/z) to get the number of thread groups to dispatch per axis.
// NOTE(review): assumes cs.x/y/z > 0 — a zero group size would divide by zero.
Vec3I32 G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads)
{
return VEC3I32(
(threads.x + cs.x - 1) / cs.x,
(threads.y + cs.y - 1) / cs.y,
(threads.z + cs.z - 1) / cs.z
);
}
//- Viewport / scissor
// Viewport covering the whole texture: origin (0,0,0), extent = texture
// width/height, depth range [0, 1].
Rng3 G_ViewportFromTexture(G_ResourceHandle texture)
{
  Vec2I32 size = G_Count2D(texture);
  Rng3 viewport = RNG3(VEC3(0, 0, 0), VEC3(size.x, size.y, 1));
  return viewport;
}
// Scissor rect covering the whole texture: (0,0) to (width, height).
Rng2 G_ScissorFromTexture(G_ResourceHandle texture)
{
  Vec2I32 size = G_Count2D(texture);
  Rng2 scissor = RNG2(VEC2(0, 0), VEC2(size.x, size.y));
  return scissor;
}
//- Shared resources
// Shared-resource accessor: basic sampler created by G_BootstrapCommon.
// NOTE(review): `kind` is not bounds-checked — assumes kind < G_BasicSamplerKind_COUNT.
G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind)
{
return G.basic_samplers[kind];
}
// Shared quad index buffer (two triangles: 0,1,2 / 0,2,3), created by G_BootstrapCommon.
G_IndexBufferDesc G_QuadIndices(void)
{
return G.quad_indices;
}
// Shared 8x8 zero-initialized RGBA8 texture, created by G_BootstrapCommon.
G_Texture2DRef G_BlankTexture2D(void)
{
return G.blank_tex;
}
// Shared 128x128x64 R16_Uint noise texture, created by G_BootstrapCommon.
G_Texture3DRef G_BasicNoiseTexture(void)
{
return G.basic_noise;
}

53
src/gpu_old/gpu_common.h Normal file
View File

@ -0,0 +1,53 @@
////////////////////////////////////////////////////////////
//~ State types
Struct(G_Ctx)
{
// Common shared resources
G_IndexBufferDesc quad_indices;
G_Texture2DRef blank_tex;
G_Texture3DRef basic_noise;
G_SamplerStateRef basic_samplers[G_BasicSamplerKind_COUNT];
};
Struct(G_ThreadLocalCtx)
{
G_ArenaHandle gpu_perm;
};
extern G_Ctx G;
extern ThreadLocal G_ThreadLocalCtx G_tl;
////////////////////////////////////////////////////////////
//~ Bootstrap
void G_BootstrapCommon(void);
////////////////////////////////////////////////////////////
//~ Utils
//- Arena
G_ArenaHandle G_PermArena(void);
//- Push resource from cpu
G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandListHandle cl, String src, G_BufferDesc desc);
#define G_PushBufferFromCpuCopy(_arena, _cl, _src, ...) \
G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ })
//- Mip
i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip);
Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip);
Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip);
//- Thread count
Vec3I32 G_GroupCountFromThreadCount(ComputeShaderDesc cs, Vec3I32 threads);
//- Viewport / scissor
Rng3 G_ViewportFromTexture(G_ResourceHandle texture);
Rng2 G_ScissorFromTexture(G_ResourceHandle texture);
//- Shared resources
G_SamplerStateRef G_BasicSamplerFromKind(G_BasicSamplerKind kind);
G_IndexBufferDesc G_QuadIndices(void);
G_Texture2DRef G_BlankTexture2D(void);
G_Texture3DRef G_BasicNoiseTexture(void);

795
src/gpu_old/gpu_core.h Normal file
View File

@ -0,0 +1,795 @@
////////////////////////////////////////////////////////////
//~ Handle types
Struct(G_ArenaHandle) { u64 v; };
Struct(G_CommandListHandle) { u64 v; };
Struct(G_ResourceHandle) { u64 v; };
Struct(G_SwapchainHandle) { u64 v; };
#define G_IsArenaNil(h) ((h).v == 0)
#define G_IsCommandListNil(h) ((h).v == 0)
#define G_IsResourceNil(h) ((h).v == 0)
#define G_IsSwapchainNil(h) ((h).v == 0)
////////////////////////////////////////////////////////////
//~ Queue types
#define G_IsMultiQueueEnabled 1
Enum(G_QueueKind)
{
G_QueueKind_Direct = 0,
#if G_IsMultiQueueEnabled
G_QueueKind_AsyncCompute = 1,
G_QueueKind_AsyncCopy = 2,
#else
G_QueueKind_AsyncCompute = G_QueueKind_Direct,
G_QueueKind_AsyncCopy = G_QueueKind_Direct,
#endif
G_QueueKind_COUNT
};
Enum(G_QueueMask)
{
G_QueueMask_None = 0,
G_QueueMask_Direct = (1 << 0),
#if G_IsMultiQueueEnabled
G_QueueMask_AsyncCompute = (1 << 1),
G_QueueMask_AsyncCopy = (1 << 2),
#else
G_QueueMask_AsyncCompute = G_QueueMask_Direct,
G_QueueMask_AsyncCopy = G_QueueMask_Direct,
#endif
G_QueueMask_All = (0xFFFFFFFF >> (32 - G_QueueKind_COUNT))
};
// Convert a G_QueueKind index to its single-bit G_QueueMask.
// The argument is parenthesized so compound expressions expand correctly
// (e.g. G_QueueMaskFromKind(a | b) previously became (1 << a) | b).
#define G_QueueMaskFromKind(queue_kind) (1 << (queue_kind))
Struct(G_QueueCompletions)
{
i64 v[G_QueueKind_COUNT]; // Array of completions indexed by queue kind
};
// All waiters will wait until specified queues reach their value in the `completions` array
Struct(G_QueueBarrierDesc)
{
G_QueueCompletions completions; // Completions that waiters should wait for
G_QueueMask wait_queues; // Mask of queues that will wait for completions
b32 wait_cpu; // Will the cpu wait for completion
};
////////////////////////////////////////////////////////////
//~ Format types
// NOTE: Matches DirectX DXGI_FORMAT
Enum(G_Format)
{
G_Format_Unknown = 0,
G_Format_R32G32B32A32_Typeless = 1,
G_Format_R32G32B32A32_Float = 2,
G_Format_R32G32B32A32_Uint = 3,
G_Format_R32G32B32A32_Sint = 4,
G_Format_R32G32B32_Typeless = 5,
G_Format_R32G32B32_Float = 6,
G_Format_R32G32B32_Uint = 7,
G_Format_R32G32B32_Sint = 8,
G_Format_R16G16B16A16_Typeless = 9,
G_Format_R16G16B16A16_Float = 10,
G_Format_R16G16B16A16_Unorm = 11,
G_Format_R16G16B16A16_Uint = 12,
G_Format_R16G16B16A16_Snorm = 13,
G_Format_R16G16B16A16_Sint = 14,
G_Format_R32G32_Typeless = 15,
G_Format_R32G32_Float = 16,
G_Format_R32G32_Uint = 17,
G_Format_R32G32_Sint = 18,
G_Format_R32G8X24_Typeless = 19,
G_Format_D32_Float_S8X24_Uint = 20,
G_Format_R32_Float_X8X24_Typeless = 21,
G_Format_X32_Typeless_G8X24_Uint = 22,
G_Format_R10G10B10A2_Typeless = 23,
G_Format_R10G10B10A2_Unorm = 24,
G_Format_R10G10B10A2_Uint = 25,
G_Format_R11G11B10_Float = 26,
G_Format_R8G8B8A8_Typeless = 27,
G_Format_R8G8B8A8_Unorm = 28,
G_Format_R8G8B8A8_Unorm_Srgb = 29,
G_Format_R8G8B8A8_Uint = 30,
G_Format_R8G8B8A8_Snorm = 31,
G_Format_R8G8B8A8_Sint = 32,
G_Format_R16G16_Typeless = 33,
G_Format_R16G16_Float = 34,
G_Format_R16G16_Unorm = 35,
G_Format_R16G16_Uint = 36,
G_Format_R16G16_Snorm = 37,
G_Format_R16G16_Sint = 38,
G_Format_R32_Typeless = 39,
G_Format_D32_Float = 40,
G_Format_R32_Float = 41,
G_Format_R32_Uint = 42,
G_Format_R32_Sint = 43,
G_Format_R24G8_Typeless = 44,
G_Format_D24_Unorm_S8_Uint = 45,
G_Format_R24_Unorm_X8_Typeless = 46,
G_Format_X24_Typeless_G8_Uint = 47,
G_Format_R8G8_Typeless = 48,
G_Format_R8G8_Unorm = 49,
G_Format_R8G8_Uint = 50,
G_Format_R8G8_Snorm = 51,
G_Format_R8G8_Sint = 52,
G_Format_R16_Typeless = 53,
G_Format_R16_Float = 54,
G_Format_D16_Unorm = 55,
G_Format_R16_Unorm = 56,
G_Format_R16_Uint = 57,
G_Format_R16_Snorm = 58,
G_Format_R16_Sint = 59,
G_Format_R8_Typeless = 60,
G_Format_R8_Unorm = 61,
G_Format_R8_Uint = 62,
G_Format_R8_Snorm = 63,
G_Format_R8_Sint = 64,
G_Format_A8_Unorm = 65,
G_Format_R1_Unorm = 66,
G_Format_R9G9B9E5_SharedXP = 67,
G_Format_R8G8_B8G8_Unorm = 68,
G_Format_G8R8_G8B8_Unorm = 69,
G_Format_BC1_Typeless = 70,
G_Format_BC1_Unorm = 71,
G_Format_BC1_Unorm_Srgb = 72,
G_Format_BC2_Typeless = 73,
G_Format_BC2_Unorm = 74,
G_Format_BC2_Unorm_Srgb = 75,
G_Format_BC3_Typeless = 76,
G_Format_BC3_Unorm = 77,
G_Format_BC3_Unorm_Srgb = 78,
G_Format_BC4_Typeless = 79,
G_Format_BC4_Unorm = 80,
G_Format_BC4_Snorm = 81,
G_Format_BC5_Typeless = 82,
G_Format_BC5_Unorm = 83,
G_Format_BC5_Snorm = 84,
G_Format_B5G6R5_Unorm = 85,
G_Format_B5G5R5A1_Unorm = 86,
G_Format_B8G8R8A8_Unorm = 87,
G_Format_B8G8R8X8_Unorm = 88,
G_Format_R10G10B10_XR_BIAS_A2_Unorm = 89,
G_Format_B8G8R8A8_Typeless = 90,
G_Format_B8G8R8A8_Unorm_Srgb = 91,
G_Format_B8G8R8X8_Typeless = 92,
G_Format_B8G8R8X8_Unorm_Srgb = 93,
G_Format_BC6H_Typeless = 94,
G_Format_BC6H_UF16 = 95,
G_Format_BC6H_SF16 = 96,
G_Format_BC7_Typeless = 97,
G_Format_BC7_Unorm = 98,
G_Format_BC7_Unorm_Srgb = 99,
G_Format_AYUV = 100,
G_Format_Y410 = 101,
G_Format_Y416 = 102,
G_Format_NV12 = 103,
G_Format_P010 = 104,
G_Format_P016 = 105,
G_Format_420_Opaque = 106,
G_Format_YUY2 = 107,
G_Format_Y210 = 108,
G_Format_Y216 = 109,
G_Format_NV11 = 110,
G_Format_AI44 = 111,
G_Format_IA44 = 112,
G_Format_P8 = 113,
G_Format_A8P8 = 114,
G_Format_B4G4R4A4_Unorm = 115,
G_Format_P208 = 130,
G_Format_V208 = 131,
G_Format_V408 = 132,
G_Format_SamplerFeedbackMinMipOpaque = 189,
G_Format_SamplerFeedbackMipRegionUsedOpaque = 190,
G_Format_A4B4G4R4_Unorm = 191,
G_Format_COUNT = 192
};
////////////////////////////////////////////////////////////
//~ Memory sync types
Enum(G_Stage)
{
G_Stage_None = 0,
// Compute stages
G_Stage_ComputeShading = (1 << 1),
// Draw stages
G_Stage_IndexAssembly = (1 << 2),
G_Stage_VertexShading = (1 << 3),
G_Stage_PixelShading = (1 << 4),
G_Stage_DepthStencil = (1 << 5),
G_Stage_RenderTarget = (1 << 6),
// Copy stages
G_Stage_Copy = (1 << 7),
// Indirect stages
G_Stage_Indirect = (1 << 8),
// Aggregate stages
G_Stage_Drawing = G_Stage_IndexAssembly |
G_Stage_VertexShading |
G_Stage_PixelShading |
G_Stage_DepthStencil |
G_Stage_RenderTarget,
G_Stage_Shading = G_Stage_ComputeShading |
G_Stage_VertexShading |
G_Stage_PixelShading,
G_Stage_All = 0xFFFFFFFF
};
Enum(G_Access)
{
G_Access_None = 0,
G_Access_ShaderReadWrite = (1 << 1),
G_Access_ShaderRead = (1 << 2),
G_Access_CopyWrite = (1 << 3),
G_Access_CopyRead = (1 << 4),
G_Access_DepthStencilRead = (1 << 5),
G_Access_DepthStencilWrite = (1 << 6),
G_Access_RenderTargetWrite = (1 << 7),
G_Access_IndexBuffer = (1 << 8),
G_Access_IndirectArgument = (1 << 9),
G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the stage specified in the barrier
};
Enum(G_Layout)
{
G_Layout_NoChange,
G_Layout_Undefined,
//////////////////////////////
//- Queue-agnostic
// Simultaneous layout allows a resource to be used on any queue with any
// access type (except depth-stencil). Resources cannot transition to/from
// this layout, they must be created with it. Allows concurrent reads
// with up to 1 write to non-overlapping regions.
G_Layout_Simultaneous, // Any access except depth-stencil <-- D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS
G_Layout_Common, // ShaderRead/CopyRead/CopyWrite/Present <-- D3D12_BARRIER_LAYOUT_COMMON
//////////////////////////////
//- Direct queue
G_Layout_DirectQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON
G_Layout_DirectQueue_Read, // ShaderRead/CopyRead/DepthStencilRead <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ
G_Layout_DirectQueue_DepthStencil, // DepthStencilRead/DepthStencilWrite <-- D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE
G_Layout_DirectQueue_RenderTarget, // RenderTargetWrite <-- D3D12_BARRIER_LAYOUT_RENDER_TARGET
//////////////////////////////
//- Compute queue
G_Layout_ComputeQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON
//////////////////////////////
//- Direct & Compute queue
G_Layout_DirectComputeQueue_Read, // ShaderRead/CopyRead <-- D3D12_BARRIER_LAYOUT_GENERIC_READ
G_Layout_DirectComputeQueue_ShaderReadWrite, // ShaderReadWrite <-- D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS
G_Layout_DirectComputeQueue_CopyWrite, // CopyWrite <-- D3D12_BARRIER_LAYOUT_COPY_DEST
};
// Barrier will execute after stages specified by `stage_prev`, and before stages specified by `stage_next`.
// When barrier executes:
// - Necessary resource flushes will occur based on `access_prev` & `access_next`
// - Texture layout will transition based on `layout` (if specified)
Struct(G_MemoryBarrierDesc)
{
G_ResourceHandle resource;
b32 is_global;
G_Stage stage_prev;
G_Stage stage_next;
G_Access access_prev;
G_Access access_next;
G_Layout layout;
RngI32 mips; // Inclusive range of texture mip levels to sync
};
////////////////////////////////////////////////////////////
//~ Filter types
// NOTE: Matches DirectX D3D12_FILTER
// Sampler filter modes; values match DirectX D3D12_FILTER exactly.
// Fixed: G_Filter_Minimum_MinMagAnisotropicMipPoint was 0x155, colliding with
// G_Filter_Minimum_Anisotropic. D3D12 defines
// D3D12_FILTER_MINIMUM_MIN_MAG_ANISOTROPIC_MIP_POINT = 0x154, which also
// matches the 0x54/0x55, 0xd4/0xd5, 0x1d4/0x1d5 pattern of the other groups.
Enum(G_Filter)
{
// Standard filter
G_Filter_MinMagMipPoint = 0,
G_Filter_MinMagPointMipLinear = 0x1,
G_Filter_MinPointMagLinearMipPoint = 0x4,
G_Filter_MinPointMagMipLinear = 0x5,
G_Filter_MinLinearMagMipPoint = 0x10,
G_Filter_MinLinearMagPointMipLinear = 0x11,
G_Filter_MinMagLinearMipPoint = 0x14,
G_Filter_MinMagMipLinear = 0x15,
G_Filter_MinMagAnisotropicMipPoint = 0x54,
G_Filter_Anisotropic = 0x55,
// Comparison filter
G_Filter_Comparison_MinMagMipPoint = 0x80,
G_Filter_Comparison_MinMagPointMipLinear = 0x81,
G_Filter_Comparison_MinPointMagLinearMipPoint = 0x84,
G_Filter_Comparison_MinPointMagMipLinear = 0x85,
G_Filter_Comparison_MinLinearMagMipPoint = 0x90,
G_Filter_Comparison_MinLinearMagPointMipLinear = 0x91,
G_Filter_Comparison_MinMagLinearMipPoint = 0x94,
G_Filter_Comparison_MinMagMipLinear = 0x95,
G_Filter_Comparison_MinMagAnisotropicMipPoint = 0xd4,
G_Filter_Comparison_Anisotropic = 0xd5,
// Minimum filter
G_Filter_Minimum_MinMagMipPoint = 0x100,
G_Filter_Minimum_MinMagPointMipLinear = 0x101,
G_Filter_Minimum_MinPointMagLinearMipPoint = 0x104,
G_Filter_Minimum_MinPointMagMipLinear = 0x105,
G_Filter_Minimum_MinLinearMagMipPoint = 0x110,
G_Filter_Minimum_MinLinearMagPointMipLinear = 0x111,
G_Filter_Minimum_MinMagLinearMipPoint = 0x114,
G_Filter_Minimum_MinMagMipLinear = 0x115,
G_Filter_Minimum_MinMagAnisotropicMipPoint = 0x154,
G_Filter_Minimum_Anisotropic = 0x155,
// Maximum filter
G_Filter_Maximum_MinMagMipPoint = 0x180,
G_Filter_Maximum_MinMagPointMipLinear = 0x181,
G_Filter_Maximum_MinPointMagLinearMipPoint = 0x184,
G_Filter_Maximum_MinPointMagMipLinear = 0x185,
G_Filter_Maximum_MinLinearMagMipPoint = 0x190,
G_Filter_Maximum_MinLinearMagPointMipLinear = 0x191,
G_Filter_Maximum_MinMagLinearMipPoint = 0x194,
G_Filter_Maximum_MinMagMipLinear = 0x195,
G_Filter_Maximum_MinMagAnisotropicMipPoint = 0x1d4,
G_Filter_Maximum_Anisotropic = 0x1d5
};
// NOTE: Matches DirectX D3D12_TEXTURE_ADDRESS_MODE
Enum(G_AddressMode)
{
G_AddressMode_Wrap = 1,
G_AddressMode_Mirror = 2,
G_AddressMode_Clamp = 3, // Default
G_AddressMode_Border = 4,
G_AddressMode_MirrorOnce = 5
};
// NOTE: Matches DirectX D3D12_COMPARISON_FUNC
Enum(G_ComparisonFunc)
{
G_ComparisonFunc_None = 0,
G_ComparisonFunc_Never = 1,
G_ComparisonFunc_Less = 2,
G_ComparisonFunc_Equal = 3,
G_ComparisonFunc_LessEqual = 4,
G_ComparisonFunc_Greater = 5,
G_ComparisonFunc_NotEqual = 6,
G_ComparisonFunc_GreaterEqual = 7,
G_ComparisonFunc_Always = 8
};
////////////////////////////////////////////////////////////
//~ Resource types
Enum(G_ResourceKind)
{
G_ResourceKind_Buffer,
G_ResourceKind_Texture1D,
G_ResourceKind_Texture2D,
G_ResourceKind_Texture3D,
G_ResourceKind_Sampler,
};
Enum(G_ResourceFlag)
{
G_ResourceFlag_None = 0,
G_ResourceFlag_AllowShaderReadWrite = (1 << 0),
G_ResourceFlag_AllowRenderTarget = (1 << 1),
G_ResourceFlag_AllowDepthStencil = (1 << 2),
G_ResourceFlag_ZeroMemory = (1 << 3),
G_ResourceFlag_HostMemory = (1 << 4), // Resource will be mapped into the cpu's address space
G_ResourceFlag_Uncached = (1 << 5), // Cpu writes will be combined & reads will be uncached
G_ResourceFlag_ForceNoReuse = (1 << 6),
};
Struct(G_BufferDesc)
{
G_ResourceFlag flags;
u64 size;
String name;
};
Struct(G_TextureDesc)
{
G_ResourceFlag flags;
G_Format format;
Vec3I32 dims;
G_Layout initial_layout;
Vec4 clear_color;
i32 max_mips; // Will be clamped to range [1, max mips]
String name;
};
Struct(G_SamplerDesc)
{
G_ResourceFlag flags;
G_Filter filter;
G_AddressMode x;
G_AddressMode y;
G_AddressMode z;
f32 mip_lod_bias;
u32 max_anisotropy;
G_ComparisonFunc comparison;
Vec4 border_color;
f32 min_lod;
f32 max_lod;
String name;
};
Struct(G_ResourceDesc)
{
G_ResourceKind kind;
G_BufferDesc buffer;
G_TextureDesc texture;
G_SamplerDesc sampler;
};
////////////////////////////////////////////////////////////
//~ Ref types
Struct(G_RefDesc)
{
G_RefKind kind;
u64 element_size;
u64 element_offset;
RngI32 mips; // Inclusive range of texture mip indices to reference
};
////////////////////////////////////////////////////////////
//~ Rasterization types
Enum(G_RasterMode)
{
G_RasterMode_None,
G_RasterMode_PointList,
G_RasterMode_LineList,
G_RasterMode_LineStrip,
G_RasterMode_TriangleList,
G_RasterMode_TriangleStrip,
G_RasterMode_WireTriangleList,
G_RasterMode_WireTriangleStrip,
};
Enum(G_BlendMode)
{
G_BlendMode_Opaque,
G_BlendMode_CompositeStraightAlpha,
G_BlendMode_CompositePremultipliedAlpha,
};
Struct(G_IndexBufferDesc)
{
u32 count;
u32 stride; // Either 2 for u16 indices, or 4 for u32 indices
G_ResourceHandle resource;
};
Struct(G_RenderTargetDesc)
{
G_ResourceHandle resource;
G_BlendMode blend;
i32 mip;
};
////////////////////////////////////////////////////////////
//~ Statistic types
Struct(G_Stats)
{
// Memory usage
u64 device_committed;
u64 device_budget;
u64 host_committed;
u64 host_budget;
// Other stats
u64 arenas_count;
u64 cumulative_nonreuse_count;
};
////////////////////////////////////////////////////////////
//~ @hookdecl Bootstrap
void G_Bootstrap(void);
////////////////////////////////////////////////////////////
//~ @hookdecl Arena
G_ArenaHandle G_AcquireArena(void);
void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena);
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle);
////////////////////////////////////////////////////////////
//~ @hookdecl Resource
//- Resource creation
G_ResourceHandle G_PushResource(G_ArenaHandle arena, G_CommandListHandle cl, G_ResourceDesc desc);
#define G_PushBuffer(arena, cl, _type, _count, ...) G_PushResource((arena), (cl), \
(G_ResourceDesc) { \
.kind = G_ResourceKind_Buffer, \
.buffer = { \
.size = sizeof(_type) * (_count), \
__VA_ARGS__ \
} \
} \
)
#define G_PushTexture1D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \
(G_ResourceDesc) { \
.kind = G_ResourceKind_Texture1D, \
.texture = { \
.format = (_format), \
.dims = VEC3I32((_size), 1, 1), \
.initial_layout = (_initial_layout), \
__VA_ARGS__ \
} \
} \
)
#define G_PushTexture2D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \
(G_ResourceDesc) { \
.kind = G_ResourceKind_Texture2D, \
.texture = { \
.format = (_format), \
.dims = VEC3I32((_size).x, (_size).y, 1), \
.initial_layout = (_initial_layout), \
__VA_ARGS__ \
} \
} \
)
#define G_PushTexture3D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \
(G_ResourceDesc) { \
.kind = G_ResourceKind_Texture3D, \
.texture = { \
.format = (_format), \
.dims = (_size), \
.initial_layout = (_initial_layout), \
__VA_ARGS__ \
} \
} \
)
#define G_PushSampler(arena, cl, ...) G_PushResource((arena), (cl), \
(G_ResourceDesc) { \
.kind = G_ResourceKind_Sampler, \
.sampler = { \
.filter = G_Filter_MinMagMipPoint, \
__VA_ARGS__ \
} \
} \
)
//- Index buffer helpers
#define G_IdxBuff16(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 2, .count = (G_CountBuffer((_res), i16)) })
#define G_IdxBuff32(_res) ((G_IndexBufferDesc) { .resource = (_res), .stride = 4, .count = (G_CountBuffer((_res), i32)) })
//- Render target helpers
#define G_Rt(_res, _blend_mode) ((G_RenderTargetDesc) { .resource = (_res), .blend = (_blend_mode) })
//- Count
u64 G_CountBufferBytes(G_ResourceHandle buffer);
i32 G_Count1D(G_ResourceHandle texture);
Vec2I32 G_Count2D(G_ResourceHandle texture);
Vec3I32 G_Count3D(G_ResourceHandle texture);
i32 G_CountWidth(G_ResourceHandle texture);
i32 G_CountHeight(G_ResourceHandle texture);
i32 G_CountDepth(G_ResourceHandle texture);
i32 G_CountMips(G_ResourceHandle texture);
// Number of whole `type`-sized elements stored in `buffer`.
// The expansion is fully parenthesized so the macro composes safely inside
// larger expressions (e.g. `x / G_CountBuffer(b, t)` previously
// mis-associated as `(x / G_CountBufferBytes(b)) / sizeof(t)`).
#define G_CountBuffer(buffer, type) (G_CountBufferBytes(buffer) / sizeof(type))
//- Map
void *G_HostPointerFromResource(G_ResourceHandle resource);
#define G_StructFromResource(resource, type) (type *)G_HostPointerFromResource(resource)
////////////////////////////////////////////////////////////
//~ @hookdecl Shader resource reference
// Registers a bindless shader-visible reference to `resource` on `arena` and
// returns its descriptor index (0 is the nil ref — see G_IsRefNil).
u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc);
// Typed wrappers below produce the matching *Ref struct for shader-side
// dereference. Texture wrappers default to covering every mip
// (.mips.max = G_MaxMips); override ref fields via __VA_ARGS__.
#define G_PushStructuredBufferRef(arena, resource, type, ...) (G_StructuredBufferRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_StructuredBuffer, .element_size = sizeof(type), __VA_ARGS__ } \
) \
}
#define G_PushByteAddressBufferRef(arena, resource, ...) (G_ByteAddressBufferRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_ByteAddressBuffer, __VA_ARGS__ } \
) \
}
#define G_PushTexture1DRef(arena, resource, ...) (G_Texture1DRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_Texture1D, .mips.max = G_MaxMips, __VA_ARGS__ } \
) \
}
#define G_PushTexture2DRef(arena, resource, ...) (G_Texture2DRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_Texture2D, .mips.max = G_MaxMips, __VA_ARGS__ } \
) \
}
#define G_PushTexture3DRef(arena, resource, ...) (G_Texture3DRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_Texture3D, .mips.max = G_MaxMips, __VA_ARGS__ } \
) \
}
#define G_PushSamplerStateRef(arena, resource, ...) (G_SamplerStateRef) { \
.v = G_PushRef( \
(arena), (resource), \
(G_RefDesc) { .kind = G_RefKind_SamplerState, __VA_ARGS__ } \
) \
}
////////////////////////////////////////////////////////////
//~ @hookdecl Command
//- Command list
// Begin recording commands targeting `queue`; commit returns the queue's
// completion target value for the submitted list.
G_CommandListHandle G_PrepareCommandList(G_QueueKind queue);
i64 G_CommitCommandList(G_CommandListHandle cl);
//- Cpu -> Gpu staged copy
void G_CopyCpuToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range);
void G_CopyCpuToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range);
//- Gpu <-> Gpu copy
void G_CopyBufferToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, G_ResourceHandle src, RngU64 src_copy_range);
void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset);
void G_CopyTextureToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range);
void G_CopyTextureToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range);
//- Constant
// `src_32bit` points at 32-bit-granular constant data of `size` bytes.
void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size);
// `name` must come from G_DeclConstant, which declares both the slot enum and
// the CAT(name, __shaderconstanttype) wrapper struct this macro instantiates.
#define G_SetConstant(cl, name, value) do { \
CAT(name, __shaderconstanttype) __src; \
__src.v = value; \
G_SetConstantEx((cl), (name), &__src, sizeof(__src)); \
} while (0)
//- Memory sync
// Record a memory/layout barrier on `cl`. All barrier macros default to
// covering every mip (.mips.max = G_MaxMips); extra G_MemoryBarrierDesc fields
// may be supplied via __VA_ARGS__.
void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc);
// Resource barrier without a layout change.
#define G_MemorySync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, ...) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.resource = (_resource), \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.stage_next = _stage_next, \
.access_next = _access_next, \
.mips.max = G_MaxMips, \
__VA_ARGS__ \
})
// Resource barrier that also transitions the texture to `_layout`.
#define G_MemoryLayoutSync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, _layout, ...) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.resource = (_resource), \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.stage_next = _stage_next, \
.access_next = _access_next, \
.layout = _layout, \
.mips.max = G_MaxMips, \
__VA_ARGS__ \
})
// Barrier over all resources (.is_global = 1) rather than a single one.
#define G_GlobalMemorySync(_cl, _stage_prev, _access_prev, _stage_next, _access_next, ...) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.is_global = 1, \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.stage_next = _stage_next, \
.access_next = _access_next, \
.mips.max = G_MaxMips, \
__VA_ARGS__ \
})
// "Dumb" variants: full-pipeline, all-access barriers — maximally safe,
// maximally serializing.
#define G_DumbMemorySync(cl, resource, ...) \
G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__)
#define G_DumbMemoryLayoutSync(cl, resource, layout, ...) \
G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout), __VA_ARGS__)
#define G_DumbGlobalMemorySync(cl, ...) \
G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__)
//- Compute
// Dispatch `cs` with the given thread counts; the 1D/2D wrappers pad the
// missing dimensions with 1.
void G_ComputeEx(G_CommandListHandle cl, ComputeShaderDesc cs, Vec3I32 threads);
#define G_Compute(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads), 1, 1))
#define G_Compute2D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, 1))
#define G_Compute3D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, (threads).z))
//- Rasterize
// Record an instanced, indexed draw into up to `render_targets_count` targets.
void G_Rasterize(
G_CommandListHandle cl,
VertexShaderDesc vs, PixelShaderDesc ps,
u32 instances_count, G_IndexBufferDesc index_buffer,
u32 render_targets_count, G_RenderTargetDesc *render_targets,
Rng3 viewport, Rng2 scissor,
G_RasterMode raster_mode
);
//- Clear
void G_ClearRenderTarget(G_CommandListHandle cl, G_ResourceHandle render_target, Vec4 color, i32 mip);
//- Log
// Record a debug dump of `resource`'s contents/state on the command list.
void G_LogResource(G_CommandListHandle cl, G_ResourceHandle resource);
////////////////////////////////////////////////////////////
//~ @hookdecl Queue synchronization
// Completion "values" are what a queue has finished; "targets" are what it has
// been asked to reach (per-queue monotonic counters).
i64 G_CompletionValueFromQueue(G_QueueKind queue_kind);
i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind);
G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask);
G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask);
void G_QueueSyncEx(G_QueueBarrierDesc desc);
// Wait until the queues in `completion_mask` reach their current targets;
// extra G_QueueBarrierDesc fields via __VA_ARGS__.
#define G_QueueSync(completion_mask, ...) \
G_QueueSyncEx((G_QueueBarrierDesc) { \
.completions = G_CompletionTargetsFromQueues(completion_mask), \
__VA_ARGS__ \
})
// GPU-side wait: queues in `wait_mask` stall until `completion_mask` catches up.
#define G_QueueSyncGpu(completion_mask, wait_mask) G_QueueSync((completion_mask), .wait_queues = (wait_mask))
// CPU-side wait.
// NOTE: trailing semicolon removed from the expansion — it previously made the
// macro expand to two statements, which breaks `if (c) G_QueueSyncCpu(m); else ...`
// and was inconsistent with G_QueueSyncGpu.
#define G_QueueSyncCpu(completion_mask) G_QueueSync((completion_mask), .wait_cpu = 1)
////////////////////////////////////////////////////////////
//~ @hookdecl Statistics
// Snapshot of backend counters (allocations, arenas, ...).
G_Stats G_QueryStats(void);
////////////////////////////////////////////////////////////
//~ @hookdecl Swapchain
// `os_window_handle` is the platform window handle (e.g. HWND) cast to u64.
G_SwapchainHandle G_AcquireSwapchain(u64 os_window_handle);
void G_ReleaseSwapchain(G_SwapchainHandle swapchain);
// Waits until a new backbuffer is ready from the swapchain.
// This should be called before rendering for minimum latency.
G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Format format, Vec2I32 size);
// Present the backbuffer; `vsync` nonzero enables vertical sync.
void G_CommitBackbuffer(G_ResourceHandle backbuffer, i32 vsync);

16
src/gpu_old/gpu_dx12/gpu_dx12.lay generated Normal file
View File

@ -0,0 +1,16 @@
@Layer gpu_dx12_old
//////////////////////////////
//- Resources
@EmbedDir G_D12_Resources gpu_dx12_res
//////////////////////////////
//- Api
@IncludeC gpu_dx12_core.h
//////////////////////////////
//- Impl
@IncludeC gpu_dx12_core.c

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,577 @@
////////////////////////////////////////////////////////////
//~ DirectX12 libs
#pragma warning(push, 0)
#include <d3d12.h>
#include <dxgidebug.h>
#include <dxgi1_6.h>
#pragma warning(pop)
#pragma comment(lib, "d3d12")
#pragma comment(lib, "dxgi")
////////////////////////////////////////////////////////////
//~ Tweakable definitions
#define G_D12_TearingIsAllowed 1
#define G_D12_FrameLatency 1
#define G_D12_SwapchainBufferCount 2
// Swapchain creation flags derived from the toggles above; the `!= 0` multiply
// keeps each DXGI flag only when its toggle is enabled.
#define G_D12_SwapchainFlags ( \
((G_D12_TearingIsAllowed != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | \
((G_D12_FrameLatency != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT) \
)
// Descriptor heap capacities per heap kind.
#define G_D12_MaxCbvSrvUavDescriptors (1024 * 128)
#define G_D12_MaxSamplerDescriptors (1024 * 1)
#define G_D12_MaxRtvDescriptors (1024 * 64)
#define G_D12_MaxMips 16
#define G_D12_MaxNameLen 64
////////////////////////////////////////////////////////////
//~ Pipeline types
// NOTE: Must be zero initialized (including padding bits) for hashing
Struct(G_D12_PipelineDesc)
{
VertexShaderDesc vs;
PixelShaderDesc ps;
ComputeShaderDesc cs;
b32 is_wireframe;
D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type;
G_Format render_target_formats[G_MaxRenderTargets];
G_BlendMode render_target_blend_modes[G_MaxRenderTargets];
};
// Cached PSO; chained per hash bin (see G_D12_PipelineBin / pipeline_bins).
Struct(G_D12_Pipeline)
{
G_D12_Pipeline *next_in_bin;
u64 hash;
G_D12_PipelineDesc desc;
ID3D12PipelineState *pso;
b32 ok;
String error; // Compile/creation error when ok == 0
};
// One bucket of the pipeline hash table, guarded by its own mutex.
Struct(G_D12_PipelineBin)
{
Mutex mutex;
G_D12_Pipeline *first;
};
////////////////////////////////////////////////////////////
//~ Resource types
// Backend-side resource record: a D3D12 resource plus per-kind metadata
// (buffer / texture / sampler / backbuffer).
Struct(G_D12_Resource)
{
G_D12_Resource *next;
G_D12_Resource *prev;
G_ResourceFlag flags;
u64 uid;
// D3D12 resource
D3D12_RESOURCE_DESC1 d3d_desc;
ID3D12Resource *d3d_resource;
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
void *mapped; // Host pointer when the resource is mapped, else null
// Buffer info
u64 buffer_size;
u64 buffer_size_actual; // Allocated size (may exceed requested buffer_size)
// Texture info
b32 is_texture;
G_Format texture_format;
Vec3I32 texture_dims;
i32 texture_mips;
// Per-mip barrier layout tracked during command-list recording — TODO confirm
D3D12_BARRIER_LAYOUT cmdlist_texture_layouts[G_D12_MaxMips];
// Sampler info
G_SamplerDesc sampler_desc;
// Backbuffer info
struct G_D12_Swapchain *swapchain; // Owning swapchain when this is a backbuffer
u64 name_len;
u8 name_text[G_D12_MaxNameLen];
};
// Doubly-linked resource list with count.
Struct(G_D12_ResourceList)
{
u64 count;
G_D12_Resource *first;
G_D12_Resource *last;
};
////////////////////////////////////////////////////////////
//~ Descriptor types
Enum(G_D12_DescriptorHeapKind)
{
G_D12_DescriptorHeapKind_CbvSrvUav,
G_D12_DescriptorHeapKind_Rtv,
G_D12_DescriptorHeapKind_Sampler,
G_D12_DescriptorHeapKind_COUNT
};
// One D3D12 descriptor heap plus a free list of descriptor batches,
// guarded by `mutex`.
Struct(G_D12_DescriptorHeap)
{
Arena *descriptors_arena;
G_D12_DescriptorHeapKind kind;
D3D12_DESCRIPTOR_HEAP_TYPE type;
u32 per_batch_count; // Handles per G_D12_Descriptor batch
u32 descriptor_size; // Per-descriptor increment reported by the device
ID3D12DescriptorHeap *d3d_heap;
D3D12_CPU_DESCRIPTOR_HANDLE start_handle;
Mutex mutex;
struct G_D12_Descriptor *first_free;
u32 max_count;
};
// A batch of descriptors allocated from a heap; retired once its queue passes
// `completion_queue_target`.
Struct(G_D12_Descriptor)
{
G_D12_Descriptor *next;
G_D12_Descriptor *prev;
struct G_D12_Arena *gpu_arena;
G_QueueKind completion_queue_kind;
i64 completion_queue_target;
G_D12_DescriptorHeap *heap;
D3D12_CPU_DESCRIPTOR_HANDLE first_handle;
u32 index;
};
Struct(G_D12_DescriptorList)
{
u64 count;
G_D12_Descriptor *first;
G_D12_Descriptor *last;
};
////////////////////////////////////////////////////////////
//~ Arena types
// TODO:
// To support D3D12_RESOURCE_HEAP_TIER_1 devices, create separate heaps for:
// - Buffers
// - Non-render target & non-depth stencil textures
// - Render target or depth stencil textures
Enum(G_D12_ResourceHeapKind)
{
G_D12_ResourceHeapKind_Gpu,
G_D12_ResourceHeapKind_Cpu,
G_D12_ResourceHeapKind_CpuWriteCombined,
G_D12_ResourceHeapKind_COUNT
};
// GPU-side allocation arena: owns live descriptors/resources plus lists of
// items pending reset.
Struct(G_D12_Arena)
{
Arena *arena;
G_D12_DescriptorList descriptors;
G_D12_DescriptorList reset_descriptors_by_heap[G_D12_DescriptorHeapKind_COUNT];
G_D12_ResourceList resources;
G_D12_ResourceList reset_resources;
// G_D12_ResourceList free_resources;
};
////////////////////////////////////////////////////////////
//~ Staging types
// Ring buffer of upload memory; `base` is the persistently mapped pointer into
// `resource`. Regions are tracked as a list of nodes.
Struct(G_D12_StagingRing)
{
Arena *arena;
G_D12_Arena *gpu_arena;
u64 size;
G_D12_Resource *resource;
u8 *base;
struct G_D12_StagingRegionNode *head_region_node;
struct G_D12_StagingRegionNode *first_free_region_node;
};
// One reserved span of the staging ring; reclaimable once `completion_target`
// is reached by the owning queue.
Struct(G_D12_StagingRegionNode)
{
G_D12_StagingRing *ring;
// Ring links (requires ring lock to read)
G_D12_StagingRegionNode *prev;
G_D12_StagingRegionNode *next;
// Command list links
G_D12_StagingRegionNode *next_in_command_list;
// Region info
Atomic64 completion_target;
u64 pos;
};
////////////////////////////////////////////////////////////
//~ Command queue types
Struct(G_D12_CommandQueueDesc)
{
D3D12_COMMAND_LIST_TYPE type;
D3D12_COMMAND_QUEUE_PRIORITY priority;
String name;
};
// One backend queue: the D3D12 queue, its commit fence/target, the shader
// print buffers bound to it, committed raw lists, and its staging ring.
Struct(G_D12_Queue)
{
ID3D12CommandQueue *d3d_queue;
G_D12_CommandQueueDesc desc;
Mutex commit_mutex; // Guards commit_fence / commit_fence_target
ID3D12Fence *commit_fence;
u64 commit_fence_target;
// Global resources
u64 print_buffer_size;
G_ResourceHandle print_buffer;
G_ResourceHandle print_readback_buffer;
G_ByteAddressBufferRef print_buffer_ref;
// Raw command lists
struct G_D12_RawCommandList *first_committed_cl;
struct G_D12_RawCommandList *last_committed_cl;
// Staging heap
Mutex staging_mutex;
G_D12_StagingRing *staging_ring;
Fence sync_fence;
};
////////////////////////////////////////////////////////////
//~ Raw command list types
// A D3D12 allocator + command list pair, recycled once the owning queue
// passes `commit_fence_target`.
Struct(G_D12_RawCommandList)
{
G_D12_Queue *queue;
G_D12_RawCommandList *next;
u64 commit_fence_target;
ID3D12CommandAllocator *d3d_ca;
ID3D12GraphicsCommandList7 *d3d_cl;
// Direct queue command lists keep a constant list of CPU-only descriptors
G_D12_Descriptor *rtv_descriptors[G_MaxRenderTargets];
G_D12_Descriptor *rtv_clear_descriptor;
};
////////////////////////////////////////////////////////////
//~ Releasable types
// Deferred-release record: the D3D12 resource is released once
// `completion_queue_kind` reaches `completion_queue_target`.
Struct(G_D12_Releasable)
{
G_D12_Releasable *next;
G_QueueKind completion_queue_kind;
i64 completion_queue_target;
ID3D12Resource *d3d_resource;
u64 name_len;
u8 name_text[G_D12_MaxNameLen];
};
Struct(G_D12_ReleasableList)
{
G_D12_Releasable *first;
G_D12_Releasable *last;
};
////////////////////////////////////////////////////////////
//~ Command list types
#define G_D12_CmdsPerChunk 256
// Deferred command kinds recorded into G_D12_CmdList and replayed onto a raw
// D3D12 command list at commit time.
Enum(G_D12_CmdKind)
{
G_D12_CmdKind_None,
G_D12_CmdKind_Barrier,
G_D12_CmdKind_Constant,
G_D12_CmdKind_CopyBytes,
G_D12_CmdKind_CopyTexels,
G_D12_CmdKind_Compute,
G_D12_CmdKind_Rasterize,
G_D12_CmdKind_ClearRtv,
G_D12_CmdKind_Log,
G_D12_CmdKind_Discard,
};
// Tagged union: `kind` selects the active union member; `skip` marks commands
// elided during replay.
Struct(G_D12_Cmd)
{
G_D12_CmdKind kind;
b32 skip;
union
{
struct
{
i32 slot;
u32 value;
} constant;
struct
{
G_MemoryBarrierDesc desc;
// Post-batch data
b32 is_end_of_batch;
u64 batch_gen;
} barrier;
struct
{
G_D12_Resource *dst;
G_D12_Resource *src;
u64 dst_offset;
RngU64 src_range;
} copy_bytes;
struct
{
G_D12_Resource *dst;
G_D12_Resource *src;
D3D12_TEXTURE_COPY_LOCATION dst_loc;
D3D12_TEXTURE_COPY_LOCATION src_loc;
Vec3I32 dst_texture_offset;
Rng3I32 src_texture_range;
} copy_texels;
struct
{
ComputeShaderDesc cs;
Vec3I32 groups;
} compute;
struct
{
VertexShaderDesc vs;
PixelShaderDesc ps;
u32 instances_count;
G_IndexBufferDesc index_buffer_desc;
G_RenderTargetDesc render_target_descs[G_MaxRenderTargets];
Rng3 viewport;
Rng2 scissor;
G_RasterMode raster_mode;
} rasterize;
struct
{
G_D12_Resource *render_target;
Vec4 color;
i32 mip;
} clear_rtv;
struct
{
G_D12_Resource *resource;
} log;
struct
{
G_D12_Resource *resource;
} discard;
};
};
// Fixed-capacity chunk of commands (G_D12_CmdsPerChunk per chunk).
Struct(G_D12_CmdChunk)
{
G_D12_CmdChunk *next;
struct G_D12_CmdList *cl;
G_D12_Cmd *cmds;
u64 cmds_count;
};
// The user-facing command list: chunked command storage plus the descriptors,
// releasables and staging regions whose lifetime is tied to this submission.
Struct(G_D12_CmdList)
{
G_D12_CmdList *next;
G_QueueKind queue_kind;
G_D12_DescriptorList reset_descriptors;
G_D12_ReleasableList releases;
G_D12_StagingRegionNode *first_staging_region;
G_D12_StagingRegionNode *last_staging_region;
G_D12_CmdChunk *first_cmd_chunk;
G_D12_CmdChunk *last_cmd_chunk;
u64 chunks_count;
u64 cmds_count;
};
////////////////////////////////////////////////////////////
//~ Swapchain types
Struct(G_D12_Swapchain)
{
IDXGISwapChain3 *d3d_swapchain;
HWND window_hwnd;
HANDLE waitable; // Frame-latency waitable object (see G_D12_SwapchainFlags)
HANDLE present_event;
ID3D12Fence *present_fence;
u64 present_fence_target;
G_Format backbuffers_format;
Vec2I32 backbuffers_resolution;
G_D12_Resource backbuffers[G_D12_SwapchainBufferCount];
};
////////////////////////////////////////////////////////////
//~ State types
// Per-async-worker release bookkeeping.
Struct(G_D12_AsyncCtx)
{
G_D12_ReleasableList pending_releases;
G_D12_ReleasableList free_releases;
};
// Global backend state (single instance: G_D12).
Struct(G_D12_Ctx)
{
IsolatedAtomic64 resource_creation_gen;
b32 independent_devices_enabled;
b32 debug_layer_enabled;
b32 validation_layer_enabled;
// Stats
Atomic64 arenas_count;
Atomic64 cumulative_nonreuse_count;
Atomic64 driver_resources_allocated;
Atomic64 driver_descriptors_allocated;
// Queues
G_D12_Queue queues[G_QueueKind_COUNT];
// Descriptor heaps
G_D12_DescriptorHeap descriptor_heaps[G_D12_DescriptorHeapKind_COUNT];
// Rootsig
ID3D12RootSignature *bindless_rootsig;
// Pipelines
G_D12_PipelineBin pipeline_bins[1024];
// Command lists
Mutex free_cmd_lists_mutex;
G_D12_CmdList *first_free_cmd_list;
// Command chunks
Mutex free_cmd_chunks_mutex;
G_D12_CmdChunk *first_free_cmd_chunk;
// Swapchains
Mutex free_swapchains_mutex;
G_D12_Swapchain *first_free_swapchain;
// Independent device (only valid when independent_devices_enabled = 1)
struct
{
ID3D12SDKConfiguration1 *sdk_config;
ID3D12DeviceConfiguration *device_config;
ID3D12DeviceFactory *device_factory;
} independent;
// Device
IDXGIFactory6 *dxgi_factory;
IDXGIAdapter3 *dxgi_adapter;
ID3D12Device10 *device;
// Release-queue
Mutex pending_releases_mutex;
Mutex free_releases_mutex;
G_D12_ReleasableList pending_releases;
G_D12_ReleasableList free_releases;
// Async
G_D12_AsyncCtx async_ctx;
};
// Per-thread backend state.
Struct(G_D12_ThreadLocalCtx)
{
HANDLE sync_event; // Event used for CPU-side fence waits
};
extern G_D12_Ctx G_D12;
extern ThreadLocal G_D12_ThreadLocalCtx G_D12_tl;
////////////////////////////////////////////////////////////
//~ Helpers
// Handles are just the backend pointer stored in a u64.
#define G_D12_MakeHandle(type, ptr) (type) { .v = (u64)(ptr) }
G_D12_Arena *G_D12_ArenaFromHandle(G_ArenaHandle handle);
G_D12_CmdList *G_D12_CmdListFromHandle(G_CommandListHandle handle);
G_D12_Resource *G_D12_ResourceFromHandle(G_ResourceHandle handle);
G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle);
// Enum translation between the frontend abstraction and D3D12.
DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format);
D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages);
D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses);
D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout);
String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout);
void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip);
void G_D12_SetObjectName(ID3D12Object *object, String name);
String G_D12_NameFromObject(Arena *arena, ID3D12Object *object);
////////////////////////////////////////////////////////////
//~ Pipeline
// Looks up (or builds) the cached PSO for `desc` via the pipeline hash bins.
G_D12_Pipeline *G_D12_PipelineFromDesc(G_D12_PipelineDesc desc);
u64 G_D12_HashFromPipelineDesc(G_D12_PipelineDesc desc);
////////////////////////////////////////////////////////////
//~ Queue
G_D12_Queue *G_D12_QueueFromKind(G_QueueKind kind);
////////////////////////////////////////////////////////////
//~ Raw command list
G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind);
i64 G_D12_CommitRawCommandList(G_D12_RawCommandList *cl);
////////////////////////////////////////////////////////////
//~ Arena
void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena);
////////////////////////////////////////////////////////////
//~ Descriptor
G_D12_Descriptor *G_D12_DescriptorFromIndex(G_D12_DescriptorHeapKind heap_kind, u32 index);
G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorHeapKind heap_kind);
////////////////////////////////////////////////////////////
//~ Command helpers
G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl);
G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v);
G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size);
////////////////////////////////////////////////////////////
//~ Collection worker
void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane);
////////////////////////////////////////////////////////////
//~ Async
void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame);

Binary file not shown.

Binary file not shown.

BIN
src/gpu_old/gpu_res/noise_128x128x64_16.dat (Stored with Git LFS) Normal file

Binary file not shown.

333
src/gpu_old/gpu_shared.cgh Normal file
View File

@ -0,0 +1,333 @@
////////////////////////////////////////////////////////////
//~ Ref types
// Kinds of bindless shader resource references (see G_PushRef / G_RefDesc).
Enum(G_RefKind)
{
G_RefKind_StructuredBuffer,
G_RefKind_ByteAddressBuffer,
G_RefKind_Texture1D,
G_RefKind_Texture2D,
G_RefKind_Texture3D,
G_RefKind_SamplerState,
};
// Strongly-typed wrappers around a descriptor-heap index (u32).
Struct(G_StructuredBufferRef) { u32 v; };
Struct(G_ByteAddressBufferRef) { u32 v; };
Struct(G_Texture1DRef) { u32 v; };
Struct(G_Texture2DRef) { u32 v; };
Struct(G_Texture3DRef) { u32 v; };
Struct(G_SamplerStateRef) { u32 v; };
// Index 0 is reserved as the nil reference.
#define G_IsRefNil(r) ((r).v == 0)
////////////////////////////////////////////////////////////
//~ Constant types
//
// D3D12 exposes 64 root constants and Vulkan exposes 32 push constants.
// Supposedly AMD hardware will start spilling constants once more than
// 12 are in use - https://gpuopen.com/learn/rdna-performance-guide/
//
#define G_NumGeneralPurposeConstants (24) // Constants available for any usage
#define G_NumReservedConstants (4) // Constants reserved for internal usage by the GPU layer
#define G_NumConstants (G_NumGeneralPurposeConstants + G_NumReservedConstants)
#if IsCpu
// CPU side: the constant's `name` becomes its slot index, and a wrapper struct
// `name__shaderconstanttype` is declared for G_SetConstant to instantiate.
#define G_ForceDeclConstant(type, name, slot) \
enum { name = slot }; \
Struct(name##__shaderconstanttype) { type v; }
// Checked variant: values must fit in one 32-bit constant and use a
// general-purpose slot. G_ForceDeclConstant bypasses both checks.
#define G_DeclConstant(type, name, slot) \
StaticAssert(sizeof(type) <= 4); \
StaticAssert(slot < G_NumGeneralPurposeConstants); \
G_ForceDeclConstant(type, name, slot)
#else
// GPU side: the constant becomes a cbuffer bound at register b<slot>.
#define G_ForceDeclConstant(type, name, slot) cbuffer name : register(b##slot) { type name; }
#define G_DeclConstant(type, name, slot) G_ForceDeclConstant(type, name, slot)
#endif
////////////////////////////////////////////////////////////
//~ Reserved constants
// The constants declared below assume this configuration is accurate for slot usage
StaticAssert(G_NumGeneralPurposeConstants == 24);
StaticAssert(G_NumReservedConstants >= 3);
// Reserved slots 24-26: shader print buffer ref plus two live-tweak values.
G_ForceDeclConstant(G_ByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 24);
G_ForceDeclConstant(b32, G_ShaderConst_TweakB32, 25);
G_ForceDeclConstant(f32, G_ShaderConst_TweakF32, 26);
#if IsGpu
#define G_TweakBool G_ShaderConst_TweakB32
#define G_TweakFloat G_ShaderConst_TweakF32
#endif
////////////////////////////////////////////////////////////
//~ Basic samplers
// Common filter/address-mode combinations (filter x clamp/wrap/mirror).
Enum(G_BasicSamplerKind)
{
G_BasicSamplerKind_PointClamp,
G_BasicSamplerKind_PointWrap,
G_BasicSamplerKind_PointMirror,
G_BasicSamplerKind_BilinearClamp,
G_BasicSamplerKind_BilinearWrap,
G_BasicSamplerKind_BilinearMirror,
G_BasicSamplerKind_TrilinearClamp,
G_BasicSamplerKind_TrilinearWrap,
G_BasicSamplerKind_TrilinearMirror,
G_BasicSamplerKind_COUNT
};
////////////////////////////////////////////////////////////
//~ Resource dereference
#if IsGpu
// NOTE: Uniform dereferencing is faster than Non-Uniform on AMD hardware
// Read-write views live at descriptor index ref.v + 1 (see the *DerefRW
// overloads below, which index r.v + 1).
//- Scalar/Uniform dereference
SamplerState G_SDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[r.v]; }
template<typename T> StructuredBuffer<T> G_SDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v]; }
ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture1D<T> G_SDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture2D<T> G_SDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture3D<T> G_SDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> RWStructuredBuffer<T> G_SDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture1D<T> G_SDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture2D<T> G_SDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture3D<T> G_SDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
//- Vector/Non-Uniform dereference
SamplerState G_VDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> StructuredBuffer<T> G_VDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> Texture1D<T> G_VDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> Texture2D<T> G_VDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> Texture3D<T> G_VDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> RWStructuredBuffer<T> G_VDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> RWTexture1D<T> G_VDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> RWTexture2D<T> G_VDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> RWTexture3D<T> G_VDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
#endif
////////////////////////////////////////////////////////////
//~ Resource countof
#define G_MaxMips 16
#define G_MaxRenderTargets 8
#if IsGpu
// countof overloads: element count (buffers) or dimensions (textures) via
// GetDimensions.
template<typename T> u32 countof(StructuredBuffer<T> obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> u32 countof(RWStructuredBuffer<T> obj) { u32 result; u32 stride; obj.GetDimensions(result, stride); return result; }
u32 countof(ByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; }
u32 countof(RWByteAddressBuffer obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> u32 countof(Texture1D<T> obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> u32 countof(RWTexture1D<T> obj) { u32 result; obj.GetDimensions(result); return result; }
template<typename T> Vec2U32 countof(Texture2D<T> obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; }
template<typename T> Vec2U32 countof(RWTexture2D<T> obj) { Vec2U32 result; obj.GetDimensions(result.x, result.y); return result; }
template<typename T> Vec3U32 countof(Texture3D<T> obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; }
template<typename T> Vec3U32 countof(RWTexture3D<T> obj) { Vec3U32 result; obj.GetDimensions(result.x, result.y, result.z); return result; }
#endif
////////////////////////////////////////////////////////////
//~ Debug printf
// This technique is based on MJP's article - https://therealmjp.github.io/posts/hlsl-printf/
// The BEGINSIZE* values are sentinels, not real kinds: a kind greater than
// BEGINSIZEn carries at least n u32 components (see the packing in G_PrintF_).
Enum(G_FmtArgKind)
{
G_FmtArgKind_None,
G_FmtArgKind_End,
G_FmtArgKind_BEGINSIZE1,
G_FmtArgKind_Uint,
G_FmtArgKind_Sint,
G_FmtArgKind_Float,
G_FmtArgKind_BEGINSIZE2,
G_FmtArgKind_Uint2,
G_FmtArgKind_Sint2,
G_FmtArgKind_Float2,
G_FmtArgKind_BEGINSIZE3,
G_FmtArgKind_Uint3,
G_FmtArgKind_Sint3,
G_FmtArgKind_Float3,
G_FmtArgKind_BEGINSIZE4,
G_FmtArgKind_Uint4,
G_FmtArgKind_Sint4,
G_FmtArgKind_Float4,
};
// One format argument: kind plus up to four 32-bit components (floats are
// stored bit-cast via asuint).
Struct(G_FmtArg)
{
G_FmtArgKind kind;
Vec4U32 v;
};
#if IsGpu && GPU_SHADER_PRINT
// G_Fmt overloads: wrap a value in a G_FmtArg with the matching kind.
// Float variants bit-cast through asuint so the CPU reader can reinterpret.
G_FmtArg G_Fmt(u32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint; result.v.x = v; return result; }
G_FmtArg G_Fmt(Vec2U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint2; result.v.xy = v.xy; return result; }
G_FmtArg G_Fmt(Vec3U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint3; result.v.xyz = v.xyz; return result; }
G_FmtArg G_Fmt(Vec4U32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Uint4; result.v.xyzw = v.xyzw; return result; }
G_FmtArg G_Fmt(i32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint; result.v.x = v; return result; }
G_FmtArg G_Fmt(Vec2I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint2; result.v.xy = v.xy; return result; }
G_FmtArg G_Fmt(Vec3I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint3; result.v.xyz = v.xyz; return result; }
G_FmtArg G_Fmt(Vec4I32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Sint4; result.v.xyzw = v.xyzw; return result; }
G_FmtArg G_Fmt(f32 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float; result.v.x = asuint(v); return result; }
G_FmtArg G_Fmt(Vec2 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float2; result.v.xy = asuint(v.xy); return result; }
G_FmtArg G_Fmt(Vec3 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float3; result.v.xyz = asuint(v.xyz); return result; }
G_FmtArg G_Fmt(Vec4 v) { G_FmtArg result; result.kind = G_FmtArgKind_Float4; result.v.xyzw = asuint(v.xyzw); return result; }
// Sentinel appended by G_PrintF to terminate the argument list.
G_FmtArg G_FmtEnd(void) { G_FmtArg result; result.kind = G_FmtArgKind_End; return result; }
// Thread-local staging buffer for one print call; bytes are packed four per
// u32 chunk. Callers must initialize bytes_count/overflowed before use
// (see G_PrintF_) — the chunk array itself is intentionally uninitialized.
Struct(G_TempPrintBuffer)
{
// NOTE: The larger the array size, the longer the compilation time
u32 byte_chunks[64];
u32 bytes_count; // Bytes pushed so far (chars then packed args)
u32 chars_count; // Length of the format string portion
u32 args_count; // Number of real args (excludes the End sentinel)
b32 overflowed; // Set when byte_chunks capacity was exceeded
};
// Appends the low 8 bits of `v` to the packed byte stream in `buff`.
// Bytes are packed little-endian, four per u32 chunk; when the chunk array is
// full the byte is dropped and `buff.overflowed` is raised instead.
void G_PushPrintByte(inout G_TempPrintBuffer buff, u32 v)
{
  u32 slot = buff.bytes_count >> 2;
  if (slot >= countof(buff.byte_chunks))
  {
    buff.overflowed = 1;
    return;
  }
  u32 shift = (buff.bytes_count & 0x03) * 8;
  u32 low8 = v & 0xFF;
  // buff is not zero initialized, so the first byte of each chunk assigns the
  // whole chunk; later bytes OR into it.
  if (shift == 0)
  {
    buff.byte_chunks[slot] = low8;
  }
  else
  {
    buff.byte_chunks[slot] |= low8 << shift;
  }
  buff.bytes_count += 1;
}
// Reserves space in the queue's global print buffer and copies the packed
// message into it.
// Buffer layout (all u32): [0] allocation byte counter, [4] success counter,
// [8] overflow counter, then variable-size records of {header, payload chunks}.
// Header bits: 0-15 chars_count, 16-30 args_count, 31 overflowed flag.
void G_CommitPrint(G_TempPrintBuffer buff)
{
  RWByteAddressBuffer rw = G_SDerefRW(G_ShaderConst_PrintBufferRef);
  if (buff.overflowed)
  {
    // Local staging overflowed: drop the payload but still emit a header so
    // the CPU reader can report the truncation via the overflow bit.
    buff.bytes_count = 0;
    buff.chars_count = 0;
    buff.args_count = 0;
  }
  u32 chunks_count = (buff.bytes_count + 3) / 4;
  u32 alloc_size = 0;
  alloc_size += 4; // Header
  alloc_size += chunks_count * 4; // Chunks
  // Atomic fetch + add to base counter
  u32 base;
  rw.InterlockedAdd(0, alloc_size, base);
  base += 4; // Offset for allocation counter
  base += 4; // Offset for success counter
  base += 4; // Offset for overflow counter
  if ((base + alloc_size) < countof(rw))
  {
    // Increment success counter
    rw.InterlockedAdd(4, 1);
    u32 pos = 0;
    // Write header
    {
      u32 header = 0;
      header |= (buff.chars_count << 0) & 0x0000FFFF;
      header |= (buff.args_count << 16) & 0x7FFF0000;
      // NOTE: mask fixed from 0xF0000000 to 0x80000000 — the flag lives in
      // bit 31 only; the old mask overlapped the args_count field (bits 28-30).
      header |= (buff.overflowed << 31) & 0x80000000;
      rw.Store(base + pos, header);
      pos += 4;
    }
    // Write chunks
    for (u32 chunk_idx = 0; chunk_idx < chunks_count; ++chunk_idx)
    {
      u32 chunk = buff.byte_chunks[chunk_idx];
      rw.Store(base + pos, chunk);
      pos += 4;
    }
  }
  else
  {
    // Global print buffer is full: count the dropped message.
    rw.InterlockedAdd(8, 1);
  }
}
// G_PrintF_ packs the format string, then each argument (kind byte followed by
// 1-4 little-endian u32 payloads depending on the kind's BEGINSIZE band), into
// a G_TempPrintBuffer and commits it via G_CommitPrint. args_count is
// countof(__args) - 1 because G_PrintF appends a G_FmtEnd() sentinel that is
// packed-around but not emitted.
#define G_PrintF_(fmt, ...) do { \
G_TempPrintBuffer __tmp; \
__tmp.bytes_count = 0; \
__tmp.overflowed = 0; \
u32 __char_idx = 0; \
while (U32FromChar(fmt[__char_idx]) != 0) \
{ \
G_PushPrintByte(__tmp, U32FromChar(fmt[__char_idx])); \
++__char_idx; \
} \
G_FmtArg __args[] = { __VA_ARGS__ }; \
__tmp.chars_count = __char_idx; \
__tmp.args_count = (countof(__args) - 1); \
for (u32 __arg_idx = 0; __arg_idx < __tmp.args_count; ++__arg_idx) \
{ \
G_FmtArg __arg = __args[__arg_idx]; \
G_PushPrintByte(__tmp, __arg.kind); \
if (__arg.kind > G_FmtArgKind_BEGINSIZE1) \
{ \
G_PushPrintByte(__tmp, __arg.v.x >> 0); \
G_PushPrintByte(__tmp, __arg.v.x >> 8); \
G_PushPrintByte(__tmp, __arg.v.x >> 16); \
G_PushPrintByte(__tmp, __arg.v.x >> 24); \
} \
if (__arg.kind > G_FmtArgKind_BEGINSIZE2) \
{ \
G_PushPrintByte(__tmp, __arg.v.y >> 0); \
G_PushPrintByte(__tmp, __arg.v.y >> 8); \
G_PushPrintByte(__tmp, __arg.v.y >> 16); \
G_PushPrintByte(__tmp, __arg.v.y >> 24); \
} \
if (__arg.kind > G_FmtArgKind_BEGINSIZE3) \
{ \
G_PushPrintByte(__tmp, __arg.v.z >> 0); \
G_PushPrintByte(__tmp, __arg.v.z >> 8); \
G_PushPrintByte(__tmp, __arg.v.z >> 16); \
G_PushPrintByte(__tmp, __arg.v.z >> 24); \
} \
if (__arg.kind > G_FmtArgKind_BEGINSIZE4) \
{ \
G_PushPrintByte(__tmp, __arg.v.w >> 0); \
G_PushPrintByte(__tmp, __arg.v.w >> 8); \
G_PushPrintByte(__tmp, __arg.v.w >> 16); \
G_PushPrintByte(__tmp, __arg.v.w >> 24); \
} \
} \
G_CommitPrint(__tmp); \
} while (0)
// Public entry point: appends the G_FmtEnd() sentinel expected by G_PrintF_.
#define G_PrintF(fmt, ...) G_PrintF_(fmt, ##__VA_ARGS__, G_FmtEnd())
#else
// Print disabled: compile away, still accepting (and discarding) arguments.
// NOTE: made variadic — the old stub `G_PrintF(fmt)` failed to compile for any
// call site that passed format arguments when GPU_SHADER_PRINT was off.
#define G_PrintF(fmt, ...)
#endif