diff --git a/src/base/base.h b/src/base/base.h index b511b45c..3ea85d4b 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -704,9 +704,10 @@ #endif //////////////////////////////////////////////////////////// -//~ Shader types +//~ C <-> Shader interop types //- Shader linkage + #if IsLanguageC Struct(VertexShader) { ResourceKey resource; }; Struct(PixelShader) { ResourceKey resource; }; @@ -721,6 +722,7 @@ #endif //- Shader resource handles + Struct(StructuredBufferHandle) { u32 v; }; Struct(RWStructuredBufferHandle) { u32 v; }; Struct(Texture1DHandle) { u32 v; }; diff --git a/src/base/base_gpu.h b/src/base/base_gpu.h index bdc81ec3..807d6f35 100644 --- a/src/base/base_gpu.h +++ b/src/base/base_gpu.h @@ -110,3 +110,12 @@ Vec2 NdcFromPos(Vec2 pos, Vec2 size) result += Vec2(-1, 1); return result; } + +Vec2 NdcFromUv(Vec2 uv) +{ + Vec2 result; + result = uv; + result *= Vec2(2, -2); + result += Vec2(-1, 1); + return result; +} diff --git a/src/base/base_math.c b/src/base/base_math.c index 998fde78..b192dde8 100644 --- a/src/base/base_math.c +++ b/src/base/base_math.c @@ -122,7 +122,7 @@ f64 ModF64(f64 x, f64 m) } //////////////////////////////////////////////////////////// -//~ Floating point sign +//~ Abs f32 AbsF32(f32 f) { @@ -159,7 +159,9 @@ i64 SignF64(f64 f) } //////////////////////////////////////////////////////////// -//~ U64 pow +//~ Exponential ops + +//- Pow u64 /* Taken from https://gist.github.com/orlp/3551590 */ u64 PowU64(u64 base, u8 exp) @@ -254,28 +256,7 @@ u64 PowU64(u64 base, u8 exp) } } -//////////////////////////////////////////////////////////// -//~ Align up - -u64 AlignU64Pow2(u64 x) -{ - u64 result = 0; - if (x > 0) - { - result = x - 1; - result |= result >> 1; - result |= result >> 2; - result |= result >> 4; - result |= result >> 8; - result |= result >> 16; - result |= result >> 32; - ++result; - } - return result; -} - -//////////////////////////////////////////////////////////// -//~ Logn +//- Logn /* Based on FreeBSD's 
implementation * https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_logf.c */ @@ -369,8 +350,7 @@ f32 LnF32(f32 x) } } -//////////////////////////////////////////////////////////// -//~ Exp +//- Exp /* Based on FreeBSD's implementation * https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_expf.c */ @@ -481,8 +461,7 @@ f32 ExpF32(f32 x) } } -//////////////////////////////////////////////////////////// -//~ Pow +//- Pow f32 PowF32(f32 a, f32 b) { @@ -499,8 +478,7 @@ f32 PowF32(f32 a, f32 b) } } -//////////////////////////////////////////////////////////// -//~ Sqrt +//- Sqrt f32 SqrtF32(f32 x) { @@ -517,6 +495,34 @@ f32 RSqrtF32(f32 x) return IxRsqrtF32(x); } +//////////////////////////////////////////////////////////// +//~ Align + +u64 AlignU64(u64 x, u64 align) +{ + align = MaxU64(align, 1); + u64 result = (x + (align - 1)); + result -= result % align; + return result; +} + +u64 AlignU64ToNextPow2(u64 x) +{ + u64 result = 0; + if (x > 0) + { + result = x - 1; + result |= result >> 1; + result |= result >> 2; + result |= result >> 4; + result |= result >> 8; + result |= result >> 16; + result |= result >> 32; + ++result; + } + return result; +} + //////////////////////////////////////////////////////////// //~ Trig diff --git a/src/base/base_math.h b/src/base/base_math.h index b7e3ffc2..77af776b 100644 --- a/src/base/base_math.h +++ b/src/base/base_math.h @@ -235,7 +235,6 @@ i64 SignF64(f64 f); //~ Exponential ops u64 PowU64(u64 base, u8 exp); -u64 AlignU64Pow2(u64 x); f32 LnF32(f32 x); f32 ExpF32(f32 x); f32 PowF32(f32 a, f32 b); @@ -243,6 +242,12 @@ f32 SqrtF32(f32 x); f64 SqrtF64(f64 x); f32 RSqrtF32(f32 x); +//////////////////////////////////////////////////////////// +//~ Align + +u64 AlignU64(u64 x, u64 align); +u64 AlignU64ToNextPow2(u64 x); + //////////////////////////////////////////////////////////// //~ Trig diff --git a/src/config.h b/src/config.h index b25f9992..8a224e25 100644 --- a/src/config.h +++ b/src/config.h @@ -70,7 
+70,7 @@ #define FLOOD_DEBUG 0 #define GPU_DEBUG 1 -#define GPU_DEBUG_VALIDATION 0 +#define GPU_DEBUG_VALIDATION 1 /* If virtual fibers are enabled, each fiber will get its own OS thread, * and fiber suspend/resume will be emulated using OS thread primitives. diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index 67533d50..6ad27090 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -7,11 +7,38 @@ void GPU_StartupCommon(void) { GPU_SharedUtilState *g = &GPU_shared_util_state; - // GPU_ArenaHandle gpu_perm = GPU_PermArena(); + GPU_ArenaHandle gpu_perm = GPU_PermArena(); + + /* Init point sampler */ + { + GPU_ResourceHandle pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint }); + g->pt_sampler = GPU_PushSamplerStateHandle(gpu_perm, pt_sampler); + } + + GPU_CommandListHandle cl = GPU_PrepareCommandList(GPU_QueueKind_Direct); + { + /* Init quad index buffer */ + { + u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 }; + GPU_ResourceHandle quad_indices = GPU_PushBuffer(gpu_perm, u16, countof(quad_data)); + GPU_CopyCpuBytes(cl, quad_indices, 0, quad_data, RNGU64(0, sizeof(quad_data))); + g->quad_indices.resource = quad_indices; + g->quad_indices.index_size = sizeof(quad_data[0]); + g->quad_indices.index_count = countof(quad_data); + } + + /* TODO: Init noise texture */ + { + } + } + GPU_CommitCommandList(cl); + + GPU_SyncAllQueues(GPU_QueueKind_Direct); + + + + - // /* Init point sampler */ - // GPU_ResourceHandle pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint }); - // g->pt_sampler = GPU_PushSamplerPtr(gpu_perm, pt_sampler); // GPU_CommandListHandle cl = GPU_PrepareCommandList(); // { diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 2c5cc2e8..0b777f2a 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -326,7 +326,7 @@ Struct(GPU_TextureDesc) GPU_Format format; Vec3I32 dims; GPU_Layout initial_layout; - i32 mip_levels; /* Will be clamped to range [1, 
max] */ + i32 mip_levels; /* Will be clamped to range [1, inf) */ Vec4 clear_color; }; @@ -560,10 +560,13 @@ SamplerStateHandle GPU_PushSamplerStateHandle (GPU_ArenaHandle //- Count -u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer); -u64 GPU_Count1D(GPU_ResourceHandle texture1d); -u64 GPU_Count2D(GPU_ResourceHandle texture2d); -u64 GPU_Count3D(GPU_ResourceHandle texture3d); +u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer); +i32 GPU_Count1D(GPU_ResourceHandle texture); +Vec2I32 GPU_Count2D(GPU_ResourceHandle texture); +Vec3I32 GPU_Count3D(GPU_ResourceHandle texture); +i32 GPU_CountWidth(GPU_ResourceHandle texture); +i32 GPU_CountHeight(GPU_ResourceHandle texture); +i32 GPU_CountDepth(GPU_ResourceHandle texture); #define GPU_CountBuffer(buffer, type) GPU_CountBufferSize(buffer) / sizeof(type) @@ -572,10 +575,10 @@ u64 GPU_Count3D(GPU_ResourceHandle texture3d); //- Command list -GPU_CommandListHandle GPU_PrepareCommandList(void); -void GPU_CommitCommandListEx(GPU_CommandListHandle cl, GPU_QueueKind queue, u64 fence_ops_count, GPU_FenceOp *fence_ops); +GPU_CommandListHandle GPU_PrepareCommandList(GPU_QueueKind queue); +void GPU_CommitCommandListEx(GPU_CommandListHandle cl, u64 fence_ops_count, GPU_FenceOp *fence_ops); -#define GPU_CommitCommandList(cl, queue) GPU_CommitCommandListEx((cl), (queue), 0, 0) +#define GPU_CommitCommandList(cl) GPU_CommitCommandListEx((cl), 0, 0) //- Arena @@ -605,42 +608,42 @@ void GPU_SetConstant_(GPU_CommandListHandle cl, i32 slot, void *src_32bit, u32 s void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc); -#define GPU_MemoryBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \ - GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ - .resource = (_resource), \ - .sync_prev = _sync_prev, \ - .sync_next = _sync_next, \ - .access_prev = _access_prev, \ - .access_next = _access_next, \ +#define GPU_MemoryBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \ + GPU_BarrierEx((_cl), 
(GPU_BarrierDesc) { \ + .resource = (_resource), \ + .sync_prev = _sync_prev, \ + .sync_next = _sync_next, \ + .access_prev = _access_prev, \ + .access_next = _access_next, \ }) -#define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \ - GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ - .is_global = 1, \ - .sync_prev = _sync_prev, \ - .sync_next = _sync_next, \ - .access_prev = _access_prev, \ - .access_next = _access_next, \ +#define GPU_MemoryLayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \ + GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ + .resource = (_resource), \ + .sync_prev = _sync_prev, \ + .sync_next = _sync_next, \ + .access_prev = _access_prev, \ + .access_next = _access_next, \ + .layout = _layout, \ }) -#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \ - GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ - .resource = (_resource), \ - .sync_prev = _sync_prev, \ - .sync_next = _sync_next, \ - .access_prev = _access_prev, \ - .access_next = _access_next, \ - .layout = _layout, \ +#define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \ + GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ + .is_global = 1, \ + .sync_prev = _sync_prev, \ + .sync_next = _sync_next, \ + .access_prev = _access_prev, \ + .access_next = _access_next, \ }) -#define GPU_DumbMemoryBarrier(_cl, _resource) \ - GPU_MemoryBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) +#define GPU_DumbMemoryBarrier(cl, resource) \ + GPU_MemoryBarrier((cl), (resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) -#define GPU_DumbGlobalMemoryBarrier(_cl) \ - GPU_GlobalMemoryBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) +#define GPU_DumbMemoryLayoutBarrier(cl, resource, layout) \ + GPU_MemoryLayoutBarrier((cl), (resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, 
GPU_Access_All, (layout)) -#define GPU_DumbLayoutBarrier(_cl, _resource, _layout) \ - GPU_LayoutBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (_layout)) +#define GPU_DumbGlobalMemoryBarrier(cl) \ + GPU_GlobalMemoryBarrier((cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) //- Compute @@ -648,12 +651,29 @@ void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups); //- Rasterize -void GPU_Rasterize(GPU_CommandListHandle cl, - VertexShader vs, PixelShader ps, - u32 instances_count, GPU_IndexBufferDesc index_buffer, - u32 render_targets_count, GPU_ResourceHandle *render_targets, - Rng3 viewport, Rng2 scissor, - GPU_RasterMode mode); +void GPU_RasterizeEx(GPU_CommandListHandle cl, + VertexShader vs, PixelShader ps, + u32 instances_count, GPU_IndexBufferDesc index_buffer, + u32 render_targets_count, GPU_ResourceHandle *render_targets, + Rng3 viewport, Rng2 scissor, + GPU_RasterMode mode); + +#define GPU_Rasterize(cl, vs, ps, instances_count, index_buffer, render_target, mode) \ + GPU_RasterizeEx( \ + (cl), \ + (vs), (ps), \ + (instances_count), (index_buffer), \ + 1, &(render_target), \ + RNG3( \ + VEC3(0, 0, 0), \ + VEC3(GPU_CountWidth(render_target), GPU_CountHeight(render_target), 1) \ + ), \ + RNG2( \ + VEC2(0, 0), \ + Vec2FromVec(GPU_Count2D(render_target)) \ + ), \ + (mode) \ + ) //- Clear @@ -663,6 +683,15 @@ void GPU_ClearRenderTarget(GPU_CommandListHandle cl, GPU_ResourceHandle render_t void GPU_ProfN(GPU_CommandListHandle cl, String name); +//////////////////////////////////////////////////////////// +//~ @hookdecl Synchronization + +/* `waiter_queue` will block until `completion_queue` completes all submitted commands */ +void GPU_SyncQueue(GPU_QueueKind completion_queue, GPU_QueueKind waiter_queue); + +/* All queues will block until `completion_queue` completes all submitted commands */ +void GPU_SyncAllQueues(GPU_QueueKind completion_queue); + 
//////////////////////////////////////////////////////////// //~ @hookdecl Statistics diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index fc711b2c..d90e7c23 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -62,7 +62,7 @@ void GPU_Startup(void) { __profn("Create device"); IDXGIAdapter3 *adapter = 0; - ID3D12Device *device = 0; + ID3D12Device10 *device = 0; String error = Lit("Could not initialize GPU device."); String first_gpu_name = ZI; u32 adapter_index = 0; @@ -86,7 +86,7 @@ void GPU_Startup(void) * - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3 * - EnhancedBarriersSupported == 1 */ - hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); + hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device); } if (SUCCEEDED(hr) && !skip) { @@ -769,7 +769,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) if (desc.kind == GPU_ResourceKind_Buffer) { desc.buffer.stride = MaxU32(desc.buffer.stride, 1); - buffer_size = MaxU64(AlignU64Pow2(desc.buffer.count * desc.buffer.stride), Kibi(64)); + buffer_size = MaxU64(AlignU64ToNextPow2(desc.buffer.count * desc.buffer.stride), Kibi(64)); } u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size); @@ -1137,9 +1137,13 @@ GPU_ArenaHandle GPU_AcquireArena(void) GPU_D12_Arena *gpu_arena = 0; { Arena *perm = PermArena(); + PushAlign(perm, CachelineSize); gpu_arena = PushStruct(perm, GPU_D12_Arena); + PushAlign(perm, CachelineSize); } - return (GPU_ArenaHandle) { .v = (u64)gpu_arena }; + gpu_arena->arena = AcquireArena(Gibi(1)); + + return GPU_D12_HandleFromPointer(GPU_ArenaHandle, gpu_arena); } void GPU_ReleaseArena(GPU_ArenaHandle arena) @@ -1238,19 +1242,109 @@ GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_ GPU_ResourceHandle GPU_PushBufferEx(GPU_ArenaHandle arena, GPU_BufferDesc desc) { /* TODO */ - return 
(GPU_ResourceHandle) { 0 }; + return GPU_D12_HandleFromPointer(GPU_ResourceHandle, 0); } -GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena, GPU_TextureDesc desc) +GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena_handle, GPU_TextureDesc desc) { - /* TODO */ - return (GPU_ResourceHandle) { 0 }; + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); + D3D12_BARRIER_LAYOUT initial_layout = GPU_D12_BarrierLayoutFromLayout(desc.initial_layout); + + /* Create resource heap */ + if (!gpu_arena->d3d_resource_heap) + { + /* FIXME: Dynamic size */ + D3D12_HEAP_DESC d3d_desc = ZI; + d3d_desc.SizeInBytes = Mebi(64); + d3d_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; /* TODO: Remove this and support tier 1 resource heaps */ + d3d_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; + + ID3D12Heap *heap = 0; + HRESULT hr = ID3D12Device_CreateHeap(g->device, &d3d_desc, &IID_ID3D12Heap, (void **)&heap); + if (!SUCCEEDED(hr)) + { + /* TODO: Don't panic */ + Panic(Lit("Failed to create D3D12 resource heap")); + } + + gpu_arena->d3d_resource_heap = heap; + gpu_arena->heap_size = d3d_desc.SizeInBytes; + } + + ID3D12Resource *d3d_resource = 0; + { + D3D12_RESOURCE_DESC1 d3d_desc = ZI; + d3d_desc.Dimension = desc.kind == GPU_TextureKind_1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : + desc.kind == GPU_TextureKind_2D ? 
D3D12_RESOURCE_DIMENSION_TEXTURE2D : + D3D12_RESOURCE_DIMENSION_TEXTURE3D; + d3d_desc.Width = MaxI32(desc.dims.x, 1); + d3d_desc.Height = MaxI32(desc.dims.y, 1); + d3d_desc.DepthOrArraySize = MaxI32(desc.dims.z, 1); + d3d_desc.MipLevels = MaxI32(desc.mip_levels, 1); + d3d_desc.Format = GPU_D12_DxgiFormatFromGpuFormat(desc.format); + d3d_desc.SampleDesc.Count = 1; + d3d_desc.SampleDesc.Quality = 0; + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_AllowShaderReadWrite); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_AllowRenderTarget); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(desc.flags, GPU_ResourceFlag_AllowDepthStencil); + + u64 alloc_size = 0; + u64 alloc_align = 0; + { + D3D12_RESOURCE_ALLOCATION_INFO alloc_info = ZI; + ID3D12Device_GetResourceAllocationInfo(g->device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc); + alloc_size = alloc_info.SizeInBytes; + alloc_align = alloc_info.Alignment; + } + + u64 alloc_pos = gpu_arena->heap_pos; + alloc_pos = AlignU64(alloc_pos, alloc_align); + gpu_arena->heap_pos = alloc_pos + alloc_size; + + if (alloc_pos + alloc_size > gpu_arena->heap_size) + { + Panic(Lit("Gpu arena overflow")); + } + + D3D12_CLEAR_VALUE clear_value = { + .Color[0] = desc.clear_color.x, + .Color[1] = desc.clear_color.y, + .Color[2] = desc.clear_color.z, + .Color[3] = desc.clear_color.w, + .Format = d3d_desc.Format + }; + + HRESULT hr = ID3D12Device10_CreatePlacedResource2(g->device, + gpu_arena->d3d_resource_heap, + alloc_pos, + &d3d_desc, + initial_layout, + (d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? 
&clear_value : 0, + 0, + 0, + &IID_ID3D12Resource, + (void **)&d3d_resource); + } + + GPU_D12_Resource *resource = PushStruct(gpu_arena->arena, GPU_D12_Resource); + resource->d3d_resource = d3d_resource; + resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; + resource->flags = desc.flags; + + resource->is_texture = 1; + resource->texture_format = desc.format; + resource->texture_dims = desc.dims; + resource->texture_mip_levels = desc.mip_levels; + resource->texture_layout = initial_layout; + + return GPU_D12_HandleFromPointer(GPU_ResourceHandle, resource); } GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc) { /* TODO */ - return (GPU_ResourceHandle) { 0 }; + return GPU_D12_HandleFromPointer(GPU_ResourceHandle, 0); } b32 GPU_IsResourceNil(GPU_ResourceHandle handle) @@ -1263,81 +1357,103 @@ b32 GPU_IsResourceNil(GPU_ResourceHandle handle) StructuredBufferHandle GPU_PushStructuredBufferHandleEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range) { /* TODO */ - return (StructuredBufferHandle) { 0 }; + return GPU_D12_HandleFromPointer(StructuredBufferHandle, 0); } RWStructuredBufferHandle GPU_PushRWStructuredBufferHandleEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range) { /* TODO */ - return (RWStructuredBufferHandle) { 0 }; + return GPU_D12_HandleFromPointer(RWStructuredBufferHandle, 0); } Texture1DHandle GPU_PushTexture1DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (Texture1DHandle) { 0 }; + return GPU_D12_HandleFromPointer(Texture1DHandle, 0); } RWTexture1DHandle GPU_PushRWTexture1DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (RWTexture1DHandle) { 0 }; + return GPU_D12_HandleFromPointer(RWTexture1DHandle, 0); } Texture2DHandle GPU_PushTexture2DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (Texture2DHandle) { 0 }; + return 
GPU_D12_HandleFromPointer(Texture2DHandle, 0); } -RWTexture2DHandle GPU_PushRWTexture2DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) +RWTexture2DHandle GPU_PushRWTexture2DHandle(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) { - /* TODO */ - return (RWTexture2DHandle) { 0 }; + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle); + GPU_D12_Descriptor *descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav); + ID3D12Device_CreateUnorderedAccessView(g->device, resource->d3d_resource, 0, 0, descriptor->handle); + return GPU_D12_HandleFromPointer(RWTexture2DHandle, descriptor->index); } Texture3DHandle GPU_PushTexture3DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (Texture3DHandle) { 0 }; + return GPU_D12_HandleFromPointer(Texture3DHandle, 0); } RWTexture3DHandle GPU_PushRWTexture3DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (RWTexture3DHandle) { 0 }; + return GPU_D12_HandleFromPointer(RWTexture3DHandle, 0); } SamplerStateHandle GPU_PushSamplerStateHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource) { /* TODO */ - return (SamplerStateHandle) { 0 }; + return GPU_D12_HandleFromPointer(SamplerStateHandle, 0); } //- Count u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer) { - /* TODO */ - return 0; + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(buffer); + return resource->buffer_size; } -u64 GPU_Count1D(GPU_ResourceHandle texture1d) +i32 GPU_Count1D(GPU_ResourceHandle texture) { - /* TODO */ - return 0; + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture); + return resource->texture_dims.x; } -u64 GPU_Count2D(GPU_ResourceHandle texture2d) +Vec2I32 GPU_Count2D(GPU_ResourceHandle texture) { - /* TODO */ - return 0; + GPU_D12_Resource *resource = 
GPU_D12_ResourceFromHandle(texture); + return VEC2I32(resource->texture_dims.x, resource->texture_dims.y); } -u64 GPU_Count3D(GPU_ResourceHandle texture3d) +Vec3I32 GPU_Count3D(GPU_ResourceHandle texture) { - /* TODO */ - return 0; + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture); + return resource->texture_dims; +} + +i32 GPU_CountWidth(GPU_ResourceHandle texture) +{ + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture); + return resource->texture_dims.x; +} + +i32 GPU_CountHeight(GPU_ResourceHandle texture) +{ + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture); + return resource->texture_dims.y; +} + +i32 GPU_CountDepth(GPU_ResourceHandle texture) +{ + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture); + return resource->texture_dims.z; } //////////////////////////////////////////////////////////// @@ -1399,12 +1515,210 @@ GPU_D12_Cmd *GPU_D12_PushConstCmd(GPU_D12_CmdList *cl, i32 slot, void *v) return cmd; } +GPU_D12_StagingRegionNode *GPU_D12_PushStagingRegion(GPU_D12_CmdList *cl, u64 size) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_QueueKind queue_kind = cl->queue_kind; + GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); + GPU_D12_StagingRegionNode *result = 0; + + Lock lock = LockE(&queue->staging_mutex); + { + GPU_D12_StagingHeap *heap = queue->staging_heap; + i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence); + + /* Find first completed region with matching size. 
+ * For each region in heap: + * - If region size > size, split off a smaller region & use it + * + * - If region size < size, try to merge with next completed region + * + * - If no available completed region with eligible size, queue the + * current heap for deletion & create a new heap + * with larger size + */ + + + + + /* FIXME: Region completion target should be atomic, and initialized to + * u64/i64 max until cl submission actually sets value */ + + /* Find region with large enough size */ + GPU_D12_StagingRegionNode *match = 0; + if (heap && heap->size >= size) + { + GPU_D12_StagingRegionNode *r = heap->head_region_node; + for (;;) + { + b32 is_completed = completed >= Atomic64Fetch(&r->completion_target); + if (is_completed) + { + GPU_D12_StagingRegionNode *next = r->next; + u64 region_size = 0; + if (next->pos > r->pos) + { + region_size = next->pos - r->pos; + } + else + { + region_size = heap->size - r->pos; + } + + if (region_size < size) + { + GPU_D12_StagingRegionNode *prev = r->prev; + b32 prev_is_completed = completed >= Atomic64Fetch(&prev->completion_target); + if (prev_is_completed && prev->pos < r->pos) + { + /* Merge with previous region & retry */ + prev->next = next; + SllStackPush(heap->first_free_region_node, r); + r = prev; + } + else + { + /* Continue to next region */ + r = next; + } + } + else + { + /* Found matching region */ + match = r; + break; + } + } + else + { + /* No large-enough completed region found */ + break; + } + } + } + + /* Create new heap if no match found */ + if (!match) + { + /* Queue old heap for deletion */ + u64 new_heap_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64)); + if (heap) + { + /* FIXME: Queue for deletion here */ + new_heap_size = MaxU64(new_heap_size, heap->size * 2); + heap = 0; + } + + /* Create new heap */ + { + Arena *arena = AcquireArena(Gibi(1)); + heap = PushStruct(arena, GPU_D12_StagingHeap); + heap->arena = arena; + heap->size = new_heap_size; + + /* Create backing upload heap resource */ + 
ID3D12Resource *d3d_resource = 0; + { + D3D12_RESOURCE_DESC d3d_desc = ZI; + d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + d3d_desc.Format = DXGI_FORMAT_UNKNOWN; + d3d_desc.Alignment = 0; + d3d_desc.Width = new_heap_size; + d3d_desc.Height = 1; + d3d_desc.DepthOrArraySize = 1; + d3d_desc.MipLevels = 1; + d3d_desc.SampleDesc.Count = 1; + d3d_desc.SampleDesc.Quality = 0; + + D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; + HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, + &heap_props, + D3D12_HEAP_FLAG_CREATE_NOT_ZEROED, + &d3d_desc, + D3D12_RESOURCE_STATE_COMMON, + 0, + &IID_ID3D12Resource, + (void **)&d3d_resource); + if (!SUCCEEDED(hr)) + { + /* TODO: Don't panic */ + Panic(Lit("Failed to create upload heap")); + } + } + heap->resource.d3d_resource = d3d_resource; + heap->resource.uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; + heap->resource.buffer_size = new_heap_size; + heap->resource.buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource); + + /* Map */ + { + D3D12_RANGE read_range = ZI; + HRESULT hr = ID3D12Resource_Map(d3d_resource, 0, &read_range, &heap->mapped); + if (!SUCCEEDED(hr)) + { + /* TODO: Don't panic */ + Panic(Lit("Failed to map upload heap")); + } + } + } + + /* Create initial region */ + match = PushStruct(heap->arena, GPU_D12_StagingRegionNode); + match->heap = heap; + match->next = match; + match->prev = match; + heap->head_region_node = match; + } + + /* Split extra region space */ + { + GPU_D12_StagingRegionNode *next = match->next; + u64 region_size = 0; + if (next->pos > match->pos) + { + region_size = next->pos - match->pos; + } + else + { + region_size = heap->size - match->pos; + } + + if (region_size > size) + { + GPU_D12_StagingRegionNode *new_next = heap->first_free_region_node; + if (new_next) + { + SllStackPop(heap->first_free_region_node); + } + else + { + new_next = PushStruct(heap->arena, 
GPU_D12_StagingRegionNode); + } + new_next->next = next; + new_next->prev = match; + next->prev = new_next; + match->next = new_next; + + new_next->heap = heap; + new_next->pos = match->pos + size; + } + } + + Atomic64Set(&match->completion_target, I64Max); + result = match; + } + Unlock(&lock); + + return result; +} + //////////////////////////////////////////////////////////// //~ @hookimpl Command //- Command list -GPU_CommandListHandle GPU_PrepareCommandList(void) +GPU_CommandListHandle GPU_PrepareCommandList(GPU_QueueKind queue) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = 0; @@ -1423,13 +1737,16 @@ GPU_CommandListHandle GPU_PrepareCommandList(void) } } Unlock(&lock); - return (GPU_CommandListHandle) { .v = (u64)cl }; + cl->queue_kind = queue; + + return GPU_D12_HandleFromPointer(GPU_CommandListHandle, cl); } -void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_kind, u64 fence_ops_count, GPU_FenceOp *fence_ops) +void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, u64 fence_ops_count, GPU_FenceOp *fence_ops) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); + GPU_QueueKind queue_kind = cl->queue_kind; GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); TempArena scratch = BeginScratchNoConflict(); @@ -1849,8 +2166,9 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu u32 indices_count = 0; D3D12_INDEX_BUFFER_VIEW ibv = ZI; { + GPU_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc; + if (desc.index_count > 0) { - GPU_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc; GPU_D12_Resource *index_buffer_resource = GPU_D12_ResourceFromHandle(desc.resource); ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; ibv.SizeInBytes = desc.index_size * desc.index_count; @@ -2057,16 +2375,27 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu //- Arena -void 
GPU_ResetArena(GPU_CommandListHandle cl_handle, GPU_ArenaHandle arena) +void GPU_ResetArena(GPU_CommandListHandle cl_handle, GPU_ArenaHandle arena_handle) { + GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); + /* TODO */ + + /* FIXME: Move descriptors into committed lists */ + + /* FIXME: Release id3d12 resource com object references */ + gpu_arena->heap_pos = 0; } //- Cpu -> Gpu copy -void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range) +void GPU_CopyCpuBytes(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range) { - /* TODO */ + GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); + u64 size = src_copy_range.max - src_copy_range.min; + GPU_D12_StagingRegionNode *region = GPU_D12_PushStagingRegion(cl, size); + CopyBytes((u8 *)region->heap->mapped + region->pos, (u8 *)src + src_copy_range.min, size); + GPU_CopyBytes(cl_handle, dst_handle, dst_offset, GPU_D12_HandleFromPointer(GPU_ResourceHandle, ®ion->heap->resource), RNGU64(region->pos, region->pos + size)); } void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range) @@ -2180,12 +2509,12 @@ void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 grou //- Rasterize -void GPU_Rasterize(GPU_CommandListHandle cl_handle, - VertexShader vs, PixelShader ps, - u32 instances_count, GPU_IndexBufferDesc index_buffer, - u32 render_targets_count, GPU_ResourceHandle *render_targets, - Rng3 viewport, Rng2 scissor, - GPU_RasterMode mode) +void GPU_RasterizeEx(GPU_CommandListHandle cl_handle, + VertexShader vs, PixelShader ps, + u32 instances_count, GPU_IndexBufferDesc index_buffer, + u32 render_targets_count, GPU_ResourceHandle *render_targets, + Rng3 viewport, Rng2 scissor, + GPU_RasterMode mode) { GPU_D12_CmdList *cl = 
GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); @@ -2221,6 +2550,19 @@ void GPU_ProfN(GPU_CommandListHandle cl, String name) /* TODO */ } +//////////////////////////////////////////////////////////// +//~ @hookimpl Synchronization + +void GPU_SyncQueue(GPU_QueueKind completion_queue, GPU_QueueKind waiter_queue) +{ + /* TODO */ +} + +void GPU_SyncAllQueues(GPU_QueueKind completion_queue) +{ + /* TODO */ +} + //////////////////////////////////////////////////////////// //~ @hookimpl Map hooks @@ -2315,7 +2657,7 @@ GPU_SwapchainHandle GPU_AcquireSwapchain(WND_Handle window) swapchain = PushStruct(perm, GPU_D12_Swapchain); } swapchain->window_hwnd = (HWND)WND_OsHandleFromWindow(window); - return (GPU_SwapchainHandle) { .v = (u64)swapchain }; + return GPU_D12_HandleFromPointer(GPU_SwapchainHandle, swapchain); } void GPU_ReleaseSwapchain(GPU_SwapchainHandle swapchain_handle) @@ -2483,7 +2825,7 @@ GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, G cur_backbuffer = &swapchain->backbuffers[backbuffer_idx]; } - return (GPU_ResourceHandle) { .v = (u64)cur_backbuffer }; + return GPU_D12_HandleFromPointer(GPU_ResourceHandle, cur_backbuffer); } void GPU_CommitBackbuffer(GPU_ResourceHandle backbuffer_handle, i32 vsync) diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index 8f1efa99..af6b92d9 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -104,7 +104,18 @@ Struct(GPU_D12_DescriptorList) Struct(GPU_D12_Arena) { + Arena *arena; GPU_D12_DescriptorList committed_descriptors_by_heap_and_queue[GPU_D12_DescriptorHeapKind_Count][GPU_NumQueues]; + + /* TODO: + * To support D3D12_RESOURCE_HEAP_TIER_1 devices, create separate heaps for: + * - Buffers + * - Non-render target & non-depth stencil textures + * - Render target or depth stencil textures + */ + ID3D12Heap *d3d_resource_heap; + u64 heap_pos; + u64 heap_size; }; //////////////////////////////////////////////////////////// 
@@ -132,6 +143,35 @@ Struct(GPU_D12_Resource) struct GPU_D12_Swapchain *swapchain; }; +//////////////////////////////////////////////////////////// +//~ Staging types + +Struct(GPU_D12_StagingHeap) +{ + Arena *arena; + + GPU_D12_Resource resource; + void *mapped; + u64 size; + + struct GPU_D12_StagingRegionNode *head_region_node; + struct GPU_D12_StagingRegionNode *first_free_region_node; + +}; + +Struct(GPU_D12_StagingRegionNode) +{ + GPU_D12_StagingHeap *heap; + + /* Heap links (requires heap lock to read) */ + GPU_D12_StagingRegionNode *prev; + GPU_D12_StagingRegionNode *next; + + /* Region info */ + Atomic64 completion_target; + u64 pos; +}; + //////////////////////////////////////////////////////////// //~ Command queue types @@ -149,9 +189,15 @@ Struct(GPU_D12_Queue) Mutex commit_mutex; ID3D12Fence *commit_fence; u64 commit_fence_target; + + /* Raw command lists */ struct GPU_D12_RawCommandList *first_committed_cl; struct GPU_D12_RawCommandList *last_committed_cl; + /* Staging heap */ + Mutex staging_mutex; + GPU_D12_StagingHeap *staging_heap; + Fence sync_fence; }; @@ -264,6 +310,7 @@ Struct(GPU_D12_CmdChunk) Struct(GPU_D12_CmdList) { + GPU_QueueKind queue_kind; GPU_D12_CmdList *next; GPU_D12_CmdChunk *first_cmd_chunk; @@ -332,12 +379,14 @@ Struct(GPU_D12_SharedState) /* Device */ IDXGIFactory6 *factory; IDXGIAdapter3 *adapter; - ID3D12Device *device; + ID3D12Device10 *device; } extern GPU_D12_shared_state; //////////////////////////////////////////////////////////// //~ Helpers +#define GPU_D12_HandleFromPointer(type, ptr) (type) { .v = (u64)(ptr) } + GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle); GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle); GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle); @@ -376,6 +425,7 @@ void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl); GPU_D12_Cmd *GPU_D12_PushCmd(GPU_D12_CmdList *cl); GPU_D12_Cmd *GPU_D12_PushConstCmd(GPU_D12_CmdList *cl, i32 slot, 
void *v); +GPU_D12_StagingRegionNode *GPU_D12_PushStagingRegion(GPU_D12_CmdList *cl, u64 size); //////////////////////////////////////////////////////////// //~ Sync job diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 8204eb20..aa7274d6 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -33,9 +33,9 @@ JobImpl(V_VisWorker, _, __) Arena *frame_arena = AcquireArena(Gibi(64)); Arena *perm = PermArena(); - GPU_Arena *frame_gpu_arena = GPU_AcquireArena(Mibi(8), GPU_CpuAccessFlag_Writable); - GPU_Arena *dverts_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable); - GPU_Arena *dvert_idxs_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable); + GPU_Arena *frame_gpu_arena = GPU_AcquireArena(Mebi(8), GPU_CpuAccessFlag_Writable); + GPU_Arena *dverts_gpu_arena = GPU_AcquireArena(Mebi(32), GPU_CpuAccessFlag_Writable); + GPU_Arena *dvert_idxs_gpu_arena = GPU_AcquireArena(Mebi(32), GPU_CpuAccessFlag_Writable); ////////////////////////////// //- State diff --git a/src/proto/proto.c b/src/proto/proto.c index f92b9a75..cb3fb713 100644 --- a/src/proto/proto.c +++ b/src/proto/proto.c @@ -1,5 +1,5 @@ -JobDecl(PR_RunForever, EmptySig); -JobImpl(PR_RunForever, _sig, _id) +JobDecl(PT_RunForever, EmptySig); +JobImpl(PT_RunForever, _sig, _id) { GPU_ArenaHandle gpu_frame_arena = GPU_AcquireArena(); @@ -28,7 +28,7 @@ JobImpl(PR_RunForever, _sig, _id) GPU_ResourceHandle backbuffer = GPU_PrepareBackbuffer(swapchain, GPU_Format_R16G16B16A16_Float, window_frame.draw_size); { - GPU_CommandListHandle cl = GPU_PrepareCommandList(); + GPU_CommandListHandle cl = GPU_PrepareCommandList(GPU_QueueKind_Direct); { /* Push resources */ Vec2I32 final_target_size = window_frame.draw_size; @@ -36,43 +36,44 @@ JobImpl(PR_RunForever, _sig, _id) GPU_Format_R16G16B16A16_Float, final_target_size, GPU_Layout_DirectQueue_ShaderReadWrite, - .flags = GPU_ResourceFlag_AllowShaderReadWrite | GPU_ResourceFlag_AllowRenderTarget); + .flags = 
GPU_ResourceFlag_AllowShaderReadWrite); /* Push resource handles */ + Texture2DHandle final_target_rhandle = GPU_PushTexture2DHandle(gpu_frame_arena, final_target); RWTexture2DHandle final_target_rwhandle = GPU_PushRWTexture2DHandle(gpu_frame_arena, final_target); /* Prep test pass */ { - GPU_SetConstant(cl, PR_ShaderConst_TestTarget, final_target_rwhandle); - GPU_SetConstant(cl, PR_ShaderConst_TestConst, 3.123); + GPU_SetConstant(cl, PT_ShaderConst_TestTarget, final_target_rwhandle); + GPU_SetConstant(cl, PT_ShaderConst_TestConst, 3.123); + GPU_SetConstant(cl, PT_ShaderConst_BlitSampler, GPU_GetCommonPointSampler()); + GPU_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target_rhandle); } /* Test pass */ { - GPU_Compute(cl, PR_TestCS, VEC3I32((final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1)); + GPU_Compute(cl, PT_TestCS, VEC3I32((final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1)); } - GPU_DumbMemoryBarrier(cl, final_target); - /* Prep clear pass */ + /* Prep blit pass */ { - GPU_LayoutBarrier(cl, backbuffer, - GPU_Stage_None, GPU_Access_None, - GPU_Stage_RenderTarget, GPU_Access_RenderTargetWrite, - GPU_Layout_DirectQueue_RenderTargetWrite); + GPU_DumbMemoryLayoutBarrier(cl, final_target, GPU_Layout_DirectQueue_ShaderRead); + GPU_DumbMemoryLayoutBarrier(cl, backbuffer, GPU_Layout_DirectQueue_RenderTargetWrite); } - /* Clear pass */ + /* Blit pass */ { + GPU_Rasterize(cl, + PT_BlitVS, PT_BlitPS, + 1, GPU_GetCommonQuadIndices(), + backbuffer, GPU_RasterMode_TriangleList); GPU_ClearRenderTarget(cl, backbuffer, VEC4(1, 0, 0, 1)); } /* Finalize backbuffer layout */ { - GPU_LayoutBarrier(cl, backbuffer, - GPU_Stage_RenderTarget, GPU_Access_RenderTargetWrite, - GPU_Stage_None, GPU_Access_None, - GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present); + GPU_DumbMemoryLayoutBarrier(cl, backbuffer, GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present); } /* Reset */ @@ -80,7 +81,7 @@ JobImpl(PR_RunForever, _sig, _id) GPU_ResetArena(cl, 
gpu_frame_arena); } } - GPU_CommitCommandList(cl, GPU_QueueKind_Direct); + GPU_CommitCommandList(cl); } GPU_CommitBackbuffer(backbuffer, VSYNC); } @@ -88,8 +89,8 @@ JobImpl(PR_RunForever, _sig, _id) } } -void PR_Startup(void); -void PR_Startup(void) +void PT_Startup(void); +void PT_Startup(void) { - RunJob(PR_RunForever); + RunJob(PT_RunForever); } diff --git a/src/proto/proto.lay b/src/proto/proto.lay index 35e3a08d..097eb871 100644 --- a/src/proto/proto.lay +++ b/src/proto/proto.lay @@ -13,7 +13,9 @@ @IncludeGpu proto_shaders.gpu //- Shaders -@ComputeShader PR_TestCS +@ComputeShader PT_TestCS +@VertexShader PT_BlitVS +@PixelShader PT_BlitPS //- Startup -@Startup PR_Startup +@Startup PT_Startup diff --git a/src/proto/proto_shaders.gpu b/src/proto/proto_shaders.gpu index c28e9a8e..1765df08 100644 --- a/src/proto/proto_shaders.gpu +++ b/src/proto/proto_shaders.gpu @@ -6,11 +6,11 @@ Struct(TestStruct) i32 i; }; -ComputeShader2D(PR_TestCS, 8, 8) +ComputeShader2D(PT_TestCS, 8, 8) { - StructuredBuffer sb = StructuredBufferFromHandle(PR_ShaderConst_TestBuff); + StructuredBuffer sb = StructuredBufferFromHandle(PT_ShaderConst_TestBuff); - RWTexture2D target_tex = RWTexture2DFromHandle(PR_ShaderConst_TestTarget); + RWTexture2D target_tex = RWTexture2DFromHandle(PT_ShaderConst_TestTarget); Vec2U32 target_tex_size = Count2D(target_tex); Vec2I32 id = SV_DispatchThreadID; @@ -19,3 +19,46 @@ ComputeShader2D(PR_TestCS, 8, 8) target_tex[id] = Vec4(0, 1, 0, 1); } } + +//////////////////////////////////////////////////////////// +//~ Blit shader + +Struct(PT_BlitPSInput) +{ + Semantic(Vec4, SV_Position); + Semantic(Vec2, src_uv); +}; + +Struct(PT_BlitPSOutput) +{ + Semantic(Vec4, SV_Target0); +}; + +////////////////////////////// +//- Vertex shader + +VertexShader(PT_BlitVS, PT_BlitPSInput) +{ + Vec2 uv = RectUvFromVertexId(SV_VertexID); + + PT_BlitPSInput result; + result.SV_Position = Vec4(NdcFromUv(uv).xy, 0, 1); + result.src_uv = uv; + return result; +} + 
+////////////////////////////// +//- Pixel shader + +PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input) +{ + SamplerState sampler = SamplerStateFromHandle(PT_ShaderConst_BlitSampler); + Texture2D tex = Texture2DFromHandle(PT_ShaderConst_BlitSrc); + + Vec2 uv = input.src_uv; + Vec4 result = tex.Sample(sampler, uv); + + PT_BlitPSOutput output; + output.SV_Target0 = result; + return output; +} diff --git a/src/proto/proto_shaders.h b/src/proto/proto_shaders.h index 35c5f9fe..0e7e5abd 100644 --- a/src/proto/proto_shaders.h +++ b/src/proto/proto_shaders.h @@ -1,6 +1,11 @@ //////////////////////////////////////////////////////////// //~ Constants -ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0); -ShaderConstant(StructuredBufferHandle, PR_ShaderConst_TestBuff, 1); -ShaderConstant(f32, PR_ShaderConst_TestConst, 2); +/* Test shader */ +ShaderConstant(RWTexture2DHandle, PT_ShaderConst_TestTarget, 0); +ShaderConstant(StructuredBufferHandle, PT_ShaderConst_TestBuff, 1); +ShaderConstant(f32, PT_ShaderConst_TestConst, 2); + +/* Blit shader */ +ShaderConstant(SamplerStateHandle, PT_ShaderConst_BlitSampler, 3); +ShaderConstant(Texture2DHandle, PT_ShaderConst_BlitSrc, 4); diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index d94080ff..df3bcfe8 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -84,7 +84,7 @@ JobImpl(SPR_LoadSheet, sig, _) /* Init spans */ sheet->spans_count = decoded.num_spans; - sheet->span_bins_count = MaxU32(AlignU64Pow2(sheet->spans_count * 2), 1); + sheet->span_bins_count = MaxU32(AlignU64ToNextPow2(sheet->spans_count * 2), 1); sheet->spans = PushStructs(perm, SPR_Span, sheet->spans_count); sheet->span_bins = PushStructs(perm, SPR_SpanBin, sheet->span_bins_count); { @@ -107,7 +107,7 @@ JobImpl(SPR_LoadSheet, sig, _) /* Init slice groups */ sheet->slice_groups_count = decoded.num_slice_keys; - sheet->slice_group_bins_count = MaxU32(AlignU64Pow2(sheet->slice_groups_count * 2), 1); + sheet->slice_group_bins_count = 
MaxU32(AlignU64ToNextPow2(sheet->slice_groups_count * 2), 1); sheet->slice_groups = PushStructs(perm, SPR_SliceGroup, sheet->slice_groups_count); sheet->slice_group_bins = PushStructs(perm, SPR_SliceGroupBin, sheet->slice_group_bins_count); { diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index 0cde39e2..bbb34ff4 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -698,8 +698,8 @@ i64 UI_EndFrame(UI_Frame frame) { g->eframe.layout_arena = AcquireArena(Gibi(64)); g->eframe.tex_gpu_arena = GPU_AcquireTextureArena(); - g->eframe.frame_gpu_arena = GPU_AcquireArena(Mibi(16)); - g->eframe.drects_gpu_arena = GPU_AcquireArena(Mibi(16)); + g->eframe.frame_gpu_arena = GPU_AcquireArena(Mebi(16)); + g->eframe.drects_gpu_arena = GPU_AcquireArena(Mebi(16)); } ResetArena(g->eframe.layout_arena);