gpu refactor progress

2025-09-18 10:16:57 -05:00 · 2025-09-18 10:16:57 -05:00 · 77affdd9b0
commit 77affdd9b0
parent f231c8322b
14 changed files with 561 additions and 65 deletions
--- a/src/base/base_job.h
+++ b/src/base/base_job.h
@ -13,7 +13,9 @@ Enum(JobPool)
    JobPool_Inherit = -1,

    /* Contains un-affinitized worker threads.
-     * Meant to take on temporary high-throughput work that is allowed to interfere with all other pools (e.g. loading a level). */
+     * Meant to take on temporary high-throughput work that is allowed to
+     * interfere with all other pools (e.g. program startup, loading a level,
+     * etc). */
    JobPool_Hyper = 0,

    /* Contains un-affinitized worker threads.
@ -46,18 +48,18 @@ Enum(JobFlag)
    /* A dedicated job is a heavy weight job that will receive its own OS
     * thread and will never yield. When the fiber running the job suspends
     * itself, the dedicated thread will perform a blocking wait rather than
-     * yielding the thread to another fiber. This is mainly useful long-running
-     * dispatcher-esque jobs that block on OS primitives, since occupying a
-     * worker thread (and thereby preventing non-blocking jobs from running on
-     * that worker) is unwanted.
+     * yielding the thread to another fiber. This is mainly useful for
+     * long-running dispatcher-esque jobs that block on OS primitives, since
+     * occupying a worker thread (and thereby preventing non-blocking jobs from
+     * running on that worker) is unwanted.
     *
     * For example, Win32 window message processing is required by the OS to
     * occur on the same thread that initially created the window, which means
     * it actually must run inside a dedicated job to prevent message processing
     * from yielding & resuming on another thread. The message processing loop
     * can block until messages are received from the OS without having to
-     * occupy a job worker while it blocks, and can then wake yielding
-     * jobs onto job worker pools based on the messages it received.
+     * occupy a job worker while it blocks, and can then schedule yielded
+     * jobs onto job worker pools based on the processed messages.
     */
    JobFlag_Dedicated   = (1 << 0),
 };
--- a/src/base/base_snc.c
+++ b/src/base/base_snc.c
@ -193,7 +193,7 @@ i64 FetchAddFence(Fence *fence, i64 x)
    return fetch;
 }

-void YieldOnFence(Fence *fence, i64 target)
+i64 YieldOnFence(Fence *fence, i64 target)
 {
    i64 v = Atomic64Fetch(&fence->v.v);
    while (v < target)
@ -201,4 +201,5 @@ void YieldOnFence(Fence *fence, i64 target)
        FutexYieldGte(&fence->v.v, &v, sizeof(v));
        v = Atomic64Fetch(&fence->v.v);
    }
+    return v;
 }
--- a/src/base/base_snc.h
+++ b/src/base/base_snc.h
@ -74,4 +74,4 @@ void SetFence(Fence *fence, i64 x);
 i64 FetchSetFence(Fence *fence, i64 x);
 i64 FetchAddFence(Fence *fence, i64 x);

-void YieldOnFence(Fence *fence, i64 target);
+i64 YieldOnFence(Fence *fence, i64 target);
--- a/src/font/font.c
+++ b/src/font/font.c
@ -55,9 +55,10 @@ JobDef(F_Load, sig, _)
        desc.texture.format = GPU_Format_R8G8B8A8_Unorm;
        desc.texture.size = VEC3I32(64, 64, 1);
        texture = GPU_AcquireResource(desc);
-        GPU_Mapped mapped = GPU_Map(texture);
-        GPU_CopyToMapped(&mapped, STRING(desc.texture.size.x * desc.texture.size.y * 4, (u8 *)result.image_pixels));
-        GPU_Unmap(&mapped);
+        /* FIXME: Copy to GPU resource here */
+        //GPU_Mapped mapped = GPU_Map(texture);
+        //GPU_CopyToMapped(&mapped, STRING(desc.texture.size.x * desc.texture.size.y * 4, (u8 *)result.image_pixels));
+        //GPU_Unmap(&mapped);
    }

    /* Acquire store memory */
--- a/src/gpu/gpu.h
+++ b/src/gpu/gpu.h
@ -367,7 +367,7 @@ void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain);

 /* Waits until a new backbuffer is ready to be written to.
 * This should be called before rendering for minimum latency. */
-void GPU_WaitOnSwapchain(GPU_Swapchain *swapchain);
+void GPU_YieldOnSwapchain(GPU_Swapchain *swapchain);

 /* 1. Clears the backbuffer and ensures it's at size `backbuffer_resolution`
 * 2. Blits `texture` to the backbuffer using `texture_xf`
--- a/src/gpu/gpu_dx12/gpu_dx12.c
+++ b/src/gpu/gpu_dx12/gpu_dx12.c
@ -27,13 +27,13 @@ GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl)
    GPU_D12_Command *cmd = f->first_free_command;
    if (cmd)
    {
-        StackPop(f->first_free_command);
-        ZeroStruct(cmd);
+        f->first_free_command = cmd->next;
    }
    else
    {
-        cmd = PushStruct(perm, GPU_D12_Command);
+        cmd = PushStructNoZero(perm, GPU_D12_Command);
    }
+    ZeroStruct(cmd);
    QueuePush(cl->first, cl->last, cmd);
    ++cl->count;
    return cmd;
@ -51,6 +51,8 @@ u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc)

 void GPU_D12_Startup(void)
 {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
+
    /* Init device */
    GPU_D12_InitDevice();

@ -67,6 +69,20 @@ void GPU_D12_Startup(void)
        YieldOnFence(&job_fence, job_count);
    }

+    /* Init descriptor heaps */
+    g->cbv_srv_uav_heap = GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
+                                                        GPU_D12_MaxCbvSrvUavDescriptors,
+                                                        ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV));
+
+    g->sampler_heap = GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
+                                                    GPU_D12_MaxSamplerDescriptors,
+                                                    ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER));
+
+    g->rtv_heap = GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
+                                                GPU_D12_MaxRtvDescriptors,
+                                                ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV));
+
+
    /* Start queue sync job */
    RunJob(GPU_D12_StartQueueSync, .pool = JobPool_Hyper, .flags = JobFlag_Dedicated);
 }
@ -230,7 +246,7 @@ JobDef(GPU_D12_InitQueue, sig, id)
    D3D12_COMMAND_QUEUE_DESC d3d_desc = ZI;
    d3d_desc.Type = desc.d3d_type;
    d3d_desc.Priority = desc.d3d_priority;
-    hr = ID3D12Device_CreateCommandQueue(g->device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->cq);
+    hr = ID3D12Device_CreateCommandQueue(g->device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->d3d_queue);
    if (FAILED(hr))
    {
        Panic(Lit("Failed to create command queue"));
@ -245,6 +261,32 @@ JobDef(GPU_D12_InitQueue, sig, id)
    g->queues[desc.kind] = queue;
 }

+//- Heap initialization
+
+/* Creates a descriptor heap of `type` with room for `max_descs` descriptors
+ * and records `desc_size` as the byte stride between descriptor handles.
+ * The heap's bookkeeping struct is allocated from an arena acquired here.
+ * Panics if the D3D12 heap cannot be created. */
+GPU_D12_CpuDescriptorHeap *GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, u32 max_descs, u32 desc_size)
+{
+    GPU_D12_SharedState *shared = &GPU_D12_shared_state;
+
+    /* The heap struct is the first allocation in its own arena */
+    Arena *heap_arena = AcquireArena(Gibi(64));
+    GPU_D12_CpuDescriptorHeap *result = PushStruct(heap_arena, GPU_D12_CpuDescriptorHeap);
+    result->arena = heap_arena;
+    result->type = type;
+    result->descriptor_size = desc_size;
+    result->max_count = max_descs;
+
+    /* Create the underlying D3D12 heap */
+    {
+        D3D12_DESCRIPTOR_HEAP_DESC heap_desc = ZI;
+        heap_desc.NumDescriptors = max_descs;
+        heap_desc.Type = type;
+        if (FAILED(ID3D12Device_CreateDescriptorHeap(shared->device, &heap_desc, &IID_ID3D12DescriptorHeap, (void **)&result->d3d_heap)))
+        {
+            Panic(Lit("Failed to create CPU descriptor heap"));
+        }
+    }
+
+    /* Cache the handle of the first slot; acquired descriptors are offset from it */
+    ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(result->d3d_heap, &result->start_handle);
+    return result;
+}
+
 ////////////////////////////////
 //~ Pipeline operations

@ -263,6 +305,53 @@ GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind)
    return g->queues[kind];
 }

+////////////////////////////////
+//~ Descriptor operations
+
+/* Hands out one descriptor slot from `heap`. A previously released descriptor
+ * is reused when the free list is non-empty; otherwise a new slot is carved
+ * out at the end of the heap. Panics once `max_count` slots have been
+ * allocated. The returned struct is zeroed apart from heap, index and handle. */
+GPU_D12_CpuDescriptor *GPU_D12_AcquireCpuDescriptor(GPU_D12_CpuDescriptorHeap *heap)
+{
+    GPU_D12_CpuDescriptor *result = 0;
+    u32 slot_index = 0;
+    D3D12_CPU_DESCRIPTOR_HANDLE slot_handle = ZI;
+
+    Lock heap_lock = LockE(&heap->mutex);
+    {
+        result = heap->first_free;
+        if (result)
+        {
+            /* Pop the free list head and keep its slot identity */
+            heap->first_free = result->next_free;
+            slot_index = result->index;
+            slot_handle = result->handle;
+        }
+        else
+        {
+            if (heap->allocated_count >= heap->max_count)
+            {
+                Panic(Lit("Max descriptors reached in heap"));
+            }
+            /* Claim the next never-used slot */
+            result = PushStructNoZero(heap->arena, GPU_D12_CpuDescriptor);
+            slot_index = heap->allocated_count;
+            heap->allocated_count += 1;
+            slot_handle.ptr = heap->start_handle.ptr + (slot_index * heap->descriptor_size);
+        }
+    }
+    Unlock(&heap_lock);
+
+    /* Reset the struct outside the lock, then restore the slot identity */
+    ZeroStruct(result);
+    result->heap = heap;
+    result->index = slot_index;
+    result->handle = slot_handle;
+    return result;
+}
+
+/* Pushes `descriptor` onto the free list of the heap it was acquired from so
+ * that a later acquire can reuse its slot. */
+void GPU_D12_ReleaseCpuDescriptor(GPU_D12_CpuDescriptor *descriptor)
+{
+    GPU_D12_CpuDescriptorHeap *owner = descriptor->heap;
+    Lock owner_lock = LockE(&owner->mutex);
+    descriptor->next_free = owner->first_free;
+    owner->first_free = descriptor;
+    Unlock(&owner_lock);
+}
+
 ////////////////////////////////
 //~ Raw command list

@ -362,8 +451,8 @@ u64 GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl)
            target = ++queue->submit_fence_target;
            cl->submit_fence_target = target;
            /* Execute */
-            ID3D12CommandQueue_ExecuteCommandLists(queue->cq, 1, (ID3D12CommandList **)&cl->cl);
-            ID3D12CommandQueue_Signal(queue->cq, queue->submit_fence, target);
+            ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->cl);
+            ID3D12CommandQueue_Signal(queue->d3d_queue, queue->submit_fence, target);
            /* Append */
            QueuePush(queue->first_submitted_cl, queue->last_submitted_cl, cl);
        }
@ -373,6 +462,197 @@ u64 GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl)
    return target;
 }

+////////////////////////////////
+//~ Swapchain helpers
+
+/* Fills every entry of `swapchain->buffers`: fetches the backbuffer resource
+ * from DXGI, acquires an RTV descriptor for it and creates the render target
+ * view. Panics if a buffer cannot be fetched. */
+void GPU_D12_InitSwapchainResources(GPU_D12_Swapchain *swapchain)
+{
+    GPU_D12_SharedState *shared = &GPU_D12_shared_state;
+    for (u32 buffer_index = 0; buffer_index < countof(swapchain->buffers); ++buffer_index)
+    {
+        /* Fetch the backbuffer resource for this slot */
+        ID3D12Resource *backbuffer = 0;
+        if (FAILED(IDXGISwapChain3_GetBuffer(swapchain->swapchain, buffer_index, &IID_ID3D12Resource, (void **)&backbuffer)))
+        {
+            /* TODO: Don't panic */
+            Panic(Lit("Failed to get swapchain buffer"));
+        }
+
+        /* Reset the slot, then attach the resource and a fresh RTV */
+        GPU_D12_SwapchainBuffer *slot = &swapchain->buffers[buffer_index];
+        ZeroStruct(slot);
+        slot->swapchain = swapchain;
+        slot->d3d_resource = backbuffer;
+        slot->state = D3D12_RESOURCE_STATE_COMMON;
+        slot->rtv_descriptor = GPU_D12_AcquireCpuDescriptor(shared->rtv_heap);
+        ID3D12Device_CreateRenderTargetView(shared->device, slot->d3d_resource, 0, slot->rtv_descriptor->handle);
+    }
+}
+
+/* Makes sure the swapchain's backbuffers match `resolution` (each axis is
+ * clamped to at least 1) and returns the buffer DXGI reports as the current
+ * backbuffer.
+ *
+ * On a resolution change: direct-queue submission is locked, the code waits
+ * for the queue's last submitted fence value, every buffer's RTV descriptor
+ * and resource reference is released, the swapchain is resized, and the
+ * per-buffer resources are recreated. Panics if the flush or resize fails. */
+GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, Vec2I32 resolution)
+{
+    __prof;
+    resolution.x = MaxI32(resolution.x, 1);
+    resolution.y = MaxI32(resolution.y, 1);
+    b32 should_rebuild = !EqVec2I32(swapchain->resolution, resolution);
+    if (should_rebuild)
+    {
+        HRESULT hr = 0;
+        GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);
+        /* Lock direct queue submissions (in case any write to backbuffer) */
+        /* TODO: Less overkill approach - Only flush GPU_D12_BlitToSwapchain since we know it's the only operation targeting backbuffer */
+        Lock lock = LockE(&queue->submit_mutex);
+        {
+            /* Flush direct queue */
+            {
+                HANDLE event = CreateEvent(0, 0, 0, 0);
+                hr = ID3D12Fence_SetEventOnCompletion(queue->submit_fence, queue->submit_fence_target, event);
+                if (FAILED(hr))
+                {
+                    /* The event was never armed, so waiting on it would
+                     * block forever */
+                    /* TODO: Don't panic */
+                    Panic(Lit("Failed to flush direct queue"));
+                }
+                else if (event)
+                {
+                    WaitForSingleObject(event, INFINITE);
+                }
+                /* NOTE: If event creation failed, SetEventOnCompletion is
+                 * documented to block until the fence value is reached when
+                 * given a null handle, so the flush still happened above */
+                if (event)
+                {
+                    CloseHandle(event);
+                }
+            }
+
+            /* Release buffers */
+            for (u32 i = 0; i < countof(swapchain->buffers); ++i)
+            {
+                GPU_D12_SwapchainBuffer *sb = &swapchain->buffers[i];
+                GPU_D12_ReleaseCpuDescriptor(sb->rtv_descriptor);
+                ID3D12Resource_Release(sb->d3d_resource);
+            }
+
+            /* Resize buffers */
+            hr = IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, GPU_D12_SwapchainFlags);
+            if (FAILED(hr))
+            {
+                /* TODO: Don't panic */
+                Panic(Lit("Failed to resize swapchain"));
+            }
+        }
+        Unlock(&lock);
+
+        GPU_D12_InitSwapchainResources(swapchain);
+
+        swapchain->resolution = resolution;
+    }
+
+    u32 backbuffer_index = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->swapchain);
+    return &swapchain->buffers[backbuffer_index];
+}
+
+void GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *texture_resource, Xform texture_xf)
+{   /* Meant to draw `texture_resource`, transformed by `texture_xf`, into the
+     * swapchain buffer `dst`. In its current state the enabled `#if 1` branch
+     * only looks up the shared state, so calling this records no GPU work.
+     */
+#if 1
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;

+#else /* NOTE(review): disabled draft of the real blit. It refers to names that
+       * are not declared in this function (`cq`, `src`, `src_xf`,
+       * `blit_pipeline`, `dst->resource`) - presumably left over from before
+       * the refactor; they need updating before this branch can be enabled. */
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;

+    GPU_D12_Pipeline *blit_pl = 0;
+    {
+        GPU_D12_PipelineDesc desc = ZI;
+        desc.vs = GPU_BlitVS;
+        desc.ps = GPU_BlitPS;
+        desc.render_target_formats[0] = GPU_Format_R8G8B8A8_Unorm;
+        blit_pl = GPU_D12_PipelineFromDesc(desc);
+    }

+    GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);

+    if (blit_pl)
+    {
+        GPU_D12_CommandList *cl = GPU_D12_BeginCommandList(cq->cl_pool);
+        {
+            __profn("Present blit");
+            __profnc_dx12(cl->cq->prof, cl->cl, "Present blit", Rgb32F(0.5, 0.2, 0.2));
+            GPU_D12_Swapchain *swapchain = dst->swapchain;

+            /* Upload dummy vert & index buffer */
+            /* TODO: Make these static */
+            /* Dummy vertex buffer */
+            LocalPersist u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
+            GPU_D12_CommandBuffer *dummy_vertex_buffer = GPU_D12_PushCommandBuffer(cl, 0, (u8 *)0);
+            GPU_D12_CommandBuffer *quad_index_buffer = GPU_D12_PushCommandBuffer(cl, countof(quad_indices), quad_indices);

+            /* Upload descriptor heap */
+            GPU_D12_CommandDescriptorHeap *descriptor_heap = GPU_D12_PushDescriptorHeap(cl, g->cbv_srv_uav_heap);
+            ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap };
+            ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps);

+            Rect viewport_rect = RectFromVec2(VEC2(0, 0), VEC2(swapchain->resolution.x, swapchain->resolution.y));
+            D3D12_VIEWPORT viewport = GPU_D12_ViewportFromRect(viewport_rect);
+            D3D12_RECT scissor = GPU_D12_ScissorRectFromRect(viewport_rect);

+            Mat4x4 vp_matrix = ZI;
+            {
+                Xform xf = src_xf;
+                xf = ScaleXform(xf, VEC2(src->texture_size.x, src->texture_size.y));
+                xf = TranslateXform(xf, VEC2(0.5, 0.5));
+                vp_matrix = ProjectMat4x4View(xf, viewport.Width, viewport.Height);
+            }

+            /* Transition dst to render target */
+            {
+                struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
+                rtb.pResource = dst->resource;
+                rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+                rtb.StateBefore = dst->state;
+                rtb.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
+                struct D3D12_RESOURCE_BARRIER rb = ZI;
+                rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+                rb.Flags = 0;
+                rb.Transition = rtb;
+                ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb);
+                dst->state = rtb.StateAfter;
+            }
+            ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &dst->rtv_descriptor->handle, 0, 0);

+            /* Clear */
+            f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
+            ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, dst->rtv_descriptor->handle, clear_color, 0, 0);

+            /* Bind pipeline */
+            GPU_D12_SetPipeline(cl, blit_pipeline);

+            /* Set Rasterizer State */
+            ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
+            ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);

+            /* Set sig */
+            K_BlitSig sig = ZI;
+            sig.projection = vp_matrix;
+            sig.flags = K_BLIT_FLAG_NONE;
+            sig.tex_urid = src->srv_descriptor->index;
+            GPU_D12_SetSig(cl, &sig, sizeof(sig));

+            /* Draw */
+            D3D12_VERTEX_BUFFER_VIEW vbv = GPU_D12_VbvFromCommandBuffer(dummy_vertex_buffer, 0);
+            D3D12_INDEX_BUFFER_VIEW ibv = GPU_D12_IbvFromCommandBuffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
+            ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+            ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
+            ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
+            ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0);

+            /* Transition dst to presentable */
+            {
+                struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
+                rtb.pResource = dst->resource;
+                rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
+                rtb.StateBefore = dst->state;
+                rtb.StateAfter = D3D12_RESOURCE_STATE_PRESENT;
+                struct D3D12_RESOURCE_BARRIER rb = ZI;
+                rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
+                rb.Flags = 0;
+                rb.Transition = rtb;
+                ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb);
+                dst->state = rtb.StateAfter;
+            }
+        }
+        GPU_D12_EndCommandList(cl);
+    }
+#endif
+}
+
 ////////////////////////////////
 //~ Queue sync job

@ -526,12 +806,13 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
                d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS    * !!(desc.flags & GPU_ResourceFlag_AllowUav);
                d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET       * !!(desc.flags & GPU_ResourceFlag_AllowRtv);
                D3D12_RESOURCE_STATES initial_state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST;
-                HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, 0, &IID_ID3D12Resource, (void **)&r->raw);
+                HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, 0, &IID_ID3D12Resource, (void **)&r->d3d_resource);
                if (FAILED(hr))
                {
                    /* TODO: Don't panic */
                    Panic(Lit("Failed to create buffer resource"));
                }
+                r->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->d3d_resource);
            } break;

            /* Texture */
@ -563,7 +844,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
                D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
                D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } };
                D3D12_CLEAR_VALUE *clear_value_ptr = d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0;
-                HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->raw);
+                HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->d3d_resource);
                if (FAILED(hr))
                {
                    /* TODO: Don't panic */
@ -593,7 +874,7 @@ void GPU_ReleaseResource(GPU_Resource *gpu_resource, GPU_ReleaseFlag flags)
        case GPU_ResourceKind_Texture2D:
        case GPU_ResourceKind_Texture3D:
        {
-            ID3D12Resource_Release(r->raw);
+            ID3D12Resource_Release(r->d3d_resource);
        }

        /* TODO: Sampler */
@ -700,7 +981,7 @@ u64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
                    {

                        /* Bind pipeline */
-                        ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->raw);
+                        ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->d3d_pipeline);

                        /* Fill signature */
                        {
@ -750,7 +1031,7 @@ u64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
                            if (indices)
                            {
                                D3D12_INDEX_BUFFER_VIEW ibv = ZI;
-                                ibv.BufferLocation = indices->gpu_address;
+                                ibv.BufferLocation = indices->buffer_gpu_address;
                                if (indices->desc.buffer.element_size == 2)
                                {
                                    ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R16_UINT);
@ -783,7 +1064,7 @@ u64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
                    if (pipeline)
                    {
                        /* Bind pipeline */
-                        ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->raw);
+                        ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->d3d_pipeline);

                        /* Fill signature */
                        {
@ -948,7 +1229,7 @@ GPU_Mapped GPU_Map(GPU_Resource *gpu_r)
    result.resource = gpu_r;
    GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_r;
    D3D12_RANGE read_range = ZI;
-    HRESULT hr = ID3D12Resource_Map(r->raw, 0, &read_range, &result.mem);
+    HRESULT hr = ID3D12Resource_Map(r->d3d_resource, 0, &read_range, &result.mem);
    if (FAILED(hr) || !result.mem)
    {
        /* TODO: Don't panic */
@ -960,7 +1241,7 @@ GPU_Mapped GPU_Map(GPU_Resource *gpu_r)
 void GPU_Unmap(GPU_Mapped *m)
 {
    GPU_D12_Resource *r = (GPU_D12_Resource *)m->resource;
-    ID3D12Resource_Unmap(r->raw, 0, 0);
+    ID3D12Resource_Unmap(r->d3d_resource, 0, 0);
 }

 void GPU_CopyToMapped(GPU_Mapped *mapped, String data)
@ -969,7 +1250,7 @@ void GPU_CopyToMapped(GPU_Mapped *mapped, String data)
    GPU_D12_Resource *r = (GPU_D12_Resource *)mapped->resource;

    D3D12_RESOURCE_DESC desc = ZI;
-    ID3D12Resource_GetDesc(r->raw, &desc);
+    ID3D12Resource_GetDesc(r->d3d_resource, &desc);

    u64 upload_size = 0;
    u64 upload_row_size = 0;
@ -1021,8 +1302,77 @@ GPU_MemoryInfo GPU_QueryMemoryInfo(void)

 GPU_Swapchain *GPU_AcquireSwapchain(P_Window *window, Vec2I32 size)
 {
-    /* TODO */
-    return 0;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
+    HRESULT hr = 0;
+    HWND hwnd = (HWND)P_GetInternalWindowHandle(window);
+    GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);
+
+    GPU_D12_Swapchain *swapchain = 0;
+    {
+        Lock lock = LockE(&g->free_swapchains_mutex);
+        {
+            swapchain = g->first_free_swapchain;
+            if (swapchain)
+            {
+                g->first_free_swapchain = swapchain->next;
+            }
+        }
+        Unlock(&lock);
+    }
+    if (!swapchain)
+    {
+        Arena *perm = PermArena();
+        PushAlign(perm, CachelineSize);
+        swapchain = PushStructNoZero(perm, GPU_D12_Swapchain);
+        PushAlign(perm, CachelineSize);
+    }
+    ZeroStruct(swapchain);
+
+    /* Create swapchain1 */
+    IDXGISwapChain1 *swapchain1 = 0;
+    {
+        DXGI_SWAP_CHAIN_DESC1 desc = ZI;
+        desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+        desc.Width = size.x;
+        desc.Height = size.y;
+        desc.SampleDesc.Count = 1;
+        desc.SampleDesc.Quality = 0;
+        desc.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT;
+        desc.BufferCount = GPU_D12_SwapchainBufferCount;
+        desc.Scaling = DXGI_SCALING_NONE;
+        desc.Flags = GPU_D12_SwapchainFlags;
+        desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
+        desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
+        hr = IDXGIFactory2_CreateSwapChainForHwnd(g->factory, (IUnknown *)queue->d3d_queue, hwnd, &desc, 0, 0, &swapchain1);
+        if (FAILED(hr))
+        {
+            Panic(Lit("Failed to create IDXGISwapChain1"));
+        }
+    }
+
+    /* Upgrade to swapchain3 */
+    hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain->swapchain);
+    if (FAILED(hr))
+    {
+        Panic(Lit("Failed to create IDXGISwapChain3"));
+    }
+
+    /* Create waitable object */
+#if GPU_D12_FrameLatency > 0
+    IDXGISwapChain3_SetMaximumFrameLatency(swapchain->swapchain, GPU_D12_FrameLatency);
+    swapchain->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->swapchain);
+    Assert(swapchain->waitable);
+#endif
+
+    /* Disable Alt+Enter changing monitor resolution to match window size */
+    IDXGIFactory_MakeWindowAssociation(g->factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
+
+    IDXGISwapChain1_Release(swapchain1);
+    swapchain->window_hwnd = hwnd;
+
+    GPU_D12_InitSwapchainResources(swapchain);
+
+    return (GPU_Swapchain *)swapchain;
 }

 void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain)
@ -1030,7 +1380,7 @@ void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain)
    /* TODO */
 }

-void GPU_WaitOnSwapchain(GPU_Swapchain *swapchain)
+void GPU_YieldOnSwapchain(GPU_Swapchain *swapchain)
 {
    /* TODO */
 }
@ -1038,11 +1388,11 @@ void GPU_WaitOnSwapchain(GPU_Swapchain *swapchain)
 void GPU_PresentSwapchain(GPU_Swapchain *gpu_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync)
 {
    GPU_D12_Swapchain *swapchain = (GPU_D12_Swapchain *)gpu_swapchain;
-    // GPU_D12_SwapchainBuffer *swapchain_buffer = GPU_D12_UpdateSwapchain(swapchain, backbuffer_resolution);
-    // GPU_D12_Resource *texture_resource = (GPU_D12_Resource *)texture;
+    GPU_D12_SwapchainBuffer *swapchain_buffer = GPU_D12_UpdateSwapchain(swapchain, backbuffer_resolution);
+    GPU_D12_Resource *texture_resource = (GPU_D12_Resource *)texture;

    /* Blit */
-    // GPU_D12_BlitToSwapchain(swapchain_buffer, texture_resource, texture_xf);
+    GPU_D12_BlitToSwapchain(swapchain_buffer, texture_resource, texture_xf);

    u32 present_flags = 0;
    if (GPU_D12_TearingIsAllowed && vsync == 0)
@ -1053,7 +1403,7 @@ void GPU_PresentSwapchain(GPU_Swapchain *gpu_swapchain, Vec2I32 backbuffer_resol
    /* Present */
    {
        __profn("Present");
-        HRESULT hr = IDXGISwapChain3_Present(swapchain->raw, vsync, present_flags);
+        HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags);
        if (!SUCCEEDED(hr))
        {
            Assert(0);
--- a/src/gpu/gpu_dx12/gpu_dx12.h
+++ b/src/gpu/gpu_dx12/gpu_dx12.h
@ -12,10 +12,15 @@
 //~ Tweakable defines

 #define GPU_D12_TearingIsAllowed        1
-#define GPU_D12_FrameLatency            1
+#define GPU_D12_FrameLatency            0  /* TODO: Set this to 1 */
+#define GPU_D12_SwapchainBufferCount    4
 #define GPU_D12_SwapchainFlags          (((GPU_D12_TearingIsAllowed != 0)   * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) \
                                      | ((GPU_D12_FrameLatency != 0)        * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT))
-#define GPU_D12_SwapchainBufferCount    (4)
+
+
+#define GPU_D12_MaxCbvSrvUavDescriptors (1024 * 64)
+#define GPU_D12_MaxSamplerDescriptors   (1024 * 1)
+#define GPU_D12_MaxRtvDescriptors       (1024 * 1)

 ////////////////////////////////
 //~ Pipeline types
@ -30,7 +35,7 @@ Struct(GPU_D12_PipelineDesc)

 Struct(GPU_D12_Pipeline)
 {
-    ID3D12PipelineState *raw;
+    ID3D12PipelineState *d3d_pipeline;  /* Pipeline state object passed to SetPipelineState when recording commands */
    ID3D12RootSignature *rootsig;
 };

@ -42,10 +47,10 @@ Struct(GPU_D12_Resource)
    GPU_D12_Resource *next_free;
    GPU_ResourceDesc desc;

-    ID3D12Resource *raw;
+    ID3D12Resource *d3d_resource;
    u64 reuse_hash;

-    D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
+    D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
 };

 ////////////////////////////////
@ -62,7 +67,7 @@ Struct(GPU_D12_QueueDesc)
 Struct(GPU_D12_Queue)
 {
    GPU_D12_QueueDesc desc;
-    ID3D12CommandQueue *cq;
+    ID3D12CommandQueue *d3d_queue;

    Mutex submit_mutex;
    ID3D12Fence *submit_fence;
@ -73,6 +78,33 @@ Struct(GPU_D12_Queue)
    Fence sync_fence;
 };

+////////////////////////////////
+//~ Descriptor types
+
+Struct(GPU_D12_CpuDescriptor)
+{
+    GPU_D12_CpuDescriptor *next_free;           /* Next entry in the owning heap's free list (written on release) */
+    struct GPU_D12_CpuDescriptorHeap *heap;     /* Heap this descriptor was acquired from */
+
+    u32 index;                                  /* Slot index within the heap */
+    D3D12_CPU_DESCRIPTOR_HANDLE handle;         /* Heap start handle offset by index * descriptor_size */
+};
+
+Struct(GPU_D12_CpuDescriptorHeap)
+{
+    Arena *arena;                               /* Holds this struct and every GPU_D12_CpuDescriptor pushed by acquire */
+
+    D3D12_DESCRIPTOR_HEAP_TYPE type;            /* Heap type given at initialization */
+    u32 descriptor_size;                        /* Byte stride between consecutive descriptor handles */
+    ID3D12DescriptorHeap *d3d_heap;             /* Underlying D3D12 descriptor heap */
+    D3D12_CPU_DESCRIPTOR_HANDLE start_handle;   /* CPU handle of slot 0 */
+
+    Mutex mutex;                                /* Locked while first_free / allocated_count are read or modified */
+    GPU_D12_CpuDescriptor *first_free;          /* Head of the list of released descriptors available for reuse */
+    u32 allocated_count;                        /* Number of distinct slots handed out so far (never decremented) */
+    u32 max_count;                              /* Capacity; acquiring beyond this panics */
+};
+
 ////////////////////////////////
 //~ Raw command list types

@ -173,12 +205,24 @@ Struct(GPU_D12_CommandList)
 ////////////////////////////////
 //~ Swapchain types

+Struct(GPU_D12_SwapchainBuffer)
+{
+    struct GPU_D12_Swapchain *swapchain;        /* Swapchain this buffer belongs to */
+    ID3D12Resource *d3d_resource;               /* Backbuffer obtained from IDXGISwapChain3_GetBuffer */
+    GPU_D12_CpuDescriptor *rtv_descriptor;      /* Render target view descriptor acquired from the shared RTV heap */
+    D3D12_RESOURCE_STATES state;                /* Tracked resource state; set to COMMON when the buffer is (re)initialized */
+};
+
 Struct(GPU_D12_Swapchain)
 {
-    IDXGISwapChain3 *raw;
+    GPU_D12_Swapchain *next;            /* Link used by the shared state's free swapchain list */
+
+    IDXGISwapChain3 *swapchain;         /* DXGI swapchain interface */
    HWND window_hwnd;
    HANDLE waitable;
-    Vec3I32 resolution;
+    Vec2I32 resolution;                 /* Compared against the requested size in GPU_D12_UpdateSwapchain to detect resizes */
+
+    GPU_D12_SwapchainBuffer buffers[GPU_D12_SwapchainBufferCount]; /* One entry per backbuffer, filled by GPU_D12_InitSwapchainResources */
 };

 ////////////////////////////////
@ -197,10 +241,19 @@ Struct(GPU_D12_SharedState)
    /* Queues */
    GPU_D12_Queue *queues[GPU_NumQueues];

+    /* Descriptor heaps */
+    GPU_D12_CpuDescriptorHeap *cbv_srv_uav_heap;
+    GPU_D12_CpuDescriptorHeap *sampler_heap;
+    GPU_D12_CpuDescriptorHeap *rtv_heap;
+
    /* Resources */
    Mutex free_resources_mutex;
    GPU_D12_Resource *first_free_resource;

+    /* Swapchains */
+    Mutex free_swapchains_mutex;
+    GPU_D12_Swapchain *first_free_swapchain;
+
    /* Device */
    IDXGIFactory6 *factory;
    IDXGIAdapter1 *adapter;
@ -229,6 +282,9 @@ void GPU_D12_InitDevice(void);
 //- Queue initialization
 JobDecl(GPU_D12_InitQueue, { GPU_D12_QueueDesc *descs; });

+//- Heap initialization
+GPU_D12_CpuDescriptorHeap *GPU_D12_InitCpuDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, u32 max_descs, u32 desc_size);
+
 ////////////////////////////////
 //~ Pipeline operations

@ -239,12 +295,25 @@ GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc);

 GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind);

+////////////////////////////////
+//~ Descriptor operations
+
+GPU_D12_CpuDescriptor *GPU_D12_AcquireCpuDescriptor(GPU_D12_CpuDescriptorHeap *heap);
+void GPU_D12_ReleaseCpuDescriptor(GPU_D12_CpuDescriptor *descriptor);
+
 ////////////////////////////////
 //~ Raw command list operations

 GPU_D12_RawCommandList *GPU_D12_BeginRawCommandList(GPU_QueueKind queue_kind);
 u64 GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl);

+////////////////////////////////
+//~ Swapchain helpers
+
+void GPU_D12_InitSwapchainResources(GPU_D12_Swapchain *swapchain);
+GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, Vec2I32 resolution);
+void GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *swapchain_buffer, GPU_D12_Resource *texture_resource, Xform texture_xf);
+
 ////////////////////////////////
 //~ Sync job

--- a/src/gpu/gpu_dx12/gpu_dx12.lay
+++ b/src/gpu/gpu_dx12/gpu_dx12.lay
@ -2,6 +2,12 @@

 //- Api
@IncludeC gpu_dx12.h
+@IncludeGpu gpu_dx12_blit.h

 //- Impl
@IncludeC gpu_dx12.c
+@IncludeGpu gpu_dx12_blit.gpu
+
+//- Shaders
+@VertexShader  GPU_D12_BlitVS
+@PixelShader   GPU_D12_BlitPS
--- a/src/gpu/gpu_dx12/gpu_dx12_blit.gpu
+++ b/src/gpu/gpu_dx12/gpu_dx12_blit.gpu
@ -0,0 +1,49 @@
+ConstantBuffer<GPU_D12_BlitSig> GPU_D12_blit_sig : register (b0); /* Blit constants, read by both GPU_D12_BlitVS and GPU_D12_BlitPS */
+
+////////////////////////////////
+//~ Blit
+
+Struct(GPU_D12_BlitPS_Input) /* Returned by GPU_D12_BlitVS and taken as the parameter of GPU_D12_BlitPS */
+{
+    Semantic(Vec4, SV_Position); /* Unit quad corner multiplied by sig.projection in the vertex shader */
+    Semantic(Vec2, uv); /* Unit quad corner offset by 0.5; used as the sample coordinate in the pixel shader */
+};
+
+Struct(GPU_D12_BlitPS_Output) /* Returned by GPU_D12_BlitPS */
+{
+    Semantic(Vec4, SV_Target); /* Color sampled from the source texture */
+};
+
+//- Vertex shader
+
+GPU_D12_BlitPS_Input VSDef(GPU_D12_BlitVS, Semantic(u32, SV_VertexID)) /* Emits one projected corner of a unit quad, selected by vertex id */
+{
+    ConstantBuffer<GPU_D12_BlitSig> sig = GPU_D12_blit_sig;
+    static const Vec2 unit_quad_verts[4] = { /* Corners of a 1x1 quad centered on the origin */
+        Vec2(-0.5f, -0.5f),
+        Vec2(0.5f, -0.5f),
+        Vec2(0.5f, 0.5f),
+        Vec2(-0.5f, 0.5f)
+    };
+    Vec2 vert = unit_quad_verts[SV_VertexID]; /* NOTE(review): no bounds check; assumes SV_VertexID stays in [0, 3] - confirm against the draw call */
+
+    GPU_D12_BlitPS_Input output;
+    output.SV_Position = mul(sig.projection, Vec4(vert, 0, 1)); /* Corner extended to (x, y, 0, 1) before applying the projection */
+    output.uv = vert + 0.5; /* Shifts the [-0.5, 0.5] corner into the [0, 1] range */
+    return output;
+}
+
+//- Pixel shader
+
+GPU_D12_BlitPS_Output PSDef(GPU_D12_BlitPS, GPU_D12_BlitPS_Input input) /* Outputs the source texture sampled at input.uv */
+{
+    ConstantBuffer<GPU_D12_BlitSig> sig = GPU_D12_blit_sig;
+    SamplerState sampler = GpuSamplerFromUrid(sig.tex_sampler_urid); /* Sampler looked up from the id stored in the blit constants */
+
+    GPU_D12_BlitPS_Output output;
+    Texture2D<Vec4> tex = GpuResourceFromUrid(sig.tex_urid); /* Source texture looked up from the id stored in the blit constants */
+    Vec4 color = tex.Sample(sampler, input.uv);
+
+    output.SV_Target = color; /* Sampled color is written as-is */
+    return output;
+}
--- a/src/gpu/gpu_dx12/gpu_dx12_blit.h
+++ b/src/gpu/gpu_dx12/gpu_dx12_blit.h
@ -0,0 +1,15 @@
+////////////////////////////////
+//~ Blit types
+
+Struct(GPU_D12_BlitSig) /* Constants consumed by the GPU_D12_BlitVS / GPU_D12_BlitPS shaders */
+{
+    /* ----------------------------------------------------- */
+    Mat4x4 projection;                          /* 16 consts (applied to the unit quad corner in the vertex shader) */
+    /* ----------------------------------------------------- */
+    u32 tex_urid;                               /* 01 consts (id of the texture sampled by the pixel shader) */
+    u32 tex_sampler_urid;                       /* 01 consts (id of the sampler used by the pixel shader) */
+    u32 _pad0;                                  /* 01 consts (padding) */
+    u32 _pad1;                                  /* 01 consts (padding) */
+    /* ----------------------------------------------------- */
+};
+AssertRootConst(GPU_D12_BlitSig, 20); /* 16 + 4 consts; NOTE(review): presumably checks the struct size in 32-bit constants - confirm */
--- a/src/platform/platform_win32/platform_win32.c
+++ b/src/platform/platform_win32/platform_win32.c
@ -435,7 +435,7 @@ void P_W32_ProcessWindowEvent(P_W32_Window *window, P_WindowEvent event)
 void P_W32_WakeWindow(P_W32_Window *window)
 {
    /* Post a blank message to the window's thread message queue to wake it. */
-    PostMessageW(window->hwnd, 0, 0, 0);
+    PostMessageW(window->hwnd, WM_NULL, 0, 0);
 }

 LRESULT CALLBACK P_W32_Win32WindowProc(HWND hwnd, UINT msg, WPARAM wparam, LPARAM lparam)
--- a/src/pp/pp.c
+++ b/src/pp/pp.c
@ -2434,10 +2434,9 @@ void UpdateUser(P_Window *window)
        }
        g->gpu_render_fence_target = GPU_EndCommandList(cl);

-        /* Release transfer buffers */
+        /* Reset transfer buffers & queue their release */
        {
            {
-                /* FIXME: Release resources */
                GPU_Resource *release_resources[] = {
                    quad_index_buffer,
                    material_instance_buffer,
@ -2469,6 +2468,8 @@ void UpdateUser(P_Window *window)
            g->ui_shape_indices_count = 0;
            g->grids_count = 0;
        }
+
+        GPU_PresentSwapchain(g->swapchain, g->ui_size, g->ui_target, g->ui_to_screen_xf, 1);
    }

    EndScratch(scratch);
@ -2488,7 +2489,7 @@ JobDef(UpdateUserOrSleep, UNUSED sig, UNUSED id)
            __profn("User sleep");
            {
                __profn("Swapchain wait");
-                GPU_WaitOnSwapchain(g->swapchain);
+                GPU_YieldOnSwapchain(g->swapchain);
            }
            {
                __profn("Frame limiter wait");
--- a/src/pp/pp_draw.gpu
+++ b/src/pp/pp_draw.gpu
@ -1,9 +1,9 @@
-ConstantBuffer<MaterialSig>  g_mat_sig       : register (b0);
-ConstantBuffer<FloodSig>     g_flood_sig     : register (b0);
-ConstantBuffer<ShadeSig>     g_shade_sig     : register (b0);
-ConstantBuffer<UiBlitSig>    g_ui_blit_sig   : register (b0);
-ConstantBuffer<UiRectSig>    g_ui_rect_sig   : register (b0);
-ConstantBuffer<UiShapeSig>   g_ui_shape_sig  : register (b0);
+ConstantBuffer<MaterialSig>  mat_sig       : register (b0);
+ConstantBuffer<FloodSig>     flood_sig     : register (b0);
+ConstantBuffer<ShadeSig>     shade_sig     : register (b0);
+ConstantBuffer<UiBlitSig>    ui_blit_sig   : register (b0);
+ConstantBuffer<UiRectSig>    ui_rect_sig   : register (b0);
+ConstantBuffer<UiShapeSig>   ui_shape_sig  : register (b0);

 ////////////////////////////////
 //~ Material
@ -28,7 +28,7 @@ Struct(MaterialPS_Output)

 MaterialPS_Input VSDef(MaterialVS, Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
 {
-    ConstantBuffer<MaterialSig> sig = g_mat_sig;
+    ConstantBuffer<MaterialSig> sig = mat_sig;
    static const Vec2 unit_quad_verts[4] = {
        Vec2(-0.5f, -0.5f),
        Vec2(0.5f, -0.5f),
@ -56,7 +56,7 @@ MaterialPS_Input VSDef(MaterialVS, Semantic(u32, SV_InstanceID), Semantic(u32, S

 MaterialPS_Output PSDef(MaterialPS, MaterialPS_Input input)
 {
-    ConstantBuffer<MaterialSig> sig = g_mat_sig;
+    ConstantBuffer<MaterialSig> sig = mat_sig;

    MaterialPS_Output output;
    Vec4 albedo = input.tint_lin;
@ -128,7 +128,7 @@ MaterialPS_Output PSDef(MaterialPS, MaterialPS_Input input)
 [numthreads(8, 8, 1)]
 void CSDef(FloodCS, Semantic(uint3, SV_DispatchThreadID))
 {
-    ConstantBuffer<FloodSig> sig = g_flood_sig;
+    ConstantBuffer<FloodSig> sig = flood_sig;

    uint2 id = SV_DispatchThreadID.xy;
    uint2 tex_size = uint2(sig.tex_width, sig.tex_height);
@ -194,7 +194,7 @@ void CSDef(FloodCS, Semantic(uint3, SV_DispatchThreadID))

 float RandAngle(uint2 pos, u32 ray_index)
 {
-    ConstantBuffer<ShadeSig> sig = g_shade_sig;
+    ConstantBuffer<ShadeSig> sig = shade_sig;
    Texture3D<u32> noise_tex = GpuResourceFromUrid(sig.noise_tex_urid);

    Vec3I32 noise_coord = Vec3I32(1, 1, 1);
@ -208,7 +208,7 @@ float RandAngle(uint2 pos, u32 ray_index)

 Vec3 ColorFromDir(uint2 ray_start, Vec2 ray_dir)
 {
-    ConstantBuffer<ShadeSig> sig = g_shade_sig;
+    ConstantBuffer<ShadeSig> sig = shade_sig;
    Texture2D<uint2> flood_tex = GpuResourceFromUrid(sig.emittance_flood_tex_urid);
    Texture2D<Vec4> emittance_tex = GpuResourceFromUrid(sig.emittance_tex_urid);
    Texture3D<u32> noise_tex = GpuResourceFromUrid(sig.noise_tex_urid);
@ -263,7 +263,7 @@ Vec3 ColorFromPos(uint2 pos)
 [numthreads(8, 8, 1)]
 void CSDef(ShadeCS, Semantic(uint3, SV_DispatchThreadID))
 {
-    ConstantBuffer<ShadeSig> sig = g_shade_sig;
+    ConstantBuffer<ShadeSig> sig = shade_sig;

    uint2 id = SV_DispatchThreadID.xy;
    if (id.x < sig.tex_width && id.y < sig.tex_height)
@ -321,7 +321,7 @@ Vec3 ToneMap(Vec3 v)

 UiBlitPS_Input VSDef(UiBlitVS, Semantic(u32, SV_VertexID))
 {
-    ConstantBuffer<UiBlitSig> sig = g_ui_blit_sig;
+    ConstantBuffer<UiBlitSig> sig = ui_blit_sig;
    static const Vec2 unit_quad_verts[4] = {
        Vec2(-0.5f, -0.5f),
        Vec2(0.5f, -0.5f),
@ -340,7 +340,7 @@ UiBlitPS_Input VSDef(UiBlitVS, Semantic(u32, SV_VertexID))

 UiBlitPS_Output PSDef(UiBlitPS, UiBlitPS_Input input)
 {
-    ConstantBuffer<UiBlitSig> sig = g_ui_blit_sig;
+    ConstantBuffer<UiBlitSig> sig = ui_blit_sig;
    SamplerState sampler = GpuSamplerFromUrid(sig.tex_sampler_urid);

    UiBlitPS_Output output;
@ -385,7 +385,7 @@ Struct(UiRectPS_Output)

 UiRectPS_Input VSDef(UiRectVS, Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
 {
-    ConstantBuffer<UiRectSig> sig = g_ui_rect_sig;
+    ConstantBuffer<UiRectSig> sig = ui_rect_sig;
    static const Vec2 unit_quad_verts[4] = {
        Vec2(-0.5f, -0.5f),
        Vec2(0.5f, -0.5f),
@ -410,7 +410,7 @@ UiRectPS_Input VSDef(UiRectVS, Semantic(u32, SV_InstanceID), Semantic(u32, SV_Ve

 UiRectPS_Output PSDef(UiRectPS, UiRectPS_Input input)
 {
-    ConstantBuffer<UiRectSig> sig = g_ui_rect_sig;
+    ConstantBuffer<UiRectSig> sig = ui_rect_sig;
    UiRectPS_Output output;
    Vec4 color = input.tint_srgb;

@ -444,7 +444,7 @@ Struct(UiShapePS_Output)

 UiShapePS_Input VSDef(UiShapeVS, Semantic(u32, SV_VertexID))
 {
-    ConstantBuffer<UiShapeSig> sig = g_ui_shape_sig;
+    ConstantBuffer<UiShapeSig> sig = ui_shape_sig;
    StructuredBuffer<UiShapeVert> verts = GpuResourceFromUrid(sig.verts_urid);
    UiShapeVert vert = verts[SV_VertexID];
    UiShapePS_Input output;
--- a/src/sprite/sprite.c
+++ b/src/sprite/sprite.c
@ -24,15 +24,17 @@ JobDef(S_LoadTexture, sig, _)
    {
        GPU_ResourceDesc desc = ZI;
        desc.kind = GPU_ResourceKind_Texture2D;
-        desc.flags = GPU_ResourceFlag_AllowUav;
+        desc.flags = GPU_ResourceFlag_None;
        desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb;
        desc.texture.size = VEC3I32(decoded.width, decoded.height, 1);
        desc.texture.mip_levels = 1;
        texture->gpu_resource = GPU_AcquireResource(desc);
        texture->width = decoded.width;
        texture->height = decoded.height;
+        /* FIXME: Upload to resource here */
    }

+
    texture->loaded = 1;
    SetFence(&entry->texture_ready_fence, 1);
    EndScratch(scratch);