From 19c01408684575b16eebc0c2645334e7b7b3321b Mon Sep 17 00:00:00 2001
From: jacob <jacob@cagori.com>
Date: Thu, 31 Jul 2025 21:45:53 -0500
Subject: [PATCH] gpu layer refactor progress

---
 src/app/app_core.c                      |    2 +-
 src/draw/draw_core.c                    |   12 +-
 src/font/font_core.c                    |    2 +-
 src/gpu/gpu.c                           |    4 +-
 src/gpu/gpu.h                           |    4 +
 src/gpu/gpu_core.h                      |   27 +-
 src/gpu/{gpu_core_dx12.c => gpu_dx12.c} | 1728 +++++++++++------------
 src/gpu/gpu_dx12.h                      |  692 +++++++++
 src/sprite/sprite_core.c                |    6 +-
 src/user/user_core.c                    |   12 +-
 10 files changed, 1523 insertions(+), 966 deletions(-)
 rename src/gpu/{gpu_core_dx12.c => gpu_dx12.c} (73%)
 create mode 100644 src/gpu/gpu_dx12.h

diff --git a/src/app/app_core.c b/src/app/app_core.c
index b52a1aad..7b29a87a 100644
--- a/src/app/app_core.c
+++ b/src/app/app_core.c
@@ -233,7 +233,7 @@ void P_AppStartup(String args_str)
     /* Global systems */
     RES_Startup();
     W_Startup();
-    gp_startup();
+    GPU_Startup();
 
     /* Subsystems */
     AC_StartupReceipt asset_cache_sr    = AC_Startup();
diff --git a/src/draw/draw_core.c b/src/draw/draw_core.c
index 29dabf35..d5dfc5ba 100644
--- a/src/draw/draw_core.c
+++ b/src/draw/draw_core.c
@@ -9,7 +9,7 @@ D_StartupReceipt D_Startup(F_StartupReceipt *font_sr)
     D_SharedState *g = &D_shared_state;
     (UNUSED)font_sr;
     u32 pixel_white = 0xFFFFFFFF;
-    g->solid_white_texture = gp_texture_alloc(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(1, 1), &pixel_white);
+    g->solid_white_texture = GPU_AllocTexture(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(1, 1), &pixel_white);
     return (D_StartupReceipt) { 0 };
 }
 
@@ -26,7 +26,7 @@ void D_DrawMaterial(GPU_RenderSig *sig, D_MaterialParams params)
     cmd.material.tint = params.tint;
     cmd.material.is_light = params.is_light;
     cmd.material.light_emittance = params.light_emittance;
-    gp_push_render_cmd(sig, &cmd);
+    GPU_PushRenderCmd(sig, &cmd);
 }
 
 ////////////////////////////////
@@ -39,7 +39,7 @@ void D_DrawPolyEx(GPU_RenderSig *sig, Vec2Array vertices, GPU_Indices indices, u
     cmd.ui_shape.vertices = vertices;
     cmd.ui_shape.indices = indices;
     cmd.ui_shape.color = color;
-    gp_push_render_cmd(sig, &cmd);
+    GPU_PushRenderCmd(sig, &cmd);
 }
 
 /* Draws a filled polygon using triangles in a fan pattern */
@@ -269,7 +269,7 @@ void D_DrawGrid(GPU_RenderSig *sig, Xform xf, u32 bg0_color, u32 bg1_color, u32
         cmd.grid.line_thickness = thickness;
         cmd.grid.line_spacing = spacing;
         cmd.grid.offset = offset;
-        grid_id = gp_push_render_cmd(sig, &cmd);
+        grid_id = GPU_PushRenderCmd(sig, &cmd);
     }
 
     GPU_RenderCmdDesc cmd = ZI;
@@ -277,7 +277,7 @@ void D_DrawGrid(GPU_RenderSig *sig, Xform xf, u32 bg0_color, u32 bg1_color, u32
     cmd.material.xf = xf;
     cmd.material.tint = ColorWhite;
     cmd.material.grid_cmd_id = grid_id;
-    gp_push_render_cmd(sig, &cmd);
+    GPU_PushRenderCmd(sig, &cmd);
 }
 
 ////////////////////////////////
@@ -291,7 +291,7 @@ void D_DrawUiRect(GPU_RenderSig *sig, D_UiRectParams params)
     cmd.ui_rect.texture = params.texture;
     cmd.ui_rect.clip = params.clip;
     cmd.ui_rect.tint = params.tint;
-    gp_push_render_cmd(sig, &cmd);
+    GPU_PushRenderCmd(sig, &cmd);
 }
 
 ////////////////////////////////
diff --git a/src/font/font_core.c b/src/font/font_core.c
index eb174f1e..62f281e0 100644
--- a/src/font/font_core.c
+++ b/src/font/font_core.c
@@ -93,7 +93,7 @@ P_JobDef(F_LoadAssetJob, job)
     RES_CloseResource(&res);
 
     /* Send texture to GPU */
-    GPU_Resource *texture = gp_texture_alloc(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(result.image_width, result.image_height), result.image_pixels);
+    GPU_Resource *texture = GPU_AllocTexture(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(result.image_width, result.image_height), result.image_pixels);
 
     /* Allocate store memory */
     F_Font *font = 0;
diff --git a/src/gpu/gpu.c b/src/gpu/gpu.c
index 87684511..271b0e7a 100644
--- a/src/gpu/gpu.c
+++ b/src/gpu/gpu.c
@@ -3,7 +3,7 @@
 #include "../kernel/kernel.h"
 
 #if PlatformIsWindows
-# include "gpu_core_dx12.c"
+# include "gpu_dx12.c"
 #else
-# error Gp core not implemented for this platform
+# error Gpu layer not implemented for this platform
 #endif
diff --git a/src/gpu/gpu.h b/src/gpu/gpu.h
index 4f433130..8d248362 100644
--- a/src/gpu/gpu.h
+++ b/src/gpu/gpu.h
@@ -12,4 +12,8 @@
 
 #include "gpu_core.h"
 
+#if PlatformIsWindows
+# include "gpu_dx12.h"
+#endif
+
 #endif
diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h
index cd5b8cb0..24da4936 100644
--- a/src/gpu/gpu_core.h
+++ b/src/gpu/gpu_core.h
@@ -110,7 +110,7 @@ Struct(GPU_MemoryInfo)
 ////////////////////////////////
 //~ Startup
 
-void gp_startup(void);
+void GPU_Startup(void);
 
 ////////////////////////////////
 //~ Resource operations
@@ -120,45 +120,42 @@ void gp_startup(void);
  * the caller to make sure the released resources aren't then referenced in
  * any runs
  */
-void gp_resource_release(GPU_Resource *resource);
+void GPU_ReleaseResource(GPU_Resource *resource);
 
 ////////////////////////////////
 //~ Texture operations
 
-GPU_Resource *gp_texture_alloc(GPU_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data);
+GPU_Resource *GPU_AllocTexture(GPU_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data);
 
-Vec2I32 gp_texture_get_size(GPU_Resource *texture);
+Vec2I32 GPU_GetTextureSize(GPU_Resource *texture);
 
 ////////////////////////////////
 //~ Render operations
 
-GPU_RenderSig *gp_render_sig_alloc(void);
+GPU_RenderSig *GPU_AllocRenderSig(void);
 
 /* Returns a cmd id internal to the sig */
-u32 gp_push_render_cmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *desc);
+u32 GPU_PushRenderCmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *desc);
 
-GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams render_params);
+GPU_Resource *GPU_RunRender(GPU_RenderSig *gp_render_sig, GPU_RenderParams render_params);
 
 ////////////////////////////////
 //~ Memory query
 
-GPU_MemoryInfo gp_query_memory_info(void);
+GPU_MemoryInfo GPU_QueryMemoryInfo(void);
 
 ////////////////////////////////
 //~ Swapchain
 
-GPU_Swapchain *gp_swapchain_alloc(P_Window *window, Vec2I32 resolution);
+GPU_Swapchain *GPU_AllocSwapchain(P_Window *window, Vec2I32 resolution);
 
-void gp_swapchain_release(GPU_Swapchain *gp_swapchain);
+void GPU_ReleaseSwapchain(GPU_Swapchain *gp_swapchain);
 
 /* Waits until a new backbuffer is ready to be written to.
  * This should be called before rendering for minimum latency. */
-void gp_swapchain_wait(GPU_Swapchain *gp_swapchain);
-
-////////////////////////////////
-//~ Present
+void GPU_WaitOnSwapchain(GPU_Swapchain *gp_swapchain);
 
 /* 1. Clears the backbuffer and ensures it's at size `backbuffer_resolution`
  * 2. Blits `texture` to the backbuffer using `texture_xf`
  * 3. Presents the backbuffer */
-void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync);
+void GPU_PresentSwapchain(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync);
diff --git a/src/gpu/gpu_core_dx12.c b/src/gpu/gpu_dx12.c
similarity index 73%
rename from src/gpu/gpu_core_dx12.c
rename to src/gpu/gpu_dx12.c
index 062411d0..40fe3632 100644
--- a/src/gpu/gpu_core_dx12.c
+++ b/src/gpu/gpu_dx12.c
@@ -1,16 +1,7 @@
-////////////////////////////////
-//~ Windows headers
+GPU_D12_SharedState GPU_D12_shared_state = ZI;
 
-#pragma warning(push, 0)
-# define UNICODE
-# define COBJMACROS
-# include <Windows.h>
-# include <d3d12.h>
-# include <dxgidebug.h>
-# include <dxgi1_6.h>
-# include <combaseapi.h>
-# include <d3dcompiler.h>
-#pragma warning(pop)
+////////////////////////////////
+//~ Windows libs
 
 #pragma comment(lib, "d3d12")
 #pragma comment(lib, "dxgi")
@@ -23,402 +14,48 @@
 # pragma comment(lib, "advapi32")
 #endif
 
-////////////////////////////////
-//~ Dx12
-
-#define DX12_ALLOW_TEARING 1
-#define DX12_WAIT_FRAME_LATENCY 1
-#define DX12_SWAPCHAIN_FLAGS            (((DX12_ALLOW_TEARING != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | ((DX12_WAIT_FRAME_LATENCY != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT))
-#define DX12_SWAPCHAIN_BUFFER_COUNT     (4)
-
-/* Arbitrary limits */
-#define DX12_NUM_CBV_SRV_UAV_DESCRIPTORS    (1024 * 64)
-#define DX12_NUM_RTV_DESCRIPTORS            (1024 * 1)
-#define DX12_COMMAND_BUFFER_MIN_SIZE        (1024 * 64)
-
-#define DX12_MULTI_QUEUE !ProfilingIsEnabled
-#if DX12_MULTI_QUEUE
-# define DX12_QUEUE_DIRECT 0
-# define DX12_QUEUE_COMPUTE 1
-# define DX12_QUEUE_COPY 2
-# define DX12_QUEUE_COPY_BACKGROUND 3
-# define DX12_NUM_QUEUES 4
-#else
-# define DX12_QUEUE_DIRECT 0
-# define DX12_QUEUE_COMPUTE 0
-# define DX12_QUEUE_COPY 0
-# define DX12_QUEUE_COPY_BACKGROUND 0
-# define DX12_NUM_QUEUES 1
-#endif
-
-#if RtcIsEnabled
-//# define DX12_DEBUG 1
-# define DX12_DEBUG 0
-#else
-# define DX12_DEBUG 0
-#endif
-
-/* ========================== *
- * internal structs
- * ========================== */
-
-struct shader_desc {
-    String file;
-    String func;
-};
-
-struct pipeline_rtv_desc {
-    DXGI_FORMAT format;
-    b32 blending;
-};
-
-struct pipeline_desc {
-    String name;
-
-    /* If a dxc string is set, then it will be used directly instead of looking up dxc from archive using pipeline name */
-    String vs_dxc;
-    String ps_dxc;
-    String cs_dxc;
-
-    struct pipeline_rtv_desc rtvs[8];
-};
-
-struct pipeline {
-    String name;
-    u64 hash;
-    b32 success;
-    b32 is_gfx;
-    String error;
-    i64 compilation_time_ns;
-
-    /* Lock global pipelines mutex when accessing */
-    i64 refcount;
-
-    ID3D12PipelineState *pso;
-    ID3D12RootSignature *rootsig;
-    struct pipeline_desc desc;
-
-    struct pipeline *next;
-};
-
-struct pipeline_error {
-    String msg;
-    struct pipeline_error *next;
-};
-
-struct pipeline_include {
-    String name;
-    u64 name_hash;
-    struct pipeline_include *next;
-};
-
-struct pipeline_scope {
-    Arena *arena;
-    Dict *refs;
-    struct pipeline_scope *next_free;
-};
-
-struct command_queue_desc {
-    enum D3D12_COMMAND_LIST_TYPE type;
-    enum D3D12_COMMAND_QUEUE_PRIORITY priority;
-    String dbg_name;
-};
-
-struct command_queue {
-    struct command_queue_desc desc;
-    ID3D12CommandQueue *cq;
-    Arena *arena;
-
-    P_Mutex submit_fence_mutex;
-    u64 submit_fence_target;
-    ID3D12Fence *submit_fence;
-
-    struct command_list_pool *cl_pool;
-
-#if ProfilingGpu
-    __prof_dx12_ctx(prof);
-#endif
-};
-
-struct command_list_pool {
-    struct command_queue *cq;
-    Arena *arena;
-    P_Mutex mutex;
-    struct command_list *first_submitted_command_list;
-    struct command_list *last_submitted_command_list;
-};
-
-struct command_list {
-    struct command_queue *cq;
-    struct command_list_pool *pool;
-    struct ID3D12CommandAllocator *ca;
-    struct ID3D12GraphicsCommandList *cl;
-    P_Lock global_record_lock;
-
-    struct pipeline *cur_pipeline;
-
-    struct command_descriptor_heap *first_command_descriptor_heap;
-    struct command_buffer *first_command_buffer;
-
-    u64 submitted_fence_target;
-    struct command_list *prev_submitted;
-    struct command_list *next_submitted;
-};
-
-struct command_descriptor_heap {
-    D3D12_DESCRIPTOR_HEAP_TYPE type;
-    ID3D12DescriptorHeap *heap;
-    D3D12_CPU_DESCRIPTOR_HANDLE start_cpu_handle;
-    D3D12_GPU_DESCRIPTOR_HANDLE start_gpu_handle;
-
-    struct command_descriptor_heap *next_in_command_list;
-
-    u64 submitted_fence_target;
-    struct command_queue *submitted_cq;
-    struct command_descriptor_heap *prev_submitted;
-    struct command_descriptor_heap *next_submitted;
-};
-
-struct command_buffer {
-    struct command_buffer_group *group;
-
-    u64 size;
-    struct dx12_resource *resource;
-    D3D12_VERTEX_BUFFER_VIEW vbv;
-    D3D12_INDEX_BUFFER_VIEW Ibv;
-
-    struct command_buffer *next_in_command_list;
-
-    u64 submitted_fence_target;
-    struct command_queue *submitted_cq;
-    struct command_buffer *prev_submitted;
-    struct command_buffer *next_submitted;
-};
-
-struct command_buffer_group {
-    struct command_buffer *first_submitted;
-    struct command_buffer *last_submitted;
-};
-
-struct descriptor {
-    struct cpu_descriptor_heap *heap;
-
-    u32 index;
-    D3D12_CPU_DESCRIPTOR_HANDLE handle;
-
-    struct descriptor *next_free;
-};
-
-struct dx12_resource {
-    enum D3D12_RESOURCE_STATES state;
-    ID3D12Resource *resource;
-    struct descriptor *cbv_descriptor;
-    struct descriptor *srv_descriptor;
-    struct descriptor *uav_descriptor;
-    struct descriptor *rtv_descriptor;
-
-    D3D12_GPU_VIRTUAL_ADDRESS gpu_address;  /* NOTE: 0 for textures */
-
-    Vec2I32 texture_size;
-    struct dx12_resource *next_free;
-};
-
-struct swapchain_buffer {
-    struct swapchain *swapchain;
-    ID3D12Resource *resource;
-    struct descriptor *rtv_descriptor;
-    D3D12_RESOURCE_STATES state;
-};
-
-struct swapchain {
-    IDXGISwapChain3 *swapchain;
-    HWND hwnd;
-    HANDLE waitable;
-    Vec2I32 resolution;
-    struct swapchain_buffer buffers[DX12_SWAPCHAIN_BUFFER_COUNT];
-
-    struct swapchain *next_free;
-};
-
-struct cpu_descriptor_heap {
-    enum D3D12_DESCRIPTOR_HEAP_TYPE type;
-    Arena *arena;
-    P_Mutex mutex;
-
-    u32 descriptor_size;
-    u32 num_descriptors_reserved;
-    u32 num_descriptors_capacity;
-
-    struct descriptor *first_free_descriptor;
-
-    ID3D12DescriptorHeap *heap;
-    struct D3D12_CPU_DESCRIPTOR_HANDLE handle;
-};
-
-enum fenced_release_kind {
-    FENCED_RELEASE_KIND_NONE,
-    FENCED_RELEASE_KIND_RESOURCE,
-    FENCED_RELEASE_KIND_PIPELINE
-};
-
-struct fenced_release_data {
-    enum fenced_release_kind kind;
-    void *ptr;
-};
-
-/* ========================== *
- * internal procs
- * ========================== */
-
-internal P_ExitFuncDef(gp_shutdown);
-
-internal void dx12_init_device(void);
-
-internal void dx12_init_objects(void);
-
-internal void dx12_init_pipelines(void);
-
-internal void dx12_init_noise(void);
-
-internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type);
-
-internal void command_queue_release(struct command_queue *cq);
-
-internal P_JobDef(dx12_evictor_job, _);
-
-internal void fenced_release(void *data, enum fenced_release_kind kind);
-
-internal struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state);
-
-internal struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh);
-
-struct command_queue_alloc_job_sig { struct command_queue_desc *descs_in; struct command_queue **cqs_out; };
-internal P_JobDef(command_queue_alloc_job, job);
-
-struct pipeline_alloc_job_sig { struct pipeline_desc *descs_in; struct pipeline **pipelines_out; };
-internal P_JobDef(pipeline_alloc_job, job);
-
-struct dx12_upload_job_sig { struct dx12_resource *resource; void *data; };
-internal P_JobDef(dx12_upload_job, job);
-
-#if RESOURCE_RELOADING
-internal W_CallbackFuncDef(pipeline_watch_callback, name);
-#endif
-
-/* ========================== *
- * Global state
- * ========================== */
-
-Global struct {
-    Atomic32 initialized;
-
-    /* Descriptor heaps pool */
-    P_Mutex command_descriptor_heaps_mutex;
-    Arena *command_descriptor_heaps_arena;
-    struct command_descriptor_heap *first_submitted_command_descriptor_heap;
-    struct command_descriptor_heap *last_submitted_command_descriptor_heap;
-
-    /* Command buffers pool */
-    P_Mutex command_buffers_mutex;
-    Arena *command_buffers_arena;
-    Dict *command_buffers_dict;
-
-    /* Resources pool */
-    P_Mutex resources_mutex;
-    Arena *resources_arena;
-    struct dx12_resource *first_free_resource;
-
-    /* Swapchains pool */
-    P_Mutex swapchains_mutex;
-    Arena *swapchains_arena;
-    struct swapchain *first_free_swapchain;
-
-    /* Shader bytecode archive */
-    TAR_Archive dxc_archive;
-
-    /* Pipeline cache */
-    P_Mutex pipelines_mutex;
-    Arena *pipelines_arena;
-    struct pipeline *first_free_pipeline;
-    Dict *pipeline_descs;
-    Dict *top_pipelines;  /* Latest pipelines */
-    Dict *top_successful_pipelines;  /* Latest pipelines that successfully compiled */
-    struct pipeline_scope *first_free_pipeline_scope;
-
-    /* Fenced release queue */
-    P_Mutex fenced_releases_mutex;
-    Arena *fenced_releases_arena;
-    u64 fenced_release_targets[DX12_NUM_QUEUES];
-
-    /* Factory */
-    IDXGIFactory6 *factory;
-
-    /* Adapter */
-    IDXGIAdapter1 *adapter;
-
-    /* Device */
-    ID3D12Device *device;
-
-    /* Descriptor sizes */
-    u32 desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
-    u32 desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
-
-    /* Global descriptor heaps */
-    struct cpu_descriptor_heap *cbv_srv_uav_heap;
-    struct cpu_descriptor_heap *rtv_heap;
-
-    /* Command queues */
-    P_Mutex global_command_list_record_mutex;
-    P_Mutex global_submit_mutex;
-    struct command_queue *command_queues[DX12_NUM_QUEUES];
-
-    /* Evictor job */
-    P_Counter evictor_job_counter;
-    P_Cv evictor_wake_cv;
-    P_Mutex evictor_wake_mutex;
-    i64 evictor_wake_gen;
-    b32 evictor_shutdown;
-} G = ZI, DebugAlias(G, G_gp_dx12);
-
 /* ========================== *
  * Startup
  * ========================== */
 
-void gp_startup(void)
+void GPU_Startup(void)
 {
     __prof;
-    if (Atomic32FetchTestSet(&G.initialized, 0, 1) != 0) {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
+    if (Atomic32FetchTestSet(&g->initialized, 0, 1) != 0)
+    {
         P_Panic(Lit("GP layer already initialized"));
     }
 
     /* Initialize command descriptor heaps pool */
-    G.command_descriptor_heaps_arena = AllocArena(Gibi(64));
+    g->command_descriptor_heaps_arena = AllocArena(Gibi(64));
 
     /* Initialize command buffers pool */
-    G.command_buffers_arena = AllocArena(Gibi(64));
-    G.command_buffers_dict = InitDict(G.command_buffers_arena, 4096);
+    g->command_buffers_arena = AllocArena(Gibi(64));
+    g->command_buffers_dict = InitDict(g->command_buffers_arena, 4096);
 
     /* Initialize resources pool */
-    G.resources_arena = AllocArena(Gibi(64));
+    g->resources_arena = AllocArena(Gibi(64));
 
     /* Initialize swapchains pool */
-    G.swapchains_arena = AllocArena(Gibi(64));
+    g->swapchains_arena = AllocArena(Gibi(64));
 
     /* Initialize pipeline cache */
-    G.pipelines_arena = AllocArena(Gibi(64));
-    G.pipeline_descs = InitDict(G.pipelines_arena, 1024);
-    G.top_pipelines = InitDict(G.pipelines_arena, 1024);
-    G.top_successful_pipelines = InitDict(G.pipelines_arena, 1024);
+    g->pipelines_arena = AllocArena(Gibi(64));
+    g->pipeline_descs = InitDict(g->pipelines_arena, 1024);
+    g->top_pipelines = InitDict(g->pipelines_arena, 1024);
+    g->top_successful_pipelines = InitDict(g->pipelines_arena, 1024);
 
     /* Initialize fenced releases queue */
-    G.fenced_releases_arena = AllocArena(Gibi(64));
+    g->fenced_releases_arena = AllocArena(Gibi(64));
 
     /* Initialize embedded shader archive */
     String embedded_data = INC_GetDxcTar();
-    if (embedded_data.len <= 0) {
+    if (embedded_data.len <= 0)
+    {
         P_Panic(Lit("No embedded shaders found"));
     }
-    G.dxc_archive = TAR_ArchiveFromString(G.pipelines_arena, embedded_data, Lit(""));
+    g->dxc_archive = TAR_ArchiveFromString(g->pipelines_arena, embedded_data, Lit(""));
 
     /* Initialize dx12 */
     /* TODO: Parallelize phases */
@@ -434,38 +71,40 @@ void gp_startup(void)
     P_OnExit(gp_shutdown);
 
     /* Start evictor job */
-    P_Run(1, dx12_evictor_job, 0, P_Pool_Background, P_Priority_Low, &G.evictor_job_counter);
+    P_Run(1, dx12_evictor_job, 0, P_Pool_Background, P_Priority_Low, &g->evictor_job_counter);
 }
 
-internal P_ExitFuncDef(gp_shutdown)
+P_ExitFuncDef(gp_shutdown)  /* Exit hook: flags the evictor job for shutdown, wakes it, then waits on its job counter */
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
 #if 0
     /* Release objects to make live object reporting less noisy */
-    //IDXGISwapChain3_Release(G.swapchain);
-    for (u32 i = 0; i < countof(G.command_queues); ++i) {
-        struct command_queue *cq = G.command_queues[i];
+    //IDXGISwapChain3_Release(g->swapchain);
+    for (u32 i = 0; i < countof(g->command_queues); ++i)
+    {
+        struct command_queue *cq = g->command_queues[i];
-        cmomand_queue_release(cq);
+        command_queue_release(cq);
     }
-    ID3D12Device_Release(G.device);
+    ID3D12Device_Release(g->device);
 #else
     (UNUSED)command_queue_release;
 #endif
 
     {
-        P_Lock lock = P_LockE(&G.evictor_wake_mutex);
-        G.evictor_shutdown = 1;
-        P_SignalCv(&G.evictor_wake_cv, I32Max);
+        P_Lock lock = P_LockE(&g->evictor_wake_mutex);
+        g->evictor_shutdown = 1;
+        P_SignalCv(&g->evictor_wake_cv, I32Max);
         P_Unlock(&lock);
     }
-    P_WaitOnCounter(&G.evictor_job_counter);
+    P_WaitOnCounter(&g->evictor_job_counter);
 }
 
 /* ========================== *
  * Dx12 device initialization
  * ========================== */
 
-internal void dx12_init_error(String error)
+void dx12_init_error(String error)
 {
     TempArena scratch = BeginScratchNoConflict();
     String msg = StringFormat(scratch.arena, Lit("Failed to initialize DirectX 12.\n\n%F"), FmtString(error));
@@ -473,9 +112,10 @@ internal void dx12_init_error(String error)
     EndScratch(scratch);
 }
 
-internal void dx12_init_device(void)
+void dx12_init_device(void)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     TempArena scratch = BeginScratchNoConflict();
     HRESULT hr = 0;
 
@@ -486,13 +126,15 @@ internal void dx12_init_device(void)
         __profn("Enable debug layer");
         ID3D12Debug *debug_controller0 = 0;
         hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0);
-        if (FAILED(hr)) {
+        if (FAILED(hr))
+        {
             dx12_init_error(Lit("Failed to create ID3D12Debug0"));
         }
 
         ID3D12Debug1 *debug_controller1 = 0;
         hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1);
-        if (FAILED(hr)) {
+        if (FAILED(hr))
+        {
             dx12_init_error(Lit("Failed to create ID3D12Debug1"));
         }
 
@@ -510,8 +152,9 @@ internal void dx12_init_device(void)
     /* Create factory */
     {
         __profn("Create factory");
-        hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G.factory);
-        if (FAILED(hr)) {
+        hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&g->factory);
+        if (FAILED(hr))
+        {
             dx12_init_error(Lit("Failed to initialize DXGI factory"));
         }
     }
@@ -525,41 +168,49 @@ internal void dx12_init_device(void)
         String first_gpu_name = ZI;
         u32 adapter_index = 0;
         b32 skip = 0;  /* For debugging iGPU */
-        for (;;) {
+        for (;;)
+        {
             {
-                hr = IDXGIFactory6_EnumAdapterByGpuPreference(G.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter);
+                hr = IDXGIFactory6_EnumAdapterByGpuPreference(g->factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter);
             }
-            if (SUCCEEDED(hr)) {
+            if (SUCCEEDED(hr))
+            {
                 DXGI_ADAPTER_DESC1 desc;
                 IDXGIAdapter1_GetDesc1(adapter, &desc);
-                if (first_gpu_name.len == 0) {
+                if (first_gpu_name.len == 0)
+                {
                     first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description);
                 }
                 {
                     hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device);
                 }
-                if (SUCCEEDED(hr) && !skip ) {
+                if (SUCCEEDED(hr) && !skip)
+                {
                     break;
                 }
-                skip  = 0;
+                skip = 0;
-                ID3D12Device_Release(device);
+                if (device) { ID3D12Device_Release(device); }  /* device is still 0 if D3D12CreateDevice failed; skip to the next adapter instead of dereferencing it */
                 IDXGIAdapter1_Release(adapter);
                 adapter = 0;
                 device = 0;
                 ++adapter_index;
-            } else {
+            }
+            else
+            {
                 break;
             }
         }
-        if (!device) {
-            if (first_gpu_name.len > 0) {
+        if (!device)
+        {
+            if (first_gpu_name.len > 0)
+            {
                 String fmt = Lit("Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.");
                 error = StringFormat(scratch.arena, fmt, FmtString(first_gpu_name));
             }
             dx12_init_error(error);
         }
-        G.adapter = adapter;
-        G.device = device;
+        g->adapter = adapter;
+        g->device = device;
     }
 
 #if DX12_DEBUG
@@ -567,8 +218,9 @@ internal void dx12_init_device(void)
     {
         __profn("Enable d3d12 debug break");
         ID3D12InfoQueue *info = 0;
-        hr = ID3D12Device_QueryInterface(G.device, &IID_ID3D12InfoQueue, (void **)&info);
-        if (FAILED(hr)) {
+        hr = ID3D12Device_QueryInterface(g->device, &IID_ID3D12InfoQueue, (void **)&info);
+        if (FAILED(hr))
+        {
             dx12_init_error(Lit("Failed to query ID3D12Device interface"));
         }
         ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1);
@@ -581,7 +233,8 @@ internal void dx12_init_device(void)
         __profn("Enable dxgi debug break");
         IDXGIInfoQueue *dxgi_info = 0;
         hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info);
-        if (FAILED(hr)) {
+        if (FAILED(hr))
+        {
             dx12_init_error(Lit("Failed to get DXGI debug interface"));
         }
         IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1);
@@ -597,29 +250,38 @@ internal void dx12_init_device(void)
         b32 success = 1;
         HKEY key = 0;
         success = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, KEY_READ, &key) == ERROR_SUCCESS;
-        if (success) {
+        if (success)
+        {
             DWORD value = ZI;
             DWORD dword_size = sizeof(DWORD);
             success = RegQueryValueExW(key, L"AllowDevelopmentWithoutDevLicense", 0, 0, (LPBYTE)&value, &dword_size) == ERROR_SUCCESS;
             RegCloseKey(key);
-            if (success) {
+            if (success)
+            {
                 success = value != 0;
             }
         }
         P_LogInfoF("D3D12 profiling is enabled, attempting to set stable power state (this will increase GPU timing stability at the cost of performance)");
-        if (success) {
+        if (success)
+        {
             P_LogInfoF("Machine is in developer mode, calling ID3D12Device::SetStablePowerState");
-            hr = ID3D12Device_SetStablePowerState(G.device, 1);
-            if (SUCCEEDED(hr)) {
+            hr = ID3D12Device_SetStablePowerState(g->device, 1);
+            if (SUCCEEDED(hr))
+            {
                 P_LogInfoF("ID3D12Device::SetStablePowerState succeeded");
-            } else {
+            }
+            else
+            {
                 success = 0;
                 P_LogErrorF("ID3D12Device::SetStablePowerState failed");
             }
-        } else {
+        }
+        else
+        {
             P_LogWarningF("Machine is not in developer mode, cannot call ID3D12Device::SetStablePowerState");
         }
-        if (!success) {
+        if (!success)
+        {
             P_LogWarningF("Profiling is enabled, but ID3D12Device::SetStablePowerState could not be called. This means that GPU timing may be unreliable.");
         }
     }
@@ -632,23 +294,24 @@ internal void dx12_init_device(void)
  * Dx12 object initialization
  * ========================== */
 
-internal void dx12_init_objects(void)
+void dx12_init_objects(void)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
 
     /* Initialize desc sizes */
-    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
-    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
-    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
-    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
+    g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+    g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
+    g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+    g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
 
     /* Initialize desc counts */
-    G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
-    G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = DX12_NUM_RTV_DESCRIPTORS;
+    g->desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
+    g->desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = DX12_NUM_RTV_DESCRIPTORS;
 
     /* Create global descriptor heaps */
-    G.cbv_srv_uav_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
-    G.rtv_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
+    g->cbv_srv_uav_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+    g->rtv_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
 
     /* Create command queues */
     {
@@ -661,7 +324,7 @@ internal void dx12_init_objects(void)
         };
         struct command_queue_alloc_job_sig sig = ZI;
         sig.descs_in = params;
-        sig.cqs_out = G.command_queues;
+        sig.cqs_out = g->command_queues;
         {
             P_Counter counter = ZI;
             P_Run(DX12_NUM_QUEUES, command_queue_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
@@ -671,10 +334,11 @@ internal void dx12_init_objects(void)
         {
             /* Initialize serially for consistent order in profiler */
             __profn("Initialize command queue profiling contexts");
-            for (i32 i = 0; i < DX12_NUM_QUEUES; ++i) {
-                struct command_queue *cq = G.command_queues[i];
+            for (i32 i = 0; i < DX12_NUM_QUEUES; ++i)
+            {
+                struct command_queue *cq = g->command_queues[i];
                 String dbg_name = params[i].dbg_name;
-                __prof_dx12_ctx_alloc(cq->prof, G.device, cq->cq, dbg_name.text, dbg_name.len);
+                __prof_dx12_ctx_alloc(cq->prof, g->device, cq->cq, dbg_name.text, dbg_name.len);
                 (UNUSED)dbg_name;
             }
         }
@@ -686,67 +350,69 @@ internal void dx12_init_objects(void)
  * Dx12 pipeline initialization
  * ========================== */
 
-internal void pipeline_register(u64 num_pipelines, struct pipeline **pipelines);
+void pipeline_register(u64 num_pipelines, struct pipeline **pipelines);
 
-internal void dx12_init_pipelines(void)
+void dx12_init_pipelines(void)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     TempArena scratch = BeginScratchNoConflict();
 
     /* Register pipeline descs */
     {
         /* Material pipeline */
         {
-            struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
+            struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
             desc->name = Lit("kernel_material");
             desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
             desc->rtvs[0].blending = 1;
             desc->rtvs[1].format = DXGI_FORMAT_R16G16B16A16_FLOAT;
             desc->rtvs[1].blending = 1;
-            SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
+            SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
         }
         /* Flood pipeline */
         {
-            struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
+            struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
             desc->name = Lit("kernel_flood");
-            SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
+            SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
         }
         /* Shade pipeline */
         {
-            struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
+            struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
             desc->name = Lit("kernel_shade");
-            SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
+            SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
         }
         /* Shape pipeline */
         {
-            struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
+            struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
             desc->name = Lit("kernel_shape");
             desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
             desc->rtvs[0].blending = 1;
-            SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
+            SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
         }
         /* UI pipeline */
         {
-            struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
+            struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
             desc->name = Lit("kernel_ui");
             desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
             desc->rtvs[0].blending = 1;
-            SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
+            SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
         }
         /* Blit pipeilne */
         {
-            struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
+            struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
             desc->name = Lit("kernel_blit");
             desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
             desc->rtvs[0].blending = 1;
-            SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
+            SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
         }
     }
 
     /* Compile pipelines */
     u32 num_pipelines = 0;
     struct pipeline_desc *descs = PushDry(scratch.arena, struct pipeline_desc);
-    for (DictEntry *entry = G.pipeline_descs->first; entry; entry = entry->next) {
+    for (DictEntry *entry = g->pipeline_descs->first; entry; entry = entry->next)
+    {
         struct pipeline_desc *desc = (struct pipeline_desc *)entry->value;
         *PushStruct(scratch.arena, struct pipeline_desc) = *desc;
         ++num_pipelines;
@@ -761,15 +427,20 @@ internal void dx12_init_pipelines(void)
         P_Run(num_pipelines, pipeline_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
         P_WaitOnCounter(&counter);
     }
-    for (u32 i = 0; i < num_pipelines; ++i) {
+    for (u32 i = 0; i < num_pipelines; ++i)
+    {
         struct pipeline *pipeline = pipelines[i];
-        if (pipeline->success) {
+        if (pipeline->success)
+        {
             P_LogSuccessF("Successfully compiled pipeline \"%F\" in %F seconds", FmtString(pipeline->name), FmtFloat(SecondsFromNs(pipeline->compilation_time_ns)));
-            if (pipeline->error.len) {
+            if (pipeline->error.len)
+            {
                 String msg = StringFormat(scratch.arena, Lit("Warning while compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(pipeline->error));
                 P_LogWarning(msg);
             }
-        } else {
+        }
+        else
+        {
             String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
             String msg = StringFormat(scratch.arena, Lit("Error initializing pipeline \"%F\":\n\n%F"), FmtString(pipeline->name), FmtString(error));
             P_LogError(msg);
@@ -785,8 +456,9 @@ internal void dx12_init_pipelines(void)
  * Noise texture initialization
  * ========================== */
 
-internal void dx12_init_noise(void)
+void dx12_init_noise(void)
 {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     TempArena scratch = BeginScratchNoConflict();
 
     {
@@ -795,13 +467,15 @@ internal void dx12_init_noise(void)
         DXGI_FORMAT format = DXGI_FORMAT_R16_UINT;
         //u32 expected_size = K_BLUE_NOISE_TEX_WIDTH * K_BLUE_NOISE_TEX_HEIGHT * K_BLUE_NOISE_TEX_DEPTH * 2;
         u32 expected_size = K_BLUE_NOISE_TEX_WIDTH * K_BLUE_NOISE_TEX_HEIGHT * K_BLUE_NOISE_TEX_DEPTH * 2;
-        if (RES_ResourceExists(&noise_res)) {
+        if (RES_ResourceExists(&noise_res))
+        {
             String data = RES_GetResourceData(&noise_res);
-            if (data.len != expected_size) {
+            if (data.len != expected_size)
+            {
                 P_Panic(StringFormat(scratch.arena,
-                                        Lit("Noise texture has unexpected size for a %Fx%Fx%F texture (expected %F, got %F)"),
-                                        FmtUint(K_BLUE_NOISE_TEX_WIDTH), FmtUint(K_BLUE_NOISE_TEX_HEIGHT), FmtUint(K_BLUE_NOISE_TEX_DEPTH),
-                                        FmtUint(expected_size), FmtUint(data.len)));
+                                     Lit("Noise texture has unexpected size for a %Fx%Fx%F texture (expected %F, got %F)"),
+                                     FmtUint(K_BLUE_NOISE_TEX_WIDTH), FmtUint(K_BLUE_NOISE_TEX_HEIGHT), FmtUint(K_BLUE_NOISE_TEX_DEPTH),
+                                     FmtUint(expected_size), FmtUint(data.len)));
             }
             {
                 D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
@@ -824,8 +498,8 @@ internal void dx12_init_noise(void)
                 desc.SampleDesc.Quality = 0;
 
                 struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, D3D12_RESOURCE_STATE_COPY_DEST);
-                r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
-                ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle);
+                r->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
+                ID3D12Device_CreateShaderResourceView(g->device, r->resource, 0, r->srv_descriptor->handle);
 
                 /* Upload texture */
                 {
@@ -837,7 +511,9 @@ internal void dx12_init_noise(void)
                     P_WaitOnCounter(&counter);
                 }
             }
-        } else {
+        }
+        else
+        {
             P_Panic(StringFormat(scratch.arena, Lit("Noise resource \"%F\" not found"), FmtString(noise_res_name)));
         }
         RES_CloseResource(&noise_res);
@@ -852,27 +528,7 @@ internal void dx12_init_noise(void)
 
 #if RESOURCE_RELOADING
 
-struct shader_compile_desc {
-    String src;
-    String friendly_name;
-    String entry;
-    String target;
-};
-
-struct shader_compile_result {
-    i64 elapsed_ns;
-    String dxc;
-    String errors;
-    b32 success;
-};
-
-struct shader_compile_job_sig {
-    Arena *arena;
-    struct shader_compile_desc *descs;
-    struct shader_compile_result *results;
-};
-
-internal P_JobDef(shader_compile_job, job)
+P_JobDef(shader_compile_job, job)
 {
     __prof;
     struct shader_compile_job_sig *sig = job.sig;
@@ -898,10 +554,12 @@ internal P_JobDef(shader_compile_job, job)
             };
             u32 num_args = countof(shader_args) + dxc_args_array.count;
             String *args = PushStructs(scratch.arena, String, num_args);
-            for (u32 i = 0; i < countof(shader_args); ++i) {
+            for (u32 i = 0; i < countof(shader_args); ++i)
+            {
                 args[i] = shader_args[i];
             }
-            for (u32 i = 0; i < dxc_args_array.count; ++i) {
+            for (u32 i = 0; i < dxc_args_array.count; ++i)
+            {
                 args[i + countof(shader_args)] = dxc_args_array.strings[i];
             }
             dxc_result = DXC_Compile(arena, desc->src, num_args, args);
@@ -921,21 +579,25 @@ internal P_JobDef(shader_compile_job, job)
  * Pipeline
  * ========================== */
 
-internal P_JobDef(pipeline_alloc_job, job)
+P_JobDef(pipeline_alloc_job, job)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct pipeline_alloc_job_sig *sig = job.sig;
     struct pipeline_desc *desc = &sig->descs_in[job.id];
     struct pipeline **pipelines_out = sig->pipelines_out;
 
     struct pipeline *pipeline = 0;
     {
-        P_Lock lock = P_LockE(&G.pipelines_mutex);
-        if (G.first_free_pipeline) {
-            pipeline = G.first_free_pipeline;
-            G.first_free_pipeline = pipeline->next;
-        } else {
-            pipeline = PushStructNoZero(G.pipelines_arena, struct pipeline);
+        P_Lock lock = P_LockE(&g->pipelines_mutex);
+        if (g->first_free_pipeline)
+        {
+            pipeline = g->first_free_pipeline;
+            g->first_free_pipeline = pipeline->next;
+        }
+        else
+        {
+            pipeline = PushStructNoZero(g->pipelines_arena, struct pipeline);
         }
         P_Unlock(&lock);
     }
@@ -955,22 +617,26 @@ internal P_JobDef(pipeline_alloc_job, job)
 
         String error_str = ZI;
 
-        String vs_dxc = desc->vs_dxc.len > 0 ? desc->vs_dxc : TAR_EntryFromName(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".vs")))->data;
-        String ps_dxc = desc->ps_dxc.len > 0 ? desc->ps_dxc : TAR_EntryFromName(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".ps")))->data;
-        String cs_dxc = desc->cs_dxc.len > 0 ? desc->cs_dxc : TAR_EntryFromName(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".cs")))->data;
-        if (success && vs_dxc.len > 0 && ps_dxc.len <= 0) {
+        String vs_dxc = desc->vs_dxc.len > 0 ? desc->vs_dxc : TAR_EntryFromName(&g->dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".vs")))->data;
+        String ps_dxc = desc->ps_dxc.len > 0 ? desc->ps_dxc : TAR_EntryFromName(&g->dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".ps")))->data;
+        String cs_dxc = desc->cs_dxc.len > 0 ? desc->cs_dxc : TAR_EntryFromName(&g->dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".cs")))->data;
+        if (success && vs_dxc.len > 0 && ps_dxc.len <= 0)
+        {
             error_str = Lit("Pipeline has vertex shader without pixel shader");
             success = 0;
         }
-        if (success && vs_dxc.len <= 0 && ps_dxc.len > 0) {
+        if (success && vs_dxc.len <= 0 && ps_dxc.len > 0)
+        {
             error_str = Lit("Pipeline has pixel shader without vertex shader");
             success = 0;
         }
-        if (success && cs_dxc.len > 0 && (vs_dxc.len > 0 || ps_dxc.len > 0)) {
+        if (success && cs_dxc.len > 0 && (vs_dxc.len > 0 || ps_dxc.len > 0))
+        {
             error_str = Lit("Pipeline has a compute shader with a vertex/pixel shader");
             success = 0;
         }
-        if (success && cs_dxc.len <= 0 && vs_dxc.len <= 0 && ps_dxc.len <= 0) {
+        if (success && cs_dxc.len <= 0 && vs_dxc.len <= 0 && ps_dxc.len <= 0)
+        {
             error_str = Lit("Pipeline has no shaders");
             success = 0;
         }
@@ -978,29 +644,41 @@ internal P_JobDef(pipeline_alloc_job, job)
         ID3D10Blob *vs_blob = 0;
         ID3D10Blob *ps_blob = 0;
         ID3D10Blob *cs_blob = 0;
-        if (success && vs_dxc.len > 0) {
+        if (success && vs_dxc.len > 0)
+        {
             hr = D3DCreateBlob(vs_dxc.len, &vs_blob);
-            if (SUCCEEDED(hr)) {
+            if (SUCCEEDED(hr))
+            {
                 CopyBytes(ID3D10Blob_GetBufferPointer(vs_blob), vs_dxc.text, vs_dxc.len);
-            } else {
+            }
+            else
+            {
                 error_str = Lit("Failed to create vertex shader blob");
                 success = 0;
             }
         }
-        if (success && ps_dxc.len > 0) {
+        if (success && ps_dxc.len > 0)
+        {
             hr = D3DCreateBlob(ps_dxc.len, &ps_blob);
-            if (SUCCEEDED(hr)) {
+            if (SUCCEEDED(hr))
+            {
                 CopyBytes(ID3D10Blob_GetBufferPointer(ps_blob), ps_dxc.text, ps_dxc.len);
-            } else {
+            }
+            else
+            {
                 error_str = Lit("Failed to create pixel shader blob");
                 success = 0;
             }
         }
-        if (success && cs_dxc.len > 0) {
+        if (success && cs_dxc.len > 0)
+        {
             hr = D3DCreateBlob(cs_dxc.len, &cs_blob);
-            if (SUCCEEDED(hr)) {
+            if (SUCCEEDED(hr))
+            {
                 CopyBytes(ID3D10Blob_GetBufferPointer(cs_blob), cs_dxc.text, cs_dxc.len);
-            } else {
+            }
+            else
+            {
                 error_str = Lit("Failed to create compute shader blob");
                 success = 0;
             }
@@ -1011,22 +689,30 @@ internal P_JobDef(pipeline_alloc_job, job)
          * could reuse the shader blob), however we'd like to verify that the
          * root signature exists and matches between vs & ps shaders. */
         ID3D10Blob *rootsig_blob = 0;
-        if (success) {
+        if (success)
+        {
             __profn("Validate root signatures");
-            if (cs_dxc.len > 0) {
+            if (cs_dxc.len > 0)
+            {
                 u32 cs_rootsig_data_len = 0;
                 ID3D10Blob *cs_rootsig_blob = 0;
                 D3DGetBlobPart(ID3D10Blob_GetBufferPointer(cs_blob), ID3D10Blob_GetBufferSize(cs_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &cs_rootsig_blob);
-                if (cs_rootsig_blob) {
+                if (cs_rootsig_blob)
+                {
                     cs_rootsig_data_len = ID3D10Blob_GetBufferSize(cs_rootsig_blob);
                 }
-                if (cs_rootsig_data_len == 0) {
+                if (cs_rootsig_data_len == 0)
+                {
                     success = 0;
                     error_str = Lit("Compute shader is missing root signature");
-                } else {
+                }
+                else
+                {
                     rootsig_blob = cs_rootsig_blob;
                 }
-            } else {
+            }
+            else
+            {
                 char *vs_rootsig_data = 0;
                 char *ps_rootsig_data = 0;
                 u32 vs_rootsig_data_len = 0;
@@ -1035,27 +721,37 @@ internal P_JobDef(pipeline_alloc_job, job)
                 ID3D10Blob *ps_rootsig_blob = 0;
                 D3DGetBlobPart(ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &vs_rootsig_blob);
                 D3DGetBlobPart(ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &ps_rootsig_blob);
-                if (vs_rootsig_blob) {
+                if (vs_rootsig_blob)
+                {
                     vs_rootsig_data = ID3D10Blob_GetBufferPointer(vs_rootsig_blob);
                     vs_rootsig_data_len = ID3D10Blob_GetBufferSize(vs_rootsig_blob);
                 }
-                if (ps_rootsig_blob) {
+                if (ps_rootsig_blob)
+                {
                     ps_rootsig_data = ID3D10Blob_GetBufferPointer(ps_rootsig_blob);
                     ps_rootsig_data_len = ID3D10Blob_GetBufferSize(ps_rootsig_blob);
                 }
-                if (vs_rootsig_data_len == 0) {
+                if (vs_rootsig_data_len == 0)
+                {
                     success = 0;
                     error_str = Lit("Vertex shader is missing root signature");
-                } else if (ps_rootsig_data_len == 0) {
+                }
+                else if (ps_rootsig_data_len == 0)
+                {
                     success = 0;
                     error_str = Lit("Pixel shader is missing root signature");
-                } else if (vs_rootsig_data_len != ps_rootsig_data_len || !EqBytes(vs_rootsig_data, ps_rootsig_data, vs_rootsig_data_len)) {
+                }
+                else if (vs_rootsig_data_len != ps_rootsig_data_len || !EqBytes(vs_rootsig_data, ps_rootsig_data, vs_rootsig_data_len))
+                {
                     success = 0;
                     error_str = Lit("Root signature mismatch between vertex and pixel shader");
-                } else {
+                }
+                else
+                {
                     rootsig_blob = vs_rootsig_blob;
                 }
-                if (ps_rootsig_blob) {
+                if (ps_rootsig_blob)
+                {
                     ID3D10Blob_Release(ps_rootsig_blob);
                 }
             }
@@ -1063,10 +759,12 @@ internal P_JobDef(pipeline_alloc_job, job)
 
         /* Create root signature */
         ID3D12RootSignature *rootsig = 0;
-        if (success) {
+        if (success)
+        {
             __profn("Create root signature");
-            hr = ID3D12Device_CreateRootSignature(G.device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig);
-            if (FAILED(hr)) {
+            hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig);
+            if (FAILED(hr))
+            {
                 error_str = Lit("Failed to create root signature");
                 success = 0;
             }
@@ -1074,15 +772,19 @@ internal P_JobDef(pipeline_alloc_job, job)
 
         /* Create PSO */
         ID3D12PipelineState *pso = 0;
-        if (success) {
-            if (cs_dxc.len > 0) {
+        if (success)
+        {
+            if (cs_dxc.len > 0)
+            {
                 __profn("Create compute PSO");
                 D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = { 0 };
                 pso_desc.pRootSignature = rootsig;
                 pso_desc.CS.pShaderBytecode = ID3D10Blob_GetBufferPointer(cs_blob);
                 pso_desc.CS.BytecodeLength = ID3D10Blob_GetBufferSize(cs_blob);
-                hr = ID3D12Device_CreateComputePipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
-            } else {
+                hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
+            }
+            else
+            {
                 __profn("Create graphics PSO");
 
                 /* Default rasterizer state */
@@ -1108,9 +810,11 @@ internal P_JobDef(pipeline_alloc_job, job)
                     .AlphaToCoverageEnable = 0,
                     .IndependentBlendEnable = 1
                 };
-                for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) {
+                for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i)
+                {
                     StaticAssert(countof(blend_desc.RenderTarget) <= countof(desc->rtvs));
-                    if (desc->rtvs[i].format != DXGI_FORMAT_UNKNOWN) {
+                    if (desc->rtvs[i].format != DXGI_FORMAT_UNKNOWN)
+                    {
                         b32 blending_enabled = desc->rtvs[i].blending;
                         blend_desc.RenderTarget[i].BlendEnable = blending_enabled;
                         blend_desc.RenderTarget[i].SrcBlend = D3D12_BLEND_SRC_ALPHA;
@@ -1120,7 +824,9 @@ internal P_JobDef(pipeline_alloc_job, job)
                         blend_desc.RenderTarget[i].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
                         blend_desc.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD;
                         blend_desc.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
-                    } else {
+                    }
+                    else
+                    {
                         break;
                     }
                 }
@@ -1144,27 +850,33 @@ internal P_JobDef(pipeline_alloc_job, job)
                 pso_desc.DepthStencilState = depth_stencil_desc;
                 pso_desc.InputLayout = input_layout_desc;
                 pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
-                for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) {
+                for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i)
+                {
                     StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc->rtvs));
                     DXGI_FORMAT format = desc->rtvs[i].format;
-                    if (format != DXGI_FORMAT_UNKNOWN) {
+                    if (format != DXGI_FORMAT_UNKNOWN)
+                    {
                         pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format;
-                    } else {
+                    }
+                    else
+                    {
                         break;
                     }
                 }
                 pso_desc.SampleDesc.Count = 1;
                 pso_desc.SampleDesc.Quality = 0;
-                hr = ID3D12Device_CreateGraphicsPipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
+                hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
             }
-            if (FAILED(hr)) {
+            if (FAILED(hr))
+            {
                 error_str = Lit("Failed to create pipeline state object");
                 success = 0;
             }
         }
 
         /* Parse errors */
-        if (!success && error_str.len <= 0) {
+        if (!success && error_str.len <= 0)
+        {
             error_str = Lit("Unknown error");
         }
 
@@ -1175,32 +887,38 @@ internal P_JobDef(pipeline_alloc_job, job)
         pipeline->is_gfx = cs_dxc.len == 0;
         pipeline->error = error_str;
 
-        if (rootsig_blob) {
+        if (rootsig_blob)
+        {
             ID3D10Blob_Release(rootsig_blob);
         }
-        if (vs_blob) {
+        if (vs_blob)
+        {
             ID3D10Blob_Release(vs_blob);
         }
-        if (ps_blob) {
+        if (ps_blob)
+        {
             ID3D10Blob_Release(ps_blob);
         }
-        if (cs_blob) {
+        if (cs_blob)
+        {
             ID3D10Blob_Release(cs_blob);
         }
     }
     EndScratch(scratch);
 }
 
-internal void pipeline_release_now(struct pipeline *pipeline)
+void pipeline_release_now(struct pipeline *pipeline)
 {
     __prof;
-    if (pipeline->pso) {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
+    if (pipeline->pso)
+    {
         ID3D12PipelineState_Release(pipeline->pso);
     }
-    P_Lock lock = P_LockE(&G.pipelines_mutex);
+    P_Lock lock = P_LockE(&g->pipelines_mutex);
     {
-        pipeline->next = G.first_free_pipeline;
-        G.first_free_pipeline = pipeline;
+        pipeline->next = g->first_free_pipeline;
+        g->first_free_pipeline = pipeline;
     }
     P_Unlock(&lock);
 }
@@ -1209,22 +927,27 @@ internal void pipeline_release_now(struct pipeline *pipeline)
  * Pipeline cache
  * ========================== */
 
-internal struct pipeline_scope *pipeline_scope_begin(void)
+struct pipeline_scope *pipeline_scope_begin(void)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct pipeline_scope *scope = 0;
     {
-        P_Lock lock = P_LockE(&G.pipelines_mutex);
-        if (G.first_free_pipeline_scope) {
-            scope = G.first_free_pipeline_scope;
-            G.first_free_pipeline_scope = scope->next_free;
+        P_Lock lock = P_LockE(&g->pipelines_mutex);
+        if (g->first_free_pipeline_scope)
+        {
+            scope = g->first_free_pipeline_scope;
+            g->first_free_pipeline_scope = scope->next_free;
         }
         P_Unlock(&lock);
     }
     Arena *arena = 0;
-    if (scope) {
+    if (scope)
+    {
         arena = scope->arena;
-    } else {
+    }
+    else
+    {
         arena = AllocArena(Mebi(64));
     }
     ResetArena(arena);
@@ -1234,43 +957,52 @@ internal struct pipeline_scope *pipeline_scope_begin(void)
     return scope;
 }
 
-internal void pipeline_scope_end(struct pipeline_scope *scope)
+void pipeline_scope_end(struct pipeline_scope *scope)
 {
     __prof;
-    P_Lock lock = P_LockE(&G.pipelines_mutex);
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
+    P_Lock lock = P_LockE(&g->pipelines_mutex);
     {
-        for (DictEntry *entry = scope->refs->first; entry; entry = entry->next) {
+        for (DictEntry *entry = scope->refs->first; entry; entry = entry->next)
+        {
             struct pipeline *pipeline = (struct pipeline *)entry->value;
-            if (--pipeline->refcount <= 0) {
+            if (--pipeline->refcount <= 0)
+            {
                 fenced_release(pipeline, FENCED_RELEASE_KIND_PIPELINE);
             }
         }
-        scope->next_free = G.first_free_pipeline_scope;
-        G.first_free_pipeline_scope = scope;
+        scope->next_free = g->first_free_pipeline_scope;
+        g->first_free_pipeline_scope = scope;
     }
     P_Unlock(&lock);
 }
 
-internal Readonly struct pipeline g_nil_pipeline = ZI;
-internal struct pipeline *pipeline_from_name(struct pipeline_scope *scope, String name)
+Readonly struct pipeline g_nil_pipeline = ZI;
+struct pipeline *pipeline_from_name(struct pipeline_scope *scope, String name)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct pipeline *result = &g_nil_pipeline;
     u64 hash = HashFnv64(Fnv64Basis, name);
 
     struct pipeline *tmp = (struct pipeline *)DictValueFromHash(scope->refs, hash);
-    if (tmp) {
+    if (tmp)
+    {
         result = tmp;
-    } else {
+    }
+    else
+    {
         {
-            P_Lock lock = P_LockE(&G.pipelines_mutex);
-            tmp = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, hash);
-            if (tmp) {
+            P_Lock lock = P_LockE(&g->pipelines_mutex);
+            tmp = (struct pipeline *)DictValueFromHash(g->top_successful_pipelines, hash);
+            if (tmp)
+            {
                 ++tmp->refcount;
             }
             P_Unlock(&lock);
         }
-        if (tmp) {
+        if (tmp)
+        {
             SetDictValue(scope->arena, scope->refs, hash, (u64)tmp);
             result = tmp;
         }
@@ -1279,30 +1011,35 @@ internal struct pipeline *pipeline_from_name(struct pipeline_scope *scope, Strin
     return result;
 }
 
-internal void pipeline_register(u64 num_pipelines, struct pipeline **pipelines)
+void pipeline_register(u64 num_pipelines, struct pipeline **pipelines)
 {
     __prof;
-    P_Lock lock = P_LockE(&G.pipelines_mutex);
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
+    P_Lock lock = P_LockE(&g->pipelines_mutex);
     {
-        for (u64 i = 0; i < num_pipelines; ++i) {
+        for (u64 i = 0; i < num_pipelines; ++i)
+        {
             struct pipeline *pipeline = pipelines[i];
             u64 hash = pipeline->hash;
             /* Insert into top dict */
             {
-                struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_pipelines, hash);
-                if (old_pipeline && --old_pipeline->refcount <= 0) {
+                struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(g->top_pipelines, hash);
+                if (old_pipeline && --old_pipeline->refcount <= 0)
+                {
                     fenced_release(old_pipeline, FENCED_RELEASE_KIND_PIPELINE);
                 }
-                SetDictValue(G.pipelines_arena, G.top_pipelines, hash, (u64)pipeline);
+                SetDictValue(g->pipelines_arena, g->top_pipelines, hash, (u64)pipeline);
                 ++pipeline->refcount;
             }
             /* Insert into success dict */
-            if (pipeline->success) {
-                struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, hash);
-                if (old_pipeline && --old_pipeline->refcount <= 0) {
+            if (pipeline->success)
+            {
+                struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(g->top_successful_pipelines, hash);
+                if (old_pipeline && --old_pipeline->refcount <= 0)
+                {
                     fenced_release(old_pipeline, FENCED_RELEASE_KIND_PIPELINE);
                 }
-                SetDictValue(G.pipelines_arena, G.top_successful_pipelines, hash, (u64)pipeline);
+                SetDictValue(g->pipelines_arena, g->top_successful_pipelines, hash, (u64)pipeline);
                 ++pipeline->refcount;
             }
         }
@@ -1311,9 +1048,10 @@ internal void pipeline_register(u64 num_pipelines, struct pipeline **pipelines)
 }
 
 #if RESOURCE_RELOADING
-internal W_CallbackFuncDef(pipeline_watch_callback, name)
+W_CallbackFuncDef(pipeline_watch_callback, name)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     TempArena scratch = BeginScratchNoConflict();
 
     String rst_extension = Lit(".rst");
@@ -1330,7 +1068,8 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name)
     i32 num_shaders = 0;
     struct shader_compile_desc *shader_descs = 0;
     struct shader_compile_result *shader_results = 0;
-    if (is_rs || is_cs) {
+    if (is_rs || is_cs)
+    {
         P_LogDebugF("Change detected in shader source file \"%F\", recompiling...", FmtString(name));
         success = 1;
         P_File file = P_OpenFileReadWait(name);
@@ -1350,7 +1089,8 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name)
         {
             struct shader_compile_job_sig sig = ZI;
             sig.arena = scratch.arena;
-            if (is_rs) {
+            if (is_rs)
+            {
                 num_shaders = 2;
                 shader_descs = PushStructs(scratch.arena, struct shader_compile_desc, num_shaders);
                 shader_results = PushStructs(scratch.arena, struct shader_compile_result, num_shaders);
@@ -1364,7 +1104,9 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name)
                 sig.descs[1].friendly_name = friendly_name;
                 sig.descs[1].entry = Lit("ps");
                 sig.descs[1].target = Lit("ps_6_6");
-            } else if (is_cs) {
+            }
+            else if (is_cs)
+            {
                 num_shaders = 1;
                 shader_descs = PushStructs(scratch.arena, struct shader_compile_desc, num_shaders);
                 shader_results = PushStructs(scratch.arena, struct shader_compile_result, num_shaders);
@@ -1385,34 +1127,45 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name)
     }
 
 
-    for (i32 i = 0; i < num_shaders; ++i) {
+    for (i32 i = 0; i < num_shaders; ++i)
+    {
         struct shader_compile_desc *desc = &shader_descs[i];
         struct shader_compile_result *result = &shader_results[i];
-        if (result->success) {
+        if (result->success)
+        {
             P_LogSuccessF("Finished compiling shader \"%F:%F\" in %F seconds", FmtString(desc->friendly_name), FmtString(desc->entry), FmtFloat(SecondsFromNs(result->elapsed_ns)));
-            if (result->errors.len > 0) {
+            if (result->errors.len > 0)
+            {
                 String msg = result->errors;
                 P_LogWarning(msg);
             }
-        } else {
+        }
+        else
+        {
             String msg = result->errors;
             P_LogError(msg);
             success = 0;
         }
     }
 
-    if (success) {
+    if (success)
+    {
         /* Create pipeline descs */
         u32 num_pipelines = 0;
         struct pipeline_desc *pipeline_descs = PushDry(scratch.arena, struct pipeline_desc);
-        for (DictEntry *entry = G.pipeline_descs->first; entry; entry = entry->next) {
+        for (DictEntry *entry = g->pipeline_descs->first; entry; entry = entry->next)
+        {
             struct pipeline_desc *pipeline_desc = (struct pipeline_desc *)entry->value;
             struct pipeline_desc new_pipeline_desc = *pipeline_desc;
-            if (EqString(pipeline_desc->name, pipeline_name)) {
-                if (is_rs) {
+            if (EqString(pipeline_desc->name, pipeline_name))
+            {
+                if (is_rs)
+                {
                     new_pipeline_desc.vs_dxc = shader_results[0].dxc;
                     new_pipeline_desc.ps_dxc = shader_results[1].dxc;
-                } else if (is_cs) {
+                }
+                else if (is_cs)
+                {
                     new_pipeline_desc.cs_dxc = shader_results[0].dxc;
                 }
                 *PushStructNoZero(scratch.arena, struct pipeline_desc) = new_pipeline_desc;
@@ -1421,7 +1174,8 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name)
         }
 
         /* Recompile dirty pipelines */
-        if (num_pipelines > 0) {
+        if (num_pipelines > 0)
+        {
             __profn("Compile dirty pipelines");
             struct pipeline **pipelines = PushStructs(scratch.arena, struct pipeline *, num_pipelines);
             {
@@ -1433,23 +1187,29 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name)
                 P_WaitOnCounter(&counter);
             }
             {
-                P_Lock lock = P_LockS(&G.pipelines_mutex);
-                for (u32 i = 0; i < num_pipelines; ++i) {
+                P_Lock lock = P_LockS(&g->pipelines_mutex);
+                for (u32 i = 0; i < num_pipelines; ++i)
+                {
                     struct pipeline *pipeline = pipelines[i];
-                    if (pipeline->success) {
+                    if (pipeline->success)
+                    {
                         P_LogSuccessF("Successfully compiled pipeline \"%F\" in %F seconds", FmtString(pipeline->name), FmtFloat(SecondsFromNs(pipeline->compilation_time_ns)));
-                        if (pipeline->error.len > 0) {
+                        if (pipeline->error.len > 0)
+                        {
                             String msg = StringFormat(scratch.arena, Lit("Warning while compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(pipeline->error));
                             P_LogWarning(msg);
                         }
-                    } else {
+                    }
+                    else
+                    {
                         {
                             String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
                             String msg = StringFormat(scratch.arena, Lit("Error compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(error));
                             P_LogError(msg);
                         }
-                        struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, pipeline->hash);
-                        if (!old_pipeline) {
+                        struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(g->top_successful_pipelines, pipeline->hash);
+                        if (!old_pipeline)
+                        {
                             /* If no previously successful pipeline exists, then show a message box rather than logging since logs may not be visible to user */
                             String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
                             String msg = StringFormat(scratch.arena, Lit("Error compiling pipeline \"%F\":\n\n%F"), FmtString(pipeline->name), FmtString(error));
@@ -1472,7 +1232,7 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name)
  * Descriptor
  * ========================== */
 
-internal struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh)
+struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh)
 {
     __prof;
     struct descriptor *d = 0;
@@ -1480,13 +1240,17 @@ internal struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh)
     D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI;
     {
         P_Lock lock = P_LockE(&dh->mutex);
-        if (dh->first_free_descriptor) {
+        if (dh->first_free_descriptor)
+        {
             d = dh->first_free_descriptor;
             dh->first_free_descriptor = d->next_free;
             handle = d->handle;
             index = d->index;
-        } else {
-            if (dh->num_descriptors_reserved >= dh->num_descriptors_capacity) {
+        }
+        else
+        {
+            if (dh->num_descriptors_reserved >= dh->num_descriptors_capacity)
+            {
                 P_Panic(Lit("Max descriptors reached in heap"));
             }
             d = PushStructNoZero(dh->arena, struct descriptor);
@@ -1502,7 +1266,7 @@ internal struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh)
     return d;
 }
 
-internal void descriptor_release(struct descriptor *descriptor)
+void descriptor_release(struct descriptor *descriptor)
 {
     struct cpu_descriptor_heap *dh = descriptor->heap;
     P_Lock lock = P_LockE(&dh->mutex);
@@ -1517,9 +1281,10 @@ internal void descriptor_release(struct descriptor *descriptor)
  * CPU descriptor heap
  * ========================== */
 
-internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type)
+struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct cpu_descriptor_heap *dh = 0;
     {
         Arena *arena = AllocArena(Mebi(64));
@@ -1529,11 +1294,13 @@ internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRI
 
     u32 num_descriptors = 0;
     u32 descriptor_size = 0;
-    if (type < (i32)countof(G.desc_counts) && type < (i32)countof(G.desc_sizes)) {
-        num_descriptors = G.desc_counts[type];
-        descriptor_size = G.desc_sizes[type];
+    if (type < (i32)countof(g->desc_counts) && type < (i32)countof(g->desc_sizes))
+    {
+        num_descriptors = g->desc_counts[type];
+        descriptor_size = g->desc_sizes[type];
     }
-    if (num_descriptors == 0 || descriptor_size == 0) {
+    if (num_descriptors == 0 || descriptor_size == 0)
+    {
         P_Panic(Lit("Unsupported CPU descriptor type"));
     }
     dh->num_descriptors_capacity = num_descriptors;
@@ -1542,8 +1309,9 @@ internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRI
     D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
     desc.Type = type;
     desc.NumDescriptors = num_descriptors;
-    HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&dh->heap);
-    if (FAILED(hr)) {
+    HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &desc, &IID_ID3D12DescriptorHeap, (void **)&dh->heap);
+    if (FAILED(hr))
+    {
         P_Panic(Lit("Failed to create CPU descriptor heap"));
     }
     ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(dh->heap, &dh->handle);
@@ -1552,7 +1320,7 @@ internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRI
 }
 
 #if 0
-internal void cpu_descriptor_heap_release(struct cpu_descriptor_heap *dh)
+void cpu_descriptor_heap_release(struct cpu_descriptor_heap *dh)
 {
     /* TODO */
     (UNUSED)dh;
@@ -1563,17 +1331,19 @@ internal void cpu_descriptor_heap_release(struct cpu_descriptor_heap *dh)
  * Fenced release
  * ========================== */
 
-internal void fenced_release(void *data, enum fenced_release_kind kind)
+void fenced_release(void *data, enum fenced_release_kind kind)
 {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct fenced_release_data fr = ZI;
     fr.kind = kind;
     fr.ptr = data;
 
-    u64 fr_targets[countof(G.fenced_release_targets)] = ZI;
+    u64 fr_targets[countof(g->fenced_release_targets)] = ZI;
 
     /* Read current fence target values from command queues */
-    for (u32 i = 0; i < countof(G.command_queues); ++i) {
-        struct command_queue *cq = G.command_queues[i];
+    for (u32 i = 0; i < countof(g->command_queues); ++i)
+    {
+        struct command_queue *cq = g->command_queues[i];
         P_Lock lock = P_LockS(&cq->submit_fence_mutex);
         {
             fr_targets[i] = cq->submit_fence_target;
@@ -1583,20 +1353,20 @@ internal void fenced_release(void *data, enum fenced_release_kind kind)
 
     /* PushStruct data to release queue */
     {
-        P_Lock lock = P_LockE(&G.fenced_releases_mutex);
+        P_Lock lock = P_LockE(&g->fenced_releases_mutex);
         {
-            *PushStruct(G.fenced_releases_arena, struct fenced_release_data) = fr;
-            CopyBytes(G.fenced_release_targets, fr_targets, sizeof(fr_targets));
+            *PushStruct(g->fenced_releases_arena, struct fenced_release_data) = fr;
+            CopyBytes(g->fenced_release_targets, fr_targets, sizeof(fr_targets));
         }
         P_Unlock(&lock);
     }
 
     /* Wake evictor */
     {
-        P_Lock lock = P_LockE(&G.evictor_wake_mutex);
+        P_Lock lock = P_LockE(&g->evictor_wake_mutex);
         {
-            ++G.evictor_wake_gen;
-            P_SignalCv(&G.evictor_wake_cv, I32Max);
+            ++g->evictor_wake_gen;
+            P_SignalCv(&g->evictor_wake_cv, I32Max);
         }
         P_Unlock(&lock);
     }
@@ -1606,17 +1376,21 @@ internal void fenced_release(void *data, enum fenced_release_kind kind)
  * Resource
  * ========================== */
 
-internal struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state)
+struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct dx12_resource *r = 0;
     {
-        P_Lock lock = P_LockE(&G.resources_mutex);
-        if (G.first_free_resource) {
-            r = G.first_free_resource;
-            G.first_free_resource = r->next_free;
-        } else {
-            r = PushStructNoZero(G.resources_arena, struct dx12_resource);
+        P_Lock lock = P_LockE(&g->resources_mutex);
+        if (g->first_free_resource)
+        {
+            r = g->first_free_resource;
+            g->first_free_resource = r->next_free;
+        }
+        else
+        {
+            r = PushStructNoZero(g->resources_arena, struct dx12_resource);
         }
         P_Unlock(&lock);
     }
@@ -1624,37 +1398,44 @@ internal struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_pr
 
     D3D12_CLEAR_VALUE clear_value = { .Format = desc.Format, .Color = { 0 } };
     D3D12_CLEAR_VALUE *clear_value_ptr = desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0;
-    HRESULT hr = ID3D12Device_CreateCommittedResource(G.device, &heap_props, heap_flags, &desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->resource);
-    if (FAILED(hr)) {
+    HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->resource);
+    if (FAILED(hr))
+    {
         /* TODO: Don't panic */
         P_Panic(Lit("Failed to create resource"));
     }
 
     r->state = initial_state;
 
-    if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) {
+    if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
+    {
         r->gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->resource);
     }
 
     return r;
 }
 
-internal void dx12_resource_release_now(struct dx12_resource *t)
+void dx12_resource_release_now(struct dx12_resource *t)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
 
     /* Release descriptors */
     /* TODO: Batch lock heaps */
-    if (t->cbv_descriptor) {
+    if (t->cbv_descriptor)
+    {
         descriptor_release(t->cbv_descriptor);
     }
-    if (t->srv_descriptor) {
+    if (t->srv_descriptor)
+    {
         descriptor_release(t->srv_descriptor);
     }
-    if (t->uav_descriptor) {
+    if (t->uav_descriptor)
+    {
         descriptor_release(t->uav_descriptor);
     }
-    if (t->rtv_descriptor) {
+    if (t->rtv_descriptor)
+    {
         descriptor_release(t->rtv_descriptor);
     }
 
@@ -1662,13 +1443,13 @@ internal void dx12_resource_release_now(struct dx12_resource *t)
     ID3D12Resource_Release(t->resource);
 
     /* Add to free list */
-    P_Lock lock = P_LockE(&G.resources_mutex);
-    t->next_free = G.first_free_resource;
-    G.first_free_resource = t;
+    P_Lock lock = P_LockE(&g->resources_mutex);
+    t->next_free = g->first_free_resource;
+    g->first_free_resource = t;
     P_Unlock(&lock);
 }
 
-void gp_resource_release(GPU_Resource *resource)
+void GPU_ReleaseResource(GPU_Resource *resource)
 {
     struct dx12_resource *r = (struct dx12_resource *)resource;
     fenced_release(r, FENCED_RELEASE_KIND_RESOURCE);
@@ -1678,27 +1459,24 @@ void gp_resource_release(GPU_Resource *resource)
  * Resource barrier
  * ========================== */
 
-struct dx12_resource_barrier_desc {
-    enum D3D12_RESOURCE_BARRIER_TYPE type;
-    struct dx12_resource *resource;
-    enum D3D12_RESOURCE_STATES new_state;  /* 0 if type != D3D12_RESOURCE_BARRIER_TYPE_TRANSITION */
-};
-
-internal void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_descs, struct dx12_resource_barrier_desc *descs)
+void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_descs, struct dx12_resource_barrier_desc *descs)
 {
     __prof;
     TempArena scratch = BeginScratchNoConflict();
 
     i32 num_rbs = 0;
     struct D3D12_RESOURCE_BARRIER *rbs = PushStructsNoZero(scratch.arena, struct D3D12_RESOURCE_BARRIER, num_descs);
-    for (i32 i = 0; i < num_descs; ++i) {
+    for (i32 i = 0; i < num_descs; ++i)
+    {
         struct dx12_resource_barrier_desc *desc = &descs[i];
         struct dx12_resource *resource = desc->resource;
         enum D3D12_RESOURCE_BARRIER_TYPE type = desc->type;
-        if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION) {
+        if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION)
+        {
             enum D3D12_RESOURCE_STATES old_state = resource->state;
             enum D3D12_RESOURCE_STATES new_state = desc->new_state;
-            if (new_state != old_state) {
+            if (new_state != old_state)
+            {
                 struct D3D12_RESOURCE_BARRIER *rb = &rbs[num_rbs++];
                 ZeroStruct(rb);
                 rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
@@ -1709,19 +1487,24 @@ internal void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_desc
                 rb->Transition.StateAfter = new_state;
                 resource->state = new_state;
             }
-        } else if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV) {
+        }
+        else if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV)
+        {
             struct D3D12_RESOURCE_BARRIER *rb = &rbs[num_rbs++];
             ZeroStruct(rb);
             rb->Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
             rb->Flags = 0;
             rb->UAV.pResource = resource->resource;
-        } else {
+        }
+        else
+        {
             /* Unknown barrier type */
             Assert(0);
         }
     }
 
-    if (num_rbs > 0) {
+    if (num_rbs > 0)
+    {
         ID3D12GraphicsCommandList_ResourceBarrier(cl, num_rbs, rbs);
     }
 
@@ -1732,11 +1515,12 @@ internal void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_desc
  * Command queue
  * ========================== */
 
-internal struct command_list_pool *command_list_pool_alloc(struct command_queue *cq);
+struct command_list_pool *command_list_pool_alloc(struct command_queue *cq);
 
-internal P_JobDef(command_queue_alloc_job, job)
+P_JobDef(command_queue_alloc_job, job)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct command_queue_alloc_job_sig *sig = job.sig;
     struct command_queue_desc *desc = &sig->descs_in[job.id];
     {
@@ -1752,13 +1536,15 @@ internal P_JobDef(command_queue_alloc_job, job)
         dx12_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
         dx12_desc.Type = desc->type;
         dx12_desc.Priority = desc->priority;
-        HRESULT hr = ID3D12Device_CreateCommandQueue(G.device, &dx12_desc, &IID_ID3D12CommandQueue, (void **)&cq->cq);
-        if (FAILED(hr)) {
+        HRESULT hr = ID3D12Device_CreateCommandQueue(g->device, &dx12_desc, &IID_ID3D12CommandQueue, (void **)&cq->cq);
+        if (FAILED(hr))
+        {
             P_Panic(Lit("Failed to create command queue"));
         }
 
-        hr = ID3D12Device_CreateFence(G.device, 0, 0, &IID_ID3D12Fence, (void **)&cq->submit_fence);
-        if (FAILED(hr)) {
+        hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&cq->submit_fence);
+        if (FAILED(hr))
+        {
             P_Panic(Lit("Failed to create command queue fence"));
         }
 
@@ -1768,7 +1554,7 @@ internal P_JobDef(command_queue_alloc_job, job)
     }
 }
 
-internal void command_queue_release(struct command_queue *cq)
+void command_queue_release(struct command_queue *cq)
 {
     __prof;
     /* TODO */
@@ -1780,7 +1566,7 @@ internal void command_queue_release(struct command_queue *cq)
  * Command list
  * ========================== */
 
-internal struct command_list_pool *command_list_pool_alloc(struct command_queue *cq)
+struct command_list_pool *command_list_pool_alloc(struct command_queue *cq)
 {
     struct command_list_pool *pool = 0;
     {
@@ -1792,9 +1578,10 @@ internal struct command_list_pool *command_list_pool_alloc(struct command_queue
     return pool;
 }
 
-internal struct command_list *command_list_open(struct command_list_pool *pool)
+struct command_list *command_list_open(struct command_list_pool *pool)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct command_queue *cq = pool->cq;
     u64 completed_fence_value = ID3D12Fence_GetCompletedValue(cq->submit_fence);
 
@@ -1804,29 +1591,40 @@ internal struct command_list *command_list_open(struct command_list_pool *pool)
     {
         P_Lock lock = P_LockE(&pool->mutex);
         /* Find first command list ready for reuse */
-        for (struct command_list *tmp = pool->first_submitted_command_list; tmp; tmp = tmp->next_submitted) {
-            if (completed_fence_value >= tmp->submitted_fence_target) {
+        for (struct command_list *tmp = pool->first_submitted_command_list; tmp; tmp = tmp->next_submitted)
+        {
+            if (completed_fence_value >= tmp->submitted_fence_target)
+            {
                 cl = tmp;
                 break;
             }
         }
-        if (cl) {
+        if (cl)
+        {
             /* Remove from submitted list */
             old_cl = cl->cl;
             old_ca = cl->ca;
             struct command_list *prev = cl->prev_submitted;
             struct command_list *next = cl->next_submitted;
-            if (prev) {
+            if (prev)
+            {
                 prev->next_submitted = next;
-            } else {
+            }
+            else
+            {
                 pool->first_submitted_command_list = next;
             }
-            if (next) {
+            if (next)
+            {
                 next->prev_submitted = prev;
-            } else {
+            }
+            else
+            {
                 pool->last_submitted_command_list = prev;
             }
-        } else {
+        }
+        else
+        {
             cl = PushStructNoZero(pool->arena, struct command_list);
         }
         P_Unlock(&lock);
@@ -1834,37 +1632,45 @@ internal struct command_list *command_list_open(struct command_list_pool *pool)
     ZeroStruct(cl);
     cl->cq = cq;
     cl->pool = pool;
-    cl->global_record_lock = P_LockS(&G.global_command_list_record_mutex);
+    cl->global_record_lock = P_LockS(&g->global_command_list_record_mutex);
 
     HRESULT hr = 0;
-    if (old_cl) {
+    if (old_cl)
+    {
         cl->cl = old_cl;
         cl->ca = old_ca;
-    } else {
-        hr = ID3D12Device_CreateCommandAllocator(G.device, cq->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca);
-        if (FAILED(hr)) {
+    }
+    else
+    {
+        hr = ID3D12Device_CreateCommandAllocator(g->device, cq->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca);
+        if (FAILED(hr))
+        {
             P_Panic(Lit("Failed to create command allocator"));
         }
 
-        hr = ID3D12Device_CreateCommandList(G.device, 0, cq->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl);
-        if (FAILED(hr)) {
+        hr = ID3D12Device_CreateCommandList(g->device, 0, cq->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl);
+        if (FAILED(hr))
+        {
             P_Panic(Lit("Failed to create command list"));
         }
 
         hr = ID3D12GraphicsCommandList_Close(cl->cl);
-        if (FAILED(hr)) {
+        if (FAILED(hr))
+        {
             P_Panic(Lit("Failed to close command list during initialization"));
         }
     }
 
     /* Reset */
     hr = ID3D12CommandAllocator_Reset(cl->ca);
-    if (FAILED(hr)) {
+    if (FAILED(hr))
+    {
         P_Panic(Lit("Failed to reset command allocator"));
     }
 
     hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0);
-    if (FAILED(hr)) {
+    if (FAILED(hr))
+    {
         P_Panic(Lit("Failed to reset command list"));
     }
 
@@ -1872,9 +1678,10 @@ internal struct command_list *command_list_open(struct command_list_pool *pool)
 }
 
 /* TODO: Allow multiple command list submissions */
-internal u64 command_list_close(struct command_list *cl)
+u64 command_list_close(struct command_list *cl)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct command_queue *cq = cl->cq;
     struct command_list_pool *pool = cl->pool;
 
@@ -1882,7 +1689,8 @@ internal u64 command_list_close(struct command_list *cl)
     {
         __profn("Close DX12 command list");
         HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl);
-        if (FAILED(hr)) {
+        if (FAILED(hr))
+        {
             /* TODO: Don't panic */
             P_Panic(Lit("Failed to close command list before execution"));
         }
@@ -1892,7 +1700,7 @@ internal u64 command_list_close(struct command_list *cl)
     u64 submit_fence_target = 0;
     {
         __profn("Execute");
-        P_Lock submit_lock = P_LockS(&G.global_submit_mutex);
+        P_Lock submit_lock = P_LockS(&g->global_submit_mutex);
         P_Lock fence_lock = P_LockE(&cq->submit_fence_mutex);
         {
             submit_fence_target = ++cq->submit_fence_target;
@@ -1905,30 +1713,38 @@ internal u64 command_list_close(struct command_list *cl)
 
     /* Add descriptor heaps to submitted list */
     {
-        P_Lock lock = P_LockE(&G.command_descriptor_heaps_mutex);
-        for (struct command_descriptor_heap *cdh = cl->first_command_descriptor_heap; cdh; cdh = cdh->next_in_command_list) {
+        P_Lock lock = P_LockE(&g->command_descriptor_heaps_mutex);
+        for (struct command_descriptor_heap *cdh = cl->first_command_descriptor_heap; cdh; cdh = cdh->next_in_command_list)
+        {
             cdh->submitted_cq = cq;
             cdh->submitted_fence_target = submit_fence_target;
-            if (G.last_submitted_command_descriptor_heap) {
-                G.last_submitted_command_descriptor_heap->next_submitted = cdh;
-            } else {
-                G.first_submitted_command_descriptor_heap = cdh;
+            if (g->last_submitted_command_descriptor_heap)
+            {
+                g->last_submitted_command_descriptor_heap->next_submitted = cdh;
             }
-            G.last_submitted_command_descriptor_heap = cdh;
+            else
+            {
+                g->first_submitted_command_descriptor_heap = cdh;
+            }
+            g->last_submitted_command_descriptor_heap = cdh;
         }
         P_Unlock(&lock);
     }
 
     /* Add command buffers to submitted list */
     {
-        P_Lock lock = P_LockE(&G.command_buffers_mutex);
-        for (struct command_buffer *cb = cl->first_command_buffer; cb; cb = cb->next_in_command_list) {
+        P_Lock lock = P_LockE(&g->command_buffers_mutex);
+        for (struct command_buffer *cb = cl->first_command_buffer; cb; cb = cb->next_in_command_list)
+        {
             struct command_buffer_group *group = cb->group;
             cb->submitted_cq = cq;
             cb->submitted_fence_target = submit_fence_target;
-            if (group->last_submitted) {
+            if (group->last_submitted)
+            {
                 group->last_submitted->next_submitted = cb;
-            } else {
+            }
+            else
+            {
                 group->first_submitted = cb;
             }
             group->last_submitted = cb;
@@ -1941,9 +1757,12 @@ internal u64 command_list_close(struct command_list *cl)
     cl->submitted_fence_target = submit_fence_target;
     {
         P_Lock lock = P_LockE(&pool->mutex);
-        if (pool->last_submitted_command_list) {
+        if (pool->last_submitted_command_list)
+        {
             pool->last_submitted_command_list->next_submitted = cl;
-        } else {
+        }
+        else
+        {
             pool->first_submitted_command_list = cl;
         }
         pool->last_submitted_command_list = cl;
@@ -1957,9 +1776,10 @@ internal u64 command_list_close(struct command_list *cl)
  * Command descriptor heap (GPU / shader visible descriptor heap)
  * ========================== */
 
-internal struct command_descriptor_heap *command_list_push_descriptor_heap(struct command_list *cl, struct cpu_descriptor_heap *dh_cpu)
+struct command_descriptor_heap *command_list_push_descriptor_heap(struct command_list *cl, struct cpu_descriptor_heap *dh_cpu)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     Assert(dh_cpu->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);  /* Src heap must have expected type */
 
     /* Allocate GPU heap */
@@ -1968,52 +1788,67 @@ internal struct command_descriptor_heap *command_list_push_descriptor_heap(struc
     D3D12_CPU_DESCRIPTOR_HANDLE old_start_cpu_handle = ZI;
     D3D12_GPU_DESCRIPTOR_HANDLE old_start_gpu_handle = ZI;
     {
-        P_Lock lock = P_LockE(&G.command_descriptor_heaps_mutex);
+        P_Lock lock = P_LockE(&g->command_descriptor_heaps_mutex);
         /* Find first heap ready for reuse */
-        for (struct command_descriptor_heap *tmp = G.first_submitted_command_descriptor_heap; tmp; tmp = tmp->next_submitted) {
+        for (struct command_descriptor_heap *tmp = g->first_submitted_command_descriptor_heap; tmp; tmp = tmp->next_submitted)
+        {
             /* TODO: Cache completed fence values */
             u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence);
-            if (completed_fence_value >= tmp->submitted_fence_target) {
+            if (completed_fence_value >= tmp->submitted_fence_target)
+            {
                 cdh = tmp;
                 break;
             }
         }
-        if (cdh) {
+        if (cdh)
+        {
             /* Remove from submitted list */
             old_heap = cdh->heap;
             old_start_cpu_handle = cdh->start_cpu_handle;
             old_start_gpu_handle = cdh->start_gpu_handle;
             struct command_descriptor_heap *prev = cdh->prev_submitted;
             struct command_descriptor_heap *next = cdh->next_submitted;
-            if (prev) {
+            if (prev)
+            {
                 prev->next_submitted = next;
-            } else {
-                G.first_submitted_command_descriptor_heap = next;
             }
-            if (next) {
+            else
+            {
+                g->first_submitted_command_descriptor_heap = next;
+            }
+            if (next)
+            {
                 next->prev_submitted = prev;
-            } else {
-                G.last_submitted_command_descriptor_heap = prev;
             }
-        } else {
+            else
+            {
+                g->last_submitted_command_descriptor_heap = prev;
+            }
+        }
+        else
+        {
             /* No available heap available for reuse, allocate new */
-            cdh = PushStructNoZero(G.command_descriptor_heaps_arena, struct command_descriptor_heap);
+            cdh = PushStructNoZero(g->command_descriptor_heaps_arena, struct command_descriptor_heap);
         }
         P_Unlock(&lock);
     }
     ZeroStruct(cdh);
 
-    if (old_heap) {
+    if (old_heap)
+    {
         cdh->heap = old_heap;
         cdh->start_cpu_handle = old_start_cpu_handle;
         cdh->start_gpu_handle = old_start_gpu_handle;
-    } else {
+    }
+    else
+    {
         D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
         desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
         desc.NumDescriptors = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
         desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
-        HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&cdh->heap);
-        if (FAILED(hr)) {
+        HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &desc, &IID_ID3D12DescriptorHeap, (void **)&cdh->heap);
+        if (FAILED(hr))
+        {
             P_Panic(Lit("Failed to create GPU descriptor heap"));
         }
         ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cdh->heap, &cdh->start_cpu_handle);
@@ -2023,7 +1858,7 @@ internal struct command_descriptor_heap *command_list_push_descriptor_heap(struc
     /* CopyCPU heap */
     {
         P_Lock lock = P_LockS(&dh_cpu->mutex);
-        ID3D12Device_CopyDescriptorsSimple(G.device, dh_cpu->num_descriptors_reserved, cdh->start_cpu_handle, dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+        ID3D12Device_CopyDescriptorsSimple(g->device, dh_cpu->num_descriptors_reserved, cdh->start_cpu_handle, dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
         P_Unlock(&lock);
     }
 
@@ -2038,16 +1873,17 @@ internal struct command_descriptor_heap *command_list_push_descriptor_heap(struc
  * Command buffer
  * ========================== */
 
-internal u64 command_buffer_hash_from_size(u64 size)
+u64 command_buffer_hash_from_size(u64 size)
 {
     u64 hash = RandU64FromSeed(size);
     return hash;
 }
 
-internal u64 align_up_pow2(u64 v)
+u64 align_up_pow2(u64 v)
 {
     u64 result = 0;
-    if (v > 0) {
+    if (v > 0)
+    {
         result = v - 1;
         result |= result >> 1;
         result |= result >> 2;
@@ -2061,9 +1897,10 @@ internal u64 align_up_pow2(u64 v)
 }
 
 #define command_list_push_buffer(cl, count, elems) _command_list_push_buffer((cl), count * ((elems) ? sizeof(*(elems)) : 0), (elems), (elems) ? sizeof(*(elems)) : 1)
-internal struct command_buffer *_command_list_push_buffer(struct command_list *cl, u64 data_len, void *data, u64 data_stride)
+struct command_buffer *_command_list_push_buffer(struct command_list *cl, u64 data_len, void *data, u64 data_stride)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
 
     /* Data length should be a multiple of stride */
     Assert(data_len % data_stride == 0);
@@ -2076,45 +1913,57 @@ internal struct command_buffer *_command_list_push_buffer(struct command_list *c
     struct command_buffer *cb = 0;
     struct dx12_resource *r = 0;
     {
-        P_Lock lock = P_LockE(&G.command_buffers_mutex);
+        P_Lock lock = P_LockE(&g->command_buffers_mutex);
 
         {
             u64 group_hash = command_buffer_hash_from_size(size);
-            DictEntry *cb_group_entry = EnsureDictEntry(G.command_buffers_arena, G.command_buffers_dict, group_hash);
+            DictEntry *cb_group_entry = EnsureDictEntry(g->command_buffers_arena, g->command_buffers_dict, group_hash);
             cb_group = (struct command_buffer_group *)cb_group_entry->value;
-            if (!cb_group) {
+            if (!cb_group)
+            {
                 /* Create group */
-                cb_group = PushStruct(G.command_buffers_arena, struct command_buffer_group);
+                cb_group = PushStruct(g->command_buffers_arena, struct command_buffer_group);
                 cb_group_entry->value = (u64)cb_group;
             }
         }
         /* Find first command buffer ready for reuse */
-        for (struct command_buffer *tmp = cb_group->first_submitted; tmp; tmp = tmp->next_submitted) {
+        for (struct command_buffer *tmp = cb_group->first_submitted; tmp; tmp = tmp->next_submitted)
+        {
             /* TODO: Cache completed fence values */
             u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence);
-            if (completed_fence_value >= tmp->submitted_fence_target) {
+            if (completed_fence_value >= tmp->submitted_fence_target)
+            {
                 cb = tmp;
                 break;
             }
         }
-        if (cb) {
+        if (cb)
+        {
             /* Remove from submitted list */
             r = cb->resource;
             struct command_buffer *prev = cb->prev_submitted;
             struct command_buffer *next = cb->next_submitted;
-            if (prev) {
+            if (prev)
+            {
                 prev->next_submitted = next;
-            } else {
+            }
+            else
+            {
                 cb_group->first_submitted = next;
             }
-            if (next) {
+            if (next)
+            {
                 next->prev_submitted = prev;
-            } else {
+            }
+            else
+            {
                 cb_group->last_submitted = prev;
             }
-        } else {
+        }
+        else
+        {
             /* Allocate new */
-            cb = PushStructNoZero(G.command_buffers_arena, struct command_buffer);
+            cb = PushStructNoZero(g->command_buffers_arena, struct command_buffer);
         }
         P_Unlock(&lock);
     }
@@ -2123,7 +1972,8 @@ internal struct command_buffer *_command_list_push_buffer(struct command_list *c
     cb->size = data_len;
 
     /* Create upload heap */
-    if (!r) {
+    if (!r)
+    {
         D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
         heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
         heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
@@ -2145,7 +1995,7 @@ internal struct command_buffer *_command_list_push_buffer(struct command_list *c
         D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;
 
         r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state);
-        r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
+        r->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
     }
     cb->resource = r;
 
@@ -2158,7 +2008,7 @@ internal struct command_buffer *_command_list_push_buffer(struct command_list *c
         desc.Buffer.NumElements = MaxU32(data_len / data_stride, 1);
         desc.Buffer.StructureByteStride = data_stride;
         desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
-        ID3D12Device_CreateShaderResourceView(G.device, r->resource, &desc, r->srv_descriptor->handle);
+        ID3D12Device_CreateShaderResourceView(g->device, r->resource, &desc, r->srv_descriptor->handle);
     }
 
     /* Write data to resource */
@@ -2166,7 +2016,8 @@ internal struct command_buffer *_command_list_push_buffer(struct command_list *c
         D3D12_RANGE read_range = ZI;
         void *dst = 0;
         HRESULT hr = ID3D12Resource_Map(cb->resource->resource, 0, &read_range, &dst);
-        if (FAILED(hr) || !dst) {
+        if (FAILED(hr) || !dst)
+        {
             /* TODO: Don't panic */
             P_Panic(Lit("Failed to map command buffer resource"));
         }
@@ -2185,18 +2036,14 @@ internal struct command_buffer *_command_list_push_buffer(struct command_list *c
  * Wait job
  * ========================== */
 
-struct dx12_wait_fence_job_sig {
-    ID3D12Fence *fence;
-    u64 target;
-};
-
-internal P_JobDef(dx12_wait_fence_job, job)
+P_JobDef(dx12_wait_fence_job, job)
 {
     __prof;
     struct dx12_wait_fence_job_sig *sig = job.sig;
     ID3D12Fence *fence = sig->fence;
     u64 target = sig->target;
-    if (ID3D12Fence_GetCompletedValue(fence) < target) {
+    if (ID3D12Fence_GetCompletedValue(fence) < target)
+    {
         /* TODO: Pool events */
         HANDLE event = CreateEvent(0, 0, 0, 0);
         ID3D12Fence_SetEventOnCompletion(sig->fence, sig->target, event);
@@ -2209,10 +2056,12 @@ internal P_JobDef(dx12_wait_fence_job, job)
  * Texture
  * ========================== */
 
-GPU_Resource *gp_texture_alloc(GPU_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data)
+GPU_Resource *GPU_AllocTexture(GPU_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data)
 {
     __prof;
-    if (size.x <= 0 || size.y <= 0) {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
+    if (size.x <= 0 || size.y <= 0)
+    {
         P_Panic(Lit("Tried to create texture with dimension <= 0"));
     }
     LocalPersist const DXGI_FORMAT formats[] = {
@@ -2223,10 +2072,12 @@ GPU_Resource *gp_texture_alloc(GPU_TextureFormat format, u32 flags, Vec2I32 size
     };
 
     DXGI_FORMAT dxgi_format = ZI;
-    if (format >= 0 && format < (i32)countof(formats)) {
+    if (format >= 0 && format < (i32)countof(formats))
+    {
         dxgi_format = formats[format];
     }
-    if (format == 0) {
+    if (format == 0)
+    {
         P_Panic(Lit("Tried to create texture with unknown format"));
     }
 
@@ -2252,18 +2103,20 @@ GPU_Resource *gp_texture_alloc(GPU_TextureFormat format, u32 flags, Vec2I32 size
 
     struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state);
     r->texture_size = size;
-    r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
-    ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle);
-    if (flags & GP_TEXTURE_FLAG_TARGETABLE) {
+    r->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
+    ID3D12Device_CreateShaderResourceView(g->device, r->resource, 0, r->srv_descriptor->handle);
+    if (flags & GP_TEXTURE_FLAG_TARGETABLE)
+    {
         desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
-        r->uav_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
-        r->rtv_descriptor = descriptor_alloc(G.rtv_heap);
-        ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, 0, 0, r->uav_descriptor->handle);
-        ID3D12Device_CreateRenderTargetView(G.device, r->resource, 0, r->rtv_descriptor->handle);
+        r->uav_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
+        r->rtv_descriptor = descriptor_alloc(g->rtv_heap);
+        ID3D12Device_CreateUnorderedAccessView(g->device, r->resource, 0, 0, r->uav_descriptor->handle);
+        ID3D12Device_CreateRenderTargetView(g->device, r->resource, 0, r->rtv_descriptor->handle);
     }
 
     /* Upload texture */
-    if (initial_data) {
+    if (initial_data)
+    {
         /* TODO: Make wait optional */
         P_Counter counter = ZI;
         struct dx12_upload_job_sig sig = ZI;
@@ -2276,7 +2129,7 @@ GPU_Resource *gp_texture_alloc(GPU_TextureFormat format, u32 flags, Vec2I32 size
     return (GPU_Resource *)r;
 }
 
-Vec2I32 gp_texture_get_size(GPU_Resource *resource)
+Vec2I32 GPU_GetTextureSize(GPU_Resource *resource)
 {
     struct dx12_resource *r = (struct dx12_resource *)resource;
     return r->texture_size;
@@ -2286,8 +2139,9 @@ Vec2I32 gp_texture_get_size(GPU_Resource *resource)
  * Upload
  * ========================== */
 
-internal P_JobDef(dx12_upload_job, job)
+P_JobDef(dx12_upload_job, job)
 {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct dx12_upload_job_sig *sig = job.sig;
     struct dx12_resource *r = sig->resource;
     void *data = sig->data;
@@ -2302,7 +2156,7 @@ internal P_JobDef(dx12_upload_job, job)
         u64 upload_row_size = 0;
         u32 upload_num_rows = 0;
         D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI;
-        ID3D12Device_GetCopyableFootprints(G.device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size);
+        ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size);
         D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint;
 
         /* Create upload heap */
@@ -2330,7 +2184,7 @@ internal P_JobDef(dx12_upload_job, job)
             upload = dx12_resource_alloc(upload_heap_props, upload_heap_flags, upload_desc, upload_initial_state);
         }
 
-        struct command_queue *cq = G.command_queues[DX12_QUEUE_COPY_BACKGROUND];
+        struct command_queue *cq = g->command_queues[DX12_QUEUE_COPY_BACKGROUND];
         struct command_list *cl = command_list_open(cq->cl_pool);
         {
             /* Copyto upload heap */
@@ -2338,7 +2192,8 @@ internal P_JobDef(dx12_upload_job, job)
                 D3D12_RANGE read_range = ZI;
                 void *mapped = 0;
                 HRESULT hr = ID3D12Resource_Map(upload->resource, 0, &read_range, &mapped);
-                if (FAILED(hr) || !mapped) {
+                if (FAILED(hr) || !mapped)
+                {
                     /* TODO: Don't panic */
                     P_Panic(Lit("Failed to map texture upload resource"));
                 }
@@ -2347,9 +2202,11 @@ internal P_JobDef(dx12_upload_job, job)
 
                 u32 z_size = upload_row_size * upload_num_rows;
 
-                for (u32 z = 0; z < desc.DepthOrArraySize; ++z) {
+                for (u32 z = 0; z < desc.DepthOrArraySize; ++z)
+                {
                     u32 z_offset = z * z_size;
-                    for (u32 y = 0; y < upload_num_rows; ++y) {
+                    for (u32 y = 0; y < upload_num_rows; ++y)
+                    {
                         CopyBytes(dst + y * footprint.RowPitch + z_offset, src + y * upload_row_size + z_offset, upload_row_size);
                     }
                 }
@@ -2376,7 +2233,8 @@ internal P_JobDef(dx12_upload_job, job)
         }        u64 fence_target = command_list_close(cl);
 
         /* Wait on fence so we know it's safe to release upload heap */
-        if (ID3D12Fence_GetCompletedValue(cq->submit_fence) < fence_target) {
+        if (ID3D12Fence_GetCompletedValue(cq->submit_fence) < fence_target)
+        {
             struct dx12_wait_fence_job_sig wait_sig = ZI;
             wait_sig.fence = cq->submit_fence;
             wait_sig.target = fence_target;
@@ -2394,36 +2252,43 @@ internal P_JobDef(dx12_upload_job, job)
  * Run utils
  * ========================== */
 
-internal void command_list_set_pipeline(struct command_list *cl, struct pipeline *pipeline)
+void command_list_set_pipeline(struct command_list *cl, struct pipeline *pipeline)
 {
     ID3D12GraphicsCommandList_SetPipelineState(cl->cl, pipeline->pso);
-    if (pipeline->is_gfx) {
+    if (pipeline->is_gfx)
+    {
         ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, pipeline->rootsig);
-    } else {
+    }
+    else
+    {
         ID3D12GraphicsCommandList_SetComputeRootSignature(cl->cl, pipeline->rootsig);
     }
     cl->cur_pipeline = pipeline;
 }
 
-internal void command_list_set_sig(struct command_list *cl, void *src, u32 size)
+void command_list_set_sig(struct command_list *cl, void *src, u32 size)
 {
     __prof;
     Assert(size % 16 == 0);  /* Root constant structs must pad to 16 bytes */
     Assert(size <= 256);    /* Only 64 32-bit root constants allowed in signature */
     u32 num32bit = size / 4;
     b32 is_gfx = cl->cur_pipeline->is_gfx;
-    for (u32 i = 0; i < num32bit; ++i) {
+    for (u32 i = 0; i < num32bit; ++i)
+    {
         u32 val = 0;
         CopyBytes(&val, (((u32 *)src) + i), 4);
-        if (is_gfx) {
+        if (is_gfx)
+        {
             ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(cl->cl, 0, val, i);
-        } else {
+        }
+        else
+        {
             ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(cl->cl, 0, val, i);
         }
     }
 }
 
-internal struct D3D12_VIEWPORT viewport_from_rect(Rect r)
+struct D3D12_VIEWPORT viewport_from_rect(Rect r)
 {
     struct D3D12_VIEWPORT viewport = ZI;
     viewport.TopLeftX = r.x;
@@ -2435,7 +2300,7 @@ internal struct D3D12_VIEWPORT viewport_from_rect(Rect r)
     return viewport;
 }
 
-internal D3D12_RECT scissor_from_rect(Rect r)
+D3D12_RECT scissor_from_rect(Rect r)
 {
     D3D12_RECT scissor = ZI;
     scissor.left = r.x;
@@ -2445,7 +2310,7 @@ internal D3D12_RECT scissor_from_rect(Rect r)
     return scissor;
 }
 
-internal D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer *cb, u32 vertex_size)
+D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer *cb, u32 vertex_size)
 {
     D3D12_VERTEX_BUFFER_VIEW vbv = ZI;
     vbv.BufferLocation = cb->resource->gpu_address;
@@ -2454,7 +2319,7 @@ internal D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer
     return vbv;
 }
 
-internal D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *cb, DXGI_FORMAT format)
+D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *cb, DXGI_FORMAT format)
 {
     D3D12_INDEX_BUFFER_VIEW ibv = ZI;
     ibv.BufferLocation = cb->resource->gpu_address;
@@ -2463,9 +2328,10 @@ internal D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *
     return ibv;
 }
 
-internal struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, Vec2I32 size, D3D12_RESOURCE_STATES initial_state)
+struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, Vec2I32 size, D3D12_RESOURCE_STATES initial_state)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
     heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
     heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
@@ -2486,12 +2352,12 @@ internal struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, Vec2I32 size, D3D
     desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
 
     struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state);
-    r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
-    r->uav_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
-    r->rtv_descriptor = descriptor_alloc(G.rtv_heap);
-    ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle);
-    ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, 0, 0, r->uav_descriptor->handle);
-    ID3D12Device_CreateRenderTargetView(G.device, r->resource, 0, r->rtv_descriptor->handle);
+    r->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
+    r->uav_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
+    r->rtv_descriptor = descriptor_alloc(g->rtv_heap);
+    ID3D12Device_CreateShaderResourceView(g->device, r->resource, 0, r->srv_descriptor->handle);
+    ID3D12Device_CreateUnorderedAccessView(g->device, r->resource, 0, 0, r->uav_descriptor->handle);
+    ID3D12Device_CreateRenderTargetView(g->device, r->resource, 0, r->rtv_descriptor->handle);
 
     r->texture_size = size;
     return r;
@@ -2505,10 +2371,11 @@ Inline Mat4x4 calculate_vp(Xform view, f32 viewport_width, f32 viewport_height)
     return MulMat4x4(projection, view4x4);
 }
 
-internal D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descriptor *descriptor, struct command_descriptor_heap *cdh)
+D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descriptor *descriptor, struct command_descriptor_heap *cdh)
 {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct D3D12_GPU_DESCRIPTOR_HANDLE result = ZI;
-    result.ptr = cdh->start_gpu_handle.ptr + descriptor->index * G.desc_sizes[descriptor->heap->type];
+    result.ptr = cdh->start_gpu_handle.ptr + descriptor->index * g->desc_sizes[descriptor->heap->type];
     return result;
 }
 
@@ -2516,66 +2383,7 @@ internal D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descripto
  * Render sig
  * ========================== */
 
-struct render_sig {
-    Arena *arena;
-    RandState rand;
-    u32 frame_index;
-
-    /* Material instances */
-    u32 num_material_instance_descs;
-    Arena *material_instance_descs_arena;
-
-    /* Ui instances */
-    u32 num_ui_rect_instance_descs;
-    Arena *ui_rect_instance_descs_arena;
-
-    /* UI shapes */
-    Arena *ui_shape_verts_arena;
-    Arena *ui_shape_indices_arena;
-
-    /* Grids */
-    u32 num_material_grid_descs;
-    Arena *material_grid_descs_arena;
-
-    /* Resources */
-    struct dx12_resource *albedo;
-    struct dx12_resource *emittance;
-    struct dx12_resource *emittance_flood_read;
-    struct dx12_resource *emittance_flood_target;
-    struct dx12_resource *shade_read;
-    struct dx12_resource *shade_target;
-    struct dx12_resource *ui_target;
-};
-
-struct material_instance_desc {
-    Xform xf;
-    u32 texture_id;
-    ClipRect clip;
-    u32 tint;
-    b32 is_light;
-    Vec3 light_emittance;
-    u32 grid_id;
-};
-
-struct ui_rect_instance_desc {
-    Xform xf;
-    u32 texture_id;
-    ClipRect clip;
-    u32 tint;
-};
-
-struct material_grid_desc {
-    f32 line_thickness;
-    f32 line_spacing;
-    Vec2 offset;
-    u32 bg0_color;
-    u32 bg1_color;
-    u32 line_color;
-    u32 x_color;
-    u32 y_color;
-};
-
-internal struct render_sig *render_sig_alloc(void)
+struct render_sig *render_sig_alloc(void)
 {
     __prof;
     struct render_sig *sig = 0;
@@ -2594,7 +2402,7 @@ internal struct render_sig *render_sig_alloc(void)
     return sig;
 }
 
-internal void render_sig_reset(struct render_sig *sig)
+void render_sig_reset(struct render_sig *sig)
 {
     __prof;
 
@@ -2615,19 +2423,21 @@ internal void render_sig_reset(struct render_sig *sig)
     ResetArena(sig->material_grid_descs_arena);
 }
 
-GPU_RenderSig *gp_render_sig_alloc(void)
+GPU_RenderSig *GPU_AllocRenderSig(void)
 {
     __prof;
     struct render_sig *sig = render_sig_alloc();
     return (GPU_RenderSig *)sig;
 }
 
-u32 gp_push_render_cmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *cmd_desc)
+u32 GPU_PushRenderCmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *cmd_desc)
 {
     u32 ret = 0;
     struct render_sig *sig = (struct render_sig *)render_sig;
-    if (sig) {
-        switch (cmd_desc->kind) {
+    if (sig)
+    {
+        switch (cmd_desc->kind)
+        {
             default: break;
 
             case GP_RENDER_CMD_KIND_DRAW_MATERIAL:
@@ -2660,13 +2470,15 @@ u32 gp_push_render_cmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *cmd_desc)
                 u32 color = cmd_desc->ui_shape.color;
                 K_ShapeVert *verts = PushStructsNoZero(sig->ui_shape_verts_arena, K_ShapeVert, cmd_desc->ui_shape.vertices.count);
                 u32 *indices = PushStructsNoZero(sig->ui_shape_indices_arena, u32, cmd_desc->ui_shape.indices.count);
-                for (u32 i = 0; i < cmd_desc->ui_shape.vertices.count; ++i) {
+                for (u32 i = 0; i < cmd_desc->ui_shape.vertices.count; ++i)
+                {
                     K_ShapeVert *v = &verts[i];
                     v->pos = cmd_desc->ui_shape.vertices.points[i];
                     v->color_srgb = color;
                 }
                 u32 vert_offset = verts - (K_ShapeVert *)ArenaBase(sig->ui_shape_verts_arena);
-                for (u32 i = 0; i < cmd_desc->ui_shape.indices.count; ++i) {
+                for (u32 i = 0; i < cmd_desc->ui_shape.indices.count; ++i)
+                {
                     indices[i] = cmd_desc->ui_shape.indices.indices[i] + vert_offset;
                 }
             } break;
@@ -2693,9 +2505,10 @@ u32 gp_push_render_cmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *cmd_desc)
  * Render
  * ========================== */
 
-GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams params)
+GPU_Resource *GPU_RunRender(GPU_RenderSig *gp_render_sig, GPU_RenderParams params)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     TempArena scratch = BeginScratchNoConflict();
     struct render_sig *rsig = (struct render_sig *)gp_render_sig;
     ++rsig->frame_index;
@@ -2710,7 +2523,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
 
 
     /* Allocate render buffers */
-    if (rsig->shade_target && !EqVec2I32(render_size, rsig->shade_target->texture_size)) {
+    if (rsig->shade_target && !EqVec2I32(render_size, rsig->shade_target->texture_size))
+    {
         __profn("Release sig resources");
         fenced_release(rsig->albedo, FENCED_RELEASE_KIND_RESOURCE);
         fenced_release(rsig->emittance, FENCED_RELEASE_KIND_RESOURCE);
@@ -2720,7 +2534,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
         fenced_release(rsig->shade_target, FENCED_RELEASE_KIND_RESOURCE);
         rsig->shade_target = 0;
     }
-    if (!rsig->shade_target) {
+    if (!rsig->shade_target)
+    {
         __profn("Allocate sig resources");
         rsig->albedo = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, render_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
         rsig->emittance = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, render_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
@@ -2731,11 +2546,13 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
     }
 
     /* Allocate ui buffers */
-    if (rsig->ui_target && !EqVec2I32(ui_size, rsig->ui_target->texture_size)) {
+    if (rsig->ui_target && !EqVec2I32(ui_size, rsig->ui_target->texture_size))
+    {
         fenced_release(rsig->ui_target, FENCED_RELEASE_KIND_RESOURCE);
         rsig->ui_target = 0;
     }
-    if (!rsig->ui_target) {
+    if (!rsig->ui_target)
+    {
         rsig->ui_target = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, ui_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
     }
 
@@ -2746,7 +2563,7 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
     struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_blit"));
     struct pipeline *ui_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_ui"));
     struct pipeline *shape_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_shape"));
-    struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
+    struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT];
     struct command_list *cl = command_list_open(cq->cl_pool);
     {
         __profn("Run render");
@@ -2778,7 +2595,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
             /* Process material instances */
             {
                 __profn("Process material instances");
-                for (u32 i = 0; i < rsig->num_material_instance_descs; ++i) {
+                for (u32 i = 0; i < rsig->num_material_instance_descs; ++i)
+                {
                     struct material_instance_desc *desc = &((struct material_instance_desc *)ArenaBase(rsig->material_instance_descs_arena))[i];
                     K_MaterialInstance *instance = &material_instances[i];
                     instance->tex_nurid = desc->texture_id;
@@ -2795,7 +2613,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
             /* Process ui rect instances */
             {
                 __profn("Process ui rect instances");
-                for (u32 i = 0; i < rsig->num_ui_rect_instance_descs; ++i) {
+                for (u32 i = 0; i < rsig->num_ui_rect_instance_descs; ++i)
+                {
                     struct ui_rect_instance_desc *desc = &((struct ui_rect_instance_desc *)ArenaBase(rsig->ui_rect_instance_descs_arena))[i];
                     K_UiInstance *instance = &ui_rect_instances[i];
                     instance->tex_nurid = desc->texture_id;
@@ -2809,7 +2628,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
             /* Process grids */
             {
                 __profn("Process grids");
-                for (u32 i = 0; i < rsig->num_material_grid_descs; ++i) {
+                for (u32 i = 0; i < rsig->num_material_grid_descs; ++i)
+                {
                     struct material_grid_desc *desc = &((struct material_grid_desc *)ArenaBase(rsig->material_grid_descs_arena))[i];
                     K_MaterialGrid *grid = &grids[i];
                     grid->line_thickness = desc->line_thickness;
@@ -2834,7 +2654,7 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
         struct command_buffer *grid_buffer = command_list_push_buffer(cl, rsig->num_material_grid_descs, grids);
 
         /* Upload descriptor heap */
-        struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap);
+        struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, g->cbv_srv_uav_heap);
         ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap };
         ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps);
 
@@ -2864,7 +2684,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
         }
 
         /* Material pass */
-        if (material_pipeline->success) {
+        if (material_pipeline->success)
+        {
             __profn("Material pass");
             __profnc_dx12(cl->cq->prof, cl->cl, "Material pass", Rgb32F(0.5, 0.2, 0.2));
 
@@ -2911,7 +2732,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
         }
 
         /* Flood pass */
-        if (flood_pipeline->success && !params.effects_disabled) {
+        if (flood_pipeline->success && !params.effects_disabled)
+        {
             __profn("Flood pass");
             __profnc_dx12(cl->cq->prof, cl->cl, "Flood pass", Rgb32F(0.5, 0.2, 0.2));
 
@@ -2923,7 +2745,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
             /* TODO: Remove this */
             u64 max_steps = GetGstat(GSTAT_DEBUG_STEPS);
             u64 step = 0;
-            while (step_length != 0 && step < max_steps) {
+            while (step_length != 0 && step < max_steps)
+            {
                 __profn("Flood step");
                 __profnc_dx12(cl->cq->prof, cl->cl, "Flood step", Rgb32F(0.5, 0.2, 0.2));
 
@@ -2954,9 +2777,12 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
                 rsig->emittance_flood_target = swp;
 
                 /* Update step */
-                if (step_length == -1) {
+                if (step_length == -1)
+                {
                     step_length = MaxI32(render_size.x, render_size.y) / 2;
-                } else {
+                }
+                else
+                {
                     step_length /= 2;
                 }
                 ++step;
@@ -2991,7 +2817,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
         }
 
         /* Shade pass */
-        if (shade_pipeline->success) {
+        if (shade_pipeline->success)
+        {
             __profn("Shade pass");
             __profnc_dx12(cl->cq->prof, cl->cl, "Shade pass", Rgb32F(0.5, 0.2, 0.2));
 
@@ -2999,7 +2826,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
             command_list_set_pipeline(cl, shade_pipeline);
 
             u32 shade_flags = K_SHADE_FLAG_NONE;
-            if (params.effects_disabled) {
+            if (params.effects_disabled)
+            {
                 shade_flags |= K_SHADE_FLAG_DISABLE_EFFECTS;
             }
 
@@ -3052,7 +2880,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
         }
 
         /* UI blit pass */
-        if (blit_pipeline->success) {
+        if (blit_pipeline->success)
+        {
             __profn("UI blit pass");
             __profnc_dx12(cl->cq->prof, cl->cl, "UI blit pass", Rgb32F(0.5, 0.2, 0.2));
 
@@ -3084,7 +2913,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
         }
 
         /* UI rect pass */
-        if (ui_pipeline->success) {
+        if (ui_pipeline->success)
+        {
             __profn("UI rect pass");
             __profnc_dx12(cl->cq->prof, cl->cl, "UI rect pass", Rgb32F(0.5, 0.2, 0.2));
 
@@ -3114,7 +2944,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
         }
 
         /* UI shape pass */
-        if (shape_pipeline->success) {
+        if (shape_pipeline->success)
+        {
             __profn("UI shape pass");
             __profnc_dx12(cl->cq->prof, cl->cl, "UI shape pass", Rgb32F(0.5, 0.2, 0.2));
 
@@ -3156,28 +2987,33 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param
  * Memory info
  * ========================== */
 
-GPU_MemoryInfo gp_query_memory_info(void)
+GPU_MemoryInfo GPU_QueryMemoryInfo(void)
 {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     GPU_MemoryInfo result = ZI;
 
     HRESULT hr = 0;
     IDXGIAdapter3 *dxgiAdapter3 = 0;
-    if (SUCCEEDED(hr)) {
-        hr = IDXGIAdapter_QueryInterface(G.adapter, &IID_IDXGIAdapter3, (void **)&dxgiAdapter3);
+    if (SUCCEEDED(hr))
+    {
+        hr = IDXGIAdapter_QueryInterface(g->adapter, &IID_IDXGIAdapter3, (void **)&dxgiAdapter3);
     }
-    if (SUCCEEDED(hr)) {
+    if (SUCCEEDED(hr))
+    {
         struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
         IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info);
         result.local_used = info.CurrentUsage;
         result.local_budget = info.Budget;
     }
-    if (SUCCEEDED(hr)) {
+    if (SUCCEEDED(hr))
+    {
         struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
         IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info);
         result.non_local_used = info.CurrentUsage;
         result.non_local_budget = info.Budget;
     }
-    if (dxgiAdapter3) {
+    if (dxgiAdapter3)
+    {
         IDXGIAdapter_Release(dxgiAdapter3);
     }
     return result;
@@ -3187,12 +3023,15 @@ GPU_MemoryInfo gp_query_memory_info(void)
  * Swapchain
  * ========================== */
 
-internal void swapchain_init_resources(struct swapchain *swapchain)
+void swapchain_init_resources(struct swapchain *swapchain)
 {
-    for (u32 i = 0; i < countof(swapchain->buffers); ++i) {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
+    for (u32 i = 0; i < countof(swapchain->buffers); ++i)
+    {
         ID3D12Resource *resource = 0;
         HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, i, &IID_ID3D12Resource, (void **)&resource);
-        if (FAILED(hr)) {
+        if (FAILED(hr))
+        {
             /* TODO: Don't panic */
             P_Panic(Lit("Failed to get swapchain buffer"));
         }
@@ -3200,26 +3039,30 @@ internal void swapchain_init_resources(struct swapchain *swapchain)
         ZeroStruct(sb);
         sb->swapchain = swapchain;
         sb->resource = resource;
-        sb->rtv_descriptor = descriptor_alloc(G.rtv_heap);
+        sb->rtv_descriptor = descriptor_alloc(g->rtv_heap);
         sb->state = D3D12_RESOURCE_STATE_COMMON;
-        ID3D12Device_CreateRenderTargetView(G.device, sb->resource, 0, sb->rtv_descriptor->handle);
+        ID3D12Device_CreateRenderTargetView(g->device, sb->resource, 0, sb->rtv_descriptor->handle);
     }
 }
 
-GPU_Swapchain *gp_swapchain_alloc(P_Window *window, Vec2I32 resolution)
+GPU_Swapchain *GPU_AllocSwapchain(P_Window *window, Vec2I32 resolution)
 {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     HRESULT hr = 0;
     HWND hwnd = (HWND)P_GetInternalWindowHandle(window);
-    struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
+    struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT];
 
     struct swapchain *swapchain = 0;
     {
-        P_Lock lock = P_LockE(&G.swapchains_mutex);
-        if (G.first_free_swapchain) {
-            swapchain = G.first_free_swapchain;
-            G.first_free_swapchain = swapchain->next_free;
-        } else {
-            swapchain = PushStruct(G.swapchains_arena, struct swapchain);
+        P_Lock lock = P_LockE(&g->swapchains_mutex);
+        if (g->first_free_swapchain)
+        {
+            swapchain = g->first_free_swapchain;
+            g->first_free_swapchain = swapchain->next_free;
+        }
+        else
+        {
+            swapchain = PushStruct(g->swapchains_arena, struct swapchain);
         }
         P_Unlock(&lock);
     }
@@ -3239,15 +3082,17 @@ GPU_Swapchain *gp_swapchain_alloc(P_Window *window, Vec2I32 resolution)
         desc.Flags = DX12_SWAPCHAIN_FLAGS;
         desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
         desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
-        hr = IDXGIFactory2_CreateSwapChainForHwnd(G.factory, (IUnknown *)cq->cq, hwnd, &desc, 0, 0, &swapchain1);
-        if (FAILED(hr)) {
+        hr = IDXGIFactory2_CreateSwapChainForHwnd(g->factory, (IUnknown *)cq->cq, hwnd, &desc, 0, 0, &swapchain1);
+        if (FAILED(hr))
+        {
             P_Panic(Lit("Failed to create IDXGISwapChain1"));
         }
     }
 
     /* Upgrade to swapchain3 */
     hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain->swapchain);
-    if (FAILED(hr)) {
+    if (FAILED(hr))
+    {
         P_Panic(Lit("Failed to create IDXGISwapChain3"));
     }
 
@@ -3259,7 +3104,7 @@ GPU_Swapchain *gp_swapchain_alloc(P_Window *window, Vec2I32 resolution)
 #endif
 
     /* Disable Alt+Enter changing monitor resolution to match window size */
-    IDXGIFactory_MakeWindowAssociation(G.factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
+    IDXGIFactory_MakeWindowAssociation(g->factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
 
     IDXGISwapChain1_Release(swapchain1);
     swapchain->hwnd = hwnd;
@@ -3269,17 +3114,18 @@ GPU_Swapchain *gp_swapchain_alloc(P_Window *window, Vec2I32 resolution)
     return (GPU_Swapchain *)swapchain;
 }
 
-void gp_swapchain_release(GPU_Swapchain *gp_swapchain)
+void GPU_ReleaseSwapchain(GPU_Swapchain *gp_swapchain)
 {
     /* TODO */
     (UNUSED)gp_swapchain;
 }
 
-void gp_swapchain_wait(GPU_Swapchain *gp_swapchain)
+void GPU_WaitOnSwapchain(GPU_Swapchain *gp_swapchain)
 {
 #if DX12_WAIT_FRAME_LATENCY > 0
     struct swapchain *swapchain = (struct swapchain *)gp_swapchain;
-    if (swapchain->waitable) {
+    if (swapchain->waitable)
+    {
         WaitForSingleObjectEx(swapchain->waitable, 1000, 1);
     }
 #else
@@ -3287,20 +3133,22 @@ void gp_swapchain_wait(GPU_Swapchain *gp_swapchain)
 #endif
 }
 
-internal struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, Vec2I32 resolution)
+struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, Vec2I32 resolution)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     resolution.x = MaxI32(resolution.x, 1);
     resolution.y = MaxI32(resolution.y, 1);
     b32 should_rebuild = !EqVec2I32(swapchain->resolution, resolution);
-    if (should_rebuild) {
+    if (should_rebuild)
+    {
         HRESULT hr = 0;
-        struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
+        struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT];
         /* Lock direct queue submissions (in case any write to backbuffer) */
         /* TODO: Less overkill approach - Only flush present_blit since we know it's the only operation targeting backbuffer */
         P_Lock lock = P_LockE(&cq->submit_fence_mutex);
         //DEBUGBREAKABLE;
-        //P_Lock lock = P_LockE(&G.global_command_list_record_mutex);
+        //P_Lock lock = P_LockE(&g->global_command_list_record_mutex);
         {
             /* Flush direct queue */
             //ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, ++cq->submit_fence_target);
@@ -3312,7 +3160,8 @@ internal struct swapchain_buffer *update_swapchain(struct swapchain *swapchain,
             }
 
             /* Release buffers */
-            for (u32 i = 0; i < countof(swapchain->buffers); ++i) {
+            for (u32 i = 0; i < countof(swapchain->buffers); ++i)
+            {
                 struct swapchain_buffer *sb = &swapchain->buffers[i];
                 descriptor_release(sb->rtv_descriptor);
                 ID3D12Resource_Release(sb->resource);
@@ -3320,7 +3169,8 @@ internal struct swapchain_buffer *update_swapchain(struct swapchain *swapchain,
 
             /* Resize buffers */
             hr = IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, DX12_SWAPCHAIN_FLAGS);
-            if (FAILED(hr)) {
+            if (FAILED(hr))
+            {
                 /* TODO: Don't panic */
                 P_Panic(Lit("Failed to resize swapchain"));
             }
@@ -3340,13 +3190,15 @@ internal struct swapchain_buffer *update_swapchain(struct swapchain *swapchain,
  * Present
  * ========================== */
 
-internal void present_blit(struct swapchain_buffer *dst, struct dx12_resource *src, Xform src_xf)
+void present_blit(struct swapchain_buffer *dst, struct dx12_resource *src, Xform src_xf)
 {
     __prof;
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     struct pipeline_scope *pipeline_scope = pipeline_scope_begin();
     struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_blit"));
-    if (blit_pipeline->success) {
-        struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
+    if (blit_pipeline->success)
+    {
+        struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT];
         struct command_list *cl = command_list_open(cq->cl_pool);
         {
             __profn("Present blit");
@@ -3361,7 +3213,7 @@ internal void present_blit(struct swapchain_buffer *dst, struct dx12_resource *s
             struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, countof(quad_indices), quad_indices);
 
             /* Upload descriptor heap */
-            struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap);
+            struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, g->cbv_srv_uav_heap);
             ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap };
             ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps);
 
@@ -3439,7 +3291,7 @@ internal void present_blit(struct swapchain_buffer *dst, struct dx12_resource *s
     pipeline_scope_end(pipeline_scope);
 }
 
-void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync)
+void GPU_PresentSwapchain(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync)
 {
     __prof;
     struct swapchain *swapchain = (struct swapchain *)gp_swapchain;
@@ -3450,7 +3302,8 @@ void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_
     present_blit(swapchain_buffer, texture_resource, texture_xf);
 
     u32 present_flags = 0;
-    if (vsync == 0) {
+    if (vsync == 0)
+    {
         present_flags |= (DXGI_PRESENT_ALLOW_TEARING * DX12_ALLOW_TEARING);
     }
 
@@ -3458,7 +3311,8 @@ void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_
     {
         __profn("Present");
         HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags);
-        if (!SUCCEEDED(hr)) {
+        if (!SUCCEEDED(hr))
+        {
             Assert(0);
         }
     }
@@ -3469,10 +3323,11 @@ void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_
 
         __profn("Mark queue frames");
         /* Lock because frame marks shouldn't occur while command lists are recording */
-        P_Lock lock = P_LockE(&G.global_command_list_record_mutex);
-        for (u32 i = 0; i < countof(G.command_queues); ++i) {
+        P_Lock lock = P_LockE(&GPU_D12_shared_state.global_command_list_record_mutex);  /* Shared state accessed directly: no local `g` is declared in this function */
+        for (u32 i = 0; i < countof(GPU_D12_shared_state.command_queues); ++i)
+        {
             {
-                struct command_queue *cq = G.command_queues[i];
+                struct command_queue *cq = GPU_D12_shared_state.command_queues[i];
                 __prof_dx12_new_frame(cq->prof);
             }
         }
@@ -3480,8 +3335,9 @@ void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_
     }
     {
         __profn("Collect queues");
-        for (u32 i = 0; i < countof(G.command_queues); ++i) {
-            struct command_queue *cq = G.command_queues[i];
+        for (u32 i = 0; i < countof(GPU_D12_shared_state.command_queues); ++i)
+        {
+            struct command_queue *cq = GPU_D12_shared_state.command_queues[i];
             __prof_dx12_collect(cq->prof);
         }
     }
@@ -3489,15 +3345,17 @@ void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_
 }
 
 /* ========================== *
- * Evictor thread
+ * Evictor job
  * ========================== */
 
-internal P_JobDef(dx12_evictor_job, _)
+P_JobDef(dx12_evictor_job, _)
 {
+    GPU_D12_SharedState *g = &GPU_D12_shared_state;
     u64 completed_targets[DX12_NUM_QUEUES] = ZI;
 
     b32 shutdown = 0;
-    while (!shutdown) {
+    while (!shutdown)
+    {
         {
             __profn("Dx12 evictor run");
             TempArena scratch = BeginScratchNoConflict();
@@ -3508,23 +3366,26 @@ internal P_JobDef(dx12_evictor_job, _)
             struct fenced_release_data *fenced_releases = 0;
             {
                 __profn("Copyqueued releases");
-                P_Lock lock = P_LockE(&G.fenced_releases_mutex);
-                num_fenced_releases = G.fenced_releases_arena->pos / sizeof(struct fenced_release_data);
+                P_Lock lock = P_LockE(&g->fenced_releases_mutex);
+                num_fenced_releases = g->fenced_releases_arena->pos / sizeof(struct fenced_release_data);
                 fenced_releases = PushStructsNoZero(scratch.arena, struct fenced_release_data, num_fenced_releases);
-                CopyBytes(fenced_releases, ArenaBase(G.fenced_releases_arena), G.fenced_releases_arena->pos);
-                ResetArena(G.fenced_releases_arena);
-                CopyBytes(targets, G.fenced_release_targets, sizeof(targets));
+                CopyBytes(fenced_releases, ArenaBase(g->fenced_releases_arena), g->fenced_releases_arena->pos);
+                ResetArena(g->fenced_releases_arena);
+                CopyBytes(targets, g->fenced_release_targets, sizeof(targets));
                 P_Unlock(&lock);
             }
 
             /* Wait until fences reach target */
             {
                 __profn("Check fences");
-                for (u32 i = 0; i < countof(targets); ++i) {
-                    while (completed_targets[i] < targets[i]) {
-                        struct command_queue *cq = G.command_queues[i];
+                for (u32 i = 0; i < countof(targets); ++i)
+                {
+                    while (completed_targets[i] < targets[i])
+                    {
+                        struct command_queue *cq = g->command_queues[i];
                         completed_targets[i] = ID3D12Fence_GetCompletedValue(cq->submit_fence);
-                        if (completed_targets[i] < targets[i]) {
+                        if (completed_targets[i] < targets[i])
+                        {
                             __profn("Wait on fence");
                             {
                                 struct dx12_wait_fence_job_sig sig = ZI;
@@ -3542,9 +3403,11 @@ internal P_JobDef(dx12_evictor_job, _)
             }
 
             /* Process releases */
-            for (u32 i = 0; i < num_fenced_releases; ++i) {
+            for (u32 i = 0; i < num_fenced_releases; ++i)
+            {
                 struct fenced_release_data *fr = &fenced_releases[i];
-                switch (fr->kind) {
+                switch (fr->kind)
+                {
                     default:
                     {
                         /* Unknown handle type */
@@ -3566,13 +3429,14 @@ internal P_JobDef(dx12_evictor_job, _)
             }
             EndScratch(scratch);
         }
-        P_Lock lock = P_LockE(&G.evictor_wake_mutex);
+        P_Lock lock = P_LockE(&g->evictor_wake_mutex);
         {
-            while (!G.evictor_shutdown && G.evictor_wake_gen == 0) {
-                P_WaitOnCv(&G.evictor_wake_cv, &lock);
+            while (!g->evictor_shutdown && g->evictor_wake_gen == 0)
+            {
+                P_WaitOnCv(&g->evictor_wake_cv, &lock);
             }
-            shutdown = G.evictor_shutdown;
-            G.evictor_wake_gen = 0;
+            shutdown = g->evictor_shutdown;
+            g->evictor_wake_gen = 0;
         }
         P_Unlock(&lock);
     }
diff --git a/src/gpu/gpu_dx12.h b/src/gpu/gpu_dx12.h
new file mode 100644
index 00000000..ce27c7cc
--- /dev/null
+++ b/src/gpu/gpu_dx12.h
@@ -0,0 +1,692 @@
+////////////////////////////////
+//~ D3D12 headers
+
+#pragma warning(push, 0)
+# define UNICODE
+# define COBJMACROS
+# include <Windows.h>
+# include <d3d12.h>
+# include <dxgidebug.h>
+# include <dxgi1_6.h>
+# include <combaseapi.h>
+# include <d3dcompiler.h>
+#pragma warning(pop)
+
+////////////////////////////////
+//~ Dx12 configuration
+
+#define DX12_ALLOW_TEARING 1
+#define DX12_WAIT_FRAME_LATENCY 1
+#define DX12_SWAPCHAIN_FLAGS            (((DX12_ALLOW_TEARING != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | ((DX12_WAIT_FRAME_LATENCY != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT))
+#define DX12_SWAPCHAIN_BUFFER_COUNT     (4)
+
+/* Arbitrary limits */
+#define DX12_NUM_CBV_SRV_UAV_DESCRIPTORS    (1024 * 64)
+#define DX12_NUM_RTV_DESCRIPTORS            (1024 * 1)
+#define DX12_COMMAND_BUFFER_MIN_SIZE        (1024 * 64)
+
+#define DX12_MULTI_QUEUE (!(ProfilingIsEnabled))  /* Multiple queues are only used when profiling is disabled */
+#if DX12_MULTI_QUEUE
+# define DX12_QUEUE_DIRECT 0
+# define DX12_QUEUE_COMPUTE 1
+# define DX12_QUEUE_COPY 2
+# define DX12_QUEUE_COPY_BACKGROUND 3
+# define DX12_NUM_QUEUES 4
+#else  /* Single-queue mode: every DX12_QUEUE_* index aliases queue 0 */
+# define DX12_QUEUE_DIRECT 0
+# define DX12_QUEUE_COMPUTE 0
+# define DX12_QUEUE_COPY 0
+# define DX12_QUEUE_COPY_BACKGROUND 0
+# define DX12_NUM_QUEUES 1
+#endif
+
+#if RtcIsEnabled
+//# define DX12_DEBUG 1
+# define DX12_DEBUG 0
+#else
+# define DX12_DEBUG 0
+#endif
+
+/* ========================== *
+ * Structs
+ * ========================== */
+
+struct shader_desc
+{
+    String file;
+    String func;
+};
+
+struct pipeline_rtv_desc
+{
+    DXGI_FORMAT format;
+    b32 blending;
+};
+
+struct pipeline_desc
+{
+    String name;
+
+    /* If a dxc string is set, then it will be used directly instead of looking up dxc from archive using pipeline name */
+    String vs_dxc;
+    String ps_dxc;
+    String cs_dxc;
+
+    struct pipeline_rtv_desc rtvs[8];
+};
+
+struct pipeline
+{
+    String name;
+    u64 hash;
+    b32 success;
+    b32 is_gfx;
+    String error;
+    i64 compilation_time_ns;
+
+    /* Lock global pipelines mutex when accessing */
+    i64 refcount;
+
+    ID3D12PipelineState *pso;
+    ID3D12RootSignature *rootsig;
+    struct pipeline_desc desc;
+
+    struct pipeline *next;
+};
+
+struct pipeline_error
+{
+    String msg;
+    struct pipeline_error *next;
+};
+
+struct pipeline_include
+{
+    String name;
+    u64 name_hash;
+    struct pipeline_include *next;
+};
+
+struct pipeline_scope
+{
+    Arena *arena;
+    Dict *refs;
+    struct pipeline_scope *next_free;
+};
+
+struct command_queue_desc
+{
+    enum D3D12_COMMAND_LIST_TYPE type;
+    enum D3D12_COMMAND_QUEUE_PRIORITY priority;
+    String dbg_name;
+};
+
+struct command_queue
+{
+    struct command_queue_desc desc;
+    ID3D12CommandQueue *cq;  /* Also handed to DXGI as the device object when creating a swapchain */
+    Arena *arena;
+
+    P_Mutex submit_fence_mutex;  /* Locked exclusively by update_swapchain to hold off submissions while backbuffers are rebuilt */
+    u64 submit_fence_target;
+    ID3D12Fence *submit_fence;  /* Completed value is polled by the evictor job */
+
+    struct command_list_pool *cl_pool;  /* Pool passed to command_list_open() */
+
+#if ProfilingGpu
+    __prof_dx12_ctx(prof);  /* Profiler context consumed by the __prof_dx12* / __profnc_dx12 macros */
+#endif
+};
+
+struct command_list_pool
+{
+    struct command_queue *cq;
+    Arena *arena;
+    P_Mutex mutex;
+    struct command_list *first_submitted_command_list;
+    struct command_list *last_submitted_command_list;
+};
+
+struct command_list
+{
+    struct command_queue *cq;
+    struct command_list_pool *pool;
+    struct ID3D12CommandAllocator *ca;
+    struct ID3D12GraphicsCommandList *cl;
+    P_Lock global_record_lock;
+
+    struct pipeline *cur_pipeline;
+
+    struct command_descriptor_heap *first_command_descriptor_heap;
+    struct command_buffer *first_command_buffer;
+
+    u64 submitted_fence_target;
+    struct command_list *prev_submitted;
+    struct command_list *next_submitted;
+};
+
+struct command_descriptor_heap
+{
+    D3D12_DESCRIPTOR_HEAP_TYPE type;
+    ID3D12DescriptorHeap *heap;
+    D3D12_CPU_DESCRIPTOR_HANDLE start_cpu_handle;
+    D3D12_GPU_DESCRIPTOR_HANDLE start_gpu_handle;
+
+    struct command_descriptor_heap *next_in_command_list;
+
+    u64 submitted_fence_target;
+    struct command_queue *submitted_cq;
+    struct command_descriptor_heap *prev_submitted;
+    struct command_descriptor_heap *next_submitted;
+};
+
+struct command_buffer
+{
+    struct command_buffer_group *group;
+
+    u64 size;
+    struct dx12_resource *resource;
+    D3D12_VERTEX_BUFFER_VIEW vbv;
+    D3D12_INDEX_BUFFER_VIEW Ibv;
+
+    struct command_buffer *next_in_command_list;
+
+    u64 submitted_fence_target;
+    struct command_queue *submitted_cq;
+    struct command_buffer *prev_submitted;
+    struct command_buffer *next_submitted;
+};
+
+struct command_buffer_group
+{
+    struct command_buffer *first_submitted;
+    struct command_buffer *last_submitted;
+};
+
+struct descriptor
+{
+    struct cpu_descriptor_heap *heap;
+
+    u32 index;
+    D3D12_CPU_DESCRIPTOR_HANDLE handle;
+
+    struct descriptor *next_free;
+};
+
+struct dx12_resource
+{
+    enum D3D12_RESOURCE_STATES state;
+    ID3D12Resource *resource;
+    struct descriptor *cbv_descriptor;
+    struct descriptor *srv_descriptor;
+    struct descriptor *uav_descriptor;
+    struct descriptor *rtv_descriptor;
+
+    D3D12_GPU_VIRTUAL_ADDRESS gpu_address;  /* NOTE: 0 for textures */
+
+    Vec2I32 texture_size;
+    struct dx12_resource *next_free;
+};
+
+struct swapchain_buffer
+{
+    struct swapchain *swapchain;  /* Owning swapchain (set in swapchain_init_resources) */
+    ID3D12Resource *resource;  /* Backbuffer obtained via IDXGISwapChain3_GetBuffer; released in update_swapchain before a resize */
+    struct descriptor *rtv_descriptor;  /* Allocated from the global rtv_heap; released in update_swapchain before a resize */
+    D3D12_RESOURCE_STATES state;  /* Set to D3D12_RESOURCE_STATE_COMMON by swapchain_init_resources */
+};
+
+struct swapchain
+{
+    IDXGISwapChain3 *swapchain;  /* Obtained by QueryInterface on the IDXGISwapChain1 created for the window */
+    HWND hwnd;  /* Window handle the swapchain was created for */
+    HANDLE waitable;  /* Waited on by GPU_WaitOnSwapchain when non-null (DX12_WAIT_FRAME_LATENCY > 0) */
+    Vec2I32 resolution;  /* Compared against the requested size in update_swapchain to decide whether to rebuild */
+    struct swapchain_buffer buffers[DX12_SWAPCHAIN_BUFFER_COUNT];  /* Filled by swapchain_init_resources */
+
+    struct swapchain *next_free;  /* Free-list link (see first_free_swapchain in the shared state) */
+};
+
+struct cpu_descriptor_heap
+{
+    enum D3D12_DESCRIPTOR_HEAP_TYPE type;
+    Arena *arena;
+    P_Mutex mutex;
+
+    u32 descriptor_size;
+    u32 num_descriptors_reserved;
+    u32 num_descriptors_capacity;
+
+    struct descriptor *first_free_descriptor;
+
+    ID3D12DescriptorHeap *heap;
+    struct D3D12_CPU_DESCRIPTOR_HANDLE handle;
+};
+
+enum fenced_release_kind
+{
+    FENCED_RELEASE_KIND_NONE,
+    FENCED_RELEASE_KIND_RESOURCE,
+    FENCED_RELEASE_KIND_PIPELINE
+};
+
+struct fenced_release_data
+{
+    enum fenced_release_kind kind;
+    void *ptr;
+};
+
+struct command_queue_alloc_job_sig { struct command_queue_desc *descs_in; struct command_queue **cqs_out; };
+
+struct pipeline_alloc_job_sig { struct pipeline_desc *descs_in; struct pipeline **pipelines_out; };
+
+struct dx12_upload_job_sig { struct dx12_resource *resource; void *data; };
+
+struct shader_compile_desc
+{
+    String src;
+    String friendly_name;
+    String entry;
+    String target;
+};
+
+struct shader_compile_result
+{
+    i64 elapsed_ns;
+    String dxc;
+    String errors;
+    b32 success;
+};
+
+struct shader_compile_job_sig
+{
+    Arena *arena;
+    struct shader_compile_desc *descs;
+    struct shader_compile_result *results;
+};
+
+
+struct render_sig
+{
+    Arena *arena;
+    RandState rand;
+    u32 frame_index;
+
+    /* Material instances */
+    u32 num_material_instance_descs;
+    Arena *material_instance_descs_arena;
+
+    /* UI rect instances */
+    u32 num_ui_rect_instance_descs;
+    Arena *ui_rect_instance_descs_arena;
+
+    /* UI shapes */
+    Arena *ui_shape_verts_arena;
+    Arena *ui_shape_indices_arena;
+
+    /* Grids */
+    u32 num_material_grid_descs;
+    Arena *material_grid_descs_arena;
+
+    /* Resources */
+    struct dx12_resource *albedo;
+    struct dx12_resource *emittance;
+    struct dx12_resource *emittance_flood_read;
+    struct dx12_resource *emittance_flood_target;
+    struct dx12_resource *shade_read;
+    struct dx12_resource *shade_target;
+    struct dx12_resource *ui_target;
+};
+
+struct material_instance_desc
+{
+    Xform xf;
+    u32 texture_id;
+    ClipRect clip;
+    u32 tint;
+    b32 is_light;
+    Vec3 light_emittance;
+    u32 grid_id;
+};
+
+struct ui_rect_instance_desc
+{
+    Xform xf;
+    u32 texture_id;
+    ClipRect clip;
+    u32 tint;
+};
+
+struct material_grid_desc
+{
+    f32 line_thickness;
+    f32 line_spacing;
+    Vec2 offset;
+    u32 bg0_color;
+    u32 bg1_color;
+    u32 line_color;
+    u32 x_color;
+    u32 y_color;
+};
+
+/* ========================== *
+ * Global state
+ * ========================== */
+
+Struct(GPU_D12_SharedState)
+{
+    Atomic32 initialized;
+
+    /* Descriptor heaps pool */
+    P_Mutex command_descriptor_heaps_mutex;
+    Arena *command_descriptor_heaps_arena;
+    struct command_descriptor_heap *first_submitted_command_descriptor_heap;
+    struct command_descriptor_heap *last_submitted_command_descriptor_heap;
+
+    /* Command buffers pool */
+    P_Mutex command_buffers_mutex;
+    Arena *command_buffers_arena;
+    Dict *command_buffers_dict;
+
+    /* Resources pool */
+    P_Mutex resources_mutex;
+    Arena *resources_arena;
+    struct dx12_resource *first_free_resource;
+
+    /* Swapchains pool (GPU_AllocSwapchain pops first_free_swapchain, else pushes onto swapchains_arena, under swapchains_mutex) */
+    P_Mutex swapchains_mutex;
+    Arena *swapchains_arena;
+    struct swapchain *first_free_swapchain;
+
+    /* Shader bytecode archive */
+    TAR_Archive dxc_archive;
+
+    /* Pipeline cache */
+    P_Mutex pipelines_mutex;
+    Arena *pipelines_arena;
+    struct pipeline *first_free_pipeline;
+    Dict *pipeline_descs;
+    Dict *top_pipelines;  /* Latest pipelines */
+    Dict *top_successful_pipelines;  /* Latest pipelines that successfully compiled */
+    struct pipeline_scope *first_free_pipeline_scope;
+
+    /* Fenced release queue */
+    P_Mutex fenced_releases_mutex;
+    Arena *fenced_releases_arena;  /* Packed array of struct fenced_release_data; copied out and reset by the evictor job */
+    u64 fenced_release_targets[DX12_NUM_QUEUES];  /* Per-queue fence values the evictor waits for before processing releases */
+
+    /* Factory */
+    IDXGIFactory6 *factory;  /* Used by GPU_AllocSwapchain to create swapchains and set the window association */
+
+    /* Adapter */
+    IDXGIAdapter1 *adapter;  /* Queried for IDXGIAdapter3 by GPU_QueryMemoryInfo */
+
+    /* Device */
+    ID3D12Device *device;
+
+    /* Descriptor sizes and counts (one slot per D3D12_DESCRIPTOR_HEAP_TYPE) */
+    u32 desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
+    u32 desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
+
+    /* Global descriptor heaps */
+    struct cpu_descriptor_heap *cbv_srv_uav_heap;  /* Pushed into command lists via command_list_push_descriptor_heap */
+    struct cpu_descriptor_heap *rtv_heap;  /* Source of swapchain buffer RTV descriptors */
+
+    /* Command queues */
+    P_Mutex global_command_list_record_mutex;  /* Taken exclusively while marking profiler frames so marks don't occur during command list recording */
+    P_Mutex global_submit_mutex;
+    struct command_queue *command_queues[DX12_NUM_QUEUES];  /* Indexed by DX12_QUEUE_* */
+
+    /* Evictor job */
+    P_Counter evictor_job_counter;
+    P_Cv evictor_wake_cv;  /* Evictor blocks on this (with evictor_wake_mutex) between runs */
+    P_Mutex evictor_wake_mutex;  /* Guards evictor_wake_gen and evictor_shutdown in the evictor job */
+    i64 evictor_wake_gen;  /* Evictor sleeps while this is 0 (and not shutting down), then resets it to 0 */
+    b32 evictor_shutdown;  /* When set, the evictor loop exits after its current iteration */
+};
+
+extern GPU_D12_SharedState GPU_D12_shared_state;
+
+/* ========================== *
+ * Startup
+ * ========================== */
+
+void GPU_Startup(void);
+
+P_ExitFuncDef(gp_shutdown);
+
+/* ========================== *
+ * Dx12 device initialization
+ * ========================== */
+
+void dx12_init_error(String error);
+
+void dx12_init_device(void);
+
+/* ========================== *
+ * Dx12 object initialization
+ * ========================== */
+
+void dx12_init_objects(void);
+
+/* ========================== *
+ * Dx12 pipeline initialization
+ * ========================== */
+
+void dx12_init_pipelines(void);
+
+/* ========================== *
+ * Noise texture initialization
+ * ========================== */
+
+void dx12_init_noise(void);
+
+/* ========================== *
+ * Shader compilation
+ * ========================== */
+
+P_JobDef(shader_compile_job, job);
+
+/* ========================== *
+ * Pipeline
+ * ========================== */
+
+P_JobDef(pipeline_alloc_job, job);
+
+void pipeline_release_now(struct pipeline *pipeline);
+
+/* ========================== *
+ * Pipeline cache
+ * ========================== */
+
+struct pipeline_scope *pipeline_scope_begin(void);
+
+void pipeline_scope_end(struct pipeline_scope *scope);
+
+extern Readonly struct pipeline g_nil_pipeline;
+struct pipeline *pipeline_from_name(struct pipeline_scope *scope, String name);
+
+void pipeline_register(u64 num_pipelines, struct pipeline **pipelines);
+
+W_CallbackFuncDef(pipeline_watch_callback, name);
+
+/* ========================== *
+ * Descriptor
+ * ========================== */
+
+struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh);
+
+void descriptor_release(struct descriptor *descriptor);
+
+/* ========================== *
+ * CPU descriptor heap
+ * ========================== */
+
+struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type);
+
+/* ========================== *
+ * Fenced release
+ * ========================== */
+
+void fenced_release(void *data, enum fenced_release_kind kind);
+
+/* ========================== *
+ * Resource
+ * ========================== */
+
+struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state);
+
+void dx12_resource_release_now(struct dx12_resource *t);
+
+void GPU_ReleaseResource(GPU_Resource *resource);
+
+/* ========================== *
+ * Resource barrier
+ * ========================== */
+
+struct dx12_resource_barrier_desc
+{
+    enum D3D12_RESOURCE_BARRIER_TYPE type;
+    struct dx12_resource *resource;
+    enum D3D12_RESOURCE_STATES new_state;  /* 0 if type != D3D12_RESOURCE_BARRIER_TYPE_TRANSITION */
+};
+
+void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_descs, struct dx12_resource_barrier_desc *descs);
+
+/* ========================== *
+ * Command queue
+ * ========================== */
+
+P_JobDef(command_queue_alloc_job, job);
+
+void command_queue_release(struct command_queue *cq);
+
+/* ========================== *
+ * Command list
+ * ========================== */
+
+struct command_list_pool *command_list_pool_alloc(struct command_queue *cq);
+
+struct command_list *command_list_open(struct command_list_pool *pool);
+
+/* TODO: Allow multiple command list submissions */
+u64 command_list_close(struct command_list *cl);
+
+/* ========================== *
+ * Command descriptor heap (GPU / shader visible descriptor heap)
+ * ========================== */
+
+struct command_descriptor_heap *command_list_push_descriptor_heap(struct command_list *cl, struct cpu_descriptor_heap *dh_cpu);
+
+/* ========================== *
+ * Command buffer
+ * ========================== */
+
+u64 command_buffer_hash_from_size(u64 size);
+
+u64 align_up_pow2(u64 v);
+
+#define command_list_push_buffer(cl, count, elems) _command_list_push_buffer((cl), (count) * ((elems) ? sizeof(*(elems)) : 0), (elems), (elems) ? sizeof(*(elems)) : 1)  /* NOTE: `elems` is evaluated more than once; byte length is 0 and stride is 1 when `elems` is null */
+struct command_buffer *_command_list_push_buffer(struct command_list *cl, u64 data_len, void *data, u64 data_stride);
+
+/* ========================== *
+ * Wait job
+ * ========================== */
+
+struct dx12_wait_fence_job_sig
+{
+    ID3D12Fence *fence;
+    u64 target;
+};
+
+P_JobDef(dx12_wait_fence_job, job);
+
+/* ========================== *
+ * Texture
+ * ========================== */
+
+GPU_Resource *GPU_AllocTexture(GPU_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data);
+
+Vec2I32 GPU_GetTextureSize(GPU_Resource *resource);
+
+/* ========================== *
+ * Upload
+ * ========================== */
+
+P_JobDef(dx12_upload_job, job);
+
+/* ========================== *
+ * Run utils
+ * ========================== */
+
+void command_list_set_pipeline(struct command_list *cl, struct pipeline *pipeline);
+
+
+void command_list_set_sig(struct command_list *cl, void *src, u32 size);
+
+struct D3D12_VIEWPORT viewport_from_rect(Rect r);
+
+D3D12_RECT scissor_from_rect(Rect r);
+
+D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer *cb, u32 vertex_size);
+
+D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *cb, DXGI_FORMAT format);
+
+struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, Vec2I32 size, D3D12_RESOURCE_STATES initial_state);
+
+/* Calculate the view projection matrix */
+Inline Mat4x4 calculate_vp(Xform view, f32 viewport_width, f32 viewport_height);
+
+D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descriptor *descriptor, struct command_descriptor_heap *cdh);
+
+/* ========================== *
+ * Render sig
+ * ========================== */
+
+struct render_sig *render_sig_alloc(void);
+
+void render_sig_reset(struct render_sig *sig);
+
+GPU_RenderSig *GPU_AllocRenderSig(void);
+
+u32 GPU_PushRenderCmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *cmd_desc);
+
+/* ========================== *
+ * Render
+ * ========================== */
+
+GPU_Resource *GPU_RunRender(GPU_RenderSig *gp_render_sig, GPU_RenderParams params);
+
+/* ========================== *
+ * Memory info
+ * ========================== */
+
+GPU_MemoryInfo GPU_QueryMemoryInfo(void);
+
+/* ========================== *
+ * Swapchain
+ * ========================== */
+
+void swapchain_init_resources(struct swapchain *swapchain);
+
+GPU_Swapchain *GPU_AllocSwapchain(P_Window *window, Vec2I32 resolution);
+
+void GPU_ReleaseSwapchain(GPU_Swapchain *gp_swapchain);
+
+void GPU_WaitOnSwapchain(GPU_Swapchain *gp_swapchain);
+
+struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, Vec2I32 resolution);
+
+/* ========================== *
+ * Present
+ * ========================== */
+
+void present_blit(struct swapchain_buffer *dst, struct dx12_resource *src, Xform src_xf);
+
+void GPU_PresentSwapchain(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync);
+
+/* ========================== *
+ * Evictor job
+ * ========================== */
+
+P_JobDef(dx12_evictor_job, _);
diff --git a/src/sprite/sprite_core.c b/src/sprite/sprite_core.c
index 5a9af689..de5cff60 100644
--- a/src/sprite/sprite_core.c
+++ b/src/sprite/sprite_core.c
@@ -206,7 +206,7 @@ S_StartupReceipt sprite_startup(void)
             u32 width = 64;
             u32 height = 64;
             u32 *pixels = generate_purple_black_image(scratch.arena, width, height);
-            G.nil_texture->gp_texture = gp_texture_alloc(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(width, height), pixels);
+            G.nil_texture->gp_texture = GPU_AllocTexture(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(width, height), pixels);
             EndScratch(scratch);
         }
 
@@ -355,7 +355,7 @@ internal void cache_entry_load_texture(struct cache_ref ref, S_Tag tag)
             e->texture->height = decoded.height;
             e->texture->valid = 1;
             e->texture->loaded = 1;
-            e->texture->gp_texture = gp_texture_alloc(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB, 0, VEC2I32(decoded.width, decoded.height), decoded.pixels);
+            e->texture->gp_texture = GPU_AllocTexture(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB, 0, VEC2I32(decoded.width, decoded.height), decoded.pixels);
             /* TODO: Query gpu for more accurate texture size in VRAM */
             memory_size += (decoded.width * decoded.height) * sizeof(*decoded.pixels);
             success = 1;
@@ -1323,7 +1323,7 @@ internal P_JobDef(sprite_evictor_job, _)
                         for (struct evict_node *en = first_evicted; en; en = en->next_evicted) {
                             struct cache_entry *n = en->cache_entry;
                             if (n->kind == CACHE_ENTRY_KIND_TEXTURE && n->texture->valid) {
-                                gp_resource_release(n->texture->gp_texture);
+                                GPU_ReleaseResource(n->texture->gp_texture);
                             }
                             ReleaseArena(n->arena);
                         }
diff --git a/src/user/user_core.c b/src/user/user_core.c
index c533aeb4..fd2407bf 100644
--- a/src/user/user_core.c
+++ b/src/user/user_core.c
@@ -220,14 +220,14 @@ struct user_startup_receipt user_startup(F_StartupReceipt *font_sr,
     /* GPU handles */
     G.world_to_ui_xf = XformIdentity;
     G.world_to_render_xf = XformIdentity;
-    G.render_sig = gp_render_sig_alloc();
+    G.render_sig = GPU_AllocRenderSig();
 
     G.console_logs_arena = AllocArena(Gibi(64));
     //P_RegisterLogCallback(debug_console_log_callback, P_LogLevel_Success);
     P_RegisterLogCallback(debug_console_log_callback, P_LogLevel_Debug);
 
     G.window = P_AllocWindow();
-    G.swapchain = gp_swapchain_alloc(G.window, VEC2I32(100, 100));
+    G.swapchain = GPU_AllocSwapchain(G.window, VEC2I32(100, 100));
     P_ShowWindow(G.window);
 
     /* Start jobs  */
@@ -1917,7 +1917,7 @@ internal void user_update(P_Window *window)
      * Query vram
      * ========================== */
 
-    GPU_MemoryInfo vram = gp_query_memory_info();
+    GPU_MemoryInfo vram = GPU_QueryMemoryInfo();
 
     /* ========================== *
      * Draw global debug info
@@ -2063,11 +2063,11 @@ internal void user_update(P_Window *window)
             params.world_to_render_xf = G.world_to_render_xf;
             params.render_to_ui_xf = G.render_to_ui_xf;
             params.effects_disabled = effects_disabled;
-            render_texture = gp_run_render(G.render_sig, params);
+            render_texture = GPU_RunRender(G.render_sig, params);
         }
 
         /* Present */
-        gp_present(G.swapchain, backbuffer_resolution, render_texture, G.ui_to_screen_xf, VSYNC);
+        GPU_PresentSwapchain(G.swapchain, backbuffer_resolution, render_texture, G.ui_to_screen_xf, VSYNC);
     }
 
     /* ========================== *
@@ -2088,7 +2088,7 @@ internal P_JobDef(user_update_job, _)
             __profn("User sleep");
             {
                 __profn("Swapchain wait");
-                gp_swapchain_wait(G.swapchain);
+                GPU_WaitOnSwapchain(G.swapchain);
             }
             {
                 __profn("Frame limiter wait");