From 19c01408684575b16eebc0c2645334e7b7b3321b Mon Sep 17 00:00:00 2001 From: jacob Date: Thu, 31 Jul 2025 21:45:53 -0500 Subject: [PATCH] gpu layer refactor progress --- src/app/app_core.c | 2 +- src/draw/draw_core.c | 12 +- src/font/font_core.c | 2 +- src/gpu/gpu.c | 4 +- src/gpu/gpu.h | 4 + src/gpu/gpu_core.h | 27 +- src/gpu/{gpu_core_dx12.c => gpu_dx12.c} | 1728 +++++++++++------------ src/gpu/gpu_dx12.h | 692 +++++++++ src/sprite/sprite_core.c | 6 +- src/user/user_core.c | 12 +- 10 files changed, 1523 insertions(+), 966 deletions(-) rename src/gpu/{gpu_core_dx12.c => gpu_dx12.c} (73%) create mode 100644 src/gpu/gpu_dx12.h diff --git a/src/app/app_core.c b/src/app/app_core.c index b52a1aad..7b29a87a 100644 --- a/src/app/app_core.c +++ b/src/app/app_core.c @@ -233,7 +233,7 @@ void P_AppStartup(String args_str) /* Global systems */ RES_Startup(); W_Startup(); - gp_startup(); + GPU_Startup(); /* Subsystems */ AC_StartupReceipt asset_cache_sr = AC_Startup(); diff --git a/src/draw/draw_core.c b/src/draw/draw_core.c index 29dabf35..d5dfc5ba 100644 --- a/src/draw/draw_core.c +++ b/src/draw/draw_core.c @@ -9,7 +9,7 @@ D_StartupReceipt D_Startup(F_StartupReceipt *font_sr) D_SharedState *g = &D_shared_state; (UNUSED)font_sr; u32 pixel_white = 0xFFFFFFFF; - g->solid_white_texture = gp_texture_alloc(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(1, 1), &pixel_white); + g->solid_white_texture = GPU_AllocTexture(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(1, 1), &pixel_white); return (D_StartupReceipt) { 0 }; } @@ -26,7 +26,7 @@ void D_DrawMaterial(GPU_RenderSig *sig, D_MaterialParams params) cmd.material.tint = params.tint; cmd.material.is_light = params.is_light; cmd.material.light_emittance = params.light_emittance; - gp_push_render_cmd(sig, &cmd); + GPU_PushRenderCmd(sig, &cmd); } //////////////////////////////// @@ -39,7 +39,7 @@ void D_DrawPolyEx(GPU_RenderSig *sig, Vec2Array vertices, GPU_Indices indices, u cmd.ui_shape.vertices = vertices; cmd.ui_shape.indices = 
indices; cmd.ui_shape.color = color; - gp_push_render_cmd(sig, &cmd); + GPU_PushRenderCmd(sig, &cmd); } /* Draws a filled polygon using triangles in a fan pattern */ @@ -269,7 +269,7 @@ void D_DrawGrid(GPU_RenderSig *sig, Xform xf, u32 bg0_color, u32 bg1_color, u32 cmd.grid.line_thickness = thickness; cmd.grid.line_spacing = spacing; cmd.grid.offset = offset; - grid_id = gp_push_render_cmd(sig, &cmd); + grid_id = GPU_PushRenderCmd(sig, &cmd); } GPU_RenderCmdDesc cmd = ZI; @@ -277,7 +277,7 @@ void D_DrawGrid(GPU_RenderSig *sig, Xform xf, u32 bg0_color, u32 bg1_color, u32 cmd.material.xf = xf; cmd.material.tint = ColorWhite; cmd.material.grid_cmd_id = grid_id; - gp_push_render_cmd(sig, &cmd); + GPU_PushRenderCmd(sig, &cmd); } //////////////////////////////// @@ -291,7 +291,7 @@ void D_DrawUiRect(GPU_RenderSig *sig, D_UiRectParams params) cmd.ui_rect.texture = params.texture; cmd.ui_rect.clip = params.clip; cmd.ui_rect.tint = params.tint; - gp_push_render_cmd(sig, &cmd); + GPU_PushRenderCmd(sig, &cmd); } //////////////////////////////// diff --git a/src/font/font_core.c b/src/font/font_core.c index eb174f1e..62f281e0 100644 --- a/src/font/font_core.c +++ b/src/font/font_core.c @@ -93,7 +93,7 @@ P_JobDef(F_LoadAssetJob, job) RES_CloseResource(&res); /* Send texture to GPU */ - GPU_Resource *texture = gp_texture_alloc(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(result.image_width, result.image_height), result.image_pixels); + GPU_Resource *texture = GPU_AllocTexture(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(result.image_width, result.image_height), result.image_pixels); /* Allocate store memory */ F_Font *font = 0; diff --git a/src/gpu/gpu.c b/src/gpu/gpu.c index 87684511..271b0e7a 100644 --- a/src/gpu/gpu.c +++ b/src/gpu/gpu.c @@ -3,7 +3,7 @@ #include "../kernel/kernel.h" #if PlatformIsWindows -# include "gpu_core_dx12.c" +# include "gpu_dx12.c" #else -# error Gp core not implemented for this platform +# error Gpu layer not implemented for this platform #endif diff 
--git a/src/gpu/gpu.h b/src/gpu/gpu.h index 4f433130..8d248362 100644 --- a/src/gpu/gpu.h +++ b/src/gpu/gpu.h @@ -12,4 +12,8 @@ #include "gpu_core.h" +#if PlatformIsWindows +# include "gpu_dx12.h" +#endif + #endif diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index cd5b8cb0..24da4936 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -110,7 +110,7 @@ Struct(GPU_MemoryInfo) //////////////////////////////// //~ Startup -void gp_startup(void); +void GPU_Startup(void); //////////////////////////////// //~ Resource operations @@ -120,45 +120,42 @@ void gp_startup(void); * the caller to make sure the released resources aren't then referenced in * any runs */ -void gp_resource_release(GPU_Resource *resource); +void GPU_ReleaseResource(GPU_Resource *resource); //////////////////////////////// //~ Texture operations -GPU_Resource *gp_texture_alloc(GPU_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data); +GPU_Resource *GPU_AllocTexture(GPU_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data); -Vec2I32 gp_texture_get_size(GPU_Resource *texture); +Vec2I32 GPU_GetTextureSize(GPU_Resource *texture); //////////////////////////////// //~ Render operations -GPU_RenderSig *gp_render_sig_alloc(void); +GPU_RenderSig *GPU_AllocRenderSig(void); /* Returns a cmd id internal to the sig */ -u32 gp_push_render_cmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *desc); +u32 GPU_PushRenderCmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *desc); -GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams render_params); +GPU_Resource *GPU_RunRender(GPU_RenderSig *gp_render_sig, GPU_RenderParams render_params); //////////////////////////////// //~ Memory query -GPU_MemoryInfo gp_query_memory_info(void); +GPU_MemoryInfo GPU_QueryMemoryInfo(void); //////////////////////////////// //~ Swapchain -GPU_Swapchain *gp_swapchain_alloc(P_Window *window, Vec2I32 resolution); +GPU_Swapchain *GPU_AllocSwapchain(P_Window *window, Vec2I32 resolution); 
-void gp_swapchain_release(GPU_Swapchain *gp_swapchain); +void GPU_ReleaseSwapchain(GPU_Swapchain *gp_swapchain); /* Waits until a new backbuffer is ready to be written to. * This should be called before rendering for minimum latency. */ -void gp_swapchain_wait(GPU_Swapchain *gp_swapchain); - -//////////////////////////////// -//~ Present +void GPU_WaitOnSwapchain(GPU_Swapchain *gp_swapchain); /* 1. Clears the backbuffer and ensures it's at size `backbuffer_resolution` * 2. Blits `texture` to the backbuffer using `texture_xf` * 3. Presents the backbuffer */ -void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync); +void GPU_PresentSwapchain(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync); diff --git a/src/gpu/gpu_core_dx12.c b/src/gpu/gpu_dx12.c similarity index 73% rename from src/gpu/gpu_core_dx12.c rename to src/gpu/gpu_dx12.c index 062411d0..40fe3632 100644 --- a/src/gpu/gpu_core_dx12.c +++ b/src/gpu/gpu_dx12.c @@ -1,16 +1,7 @@ -//////////////////////////////// -//~ Windows headers +GPU_D12_SharedState GPU_D12_shared_state = ZI; -#pragma warning(push, 0) -# define UNICODE -# define COBJMACROS -# include -# include -# include -# include -# include -# include -#pragma warning(pop) +//////////////////////////////// +//~ Windows libs #pragma comment(lib, "d3d12") #pragma comment(lib, "dxgi") @@ -23,402 +14,48 @@ # pragma comment(lib, "advapi32") #endif -//////////////////////////////// -//~ Dx12 - -#define DX12_ALLOW_TEARING 1 -#define DX12_WAIT_FRAME_LATENCY 1 -#define DX12_SWAPCHAIN_FLAGS (((DX12_ALLOW_TEARING != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | ((DX12_WAIT_FRAME_LATENCY != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)) -#define DX12_SWAPCHAIN_BUFFER_COUNT (4) - -/* Arbitrary limits */ -#define DX12_NUM_CBV_SRV_UAV_DESCRIPTORS (1024 * 64) -#define DX12_NUM_RTV_DESCRIPTORS (1024 * 1) -#define 
DX12_COMMAND_BUFFER_MIN_SIZE (1024 * 64) - -#define DX12_MULTI_QUEUE !ProfilingIsEnabled -#if DX12_MULTI_QUEUE -# define DX12_QUEUE_DIRECT 0 -# define DX12_QUEUE_COMPUTE 1 -# define DX12_QUEUE_COPY 2 -# define DX12_QUEUE_COPY_BACKGROUND 3 -# define DX12_NUM_QUEUES 4 -#else -# define DX12_QUEUE_DIRECT 0 -# define DX12_QUEUE_COMPUTE 0 -# define DX12_QUEUE_COPY 0 -# define DX12_QUEUE_COPY_BACKGROUND 0 -# define DX12_NUM_QUEUES 1 -#endif - -#if RtcIsEnabled -//# define DX12_DEBUG 1 -# define DX12_DEBUG 0 -#else -# define DX12_DEBUG 0 -#endif - -/* ========================== * - * internal structs - * ========================== */ - -struct shader_desc { - String file; - String func; -}; - -struct pipeline_rtv_desc { - DXGI_FORMAT format; - b32 blending; -}; - -struct pipeline_desc { - String name; - - /* If a dxc string is set, then it will be used directly instead of looking up dxc from archive using pipeline name */ - String vs_dxc; - String ps_dxc; - String cs_dxc; - - struct pipeline_rtv_desc rtvs[8]; -}; - -struct pipeline { - String name; - u64 hash; - b32 success; - b32 is_gfx; - String error; - i64 compilation_time_ns; - - /* Lock global pipelines mutex when accessing */ - i64 refcount; - - ID3D12PipelineState *pso; - ID3D12RootSignature *rootsig; - struct pipeline_desc desc; - - struct pipeline *next; -}; - -struct pipeline_error { - String msg; - struct pipeline_error *next; -}; - -struct pipeline_include { - String name; - u64 name_hash; - struct pipeline_include *next; -}; - -struct pipeline_scope { - Arena *arena; - Dict *refs; - struct pipeline_scope *next_free; -}; - -struct command_queue_desc { - enum D3D12_COMMAND_LIST_TYPE type; - enum D3D12_COMMAND_QUEUE_PRIORITY priority; - String dbg_name; -}; - -struct command_queue { - struct command_queue_desc desc; - ID3D12CommandQueue *cq; - Arena *arena; - - P_Mutex submit_fence_mutex; - u64 submit_fence_target; - ID3D12Fence *submit_fence; - - struct command_list_pool *cl_pool; - -#if ProfilingGpu - 
__prof_dx12_ctx(prof); -#endif -}; - -struct command_list_pool { - struct command_queue *cq; - Arena *arena; - P_Mutex mutex; - struct command_list *first_submitted_command_list; - struct command_list *last_submitted_command_list; -}; - -struct command_list { - struct command_queue *cq; - struct command_list_pool *pool; - struct ID3D12CommandAllocator *ca; - struct ID3D12GraphicsCommandList *cl; - P_Lock global_record_lock; - - struct pipeline *cur_pipeline; - - struct command_descriptor_heap *first_command_descriptor_heap; - struct command_buffer *first_command_buffer; - - u64 submitted_fence_target; - struct command_list *prev_submitted; - struct command_list *next_submitted; -}; - -struct command_descriptor_heap { - D3D12_DESCRIPTOR_HEAP_TYPE type; - ID3D12DescriptorHeap *heap; - D3D12_CPU_DESCRIPTOR_HANDLE start_cpu_handle; - D3D12_GPU_DESCRIPTOR_HANDLE start_gpu_handle; - - struct command_descriptor_heap *next_in_command_list; - - u64 submitted_fence_target; - struct command_queue *submitted_cq; - struct command_descriptor_heap *prev_submitted; - struct command_descriptor_heap *next_submitted; -}; - -struct command_buffer { - struct command_buffer_group *group; - - u64 size; - struct dx12_resource *resource; - D3D12_VERTEX_BUFFER_VIEW vbv; - D3D12_INDEX_BUFFER_VIEW Ibv; - - struct command_buffer *next_in_command_list; - - u64 submitted_fence_target; - struct command_queue *submitted_cq; - struct command_buffer *prev_submitted; - struct command_buffer *next_submitted; -}; - -struct command_buffer_group { - struct command_buffer *first_submitted; - struct command_buffer *last_submitted; -}; - -struct descriptor { - struct cpu_descriptor_heap *heap; - - u32 index; - D3D12_CPU_DESCRIPTOR_HANDLE handle; - - struct descriptor *next_free; -}; - -struct dx12_resource { - enum D3D12_RESOURCE_STATES state; - ID3D12Resource *resource; - struct descriptor *cbv_descriptor; - struct descriptor *srv_descriptor; - struct descriptor *uav_descriptor; - struct descriptor 
*rtv_descriptor; - - D3D12_GPU_VIRTUAL_ADDRESS gpu_address; /* NOTE: 0 for textures */ - - Vec2I32 texture_size; - struct dx12_resource *next_free; -}; - -struct swapchain_buffer { - struct swapchain *swapchain; - ID3D12Resource *resource; - struct descriptor *rtv_descriptor; - D3D12_RESOURCE_STATES state; -}; - -struct swapchain { - IDXGISwapChain3 *swapchain; - HWND hwnd; - HANDLE waitable; - Vec2I32 resolution; - struct swapchain_buffer buffers[DX12_SWAPCHAIN_BUFFER_COUNT]; - - struct swapchain *next_free; -}; - -struct cpu_descriptor_heap { - enum D3D12_DESCRIPTOR_HEAP_TYPE type; - Arena *arena; - P_Mutex mutex; - - u32 descriptor_size; - u32 num_descriptors_reserved; - u32 num_descriptors_capacity; - - struct descriptor *first_free_descriptor; - - ID3D12DescriptorHeap *heap; - struct D3D12_CPU_DESCRIPTOR_HANDLE handle; -}; - -enum fenced_release_kind { - FENCED_RELEASE_KIND_NONE, - FENCED_RELEASE_KIND_RESOURCE, - FENCED_RELEASE_KIND_PIPELINE -}; - -struct fenced_release_data { - enum fenced_release_kind kind; - void *ptr; -}; - -/* ========================== * - * internal procs - * ========================== */ - -internal P_ExitFuncDef(gp_shutdown); - -internal void dx12_init_device(void); - -internal void dx12_init_objects(void); - -internal void dx12_init_pipelines(void); - -internal void dx12_init_noise(void); - -internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type); - -internal void command_queue_release(struct command_queue *cq); - -internal P_JobDef(dx12_evictor_job, _); - -internal void fenced_release(void *data, enum fenced_release_kind kind); - -internal struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state); - -internal struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh); - -struct command_queue_alloc_job_sig { struct command_queue_desc *descs_in; struct command_queue 
**cqs_out; }; -internal P_JobDef(command_queue_alloc_job, job); - -struct pipeline_alloc_job_sig { struct pipeline_desc *descs_in; struct pipeline **pipelines_out; }; -internal P_JobDef(pipeline_alloc_job, job); - -struct dx12_upload_job_sig { struct dx12_resource *resource; void *data; }; -internal P_JobDef(dx12_upload_job, job); - -#if RESOURCE_RELOADING -internal W_CallbackFuncDef(pipeline_watch_callback, name); -#endif - -/* ========================== * - * Global state - * ========================== */ - -Global struct { - Atomic32 initialized; - - /* Descriptor heaps pool */ - P_Mutex command_descriptor_heaps_mutex; - Arena *command_descriptor_heaps_arena; - struct command_descriptor_heap *first_submitted_command_descriptor_heap; - struct command_descriptor_heap *last_submitted_command_descriptor_heap; - - /* Command buffers pool */ - P_Mutex command_buffers_mutex; - Arena *command_buffers_arena; - Dict *command_buffers_dict; - - /* Resources pool */ - P_Mutex resources_mutex; - Arena *resources_arena; - struct dx12_resource *first_free_resource; - - /* Swapchains pool */ - P_Mutex swapchains_mutex; - Arena *swapchains_arena; - struct swapchain *first_free_swapchain; - - /* Shader bytecode archive */ - TAR_Archive dxc_archive; - - /* Pipeline cache */ - P_Mutex pipelines_mutex; - Arena *pipelines_arena; - struct pipeline *first_free_pipeline; - Dict *pipeline_descs; - Dict *top_pipelines; /* Latest pipelines */ - Dict *top_successful_pipelines; /* Latest pipelines that successfully compiled */ - struct pipeline_scope *first_free_pipeline_scope; - - /* Fenced release queue */ - P_Mutex fenced_releases_mutex; - Arena *fenced_releases_arena; - u64 fenced_release_targets[DX12_NUM_QUEUES]; - - /* Factory */ - IDXGIFactory6 *factory; - - /* Adapter */ - IDXGIAdapter1 *adapter; - - /* Device */ - ID3D12Device *device; - - /* Descriptor sizes */ - u32 desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; - u32 desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; - - /* 
Global descriptor heaps */ - struct cpu_descriptor_heap *cbv_srv_uav_heap; - struct cpu_descriptor_heap *rtv_heap; - - /* Command queues */ - P_Mutex global_command_list_record_mutex; - P_Mutex global_submit_mutex; - struct command_queue *command_queues[DX12_NUM_QUEUES]; - - /* Evictor job */ - P_Counter evictor_job_counter; - P_Cv evictor_wake_cv; - P_Mutex evictor_wake_mutex; - i64 evictor_wake_gen; - b32 evictor_shutdown; -} G = ZI, DebugAlias(G, G_gp_dx12); - /* ========================== * * Startup * ========================== */ -void gp_startup(void) +void GPU_Startup(void) { __prof; - if (Atomic32FetchTestSet(&G.initialized, 0, 1) != 0) { + GPU_D12_SharedState *g = &GPU_D12_shared_state; + if (Atomic32FetchTestSet(&g->initialized, 0, 1) != 0) + { P_Panic(Lit("GP layer already initialized")); } /* Initialize command descriptor heaps pool */ - G.command_descriptor_heaps_arena = AllocArena(Gibi(64)); + g->command_descriptor_heaps_arena = AllocArena(Gibi(64)); /* Initialize command buffers pool */ - G.command_buffers_arena = AllocArena(Gibi(64)); - G.command_buffers_dict = InitDict(G.command_buffers_arena, 4096); + g->command_buffers_arena = AllocArena(Gibi(64)); + g->command_buffers_dict = InitDict(g->command_buffers_arena, 4096); /* Initialize resources pool */ - G.resources_arena = AllocArena(Gibi(64)); + g->resources_arena = AllocArena(Gibi(64)); /* Initialize swapchains pool */ - G.swapchains_arena = AllocArena(Gibi(64)); + g->swapchains_arena = AllocArena(Gibi(64)); /* Initialize pipeline cache */ - G.pipelines_arena = AllocArena(Gibi(64)); - G.pipeline_descs = InitDict(G.pipelines_arena, 1024); - G.top_pipelines = InitDict(G.pipelines_arena, 1024); - G.top_successful_pipelines = InitDict(G.pipelines_arena, 1024); + g->pipelines_arena = AllocArena(Gibi(64)); + g->pipeline_descs = InitDict(g->pipelines_arena, 1024); + g->top_pipelines = InitDict(g->pipelines_arena, 1024); + g->top_successful_pipelines = InitDict(g->pipelines_arena, 1024); /* Initialize 
fenced releases queue */ - G.fenced_releases_arena = AllocArena(Gibi(64)); + g->fenced_releases_arena = AllocArena(Gibi(64)); /* Initialize embedded shader archive */ String embedded_data = INC_GetDxcTar(); - if (embedded_data.len <= 0) { + if (embedded_data.len <= 0) + { P_Panic(Lit("No embedded shaders found")); } - G.dxc_archive = TAR_ArchiveFromString(G.pipelines_arena, embedded_data, Lit("")); + g->dxc_archive = TAR_ArchiveFromString(g->pipelines_arena, embedded_data, Lit("")); /* Initialize dx12 */ /* TODO: Parallelize phases */ @@ -434,38 +71,40 @@ void gp_startup(void) P_OnExit(gp_shutdown); /* Start evictor job */ - P_Run(1, dx12_evictor_job, 0, P_Pool_Background, P_Priority_Low, &G.evictor_job_counter); + P_Run(1, dx12_evictor_job, 0, P_Pool_Background, P_Priority_Low, &g->evictor_job_counter); } -internal P_ExitFuncDef(gp_shutdown) +P_ExitFuncDef(gp_shutdown) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; #if 0 /* Release objects to make live object reporting less noisy */ - //IDXGISwapChain3_Release(G.swapchain); - for (u32 i = 0; i < countof(G.command_queues); ++i) { - struct command_queue *cq = G.command_queues[i]; + //IDXGISwapChain3_Release(g->swapchain); + for (u32 i = 0; i < countof(g->command_queues); ++i) + { + struct command_queue *cq = g->command_queues[i]; cmomand_queue_release(cq); } - ID3D12Device_Release(G.device); + ID3D12Device_Release(g->device); #else (UNUSED)command_queue_release; #endif { - P_Lock lock = P_LockE(&G.evictor_wake_mutex); - G.evictor_shutdown = 1; - P_SignalCv(&G.evictor_wake_cv, I32Max); + P_Lock lock = P_LockE(&g->evictor_wake_mutex); + g->evictor_shutdown = 1; + P_SignalCv(&g->evictor_wake_cv, I32Max); P_Unlock(&lock); } - P_WaitOnCounter(&G.evictor_job_counter); + P_WaitOnCounter(&g->evictor_job_counter); } /* ========================== * * Dx12 device initialization * ========================== */ -internal void dx12_init_error(String error) +void dx12_init_error(String error) { TempArena scratch = 
BeginScratchNoConflict(); String msg = StringFormat(scratch.arena, Lit("Failed to initialize DirectX 12.\n\n%F"), FmtString(error)); @@ -473,9 +112,10 @@ internal void dx12_init_error(String error) EndScratch(scratch); } -internal void dx12_init_device(void) +void dx12_init_device(void) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; TempArena scratch = BeginScratchNoConflict(); HRESULT hr = 0; @@ -486,13 +126,15 @@ internal void dx12_init_device(void) __profn("Enable debug layer"); ID3D12Debug *debug_controller0 = 0; hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0); - if (FAILED(hr)) { + if (FAILED(hr)) + { dx12_init_error(Lit("Failed to create ID3D12Debug0")); } ID3D12Debug1 *debug_controller1 = 0; hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1); - if (FAILED(hr)) { + if (FAILED(hr)) + { dx12_init_error(Lit("Failed to create ID3D12Debug1")); } @@ -510,8 +152,9 @@ internal void dx12_init_device(void) /* Create factory */ { __profn("Create factory"); - hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G.factory); - if (FAILED(hr)) { + hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&g->factory); + if (FAILED(hr)) + { dx12_init_error(Lit("Failed to initialize DXGI factory")); } } @@ -525,41 +168,49 @@ internal void dx12_init_device(void) String first_gpu_name = ZI; u32 adapter_index = 0; b32 skip = 0; /* For debugging iGPU */ - for (;;) { + for (;;) + { { - hr = IDXGIFactory6_EnumAdapterByGpuPreference(G.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter); + hr = IDXGIFactory6_EnumAdapterByGpuPreference(g->factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter); } - if (SUCCEEDED(hr)) { + if (SUCCEEDED(hr)) + { DXGI_ADAPTER_DESC1 desc; IDXGIAdapter1_GetDesc1(adapter, &desc); - if (first_gpu_name.len == 0) { + if (first_gpu_name.len == 0) 
+ { first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description); } { hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); } - if (SUCCEEDED(hr) && !skip ) { + if (SUCCEEDED(hr) && !skip) + { break; } - skip = 0; + skip = 0; ID3D12Device_Release(device); IDXGIAdapter1_Release(adapter); adapter = 0; device = 0; ++adapter_index; - } else { + } + else + { break; } } - if (!device) { - if (first_gpu_name.len > 0) { + if (!device) + { + if (first_gpu_name.len > 0) + { String fmt = Lit("Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date."); error = StringFormat(scratch.arena, fmt, FmtString(first_gpu_name)); } dx12_init_error(error); } - G.adapter = adapter; - G.device = device; + g->adapter = adapter; + g->device = device; } #if DX12_DEBUG @@ -567,8 +218,9 @@ internal void dx12_init_device(void) { __profn("Enable d3d12 debug break"); ID3D12InfoQueue *info = 0; - hr = ID3D12Device_QueryInterface(G.device, &IID_ID3D12InfoQueue, (void **)&info); - if (FAILED(hr)) { + hr = ID3D12Device_QueryInterface(g->device, &IID_ID3D12InfoQueue, (void **)&info); + if (FAILED(hr)) + { dx12_init_error(Lit("Failed to query ID3D12Device interface")); } ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1); @@ -581,7 +233,8 @@ internal void dx12_init_device(void) __profn("Enable dxgi debug break"); IDXGIInfoQueue *dxgi_info = 0; hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info); - if (FAILED(hr)) { + if (FAILED(hr)) + { dx12_init_error(Lit("Failed to get DXGI debug interface")); } IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1); @@ -597,29 +250,38 @@ internal void dx12_init_device(void) b32 success = 1; HKEY key = 0; success = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, KEY_READ, &key) == 
ERROR_SUCCESS; - if (success) { + if (success) + { DWORD value = ZI; DWORD dword_size = sizeof(DWORD); success = RegQueryValueExW(key, L"AllowDevelopmentWithoutDevLicense", 0, 0, (LPBYTE)&value, &dword_size) == ERROR_SUCCESS; RegCloseKey(key); - if (success) { + if (success) + { success = value != 0; } } P_LogInfoF("D3D12 profiling is enabled, attempting to set stable power state (this will increase GPU timing stability at the cost of performance)"); - if (success) { + if (success) + { P_LogInfoF("Machine is in developer mode, calling ID3D12Device::SetStablePowerState"); - hr = ID3D12Device_SetStablePowerState(G.device, 1); - if (SUCCEEDED(hr)) { + hr = ID3D12Device_SetStablePowerState(g->device, 1); + if (SUCCEEDED(hr)) + { P_LogInfoF("ID3D12Device::SetStablePowerState succeeded"); - } else { + } + else + { success = 0; P_LogErrorF("ID3D12Device::SetStablePowerState failed"); } - } else { + } + else + { P_LogWarningF("Machine is not in developer mode, cannot call ID3D12Device::SetStablePowerState"); } - if (!success) { + if (!success) + { P_LogWarningF("Profiling is enabled, but ID3D12Device::SetStablePowerState could not be called. 
This means that GPU timing may be unreliable."); } } @@ -632,23 +294,24 @@ internal void dx12_init_device(void) * Dx12 object initialization * ========================== */ -internal void dx12_init_objects(void) +void dx12_init_objects(void) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; /* Initialize desc sizes */ - G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); - G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV); - G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV); /* Initialize desc counts */ - G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS; - G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = DX12_NUM_RTV_DESCRIPTORS; + g->desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS; + g->desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = DX12_NUM_RTV_DESCRIPTORS; /* Create global descriptor heaps */ - G.cbv_srv_uav_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - G.rtv_heap = 
cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + g->cbv_srv_uav_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + g->rtv_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); /* Create command queues */ { @@ -661,7 +324,7 @@ internal void dx12_init_objects(void) }; struct command_queue_alloc_job_sig sig = ZI; sig.descs_in = params; - sig.cqs_out = G.command_queues; + sig.cqs_out = g->command_queues; { P_Counter counter = ZI; P_Run(DX12_NUM_QUEUES, command_queue_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter); @@ -671,10 +334,11 @@ internal void dx12_init_objects(void) { /* Initialize serially for consistent order in profiler */ __profn("Initialize command queue profiling contexts"); - for (i32 i = 0; i < DX12_NUM_QUEUES; ++i) { - struct command_queue *cq = G.command_queues[i]; + for (i32 i = 0; i < DX12_NUM_QUEUES; ++i) + { + struct command_queue *cq = g->command_queues[i]; String dbg_name = params[i].dbg_name; - __prof_dx12_ctx_alloc(cq->prof, G.device, cq->cq, dbg_name.text, dbg_name.len); + __prof_dx12_ctx_alloc(cq->prof, g->device, cq->cq, dbg_name.text, dbg_name.len); (UNUSED)dbg_name; } } @@ -686,67 +350,69 @@ internal void dx12_init_objects(void) * Dx12 pipeline initialization * ========================== */ -internal void pipeline_register(u64 num_pipelines, struct pipeline **pipelines); +void pipeline_register(u64 num_pipelines, struct pipeline **pipelines); -internal void dx12_init_pipelines(void) +void dx12_init_pipelines(void) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; TempArena scratch = BeginScratchNoConflict(); /* Register pipeline descs */ { /* Material pipeline */ { - struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc); + struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc); desc->name = Lit("kernel_material"); desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM; desc->rtvs[0].blending = 1; desc->rtvs[1].format = 
DXGI_FORMAT_R16G16B16A16_FLOAT; desc->rtvs[1].blending = 1; - SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); + SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); } /* Flood pipeline */ { - struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc); + struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc); desc->name = Lit("kernel_flood"); - SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); + SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); } /* Shade pipeline */ { - struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc); + struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc); desc->name = Lit("kernel_shade"); - SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); + SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); } /* Shape pipeline */ { - struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc); + struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc); desc->name = Lit("kernel_shape"); desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM; desc->rtvs[0].blending = 1; - SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); + SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); } /* UI pipeline */ { - struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc); + struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc); desc->name = Lit("kernel_ui"); desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM; desc->rtvs[0].blending = 1; - SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, 
desc->name), (u64)desc); + SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); } /* Blit pipeilne */ { - struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc); + struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc); desc->name = Lit("kernel_blit"); desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM; desc->rtvs[0].blending = 1; - SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); + SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); } } /* Compile pipelines */ u32 num_pipelines = 0; struct pipeline_desc *descs = PushDry(scratch.arena, struct pipeline_desc); - for (DictEntry *entry = G.pipeline_descs->first; entry; entry = entry->next) { + for (DictEntry *entry = g->pipeline_descs->first; entry; entry = entry->next) + { struct pipeline_desc *desc = (struct pipeline_desc *)entry->value; *PushStruct(scratch.arena, struct pipeline_desc) = *desc; ++num_pipelines; @@ -761,15 +427,20 @@ internal void dx12_init_pipelines(void) P_Run(num_pipelines, pipeline_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter); P_WaitOnCounter(&counter); } - for (u32 i = 0; i < num_pipelines; ++i) { + for (u32 i = 0; i < num_pipelines; ++i) + { struct pipeline *pipeline = pipelines[i]; - if (pipeline->success) { + if (pipeline->success) + { P_LogSuccessF("Successfully compiled pipeline \"%F\" in %F seconds", FmtString(pipeline->name), FmtFloat(SecondsFromNs(pipeline->compilation_time_ns))); - if (pipeline->error.len) { + if (pipeline->error.len) + { String msg = StringFormat(scratch.arena, Lit("Warning while compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(pipeline->error)); P_LogWarning(msg); } - } else { + } + else + { String error = pipeline->error.len > 0 ? 
pipeline->error : Lit("Unknown error"); String msg = StringFormat(scratch.arena, Lit("Error initializing pipeline \"%F\":\n\n%F"), FmtString(pipeline->name), FmtString(error)); P_LogError(msg); @@ -785,8 +456,9 @@ internal void dx12_init_pipelines(void) * Noise texture initialization * ========================== */ -internal void dx12_init_noise(void) +void dx12_init_noise(void) { + GPU_D12_SharedState *g = &GPU_D12_shared_state; TempArena scratch = BeginScratchNoConflict(); { @@ -795,13 +467,15 @@ internal void dx12_init_noise(void) DXGI_FORMAT format = DXGI_FORMAT_R16_UINT; //u32 expected_size = K_BLUE_NOISE_TEX_WIDTH * K_BLUE_NOISE_TEX_HEIGHT * K_BLUE_NOISE_TEX_DEPTH * 2; u32 expected_size = K_BLUE_NOISE_TEX_WIDTH * K_BLUE_NOISE_TEX_HEIGHT * K_BLUE_NOISE_TEX_DEPTH * 2; - if (RES_ResourceExists(&noise_res)) { + if (RES_ResourceExists(&noise_res)) + { String data = RES_GetResourceData(&noise_res); - if (data.len != expected_size) { + if (data.len != expected_size) + { P_Panic(StringFormat(scratch.arena, - Lit("Noise texture has unexpected size for a %Fx%Fx%F texture (expected %F, got %F)"), - FmtUint(K_BLUE_NOISE_TEX_WIDTH), FmtUint(K_BLUE_NOISE_TEX_HEIGHT), FmtUint(K_BLUE_NOISE_TEX_DEPTH), - FmtUint(expected_size), FmtUint(data.len))); + Lit("Noise texture has unexpected size for a %Fx%Fx%F texture (expected %F, got %F)"), + FmtUint(K_BLUE_NOISE_TEX_WIDTH), FmtUint(K_BLUE_NOISE_TEX_HEIGHT), FmtUint(K_BLUE_NOISE_TEX_DEPTH), + FmtUint(expected_size), FmtUint(data.len))); } { D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT }; @@ -824,8 +498,8 @@ internal void dx12_init_noise(void) desc.SampleDesc.Quality = 0; struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, D3D12_RESOURCE_STATE_COPY_DEST); - r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap); - ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle); + r->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap); + 
ID3D12Device_CreateShaderResourceView(g->device, r->resource, 0, r->srv_descriptor->handle); /* Upload texture */ { @@ -837,7 +511,9 @@ internal void dx12_init_noise(void) P_WaitOnCounter(&counter); } } - } else { + } + else + { P_Panic(StringFormat(scratch.arena, Lit("Noise resource \"%F\" not found"), FmtString(noise_res_name))); } RES_CloseResource(&noise_res); @@ -852,27 +528,7 @@ internal void dx12_init_noise(void) #if RESOURCE_RELOADING -struct shader_compile_desc { - String src; - String friendly_name; - String entry; - String target; -}; - -struct shader_compile_result { - i64 elapsed_ns; - String dxc; - String errors; - b32 success; -}; - -struct shader_compile_job_sig { - Arena *arena; - struct shader_compile_desc *descs; - struct shader_compile_result *results; -}; - -internal P_JobDef(shader_compile_job, job) +P_JobDef(shader_compile_job, job) { __prof; struct shader_compile_job_sig *sig = job.sig; @@ -898,10 +554,12 @@ internal P_JobDef(shader_compile_job, job) }; u32 num_args = countof(shader_args) + dxc_args_array.count; String *args = PushStructs(scratch.arena, String, num_args); - for (u32 i = 0; i < countof(shader_args); ++i) { + for (u32 i = 0; i < countof(shader_args); ++i) + { args[i] = shader_args[i]; } - for (u32 i = 0; i < dxc_args_array.count; ++i) { + for (u32 i = 0; i < dxc_args_array.count; ++i) + { args[i + countof(shader_args)] = dxc_args_array.strings[i]; } dxc_result = DXC_Compile(arena, desc->src, num_args, args); @@ -921,21 +579,25 @@ internal P_JobDef(shader_compile_job, job) * Pipeline * ========================== */ -internal P_JobDef(pipeline_alloc_job, job) +P_JobDef(pipeline_alloc_job, job) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; struct pipeline_alloc_job_sig *sig = job.sig; struct pipeline_desc *desc = &sig->descs_in[job.id]; struct pipeline **pipelines_out = sig->pipelines_out; struct pipeline *pipeline = 0; { - P_Lock lock = P_LockE(&G.pipelines_mutex); - if (G.first_free_pipeline) { - pipeline = 
G.first_free_pipeline; - G.first_free_pipeline = pipeline->next; - } else { - pipeline = PushStructNoZero(G.pipelines_arena, struct pipeline); + P_Lock lock = P_LockE(&g->pipelines_mutex); + if (g->first_free_pipeline) + { + pipeline = g->first_free_pipeline; + g->first_free_pipeline = pipeline->next; + } + else + { + pipeline = PushStructNoZero(g->pipelines_arena, struct pipeline); } P_Unlock(&lock); } @@ -955,22 +617,26 @@ internal P_JobDef(pipeline_alloc_job, job) String error_str = ZI; - String vs_dxc = desc->vs_dxc.len > 0 ? desc->vs_dxc : TAR_EntryFromName(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".vs")))->data; - String ps_dxc = desc->ps_dxc.len > 0 ? desc->ps_dxc : TAR_EntryFromName(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".ps")))->data; - String cs_dxc = desc->cs_dxc.len > 0 ? desc->cs_dxc : TAR_EntryFromName(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".cs")))->data; - if (success && vs_dxc.len > 0 && ps_dxc.len <= 0) { + String vs_dxc = desc->vs_dxc.len > 0 ? desc->vs_dxc : TAR_EntryFromName(&g->dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".vs")))->data; + String ps_dxc = desc->ps_dxc.len > 0 ? desc->ps_dxc : TAR_EntryFromName(&g->dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".ps")))->data; + String cs_dxc = desc->cs_dxc.len > 0 ? 
desc->cs_dxc : TAR_EntryFromName(&g->dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".cs")))->data; + if (success && vs_dxc.len > 0 && ps_dxc.len <= 0) + { error_str = Lit("Pipeline has vertex shader without pixel shader"); success = 0; } - if (success && vs_dxc.len <= 0 && ps_dxc.len > 0) { + if (success && vs_dxc.len <= 0 && ps_dxc.len > 0) + { error_str = Lit("Pipeline has pixel shader without vertex shader"); success = 0; } - if (success && cs_dxc.len > 0 && (vs_dxc.len > 0 || ps_dxc.len > 0)) { + if (success && cs_dxc.len > 0 && (vs_dxc.len > 0 || ps_dxc.len > 0)) + { error_str = Lit("Pipeline has a compute shader with a vertex/pixel shader"); success = 0; } - if (success && cs_dxc.len <= 0 && vs_dxc.len <= 0 && ps_dxc.len <= 0) { + if (success && cs_dxc.len <= 0 && vs_dxc.len <= 0 && ps_dxc.len <= 0) + { error_str = Lit("Pipeline has no shaders"); success = 0; } @@ -978,29 +644,41 @@ internal P_JobDef(pipeline_alloc_job, job) ID3D10Blob *vs_blob = 0; ID3D10Blob *ps_blob = 0; ID3D10Blob *cs_blob = 0; - if (success && vs_dxc.len > 0) { + if (success && vs_dxc.len > 0) + { hr = D3DCreateBlob(vs_dxc.len, &vs_blob); - if (SUCCEEDED(hr)) { + if (SUCCEEDED(hr)) + { CopyBytes(ID3D10Blob_GetBufferPointer(vs_blob), vs_dxc.text, vs_dxc.len); - } else { + } + else + { error_str = Lit("Failed to create vertex shader blob"); success = 0; } } - if (success && ps_dxc.len > 0) { + if (success && ps_dxc.len > 0) + { hr = D3DCreateBlob(ps_dxc.len, &ps_blob); - if (SUCCEEDED(hr)) { + if (SUCCEEDED(hr)) + { CopyBytes(ID3D10Blob_GetBufferPointer(ps_blob), ps_dxc.text, ps_dxc.len); - } else { + } + else + { error_str = Lit("Failed to create pixel shader blob"); success = 0; } } - if (success && cs_dxc.len > 0) { + if (success && cs_dxc.len > 0) + { hr = D3DCreateBlob(cs_dxc.len, &cs_blob); - if (SUCCEEDED(hr)) { + if (SUCCEEDED(hr)) + { CopyBytes(ID3D10Blob_GetBufferPointer(cs_blob), cs_dxc.text, cs_dxc.len); - } else { + } + else + { error_str = Lit("Failed to create 
compute shader blob"); success = 0; } @@ -1011,22 +689,30 @@ internal P_JobDef(pipeline_alloc_job, job) * could reuse the shader blob), however we'd like to verify that the * root signature exists and matches between vs & ps shaders. */ ID3D10Blob *rootsig_blob = 0; - if (success) { + if (success) + { __profn("Validate root signatures"); - if (cs_dxc.len > 0) { + if (cs_dxc.len > 0) + { u32 cs_rootsig_data_len = 0; ID3D10Blob *cs_rootsig_blob = 0; D3DGetBlobPart(ID3D10Blob_GetBufferPointer(cs_blob), ID3D10Blob_GetBufferSize(cs_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &cs_rootsig_blob); - if (cs_rootsig_blob) { + if (cs_rootsig_blob) + { cs_rootsig_data_len = ID3D10Blob_GetBufferSize(cs_rootsig_blob); } - if (cs_rootsig_data_len == 0) { + if (cs_rootsig_data_len == 0) + { success = 0; error_str = Lit("Compute shader is missing root signature"); - } else { + } + else + { rootsig_blob = cs_rootsig_blob; } - } else { + } + else + { char *vs_rootsig_data = 0; char *ps_rootsig_data = 0; u32 vs_rootsig_data_len = 0; @@ -1035,27 +721,37 @@ internal P_JobDef(pipeline_alloc_job, job) ID3D10Blob *ps_rootsig_blob = 0; D3DGetBlobPart(ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &vs_rootsig_blob); D3DGetBlobPart(ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &ps_rootsig_blob); - if (vs_rootsig_blob) { + if (vs_rootsig_blob) + { vs_rootsig_data = ID3D10Blob_GetBufferPointer(vs_rootsig_blob); vs_rootsig_data_len = ID3D10Blob_GetBufferSize(vs_rootsig_blob); } - if (ps_rootsig_blob) { + if (ps_rootsig_blob) + { ps_rootsig_data = ID3D10Blob_GetBufferPointer(ps_rootsig_blob); ps_rootsig_data_len = ID3D10Blob_GetBufferSize(ps_rootsig_blob); } - if (vs_rootsig_data_len == 0) { + if (vs_rootsig_data_len == 0) + { success = 0; error_str = Lit("Vertex shader is missing root signature"); - } else if (ps_rootsig_data_len == 0) { + } + else if (ps_rootsig_data_len == 0) + { success = 0; 
error_str = Lit("Pixel shader is missing root signature"); - } else if (vs_rootsig_data_len != ps_rootsig_data_len || !EqBytes(vs_rootsig_data, ps_rootsig_data, vs_rootsig_data_len)) { + } + else if (vs_rootsig_data_len != ps_rootsig_data_len || !EqBytes(vs_rootsig_data, ps_rootsig_data, vs_rootsig_data_len)) + { success = 0; error_str = Lit("Root signature mismatch between vertex and pixel shader"); - } else { + } + else + { rootsig_blob = vs_rootsig_blob; } - if (ps_rootsig_blob) { + if (ps_rootsig_blob) + { ID3D10Blob_Release(ps_rootsig_blob); } } @@ -1063,10 +759,12 @@ internal P_JobDef(pipeline_alloc_job, job) /* Create root signature */ ID3D12RootSignature *rootsig = 0; - if (success) { + if (success) + { __profn("Create root signature"); - hr = ID3D12Device_CreateRootSignature(G.device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig); - if (FAILED(hr)) { + hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig); + if (FAILED(hr)) + { error_str = Lit("Failed to create root signature"); success = 0; } @@ -1074,15 +772,19 @@ internal P_JobDef(pipeline_alloc_job, job) /* Create PSO */ ID3D12PipelineState *pso = 0; - if (success) { - if (cs_dxc.len > 0) { + if (success) + { + if (cs_dxc.len > 0) + { __profn("Create compute PSO"); D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = { 0 }; pso_desc.pRootSignature = rootsig; pso_desc.CS.pShaderBytecode = ID3D10Blob_GetBufferPointer(cs_blob); pso_desc.CS.BytecodeLength = ID3D10Blob_GetBufferSize(cs_blob); - hr = ID3D12Device_CreateComputePipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); - } else { + hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); + } + else + { __profn("Create graphics PSO"); /* Default rasterizer state */ @@ -1108,9 
+810,11 @@ internal P_JobDef(pipeline_alloc_job, job) .AlphaToCoverageEnable = 0, .IndependentBlendEnable = 1 }; - for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) { + for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) + { StaticAssert(countof(blend_desc.RenderTarget) <= countof(desc->rtvs)); - if (desc->rtvs[i].format != DXGI_FORMAT_UNKNOWN) { + if (desc->rtvs[i].format != DXGI_FORMAT_UNKNOWN) + { b32 blending_enabled = desc->rtvs[i].blending; blend_desc.RenderTarget[i].BlendEnable = blending_enabled; blend_desc.RenderTarget[i].SrcBlend = D3D12_BLEND_SRC_ALPHA; @@ -1120,7 +824,9 @@ internal P_JobDef(pipeline_alloc_job, job) blend_desc.RenderTarget[i].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; blend_desc.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD; blend_desc.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; - } else { + } + else + { break; } } @@ -1144,27 +850,33 @@ internal P_JobDef(pipeline_alloc_job, job) pso_desc.DepthStencilState = depth_stencil_desc; pso_desc.InputLayout = input_layout_desc; pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; - for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) { + for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) + { StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc->rtvs)); DXGI_FORMAT format = desc->rtvs[i].format; - if (format != DXGI_FORMAT_UNKNOWN) { + if (format != DXGI_FORMAT_UNKNOWN) + { pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format; - } else { + } + else + { break; } } pso_desc.SampleDesc.Count = 1; pso_desc.SampleDesc.Quality = 0; - hr = ID3D12Device_CreateGraphicsPipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); + hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); } - if (FAILED(hr)) { + if (FAILED(hr)) + { error_str = Lit("Failed to create pipeline state object"); success = 0; } } /* Parse errors */ - if (!success && error_str.len <= 0) { + if (!success && 
error_str.len <= 0) + { error_str = Lit("Unknown error"); } @@ -1175,32 +887,38 @@ internal P_JobDef(pipeline_alloc_job, job) pipeline->is_gfx = cs_dxc.len == 0; pipeline->error = error_str; - if (rootsig_blob) { + if (rootsig_blob) + { ID3D10Blob_Release(rootsig_blob); } - if (vs_blob) { + if (vs_blob) + { ID3D10Blob_Release(vs_blob); } - if (ps_blob) { + if (ps_blob) + { ID3D10Blob_Release(ps_blob); } - if (cs_blob) { + if (cs_blob) + { ID3D10Blob_Release(cs_blob); } } EndScratch(scratch); } -internal void pipeline_release_now(struct pipeline *pipeline) +void pipeline_release_now(struct pipeline *pipeline) { __prof; - if (pipeline->pso) { + GPU_D12_SharedState *g = &GPU_D12_shared_state; + if (pipeline->pso) + { ID3D12PipelineState_Release(pipeline->pso); } - P_Lock lock = P_LockE(&G.pipelines_mutex); + P_Lock lock = P_LockE(&g->pipelines_mutex); { - pipeline->next = G.first_free_pipeline; - G.first_free_pipeline = pipeline; + pipeline->next = g->first_free_pipeline; + g->first_free_pipeline = pipeline; } P_Unlock(&lock); } @@ -1209,22 +927,27 @@ internal void pipeline_release_now(struct pipeline *pipeline) * Pipeline cache * ========================== */ -internal struct pipeline_scope *pipeline_scope_begin(void) +struct pipeline_scope *pipeline_scope_begin(void) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; struct pipeline_scope *scope = 0; { - P_Lock lock = P_LockE(&G.pipelines_mutex); - if (G.first_free_pipeline_scope) { - scope = G.first_free_pipeline_scope; - G.first_free_pipeline_scope = scope->next_free; + P_Lock lock = P_LockE(&g->pipelines_mutex); + if (g->first_free_pipeline_scope) + { + scope = g->first_free_pipeline_scope; + g->first_free_pipeline_scope = scope->next_free; } P_Unlock(&lock); } Arena *arena = 0; - if (scope) { + if (scope) + { arena = scope->arena; - } else { + } + else + { arena = AllocArena(Mebi(64)); } ResetArena(arena); @@ -1234,43 +957,52 @@ internal struct pipeline_scope *pipeline_scope_begin(void) return scope; } 
-internal void pipeline_scope_end(struct pipeline_scope *scope) +void pipeline_scope_end(struct pipeline_scope *scope) { __prof; - P_Lock lock = P_LockE(&G.pipelines_mutex); + GPU_D12_SharedState *g = &GPU_D12_shared_state; + P_Lock lock = P_LockE(&g->pipelines_mutex); { - for (DictEntry *entry = scope->refs->first; entry; entry = entry->next) { + for (DictEntry *entry = scope->refs->first; entry; entry = entry->next) + { struct pipeline *pipeline = (struct pipeline *)entry->value; - if (--pipeline->refcount <= 0) { + if (--pipeline->refcount <= 0) + { fenced_release(pipeline, FENCED_RELEASE_KIND_PIPELINE); } } - scope->next_free = G.first_free_pipeline_scope; - G.first_free_pipeline_scope = scope; + scope->next_free = g->first_free_pipeline_scope; + g->first_free_pipeline_scope = scope; } P_Unlock(&lock); } -internal Readonly struct pipeline g_nil_pipeline = ZI; -internal struct pipeline *pipeline_from_name(struct pipeline_scope *scope, String name) +Readonly struct pipeline g_nil_pipeline = ZI; +struct pipeline *pipeline_from_name(struct pipeline_scope *scope, String name) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; struct pipeline *result = &g_nil_pipeline; u64 hash = HashFnv64(Fnv64Basis, name); struct pipeline *tmp = (struct pipeline *)DictValueFromHash(scope->refs, hash); - if (tmp) { + if (tmp) + { result = tmp; - } else { + } + else + { { - P_Lock lock = P_LockE(&G.pipelines_mutex); - tmp = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, hash); - if (tmp) { + P_Lock lock = P_LockE(&g->pipelines_mutex); + tmp = (struct pipeline *)DictValueFromHash(g->top_successful_pipelines, hash); + if (tmp) + { ++tmp->refcount; } P_Unlock(&lock); } - if (tmp) { + if (tmp) + { SetDictValue(scope->arena, scope->refs, hash, (u64)tmp); result = tmp; } @@ -1279,30 +1011,35 @@ internal struct pipeline *pipeline_from_name(struct pipeline_scope *scope, Strin return result; } -internal void pipeline_register(u64 num_pipelines, struct pipeline 
**pipelines) +void pipeline_register(u64 num_pipelines, struct pipeline **pipelines) { __prof; - P_Lock lock = P_LockE(&G.pipelines_mutex); + GPU_D12_SharedState *g = &GPU_D12_shared_state; + P_Lock lock = P_LockE(&g->pipelines_mutex); { - for (u64 i = 0; i < num_pipelines; ++i) { + for (u64 i = 0; i < num_pipelines; ++i) + { struct pipeline *pipeline = pipelines[i]; u64 hash = pipeline->hash; /* Insert into top dict */ { - struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_pipelines, hash); - if (old_pipeline && --old_pipeline->refcount <= 0) { + struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(g->top_pipelines, hash); + if (old_pipeline && --old_pipeline->refcount <= 0) + { fenced_release(old_pipeline, FENCED_RELEASE_KIND_PIPELINE); } - SetDictValue(G.pipelines_arena, G.top_pipelines, hash, (u64)pipeline); + SetDictValue(g->pipelines_arena, g->top_pipelines, hash, (u64)pipeline); ++pipeline->refcount; } /* Insert into success dict */ - if (pipeline->success) { - struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, hash); - if (old_pipeline && --old_pipeline->refcount <= 0) { + if (pipeline->success) + { + struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(g->top_successful_pipelines, hash); + if (old_pipeline && --old_pipeline->refcount <= 0) + { fenced_release(old_pipeline, FENCED_RELEASE_KIND_PIPELINE); } - SetDictValue(G.pipelines_arena, G.top_successful_pipelines, hash, (u64)pipeline); + SetDictValue(g->pipelines_arena, g->top_successful_pipelines, hash, (u64)pipeline); ++pipeline->refcount; } } @@ -1311,9 +1048,10 @@ internal void pipeline_register(u64 num_pipelines, struct pipeline **pipelines) } #if RESOURCE_RELOADING -internal W_CallbackFuncDef(pipeline_watch_callback, name) +W_CallbackFuncDef(pipeline_watch_callback, name) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; TempArena scratch = BeginScratchNoConflict(); String rst_extension = 
Lit(".rst"); @@ -1330,7 +1068,8 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name) i32 num_shaders = 0; struct shader_compile_desc *shader_descs = 0; struct shader_compile_result *shader_results = 0; - if (is_rs || is_cs) { + if (is_rs || is_cs) + { P_LogDebugF("Change detected in shader source file \"%F\", recompiling...", FmtString(name)); success = 1; P_File file = P_OpenFileReadWait(name); @@ -1350,7 +1089,8 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name) { struct shader_compile_job_sig sig = ZI; sig.arena = scratch.arena; - if (is_rs) { + if (is_rs) + { num_shaders = 2; shader_descs = PushStructs(scratch.arena, struct shader_compile_desc, num_shaders); shader_results = PushStructs(scratch.arena, struct shader_compile_result, num_shaders); @@ -1364,7 +1104,9 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name) sig.descs[1].friendly_name = friendly_name; sig.descs[1].entry = Lit("ps"); sig.descs[1].target = Lit("ps_6_6"); - } else if (is_cs) { + } + else if (is_cs) + { num_shaders = 1; shader_descs = PushStructs(scratch.arena, struct shader_compile_desc, num_shaders); shader_results = PushStructs(scratch.arena, struct shader_compile_result, num_shaders); @@ -1385,34 +1127,45 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name) } - for (i32 i = 0; i < num_shaders; ++i) { + for (i32 i = 0; i < num_shaders; ++i) + { struct shader_compile_desc *desc = &shader_descs[i]; struct shader_compile_result *result = &shader_results[i]; - if (result->success) { + if (result->success) + { P_LogSuccessF("Finished compiling shader \"%F:%F\" in %F seconds", FmtString(desc->friendly_name), FmtString(desc->entry), FmtFloat(SecondsFromNs(result->elapsed_ns))); - if (result->errors.len > 0) { + if (result->errors.len > 0) + { String msg = result->errors; P_LogWarning(msg); } - } else { + } + else + { String msg = result->errors; P_LogError(msg); success = 0; } } - if (success) { + if (success) + { /* Create pipeline descs */ u32 num_pipelines = 
0; struct pipeline_desc *pipeline_descs = PushDry(scratch.arena, struct pipeline_desc); - for (DictEntry *entry = G.pipeline_descs->first; entry; entry = entry->next) { + for (DictEntry *entry = g->pipeline_descs->first; entry; entry = entry->next) + { struct pipeline_desc *pipeline_desc = (struct pipeline_desc *)entry->value; struct pipeline_desc new_pipeline_desc = *pipeline_desc; - if (EqString(pipeline_desc->name, pipeline_name)) { - if (is_rs) { + if (EqString(pipeline_desc->name, pipeline_name)) + { + if (is_rs) + { new_pipeline_desc.vs_dxc = shader_results[0].dxc; new_pipeline_desc.ps_dxc = shader_results[1].dxc; - } else if (is_cs) { + } + else if (is_cs) + { new_pipeline_desc.cs_dxc = shader_results[0].dxc; } *PushStructNoZero(scratch.arena, struct pipeline_desc) = new_pipeline_desc; @@ -1421,7 +1174,8 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name) } /* Recompile dirty pipelines */ - if (num_pipelines > 0) { + if (num_pipelines > 0) + { __profn("Compile dirty pipelines"); struct pipeline **pipelines = PushStructs(scratch.arena, struct pipeline *, num_pipelines); { @@ -1433,23 +1187,29 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name) P_WaitOnCounter(&counter); } { - P_Lock lock = P_LockS(&G.pipelines_mutex); - for (u32 i = 0; i < num_pipelines; ++i) { + P_Lock lock = P_LockS(&g->pipelines_mutex); + for (u32 i = 0; i < num_pipelines; ++i) + { struct pipeline *pipeline = pipelines[i]; - if (pipeline->success) { + if (pipeline->success) + { P_LogSuccessF("Successfully compiled pipeline \"%F\" in %F seconds", FmtString(pipeline->name), FmtFloat(SecondsFromNs(pipeline->compilation_time_ns))); - if (pipeline->error.len > 0) { + if (pipeline->error.len > 0) + { String msg = StringFormat(scratch.arena, Lit("Warning while compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(pipeline->error)); P_LogWarning(msg); } - } else { + } + else + { { String error = pipeline->error.len > 0 ? 
pipeline->error : Lit("Unknown error"); String msg = StringFormat(scratch.arena, Lit("Error compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(error)); P_LogError(msg); } - struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, pipeline->hash); - if (!old_pipeline) { + struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(g->top_successful_pipelines, pipeline->hash); + if (!old_pipeline) + { /* If no previously successful pipeline exists, then show a message box rather than logging since logs may not be visible to user */ String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error"); String msg = StringFormat(scratch.arena, Lit("Error compiling pipeline \"%F\":\n\n%F"), FmtString(pipeline->name), FmtString(error)); @@ -1472,7 +1232,7 @@ internal W_CallbackFuncDef(pipeline_watch_callback, name) * Descriptor * ========================== */ -internal struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh) +struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh) { __prof; struct descriptor *d = 0; @@ -1480,13 +1240,17 @@ internal struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh) D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI; { P_Lock lock = P_LockE(&dh->mutex); - if (dh->first_free_descriptor) { + if (dh->first_free_descriptor) + { d = dh->first_free_descriptor; dh->first_free_descriptor = d->next_free; handle = d->handle; index = d->index; - } else { - if (dh->num_descriptors_reserved >= dh->num_descriptors_capacity) { + } + else + { + if (dh->num_descriptors_reserved >= dh->num_descriptors_capacity) + { P_Panic(Lit("Max descriptors reached in heap")); } d = PushStructNoZero(dh->arena, struct descriptor); @@ -1502,7 +1266,7 @@ internal struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh) return d; } -internal void descriptor_release(struct descriptor *descriptor) +void descriptor_release(struct descriptor *descriptor) { 
struct cpu_descriptor_heap *dh = descriptor->heap; P_Lock lock = P_LockE(&dh->mutex); @@ -1517,9 +1281,10 @@ internal void descriptor_release(struct descriptor *descriptor) * CPU descriptor heap * ========================== */ -internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type) +struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; struct cpu_descriptor_heap *dh = 0; { Arena *arena = AllocArena(Mebi(64)); @@ -1529,11 +1294,13 @@ internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRI u32 num_descriptors = 0; u32 descriptor_size = 0; - if (type < (i32)countof(G.desc_counts) && type < (i32)countof(G.desc_sizes)) { - num_descriptors = G.desc_counts[type]; - descriptor_size = G.desc_sizes[type]; + if (type < (i32)countof(g->desc_counts) && type < (i32)countof(g->desc_sizes)) + { + num_descriptors = g->desc_counts[type]; + descriptor_size = g->desc_sizes[type]; } - if (num_descriptors == 0 || descriptor_size == 0) { + if (num_descriptors == 0 || descriptor_size == 0) + { P_Panic(Lit("Unsupported CPU descriptor type")); } dh->num_descriptors_capacity = num_descriptors; @@ -1542,8 +1309,9 @@ internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRI D3D12_DESCRIPTOR_HEAP_DESC desc = ZI; desc.Type = type; desc.NumDescriptors = num_descriptors; - HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&dh->heap); - if (FAILED(hr)) { + HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &desc, &IID_ID3D12DescriptorHeap, (void **)&dh->heap); + if (FAILED(hr)) + { P_Panic(Lit("Failed to create CPU descriptor heap")); } ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(dh->heap, &dh->handle); @@ -1552,7 +1320,7 @@ internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRI } #if 0 -internal void 
cpu_descriptor_heap_release(struct cpu_descriptor_heap *dh) +void cpu_descriptor_heap_release(struct cpu_descriptor_heap *dh) { /* TODO */ (UNUSED)dh; @@ -1563,17 +1331,19 @@ internal void cpu_descriptor_heap_release(struct cpu_descriptor_heap *dh) * Fenced release * ========================== */ -internal void fenced_release(void *data, enum fenced_release_kind kind) +void fenced_release(void *data, enum fenced_release_kind kind) { + GPU_D12_SharedState *g = &GPU_D12_shared_state; struct fenced_release_data fr = ZI; fr.kind = kind; fr.ptr = data; - u64 fr_targets[countof(G.fenced_release_targets)] = ZI; + u64 fr_targets[countof(g->fenced_release_targets)] = ZI; /* Read current fence target values from command queues */ - for (u32 i = 0; i < countof(G.command_queues); ++i) { - struct command_queue *cq = G.command_queues[i]; + for (u32 i = 0; i < countof(g->command_queues); ++i) + { + struct command_queue *cq = g->command_queues[i]; P_Lock lock = P_LockS(&cq->submit_fence_mutex); { fr_targets[i] = cq->submit_fence_target; @@ -1583,20 +1353,20 @@ internal void fenced_release(void *data, enum fenced_release_kind kind) /* PushStruct data to release queue */ { - P_Lock lock = P_LockE(&G.fenced_releases_mutex); + P_Lock lock = P_LockE(&g->fenced_releases_mutex); { - *PushStruct(G.fenced_releases_arena, struct fenced_release_data) = fr; - CopyBytes(G.fenced_release_targets, fr_targets, sizeof(fr_targets)); + *PushStruct(g->fenced_releases_arena, struct fenced_release_data) = fr; + CopyBytes(g->fenced_release_targets, fr_targets, sizeof(fr_targets)); } P_Unlock(&lock); } /* Wake evictor */ { - P_Lock lock = P_LockE(&G.evictor_wake_mutex); + P_Lock lock = P_LockE(&g->evictor_wake_mutex); { - ++G.evictor_wake_gen; - P_SignalCv(&G.evictor_wake_cv, I32Max); + ++g->evictor_wake_gen; + P_SignalCv(&g->evictor_wake_cv, I32Max); } P_Unlock(&lock); } @@ -1606,17 +1376,21 @@ internal void fenced_release(void *data, enum fenced_release_kind kind) * Resource * 
========================== */ -internal struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state) +struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; struct dx12_resource *r = 0; { - P_Lock lock = P_LockE(&G.resources_mutex); - if (G.first_free_resource) { - r = G.first_free_resource; - G.first_free_resource = r->next_free; - } else { - r = PushStructNoZero(G.resources_arena, struct dx12_resource); + P_Lock lock = P_LockE(&g->resources_mutex); + if (g->first_free_resource) + { + r = g->first_free_resource; + g->first_free_resource = r->next_free; + } + else + { + r = PushStructNoZero(g->resources_arena, struct dx12_resource); } P_Unlock(&lock); } @@ -1624,37 +1398,44 @@ internal struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_pr D3D12_CLEAR_VALUE clear_value = { .Format = desc.Format, .Color = { 0 } }; D3D12_CLEAR_VALUE *clear_value_ptr = desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? 
&clear_value : 0; - HRESULT hr = ID3D12Device_CreateCommittedResource(G.device, &heap_props, heap_flags, &desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->resource); - if (FAILED(hr)) { + HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->resource); + if (FAILED(hr)) + { /* TODO: Don't panic */ P_Panic(Lit("Failed to create resource")); } r->state = initial_state; - if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { + if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + { r->gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->resource); } return r; } -internal void dx12_resource_release_now(struct dx12_resource *t) +void dx12_resource_release_now(struct dx12_resource *t) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; /* Release descriptors */ /* TODO: Batch lock heaps */ - if (t->cbv_descriptor) { + if (t->cbv_descriptor) + { descriptor_release(t->cbv_descriptor); } - if (t->srv_descriptor) { + if (t->srv_descriptor) + { descriptor_release(t->srv_descriptor); } - if (t->uav_descriptor) { + if (t->uav_descriptor) + { descriptor_release(t->uav_descriptor); } - if (t->rtv_descriptor) { + if (t->rtv_descriptor) + { descriptor_release(t->rtv_descriptor); } @@ -1662,13 +1443,13 @@ internal void dx12_resource_release_now(struct dx12_resource *t) ID3D12Resource_Release(t->resource); /* Add to free list */ - P_Lock lock = P_LockE(&G.resources_mutex); - t->next_free = G.first_free_resource; - G.first_free_resource = t; + P_Lock lock = P_LockE(&g->resources_mutex); + t->next_free = g->first_free_resource; + g->first_free_resource = t; P_Unlock(&lock); } -void gp_resource_release(GPU_Resource *resource) +void GPU_ReleaseResource(GPU_Resource *resource) { struct dx12_resource *r = (struct dx12_resource *)resource; fenced_release(r, FENCED_RELEASE_KIND_RESOURCE); @@ -1678,27 +1459,24 @@ void gp_resource_release(GPU_Resource 
*resource) * Resource barrier * ========================== */ -struct dx12_resource_barrier_desc { - enum D3D12_RESOURCE_BARRIER_TYPE type; - struct dx12_resource *resource; - enum D3D12_RESOURCE_STATES new_state; /* 0 if type != D3D12_RESOURCE_BARRIER_TYPE_TRANSITION */ -}; - -internal void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_descs, struct dx12_resource_barrier_desc *descs) +void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_descs, struct dx12_resource_barrier_desc *descs) { __prof; TempArena scratch = BeginScratchNoConflict(); i32 num_rbs = 0; struct D3D12_RESOURCE_BARRIER *rbs = PushStructsNoZero(scratch.arena, struct D3D12_RESOURCE_BARRIER, num_descs); - for (i32 i = 0; i < num_descs; ++i) { + for (i32 i = 0; i < num_descs; ++i) + { struct dx12_resource_barrier_desc *desc = &descs[i]; struct dx12_resource *resource = desc->resource; enum D3D12_RESOURCE_BARRIER_TYPE type = desc->type; - if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION) { + if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION) + { enum D3D12_RESOURCE_STATES old_state = resource->state; enum D3D12_RESOURCE_STATES new_state = desc->new_state; - if (new_state != old_state) { + if (new_state != old_state) + { struct D3D12_RESOURCE_BARRIER *rb = &rbs[num_rbs++]; ZeroStruct(rb); rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; @@ -1709,19 +1487,24 @@ internal void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_desc rb->Transition.StateAfter = new_state; resource->state = new_state; } - } else if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV) { + } + else if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV) + { struct D3D12_RESOURCE_BARRIER *rb = &rbs[num_rbs++]; ZeroStruct(rb); rb->Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; rb->Flags = 0; rb->UAV.pResource = resource->resource; - } else { + } + else + { /* Unknown barrier type */ Assert(0); } } - if (num_rbs > 0) { + if (num_rbs > 0) + { ID3D12GraphicsCommandList_ResourceBarrier(cl, num_rbs, rbs); } @@ 
-1732,11 +1515,12 @@ internal void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_desc * Command queue * ========================== */ -internal struct command_list_pool *command_list_pool_alloc(struct command_queue *cq); +struct command_list_pool *command_list_pool_alloc(struct command_queue *cq); -internal P_JobDef(command_queue_alloc_job, job) +P_JobDef(command_queue_alloc_job, job) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; struct command_queue_alloc_job_sig *sig = job.sig; struct command_queue_desc *desc = &sig->descs_in[job.id]; { @@ -1752,13 +1536,15 @@ internal P_JobDef(command_queue_alloc_job, job) dx12_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; dx12_desc.Type = desc->type; dx12_desc.Priority = desc->priority; - HRESULT hr = ID3D12Device_CreateCommandQueue(G.device, &dx12_desc, &IID_ID3D12CommandQueue, (void **)&cq->cq); - if (FAILED(hr)) { + HRESULT hr = ID3D12Device_CreateCommandQueue(g->device, &dx12_desc, &IID_ID3D12CommandQueue, (void **)&cq->cq); + if (FAILED(hr)) + { P_Panic(Lit("Failed to create command queue")); } - hr = ID3D12Device_CreateFence(G.device, 0, 0, &IID_ID3D12Fence, (void **)&cq->submit_fence); - if (FAILED(hr)) { + hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&cq->submit_fence); + if (FAILED(hr)) + { P_Panic(Lit("Failed to create command queue fence")); } @@ -1768,7 +1554,7 @@ internal P_JobDef(command_queue_alloc_job, job) } } -internal void command_queue_release(struct command_queue *cq) +void command_queue_release(struct command_queue *cq) { __prof; /* TODO */ @@ -1780,7 +1566,7 @@ internal void command_queue_release(struct command_queue *cq) * Command list * ========================== */ -internal struct command_list_pool *command_list_pool_alloc(struct command_queue *cq) +struct command_list_pool *command_list_pool_alloc(struct command_queue *cq) { struct command_list_pool *pool = 0; { @@ -1792,9 +1578,10 @@ internal struct command_list_pool 
*command_list_pool_alloc(struct command_queue return pool; } -internal struct command_list *command_list_open(struct command_list_pool *pool) +struct command_list *command_list_open(struct command_list_pool *pool) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; struct command_queue *cq = pool->cq; u64 completed_fence_value = ID3D12Fence_GetCompletedValue(cq->submit_fence); @@ -1804,29 +1591,40 @@ internal struct command_list *command_list_open(struct command_list_pool *pool) { P_Lock lock = P_LockE(&pool->mutex); /* Find first command list ready for reuse */ - for (struct command_list *tmp = pool->first_submitted_command_list; tmp; tmp = tmp->next_submitted) { - if (completed_fence_value >= tmp->submitted_fence_target) { + for (struct command_list *tmp = pool->first_submitted_command_list; tmp; tmp = tmp->next_submitted) + { + if (completed_fence_value >= tmp->submitted_fence_target) + { cl = tmp; break; } } - if (cl) { + if (cl) + { /* Remove from submitted list */ old_cl = cl->cl; old_ca = cl->ca; struct command_list *prev = cl->prev_submitted; struct command_list *next = cl->next_submitted; - if (prev) { + if (prev) + { prev->next_submitted = next; - } else { + } + else + { pool->first_submitted_command_list = next; } - if (next) { + if (next) + { next->prev_submitted = prev; - } else { + } + else + { pool->last_submitted_command_list = prev; } - } else { + } + else + { cl = PushStructNoZero(pool->arena, struct command_list); } P_Unlock(&lock); @@ -1834,37 +1632,45 @@ internal struct command_list *command_list_open(struct command_list_pool *pool) ZeroStruct(cl); cl->cq = cq; cl->pool = pool; - cl->global_record_lock = P_LockS(&G.global_command_list_record_mutex); + cl->global_record_lock = P_LockS(&g->global_command_list_record_mutex); HRESULT hr = 0; - if (old_cl) { + if (old_cl) + { cl->cl = old_cl; cl->ca = old_ca; - } else { - hr = ID3D12Device_CreateCommandAllocator(G.device, cq->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca); - if 
(FAILED(hr)) { + } + else + { + hr = ID3D12Device_CreateCommandAllocator(g->device, cq->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca); + if (FAILED(hr)) + { P_Panic(Lit("Failed to create command allocator")); } - hr = ID3D12Device_CreateCommandList(G.device, 0, cq->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl); - if (FAILED(hr)) { + hr = ID3D12Device_CreateCommandList(g->device, 0, cq->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl); + if (FAILED(hr)) + { P_Panic(Lit("Failed to create command list")); } hr = ID3D12GraphicsCommandList_Close(cl->cl); - if (FAILED(hr)) { + if (FAILED(hr)) + { P_Panic(Lit("Failed to close command list during initialization")); } } /* Reset */ hr = ID3D12CommandAllocator_Reset(cl->ca); - if (FAILED(hr)) { + if (FAILED(hr)) + { P_Panic(Lit("Failed to reset command allocator")); } hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0); - if (FAILED(hr)) { + if (FAILED(hr)) + { P_Panic(Lit("Failed to reset command list")); } @@ -1872,9 +1678,10 @@ internal struct command_list *command_list_open(struct command_list_pool *pool) } /* TODO: Allow multiple command list submissions */ -internal u64 command_list_close(struct command_list *cl) +u64 command_list_close(struct command_list *cl) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; struct command_queue *cq = cl->cq; struct command_list_pool *pool = cl->pool; @@ -1882,7 +1689,8 @@ internal u64 command_list_close(struct command_list *cl) { __profn("Close DX12 command list"); HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl); - if (FAILED(hr)) { + if (FAILED(hr)) + { /* TODO: Don't panic */ P_Panic(Lit("Failed to close command list before execution")); } @@ -1892,7 +1700,7 @@ internal u64 command_list_close(struct command_list *cl) u64 submit_fence_target = 0; { __profn("Execute"); - P_Lock submit_lock = P_LockS(&G.global_submit_mutex); + P_Lock submit_lock = P_LockS(&g->global_submit_mutex); P_Lock fence_lock = 
P_LockE(&cq->submit_fence_mutex); { submit_fence_target = ++cq->submit_fence_target; @@ -1905,30 +1713,38 @@ internal u64 command_list_close(struct command_list *cl) /* Add descriptor heaps to submitted list */ { - P_Lock lock = P_LockE(&G.command_descriptor_heaps_mutex); - for (struct command_descriptor_heap *cdh = cl->first_command_descriptor_heap; cdh; cdh = cdh->next_in_command_list) { + P_Lock lock = P_LockE(&g->command_descriptor_heaps_mutex); + for (struct command_descriptor_heap *cdh = cl->first_command_descriptor_heap; cdh; cdh = cdh->next_in_command_list) + { cdh->submitted_cq = cq; cdh->submitted_fence_target = submit_fence_target; - if (G.last_submitted_command_descriptor_heap) { - G.last_submitted_command_descriptor_heap->next_submitted = cdh; - } else { - G.first_submitted_command_descriptor_heap = cdh; + if (g->last_submitted_command_descriptor_heap) + { + g->last_submitted_command_descriptor_heap->next_submitted = cdh; } - G.last_submitted_command_descriptor_heap = cdh; + else + { + g->first_submitted_command_descriptor_heap = cdh; + } + g->last_submitted_command_descriptor_heap = cdh; } P_Unlock(&lock); } /* Add command buffers to submitted list */ { - P_Lock lock = P_LockE(&G.command_buffers_mutex); - for (struct command_buffer *cb = cl->first_command_buffer; cb; cb = cb->next_in_command_list) { + P_Lock lock = P_LockE(&g->command_buffers_mutex); + for (struct command_buffer *cb = cl->first_command_buffer; cb; cb = cb->next_in_command_list) + { struct command_buffer_group *group = cb->group; cb->submitted_cq = cq; cb->submitted_fence_target = submit_fence_target; - if (group->last_submitted) { + if (group->last_submitted) + { group->last_submitted->next_submitted = cb; - } else { + } + else + { group->first_submitted = cb; } group->last_submitted = cb; @@ -1941,9 +1757,12 @@ internal u64 command_list_close(struct command_list *cl) cl->submitted_fence_target = submit_fence_target; { P_Lock lock = P_LockE(&pool->mutex); - if 
(pool->last_submitted_command_list) { + if (pool->last_submitted_command_list) + { pool->last_submitted_command_list->next_submitted = cl; - } else { + } + else + { pool->first_submitted_command_list = cl; } pool->last_submitted_command_list = cl; @@ -1957,9 +1776,10 @@ internal u64 command_list_close(struct command_list *cl) * Command descriptor heap (GPU / shader visible descriptor heap) * ========================== */ -internal struct command_descriptor_heap *command_list_push_descriptor_heap(struct command_list *cl, struct cpu_descriptor_heap *dh_cpu) +struct command_descriptor_heap *command_list_push_descriptor_heap(struct command_list *cl, struct cpu_descriptor_heap *dh_cpu) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; Assert(dh_cpu->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); /* Src heap must have expected type */ /* Allocate GPU heap */ @@ -1968,52 +1788,67 @@ internal struct command_descriptor_heap *command_list_push_descriptor_heap(struc D3D12_CPU_DESCRIPTOR_HANDLE old_start_cpu_handle = ZI; D3D12_GPU_DESCRIPTOR_HANDLE old_start_gpu_handle = ZI; { - P_Lock lock = P_LockE(&G.command_descriptor_heaps_mutex); + P_Lock lock = P_LockE(&g->command_descriptor_heaps_mutex); /* Find first heap ready for reuse */ - for (struct command_descriptor_heap *tmp = G.first_submitted_command_descriptor_heap; tmp; tmp = tmp->next_submitted) { + for (struct command_descriptor_heap *tmp = g->first_submitted_command_descriptor_heap; tmp; tmp = tmp->next_submitted) + { /* TODO: Cache completed fence values */ u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence); - if (completed_fence_value >= tmp->submitted_fence_target) { + if (completed_fence_value >= tmp->submitted_fence_target) + { cdh = tmp; break; } } - if (cdh) { + if (cdh) + { /* Remove from submitted list */ old_heap = cdh->heap; old_start_cpu_handle = cdh->start_cpu_handle; old_start_gpu_handle = cdh->start_gpu_handle; struct command_descriptor_heap *prev = 
cdh->prev_submitted; struct command_descriptor_heap *next = cdh->next_submitted; - if (prev) { + if (prev) + { prev->next_submitted = next; - } else { - G.first_submitted_command_descriptor_heap = next; } - if (next) { + else + { + g->first_submitted_command_descriptor_heap = next; + } + if (next) + { next->prev_submitted = prev; - } else { - G.last_submitted_command_descriptor_heap = prev; } - } else { + else + { + g->last_submitted_command_descriptor_heap = prev; + } + } + else + { /* No available heap available for reuse, allocate new */ - cdh = PushStructNoZero(G.command_descriptor_heaps_arena, struct command_descriptor_heap); + cdh = PushStructNoZero(g->command_descriptor_heaps_arena, struct command_descriptor_heap); } P_Unlock(&lock); } ZeroStruct(cdh); - if (old_heap) { + if (old_heap) + { cdh->heap = old_heap; cdh->start_cpu_handle = old_start_cpu_handle; cdh->start_gpu_handle = old_start_gpu_handle; - } else { + } + else + { D3D12_DESCRIPTOR_HEAP_DESC desc = ZI; desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; desc.NumDescriptors = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS; desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&cdh->heap); - if (FAILED(hr)) { + HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &desc, &IID_ID3D12DescriptorHeap, (void **)&cdh->heap); + if (FAILED(hr)) + { P_Panic(Lit("Failed to create GPU descriptor heap")); } ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cdh->heap, &cdh->start_cpu_handle); @@ -2023,7 +1858,7 @@ internal struct command_descriptor_heap *command_list_push_descriptor_heap(struc /* CopyCPU heap */ { P_Lock lock = P_LockS(&dh_cpu->mutex); - ID3D12Device_CopyDescriptorsSimple(G.device, dh_cpu->num_descriptors_reserved, cdh->start_cpu_handle, dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + ID3D12Device_CopyDescriptorsSimple(g->device, dh_cpu->num_descriptors_reserved, cdh->start_cpu_handle, 
dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); P_Unlock(&lock); } @@ -2038,16 +1873,17 @@ internal struct command_descriptor_heap *command_list_push_descriptor_heap(struc * Command buffer * ========================== */ -internal u64 command_buffer_hash_from_size(u64 size) +u64 command_buffer_hash_from_size(u64 size) { u64 hash = RandU64FromSeed(size); return hash; } -internal u64 align_up_pow2(u64 v) +u64 align_up_pow2(u64 v) { u64 result = 0; - if (v > 0) { + if (v > 0) + { result = v - 1; result |= result >> 1; result |= result >> 2; @@ -2061,9 +1897,10 @@ internal u64 align_up_pow2(u64 v) } #define command_list_push_buffer(cl, count, elems) _command_list_push_buffer((cl), count * ((elems) ? sizeof(*(elems)) : 0), (elems), (elems) ? sizeof(*(elems)) : 1) -internal struct command_buffer *_command_list_push_buffer(struct command_list *cl, u64 data_len, void *data, u64 data_stride) +struct command_buffer *_command_list_push_buffer(struct command_list *cl, u64 data_len, void *data, u64 data_stride) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; /* Data length should be a multiple of stride */ Assert(data_len % data_stride == 0); @@ -2076,45 +1913,57 @@ internal struct command_buffer *_command_list_push_buffer(struct command_list *c struct command_buffer *cb = 0; struct dx12_resource *r = 0; { - P_Lock lock = P_LockE(&G.command_buffers_mutex); + P_Lock lock = P_LockE(&g->command_buffers_mutex); { u64 group_hash = command_buffer_hash_from_size(size); - DictEntry *cb_group_entry = EnsureDictEntry(G.command_buffers_arena, G.command_buffers_dict, group_hash); + DictEntry *cb_group_entry = EnsureDictEntry(g->command_buffers_arena, g->command_buffers_dict, group_hash); cb_group = (struct command_buffer_group *)cb_group_entry->value; - if (!cb_group) { + if (!cb_group) + { /* Create group */ - cb_group = PushStruct(G.command_buffers_arena, struct command_buffer_group); + cb_group = PushStruct(g->command_buffers_arena, struct command_buffer_group); 
cb_group_entry->value = (u64)cb_group; } } /* Find first command buffer ready for reuse */ - for (struct command_buffer *tmp = cb_group->first_submitted; tmp; tmp = tmp->next_submitted) { + for (struct command_buffer *tmp = cb_group->first_submitted; tmp; tmp = tmp->next_submitted) + { /* TODO: Cache completed fence values */ u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence); - if (completed_fence_value >= tmp->submitted_fence_target) { + if (completed_fence_value >= tmp->submitted_fence_target) + { cb = tmp; break; } } - if (cb) { + if (cb) + { /* Remove from submitted list */ r = cb->resource; struct command_buffer *prev = cb->prev_submitted; struct command_buffer *next = cb->next_submitted; - if (prev) { + if (prev) + { prev->next_submitted = next; - } else { + } + else + { cb_group->first_submitted = next; } - if (next) { + if (next) + { next->prev_submitted = prev; - } else { + } + else + { cb_group->last_submitted = prev; } - } else { + } + else + { /* Allocate new */ - cb = PushStructNoZero(G.command_buffers_arena, struct command_buffer); + cb = PushStructNoZero(g->command_buffers_arena, struct command_buffer); } P_Unlock(&lock); } @@ -2123,7 +1972,8 @@ internal struct command_buffer *_command_list_push_buffer(struct command_list *c cb->size = data_len; /* Create upload heap */ - if (!r) { + if (!r) + { D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; @@ -2145,7 +1995,7 @@ internal struct command_buffer *_command_list_push_buffer(struct command_list *c D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_GENERIC_READ; r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state); - r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap); + r->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap); } cb->resource = r; @@ -2158,7 +2008,7 @@ internal struct 
command_buffer *_command_list_push_buffer(struct command_list *c desc.Buffer.NumElements = MaxU32(data_len / data_stride, 1); desc.Buffer.StructureByteStride = data_stride; desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; - ID3D12Device_CreateShaderResourceView(G.device, r->resource, &desc, r->srv_descriptor->handle); + ID3D12Device_CreateShaderResourceView(g->device, r->resource, &desc, r->srv_descriptor->handle); } /* Write data to resource */ @@ -2166,7 +2016,8 @@ internal struct command_buffer *_command_list_push_buffer(struct command_list *c D3D12_RANGE read_range = ZI; void *dst = 0; HRESULT hr = ID3D12Resource_Map(cb->resource->resource, 0, &read_range, &dst); - if (FAILED(hr) || !dst) { + if (FAILED(hr) || !dst) + { /* TODO: Don't panic */ P_Panic(Lit("Failed to map command buffer resource")); } @@ -2185,18 +2036,14 @@ internal struct command_buffer *_command_list_push_buffer(struct command_list *c * Wait job * ========================== */ -struct dx12_wait_fence_job_sig { - ID3D12Fence *fence; - u64 target; -}; - -internal P_JobDef(dx12_wait_fence_job, job) +P_JobDef(dx12_wait_fence_job, job) { __prof; struct dx12_wait_fence_job_sig *sig = job.sig; ID3D12Fence *fence = sig->fence; u64 target = sig->target; - if (ID3D12Fence_GetCompletedValue(fence) < target) { + if (ID3D12Fence_GetCompletedValue(fence) < target) + { /* TODO: Pool events */ HANDLE event = CreateEvent(0, 0, 0, 0); ID3D12Fence_SetEventOnCompletion(sig->fence, sig->target, event); @@ -2209,10 +2056,12 @@ internal P_JobDef(dx12_wait_fence_job, job) * Texture * ========================== */ -GPU_Resource *gp_texture_alloc(GPU_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data) +GPU_Resource *GPU_AllocTexture(GPU_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data) { __prof; - if (size.x <= 0 || size.y <= 0) { + GPU_D12_SharedState *g = &GPU_D12_shared_state; + if (size.x <= 0 || size.y <= 0) + { P_Panic(Lit("Tried to create texture with dimension <= 0")); } 
LocalPersist const DXGI_FORMAT formats[] = { @@ -2223,10 +2072,12 @@ GPU_Resource *gp_texture_alloc(GPU_TextureFormat format, u32 flags, Vec2I32 size }; DXGI_FORMAT dxgi_format = ZI; - if (format >= 0 && format < (i32)countof(formats)) { + if (format >= 0 && format < (i32)countof(formats)) + { dxgi_format = formats[format]; } - if (format == 0) { + if (format == 0) + { P_Panic(Lit("Tried to create texture with unknown format")); } @@ -2252,18 +2103,20 @@ GPU_Resource *gp_texture_alloc(GPU_TextureFormat format, u32 flags, Vec2I32 size struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state); r->texture_size = size; - r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap); - ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle); - if (flags & GP_TEXTURE_FLAG_TARGETABLE) { + r->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap); + ID3D12Device_CreateShaderResourceView(g->device, r->resource, 0, r->srv_descriptor->handle); + if (flags & GP_TEXTURE_FLAG_TARGETABLE) + { desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; - r->uav_descriptor = descriptor_alloc(G.cbv_srv_uav_heap); - r->rtv_descriptor = descriptor_alloc(G.rtv_heap); - ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, 0, 0, r->uav_descriptor->handle); - ID3D12Device_CreateRenderTargetView(G.device, r->resource, 0, r->rtv_descriptor->handle); + r->uav_descriptor = descriptor_alloc(g->cbv_srv_uav_heap); + r->rtv_descriptor = descriptor_alloc(g->rtv_heap); + ID3D12Device_CreateUnorderedAccessView(g->device, r->resource, 0, 0, r->uav_descriptor->handle); + ID3D12Device_CreateRenderTargetView(g->device, r->resource, 0, r->rtv_descriptor->handle); } /* Upload texture */ - if (initial_data) { + if (initial_data) + { /* TODO: Make wait optional */ P_Counter counter = ZI; struct dx12_upload_job_sig sig = ZI; @@ -2276,7 +2129,7 @@ GPU_Resource *gp_texture_alloc(GPU_TextureFormat 
format, u32 flags, Vec2I32 size return (GPU_Resource *)r; } -Vec2I32 gp_texture_get_size(GPU_Resource *resource) +Vec2I32 GPU_GetTextureSize(GPU_Resource *resource) { struct dx12_resource *r = (struct dx12_resource *)resource; return r->texture_size; @@ -2286,8 +2139,9 @@ Vec2I32 gp_texture_get_size(GPU_Resource *resource) * Upload * ========================== */ -internal P_JobDef(dx12_upload_job, job) +P_JobDef(dx12_upload_job, job) { + GPU_D12_SharedState *g = &GPU_D12_shared_state; struct dx12_upload_job_sig *sig = job.sig; struct dx12_resource *r = sig->resource; void *data = sig->data; @@ -2302,7 +2156,7 @@ internal P_JobDef(dx12_upload_job, job) u64 upload_row_size = 0; u32 upload_num_rows = 0; D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; - ID3D12Device_GetCopyableFootprints(G.device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); + ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; /* Create upload heap */ @@ -2330,7 +2184,7 @@ internal P_JobDef(dx12_upload_job, job) upload = dx12_resource_alloc(upload_heap_props, upload_heap_flags, upload_desc, upload_initial_state); } - struct command_queue *cq = G.command_queues[DX12_QUEUE_COPY_BACKGROUND]; + struct command_queue *cq = g->command_queues[DX12_QUEUE_COPY_BACKGROUND]; struct command_list *cl = command_list_open(cq->cl_pool); { /* Copyto upload heap */ @@ -2338,7 +2192,8 @@ internal P_JobDef(dx12_upload_job, job) D3D12_RANGE read_range = ZI; void *mapped = 0; HRESULT hr = ID3D12Resource_Map(upload->resource, 0, &read_range, &mapped); - if (FAILED(hr) || !mapped) { + if (FAILED(hr) || !mapped) + { /* TODO: Don't panic */ P_Panic(Lit("Failed to map texture upload resource")); } @@ -2347,9 +2202,11 @@ internal P_JobDef(dx12_upload_job, job) u32 z_size = upload_row_size * upload_num_rows; - for (u32 z = 0; z < 
desc.DepthOrArraySize; ++z) { + for (u32 z = 0; z < desc.DepthOrArraySize; ++z) + { u32 z_offset = z * z_size; - for (u32 y = 0; y < upload_num_rows; ++y) { + for (u32 y = 0; y < upload_num_rows; ++y) + { CopyBytes(dst + y * footprint.RowPitch + z_offset, src + y * upload_row_size + z_offset, upload_row_size); } } @@ -2376,7 +2233,8 @@ internal P_JobDef(dx12_upload_job, job) } u64 fence_target = command_list_close(cl); /* Wait on fence so we know it's safe to release upload heap */ - if (ID3D12Fence_GetCompletedValue(cq->submit_fence) < fence_target) { + if (ID3D12Fence_GetCompletedValue(cq->submit_fence) < fence_target) + { struct dx12_wait_fence_job_sig wait_sig = ZI; wait_sig.fence = cq->submit_fence; wait_sig.target = fence_target; @@ -2394,36 +2252,43 @@ internal P_JobDef(dx12_upload_job, job) * Run utils * ========================== */ -internal void command_list_set_pipeline(struct command_list *cl, struct pipeline *pipeline) +void command_list_set_pipeline(struct command_list *cl, struct pipeline *pipeline) { ID3D12GraphicsCommandList_SetPipelineState(cl->cl, pipeline->pso); - if (pipeline->is_gfx) { + if (pipeline->is_gfx) + { ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, pipeline->rootsig); - } else { + } + else + { ID3D12GraphicsCommandList_SetComputeRootSignature(cl->cl, pipeline->rootsig); } cl->cur_pipeline = pipeline; } -internal void command_list_set_sig(struct command_list *cl, void *src, u32 size) +void command_list_set_sig(struct command_list *cl, void *src, u32 size) { __prof; Assert(size % 16 == 0); /* Root constant structs must pad to 16 bytes */ Assert(size <= 256); /* Only 64 32-bit root constants allowed in signature */ u32 num32bit = size / 4; b32 is_gfx = cl->cur_pipeline->is_gfx; - for (u32 i = 0; i < num32bit; ++i) { + for (u32 i = 0; i < num32bit; ++i) + { u32 val = 0; CopyBytes(&val, (((u32 *)src) + i), 4); - if (is_gfx) { + if (is_gfx) + { ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(cl->cl, 0, val, i); - } 
else { + } + else + { ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(cl->cl, 0, val, i); } } } -internal struct D3D12_VIEWPORT viewport_from_rect(Rect r) +struct D3D12_VIEWPORT viewport_from_rect(Rect r) { struct D3D12_VIEWPORT viewport = ZI; viewport.TopLeftX = r.x; @@ -2435,7 +2300,7 @@ internal struct D3D12_VIEWPORT viewport_from_rect(Rect r) return viewport; } -internal D3D12_RECT scissor_from_rect(Rect r) +D3D12_RECT scissor_from_rect(Rect r) { D3D12_RECT scissor = ZI; scissor.left = r.x; @@ -2445,7 +2310,7 @@ internal D3D12_RECT scissor_from_rect(Rect r) return scissor; } -internal D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer *cb, u32 vertex_size) +D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer *cb, u32 vertex_size) { D3D12_VERTEX_BUFFER_VIEW vbv = ZI; vbv.BufferLocation = cb->resource->gpu_address; @@ -2454,7 +2319,7 @@ internal D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer return vbv; } -internal D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *cb, DXGI_FORMAT format) +D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *cb, DXGI_FORMAT format) { D3D12_INDEX_BUFFER_VIEW ibv = ZI; ibv.BufferLocation = cb->resource->gpu_address; @@ -2463,9 +2328,10 @@ internal D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer * return ibv; } -internal struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, Vec2I32 size, D3D12_RESOURCE_STATES initial_state) +struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, Vec2I32 size, D3D12_RESOURCE_STATES initial_state) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT }; heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; @@ -2486,12 +2352,12 @@ internal struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, Vec2I32 size, D3D desc.Flags = 
D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state); - r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap); - r->uav_descriptor = descriptor_alloc(G.cbv_srv_uav_heap); - r->rtv_descriptor = descriptor_alloc(G.rtv_heap); - ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle); - ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, 0, 0, r->uav_descriptor->handle); - ID3D12Device_CreateRenderTargetView(G.device, r->resource, 0, r->rtv_descriptor->handle); + r->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap); + r->uav_descriptor = descriptor_alloc(g->cbv_srv_uav_heap); + r->rtv_descriptor = descriptor_alloc(g->rtv_heap); + ID3D12Device_CreateShaderResourceView(g->device, r->resource, 0, r->srv_descriptor->handle); + ID3D12Device_CreateUnorderedAccessView(g->device, r->resource, 0, 0, r->uav_descriptor->handle); + ID3D12Device_CreateRenderTargetView(g->device, r->resource, 0, r->rtv_descriptor->handle); r->texture_size = size; return r; @@ -2505,10 +2371,11 @@ Inline Mat4x4 calculate_vp(Xform view, f32 viewport_width, f32 viewport_height) return MulMat4x4(projection, view4x4); } -internal D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descriptor *descriptor, struct command_descriptor_heap *cdh) +D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descriptor *descriptor, struct command_descriptor_heap *cdh) { + GPU_D12_SharedState *g = &GPU_D12_shared_state; struct D3D12_GPU_DESCRIPTOR_HANDLE result = ZI; - result.ptr = cdh->start_gpu_handle.ptr + descriptor->index * G.desc_sizes[descriptor->heap->type]; + result.ptr = cdh->start_gpu_handle.ptr + descriptor->index * g->desc_sizes[descriptor->heap->type]; return result; } @@ -2516,66 +2383,7 @@ internal D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descripto * Render sig * ========================== 
*/ -struct render_sig { - Arena *arena; - RandState rand; - u32 frame_index; - - /* Material instances */ - u32 num_material_instance_descs; - Arena *material_instance_descs_arena; - - /* Ui instances */ - u32 num_ui_rect_instance_descs; - Arena *ui_rect_instance_descs_arena; - - /* UI shapes */ - Arena *ui_shape_verts_arena; - Arena *ui_shape_indices_arena; - - /* Grids */ - u32 num_material_grid_descs; - Arena *material_grid_descs_arena; - - /* Resources */ - struct dx12_resource *albedo; - struct dx12_resource *emittance; - struct dx12_resource *emittance_flood_read; - struct dx12_resource *emittance_flood_target; - struct dx12_resource *shade_read; - struct dx12_resource *shade_target; - struct dx12_resource *ui_target; -}; - -struct material_instance_desc { - Xform xf; - u32 texture_id; - ClipRect clip; - u32 tint; - b32 is_light; - Vec3 light_emittance; - u32 grid_id; -}; - -struct ui_rect_instance_desc { - Xform xf; - u32 texture_id; - ClipRect clip; - u32 tint; -}; - -struct material_grid_desc { - f32 line_thickness; - f32 line_spacing; - Vec2 offset; - u32 bg0_color; - u32 bg1_color; - u32 line_color; - u32 x_color; - u32 y_color; -}; - -internal struct render_sig *render_sig_alloc(void) +struct render_sig *render_sig_alloc(void) { __prof; struct render_sig *sig = 0; @@ -2594,7 +2402,7 @@ internal struct render_sig *render_sig_alloc(void) return sig; } -internal void render_sig_reset(struct render_sig *sig) +void render_sig_reset(struct render_sig *sig) { __prof; @@ -2615,19 +2423,21 @@ internal void render_sig_reset(struct render_sig *sig) ResetArena(sig->material_grid_descs_arena); } -GPU_RenderSig *gp_render_sig_alloc(void) +GPU_RenderSig *GPU_AllocRenderSig(void) { __prof; struct render_sig *sig = render_sig_alloc(); return (GPU_RenderSig *)sig; } -u32 gp_push_render_cmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *cmd_desc) +u32 GPU_PushRenderCmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *cmd_desc) { u32 ret = 0; struct render_sig *sig = (struct 
render_sig *)render_sig; - if (sig) { - switch (cmd_desc->kind) { + if (sig) + { + switch (cmd_desc->kind) + { default: break; case GP_RENDER_CMD_KIND_DRAW_MATERIAL: @@ -2660,13 +2470,15 @@ u32 gp_push_render_cmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *cmd_desc) u32 color = cmd_desc->ui_shape.color; K_ShapeVert *verts = PushStructsNoZero(sig->ui_shape_verts_arena, K_ShapeVert, cmd_desc->ui_shape.vertices.count); u32 *indices = PushStructsNoZero(sig->ui_shape_indices_arena, u32, cmd_desc->ui_shape.indices.count); - for (u32 i = 0; i < cmd_desc->ui_shape.vertices.count; ++i) { + for (u32 i = 0; i < cmd_desc->ui_shape.vertices.count; ++i) + { K_ShapeVert *v = &verts[i]; v->pos = cmd_desc->ui_shape.vertices.points[i]; v->color_srgb = color; } u32 vert_offset = verts - (K_ShapeVert *)ArenaBase(sig->ui_shape_verts_arena); - for (u32 i = 0; i < cmd_desc->ui_shape.indices.count; ++i) { + for (u32 i = 0; i < cmd_desc->ui_shape.indices.count; ++i) + { indices[i] = cmd_desc->ui_shape.indices.indices[i] + vert_offset; } } break; @@ -2693,9 +2505,10 @@ u32 gp_push_render_cmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *cmd_desc) * Render * ========================== */ -GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams params) +GPU_Resource *GPU_RunRender(GPU_RenderSig *gp_render_sig, GPU_RenderParams params) { __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; TempArena scratch = BeginScratchNoConflict(); struct render_sig *rsig = (struct render_sig *)gp_render_sig; ++rsig->frame_index; @@ -2710,7 +2523,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param /* Allocate render buffers */ - if (rsig->shade_target && !EqVec2I32(render_size, rsig->shade_target->texture_size)) { + if (rsig->shade_target && !EqVec2I32(render_size, rsig->shade_target->texture_size)) + { __profn("Release sig resources"); fenced_release(rsig->albedo, FENCED_RELEASE_KIND_RESOURCE); fenced_release(rsig->emittance, 
FENCED_RELEASE_KIND_RESOURCE); @@ -2720,7 +2534,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param fenced_release(rsig->shade_target, FENCED_RELEASE_KIND_RESOURCE); rsig->shade_target = 0; } - if (!rsig->shade_target) { + if (!rsig->shade_target) + { __profn("Allocate sig resources"); rsig->albedo = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, render_size, D3D12_RESOURCE_STATE_RENDER_TARGET); rsig->emittance = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, render_size, D3D12_RESOURCE_STATE_RENDER_TARGET); @@ -2731,11 +2546,13 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param } /* Allocate ui buffers */ - if (rsig->ui_target && !EqVec2I32(ui_size, rsig->ui_target->texture_size)) { + if (rsig->ui_target && !EqVec2I32(ui_size, rsig->ui_target->texture_size)) + { fenced_release(rsig->ui_target, FENCED_RELEASE_KIND_RESOURCE); rsig->ui_target = 0; } - if (!rsig->ui_target) { + if (!rsig->ui_target) + { rsig->ui_target = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, ui_size, D3D12_RESOURCE_STATE_RENDER_TARGET); } @@ -2746,7 +2563,7 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_blit")); struct pipeline *ui_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_ui")); struct pipeline *shape_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_shape")); - struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT]; + struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT]; struct command_list *cl = command_list_open(cq->cl_pool); { __profn("Run render"); @@ -2778,7 +2595,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param /* Process material instances */ { __profn("Process material instances"); - for (u32 i = 0; i < rsig->num_material_instance_descs; ++i) { + for (u32 i = 0; i < rsig->num_material_instance_descs; ++i) + { struct material_instance_desc 
*desc = &((struct material_instance_desc *)ArenaBase(rsig->material_instance_descs_arena))[i]; K_MaterialInstance *instance = &material_instances[i]; instance->tex_nurid = desc->texture_id; @@ -2795,7 +2613,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param /* Process ui rect instances */ { __profn("Process ui rect instances"); - for (u32 i = 0; i < rsig->num_ui_rect_instance_descs; ++i) { + for (u32 i = 0; i < rsig->num_ui_rect_instance_descs; ++i) + { struct ui_rect_instance_desc *desc = &((struct ui_rect_instance_desc *)ArenaBase(rsig->ui_rect_instance_descs_arena))[i]; K_UiInstance *instance = &ui_rect_instances[i]; instance->tex_nurid = desc->texture_id; @@ -2809,7 +2628,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param /* Process grids */ { __profn("Process grids"); - for (u32 i = 0; i < rsig->num_material_grid_descs; ++i) { + for (u32 i = 0; i < rsig->num_material_grid_descs; ++i) + { struct material_grid_desc *desc = &((struct material_grid_desc *)ArenaBase(rsig->material_grid_descs_arena))[i]; K_MaterialGrid *grid = &grids[i]; grid->line_thickness = desc->line_thickness; @@ -2834,7 +2654,7 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param struct command_buffer *grid_buffer = command_list_push_buffer(cl, rsig->num_material_grid_descs, grids); /* Upload descriptor heap */ - struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap); + struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, g->cbv_srv_uav_heap); ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap }; ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps); @@ -2864,7 +2684,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param } /* Material pass */ - if (material_pipeline->success) { + if (material_pipeline->success) + { __profn("Material pass"); 
__profnc_dx12(cl->cq->prof, cl->cl, "Material pass", Rgb32F(0.5, 0.2, 0.2)); @@ -2911,7 +2732,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param } /* Flood pass */ - if (flood_pipeline->success && !params.effects_disabled) { + if (flood_pipeline->success && !params.effects_disabled) + { __profn("Flood pass"); __profnc_dx12(cl->cq->prof, cl->cl, "Flood pass", Rgb32F(0.5, 0.2, 0.2)); @@ -2923,7 +2745,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param /* TODO: Remove this */ u64 max_steps = GetGstat(GSTAT_DEBUG_STEPS); u64 step = 0; - while (step_length != 0 && step < max_steps) { + while (step_length != 0 && step < max_steps) + { __profn("Flood step"); __profnc_dx12(cl->cq->prof, cl->cl, "Flood step", Rgb32F(0.5, 0.2, 0.2)); @@ -2954,9 +2777,12 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param rsig->emittance_flood_target = swp; /* Update step */ - if (step_length == -1) { + if (step_length == -1) + { step_length = MaxI32(render_size.x, render_size.y) / 2; - } else { + } + else + { step_length /= 2; } ++step; @@ -2991,7 +2817,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param } /* Shade pass */ - if (shade_pipeline->success) { + if (shade_pipeline->success) + { __profn("Shade pass"); __profnc_dx12(cl->cq->prof, cl->cl, "Shade pass", Rgb32F(0.5, 0.2, 0.2)); @@ -2999,7 +2826,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param command_list_set_pipeline(cl, shade_pipeline); u32 shade_flags = K_SHADE_FLAG_NONE; - if (params.effects_disabled) { + if (params.effects_disabled) + { shade_flags |= K_SHADE_FLAG_DISABLE_EFFECTS; } @@ -3052,7 +2880,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param } /* UI blit pass */ - if (blit_pipeline->success) { + if (blit_pipeline->success) + { __profn("UI blit pass"); __profnc_dx12(cl->cq->prof, cl->cl, "UI blit pass", Rgb32F(0.5, 0.2, 
0.2)); @@ -3084,7 +2913,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param } /* UI rect pass */ - if (ui_pipeline->success) { + if (ui_pipeline->success) + { __profn("UI rect pass"); __profnc_dx12(cl->cq->prof, cl->cl, "UI rect pass", Rgb32F(0.5, 0.2, 0.2)); @@ -3114,7 +2944,8 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param } /* UI shape pass */ - if (shape_pipeline->success) { + if (shape_pipeline->success) + { __profn("UI shape pass"); __profnc_dx12(cl->cq->prof, cl->cl, "UI shape pass", Rgb32F(0.5, 0.2, 0.2)); @@ -3156,28 +2987,33 @@ GPU_Resource *gp_run_render(GPU_RenderSig *gp_render_sig, GPU_RenderParams param * Memory info * ========================== */ -GPU_MemoryInfo gp_query_memory_info(void) +GPU_MemoryInfo GPU_QueryMemoryInfo(void) { + GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_MemoryInfo result = ZI; HRESULT hr = 0; IDXGIAdapter3 *dxgiAdapter3 = 0; - if (SUCCEEDED(hr)) { - hr = IDXGIAdapter_QueryInterface(G.adapter, &IID_IDXGIAdapter3, (void **)&dxgiAdapter3); + if (SUCCEEDED(hr)) + { + hr = IDXGIAdapter_QueryInterface(g->adapter, &IID_IDXGIAdapter3, (void **)&dxgiAdapter3); } - if (SUCCEEDED(hr)) { + if (SUCCEEDED(hr)) + { struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI; IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info); result.local_used = info.CurrentUsage; result.local_budget = info.Budget; } - if (SUCCEEDED(hr)) { + if (SUCCEEDED(hr)) + { struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI; IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info); result.non_local_used = info.CurrentUsage; result.non_local_budget = info.Budget; } - if (dxgiAdapter3) { + if (dxgiAdapter3) + { IDXGIAdapter_Release(dxgiAdapter3); } return result; @@ -3187,12 +3023,15 @@ GPU_MemoryInfo gp_query_memory_info(void) * Swapchain * ========================== */ -internal void swapchain_init_resources(struct swapchain 
*swapchain) +void swapchain_init_resources(struct swapchain *swapchain) { - for (u32 i = 0; i < countof(swapchain->buffers); ++i) { + GPU_D12_SharedState *g = &GPU_D12_shared_state; + for (u32 i = 0; i < countof(swapchain->buffers); ++i) + { ID3D12Resource *resource = 0; HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, i, &IID_ID3D12Resource, (void **)&resource); - if (FAILED(hr)) { + if (FAILED(hr)) + { /* TODO: Don't panic */ P_Panic(Lit("Failed to get swapchain buffer")); } @@ -3200,26 +3039,30 @@ internal void swapchain_init_resources(struct swapchain *swapchain) ZeroStruct(sb); sb->swapchain = swapchain; sb->resource = resource; - sb->rtv_descriptor = descriptor_alloc(G.rtv_heap); + sb->rtv_descriptor = descriptor_alloc(g->rtv_heap); sb->state = D3D12_RESOURCE_STATE_COMMON; - ID3D12Device_CreateRenderTargetView(G.device, sb->resource, 0, sb->rtv_descriptor->handle); + ID3D12Device_CreateRenderTargetView(g->device, sb->resource, 0, sb->rtv_descriptor->handle); } } -GPU_Swapchain *gp_swapchain_alloc(P_Window *window, Vec2I32 resolution) +GPU_Swapchain *GPU_AllocSwapchain(P_Window *window, Vec2I32 resolution) { + GPU_D12_SharedState *g = &GPU_D12_shared_state; HRESULT hr = 0; HWND hwnd = (HWND)P_GetInternalWindowHandle(window); - struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT]; + struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT]; struct swapchain *swapchain = 0; { - P_Lock lock = P_LockE(&G.swapchains_mutex); - if (G.first_free_swapchain) { - swapchain = G.first_free_swapchain; - G.first_free_swapchain = swapchain->next_free; - } else { - swapchain = PushStruct(G.swapchains_arena, struct swapchain); + P_Lock lock = P_LockE(&g->swapchains_mutex); + if (g->first_free_swapchain) + { + swapchain = g->first_free_swapchain; + g->first_free_swapchain = swapchain->next_free; + } + else + { + swapchain = PushStruct(g->swapchains_arena, struct swapchain); } P_Unlock(&lock); } @@ -3239,15 +3082,17 @@ GPU_Swapchain 
*gp_swapchain_alloc(P_Window *window, Vec2I32 resolution) desc.Flags = DX12_SWAPCHAIN_FLAGS; desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; - hr = IDXGIFactory2_CreateSwapChainForHwnd(G.factory, (IUnknown *)cq->cq, hwnd, &desc, 0, 0, &swapchain1); - if (FAILED(hr)) { + hr = IDXGIFactory2_CreateSwapChainForHwnd(g->factory, (IUnknown *)cq->cq, hwnd, &desc, 0, 0, &swapchain1); + if (FAILED(hr)) + { P_Panic(Lit("Failed to create IDXGISwapChain1")); } } /* Upgrade to swapchain3 */ hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain->swapchain); - if (FAILED(hr)) { + if (FAILED(hr)) + { P_Panic(Lit("Failed to create IDXGISwapChain3")); } @@ -3259,7 +3104,7 @@ GPU_Swapchain *gp_swapchain_alloc(P_Window *window, Vec2I32 resolution) #endif /* Disable Alt+Enter changing monitor resolution to match window size */ - IDXGIFactory_MakeWindowAssociation(G.factory, hwnd, DXGI_MWA_NO_ALT_ENTER); + IDXGIFactory_MakeWindowAssociation(g->factory, hwnd, DXGI_MWA_NO_ALT_ENTER); IDXGISwapChain1_Release(swapchain1); swapchain->hwnd = hwnd; @@ -3269,17 +3114,18 @@ GPU_Swapchain *gp_swapchain_alloc(P_Window *window, Vec2I32 resolution) return (GPU_Swapchain *)swapchain; } -void gp_swapchain_release(GPU_Swapchain *gp_swapchain) +void GPU_ReleaseSwapchain(GPU_Swapchain *gp_swapchain) { /* TODO */ (UNUSED)gp_swapchain; } -void gp_swapchain_wait(GPU_Swapchain *gp_swapchain) +void GPU_WaitOnSwapchain(GPU_Swapchain *gp_swapchain) { #if DX12_WAIT_FRAME_LATENCY > 0 struct swapchain *swapchain = (struct swapchain *)gp_swapchain; - if (swapchain->waitable) { + if (swapchain->waitable) + { WaitForSingleObjectEx(swapchain->waitable, 1000, 1); } #else @@ -3287,20 +3133,22 @@ void gp_swapchain_wait(GPU_Swapchain *gp_swapchain) #endif } -internal struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, Vec2I32 resolution) +struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, Vec2I32 resolution) 
{ __prof; + GPU_D12_SharedState *g = &GPU_D12_shared_state; resolution.x = MaxI32(resolution.x, 1); resolution.y = MaxI32(resolution.y, 1); b32 should_rebuild = !EqVec2I32(swapchain->resolution, resolution); - if (should_rebuild) { + if (should_rebuild) + { HRESULT hr = 0; - struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT]; + struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT]; /* Lock direct queue submissions (in case any write to backbuffer) */ /* TODO: Less overkill approach - Only flush present_blit since we know it's the only operation targeting backbuffer */ P_Lock lock = P_LockE(&cq->submit_fence_mutex); //DEBUGBREAKABLE; - //P_Lock lock = P_LockE(&G.global_command_list_record_mutex); + //P_Lock lock = P_LockE(&g->global_command_list_record_mutex); { /* Flush direct queue */ //ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, ++cq->submit_fence_target); @@ -3312,7 +3160,8 @@ internal struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, } /* Release buffers */ - for (u32 i = 0; i < countof(swapchain->buffers); ++i) { + for (u32 i = 0; i < countof(swapchain->buffers); ++i) + { struct swapchain_buffer *sb = &swapchain->buffers[i]; descriptor_release(sb->rtv_descriptor); ID3D12Resource_Release(sb->resource); @@ -3320,7 +3169,8 @@ internal struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, /* Resize buffers */ hr = IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, DX12_SWAPCHAIN_FLAGS); - if (FAILED(hr)) { + if (FAILED(hr)) + { /* TODO: Don't panic */ P_Panic(Lit("Failed to resize swapchain")); } @@ -3340,13 +3190,15 @@ internal struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, * Present * ========================== */ -internal void present_blit(struct swapchain_buffer *dst, struct dx12_resource *src, Xform src_xf) +void present_blit(struct swapchain_buffer *dst, struct dx12_resource *src, Xform src_xf) { __prof; + 
GPU_D12_SharedState *g = &GPU_D12_shared_state; struct pipeline_scope *pipeline_scope = pipeline_scope_begin(); struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_blit")); - if (blit_pipeline->success) { - struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT]; + if (blit_pipeline->success) + { + struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT]; struct command_list *cl = command_list_open(cq->cl_pool); { __profn("Present blit"); @@ -3361,7 +3213,7 @@ internal void present_blit(struct swapchain_buffer *dst, struct dx12_resource *s struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, countof(quad_indices), quad_indices); /* Upload descriptor heap */ - struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap); + struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, g->cbv_srv_uav_heap); ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap }; ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps); @@ -3439,7 +3291,7 @@ internal void present_blit(struct swapchain_buffer *dst, struct dx12_resource *s pipeline_scope_end(pipeline_scope); } -void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync) +void GPU_PresentSwapchain(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync) { __prof; struct swapchain *swapchain = (struct swapchain *)gp_swapchain; @@ -3450,7 +3302,8 @@ void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_ present_blit(swapchain_buffer, texture_resource, texture_xf); u32 present_flags = 0; - if (vsync == 0) { + if (vsync == 0) + { present_flags |= (DXGI_PRESENT_ALLOW_TEARING * DX12_ALLOW_TEARING); } @@ -3458,7 +3311,8 @@ void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_ { __profn("Present"); 
HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags); - if (!SUCCEEDED(hr)) { + if (!SUCCEEDED(hr)) + { Assert(0); } } @@ -3469,10 +3323,11 @@ void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_ __profn("Mark queue frames"); /* Lock because frame marks shouldn't occur while command lists are recording */ - P_Lock lock = P_LockE(&G.global_command_list_record_mutex); - for (u32 i = 0; i < countof(G.command_queues); ++i) { + P_Lock lock = P_LockE(&g->global_command_list_record_mutex); + for (u32 i = 0; i < countof(g->command_queues); ++i) + { { - struct command_queue *cq = G.command_queues[i]; + struct command_queue *cq = g->command_queues[i]; __prof_dx12_new_frame(cq->prof); } } @@ -3480,8 +3335,9 @@ void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_ } { __profn("Collect queues"); - for (u32 i = 0; i < countof(G.command_queues); ++i) { - struct command_queue *cq = G.command_queues[i]; + for (u32 i = 0; i < countof(g->command_queues); ++i) + { + struct command_queue *cq = g->command_queues[i]; __prof_dx12_collect(cq->prof); } } @@ -3489,15 +3345,17 @@ void gp_present(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_ } /* ========================== * - * Evictor thread + * Evictor job * ========================== */ -internal P_JobDef(dx12_evictor_job, _) +P_JobDef(dx12_evictor_job, _) { + GPU_D12_SharedState *g = &GPU_D12_shared_state; u64 completed_targets[DX12_NUM_QUEUES] = ZI; b32 shutdown = 0; - while (!shutdown) { + while (!shutdown) + { { __profn("Dx12 evictor run"); TempArena scratch = BeginScratchNoConflict(); @@ -3508,23 +3366,26 @@ internal P_JobDef(dx12_evictor_job, _) struct fenced_release_data *fenced_releases = 0; { __profn("Copyqueued releases"); - P_Lock lock = P_LockE(&G.fenced_releases_mutex); - num_fenced_releases = G.fenced_releases_arena->pos / sizeof(struct fenced_release_data); + P_Lock lock = P_LockE(&g->fenced_releases_mutex); + 
num_fenced_releases = g->fenced_releases_arena->pos / sizeof(struct fenced_release_data); fenced_releases = PushStructsNoZero(scratch.arena, struct fenced_release_data, num_fenced_releases); - CopyBytes(fenced_releases, ArenaBase(G.fenced_releases_arena), G.fenced_releases_arena->pos); - ResetArena(G.fenced_releases_arena); - CopyBytes(targets, G.fenced_release_targets, sizeof(targets)); + CopyBytes(fenced_releases, ArenaBase(g->fenced_releases_arena), g->fenced_releases_arena->pos); + ResetArena(g->fenced_releases_arena); + CopyBytes(targets, g->fenced_release_targets, sizeof(targets)); P_Unlock(&lock); } /* Wait until fences reach target */ { __profn("Check fences"); - for (u32 i = 0; i < countof(targets); ++i) { - while (completed_targets[i] < targets[i]) { - struct command_queue *cq = G.command_queues[i]; + for (u32 i = 0; i < countof(targets); ++i) + { + while (completed_targets[i] < targets[i]) + { + struct command_queue *cq = g->command_queues[i]; completed_targets[i] = ID3D12Fence_GetCompletedValue(cq->submit_fence); - if (completed_targets[i] < targets[i]) { + if (completed_targets[i] < targets[i]) + { __profn("Wait on fence"); { struct dx12_wait_fence_job_sig sig = ZI; @@ -3542,9 +3403,11 @@ internal P_JobDef(dx12_evictor_job, _) } /* Process releases */ - for (u32 i = 0; i < num_fenced_releases; ++i) { + for (u32 i = 0; i < num_fenced_releases; ++i) + { struct fenced_release_data *fr = &fenced_releases[i]; - switch (fr->kind) { + switch (fr->kind) + { default: { /* Unknown handle type */ @@ -3566,13 +3429,14 @@ internal P_JobDef(dx12_evictor_job, _) } EndScratch(scratch); } - P_Lock lock = P_LockE(&G.evictor_wake_mutex); + P_Lock lock = P_LockE(&g->evictor_wake_mutex); { - while (!G.evictor_shutdown && G.evictor_wake_gen == 0) { - P_WaitOnCv(&G.evictor_wake_cv, &lock); + while (!g->evictor_shutdown && g->evictor_wake_gen == 0) + { + P_WaitOnCv(&g->evictor_wake_cv, &lock); } - shutdown = G.evictor_shutdown; - G.evictor_wake_gen = 0; + shutdown = 
g->evictor_shutdown; + g->evictor_wake_gen = 0; } P_Unlock(&lock); } diff --git a/src/gpu/gpu_dx12.h b/src/gpu/gpu_dx12.h new file mode 100644 index 00000000..ce27c7cc --- /dev/null +++ b/src/gpu/gpu_dx12.h @@ -0,0 +1,692 @@ +//////////////////////////////// +//~ D3D12 headers + +#pragma warning(push, 0) +# define UNICODE +# define COBJMACROS +# include +# include +# include +# include +# include +# include +#pragma warning(pop) + +//////////////////////////////// +//~ Dx12 + +#define DX12_ALLOW_TEARING 1 +#define DX12_WAIT_FRAME_LATENCY 1 +#define DX12_SWAPCHAIN_FLAGS (((DX12_ALLOW_TEARING != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | ((DX12_WAIT_FRAME_LATENCY != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)) +#define DX12_SWAPCHAIN_BUFFER_COUNT (4) + +/* Arbitrary limits */ +#define DX12_NUM_CBV_SRV_UAV_DESCRIPTORS (1024 * 64) +#define DX12_NUM_RTV_DESCRIPTORS (1024 * 1) +#define DX12_COMMAND_BUFFER_MIN_SIZE (1024 * 64) + +#define DX12_MULTI_QUEUE !ProfilingIsEnabled +#if DX12_MULTI_QUEUE +# define DX12_QUEUE_DIRECT 0 +# define DX12_QUEUE_COMPUTE 1 +# define DX12_QUEUE_COPY 2 +# define DX12_QUEUE_COPY_BACKGROUND 3 +# define DX12_NUM_QUEUES 4 +#else +# define DX12_QUEUE_DIRECT 0 +# define DX12_QUEUE_COMPUTE 0 +# define DX12_QUEUE_COPY 0 +# define DX12_QUEUE_COPY_BACKGROUND 0 +# define DX12_NUM_QUEUES 1 +#endif + +#if RtcIsEnabled +//# define DX12_DEBUG 1 +# define DX12_DEBUG 0 +#else +# define DX12_DEBUG 0 +#endif + +/* ========================== * + * structs + * ========================== */ + +struct shader_desc +{ + String file; + String func; +}; + +struct pipeline_rtv_desc +{ + DXGI_FORMAT format; + b32 blending; +}; + +struct pipeline_desc +{ + String name; + + /* If a dxc string is set, then it will be used directly instead of looking up dxc from archive using pipeline name */ + String vs_dxc; + String ps_dxc; + String cs_dxc; + + struct pipeline_rtv_desc rtvs[8]; +}; + +struct pipeline +{ + String name; + u64 hash; + b32 success; + b32 is_gfx; 
+ String error; + i64 compilation_time_ns; + + /* Lock global pipelines mutex when accessing */ + i64 refcount; + + ID3D12PipelineState *pso; + ID3D12RootSignature *rootsig; + struct pipeline_desc desc; + + struct pipeline *next; +}; + +struct pipeline_error +{ + String msg; + struct pipeline_error *next; +}; + +struct pipeline_include +{ + String name; + u64 name_hash; + struct pipeline_include *next; +}; + +struct pipeline_scope +{ + Arena *arena; + Dict *refs; + struct pipeline_scope *next_free; +}; + +struct command_queue_desc +{ + enum D3D12_COMMAND_LIST_TYPE type; + enum D3D12_COMMAND_QUEUE_PRIORITY priority; + String dbg_name; +}; + +struct command_queue +{ + struct command_queue_desc desc; + ID3D12CommandQueue *cq; + Arena *arena; + + P_Mutex submit_fence_mutex; + u64 submit_fence_target; + ID3D12Fence *submit_fence; + + struct command_list_pool *cl_pool; + +#if ProfilingGpu + __prof_dx12_ctx(prof); +#endif +}; + +struct command_list_pool +{ + struct command_queue *cq; + Arena *arena; + P_Mutex mutex; + struct command_list *first_submitted_command_list; + struct command_list *last_submitted_command_list; +}; + +struct command_list +{ + struct command_queue *cq; + struct command_list_pool *pool; + struct ID3D12CommandAllocator *ca; + struct ID3D12GraphicsCommandList *cl; + P_Lock global_record_lock; + + struct pipeline *cur_pipeline; + + struct command_descriptor_heap *first_command_descriptor_heap; + struct command_buffer *first_command_buffer; + + u64 submitted_fence_target; + struct command_list *prev_submitted; + struct command_list *next_submitted; +}; + +struct command_descriptor_heap +{ + D3D12_DESCRIPTOR_HEAP_TYPE type; + ID3D12DescriptorHeap *heap; + D3D12_CPU_DESCRIPTOR_HANDLE start_cpu_handle; + D3D12_GPU_DESCRIPTOR_HANDLE start_gpu_handle; + + struct command_descriptor_heap *next_in_command_list; + + u64 submitted_fence_target; + struct command_queue *submitted_cq; + struct command_descriptor_heap *prev_submitted; + struct command_descriptor_heap 
*next_submitted; +}; + +struct command_buffer +{ + struct command_buffer_group *group; + + u64 size; + struct dx12_resource *resource; + D3D12_VERTEX_BUFFER_VIEW vbv; + D3D12_INDEX_BUFFER_VIEW Ibv; + + struct command_buffer *next_in_command_list; + + u64 submitted_fence_target; + struct command_queue *submitted_cq; + struct command_buffer *prev_submitted; + struct command_buffer *next_submitted; +}; + +struct command_buffer_group +{ + struct command_buffer *first_submitted; + struct command_buffer *last_submitted; +}; + +struct descriptor +{ + struct cpu_descriptor_heap *heap; + + u32 index; + D3D12_CPU_DESCRIPTOR_HANDLE handle; + + struct descriptor *next_free; +}; + +struct dx12_resource +{ + enum D3D12_RESOURCE_STATES state; + ID3D12Resource *resource; + struct descriptor *cbv_descriptor; + struct descriptor *srv_descriptor; + struct descriptor *uav_descriptor; + struct descriptor *rtv_descriptor; + + D3D12_GPU_VIRTUAL_ADDRESS gpu_address; /* NOTE: 0 for textures */ + + Vec2I32 texture_size; + struct dx12_resource *next_free; +}; + +struct swapchain_buffer +{ + struct swapchain *swapchain; + ID3D12Resource *resource; + struct descriptor *rtv_descriptor; + D3D12_RESOURCE_STATES state; +}; + +struct swapchain +{ + IDXGISwapChain3 *swapchain; + HWND hwnd; + HANDLE waitable; + Vec2I32 resolution; + struct swapchain_buffer buffers[DX12_SWAPCHAIN_BUFFER_COUNT]; + + struct swapchain *next_free; +}; + +struct cpu_descriptor_heap +{ + enum D3D12_DESCRIPTOR_HEAP_TYPE type; + Arena *arena; + P_Mutex mutex; + + u32 descriptor_size; + u32 num_descriptors_reserved; + u32 num_descriptors_capacity; + + struct descriptor *first_free_descriptor; + + ID3D12DescriptorHeap *heap; + struct D3D12_CPU_DESCRIPTOR_HANDLE handle; +}; + +enum fenced_release_kind +{ + FENCED_RELEASE_KIND_NONE, + FENCED_RELEASE_KIND_RESOURCE, + FENCED_RELEASE_KIND_PIPELINE +}; + +struct fenced_release_data +{ + enum fenced_release_kind kind; + void *ptr; +}; + +struct command_queue_alloc_job_sig { struct 
command_queue_desc *descs_in; struct command_queue **cqs_out; }; + +struct pipeline_alloc_job_sig { struct pipeline_desc *descs_in; struct pipeline **pipelines_out; }; + +struct dx12_upload_job_sig { struct dx12_resource *resource; void *data; }; + +struct shader_compile_desc +{ + String src; + String friendly_name; + String entry; + String target; +}; + +struct shader_compile_result +{ + i64 elapsed_ns; + String dxc; + String errors; + b32 success; +}; + +struct shader_compile_job_sig +{ + Arena *arena; + struct shader_compile_desc *descs; + struct shader_compile_result *results; +}; + + +struct render_sig +{ + Arena *arena; + RandState rand; + u32 frame_index; + + /* Material instances */ + u32 num_material_instance_descs; + Arena *material_instance_descs_arena; + + /* Ui instances */ + u32 num_ui_rect_instance_descs; + Arena *ui_rect_instance_descs_arena; + + /* UI shapes */ + Arena *ui_shape_verts_arena; + Arena *ui_shape_indices_arena; + + /* Grids */ + u32 num_material_grid_descs; + Arena *material_grid_descs_arena; + + /* Resources */ + struct dx12_resource *albedo; + struct dx12_resource *emittance; + struct dx12_resource *emittance_flood_read; + struct dx12_resource *emittance_flood_target; + struct dx12_resource *shade_read; + struct dx12_resource *shade_target; + struct dx12_resource *ui_target; +}; + +struct material_instance_desc +{ + Xform xf; + u32 texture_id; + ClipRect clip; + u32 tint; + b32 is_light; + Vec3 light_emittance; + u32 grid_id; +}; + +struct ui_rect_instance_desc +{ + Xform xf; + u32 texture_id; + ClipRect clip; + u32 tint; +}; + +struct material_grid_desc +{ + f32 line_thickness; + f32 line_spacing; + Vec2 offset; + u32 bg0_color; + u32 bg1_color; + u32 line_color; + u32 x_color; + u32 y_color; +}; + +/* ========================== * + * Global state + * ========================== */ + +Struct(GPU_D12_SharedState) +{ + Atomic32 initialized; + + /* Descriptor heaps pool */ + P_Mutex command_descriptor_heaps_mutex; + Arena 
*command_descriptor_heaps_arena; + struct command_descriptor_heap *first_submitted_command_descriptor_heap; + struct command_descriptor_heap *last_submitted_command_descriptor_heap; + + /* Command buffers pool */ + P_Mutex command_buffers_mutex; + Arena *command_buffers_arena; + Dict *command_buffers_dict; + + /* Resources pool */ + P_Mutex resources_mutex; + Arena *resources_arena; + struct dx12_resource *first_free_resource; + + /* Swapchains pool */ + P_Mutex swapchains_mutex; + Arena *swapchains_arena; + struct swapchain *first_free_swapchain; + + /* Shader bytecode archive */ + TAR_Archive dxc_archive; + + /* Pipeline cache */ + P_Mutex pipelines_mutex; + Arena *pipelines_arena; + struct pipeline *first_free_pipeline; + Dict *pipeline_descs; + Dict *top_pipelines; /* Latest pipelines */ + Dict *top_successful_pipelines; /* Latest pipelines that successfully compiled */ + struct pipeline_scope *first_free_pipeline_scope; + + /* Fenced release queue */ + P_Mutex fenced_releases_mutex; + Arena *fenced_releases_arena; + u64 fenced_release_targets[DX12_NUM_QUEUES]; + + /* Factory */ + IDXGIFactory6 *factory; + + /* Adapter */ + IDXGIAdapter1 *adapter; + + /* Device */ + ID3D12Device *device; + + /* Descriptor sizes */ + u32 desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; + u32 desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; + + /* Global descriptor heaps */ + struct cpu_descriptor_heap *cbv_srv_uav_heap; + struct cpu_descriptor_heap *rtv_heap; + + /* Command queues */ + P_Mutex global_command_list_record_mutex; + P_Mutex global_submit_mutex; + struct command_queue *command_queues[DX12_NUM_QUEUES]; + + /* Evictor job */ + P_Counter evictor_job_counter; + P_Cv evictor_wake_cv; + P_Mutex evictor_wake_mutex; + i64 evictor_wake_gen; + b32 evictor_shutdown; +}; + +extern GPU_D12_SharedState GPU_D12_shared_state; + +/* ========================== * + * Startup + * ========================== */ + +void GPU_Startup(void); + +P_ExitFuncDef(gp_shutdown); + +/* 
========================== * + * Dx12 device initialization + * ========================== */ + +void dx12_init_error(String error); + +void dx12_init_device(void); + +/* ========================== * + * Dx12 object initialization + * ========================== */ + +void dx12_init_objects(void); + +/* ========================== * + * Dx12 pipeline initialization + * ========================== */ + +void dx12_init_pipelines(void); + +/* ========================== * + * Noise texture initialization + * ========================== */ + +void dx12_init_noise(void); + +/* ========================== * + * Shader compilation + * ========================== */ + +P_JobDef(shader_compile_job, job); + +/* ========================== * + * Pipeline + * ========================== */ + + P_JobDef(pipeline_alloc_job, job); + +void pipeline_release_now(struct pipeline *pipeline); + +/* ========================== * + * Pipeline cache + * ========================== */ + +struct pipeline_scope *pipeline_scope_begin(void); + +void pipeline_scope_end(struct pipeline_scope *scope); + +extern Readonly struct pipeline g_nil_pipeline; +struct pipeline *pipeline_from_name(struct pipeline_scope *scope, String name); + +void pipeline_register(u64 num_pipelines, struct pipeline **pipelines); + +W_CallbackFuncDef(pipeline_watch_callback, name); + +/* ========================== * + * Descriptor + * ========================== */ + +struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh); + +void descriptor_release(struct descriptor *descriptor); + +/* ========================== * + * CPU descriptor heap + * ========================== */ + +struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type); + +/* ========================== * + * Fenced release + * ========================== */ + +void fenced_release(void *data, enum fenced_release_kind kind); + +/* ========================== * + * Resource + * ========================== */ + +struct 
dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state); + +void dx12_resource_release_now(struct dx12_resource *t); +/* Public release entry point (formerly gp_resource_release); sprite_core.c calls it on the valid textures of evicted cache entries. */ +void GPU_ReleaseResource(GPU_Resource *resource); + +/* ========================== * + * Resource barrier + * ========================== */ + +struct dx12_resource_barrier_desc +{ + enum D3D12_RESOURCE_BARRIER_TYPE type; + struct dx12_resource *resource; + enum D3D12_RESOURCE_STATES new_state; /* 0 if type != D3D12_RESOURCE_BARRIER_TYPE_TRANSITION */ +}; + +void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_descs, struct dx12_resource_barrier_desc *descs); + +/* ========================== * + * Command queue + * ========================== */ + +P_JobDef(command_queue_alloc_job, job); + +void command_queue_release(struct command_queue *cq); + +/* ========================== * + * Command list + * ========================== */ + +struct command_list_pool *command_list_pool_alloc(struct command_queue *cq); + +struct command_list *command_list_open(struct command_list_pool *pool); + +/* TODO: Allow multiple command list submissions */ +u64 command_list_close(struct command_list *cl); + +/* ========================== * + * Command descriptor heap (GPU / shader visible descriptor heap) + * ========================== */ + +struct command_descriptor_heap *command_list_push_descriptor_heap(struct command_list *cl, struct cpu_descriptor_heap *dh_cpu); + +/* ========================== * + * Command buffer + * ========================== */ + +u64 command_buffer_hash_from_size(u64 size); + +u64 align_up_pow2(u64 v); +/* Typed front end for _command_list_push_buffer: passes count elements of sizeof(*elems) bytes each, or a zero byte length with stride 1 when elems is null. count is parenthesized so a compound expression is scaled as a whole; elems is expanded more than once, so pass a side-effect-free expression. */ +#define command_list_push_buffer(cl, count, elems) _command_list_push_buffer((cl), (count) * ((elems) ? sizeof(*(elems)) : 0), (elems), (elems) ?
sizeof(*(elems)) : 1) +struct command_buffer *_command_list_push_buffer(struct command_list *cl, u64 data_len, void *data, u64 data_stride); + +/* ========================== * + * Wait job + * ========================== */ + +struct dx12_wait_fence_job_sig +{ + ID3D12Fence *fence; + u64 target; +}; + +P_JobDef(dx12_wait_fence_job, job); + +/* ========================== * + * Texture + * ========================== */ +/* Formerly gp_texture_alloc. The visible callers pass a GP_TEXTURE_FORMAT_* value, flags of 0, and a pointer to the initial pixels. */ +GPU_Resource *GPU_AllocTexture(GPU_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data); + +Vec2I32 GPU_GetTextureSize(GPU_Resource *resource); + +/* ========================== * + * Upload + * ========================== */ + +P_JobDef(dx12_upload_job, job); + +/* ========================== * + * Run utils + * ========================== */ + +void command_list_set_pipeline(struct command_list *cl, struct pipeline *pipeline); + + +void command_list_set_sig(struct command_list *cl, void *src, u32 size); + +struct D3D12_VIEWPORT viewport_from_rect(Rect r); + +D3D12_RECT scissor_from_rect(Rect r); + +D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer *cb, u32 vertex_size); + +D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *cb, DXGI_FORMAT format); + +struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, Vec2I32 size, D3D12_RESOURCE_STATES initial_state); + +/* Calculate the view projection matrix. NOTE(review): declared Inline with no body visible here - confirm a definition is visible wherever it is called. */ +Inline Mat4x4 calculate_vp(Xform view, f32 viewport_width, f32 viewport_height); + +D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descriptor *descriptor, struct command_descriptor_heap *cdh); + +/* ========================== * + * Render sig + * ========================== */ + +struct render_sig *render_sig_alloc(void); + +void render_sig_reset(struct render_sig *sig); +/* Formerly gp_render_sig_alloc; user_core.c keeps the result in G.render_sig. */ +GPU_RenderSig *GPU_AllocRenderSig(void); +/* Formerly gp_push_render_cmd. D_DrawGrid stores the returned u32 and hands it back through material.grid_cmd_id of a later command. */ +u32 GPU_PushRenderCmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *cmd_desc); + +/* ========================== * + * Render + * ========================== */ +/* Formerly gp_run_render; user_core.c passes the returned resource on to GPU_PresentSwapchain. */ +GPU_Resource
*GPU_RunRender(GPU_RenderSig *gp_render_sig, GPU_RenderParams params); + +/* ========================== * + * Memory info + * ========================== */ +/* Formerly gp_query_memory_info; user_core.c stores the result in a local named vram. */ +GPU_MemoryInfo GPU_QueryMemoryInfo(void); + +/* ========================== * + * Swapchain + * ========================== */ + +void swapchain_init_resources(struct swapchain *swapchain); +/* Formerly gp_swapchain_alloc; user_core.c creates its swapchain with a VEC2I32(100, 100) resolution right after allocating the window. */ +GPU_Swapchain *GPU_AllocSwapchain(P_Window *window, Vec2I32 resolution); + +void GPU_ReleaseSwapchain(GPU_Swapchain *gp_swapchain); +/* Formerly gp_swapchain_wait; user_core.c calls it inside its "Swapchain wait" profiling scope. */ +void GPU_WaitOnSwapchain(GPU_Swapchain *gp_swapchain); + +struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, Vec2I32 resolution); + +/* ========================== * + * Present + * ========================== */ + +void present_blit(struct swapchain_buffer *dst, struct dx12_resource *src, Xform src_xf); +/* Formerly gp_present; user_core.c passes the resource returned by GPU_RunRender as texture and its VSYNC constant as vsync. */ +void GPU_PresentSwapchain(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync); + +/* ========================== * + * Evictor job + * ========================== */ + +P_JobDef(dx12_evictor_job, _); diff --git a/src/sprite/sprite_core.c b/src/sprite/sprite_core.c index 5a9af689..de5cff60 100644 --- a/src/sprite/sprite_core.c +++ b/src/sprite/sprite_core.c @@ -206,7 +206,7 @@ S_StartupReceipt sprite_startup(void) u32 width = 64; u32 height = 64; u32 *pixels = generate_purple_black_image(scratch.arena, width, height); - G.nil_texture->gp_texture = gp_texture_alloc(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(width, height), pixels); + G.nil_texture->gp_texture = GPU_AllocTexture(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM, 0, VEC2I32(width, height), pixels); EndScratch(scratch); } @@ -355,7 +355,7 @@ internal void cache_entry_load_texture(struct cache_ref ref, S_Tag tag) e->texture->height = decoded.height; e->texture->valid = 1; e->texture->loaded = 1; - e->texture->gp_texture = gp_texture_alloc(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB, 0, VEC2I32(decoded.width, decoded.height), decoded.pixels); + e->texture->gp_texture =
GPU_AllocTexture(GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB, 0, VEC2I32(decoded.width, decoded.height), decoded.pixels); /* TODO: Query gpu for more accurate texture size in VRAM */ memory_size += (decoded.width * decoded.height) * sizeof(*decoded.pixels); success = 1; @@ -1323,7 +1323,7 @@ internal P_JobDef(sprite_evictor_job, _) for (struct evict_node *en = first_evicted; en; en = en->next_evicted) { struct cache_entry *n = en->cache_entry; if (n->kind == CACHE_ENTRY_KIND_TEXTURE && n->texture->valid) { - gp_resource_release(n->texture->gp_texture); + GPU_ReleaseResource(n->texture->gp_texture); } ReleaseArena(n->arena); } diff --git a/src/user/user_core.c b/src/user/user_core.c index c533aeb4..fd2407bf 100644 --- a/src/user/user_core.c +++ b/src/user/user_core.c @@ -220,14 +220,14 @@ struct user_startup_receipt user_startup(F_StartupReceipt *font_sr, /* GPU handles */ G.world_to_ui_xf = XformIdentity; G.world_to_render_xf = XformIdentity; - G.render_sig = gp_render_sig_alloc(); + G.render_sig = GPU_AllocRenderSig(); G.console_logs_arena = AllocArena(Gibi(64)); //P_RegisterLogCallback(debug_console_log_callback, P_LogLevel_Success); P_RegisterLogCallback(debug_console_log_callback, P_LogLevel_Debug); G.window = P_AllocWindow(); - G.swapchain = gp_swapchain_alloc(G.window, VEC2I32(100, 100)); + G.swapchain = GPU_AllocSwapchain(G.window, VEC2I32(100, 100)); P_ShowWindow(G.window); /* Start jobs */ @@ -1917,7 +1917,7 @@ internal void user_update(P_Window *window) * Query vram * ========================== */ - GPU_MemoryInfo vram = gp_query_memory_info(); + GPU_MemoryInfo vram = GPU_QueryMemoryInfo(); /* ========================== * * Draw global debug info @@ -2063,11 +2063,11 @@ internal void user_update(P_Window *window) params.world_to_render_xf = G.world_to_render_xf; params.render_to_ui_xf = G.render_to_ui_xf; params.effects_disabled = effects_disabled; - render_texture = gp_run_render(G.render_sig, params); + render_texture = GPU_RunRender(G.render_sig, params); } 
/* Present */ - gp_present(G.swapchain, backbuffer_resolution, render_texture, G.ui_to_screen_xf, VSYNC); + GPU_PresentSwapchain(G.swapchain, backbuffer_resolution, render_texture, G.ui_to_screen_xf, VSYNC); } /* ========================== * @@ -2088,7 +2088,7 @@ internal P_JobDef(user_update_job, _) __profn("User sleep"); { __profn("Swapchain wait"); - gp_swapchain_wait(G.swapchain); + GPU_WaitOnSwapchain(G.swapchain); } { __profn("Frame limiter wait");