#include "gp.h"
#include "sys.h"
#include "arena.h"
#include "memory.h"
#include "string.h"
#include "app.h"
#include "log.h"
#include "resource.h"
#include "atomic.h"
#include "util.h"
#include "rand.h"
#include "sprite.h"
#include "gstat.h"

/* Include common shader types */
#define SH_CPU 1
#include "../res/sh/sh_common.h"

#pragma warning(push, 0)
# define UNICODE
# define COBJMACROS
/* NOTE(review): the angle-bracket header names were lost from this file
 * (stripped as markup by whatever tool last touched it). The list below is
 * reconstructed from the APIs this file uses (CreateEvent, D3D12*,
 * IDXGIFactory6, IDXGIInfoQueue, D3DCompile, UINT_MAX) — confirm against
 * version control before relying on it. */
# include <windows.h>
# include <d3d12.h>
# include <dxgi1_6.h>
# include <dxgidebug.h>
# include <d3dcompiler.h>
# include <limits.h>
#pragma warning(pop)

#pragma comment(lib, "d3d12")
#pragma comment(lib, "dxgi")
#pragma comment(lib, "dxguid")
#pragma comment(lib, "d3dcompiler")

#if PROFILING_D3D
/* For RegOpenKeyEx */
/* NOTE(review): header name also lost; RegOpenKeyExW lives in winreg.h
 * (already pulled in by windows.h) — confirm the original include. */
# include <winreg.h>
# pragma comment(lib, "advapi32")
#endif

//#define DX12_WAIT_FRAME_LATENCY 1
//#define DX12_SWAPCHAIN_FLAGS ((DX12_ALLOW_TEARING * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | (DX12_WAIT_FRAME_LATENCY * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT))
#define DX12_ALLOW_TEARING 1
#define DX12_SWAPCHAIN_FLAGS (DX12_ALLOW_TEARING * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING)
#define DX12_SWAPCHAIN_BUFFER_COUNT (3)
#define DX12_SWAPCHAIN_FORMAT (DXGI_FORMAT_R8G8B8A8_UNORM)
//#define DX12_SWAPCHAIN_RTV_FORMAT (DXGI_FORMAT_R8G8B8A8_UNORM_SRGB)

/* Arbitrary limits */
#define DX12_NUM_CBV_SRV_UAV_DESCRIPTORS (1024 * 64)
#define DX12_NUM_RTV_DESCRIPTORS (1024 * 1)
#define DX12_COMMAND_BUFFER_MIN_SIZE (1024 * 64)

/* With DX12_MULTI_QUEUE off, every queue index aliases the single direct
 * queue — the rest of the code can index by role either way. */
#define DX12_MULTI_QUEUE 1
#if DX12_MULTI_QUEUE
# define DX12_QUEUE_DIRECT 0
# define DX12_QUEUE_COMPUTE 1
# define DX12_QUEUE_COPY 2
# define DX12_QUEUE_COPY_BACKGROUND 3
# define DX12_NUM_QUEUES 4
#else
# define DX12_QUEUE_DIRECT 0
# define DX12_QUEUE_COMPUTE 0
# define DX12_QUEUE_COPY 0
# define DX12_QUEUE_COPY_BACKGROUND 0
# define DX12_NUM_QUEUES 1
#endif

#if RTC
# define DX12_DEBUG 1
# define DX12_SHADER_DEBUG 1
#else
# define DX12_DEBUG 0
# define DX12_SHADER_DEBUG 0
#endif

/* Source file + entry-point function name for one shader stage. */
struct shader_desc {
    struct string file;
    struct string func;
};

/* Immutable description used to (re)compile a graphics pipeline. */
struct pipeline_desc {
    struct string name;
    struct shader_desc vs;
    struct shader_desc ps;
    /* Input layout; unused tail entries have a NULL SemanticName. */
    D3D12_INPUT_ELEMENT_DESC ia[8];
};

/* A compiled pipeline: PSO + root signature + compile diagnostics. */
struct pipeline {
    b32 success;
    struct arena *arena;
    struct string name;
    u64 hash;
    struct pipeline_error *first_error;
    struct pipeline_error *last_error;
    i64 compilation_time_ns;
    /* Dict with shader source & included file names as keys */
    struct dict *dependencies;
    /* Lock global pipelines mutex when accessing */
    i64 refcount;
    ID3D12PipelineState *pso;
    ID3D12RootSignature *rootsig;
    struct pipeline_desc desc;
};

/* One entry in a pipeline's linked list of compile errors. */
struct pipeline_error {
    struct string msg;
    struct pipeline_error *next;
};

struct pipeline_include {
    struct string name;
    u64 name_hash;
    struct pipeline_include *next;
};

/* Per-frame/ per-user refcount scope over the pipeline cache. */
struct pipeline_scope {
    struct arena *arena;
    struct dict *refs;
    struct pipeline_scope *next_free;
};

/* One hardware queue plus its submission fence and command-list pool. */
struct command_queue {
    D3D12_COMMAND_LIST_TYPE type;
    ID3D12CommandQueue *cq;
    struct arena *arena;
    struct sys_mutex *submit_fence_mutex;
    u64 submit_fence_target;
    ID3D12Fence *submit_fence;
    struct command_list_pool *cl_pool;
#if PROFILING_D3D
    __prof_dx12_ctx(prof);
#endif
};

struct command_list_pool {
    struct command_queue *cq;
    struct arena *arena;
    struct sys_mutex *mutex;
    struct command_list *first_submitted_command_list;
    struct command_list *last_submitted_command_list;
};

/* A recorded command list plus everything that must outlive it on the GPU
 * (descriptor heaps and upload buffers), reclaimed by fence. */
struct command_list {
    struct command_queue *cq;
    struct command_list_pool *pool;
    struct ID3D12CommandAllocator *ca;
    struct ID3D12GraphicsCommandList *cl;
    struct sys_lock global_record_lock;
    struct command_descriptor_heap *first_command_descriptor_heap;
    struct command_buffer *first_command_buffer;
    u64 submitted_fence_target;
    struct command_list *prev_submitted;
    struct command_list *next_submitted;
};

/* Shader-visible descriptor heap owned by a command list for one submit. */
struct command_descriptor_heap {
    D3D12_DESCRIPTOR_HEAP_TYPE type;
    ID3D12DescriptorHeap *heap;
    D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle;
    D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle;
    struct command_descriptor_heap *next_in_command_list;
    u64 submitted_fence_target;
    struct command_queue *submitted_cq;
    struct command_descriptor_heap *prev_submitted;
    struct command_descriptor_heap *next_submitted;
};

struct
command_buffer { /* Per-submit upload buffer, recycled by size class via the group's fence lists. */
    struct command_buffer_group *group;
    u64 size;
    struct dx12_resource *resource;
    D3D12_VERTEX_BUFFER_VIEW vbv;
    D3D12_INDEX_BUFFER_VIEW Ibv; /* NOTE(review): capitalized `Ibv` is inconsistent with `vbv`; rename would need a file-wide audit */
    struct command_buffer *next_in_command_list;
    u64 submitted_fence_target;
    struct command_queue *submitted_cq;
    struct command_buffer *prev_submitted;
    struct command_buffer *next_submitted;
};

/* Fence-ordered list of in-flight command buffers of one size class. */
struct command_buffer_group {
    struct command_buffer *first_submitted;
    struct command_buffer *last_submitted;
};

/* One slot in a CPU descriptor heap; free-listed through next_free. */
struct descriptor {
    struct cpu_descriptor_heap *heap;
    u32 index;
    D3D12_CPU_DESCRIPTOR_HANDLE handle;
    struct descriptor *next_free;
};

/* GPU resource plus its cached views and last known resource state. */
struct dx12_resource {
    enum D3D12_RESOURCE_STATES state;
    ID3D12Resource *resource;
    struct descriptor *cbv_descriptor;
    struct descriptor *srv_descriptor;
    struct descriptor *uav_descriptor;
    struct descriptor *rtv_descriptor;
    D3D12_GPU_VIRTUAL_ADDRESS gpu_address; /* NOTE: 0 for textures */
    struct v2i32 texture_size;
    struct dx12_resource *next_free;
};

struct swapchain_buffer {
    struct swapchain *swapchain;
    ID3D12Resource *resource;
    struct descriptor *rtv_descriptor;
    D3D12_RESOURCE_STATES state;
};

struct swapchain {
    IDXGISwapChain3 *swapchain;
    HWND hwnd;
    struct v2i32 resolution;
    struct swapchain_buffer buffers[DX12_SWAPCHAIN_BUFFER_COUNT];
};

/* CPU-only (non-shader-visible) descriptor heap with a bump + free-list
 * allocator; mutex-guarded. */
struct cpu_descriptor_heap {
    enum D3D12_DESCRIPTOR_HEAP_TYPE type;
    struct arena *arena;
    struct sys_mutex *mutex;
    u32 descriptor_size;
    u32 num_descriptors_reserved;
    u32 num_descriptors_capacity;
    struct descriptor *first_free_descriptor;
    ID3D12DescriptorHeap *heap;
    struct D3D12_CPU_DESCRIPTOR_HANDLE handle;
};

/* What kind of object a deferred (fence-gated) release refers to. */
enum fenced_release_kind {
    FENCED_RELEASE_KIND_NONE,
    FENCED_RELEASE_KIND_RESOURCE,
    FENCED_RELEASE_KIND_PIPELINE
};

struct fenced_release_data {
    enum fenced_release_kind kind;
    void *ptr;
};

/* ========================== *
 * Global state
 * ========================== */

GLOBAL struct {
    /* Descriptor heaps pool */
    struct sys_mutex *command_descriptor_heaps_mutex;
    struct arena *command_descriptor_heaps_arena;
    struct command_descriptor_heap *first_submitted_command_descriptor_heap;
    struct command_descriptor_heap *last_submitted_command_descriptor_heap;
    /* Command buffers pool */
    struct sys_mutex *command_buffers_mutex;
    struct arena *command_buffers_arena;
    struct dict *command_buffers_dict;
    /* Resources pool */
    struct sys_mutex *resources_mutex;
    struct arena *resources_arena;
    struct dx12_resource *first_free_resource;
    /* Pipeline cache */
    struct sys_mutex *pipelines_mutex;
    struct arena *pipelines_arena;
    struct dict *pipeline_descs;
    struct dict *top_pipelines; /* Latest pipelines */
    struct dict *top_successful_pipelines; /* Latest pipelines that successfully compiled */
    struct pipeline_scope *first_free_pipeline_scope;
    /* Fenced release queue */
    struct sys_mutex *fenced_releases_mutex;
    struct arena *fenced_releases_arena;
    u64 fenced_release_targets[DX12_NUM_QUEUES];
    /* Factory */
    IDXGIFactory6 *factory;
    /* Adapter */
    IDXGIAdapter1 *adapter;
    /* Device */
    ID3D12Device *device;
    /* Descriptor sizes */
    u32 desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
    u32 desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
    /* Global descriptor heaps */
    struct cpu_descriptor_heap *cbv_srv_uav_heap;
    struct cpu_descriptor_heap *rtv_heap;
    /* Command queues */
    struct sys_mutex *global_command_list_record_mutex;
    struct sys_mutex *global_submit_mutex;
    struct command_queue *command_queues[DX12_NUM_QUEUES];
    /* Swapchain */
    struct swapchain swapchain;
    /* Evictor thread */
    struct atomic_i32 evictor_thread_shutdown;
    HANDLE evictor_thread_wake_event;
    struct sys_thread *evictor_thread;
} G = ZI, DEBUG_ALIAS(G, G_gp_dx12);

/* ========================== *
 * Startup
 * ========================== */

INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(gp_shutdown);
INTERNAL void dx12_init_device(void);
INTERNAL void dx12_init_objects(void);
INTERNAL void dx12_init_pipelines(void);
INTERNAL struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type);
INTERNAL struct command_queue *command_queue_alloc(enum
D3D12_COMMAND_LIST_TYPE type, enum D3D12_COMMAND_QUEUE_PRIORITY priority, struct string dbg_name); INTERNAL void command_queue_release(struct command_queue *cq); INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg); INTERNAL void fenced_release(void *data, enum fenced_release_kind kind); #if RESOURCE_RELOADING INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(pipeline_resource_watch_callback, name); #endif struct gp_startup_receipt gp_startup(void) { __prof; /* Initialize command descriptor heaps pool */ G.command_descriptor_heaps_mutex = sys_mutex_alloc(); G.command_descriptor_heaps_arena = arena_alloc(GIGABYTE(64)); /* Initialize command buffers pool */ G.command_buffers_mutex = sys_mutex_alloc(); G.command_buffers_arena = arena_alloc(GIGABYTE(64)); G.command_buffers_dict = dict_init(G.command_buffers_arena, 4096); /* Initialize resources pool */ G.resources_mutex = sys_mutex_alloc(); G.resources_arena = arena_alloc(GIGABYTE(64)); /* Initialize pipeline cache */ G.pipelines_mutex = sys_mutex_alloc(); G.pipelines_arena = arena_alloc(GIGABYTE(64)); G.pipeline_descs = dict_init(G.pipelines_arena, 1024); G.top_pipelines = dict_init(G.pipelines_arena, 1024); G.top_successful_pipelines = dict_init(G.pipelines_arena, 1024); /* Initialize fenced releases queue */ G.fenced_releases_mutex = sys_mutex_alloc(); G.fenced_releases_arena = arena_alloc(GIGABYTE(64)); /* Initialize dx12 */ dx12_init_device(); dx12_init_objects(); dx12_init_pipelines(); /* Register callbacks */ #if RESOURCE_RELOADING resource_register_watch_callback(pipeline_resource_watch_callback); #endif app_register_exit_callback(gp_shutdown); /* Start evictor thread */ G.evictor_thread_wake_event = CreateEvent(NULL, false, false, NULL); G.evictor_thread = sys_thread_alloc(evictor_thread_entry_point, NULL, LIT("GPU resource evictor thread"), PROF_THREAD_GROUP_EVICTORS); struct gp_startup_receipt res = ZI; return res; } INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(gp_shutdown) { __prof; #if 0 /* Release objects to make live 
object reporting less noisy */ //IDXGISwapChain3_Release(G.swapchain); for (u32 i = 0; i < countof(G.command_queues); ++i) { struct command_queue *cq = G.command_queues[i]; cmomand_queue_release(cq); } ID3D12Device_Release(G.device); #else (UNUSED)command_queue_release; #endif atomic_i32_fetch_set(&G.evictor_thread_shutdown, 1); SetEvent(G.evictor_thread_wake_event); sys_thread_wait_release(G.evictor_thread); } /* ========================== * * Dx12 device initialization * ========================== */ INTERNAL void dx12_init_error(struct string error) { struct arena_temp scratch = scratch_begin_no_conflict(); struct string msg = string_format(scratch.arena, LIT("Failed to initialize DirectX 12.\n\n%F"), FMT_STR(error)); sys_panic(msg); scratch_end(scratch); } INTERNAL void dx12_init_device(void) { __prof; struct arena_temp scratch = scratch_begin_no_conflict(); HRESULT hr = 0; /* Enable debug layer */ u32 dxgi_factory_flags = 0; #if DX12_DEBUG { ID3D12Debug *debug_controller0 = NULL; hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0); if (FAILED(hr)) { dx12_init_error(LIT("Failed to create ID3D12Debug0")); } ID3D12Debug1 *debug_controller1 = NULL; hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1); if (FAILED(hr)) { dx12_init_error(LIT("Failed to create ID3D12Debug1")); } ID3D12Debug_EnableDebugLayer(debug_controller0); /* FIXME: Enable this */ //ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, true); ID3D12Debug_Release(debug_controller1); ID3D12Debug_Release(debug_controller0); dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG; } #endif /* Create factory */ hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G.factory); if (FAILED(hr)) { dx12_init_error(LIT("Failed to initialize DXGI factory")); } /* Create device */ { IDXGIAdapter1 *adapter = NULL; ID3D12Device *device = NULL; struct string error = LIT("Could not initialize GPU device."); struct string 
first_gpu_name = ZI; u32 adapter_index = 0; while (true) { hr = IDXGIFactory6_EnumAdapterByGpuPreference(G.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter); if (SUCCEEDED(hr)) { DXGI_ADAPTER_DESC1 desc; IDXGIAdapter1_GetDesc1(adapter, &desc); if (first_gpu_name.len == 0) { first_gpu_name = string_from_wstr_no_limit(scratch.arena, desc.Description); } hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); if (SUCCEEDED(hr)) { break; } ID3D12Device_Release(device); IDXGIAdapter1_Release(adapter); adapter = NULL; device = NULL; ++adapter_index; } else { break; } } if (!device) { if (first_gpu_name.len > 0) { struct string fmt = LIT("Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date."); error = string_format(scratch.arena, fmt, FMT_STR(first_gpu_name)); } dx12_init_error(error); } G.adapter = adapter; G.device = device; } #if DX12_DEBUG /* Enable D3D12 Debug break */ { ID3D12InfoQueue *info = NULL; hr = ID3D12Device_QueryInterface(G.device, &IID_ID3D12InfoQueue, (void **)&info); if (FAILED(hr)) { dx12_init_error(LIT("Failed to query ID3D12Device interface")); } ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE); ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, TRUE); ID3D12InfoQueue_Release(info); } /* Enable DXGI Debug break */ { IDXGIInfoQueue *dxgi_info = NULL; hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info); if (FAILED(hr)) { dx12_init_error(LIT("Failed to get DXGI debug interface")); } IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, TRUE); IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, TRUE); IDXGIInfoQueue_Release(dxgi_info); } #endif #if PROFILING_D3D /* Enable stable power state */ { b32 success = 
true; __profn("Set stable power state"); HKEY key = 0; success = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, KEY_READ, &key) == ERROR_SUCCESS; if (success) { DWORD value = ZI; DWORD dword_size = sizeof(DWORD); success = RegQueryValueExW(key, L"AllowDevelopmentWithoutDevLicense", 0, NULL, (LPBYTE)&value, &dword_size) == ERROR_SUCCESS; RegCloseKey(key); if (success) { success = value != 0; } } logf_info("D3D12 profiling is enabled, attempting to set stable power state (this will increase GPU timing stability at the cost of performance)"); if (success) { logf_info("Machine is in developer mode, calling ID3D12Device::SetStablePowerState"); hr = ID3D12Device_SetStablePowerState(G.device, 1); if (SUCCEEDED(hr)) { logf_info("ID3D12Device::SetStablePowerState succeeded"); } else { success = false; logf_error("ID3D12Device::SetStablePowerState failed"); } } else { logf_warning("Machine is not in developer mode, cannot call ID3D12Device::SetStablePowerState"); } if (!success) { logf_warning("Profiling is enabled, but ID3D12Device::SetStablePowerState could not be called. 
This means that GPU timing may be unreliable."); } } #endif scratch_end(scratch); } /* ========================== * * Dx12 object initialization * ========================== */ INTERNAL void dx12_init_objects(void) { /* Initialize desc sizes */ G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV); G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV); /* Initialize desc counts */ G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS; G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = DX12_NUM_RTV_DESCRIPTORS; /* Create global descriptor heaps */ G.cbv_srv_uav_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); G.rtv_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); /* Create command queues */ G.global_command_list_record_mutex = sys_mutex_alloc(); G.global_submit_mutex = sys_mutex_alloc(); for (u32 i = 0; i < DX12_NUM_QUEUES; ++i) { if (i == DX12_QUEUE_DIRECT) { G.command_queues[i] = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Direct queue")); } else if (i == DX12_QUEUE_COMPUTE) { G.command_queues[i] = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Compute queue")); } else if (i == DX12_QUEUE_COPY) { G.command_queues[i] = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, D3D12_COMMAND_QUEUE_PRIORITY_HIGH, LIT("Copy queue")); } else if (i == DX12_QUEUE_COPY_BACKGROUND) { G.command_queues[i] = command_queue_alloc(D3D12_COMMAND_LIST_TYPE_COPY, 
D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, LIT("Background copy queue"));
        }
    }
}

/* ========================== *
 * Dx12 pipeline initialization
 * ========================== */

INTERNAL void pipeline_alloc(u64 num_pipelines, struct pipeline_desc *descs_in, struct pipeline **pipelines_out);
INTERNAL void pipeline_register(u64 num_pipelines, struct pipeline **pipelines);

/* Register the built-in pipeline descriptions, compile them all (in
 * parallel via pipeline_alloc), report failures, and publish the results
 * into the pipeline cache. */
INTERNAL void dx12_init_pipelines(void) { __prof;
    struct arena_temp scratch = scratch_begin_no_conflict();
    /* Register pipeline descs */
    {
        /* Material pipeline */
        {
            struct pipeline_desc *desc = arena_push(G.pipelines_arena, struct pipeline_desc);
            desc->name = LIT("material");
            desc->vs.file = LIT("sh/material.hlsl");
            desc->ps.file = LIT("sh/material.hlsl");
            desc->vs.func = LIT("vs");
            desc->ps.func = LIT("ps");
            dict_set(G.pipelines_arena, G.pipeline_descs, hash_fnv64(HASH_FNV64_BASIS, desc->name), (u64)desc);
        }
        /* Shape pipeline (the only one with a vertex input layout) */
        {
            struct pipeline_desc *desc = arena_push(G.pipelines_arena, struct pipeline_desc);
            desc->name = LIT("shape");
            desc->vs.file = LIT("sh/shape.hlsl");
            desc->ps.file = LIT("sh/shape.hlsl");
            desc->vs.func = LIT("vs");
            desc->ps.func = LIT("ps");
            desc->ia[0] = (D3D12_INPUT_ELEMENT_DESC) { "pos", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 };
            desc->ia[1] = (D3D12_INPUT_ELEMENT_DESC) { "color_srgb", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 };
            dict_set(G.pipelines_arena, G.pipeline_descs, hash_fnv64(HASH_FNV64_BASIS, desc->name), (u64)desc);
        }
        /* Blit pipeline */
        {
            struct pipeline_desc *desc = arena_push(G.pipelines_arena, struct pipeline_desc);
            desc->name = LIT("blit");
            desc->vs.file = LIT("sh/blit.hlsl");
            desc->ps.file = LIT("sh/blit.hlsl");
            desc->vs.func = LIT("vs");
            desc->ps.func = LIT("ps");
            dict_set(G.pipelines_arena, G.pipeline_descs, hash_fnv64(HASH_FNV64_BASIS, desc->name), (u64)desc);
        }
    }
    /* Compile pipelines */
    u32 num_pipelines = 0;
    /* `descs` marks the current arena position; the loop below pushes one
     * contiguous copy per registered desc right after it. */
    struct pipeline_desc *descs = arena_push_dry(scratch.arena, struct pipeline_desc);
    for (struct dict_entry *entry = G.pipeline_descs->first; entry; entry = entry->next) {
        struct pipeline_desc *desc = (struct pipeline_desc *)entry->value;
        *arena_push(scratch.arena, struct pipeline_desc) = *desc;
        ++num_pipelines;
    }
    struct pipeline **pipelines = arena_push_array(scratch.arena, struct pipeline *, num_pipelines);
    pipeline_alloc(num_pipelines, descs, pipelines);
    for (u32 i = 0; i < num_pipelines; ++i) {
        struct pipeline *pipeline = pipelines[i];
        if (!pipeline->success) {
            struct string error = pipeline->first_error ? pipeline->first_error->msg : LIT("Unknown error");
            struct string msg = string_format(scratch.arena, LIT("Error initializing pipeline \"%F\":\n\n%F"), FMT_STR(pipeline->name), FMT_STR(error));
            log_error(msg);
            sys_message_box(SYS_MESSAGE_BOX_KIND_WARNING, msg);
        }
    }
    pipeline_register(num_pipelines, pipelines);
    scratch_end(scratch);
}

/* ========================== *
 * Shader compilation
 * ========================== */

/* ID3DInclude implementation that records every included file into the
 * pipeline's dependency dict (for hot reload) and keeps the opened
 * resources alive until Close().
 * NOTE(review): Open/Close are declared without STDMETHODCALLTYPE; fine on
 * x64 (single calling convention) but worth confirming if x86 is a target. */
struct dx12_include_handler {
    ID3DInclude d3d_handler;
    ID3DIncludeVtbl vtbl;
    struct pipeline *pipeline;
    struct sys_mutex *pipeline_mutex;
    u64 num_open_resources;
    struct resource open_resources[1024];
};

/* ID3DInclude::Open — resolve an #include, record it as a dependency, and
 * hand its bytes to the compiler. */
INTERNAL HRESULT dx12_include_open(ID3DInclude *d3d_handler, D3D_INCLUDE_TYPE include_type, LPCSTR name_cstr, LPCVOID parent_data, LPCVOID *data_out, UINT *data_len_out) { __prof;
    (UNUSED)include_type;
    (UNUSED)parent_data;
    HRESULT result = E_FAIL;
    struct dx12_include_handler *handler = (struct dx12_include_handler *)d3d_handler;
    struct string name = string_from_cstr_no_limit((char *)name_cstr);
    u64 hash = hash_fnv64(HASH_FNV64_BASIS, name);
    if (handler->num_open_resources >= countof(handler->open_resources)) {
        sys_panic(LIT("Dx12 include handler resource overflow"));
    }
    /* Record the dependency even if the open below fails. */
    struct sys_lock lock = sys_mutex_lock_e(handler->pipeline_mutex);
    {
        struct pipeline *pipeline = handler->pipeline;
        dict_set(pipeline->arena, pipeline->dependencies, hash, 1);
    }
sys_mutex_unlock(&lock); struct resource *res = &handler->open_resources[handler->num_open_resources++]; *res = resource_open(name); if (resource_exists(res)) { ++handler->num_open_resources; struct string data = resource_get_data(res); *data_out = data.text; *data_len_out = data.len; result = S_OK; } return result; } INTERNAL HRESULT dx12_include_close(ID3DInclude *d3d_handler, LPCVOID data) { __prof; (UNUSED)data; struct dx12_include_handler *handler = (struct dx12_include_handler *)d3d_handler; for (u64 i = 0; i < handler->num_open_resources; ++i) { struct resource *res = &handler->open_resources[i]; resource_close(res); } handler->num_open_resources = 0; return S_OK; } INTERNAL struct dx12_include_handler *dx12_include_handler_alloc(struct arena *arena, struct pipeline *pipeline) { struct dx12_include_handler *handler = arena_push(arena, struct dx12_include_handler); handler->d3d_handler.lpVtbl = &handler->vtbl; handler->vtbl.Open = dx12_include_open; handler->vtbl.Close = dx12_include_close; handler->pipeline = pipeline; handler->pipeline_mutex = sys_mutex_alloc(); return handler; } INTERNAL void dx12_include_handler_release(struct dx12_include_handler *handler) { for (u64 i = 0; i < handler->num_open_resources; ++i) { ASSERT(false); /* Resource should have been closed by handler by now */ struct resource *res = &handler->open_resources[i]; resource_close(res); } handler->num_open_resources = 0; sys_mutex_release(handler->pipeline_mutex); } enum shader_compile_job_kind { SHADER_COMPILE_TASK_KIND_VS, SHADER_COMPILE_TASK_KIND_PS }; struct shader_compile_job_param { /* In */ enum shader_compile_job_kind kind; struct pipeline *pipeline; struct shader_desc shader_desc; struct resource *shader_res; /* Out */ b32 success; ID3DBlob *blob; ID3DBlob *error_blob; i64 elapsed; }; struct shader_compile_job_sig { struct shader_compile_job_param **params; }; /* TODO: Compile shaders offline w/ dxc for performance & language features */ INTERNAL 
SYS_JOB_DEF(shader_compile_job, job) { __prof; struct shader_compile_job_sig *sig = job.sig; struct shader_compile_job_param *param = sig->params[job.id]; enum shader_compile_job_kind kind = param->kind; struct pipeline *pipeline = param->pipeline; struct shader_desc shader_desc = param->shader_desc; struct resource *shader_res = param->shader_res; struct arena_temp scratch = scratch_begin_no_conflict(); { i64 start_ns = sys_time_ns(); b32 success = false; ID3DBlob *blob = NULL; ID3DBlob *error_blob = NULL; if (resource_exists(shader_res)) { struct dx12_include_handler *include_handler = dx12_include_handler_alloc(scratch.arena, pipeline); char *func_cstr = cstr_from_string(scratch.arena, shader_desc.func); u32 d3d_compile_flags = D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES; #if DX12_SHADER_DEBUG d3d_compile_flags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_ENABLE_STRICTNESS; #else d3d_compile_flags |= D3DCOMPILE_OPTIMIZATION_LEVEL3; #endif /* Compile shader */ { struct string shader_src = resource_get_data(shader_res); logf_info("Compiling shader \"%F:%F\"", FMT_STR(shader_desc.file), FMT_STR(shader_desc.func)); /* Compile shader */ struct string friendly_name = string_cat(scratch.arena, LIT("res/"), shader_desc.file); char *friendly_name_cstr = cstr_from_string(scratch.arena, friendly_name); char *target = NULL; switch (kind) { case SHADER_COMPILE_TASK_KIND_VS: { target = "vs_5_1"; } break; case SHADER_COMPILE_TASK_KIND_PS: { target = "ps_5_1"; } break; } D3D_SHADER_MACRO defines[] = { { "SH_CPU", "0" }, { NULL, NULL } }; HRESULT hr = D3DCompile(shader_src.text, shader_src.len, friendly_name_cstr, defines, (ID3DInclude *)include_handler, func_cstr, target, d3d_compile_flags, 0, &blob, &error_blob); success = SUCCEEDED(hr) && !error_blob; } dx12_include_handler_release(include_handler); } #if 0 if (success) { logf_success("Finished compiling shader \"%F\" in %F seconds", FMT_STR(src_name), FMT_FLOAT(SECONDS_FROM_NS(sys_time_ns() - 
start_ns))); } #endif param->success = success; param->blob = blob; param->error_blob = error_blob; param->elapsed = sys_time_ns() - start_ns; } scratch_end(scratch); } /* ========================== * * Pipeline * ========================== */ struct pipeline_init_job_sig { struct pipeline_desc *descs_in; struct pipeline **pipelines_out; }; INTERNAL SYS_JOB_DEF(pipeline_init_job, job) { __prof; struct pipeline_init_job_sig *sig = job.sig; struct pipeline_desc *desc = &sig->descs_in[job.id]; struct pipeline **pipelines_out = sig->pipelines_out; struct pipeline *pipeline = NULL; { struct arena *pipeline_arena = arena_alloc(MEGABYTE(64)); pipeline = arena_push(pipeline_arena, struct pipeline); pipeline->arena = pipeline_arena; pipelines_out[job.id] = pipeline; } pipeline->desc = *desc; pipeline->name = string_copy(pipeline->arena, desc->name); pipeline->hash = hash_fnv64(HASH_FNV64_BASIS, pipeline->name); pipeline->dependencies = dict_init(pipeline->arena, 64); struct arena_temp scratch = scratch_begin_no_conflict(); { i64 start_ns = sys_time_ns(); struct string pipeline_name = pipeline->name; logf_info("Loading pipeline \"%F\"", FMT_STR(pipeline_name)); b32 success = true; HRESULT hr = 0; struct string error_str = LIT("Unknown error"); b32 ps_res_is_shared = string_eq(desc->vs.file, desc->ps.file); struct resource vs_res = resource_open(desc->vs.file); struct resource ps_res = vs_res; if (!ps_res_is_shared) { ps_res = resource_open(desc->ps.file); } dict_set(pipeline->arena, pipeline->dependencies, hash_fnv64(HASH_FNV64_BASIS, desc->vs.file), 1); dict_set(pipeline->arena, pipeline->dependencies, hash_fnv64(HASH_FNV64_BASIS, desc->ps.file), 1); if (success) { if (!resource_exists(&vs_res)) { error_str = string_format(scratch.arena, LIT("Shader source \"%F\" not found"), FMT_STR(desc->vs.file)); success = false; } else if (!resource_exists(&ps_res)) { error_str = string_format(scratch.arena, LIT("Shader source \"%F\" not found"), FMT_STR(desc->ps.file)); success = 
false; } } struct shader_compile_job_param vs = ZI; vs.kind = SHADER_COMPILE_TASK_KIND_VS; vs.pipeline = pipeline; vs.shader_desc = desc->vs; vs.shader_res = &vs_res; struct shader_compile_job_param ps = ZI; ps.kind = SHADER_COMPILE_TASK_KIND_PS; ps.pipeline = pipeline; ps.shader_desc = desc->ps; ps.shader_res = &ps_res; /* Compile shaders */ if (success) { struct shader_compile_job_param *params[] = { &vs, &ps }; struct shader_compile_job_sig comp_sig = { .params = params }; struct sys_counter *counter = sys_counter_alloc(); { sys_run(countof(params), shader_compile_job, &comp_sig, SYS_PRIORITY_HIGH, counter); sys_counter_wait(counter); } sys_counter_release(counter); success = vs.success && ps.success; } /* Get root signature blob * NOTE: This isn't necessary for creating the root signature (since it * could reuse the shader blob), however we'd like to verify that the * root signature exists and matches between shaders. */ ID3D10Blob *rootsig_blob = NULL; if (success) { __profn("Validate root signatures"); char *vs_rootsig_data = NULL; char *ps_rootsig_data = NULL; u32 vs_rootsig_data_len = 0; u32 ps_rootsig_data_len = 0; ID3D10Blob *vs_rootsig_blob = NULL; ID3D10Blob *ps_rootsig_blob = NULL; D3DGetBlobPart(ID3D10Blob_GetBufferPointer(vs.blob), ID3D10Blob_GetBufferSize(vs.blob), D3D_BLOB_ROOT_SIGNATURE, 0, &vs_rootsig_blob); D3DGetBlobPart(ID3D10Blob_GetBufferPointer(ps.blob), ID3D10Blob_GetBufferSize(ps.blob), D3D_BLOB_ROOT_SIGNATURE, 0, &ps_rootsig_blob); if (vs_rootsig_blob) { vs_rootsig_data = ID3D10Blob_GetBufferPointer(vs_rootsig_blob); vs_rootsig_data_len = ID3D10Blob_GetBufferSize(vs_rootsig_blob); } if (ps_rootsig_blob) { ps_rootsig_data = ID3D10Blob_GetBufferPointer(ps_rootsig_blob); ps_rootsig_data_len = ID3D10Blob_GetBufferSize(ps_rootsig_blob); } if (vs_rootsig_data_len == 0) { success = false; error_str = LIT("Vertex shader is missing root signature"); } else if (ps_rootsig_data_len == 0) { success = false; error_str = LIT("Pixel shader is missing 
root signature"); } else if (vs_rootsig_data_len != ps_rootsig_data_len || !MEMEQ(vs_rootsig_data, ps_rootsig_data, vs_rootsig_data_len)) { success = false; error_str = LIT("Root signature mismatch between vertex and pixel shader"); } else { rootsig_blob = vs_rootsig_blob; } if (ps_rootsig_blob) { ID3D10Blob_Release(ps_rootsig_blob); } } /* Create root signature */ ID3D12RootSignature *rootsig = NULL; if (success) { __profn("Create root signature"); hr = ID3D12Device_CreateRootSignature(G.device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig); if (FAILED(hr)) { error_str = LIT("Failed to create root signature"); success = false; } } /* Create PSO */ ID3D12PipelineState *pso = NULL; if (success) { /* Default rasterizer state */ __profn("Create PSO"); D3D12_RASTERIZER_DESC raster_desc = { .FillMode = D3D12_FILL_MODE_SOLID, .CullMode = D3D12_CULL_MODE_NONE, .FrontCounterClockwise = FALSE, .DepthBias = D3D12_DEFAULT_DEPTH_BIAS, .DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP, .SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS, .DepthClipEnable = TRUE, .MultisampleEnable = FALSE, .AntialiasedLineEnable = FALSE, .ForcedSampleCount = 0, .ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF }; /* Input layout */ u32 num_input_layout_elements = 0; for (u32 i = 0; i < countof(desc->ia); ++i) { if (desc->ia[i].SemanticName == NULL) { break; } ++num_input_layout_elements; } D3D12_INPUT_LAYOUT_DESC input_layout_desc = { .pInputElementDescs = desc->ia, .NumElements = num_input_layout_elements }; /* Blend state */ D3D12_BLEND_DESC blend_desc = { .AlphaToCoverageEnable = FALSE, .IndependentBlendEnable = FALSE }; blend_desc.RenderTarget[0].BlendEnable = TRUE; blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; 
blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; /* Disable depth stencil */ D3D12_DEPTH_STENCIL_DESC depth_stencil_desc = { .DepthEnable = FALSE, .StencilEnable = FALSE }; /* PSO */ D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { 0 }; pso_desc.pRootSignature = rootsig; if (vs.success) { pso_desc.VS.pShaderBytecode = ID3D10Blob_GetBufferPointer(vs.blob); pso_desc.VS.BytecodeLength = ID3D10Blob_GetBufferSize(vs.blob); } if (ps.success) { pso_desc.PS.pShaderBytecode = ID3D10Blob_GetBufferPointer(ps.blob); pso_desc.PS.BytecodeLength = ID3D10Blob_GetBufferSize(ps.blob); } pso_desc.BlendState = blend_desc; pso_desc.SampleMask = UINT_MAX; pso_desc.RasterizerState = raster_desc; pso_desc.DepthStencilState = depth_stencil_desc; pso_desc.InputLayout = input_layout_desc; pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; pso_desc.NumRenderTargets = 1; pso_desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; pso_desc.SampleDesc.Count = 1; pso_desc.SampleDesc.Quality = 0; hr = ID3D12Device_CreateGraphicsPipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); if (FAILED(hr)) { error_str = LIT("Failed to create pipeline state object"); success = false; } } /* Copy error */ if (!success) { ID3D10Blob *error_blob = vs.error_blob ? 
vs.error_blob : ps.error_blob; if (error_blob) { u64 error_blob_cstr_len = ID3D10Blob_GetBufferSize(error_blob); char *error_blob_cstr = (char *)ID3D10Blob_GetBufferPointer(error_blob); struct string error_blob_str = string_copy(scratch.arena, string_from_cstr(error_blob_cstr, error_blob_cstr_len)); if (string_ends_with(error_blob_str, LIT("\n"))) { /* Remove trailing newline */ error_blob_str.len -= 1; } if (error_blob_str.len > 0) { error_str = error_blob_str; } } struct pipeline_error *error = arena_push(pipeline->arena, struct pipeline_error); error->msg = string_copy(pipeline->arena, error_str); if (pipeline->last_error) { pipeline->last_error->next = error; } else { pipeline->first_error = error; } pipeline->last_error = error; } pipeline->pso = pso; pipeline->rootsig = rootsig; pipeline->compilation_time_ns = sys_time_ns() - start_ns; pipeline->success = success; resource_close(&vs_res); if (!ps_res_is_shared) { resource_close(&ps_res); } if (rootsig_blob) { ID3D10Blob_Release(rootsig_blob); } if (vs.blob) { ID3D10Blob_Release(vs.blob); } if (vs.error_blob) { ID3D10Blob_Release(vs.error_blob); } if (ps.blob) { ID3D10Blob_Release(ps.blob); } if (ps.error_blob) { ID3D10Blob_Release(ps.error_blob); } } scratch_end(scratch); } INTERNAL void pipeline_alloc(u64 num_pipelines, struct pipeline_desc *descs_in, struct pipeline **pipelines_out) { __prof; struct pipeline_init_job_sig sig = { .descs_in = descs_in, .pipelines_out = pipelines_out }; struct sys_counter *counter = sys_counter_alloc(); { sys_run(num_pipelines, pipeline_init_job, &sig, SYS_PRIORITY_HIGH, counter); sys_counter_wait(counter); } sys_counter_release(counter); } INTERNAL void pipeline_release_now(struct pipeline *pipeline) { __prof; if (pipeline->pso) { ID3D12PipelineState_Release(pipeline->pso); } arena_release(pipeline->arena); } /* ========================== * * Pipeline cache * ========================== */ INTERNAL struct pipeline_scope *pipeline_scope_begin(void) { __prof; struct 
pipeline_scope *scope = NULL;
{
    /* Try to recycle a previously released scope (and its arena) */
    struct sys_lock lock = sys_mutex_lock_e(G.pipelines_mutex);
    if (G.first_free_pipeline_scope) {
        scope = G.first_free_pipeline_scope;
        G.first_free_pipeline_scope = scope->next_free;
    }
    sys_mutex_unlock(&lock);
}
struct arena *arena = NULL;
if (scope) {
    arena = scope->arena;
} else {
    arena = arena_alloc(MEGABYTE(64));
}
/* The scope header lives at the base of its own arena, so the reset also
   wipes the old header; it is re-pushed immediately below */
arena_reset(arena);
scope = arena_push(arena, struct pipeline_scope);
scope->arena = arena;
scope->refs = dict_init(scope->arena, 64);
return scope;
}

/* Drop every pipeline reference held by the scope and recycle the scope.
   Pipelines reaching refcount zero go to the fenced release queue so the
   GPU can finish with them before destruction. */
INTERNAL void pipeline_scope_end(struct pipeline_scope *scope)
{
    __prof;
    struct sys_lock lock = sys_mutex_lock_e(G.pipelines_mutex);
    {
        for (struct dict_entry *entry = scope->refs->first; entry; entry = entry->next) {
            struct pipeline *pipeline = (struct pipeline *)entry->value;
            if (--pipeline->refcount <= 0) {
                fenced_release(pipeline, FENCED_RELEASE_KIND_PIPELINE);
            }
        }
        scope->next_free = G.first_free_pipeline_scope;
        G.first_free_pipeline_scope = scope;
    }
    sys_mutex_unlock(&lock);
}

/* Sentinel returned when a name cannot be resolved (success == false) */
INTERNAL READONLY struct pipeline g_nil_pipeline = ZI;

/* Look a pipeline up by name: first in the scope-local ref cache (no lock),
   then in the global dict of successfully compiled pipelines (takes a
   reference that pipeline_scope_end later drops). */
INTERNAL struct pipeline *pipeline_from_name(struct pipeline_scope *scope, struct string name)
{
    __prof;
    struct pipeline *res = &g_nil_pipeline;
    u64 hash = hash_fnv64(HASH_FNV64_BASIS, name);
    struct pipeline *tmp = dict_get(scope->refs, hash);
    if (tmp) {
        res = tmp;
    } else {
        {
            struct sys_lock lock = sys_mutex_lock_e(G.pipelines_mutex);
            tmp = dict_get(G.top_successful_pipelines, hash);
            if (tmp) {
                /* Reference held by the scope until pipeline_scope_end */
                ++tmp->refcount;
            }
            sys_mutex_unlock(&lock);
        }
        if (tmp) {
            dict_set(scope->arena, scope->refs, hash, (u64)tmp);
            res = tmp;
        }
    }
    return res;
}

/* Publish freshly compiled pipelines, replacing (and unreferencing) any
   previous entries with the same name hash in the global dicts */
INTERNAL void pipeline_register(u64 num_pipelines, struct pipeline **pipelines)
{
    __prof;
    struct sys_lock lock = sys_mutex_lock_e(G.pipelines_mutex);
    {
        for (u64 i = 0; i < num_pipelines; ++i) {
            struct pipeline *pipeline = pipelines[i];
            u64 hash = pipeline->hash;
            /* Insert into top dict */
            {
                struct pipeline *old_pipeline = (struct pipeline *)dict_get(G.top_pipelines, hash);
                if (old_pipeline && --old_pipeline->refcount
<= 0) {
    fenced_release(old_pipeline, FENCED_RELEASE_KIND_PIPELINE);
}
dict_set(G.pipelines_arena, G.top_pipelines, hash, (u64)pipeline);
++pipeline->refcount;
}
/* Insert into success dict */
if (pipeline->success) {
    struct pipeline *old_pipeline = (struct pipeline *)dict_get(G.top_successful_pipelines, hash);
    if (old_pipeline && --old_pipeline->refcount <= 0) {
        fenced_release(old_pipeline, FENCED_RELEASE_KIND_PIPELINE);
    }
    dict_set(G.pipelines_arena, G.top_successful_pipelines, hash, (u64)pipeline);
    ++pipeline->refcount;
}
}
}
sys_mutex_unlock(&lock);
}

#if RESOURCE_RELOADING
/* File-watch callback: recompiles every pipeline whose dependency dict lists
   the changed shader source file (or one of its includes). */
INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(pipeline_resource_watch_callback, name)
{
    __prof;
    struct arena_temp scratch = scratch_begin_no_conflict();
    /* Find dirty pipelines */
    u64 hash = hash_fnv64(HASH_FNV64_BASIS, name);
    u32 num_pipelines = 0;
    /* NOTE(review): arena_push_dry appears to return the future array start
       without allocating, with descs pushed contiguously below — confirm
       against arena.h */
    struct pipeline_desc *pipeline_descs = arena_push_dry(scratch.arena, struct pipeline_desc);
    {
        struct sys_lock lock = sys_mutex_lock_s(G.pipelines_mutex);
        for (struct dict_entry *entry = G.top_pipelines->first; entry; entry = entry->next) {
            struct pipeline *pipeline = (struct pipeline *)entry->value;
            if (dict_get(pipeline->dependencies, hash) == 1) {
                logf_debug("Change detected in shader source file \"%F\", recompiling pipeline \"%F\"", FMT_STR(name), FMT_STR(pipeline->name));
                *arena_push(scratch.arena, struct pipeline_desc) = pipeline->desc;
                ++num_pipelines;
            }
        }
        sys_mutex_unlock(&lock);
    }
    /* Recompile dirty pipelines */
    if (num_pipelines > 0) {
        struct pipeline **pipelines = arena_push_array(scratch.arena, struct pipeline *, num_pipelines);
        pipeline_alloc(num_pipelines, pipeline_descs, pipelines);
        {
            struct sys_lock lock = sys_mutex_lock_s(G.pipelines_mutex);
            for (u32 i = 0; i < num_pipelines; ++i) {
                struct pipeline *pipeline = pipelines[i];
                if (pipeline->success) {
                    logf_success("Successfully compiled pipeline \"%F\" in %F seconds", FMT_STR(pipeline->name), FMT_FLOAT(SECONDS_FROM_NS(pipeline->compilation_time_ns)));
                } else {
                    {
                        struct string error =
pipeline->first_error ? pipeline->first_error->msg : LIT("Unknown error");
struct string msg = string_format(scratch.arena, LIT("Error compiling pipeline \"%F\":\n%F"), FMT_STR(pipeline->name), FMT_STR(error));
log_error(msg);
}
struct pipeline *old_pipeline = dict_get(G.top_successful_pipelines, pipeline->hash);
if (!old_pipeline) {
    /* If no previously successful pipeline exists, then show a message box rather than logging since logs may not be visible to user */
    struct string error = pipeline->first_error ? pipeline->first_error->msg : LIT("Unknown error");
    struct string msg = string_format(scratch.arena, LIT("Error compiling pipeline \"%F\":\n\n%F"), FMT_STR(pipeline->name), FMT_STR(error));
    sys_message_box(SYS_MESSAGE_BOX_KIND_WARNING, msg);
}
}
}
sys_mutex_unlock(&lock);
}
pipeline_register(num_pipelines, pipelines);
}
scratch_end(scratch);
}
#endif

/* ==========================
 *
 * Descriptor
 *
 * ========================== */

/* Allocate one CPU descriptor slot from the heap: pop the free list if
   possible, otherwise reserve a fresh slot. Panics when the heap is full. */
INTERNAL struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh)
{
    __prof;
    struct descriptor *d = NULL;
    u32 index = 0;
    D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI;
    {
        struct sys_lock lock = sys_mutex_lock_e(dh->mutex);
        if (dh->first_free_descriptor) {
            d = dh->first_free_descriptor;
            dh->first_free_descriptor = d->next_free;
            /* Save handle/index before the zeroing below wipes them */
            handle = d->handle;
            index = d->index;
        } else {
            if (dh->num_descriptors_reserved >= dh->num_descriptors_capacity) {
                sys_panic(LIT("Max descriptors reached in heap"));
            }
            d = arena_push_no_zero(dh->arena, struct descriptor);
            index = dh->num_descriptors_reserved++;
            handle.ptr = dh->handle.ptr + (index * dh->descriptor_size);
        }
        sys_mutex_unlock(&lock);
    }
    MEMZERO_STRUCT(d);
    d->heap = dh;
    d->handle = handle;
    d->index = index;
    return d;
}

/* Return a descriptor to its heap's free list (the slot is reused, never freed) */
INTERNAL void descriptor_release(struct descriptor *descriptor)
{
    struct cpu_descriptor_heap *dh = descriptor->heap;
    struct sys_lock lock = sys_mutex_lock_e(dh->mutex);
    {
        descriptor->next_free = dh->first_free_descriptor;
        dh->first_free_descriptor = descriptor;
    }
    sys_mutex_unlock(&lock);
}

/* ==========================
 *
 * CPU descriptor heap
 *
 * ========================== */

/* Create a non-shader-visible descriptor heap; capacity and descriptor
   stride come from the per-type tables cached in G.desc_counts/G.desc_sizes */
INTERNAL struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type)
{
    __prof;
    struct cpu_descriptor_heap *dh = NULL;
    {
        struct arena *arena = arena_alloc(MEGABYTE(64));
        dh = arena_push(arena, struct cpu_descriptor_heap);
        dh->arena = arena;
    }
    dh->mutex = sys_mutex_alloc();
    u32 num_descriptors = 0;
    u32 descriptor_size = 0;
    if (type < (i32)countof(G.desc_counts) && type < (i32)countof(G.desc_sizes)) {
        num_descriptors = G.desc_counts[type];
        descriptor_size = G.desc_sizes[type];
    }
    if (num_descriptors == 0 || descriptor_size == 0) {
        sys_panic(LIT("Unsupported CPU descriptor type"));
    }
    dh->num_descriptors_capacity = num_descriptors;
    dh->descriptor_size = descriptor_size;
    D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
    desc.Type = type;
    desc.NumDescriptors = num_descriptors;
    HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&dh->heap);
    if (FAILED(hr)) {
        sys_panic(LIT("Failed to create CPU descriptor heap"));
    }
    ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(dh->heap, &dh->handle);
    return dh;
}

#if 0
INTERNAL void cpu_descriptor_heap_release(struct cpu_descriptor_heap *dh)
{
    /* TODO */
    (UNUSED)dh;
}
#endif

/* ==========================
 *
 * Flow
 *
 * ========================== */

/* Per-frame command recording context; the arenas below are reset by
   flow_reset between frames */
struct flow {
    struct arena *arena;

    /* Material instances */
    u32 num_material_instance_descs;
    struct arena *material_instance_descs_arena;

    /* Grids */
    u32 num_material_grid_descs;
    struct arena *material_grid_descs_arena;

    /* Shapes */
    struct arena *shape_verts_arena;
    struct arena *shape_indices_arena;

    struct flow *next_free;
};

/* One textured/tinted instance recorded by GP_CMD_KIND_DRAW_MATERIAL */
struct material_instance_desc {
    struct xform xf;
    struct sprite_tag sprite;
    struct dx12_resource *texture;
    struct clip_rect clip;
    u32 tint;
    f32 emittance;
    i32 grid_id; /* 0-based grid command index; -1 = no grid */
};

/* Parameters of a background grid recorded by GP_CMD_KIND_PUSH_GRID */
struct material_grid_desc {
    f32 line_thickness;
    f32 line_spacing;
    struct v2 offset;
    u32 bg0_color;
    u32 bg1_color;
    u32 line_color;
    u32 x_color;
    u32
y_color;
};

INTERNAL struct flow *flow_alloc(void)
{
    __prof;
    struct flow *flow = NULL;
    {
        struct arena *arena = arena_alloc(MEGABYTE(64));
        flow = arena_push(arena, struct flow);
        flow->arena = arena;
    }
    /* 1 GiB per stream — NOTE(review): presumably an address-space reserve
       with commit-on-demand; confirm arena_alloc semantics */
    flow->material_instance_descs_arena = arena_alloc(GIGABYTE(1));
    flow->material_grid_descs_arena = arena_alloc(GIGABYTE(1));
    flow->shape_verts_arena = arena_alloc(GIGABYTE(1));
    flow->shape_indices_arena = arena_alloc(GIGABYTE(1));
    return flow;
}

/* Clear all recorded commands so the flow can record a fresh frame */
INTERNAL void flow_reset(struct flow *flow)
{
    __prof;
    /* Reset material instances */
    flow->num_material_instance_descs = 0;
    arena_reset(flow->material_instance_descs_arena);
    /* Reset grids */
    flow->num_material_grid_descs = 0;
    arena_reset(flow->material_grid_descs_arena);
    /* Reset shapes */
    arena_reset(flow->shape_verts_arena);
    arena_reset(flow->shape_indices_arena);
}

struct gp_flow *gp_flow_alloc(void)
{
    __prof;
    struct flow *flow = flow_alloc();
    return (struct gp_flow *)flow;
}

/* Record one command into the flow. Returns a 1-based command id for
   material and grid commands; returns 0 for shapes and unknown kinds. */
i32 gp_push_cmd(struct gp_flow *gp_flow, struct gp_cmd_desc *cmd_desc)
{
    i32 ret = 0;
    struct flow *flow = (struct flow *)gp_flow;
    if (flow) {
        switch (cmd_desc->kind) {
        default: break;
        case GP_CMD_KIND_DRAW_MATERIAL: {
            struct material_instance_desc *instance_desc = arena_push(flow->material_instance_descs_arena, struct material_instance_desc);
            instance_desc->xf = cmd_desc->material.xf;
            instance_desc->sprite = cmd_desc->material.sprite;
            instance_desc->texture = (struct dx12_resource *)cmd_desc->material.texture;
            instance_desc->clip = cmd_desc->material.clip;
            instance_desc->tint = cmd_desc->material.tint;
            instance_desc->emittance = cmd_desc->material.emittance;
            /* grid_cmd_id is 1-based (0 = none); stored 0-based (-1 = none) */
            instance_desc->grid_id = cmd_desc->material.grid_cmd_id - 1;
            ret = ++flow->num_material_instance_descs;
        } break;
        case GP_CMD_KIND_DRAW_SHAPE: {
            u32 color = cmd_desc->shape.color;
            struct sh_shape_vert *verts = arena_push_array_no_zero(flow->shape_verts_arena, struct sh_shape_vert, cmd_desc->shape.vertices.count);
            u32 *indices = arena_push_array_no_zero(flow->shape_indices_arena, u32,
cmd_desc->shape.indices.count);
for (u32 i = 0; i < cmd_desc->shape.vertices.count; ++i) {
    struct sh_shape_vert *v = &verts[i];
    v->pos = sh_float2_from_v2(cmd_desc->shape.vertices.points[i]);
    v->color_srgb = sh_uint_from_u32(color);
}
/* Rebase indices so every shape shares one contiguous vertex stream */
u32 vert_offset = verts - (struct sh_shape_vert *)arena_base(flow->shape_verts_arena);
for (u32 i = 0; i < cmd_desc->shape.indices.count; ++i) {
    indices[i] = cmd_desc->shape.indices.indices[i] + vert_offset;
}
/* NOTE(review): ret intentionally stays 0 for shapes */
} break;
case GP_CMD_KIND_PUSH_GRID: {
    struct material_grid_desc *grid_desc = arena_push(flow->material_grid_descs_arena, struct material_grid_desc);
    grid_desc->line_thickness = cmd_desc->grid.line_thickness;
    grid_desc->line_spacing = cmd_desc->grid.line_spacing;
    grid_desc->offset = cmd_desc->grid.offset;
    grid_desc->bg0_color = cmd_desc->grid.bg0_color;
    grid_desc->bg1_color = cmd_desc->grid.bg1_color;
    grid_desc->line_color = cmd_desc->grid.line_color;
    grid_desc->x_color = cmd_desc->grid.x_color;
    grid_desc->y_color = cmd_desc->grid.y_color;
    ret = ++flow->num_material_grid_descs;
} break;
}
}
return ret;
}

/* ==========================
 *
 * Fenced release
 *
 * ========================== */

/* Queue a GPU object for deferred destruction: the evictor thread destroys
   it once every command queue has completed its current fence target. */
INTERNAL void fenced_release(void *data, enum fenced_release_kind kind)
{
    struct fenced_release_data fr = ZI;
    fr.kind = kind;
    fr.ptr = data;
    u64 fr_targets[countof(G.fenced_release_targets)] = ZI;
    /* Read current fence target values from command queues */
    for (u32 i = 0; i < countof(G.command_queues); ++i) {
        struct command_queue *cq = G.command_queues[i];
        struct sys_lock lock = sys_mutex_lock_s(cq->submit_fence_mutex);
        fr_targets[i] = cq->submit_fence_target;
        sys_mutex_unlock(&lock);
    }
    /* Push data to release queue */
    {
        struct sys_lock lock = sys_mutex_lock_e(G.fenced_releases_mutex);
        *arena_push(G.fenced_releases_arena, struct fenced_release_data) = fr;
        /* NOTE(review): the shared targets are overwritten each push; fence
           targets only grow, so raising them is safe for earlier entries —
           it just delays their eviction */
        MEMCPY(G.fenced_release_targets, fr_targets, sizeof(fr_targets));
        sys_mutex_unlock(&lock);
    }
    /* Wake evictor */
    SetEvent(G.evictor_thread_wake_event);
}

/* ==========================
 *
 * Resource
 *
========================== */ enum dx12_resource_view_flags { DX12_RESOURCE_VIEW_FLAG_NONE = 0, DX12_RESOURCE_VIEW_FLAG_CBV = (1 << 1), DX12_RESOURCE_VIEW_FLAG_SRV = (1 << 2), DX12_RESOURCE_VIEW_FLAG_UAV = (1 << 3), DX12_RESOURCE_VIEW_FLAG_RTV = (1 << 4) }; INTERNAL struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state, enum dx12_resource_view_flags view_flags) { __prof; struct dx12_resource *r = NULL; { struct sys_lock lock = sys_mutex_lock_e(G.resources_mutex); if (G.first_free_resource) { r = G.first_free_resource; G.first_free_resource = r->next_free; } else { r = arena_push_no_zero(G.resources_arena, struct dx12_resource); } sys_mutex_unlock(&lock); } MEMZERO_STRUCT(r); D3D12_CLEAR_VALUE clear_value = { .Format = desc.Format, .Color = { 0 } }; D3D12_CLEAR_VALUE *clear_value_ptr = desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : NULL; HRESULT hr = ID3D12Device_CreateCommittedResource(G.device, &heap_props, heap_flags, &desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->resource); if (FAILED(hr)) { /* TODO: Don't panic */ sys_panic(LIT("Failed to create resource")); } r->state = initial_state; if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { r->gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->resource); } if (view_flags & DX12_RESOURCE_VIEW_FLAG_CBV) { r->cbv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap); D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = ZI; cbv_desc.BufferLocation = r->gpu_address; //cbv_desc.SizeInBytes = desc.ByteWidth; /* FIXME: Get actual size */ cbv_desc.SizeInBytes = KILOBYTE(64); ID3D12Device_CreateConstantBufferView(G.device, &cbv_desc, r->cbv_descriptor->handle); } if (view_flags & DX12_RESOURCE_VIEW_FLAG_SRV) { r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap); ID3D12Device_CreateShaderResourceView(G.device, r->resource, NULL, r->srv_descriptor->handle); } if 
(view_flags & DX12_RESOURCE_VIEW_FLAG_UAV) {
    r->uav_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
    ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, NULL, NULL, r->uav_descriptor->handle);
}
if (view_flags & DX12_RESOURCE_VIEW_FLAG_RTV) {
    r->rtv_descriptor = descriptor_alloc(G.rtv_heap);
    ID3D12Device_CreateRenderTargetView(G.device, r->resource, NULL, r->rtv_descriptor->handle);
}
return r;
}

/* Immediately destroy a resource and its views; caller must guarantee the
   GPU is done with it (normally reached via the fenced release queue) */
INTERNAL void dx12_resource_release_now(struct dx12_resource *t)
{
    __prof;
    /* Release descriptors */
    /* TODO: Batch lock heaps */
    if (t->cbv_descriptor) {
        descriptor_release(t->cbv_descriptor);
    }
    if (t->srv_descriptor) {
        descriptor_release(t->srv_descriptor);
    }
    if (t->uav_descriptor) {
        descriptor_release(t->uav_descriptor);
    }
    if (t->rtv_descriptor) {
        descriptor_release(t->rtv_descriptor);
    }
    /* Release resource */
    ID3D12Resource_Release(t->resource);
    /* Add to free list */
    struct sys_lock lock = sys_mutex_lock_e(G.resources_mutex);
    t->next_free = G.first_free_resource;
    G.first_free_resource = t;
    sys_mutex_unlock(&lock);
}

/* Record a transition barrier if the tracked state differs; returns the
   previous state. NOTE(review): state tracking is per-resource, not per
   command list — recording barriers for the same resource from multiple
   lists concurrently would race; confirm callers serialize this. */
INTERNAL enum D3D12_RESOURCE_STATES dx12_resource_barrier(ID3D12GraphicsCommandList *cl, struct dx12_resource *resource, enum D3D12_RESOURCE_STATES state)
{
    __prof;
    enum D3D12_RESOURCE_STATES old_state = resource->state;
    if (state != resource->state) {
        struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
        rtb.pResource = resource->resource;
        rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
        rtb.StateBefore = resource->state;
        rtb.StateAfter = state;
        struct D3D12_RESOURCE_BARRIER rb = ZI;
        rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
        rb.Flags = 0;
        rb.Transition = rtb;
        ID3D12GraphicsCommandList_ResourceBarrier(cl, 1, &rb);
        resource->state = state;
    }
    return old_state;
}

/* Public release: always deferred through the fenced release queue */
void gp_resource_release(struct gp_resource *resource)
{
    struct dx12_resource *r = (struct dx12_resource *)resource;
    fenced_release(r, FENCED_RELEASE_KIND_RESOURCE);
}

/* ==========================
 *
 * Command queue
 *
 * ========================== */

INTERNAL struct
command_list_pool *command_list_pool_alloc(struct command_queue *cq);

/* Create a command queue of the given type plus its submit fence and its
   command list pool */
INTERNAL struct command_queue *command_queue_alloc(enum D3D12_COMMAND_LIST_TYPE type, enum D3D12_COMMAND_QUEUE_PRIORITY priority, struct string dbg_name)
{
    __prof;
    struct command_queue *cq = NULL;
    {
        struct arena *arena = arena_alloc(GIGABYTE(64));
        cq = arena_push(arena, struct command_queue);
        cq->arena = arena;
    }
    D3D12_COMMAND_QUEUE_DESC desc = ZI;
    desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
    desc.Type = type;
    desc.Priority = priority;
    HRESULT hr = ID3D12Device_CreateCommandQueue(G.device, &desc, &IID_ID3D12CommandQueue, (void **)&cq->cq);
    if (FAILED(hr)) {
        sys_panic(LIT("Failed to create command queue"));
    }
    /* Fence starts at 0; submit_fence_target counts submissions and is
       signalled per submit in command_list_close */
    hr = ID3D12Device_CreateFence(G.device, 0, 0, &IID_ID3D12Fence, (void **)&cq->submit_fence);
    if (FAILED(hr)) {
        sys_panic(LIT("Failed to create command queue fence"));
    }
    __prof_dx12_ctx_alloc(cq->prof, G.device, cq->cq, dbg_name.text, dbg_name.len);
    (UNUSED)dbg_name;
    cq->type = type;
    cq->submit_fence_mutex = sys_mutex_alloc();
    cq->cl_pool = command_list_pool_alloc(cq);
    return cq;
}

INTERNAL void command_queue_release(struct command_queue *cq)
{
    __prof;
    /* TODO */
    (UNUSED)cq;
    //ID3D12CommandQueue_Release(cq->cq);
}

/* ==========================
 *
 * Command list
 *
 * ========================== */

INTERNAL struct command_list_pool *command_list_pool_alloc(struct command_queue *cq)
{
    struct command_list_pool *pool = NULL;
    {
        struct arena *arena = arena_alloc(GIGABYTE(64));
        pool = arena_push(arena, struct command_list_pool);
        pool->arena = arena;
    }
    pool->cq = cq;
    pool->mutex = sys_mutex_alloc();
    return pool;
}

/* Acquire an open command list: reuse the first submitted list whose fence
   has completed, otherwise create a new allocator+list pair */
INTERNAL struct command_list *command_list_open(struct command_list_pool *pool)
{
    __prof;
    struct command_queue *cq = pool->cq;
    u64 completed_fence_value = ID3D12Fence_GetCompletedValue(cq->submit_fence);
    struct command_list *cl = NULL;
    struct ID3D12GraphicsCommandList *old_cl = NULL;
    struct ID3D12CommandAllocator *old_ca = NULL;
    {
        struct sys_lock lock = sys_mutex_lock_e(pool->mutex);
        /* Find first
command list ready for reuse */
for (struct command_list *tmp = pool->first_submitted_command_list; tmp; tmp = tmp->next_submitted) {
    if (completed_fence_value >= tmp->submitted_fence_target) {
        cl = tmp;
        break;
    }
}
if (cl) {
    /* Remove from submitted list */
    old_cl = cl->cl;
    old_ca = cl->ca;
    struct command_list *prev = cl->prev_submitted;
    struct command_list *next = cl->next_submitted;
    if (prev) {
        prev->next_submitted = next;
    } else {
        pool->first_submitted_command_list = next;
    }
    if (next) {
        next->prev_submitted = prev;
    } else {
        pool->last_submitted_command_list = prev;
    }
} else {
    cl = arena_push_no_zero(pool->arena, struct command_list);
}
sys_mutex_unlock(&lock);
}
MEMZERO_STRUCT(cl);
cl->cq = cq;
cl->pool = pool;
/* Held (shared) for the whole recording; released in command_list_close.
   Blocks whoever takes the global record mutex exclusively. */
cl->global_record_lock = sys_mutex_lock_s(G.global_command_list_record_mutex);
HRESULT hr = 0;
if (old_cl) {
    cl->cl = old_cl;
    cl->ca = old_ca;
} else {
    hr = ID3D12Device_CreateCommandAllocator(G.device, cq->type, &IID_ID3D12CommandAllocator, (void **)&cl->ca);
    if (FAILED(hr)) {
        sys_panic(LIT("Failed to create command allocator"));
    }
    hr = ID3D12Device_CreateCommandList(G.device, 0, cq->type, cl->ca, NULL, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl);
    if (FAILED(hr)) {
        sys_panic(LIT("Failed to create command list"));
    }
    /* Lists are created in the recording state; close so the Reset below
       works uniformly for both new and reused lists */
    hr = ID3D12GraphicsCommandList_Close(cl->cl);
    if (FAILED(hr)) {
        sys_panic(LIT("Failed to close command list during initialization"));
    }
}
/* Reset */
hr = ID3D12CommandAllocator_Reset(cl->ca);
if (FAILED(hr)) {
    sys_panic(LIT("Failed to reset command allocator"));
}
hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, NULL);
if (FAILED(hr)) {
    sys_panic(LIT("Failed to reset command list"));
}
return cl;
}

/* TODO: Allow multiple command list submissions */
/* Close, execute and fence the list; returns the fence target that marks
   this submission's completion on its queue */
INTERNAL u64 command_list_close(struct command_list *cl)
{
    __prof;
    struct command_queue *cq = cl->cq;
    struct command_list_pool *pool = cl->pool;
    /* Close */
    {
        __profn("Close DX12 command list");
        HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl);
        if (FAILED(hr)) {
            /* TODO: Don't panic
*/ sys_panic(LIT("Failed to close command list before execution")); } } /* Submit */ u64 submit_fence_target = 0; { __profn("Execute"); struct sys_lock submit_lock = sys_mutex_lock_s(G.global_submit_mutex); struct sys_lock fence_lock = sys_mutex_lock_e(cq->submit_fence_mutex); { submit_fence_target = ++cq->submit_fence_target; ID3D12CommandQueue_ExecuteCommandLists(cq->cq, 1, (ID3D12CommandList **)&cl->cl); ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, submit_fence_target); } sys_mutex_unlock(&fence_lock); sys_mutex_unlock(&submit_lock); } /* Add descriptor heaps to submitted list */ { struct sys_lock lock = sys_mutex_lock_e(G.command_descriptor_heaps_mutex); for (struct command_descriptor_heap *cdh = cl->first_command_descriptor_heap; cdh; cdh = cdh->next_in_command_list) { cdh->submitted_cq = cq; cdh->submitted_fence_target = submit_fence_target; if (G.last_submitted_command_descriptor_heap) { G.last_submitted_command_descriptor_heap->next_submitted = cdh; } else { G.first_submitted_command_descriptor_heap = cdh; } G.last_submitted_command_descriptor_heap = cdh; } sys_mutex_unlock(&lock); } /* Add command buffers to submitted list */ { struct sys_lock lock = sys_mutex_lock_e(G.command_buffers_mutex); for (struct command_buffer *cb = cl->first_command_buffer; cb; cb = cb->next_in_command_list) { struct command_buffer_group *group = cb->group; cb->submitted_cq = cq; cb->submitted_fence_target = submit_fence_target; if (group->last_submitted) { group->last_submitted->next_submitted = cb; } else { group->first_submitted = cb; } group->last_submitted = cb; } sys_mutex_unlock(&lock); } /* Add command list to pool submitted list */ sys_mutex_unlock(&cl->global_record_lock); cl->submitted_fence_target = submit_fence_target; { struct sys_lock lock = sys_mutex_lock_e(pool->mutex); if (pool->last_submitted_command_list) { pool->last_submitted_command_list->next_submitted = cl; } else { pool->first_submitted_command_list = cl; } pool->last_submitted_command_list = cl; 
sys_mutex_unlock(&lock);
}
return submit_fence_target;
}

/* ==========================
 *
 * Command descriptor heap (GPU / shader visible descriptor heap)
 *
 * ========================== */

/* Snapshot the CPU descriptor heap into a shader-visible heap owned by this
   command list; heaps are recycled once their submission fence completes */
INTERNAL struct command_descriptor_heap *command_list_push_descriptor_heap(struct command_list *cl, struct cpu_descriptor_heap *dh_cpu)
{
    __prof;
    ASSERT(dh_cpu->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); /* Src heap must have expected type */
    /* Allocate GPU heap */
    struct command_descriptor_heap *cdh = NULL;
    ID3D12DescriptorHeap *old_heap = NULL;
    D3D12_CPU_DESCRIPTOR_HANDLE old_cpu_handle = ZI;
    D3D12_GPU_DESCRIPTOR_HANDLE old_gpu_handle = ZI;
    {
        struct sys_lock lock = sys_mutex_lock_e(G.command_descriptor_heaps_mutex);
        /* Find first heap ready for reuse */
        for (struct command_descriptor_heap *tmp = G.first_submitted_command_descriptor_heap; tmp; tmp = tmp->next_submitted) {
            /* TODO: Cache completed fence values */
            u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence);
            if (completed_fence_value >= tmp->submitted_fence_target) {
                cdh = tmp;
                break;
            }
        }
        if (cdh) {
            /* Remove from submitted list */
            old_heap = cdh->heap;
            old_cpu_handle = cdh->cpu_handle;
            old_gpu_handle = cdh->gpu_handle;
            struct command_descriptor_heap *prev = cdh->prev_submitted;
            struct command_descriptor_heap *next = cdh->next_submitted;
            if (prev) {
                prev->next_submitted = next;
            } else {
                G.first_submitted_command_descriptor_heap = next;
            }
            if (next) {
                next->prev_submitted = prev;
            } else {
                G.last_submitted_command_descriptor_heap = prev;
            }
        } else {
            /* No available heap available for reuse, allocate new */
            cdh = arena_push_no_zero(G.command_descriptor_heaps_arena, struct command_descriptor_heap);
        }
        sys_mutex_unlock(&lock);
    }
    MEMZERO_STRUCT(cdh);
    if (old_heap) {
        /* Reuse: D3D heap and its handles survive the wrapper's zeroing */
        cdh->heap = old_heap;
        cdh->cpu_handle = old_cpu_handle;
        cdh->gpu_handle = old_gpu_handle;
    } else {
        D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
        desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
        desc.NumDescriptors =
DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&cdh->heap);
if (FAILED(hr)) {
    sys_panic(LIT("Failed to create GPU descriptor heap"));
}
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cdh->heap, &cdh->cpu_handle);
ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(cdh->heap, &cdh->gpu_handle);
}
/* Copy CPU heap */
{
    struct sys_lock lock = sys_mutex_lock_s(dh_cpu->mutex);
    ID3D12Device_CopyDescriptorsSimple(G.device, dh_cpu->num_descriptors_reserved, cdh->cpu_handle, dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    sys_mutex_unlock(&lock);
}
/* Insert into command list */
cdh->next_in_command_list = cl->first_command_descriptor_heap;
cl->first_command_descriptor_heap = cdh;
return cdh;
}

/* ==========================
 *
 * Command buffer
 *
 * ========================== */

/* Map a (power-of-two) buffer size to a stable dict key for its size group */
INTERNAL u64 command_buffer_hash_from_size(u64 size)
{
    u64 hash = rand_u64_from_seed(size);
    return hash;
}

/* Round v up to the next power of two; 0 stays 0, exact powers stay put */
INTERNAL u64 align_up_pow2(u64 v)
{
    u64 res = 0;
    if (v > 0) {
        /* Smear the highest set bit of (v - 1) into all lower bits, then add 1 */
        res = v - 1;
        res |= res >> 1;
        res |= res >> 2;
        res |= res >> 4;
        res |= res >> 8;
        res |= res >> 16;
        res |= res >> 32;
        ++res;
    }
    return res;
}

/* Upload `data` into a transient CPU-visible buffer tied to this command
   list. Buffers are pooled in power-of-two size groups and reused once
   their submission fence has completed. */
INTERNAL struct command_buffer *command_list_push_buffer(struct command_list *cl, struct string data)
{
    __prof;
    /* Determine size */
    u64 size = max_u64(DX12_COMMAND_BUFFER_MIN_SIZE, align_up_pow2(data.len));
    /* Allocate buffer */
    struct command_buffer_group *cb_group = NULL;
    struct command_buffer *cb = NULL;
    struct dx12_resource *resource = NULL;
    {
        struct sys_lock lock = sys_mutex_lock_e(G.command_buffers_mutex);
        {
            u64 group_hash = command_buffer_hash_from_size(size);
            struct dict_entry *cb_group_entry = dict_ensure_entry(G.command_buffers_arena, G.command_buffers_dict, group_hash);
            cb_group = cb_group_entry->value;
            if (!cb_group) {
                /* Create group */
                cb_group = arena_push(G.command_buffers_arena, struct command_buffer_group);
cb_group_entry->value = (u64)cb_group;
}
}
/* Find first command buffer ready for reuse */
for (struct command_buffer *tmp = cb_group->first_submitted; tmp; tmp = tmp->next_submitted) {
    /* TODO: Cache completed fence values */
    u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence);
    if (completed_fence_value >= tmp->submitted_fence_target) {
        cb = tmp;
        break;
    }
}
if (cb) {
    /* Remove from submitted list */
    resource = cb->resource;
    struct command_buffer *prev = cb->prev_submitted;
    struct command_buffer *next = cb->next_submitted;
    if (prev) {
        prev->next_submitted = next;
    } else {
        cb_group->first_submitted = next;
    }
    if (next) {
        next->prev_submitted = prev;
    } else {
        cb_group->last_submitted = prev;
    }
} else {
    /* Allocate new */
    cb = arena_push_no_zero(G.command_buffers_arena, struct command_buffer);
}
sys_mutex_unlock(&lock);
}
MEMZERO_STRUCT(cb);
cb->group = cb_group;
/* size holds the payload length, not the (rounded-up) resource size */
cb->size = data.len;
/* Create upload heap */
if (resource) {
    cb->resource = resource;
} else {
    enum dx12_resource_view_flags view_flags = DX12_RESOURCE_VIEW_FLAG_NONE;
    D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
    heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
    heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
    D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
    D3D12_RESOURCE_DESC desc = ZI;
    desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
    desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
    desc.Format = DXGI_FORMAT_UNKNOWN;
    desc.Alignment = 0;
    desc.Width = size;
    desc.Height = 1;
    desc.DepthOrArraySize = 1;
    desc.MipLevels = 1;
    desc.SampleDesc.Count = 1;
    desc.SampleDesc.Quality = 0;
    D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;
    cb->resource = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state, view_flags);
}
/* Copy data to resource */
{
    /* Empty read range: the CPU only writes to the mapped memory */
    D3D12_RANGE read_range = ZI;
    void *dst = NULL;
    HRESULT hr = ID3D12Resource_Map(cb->resource->resource, 0, &read_range, &dst);
    if (FAILED(hr)
|| !dst) {
    /* TODO: Don't panic */
    sys_panic(LIT("Failed to map command buffer resource"));
}
MEMCPY(dst, data.text, data.len);
ID3D12Resource_Unmap(cb->resource->resource, 0, NULL);
}
/* Insert into command list */
cb->next_in_command_list = cl->first_command_buffer;
cl->first_command_buffer = cb;
return cb;
}

/* ==========================
 *
 * Util
 *
 * ========================== */

/* Upload a struct as root constants on root parameter 0, 32 bits at a time */
INTERNAL void command_list_set_root_constant(struct command_list *cl, void *src, u32 size)
{
    __prof;
    if (size % 4 == 0) {
        u32 num32bit = size / 4;
        for (u32 i = 0; i < num32bit; ++i) {
            u32 val = 0;
            /* MEMCPY avoids alignment assumptions on src */
            MEMCPY(&val, (((u32 *)src) + i), 4);
            ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(cl->cl, 0, val, i);
        }
    } else {
        /* Root constant structs must pad to 32 bits */
        ASSERT(false);
    }
}

INTERNAL struct D3D12_VIEWPORT viewport_from_rect(struct rect r)
{
    struct D3D12_VIEWPORT viewport = ZI;
    viewport.TopLeftX = r.x;
    viewport.TopLeftY = r.y;
    viewport.Width = r.width;
    viewport.Height = r.height;
    viewport.MinDepth = 0.0f;
    viewport.MaxDepth = 1.0f;
    return viewport;
}

INTERNAL D3D12_RECT scissor_from_rect(struct rect r)
{
    D3D12_RECT scissor = ZI;
    scissor.left = r.x;
    scissor.top = r.y;
    scissor.right = r.x + r.width;
    scissor.bottom = r.y + r.height;
    return scissor;
}

/* Views use cb->size (the payload length), so they stay tight to the
   uploaded data rather than the rounded-up resource size */
INTERNAL D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer *cb, u32 vertex_size)
{
    D3D12_VERTEX_BUFFER_VIEW vbv = ZI;
    vbv.BufferLocation = cb->resource->gpu_address;
    vbv.SizeInBytes = cb->size;
    vbv.StrideInBytes = vertex_size;
    return vbv;
}

INTERNAL D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *cb, DXGI_FORMAT format)
{
    D3D12_INDEX_BUFFER_VIEW ibv = ZI;
    ibv.BufferLocation = cb->resource->gpu_address;
    ibv.Format = format;
    ibv.SizeInBytes = cb->size;
    return ibv;
}

/* ==========================
 *
 * Texture
 *
 * ========================== */

/* Create a 2D texture (optionally render-targetable) and synchronously
   upload initial_data through a temporary upload heap on the background
   copy queue */
struct gp_resource *gp_texture_alloc(enum gp_texture_format format, u32 flags, struct v2i32 size, void *initial_data)
{
    __prof;
    struct dxgi_format_info {
DXGI_FORMAT format;
u32 size; /* bytes per pixel */
};
LOCAL_PERSIST const struct dxgi_format_info formats[] = {
    [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM] = { DXGI_FORMAT_R8G8B8A8_UNORM, 4 },
    [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = { DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, 4 }
};
DXGI_FORMAT dxgi_format = ZI;
u32 pixel_size = 0;
if (format < (i32)countof(formats)) {
    dxgi_format = formats[format].format;
    pixel_size = formats[format].size;
    ASSERT(dxgi_format != 0);
    ASSERT(pixel_size != 0);
}
/* NOTE(review): this treats format value 0 as invalid — assumes 0 is a
   "none" sentinel in enum gp_texture_format and not a real format; confirm
   against the enum declaration in gp.h */
if (format == 0) {
    sys_panic(LIT("Tried to create texture with unknown format"));
}
enum dx12_resource_view_flags view_flags = DX12_RESOURCE_VIEW_FLAG_SRV;
D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
D3D12_RESOURCE_DESC desc = ZI;
desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
desc.Format = dxgi_format;
desc.Alignment = 0;
desc.Width = size.x;
desc.Height = size.y;
desc.DepthOrArraySize = 1;
desc.MipLevels = 1;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
if (flags & GP_TEXTURE_FLAG_TARGETABLE) {
    desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
    view_flags |= DX12_RESOURCE_VIEW_FLAG_RTV;
}
D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state, view_flags);
r->texture_size = size;
/* Upload texture */
if (initial_data) {
    u64 upload_size = 0;
    u64 upload_row_size = 0;
    u32 upload_num_rows = 0;
    D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI;
    /* Footprint supplies the row pitch/offset layout the copy engine expects */
    ID3D12Device_GetCopyableFootprints(G.device, &desc, 0, 1, 0, &footprint, &upload_num_rows, &upload_row_size, &upload_size);
    /* Create temp upload heap */
    struct dx12_resource *upload = NULL;
    {
        enum dx12_resource_view_flags upload_view_flags = DX12_RESOURCE_VIEW_FLAG_NONE;
        D3D12_HEAP_PROPERTIES
upload_heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; upload_heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; upload_heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; D3D12_HEAP_FLAGS upload_heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; D3D12_RESOURCE_DESC upload_desc = ZI; upload_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; upload_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; upload_desc.Format = DXGI_FORMAT_UNKNOWN; upload_desc.Alignment = 0; upload_desc.Width = upload_size; upload_desc.Height = 1; upload_desc.DepthOrArraySize = 1; upload_desc.MipLevels = 1; upload_desc.SampleDesc.Count = 1; upload_desc.SampleDesc.Quality = 0; D3D12_RESOURCE_STATES upload_initial_state = D3D12_RESOURCE_STATE_GENERIC_READ; /* FIXME: Release */ upload = dx12_resource_alloc(upload_heap_props, upload_heap_flags, upload_desc, upload_initial_state, upload_view_flags); /* Copy to upload heap */ #if 0 /* FIXME: Copy based on footprint */ { D3D12_RANGE read_range = ZI; void *dst = NULL; HRESULT hr = ID3D12Resource_Map(upload->resource, 0, &read_range, &dst); if (FAILED(hr) || !dst) { /* TODO: Don't panic */ sys_panic(LIT("Failed to map texture upload resource")); } MEMCPY(dst, initial_data, size.x * size.y * pixel_size); ID3D12Resource_Unmap(upload->resource, 0, NULL); } #else /* FIXME: Copy based on footprint */ { D3D12_RANGE read_range = ZI; void *mapped = NULL; HRESULT hr = ID3D12Resource_Map(upload->resource, 0, &read_range, &mapped); if (FAILED(hr) || !mapped) { /* TODO: Don't panic */ sys_panic(LIT("Failed to map texture upload resource")); } u8 *dst = (u8 *)mapped + footprint.Offset; u8 *src = initial_data; for (u32 y = 0; y < upload_num_rows; ++y) { memcpy(dst + y * footprint.Footprint.RowPitch, src + y * size.x * pixel_size, size.x * pixel_size); } ID3D12Resource_Unmap(upload->resource, 0, NULL); } #endif } /* Copy from upload heap to texture */ struct command_queue *cq = G.command_queues[DX12_QUEUE_COPY_BACKGROUND]; struct command_list *cl = 
command_list_open(cq->cl_pool); { __profnc_dx12(cl->cq->prof, cl->cl, "Upload texture", RGB32_F(0.2, 0.5, 0.2)); D3D12_TEXTURE_COPY_LOCATION dst_loc = { .pResource = r->resource, .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, .SubresourceIndex = 0, }; D3D12_TEXTURE_COPY_LOCATION src_loc = { .pResource = upload->resource, .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, .PlacedFootprint = footprint, }; ID3D12GraphicsCommandList_CopyTextureRegion(cl->cl, &dst_loc, 0, 0, 0, &src_loc, NULL); /* FIXME: Better barrier? */ //dx12_resource_barrier(cl->cl, r, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE); } u64 fence_target = command_list_close(cl); /* Wait */ /* TODO: Return async waitable to caller */ { __profn("Wait for upload"); HANDLE event = CreateEvent(NULL, false, false, NULL); ID3D12Fence_SetEventOnCompletion(cq->submit_fence, fence_target, event); WaitForSingleObject(event, INFINITE); CloseHandle(event); } } return (struct gp_resource *)r; } struct v2i32 gp_texture_get_size(struct gp_resource *resource) { struct dx12_resource *r = (struct dx12_resource *)resource; return r->texture_size; } /* ========================== * * Dispatch * ========================== */ /* Calculate the view projection matrix */ INLINE struct mat4x4 calculate_vp(struct xform view, f32 viewport_width, f32 viewport_height) { struct mat4x4 projection = mat4x4_from_ortho(0.0, viewport_width, viewport_height, 0.0, -1.0, 1.0); struct mat4x4 view4x4 = mat4x4_from_xform(view); return mat4x4_mul(projection, view4x4); } void gp_dispatch(struct gp_dispatch_params params) { __prof; struct arena_temp scratch = scratch_begin_no_conflict(); struct flow *flow = (struct flow *)params.flow; struct dx12_resource *target = (struct dx12_resource *)params.draw_target; struct sprite_scope *sprite_scope = sprite_scope_begin(); struct pipeline_scope *pipeline_scope = pipeline_scope_begin(); struct pipeline *material_pipeline = pipeline_from_name(pipeline_scope, LIT("material")); struct pipeline *shape_pipeline 
= pipeline_from_name(pipeline_scope, LIT("shape")); struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT]; struct command_list *cl = command_list_open(cq->cl_pool); { __profnc_dx12(cl->cq->prof, cl->cl, "Dispatch", RGB32_F(0.5, 0.2, 0.2)); struct mat4x4 vp_matrix = calculate_vp(params.draw_target_view, params.draw_target_viewport.width, params.draw_target_viewport.height); /* Upload dummmy vert & index buffer */ /* TODO: Make these static */ /* Dummy vertex buffer */ LOCAL_PERSIST u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 }; struct command_buffer *dummy_vertex_buffer = command_list_push_buffer(cl, STRING(0, 0)); struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, STRING_FROM_ARRAY(quad_indices)); /* Process flow data into uploadable data */ struct sh_material_instance *material_instances = arena_push_array_no_zero(scratch.arena, struct sh_material_instance, flow->num_material_instance_descs); struct sh_material_grid *grids = arena_push_array_no_zero(scratch.arena, struct sh_material_grid, flow->num_material_grid_descs); { __profn("Process flow data"); /* Process material instances */ { __profn("Process material instances"); for (u32 i = 0; i < flow->num_material_instance_descs; ++i) { struct material_instance_desc *desc = &((struct material_instance_desc *)arena_base(flow->material_instance_descs_arena))[i]; struct sh_material_instance *instance = &material_instances[i]; i32 texture_id = -1; if (desc->texture != 0) { texture_id = desc->texture->srv_descriptor->index; } else if (desc->sprite.hash != 0) { struct sprite_texture *st = sprite_texture_from_tag_async(sprite_scope, desc->sprite); struct dx12_resource *texture = (struct dx12_resource *)st->gp_texture; if (texture) { texture_id = texture->srv_descriptor->index; } } instance->tex_nurid = sh_int_from_i32(texture_id); instance->grid_id = sh_int_from_i32(desc->grid_id); instance->xf = sh_float2x3_from_xform(desc->xf); instance->uv0 = sh_float2_from_v2(desc->clip.p0); instance->uv1 = 
sh_float2_from_v2(desc->clip.p1); instance->tint_srgb = sh_uint_from_u32(desc->tint); instance->emittance = sh_float_from_f32(desc->emittance); } } /* Process grids */ { __profn("Process grids"); for (u32 i = 0; i < flow->num_material_grid_descs; ++i) { struct material_grid_desc *desc = &((struct material_grid_desc *)arena_base(flow->material_grid_descs_arena))[i]; struct sh_material_grid *grid = &grids[i]; grid->line_thickness = sh_float_from_f32(desc->line_thickness); grid->line_spacing = sh_float_from_f32(desc->line_spacing); grid->offset = sh_float2_from_v2(desc->offset); grid->bg0_srgb = sh_uint_from_u32(desc->bg0_color); grid->bg1_srgb = sh_uint_from_u32(desc->bg1_color); grid->line_srgb = sh_uint_from_u32(desc->line_color); grid->x_srgb = sh_uint_from_u32(desc->x_color); grid->y_srgb = sh_uint_from_u32(desc->y_color); } } } /* Upload buffers */ struct command_buffer *material_instance_buffer = command_list_push_buffer(cl, STRING(sizeof(*material_instances) * flow->num_material_instance_descs, (u8 *)material_instances)); struct command_buffer *grid_buffer = command_list_push_buffer(cl, STRING(sizeof(*grids) * flow->num_material_grid_descs, (u8 *)grids)); struct command_buffer *shape_verts_buffer = command_list_push_buffer(cl, STRING_FROM_ARENA(flow->shape_verts_arena)); struct command_buffer *shape_indices_buffer = command_list_push_buffer(cl, STRING_FROM_ARENA(flow->shape_indices_arena)); /* Upload descriptor heap */ struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap); /* Transition render target */ enum D3D12_RESOURCE_STATES target_old_state = dx12_resource_barrier(cl->cl, target, D3D12_RESOURCE_STATE_RENDER_TARGET); { ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &target->rtv_descriptor->handle, false, NULL); if (params.clear_target) { f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, target->rtv_descriptor->handle, clear_color, 0, NULL); 
} } /* Material pass */ if (material_pipeline->success) { __profn("Material pass"); __profnc_dx12(cl->cq->prof, cl->cl, "Material pass", RGB32_F(0.5, 0.2, 0.2)); /* Bind pipeline */ ID3D12GraphicsCommandList_SetPipelineState(cl->cl, material_pipeline->pso); ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, material_pipeline->rootsig); /* Set constants */ struct sh_material_constants constants = ZI; constants.projection = sh_float4x4_from_mat4x4(vp_matrix); command_list_set_root_constant(cl, &constants, sizeof(constants)); /* Set instance buffer */ ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(cl->cl, 1, material_instance_buffer->resource->gpu_address); /* Set grid buffer */ ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(cl->cl, 2, grid_buffer->resource->gpu_address); /* Set descriptor heap */ ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap }; ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps); ID3D12GraphicsCommandList_SetGraphicsRootDescriptorTable(cl->cl, 3, descriptor_heap->gpu_handle); /* Setup Rasterizer State */ D3D12_VIEWPORT viewport = viewport_from_rect(params.draw_target_viewport); D3D12_RECT scissor = scissor_from_rect(params.draw_target_viewport); ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport); ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor); /* Draw */ u32 instance_count = material_instance_buffer->size / sizeof(struct sh_material_instance); D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0); D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT); ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv); ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv); ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, instance_count, 0, 0, 0); } /* Shape pass */ if 
(shape_pipeline->success) { __profn("Shape pass"); __profnc_dx12(cl->cq->prof, cl->cl, "Shape pass", RGB32_F(0.5, 0.2, 0.2)); /* Bind pipeline */ ID3D12GraphicsCommandList_SetPipelineState(cl->cl, shape_pipeline->pso); ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, shape_pipeline->rootsig); /* Set constants */ struct sh_shape_constants constants = ZI; constants.projection = sh_float4x4_from_mat4x4(vp_matrix); command_list_set_root_constant(cl, &constants, sizeof(constants)); /* Setup Rasterizer State */ D3D12_VIEWPORT viewport = viewport_from_rect(params.draw_target_viewport); D3D12_RECT scissor = scissor_from_rect(params.draw_target_viewport); ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport); ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor); /* Draw */ u32 index_count = shape_indices_buffer->size / sizeof(u32); D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(shape_verts_buffer, sizeof(struct sh_shape_vert)); D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(shape_indices_buffer, DXGI_FORMAT_R32_UINT); ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv); ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv); ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, index_count, 1, 0, 0, 0); } /* Reset render target */ { dx12_resource_barrier(cl->cl, target, target_old_state); } } command_list_close(cl); pipeline_scope_end(pipeline_scope); sprite_scope_end(sprite_scope); flow_reset(flow); scratch_end(scratch); } /* ========================== * * Memory info * ========================== */ struct gp_memory_info gp_query_memory_info(void) { struct gp_memory_info res = ZI; HRESULT hr = 0; IDXGIAdapter3 *dxgiAdapter3 = NULL; if (SUCCEEDED(hr)) { hr = IDXGIAdapter_QueryInterface(G.adapter, &IID_IDXGIAdapter3, (void **)&dxgiAdapter3); } if (SUCCEEDED(hr)) { struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI; 
IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info); res.local_used = info.CurrentUsage; res.local_budget = info.Budget; } if (SUCCEEDED(hr)) { struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI; IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info); res.non_local_used = info.CurrentUsage; res.non_local_budget = info.Budget; } if (dxgiAdapter3) { IDXGIAdapter_Release(dxgiAdapter3); } return res; } /* ========================== * * Swapchain * ========================== */ INTERNAL struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, struct sys_window *window, struct v2i32 resolution) { __prof; resolution.x = max_i32(resolution.x, 1); resolution.y = max_i32(resolution.y, 1); b32 should_rebuild = !v2i32_eq(swapchain->resolution, resolution); if (should_rebuild) { HRESULT hr = 0; struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT]; HWND hwnd = (HWND)sys_window_get_internal_handle(window); if (swapchain->swapchain) { ASSERT(hwnd == swapchain->hwnd); /* Lock direct queue submissions (in case any write to backbuffer) */ /* TODO: Less overkill approach - Only flush present_blit since we know it's the only operation targeting backbuffer */ struct sys_lock lock = sys_mutex_lock_e(cq->submit_fence_mutex); //DEBUGBREAKABLE; //struct sys_lock lock = sys_mutex_lock_e(G.global_command_list_record_mutex); { /* Flush direct queue */ //ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, ++cq->submit_fence_target); { HANDLE event = CreateEvent(NULL, false, false, NULL); ID3D12Fence_SetEventOnCompletion(cq->submit_fence, cq->submit_fence_target, event); WaitForSingleObject(event, INFINITE); CloseHandle(event); } /* Release buffers */ for (u32 i = 0; i < countof(swapchain->buffers); ++i) { struct swapchain_buffer *sb = &swapchain->buffers[i]; descriptor_release(sb->rtv_descriptor); ID3D12Resource_Release(sb->resource); } /* Resize buffers */ hr = 
IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, DX12_SWAPCHAIN_FLAGS); if (FAILED(hr)) { /* TODO: Don't panic */ sys_panic(LIT("Failed to resize swapchain")); } } sys_mutex_unlock(&lock); } else { /* Create swapchain1 */ IDXGISwapChain1 *swapchain1 = NULL; { DXGI_SWAP_CHAIN_DESC1 desc = ZI; desc.Format = DX12_SWAPCHAIN_FORMAT; desc.Width = resolution.x; desc.Height = resolution.y; desc.SampleDesc.Count = 1; desc.SampleDesc.Quality = 0; desc.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT; desc.BufferCount = DX12_SWAPCHAIN_BUFFER_COUNT; desc.Scaling = DXGI_SCALING_NONE; desc.Flags = DX12_SWAPCHAIN_FLAGS; desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; hr = IDXGIFactory2_CreateSwapChainForHwnd(G.factory, (IUnknown *)cq->cq, hwnd, &desc, NULL, NULL, &swapchain1); if (FAILED(hr)) { sys_panic(LIT("Failed to create IDXGISwapChain1")); } } /* Upgrade to swapchain3 */ hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain->swapchain); if (FAILED(hr)) { sys_panic(LIT("Failed to create IDXGISwapChain3")); } /* Disable Alt+Enter changing monitor resolution to match window size */ IDXGIFactory_MakeWindowAssociation(G.factory, hwnd, DXGI_MWA_NO_ALT_ENTER); IDXGISwapChain1_Release(swapchain1); swapchain->hwnd = hwnd; } /* Allocate swapchain resources */ for (u32 i = 0; i < countof(swapchain->buffers); ++i) { ID3D12Resource *resource = NULL; hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, i, &IID_ID3D12Resource, (void **)&resource); if (FAILED(hr)) { /* TODO: Don't panic */ sys_panic(LIT("Failed to get swapchain buffer")); } struct swapchain_buffer *sb = &swapchain->buffers[i]; MEMZERO_STRUCT(sb); sb->swapchain = swapchain; sb->resource = resource; sb->rtv_descriptor = descriptor_alloc(G.rtv_heap); sb->state = D3D12_RESOURCE_STATE_COMMON; ID3D12Device_CreateRenderTargetView(G.device, sb->resource, NULL, 
sb->rtv_descriptor->handle); } swapchain->resolution = resolution; } u32 backbuffer_index = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->swapchain); return &swapchain->buffers[backbuffer_index]; } /* ========================== * * Present * ========================== */ INTERNAL void present_blit(struct swapchain_buffer *dst, struct dx12_resource *src, struct xform src_xf) { __prof; struct pipeline_scope *pipeline_scope = pipeline_scope_begin(); struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, LIT("blit")); if (blit_pipeline->success) { struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT]; struct command_list *cl = command_list_open(cq->cl_pool); { __profnc_dx12(cl->cq->prof, cl->cl, "Blit", RGB32_F(0.5, 0.2, 0.2)); struct swapchain *swapchain = dst->swapchain; /* Upload dummmy vert & index buffer */ /* TODO: Make these static */ /* Dummy vertex buffer */ LOCAL_PERSIST u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 }; struct command_buffer *dummy_vertex_buffer = command_list_push_buffer(cl, STRING(0, 0)); struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, STRING_FROM_ARRAY(quad_indices)); /* Upload descriptor heap */ struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap); struct rect viewport_rect = RECT_FROM_V2(V2(0, 0), V2(swapchain->resolution.x, swapchain->resolution.y)); D3D12_VIEWPORT viewport = viewport_from_rect(viewport_rect); D3D12_RECT scissor = scissor_from_rect(viewport_rect); struct mat4x4 vp_matrix = calculate_vp(src_xf, viewport.Width, viewport.Height); /* Transition dst to render target */ { struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI; rtb.pResource = dst->resource; rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; rtb.StateBefore = dst->state; rtb.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; struct D3D12_RESOURCE_BARRIER rb = ZI; rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; rb.Flags = 0; rb.Transition = rtb; 
ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb); dst->state = rtb.StateAfter; } ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &dst->rtv_descriptor->handle, false, NULL); /* Clear */ f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, dst->rtv_descriptor->handle, clear_color, 0, NULL); /* Bind pipeline */ ID3D12GraphicsCommandList_SetPipelineState(cl->cl, blit_pipeline->pso); ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, blit_pipeline->rootsig); /* Set constants */ struct sh_blit_constants constants = ZI; constants.projection = sh_float4x4_from_mat4x4(vp_matrix); constants.tex_urid = sh_uint_from_u32(src->srv_descriptor->index); constants.gamma = sh_float_from_f32(2.2); command_list_set_root_constant(cl, &constants, sizeof(constants)); /* Set descriptor heap */ ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap }; ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps); ID3D12GraphicsCommandList_SetGraphicsRootDescriptorTable(cl->cl, 1, descriptor_heap->gpu_handle); /* Setup Rasterizer State */ ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport); ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor); /* Draw */ D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0); D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT); ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv); ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv); ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0); /* Transition dst to presentable */ { struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI; rtb.pResource = dst->resource; rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; rtb.StateBefore = dst->state; rtb.StateAfter = D3D12_RESOURCE_STATE_PRESENT; struct 
D3D12_RESOURCE_BARRIER rb = ZI; rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; rb.Flags = 0; rb.Transition = rtb; ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb); dst->state = rtb.StateAfter; } } command_list_close(cl); } pipeline_scope_end(pipeline_scope); } void gp_present(struct sys_window *window, struct v2i32 backresolution, struct gp_resource *texture, struct xform texture_xf, i32 vsync) { __prof; //sys_sleep(0.1); struct swapchain *swapchain = &G.swapchain; struct swapchain_buffer *swapchain_buffer = update_swapchain(swapchain, window, backresolution); struct dx12_resource *texture_resource = (struct dx12_resource *)texture; /* Blit */ present_blit(swapchain_buffer, texture_resource, texture_xf); //sys_sleep(0.1); u32 present_flags = 0; if (!vsync) { present_flags |= (DXGI_PRESENT_ALLOW_TEARING * DX12_ALLOW_TEARING); } /* Present */ /* FIXME: Resource barrier */ { __profn("Present"); HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags); if (!SUCCEEDED(hr)) { ASSERT(false); } __profframe(0); } #if PROFILING_D3D { __profn("Mark queue frames"); /* Lock because frame marks shouldn't occur while command lists are recording */ struct sys_lock lock = sys_mutex_lock_e(G.global_command_list_record_mutex); for (u32 i = 0; i < countof(G.command_queues); ++i) { struct command_queue *cq = G.command_queues[i]; __prof_dx12_new_frame(cq->prof); } sys_mutex_unlock(&lock); } { __profn("Collect queues"); for (u32 i = 0; i < countof(G.command_queues); ++i) { struct command_queue *cq = G.command_queues[i]; __prof_dx12_collect(cq->prof); } } #endif } /* ========================== * * Evictor thread * ========================== */ INTERNAL SYS_THREAD_DEF(evictor_thread_entry_point, arg) { __prof; (UNUSED)arg; struct arena_temp scratch = scratch_begin_no_conflict(); HANDLE event = CreateEvent(NULL, false, false, NULL); HANDLE events[2] = ZI; events[0] = G.evictor_thread_wake_event; events[1] = event; u64 completed_targets[DX12_NUM_QUEUES] = 
ZI; b32 shutdown = atomic_i32_fetch(&G.evictor_thread_shutdown); while (!shutdown) { struct arena_temp temp = arena_temp_begin(scratch.arena); { __profn("Run"); u64 targets[countof(completed_targets)] = ZI; /* Copy queued data */ u32 num_fenced_releases = 0; struct fenced_release_data *fenced_releases = NULL; { __profn("Copy queued releases"); struct sys_lock lock = sys_mutex_lock_e(G.fenced_releases_mutex); num_fenced_releases = G.fenced_releases_arena->pos / sizeof(struct fenced_release_data); fenced_releases = arena_push_array_no_zero(temp.arena, struct fenced_release_data, num_fenced_releases); MEMCPY(fenced_releases, arena_base(G.fenced_releases_arena), G.fenced_releases_arena->pos); arena_reset(G.fenced_releases_arena); MEMCPY(targets, G.fenced_release_targets, sizeof(targets)); sys_mutex_unlock(&lock); } /* Wait until fences reach target */ { __profn("Check fences"); for (u32 i = 0; i < countof(targets) && !shutdown; ++i) { while (completed_targets[i] < targets[i] && !shutdown) { struct command_queue *cq = G.command_queues[i]; completed_targets[i] = ID3D12Fence_GetCompletedValue(cq->submit_fence); if (completed_targets[i] < targets[i]) { ID3D12Fence_SetEventOnCompletion(cq->submit_fence, targets[i], event); { __profn("Wait on fence"); WaitForMultipleObjects(2, events, false, INFINITE); shutdown = atomic_i32_fetch(&G.evictor_thread_shutdown); } } } } } /* Process releases */ for (u32 i = 0; i < num_fenced_releases; ++i) { __profn("Release"); struct fenced_release_data *fr = &fenced_releases[i]; switch (fr->kind) { default: { /* Unknown handle type */ ASSERT(false); } break; case FENCED_RELEASE_KIND_RESOURCE: { struct dx12_resource *resource = (struct dx12_resource *)fr->ptr; dx12_resource_release_now(resource); } break; case FENCED_RELEASE_KIND_PIPELINE: { struct pipeline *pipeline = (struct pipeline *)fr->ptr; pipeline_release_now(pipeline); } break; } } } arena_temp_end(temp); { __profn("Sleep"); WaitForSingleObject(G.evictor_thread_wake_event, INFINITE); 
shutdown = atomic_i32_fetch(&G.evictor_thread_shutdown); } } /* Release event */ CloseHandle(event); scratch_end(scratch); }