#if DX12_TEST #include "gpu.h" #include "sys.h" #include "arena.h" #include "memory.h" #include "string.h" #include "scratch.h" #include "app.h" #include "work.h" #include "log.h" #include "resource.h" #pragma warning(push, 0) # define UNICODE # define COBJMACROS # include # include # include # include # include # include #pragma warning(pop) #pragma comment(lib, "d3d12") #pragma comment(lib, "dxgi") #pragma comment(lib, "dxguid") #pragma comment(lib, "d3dcompiler") #define SH_CPU 1 //#define DX12_WAIT_FRAME_LATENCY 1 //#define DX12_SWAPCHAIN_FLAGS ((DX12_ALLOW_TEARING * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | (DX12_WAIT_FRAME_LATENCY * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)) #define DX12_ALLOW_TEARING 1 #define DX12_SWAPCHAIN_FLAGS (DX12_ALLOW_TEARING * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) #define DX12_SWAPCHAIN_BUFFER_COUNT (3) #define DX12_SWAPCHAIN_FORMAT (DXGI_FORMAT_R8G8B8A8_UNORM) //#define DX12_SWAPCHAIN_RTV_FORMAT (DXGI_FORMAT_R8G8B8A8_UNORM_SRGB) /* Arbitrary limits */ #define DX12_NUM_CBV_SRV_UAV_DESCRIPTORS (1024 * 64) #define DX12_NUM_RTV_DESCRIPTORS (1024 * 1) #if RTC # define DX12_DEBUG 1 # define DX12_SHADER_DEBUG 1 #else # define DX12_DEBUG 0 # define DX12_SHADER_DEBUG 0 #endif struct shader_desc { char *file; char *func; }; struct pipeline_desc { char *name; struct shader_desc vs; struct shader_desc ps; u32 flags; }; struct pipeline { struct pipeline_desc desc; ID3D12PipelineState *pso; ID3D12RootSignature *rootsig; }; struct pipeline_result { struct pipeline pipeline; i64 elapsed; u64 errors_text_len; u8 errors_text[KILOBYTE(16)]; }; struct pipeline_error { struct string msg; }; struct dx12_descriptor { struct dx12_cpu_descriptor_heap *heap; D3D12_CPU_DESCRIPTOR_HANDLE handle; struct dx12_descriptor *next_free; }; struct dx12_resource { ID3D12Resource *resource; enum D3D12_RESOURCE_STATES state; #if 0 D3D12_CPU_DESCRIPTOR_HANDLE cbv_handle; D3D12_CPU_DESCRIPTOR_HANDLE srv_handle; D3D12_CPU_DESCRIPTOR_HANDLE uav_handle; 
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle; #else struct dx12_descriptor *cbv_descriptor; struct dx12_descriptor *srv_descriptor; struct dx12_descriptor *uav_descriptor; struct dx12_descriptor *rtv_descriptor; #endif D3D12_GPU_VIRTUAL_ADDRESS gpu_address; /* NOTE: 0 for textures */ struct dx12_resource *next_free; }; struct dx12_cpu_descriptor_heap { enum D3D12_DESCRIPTOR_HEAP_TYPE type; struct arena arena; struct sys_mutex mutex; u32 descriptor_size; u32 num_descriptors_reserved; u32 num_descriptors_capacity; struct dx12_descriptor *first_free_descriptor; ID3D12DescriptorHeap *heap; struct D3D12_CPU_DESCRIPTOR_HANDLE handle; }; struct dx12_gpu_descriptor_heap { D3D12_DESCRIPTOR_HEAP_TYPE type; ID3D12DescriptorHeap *heap; D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle; /* If free_fence < free_fence_value, then descriptor heap is in use by the GPU */ ID3D12Fence *free_fence; u64 free_fence_value; struct dx12_gpu_descriptor_heap *prev_free; struct dx12_gpu_descriptor_heap *next_free; }; enum dx12_handle_kind { DX12_HANDLE_KIND_NONE, DX12_HANDLE_KIND_RESOURCE, DX12_HANDLE_KIND_PLAN, DX12_HANDLE_KIND_DISPATCH_STATE, NUM_DX12_HANDLE_KINDS }; struct dx12_handle_entry { enum dx12_handle_kind kind; u64 gen; u64 idx; void *data; struct dx12_handle_entry *next_free; }; /* ========================== * * Global state * ========================== */ GLOBAL struct { /* Handles pool */ struct sys_mutex handle_entries_mutex; struct arena handle_entries_arena; struct dx12_handle_entry *first_free_handle_entry; u64 num_handle_entries_reserved; /* Descriptor heaps pool */ struct sys_mutex gpu_descriptor_heaps_mutex; struct arena gpu_descriptor_heaps_arena; struct dx12_gpu_descriptor_heap *first_free_gpu_descriptor_heap; struct dx12_gpu_descriptor_heap *last_free_gpu_descriptor_heap; /* Resources pool */ struct sys_mutex resources_mutex; struct arena resources_arena; struct dx12_resource *first_free_resource; /* FIXME: Remove this (testing) */ struct 
pipeline test_pipeline; /* Factory */ IDXGIFactory6 *factory; /* Device */ ID3D12Device *device; /* Descriptor sizes */ u32 desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; u32 desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; /* Global descriptor heaps */ struct dx12_cpu_descriptor_heap *cbv_srv_uav_heap; struct dx12_cpu_descriptor_heap *rtv_heap; /* Command queues */ /* TODO: Add optional mode to route everything to direct queue */ ID3D12CommandQueue *cq_direct; ID3D12CommandQueue *cq_compute; ID3D12CommandQueue *cq_copy_critical; ID3D12CommandQueue *cq_copy_background; /* Swapchain */ u32 swapchain_frame_index; ID3D12CommandAllocator *swapchain_ca; IDXGISwapChain3 *swapchain; ID3D12DescriptorHeap *swapchain_rtv_heap; ID3D12Resource *swapchain_buffers[DX12_SWAPCHAIN_BUFFER_COUNT]; /* Dummy vertex buffer */ struct dx12_resource *dummy_vertex_buffer; struct dx12_resource *dummy_index_buffer; D3D12_VERTEX_BUFFER_VIEW dummy_vertex_buffer_view; D3D12_INDEX_BUFFER_VIEW quad_index_buffer_view; } G = ZI, DEBUG_ALIAS(G, G_gpu_dx12); /* ========================== * * Startup * ========================== */ INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(gpu_shutdown); INTERNAL struct dx12_cpu_descriptor_heap *dx12_cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type); INTERNAL void dx12_init_device(void); INTERNAL void dx12_init_objects(void); INTERNAL void dx12_init_swapchain(struct sys_window *window); INTERNAL void dx12_init_pipelines(void); struct gpu_startup_receipt gpu_startup(struct work_startup_receipt *work_sr, struct sys_window *window) { __prof; (UNUSED)work_sr; /* Initialize handles pool */ G.handle_entries_mutex = sys_mutex_alloc(); G.handle_entries_arena = arena_alloc(GIGABYTE(64)); /* Initialize gpu descriptor heaps pool */ G.gpu_descriptor_heaps_mutex = sys_mutex_alloc(); G.gpu_descriptor_heaps_arena = arena_alloc(GIGABYTE(64)); /* Initialize resources pool */ G.resources_mutex = sys_mutex_alloc(); G.resources_arena = arena_alloc(GIGABYTE(64)); /* 
Initialize dx12 */ dx12_init_device(); dx12_init_objects(); dx12_init_swapchain(window); dx12_init_pipelines(); /* Init dummy buffers */ #if 0 { LOCAL_PERSIST const DXGI_FORMAT formats[] = { [GPU_TEXTURE_FORMAT_R8G8B8A8_UNORM] = DXGI_FORMAT_R8G8B8A8_UNORM, [GPU_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB }; enum dx12_resource_view_flags view_flags = DX12_RESOURCE_VIEW_FLAG_SRV; D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT }; heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; D3D12_RESOURCE_DESC desc = ZI; desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; desc.Alignment = 0; desc.Width = size.x; desc.Height = size.y; desc.DepthOrArraySize = 1; desc.MipLevels = 1; desc.SampleDesc.Count = 1; desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; if (flags & GPU_TEXTURE_FLAG_TARGETABLE) { desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; view_flags |= DX12_RESOURCE_VIEW_FLAG_RTV; } D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST; struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state, view_flags); //G.dummy_vertex_buffer = dx12_resource_alloc( /* Dummy vertex buffer */ u8 dummy_data[16] = ZI; D3D11_BUFFER_DESC vdesc = ZI; vdesc.Usage = D3D11_USAGE_IMMUTABLE; vdesc.ByteWidth = sizeof(dummy_data); vdesc.BindFlags = D3D11_BIND_VERTEX_BUFFER; D3D11_SUBRESOURCE_DATA dummy_data_subres = ZI; dummy_data_subres.pSysMem = dummy_data; G.dummy_vertex_buffer = dx11_buffer_alloc(vdesc, &dummy_data_subres); /* Quad index buffer */ LOCAL_PERSIST u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 }; D3D11_BUFFER_DESC idesc = ZI; idesc.Usage = D3D11_USAGE_IMMUTABLE; idesc.ByteWidth = sizeof(quad_indices); idesc.BindFlags = D3D11_BIND_INDEX_BUFFER; D3D11_SUBRESOURCE_DATA idata = ZI; idata.pSysMem = quad_indices; G.quad_index_buffer = dx11_buffer_alloc(idesc, 
&idata); } #endif /* Register callbacks */ app_register_exit_callback(gpu_shutdown); struct gpu_startup_receipt res = ZI; return res; } INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(gpu_shutdown) { __prof; #if DX12_DEBUG /* Release objects to make live object reporting less noisy */ for (u64 i = 0; i < ARRAY_COUNT(G.swapchain_buffers); ++i) { ID3D12Resource_Release(G.swapchain_buffers[i]); } ID3D12DescriptorHeap_Release(G.swapchain_rtv_heap); IDXGISwapChain3_Release(G.swapchain); ID3D12CommandAllocator_Release(G.swapchain_ca); ID3D12CommandQueue_Release(G.cq_copy_background); ID3D12CommandQueue_Release(G.cq_copy_critical); ID3D12CommandQueue_Release(G.cq_compute); ID3D12CommandQueue_Release(G.cq_direct); ID3D12Device_Release(G.device); #endif } /* ========================== * * Handle * ========================== */ INTERNAL void dx12_resource_release(struct dx12_resource *t); INTERNAL struct gpu_handle handle_alloc(enum dx12_handle_kind kind, void *data) { u64 old_gen = 0; u64 idx = 0; struct dx12_handle_entry *entry = NULL; { struct sys_lock lock = sys_mutex_lock_e(&G.handle_entries_mutex); if (G.first_free_handle_entry) { entry = G.first_free_handle_entry; G.first_free_handle_entry = entry->next_free; old_gen = entry->gen; idx = entry->idx; } else { entry = arena_push_no_zero(&G.handle_entries_arena, struct dx12_handle_entry); idx = G.num_handle_entries_reserved++; } sys_mutex_unlock(&lock); } MEMZERO_STRUCT(entry); entry->kind = kind; entry->gen = old_gen + 1; entry->idx = idx; entry->data = data; struct gpu_handle res = ZI; res.gen = entry->gen; res.idx = entry->idx; return res; } INTERNAL struct dx12_handle_entry *handle_get_entry(struct gpu_handle handle, struct sys_lock *lock) { sys_assert_locked_e_or_s(lock, &G.handle_entries_mutex); struct dx12_handle_entry *res = NULL; if (handle.idx > 0 && handle.idx < G.num_handle_entries_reserved) { struct dx12_handle_entry *tmp = &((struct dx12_handle_entry *)G.handle_entries_arena.base)[handle.idx]; if (tmp->gen == 
handle.gen) { res = tmp; } } return res; } INTERNAL void *handle_get_data(struct gpu_handle handle, enum dx12_handle_kind kind) { void *data = NULL; struct sys_lock lock = sys_mutex_lock_s(&G.handle_entries_mutex); { struct dx12_handle_entry *entry = handle_get_entry(handle, &lock); data = entry->data; #if RTC /* Handle should match expected kind */ ASSERT(entry->kind == kind); #endif } sys_mutex_unlock(&lock); return data; } /* TODO: The GPU api should ensure that resources freed by the caller will not cause issues on the GPU (via fencing), * however the caller is responsible for managing resource lifetimes on the CPU side (e.g. using sprites w/ sprite scopes * to ensure freed textures aren't being used in pending command lists. */ void gpu_release(struct gpu_handle handle) { enum dx12_handle_kind kind = 0; void *data = NULL; /* Release handle entry */ struct sys_lock lock = sys_mutex_lock_e(&G.handle_entries_mutex); { struct dx12_handle_entry *entry = handle_get_entry(handle, &lock); if (entry) { kind = entry->kind; data = entry->data; } ++entry->gen; entry->next_free = G.first_free_handle_entry; G.first_free_handle_entry = entry; } sys_mutex_unlock(&lock); /* Release data */ if (data) { switch (kind) { default: break; case DX12_HANDLE_KIND_RESOURCE: { dx12_resource_release(data); } break; } } } /* ========================== * * Dx12 device initialization * ========================== */ INTERNAL void dx12_init_error(struct string error) { struct arena_temp scratch = scratch_begin_no_conflict(); struct string msg = string_format(scratch.arena, LIT("Failed to initialize DirectX 12.\n\n%F"), FMT_STR(error)); sys_panic(msg); scratch_end(scratch); } INTERNAL void dx12_init_device(void) { __prof; struct arena_temp scratch = scratch_begin_no_conflict(); HRESULT hr = 0; /* Enable debug layer */ u32 dxgi_factory_flags = 0; #if DX12_DEBUG { ID3D12Debug *debug_controller0 = NULL; hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0); if (FAILED(hr)) { 
dx12_init_error(LIT("Failed to create ID3D12Debug0")); } ID3D12Debug1 *debug_controller1 = NULL; hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1); if (FAILED(hr)) { dx12_init_error(LIT("Failed to create ID3D12Debug1")); } ID3D12Debug_EnableDebugLayer(debug_controller0); /* FIXME: Enable this */ //ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, true); ID3D12Debug_Release(debug_controller1); ID3D12Debug_Release(debug_controller0); dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG; } #endif /* Create factory */ hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G.factory); if (FAILED(hr)) { dx12_init_error(LIT("Failed to initialize DXGI factory")); } /* Create device */ { ID3D12Device *device = NULL; struct string error = LIT("Could not initialize GPU device."); struct string first_gpu_name = ZI; u32 adapter_index = 0; while (true) { IDXGIAdapter1 *adapter = NULL; hr = IDXGIFactory6_EnumAdapterByGpuPreference(G.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter); if (SUCCEEDED(hr)) { DXGI_ADAPTER_DESC1 desc; IDXGIAdapter1_GetDesc1(adapter, &desc); if (first_gpu_name.len == 0) { first_gpu_name = string_from_wstr_no_limit(scratch.arena, desc.Description); } hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); if (SUCCEEDED(hr)) { IDXGIAdapter1_Release(adapter); adapter = NULL; break; } ID3D12Device_Release(device); IDXGIAdapter1_Release(adapter); adapter = NULL; device = NULL; ++adapter_index; } else { break; } } if (!device) { if (first_gpu_name.len > 0) { struct string fmt = LIT("Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. 
Ensure that the device is capable and drivers are up to date."); error = string_format(scratch.arena, fmt, FMT_STR(first_gpu_name)); } dx12_init_error(error); } G.device = device; } #if DX12_DEBUG /* Enable D3D12 Debug break */ { ID3D12InfoQueue *info = NULL; hr = ID3D12Device_QueryInterface(G.device, &IID_ID3D12InfoQueue, (void **)&info); if (FAILED(hr)) { dx12_init_error(LIT("Failed to query ID3D12Device interface")); } ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE); ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, TRUE); ID3D12InfoQueue_Release(info); } /* Enable DXGI Debug break */ { IDXGIInfoQueue *dxgi_info = NULL; hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info); if (FAILED(hr)) { dx12_init_error(LIT("Failed to get DXGI debug interface")); } IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, TRUE); IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, TRUE); IDXGIInfoQueue_Release(dxgi_info); } #endif scratch_end(scratch); } /* ========================== * * Dx12 object initialization * ========================== */ INTERNAL void dx12_init_objects(void) { HRESULT hr = 0; /* Initialize desc sizes */ G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV); G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV); /* Initialize desc counts */ G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS; 
G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = DX12_NUM_RTV_DESCRIPTORS;

    /* Create global descriptor heaps */
    G.cbv_srv_uav_heap = dx12_cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    G.rtv_heap = dx12_cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);

    /* Create direct command queue */
    {
        D3D12_COMMAND_QUEUE_DESC desc = ZI;
        desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
        desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
        hr = ID3D12Device_CreateCommandQueue(G.device, &desc, &IID_ID3D12CommandQueue, (void **)&G.cq_direct);
        if (FAILED(hr)) {
            dx12_init_error(LIT("Failed to create direct command queue"));
        }
    }
    /* Create compute command queue */
    {
        D3D12_COMMAND_QUEUE_DESC desc = ZI;
        desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
        desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
        hr = ID3D12Device_CreateCommandQueue(G.device, &desc, &IID_ID3D12CommandQueue, (void **)&G.cq_compute);
        if (FAILED(hr)) {
            dx12_init_error(LIT("Failed to create compute command queue"));
        }
    }
    /* Create critical copy command queue (high priority, for latency-sensitive copies) */
    {
        D3D12_COMMAND_QUEUE_DESC desc = ZI;
        desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
        desc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
        desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH;
        hr = ID3D12Device_CreateCommandQueue(G.device, &desc, &IID_ID3D12CommandQueue, (void **)&G.cq_copy_critical);
        if (FAILED(hr)) {
            dx12_init_error(LIT("Failed to create critical copy command queue"));
        }
    }
    /* Create background copy command queue (default priority) */
    {
        D3D12_COMMAND_QUEUE_DESC desc = ZI;
        desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
        desc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
        hr = ID3D12Device_CreateCommandQueue(G.device, &desc, &IID_ID3D12CommandQueue, (void **)&G.cq_copy_background);
        if (FAILED(hr)) {
            dx12_init_error(LIT("Failed to create background copy command queue"));
        }
    }
}

/* ==========================
 *
 * Dx12 swapchain initialization
 *
 * ========================== */

/* Creates the swapchain for `window` plus its command allocator, RTV heap, and
 * one render target view per backbuffer. */
INTERNAL void dx12_init_swapchain(struct sys_window *window)
{
    HRESULT hr = 0;

    /* Create swapchain command allocator */
    {
        hr = ID3D12Device_CreateCommandAllocator(G.device, D3D12_COMMAND_LIST_TYPE_DIRECT, &IID_ID3D12CommandAllocator, (void **)&G.swapchain_ca);
        if (FAILED(hr)) {
            dx12_init_error(LIT("Failed to create swapchain command allocator"));
        }
    }

    /* Create swapchain */
    {
        HWND hwnd = (HWND)sys_window_get_internal_handle(window);
        /* NOTE: Width/Height are left zero so DXGI sizes the buffers from the window */
        DXGI_SWAP_CHAIN_DESC1 desc = {
            .Format = DX12_SWAPCHAIN_FORMAT,
            .SampleDesc = { 1, 0 },
            .BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT,
            .BufferCount = DX12_SWAPCHAIN_BUFFER_COUNT,
            .Scaling = DXGI_SCALING_NONE,
            .Flags = DX12_SWAPCHAIN_FLAGS,
            .AlphaMode = DXGI_ALPHA_MODE_IGNORE,
            .SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD
        };
        /* Create swapchain1 */
        IDXGISwapChain1 *swapchain1 = NULL;
        hr = IDXGIFactory2_CreateSwapChainForHwnd(G.factory, (IUnknown *)G.cq_direct, hwnd, &desc, NULL, NULL, &swapchain1);
        if (FAILED(hr)) {
            dx12_init_error(LIT("Failed to create IDXGISwapChain1"));
        }
        /* Upgrade to swapchain3 */
        hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&G.swapchain);
        if (FAILED(hr)) {
            dx12_init_error(LIT("Failed to create IDXGISwapChain3"));
        }
        /* Disable Alt+Enter changing monitor resolution to match window size */
        IDXGIFactory_MakeWindowAssociation(G.factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
        /* Get initial frame index */
        G.swapchain_frame_index = IDXGISwapChain3_GetCurrentBackBufferIndex(G.swapchain);
        IDXGISwapChain1_Release(swapchain1);
    }

    /* Create swapchain RTV heap */
    {
        D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
        desc.NumDescriptors = DX12_SWAPCHAIN_BUFFER_COUNT;
        desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
        desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
        hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&G.swapchain_rtv_heap);
        if (FAILED(hr)) {
            dx12_init_error(LIT("Failed to create swapchain RTV heap"));
        }
    }

    /* Create swapchain RTVs (one per backbuffer, packed in the RTV heap) */
    {
        D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = ZI;
        ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(G.swapchain_rtv_heap, &rtv_handle);
        for
(u32 i = 0; i < DX12_SWAPCHAIN_BUFFER_COUNT; ++i) { hr = IDXGISwapChain3_GetBuffer(G.swapchain, i, &IID_ID3D12Resource, (void **)&G.swapchain_buffers[i]); if (FAILED(hr)) { dx12_init_error(LIT("Failed to get swapchain buffer")); } ID3D12Device_CreateRenderTargetView(G.device, G.swapchain_buffers[i], NULL, rtv_handle); rtv_handle.ptr += G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV]; } } } /* ========================== * * Dx12 pipeline initialization * ========================== */ /* TDOO: Rename 'mesh shader' to 'triangle shader' or something */ /* TODO: Move shader structs into shared C-HLSL header file */ /* ============= */ /* Mesh pipeline */ /* ============= */ /* Material pipeline */ PACK(struct fx_material_constant { struct mat4x4 vp; u32 instance_offset; }); PACK(struct fx_material_instance { struct xform xf; struct v2 uv0; struct v2 uv1; u32 tint_srgb; f32 emittance; }); /* ============= */ /* Grid pipeline */ /* ============= */ /* Init pipelines */ INTERNAL struct pipeline_result *pipeline_alloc_from_descs(struct arena *arena, u64 num_pipelines, struct pipeline_desc *descs); INTERNAL void pipeline_release(struct pipeline *pipeline); INTERNAL void dx12_init_pipelines(void) { __prof; struct arena_temp scratch = scratch_begin_no_conflict(); struct pipeline_desc pipeline_descs[] = { /* Material pipeline */ { .name = "material", .vs = { "sh/material.hlsl", "vs" }, .ps = { "sh/material.hlsl", "ps" } } }; struct pipeline_result *results = pipeline_alloc_from_descs(scratch.arena, ARRAY_COUNT(pipeline_descs), pipeline_descs); for (u64 i = 0; i < ARRAY_COUNT(pipeline_descs); ++i) { struct pipeline_result *result = &results[i]; if (result->errors_text_len > 0) { struct string msg = STRING(result->errors_text_len, result->errors_text); sys_panic(msg); pipeline_release(&result->pipeline); } else { /* FIXME: remove this */ G.test_pipeline = result->pipeline; } } scratch_end(scratch); } /* ========================== * * Shader compilation * ========================== 
*/ struct dx12_include_handler { ID3DInclude d3d_handler; ID3DIncludeVtbl vtbl; struct pipeline *pipeline; b32 has_open_resource; struct resource res; }; INTERNAL HRESULT dx12_include_open(ID3DInclude *d3d_handler, D3D_INCLUDE_TYPE include_type, LPCSTR name_cstr, LPCVOID parent_data, LPCVOID *data_out, UINT *data_len_out) { __prof; (UNUSED)include_type; (UNUSED)parent_data; HRESULT result = E_FAIL; struct dx12_include_handler *handler = (struct dx12_include_handler *)d3d_handler; struct string name = string_from_cstr_no_limit((char *)name_cstr); if (handler->has_open_resource) { sys_panic(LIT("Dx11 include handler somehow already has a resource open")); } struct resource res = resource_open(name); if (resource_exists(&res)) { handler->res = res; handler->has_open_resource = true; struct string data = resource_get_data(&res); *data_out = data.text; *data_len_out = data.len; result = S_OK; } #if 0 #if RESOURCE_RELOADING shader_add_include(&G.shader_info[handler->shader->kind], name); #endif #endif return result; } INTERNAL HRESULT dx12_include_close(ID3DInclude *d3d_handler, LPCVOID data) { __prof; (UNUSED)data; struct dx12_include_handler *handler = (struct dx12_include_handler *)d3d_handler; if (handler->has_open_resource) { resource_close(&handler->res); handler->has_open_resource = false; } return S_OK; } INTERNAL struct dx12_include_handler dx12_include_handler_alloc(struct pipeline *pipeline) { struct dx12_include_handler handler = ZI; handler.d3d_handler.lpVtbl = &handler.vtbl; handler.vtbl.Open = dx12_include_open; handler.vtbl.Close = dx12_include_close; handler.pipeline = pipeline; return handler; } INTERNAL void dx12_include_handler_release(struct dx12_include_handler *handler) { if (handler->has_open_resource) { ASSERT(false); /* Resource should have been closed by handler by now */ resource_close(&handler->res); } } enum shader_compile_task_kind { SHADER_COMPILE_TASK_KIND_VS, SHADER_COMPILE_TASK_KIND_PS }; struct shader_compile_task_arg { /* In */ enum 
shader_compile_task_kind kind;      /* VS or PS */
    struct pipeline *pipeline;
    struct shader_desc shader_desc; /* Source file + entry point name */
    struct resource *shader_res;    /* Already-opened shader source resource */
    /* Out */
    b32 success;
    ID3DBlob *blob;                 /* Compiled bytecode (when success) */
    ID3DBlob *error_blob;           /* Compiler diagnostics (when not) */
    i64 elapsed;                    /* Wall time in ns */
};

/* TODO: Compile shaders offline w/ dxc for performance & language features like static_assert */

/* Work task: compiles a single HLSL shader stage with D3DCompile, writing the
 * bytecode / error blob / elapsed time back into the arg struct. */
INTERNAL WORK_TASK_FUNC_DEF(shader_compile_task, comp_arg_raw)
{
    __prof;
    struct shader_compile_task_arg *comp_arg = (struct shader_compile_task_arg *)comp_arg_raw;
    enum shader_compile_task_kind kind = comp_arg->kind;
    struct pipeline *pipeline = comp_arg->pipeline;
    struct shader_desc shader_desc = comp_arg->shader_desc;
    struct resource *shader_res = comp_arg->shader_res;
    struct arena_temp scratch = scratch_begin_no_conflict();
    {
        i64 start_ns = sys_time_ns();
        b32 success = false;
        ID3DBlob *blob = NULL;
        ID3DBlob *error_blob = NULL;
        struct string file_name = string_from_cstr_no_limit(shader_desc.file);
        struct string func_name = string_from_cstr_no_limit(shader_desc.func);
        if (resource_exists(shader_res)) {
            struct dx12_include_handler include_handler = dx12_include_handler_alloc(pipeline);
            u32 d3d_compile_flags = 0;
#if DX12_SHADER_DEBUG
            d3d_compile_flags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_ENABLE_STRICTNESS;
#else
            d3d_compile_flags |= D3DCOMPILE_OPTIMIZATION_LEVEL3;
#endif
            /* Compile shader */
            {
                struct string shader_src = resource_get_data(shader_res);
                logf_info("Compiling shader \"%F:%F\"", FMT_STR(file_name), FMT_STR(func_name));
                /* Prefix with "res/" so compiler diagnostics show the on-disk path */
                struct string friendly_name = string_cat(scratch.arena, LIT("res/"), file_name);
                char *friendly_name_cstr = cstr_from_string(scratch.arena, friendly_name);
                char *target = NULL;
                switch (kind) {
                    case SHADER_COMPILE_TASK_KIND_VS: { target = "vs_5_1"; } break;
                    case SHADER_COMPILE_TASK_KIND_PS: { target = "ps_5_1"; } break;
                }
                /* SH_CPU=0 mirrors the C side's SH_CPU=1 for shared C/HLSL headers */
                D3D_SHADER_MACRO defines[] = { { "SH_CPU", "0" }, { NULL, NULL } };
                HRESULT hr = D3DCompile(shader_src.text, shader_src.len, friendly_name_cstr, defines, (ID3DInclude *)&include_handler,
shader_desc.func, target, d3d_compile_flags, 0, &blob, &error_blob); success = SUCCEEDED(hr) && !error_blob; } dx12_include_handler_release(&include_handler); } #if 0 if (success) { logf_success("Finished compiling shader \"%F\" in %F seconds", FMT_STR(src_name), FMT_FLOAT(SECONDS_FROM_NS(sys_time_ns() - start_ns))); } #endif comp_arg->success = success; comp_arg->blob = blob; comp_arg->error_blob = error_blob; comp_arg->elapsed = sys_time_ns() - start_ns; } scratch_end(scratch); } /* ========================== * * Pipeline * ========================== */ struct pipeline_load_task_arg { struct pipeline *pipeline; struct pipeline_result *result; }; INTERNAL WORK_TASK_FUNC_DEF(pipeline_load_task, load_arg_raw) { __prof; struct pipeline_load_task_arg *load_arg = (struct pipeline_load_task_arg *)load_arg_raw; struct pipeline *pipeline = load_arg->pipeline; struct pipeline_desc desc = pipeline->desc; struct pipeline_result *result = load_arg->result; struct arena_temp scratch = scratch_begin_no_conflict(); { i64 start_ns = sys_time_ns(); struct string pipeline_name = string_from_cstr_no_limit(desc.name); logf_info("Loading pipeline \"%F\"", FMT_STR(pipeline_name)); b32 success = true; HRESULT hr = 0; struct string error_str = LIT("Unknown error"); struct string vs_filename = string_from_cstr_no_limit(desc.vs.file); struct string ps_filename = string_from_cstr_no_limit(desc.ps.file); b32 ps_res_is_shared = string_eq(vs_filename, ps_filename); struct resource vs_res = resource_open(vs_filename); struct resource ps_res = vs_res; if (!ps_res_is_shared) { ps_res = resource_open(ps_filename); } if (success) { if (!resource_exists(&vs_res)) { error_str = string_format(scratch.arena, LIT("Shader source \"%F\" not found"), FMT_STR(vs_filename)); success = false; } else if (!resource_exists(&ps_res)) { error_str = string_format(scratch.arena, LIT("Shader source \"%F\" not found"), FMT_STR(ps_filename)); success = false; } } struct shader_compile_task_arg vs = ZI; vs.kind = 
SHADER_COMPILE_TASK_KIND_VS; vs.pipeline = pipeline; vs.shader_desc = desc.vs; vs.shader_res = &vs_res; struct shader_compile_task_arg ps = ZI; ps.kind = SHADER_COMPILE_TASK_KIND_PS; ps.pipeline = pipeline; ps.shader_desc = desc.ps; ps.shader_res = &ps_res; /* Compile shaders */ if (success) { struct work_slate ws = work_slate_begin(); work_slate_push_task(&ws, shader_compile_task, &vs); work_slate_push_task(&ws, shader_compile_task, &ps); struct work_handle work = work_slate_end_and_help(&ws, WORK_PRIORITY_HIGH); work_wait(work); success = vs.success && ps.success; } /* Get root signature blob * NOTE: This isn't necessary for creating the root signature (since it * could reuse the shader blob), however we'd like to verify that the * root signature exists and matches between shaders. */ ID3D10Blob *rootsig_blob = NULL; if (success) { __profscope(Validate root signatures); char *vs_rootsig_data = NULL; char *ps_rootsig_data = NULL; u32 vs_rootsig_data_len = 0; u32 ps_rootsig_data_len = 0; ID3D10Blob *vs_rootsig_blob = NULL; ID3D10Blob *ps_rootsig_blob = NULL; D3DGetBlobPart(ID3D10Blob_GetBufferPointer(vs.blob), ID3D10Blob_GetBufferSize(vs.blob), D3D_BLOB_ROOT_SIGNATURE, 0, &vs_rootsig_blob); D3DGetBlobPart(ID3D10Blob_GetBufferPointer(ps.blob), ID3D10Blob_GetBufferSize(ps.blob), D3D_BLOB_ROOT_SIGNATURE, 0, &ps_rootsig_blob); if (vs_rootsig_blob) { vs_rootsig_data = ID3D10Blob_GetBufferPointer(vs_rootsig_blob); vs_rootsig_data_len = ID3D10Blob_GetBufferSize(vs_rootsig_blob); } if (ps_rootsig_blob) { ps_rootsig_data = ID3D10Blob_GetBufferPointer(ps_rootsig_blob); ps_rootsig_data_len = ID3D10Blob_GetBufferSize(ps_rootsig_blob); } if (vs_rootsig_data_len == 0) { success = false; error_str = LIT("Vertex shader is missing root signature"); } else if (ps_rootsig_data_len == 0) { success = false; error_str = LIT("Pixel shader is missing root signature"); } else if (vs_rootsig_data_len != ps_rootsig_data_len || !MEMEQ(vs_rootsig_data, ps_rootsig_data, vs_rootsig_data_len)) { 
success = false; error_str = LIT("Root signature mismatch between vertex and pixel shader"); } else { rootsig_blob = vs_rootsig_blob; } if (ps_rootsig_blob) { ID3D10Blob_Release(ps_rootsig_blob); } } /* Create root signature */ ID3D12RootSignature *rootsig = NULL; if (success) { __profscope(Create root signature); hr = ID3D12Device_CreateRootSignature(G.device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig); if (FAILED(hr)) { error_str = LIT("Failed to create root signature"); success = false; } } /* Create PSO */ ID3D12PipelineState *pso = NULL; if (success) { /* Default rasterizer state */ __profscope(Create PSO); D3D12_RASTERIZER_DESC raster_desc = { .FillMode = D3D12_FILL_MODE_SOLID, .CullMode = D3D12_CULL_MODE_BACK, .FrontCounterClockwise = FALSE, .DepthBias = D3D12_DEFAULT_DEPTH_BIAS, .DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP, .SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS, .DepthClipEnable = TRUE, .MultisampleEnable = FALSE, .AntialiasedLineEnable = FALSE, .ForcedSampleCount = 0, .ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF }; /* No input layout */ D3D12_INPUT_LAYOUT_DESC input_layout_desc = { .pInputElementDescs = NULL, .NumElements = 0 }; /* Opaque blend state */ D3D12_BLEND_DESC blend_desc = { .AlphaToCoverageEnable = FALSE, .IndependentBlendEnable = FALSE }; blend_desc.RenderTarget[0].BlendEnable = FALSE; blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; /* Disable depth stencil */ D3D12_DEPTH_STENCIL_DESC depth_stencil_desc = { .DepthEnable = FALSE, .StencilEnable = FALSE }; /* PSO */ D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { 0 }; pso_desc.pRootSignature = rootsig; if (vs.success) { pso_desc.VS.pShaderBytecode = ID3D10Blob_GetBufferPointer(vs.blob); pso_desc.VS.BytecodeLength = ID3D10Blob_GetBufferSize(vs.blob); } if (ps.success) { pso_desc.PS.pShaderBytecode = 
ID3D10Blob_GetBufferPointer(ps.blob); pso_desc.PS.BytecodeLength = ID3D10Blob_GetBufferSize(ps.blob); } pso_desc.BlendState = blend_desc; pso_desc.SampleMask = UINT_MAX; pso_desc.RasterizerState = raster_desc; pso_desc.DepthStencilState = depth_stencil_desc; pso_desc.InputLayout = input_layout_desc; pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; pso_desc.NumRenderTargets = 1; pso_desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; pso_desc.SampleDesc.Count = 1; hr = ID3D12Device_CreateGraphicsPipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); if (FAILED(hr)) { error_str = LIT("Failed to create pipeline state object"); success = false; } } /* Copy error */ if (!success) { ID3D10Blob *error_blob = vs.error_blob ? vs.error_blob : ps.error_blob; if (error_blob) { u64 error_blob_cstr_len = ID3D10Blob_GetBufferSize(error_blob); char *error_blob_cstr = (char *)ID3D10Blob_GetBufferPointer(error_blob); struct string error_blob_str = string_copy(scratch.arena, string_from_cstr(error_blob_cstr, error_blob_cstr_len)); if (string_ends_with(error_blob_str, LIT("\n"))) { /* Remove trailing newline */ error_blob_str.len -= 1; } if (error_blob_str.len > 0) { error_str = error_blob_str; } } result->errors_text_len = min_u64(error_str.len, ARRAY_COUNT(result->errors_text)); MEMCPY(result->errors_text, error_str.text, result->errors_text_len); } pipeline->pso = pso; pipeline->rootsig = rootsig; result->elapsed = sys_time_ns() - start_ns; resource_close(&vs_res); if (!ps_res_is_shared) { resource_close(&ps_res); } if (rootsig_blob) { ID3D10Blob_Release(rootsig_blob); } if (vs.blob) { ID3D10Blob_Release(vs.blob); } if (vs.error_blob) { ID3D10Blob_Release(vs.error_blob); } if (ps.blob) { ID3D10Blob_Release(ps.blob); } if (ps.error_blob) { ID3D10Blob_Release(ps.error_blob); } } scratch_end(scratch); } INTERNAL struct pipeline_result *pipeline_alloc_from_descs(struct arena *arena, u64 num_pipelines, struct pipeline_desc *descs) { __prof; 
struct pipeline_result *results = arena_push_array(arena, struct pipeline_result, num_pipelines); struct pipeline_load_task_arg *task_args = arena_push_array(arena, struct pipeline_load_task_arg, num_pipelines); /* Load pipelines */ struct work_slate ws = work_slate_begin(); for (u64 i = 0; i < num_pipelines; ++i) { struct pipeline_result *result = &results[i]; struct pipeline *pipeline = &results->pipeline; pipeline->desc = descs[i]; struct pipeline_load_task_arg *arg = &task_args[i]; arg->pipeline = pipeline; arg->result = result; work_slate_push_task(&ws, pipeline_load_task, arg); } struct work_handle work = work_slate_end_and_help(&ws, WORK_PRIORITY_HIGH); work_wait(work); return results; } INTERNAL void pipeline_release(struct pipeline *pipeline) { __prof; if (pipeline->pso) { ID3D12PipelineState_Release(pipeline->pso); } } #if 1 /* ========================== * * CPU descriptor heap * ========================== */ INTERNAL struct dx12_cpu_descriptor_heap *dx12_cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type) { struct dx12_cpu_descriptor_heap *dh = NULL; { struct arena arena = arena_alloc(MEGABYTE(64)); dh = arena_push(&arena, struct dx12_cpu_descriptor_heap); dh->arena = arena; } dh->mutex = sys_mutex_alloc(); u32 num_descriptors = 0; u32 descriptor_size = 0; if (type < (i32)ARRAY_COUNT(G.desc_counts) && type < (i32)ARRAY_COUNT(G.desc_sizes)) { num_descriptors = G.desc_counts[type]; descriptor_size = G.desc_sizes[type]; } if (num_descriptors == 0 || descriptor_size == 0) { sys_panic(LIT("Unsupported CPU descriptor type")); } dh->num_descriptors_capacity = num_descriptors; dh->descriptor_size = descriptor_size; D3D12_DESCRIPTOR_HEAP_DESC desc = ZI; desc.Type = type; desc.NumDescriptors = num_descriptors; HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&dh->heap); if (FAILED(hr)) { sys_panic(LIT("Failed to create CPU descriptor heap")); } 
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(dh->heap, &dh->handle); return dh; } #if 0 INTERNAL void dx12_cpu_descriptor_heap_release(struct dx12_cpu_descriptor_heap *dh) { /* TODO */ (UNUSED)dh; } #endif /* ========================== * * Descriptor * ========================== */ INTERNAL struct dx12_descriptor *dx12_descriptor_alloc(struct dx12_cpu_descriptor_heap *dh) { struct dx12_descriptor *d = NULL; D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI; { struct sys_lock lock = sys_mutex_lock_e(&dh->mutex); if (dh->first_free_descriptor) { d = dh->first_free_descriptor; handle = d->handle; } else { if (dh->num_descriptors_reserved >= dh->num_descriptors_capacity) { sys_panic(LIT("Max descriptors reached in heap")); } d = arena_push_no_zero(&dh->arena, struct dx12_descriptor); handle.ptr = dh->handle.ptr + (dh->num_descriptors_reserved * dh->descriptor_size); ++dh->num_descriptors_reserved; } sys_mutex_unlock(&lock); } MEMZERO_STRUCT(d); d->heap = dh; d->handle = handle; return d; } /* ========================== * * GPU (shader visible) descriptor heap * ========================== */ INTERNAL struct dx12_gpu_descriptor_heap *dx12_gpu_descriptor_heap_alloc(struct dx12_cpu_descriptor_heap *dh_cpu) { ASSERT(dh_cpu->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); /* Src heap must have expected type */ /* Allocate GPU heap */ struct dx12_gpu_descriptor_heap *dh_gpu = NULL; ID3D12DescriptorHeap *heap = NULL; D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle = ZI; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle = ZI; ID3D12Fence *free_fence = NULL; u64 free_fence_value = 0; { struct sys_lock lock = sys_mutex_lock_e(&G.gpu_descriptor_heaps_mutex); /* Find first free & ready heap for reuse */ /* FIXME: Rather than storing fence per heap, store & increment fence per queue and check against it */ for (struct dx12_gpu_descriptor_heap *tmp = G.first_free_gpu_descriptor_heap; tmp; tmp = tmp->next_free) { if (ID3D12Fence_GetCompletedValue(tmp->free_fence) >= tmp->free_fence_value) { dh_gpu = 
tmp; break; } } if (dh_gpu) { /* Free & ready heap found */ dh_gpu = G.first_free_gpu_descriptor_heap; heap = dh_gpu->heap; cpu_handle = dh_gpu->cpu_handle; gpu_handle = dh_gpu->gpu_handle; free_fence = dh_gpu->free_fence; free_fence_value = dh_gpu->free_fence_value; /* Remove from free list */ struct dx12_gpu_descriptor_heap *prev = dh_gpu->prev_free; struct dx12_gpu_descriptor_heap *next = dh_gpu->next_free; if (prev) { prev->next_free = next; } else { G.first_free_gpu_descriptor_heap = next; } if (next) { next->prev_free = prev; } else { G.last_free_gpu_descriptor_heap = prev; } } else { /* No available heap available for reuse, allocate new */ dh_gpu = arena_push_no_zero(&G.gpu_descriptor_heaps_arena, struct dx12_gpu_descriptor_heap); } sys_mutex_unlock(&lock); } MEMZERO_STRUCT(dh_gpu); if (!heap) { D3D12_DESCRIPTOR_HEAP_DESC desc = ZI; desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; desc.NumDescriptors = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS; desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&heap); if (FAILED(hr)) { sys_panic(LIT("Failed to create GPU descriptor heap")); } ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap, &cpu_handle); ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(heap, &gpu_handle); hr = ID3D12Device_CreateFence(G.device, 0, 0, &IID_ID3D12Fence, (void **)&free_fence); if (FAILED(hr)) { sys_panic(LIT("Failed to create GPU descriptor heap fence")); } } dh_gpu->heap = heap; dh_gpu->cpu_handle = cpu_handle; dh_gpu->gpu_handle = gpu_handle; dh_gpu->free_fence = free_fence; dh_gpu->free_fence_value = free_fence_value; /* Copy CPU heap */ { struct sys_lock lock = sys_mutex_lock_s(&dh_cpu->mutex); ID3D12Device_CopyDescriptorsSimple(G.device, dh_cpu->num_descriptors_reserved, dh_gpu->cpu_handle, dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); sys_mutex_unlock(&lock); } return dh_gpu; } INTERNAL void 
dx12_gpu_descriptor_heap_release(struct dx12_gpu_descriptor_heap *dh, ID3D12CommandQueue *cq) { /* Queue fence signal */ ++dh->free_fence_value; ID3D12CommandQueue_Signal(cq, dh->free_fence, dh->free_fence_value); /* Add to free list */ struct sys_lock lock = sys_mutex_lock_e(&G.gpu_descriptor_heaps_mutex); dh->next_free = G.first_free_gpu_descriptor_heap; if (G.last_free_gpu_descriptor_heap) { G.last_free_gpu_descriptor_heap->next_free = dh; } else { G.first_free_gpu_descriptor_heap = dh; } G.last_free_gpu_descriptor_heap = dh; G.first_free_gpu_descriptor_heap = dh; sys_mutex_unlock(&lock); } /* ========================== * * Plan * ========================== */ struct dx12_plan { struct dx12_plan *next_free; }; INTERNAL struct dx12_plan *dx12_plan_alloc(void) { return NULL; } struct gpu_handle gpu_plan_alloc(void) { struct dx12_plan *plan = dx12_plan_alloc(); return handle_alloc(DX12_HANDLE_KIND_PLAN, plan); } void gpu_push_cmd(struct gpu_handle gpu_plan, struct gpu_cmd_params params) { (UNUSED)gpu_plan; (UNUSED)params; } void gpu_submit_plan(struct gpu_handle gpu_plan) { (UNUSED)gpu_plan; } /* ========================== * * Resource * ========================== */ enum dx12_resource_view_flags { DX12_RESOURCE_VIEW_FLAG_NONE = 0, DX12_RESOURCE_VIEW_FLAG_CBV = (1 << 1), DX12_RESOURCE_VIEW_FLAG_SRV = (1 << 2), DX12_RESOURCE_VIEW_FLAG_UAV = (1 << 3), DX12_RESOURCE_VIEW_FLAG_RTV = (1 << 4) }; INTERNAL struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state, enum dx12_resource_view_flags view_flags) { struct dx12_resource *r = NULL; { struct sys_lock lock = sys_mutex_lock_e(&G.resources_mutex); if (G.first_free_resource) { r = G.first_free_resource; G.first_free_resource = r->next_free; } else { r = arena_push_no_zero(&G.resources_arena, struct dx12_resource); } sys_mutex_unlock(&lock); } MEMZERO_STRUCT(r); HRESULT hr = 
ID3D12Device_CreateCommittedResource(G.device, &heap_props, heap_flags, &desc, initial_state, NULL, &IID_ID3D12Resource, (void **)&r->resource); if (FAILED(hr)) { /* TODO: Don't panic */ sys_panic(LIT("Failed to create resource")); } r->state = initial_state; if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { r->gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->resource); } if (view_flags & DX12_RESOURCE_VIEW_FLAG_CBV) { r->cbv_descriptor = dx12_descriptor_alloc(G.cbv_srv_uav_heap); D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = ZI; cbv_desc.BufferLocation = r->gpu_address; //cbv_desc.SizeInBytes = desc.ByteWidth; /* FIXME: Get actual size */ cbv_desc.SizeInBytes = KILOBYTE(64); ID3D12Device_CreateConstantBufferView(G.device, &cbv_desc, r->cbv_descriptor->handle); } if (view_flags & DX12_RESOURCE_VIEW_FLAG_SRV) { r->srv_descriptor = dx12_descriptor_alloc(G.cbv_srv_uav_heap); ID3D12Device_CreateShaderResourceView(G.device, r->resource, NULL, r->srv_descriptor->handle); } if (view_flags & DX12_RESOURCE_VIEW_FLAG_UAV) { r->uav_descriptor = dx12_descriptor_alloc(G.cbv_srv_uav_heap); ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, NULL, NULL, r->uav_descriptor->handle); } if (view_flags & DX12_RESOURCE_VIEW_FLAG_RTV) { r->rtv_descriptor = dx12_descriptor_alloc(G.rtv_heap); ID3D12Device_CreateRenderTargetView(G.device, r->resource, NULL, r->rtv_descriptor->handle); } return r; } INTERNAL enum D3D12_RESOURCE_STATES dx12_resource_barrier(ID3D12GraphicsCommandList *cl, struct dx12_resource *resource, enum D3D12_RESOURCE_STATES state) { enum D3D12_RESOURCE_STATES old_state = resource->state; if (state != resource->state) { struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI; rtb.pResource = resource->resource; rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; rtb.StateBefore = resource->state; rtb.StateAfter = state; struct D3D12_RESOURCE_BARRIER rb = ZI; rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; rb.Flags = 0; rb.Transition = rtb; 
ID3D12GraphicsCommandList_ResourceBarrier(cl, 1, &rb); resource->state = state; } return old_state; } INTERNAL void dx12_resource_release(struct dx12_resource *t) { (UNUSED)t; } struct gpu_handle gpu_texture_alloc(enum gpu_texture_format format, u32 flags, struct v2i32 size, void *initial_data) { LOCAL_PERSIST const DXGI_FORMAT formats[] = { [GPU_TEXTURE_FORMAT_R8G8B8A8_UNORM] = DXGI_FORMAT_R8G8B8A8_UNORM, [GPU_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB }; DXGI_FORMAT dxgi_format = 0; if (format < (i32)ARRAY_COUNT(formats)) { dxgi_format = formats[format]; } if (format == 0) { /* TODO: Don't panic */ sys_panic(LIT("Tried to create texture with unknown format")); } enum dx12_resource_view_flags view_flags = DX12_RESOURCE_VIEW_FLAG_SRV; D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT }; heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; D3D12_RESOURCE_DESC desc = ZI; desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; desc.Alignment = 0; desc.Width = size.x; desc.Height = size.y; desc.DepthOrArraySize = 1; desc.MipLevels = 1; desc.Format = dxgi_format; desc.SampleDesc.Count = 1; desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; if (flags & GPU_TEXTURE_FLAG_TARGETABLE) { desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; view_flags |= DX12_RESOURCE_VIEW_FLAG_RTV; } D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST; struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state, view_flags); (UNUSED)initial_data; return handle_alloc(DX12_HANDLE_KIND_RESOURCE, r); } void gpu_texture_clear(struct gpu_handle target_resource, u32 clear_color) { (UNUSED)target_resource; (UNUSED)clear_color; } struct v2i32 gpu_texture_get_size(struct gpu_handle resource) { (UNUSED)resource; struct v2i32 res = ZI; return res; } /* ========================== * * 
Dispatch * ========================== */ /* TODO: Move command list off of dispatch state */ struct dx12_dispatch_state { struct arena arena; ID3D12CommandAllocator *ca_direct; ID3D12GraphicsCommandList *cl_direct; }; INTERNAL struct dx12_dispatch_state *dx12_dispatch_state_alloc(void) { HRESULT hr = 0; struct dx12_dispatch_state *ds = NULL; { struct arena arena = arena_alloc(MEGABYTE(64)); ds = arena_push(&arena, struct dx12_dispatch_state); ds->arena = arena; } hr = ID3D12Device_CreateCommandAllocator(G.device, D3D12_COMMAND_LIST_TYPE_DIRECT, &IID_ID3D12CommandAllocator, (void **)&ds->ca_direct); if (FAILED(hr)) { sys_panic(LIT("Failed to create command allocator")); } hr = ID3D12Device_CreateCommandList(G.device, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, ds->ca_direct, NULL, &IID_ID3D12GraphicsCommandList, (void **)&ds->cl_direct); if (FAILED(hr)) { sys_panic(LIT("Failed to create command list")); } hr = ID3D12GraphicsCommandList_Close(ds->cl_direct); if (FAILED(hr)) { sys_panic(LIT("Failed to close command list during initialization")); } return ds; } struct gpu_handle gpu_dispatch_state_alloc(void) { struct dx12_dispatch_state *ds = dx12_dispatch_state_alloc(); return handle_alloc(DX12_HANDLE_KIND_DISPATCH_STATE, ds); } void gpu_dispatch(struct gpu_handle gpu_dispatch_state, struct gpu_dispatch_params params) { HRESULT hr = 0; /* Viewport */ struct rect viewport = params.draw_target_viewport; struct D3D12_VIEWPORT d3d12_viewport = ZI; d3d12_viewport.TopLeftX = viewport.x; d3d12_viewport.TopLeftY = viewport.y; d3d12_viewport.Width = viewport.width; d3d12_viewport.Height = viewport.height; d3d12_viewport.MinDepth = 0.0f; d3d12_viewport.MaxDepth = 1.0f; /* Scissor */ D3D12_RECT d3d12_scissor = ZI; d3d12_scissor.left = viewport.x; d3d12_scissor.top = viewport.y; d3d12_scissor.right = viewport.x + viewport.width; d3d12_scissor.bottom = viewport.y + viewport.height; struct dx12_dispatch_state *dispatch_state = handle_get_data(gpu_dispatch_state, 
DX12_HANDLE_KIND_DISPATCH_STATE); struct dx12_plan *plan = handle_get_data(params.plan, DX12_HANDLE_KIND_PLAN); struct dx12_resource *target = handle_get_data(params.draw_target, DX12_HANDLE_KIND_RESOURCE); ID3D12CommandQueue *cq = G.cq_direct; ID3D12CommandAllocator *ca = dispatch_state->ca_direct; ID3D12GraphicsCommandList *cl = dispatch_state->cl_direct; /* FIXME: Use fence to ensure command allocator has finished execution on GPU before resetting */ hr = ID3D12CommandAllocator_Reset(ca); if (FAILED(hr)) { sys_panic(LIT("Failed to reset command allocator")); } hr = ID3D12GraphicsCommandList_Reset(cl, ca, NULL); if (FAILED(hr)) { sys_panic(LIT("Failed to reset command list")); } /* Create temporary srv heap */ struct dx12_gpu_descriptor_heap *temp_descriptor_heap = dx12_gpu_descriptor_heap_alloc(G.cbv_srv_uav_heap); /* Material pass */ { u32 instance_count = 0; (UNUSED)plan; //struct pipeline *pipeline = dx12_get_pipeline(pipeline_scope, LIT("material")); struct pipeline *pipeline = &G.test_pipeline; /* Bind pipeline */ ID3D12GraphicsCommandList_SetPipelineState(cl, pipeline->pso); ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl, pipeline->rootsig); /* Bind constant buffer */ /* TODO */ /* Bind srv heap */ ID3D12DescriptorHeap *heaps[] = { temp_descriptor_heap->heap }; ID3D12GraphicsCommandList_SetDescriptorHeaps(cl, ARRAY_COUNT(heaps), heaps); ID3D12GraphicsCommandList_SetGraphicsRootDescriptorTable(cl, 2, temp_descriptor_heap->gpu_handle); /* Setup Rasterizer State */ ID3D12GraphicsCommandList_RSSetViewports(cl, 1, &d3d12_viewport); ID3D12GraphicsCommandList_RSSetScissorRects(cl, 1, &d3d12_scissor); /* Transition render target */ enum D3D12_RESOURCE_STATES old_state = dx12_resource_barrier(cl, target, D3D12_RESOURCE_STATE_RENDER_TARGET); ID3D12GraphicsCommandList_OMSetRenderTargets(cl, 1, &target->rtv_descriptor->handle, false, NULL); //f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; //ID3D12GraphicsCommandList_ClearRenderTargetView(cl, rtvHandle, 
clearColor, 0, nullptr); /* Draw */ ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); ID3D12GraphicsCommandList_IASetVertexBuffers(cl, 0, 1, &G.dummy_vertex_buffer_view); ID3D12GraphicsCommandList_IASetIndexBuffer(cl, &G.quad_index_buffer_view); ID3D12GraphicsCommandList_DrawIndexedInstanced(cl, 6, instance_count, 0, 0, 0); /* Reset render target */ dx12_resource_barrier(cl, target, old_state); } /* Execute command list */ hr = ID3D12GraphicsCommandList_Close(cl); if (FAILED(hr)) { sys_panic(LIT("Failed to close command list before execution")); } dx12_gpu_descriptor_heap_release(temp_descriptor_heap, cq); #if 0 __prof; struct sprite_scope *sprite_scope = sprite_scope_begin(); struct dx11_dispatch_state *state = (struct dx11_dispatch_state *)gpu_dispatch_state.v; struct rect viewport = params.draw_target_viewport; /* Set viewport */ D3D11_VIEWPORT d3d11_viewport = ZI; d3d11_viewport.Width = viewport.width; d3d11_viewport.Height = viewport.height; d3d11_viewport.MinDepth = 0.0f; d3d11_viewport.MaxDepth = 1.0f; d3d11_viewport.TopLeftX = viewport.x; d3d11_viewport.TopLeftY = viewport.y; ID3D11DeviceContext_RSSetViewports(G.devcon, 1, &d3d11_viewport); struct dx12_resource *final_tex = (struct dx12_resource *)params.draw_target.v; struct v2i32 final_tex_size = final_tex->size; /* Texture pass */ { __profscope(Texture pass); struct dx11_shader *shader = &G.shaders[DX11_SHADER_KIND_TEXTURE]; if (shader->valid) { struct dx12_resource *texture = NULL; if (cmd->texture.texture.v) { /* Load texture if handle is set */ texture = (struct dx12_resource *)cmd->texture.texture.v; } else if (cmd->texture.sprite.hash) { /* Otherwise load sprite */ struct sprite_texture *sprite_texture = sprite_texture_from_tag_async(sprite_scope, cmd->texture.sprite); if (sprite_texture->loaded) { texture = (struct dx12_resource *)sprite_texture->texture.v; } } if (texture && texture->srv) { struct dx11_buffer *instance_buffer = 
list->buffers.texture.instance_buffer; u32 instance_offset = cmd->texture.instance_offset; u32 instance_count = cmd->texture.instance_count; /* Bind shader */ ID3D11DeviceContext_VSSetShader(G.devcon, shader->vs, 0, 0); ID3D11DeviceContext_PSSetShader(G.devcon, shader->ps, 0, 0); /* Fill & bind constant buffer */ { struct dx11_texture_uniform *uniform = dx11_buffer_push(constant_buffer, sizeof(struct dx11_texture_uniform)); uniform->vp = vp_matrix; uniform->instance_offset = instance_offset; dx11_buffer_submit(constant_buffer); } ID3D11DeviceContext_VSSetConstantBuffers(G.devcon, 0, 1, &constant_buffer->gpu_buffer); ID3D11DeviceContext_PSSetConstantBuffers(G.devcon, 0, 1, &constant_buffer->gpu_buffer); /* Bind dummy vertex buffer */ u32 zero = 0; ID3D11DeviceContext_IASetVertexBuffers(G.devcon, 0, 1, &G.dummy_vertex_buffer->gpu_buffer, &zero, &zero); ID3D11DeviceContext_IASetIndexBuffer(G.devcon, G.quad_index_buffer->gpu_buffer, DXGI_FORMAT_R16_UINT, zero); /* Bind SRVs */ ID3D11ShaderResourceView *srvs[] = { instance_buffer->srv, texture->srv }; ID3D11DeviceContext_VSSetShaderResources(G.devcon, 0, ARRAY_COUNT(srvs), srvs); ID3D11DeviceContext_PSSetShaderResources(G.devcon, 0, ARRAY_COUNT(srvs), srvs); /* Bind RTVs */ ID3D11RenderTargetView *rtvs[] = { final_tex->rtv }; ID3D11DeviceContext_OMSetRenderTargets(G.devcon, ARRAY_COUNT(rtvs), rtvs, NULL); /* Draw */ ID3D11DeviceContext_IASetPrimitiveTopology(G.devcon, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); ID3D11DeviceContext_DrawIndexedInstanced(G.devcon, 6, instance_count, 0, 0, 0); /* Unbind */ dx11_unbind(DX11_UNBIND_VS | DX11_UNBIND_PS | DX11_UNBIND_CBUFF | DX11_UNBIND_VBUFF | DX11_UNBIND_IBUFF | DX11_UNBIND_SRV | DX11_UNBIND_RTV); } } } sprite_scope_end(sprite_scope); #endif } #else void gpu_dispatch(struct gpu_handle gpu_dispatch_state, struct gpu_dispatch_params params) { (UNUSED)gpu_dispatch_state; (UNUSED)params; (UNUSED)handle_get_data; } #endif /* ========================== * * Backbuffer * 
========================== */ struct gpu_handle gpu_recreate_backbuffer(struct v2i32 size) { (UNUSED)size; struct gpu_handle res = ZI; return res; } void gpu_present(i32 vsync) { (UNUSED)vsync; } #endif