#pragma warning(push, 0) # define UNICODE # define COBJMACROS # include # include # include # include # include # include #pragma warning(pop) #pragma comment(lib, "d3d12") #pragma comment(lib, "dxgi") #pragma comment(lib, "dxguid") #pragma comment(lib, "d3dcompiler") #if ProfilingIsEnabled_GPU /* For RegOpenKeyEx */ # include # pragma comment(lib, "advapi32") #endif #define DX12_ALLOW_TEARING 1 #define DX12_WAIT_FRAME_LATENCY 1 #define DX12_SWAPCHAIN_FLAGS (((DX12_ALLOW_TEARING != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | ((DX12_WAIT_FRAME_LATENCY != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)) #define DX12_SWAPCHAIN_BUFFER_COUNT (4) /* Arbitrary limits */ #define DX12_NUM_CBV_SRV_UAV_DESCRIPTORS (1024 * 64) #define DX12_NUM_RTV_DESCRIPTORS (1024 * 1) #define DX12_COMMAND_BUFFER_MIN_SIZE (1024 * 64) #define DX12_MULTI_QUEUE !ProfilingIsEnabled #if DX12_MULTI_QUEUE # define DX12_QUEUE_DIRECT 0 # define DX12_QUEUE_COMPUTE 1 # define DX12_QUEUE_COPY 2 # define DX12_QUEUE_COPY_BACKGROUND 3 # define DX12_NUM_QUEUES 4 #else # define DX12_QUEUE_DIRECT 0 # define DX12_QUEUE_COMPUTE 0 # define DX12_QUEUE_COPY 0 # define DX12_QUEUE_COPY_BACKGROUND 0 # define DX12_NUM_QUEUES 1 #endif #if RtcIsEnabled # define DX12_DEBUG 1 # define DX12_SHADER_DEBUG 1 #else # define DX12_DEBUG 0 # define DX12_SHADER_DEBUG 0 #endif /* ========================== * * internal structs * ========================== */ struct shader_desc { String file; String func; }; struct pipeline_rtv_desc { DXGI_FORMAT format; b32 blending; }; struct pipeline_desc { String name; /* If a dxc string is set, then it will be used directly instead of looking up dxc from archive using pipeline name */ String vs_dxc; String ps_dxc; String cs_dxc; struct pipeline_rtv_desc rtvs[8]; }; struct pipeline { String name; u64 hash; b32 success; b32 is_gfx; String error; i64 compilation_time_ns; /* Lock global pipelines mutex when accessing */ i64 refcount; ID3D12PipelineState *pso; ID3D12RootSignature *rootsig; 
    struct pipeline_desc desc;
    struct pipeline *next;
};

struct pipeline_error {
    String msg;
    struct pipeline_error *next;
};

struct pipeline_include {
    String name;
    u64 name_hash;
    struct pipeline_include *next;
};

/* Scoped set of pipeline references; recycled through a free list. */
struct pipeline_scope {
    Arena *arena;
    Dict *refs;
    struct pipeline_scope *next_free;
};

struct command_queue_desc {
    enum D3D12_COMMAND_LIST_TYPE type;
    enum D3D12_COMMAND_QUEUE_PRIORITY priority;
    String dbg_name;
};

struct command_queue {
    struct command_queue_desc desc;
    ID3D12CommandQueue *cq;
    Arena *arena;
    /* Guards submit_fence_target */
    P_Mutex submit_fence_mutex;
    u64 submit_fence_target;
    ID3D12Fence *submit_fence;
    struct command_list_pool *cl_pool;
#if ProfilingIsEnabled_GPU
    __prof_dx12_ctx(prof);
#endif
};

struct command_list_pool {
    struct command_queue *cq;
    Arena *arena;
    P_Mutex mutex;
    struct command_list *first_submitted_command_list;
    struct command_list *last_submitted_command_list;
};

struct command_list {
    struct command_queue *cq;
    struct command_list_pool *pool;
    struct ID3D12CommandAllocator *ca;
    struct ID3D12GraphicsCommandList *cl;
    P_Lock global_record_lock;
    struct pipeline *cur_pipeline;
    struct command_descriptor_heap *first_command_descriptor_heap;
    struct command_buffer *first_command_buffer;
    u64 submitted_fence_target;
    struct command_list *prev_submitted;
    struct command_list *next_submitted;
};

struct command_descriptor_heap {
    D3D12_DESCRIPTOR_HEAP_TYPE type;
    ID3D12DescriptorHeap *heap;
    D3D12_CPU_DESCRIPTOR_HANDLE start_cpu_handle;
    D3D12_GPU_DESCRIPTOR_HANDLE start_gpu_handle;
    struct command_descriptor_heap *next_in_command_list;
    u64 submitted_fence_target;
    struct command_queue *submitted_cq;
    struct command_descriptor_heap *prev_submitted;
    struct command_descriptor_heap *next_submitted;
};

struct command_buffer {
    struct command_buffer_group *group;
    u64 size;
    struct dx12_resource *resource;
    D3D12_VERTEX_BUFFER_VIEW vbv;
    /* NOTE(review): capital 'I' is inconsistent with 'vbv'; left as-is since
     * code outside this chunk may reference the field name. */
    D3D12_INDEX_BUFFER_VIEW Ibv;
    struct command_buffer *next_in_command_list;
    u64 submitted_fence_target;
    struct command_queue *submitted_cq;
    struct command_buffer
        *prev_submitted;
    struct command_buffer *next_submitted;
};

struct command_buffer_group {
    struct command_buffer *first_submitted;
    struct command_buffer *last_submitted;
};

struct descriptor {
    struct cpu_descriptor_heap *heap;
    u32 index;
    D3D12_CPU_DESCRIPTOR_HANDLE handle;
    struct descriptor *next_free;
};

struct dx12_resource {
    enum D3D12_RESOURCE_STATES state;
    ID3D12Resource *resource;
    struct descriptor *cbv_descriptor;
    struct descriptor *srv_descriptor;
    struct descriptor *uav_descriptor;
    struct descriptor *rtv_descriptor;
    D3D12_GPU_VIRTUAL_ADDRESS gpu_address; /* NOTE: 0 for textures */
    Vec2I32 texture_size;
    struct dx12_resource *next_free;
};

struct swapchain_buffer {
    struct swapchain *swapchain;
    ID3D12Resource *resource;
    struct descriptor *rtv_descriptor;
    D3D12_RESOURCE_STATES state;
};

struct swapchain {
    IDXGISwapChain3 *swapchain;
    HWND hwnd;
    HANDLE waitable;
    Vec2I32 resolution;
    struct swapchain_buffer buffers[DX12_SWAPCHAIN_BUFFER_COUNT];
    struct swapchain *next_free;
};

/* CPU-visible descriptor heap with a free list of recycled descriptors. */
struct cpu_descriptor_heap {
    enum D3D12_DESCRIPTOR_HEAP_TYPE type;
    Arena *arena;
    P_Mutex mutex;
    u32 descriptor_size;
    u32 num_descriptors_reserved;
    u32 num_descriptors_capacity;
    struct descriptor *first_free_descriptor;
    ID3D12DescriptorHeap *heap;
    struct D3D12_CPU_DESCRIPTOR_HANDLE handle;
};

enum fenced_release_kind {
    FENCED_RELEASE_KIND_NONE,
    FENCED_RELEASE_KIND_RESOURCE,
    FENCED_RELEASE_KIND_PIPELINE
};

struct fenced_release_data {
    enum fenced_release_kind kind;
    void *ptr;
};

/* ==========================
 * internal procs
 * ========================== */

internal P_ExitFuncDef(gp_shutdown);
internal void dx12_init_device(void);
internal void dx12_init_objects(void);
internal void dx12_init_pipelines(void);
internal void dx12_init_noise(void);
internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type);
internal void command_queue_release(struct command_queue *cq);
internal P_JobDef(dx12_evictor_job, _);
internal void fenced_release(void *data, enum
    fenced_release_kind kind);
internal struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state);
internal struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh);

/* Job signature: one command queue allocated per job id. */
struct command_queue_alloc_job_sig {
    struct command_queue_desc *descs_in;
    struct command_queue **cqs_out;
};
internal P_JobDef(command_queue_alloc_job, job);

/* Job signature: one pipeline compiled per job id. */
struct pipeline_alloc_job_sig {
    struct pipeline_desc *descs_in;
    struct pipeline **pipelines_out;
};
internal P_JobDef(pipeline_alloc_job, job);

struct dx12_upload_job_sig {
    struct dx12_resource *resource;
    void *data;
};
internal P_JobDef(dx12_upload_job, job);

#if RESOURCE_RELOADING
internal WATCH_CALLBACK_FUNC_DEF(pipeline_watch_callback, name);
#endif

/* ==========================
 * Global state
 * ========================== */

Global struct {
    Atomic32 initialized;

    /* Descriptor heaps pool */
    P_Mutex command_descriptor_heaps_mutex;
    Arena *command_descriptor_heaps_arena;
    struct command_descriptor_heap *first_submitted_command_descriptor_heap;
    struct command_descriptor_heap *last_submitted_command_descriptor_heap;

    /* Command buffers pool */
    P_Mutex command_buffers_mutex;
    Arena *command_buffers_arena;
    Dict *command_buffers_dict;

    /* Resources pool */
    P_Mutex resources_mutex;
    Arena *resources_arena;
    struct dx12_resource *first_free_resource;

    /* Swapchains pool */
    P_Mutex swapchains_mutex;
    Arena *swapchains_arena;
    struct swapchain *first_free_swapchain;

    /* Shader bytecode archive */
    struct tar_archive dxc_archive;

    /* Pipeline cache */
    P_Mutex pipelines_mutex;
    Arena *pipelines_arena;
    struct pipeline *first_free_pipeline;
    Dict *pipeline_descs;
    Dict *top_pipelines;            /* Latest pipelines */
    Dict *top_successful_pipelines; /* Latest pipelines that successfully compiled */
    struct pipeline_scope *first_free_pipeline_scope;

    /* Fenced release queue */
    P_Mutex fenced_releases_mutex;
    Arena *fenced_releases_arena;
    u64
        fenced_release_targets[DX12_NUM_QUEUES];

    /* Factory */
    IDXGIFactory6 *factory;
    /* Adapter */
    IDXGIAdapter1 *adapter;
    /* Device */
    ID3D12Device *device;

    /* Descriptor sizes */
    u32 desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
    u32 desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];

    /* Global descriptor heaps */
    struct cpu_descriptor_heap *cbv_srv_uav_heap;
    struct cpu_descriptor_heap *rtv_heap;

    /* Command queues */
    P_Mutex global_command_list_record_mutex;
    P_Mutex global_submit_mutex;
    struct command_queue *command_queues[DX12_NUM_QUEUES];

    /* Evictor job */
    P_Counter evictor_job_counter;
    P_Cv evictor_wake_cv;
    P_Mutex evictor_wake_mutex;
    i64 evictor_wake_gen;
    b32 evictor_shutdown;
} G = ZI, DebugAlias(G, G_gp_dx12);

/* ==========================
 * Startup
 * ========================== */

/* Brings up the DX12 graphics layer: allocates the global pools, parses the
 * embedded shader archive, then initializes device, objects, pipelines and
 * the noise texture, and finally starts the background evictor job.
 * Panics if called more than once. */
void gp_startup(void) {
    __prof;
    if (Atomic32FetchTestSet(&G.initialized, 0, 1) != 0) {
        P_Panic(Lit("GP layer already initialized"));
    }
    /* Initialize command descriptor heaps pool */
    G.command_descriptor_heaps_arena = AllocArena(Gibi(64));
    /* Initialize command buffers pool */
    G.command_buffers_arena = AllocArena(Gibi(64));
    G.command_buffers_dict = InitDict(G.command_buffers_arena, 4096);
    /* Initialize resources pool */
    G.resources_arena = AllocArena(Gibi(64));
    /* Initialize swapchains pool */
    G.swapchains_arena = AllocArena(Gibi(64));
    /* Initialize pipeline cache */
    G.pipelines_arena = AllocArena(Gibi(64));
    G.pipeline_descs = InitDict(G.pipelines_arena, 1024);
    G.top_pipelines = InitDict(G.pipelines_arena, 1024);
    G.top_successful_pipelines = InitDict(G.pipelines_arena, 1024);
    /* Initialize fenced releases queue */
    G.fenced_releases_arena = AllocArena(Gibi(64));
    /* Initialize embedded shader archive */
    String embedded_data = INC_GetDxcTar();
    if (embedded_data.len <= 0) {
        P_Panic(Lit("No embedded shaders found"));
    }
    G.dxc_archive = tar_parse(G.pipelines_arena, embedded_data, Lit(""));
    /* Initialize dx12 */
    /* TODO: Parallelize phases */
    dx12_init_device();
    dx12_init_objects();
dx12_init_pipelines(); dx12_init_noise(); /* Register callbacks */ #if RESOURCE_RELOADING watch_register_callback(pipeline_watch_callback); #endif P_OnExit(gp_shutdown); /* Start evictor job */ P_Run(1, dx12_evictor_job, 0, P_Pool_Background, P_Priority_Low, &G.evictor_job_counter); } internal P_ExitFuncDef(gp_shutdown) { __prof; #if 0 /* Release objects to make live object reporting less noisy */ //IDXGISwapChain3_Release(G.swapchain); for (u32 i = 0; i < countof(G.command_queues); ++i) { struct command_queue *cq = G.command_queues[i]; cmomand_queue_release(cq); } ID3D12Device_Release(G.device); #else (UNUSED)command_queue_release; #endif { P_Lock lock = P_LockE(&G.evictor_wake_mutex); G.evictor_shutdown = 1; P_SignalCv(&G.evictor_wake_cv, I32Max); P_Unlock(&lock); } P_WaitOnCounter(&G.evictor_job_counter); } /* ========================== * * Dx12 device initialization * ========================== */ internal void dx12_init_error(String error) { TempArena scratch = BeginScratchNoConflict(); String msg = StringFormat(scratch.arena, Lit("Failed to initialize DirectX 12.\n\n%F"), FmtString(error)); P_Panic(msg); EndScratch(scratch); } internal void dx12_init_device(void) { __prof; TempArena scratch = BeginScratchNoConflict(); HRESULT hr = 0; /* Enable debug layer */ u32 dxgi_factory_flags = 0; #if DX12_DEBUG { __profn("Enable debug layer"); ID3D12Debug *debug_controller0 = 0; hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0); if (FAILED(hr)) { dx12_init_error(Lit("Failed to create ID3D12Debug0")); } ID3D12Debug1 *debug_controller1 = 0; hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1); if (FAILED(hr)) { dx12_init_error(Lit("Failed to create ID3D12Debug1")); } ID3D12Debug_EnableDebugLayer(debug_controller0); /* FIXME: Enable this */ //ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, 1); ID3D12Debug_Release(debug_controller1); ID3D12Debug_Release(debug_controller0); dxgi_factory_flags 
|= DXGI_CREATE_FACTORY_DEBUG; } #endif /* Create factory */ { __profn("Create factory"); hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G.factory); if (FAILED(hr)) { dx12_init_error(Lit("Failed to initialize DXGI factory")); } } /* Create device */ { __profn("Create device"); IDXGIAdapter1 *adapter = 0; ID3D12Device *device = 0; String error = Lit("Could not initialize GPU device."); String first_gpu_name = ZI; u32 adapter_index = 0; b32 skip = 0; /* For debugging iGPU */ for (;;) { { hr = IDXGIFactory6_EnumAdapterByGpuPreference(G.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter); } if (SUCCEEDED(hr)) { DXGI_ADAPTER_DESC1 desc; IDXGIAdapter1_GetDesc1(adapter, &desc); if (first_gpu_name.len == 0) { first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description); } { hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); } if (SUCCEEDED(hr) && !skip ) { break; } skip = 0; ID3D12Device_Release(device); IDXGIAdapter1_Release(adapter); adapter = 0; device = 0; ++adapter_index; } else { break; } } if (!device) { if (first_gpu_name.len > 0) { String fmt = Lit("Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. 
Ensure that the device is capable and drivers are up to date."); error = StringFormat(scratch.arena, fmt, FmtString(first_gpu_name)); } dx12_init_error(error); } G.adapter = adapter; G.device = device; } #if DX12_DEBUG /* Enable D3D12 Debug break */ { __profn("Enable d3d12 debug break"); ID3D12InfoQueue *info = 0; hr = ID3D12Device_QueryInterface(G.device, &IID_ID3D12InfoQueue, (void **)&info); if (FAILED(hr)) { dx12_init_error(Lit("Failed to query ID3D12Device interface")); } ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1); ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1); ID3D12InfoQueue_Release(info); } /* Enable DXGI Debug break */ { __profn("Enable dxgi debug break"); IDXGIInfoQueue *dxgi_info = 0; hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info); if (FAILED(hr)) { dx12_init_error(Lit("Failed to get DXGI debug interface")); } IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1); IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1); IDXGIInfoQueue_Release(dxgi_info); } #endif #if ProfilingIsEnabled_GPU && ProfilingIsEnabled_GPU_STABLE_POWER_STATE /* Enable stable power state */ { __profn("Set stable power state"); b32 success = 1; HKEY key = 0; success = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, KEY_READ, &key) == ERROR_SUCCESS; if (success) { DWORD value = ZI; DWORD dword_size = sizeof(DWORD); success = RegQueryValueExW(key, L"AllowDevelopmentWithoutDevLicense", 0, 0, (LPBYTE)&value, &dword_size) == ERROR_SUCCESS; RegCloseKey(key); if (success) { success = value != 0; } } P_LogInfoF("D3D12 profiling is enabled, attempting to set stable power state (this will increase GPU timing stability at the cost of performance)"); if (success) { P_LogInfoF("Machine is in developer mode, calling ID3D12Device::SetStablePowerState"); 
            hr = ID3D12Device_SetStablePowerState(G.device, 1);
            if (SUCCEEDED(hr)) {
                P_LogInfoF("ID3D12Device::SetStablePowerState succeeded");
            } else {
                success = 0;
                P_LogErrorF("ID3D12Device::SetStablePowerState failed");
            }
        } else {
            P_LogWarningF("Machine is not in developer mode, cannot call ID3D12Device::SetStablePowerState");
        }
        if (!success) {
            P_LogWarningF("Profiling is enabled, but ID3D12Device::SetStablePowerState could not be called. This means that GPU timing may be unreliable.");
        }
    }
#endif
    EndScratch(scratch);
}

/* ==========================
 * Dx12 object initialization
 * ========================== */

/* Queries descriptor increments, creates the global CPU descriptor heaps, and
 * allocates the command queues (one parallel job per queue). */
internal void dx12_init_objects(void) {
    __prof;
    /* Initialize desc sizes */
    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
    /* Initialize desc counts */
    G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
    G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = DX12_NUM_RTV_DESCRIPTORS;
    /* Create global descriptor heaps */
    G.cbv_srv_uav_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    G.rtv_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
    /* Create command queues */
    {
        __profn("Allocate command queues");
        /* Order matches the DX12_QUEUE_* indices; only the first DX12_NUM_QUEUES
         * entries are used when multi-queue is disabled. */
        struct command_queue_desc params[] = {
            {.type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Direct queue") },
            {.type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Compute queue")
}, {.type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .dbg_name = Lit("Copyqueue") }, {.type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Background copy queue") } }; struct command_queue_alloc_job_sig sig = ZI; sig.descs_in = params; sig.cqs_out = G.command_queues; { P_Counter counter = ZI; P_Run(DX12_NUM_QUEUES, command_queue_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter); P_WaitOnCounter(&counter); } #if ProfilingIsEnabled { /* Initialize serially for consistent order in profiler */ __profn("Initialize command queue profiling contexts"); for (i32 i = 0; i < DX12_NUM_QUEUES; ++i) { struct command_queue *cq = G.command_queues[i]; String dbg_name = params[i].dbg_name; __prof_dx12_ctx_alloc(cq->prof, G.device, cq->cq, dbg_name.text, dbg_name.len); (UNUSED)dbg_name; } } #endif } } /* ========================== * * Dx12 pipeline initialization * ========================== */ internal void pipeline_register(u64 num_pipelines, struct pipeline **pipelines); internal void dx12_init_pipelines(void) { __prof; TempArena scratch = BeginScratchNoConflict(); /* Register pipeline descs */ { /* Material pipeline */ { struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc); desc->name = Lit("kernel_material"); desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM; desc->rtvs[0].blending = 1; desc->rtvs[1].format = DXGI_FORMAT_R16G16B16A16_FLOAT; desc->rtvs[1].blending = 1; SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); } /* Flood pipeline */ { struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc); desc->name = Lit("kernel_flood"); SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc); } /* Shade pipeline */ { struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc); desc->name = Lit("kernel_shade"); 
            SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
        }
        /* Shape pipeline */
        {
            struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
            desc->name = Lit("kernel_shape");
            desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
            desc->rtvs[0].blending = 1;
            SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
        }
        /* UI pipeline */
        {
            struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
            desc->name = Lit("kernel_ui");
            desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
            desc->rtvs[0].blending = 1;
            SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
        }
        /* Blit pipeline */
        {
            struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
            desc->name = Lit("kernel_blit");
            desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
            desc->rtvs[0].blending = 1;
            SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
        }
    }
    /* Compile pipelines */
    u32 num_pipelines = 0;
    /* NOTE(review): relies on the subsequent PushStruct calls being contiguous
     * after PushDry so `descs` can be indexed as an array -- arena-allocator
     * specific behavior, verify against the allocator's contract. */
    struct pipeline_desc *descs = PushDry(scratch.arena, struct pipeline_desc);
    for (DictEntry *entry = G.pipeline_descs->first; entry; entry = entry->next) {
        struct pipeline_desc *desc = (struct pipeline_desc *)entry->value;
        *PushStruct(scratch.arena, struct pipeline_desc) = *desc;
        ++num_pipelines;
    }
    struct pipeline **pipelines = PushStructs(scratch.arena, struct pipeline *, num_pipelines);
    {
        __profn("Allocate pipelines");
        struct pipeline_alloc_job_sig sig = ZI;
        sig.descs_in = descs;
        sig.pipelines_out = pipelines;
        P_Counter counter = ZI;
        P_Run(num_pipelines, pipeline_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
        P_WaitOnCounter(&counter);
    }
    /* Report per-pipeline results */
    for (u32 i = 0; i < num_pipelines; ++i) {
        struct pipeline *pipeline = pipelines[i];
        if (pipeline->success) {
            P_LogSuccessF("Successfully compiled pipeline \"%F\" in %F seconds", FmtString(pipeline->name),
                FmtFloat(SecondsFromNs(pipeline->compilation_time_ns)));
            if (pipeline->error.len) {
                String msg = StringFormat(scratch.arena, Lit("Warning while compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(pipeline->error));
                P_LogWarning(msg);
            }
        } else {
            String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
            String msg = StringFormat(scratch.arena, Lit("Error initializing pipeline \"%F\":\n\n%F"), FmtString(pipeline->name), FmtString(error));
            P_LogError(msg);
            P_MessageBox(P_MessageBoxKind_Warning, msg);
        }
    }
    pipeline_register(num_pipelines, pipelines);
    EndScratch(scratch);
}

/* ==========================
 * Noise texture initialization
 * ========================== */

/* Loads the embedded blue-noise data, creates a 3D R16_UINT texture + SRV,
 * and uploads the pixel data via an upload job. Panics on missing or
 * wrongly-sized data. */
internal void dx12_init_noise(void) {
    TempArena scratch = BeginScratchNoConflict();
    {
        String noise_res_name = Lit("noise_128x128x64_16.dat");
        R_Resource noise_res = resource_open(noise_res_name);
        DXGI_FORMAT format = DXGI_FORMAT_R16_UINT;
        /* 2 bytes per texel (R16) */
        u32 expected_size = K_BLUE_NOISE_TEX_WIDTH * K_BLUE_NOISE_TEX_HEIGHT * K_BLUE_NOISE_TEX_DEPTH * 2;
        if (resource_exists(&noise_res)) {
            String data = resource_get_data(&noise_res);
            if (data.len != expected_size) {
                P_Panic(StringFormat(scratch.arena, Lit("Noise texture has unexpected size for a %Fx%Fx%F texture (expected %F, got %F)"), FmtUint(K_BLUE_NOISE_TEX_WIDTH), FmtUint(K_BLUE_NOISE_TEX_HEIGHT), FmtUint(K_BLUE_NOISE_TEX_DEPTH), FmtUint(expected_size), FmtUint(data.len)));
            }
            {
                D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
                heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
                heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
                D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
                D3D12_RESOURCE_DESC desc = ZI;
                desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D;
                desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
                desc.Format = format;
                desc.Alignment = 0;
                desc.Width =
                    K_BLUE_NOISE_TEX_WIDTH;
                desc.Height = K_BLUE_NOISE_TEX_HEIGHT;
                desc.DepthOrArraySize = K_BLUE_NOISE_TEX_DEPTH;
                desc.MipLevels = 1;
                desc.SampleDesc.Count = 1;
                desc.SampleDesc.Quality = 0;
                struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, D3D12_RESOURCE_STATE_COPY_DEST);
                r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
                ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle);
                /* Upload texture */
                {
                    P_Counter counter = ZI;
                    struct dx12_upload_job_sig sig = ZI;
                    sig.resource = r;
                    sig.data = data.text;
                    P_Run(1, dx12_upload_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
                    P_WaitOnCounter(&counter);
                }
            }
        } else {
            P_Panic(StringFormat(scratch.arena, Lit("Noise resource \"%F\" not found"), FmtString(noise_res_name)));
        }
        resource_close(&noise_res);
    }
    EndScratch(scratch);
}

/* ==========================
 * Shader compilation
 * ========================== */

#if RESOURCE_RELOADING
struct shader_compile_desc {
    String src;
    String friendly_name;
    String entry;
    String target;
};

struct shader_compile_result {
    i64 elapsed_ns;
    String dxc;
    String errors;
    b32 success;
};

struct shader_compile_job_sig {
    Arena *arena;
    struct shader_compile_desc *descs;
    struct shader_compile_result *results;
};

/* Compiles one shader (job.id indexes descs/results) with dxc, appending the
 * build-system-provided DXC_ARGS after the per-shader arguments. */
internal P_JobDef(shader_compile_job, job) {
    __prof;
    struct shader_compile_job_sig *sig = job.sig;
    Arena *arena = sig->arena;
    struct shader_compile_desc *desc = &sig->descs[job.id];
    struct shader_compile_result *result = &sig->results[job.id];
    TempArena scratch = BeginScratch(arena);
    {
        i64 start_ns = P_TimeNs();
        DXC_Result dxc_result = ZI;
        {
            __profn("Compile shader");
            P_LogInfoF("Compiling shader \"%F:%F\"", FmtString(desc->friendly_name), FmtString(desc->entry));
            /* NOTE: `DXC_ARGS` is supplied by build system at compile time */
            char *dxc_args_cstr = Stringize(DXC_ARGS);
            String dxc_args_str = StringFromCstrNoLimit(dxc_args_cstr);
            StringArray dxc_args_array = SplitString(scratch.arena, dxc_args_str, Lit(" "));
            String
                shader_args[] = {
                desc->friendly_name,
                Lit("-E"), desc->entry,
                Lit("-T"), desc->target,
            };
            u32 num_args = countof(shader_args) + dxc_args_array.count;
            String *args = PushStructs(scratch.arena, String, num_args);
            for (u32 i = 0; i < countof(shader_args); ++i) {
                args[i] = shader_args[i];
            }
            for (u32 i = 0; i < dxc_args_array.count; ++i) {
                args[i + countof(shader_args)] = dxc_args_array.strings[i];
            }
            dxc_result = DXC_Compile(arena, desc->src, num_args, args);
        }
        result->success = dxc_result.success;
        result->dxc = dxc_result.dxc;
        result->errors = dxc_result.errors;
        result->elapsed_ns = P_TimeNs() - start_ns;
    }
    EndScratch(scratch);
}
#endif

/* ==========================
 * Pipeline
 * ========================== */

/* Allocates (or recycles) a pipeline object for descs_in[job.id], builds its
 * root signature and PSO from dxc bytecode (inline in the desc, or looked up
 * in the embedded archive by pipeline name), and stores it in
 * pipelines_out[job.id]. Failures are reported via pipeline->success and
 * pipeline->error rather than by failing the job. */
internal P_JobDef(pipeline_alloc_job, job) {
    __prof;
    struct pipeline_alloc_job_sig *sig = job.sig;
    struct pipeline_desc *desc = &sig->descs_in[job.id];
    struct pipeline **pipelines_out = sig->pipelines_out;
    struct pipeline *pipeline = 0;
    {
        P_Lock lock = P_LockE(&G.pipelines_mutex);
        if (G.first_free_pipeline) {
            pipeline = G.first_free_pipeline;
            G.first_free_pipeline = pipeline->next;
        } else {
            pipeline = PushStructNoZero(G.pipelines_arena, struct pipeline);
        }
        P_Unlock(&lock);
    }
    ZeroStruct(pipeline);
    pipelines_out[job.id] = pipeline;
    pipeline->desc = *desc;
    pipeline->name = desc->name;
    pipeline->hash = HashFnv64(Fnv64Basis, pipeline->name);
    TempArena scratch = BeginScratchNoConflict();
    {
        i64 start_ns = P_TimeNs();
        String pipeline_name = pipeline->name;
        P_LogInfoF("Loading pipeline \"%F\"", FmtString(pipeline_name));
        b32 success = 1;
        HRESULT hr = 0;
        String error_str = ZI;
        /* NOTE(review): assumes tar_get returns a valid (possibly empty) entry
         * for missing names -- a null return would crash here; verify. */
        String vs_dxc = desc->vs_dxc.len > 0 ? desc->vs_dxc : tar_get(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".vs")))->data;
        String ps_dxc = desc->ps_dxc.len > 0 ? desc->ps_dxc : tar_get(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".ps")))->data;
        String cs_dxc = desc->cs_dxc.len > 0 ?
desc->cs_dxc : tar_get(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".cs")))->data; if (success && vs_dxc.len > 0 && ps_dxc.len <= 0) { error_str = Lit("Pipeline has vertex shader without pixel shader"); success = 0; } if (success && vs_dxc.len <= 0 && ps_dxc.len > 0) { error_str = Lit("Pipeline has pixel shader without vertex shader"); success = 0; } if (success && cs_dxc.len > 0 && (vs_dxc.len > 0 || ps_dxc.len > 0)) { error_str = Lit("Pipeline has a compute shader with a vertex/pixel shader"); success = 0; } if (success && cs_dxc.len <= 0 && vs_dxc.len <= 0 && ps_dxc.len <= 0) { error_str = Lit("Pipeline has no shaders"); success = 0; } ID3D10Blob *vs_blob = 0; ID3D10Blob *ps_blob = 0; ID3D10Blob *cs_blob = 0; if (success && vs_dxc.len > 0) { hr = D3DCreateBlob(vs_dxc.len, &vs_blob); if (SUCCEEDED(hr)) { CopyBytes(ID3D10Blob_GetBufferPointer(vs_blob), vs_dxc.text, vs_dxc.len); } else { error_str = Lit("Failed to create vertex shader blob"); success = 0; } } if (success && ps_dxc.len > 0) { hr = D3DCreateBlob(ps_dxc.len, &ps_blob); if (SUCCEEDED(hr)) { CopyBytes(ID3D10Blob_GetBufferPointer(ps_blob), ps_dxc.text, ps_dxc.len); } else { error_str = Lit("Failed to create pixel shader blob"); success = 0; } } if (success && cs_dxc.len > 0) { hr = D3DCreateBlob(cs_dxc.len, &cs_blob); if (SUCCEEDED(hr)) { CopyBytes(ID3D10Blob_GetBufferPointer(cs_blob), cs_dxc.text, cs_dxc.len); } else { error_str = Lit("Failed to create compute shader blob"); success = 0; } } /* Get root signature blob * NOTE: This isn't necessary for creating the root signature (since it * could reuse the shader blob), however we'd like to verify that the * root signature exists and matches between vs & ps shaders. 
*/ ID3D10Blob *rootsig_blob = 0; if (success) { __profn("Validate root signatures"); if (cs_dxc.len > 0) { u32 cs_rootsig_data_len = 0; ID3D10Blob *cs_rootsig_blob = 0; D3DGetBlobPart(ID3D10Blob_GetBufferPointer(cs_blob), ID3D10Blob_GetBufferSize(cs_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &cs_rootsig_blob); if (cs_rootsig_blob) { cs_rootsig_data_len = ID3D10Blob_GetBufferSize(cs_rootsig_blob); } if (cs_rootsig_data_len == 0) { success = 0; error_str = Lit("Compute shader is missing root signature"); } else { rootsig_blob = cs_rootsig_blob; } } else { char *vs_rootsig_data = 0; char *ps_rootsig_data = 0; u32 vs_rootsig_data_len = 0; u32 ps_rootsig_data_len = 0; ID3D10Blob *vs_rootsig_blob = 0; ID3D10Blob *ps_rootsig_blob = 0; D3DGetBlobPart(ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &vs_rootsig_blob); D3DGetBlobPart(ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &ps_rootsig_blob); if (vs_rootsig_blob) { vs_rootsig_data = ID3D10Blob_GetBufferPointer(vs_rootsig_blob); vs_rootsig_data_len = ID3D10Blob_GetBufferSize(vs_rootsig_blob); } if (ps_rootsig_blob) { ps_rootsig_data = ID3D10Blob_GetBufferPointer(ps_rootsig_blob); ps_rootsig_data_len = ID3D10Blob_GetBufferSize(ps_rootsig_blob); } if (vs_rootsig_data_len == 0) { success = 0; error_str = Lit("Vertex shader is missing root signature"); } else if (ps_rootsig_data_len == 0) { success = 0; error_str = Lit("Pixel shader is missing root signature"); } else if (vs_rootsig_data_len != ps_rootsig_data_len || !EqBytes(vs_rootsig_data, ps_rootsig_data, vs_rootsig_data_len)) { success = 0; error_str = Lit("Root signature mismatch between vertex and pixel shader"); } else { rootsig_blob = vs_rootsig_blob; } if (ps_rootsig_blob) { ID3D10Blob_Release(ps_rootsig_blob); } } } /* Create root signature */ ID3D12RootSignature *rootsig = 0; if (success) { __profn("Create root signature"); hr = 
ID3D12Device_CreateRootSignature(G.device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig); if (FAILED(hr)) { error_str = Lit("Failed to create root signature"); success = 0; } } /* Create PSO */ ID3D12PipelineState *pso = 0; if (success) { if (cs_dxc.len > 0) { __profn("Create compute PSO"); D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = { 0 }; pso_desc.pRootSignature = rootsig; pso_desc.CS.pShaderBytecode = ID3D10Blob_GetBufferPointer(cs_blob); pso_desc.CS.BytecodeLength = ID3D10Blob_GetBufferSize(cs_blob); hr = ID3D12Device_CreateComputePipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); } else { __profn("Create graphics PSO"); /* Default rasterizer state */ D3D12_RASTERIZER_DESC raster_desc = { .FillMode = D3D12_FILL_MODE_SOLID, .CullMode = D3D12_CULL_MODE_NONE, .FrontCounterClockwise = 0, .DepthBias = D3D12_DEFAULT_DEPTH_BIAS, .DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP, .SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS, .DepthClipEnable = 1, .MultisampleEnable = 0, .AntialiasedLineEnable = 0, .ForcedSampleCount = 0, .ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF }; /* Empty input layout */ D3D12_INPUT_LAYOUT_DESC input_layout_desc = ZI; /* Blend state */ D3D12_BLEND_DESC blend_desc = { .AlphaToCoverageEnable = 0, .IndependentBlendEnable = 1 }; for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) { StaticAssert(countof(blend_desc.RenderTarget) <= countof(desc->rtvs)); if (desc->rtvs[i].format != DXGI_FORMAT_UNKNOWN) { b32 blending_enabled = desc->rtvs[i].blending; blend_desc.RenderTarget[i].BlendEnable = blending_enabled; blend_desc.RenderTarget[i].SrcBlend = D3D12_BLEND_SRC_ALPHA; blend_desc.RenderTarget[i].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; blend_desc.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD; blend_desc.RenderTarget[i].SrcBlendAlpha = D3D12_BLEND_ONE; blend_desc.RenderTarget[i].DestBlendAlpha = 
D3D12_BLEND_INV_SRC_ALPHA; blend_desc.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD; blend_desc.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; } else { break; } } /* Disable depth stencil */ D3D12_DEPTH_STENCIL_DESC depth_stencil_desc = { .DepthEnable = 0, .StencilEnable = 0 }; /* PSO */ D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { 0 }; pso_desc.pRootSignature = rootsig; pso_desc.VS.pShaderBytecode = ID3D10Blob_GetBufferPointer(vs_blob); pso_desc.VS.BytecodeLength = ID3D10Blob_GetBufferSize(vs_blob); pso_desc.PS.pShaderBytecode = ID3D10Blob_GetBufferPointer(ps_blob); pso_desc.PS.BytecodeLength = ID3D10Blob_GetBufferSize(ps_blob); pso_desc.BlendState = blend_desc; pso_desc.SampleMask = UINT_MAX; pso_desc.RasterizerState = raster_desc; pso_desc.DepthStencilState = depth_stencil_desc; pso_desc.InputLayout = input_layout_desc; pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) { StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc->rtvs)); DXGI_FORMAT format = desc->rtvs[i].format; if (format != DXGI_FORMAT_UNKNOWN) { pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format; } else { break; } } pso_desc.SampleDesc.Count = 1; pso_desc.SampleDesc.Quality = 0; hr = ID3D12Device_CreateGraphicsPipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); } if (FAILED(hr)) { error_str = Lit("Failed to create pipeline state object"); success = 0; } } /* Parse errors */ if (!success && error_str.len <= 0) { error_str = Lit("Unknown error"); } pipeline->pso = pso; pipeline->rootsig = rootsig; pipeline->compilation_time_ns = P_TimeNs() - start_ns; pipeline->success = success; pipeline->is_gfx = cs_dxc.len == 0; pipeline->error = error_str; if (rootsig_blob) { ID3D10Blob_Release(rootsig_blob); } if (vs_blob) { ID3D10Blob_Release(vs_blob); } if (ps_blob) { ID3D10Blob_Release(ps_blob); } if (cs_blob) { ID3D10Blob_Release(cs_blob); } } EndScratch(scratch); } 
internal void pipeline_release_now(struct pipeline *pipeline) { __prof; if (pipeline->pso) { ID3D12PipelineState_Release(pipeline->pso); } P_Lock lock = P_LockE(&G.pipelines_mutex); { pipeline->next = G.first_free_pipeline; G.first_free_pipeline = pipeline; } P_Unlock(&lock); } /* ========================== * * Pipeline cache * ========================== */ internal struct pipeline_scope *pipeline_scope_begin(void) { __prof; struct pipeline_scope *scope = 0; { P_Lock lock = P_LockE(&G.pipelines_mutex); if (G.first_free_pipeline_scope) { scope = G.first_free_pipeline_scope; G.first_free_pipeline_scope = scope->next_free; } P_Unlock(&lock); } Arena *arena = 0; if (scope) { arena = scope->arena; } else { arena = AllocArena(Mebi(64)); } ResetArena(arena); scope = PushStruct(arena, struct pipeline_scope); scope->arena = arena; scope->refs = InitDict(scope->arena, 64); return scope; } internal void pipeline_scope_end(struct pipeline_scope *scope) { __prof; P_Lock lock = P_LockE(&G.pipelines_mutex); { for (DictEntry *entry = scope->refs->first; entry; entry = entry->next) { struct pipeline *pipeline = (struct pipeline *)entry->value; if (--pipeline->refcount <= 0) { fenced_release(pipeline, FENCED_RELEASE_KIND_PIPELINE); } } scope->next_free = G.first_free_pipeline_scope; G.first_free_pipeline_scope = scope; } P_Unlock(&lock); } internal Readonly struct pipeline g_nil_pipeline = ZI; internal struct pipeline *pipeline_from_name(struct pipeline_scope *scope, String name) { __prof; struct pipeline *result = &g_nil_pipeline; u64 hash = HashFnv64(Fnv64Basis, name); struct pipeline *tmp = (struct pipeline *)DictValueFromHash(scope->refs, hash); if (tmp) { result = tmp; } else { { P_Lock lock = P_LockE(&G.pipelines_mutex); tmp = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, hash); if (tmp) { ++tmp->refcount; } P_Unlock(&lock); } if (tmp) { SetDictValue(scope->arena, scope->refs, hash, (u64)tmp); result = tmp; } } return result; } internal void 
/* Publish freshly compiled pipelines into the global caches. Each pipeline
 * goes into G.top_pipelines and, when it compiled successfully, also into
 * G.top_successful_pipelines; displaced entries under the same hash lose a
 * reference and are fenced-released once unreferenced. */
pipeline_register(u64 num_pipelines, struct pipeline **pipelines)
{
    __prof;
    P_Lock lock = P_LockE(&G.pipelines_mutex);
    {
        for (u64 i = 0; i < num_pipelines; ++i) {
            struct pipeline *pipeline = pipelines[i];
            u64 hash = pipeline->hash;
            /* Insert into top dict */
            {
                struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_pipelines, hash);
                if (old_pipeline && --old_pipeline->refcount <= 0) {
                    fenced_release(old_pipeline, FENCED_RELEASE_KIND_PIPELINE);
                }
                SetDictValue(G.pipelines_arena, G.top_pipelines, hash, (u64)pipeline);
                ++pipeline->refcount;
            }
            /* Insert into success dict */
            if (pipeline->success) {
                struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, hash);
                if (old_pipeline && --old_pipeline->refcount <= 0) {
                    fenced_release(old_pipeline, FENCED_RELEASE_KIND_PIPELINE);
                }
                SetDictValue(G.pipelines_arena, G.top_successful_pipelines, hash, (u64)pipeline);
                ++pipeline->refcount;
            }
        }
    }
    P_Unlock(&lock);
}

#if RESOURCE_RELOADING
/* File-watch callback: when a shader source changes (src/*.rst = root sig +
 * vs/ps, src/*.knl = compute kernel), recompile the shaders, rebuild every
 * pipeline desc that references the file, and re-register the results. */
internal WATCH_CALLBACK_FUNC_DEF(pipeline_watch_callback, name)
{
    __prof;
    TempArena scratch = BeginScratchNoConflict();
    String rst_extension = Lit(".rst");
    String knl_extension = Lit(".knl");
    b32 is_src = StringStartsWith(name, Lit("src/"));
    b32 is_rs = is_src && StringEndsWith(name, rst_extension);
    b32 is_cs = is_src && !is_rs && StringEndsWith(name, knl_extension);
    b32 success = 0;
    /* Recompile shaders */
    String pipeline_name = ZI;
    String friendly_name = ZI;
    i32 num_shaders = 0;
    struct shader_compile_desc *shader_descs = 0;
    struct shader_compile_result *shader_results = 0;
    if (is_rs || is_cs) {
        P_LogDebugF("Change detected in shader source file \"%F\", recompiling...", FmtString(name));
        success = 1;
        P_File file = P_OpenFileReadWait(name);
        String data = P_ReadFile(scratch.arena, file);
        /* friendly_name: path from the last "src/" onward, for log messages */
        {
            friendly_name = name;
            StringArray split = SplitString(scratch.arena, friendly_name, Lit("src/"));
            friendly_name = split.count > 0 ? CatString(scratch.arena, Lit("src/"), split.strings[split.count - 1]) : friendly_name;
        }
        /* pipeline_name: basename without extension, used to match descs */
        {
            pipeline_name = name;
            StringArray split = SplitString(scratch.arena, pipeline_name, Lit("/"));
            pipeline_name = split.count > 0 ? split.strings[split.count - 1] : pipeline_name;
            split = SplitString(scratch.arena, pipeline_name, Lit("."));
            pipeline_name = split.count > 1 ? split.strings[split.count - 2] : pipeline_name;
        }
        {
            struct shader_compile_job_sig sig = ZI;
            sig.arena = scratch.arena;
            if (is_rs) {
                /* .rst: compile both vertex and pixel entry points (SM 6.6) */
                num_shaders = 2;
                shader_descs = PushStructs(scratch.arena, struct shader_compile_desc, num_shaders);
                shader_results = PushStructs(scratch.arena, struct shader_compile_result, num_shaders);
                sig.descs = shader_descs;
                sig.results = shader_results;
                sig.descs[0].src = data;
                sig.descs[0].friendly_name = friendly_name;
                sig.descs[0].entry = Lit("vs");
                sig.descs[0].target = Lit("vs_6_6");
                sig.descs[1].src = data;
                sig.descs[1].friendly_name = friendly_name;
                sig.descs[1].entry = Lit("ps");
                sig.descs[1].target = Lit("ps_6_6");
            } else if (is_cs) {
                /* .knl: single compute entry point */
                num_shaders = 1;
                shader_descs = PushStructs(scratch.arena, struct shader_compile_desc, num_shaders);
                shader_results = PushStructs(scratch.arena, struct shader_compile_result, num_shaders);
                sig.descs = shader_descs;
                sig.results = shader_results;
                sig.descs[0].src = data;
                sig.descs[0].friendly_name = friendly_name;
                sig.descs[0].entry = Lit("cs");
                sig.descs[0].target = Lit("cs_6_6");
            }
            /* Compile all shaders in parallel and wait */
            {
                P_Counter counter = ZI;
                P_Run(num_shaders, shader_compile_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
                P_WaitOnCounter(&counter);
            }
        }
        P_CloseFIle(file);
    }
    /* Report per-shader results; any failure aborts the pipeline rebuild */
    for (i32 i = 0; i < num_shaders; ++i) {
        struct shader_compile_desc *desc = &shader_descs[i];
        struct shader_compile_result *result = &shader_results[i];
        if (result->success) {
            P_LogSuccessF("Finished compiling shader \"%F:%F\" in %F seconds", FmtString(desc->friendly_name), FmtString(desc->entry), FmtFloat(SecondsFromNs(result->elapsed_ns)));
            if (result->errors.len > 0) {
                String msg = result->errors;
                P_LogWarning(msg);
            }
        } else {
            String msg = result->errors;
            P_LogError(msg);
            success = 0;
        }
    }
    if (success) {
        /* Create pipeline descs */
        u32 num_pipelines = 0;
        struct pipeline_desc *pipeline_descs = PushDry(scratch.arena, struct pipeline_desc);
        for (DictEntry *entry = G.pipeline_descs->first; entry; entry = entry->next) {
            struct pipeline_desc *pipeline_desc = (struct pipeline_desc *)entry->value;
            struct pipeline_desc new_pipeline_desc = *pipeline_desc;
            if (EqString(pipeline_desc->name, pipeline_name)) {
                /* Inject the fresh bytecode; it overrides archive lookup */
                if (is_rs) {
                    new_pipeline_desc.vs_dxc = shader_results[0].dxc;
                    new_pipeline_desc.ps_dxc = shader_results[1].dxc;
                } else if (is_cs) {
                    new_pipeline_desc.cs_dxc = shader_results[0].dxc;
                }
                /* Appends contiguously after the PushDry base pointer */
                *PushStructNoZero(scratch.arena, struct pipeline_desc) = new_pipeline_desc;
                ++num_pipelines;
            }
        }
        /* Recompile dirty pipelines */
        if (num_pipelines > 0) {
            __profn("Compile dirty pipelines");
            struct pipeline **pipelines = PushStructs(scratch.arena, struct pipeline *, num_pipelines);
            {
                struct pipeline_alloc_job_sig sig = ZI;
                sig.descs_in = pipeline_descs;
                sig.pipelines_out = pipelines;
                P_Counter counter = ZI;
                P_Run(num_pipelines, pipeline_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
                P_WaitOnCounter(&counter);
            }
            {
                P_Lock lock = P_LockS(&G.pipelines_mutex);
                for (u32 i = 0; i < num_pipelines; ++i) {
                    struct pipeline *pipeline = pipelines[i];
                    if (pipeline->success) {
                        P_LogSuccessF("Successfully compiled pipeline \"%F\" in %F seconds", FmtString(pipeline->name), FmtFloat(SecondsFromNs(pipeline->compilation_time_ns)));
                        if (pipeline->error.len > 0) {
                            String msg = StringFormat(scratch.arena, Lit("Warning while compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(pipeline->error));
                            P_LogWarning(msg);
                        }
                    } else {
                        {
                            String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
                            String msg = StringFormat(scratch.arena, Lit("Error compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(error));
                            P_LogError(msg);
                        }
                        struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, pipeline->hash);
                        if (!old_pipeline) {
                            /* If no previously successful pipeline exists, then show a message box rather than logging since logs may not be visible to user */
                            String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
                            String msg = StringFormat(scratch.arena, Lit("Error compiling pipeline \"%F\":\n\n%F"), FmtString(pipeline->name), FmtString(error));
                            P_MessageBox(P_MessageBoxKind_Warning, msg);
                        }
                    }
                }
                P_Unlock(&lock);
            }
            pipeline_register(num_pipelines, pipelines);
        }
    }
    EndScratch(scratch);
}
#endif

/* ==========================
 *
 * Descriptor
 *
 * ========================== */

/* Allocate one CPU descriptor from dh: pop the free list, or reserve the
 * next unused slot in the heap. Panics when the heap is exhausted. */
internal struct descriptor *
descriptor_alloc(struct cpu_descriptor_heap *dh)
{
    __prof;
    struct descriptor *d = 0;
    u32 index = 0;
    D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI;
    {
        P_Lock lock = P_LockE(&dh->mutex);
        if (dh->first_free_descriptor) {
            d = dh->first_free_descriptor;
            dh->first_free_descriptor = d->next_free;
            handle = d->handle;
            index = d->index;
        } else {
            if (dh->num_descriptors_reserved >= dh->num_descriptors_capacity) {
                P_Panic(Lit("Max descriptors reached in heap"));
            }
            d = PushStructNoZero(dh->arena, struct descriptor);
            index = dh->num_descriptors_reserved++;
            handle.ptr = dh->handle.ptr + (index * dh->descriptor_size);
        }
        P_Unlock(&lock);
    }
    /* handle/index were captured above so the struct can be zeroed whole */
    ZeroStruct(d);
    d->heap = dh;
    d->handle = handle;
    d->index = index;
    return d;
}

/* Return a descriptor to its heap's free list. The heap slot itself is
 * recycled; nothing is destroyed on the D3D12 side. */
internal void
descriptor_release(struct descriptor *descriptor)
{
    struct cpu_descriptor_heap *dh = descriptor->heap;
    P_Lock lock = P_LockE(&dh->mutex);
    {
        descriptor->next_free = dh->first_free_descriptor;
        dh->first_free_descriptor = descriptor;
    }
    P_Unlock(&lock);
}

/* ==========================
 *
 * CPU descriptor heap
 *
 * ========================== */

internal struct
/* Create a CPU-visible (non-shader-visible) descriptor heap of the given
 * type; capacity and stride come from G.desc_counts / G.desc_sizes. */
cpu_descriptor_heap *
cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type)
{
    __prof;
    struct cpu_descriptor_heap *dh = 0;
    {
        Arena *arena = AllocArena(Mebi(64));
        dh = PushStruct(arena, struct cpu_descriptor_heap);
        dh->arena = arena;
    }
    u32 num_descriptors = 0;
    u32 descriptor_size = 0;
    if (type < (i32)countof(G.desc_counts) && type < (i32)countof(G.desc_sizes)) {
        num_descriptors = G.desc_counts[type];
        descriptor_size = G.desc_sizes[type];
    }
    if (num_descriptors == 0 || descriptor_size == 0) {
        P_Panic(Lit("Unsupported CPU descriptor type"));
    }
    dh->num_descriptors_capacity = num_descriptors;
    dh->descriptor_size = descriptor_size;
    D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
    desc.Type = type;
    desc.NumDescriptors = num_descriptors;
    HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&dh->heap);
    if (FAILED(hr)) {
        P_Panic(Lit("Failed to create CPU descriptor heap"));
    }
    ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(dh->heap, &dh->handle);
    return dh;
}

#if 0
internal void
cpu_descriptor_heap_release(struct cpu_descriptor_heap *dh)
{
    /* TODO */
    (UNUSED)dh;
}
#endif

/* ==========================
 *
 * Fenced release
 *
 * ========================== */

/* Queue an object for deferred release: snapshot the current submit-fence
 * target of every command queue, push (kind, ptr) onto the release queue,
 * and wake the evictor thread. The evictor frees the object once all
 * queues have passed the recorded targets. */
internal void
fenced_release(void *data, enum fenced_release_kind kind)
{
    struct fenced_release_data fr = ZI;
    fr.kind = kind;
    fr.ptr = data;
    u64 fr_targets[countof(G.fenced_release_targets)] = ZI;
    /* Read current fence target values from command queues */
    for (u32 i = 0; i < countof(G.command_queues); ++i) {
        struct command_queue *cq = G.command_queues[i];
        P_Lock lock = P_LockS(&cq->submit_fence_mutex);
        {
            fr_targets[i] = cq->submit_fence_target;
        }
        P_Unlock(&lock);
    }
    /* PushStruct data to release queue */
    {
        P_Lock lock = P_LockE(&G.fenced_releases_mutex);
        {
            *PushStruct(G.fenced_releases_arena, struct fenced_release_data) = fr;
            /* NOTE(review): this overwrites the single global target array
             * with the latest snapshot; presumably the evictor only needs
             * the most recent (largest) targets — confirm. */
            CopyBytes(G.fenced_release_targets, fr_targets, sizeof(fr_targets));
        }
        P_Unlock(&lock);
    }
    /* Wake evictor */
    {
        P_Lock lock = P_LockE(&G.evictor_wake_mutex);
        {
            ++G.evictor_wake_gen;
            P_SignalCv(&G.evictor_wake_cv, I32Max);
        }
        P_Unlock(&lock);
    }
}

/* ==========================
 *
 * Resource
 *
 * ========================== */

/* Allocate (or recycle the struct of) a committed resource. Render-target
 * resources get a zero-color optimized clear value. Panics on failure. */
internal struct dx12_resource *
dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state)
{
    __prof;
    struct dx12_resource *r = 0;
    {
        P_Lock lock = P_LockE(&G.resources_mutex);
        if (G.first_free_resource) {
            r = G.first_free_resource;
            G.first_free_resource = r->next_free;
        } else {
            r = PushStructNoZero(G.resources_arena, struct dx12_resource);
        }
        P_Unlock(&lock);
    }
    ZeroStruct(r);
    D3D12_CLEAR_VALUE clear_value = { .Format = desc.Format, .Color = { 0 } };
    D3D12_CLEAR_VALUE *clear_value_ptr = desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0;
    HRESULT hr = ID3D12Device_CreateCommittedResource(G.device, &heap_props, heap_flags, &desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->resource);
    if (FAILED(hr)) {
        /* TODO: Don't panic */
        P_Panic(Lit("Failed to create resource"));
    }
    /* Track state for dx12_resource_barriers */
    r->state = initial_state;
    if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) {
        r->gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->resource);
    }
    return r;
}

/* Immediately destroy a resource: release its descriptors and the D3D12
 * resource, then recycle the struct. GPU must be done with it (callers go
 * through fenced_release / gp_resource_release). */
internal void
dx12_resource_release_now(struct dx12_resource *t)
{
    __prof;
    /* Release descriptors */
    /* TODO: Batch lock heaps */
    if (t->cbv_descriptor) {
        descriptor_release(t->cbv_descriptor);
    }
    if (t->srv_descriptor) {
        descriptor_release(t->srv_descriptor);
    }
    if (t->uav_descriptor) {
        descriptor_release(t->uav_descriptor);
    }
    if (t->rtv_descriptor) {
        descriptor_release(t->rtv_descriptor);
    }
    /* Release resource */
    ID3D12Resource_Release(t->resource);
    /* Add to free list */
    P_Lock lock = P_LockE(&G.resources_mutex);
    t->next_free = G.first_free_resource;
    G.first_free_resource = t;
    P_Unlock(&lock);
}

/* Public API: deferred (fence-guarded) release of a graphics resource. */
void
gp_resource_release(G_Resource *resource)
{
    struct dx12_resource *r = (struct dx12_resource *)resource;
    fenced_release(r, FENCED_RELEASE_KIND_RESOURCE);
}

/*
 ==========================
 *
 * Resource barrier
 *
 * ========================== */

/* One requested barrier; new_state is only meaningful for transitions. */
struct dx12_resource_barrier_desc {
    enum D3D12_RESOURCE_BARRIER_TYPE type;
    struct dx12_resource *resource;
    enum D3D12_RESOURCE_STATES new_state; /* 0 if type != D3D12_RESOURCE_BARRIER_TYPE_TRANSITION */
};

/* Record the requested barriers on cl. Transition barriers are elided when
 * the tracked state already matches; the resource's tracked state is
 * updated as a side effect. */
internal void
dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_descs, struct dx12_resource_barrier_desc *descs)
{
    __prof;
    TempArena scratch = BeginScratchNoConflict();
    i32 num_rbs = 0;
    struct D3D12_RESOURCE_BARRIER *rbs = PushStructsNoZero(scratch.arena, struct D3D12_RESOURCE_BARRIER, num_descs);
    for (i32 i = 0; i < num_descs; ++i) {
        struct dx12_resource_barrier_desc *desc = &descs[i];
        struct dx12_resource *resource = desc->resource;
        enum D3D12_RESOURCE_BARRIER_TYPE type = desc->type;
        if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION) {
            enum D3D12_RESOURCE_STATES old_state = resource->state;
            enum D3D12_RESOURCE_STATES new_state = desc->new_state;
            if (new_state != old_state) {
                struct D3D12_RESOURCE_BARRIER *rb = &rbs[num_rbs++];
                ZeroStruct(rb);
                rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                rb->Flags = 0;
                rb->Transition.pResource = resource->resource;
                rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
                rb->Transition.StateBefore = old_state;
                rb->Transition.StateAfter = new_state;
                resource->state = new_state;
            }
        } else if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV) {
            struct D3D12_RESOURCE_BARRIER *rb = &rbs[num_rbs++];
            ZeroStruct(rb);
            rb->Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
            rb->Flags = 0;
            rb->UAV.pResource = resource->resource;
        } else {
            /* Unknown barrier type */
            Assert(0);
        }
    }
    if (num_rbs > 0) {
        ID3D12GraphicsCommandList_ResourceBarrier(cl, num_rbs, rbs);
    }
    EndScratch(scratch);
}

/* ==========================
 *
 * Command queue
 *
 * ========================== */

internal struct command_list_pool *command_list_pool_alloc(struct command_queue *cq);

/* Job: create one command queue (plus its submit fence and command-list
 * pool) from sig->descs_in[job.id], storing it in sig->cqs_out[job.id]. */
internal P_JobDef(command_queue_alloc_job, job)
{
    __prof;
    struct command_queue_alloc_job_sig *sig = job.sig;
    struct command_queue_desc *desc = &sig->descs_in[job.id];
    {
        struct command_queue *cq = 0;
        {
            Arena *arena = AllocArena(Gibi(64));
            cq = PushStruct(arena, struct command_queue);
            cq->arena = arena;
        }
        cq->desc = *desc;
        D3D12_COMMAND_QUEUE_DESC dx12_desc = ZI;
        dx12_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
        dx12_desc.Type = desc->type;
        dx12_desc.Priority = desc->priority;
        HRESULT hr = ID3D12Device_CreateCommandQueue(G.device, &dx12_desc, &IID_ID3D12CommandQueue, (void **)&cq->cq);
        if (FAILED(hr)) {
            P_Panic(Lit("Failed to create command queue"));
        }
        /* Fence starts at 0; submit_fence_target increments per submit */
        hr = ID3D12Device_CreateFence(G.device, 0, 0, &IID_ID3D12Fence, (void **)&cq->submit_fence);
        if (FAILED(hr)) {
            P_Panic(Lit("Failed to create command queue fence"));
        }
        cq->cl_pool = command_list_pool_alloc(cq);
        sig->cqs_out[job.id] = cq;
    }
}

/* Teardown placeholder — queue release is not implemented yet. */
internal void
command_queue_release(struct command_queue *cq)
{
    __prof;
    /* TODO */
    (UNUSED)cq;
    //ID3D12CommandQueue_Release(cq->cq);
}

/* ==========================
 *
 * Command list
 *
 * ========================== */

/* Create an (initially empty) pool of reusable command lists for cq. */
internal struct command_list_pool *
command_list_pool_alloc(struct command_queue *cq)
{
    struct command_list_pool *pool = 0;
    {
        Arena *arena = AllocArena(Gibi(64));
        pool = PushStruct(arena, struct command_list_pool);
        pool->arena = arena;
    }
    pool->cq = cq;
    return pool;
}

/* Open a command list for recording: reuse the first pooled list whose
 * submission the GPU has completed (fence check), otherwise allocate a new
 * allocator + list. The returned list is reset and holds a shared global
 * record lock until command_list_close. */
internal struct command_list *
command_list_open(struct command_list_pool *pool)
{
    __prof;
    struct command_queue *cq = pool->cq;
    u64 completed_fence_value = ID3D12Fence_GetCompletedValue(cq->submit_fence);
    struct command_list *cl = 0;
    struct ID3D12GraphicsCommandList *old_cl = 0;
    struct ID3D12CommandAllocator *old_ca = 0;
    {
        P_Lock lock = P_LockE(&pool->mutex);
        /* Find first command list ready for reuse */
        for (struct command_list *tmp = pool->first_submitted_command_list; tmp; tmp = tmp->next_submitted) {
            if (completed_fence_value >= tmp->submitted_fence_target) {
                cl = tmp;
                break;
            }
        }
        if (cl) {
            /* Remove from submitted list */
            old_cl = cl->cl;
            old_ca = cl->ca;
            struct command_list *prev = cl->prev_submitted;
            struct command_list *next = cl->next_submitted;
            if (prev) {
                prev->next_submitted = next;
            } else {
                pool->first_submitted_command_list = next;
            }
            if (next) {
                next->prev_submitted = prev;
            } else {
                pool->last_submitted_command_list = prev;
            }
        } else {
            cl = PushStructNoZero(pool->arena, struct command_list);
        }
        P_Unlock(&lock);
    }
    ZeroStruct(cl);
    cl->cq = cq;
    cl->pool = pool;
    /* Held shared across recording; released on close/submit */
    cl->global_record_lock = P_LockS(&G.global_command_list_record_mutex);
    HRESULT hr = 0;
    if (old_cl) {
        cl->cl = old_cl;
        cl->ca = old_ca;
    } else {
        hr = ID3D12Device_CreateCommandAllocator(G.device, cq->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca);
        if (FAILED(hr)) {
            P_Panic(Lit("Failed to create command allocator"));
        }
        hr = ID3D12Device_CreateCommandList(G.device, 0, cq->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl);
        if (FAILED(hr)) {
            P_Panic(Lit("Failed to create command list"));
        }
        /* New lists are born open; close so the Reset below is uniform */
        hr = ID3D12GraphicsCommandList_Close(cl->cl);
        if (FAILED(hr)) {
            P_Panic(Lit("Failed to close command list during initialization"));
        }
    }
    /* Reset */
    hr = ID3D12CommandAllocator_Reset(cl->ca);
    if (FAILED(hr)) {
        P_Panic(Lit("Failed to reset command allocator"));
    }
    hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0);
    if (FAILED(hr)) {
        P_Panic(Lit("Failed to reset command list"));
    }
    return cl;
}

/* TODO: Allow multiple command list submissions */
/* Close and submit cl on its queue; returns the fence target that will be
 * signaled when the GPU finishes this submission. */
internal u64
command_list_close(struct command_list *cl)
{
    __prof;
    struct command_queue *cq = cl->cq;
    struct command_list_pool *pool = cl->pool;
    /* Close */
    {
        __profn("Close DX12 command list");
        HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl);
        if (FAILED(hr)) {
            /* TODO: Don't panic */
            P_Panic(Lit("Failed to close command list before execution"));
        }
    }
    /* Submit */
    u64 submit_fence_target = 0;
    {
        __profn("Execute");
        P_Lock submit_lock = P_LockS(&G.global_submit_mutex);
        P_Lock fence_lock = P_LockE(&cq->submit_fence_mutex);
        {
            submit_fence_target = ++cq->submit_fence_target;
            ID3D12CommandQueue_ExecuteCommandLists(cq->cq, 1, (ID3D12CommandList **)&cl->cl);
ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, submit_fence_target); } P_Unlock(&fence_lock); P_Unlock(&submit_lock); } /* Add descriptor heaps to submitted list */ { P_Lock lock = P_LockE(&G.command_descriptor_heaps_mutex); for (struct command_descriptor_heap *cdh = cl->first_command_descriptor_heap; cdh; cdh = cdh->next_in_command_list) { cdh->submitted_cq = cq; cdh->submitted_fence_target = submit_fence_target; if (G.last_submitted_command_descriptor_heap) { G.last_submitted_command_descriptor_heap->next_submitted = cdh; } else { G.first_submitted_command_descriptor_heap = cdh; } G.last_submitted_command_descriptor_heap = cdh; } P_Unlock(&lock); } /* Add command buffers to submitted list */ { P_Lock lock = P_LockE(&G.command_buffers_mutex); for (struct command_buffer *cb = cl->first_command_buffer; cb; cb = cb->next_in_command_list) { struct command_buffer_group *group = cb->group; cb->submitted_cq = cq; cb->submitted_fence_target = submit_fence_target; if (group->last_submitted) { group->last_submitted->next_submitted = cb; } else { group->first_submitted = cb; } group->last_submitted = cb; } P_Unlock(&lock); } /* Add command list to pool submitted list */ P_Unlock(&cl->global_record_lock); cl->submitted_fence_target = submit_fence_target; { P_Lock lock = P_LockE(&pool->mutex); if (pool->last_submitted_command_list) { pool->last_submitted_command_list->next_submitted = cl; } else { pool->first_submitted_command_list = cl; } pool->last_submitted_command_list = cl; P_Unlock(&lock); } return submit_fence_target; } /* ========================== * * Command descriptor heap (GPU / shader visible descriptor heap) * ========================== */ internal struct command_descriptor_heap *command_list_push_descriptor_heap(struct command_list *cl, struct cpu_descriptor_heap *dh_cpu) { __prof; Assert(dh_cpu->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); /* Src heap must have expected type */ /* Allocate GPU heap */ struct command_descriptor_heap *cdh = 0; 
ID3D12DescriptorHeap *old_heap = 0; D3D12_CPU_DESCRIPTOR_HANDLE old_start_cpu_handle = ZI; D3D12_GPU_DESCRIPTOR_HANDLE old_start_gpu_handle = ZI; { P_Lock lock = P_LockE(&G.command_descriptor_heaps_mutex); /* Find first heap ready for reuse */ for (struct command_descriptor_heap *tmp = G.first_submitted_command_descriptor_heap; tmp; tmp = tmp->next_submitted) { /* TODO: Cache completed fence values */ u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence); if (completed_fence_value >= tmp->submitted_fence_target) { cdh = tmp; break; } } if (cdh) { /* Remove from submitted list */ old_heap = cdh->heap; old_start_cpu_handle = cdh->start_cpu_handle; old_start_gpu_handle = cdh->start_gpu_handle; struct command_descriptor_heap *prev = cdh->prev_submitted; struct command_descriptor_heap *next = cdh->next_submitted; if (prev) { prev->next_submitted = next; } else { G.first_submitted_command_descriptor_heap = next; } if (next) { next->prev_submitted = prev; } else { G.last_submitted_command_descriptor_heap = prev; } } else { /* No available heap available for reuse, allocate new */ cdh = PushStructNoZero(G.command_descriptor_heaps_arena, struct command_descriptor_heap); } P_Unlock(&lock); } ZeroStruct(cdh); if (old_heap) { cdh->heap = old_heap; cdh->start_cpu_handle = old_start_cpu_handle; cdh->start_gpu_handle = old_start_gpu_handle; } else { D3D12_DESCRIPTOR_HEAP_DESC desc = ZI; desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; desc.NumDescriptors = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS; desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&cdh->heap); if (FAILED(hr)) { P_Panic(Lit("Failed to create GPU descriptor heap")); } ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cdh->heap, &cdh->start_cpu_handle); ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(cdh->heap, &cdh->start_gpu_handle); } /* CopyCPU heap */ { P_Lock 
lock = P_LockS(&dh_cpu->mutex); ID3D12Device_CopyDescriptorsSimple(G.device, dh_cpu->num_descriptors_reserved, cdh->start_cpu_handle, dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); P_Unlock(&lock); } /* Insert into command list */ cdh->next_in_command_list = cl->first_command_descriptor_heap; cl->first_command_descriptor_heap = cdh; return cdh; } /* ========================== * * Command buffer * ========================== */ internal u64 command_buffer_hash_from_size(u64 size) { u64 hash = RandU64FromSeed(size); return hash; } internal u64 align_up_pow2(u64 v) { u64 result = 0; if (v > 0) { result = v - 1; result |= result >> 1; result |= result >> 2; result |= result >> 4; result |= result >> 8; result |= result >> 16; result |= result >> 32; ++result; } return result; } #define command_list_push_buffer(cl, count, elems) _command_list_push_buffer((cl), count * ((elems) ? sizeof(*(elems)) : 0), (elems), (elems) ? sizeof(*(elems)) : 1) internal struct command_buffer *_command_list_push_buffer(struct command_list *cl, u64 data_len, void *data, u64 data_stride) { __prof; /* Data length should be a multiple of stride */ Assert(data_len % data_stride == 0); /* Determine size */ u64 size = MaxU64(DX12_COMMAND_BUFFER_MIN_SIZE, align_up_pow2(data_len)); /* Allocate buffer */ struct command_buffer_group *cb_group = 0; struct command_buffer *cb = 0; struct dx12_resource *r = 0; { P_Lock lock = P_LockE(&G.command_buffers_mutex); { u64 group_hash = command_buffer_hash_from_size(size); DictEntry *cb_group_entry = EnsureDictEntry(G.command_buffers_arena, G.command_buffers_dict, group_hash); cb_group = (struct command_buffer_group *)cb_group_entry->value; if (!cb_group) { /* Create group */ cb_group = PushStruct(G.command_buffers_arena, struct command_buffer_group); cb_group_entry->value = (u64)cb_group; } } /* Find first command buffer ready for reuse */ for (struct command_buffer *tmp = cb_group->first_submitted; tmp; tmp = tmp->next_submitted) { /* TODO: Cache 
completed fence values */ u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence); if (completed_fence_value >= tmp->submitted_fence_target) { cb = tmp; break; } } if (cb) { /* Remove from submitted list */ r = cb->resource; struct command_buffer *prev = cb->prev_submitted; struct command_buffer *next = cb->next_submitted; if (prev) { prev->next_submitted = next; } else { cb_group->first_submitted = next; } if (next) { next->prev_submitted = prev; } else { cb_group->last_submitted = prev; } } else { /* Allocate new */ cb = PushStructNoZero(G.command_buffers_arena, struct command_buffer); } P_Unlock(&lock); } ZeroStruct(cb); cb->group = cb_group; cb->size = data_len; /* Create upload heap */ if (!r) { D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; D3D12_RESOURCE_DESC desc = ZI; desc.Flags = 0; desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; desc.Format = DXGI_FORMAT_UNKNOWN; desc.Alignment = 0; desc.Width = size; desc.Height = 1; desc.DepthOrArraySize = 1; desc.MipLevels = 1; desc.SampleDesc.Count = 1; desc.SampleDesc.Quality = 0; D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_GENERIC_READ; r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state); r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap); } cb->resource = r; { struct D3D12_SHADER_RESOURCE_VIEW_DESC desc = ZI; desc.Format = DXGI_FORMAT_UNKNOWN; desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; desc.Buffer.FirstElement = 0; desc.Buffer.NumElements = MaxU32(data_len / data_stride, 1); desc.Buffer.StructureByteStride = data_stride; desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; 
ID3D12Device_CreateShaderResourceView(G.device, r->resource, &desc, r->srv_descriptor->handle); } /* Write data to resource */ { D3D12_RANGE read_range = ZI; void *dst = 0; HRESULT hr = ID3D12Resource_Map(cb->resource->resource, 0, &read_range, &dst); if (FAILED(hr) || !dst) { /* TODO: Don't panic */ P_Panic(Lit("Failed to map command buffer resource")); } CopyBytes(dst, data, data_len); ID3D12Resource_Unmap(cb->resource->resource, 0, 0); } /* Insert into command list */ cb->next_in_command_list = cl->first_command_buffer; cl->first_command_buffer = cb; return cb; } /* ========================== * * Wait job * ========================== */ struct dx12_wait_fence_job_sig { ID3D12Fence *fence; u64 target; }; internal P_JobDef(dx12_wait_fence_job, job) { __prof; struct dx12_wait_fence_job_sig *sig = job.sig; ID3D12Fence *fence = sig->fence; u64 target = sig->target; if (ID3D12Fence_GetCompletedValue(fence) < target) { /* TODO: Pool events */ HANDLE event = CreateEvent(0, 0, 0, 0); ID3D12Fence_SetEventOnCompletion(sig->fence, sig->target, event); WaitForSingleObject(event, INFINITE); CloseHandle(event); } } /* ========================== * * Texture * ========================== */ G_Resource *gp_texture_alloc(G_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data) { __prof; if (size.x <= 0 || size.y <= 0) { P_Panic(Lit("Tried to create texture with dimension <= 0")); } LocalPersist const DXGI_FORMAT formats[] = { [GP_TEXTURE_FORMAT_R8_UNORM] = DXGI_FORMAT_R8_UNORM, [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM] = DXGI_FORMAT_R8G8B8A8_UNORM, [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, [GP_TEXTURE_FORMAT_R16G16B16A16_FLOAT] = DXGI_FORMAT_R16G16B16A16_FLOAT }; DXGI_FORMAT dxgi_format = ZI; if (format >= 0 && format < (i32)countof(formats)) { dxgi_format = formats[format]; } if (format == 0) { P_Panic(Lit("Tried to create texture with unknown format")); } D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT }; 
heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; D3D12_RESOURCE_DESC desc = ZI; desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; desc.Format = dxgi_format; desc.Alignment = 0; desc.Width = size.x; desc.Height = size.y; desc.DepthOrArraySize = 1; desc.MipLevels = 1; desc.SampleDesc.Count = 1; desc.SampleDesc.Quality = 0; D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST; struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state); r->texture_size = size; r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap); ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle); if (flags & GP_TEXTURE_FLAG_TARGETABLE) { desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; r->uav_descriptor = descriptor_alloc(G.cbv_srv_uav_heap); r->rtv_descriptor = descriptor_alloc(G.rtv_heap); ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, 0, 0, r->uav_descriptor->handle); ID3D12Device_CreateRenderTargetView(G.device, r->resource, 0, r->rtv_descriptor->handle); } /* Upload texture */ if (initial_data) { /* TODO: Make wait optional */ P_Counter counter = ZI; struct dx12_upload_job_sig sig = ZI; sig.resource = r; sig.data = initial_data; P_Run(1, dx12_upload_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter); P_WaitOnCounter(&counter); } return (G_Resource *)r; } Vec2I32 gp_texture_get_size(G_Resource *resource) { struct dx12_resource *r = (struct dx12_resource *)resource; return r->texture_size; } /* ========================== * * Upload * ========================== */ internal P_JobDef(dx12_upload_job, job) { struct dx12_upload_job_sig *sig = job.sig; struct dx12_resource *r = sig->resource; void *data = sig->data; Assert(r->state == 
D3D12_RESOURCE_STATE_COPY_DEST); D3D12_RESOURCE_DESC desc = ZI; ID3D12Resource_GetDesc(r->resource, &desc); { u64 upload_size = 0; u64 upload_row_size = 0; u32 upload_num_rows = 0; D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; ID3D12Device_GetCopyableFootprints(G.device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; /* Create upload heap */ struct dx12_resource *upload = 0; { D3D12_HEAP_PROPERTIES upload_heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; upload_heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; upload_heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; D3D12_HEAP_FLAGS upload_heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; D3D12_RESOURCE_DESC upload_desc = ZI; upload_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; upload_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; upload_desc.Format = DXGI_FORMAT_UNKNOWN; upload_desc.Alignment = 0; upload_desc.Width = upload_size; upload_desc.Height = 1; upload_desc.DepthOrArraySize = 1; upload_desc.MipLevels = 1; upload_desc.SampleDesc.Count = 1; upload_desc.SampleDesc.Quality = 0; D3D12_RESOURCE_STATES upload_initial_state = D3D12_RESOURCE_STATE_GENERIC_READ; upload = dx12_resource_alloc(upload_heap_props, upload_heap_flags, upload_desc, upload_initial_state); } struct command_queue *cq = G.command_queues[DX12_QUEUE_COPY_BACKGROUND]; struct command_list *cl = command_list_open(cq->cl_pool); { /* Copyto upload heap */ { D3D12_RANGE read_range = ZI; void *mapped = 0; HRESULT hr = ID3D12Resource_Map(upload->resource, 0, &read_range, &mapped); if (FAILED(hr) || !mapped) { /* TODO: Don't panic */ P_Panic(Lit("Failed to map texture upload resource")); } u8 *dst = (u8 *)mapped + placed_footprint.Offset; u8 *src = data; u32 z_size = upload_row_size * upload_num_rows; for (u32 z = 0; z < desc.DepthOrArraySize; ++z) { u32 z_offset = z * z_size; for (u32 y = 0; y < upload_num_rows; 
++y) { CopyBytes(dst + y * footprint.RowPitch + z_offset, src + y * upload_row_size + z_offset, upload_row_size); } } ID3D12Resource_Unmap(upload->resource, 0, 0); } /* Copyfrom upload heap to texture */ { __profnc_dx12(cl->cq->prof, cl->cl, "Upload texture", Rgb32F(0.2, 0.5, 0.2)); D3D12_TEXTURE_COPY_LOCATION dst_loc = { .pResource = r->resource, .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX, .SubresourceIndex = 0, }; D3D12_TEXTURE_COPY_LOCATION src_loc = { .pResource = upload->resource, .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT, .PlacedFootprint = placed_footprint, }; ID3D12GraphicsCommandList_CopyTextureRegion(cl->cl, &dst_loc, 0, 0, 0, &src_loc, 0); } } u64 fence_target = command_list_close(cl); /* Wait on fence so we know it's safe to release upload heap */ if (ID3D12Fence_GetCompletedValue(cq->submit_fence) < fence_target) { struct dx12_wait_fence_job_sig wait_sig = ZI; wait_sig.fence = cq->submit_fence; wait_sig.target = fence_target; P_Counter counter = ZI; P_Run(1, dx12_wait_fence_job, &wait_sig, P_Pool_Floating, P_Priority_Low, &counter); P_WaitOnCounter(&counter); } /* Release upload heap now */ dx12_resource_release_now(upload); } } /* ========================== * * Run utils * ========================== */ internal void command_list_set_pipeline(struct command_list *cl, struct pipeline *pipeline) { ID3D12GraphicsCommandList_SetPipelineState(cl->cl, pipeline->pso); if (pipeline->is_gfx) { ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, pipeline->rootsig); } else { ID3D12GraphicsCommandList_SetComputeRootSignature(cl->cl, pipeline->rootsig); } cl->cur_pipeline = pipeline; } internal void command_list_set_sig(struct command_list *cl, void *src, u32 size) { __prof; Assert(size % 16 == 0); /* Root constant structs must pad to 16 bytes */ Assert(size <= 256); /* Only 64 32-bit root constants allowed in signature */ u32 num32bit = size / 4; b32 is_gfx = cl->cur_pipeline->is_gfx; for (u32 i = 0; i < num32bit; ++i) { u32 val = 0; 
        /* Copy each 32-bit word out of the source struct via CopyBytes
         * (avoids any alignment assumption on `src`). */
        CopyBytes(&val, (((u32 *)src) + i), 4);
        if (is_gfx) {
            ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(cl->cl, 0, val, i);
        } else {
            ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(cl->cl, 0, val, i);
        }
    }
}

/* Converts a Rect (x, y, width, height) into a full-depth D3D12 viewport. */
internal struct D3D12_VIEWPORT viewport_from_rect(Rect r)
{
    struct D3D12_VIEWPORT viewport = ZI;
    viewport.TopLeftX = r.x;
    viewport.TopLeftY = r.y;
    viewport.Width = r.width;
    viewport.Height = r.height;
    viewport.MinDepth = 0.0f;
    viewport.MaxDepth = 1.0f;
    return viewport;
}

/* Converts a Rect into a D3D12 scissor rect (left/top/right/bottom). */
internal D3D12_RECT scissor_from_rect(Rect r)
{
    D3D12_RECT scissor = ZI;
    scissor.left = r.x;
    scissor.top = r.y;
    scissor.right = r.x + r.width;
    scissor.bottom = r.y + r.height;
    return scissor;
}

/* Builds a vertex buffer view covering an entire command buffer. */
internal D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer *cb, u32 vertex_size)
{
    D3D12_VERTEX_BUFFER_VIEW vbv = ZI;
    vbv.BufferLocation = cb->resource->gpu_address;
    vbv.SizeInBytes = cb->size;
    vbv.StrideInBytes = vertex_size;
    return vbv;
}

/* Builds an index buffer view covering an entire command buffer. */
internal D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *cb, DXGI_FORMAT format)
{
    D3D12_INDEX_BUFFER_VIEW ibv = ZI;
    ibv.BufferLocation = cb->resource->gpu_address;
    ibv.Format = format;
    ibv.SizeInBytes = cb->size;
    return ibv;
}

/* Allocates a gbuffer-style 2D render texture (RTV + UAV + SRV capable) in a
 * DEFAULT heap and creates all three descriptors for it. */
internal struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, Vec2I32 size, D3D12_RESOURCE_STATES initial_state)
{
    __prof;
    D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
    heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
    heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
    D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
    D3D12_RESOURCE_DESC desc = ZI;
    desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
    desc.Format = format;
    desc.Alignment = 0;
    desc.Width = size.x;
    desc.Height = size.y;
    desc.DepthOrArraySize = 1;
    desc.MipLevels = 1;
    desc.SampleDesc.Count = 1;
    desc.SampleDesc.Quality = 0;
    /* gbuffers are always render-target and UAV capable */
    desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
    struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state);
    r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
    r->uav_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
    r->rtv_descriptor = descriptor_alloc(G.rtv_heap);
    ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle);
    ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, 0, 0, r->uav_descriptor->handle);
    ID3D12Device_CreateRenderTargetView(G.device, r->resource, 0, r->rtv_descriptor->handle);
    r->texture_size = size;
    return r;
}

/* Calculate the view projection matrix: y-down ortho (top = 0), z in
 * [-1, 1], composed with the view transform. */
Inline Mat4x4 calculate_vp(Xform view, f32 viewport_width, f32 viewport_height)
{
    Mat4x4 projection = Mat4x4FromOrtho(0.0, viewport_width, viewport_height, 0.0, -1.0, 1.0);
    Mat4x4 view4x4 = Mat4x4FromXform(view);
    return MulMat4x4(projection, view4x4);
}

/* Translates a persistent descriptor's index into a GPU handle inside the
 * per-command-list shader-visible heap copy `cdh`. */
internal D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descriptor *descriptor, struct command_descriptor_heap *cdh)
{
    struct D3D12_GPU_DESCRIPTOR_HANDLE result = ZI;
    result.ptr = cdh->start_gpu_handle.ptr + descriptor->index * G.desc_sizes[descriptor->heap->type];
    return result;
}

/* ==========================
 *  Render sig
 * ========================== */

/* Per-frame render submission state: CPU-side command arenas plus the GPU
 * render targets, which are reallocated when the render/UI size changes. */
struct render_sig {
    Arena *arena;
    RandState rand;
    u32 frame_index;
    /* Material instances */
    u32 num_material_instance_descs;
    Arena *material_instance_descs_arena;
    /* Ui instances */
    u32 num_ui_rect_instance_descs;
    Arena *ui_rect_instance_descs_arena;
    /* UI shapes */
    Arena *ui_shape_verts_arena;
    Arena *ui_shape_indices_arena;
    /* Grids */
    u32 num_material_grid_descs;
    Arena *material_grid_descs_arena;
    /* Resources */
    struct dx12_resource *albedo;
    struct dx12_resource *emittance;
    struct dx12_resource *emittance_flood_read;
    struct dx12_resource *emittance_flood_target;
    struct dx12_resource *shade_read;
    struct dx12_resource *shade_target;
    struct dx12_resource *ui_target;
};

/* CPU-side record of one queued "draw material" command. */
struct material_instance_desc {
    Xform xf;
    u32 texture_id;
    ClipRect clip;
    u32 tint;
    b32 is_light;
    Vec3 light_emittance;
    u32 grid_id;
};

/* CPU-side record of one queued "draw UI rect" command. */
struct ui_rect_instance_desc {
    Xform xf;
    u32 texture_id;
    ClipRect clip;
    u32 tint;
};

/* CPU-side record of one queued background-grid description. */
struct material_grid_desc {
    f32 line_thickness;
    f32 line_spacing;
    Vec2 offset;
    u32 bg0_color;
    u32 bg1_color;
    u32 line_color;
    u32 x_color;
    u32 y_color;
};

/* Allocates a render sig and its per-category command arenas. */
internal struct render_sig *render_sig_alloc(void)
{
    __prof;
    struct render_sig *sig = 0;
    {
        Arena *arena = AllocArena(Mebi(64));
        sig = PushStruct(arena, struct render_sig);
        sig->arena = arena; /* the sig lives in (and owns) its own arena */
    }
    sig->material_instance_descs_arena = AllocArena(Gibi(1));
    sig->material_grid_descs_arena = AllocArena(Gibi(1));
    sig->ui_rect_instance_descs_arena = AllocArena(Gibi(1));
    sig->ui_shape_verts_arena = AllocArena(Gibi(1));
    sig->ui_shape_indices_arena = AllocArena(Gibi(1));
    return sig;
}

/* Clears all queued commands; called after each frame is submitted. */
internal void render_sig_reset(struct render_sig *sig)
{
    __prof;
    /* Reset material instances */
    sig->num_material_instance_descs = 0;
    ResetArena(sig->material_instance_descs_arena);
    /* Reset UI rect instances */
    sig->num_ui_rect_instance_descs = 0;
    ResetArena(sig->ui_rect_instance_descs_arena);
    /* Reset shapes */
    ResetArena(sig->ui_shape_verts_arena);
    ResetArena(sig->ui_shape_indices_arena);
    /* Reset grids */
    sig->num_material_grid_descs = 0;
    ResetArena(sig->material_grid_descs_arena);
}

/* Public wrapper: allocate an opaque render sig. */
G_RenderSig *gp_render_sig_alloc(void)
{
    __prof;
    struct render_sig *sig = render_sig_alloc();
    return (G_RenderSig *)sig;
}

/* Queues one render command onto the sig.
 * Returns a 1-based command id for materials, UI rects and grids;
 * returns 0 otherwise (unknown kinds, and UI shapes — see note below). */
u32 gp_push_render_cmd(G_RenderSig *render_sig, G_RenderCmdDesc *cmd_desc)
{
    u32 ret = 0;
    struct render_sig *sig = (struct render_sig *)render_sig;
    if (sig) {
        switch (cmd_desc->kind) {
        default:
            break;
        case GP_RENDER_CMD_KIND_DRAW_MATERIAL: {
            struct dx12_resource *texture = (struct dx12_resource *)cmd_desc->material.texture;
            struct material_instance_desc *instance_desc = PushStruct(sig->material_instance_descs_arena, struct material_instance_desc);
            instance_desc->xf = cmd_desc->material.xf;
            /* 0xFFFFFFFF is the "no texture" sentinel */
            instance_desc->texture_id = texture ? texture->srv_descriptor->index : 0xFFFFFFFF;
            instance_desc->clip = cmd_desc->material.clip;
            instance_desc->tint = cmd_desc->material.tint;
            instance_desc->is_light = cmd_desc->material.is_light;
            instance_desc->light_emittance = cmd_desc->material.light_emittance;
            /* grid_cmd_id is the 1-based id returned by PUSH_GRID; 0 ("no
             * grid") wraps to 0xFFFFFFFF here */
            instance_desc->grid_id = cmd_desc->material.grid_cmd_id - 1;
            ret = ++sig->num_material_instance_descs;
        } break;
        case GP_RENDER_CMD_KIND_DRAW_UI_RECT: {
            struct dx12_resource *texture = (struct dx12_resource *)cmd_desc->ui_rect.texture;
            struct ui_rect_instance_desc *instance_desc = PushStruct(sig->ui_rect_instance_descs_arena, struct ui_rect_instance_desc);
            instance_desc->xf = cmd_desc->ui_rect.xf;
            instance_desc->texture_id = texture ? texture->srv_descriptor->index : 0xFFFFFFFF;
            instance_desc->clip = cmd_desc->ui_rect.clip;
            instance_desc->tint = cmd_desc->ui_rect.tint;
            ret = ++sig->num_ui_rect_instance_descs;
        } break;
        case GP_RENDER_CMD_KIND_DRAW_UI_SHAPE: {
            u32 color = cmd_desc->ui_shape.color;
            struct k_shape_vert *verts = PushStructsNoZero(sig->ui_shape_verts_arena, struct k_shape_vert, cmd_desc->ui_shape.vertices.count);
            u32 *indices = PushStructsNoZero(sig->ui_shape_indices_arena, u32, cmd_desc->ui_shape.indices.count);
            for (u32 i = 0; i < cmd_desc->ui_shape.vertices.count; ++i) {
                struct k_shape_vert *v = &verts[i];
                v->pos = K_Float2FromV2(cmd_desc->ui_shape.vertices.points[i]);
                v->color_srgb = K_UintFromU32(color);
            }
            /* Rebase this shape's indices onto its position within the
             * shared vertex arena. */
            u32 vert_offset = verts - (struct k_shape_vert *)ArenaBase(sig->ui_shape_verts_arena);
            for (u32 i = 0; i < cmd_desc->ui_shape.indices.count; ++i) {
                indices[i] = cmd_desc->ui_shape.indices.indices[i] + vert_offset;
            }
            /* NOTE(review): shapes do not produce a command id — `ret`
             * stays 0 here. Presumably intentional; confirm callers don't
             * expect one. */
        } break;
        case GP_RENDER_CMD_KIND_PUSH_GRID: {
            struct material_grid_desc *grid_desc = PushStruct(sig->material_grid_descs_arena, struct material_grid_desc);
            grid_desc->line_thickness = cmd_desc->grid.line_thickness;
            grid_desc->line_spacing = cmd_desc->grid.line_spacing;
            grid_desc->offset = cmd_desc->grid.offset;
            grid_desc->bg0_color = cmd_desc->grid.bg0_color;
            grid_desc->bg1_color = cmd_desc->grid.bg1_color;
            grid_desc->line_color = cmd_desc->grid.line_color;
            grid_desc->x_color = cmd_desc->grid.x_color;
            grid_desc->y_color = cmd_desc->grid.y_color;
            ret = ++sig->num_material_grid_descs;
        } break;
        }
    }
    return ret;
}

/* ==========================
 *  Render
 * ========================== */

/* Records and submits the whole frame on the direct queue: material gbuffer
 * pass, flood propagation, shading, then UI blit/rect/shape passes into the
 * UI target, which is returned as the presentable result. */
G_Resource *gp_run_render(G_RenderSig *gp_render_sig, G_RenderParams params)
{
    __prof;
    TempArena scratch = BeginScratchNoConflict();
    struct render_sig *rsig = (struct render_sig *)gp_render_sig;
    ++rsig->frame_index;
    /* Clamp sizes so zero-sized windows can't produce zero-sized resources */
    Vec2I32 ui_size = VEC2I32(MaxI32(params.ui_size.x, 1), MaxI32(params.ui_size.y, 1));
    Vec2I32 render_size = VEC2I32(MaxI32(params.render_size.x, 1), MaxI32(params.render_size.y, 1));
    Xform world_to_render_xf = params.world_to_render_xf;
    Xform render_to_ui_xf = params.render_to_ui_xf;
    Rect ui_viewport = RectFromVec2(VEC2(0, 0), VEC2(ui_size.x, ui_size.y));
    Rect render_viewport = RectFromVec2(VEC2(0, 0), VEC2(render_size.x, render_size.y));
    /* Allocate render buffers (recreated when the render size changes) */
    if (rsig->shade_target && !EqVec2I32(render_size, rsig->shade_target->texture_size)) {
        __profn("Release sig resources");
        /* Fenced release: the resources may still be in flight on the GPU */
        fenced_release(rsig->albedo, FENCED_RELEASE_KIND_RESOURCE);
        fenced_release(rsig->emittance, FENCED_RELEASE_KIND_RESOURCE);
        fenced_release(rsig->emittance_flood_read, FENCED_RELEASE_KIND_RESOURCE);
        fenced_release(rsig->emittance_flood_target, FENCED_RELEASE_KIND_RESOURCE);
        fenced_release(rsig->shade_read, FENCED_RELEASE_KIND_RESOURCE);
        fenced_release(rsig->shade_target, FENCED_RELEASE_KIND_RESOURCE);
        rsig->shade_target = 0;
    }
    if (!rsig->shade_target) {
        __profn("Allocate sig resources");
        rsig->albedo = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, render_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
        rsig->emittance = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, render_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
        rsig->emittance_flood_read = gbuff_alloc(DXGI_FORMAT_R16G16_UINT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
        rsig->emittance_flood_target = gbuff_alloc(DXGI_FORMAT_R16G16_UINT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
        rsig->shade_read = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
        rsig->shade_target = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
    }
    /* Allocate ui buffers (recreated when the UI size changes) */
    if (rsig->ui_target && !EqVec2I32(ui_size, rsig->ui_target->texture_size)) {
        fenced_release(rsig->ui_target, FENCED_RELEASE_KIND_RESOURCE);
        rsig->ui_target = 0;
    }
    if (!rsig->ui_target) {
        rsig->ui_target = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, ui_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
    }
    /* Pipelines are held via the scope and released at scope end */
    struct pipeline_scope *pipeline_scope = pipeline_scope_begin();
    struct pipeline *material_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_material"));
    struct pipeline *flood_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_flood"));
    struct pipeline *shade_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_shade"));
    struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_blit"));
    struct pipeline *ui_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_ui"));
    struct pipeline *shape_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_shape"));
    struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
    struct command_list *cl = command_list_open(cq->cl_pool);
    {
        __profn("Run render");
        __profnc_dx12(cl->cq->prof, cl->cl, "Run render", Rgb32F(0.5, 0.2, 0.2));
        Mat4x4 world_to_render_vp_matrix = calculate_vp(world_to_render_xf, render_viewport.width, render_viewport.height);
        Mat4x4 ui_vp_matrix = calculate_vp(XformIdentity, ui_viewport.width, ui_viewport.height);
        Mat4x4 blit_vp_matrix = ZI;
        {
            /* The blit quad is a unit quad: scale it to the render size,
             * offset by half, then map into UI space. */
            Xform xf = render_to_ui_xf;
            xf = ScaleXform(xf, VEC2(render_size.x, render_size.y));
            xf = TranslateXform(xf, VEC2(0.5, 0.5));
            blit_vp_matrix = calculate_vp(xf, ui_viewport.width, ui_viewport.height);
        }
        /* Upload dummy vert & index buffer */
        /* TODO: Make these static */
        /* Dummy vertex buffer */
        LocalPersist u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
        struct command_buffer *dummy_vertex_buffer = command_list_push_buffer(cl, 0, (u8 *)0);
        struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, countof(quad_indices), quad_indices);
        /* Process sig data into uploadable data (CPU descs -> kernel
         * structs in scratch memory) */
        struct k_material_instance *material_instances = PushStructsNoZero(scratch.arena, struct k_material_instance, rsig->num_material_instance_descs);
        struct k_ui_instance *ui_rect_instances = PushStructsNoZero(scratch.arena, struct k_ui_instance, rsig->num_ui_rect_instance_descs);
        struct k_material_grid *grids = PushStructsNoZero(scratch.arena, struct k_material_grid, rsig->num_material_grid_descs);
        {
            __profn("Process sig data");
            /* Process material instances */
            {
                __profn("Process material instances");
                for (u32 i = 0; i < rsig->num_material_instance_descs; ++i) {
                    struct material_instance_desc *desc = &((struct material_instance_desc *)ArenaBase(rsig->material_instance_descs_arena))[i];
                    struct k_material_instance *instance = &material_instances[i];
                    instance->tex_nurid = K_UintFromU32(desc->texture_id);
                    instance->grid_id = K_UintFromU32(desc->grid_id);
                    instance->xf = K_Float2x3FromXform(desc->xf);
                    instance->uv0 = K_Float2FromV2(desc->clip.p0);
                    instance->uv1 = K_Float2FromV2(desc->clip.p1);
                    instance->tint_srgb = K_UintFromU32(desc->tint);
                    instance->is_light = K_UintFromU32(desc->is_light);
                    instance->light_emittance_srgb = K_Float3FromV3(desc->light_emittance);
                }
            }
            /* Process ui rect instances */
            {
                __profn("Process ui rect instances");
                for (u32 i = 0; i < rsig->num_ui_rect_instance_descs; ++i) {
                    struct ui_rect_instance_desc *desc = &((struct ui_rect_instance_desc *)ArenaBase(rsig->ui_rect_instance_descs_arena))[i];
                    struct k_ui_instance *instance = &ui_rect_instances[i];
                    instance->tex_nurid = K_UintFromU32(desc->texture_id);
                    instance->xf = K_Float2x3FromXform(desc->xf);
                    instance->uv0 = K_Float2FromV2(desc->clip.p0);
                    instance->uv1 = K_Float2FromV2(desc->clip.p1);
                    instance->tint_srgb = K_UintFromU32(desc->tint);
                }
            }
            /* Process grids */
            {
                __profn("Process grids");
                for (u32 i = 0; i < rsig->num_material_grid_descs; ++i) {
                    struct material_grid_desc *desc = &((struct material_grid_desc *)ArenaBase(rsig->material_grid_descs_arena))[i];
                    struct k_material_grid *grid = &grids[i];
                    grid->line_thickness = K_FloatFromF32(desc->line_thickness);
                    grid->line_spacing = K_FloatFromF32(desc->line_spacing);
                    grid->offset = K_Float2FromV2(desc->offset);
                    grid->bg0_srgb = K_UintFromU32(desc->bg0_color);
                    grid->bg1_srgb = K_UintFromU32(desc->bg1_color);
                    grid->line_srgb = K_UintFromU32(desc->line_color);
                    grid->x_srgb = K_UintFromU32(desc->x_color);
                    grid->y_srgb = K_UintFromU32(desc->y_color);
                }
            }
        }
        /* Upload buffers */
        u64 num_ui_shape_verts = rsig->ui_shape_verts_arena->pos / sizeof(struct k_shape_vert);
        u64 num_ui_shape_indices = rsig->ui_shape_indices_arena->pos / sizeof(u32);
        struct command_buffer *material_instance_buffer = command_list_push_buffer(cl, rsig->num_material_instance_descs, material_instances);
        struct command_buffer *ui_rect_instance_buffer = command_list_push_buffer(cl, rsig->num_ui_rect_instance_descs, ui_rect_instances);
        struct command_buffer *ui_shape_verts_buffer = command_list_push_buffer(cl, num_ui_shape_verts, (struct k_shape_vert *)ArenaBase(rsig->ui_shape_verts_arena));
        struct command_buffer *ui_shape_indices_buffer = command_list_push_buffer(cl, num_ui_shape_indices, (u32 *)ArenaBase(rsig->ui_shape_indices_arena));
        struct command_buffer *grid_buffer = command_list_push_buffer(cl, rsig->num_material_grid_descs, grids);
        /* Upload descriptor heap (shader-visible copy for this command list) */
        struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap);
        ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap };
        ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps);
        /* Prep for material pass */
        {
            /* Barrier */
            {
                struct dx12_resource_barrier_desc barriers[] = {
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->albedo, D3D12_RESOURCE_STATE_RENDER_TARGET },
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance, D3D12_RESOURCE_STATE_RENDER_TARGET }
                };
                D3D12_CPU_DESCRIPTOR_HANDLE rtvs[] = {
                    rsig->albedo->rtv_descriptor->handle,
                    rsig->emittance->rtv_descriptor->handle,
                };
                dx12_resource_barriers(cl->cl, countof(barriers), barriers);
                ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, countof(rtvs), rtvs, 0, 0);
            }
            /* Clear */
            {
                __profn("Clear gbuffers");
                __profnc_dx12(cl->cq->prof, cl->cl, "Clear gbuffers", Rgb32F(0.5, 0.2, 0.2));
                f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
                ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, rsig->albedo->rtv_descriptor->handle, clear_color, 0, 0);
                ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, rsig->emittance->rtv_descriptor->handle, clear_color, 0, 0);
            }
        }
        /* Material pass: instanced quads write the albedo + emittance gbuffers */
        if (material_pipeline->success) {
            __profn("Material pass");
            __profnc_dx12(cl->cq->prof, cl->cl, "Material pass", Rgb32F(0.5, 0.2, 0.2));
            /* Bind pipeline */
            command_list_set_pipeline(cl, material_pipeline);
            /* Set Rasterizer State */
            D3D12_VIEWPORT viewport = viewport_from_rect(render_viewport);
            D3D12_RECT scissor = scissor_from_rect(render_viewport);
            ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
            ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
            /* Set sig */
            struct k_material_sig sig = ZI;
            sig.projection = K_Float4x4FromMat4x4(world_to_render_vp_matrix);
            sig.instances_urid = K_UintFromU32(material_instance_buffer->resource->srv_descriptor->index);
            sig.grids_urid = K_UintFromU32(grid_buffer->resource->srv_descriptor->index);
            command_list_set_sig(cl, &sig, sizeof(sig));
            /* Draw */
            u32 instance_count = material_instance_buffer->size / sizeof(struct k_material_instance);
            D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
            D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
            ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
            ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
            ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
            ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, instance_count, 0, 0, 0);
        }
        /* Prep for flood pass */
        {
            /* Barrier */
            {
                struct dx12_resource_barrier_desc barriers[] = {
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance_flood_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
                    { D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->emittance_flood_read, 0 },
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance_flood_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }
                };
                dx12_resource_barriers(cl->cl, countof(barriers), barriers);
            }
        }
        /* Flood pass: iterative compute expansion of the emittance field,
         * ping-ponging between the read/target flood textures */
        if (flood_pipeline->success && !params.effects_disabled) {
            __profn("Flood pass");
            __profnc_dx12(cl->cq->prof, cl->cl, "Flood pass", Rgb32F(0.5, 0.2, 0.2));
            /* Bind pipeline */
            command_list_set_pipeline(cl, flood_pipeline);
            i32 step_length = -1; /* -1 == "first step"; initialized from size below */
            /* TODO: Remove this */
            u64 max_steps = GetGstat(GSTAT_DEBUG_STEPS);
            u64 step = 0;
            while (step_length != 0 && step < max_steps) {
                __profn("Flood step");
                __profnc_dx12(cl->cq->prof, cl->cl, "Flood step", Rgb32F(0.5, 0.2, 0.2));
                /* UAV barrier: the previous step's writes must land before
                 * this step reads them */
                {
                    struct dx12_resource_barrier_desc barriers[] = {
                        { D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->emittance_flood_read, 0 }
                    };
                    dx12_resource_barriers(cl->cl, countof(barriers), barriers);
                }
                /* Set sig */
                struct k_flood_sig sig = ZI;
                sig.step_len = K_IntFromI32(step_length);
                sig.emittance_tex_urid = K_UintFromU32(rsig->emittance->srv_descriptor->index);
                sig.read_flood_tex_urid = K_UintFromU32(rsig->emittance_flood_read->uav_descriptor->index);
                sig.target_flood_tex_urid = K_UintFromU32(rsig->emittance_flood_target->uav_descriptor->index);
                sig.tex_width = K_UintFromU32(render_size.x);
                sig.tex_height = K_UintFromU32(render_size.y);
                command_list_set_sig(cl, &sig, sizeof(sig));
                /* Dispatch */
                /* 8x8 thread groups; round the grid up to cover the texture */
                ID3D12GraphicsCommandList_Dispatch(cl->cl, (render_size.x + 7) / 8, (render_size.y + 7) / 8, 1);
                /* Swap buffers (ping-pong) */
                struct dx12_resource *swp = rsig->emittance_flood_read;
                rsig->emittance_flood_read = rsig->emittance_flood_target;
                rsig->emittance_flood_target = swp;
                /* Update step: start at half the max dimension, halve until 0 */
                if (step_length == -1) {
                    step_length = MaxI32(render_size.x, render_size.y) / 2;
                } else {
                    step_length /= 2;
                }
                ++step;
            }
        }
        /* Prep for shade pass */
        {
            /* Barrier */
            {
                struct dx12_resource_barrier_desc barriers[] = {
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->albedo, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance_flood_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
                    { D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->emittance_flood_read, 0 },
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->shade_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
                    { D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->shade_read, 0 },
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->shade_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }
                };
                dx12_resource_barriers(cl->cl, countof(barriers), barriers);
            }
            /* Clear */
            {
                __profn("Clear shade target");
                __profnc_dx12(cl->cq->prof, cl->cl, "Clear shade target", Rgb32F(0.5, 0.2, 0.2));
                f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
                /* ClearUnorderedAccessViewFloat takes both the shader-visible
                 * GPU handle and the CPU handle of the same UAV */
                ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(cl->cl, gpu_handle_from_descriptor(rsig->shade_target->uav_descriptor, descriptor_heap), rsig->shade_target->uav_descriptor->handle, rsig->shade_target->resource, clear_color, 0, 0);
            }
        }
        /* Shade pass: combines the gbuffers + flood field into the lit image */
        if (shade_pipeline->success) {
            __profn("Shade pass");
            __profnc_dx12(cl->cq->prof, cl->cl, "Shade pass", Rgb32F(0.5, 0.2, 0.2));
            /* Bind pipeline */
            command_list_set_pipeline(cl, shade_pipeline);
            u32 shade_flags = K_SHADE_FLAG_NONE;
            if (params.effects_disabled) {
                shade_flags |= K_SHADE_FLAG_DISABLE_EFFECTS;
            }
            /* Set sig */
            struct k_shade_sig sig = ZI;
            sig.flags = K_UintFromU32(shade_flags);
            sig.tex_width = K_UintFromU32(render_size.x);
            sig.tex_height = K_UintFromU32(render_size.y);
            sig.frame_seed = K_Uint4FromU32((u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF), (u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF), (u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF), (u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF));
            sig.frame_index = K_UintFromU32(rsig->frame_index);
            sig.camera_offset = K_Float2FromV2(world_to_render_xf.og);
            sig.albedo_tex_urid = K_UintFromU32(rsig->albedo->srv_descriptor->index);
            sig.emittance_tex_urid = K_UintFromU32(rsig->emittance->srv_descriptor->index);
            sig.emittance_flood_tex_urid = K_UintFromU32(rsig->emittance_flood_read->srv_descriptor->index);
            sig.read_tex_urid = K_UintFromU32(rsig->shade_read->uav_descriptor->index);
            sig.target_tex_urid = K_UintFromU32(rsig->shade_target->uav_descriptor->index);
            command_list_set_sig(cl, &sig, sizeof(sig));
            /* Dispatch */
            ID3D12GraphicsCommandList_Dispatch(cl->cl, (render_size.x + 7) / 8, (render_size.y + 7) / 8, 1);
            /* Swap (ping-pong so the next frame reads this frame's result) */
            struct dx12_resource *swp = rsig->shade_read;
            rsig->shade_read = rsig->shade_target;
            rsig->shade_target = swp;
        }
        /* Prep for UI pass */
        {
            /* Barrier */
            {
                struct dx12_resource_barrier_desc barriers[] = {
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->shade_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
                    { D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->shade_read, 0 },
                    { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->ui_target, D3D12_RESOURCE_STATE_RENDER_TARGET }
                };
                dx12_resource_barriers(cl->cl, countof(barriers), barriers);
                ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &rsig->ui_target->rtv_descriptor->handle, 0, 0);
            }
            /* Clear */
            {
                __profn("Clear ui target");
                __profnc_dx12(cl->cq->prof, cl->cl, "Clear ui target", Rgb32F(0.5, 0.2, 0.2));
                f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
                ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, rsig->ui_target->rtv_descriptor->handle, clear_color, 0, 0);
            }
        }
        /* UI blit pass: tone-map + gamma-correct the lit image into UI space */
        if (blit_pipeline->success) {
            __profn("UI blit pass");
            __profnc_dx12(cl->cq->prof, cl->cl, "UI blit pass", Rgb32F(0.5, 0.2, 0.2));
            /* Bind pipeline */
            command_list_set_pipeline(cl, blit_pipeline);
            /* Set Rasterizer State */
            D3D12_VIEWPORT viewport = viewport_from_rect(ui_viewport);
            D3D12_RECT scissor = scissor_from_rect(ui_viewport);
            ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
            ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
            /* Set sig */
            struct k_blit_sig sig = ZI;
            sig.projection = K_Float4x4FromMat4x4(blit_vp_matrix);
            sig.flags = K_UintFromU32(K_BLIT_FLAG_TONE_MAP | K_BLIT_FLAG_GAMMA_CORRECT);
            sig.exposure = K_FloatFromF32(2.0);
            sig.gamma = K_FloatFromF32((f32)2.2);
            sig.tex_urid = K_UintFromU32(rsig->shade_read->uav_descriptor->index);
            command_list_set_sig(cl, &sig, sizeof(sig));
            /* Draw */
            D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
            D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
            ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
            ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
            ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
            ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0);
        }
        /* UI rect pass: instanced textured rects */
        if (ui_pipeline->success) {
            __profn("UI rect pass");
            __profnc_dx12(cl->cq->prof, cl->cl, "UI rect pass", Rgb32F(0.5, 0.2, 0.2));
            /* Bind pipeline */
            command_list_set_pipeline(cl, ui_pipeline);
            /* Set Rasterizer State */
            D3D12_VIEWPORT viewport = viewport_from_rect(ui_viewport);
            D3D12_RECT scissor = scissor_from_rect(ui_viewport);
            ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
            ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
            /* Set sig */
            struct k_ui_sig sig = ZI;
            sig.projection = K_Float4x4FromMat4x4(ui_vp_matrix);
            sig.instances_urid = K_UintFromU32(ui_rect_instance_buffer->resource->srv_descriptor->index);
            command_list_set_sig(cl, &sig, sizeof(sig));
            /* Draw */
            u32 instance_count = ui_rect_instance_buffer->size / sizeof(struct k_ui_instance);
            D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
            D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
            ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
            ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
            ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
            ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, instance_count, 0, 0, 0);
        }
        /* UI shape pass: arbitrary triangle lists from the shape arenas */
        if (shape_pipeline->success) {
            __profn("UI shape pass");
            __profnc_dx12(cl->cq->prof, cl->cl, "UI shape pass", Rgb32F(0.5, 0.2, 0.2));
            /* Bind pipeline */
            command_list_set_pipeline(cl, shape_pipeline);
            /* Set Rasterizer State */
            D3D12_VIEWPORT viewport = viewport_from_rect(ui_viewport);
            D3D12_RECT scissor = scissor_from_rect(ui_viewport);
            ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
            ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
            /* Set sig */
            struct k_shape_sig sig = ZI;
            sig.projection = K_Float4x4FromMat4x4(ui_vp_matrix);
            sig.verts_urid = K_UintFromU32(ui_shape_verts_buffer->resource->srv_descriptor->index);
            command_list_set_sig(cl, &sig, sizeof(sig));
            /* Draw */
            u32 index_count = ui_shape_indices_buffer->size / sizeof(u32);
            D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
            D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(ui_shape_indices_buffer, DXGI_FORMAT_R32_UINT);
            ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
            ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
            ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
            ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, index_count, 1, 0, 0, 0);
        }
    }
    command_list_close(cl);
    pipeline_scope_end(pipeline_scope);
    render_sig_reset(rsig);
    EndScratch(scratch);
    /* NOTE(review): the returned ui_target remains owned by rsig and is
     * replaced on resize via fenced_release above. */
    return (G_Resource *)rsig->ui_target;
}

/* ==========================
 *  Memory info
 * ========================== */

/* Queries local / non-local video memory usage and budget via IDXGIAdapter3.
 * Fields stay zero if the adapter does not expose the interface. */
G_MemoryInfo gp_query_memory_info(void)
{
    G_MemoryInfo result = ZI;
    HRESULT hr = 0; /* starts "succeeded" so the SUCCEEDED chain is uniform */
    IDXGIAdapter3 *dxgiAdapter3 = 0;
    if (SUCCEEDED(hr)) {
        hr = IDXGIAdapter_QueryInterface(G.adapter, &IID_IDXGIAdapter3, (void **)&dxgiAdapter3);
    }
    if (SUCCEEDED(hr)) {
        struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
        IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info);
        result.local_used = info.CurrentUsage;
        result.local_budget = info.Budget;
    }
    if (SUCCEEDED(hr)) {
        struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
        IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info);
        result.non_local_used = info.CurrentUsage;
        result.non_local_budget = info.Budget;
    }
    if (dxgiAdapter3) {
        IDXGIAdapter_Release(dxgiAdapter3);
    }
    return result;
}

/* ==========================
 *  Swapchain
 * ========================== */

/* (Re)creates the RTV + state bookkeeping for every backbuffer of `swapchain`. */
internal void swapchain_init_resources(struct swapchain *swapchain)
{
    for (u32 i = 0; i < countof(swapchain->buffers); ++i) {
        ID3D12Resource *resource = 0;
        HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, i, &IID_ID3D12Resource, (void **)&resource);
        if (FAILED(hr)) {
            /* TODO: Don't panic */
            P_Panic(Lit("Failed to get swapchain buffer"));
        }
        struct swapchain_buffer *sb = &swapchain->buffers[i];
        ZeroStruct(sb);
        sb->swapchain = swapchain;
        sb->resource = resource;
        sb->rtv_descriptor = descriptor_alloc(G.rtv_heap);
        sb->state = D3D12_RESOURCE_STATE_COMMON;
        ID3D12Device_CreateRenderTargetView(G.device, sb->resource, 0, sb->rtv_descriptor->handle);
    }
}

/* Creates a flip-discard swapchain (optionally tearing / frame-latency
 * waitable) for a platform window. Swapchain slots are pooled behind
 * G.swapchains_mutex. */
G_Swapchain *gp_swapchain_alloc(P_Window *window, Vec2I32 resolution)
{
    HRESULT hr = 0;
    HWND hwnd = (HWND)P_GetInternalWindowHandle(window);
    struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
    struct swapchain *swapchain = 0;
    {
        P_Lock lock = P_LockE(&G.swapchains_mutex);
        if (G.first_free_swapchain) {
            swapchain = G.first_free_swapchain;
			G.first_free_swapchain = swapchain->next_free;
		} else {
			swapchain = PushStruct(G.swapchains_arena, struct swapchain);
		}
		P_Unlock(&lock);
	}
	/* Create swapchain1 */
	IDXGISwapChain1 *swapchain1 = 0;
	{
		DXGI_SWAP_CHAIN_DESC1 desc = ZI;
		desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
		desc.Width = resolution.x;
		desc.Height = resolution.y;
		desc.SampleDesc.Count = 1; /* flip-model swapchains cannot be multisampled */
		desc.SampleDesc.Quality = 0;
		desc.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT;
		desc.BufferCount = DX12_SWAPCHAIN_BUFFER_COUNT;
		desc.Scaling = DXGI_SCALING_NONE;
		desc.Flags = DX12_SWAPCHAIN_FLAGS; /* tearing + frame-latency waitable, per config above */
		desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
		desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
		hr = IDXGIFactory2_CreateSwapChainForHwnd(G.factory, (IUnknown *)cq->cq, hwnd, &desc, 0, 0, &swapchain1);
		if (FAILED(hr)) {
			P_Panic(Lit("Failed to create IDXGISwapChain1"));
		}
	}
	/* Upgrade to swapchain3 (needed for GetCurrentBackBufferIndex) */
	hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain->swapchain);
	if (FAILED(hr)) {
		P_Panic(Lit("Failed to create IDXGISwapChain3"));
	}
	/* Create waitable object */
#if DX12_WAIT_FRAME_LATENCY > 0
	IDXGISwapChain3_SetMaximumFrameLatency(swapchain->swapchain, DX12_WAIT_FRAME_LATENCY);
	swapchain->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->swapchain);
	Assert(swapchain->waitable);
#endif
	/* Disable Alt+Enter changing monitor resolution to match window size */
	IDXGIFactory_MakeWindowAssociation(G.factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
	/* The IDXGISwapChain3 interface holds its own reference now */
	IDXGISwapChain1_Release(swapchain1);
	swapchain->hwnd = hwnd;
	swapchain_init_resources(swapchain);
	return (G_Swapchain *)swapchain;
}

/* Return a swapchain to the free list. Not implemented yet. */
void
gp_swapchain_release(G_Swapchain *gp_swapchain)
{
	/* TODO */
	(UNUSED)gp_swapchain;
}

/* Block (up to 1s, alertable) on the swapchain's frame-latency waitable
 * object so the CPU does not run more than DX12_WAIT_FRAME_LATENCY frames
 * ahead of the presenter. No-op when frame-latency waiting is compiled out. */
void
gp_swapchain_wait(G_Swapchain *gp_swapchain)
{
#if DX12_WAIT_FRAME_LATENCY > 0
	struct swapchain *swapchain = (struct swapchain *)gp_swapchain;
	if (swapchain->waitable) {
		WaitForSingleObjectEx(swapchain->waitable, 1000, 1);
	}
#else
	(UNUSED)gp_swapchain;
#endif
}

internal struct swapchain_buffer
/* Resize the swapchain when the requested resolution changed (flushing the
 * direct queue first, since its work may target the backbuffers), then return
 * the current backbuffer. Resolution is clamped to at least 1x1 so minimized
 * windows don't produce zero-sized buffers. */
*update_swapchain(struct swapchain *swapchain, Vec2I32 resolution)
{
	__prof;
	resolution.x = MaxI32(resolution.x, 1);
	resolution.y = MaxI32(resolution.y, 1);
	b32 should_rebuild = !EqVec2I32(swapchain->resolution, resolution);
	if (should_rebuild) {
		HRESULT hr = 0;
		struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
		/* Lock direct queue submissions (in case any write to backbuffer) */
		/* TODO: Less overkill approach - Only flush present_blit since we know it's the only operation targeting backbuffer */
		P_Lock lock = P_LockE(&cq->submit_fence_mutex);
		//DEBUGBREAKABLE;
		//P_Lock lock = P_LockE(&G.global_command_list_record_mutex);
		{
			/* Flush direct queue */
			/* NOTE(review): waits on the existing submit_fence_target without
			 * issuing a fresh Signal (see the commented-out call below) — this
			 * relies on every prior submission having signaled the fence;
			 * confirm that invariant holds on all submit paths. */
			//ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, ++cq->submit_fence_target);
			{
				HANDLE event = CreateEvent(0, 0, 0, 0);
				ID3D12Fence_SetEventOnCompletion(cq->submit_fence, cq->submit_fence_target, event);
				WaitForSingleObject(event, INFINITE);
				CloseHandle(event);
			}
			/* Release buffers: ResizeBuffers requires all outstanding
			 * references to the backbuffers to be dropped first */
			for (u32 i = 0; i < countof(swapchain->buffers); ++i) {
				struct swapchain_buffer *sb = &swapchain->buffers[i];
				descriptor_release(sb->rtv_descriptor);
				ID3D12Resource_Release(sb->resource);
			}
			/* Resize buffers (count 0 / DXGI_FORMAT_UNKNOWN preserve the
			 * existing buffer count and format) */
			hr = IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, DX12_SWAPCHAIN_FLAGS);
			if (FAILED(hr)) {
				/* TODO: Don't panic */
				P_Panic(Lit("Failed to resize swapchain"));
			}
		}
		P_Unlock(&lock);
		swapchain_init_resources(swapchain);
		swapchain->resolution = resolution;
	}
	u32 backbuffer_index = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->swapchain);
	return &swapchain->buffers[backbuffer_index];
}

/* ==========================
 *
 * Present
 *
 * ========================== */

/* Record and submit a fullscreen blit of src (transformed by src_xf) into the
 * given backbuffer, transitioning it to RENDER_TARGET and back to PRESENT.
 * Skipped entirely if the "kernel_blit" pipeline failed to compile. */
internal void
present_blit(struct swapchain_buffer *dst, struct dx12_resource *src, Xform src_xf)
{
	__prof;
	struct pipeline_scope *pipeline_scope = pipeline_scope_begin();
	struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_blit"));
	if (blit_pipeline->success) {
		struct
command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT]; struct command_list *cl = command_list_open(cq->cl_pool); { __profn("Present blit"); __profnc_dx12(cl->cq->prof, cl->cl, "Present blit", Rgb32F(0.5, 0.2, 0.2)); struct swapchain *swapchain = dst->swapchain; /* Upload dummmy vert & index buffer */ /* TODO: Make these static */ /* Dummy vertex buffer */ LocalPersist u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 }; struct command_buffer *dummy_vertex_buffer = command_list_push_buffer(cl, 0, (u8 *)0); struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, countof(quad_indices), quad_indices); /* Upload descriptor heap */ struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap); ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap }; ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps); Rect viewport_rect = RectFromVec2(VEC2(0, 0), VEC2(swapchain->resolution.x, swapchain->resolution.y)); D3D12_VIEWPORT viewport = viewport_from_rect(viewport_rect); D3D12_RECT scissor = scissor_from_rect(viewport_rect); Mat4x4 vp_matrix = ZI; { Xform xf = src_xf; xf = ScaleXform(xf, VEC2(src->texture_size.x, src->texture_size.y)); xf = TranslateXform(xf, VEC2(0.5, 0.5)); vp_matrix = calculate_vp(xf, viewport.Width, viewport.Height); } /* Transition dst to render target */ { struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI; rtb.pResource = dst->resource; rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; rtb.StateBefore = dst->state; rtb.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; struct D3D12_RESOURCE_BARRIER rb = ZI; rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; rb.Flags = 0; rb.Transition = rtb; ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb); dst->state = rtb.StateAfter; } ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &dst->rtv_descriptor->handle, 0, 0); /* Clear */ f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; 
			ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, dst->rtv_descriptor->handle, clear_color, 0, 0);
			/* Bind pipeline */
			command_list_set_pipeline(cl, blit_pipeline);
			/* Set Rasterizer State */
			ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
			ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
			/* Set sig (root-signature constants for the blit shader; the
			 * source texture is addressed bindlessly via its SRV index) */
			struct k_blit_sig sig = ZI;
			sig.projection = K_Float4x4FromMat4x4(vp_matrix);
			sig.flags = K_UintFromU32(K_BLIT_FLAG_NONE);
			sig.tex_urid = K_UintFromU32(src->srv_descriptor->index);
			command_list_set_sig(cl, &sig, sizeof(sig));
			/* Draw one instance of the two-triangle quad (6 R16 indices) */
			D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
			D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
			ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
			ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
			ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
			ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0);
			/* Transition dst to presentable */
			{
				struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
				rtb.pResource = dst->resource;
				rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
				rtb.StateBefore = dst->state;
				rtb.StateAfter = D3D12_RESOURCE_STATE_PRESENT;
				struct D3D12_RESOURCE_BARRIER rb = ZI;
				rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
				rb.Flags = 0;
				rb.Transition = rtb;
				ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb);
				dst->state = rtb.StateAfter;
			}
		}
		command_list_close(cl);
	}
	pipeline_scope_end(pipeline_scope);
}

/* Present one frame: acquire (and resize if needed) the backbuffer, blit
 * `texture` into it with `texture_xf`, then Present. vsync is forwarded as
 * the DXGI sync interval; vsync == 0 additionally allows tearing when the
 * swapchain was created with the tearing flag. With GPU profiling enabled,
 * also marks per-queue frame boundaries and collects GPU timings. */
void
gp_present(G_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, G_Resource *texture, Xform texture_xf, i32 vsync)
{
	__prof;
	struct swapchain *swapchain = (struct swapchain *)gp_swapchain;
	struct swapchain_buffer *swapchain_buffer = update_swapchain(swapchain, backbuffer_resolution);
	struct dx12_resource *texture_resource = (struct dx12_resource *)texture;
	/* Blit */
	present_blit(swapchain_buffer,
	             texture_resource, texture_xf);
	u32 present_flags = 0;
	if (vsync == 0) {
		/* DXGI requires the tearing present flag for unsynchronized presents
		 * on ALLOW_TEARING flip-model swapchains (compiles to 0 when the
		 * tearing feature is disabled at build time) */
		present_flags |= (DXGI_PRESENT_ALLOW_TEARING * DX12_ALLOW_TEARING);
	}
	/* Present */
	{
		__profn("Present");
		HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags);
		if (!SUCCEEDED(hr)) {
			Assert(0);
		}
	}
#if ProfilingIsEnabled_GPU
	{
		__profframe(0);
		__profn("Mark queue frames");
		/* Lock because frame marks shouldn't occur while command lists are recording */
		P_Lock lock = P_LockE(&G.global_command_list_record_mutex);
		for (u32 i = 0; i < countof(G.command_queues); ++i) {
			{
				struct command_queue *cq = G.command_queues[i];
				__prof_dx12_new_frame(cq->prof);
			}
		}
		P_Unlock(&lock);
	}
	{
		__profn("Collect queues");
		for (u32 i = 0; i < countof(G.command_queues); ++i) {
			struct command_queue *cq = G.command_queues[i];
			__prof_dx12_collect(cq->prof);
		}
	}
#endif
}

/* ==========================
 *
 * Evictor thread
 *
 * ========================== */

/* Background job that defers resource/pipeline destruction until the GPU is
 * done with them: drains the fenced-release queue, waits for each queue's
 * submit fence to pass the recorded target, then performs the real releases.
 * Sleeps on evictor_wake_cv between runs; exits when evictor_shutdown is set. */
internal P_JobDef(dx12_evictor_job, _)
{
	(UNUSED)_;
	/* Cached last-seen fence value per queue, to avoid redundant waits */
	u64 completed_targets[DX12_NUM_QUEUES] = ZI;
	b32 shutdown = 0;
	while (!shutdown) {
		{
			__profn("Dx12 evictor run");
			TempArena scratch = BeginScratchNoConflict();
			u64 targets[countof(completed_targets)] = ZI;
			/* Copy queued release records out under the mutex (then reset the
			 * arena) so other threads can keep queueing while we process */
			u32 num_fenced_releases = 0;
			struct fenced_release_data *fenced_releases = 0;
			{
				__profn("Copyqueued releases");
				P_Lock lock = P_LockE(&G.fenced_releases_mutex);
				num_fenced_releases = G.fenced_releases_arena->pos / sizeof(struct fenced_release_data);
				fenced_releases = PushStructsNoZero(scratch.arena, struct fenced_release_data, num_fenced_releases);
				CopyBytes(fenced_releases, ArenaBase(G.fenced_releases_arena), G.fenced_releases_arena->pos);
				ResetArena(G.fenced_releases_arena);
				CopyBytes(targets, G.fenced_release_targets, sizeof(targets));
				P_Unlock(&lock);
			}
			/* Wait until fences reach target */
			{
				__profn("Check fences");
				for (u32 i = 0; i < countof(targets); ++i) {
					while (completed_targets[i] < targets[i]) {
						struct command_queue *cq = G.command_queues[i];
						completed_targets[i] =
						    ID3D12Fence_GetCompletedValue(cq->submit_fence);
						if (completed_targets[i] < targets[i]) {
							__profn("Wait on fence");
							{
								/* Fence hasn't reached the target yet: block on
								 * a low-priority floating job that waits for it,
								 * then re-read the completed value in the loop */
								struct dx12_wait_fence_job_sig sig = ZI;
								sig.fence = cq->submit_fence;
								sig.target = targets[i];
								{
									P_Counter counter = ZI;
									P_Run(1, dx12_wait_fence_job, &sig, P_Pool_Floating, P_Priority_Low, &counter);
									P_WaitOnCounter(&counter);
								}
							}
						}
					}
				}
			}
			/* Process releases: all GPU work referencing them has retired */
			for (u32 i = 0; i < num_fenced_releases; ++i) {
				struct fenced_release_data *fr = &fenced_releases[i];
				switch (fr->kind) {
				default: {
					/* Unknown handle type */
					Assert(0);
				} break;
				case FENCED_RELEASE_KIND_RESOURCE: {
					struct dx12_resource *resource = (struct dx12_resource *)fr->ptr;
					dx12_resource_release_now(resource);
				} break;
				case FENCED_RELEASE_KIND_PIPELINE: {
					struct pipeline *pipeline = (struct pipeline *)fr->ptr;
					pipeline_release_now(pipeline);
				} break;
				}
			}
			EndScratch(scratch);
		}
		/* Sleep until woken (new releases queued) or shutdown requested;
		 * wake_gen is a level-triggered flag consumed under the mutex */
		P_Lock lock = P_LockE(&G.evictor_wake_mutex);
		{
			while (!G.evictor_shutdown && G.evictor_wake_gen == 0) {
				P_WaitOnCv(&G.evictor_wake_cv, &lock);
			}
			shutdown = G.evictor_shutdown;
			G.evictor_wake_gen = 0;
		}
		P_Unlock(&lock);
	}
}