GPU_D12_SharedState GPU_D12_shared_state = ZI; //////////////////////////////////////////////////////////// //~ @hookimpl Startup void GPU_Startup(void) { GPU_D12_SharedState *g = &GPU_D12_shared_state; TempArena scratch = BeginScratchNoConflict(); Arena *perm = PermArena(); ////////////////////////////// //- Initialize device { HRESULT hr = 0; /* Enable debug layer */ u32 dxgi_factory_flags = 0; #if GPU_DEBUG { ID3D12Debug *debug_controller0 = 0; { hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0); if (FAILED(hr)) { Panic(Lit("Failed to create ID3D12Debug0")); } ID3D12Debug_EnableDebugLayer(debug_controller0); #if GPU_DEBUG_VALIDATION { ID3D12Debug1 *debug_controller1 = 0; { hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1); if (FAILED(hr)) { Panic(Lit("Failed to create ID3D12Debug1")); } ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, 1); } ID3D12Debug_Release(debug_controller1); } #endif } ID3D12Debug_Release(debug_controller0); dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG; } #endif /* Create factory */ { hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&g->factory); if (FAILED(hr)) { Panic(Lit("Failed to initialize DXGI factory")); } } /* Create device */ { IDXGIAdapter3 *adapter = 0; ID3D12Device10 *device = 0; String error = Lit("Could not initialize GPU device."); String first_gpu_name = ZI; u32 adapter_index = 0; b32 skip = 0; /* For iGPU testing */ for (;;) { { hr = IDXGIFactory6_EnumAdapterByGpuPreference(g->factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter3, (void **)&adapter); } if (SUCCEEDED(hr)) { DXGI_ADAPTER_DESC1 desc; IDXGIAdapter3_GetDesc1(adapter, &desc); if (first_gpu_name.len == 0) { first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description); } { /* TODO: Verify feature support: * - HighestShaderModel >= D3D_SHADER_MODEL_6_6 * - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3 * - 
EnhancedBarriersSupported == 1 */ hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device); } if (SUCCEEDED(hr) && !skip) { break; } skip = 0; ID3D12Device_Release(device); IDXGIAdapter3_Release(adapter); adapter = 0; device = 0; ++adapter_index; } else { break; } } if (!device) { if (first_gpu_name.len > 0) { error = StringF(scratch.arena, "Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.", FmtString(first_gpu_name)); } Panic(error); } g->adapter = adapter; g->device = device; } /* Enable debug layer breaks */ { #if GPU_DEBUG /* Enable D3D12 Debug break */ { ID3D12InfoQueue *info = 0; hr = ID3D12Device_QueryInterface(g->device, &IID_ID3D12InfoQueue, (void **)&info); if (FAILED(hr)) { Panic(Lit("Failed to query ID3D12Device interface")); } ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1); ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1); ID3D12InfoQueue_Release(info); } /* Enable DXGI Debug break */ { IDXGIInfoQueue *dxgi_info = 0; hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info); if (FAILED(hr)) { Panic(Lit("Failed to get DXGI debug interface")); } IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1); IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1); IDXGIInfoQueue_Release(dxgi_info); } #endif } } ////////////////////////////// //- Initialize command queues { GPU_D12_CommandQueueDesc descs[] = { { .type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH }, { .type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL }, { .type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL }, }; for (u32 i = 0; i < MinU32(countof(descs), countof(g->queues)); ++i) { 
GPU_D12_CommandQueueDesc desc = descs[i]; D3D12_COMMAND_QUEUE_DESC d3d_desc = { .Type = desc.type, .Priority = desc.priority }; GPU_D12_Queue *queue = &g->queues[i]; queue->desc = desc; HRESULT hr = ID3D12Device_CreateCommandQueue(g->device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->d3d_queue); if (SUCCEEDED(hr)) { hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&queue->commit_fence); } if (FAILED(hr)) { Panic(Lit("Failed to create GPU Command Queue")); } } } ////////////////////////////// //- Initialize descriptor heaps { Struct(Dx12HeapDesc) { D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; u64 max; }; Dx12HeapDesc descs[GPU_D12_DescriptorHeapKind_Count] = { [GPU_D12_DescriptorHeapKind_CbvSrvUav] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = GPU_D12_MaxCbvSrvUavDescriptors, }, [GPU_D12_DescriptorHeapKind_Rtv] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE, .max = GPU_D12_MaxRtvDescriptors, }, [GPU_D12_DescriptorHeapKind_Sampler] = { .type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = GPU_D12_MaxSamplerDescriptors, }, }; for (GPU_D12_DescriptorHeapKind kind = 0; kind < countof(descs); ++kind) { Dx12HeapDesc desc = descs[kind]; GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[kind]; heap->descriptors_arena = AcquireArena(Gibi(1)); heap->type = desc.type; heap->max_count = desc.max; heap->descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, desc.type); D3D12_DESCRIPTOR_HEAP_DESC d3d_desc = ZI; d3d_desc.Type = desc.type; d3d_desc.Flags = desc.flags; d3d_desc.NumDescriptors = desc.max; HRESULT hr = 0; if (SUCCEEDED(hr)) { hr = ID3D12Device_CreateDescriptorHeap(g->device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap); } if (SUCCEEDED(hr)) { ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, 
&heap->start_handle); } if (SUCCEEDED(hr)) { /* Push an empty descriptor at index 0, so that a handle with a value of 0 always represents nil */ GPU_D12_Arena *gpu_perm = GPU_D12_ArenaFromHandle(GPU_PermArena()); GPU_D12_Descriptor *nil_descriptor = GPU_D12_PushDescriptor(gpu_perm, kind, 0); Assert(nil_descriptor->index == 0); } if (FAILED(hr)) { Panic(Lit("Failed to create descriptor heap")); } } } ////////////////////////////// //- Initialize bindless root signature { HRESULT hr = 0; /* Serialize root signature */ ID3D10Blob *blob = 0; if (SUCCEEDED(hr)) { D3D12_ROOT_PARAMETER params[MaxShaderConstants] = ZI; for (i32 slot = 0; slot < MaxShaderConstants; ++slot) { D3D12_ROOT_PARAMETER *param = ¶ms[slot]; param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; param->ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; param->Constants.ShaderRegister = slot; param->Constants.RegisterSpace = 0; param->Constants.Num32BitValues = 1; } D3D12_ROOT_SIGNATURE_DESC desc = ZI; desc.NumParameters = countof(params); desc.pParameters = params; desc.NumStaticSamplers = 0; desc.pStaticSamplers = 0; desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED; hr = D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, 0); } /* Create root signature */ ID3D12RootSignature *rootsig = 0; if (SUCCEEDED(hr)) { hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), &IID_ID3D12RootSignature, (void **)&rootsig); } g->bindless_rootsig = rootsig; if (blob) { ID3D10Blob_Release(blob); } if (FAILED(hr)) { Panic(Lit("Failed to create root signature")); } } EndScratch(scratch); } //////////////////////////////////////////////////////////// //~ Helpers GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle) { return (GPU_D12_Arena *)handle.v; } GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle) { return 
  (GPU_D12_CmdList *)handle.v;
}
GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle)
{
  return (GPU_D12_Resource *)handle.v;
}
GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle)
{
  return (GPU_D12_Swapchain *)handle.v;
}
/* Plain cast: GPU_Format values are defined to match DXGI_FORMAT numerically
 * (presumably - TODO confirm against the GPU_Format declaration). */
DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format)
{
  return (DXGI_FORMAT)format;
}
/* Translates the backend-agnostic stage mask into D3D12 enhanced-barrier sync
 * bits. AnyBit(...) yields 0/1, so `FLAG * AnyBit(...)` ORs the flag in only
 * when the corresponding stage bit is set. */
D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStages(GPU_Stage stages)
{
  D3D12_BARRIER_SYNC result = 0;
  if (stages == GPU_Stage_All) {
    result = D3D12_BARRIER_SYNC_ALL;
  } else {
    result |= D3D12_BARRIER_SYNC_COMPUTE_SHADING * AnyBit(stages, GPU_Stage_ComputeShading);
    result |= D3D12_BARRIER_SYNC_INDEX_INPUT * AnyBit(stages, GPU_Stage_IndexAssembly);
    result |= D3D12_BARRIER_SYNC_VERTEX_SHADING * AnyBit(stages, GPU_Stage_VertexShading);
    result |= D3D12_BARRIER_SYNC_PIXEL_SHADING * AnyBit(stages, GPU_Stage_PixelShading);
    result |= D3D12_BARRIER_SYNC_DEPTH_STENCIL * AnyBit(stages, GPU_Stage_DepthStencil);
    result |= D3D12_BARRIER_SYNC_RENDER_TARGET * AnyBit(stages, GPU_Stage_RenderTarget);
    result |= D3D12_BARRIER_SYNC_COPY * AnyBit(stages, GPU_Stage_Copy);
    result |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT * AnyBit(stages, GPU_Stage_Indirect);
  }
  return result;
}
/* Translates the access mask into D3D12 barrier access bits.
 * An empty mask means "no access"; GPU_Access_All collapses to COMMON. */
D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccesses(GPU_Access accesses)
{
  D3D12_BARRIER_ACCESS result = 0;
  if (accesses == 0) {
    result = D3D12_BARRIER_ACCESS_NO_ACCESS;
  } else if (accesses == GPU_Access_All) {
    result = D3D12_BARRIER_ACCESS_COMMON;
  } else {
    result |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS * AnyBit(accesses, GPU_Access_ShaderReadWrite);
    result |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE * AnyBit(accesses, GPU_Access_ShaderRead);
    result |= D3D12_BARRIER_ACCESS_COPY_DEST * AnyBit(accesses, GPU_Access_CopyWrite);
    result |= D3D12_BARRIER_ACCESS_COPY_SOURCE * AnyBit(accesses, GPU_Access_CopyRead);
    result |= D3D12_BARRIER_ACCESS_INDEX_BUFFER * AnyBit(accesses, GPU_Access_IndexBuffer);
    result |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT * AnyBit(accesses, GPU_Access_IndirectArgument);
    result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ * AnyBit(accesses, GPU_Access_DepthStencilRead);
    result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE * AnyBit(accesses, GPU_Access_DepthStencilWrite);
    result |= D3D12_BARRIER_ACCESS_RENDER_TARGET * AnyBit(accesses, GPU_Access_RenderTargetWrite);
  }
  return result;
}
/* Maps GPU_Layout directly to a D3D12 barrier layout via a lookup table.
 * The table is indexed by the enum value; out-of-range layouts are not
 * guarded here (caller is trusted to pass a valid GPU_Layout). */
D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayout(GPU_Layout layout)
{
  PERSIST Readonly D3D12_BARRIER_LAYOUT translate[] = {
    [GPU_Layout_Undefined] = D3D12_BARRIER_LAYOUT_UNDEFINED,
    [GPU_Layout_Simultaneous] = D3D12_BARRIER_LAYOUT_COMMON,
    [GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present] = D3D12_BARRIER_LAYOUT_COMMON,
    [GPU_Layout_DirectComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS,
    [GPU_Layout_DirectComputeQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_GENERIC_READ,
    [GPU_Layout_DirectComputeQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_SHADER_RESOURCE,
    [GPU_Layout_DirectComputeQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COPY_SOURCE,
    [GPU_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON,
    [GPU_Layout_DirectQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS,
    [GPU_Layout_DirectQueue_ShaderRead_CopyRead_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ,
    [GPU_Layout_DirectQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE,
    [GPU_Layout_DirectQueue_CopyRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE,
    [GPU_Layout_DirectQueue_DepthStencilRead_DepthStencilWrite] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
    [GPU_Layout_DirectQueue_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ,
    [GPU_Layout_DirectQueue_RenderTargetWrite] = D3D12_BARRIER_LAYOUT_RENDER_TARGET,
    [GPU_Layout_ComputeQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON,
    [GPU_Layout_ComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS,
    [GPU_Layout_ComputeQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ,
    [GPU_Layout_ComputeQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE,
    [GPU_Layout_ComputeQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE,
  };
  return translate[layout];
}
; /* NOTE(review): stray top-level semicolon; harmless on common compilers but
   * not strictly standard C - consider removing. */

////////////////////////////////////////////////////////////
//~ Pipeline

/* Returns the cached pipeline for `desc`, creating (and caching) it on first
 * use. The cache is a hash table of lock-protected bins; lookup takes a
 * shared lock, insertion upgrades to an exclusive lock and re-checks.
 * NOTE(review): the new pipeline is published into the bin *before* its PSO
 * is created below (outside the lock) - a concurrent caller can observe
 * pipeline->pso == 0 for a freshly inserted entry. Verify callers tolerate
 * this or move PSO creation inside the exclusive section. */
GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc)
{
  GPU_D12_SharedState *g = &GPU_D12_shared_state;
  u64 hash = RandU64FromSeed(HashFnv64(Fnv64Basis, StringFromStruct(&desc)));

  /* Fetch pipeline from cache */
  GPU_D12_Pipeline *pipeline = 0;
  b32 is_pipeline_new = 0;
  GPU_D12_PipelineBin *bin = &g->pipeline_bins[hash % countof(g->pipeline_bins)];
  {
    /* Fast path: shared lock, search the bin */
    {
      Lock lock = LockS(&bin->mutex);
      for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin) {
        if (pipeline->hash == hash) break;
      }
      Unlock(&lock);
    }
    /* Slow path: exclusive lock, re-check, then insert */
    if (!pipeline) {
      Lock lock = LockE(&bin->mutex);
      for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin) {
        if (pipeline->hash == hash) break;
      }
      if (!pipeline) {
        Arena *perm = PermArena();
        PushAlign(perm, CachelineSize);
        pipeline = PushStruct(perm, GPU_D12_Pipeline);
        pipeline->desc = desc;
        pipeline->hash = hash;
        is_pipeline_new = 1;
        PushAlign(perm, CachelineSize);
        SllStackPushN(bin->first, pipeline, next_in_bin);
      }
      Unlock(&lock);
    }
  }

  /* Create pipeline (only the thread that inserted it does this) */
  if (is_pipeline_new) {
    HRESULT hr = 0;
    b32 ok = 1;
    String error_str = ZI;

    /* Create PSO: graphics if a VS or PS is present, otherwise compute */
    ID3D12PipelineState *pso = 0;
    if (ok && (!IsResourceNil(desc.vs.resource) || !IsResourceNil(desc.ps.resource))) {
      D3D12_RASTERIZER_DESC raster_desc = ZI;
      if (desc.is_wireframe) {
        raster_desc.FillMode = D3D12_FILL_MODE_WIREFRAME;
      } else {
        raster_desc.FillMode = D3D12_FILL_MODE_SOLID;
      }
      raster_desc.CullMode = D3D12_CULL_MODE_NONE;
      raster_desc.FrontCounterClockwise = 0;
      raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
      raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
      raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
      raster_desc.DepthClipEnable = 1;
      raster_desc.MultisampleEnable = 0;
      raster_desc.AntialiasedLineEnable = 0;
      raster_desc.ForcedSampleCount = 0;
      raster_desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;

      /* Fixed blend state: standard premultiplied-style alpha blend on RT0 */
      D3D12_BLEND_DESC blend_desc = ZI;
      blend_desc.AlphaToCoverageEnable = 0;
      blend_desc.IndependentBlendEnable = 0;
      blend_desc.RenderTarget[0].BlendEnable = 1;
      blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
      blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
      blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
      blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE;
      blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
      blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
      blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;

      /* Depth/stencil disabled for all graphics pipelines created here */
      D3D12_DEPTH_STENCIL_DESC ds_desc = ZI;
      ds_desc.DepthEnable = 0;
      ds_desc.StencilEnable = 0;

      String vs = DataFromResource(desc.vs.resource);
      String ps = DataFromResource(desc.ps.resource);
      D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = ZI;
      pso_desc.pRootSignature = g->bindless_rootsig;
      pso_desc.VS.pShaderBytecode = vs.text;
      pso_desc.VS.BytecodeLength = vs.len;
      pso_desc.PS.pShaderBytecode = ps.text;
      pso_desc.PS.BytecodeLength = ps.len;
      pso_desc.RasterizerState = raster_desc;
      pso_desc.BlendState = blend_desc;
      pso_desc.DepthStencilState = ds_desc;
      pso_desc.PrimitiveTopologyType = desc.topology_type;
      pso_desc.SampleMask = UINT_MAX;
      pso_desc.SampleDesc.Count = 1;
      pso_desc.SampleDesc.Quality = 0;
      /* Collect render target formats until the first UNKNOWN sentinel */
      for (i32 i = 0; i < (i32)countof(desc.render_target_formats); ++i) {
        StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc.render_target_formats));
        DXGI_FORMAT format = GPU_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[i]);
        if (format != DXGI_FORMAT_UNKNOWN) {
          pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format;
        } else {
          break;
        }
      }
      hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
      if (FAILED(hr)) {
        error_str = Lit("Failed to create graphics pipeline");
        ok = 0;
      }
    } else if (ok) {
      String cs = DataFromResource(desc.cs.resource);
      D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = ZI;
      pso_desc.pRootSignature = g->bindless_rootsig;
      pso_desc.CS.pShaderBytecode = cs.text;
      pso_desc.CS.BytecodeLength = cs.len;
      hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
      if (FAILED(hr)) {
        error_str = Lit("Failed to create compute pipeline");
        ok = 0;
      }
    }
    if (!ok) {
      /* TODO: Don't panic */
      Panic(error_str);
    }
    pipeline->pso = pso;
    pipeline->error = error_str;
    pipeline->ok = ok;
  }
  return pipeline;
}

////////////////////////////////////////////////////////////
//~ Queue

/* Queue kinds index g->queues directly (same order as the Startup descs). */
GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind)
{
  GPU_D12_SharedState *g = &GPU_D12_shared_state;
  return &g->queues[kind];
}

////////////////////////////////////////////////////////////
//~ Raw command list

/* Returns a reset, ready-to-record raw command list for the given queue:
 * recycles the oldest committed list whose fence has signalled, otherwise
 * allocates a new allocator + list pair. */
GPU_D12_RawCommandList *GPU_D12_PrepareRawCommandList(GPU_QueueKind queue_kind)
{
  GPU_D12_SharedState *g = &GPU_D12_shared_state;
  GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);

  /* Try to pull first completed command list from queue */
  GPU_D12_RawCommandList *cl = ZI;
  {
    Lock lock = LockE(&queue->commit_mutex);
    {
      u64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence);
      cl = queue->first_committed_cl;
      if (cl && cl->commit_fence_target <= completed) {
        SllQueuePop(queue->first_committed_cl, queue->last_committed_cl);
      } else {
        cl = 0; /* Oldest list is still in flight on the GPU */
      }
    }
    Unlock(&lock);
  }

  /* Allocate new command list if none are available */
  if (!cl) {
    Arena *perm = PermArena();
    {
      PushAlign(perm, CachelineSize);
      cl = PushStruct(perm, GPU_D12_RawCommandList);
      PushAlign(perm, CachelineSize);
    }
    cl->queue = queue;
    HRESULT hr = 0;
    {
      if (SUCCEEDED(hr)) {
        hr = ID3D12Device_CreateCommandAllocator(g->device, queue->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->d3d_ca);
      }
      if (SUCCEEDED(hr)) {
        hr =
             ID3D12Device_CreateCommandList(g->device, 0, queue->desc.type, cl->d3d_ca, 0, &IID_ID3D12GraphicsCommandList7, (void **)&cl->d3d_cl);
      }
      if (SUCCEEDED(hr)) {
        /* Lists are created open; close so the Reset below is uniform for
         * both fresh and recycled lists */
        hr = ID3D12GraphicsCommandList_Close(cl->d3d_cl);
      }
      /* Initialize Direct queue CPU-only descriptors */
      if (SUCCEEDED(hr) && queue_kind == GPU_QueueKind_Direct) {
        GPU_D12_Arena *gpu_perm = GPU_D12_ArenaFromHandle(GPU_PermArena());
        for (u32 i = 0; i < countof(cl->rtv_descriptors); ++i) {
          cl->rtv_descriptors[i] = GPU_D12_PushDescriptor(gpu_perm, GPU_D12_DescriptorHeapKind_Rtv, 0);
        }
        cl->rtv_clear_descriptor = GPU_D12_PushDescriptor(gpu_perm, GPU_D12_DescriptorHeapKind_Rtv, 0);
      }
    }
    if (FAILED(hr)) {
      Panic(Lit("Failed to create command list"));
    }
  }

  /* Reset command list (allocator first, then the list against it) */
  {
    HRESULT hr = 0;
    {
      if (SUCCEEDED(hr)) {
        hr = ID3D12CommandAllocator_Reset(cl->d3d_ca);
      }
      if (SUCCEEDED(hr)) {
        hr = ID3D12GraphicsCommandList_Reset(cl->d3d_cl, cl->d3d_ca, 0);
      }
    }
    if (FAILED(hr)) {
      Panic(Lit("Failed to reset command list"));
    }
  }
  return cl;
}

/* Closes and executes `cl` on its queue, signals the queue's commit fence
 * with a fresh monotonically-increasing target, and appends the list to the
 * queue's committed FIFO so PrepareRawCommandList can recycle it once the
 * fence passes the target. */
void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl)
{
  GPU_D12_Queue *queue = cl->queue;

  /* Close */
  {
    HRESULT hr = ID3D12GraphicsCommandList_Close(cl->d3d_cl);
    if (FAILED(hr)) {
      /* TODO: Don't panic */
      Panic(Lit("Failed to close command list before execution"));
    }
  }

  /* Commit */
  {
    Lock lock = LockE(&queue->commit_mutex);
    {
      u64 target = ++queue->commit_fence_target;
      cl->commit_fence_target = target;
      /* Execute */
      ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->d3d_cl);
      ID3D12CommandQueue_Signal(queue->d3d_queue, queue->commit_fence, target);
      /* Append */
      SllQueuePush(queue->first_committed_cl, queue->last_committed_cl, cl);
    }
    Unlock(&lock);
  }
}

////////////////////////////////////////////////////////////
//~ @hookimpl Arena

/* Allocates a backend GPU arena (CPU bookkeeping arena only here; the D3D12
 * resource heap is created lazily on first resource push). */
GPU_ArenaHandle GPU_AcquireArena(void)
{
  GPU_D12_Arena *gpu_arena = 0;
  {
    Arena *perm = PermArena();
    PushAlign(perm, CachelineSize);
    gpu_arena = PushStruct(perm, GPU_D12_Arena);
    PushAlign(perm, CachelineSize);
  }
  gpu_arena->arena = AcquireArena(Gibi(1));
  return GPU_D12_MakeHandle(GPU_ArenaHandle, gpu_arena);
}
void GPU_ReleaseArena(GPU_ArenaHandle arena)
{
  /* TODO */
}

////////////////////////////////////////////////////////////
//~ Resource helpers

/* Descriptors for a heap live contiguously in the heap's bookkeeping arena,
 * so an index maps straight to an array slot. */
GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_kind, u32 index)
{
  GPU_D12_SharedState *g = &GPU_D12_shared_state;
  GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[heap_kind];
  GPU_D12_Descriptor *descriptors = ArenaFirst(heap->descriptors_arena, GPU_D12_Descriptor);
  return &descriptors[index];
}

/* Allocates a descriptor slot in the given heap.
 * forced == 0: reuse a GPU-retired descriptor from the arena's committed
 * lists if possible, else pop the heap free list, else grow the heap.
 * forced != 0: the caller demands that exact slot index; the heap is grown
 * or the free list searched to satisfy it, and it is a fatal error if the
 * slot is already occupied. */
GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_DescriptorHeapKind heap_kind, u32 forced)
{
  GPU_D12_SharedState *g = &GPU_D12_shared_state;
  GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[heap_kind];
  GPU_D12_Descriptor *descriptor = 0;

  /* Grab completed descriptor from arena */
  if (forced == 0) {
    GPU_D12_DescriptorList *descriptors_by_queue = gpu_arena->committed_descriptors_by_heap_and_queue[heap_kind];
    for (GPU_QueueKind queue_kind = 0; !descriptor && queue_kind < GPU_NumQueues; ++queue_kind) {
      GPU_D12_DescriptorList *descriptors = &descriptors_by_queue[queue_kind];
      descriptor = descriptors->first;
      if (descriptor) {
        GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
        u64 queue_commit_completion = ID3D12Fence_GetCompletedValue(queue->commit_fence);
        if (queue_commit_completion >= descriptor->queue_commit_target) {
          /* Descriptor no longer in use by gpu, reuse it */
          DllQueueRemove(descriptors->first, descriptors->last, descriptor);
        } else {
          /* Descriptor may still be in use by gpu */
          descriptor = 0;
        }
      }
    }
  }

  /* Allocate new descriptor from heap */
  u32 index = forced;
  if (!descriptor) {
    Lock lock = LockE(&heap->mutex);
    {
      if (index == 0) {
        if (heap->first_free) {
          descriptor = heap->first_free;
          DllStackRemove(heap->first_free, descriptor);
          index = descriptor->index;
        } else {
          u32 descriptors_count = ArenaCount(heap->descriptors_arena, GPU_D12_Descriptor);
          if
(descriptors_count >= heap->max_count) { Panic(Lit("Max descriptors reached in heap")); } descriptor = PushStruct(heap->descriptors_arena, GPU_D12_Descriptor); index = descriptors_count; } } else { if (index >= heap->max_count) { Panic(Lit("Max descriptors reached in heap")); } /* Push descriptors if index slot is past end of heap */ u32 descriptors_count = ArenaCount(heap->descriptors_arena, GPU_D12_Descriptor); if (index >= descriptors_count) { u32 pushed_count = index - descriptors_count + 1; PushStructs(heap->descriptors_arena, GPU_D12_Descriptor, pushed_count); for (u32 pushed_index = descriptors_count; pushed_index < descriptors_count + pushed_count; ++pushed_index) { GPU_D12_Descriptor *pushed = &(ArenaFirst(heap->descriptors_arena, GPU_D12_Descriptor)[pushed_index]); if (pushed_index < index) { pushed->heap = heap; pushed->index = pushed_index; pushed->handle.ptr = heap->start_handle.ptr + (pushed_index * heap->descriptor_size); DllStackPush(heap->first_free, pushed); } else { descriptor = pushed; } } } /* Search free list for freed descriptor with matching index */ if (!descriptor) { for (GPU_D12_Descriptor *n = heap->first_free; n; n = n->next) { if (n->index == index) { DllStackRemove(heap->first_free, n); descriptor = n; break; } } } if (!descriptor) { Arena *perm = PermArena(); Panic(StringF(perm, "Tried to force push a GPU pointer into slot %F, but a descriptor already exists there (current heap count: %F)", FmtUint(index), FmtUint(ArenaCount(heap->descriptors_arena, GPU_D12_Descriptor)))); } } } Unlock(&lock); } /* Initialize descriptor handle */ ZeroStruct(descriptor); descriptor->heap = heap; descriptor->index = index; descriptor->handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); return descriptor; } //////////////////////////////////////////////////////////// //~ @hookimpl Resource //- Resource creation GPU_ResourceHandle GPU_PushBufferResource(GPU_ArenaHandle arena_handle, GPU_BufferResourceDesc desc) { GPU_D12_SharedState *g 
= &GPU_D12_shared_state; GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); /* Create resource heap */ if (!gpu_arena->d3d_resource_heap) { /* FIXME: Dynamic size */ D3D12_HEAP_DESC d3d_desc = ZI; d3d_desc.SizeInBytes = Mebi(64); d3d_desc.Flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; d3d_desc.Flags |= D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; /* TODO: Remove this and support tier 1 resource heaps */ d3d_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; ID3D12Heap *heap = 0; HRESULT hr = ID3D12Device_CreateHeap(g->device, &d3d_desc, &IID_ID3D12Heap, (void **)&heap); if (!SUCCEEDED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to create D3D12 resource heap")); } gpu_arena->d3d_resource_heap = heap; gpu_arena->heap_size = d3d_desc.SizeInBytes; } /* Create d3d resource */ ID3D12Resource *d3d_resource = 0; u64 aligned_size = AlignU64(MaxU64(desc.size, 1), 4); { D3D12_RESOURCE_DESC1 d3d_desc = ZI; d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; d3d_desc.Format = DXGI_FORMAT_UNKNOWN; d3d_desc.Width = aligned_size; d3d_desc.Height = 1; d3d_desc.DepthOrArraySize = 1; d3d_desc.MipLevels = 1; d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Quality = 0; d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_AllowShaderReadWrite); u64 alloc_size = 0; u64 alloc_align = 0; { D3D12_RESOURCE_ALLOCATION_INFO alloc_info = ZI; ID3D12Device_GetResourceAllocationInfo(g->device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc); alloc_size = alloc_info.SizeInBytes; alloc_align = alloc_info.Alignment; } u64 alloc_pos = gpu_arena->heap_pos; alloc_pos = AlignU64(alloc_pos, alloc_align); gpu_arena->heap_pos = alloc_pos + alloc_size; if (alloc_pos + alloc_size > gpu_arena->heap_size) { Panic(Lit("Gpu arena overflow")); } HRESULT hr = ID3D12Device10_CreatePlacedResource2(g->device, gpu_arena->d3d_resource_heap, alloc_pos, &d3d_desc, D3D12_BARRIER_LAYOUT_UNDEFINED, 0, 0, 
0, &IID_ID3D12Resource, (void **)&d3d_resource); } GPU_D12_Resource *resource = PushStruct(gpu_arena->arena, GPU_D12_Resource); resource->d3d_resource = d3d_resource; resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; resource->flags = desc.flags; resource->buffer_size = desc.size; resource->buffer_size_aligned = aligned_size; resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource); return GPU_D12_MakeHandle(GPU_ResourceHandle, resource); } GPU_ResourceHandle GPU_PushTextureResource(GPU_ArenaHandle arena_handle, GPU_TextureResourceDesc desc) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); D3D12_BARRIER_LAYOUT initial_layout = GPU_D12_BarrierLayoutFromLayout(desc.initial_layout); /* Create resource heap */ if (!gpu_arena->d3d_resource_heap) { /* FIXME: Dynamic size */ D3D12_HEAP_DESC d3d_desc = ZI; d3d_desc.SizeInBytes = Mebi(64); d3d_desc.Flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; d3d_desc.Flags |= D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; /* TODO: Remove this and support tier 1 resource heaps */ d3d_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; ID3D12Heap *heap = 0; HRESULT hr = ID3D12Device_CreateHeap(g->device, &d3d_desc, &IID_ID3D12Heap, (void **)&heap); if (!SUCCEEDED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to create D3D12 resource heap")); } gpu_arena->d3d_resource_heap = heap; gpu_arena->heap_size = d3d_desc.SizeInBytes; } /* Create d3d resource */ ID3D12Resource *d3d_resource = 0; { D3D12_RESOURCE_DESC1 d3d_desc = ZI; d3d_desc.Dimension = desc.kind == GPU_TextureKind_1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : desc.kind == GPU_TextureKind_2D ? 
D3D12_RESOURCE_DIMENSION_TEXTURE2D : D3D12_RESOURCE_DIMENSION_TEXTURE3D; d3d_desc.Format = GPU_D12_DxgiFormatFromGpuFormat(desc.format); d3d_desc.Width = MaxI32(desc.dims.x, 1); d3d_desc.Height = MaxI32(desc.dims.y, 1); d3d_desc.DepthOrArraySize = MaxI32(desc.dims.z, 1); d3d_desc.MipLevels = MaxI32(desc.mip_levels, 1); d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Quality = 0; d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_AllowShaderReadWrite); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_AllowRenderTarget); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(desc.flags, GPU_ResourceFlag_AllowDepthStencil); u64 alloc_size = 0; u64 alloc_align = 0; { D3D12_RESOURCE_ALLOCATION_INFO alloc_info = ZI; ID3D12Device_GetResourceAllocationInfo(g->device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc); alloc_size = alloc_info.SizeInBytes; alloc_align = alloc_info.Alignment; } u64 alloc_pos = gpu_arena->heap_pos; alloc_pos = AlignU64(alloc_pos, alloc_align); gpu_arena->heap_pos = alloc_pos + alloc_size; if (alloc_pos + alloc_size > gpu_arena->heap_size) { Panic(Lit("Gpu arena overflow")); } D3D12_CLEAR_VALUE clear_value = { .Color[0] = desc.clear_color.x, .Color[1] = desc.clear_color.y, .Color[2] = desc.clear_color.z, .Color[3] = desc.clear_color.w, .Format = d3d_desc.Format }; HRESULT hr = ID3D12Device10_CreatePlacedResource2(g->device, gpu_arena->d3d_resource_heap, alloc_pos, &d3d_desc, initial_layout, (d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? 
&clear_value : 0, 0, 0, &IID_ID3D12Resource, (void **)&d3d_resource); } GPU_D12_Resource *resource = PushStruct(gpu_arena->arena, GPU_D12_Resource); resource->d3d_resource = d3d_resource; resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; resource->flags = desc.flags; resource->is_texture = 1; resource->texture_format = desc.format; resource->texture_dims = desc.dims; resource->texture_mip_levels = desc.mip_levels; resource->texture_layout = initial_layout; return GPU_D12_MakeHandle(GPU_ResourceHandle, resource); } GPU_ResourceHandle GPU_PushSamplerResource(GPU_ArenaHandle arena_handle, GPU_SamplerResourceDesc desc) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); GPU_D12_Resource *resource = PushStruct(gpu_arena->arena, GPU_D12_Resource); resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; resource->sampler_desc = desc; return GPU_D12_MakeHandle(GPU_ResourceHandle, resource); } //////////////////////////////////////////////////////////// //~ @hookimpl Shader-accessible pointer u32 GPU_PushPointer(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, GPU_PointerDesc pointer_desc) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle); u32 result = 0; ShaderHandleKind kind = pointer_desc.kind; b32 is_buffer = kind == ShaderHandleKind_StructuredBuffer || kind == ShaderHandleKind_RWStructuredBuffer || kind == ShaderHandleKind_ByteAddressBuffer || kind == ShaderHandleKind_RWByteAddressBuffer; b32 is_sampler = kind == ShaderHandleKind_SamplerState; b32 is_texture = !is_buffer && !is_sampler; b32 is_raw = kind == ShaderHandleKind_ByteAddressBuffer || kind == ShaderHandleKind_RWByteAddressBuffer; b32 is_uav = kind == ShaderHandleKind_RWStructuredBuffer || kind == ShaderHandleKind_RWByteAddressBuffer || kind == 
               ShaderHandleKind_RWTexture1D ||
               kind == ShaderHandleKind_RWTexture2D ||
               kind == ShaderHandleKind_RWTexture3D;

  GPU_D12_Descriptor *descriptor = 0;
  if (is_buffer) {
    descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav, pointer_desc.forced);
    /* View the buffer as an array of element_size-byte elements starting at
     * element_offset; a zero-element view leaves the slot uninitialized. */
    u64 buffer_size_aligned = resource->buffer_size_aligned;
    u64 num_elements_in_buffer = buffer_size_aligned / pointer_desc.element_size;
    u64 num_elements_after_offset = num_elements_in_buffer > pointer_desc.element_offset ? num_elements_in_buffer - pointer_desc.element_offset : 0;
    if (num_elements_after_offset > 0) {
      if (is_uav) {
        D3D12_UNORDERED_ACCESS_VIEW_DESC desc = ZI;
        {
          desc.Format = DXGI_FORMAT_UNKNOWN;
          desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER ;
          desc.Buffer.FirstElement = pointer_desc.element_offset;
          desc.Buffer.NumElements = num_elements_after_offset;
          desc.Buffer.StructureByteStride = pointer_desc.element_size;
          desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
        }
        if (is_raw) {
          /* Raw (byte-address) views require R32_TYPELESS + the RAW flag */
          desc.Format = DXGI_FORMAT_R32_TYPELESS;
          desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
        }
        ID3D12Device_CreateUnorderedAccessView(g->device, resource->d3d_resource, 0, &desc, descriptor->handle);
      } else {
        D3D12_SHADER_RESOURCE_VIEW_DESC desc = ZI;
        {
          desc.Format = DXGI_FORMAT_UNKNOWN;
          desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
          desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
          desc.Buffer.FirstElement = pointer_desc.element_offset;
          desc.Buffer.NumElements = num_elements_after_offset;
          desc.Buffer.StructureByteStride = pointer_desc.element_size;
          desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
        }
        if (is_raw) {
          desc.Format = DXGI_FORMAT_R32_TYPELESS;
          desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
        }
        ID3D12Device_CreateShaderResourceView(g->device, resource->d3d_resource, &desc, descriptor->handle);
      }
    }
  } else if (is_texture) {
    descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav, pointer_desc.forced);
    /* Null view desc: D3D12 derives the view from the resource's own desc */
    if (is_uav) {
      ID3D12Device_CreateUnorderedAccessView(g->device, resource->d3d_resource, 0, 0, descriptor->handle);
    } else {
      ID3D12Device_CreateShaderResourceView(g->device, resource->d3d_resource, 0, descriptor->handle);
    }
  } else if (is_sampler) {
    descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_Sampler, pointer_desc.forced);
    GPU_SamplerResourceDesc sampler_desc = resource->sampler_desc;
    D3D12_SAMPLER_DESC d3d_desc = ZI;
    {
      d3d_desc.Filter = (D3D12_FILTER)sampler_desc.filter;
      d3d_desc.AddressU = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.x;
      d3d_desc.AddressV = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.y;
      d3d_desc.AddressW = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.z;
      d3d_desc.MipLODBias = sampler_desc.mip_lod_bias;
      d3d_desc.MaxAnisotropy = MaxU32(sampler_desc.max_anisotropy, 1);
      d3d_desc.ComparisonFunc = (D3D12_COMPARISON_FUNC)sampler_desc.comparison;
      d3d_desc.BorderColor[0] = sampler_desc.border_color.x;
      d3d_desc.BorderColor[1] = sampler_desc.border_color.y;
      d3d_desc.BorderColor[2] = sampler_desc.border_color.z;
      d3d_desc.BorderColor[3] = sampler_desc.border_color.w;
      d3d_desc.MinLOD = sampler_desc.min_lod;
      d3d_desc.MaxLOD = sampler_desc.max_lod;
    }
    /* Zero address modes (D3D12 has no mode 0) default to CLAMP */
    if (d3d_desc.AddressU == 0) d3d_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    if (d3d_desc.AddressV == 0) d3d_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    if (d3d_desc.AddressW == 0) d3d_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    if (d3d_desc.MaxLOD >= F32Infinity) {
      d3d_desc.MaxLOD = D3D12_FLOAT32_MAX;
    }
    ID3D12Device_CreateSampler(g->device, &d3d_desc, descriptor->handle);
  }
  /* Exactly one of the three branches above runs, so descriptor is set */
  return descriptor->index;
}

//- Count

/* Size/extent accessors; texture getters read the dims cached at creation */
u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer)
{
  GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(buffer);
  return resource->buffer_size;
}
i32 GPU_Count1D(GPU_ResourceHandle texture)
{
  GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
  return resource->texture_dims.x;
}
Vec2I32 GPU_Count2D(GPU_ResourceHandle texture)
{
  GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
  return VEC2I32(resource->texture_dims.x, resource->texture_dims.y);
}
Vec3I32 GPU_Count3D(GPU_ResourceHandle texture)
{
  GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
  return resource->texture_dims;
}
i32 GPU_CountWidth(GPU_ResourceHandle texture)
{
  GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
  return resource->texture_dims.x;
}
i32 GPU_CountHeight(GPU_ResourceHandle texture)
{
  GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
  return resource->texture_dims.y;
}
i32 GPU_CountDepth(GPU_ResourceHandle texture)
{
  GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
  return resource->texture_dims.z;
}

////////////////////////////////////////////////////////////
//~ Command helpers

/* Appends a command slot to `cl`, growing its chunk list as needed.
 * Chunk sources, in order: the list's current tail chunk (if not full),
 * the shared free-chunk pool (recycled, cmds array preserved across the
 * zeroing), or a fresh permanent-arena allocation. */
GPU_D12_Cmd *GPU_D12_PushCmd(GPU_D12_CmdList *cl)
{
  GPU_D12_SharedState *g = &GPU_D12_shared_state;

  /* Grab chunk */
  GPU_D12_CmdChunk *chunk = cl->last_cmd_chunk;
  {
    if (chunk && chunk->cmds_count >= GPU_D12_CmdsPerChunk) {
      chunk = 0;
    }
    if (!chunk) {
      Lock lock = LockE(&g->free_cmd_chunks_mutex);
      {
        chunk = g->first_free_cmd_chunk;
        if (chunk) {
          g->first_free_cmd_chunk = chunk->next;
        }
      }
      Unlock(&lock);
      if (chunk) {
        /* Reset recycled chunk but keep its backing cmds array */
        GPU_D12_Cmd *cmds = chunk->cmds;
        ZeroStruct(chunk);
        chunk->cmds = cmds;
      }
    }
    if (!chunk) {
      Arena *perm = PermArena();
      chunk = PushStruct(perm, GPU_D12_CmdChunk);
      chunk->cmds = PushStructsNoZero(perm, GPU_D12_Cmd, GPU_D12_CmdsPerChunk);
    }
    if (chunk != cl->last_cmd_chunk) {
      SllQueuePush(cl->first_cmd_chunk, cl->last_cmd_chunk, chunk);
    }
  }

  /* Push cmd to chunk */
  GPU_D12_Cmd *cmd = &chunk->cmds[chunk->cmds_count++];
  ++cl->cmds_count;
  return cmd;
}

/* Records a set-32-bit-root-constant command; copies 4 bytes from `v`. */
GPU_D12_Cmd *GPU_D12_PushConstCmd(GPU_D12_CmdList *cl, i32 slot, void *v)
{
  GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
  cmd->kind = GPU_D12_CmdKind_Constant;
  cmd->constant.slot = slot;
  CopyBytes(&cmd->constant.value, v, 4);
  return cmd;
}

GPU_D12_StagingRegionNode *GPU_D12_PushStagingRegion(GPU_D12_CmdList *cl, u64 size)
{
  GPU_D12_SharedState *g = &GPU_D12_shared_state;
  GPU_QueueKind queue_kind
= cl->queue_kind; GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); GPU_D12_StagingRegionNode *result = 0; Lock lock = LockE(&queue->staging_mutex); { GPU_D12_StagingHeap *heap = queue->staging_heap; i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence); /* Find first completed region with matching size. * For each region in heap: * - If region size > size, split off a smaller region & use it * * - If region size < size, try to merge with next completed region * * - If no available completed region with eligible size, queue the * current heap for deletion & create a new heap * with larger size */ /* FIXME: Region completion target should be atomic, and initialized to * u64/i64 max until cl submission actually sets value */ /* Find region with large enough size */ GPU_D12_StagingRegionNode *match = 0; if (heap && heap->size >= size) { GPU_D12_StagingRegionNode *r = heap->head_region_node; for (;;) { b32 is_completed = completed >= Atomic64Fetch(&r->completion_target); if (is_completed) { GPU_D12_StagingRegionNode *next = r->next; u64 region_size = 0; if (next->pos > r->pos) { region_size = next->pos - r->pos; } else { region_size = heap->size - r->pos; } if (region_size < size) { GPU_D12_StagingRegionNode *prev = r->prev; b32 prev_is_completed = completed >= Atomic64Fetch(&prev->completion_target); if (prev_is_completed && prev->pos < r->pos) { /* Merge with previous region & retry */ prev->next = next; SllStackPush(heap->first_free_region_node, r); r = prev; } else { /* Continue to next region */ r = next; } } else { /* Found matching region */ match = r; break; } } else { /* No large-enough completed region found */ break; } } } /* Create new heap if no match found */ if (!match) { /* Queue old heap for deletion */ u64 new_heap_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64)); if (heap) { /* FIXME: Queue for deletion here */ new_heap_size = MaxU64(new_heap_size, heap->size * 2); heap = 0; } /* Create new heap */ { Arena *arena = 
AcquireArena(Gibi(1)); heap = PushStruct(arena, GPU_D12_StagingHeap); heap->arena = arena; heap->size = new_heap_size; /* Create backing upload heap resource */ ID3D12Resource *d3d_resource = 0; { D3D12_RESOURCE_DESC d3d_desc = ZI; d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; d3d_desc.Format = DXGI_FORMAT_UNKNOWN; d3d_desc.Alignment = 0; d3d_desc.Width = new_heap_size; d3d_desc.Height = 1; d3d_desc.DepthOrArraySize = 1; d3d_desc.MipLevels = 1; d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Quality = 0; D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, D3D12_HEAP_FLAG_CREATE_NOT_ZEROED, &d3d_desc, D3D12_RESOURCE_STATE_COMMON, 0, &IID_ID3D12Resource, (void **)&d3d_resource); if (!SUCCEEDED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to create upload heap")); } } heap->resource.d3d_resource = d3d_resource; heap->resource.uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; heap->resource.buffer_size = new_heap_size; heap->resource.buffer_size_aligned = new_heap_size; heap->resource.buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource); /* Map */ { D3D12_RANGE read_range = ZI; HRESULT hr = ID3D12Resource_Map(d3d_resource, 0, &read_range, &heap->mapped); if (!SUCCEEDED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to map upload heap")); } } } /* Create initial region */ match = PushStruct(heap->arena, GPU_D12_StagingRegionNode); match->heap = heap; match->next = match; match->prev = match; heap->head_region_node = match; } /* Split extra region space */ { GPU_D12_StagingRegionNode *next = match->next; u64 region_size = 0; if (next->pos > match->pos) { region_size = next->pos - match->pos; } else { region_size = heap->size - match->pos; } if (region_size > size) { GPU_D12_StagingRegionNode *new_next = heap->first_free_region_node; if (new_next) { 
SllStackPop(heap->first_free_region_node); } else { new_next = PushStruct(heap->arena, GPU_D12_StagingRegionNode); } new_next->next = next; new_next->prev = match; next->prev = new_next; match->next = new_next; new_next->heap = heap; new_next->pos = match->pos + size; } } Atomic64Set(&match->completion_target, I64Max); result = match; } Unlock(&lock); return result; } //////////////////////////////////////////////////////////// //~ @hookimpl Command //- Command list GPU_CommandListHandle GPU_PrepareCommandList(GPU_QueueKind queue) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = 0; Lock lock = LockE(&g->free_cmd_lists_mutex); { cl = g->first_free_cmd_list; if (cl) { g->first_free_cmd_list = cl->next; ZeroStruct(cl); } else { Arena *perm = PermArena(); cl = PushStruct(perm, GPU_D12_CmdList); } } Unlock(&lock); cl->queue_kind = queue; return GPU_D12_MakeHandle(GPU_CommandListHandle, cl); } void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, u64 fence_ops_count, GPU_FenceOp *fence_ops) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_QueueKind queue_kind = cl->queue_kind; GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); TempArena scratch = BeginScratchNoConflict(); /* Begin dx12 command list */ GPU_D12_RawCommandList *rcl = GPU_D12_PrepareRawCommandList(queue_kind); ID3D12GraphicsCommandList7 *d3d_cl = rcl->d3d_cl; /* Pipeline state */ b32 graphics_rootsig_set = 0; b32 compute_rootsig_set = 0; b32 descriptor_heaps_set = 0; GPU_D12_Pipeline *bound_pipeline = 0; /* Constants state */ u64 slotted_constants[MaxShaderConstants]; u64 bound_compute_constants[MaxShaderConstants]; u64 bound_graphics_constants[MaxShaderConstants]; for (i32 i = 0; i < countof(slotted_constants); ++i) { slotted_constants[i] = 0; } /* Zero initialze all constant slots */ for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; } for (i32 i = 0; i < 
countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } slotted_constants[MaxShaderConstants - 1] = queue_kind == queue_kind == GPU_QueueKind_AsyncCompute; /* IsAsyncCompute constant */ /* Rasterizer state */ D3D12_VIEWPORT bound_viewport = ZI; D3D12_RECT bound_scissor = ZI; D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; D3D12_INDEX_BUFFER_VIEW bound_ibv = ZI; u64 bound_render_target_uids[GPU_MaxRenderTargets] = ZI; u64 bound_render_clear_target_uid = 0; /* Flatten command chunks */ u64 cmds_count = 0; GPU_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, GPU_D12_Cmd, cl->cmds_count); { /* Flatten command chunks */ { for (GPU_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next) { for (u64 cmd_chunk_idx = 0; cmd_chunk_idx < chunk->cmds_count; ++cmd_chunk_idx) { cmds[cmds_count++] = chunk->cmds[cmd_chunk_idx]; } } } /* Free command chunks */ { Lock lock = LockE(&g->free_cmd_chunks_mutex); { for (GPU_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next) { chunk->next = g->first_free_cmd_chunk; g->first_free_cmd_chunk = chunk; } } Unlock(&lock); } } /* Batch barrier cmds */ { u64 cmd_idx = 0; u64 batch_gen = 0; GPU_D12_Cmd *prev_barrier_cmd = 0; while (cmd_idx < cmds_count) { GPU_D12_Cmd *cmd = &cmds[cmd_idx]; switch (cmd->kind) { /* Batch-interrupting cmds */ default: { cmd_idx += 1; batch_gen += 1; } break; /* Non-batch-interrupting cmds */ case GPU_D12_CmdKind_Constant: { cmd_idx += 1; } break; case GPU_D12_CmdKind_Barrier: { /* Determine 'before' state from lookup */ if (prev_barrier_cmd) { if (prev_barrier_cmd->barrier.batch_gen != batch_gen) { /* This barrier is part of new batch */ prev_barrier_cmd->barrier.is_end_of_batch = 1; } } cmd->barrier.batch_gen = batch_gen; prev_barrier_cmd = cmd; cmd_idx += 1; } break; } } if (prev_barrier_cmd) { prev_barrier_cmd->barrier.is_end_of_batch = 1; } } /* Process gpu commands into dx12 commands */ { u64 batch_barrier_idx_start = 0; u64 batch_barrier_idx_opl 
= 0; /* One past last */
    u64 cmd_idx = 0;
    while (cmd_idx < cmds_count)
    {
      GPU_D12_Cmd *cmd = &cmds[cmd_idx];
      switch (cmd->kind)
      {
        default: { cmd_idx += 1; } break;
        //- Constant
        case GPU_D12_CmdKind_Constant:
        {
          /* Latch the value; it is flushed to the root signature lazily at the
           * next Compute/Rasterize dispatch. Out-of-range slots are ignored. */
          i32 slot = cmd->constant.slot;
          u32 value = cmd->constant.value;
          if (slot >= 0 && slot < countof(slotted_constants)) { slotted_constants[slot] = value; }
          cmd_idx += 1;
        } break;
        //- Barrier
        case GPU_D12_CmdKind_Barrier:
        {
          batch_barrier_idx_opl = cmd_idx + 1;
          /* Submit batched barriers */
          if (cmd->barrier.is_end_of_batch)
          {
            /* Build barriers. Arrays are sized for the whole batch window;
             * non-barrier cmds inside the window simply leave slots unused. */
            u64 buffer_barriers_count = 0;
            u64 texture_barriers_count = 0;
            u64 global_barriers_count = 0;
            D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
            D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
            D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
            for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < batch_barrier_idx_opl; ++barrier_cmd_idx)
            {
              GPU_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx];
              if (barrier_cmd->kind == GPU_D12_CmdKind_Barrier)
              {
                GPU_BarrierDesc desc = barrier_cmd->barrier.desc;
                GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(desc.resource);
                D3D12_BARRIER_TYPE barrier_type = resource->is_texture ?
D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
                /* Translate gpu barrier kind -> d3d barrier fields */
                D3D12_BARRIER_SYNC sync_before = GPU_D12_BarrierSyncFromStages(desc.sync_prev);
                D3D12_BARRIER_SYNC sync_after = GPU_D12_BarrierSyncFromStages(desc.sync_next);
                D3D12_BARRIER_ACCESS access_before = GPU_D12_BarrierAccessFromAccesses(desc.access_prev);
                D3D12_BARRIER_ACCESS access_after = GPU_D12_BarrierAccessFromAccesses(desc.access_next);
                /* Layout tracking lives CPU-side on the resource struct. */
                D3D12_BARRIER_LAYOUT layout_before = resource->texture_layout;
                D3D12_BARRIER_LAYOUT layout_after = resource->texture_layout;
                if (desc.layout != GPU_Layout_NoChange)
                {
                  layout_after = GPU_D12_BarrierLayoutFromLayout(desc.layout);
                  resource->texture_layout = layout_after;
                }
                /* Build barrier */
                switch (barrier_type)
                {
                  case D3D12_BARRIER_TYPE_BUFFER:
                  {
                    D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++];
                    barrier->SyncBefore = sync_before;
                    barrier->SyncAfter = sync_after;
                    barrier->AccessBefore = access_before;
                    barrier->AccessAfter = access_after;
                    barrier->pResource = resource->d3d_resource;
                    barrier->Offset = 0;
                    barrier->Size = U64Max; /* whole buffer */
                  } break;
                  case D3D12_BARRIER_TYPE_TEXTURE:
                  {
                    D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++];
                    barrier->SyncBefore = sync_before;
                    barrier->SyncAfter = sync_after;
                    barrier->AccessBefore = access_before;
                    barrier->AccessAfter = access_after;
                    barrier->LayoutBefore = layout_before;
                    barrier->LayoutAfter = layout_after;
                    barrier->pResource = resource->d3d_resource;
                    barrier->Subresources.IndexOrFirstMipLevel = 0xffffffff; /* all subresources */
                  } break;
                  case D3D12_BARRIER_TYPE_GLOBAL:
                  {
                    /* NOTE(review): barrier_type is only ever BUFFER or TEXTURE
                     * above, so this arm looks unreachable as written. */
                    D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++];
                    barrier->SyncBefore = sync_before;
                    barrier->SyncAfter = sync_after;
                    barrier->AccessBefore = access_before;
                    barrier->AccessAfter = access_after;
                  } break;
                }
              }
            }
            /* Dispatch barriers */
            {
              u32 barrier_groups_count = 0;
              D3D12_BARRIER_GROUP barrier_groups[3] = ZI;
              if (buffer_barriers_count > 0)
              {
                D3D12_BARRIER_GROUP *group =
&barrier_groups[barrier_groups_count++];
                group->Type = D3D12_BARRIER_TYPE_BUFFER;
                group->NumBarriers = buffer_barriers_count;
                group->pBufferBarriers = buffer_barriers;
              }
              if (texture_barriers_count > 0)
              {
                D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++];
                group->Type = D3D12_BARRIER_TYPE_TEXTURE;
                group->NumBarriers = texture_barriers_count;
                group->pTextureBarriers = texture_barriers;
              }
              if (global_barriers_count > 0)
              {
                D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++];
                group->Type = D3D12_BARRIER_TYPE_GLOBAL;
                group->NumBarriers = global_barriers_count;
                group->pGlobalBarriers = global_barriers;
              }
              if (barrier_groups_count > 0)
              {
                ID3D12GraphicsCommandList7_Barrier(d3d_cl, barrier_groups_count, barrier_groups);
              }
            }
            batch_barrier_idx_start = cmd_idx + 1;
          }
          cmd_idx += 1;
        } break;
        //- Copy bytes
        case GPU_D12_CmdKind_CopyBytes:
        {
          u64 src_offset = cmd->copy_bytes.src_copy_range.min;
          u64 copy_size = cmd->copy_bytes.src_copy_range.max - cmd->copy_bytes.src_copy_range.min;
          ID3D12GraphicsCommandList_CopyBufferRegion(d3d_cl, cmd->copy_bytes.dst->d3d_resource, cmd->copy_bytes.dst_offset, cmd->copy_bytes.src->d3d_resource, src_offset, copy_size);
          cmd_idx += 1;
        } break;
        //- Copy texels
        case GPU_D12_CmdKind_CopyTexels:
        {
          GPU_D12_Resource *dst = cmd->copy_texels.dst;
          GPU_D12_Resource *src = cmd->copy_texels.src;
          D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc;
          D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc;
          Vec3I32 dst_offset = cmd->copy_texels.dst_offset;
          Rng3I32 src_copy_range = cmd->copy_texels.src_copy_range;
          D3D12_BOX src_box = ZI;
          {
            src_box.left = src_copy_range.p0.x;
            src_box.top = src_copy_range.p0.y;
            src_box.front = src_copy_range.p0.z;
            src_box.right = src_copy_range.p1.x;
            src_box.bottom = src_copy_range.p1.y;
            src_box.back = src_copy_range.p1.z;
          }
          if (dst->flags & GPU_ResourceFlag_AllowDepthStencil)
          {
            /* Depth-stencil textures must have src box & dst offset set to 0
             * https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12graphicscommandlist-copytextureregion */
            ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, 0, 0, 0, &src_loc, 0);
          }
          else
          {
            ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, &src_box);
          }
          cmd_idx += 1;
        } break;
        //- Compute
        case GPU_D12_CmdKind_Compute:
        {
          GPU_D12_Pipeline *pipeline = 0;
          {
            GPU_D12_PipelineDesc pipeline_desc = ZI;
            pipeline_desc.cs = cmd->compute.cs;
            pipeline = GPU_D12_PipelineFromDesc(pipeline_desc);
          }
          if (pipeline)
          {
            /* Set descriptor heaps (once per command list). */
            if (!descriptor_heaps_set)
            {
              ID3D12DescriptorHeap *heaps[] = {
                g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap,
                g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap,
              };
              ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps);
              descriptor_heaps_set = 1;
            }
            /* Bind rootsig */
            if (!compute_rootsig_set)
            {
              ID3D12GraphicsCommandList_SetComputeRootSignature(d3d_cl, g->bindless_rootsig);
              compute_rootsig_set = 1;
            }
            /* Bind pipeline */
            if (pipeline != bound_pipeline)
            {
              ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso);
              bound_pipeline = pipeline;
            }
            /* Update root constants (only slots that changed since last bind). */
            for (i32 slot = 0; slot < MaxShaderConstants; ++slot)
            {
              if (bound_compute_constants[slot] != slotted_constants[slot])
              {
                ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0);
                bound_compute_constants[slot] = slotted_constants[slot];
              }
            }
            /* Dispatch */
            ID3D12GraphicsCommandList_Dispatch(d3d_cl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z);
          }
          cmd_idx += 1;
        } break;
        //- Rasterize
        case GPU_D12_CmdKind_Rasterize:
        {
          GPU_D12_Pipeline *pipeline = 0;
          {
            GPU_D12_PipelineDesc pipeline_desc = ZI;
            pipeline_desc.vs = cmd->rasterize.vs;
            pipeline_desc.ps = cmd->rasterize.ps;
            {
              pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED;
              switch (cmd->rasterize.mode)
              {
                default:
Assert(0); break;
                /* Map raster mode -> PSO topology *type* (class of topology);
                 * the exact IA topology is set separately at draw time. */
                case GPU_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break;
                case GPU_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break;
                case GPU_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break;
                case GPU_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
                case GPU_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
                case GPU_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
                case GPU_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
              }
            }
            if (cmd->rasterize.mode == GPU_RasterMode_WireTriangleList || cmd->rasterize.mode == GPU_RasterMode_WireTriangleStrip)
            {
              pipeline_desc.is_wireframe = 1;
            }
            for (u32 i = 0; i < countof(cmd->rasterize.render_targets); ++i)
            {
              GPU_D12_Resource *rt = cmd->rasterize.render_targets[i];
              if (rt) { pipeline_desc.render_target_formats[i] = rt->texture_format; }
              else { pipeline_desc.render_target_formats[i] = GPU_Format_Unknown; }
            }
            pipeline = GPU_D12_PipelineFromDesc(pipeline_desc);
          }
          /* Create ibv */
          u32 indices_count = 0;
          D3D12_INDEX_BUFFER_VIEW ibv = ZI;
          {
            GPU_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc;
            if (desc.index_count > 0)
            {
              GPU_D12_Resource *index_buffer_resource = GPU_D12_ResourceFromHandle(desc.resource);
              ibv.BufferLocation = index_buffer_resource->buffer_gpu_address;
              ibv.SizeInBytes = desc.index_size * desc.index_count;
              /* Only 16- and 32-bit indices are representable in D3D12. */
              if (desc.index_size == 2) { ibv.Format = DXGI_FORMAT_R16_UINT; indices_count = ibv.SizeInBytes / 2; }
              else if (desc.index_size == 4) { ibv.Format = DXGI_FORMAT_R32_UINT; indices_count = ibv.SizeInBytes / 4; }
              else { Assert(0); /* Invalid index size */ }
            }
          }
          /* Prepare & dispatch; draws with no pipeline or no indices are dropped. */
          if (pipeline && indices_count > 0)
          {
            /* Set descriptor heaps */
            if
(!descriptor_heaps_set)
            {
              ID3D12DescriptorHeap *heaps[] = {
                g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap,
                g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap,
              };
              ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps);
              descriptor_heaps_set = 1;
            }
            /* Bind rootsig */
            if (!graphics_rootsig_set)
            {
              ID3D12GraphicsCommandList_SetGraphicsRootSignature(d3d_cl, g->bindless_rootsig);
              graphics_rootsig_set = 1;
            }
            /* Bind pipeline */
            if (pipeline != bound_pipeline)
            {
              ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso);
              bound_pipeline = pipeline;
            }
            /* Update root constants */
            for (i32 slot = 0; slot < MaxShaderConstants; ++slot)
            {
              if (bound_graphics_constants[slot] != slotted_constants[slot])
              {
                ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0);
                bound_graphics_constants[slot] = slotted_constants[slot];
              }
            }
            /* Set viewport (only when it changed since the last draw). */
            {
              D3D12_VIEWPORT viewport = ZI;
              {
                Rng3 range = cmd->rasterize.viewport;
                viewport.TopLeftX = range.p0.x;
                viewport.TopLeftY = range.p0.y;
                viewport.Width = range.p1.x - range.p0.x;
                viewport.Height = range.p1.y - range.p0.y;
                viewport.MinDepth = range.p0.z;
                viewport.MaxDepth = range.p1.z;
              }
              if (!MatchStruct(&viewport, &bound_viewport))
              {
                bound_viewport = viewport;
                ID3D12GraphicsCommandList_RSSetViewports(d3d_cl, 1, &viewport);
              }
            }
            /* Set scissor */
            {
              D3D12_RECT scissor = ZI;
              {
                Rng2 range = cmd->rasterize.scissor;
                scissor.left = range.p0.x;
                scissor.top = range.p0.y;
                scissor.right = range.p1.x;
                scissor.bottom = range.p1.y;
              }
              if (!MatchStruct(&scissor, &bound_scissor))
              {
                bound_scissor = scissor;
                ID3D12GraphicsCommandList_RSSetScissorRects(d3d_cl, 1, &scissor);
              }
            }
            /* Set topology */
            {
              D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
              switch (cmd->rasterize.mode)
              {
                default: Assert(0); break;
                case GPU_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break;
                case GPU_RasterMode_LineList: topology =
D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; case GPU_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; case GPU_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; case GPU_RasterMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; case GPU_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; case GPU_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; } if (topology != bound_primitive_topology) { ID3D12GraphicsCommandList_IASetPrimitiveTopology(d3d_cl, topology); } } /* Set index buffer */ if (!MatchStruct(&ibv, &bound_ibv)) { ID3D12GraphicsCommandList_IASetIndexBuffer(d3d_cl, &ibv); bound_ibv = ibv; } /* Bind render targets */ { b32 om_dirty = 0; u32 rtvs_count = 0; for (u32 i = 0; i < countof(cmd->rasterize.render_targets); ++i) { GPU_D12_Resource *rt = cmd->rasterize.render_targets[i]; if (rt) { if (bound_render_target_uids[i] != rt->uid) { GPU_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i]; ID3D12Device_CreateRenderTargetView(g->device, rt->d3d_resource, 0, rtv_descriptor->handle); bound_render_target_uids[i] = rt->uid; om_dirty = 1; } ++rtvs_count; } else { break; } } if (om_dirty) { D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[GPU_MaxRenderTargets] = ZI; for (u32 i = 0; i < rtvs_count; ++i) { rtv_handles[i] = rcl->rtv_descriptors[i]->handle; } ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0); } } /* Dispatch */ ID3D12GraphicsCommandList_DrawIndexedInstanced(d3d_cl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); } cmd_idx += 1; } break; //- Clear rtv case GPU_D12_CmdKind_ClearRtv: { GPU_D12_Resource *rt = cmd->clear_rtv.render_target; f32 clear_color[4] = ZI; { clear_color[0] = cmd->clear_rtv.color.x; clear_color[1] = cmd->clear_rtv.color.y; clear_color[2] = cmd->clear_rtv.color.z; clear_color[3] = cmd->clear_rtv.color.w; } D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = 
rcl->rtv_clear_descriptor->handle; if (bound_render_clear_target_uid != rt->uid) { ID3D12Device_CreateRenderTargetView(g->device, rt->d3d_resource, 0, rtv_handle); bound_render_clear_target_uid = rt->uid; } ID3D12GraphicsCommandList_ClearRenderTargetView(d3d_cl, rtv_handle, clear_color, 0, 0); cmd_idx += 1; } break; } } } /* End dx12 command list */ GPU_D12_CommitRawCommandList(rcl); /* Free command list */ { Lock lock = LockE(&g->free_cmd_lists_mutex); { cl->next = g->first_free_cmd_list; g->first_free_cmd_list = cl; } Unlock(&lock); } EndScratch(scratch); } //- Arena void GPU_ResetArena(GPU_CommandListHandle cl_handle, GPU_ArenaHandle arena_handle) { GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); /* TODO */ /* FIXME: Move descriptors into committed lists */ /* FIXME: Release id3d12 resource com object references */ gpu_arena->heap_pos = 0; } //- Cpu -> Gpu copy void GPU_CopyCpuToBuffer(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); u64 copy_size = src_copy_range.max - src_copy_range.min; GPU_D12_StagingRegionNode *region = GPU_D12_PushStagingRegion(cl, copy_size); CopyBytes((u8 *)region->heap->mapped + region->pos, (u8 *)src + src_copy_range.min, copy_size); GPU_CopyBufferToBuffer(cl_handle, dst_handle, dst_offset, GPU_D12_MakeHandle(GPU_ResourceHandle, ®ion->heap->resource), RNGU64(region->pos, copy_size)); } void GPU_CopyCpuToTexture(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Resource *dst = GPU_D12_ResourceFromHandle(dst_handle); Assert(dst->is_texture); Vec3I32 staged_dims = ZI; { staged_dims.x = src_copy_range.p1.x - src_copy_range.p0.x; staged_dims.y = src_copy_range.p1.y - src_copy_range.p0.y; 
staged_dims.z = src_copy_range.p1.z - src_copy_range.p0.z; } /* Grab footprint info */ u64 footprint_rows_count = 0; u64 footprint_row_size = 0; u64 footprint_size = 0; D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI; { D3D12_RESOURCE_DESC src_desc = ZI; { ID3D12Resource_GetDesc(dst->d3d_resource, &src_desc); src_desc.Width = staged_dims.x; src_desc.Height = staged_dims.y; src_desc.DepthOrArraySize = staged_dims.z; } ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &footprint, (u32 *)&footprint_rows_count, &footprint_row_size, &footprint_size); } /* Fill staging buffer */ GPU_D12_StagingRegionNode *region = GPU_D12_PushStagingRegion(cl, footprint_size); { D3D12_RANGE read_range = ZI; u8 *dst_base = (u8 *)region->heap->mapped + region->pos + footprint.Offset; u8 *src_base = src; u32 z_size = footprint_row_size * footprint_rows_count; for (i32 z = 0; z < src_dims.z; ++z) { u32 z_offset = z * z_size; for (i32 y = 0; y < footprint_rows_count; ++y) { u8 *dst_row = dst_base + y * footprint.Footprint.RowPitch + z_offset; u8 *src_row = src_base + y * footprint_row_size + z_offset; CopyBytes(dst_row, src_row, footprint_row_size); } } } GPU_CopyBufferToTexture(cl_handle, dst_handle, dst_offset, GPU_D12_MakeHandle(GPU_ResourceHandle, ®ion->heap->resource), staged_dims, RNG3I32(VEC3I32(0, 0, 0), staged_dims)); } //- Gpu <-> Gpu copy void GPU_CopyBufferToBuffer(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, u64 dst_offset, GPU_ResourceHandle src_handle, RngU64 src_copy_range) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_CopyBytes; cmd->copy_bytes.dst = GPU_D12_ResourceFromHandle(dst_handle); cmd->copy_bytes.src = GPU_D12_ResourceFromHandle(src_handle); cmd->copy_bytes.dst_offset = dst_offset; cmd->copy_bytes.src_copy_range = src_copy_range; } void GPU_CopyBufferToTexture(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, 
GPU_ResourceHandle src_handle, Vec3I32 src_dims, Rng3I32 src_copy_range) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Resource *dst = GPU_D12_ResourceFromHandle(dst_handle); GPU_D12_Resource *src = GPU_D12_ResourceFromHandle(src_handle); Assert(dst->is_texture); Assert(!src->is_texture); /* Grab footprint info */ D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI; { D3D12_RESOURCE_DESC src_desc = ZI; { ID3D12Resource_GetDesc(dst->d3d_resource, &src_desc); src_desc.Width = src_dims.x; src_desc.Height = src_dims.y; src_desc.DepthOrArraySize = src_dims.z; } ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &footprint, 0, 0, 0); } D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; { dst_loc.pResource = dst->d3d_resource; dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dst_loc.SubresourceIndex = 0; } { src_loc.pResource = src->d3d_resource; src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; src_loc.PlacedFootprint = footprint; } GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_CopyTexels; cmd->copy_texels.dst = dst; cmd->copy_texels.src = src; cmd->copy_texels.dst_loc = dst_loc; cmd->copy_texels.src_loc = src_loc; cmd->copy_texels.dst_offset = dst_offset; cmd->copy_texels.src_copy_range = src_copy_range; } void GPU_CopyTextureToTexture(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, GPU_ResourceHandle src_handle, Rng3I32 src_copy_range) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Resource *dst = GPU_D12_ResourceFromHandle(dst_handle); GPU_D12_Resource *src = GPU_D12_ResourceFromHandle(src_handle); Assert(dst->is_texture); Assert(src->is_texture); D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; { dst_loc.pResource = dst->d3d_resource; dst_loc.Type = 
D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dst_loc.SubresourceIndex = 0; } { src_loc.pResource = dst->d3d_resource; src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; src_loc.SubresourceIndex = 0; } GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_CopyTexels; cmd->copy_texels.dst = dst; cmd->copy_texels.src = src; cmd->copy_texels.dst_loc = dst_loc; cmd->copy_texels.src_loc = src_loc; cmd->copy_texels.dst_offset = dst_offset; cmd->copy_texels.src_copy_range = src_copy_range; } void GPU_CopyTextureToBuffer(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, GPU_ResourceHandle src_handle, Rng3I32 src_copy_range) { /* TODO */ Assert(0); } //- Constant void GPU_SetConstant_(GPU_CommandListHandle cl_handle, i32 slot, void *src_32bit, u32 size) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_Constant; cmd->constant.slot = slot; CopyBytes(&cmd->constant.value, src_32bit, MinU32(size, 4)); } //- Barrier void GPU_Sync(GPU_CommandListHandle cl_handle, GPU_BarrierDesc desc) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_Barrier; cmd->barrier.desc = desc; } //- Compute void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 groups) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_Compute; cmd->compute.cs = cs; cmd->compute.groups = groups; } //- Rasterize void GPU_Rasterize(GPU_CommandListHandle cl_handle, VertexShader vs, PixelShader ps, u32 instances_count, GPU_IndexBufferDesc index_buffer, u32 render_targets_count, GPU_ResourceHandle *render_targets, Rng3 viewport, Rng2 scissor, GPU_RasterMode mode) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_Rasterize; cmd->rasterize.vs 
= vs; cmd->rasterize.ps = ps; cmd->rasterize.instances_count = instances_count; cmd->rasterize.index_buffer_desc = index_buffer; for (u32 i = 0; i < MinU32(render_targets_count, GPU_MaxRenderTargets); ++i) { cmd->rasterize.render_targets[i] = GPU_D12_ResourceFromHandle(render_targets[i]); } cmd->rasterize.viewport = viewport; cmd->rasterize.scissor = scissor; cmd->rasterize.mode = mode; } //- Clear void GPU_ClearRenderTarget(GPU_CommandListHandle cl_handle, GPU_ResourceHandle resource_handle, Vec4 color) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_ClearRtv; cmd->clear_rtv.render_target = GPU_D12_ResourceFromHandle(resource_handle); cmd->clear_rtv.color = color; } //////////////////////////////////////////////////////////// //~ @hookimpl Queue synchronization void GPU_SyncQueue(GPU_QueueKind completion_queue_kind, GPU_QueueKind waiter_queue_kind) { if (completion_queue_kind != waiter_queue_kind) { GPU_D12_Queue *completion_queue = GPU_D12_QueueFromKind(completion_queue_kind); GPU_D12_Queue *waiter_queue = GPU_D12_QueueFromKind(waiter_queue_kind); ID3D12Fence *d3d_fence = completion_queue->commit_fence; u64 fence_target = 0; { Lock lock = LockS(&completion_queue->commit_mutex); fence_target = completion_queue->commit_fence_target; Unlock(&lock); } if (ID3D12Fence_GetCompletedValue(d3d_fence) < fence_target) { ID3D12CommandQueue_Wait(waiter_queue->d3d_queue, d3d_fence, fence_target); } } } void GPU_SyncOtherQueues(GPU_QueueKind completion_queue_kind) { if (GPU_IsMultiQueueEnabled) { GPU_D12_Queue *completion_queue = GPU_D12_QueueFromKind(completion_queue_kind); ID3D12Fence *d3d_fence = completion_queue->commit_fence; u64 fence_target = 0; { Lock lock = LockS(&completion_queue->commit_mutex); fence_target = completion_queue->commit_fence_target; Unlock(&lock); } if (ID3D12Fence_GetCompletedValue(d3d_fence) < fence_target) { for (GPU_QueueKind waiter_queue_kind = 0; waiter_queue_kind < 
GPU_NumQueues; ++waiter_queue_kind) { if (waiter_queue_kind != completion_queue_kind) { GPU_D12_Queue *waiter_queue = GPU_D12_QueueFromKind(waiter_queue_kind); ID3D12CommandQueue_Wait(waiter_queue->d3d_queue, d3d_fence, fence_target); } } } } } //////////////////////////////////////////////////////////// //~ @hookimpl Map hooks // GPU_Mapped GPU_Map(GPU_Resource *gpu_r) // { // GPU_Mapped result = ZI; // result.resource = gpu_r; // GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_r; // D3D12_RANGE read_range = ZI; // HRESULT hr = ID3D12Resource_Map(r->d3d_resource, 0, &read_range, &result.mem); // if (FAILED(hr) || !result.mem) // { // /* TODO: Don't panic */ // Panic(Lit("Failed to map command buffer resource")); // } // return result; // } // void GPU_Unmap(GPU_Mapped m) // { // GPU_D12_Resource *r = (GPU_D12_Resource *)m.resource; // ID3D12Resource_Unmap(r->d3d_resource, 0, 0); // } // void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference) // { // GPU_D12_SharedState *g = &GPU_D12_shared_state; // D3D12_RESOURCE_DESC desc = ZI; // ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc); // u64 upload_size = 0; // u64 upload_row_size = 0; // u32 upload_num_rows = 0; // D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; // ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); // D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; // { // D3D12_RANGE read_range = ZI; // u8 *dst_base = (u8 *)dst + placed_footprint.Offset; // u8 *src_base = src; // u32 z_size = upload_row_size * upload_num_rows; // b32 src_overflow = 0; // for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z) // { // u32 z_offset = z * z_size; // for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y) // { // u8 *dst_row = dst_base + y * footprint.RowPitch + z_offset; // u8 *src_row = src_base + y * upload_row_size + z_offset; 
// CopyBytes(dst_row, src_row, upload_row_size);
// }
// }
// }
// }

////////////////////////////////////////////////////////////
//~ @hookimpl Statistics

/* Returns a snapshot of GPU memory budgets/usage (via DXGI video memory
 * queries on local and non-local segment groups) plus the backend's atomic
 * resource/descriptor allocation counters. */
GPU_Stats GPU_QueryStats(void)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_Stats result = ZI;
    /* Local segment group: typically dedicated VRAM on discrete adapters. */
    {
        DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
        IDXGIAdapter3_QueryVideoMemoryInfo(g->adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info);
        result.local_committed = info.CurrentUsage;
        result.local_budget = info.Budget;
    }
    /* Non-local segment group: shared system memory. */
    {
        DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
        IDXGIAdapter3_QueryVideoMemoryInfo(g->adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info);
        result.non_local_budget = info.Budget;
        result.non_local_committed = info.CurrentUsage;
    }
    result.driver_resources_allocated = Atomic64Fetch(&g->driver_resources_allocated);
    result.driver_descriptors_allocated = Atomic64Fetch(&g->driver_descriptors_allocated);
    return result;
}

////////////////////////////////////////////////////////////
//~ @hookimpl Swapchain

/* Allocates a swapchain record bound to an OS window. The actual DXGI
 * swapchain is created lazily by GPU_PrepareBackbuffer on first use. */
GPU_SwapchainHandle GPU_AcquireSwapchain(u64 os_window_handle)
{
    GPU_D12_Swapchain *swapchain = 0;
    {
        Arena *perm = PermArena();
        swapchain = PushStruct(perm, GPU_D12_Swapchain);
    }
    swapchain->window_hwnd = (HWND)os_window_handle;
    return GPU_D12_MakeHandle(GPU_SwapchainHandle, swapchain);
}

/* Releases a swapchain. Currently a no-op (see TODO). */
void GPU_ReleaseSwapchain(GPU_SwapchainHandle swapchain_handle)
{
    /* TODO */
}

/* Readies the swapchain for a new frame and returns the current backbuffer:
 * - lazily creates the DXGI swapchain (plus latency waitable and present
 *   fence) on first call,
 * - resizes the buffers when the requested size/format changed (after
 *   waiting on the present fence so the GPU is done with them),
 * - (re)wraps each DXGI buffer in a GPU_D12_Resource,
 * - blocks until a backbuffer is available, then returns a handle to it.
 * `size` is clamped to at least 1x1. Panics on unrecoverable failures
 * (see TODOs). */
GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, GPU_Format format, Vec2I32 size)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Swapchain *swapchain = GPU_D12_SwapchainFromHandle(swapchain_handle);
    size = VEC2I32(MaxI32(size.x, 1), MaxI32(size.y, 1));
    GPU_D12_Queue *direct_queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);
    /* Initialize swapchain */
    if (!swapchain->d3d_swapchain)
    {
        /* hr threads through all creation steps below; each step only runs
         * when every previous step SUCCEEDED. */
        HRESULT hr = 0;
        /* Create d3d swapchain */
        {
            IDXGISwapChain3 *swapchain3 = 0;
            {
                /* Create swapchain1 */
                IDXGISwapChain1 *swapchain1 = 0;
                if (SUCCEEDED(hr))
                {
                    DXGI_SWAP_CHAIN_DESC1 desc = ZI;
                    desc.Format = GPU_D12_DxgiFormatFromGpuFormat(format);
                    desc.Width = size.x;
                    desc.Height = size.y;
                    desc.SampleDesc.Count = 1;
                    desc.SampleDesc.Quality = 0;
                    desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
                    desc.BufferCount = GPU_D12_SwapchainBufferCount;
                    desc.Scaling = DXGI_SCALING_NONE;
                    desc.Flags = GPU_D12_SwapchainFlags;
                    desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
                    desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
                    /* Flip-model swapchains are created against the direct
                     * queue, not the device. */
                    hr = IDXGIFactory2_CreateSwapChainForHwnd(g->factory, (IUnknown *)direct_queue->d3d_queue, swapchain->window_hwnd, &desc, 0, 0, &swapchain1);
                }
                /* Upgrade to swapchain3 */
                if (SUCCEEDED(hr))
                {
                    hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain3);
                    IDXGISwapChain1_Release(swapchain1);
                }
            }
            swapchain->d3d_swapchain = swapchain3;
            swapchain->backbuffers_format = format;
            swapchain->backbuffers_resolution = size;
        }
        /* Create waitable object */
        {
            HANDLE waitable = 0;
            if (SUCCEEDED(hr) && GPU_D12_FrameLatency > 0)
            {
                /* NOTE(review): the frame-latency waitable object requires the
                 * swapchain to be created with the waitable-object flag —
                 * assumed to be part of GPU_D12_SwapchainFlags; confirm. */
                hr = IDXGISwapChain3_SetMaximumFrameLatency(swapchain->d3d_swapchain, GPU_D12_FrameLatency);
                waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->d3d_swapchain);
            }
            swapchain->waitable = waitable;
        }
        /* Create present fence */
        {
            HANDLE present_event = 0;
            ID3D12Fence *present_fence = 0;
            if (SUCCEEDED(hr))
            {
                present_event = CreateEvent(0, 0, 0, 0);
                hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&present_fence);
            }
            swapchain->present_fence = present_fence;
            swapchain->present_event = present_event;
        }
        /* Disable Alt+Enter */
        IDXGIFactory_MakeWindowAssociation(g->factory, swapchain->window_hwnd, DXGI_MWA_NO_ALT_ENTER);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to create swapchain"));
        }
    }
    /* Resize backbuffers */
    if (!MatchVec2I32(swapchain->backbuffers_resolution, size) || swapchain->backbuffers_format != format)
    {
        HRESULT hr = 0;
        /* Wait for any previous backbuffer commands to finish */
        {
            ID3D12Fence_SetEventOnCompletion(swapchain->present_fence, swapchain->present_fence_target, swapchain->present_event);
            WaitForSingleObject(swapchain->present_event, INFINITE);
        }
        /* Release backbuffers: all buffer references must be dropped before
         * ResizeBuffers can succeed. */
        for (u32 i = 0; i < countof(swapchain->backbuffers); ++i)
        {
            GPU_D12_Resource *backbuffer = &swapchain->backbuffers[i];
            if (backbuffer->d3d_resource)
            {
                ID3D12Resource_Release(backbuffer->d3d_resource);
                backbuffer->d3d_resource = 0;
            }
        }
        /* Resize buffers: 0 buffer count / DXGI_FORMAT_UNKNOWN preserve the
         * existing count and format. */
        hr = IDXGISwapChain_ResizeBuffers(swapchain->d3d_swapchain, 0, size.x, size.y, DXGI_FORMAT_UNKNOWN, GPU_D12_SwapchainFlags);
        if (FAILED(hr))
        {
            /* TODO: Don't panic */
            Panic(Lit("Failed to resize swapchain"));
        }
    }
    /* Initialize backbuffers: wrap each DXGI buffer that isn't wrapped yet
     * (first frame, or after the resize path released them). */
    {
        for (u32 i = 0; i < countof(swapchain->backbuffers); ++i)
        {
            GPU_D12_Resource *backbuffer = &swapchain->backbuffers[i];
            if (!backbuffer->d3d_resource)
            {
                ID3D12Resource *d3d_resource = 0;
                HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->d3d_swapchain, i, &IID_ID3D12Resource, (void **)&d3d_resource);
                if (FAILED(hr))
                {
                    /* TODO: Don't panic */
                    Panic(Lit("Failed to get swapchain buffer"));
                }
                ZeroStruct(backbuffer);
                backbuffer->d3d_resource = d3d_resource;
                /* Fresh uid per (re)wrap so stale handles don't resolve. */
                backbuffer->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;
                backbuffer->flags = GPU_ResourceFlag_AllowRenderTarget;
                backbuffer->is_texture = 1;
                backbuffer->texture_format = format;
                backbuffer->texture_dims = VEC3I32(size.x, size.y, 1);
                backbuffer->texture_mip_levels = 1;
                backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT;
                backbuffer->swapchain = swapchain;
            }
        }
        swapchain->backbuffers_format = format;
        swapchain->backbuffers_resolution = size;
    }
    /* Wait for available backbuffer */
    if (swapchain->waitable)
    {
        /* Bounded wait on the latency waitable; on timeout fall back to a
         * full wait on the present fence. */
        DWORD wait_result = WaitForSingleObject(swapchain->waitable, 500);
        if (wait_result == WAIT_TIMEOUT)
        {
            ID3D12Fence_SetEventOnCompletion(swapchain->present_fence, swapchain->present_fence_target, swapchain->present_event);
            WaitForSingleObject(swapchain->present_event, INFINITE);
        }
    }
    /* Grab current backbuffer */
    GPU_D12_Resource *cur_backbuffer = 0;
    {
        u32 backbuffer_idx = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->d3d_swapchain);
        cur_backbuffer = &swapchain->backbuffers[backbuffer_idx];
    }
    return GPU_D12_MakeHandle(GPU_ResourceHandle, cur_backbuffer);
}

/* Presents the backbuffer's swapchain (vsync = sync interval, 0 allows
 * tearing when supported), then signals the swapchain's present fence on
 * the direct queue so the next prepare can wait for this frame. */
void GPU_CommitBackbuffer(GPU_ResourceHandle backbuffer_handle, i32 vsync)
{
    GPU_D12_Resource *backbuffer = GPU_D12_ResourceFromHandle(backbuffer_handle);
    GPU_D12_Swapchain *swapchain = backbuffer->swapchain;
    GPU_D12_Queue *direct_queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);
    u32 present_flags = 0;
    /* Tearing is only valid with a sync interval of 0. */
    if (GPU_D12_TearingIsAllowed && vsync == 0)
    {
        present_flags |= DXGI_PRESENT_ALLOW_TEARING;
    }
    /* Present */
    {
        HRESULT hr = IDXGISwapChain3_Present(swapchain->d3d_swapchain, vsync, present_flags);
        if (!SUCCEEDED(hr))
        {
            Assert(0);
        }
    }
    if (vsync != 0 && !(present_flags & DXGI_PRESENT_ALLOW_TEARING))
    {
        /* FIXME: Don't flush in fullscreen mode? */
        // DwmFlush();
    }
    /* Increment swapchain fence */
    {
        u64 target = ++swapchain->present_fence_target;
        ID3D12CommandQueue_Signal(direct_queue->d3d_queue, swapchain->present_fence, target);
    }
}