GPU_D12_SharedState GPU_D12_shared_state = ZI; //////////////////////////////////////////////////////////// //~ Startup void GPU_D12_Startup(void) { GPU_D12_SharedState *g = &GPU_D12_shared_state; TempArena scratch = BeginScratchNoConflict(); Arena *perm = PermArena(); ////////////////////////////// //- Initialize device { HRESULT hr = 0; /* Enable debug layer */ u32 dxgi_factory_flags = 0; #if GPU_DEBUG { __profn("Enable debug layer"); ID3D12Debug *debug_controller0 = 0; { hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0); if (FAILED(hr)) { Panic(Lit("Failed to create ID3D12Debug0")); } ID3D12Debug_EnableDebugLayer(debug_controller0); #if GPU_DEBUG_VALIDATION { ID3D12Debug1 *debug_controller1 = 0; { hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1); if (FAILED(hr)) { Panic(Lit("Failed to create ID3D12Debug1")); } ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, 1); } ID3D12Debug_Release(debug_controller1); } #endif } ID3D12Debug_Release(debug_controller0); dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG; } #endif /* Create factory */ { __profn("Create factory"); hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&g->factory); if (FAILED(hr)) { Panic(Lit("Failed to initialize DXGI factory")); } } /* Create device */ { __profn("Create device"); IDXGIAdapter3 *adapter = 0; ID3D12Device *device = 0; String error = Lit("Could not initialize GPU device."); String first_gpu_name = ZI; u32 adapter_index = 0; b32 skip = 0; /* For iGPU testing */ for (;;) { { hr = IDXGIFactory6_EnumAdapterByGpuPreference(g->factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter3, (void **)&adapter); } if (SUCCEEDED(hr)) { DXGI_ADAPTER_DESC1 desc; IDXGIAdapter3_GetDesc1(adapter, &desc); if (first_gpu_name.len == 0) { first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description); } { /* TODO: Verify feature support: * - HighestShaderModel >= 
D3D_SHADER_MODEL_6_6 * - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3 * - EnhancedBarriersSupported == 1 */ hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); } if (SUCCEEDED(hr) && !skip) { break; } skip = 0; ID3D12Device_Release(device); IDXGIAdapter3_Release(adapter); adapter = 0; device = 0; ++adapter_index; } else { break; } } if (!device) { if (first_gpu_name.len > 0) { error = StringF(scratch.arena, "Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.", FmtString(first_gpu_name)); } Panic(error); } g->adapter = adapter; g->device = device; } /* Enable debug layer breaks */ { #if GPU_DEBUG /* Enable D3D12 Debug break */ { __profn("Enable d3d12 debug break"); ID3D12InfoQueue *info = 0; hr = ID3D12Device_QueryInterface(g->device, &IID_ID3D12InfoQueue, (void **)&info); if (FAILED(hr)) { Panic(Lit("Failed to query ID3D12Device interface")); } ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1); ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1); ID3D12InfoQueue_Release(info); } /* Enable DXGI Debug break */ { __profn("Enable dxgi debug break"); IDXGIInfoQueue *dxgi_info = 0; hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info); if (FAILED(hr)) { Panic(Lit("Failed to get DXGI debug interface")); } IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1); IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1); IDXGIInfoQueue_Release(dxgi_info); } #endif } } ////////////////////////////// //- Initialize command queues { GPU_D12_CommandQueueDesc descs[] = { { .type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH }, { .type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL }, { .type = 
D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL }, }; for (u32 i = 0; i < MinU32(countof(descs), countof(g->queues)); ++i) { GPU_D12_CommandQueueDesc desc = descs[i]; D3D12_COMMAND_QUEUE_DESC d3d_desc = { .Type = desc.type, .Priority = desc.priority }; GPU_D12_Queue *queue = &g->queues[i]; queue->desc = desc; HRESULT hr = ID3D12Device_CreateCommandQueue(g->device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->d3d_queue); if (SUCCEEDED(hr)) { hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&queue->commit_fence); } if (FAILED(hr)) { Panic(Lit("Failed to create GPU Command Queue")); } } } ////////////////////////////// //- Initialize descriptor heaps { Struct(Dx12HeapDesc) { D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; u64 max; }; Dx12HeapDesc descs[] = { { .type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = GPU_D12_MaxCbvSrvUavDescriptors, }, { .type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE, .max = GPU_D12_MaxRtvDescriptors, }, { .type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, .flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, .max = GPU_D12_MaxSamplerDescriptors, }, }; for (u32 i = 0; i < countof(descs); ++i) { Dx12HeapDesc desc = descs[i]; GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[i]; heap->descriptors_arena = AcquireArena(Gibi(1)); heap->type = desc.type; heap->max_count = desc.max; heap->descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, desc.type); D3D12_DESCRIPTOR_HEAP_DESC d3d_desc = ZI; d3d_desc.Type = desc.type; d3d_desc.Flags = desc.flags; d3d_desc.NumDescriptors = desc.max; HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap); if (FAILED(hr)) { Panic(Lit("Failed to create descriptor heap")); } ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle); 
} } ////////////////////////////// //- Initialize bindless root signature { HRESULT hr = 0; /* Serialize root signature */ ID3D10Blob *blob = 0; if (SUCCEEDED(hr)) { __profn("Serialize root signature"); D3D12_ROOT_PARAMETER param = ZI; param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; param.Constants.ShaderRegister = 0; param.Constants.RegisterSpace = 0; param.Constants.Num32BitValues = 64; D3D12_ROOT_SIGNATURE_DESC desc = ZI; desc.NumParameters = 1; desc.pParameters = ¶m; desc.NumStaticSamplers = 0; desc.pStaticSamplers = 0; desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED; hr = D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, 0); } /* Create root signature */ ID3D12RootSignature *rootsig = 0; if (SUCCEEDED(hr)) { __profn("Create root signature"); hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), &IID_ID3D12RootSignature, (void **)&rootsig); } g->bindless_rootsig = rootsig; if (blob) { ID3D10Blob_Release(blob); } if (FAILED(hr)) { Panic(Lit("Failed to create root signature")); } } ////////////////////////////// //- Initialize queue sync worker // JobPoolId sync_pool = InitJobPool(1, Lit("Dx12 queue sync"), JobPoolPriority_Critical); // RunJob(GPU_D12_StartQueueSync, .pool = sync_pool); EndScratch(scratch); } //////////////////////////////////////////////////////////// //~ Helpers GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle) { return (GPU_D12_Arena *)handle.v; } GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle) { return (GPU_D12_CmdList *)handle.v; } GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle) { return (GPU_D12_Resource *)handle.v; } GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle) { return (GPU_D12_Swapchain *)handle.v; } 
DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format) { return (DXGI_FORMAT)format; } D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStageFlags(GPU_StageFlag flags) { D3D12_BARRIER_SYNC result = 0; if (flags == GPU_StageFlag_AllStages) { result = D3D12_BARRIER_SYNC_ALL; } else { result |= D3D12_BARRIER_SYNC_COMPUTE_SHADING * AnyBit(flags, GPU_StageFlag_ComputeShading); result |= D3D12_BARRIER_SYNC_INDEX_INPUT * AnyBit(flags, GPU_StageFlag_IndexAssembly); result |= D3D12_BARRIER_SYNC_VERTEX_SHADING * AnyBit(flags, GPU_StageFlag_VertexShading); result |= D3D12_BARRIER_SYNC_PIXEL_SHADING * AnyBit(flags, GPU_StageFlag_PixelShading); result |= D3D12_BARRIER_SYNC_DEPTH_STENCIL * AnyBit(flags, GPU_StageFlag_DepthStencil); result |= D3D12_BARRIER_SYNC_RENDER_TARGET * AnyBit(flags, GPU_StageFlag_RenderTarget); result |= D3D12_BARRIER_SYNC_COPY * AnyBit(flags, GPU_StageFlag_Copy); result |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT * AnyBit(flags, GPU_StageFlag_Indirect); } return result; } D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccessFlags(GPU_AccessFlag flags) { D3D12_BARRIER_ACCESS result = 0; if (flags == 0) { result = D3D12_BARRIER_ACCESS_NO_ACCESS; } else if (flags == GPU_AccessFlag_AllAccess) { result = D3D12_BARRIER_ACCESS_COMMON; } else { result |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS * AnyBit(flags, GPU_AccessFlag_ShaderReadWrite); result |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE * AnyBit(flags, GPU_AccessFlag_ShaderRead); result |= D3D12_BARRIER_ACCESS_COPY_DEST * AnyBit(flags, GPU_AccessFlag_CopyWrite); result |= D3D12_BARRIER_ACCESS_COPY_SOURCE * AnyBit(flags, GPU_AccessFlag_CopyRead); result |= D3D12_BARRIER_ACCESS_INDEX_BUFFER * AnyBit(flags, GPU_AccessFlag_IndexBuffer); result |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT * AnyBit(flags, GPU_AccessFlag_IndirectArgument); result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ * AnyBit(flags, GPU_AccessFlag_DepthStencilRead); result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE * AnyBit(flags, 
GPU_AccessFlag_DepthStencilWrite); result |= D3D12_BARRIER_ACCESS_RENDER_TARGET * AnyBit(flags, GPU_AccessFlag_RenderTargetWrite); } return result; } D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayoutKind(GPU_LayoutKind kind) { PERSIST Readonly D3D12_BARRIER_LAYOUT translate[] = { [GPU_LayoutKind_Undefined] = D3D12_BARRIER_LAYOUT_UNDEFINED, [GPU_LayoutKind_Present] = D3D12_BARRIER_LAYOUT_COMMON, [GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_COMMON, [GPU_LayoutKind_AnyQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS, [GPU_LayoutKind_AnyQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_GENERIC_READ, [GPU_LayoutKind_AnyQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_SHADER_RESOURCE, [GPU_LayoutKind_AnyQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COPY_SOURCE, [GPU_LayoutKind_DirectQueue_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON, [GPU_LayoutKind_DirectQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS, [GPU_LayoutKind_DirectQueue_ShaderRead_CopyRead_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, [GPU_LayoutKind_DirectQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE, [GPU_LayoutKind_DirectQueue_CopyRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE, [GPU_LayoutKind_DirectQueue_DepthStencilRead_DepthStencilWrite] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, [GPU_LayoutKind_DirectQueue_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ, [GPU_LayoutKind_DirectQueue_RenderTargetWrite] = D3D12_BARRIER_LAYOUT_RENDER_TARGET, [GPU_LayoutKind_ComputeQueue_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON, [GPU_LayoutKind_ComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS, [GPU_LayoutKind_ComputeQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ, [GPU_LayoutKind_ComputeQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE, 
[GPU_LayoutKind_ComputeQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE, }; return translate[kind]; }; //////////////////////////////////////////////////////////// //~ Pipeline JobImpl(GPU_D12_LoadPipeline, sig, _) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Pipeline *pipeline = sig->pipeline; GPU_D12_PipelineDesc desc = pipeline->desc; HRESULT hr = 0; b32 ok = 1; String error_str = ZI; /* Create PSO */ ID3D12PipelineState *pso = 0; if (ok && (!IsResourceNil(desc.vs.resource) != 0 || !IsResourceNil(desc.ps.resource))) { D3D12_RASTERIZER_DESC raster_desc = ZI; if (desc.is_wireframe) { raster_desc.FillMode = D3D12_FILL_MODE_WIREFRAME; } else { raster_desc.FillMode = D3D12_FILL_MODE_SOLID; } raster_desc.CullMode = D3D12_CULL_MODE_NONE; raster_desc.FrontCounterClockwise = 0; raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; raster_desc.DepthClipEnable = 1; raster_desc.MultisampleEnable = 0; raster_desc.AntialiasedLineEnable = 0; raster_desc.ForcedSampleCount = 0; raster_desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; D3D12_BLEND_DESC blend_desc = ZI; blend_desc.AlphaToCoverageEnable = 0; blend_desc.IndependentBlendEnable = 0; blend_desc.RenderTarget[0].BlendEnable = 1; blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; D3D12_DEPTH_STENCIL_DESC ds_desc = ZI; ds_desc.DepthEnable = 0; ds_desc.StencilEnable = 0; String vs = DataFromResource(desc.vs.resource); String ps = 
DataFromResource(desc.ps.resource); D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = ZI; pso_desc.pRootSignature = g->bindless_rootsig; pso_desc.VS.pShaderBytecode = vs.text; pso_desc.VS.BytecodeLength = vs.len; pso_desc.PS.pShaderBytecode = ps.text; pso_desc.PS.BytecodeLength = ps.len; pso_desc.RasterizerState = raster_desc; pso_desc.BlendState = blend_desc; pso_desc.DepthStencilState = ds_desc; pso_desc.PrimitiveTopologyType = desc.topology_type; pso_desc.SampleMask = UINT_MAX; pso_desc.SampleDesc.Count = 1; pso_desc.SampleDesc.Quality = 0; for (i32 i = 0; i < (i32)countof(desc.render_target_formats); ++i) { StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc.render_target_formats)); DXGI_FORMAT format = GPU_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[i]); if (format != DXGI_FORMAT_UNKNOWN) { pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format; } else { break; } } hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); if (FAILED(hr)) { error_str = Lit("Failed to create pipeline state object"); ok = 0; } } else if (ok) { String cs = DataFromResource(desc.cs.resource); D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = ZI; pso_desc.pRootSignature = g->bindless_rootsig; pso_desc.CS.pShaderBytecode = cs.text; pso_desc.CS.BytecodeLength = cs.len; hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); if (FAILED(hr)) { error_str = Lit("Failed to create pipeline state object"); ok = 0; } } pipeline->pso = pso; pipeline->error = error_str; pipeline->ok = ok; } GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc) { GPU_D12_SharedState *g = &GPU_D12_shared_state; u64 hash = RandU64FromSeed(HashFnv64(Fnv64Basis, StringFromStruct(&desc))); GPU_D12_Pipeline *pipeline = 0; b32 is_pipeline_new = 0; GPU_D12_PipelineBin *bin = &g->pipeline_bins[hash % countof(g->pipeline_bins)]; { { Lock lock = LockS(&bin->mutex); for (pipeline = bin->first; 
pipeline; pipeline = pipeline->next_in_bin) { if (pipeline->hash == hash) break; } Unlock(&lock); } if (!pipeline) { Lock lock = LockE(&bin->mutex); for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin) { if (pipeline->hash == hash) break; } if (!pipeline) { Arena *perm = PermArena(); PushAlign(perm, CachelineSize); pipeline = PushStruct(perm, GPU_D12_Pipeline); pipeline->desc = desc; pipeline->hash = hash; is_pipeline_new = 1; PushAlign(perm, CachelineSize); SllStackPushN(bin->first, pipeline, next_in_bin); } Unlock(&lock); } } if (is_pipeline_new) { RunJob(GPU_D12_LoadPipeline, .fence = &pipeline->ready_fence, .sig.pipeline = pipeline); } YieldOnFence(&pipeline->ready_fence, 1); return pipeline; } //////////////////////////////////////////////////////////// //~ Queue GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind) { GPU_D12_SharedState *g = &GPU_D12_shared_state; return &g->queues[kind]; } //////////////////////////////////////////////////////////// //~ Descriptor // GPU_D12_Descriptor *GPU_D12_AcquireDescriptor(GPU_D12_DescriptorHeap *heap) // { // GPU_D12_Descriptor *d = 0; // u32 index = 0; // D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI; // { // Lock lock = LockE(&heap->mutex); // if (heap->first_free) // { // d = heap->first_free; // heap->first_free = d->next_free; // handle = d->handle; // index = d->index; // } // else // { // if (heap->allocated_count >= heap->max_count) // { // Panic(Lit("Max descriptors reached in heap")); // } // d = PushStructNoZero(heap->arena, GPU_D12_Descriptor); // index = heap->allocated_count++; // handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); // Atomic64FetchAdd(&GPU_D12_shared_state.driver_descriptors_allocated, 1); // } // Unlock(&lock); // } // ZeroStruct(d); // d->valid = 1; // d->heap = heap; // d->handle = handle; // d->index = index; // return d; // } // void GPU_D12_ReleaseDescriptor(GPU_D12_Descriptor *descriptor) // { // GPU_D12_DescriptorHeap *heap = descriptor->heap; 
//  Lock lock = LockE(&heap->mutex);
//  {
//   descriptor->next_free = heap->first_free;
//   heap->first_free = descriptor;
//  }
//  Unlock(&lock);
// }
// GPU_D12_Descriptor *GPU_D12_DescriptorFromRtPtr(RenderTargetGpuPtr ptr)
// {
//  /* TODO */
//  return 0;
// }
// D3D12_INDEX_BUFFER_VIEW GPU_D12_IbvFromIbPtr(IndexBufferGpuPtr ptr)
// {
//  /* TODO */
//  D3D12_INDEX_BUFFER_VIEW result = ZI;
//  return result;
// }

////////////////////////////////////////////////////////////
//~ Raw command list

/* Returns a reset, ready-to-record allocator+list pair for the given queue.
 * Recycles the oldest committed list whose commit fence value the GPU has
 * already reached; otherwise allocates a fresh pair from the permanent arena.
 * The freshly created list is closed immediately so the common reset path
 * below applies to both recycled and new lists. */
GPU_D12_RawCommandList *GPU_D12_PrepareRawCommandList(GPU_QueueKind queue_kind)
{
 GPU_D12_SharedState *g = &GPU_D12_shared_state;
 GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);

 /* Pull first completed command list from queue if ready */
 GPU_D12_RawCommandList *cl = ZI;
 {
  Lock lock = LockE(&queue->commit_mutex);
  {
   u64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence);
   cl = queue->first_committed_cl;
   /* The committed list is FIFO in fence order, so only the head needs checking. */
   if (cl && cl->commit_fence_target <= completed)
   {
    SllQueuePop(queue->first_committed_cl, queue->last_committed_cl);
   }
   else
   {
    cl = 0;
   }
  }
  Unlock(&lock);
 }

 /* Allocate new command list if none are available */
 if (!cl)
 {
  Arena *perm = PermArena();
  {
   PushAlign(perm, CachelineSize);
   cl = PushStruct(perm, GPU_D12_RawCommandList);
   PushAlign(perm, CachelineSize);
  }
  cl->queue = queue;
  HRESULT hr = ID3D12Device_CreateCommandAllocator(g->device, queue->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca);
  if (FAILED(hr)) { Panic(Lit("Failed to create command allocator")); }
  hr = ID3D12Device_CreateCommandList(g->device, 0, queue->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList7, (void **)&cl->cl);
  if (FAILED(hr)) { Panic(Lit("Failed to create command list")); }
  /* New lists start in the recording state; close so the reset below is uniform. */
  hr = ID3D12GraphicsCommandList_Close(cl->cl);
  if (FAILED(hr)) { Panic(Lit("Failed to close command list during initialization")); }
 }

 /* Reset command list */
 {
  HRESULT hr = ID3D12CommandAllocator_Reset(cl->ca);
  if (FAILED(hr)) { Panic(Lit("Failed to reset command allocator")); }
  hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0);
  if (FAILED(hr)) { Panic(Lit("Failed to reset command list")); }
 }
 return cl;
}

/* Closes `cl`, submits it on its queue, signals the queue's commit fence with
 * a fresh monotonically-increasing target, and appends the list to the queue's
 * in-flight FIFO so PrepareRawCommandList can recycle it once the GPU passes
 * that target. Execute/Signal/append happen under the commit mutex so the
 * FIFO stays in fence order. */
void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl)
{
 GPU_D12_Queue *queue = cl->queue;

 /* Close */
 {
  __profn("Close DX12 command list");
  HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl);
  if (FAILED(hr))
  {
   /* TODO: Don't panic */
   Panic(Lit("Failed to close command list before execution"));
  }
 }

 /* Commit */
 {
  __profn("Commit");
  Lock lock = LockE(&queue->commit_mutex);
  {
   u64 target = ++queue->commit_fence_target;
   cl->commit_fence_target = target;
   /* Execute */
   ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->cl);
   ID3D12CommandQueue_Signal(queue->d3d_queue, queue->commit_fence, target);
   /* Append */
   SllQueuePush(queue->first_committed_cl, queue->last_committed_cl, cl);
  }
  Unlock(&lock);
 }
}

////////////////////////////////////////////////////////////
//~ Queue sync job

// JobImpl(GPU_D12_StartQueueSync, _, __)
// {
//  GPU_D12_SharedState *g = &GPU_D12_shared_state;
//  HANDLE queue_fences_events[GPU_NumQueues] = ZI;
//  i64 queue_fences_seen[GPU_NumQueues] = ZI;
//  for (i32 i = 0; i < countof(queue_fences_events); ++i)
//  {
//   queue_fences_events[i] = CreateEvent(0, 0, 1, 0);
//   queue_fences_seen[i] = -1;
//  }
//  for (;;)
//  {
//   WaitForMultipleObjects(countof(queue_fences_events), queue_fences_events, 0, INFINITE);
//   for (GPU_QueueKind queue_kind = 0; queue_kind < GPU_NumQueues; ++queue_kind)
//   {
//    GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
//    i64 last_seen = queue_fences_seen[queue_kind];
//    i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence);
//    if (completed > last_seen)
//    {
//     SetFence(&queue->sync_fence, completed);
//     queue_fences_seen[queue_kind] = completed;
//     ID3D12Fence_SetEventOnCompletion(queue->commit_fence, completed + 1, queue_fences_events[queue_kind]);
//    }
//   }
//  }
// }

////////////////////////////////////////////////////////////
//~
@hookimpl Startup hook void GPU_Startup(void) { GPU_D12_Startup(); } //////////////////////////////////////////////////////////// //~ @hookimpl Fence hooks Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind) { GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); return &queue->sync_fence; } void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value) { GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a); GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b); ID3D12Fence *b_fence = queue_b->commit_fence; ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value); } //////////////////////////////////////////////////////////// //~ @hookimpl Resource hooks #if 0 GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Resource *r = 0; if (desc.kind == GPU_ResourceKind_Unknown) { Panic(Lit("Unknown gpu resource type")); } u64 buffer_size = 0; if (desc.kind == GPU_ResourceKind_Buffer) { desc.buffer.stride = MaxU32(desc.buffer.stride, 1); buffer_size = MaxU64(AlignU64Pow2(desc.buffer.count * desc.buffer.stride), Kibi(64)); } u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size); /* Grab reusable */ { u64 bin_index = reuse_hash % countof(g->resource_reuse_bins); GPU_D12_ResourceReuseListBin *bin = &g->resource_reuse_bins[bin_index]; { Lock lock = LockE(&bin->mutex); { GPU_D12_ResourceReuseList *list = bin->first; for (; list; list = list->next) { if (list->hash == reuse_hash) break; } if (list) { r = list->first; list->first = r->next_free; if (!list->first) { DllQueueRemove(bin->first, bin->last, list); SllStackPush(bin->first_free, list); list->prev = 0; } r->next_free = 0; } } Unlock(&lock); } } /* Grab from free list */ if (!r) { { Lock lock = LockE(&g->free_resources_mutex); r = g->first_free_resource; if (r) { g->first_free_resource = r->next_free; } Unlock(&lock); } if (r) { ZeroStruct(r); } } /* Push new */ if (!r) { Arena *perm = PermArena(); PushAlign(perm, 
CachelineSize); r = PushStruct(perm, GPU_D12_Resource); PushAlign(perm, CachelineSize); } /* Create d3d resource */ if (!r->d3d_resource) { switch (desc.kind) { default: break; /* Buffer */ case GPU_ResourceKind_Buffer: { D3D12_HEAP_FLAGS heap_flags = 0; if (!(desc.flags & GPU_ResourceFlag_Zeroed)) { heap_flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; } D3D12_HEAP_PROPERTIES heap_props = { .Type = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_HEAP_TYPE_UPLOAD : desc.buffer.heap_kind == GPU_HeapKind_Download ? D3D12_HEAP_TYPE_READBACK : D3D12_HEAP_TYPE_DEFAULT }; Assert(!(desc.flags & GPU_ResourceFlag_Rasterizable)); D3D12_RESOURCE_DESC d3d_desc = ZI; d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; d3d_desc.Format = DXGI_FORMAT_UNKNOWN; d3d_desc.Alignment = 0; d3d_desc.Width = buffer_size; d3d_desc.Height = 1; d3d_desc.DepthOrArraySize = 1; d3d_desc.MipLevels = 1; d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Quality = 0; d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_Writable); r->state = desc.buffer.heap_kind == GPU_HeapKind_Download ? 
D3D12_RESOURCE_STATE_COPY_DEST : D3D12_RESOURCE_STATE_COMMON; HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, r->state, 0, &IID_ID3D12Resource, (void **)&r->d3d_resource); Atomic64FetchAdd(&g->driver_resources_allocated, 1); if (FAILED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to create buffer resource")); } r->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->d3d_resource); } break; /* Texture */ case GPU_ResourceKind_Texture1D: case GPU_ResourceKind_Texture2D: case GPU_ResourceKind_Texture3D: { D3D12_HEAP_FLAGS heap_flags = 0; if (!(desc.flags & GPU_ResourceFlag_Zeroed)) { heap_flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; } D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT }; D3D12_RESOURCE_DESC d3d_desc = ZI; d3d_desc.Dimension = desc.kind == GPU_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : desc.kind == GPU_ResourceKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D : D3D12_RESOURCE_DIMENSION_TEXTURE3D; d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; d3d_desc.Format = GPU_D12_DxgiFormatFromGpuFormat(desc.texture.format); d3d_desc.Alignment = 0; d3d_desc.Width = MaxI32(desc.texture.size.x, 1); d3d_desc.Height = MaxI32(desc.texture.size.y, 1); d3d_desc.DepthOrArraySize = MaxI32(desc.texture.size.z, 1); d3d_desc.MipLevels = (desc.flags & GPU_ResourceFlag_MaxMipLevels) ? 
0 : MaxI32(desc.texture.mip_levels, 1); d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Quality = 0; d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_Writable); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_Rasterizable); r->state = D3D12_RESOURCE_STATE_COMMON; D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } }; clear_value.Color[0] = desc.clear_color.x; clear_value.Color[1] = desc.clear_color.y; clear_value.Color[2] = desc.clear_color.z; clear_value.Color[3] = desc.clear_color.w; D3D12_CLEAR_VALUE *clear_value_ptr = d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0; HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, r->state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->d3d_resource); Atomic64FetchAdd(&g->driver_resources_allocated, 1); if (FAILED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to create buffer resource")); } } break; } } r->srv_descriptor = &GPU_D12_NilDescriptor; r->uav_descriptor = &GPU_D12_NilDescriptor; r->rtv_descriptor = &GPU_D12_NilDescriptor; r->sampler_descriptor = &GPU_D12_NilDescriptor; /* Create texture srv descriptor */ if (desc.kind == GPU_ResourceKind_Texture1D || desc.kind == GPU_ResourceKind_Texture2D || desc.kind == GPU_ResourceKind_Texture3D) { if (!r->srv_descriptor->valid) { r->srv_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap); } ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, 0, r->srv_descriptor->handle); } /* Create buffer srv descriptor */ if (desc.kind == GPU_ResourceKind_Buffer && desc.buffer.heap_kind != GPU_HeapKind_Download && desc.buffer.count > 0) { if (!r->srv_descriptor->valid) { r->srv_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap); } D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = ZI; srv_desc.Format = DXGI_FORMAT_UNKNOWN; srv_desc.ViewDimension = 
D3D12_SRV_DIMENSION_BUFFER; srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; srv_desc.Buffer.FirstElement = 0; srv_desc.Buffer.NumElements = desc.buffer.count; srv_desc.Buffer.StructureByteStride = desc.buffer.stride; srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, &srv_desc, r->srv_descriptor->handle); } /* Create uav descriptor */ if (desc.flags & GPU_ResourceFlag_Writable) { if (!r->uav_descriptor->valid) { r->uav_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap); } ID3D12Device_CreateUnorderedAccessView(g->device, r->d3d_resource, 0, 0, r->uav_descriptor->handle); } /* Create rtv descriptor */ if (desc.flags & GPU_ResourceFlag_Rasterizable) { if (!r->rtv_descriptor->valid) { r->rtv_descriptor = GPU_D12_AcquireDescriptor(g->rtv_heap); } ID3D12Device_CreateRenderTargetView(g->device, r->d3d_resource, 0, r->rtv_descriptor->handle); } /* Create sampler descriptor */ if (desc.kind == GPU_ResourceKind_Sampler) { if (!r->sampler_descriptor->valid) { r->sampler_descriptor = GPU_D12_AcquireDescriptor(g->sampler_heap); } D3D12_SAMPLER_DESC d3d_desc = ZI; d3d_desc.Filter = (D3D12_FILTER)desc.sampler.filter; d3d_desc.AddressU = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.x; d3d_desc.AddressV = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.y; d3d_desc.AddressW = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.z; d3d_desc.MipLODBias = desc.sampler.mip_lod_bias; d3d_desc.MaxAnisotropy = MaxU32(desc.sampler.max_anisotropy, 1); d3d_desc.ComparisonFunc = (D3D12_COMPARISON_FUNC)desc.sampler.comparison; d3d_desc.BorderColor[0] = desc.sampler.border_color.x; d3d_desc.BorderColor[1] = desc.sampler.border_color.y; d3d_desc.BorderColor[2] = desc.sampler.border_color.z; d3d_desc.BorderColor[3] = desc.sampler.border_color.w; d3d_desc.MinLOD = desc.sampler.min_lod; d3d_desc.MaxLOD = desc.sampler.max_lod; /* Defaults */ if (d3d_desc.AddressU == 0) d3d_desc.AddressU = 
D3D12_TEXTURE_ADDRESS_MODE_CLAMP; if (d3d_desc.AddressV == 0) d3d_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; if (d3d_desc.AddressW == 0) d3d_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; if (d3d_desc.MaxLOD >= F32Infinity) { d3d_desc.MaxLOD = D3D12_FLOAT32_MAX; } ID3D12Device_CreateSampler(g->device, &d3d_desc, r->sampler_descriptor->handle); } r->desc = desc; r->buffer_size = buffer_size; return (GPU_Resource *)r; } void GPU_ReleaseResource(GPU_Resource *gpu_resource, GPU_ReleaseFlag flags) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource; if (r->srv_descriptor->valid) { GPU_D12_ReleaseDescriptor(r->srv_descriptor); } if (r->uav_descriptor->valid) { GPU_D12_ReleaseDescriptor(r->uav_descriptor); } if (r->rtv_descriptor->valid) { GPU_D12_ReleaseDescriptor(r->rtv_descriptor); } if (r->sampler_descriptor->valid) { GPU_D12_ReleaseDescriptor(r->sampler_descriptor); } if (flags & GPU_ReleaseFlag_Reuse) { GPU_ResourceDesc desc = r->desc; u64 buffer_size = r->buffer_size; u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size); u64 bin_index = reuse_hash % countof(g->resource_reuse_bins); GPU_D12_ResourceReuseListBin *bin = &g->resource_reuse_bins[bin_index]; { Lock lock = LockE(&bin->mutex); { GPU_D12_ResourceReuseList *list = bin->first; for (; list; list = list->next) { if (list->hash == reuse_hash) break; } if (!list) { list = bin->first_free; if (list) { bin->first_free = list->next; } else { Arena *perm = PermArena(); PushAlign(perm, CachelineSize); list = PushStruct(perm, GPU_D12_ResourceReuseList); PushAlign(perm, CachelineSize); } list->hash = reuse_hash; DllQueuePush(bin->first, bin->last, list); } SllStackPushN(list->first, r, next_free); } Unlock(&lock); } } else { switch (r->desc.kind) { default: break; case GPU_ResourceKind_Buffer: case GPU_ResourceKind_Texture1D: case GPU_ResourceKind_Texture2D: case GPU_ResourceKind_Texture3D: { ID3D12Resource_Release(r->d3d_resource); 
Atomic64FetchAdd(&g->driver_resources_allocated, -1); } break; } Lock lock = LockE(&g->free_resources_mutex); r->next_free = g->first_free_resource; g->first_free_resource = r; Unlock(&lock); } } Vec2I32 GPU_GetTextureSize2D(GPU_Resource *gpu_resource) { GPU_D12_Resource *resource = (GPU_D12_Resource *)gpu_resource; return VEC2I32(resource->desc.texture.size.x, resource->desc.texture.size.y); } Vec3I32 GPU_GetTextureSize3D(GPU_Resource *gpu_resource) { GPU_D12_Resource *resource = (GPU_D12_Resource *)gpu_resource; return resource->desc.texture.size; } u64 GPU_GetFootprintSize(GPU_Resource *gpu_resource) { GPU_D12_SharedState *g = &GPU_D12_shared_state; D3D12_RESOURCE_DESC desc = ZI; D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; ID3D12Resource_GetDesc(((GPU_D12_Resource *)gpu_resource)->d3d_resource, &desc); u64 footprint_size = 0; u64 upload_row_size = 0; u32 upload_num_rows = 0; ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &footprint_size); return footprint_size; } u64 GPU_GetBufferCount(GPU_Resource *gpu_resource) { GPU_D12_Resource *resource = (GPU_D12_Resource *)gpu_resource; return resource->desc.buffer.count; } #endif //////////////////////////////////////////////////////////// //~ @hookimpl Arena GPU_ArenaHandle GPU_AcquireArena(void) { GPU_D12_Arena *gpu_arena = 0; { Arena *perm = PermArena(); gpu_arena = PushStruct(perm, GPU_D12_Arena); } return (GPU_ArenaHandle) { .v = (u64)gpu_arena }; } void GPU_ReleaseArena(GPU_ArenaHandle arena) { /* TODO */ } //////////////////////////////////////////////////////////// //~ Resource helpers GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_Resource *resource, GPU_D12_DescriptorHeapKind heap_kind) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[heap_kind]; GPU_D12_Descriptor *descriptor = 0; /* Grab completed descriptor from arena */ if (!descriptor) { 
GPU_D12_DescriptorList *descriptors_by_queue = gpu_arena->committed_descriptors_by_heap_and_queue[heap_kind]; for (GPU_QueueKind queue_kind = 0; !descriptor && queue_kind < GPU_NumQueues; ++queue_kind) { GPU_D12_DescriptorList *descriptors = &descriptors_by_queue[queue_kind]; descriptor = descriptors->first; if (descriptor) { GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); u64 queue_commit_completion = ID3D12Fence_GetCompletedValue(queue->commit_fence); if (queue_commit_completion >= descriptor->queue_commit_target) { /* Descriptor no longer in use by gpu, reuse it */ SllQueuePop(descriptors->first, descriptors->last); } else { /* Descriptor may still be in use by gpu */ descriptor = 0; } } } } /* Allocate new descriptor from heap */ u32 index = 0; if (!descriptor) { Lock lock = LockE(&heap->mutex); { if (heap->first_free) { descriptor = heap->first_free; SllStackPop(heap->first_free); index = descriptor->index; } else { descriptor = PushStructNoZero(heap->descriptors_arena, GPU_D12_Descriptor); index = heap->allocated_count++; if (index >= heap->max_count) { Panic(Lit("Max descriptors reached in heap")); } } } Unlock(&lock); } /* Initialize descriptor handle */ ZeroStruct(descriptor); descriptor->heap = heap; descriptor->resource = resource; descriptor->index = index; descriptor->handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); return descriptor; } GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_kind, u32 index) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[heap_kind]; GPU_D12_Descriptor *descriptors = ArenaFirst(heap->descriptors_arena, GPU_D12_Descriptor); return &descriptors[index]; } D3D12_INDEX_BUFFER_VIEW GPU_D12_IbvFromIbPtr(IndexBufferGpuPtr ptr) { /* TODO */ D3D12_INDEX_BUFFER_VIEW result = ZI; return result; } //////////////////////////////////////////////////////////// //~ @hookimpl Resource //- Resource creation GPU_ResourceHandle 
GPU_PushBufferEx(GPU_ArenaHandle arena, GPU_BufferDesc desc) { /* TODO */ return (GPU_ResourceHandle) { 0 }; } GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena, GPU_TextureDesc desc) { /* TODO */ return (GPU_ResourceHandle) { 0 }; } GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc) { /* TODO */ return (GPU_ResourceHandle) { 0 }; } //- Pointer creation BufferGpuPtr GPU_PushBufferPtrEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u32 element_size, RngU32 element_range) { /* TODO */ return (BufferGpuPtr) { 0 }; } RWBufferGpuPtr GPU_PushRWBufferPtrEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u32 element_size, RngU32 element_range) { /* TODO */ return (RWBufferGpuPtr) { 0 }; } IndexBufferGpuPtr GPU_PushIndexBufferPtrEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u32 element_size, RngU32 element_range) { /* TODO */ return (IndexBufferGpuPtr) { 0 }; } Texture1DGpuPtr GPU_PushTexture1DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) { /* TODO */ return (Texture1DGpuPtr) { 0 }; } RWTexture1DGpuPtr GPU_PushRWTexture1DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) { /* TODO */ return (RWTexture1DGpuPtr) { 0 }; } Texture2DGpuPtr GPU_PushTexture2DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) { /* TODO */ return (Texture2DGpuPtr) { 0 }; } RWTexture2DGpuPtr GPU_PushRWTexture2DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) { /* TODO */ return (RWTexture2DGpuPtr) { 0 }; } Texture3DGpuPtr GPU_PushTexture3DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) { /* TODO */ return (Texture3DGpuPtr) { 0 }; } RWTexture3DGpuPtr GPU_PushRWTexture3DPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) { /* TODO */ return (RWTexture3DGpuPtr) { 0 }; } RenderTargetGpuPtr GPU_PushRenderTargetPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) { /* 
Allocate descriptor */ GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Arena *arena = GPU_D12_ArenaFromHandle(arena_handle); GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle); GPU_D12_Descriptor *rtv_descriptor = GPU_D12_PushDescriptor(arena, resource, GPU_D12_DescriptorHeapKind_Rtv); /* Initialize descriptor */ ID3D12Device_CreateRenderTargetView(g->device, resource->d3d_resource, 0, rtv_descriptor->handle); /* TODO */ return (RenderTargetGpuPtr) { .v = rtv_descriptor->index }; } SamplerGpuPtr GPU_PushSamplerPtr(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) { /* TODO */ return (SamplerGpuPtr) { 0 }; } //- Count u64 GPU_CountBufferEx(GPU_ResourceHandle buffer, u64 element_size) { /* TODO */ return 0; } u64 GPU_Count1D(GPU_ResourceHandle texture1d) { /* TODO */ return 0; } u64 GPU_Count2D(GPU_ResourceHandle texture2d) { /* TODO */ return 0; } u64 GPU_Count3D(GPU_ResourceHandle texture3d) { /* TODO */ return 0; } //////////////////////////////////////////////////////////// //~ Command helpers GPU_D12_Cmd *GPU_D12_PushCmd(GPU_D12_CmdList *cl) { GPU_D12_SharedState *g = &GPU_D12_shared_state; /* Grab chunk */ GPU_D12_CmdChunk *chunk = cl->last_cmd_chunk; { if (chunk && chunk->cmds_count >= GPU_D12_CmdsPerChunk) { chunk = 0; } if (!chunk) { Lock lock = LockE(&g->free_cmd_chunks_mutex); { chunk = g->first_free_cmd_chunk; if (chunk) { g->first_free_cmd_chunk = chunk->next; } } Unlock(&lock); if (chunk) { GPU_D12_Cmd *cmds = chunk->cmds; ZeroStruct(chunk); chunk->cmds = cmds; } } if (!chunk) { Arena *perm = PermArena(); chunk = PushStruct(perm, GPU_D12_CmdChunk); chunk->cmds = PushStructsNoZero(perm, GPU_D12_Cmd, GPU_D12_CmdsPerChunk); } if (chunk != cl->last_cmd_chunk) { SllQueuePush(cl->first_cmd_chunk, cl->last_cmd_chunk, chunk); } } /* Push cmd to chunk */ GPU_D12_Cmd *cmd = &chunk->cmds[chunk->cmds_count++]; ++cl->cmds_count; return cmd; } //////////////////////////////////////////////////////////// //~ @hookimpl 
Command //- Command list GPU_CommandListHandle GPU_PrepareCommandList(void) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = 0; Lock lock = LockE(&g->free_cmd_lists_mutex); { cl = g->first_free_cmd_list; if (cl) { g->first_free_cmd_list = cl->next; ZeroStruct(cl); } else { Arena *perm = PermArena(); cl = PushStruct(perm, GPU_D12_CmdList); } } Unlock(&lock); return (GPU_CommandListHandle) { .v = (u64)cl }; } void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_kind, u64 fence_ops_count, GPU_FenceOp *fence_ops) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); TempArena scratch = BeginScratchNoConflict(); /* Begin dx12 command list */ GPU_D12_RawCommandList *dx12_cl = GPU_D12_PrepareRawCommandList(queue_kind); ID3D12GraphicsCommandList7 *rcl = dx12_cl->cl; /* Pipeline state */ b32 graphics_rootsig_set = 0; b32 compute_rootsig_set = 0; b32 descriptor_heaps_set = 0; GPU_D12_Pipeline *bound_pipeline = 0; /* Rasterizer state */ D3D12_VIEWPORT bound_viewport = ZI; D3D12_RECT bound_scissor = ZI; D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; D3D12_INDEX_BUFFER_VIEW bound_ibv = ZI; D3D12_CPU_DESCRIPTOR_HANDLE bound_raster_targets[GPU_MaxRenderTargets] = ZI; /* Flatten command chunks */ u64 cmds_count = 0; GPU_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, GPU_D12_Cmd, cl->cmds_count); { /* Flatten command chunks */ { for (GPU_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next) { for (u64 cmd_chunk_idx = 0; cmd_chunk_idx < chunk->cmds_count; ++cmd_chunk_idx) { cmds[cmds_count++] = chunk->cmds[cmd_chunk_idx]; } } } /* Free command chunks */ { Lock lock = LockE(&g->free_cmd_chunks_mutex); { for (GPU_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next) { chunk->next = g->first_free_cmd_chunk; g->first_free_cmd_chunk = chunk; } } Unlock(&lock); } } /* Batch barrier 
cmds */ { u64 cmd_idx = 0; u64 batch_gen = 0; GPU_D12_Cmd *prev_barrier_cmd = 0; while (cmd_idx < cmds_count) { GPU_D12_Cmd *cmd = &cmds[cmd_idx]; switch (cmd->kind) { /* Non-batch-interrupting cmds */ case GPU_D12_CmdKind_Constant: { cmd_idx += 1; } break; /* Batch-interrupting cmds */ case GPU_D12_CmdKind_Copy: case GPU_D12_CmdKind_Compute: case GPU_D12_CmdKind_Rasterize: case GPU_D12_CmdKind_ClearRtv: { cmd_idx += 1; batch_gen += 1; } break; case GPU_D12_CmdKind_Barrier: { /* Determine 'before' state from lookup */ if (prev_barrier_cmd) { if (prev_barrier_cmd->barrier.batch_gen != batch_gen) { /* This barrier is part of new batch */ prev_barrier_cmd->barrier.is_end_of_batch = 1; } else { /* Barriers can be batched */ prev_barrier_cmd->skip = 1; } } cmd->barrier.batch_gen = batch_gen; prev_barrier_cmd = cmd; cmd_idx += 1; } break; } } if (prev_barrier_cmd) { prev_barrier_cmd->barrier.is_end_of_batch = 1; } } /* Process gpu commands into dx12 commands */ { u64 batch_barrier_idx_start = 0; u64 batch_barrier_idx_opl = 0; /* One past last */ u64 cmd_idx = 0; while (cmd_idx < cmds_count) { GPU_D12_Cmd *cmd = &cmds[cmd_idx]; if (cmd->skip) { cmd_idx += 1; } else { switch (cmd->kind) { default: { cmd_idx += 1; } break; //- Access case GPU_D12_CmdKind_Barrier: { batch_barrier_idx_opl = cmd_idx + 1; /* Submit batched barriers */ if (cmd->barrier.is_end_of_batch) { /* Build barriers */ u64 buffer_barriers_count = 0; u64 texture_barriers_count = 0; u64 global_barriers_count = 0; D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start)); D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start)); D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start)); for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < 
batch_barrier_idx_opl; ++barrier_cmd_idx) { GPU_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx]; if (barrier_cmd->kind == GPU_D12_CmdKind_Barrier && !barrier_cmd->skip) { GPU_BarrierDesc desc = barrier_cmd->barrier.desc; GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(desc.resource); D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; /* Translate gpu barrier kind -> d3d barrier fields */ D3D12_BARRIER_SYNC sync_before = GPU_D12_BarrierSyncFromStageFlags(desc.sync_prev); D3D12_BARRIER_SYNC sync_after = GPU_D12_BarrierSyncFromStageFlags(desc.sync_next); D3D12_BARRIER_ACCESS access_before = GPU_D12_BarrierAccessFromAccessFlags(desc.access_prev); D3D12_BARRIER_ACCESS access_after = GPU_D12_BarrierAccessFromAccessFlags(desc.access_next); /* Build barrier */ switch (barrier_type) { case D3D12_BARRIER_TYPE_BUFFER: { D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++]; barrier->SyncBefore = sync_before; barrier->SyncAfter = sync_after; barrier->AccessBefore = access_before; barrier->AccessAfter = access_after; barrier->pResource = resource->d3d_resource; barrier->Offset = 0; barrier->Size = U64Max; } break; case D3D12_BARRIER_TYPE_TEXTURE: { D3D12_BARRIER_LAYOUT layout_after = 0; if (desc.layout == GPU_LayoutKind_NoChange) { layout_after = resource->texture_layout; } else { layout_after = GPU_D12_BarrierLayoutFromLayoutKind(desc.layout); } D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++]; barrier->SyncBefore = sync_before; barrier->SyncAfter = sync_after; barrier->AccessBefore = access_before; barrier->AccessAfter = access_after; barrier->LayoutBefore = resource->texture_layout; barrier->LayoutAfter = layout_after; barrier->pResource = resource->d3d_resource; barrier->Subresources.IndexOrFirstMipLevel = 0xffffffff; resource->texture_layout = layout_after; } break; case D3D12_BARRIER_TYPE_GLOBAL: { D3D12_GLOBAL_BARRIER *barrier = 
&global_barriers[global_barriers_count++]; barrier->SyncBefore = sync_before; barrier->SyncAfter = sync_after; barrier->AccessBefore = access_before; barrier->AccessAfter = access_after; } break; } } } /* Dispatch barriers */ { u32 barrier_groups_count = 0; D3D12_BARRIER_GROUP barrier_groups[3] = ZI; if (buffer_barriers_count > 0) { D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; group->Type = D3D12_BARRIER_TYPE_BUFFER; group->NumBarriers = buffer_barriers_count; group->pBufferBarriers = buffer_barriers; } if (texture_barriers_count > 0) { D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; group->Type = D3D12_BARRIER_TYPE_TEXTURE; group->NumBarriers = texture_barriers_count; group->pTextureBarriers = texture_barriers; } if (global_barriers_count > 0) { D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; group->Type = D3D12_BARRIER_TYPE_GLOBAL; group->NumBarriers = global_barriers_count; group->pGlobalBarriers = global_barriers; } if (barrier_groups_count > 0) { ID3D12GraphicsCommandList7_Barrier(rcl, barrier_groups_count, barrier_groups); } } batch_barrier_idx_start = cmd_idx + 1; } cmd_idx += 1; } break; //- Copy resource // case GPU_D12_CmdKind_Copy: // { // GPU_D12_Resource *dst = cmd->copy.dst; // GPU_D12_Resource *src = cmd->copy.src; // D3D12_RESOURCE_DESC dst_desc = ZI; // D3D12_RESOURCE_DESC src_desc = ZI; // ID3D12Resource_GetDesc(dst->d3d_resource, &dst_desc); // ID3D12Resource_GetDesc(src->d3d_resource, &src_desc); // if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) // { /* Copy buffer -> buffer */ // u64 dst_len = dst->desc.buffer.count * dst->desc.buffer.stride; // u64 src_len = src->desc.buffer.count * src->desc.buffer.stride; // u64 cpy_len = MinU64(dst_len, src_len); // if (cpy_len > 0) // { // ID3D12GraphicsCommandList_CopyBufferRegion(rcl, dst->d3d_resource, 0, src->d3d_resource, 0, cpy_len); // /* Implicit promotion */ // 
if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; // if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; // } // } // else if (src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) // { /* Copy buffer -> texture */ // D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_placed_footprint = ZI; // ID3D12Device_GetCopyableFootprints(g->device, &dst_desc, 0, 1, 0, &dst_placed_footprint, 0, 0, 0); // D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; // dst_loc.pResource = dst->d3d_resource; // dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; // dst_loc.SubresourceIndex = 0; // D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; // src_loc.pResource = src->d3d_resource; // src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; // src_loc.PlacedFootprint = dst_placed_footprint; // ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, 0, 0, 0, &src_loc, 0); // /* Implicit promotion */ // if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; // if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; // } // else if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) // { /* Copy texture -> buffer */ // /* TODO */ // Assert(0); // } // else if (dst_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER) // { /* Copy texture -> texture */ // /* TODO */ // Assert(0); // } // cmd_idx += 1; // } break; //- Compute case GPU_D12_CmdKind_Compute: { GPU_D12_Pipeline *pipeline = 0; { GPU_D12_PipelineDesc pipeline_desc = ZI; pipeline_desc.cs = cmd->compute.cs; pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); } if (pipeline) { /* Set descriptor heaps */ if (!descriptor_heaps_set) { ID3D12DescriptorHeap *heaps[] = { g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap, }; 
ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); descriptor_heaps_set = 1; } /* Bind rootsig */ if (!compute_rootsig_set) { ID3D12GraphicsCommandList_SetComputeRootSignature(rcl, g->bindless_rootsig); compute_rootsig_set = 1; } /* Bind pipeline */ if (pipeline != bound_pipeline) { ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); bound_pipeline = pipeline; } /* Dispatch */ ID3D12GraphicsCommandList_Dispatch(rcl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); } cmd_idx += 1; } break; //- Rasterize case GPU_D12_CmdKind_Rasterize: { GPU_D12_Pipeline *pipeline = 0; { GPU_D12_PipelineDesc pipeline_desc = ZI; pipeline_desc.vs = cmd->rasterize.vs; pipeline_desc.ps = cmd->rasterize.ps; { pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; switch (cmd->rasterize.mode) { default: Assert(0); break; case GPU_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; case GPU_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; case GPU_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; case GPU_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; case GPU_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; case GPU_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; case GPU_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; } } if (cmd->rasterize.mode == GPU_RasterMode_WireTriangleList || cmd->rasterize.mode == GPU_RasterMode_WireTriangleStrip) { pipeline_desc.is_wireframe = 1; } for (u32 i = 0; i < countof(cmd->rasterize.rtv_descriptors); ++i) { GPU_D12_Descriptor *rtv_descriptor = cmd->rasterize.rtv_descriptors[i]; if (rtv_descriptor != 0) { 
pipeline_desc.render_target_formats[i] = rtv_descriptor->resource->texture_format; } else { pipeline_desc.render_target_formats[i] = GPU_Format_Unknown; } } pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); } /* Calculate IBV count */ u32 indices_count = 0; D3D12_INDEX_BUFFER_VIEW ibv = cmd->rasterize.ibv; if (ibv.Format == DXGI_FORMAT_R16_UINT) { indices_count = ibv.SizeInBytes / 2; } else if (ibv.Format == DXGI_FORMAT_R32_UINT) { indices_count = ibv.SizeInBytes / 4; } /* Prepare & dispatch */ if (pipeline && indices_count > 0) { /* Set descriptor heaps */ if (!descriptor_heaps_set) { ID3D12DescriptorHeap *heaps[] = { g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap, }; ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); descriptor_heaps_set = 1; } /* Bind rootsig */ if (!graphics_rootsig_set) { ID3D12GraphicsCommandList_SetGraphicsRootSignature(rcl, g->bindless_rootsig); graphics_rootsig_set = 1; } /* Bind pipeline */ if (pipeline != bound_pipeline) { ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); bound_pipeline = pipeline; } // /* Fill signature */ // /* TODO: Only upload dirty */ // { // u32 sig_size = cmd->rasterize.sig_size; // void *sig = cmd->rasterize.sig; // u32 num32bit = sig_size / 4; // ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(rcl, 0, num32bit, sig, 0); // } /* Set viewport */ { D3D12_VIEWPORT viewport = ZI; { Rng3 range = cmd->rasterize.viewport; viewport.TopLeftX = range.p0.x; viewport.TopLeftY = range.p0.y; viewport.Width = range.p1.x - range.p0.x; viewport.Height = range.p1.y - range.p0.y; viewport.MinDepth = range.p0.z; viewport.MaxDepth = range.p1.z; } if (!MatchStruct(&viewport, &bound_viewport)) { bound_viewport = viewport; ID3D12GraphicsCommandList_RSSetViewports(rcl, 1, &viewport); } } /* Set scissor */ { D3D12_RECT scissor = ZI; { Rng2 range = cmd->rasterize.scissor; scissor.left = range.p0.x; 
scissor.top = range.p0.y; scissor.right = range.p1.x; scissor.bottom = range.p1.y; } if (!MatchStruct(&scissor, &bound_scissor)) { bound_scissor = scissor; ID3D12GraphicsCommandList_RSSetScissorRects(rcl, 1, &scissor); } } /* Set topology */ { D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; switch (cmd->rasterize.mode) { default: Assert(0); break; case GPU_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; case GPU_RasterMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; case GPU_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; case GPU_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; case GPU_RasterMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; case GPU_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; case GPU_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; } if (topology != bound_primitive_topology) { ID3D12GraphicsCommandList_IASetPrimitiveTopology(rcl, topology); } } /* Set index buffer */ if (!MatchStruct(&ibv, &bound_ibv)) { ID3D12GraphicsCommandList_IASetIndexBuffer(rcl, &ibv); bound_ibv = ibv; } /* Bind render targets */ { b32 om_dirty = 0; u32 rtvs_count = 0; D3D12_CPU_DESCRIPTOR_HANDLE rtvs[countof(bound_raster_targets)] = ZI; for (u32 i = 0; i < countof(cmd->rasterize.rtv_descriptors); ++i) { GPU_D12_Descriptor *rtv_desc = cmd->rasterize.rtv_descriptors[i]; if (rtv_desc != 0) { om_dirty = om_dirty || (bound_raster_targets[i].ptr != rtv_desc->handle.ptr); rtvs[rtvs_count++] = rtv_desc->handle; } else { break; } } if (om_dirty) { CopyStructs(bound_raster_targets, rtvs, rtvs_count); ID3D12GraphicsCommandList_OMSetRenderTargets(rcl, rtvs_count, rtvs, 0, 0); } } /* Dispatch */ ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); } cmd_idx += 1; } break; //- Clear rtv case 
GPU_D12_CmdKind_ClearRtv: { GPU_D12_Descriptor *descriptor = cmd->clear_rtv.rtv_descriptor; GPU_D12_Resource *resource = descriptor->resource; f32 clear_color[4] = ZI; clear_color[0] = cmd->clear_rtv.color.x; clear_color[1] = cmd->clear_rtv.color.y; clear_color[2] = cmd->clear_rtv.color.z; clear_color[3] = cmd->clear_rtv.color.w; ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, descriptor->handle, clear_color, 0, 0); cmd_idx += 1; } break; } } } } /* End dx12 command list */ GPU_D12_CommitRawCommandList(dx12_cl); /* Free command list */ { Lock lock = LockE(&g->free_cmd_lists_mutex); { cl->next = g->first_free_cmd_list; g->first_free_cmd_list = cl; } Unlock(&lock); } EndScratch(scratch); } //- Arena void GPU_ResetArena(GPU_CommandListHandle cl_handle, GPU_ArenaHandle arena) { /* TODO */ } //- Cpu -> Gpu copy void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range) { /* TODO */ } void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range) { /* TODO */ // GPU_D12_SharedState *g = &GPU_D12_shared_state; // D3D12_RESOURCE_DESC desc = ZI; // ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc); // u64 upload_size = 0; // u64 upload_row_size = 0; // u32 upload_num_rows = 0; // D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; // ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); // D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; // { // D3D12_RANGE read_range = ZI; // u8 *dst_base = (u8 *)dst + placed_footprint.Offset; // u8 *src_base = src; // u32 z_size = upload_row_size * upload_num_rows; // b32 src_overflow = 0; // for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z) // { // u32 z_offset = z * z_size; // for (u32 y = 0; !src_overflow && y < 
upload_num_rows; ++y) // { // u8 *dst_row = dst_base + y * footprint.RowPitch + z_offset; // u8 *src_row = src_base + y * upload_row_size + z_offset; // CopyBytes(dst_row, src_row, upload_row_size); // } // } // } // GPU_D12_Resource *dst = GPU_D12_ResourceFromHandle(dst_handle); // D3D12_RESOURCE_DESC desc = ZI; // ID3D12Resource_GetDesc(dst->d3d_resource, &desc); // u64 upload_size = 0; // u64 upload_row_size = 0; // u32 upload_num_rows = 0; // D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; // ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, dst->texture_mip_levels, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); // D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; // void *copy_start = ((u8 *)src) + src_copy_range.min; // u64 copy_len = src_copy_range.max - src_copy_range.min; } //- Gpu <-> Gpu copy void GPU_CopyBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, GPU_ResourceHandle src, RngU64 src_copy_range) { /* TODO */ } void GPU_CopyTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst, Vec3I32 dst_offset, GPU_ResourceHandle src, Rng3I32 src_copy_range) { /* TODO */ } //- Constants void GPU_SetConstU32(GPU_CommandListHandle cl_handle, i32 slot, u32 v) { /* TODO */ } void GPU_SetConstF32(GPU_CommandListHandle cl_handle, i32 slot, f32 v) { /* TODO */ } void GPU_SetConstBuffer(GPU_CommandListHandle cl_handle, i32 slot, BufferGpuPtr v) { /* TODO */ } void GPU_SetConstRWBuffer(GPU_CommandListHandle cl_handle, i32 slot, RWBufferGpuPtr v) { /* TODO */ } void GPU_SetConstTexture1D(GPU_CommandListHandle cl_handle, i32 slot, Texture1DGpuPtr v) { /* TODO */ } void GPU_SetConstRWTexture1D(GPU_CommandListHandle cl_handle, i32 slot, RWTexture1DGpuPtr v) { /* TODO */ } void GPU_SetConstTexture2D(GPU_CommandListHandle cl_handle, i32 slot, Texture2DGpuPtr v) { /* TODO */ } void GPU_SetConstRWTexture2D(GPU_CommandListHandle cl_handle, i32 slot, RWTexture2DGpuPtr v) { /* TODO */ } void 
GPU_SetConstTexture3D(GPU_CommandListHandle cl_handle, i32 slot, Texture3DGpuPtr v) { /* TODO */ } void GPU_SetConstRWTexture3D(GPU_CommandListHandle cl_handle, i32 slot, RWTexture3DGpuPtr v) { /* TODO */ } void GPU_SetConstSampler(GPU_CommandListHandle cl_handle, i32 slot, SamplerGpuPtr v) { /* TODO */ } //- Barrier void GPU_BarrierEx(GPU_CommandListHandle cl_handle, GPU_BarrierDesc desc) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_Barrier; cmd->barrier.desc = desc; } //- Compute void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 groups) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_Compute; cmd->compute.cs = cs; cmd->compute.groups = groups; } //- Rasterize void GPU_Rasterize(GPU_CommandListHandle cl_handle, VertexShader vs, PixelShader ps, u32 instances_count, IndexBufferGpuPtr idx_buff, u32 raster_targets_count, RenderTargetGpuPtr *raster_targets, Rng3 viewport, Rng2 scissor, GPU_RasterMode mode) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_Rasterize; cmd->rasterize.vs = vs; cmd->rasterize.ps = ps; cmd->rasterize.instances_count = instances_count; cmd->rasterize.ibv = GPU_D12_IbvFromIbPtr(idx_buff); for (u32 i = 0; i < MinU32(raster_targets_count, GPU_MaxRenderTargets); ++i) { cmd->rasterize.rtv_descriptors[i] = GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind_Rtv, raster_targets[i].v); } cmd->rasterize.viewport = viewport; cmd->rasterize.scissor = scissor; cmd->rasterize.mode = mode; } //- Clear void GPU_ClearRenderTarget(GPU_CommandListHandle cl_handle, RenderTargetGpuPtr ptr, Vec4 color) { GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_ClearRtv; cmd->clear_rtv.rtv_descriptor = 
GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind_Rtv, ptr.v); cmd->clear_rtv.color = color; } //- Profile void GPU_ProfN(GPU_CommandListHandle cl, String name) { /* TODO */ } //////////////////////////////////////////////////////////// //~ @hookimpl Map hooks // GPU_Mapped GPU_Map(GPU_Resource *gpu_r) // { // GPU_Mapped result = ZI; // result.resource = gpu_r; // GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_r; // D3D12_RANGE read_range = ZI; // HRESULT hr = ID3D12Resource_Map(r->d3d_resource, 0, &read_range, &result.mem); // if (FAILED(hr) || !result.mem) // { // /* TODO: Don't panic */ // Panic(Lit("Failed to map command buffer resource")); // } // return result; // } // void GPU_Unmap(GPU_Mapped m) // { // GPU_D12_Resource *r = (GPU_D12_Resource *)m.resource; // ID3D12Resource_Unmap(r->d3d_resource, 0, 0); // } // void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference) // { // GPU_D12_SharedState *g = &GPU_D12_shared_state; // D3D12_RESOURCE_DESC desc = ZI; // ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc); // u64 upload_size = 0; // u64 upload_row_size = 0; // u32 upload_num_rows = 0; // D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; // ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); // D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; // { // D3D12_RANGE read_range = ZI; // u8 *dst_base = (u8 *)dst + placed_footprint.Offset; // u8 *src_base = src; // u32 z_size = upload_row_size * upload_num_rows; // b32 src_overflow = 0; // for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z) // { // u32 z_offset = z * z_size; // for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y) // { // u8 *dst_row = dst_base + y * footprint.RowPitch + z_offset; // u8 *src_row = src_base + y * upload_row_size + z_offset; // CopyBytes(dst_row, src_row, upload_row_size); // } // } // } 
// } //////////////////////////////////////////////////////////// //~ @hookimpl Statistics GPU_Stats GPU_QueryStats(void) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_Stats result = ZI; { DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI; IDXGIAdapter3_QueryVideoMemoryInfo(g->adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info); result.local_committed = info.CurrentUsage; result.local_budget = info.Budget; } { DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI; IDXGIAdapter3_QueryVideoMemoryInfo(g->adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info); result.non_local_budget = info.Budget; result.non_local_committed = info.CurrentUsage; } result.driver_resources_allocated = Atomic64Fetch(&g->driver_resources_allocated); result.driver_descriptors_allocated = Atomic64Fetch(&g->driver_descriptors_allocated); return result; } GPU_Stats GPU_QuerySharedMemoryStats(void) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_Stats result = ZI; return result; } //////////////////////////////////////////////////////////// //~ @hookimpl Swapchain GPU_SwapchainHandle GPU_AcquireSwapchain(WND_Handle window) { GPU_D12_Swapchain *swapchain = 0; { Arena *perm = PermArena(); swapchain = PushStruct(perm, GPU_D12_Swapchain); } swapchain->window_hwnd = (HWND)WND_OsHandleFromWindow(window); return (GPU_SwapchainHandle) { .v = (u64)swapchain }; } void GPU_ReleaseSwapchain(GPU_SwapchainHandle swapchain_handle) { /* TODO */ } GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, GPU_Format format, Vec2I32 size) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Swapchain *swapchain = GPU_D12_SwapchainFromHandle(swapchain_handle); size = VEC2I32(MaxI32(size.x, 1), MaxI32(size.y, 1)); GPU_D12_Queue *direct_queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct); /* Initialize swapchain */ if (!swapchain->d3d_swapchain) { HRESULT hr = 0; /* Create d3d swapchain */ { IDXGISwapChain3 *swapchain3 = 0; { /* Create swapchain1 */ IDXGISwapChain1 *swapchain1 = 0; if (SUCCEEDED(hr)) 
{ DXGI_SWAP_CHAIN_DESC1 desc = ZI; desc.Format = GPU_D12_DxgiFormatFromGpuFormat(format); desc.Width = size.x; desc.Height = size.y; desc.SampleDesc.Count = 1; desc.SampleDesc.Quality = 0; desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; desc.BufferCount = GPU_D12_SwapchainBufferCount; desc.Scaling = DXGI_SCALING_NONE; desc.Flags = GPU_D12_SwapchainFlags; desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; hr = IDXGIFactory2_CreateSwapChainForHwnd(g->factory, (IUnknown *)direct_queue->d3d_queue, swapchain->window_hwnd, &desc, 0, 0, &swapchain1); } /* Upgrade to swapchain3 */ if (SUCCEEDED(hr)) { hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain3); IDXGISwapChain1_Release(swapchain1); } } swapchain->d3d_swapchain = swapchain3; swapchain->backbuffers_format = format; swapchain->backbuffers_resolution = size; } /* Create waitable object */ { HANDLE waitable = 0; if (SUCCEEDED(hr) && GPU_D12_FrameLatency > 0) { hr = IDXGISwapChain3_SetMaximumFrameLatency(swapchain->d3d_swapchain, GPU_D12_FrameLatency); waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->d3d_swapchain); } swapchain->waitable = waitable; } /* Create present fence */ { HANDLE present_event = 0; ID3D12Fence *present_fence = 0; if (SUCCEEDED(hr)) { present_event = CreateEvent(0, 0, 0, 0); hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&present_fence); } swapchain->present_fence = present_fence; swapchain->present_event = present_event; } /* Disable Alt+Enter */ IDXGIFactory_MakeWindowAssociation(g->factory, swapchain->window_hwnd, DXGI_MWA_NO_ALT_ENTER); if (FAILED(hr)) { Panic(Lit("Failed to create swapchain")); } } /* Resize backbuffers */ if (!MatchVec2I32(swapchain->backbuffers_resolution, size) || swapchain->backbuffers_format != format) { HRESULT hr = 0; /* Wait for any previous backbuffer commands to finish */ { ID3D12Fence_SetEventOnCompletion(swapchain->present_fence, 
swapchain->present_fence_target, swapchain->present_event); WaitForSingleObject(swapchain->present_event, INFINITE); } /* Release backbuffers */ for (u32 i = 0; i < countof(swapchain->backbuffers); ++i) { GPU_D12_Resource *backbuffer = &swapchain->backbuffers[i]; if (backbuffer->d3d_resource) { ID3D12Resource_Release(backbuffer->d3d_resource); backbuffer->d3d_resource = 0; } } /* Resize buffers */ hr = IDXGISwapChain_ResizeBuffers(swapchain->d3d_swapchain, 0, size.x, size.y, DXGI_FORMAT_UNKNOWN, GPU_D12_SwapchainFlags); if (FAILED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to resize swapchain")); } } /* Initialize backbuffers */ { for (u32 i = 0; i < countof(swapchain->backbuffers); ++i) { GPU_D12_Resource *backbuffer = &swapchain->backbuffers[i]; if (!backbuffer->d3d_resource) { ID3D12Resource *d3d_resource = 0; HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->d3d_swapchain, i, &IID_ID3D12Resource, (void **)&d3d_resource); if (FAILED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to get swapchain buffer")); } ZeroStruct(backbuffer); backbuffer->d3d_resource = d3d_resource; backbuffer->uid = Atomic64FetchAdd(&g->next_resource_uid.v, 1); backbuffer->is_texture = 1; backbuffer->texture_flags = GPU_TextureFlag_AllowRenderTarget; backbuffer->texture_dims = VEC3I32(size.x, size.y, 1); backbuffer->texture_mip_levels = 1; backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT; backbuffer->swapchain = swapchain; } } swapchain->backbuffers_format = format; swapchain->backbuffers_resolution = size; } /* Wait for available backbuffer */ if (swapchain->waitable) { DWORD wait_result = WaitForSingleObject(swapchain->waitable, 500); if (wait_result == WAIT_TIMEOUT) { ID3D12Fence_SetEventOnCompletion(swapchain->present_fence, swapchain->present_fence_target, swapchain->present_event); WaitForSingleObject(swapchain->present_event, INFINITE); } } /* Grab current backbuffer */ GPU_D12_Resource *cur_backbuffer = 0; { u32 backbuffer_idx = 
IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->d3d_swapchain); cur_backbuffer = &swapchain->backbuffers[backbuffer_idx]; } return (GPU_ResourceHandle) { .v = (u64)cur_backbuffer }; } void GPU_CommitBackbuffer(GPU_ResourceHandle backbuffer_handle, i32 vsync) { GPU_D12_Resource *backbuffer = GPU_D12_ResourceFromHandle(backbuffer_handle); GPU_D12_Swapchain *swapchain = backbuffer->swapchain; GPU_D12_Queue *direct_queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct); u32 present_flags = 0; if (GPU_D12_TearingIsAllowed && vsync == 0) { present_flags |= DXGI_PRESENT_ALLOW_TEARING; } /* Present */ { __profn("Present"); HRESULT hr = IDXGISwapChain3_Present(swapchain->d3d_swapchain, vsync, present_flags); if (!SUCCEEDED(hr)) { Assert(0); } } if (vsync != 0 && !(present_flags & DXGI_PRESENT_ALLOW_TEARING)) { /* FIXME: Don't flush in fullscreen mode? */ // DwmFlush(); } /* Increment swapchain fence */ { u64 target = ++swapchain->present_fence_target; ID3D12CommandQueue_Signal(direct_queue->d3d_queue, swapchain->present_fence, target); } }