GPU_D12_SharedState GPU_D12_shared_state = ZI; //////////////////////////////// //~ Helpers GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_FiberState **f = &g->fiber_states[fiber_id]; if (!*f) { Arena *perm = PermArena(); *f = PushStruct(perm, GPU_D12_FiberState); } return *f; } DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format) { return (DXGI_FORMAT)format; } GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl) { GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId()); Arena *perm = PermArena(); GPU_D12_Command *cmd = f->first_free_command; if (cmd) { f->first_free_command = cmd->next; } else { cmd = PushStructNoZero(perm, GPU_D12_Command); } ZeroStruct(cmd); QueuePush(cl->first, cl->last, cmd); ++cl->count; return cmd; } u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc, u64 buffer_size) { u64 result = RandU64FromSeeds(desc.kind, desc.flags); switch(desc.kind) { default: break; case GPU_ResourceKind_Texture1D: case GPU_ResourceKind_Texture2D: case GPU_ResourceKind_Texture3D: { result = RandU64FromSeeds(result, desc.texture.format); result = RandU64FromSeeds(result, desc.texture.mip_levels); result = RandU64FromSeeds(result, desc.clear_color.x); result = RandU64FromSeeds(result, desc.clear_color.y); result = RandU64FromSeeds(result, desc.clear_color.z); result = RandU64FromSeeds(result, desc.clear_color.w); result = RandU64FromSeeds(result, desc.texture.size.x); result = RandU64FromSeeds(result, desc.texture.size.y); result = RandU64FromSeeds(result, desc.texture.size.z); } break; case GPU_ResourceKind_Buffer: { result = RandU64FromSeeds(result, desc.buffer.heap_kind); result = RandU64FromSeeds(result, buffer_size); } break; } return result; } //////////////////////////////// //~ Startup void GPU_D12_Startup(void) { GPU_D12_SharedState *g = &GPU_D12_shared_state; /* Init device */ GPU_D12_InitDevice(); /* Init queues */ { GPU_D12_QueueDesc descs[] = { {.kind = 
GPU_QueueKind_Direct, .d3d_type = D3D12_COMMAND_LIST_TYPE_DIRECT, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Direct queue") }, {.kind = GPU_QueueKind_Compute, .d3d_type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Compute queue") }, {.kind = GPU_QueueKind_Copy, .d3d_type = D3D12_COMMAND_LIST_TYPE_COPY, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .dbg_name = Lit("Copy queue") }, {.kind = GPU_QueueKind_BackgroundCopy, .d3d_type = D3D12_COMMAND_LIST_TYPE_COPY, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Background copy queue") } }; u32 job_count = 0; Fence job_fence = ZI; job_count += RunJob(GPU_D12_InitQueue, .count = GPU_NumQueues, .sig.descs = descs, .fence = &job_fence); YieldOnFence(&job_fence, job_count); } /* Init descriptor heaps */ g->cbv_srv_uav_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, GPU_D12_MaxCbvSrvUavDescriptors, ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV)); g->sampler_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, GPU_D12_MaxSamplerDescriptors, ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)); g->rtv_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_RTV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, GPU_D12_MaxRtvDescriptors, ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV)); /* Init rootsig */ GPU_D12_InitRootsig(); /* Start queue sync job */ RunJob(GPU_D12_StartQueueSync, .pool = JobPool_Hyper, .flags = JobFlag_Dedicated); } //////////////////////////////// //~ Initialization //- Device initialization void GPU_D12_InitDevice(void) { GPU_D12_SharedState *g = &GPU_D12_shared_state; TempArena scratch = BeginScratchNoConflict(); HRESULT hr = 0; /* Enable debug 
layer */ u32 dxgi_factory_flags = 0; #if GPU_DEBUG { __profn("Enable debug layer"); ID3D12Debug *debug_controller0 = 0; hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0); if (FAILED(hr)) { Panic(Lit("Failed to create ID3D12Debug0")); } ID3D12Debug1 *debug_controller1 = 0; hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1); if (FAILED(hr)) { Panic(Lit("Failed to create ID3D12Debug1")); } ID3D12Debug_EnableDebugLayer(debug_controller0); /* FIXME: Enable this */ // ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, 1); ID3D12Debug_Release(debug_controller1); ID3D12Debug_Release(debug_controller0); dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG; } #endif /* Create factory */ { __profn("Create factory"); hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&g->factory); if (FAILED(hr)) { Panic(Lit("Failed to initialize DXGI factory")); } } /* Create device */ { __profn("Create device"); IDXGIAdapter1 *adapter = 0; ID3D12Device *device = 0; String error = Lit("Could not initialize GPU device."); String first_gpu_name = ZI; u32 adapter_index = 0; b32 skip = 0; /* For debugging iGPU */ for (;;) { { hr = IDXGIFactory6_EnumAdapterByGpuPreference(g->factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter); } if (SUCCEEDED(hr)) { DXGI_ADAPTER_DESC1 desc; IDXGIAdapter1_GetDesc1(adapter, &desc); if (first_gpu_name.len == 0) { first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description); } { hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); } if (SUCCEEDED(hr) && !skip) { break; } skip = 0; ID3D12Device_Release(device); IDXGIAdapter1_Release(adapter); adapter = 0; device = 0; ++adapter_index; } else { break; } } if (!device) { if (first_gpu_name.len > 0) { error = StringF(scratch.arena, "Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. 
Ensure that the device is capable and drivers are up to date.", FmtString(first_gpu_name)); } Panic(error); } g->adapter = adapter; g->device = device; } #if GPU_DEBUG /* Enable D3D12 Debug break */ { __profn("Enable d3d12 debug break"); ID3D12InfoQueue *info = 0; hr = ID3D12Device_QueryInterface(g->device, &IID_ID3D12InfoQueue, (void **)&info); if (FAILED(hr)) { Panic(Lit("Failed to query ID3D12Device interface")); } ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1); ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1); ID3D12InfoQueue_Release(info); } /* Enable DXGI Debug break */ { __profn("Enable dxgi debug break"); IDXGIInfoQueue *dxgi_info = 0; hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info); if (FAILED(hr)) { Panic(Lit("Failed to get DXGI debug interface")); } IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1); IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1); IDXGIInfoQueue_Release(dxgi_info); } #endif EndScratch(scratch); } //- Queue initialization JobDef(GPU_D12_InitQueue, sig, id) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_QueueDesc desc = sig->descs[id]; Arena *perm = PermArena(); HRESULT hr = 0; GPU_D12_Queue *queue = 0; { PushAlign(perm, CachelineSize); queue = PushStruct(perm, GPU_D12_Queue); PushAlign(perm, CachelineSize); } queue->desc = desc; D3D12_COMMAND_QUEUE_DESC d3d_desc = ZI; d3d_desc.Type = desc.d3d_type; d3d_desc.Priority = desc.d3d_priority; hr = ID3D12Device_CreateCommandQueue(g->device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->d3d_queue); if (FAILED(hr)) { Panic(Lit("Failed to create command queue")); } hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&queue->submit_fence); if (FAILED(hr)) { Panic(Lit("Failed to create command queue fence")); } g->queues[desc.kind] = queue; } //- Heap initialization 
GPU_D12_DescriptorHeap *GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIPTOR_HEAP_FLAGS flags, u32 max_descs, u32 desc_size) { GPU_D12_SharedState *g = &GPU_D12_shared_state; Arena *arena = AcquireArena(Gibi(64)); GPU_D12_DescriptorHeap *heap = PushStruct(arena, GPU_D12_DescriptorHeap); heap->arena = arena; heap->type = type; heap->max_count = max_descs; heap->descriptor_size = desc_size; D3D12_DESCRIPTOR_HEAP_DESC d3d_desc = ZI; d3d_desc.Type = type; d3d_desc.Flags = flags; d3d_desc.NumDescriptors = max_descs; HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap); if (FAILED(hr)) { Panic(Lit("Failed to create CPU descriptor heap")); } ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle); return heap; } //- Rootsig initialization void GPU_D12_InitRootsig(void) { GPU_D12_SharedState *g = &GPU_D12_shared_state; b32 ok = 1; HRESULT hr = 0; String error_str = ZI; /* Serialize root signature */ ID3D10Blob *blob = 0; if (ok) { __profn("Create root signature"); D3D12_ROOT_PARAMETER param = ZI; param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; param.Constants.ShaderRegister = 0; param.Constants.RegisterSpace = 0; param.Constants.Num32BitValues = 64; D3D12_ROOT_SIGNATURE_DESC desc = ZI; desc.NumParameters = 1; desc.pParameters = ¶m; desc.NumStaticSamplers = 0; desc.pStaticSamplers = 0; desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED; hr = D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, 0); if (FAILED(hr)) { error_str = Lit("Failed to serialize root signature"); ok = 0; } } /* Create root signature */ ID3D12RootSignature *rootsig = 0; if (ok) { __profn("Create root signature"); hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(blob), 
ID3D10Blob_GetBufferSize(blob), &IID_ID3D12RootSignature, (void **)&rootsig); if (FAILED(hr)) { error_str = Lit("Failed to create root signature"); ok = 0; } } if (blob) { ID3D10Blob_Release(blob); } g->bindless_rootsig = rootsig; if (!ok) { Panic(error_str); } } //////////////////////////////// //~ Pipeline operations JobDef(GPU_D12_LoadPipeline, sig, _) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Pipeline *pipeline = sig->pipeline; GPU_D12_PipelineDesc desc = pipeline->desc; HRESULT hr = 0; b32 ok = 1; String error_str = ZI; /* Create PSO */ ID3D12PipelineState *pso = 0; if (ok && (!IsResourceNil(desc.vs.resource) != 0 || !IsResourceNil(desc.ps.resource))) { D3D12_RASTERIZER_DESC raster_desc = ZI; raster_desc.FillMode = D3D12_FILL_MODE_SOLID; raster_desc.CullMode = D3D12_CULL_MODE_NONE; raster_desc.FrontCounterClockwise = 0; raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS; raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP; raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS; raster_desc.DepthClipEnable = 1; raster_desc.MultisampleEnable = 0; raster_desc.AntialiasedLineEnable = 0; raster_desc.ForcedSampleCount = 0; raster_desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; D3D12_BLEND_DESC blend_desc = ZI; blend_desc.AlphaToCoverageEnable = 0; blend_desc.IndependentBlendEnable = 0; blend_desc.RenderTarget[0].BlendEnable = 1; blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA; blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA; blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD; blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE; blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD; blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; D3D12_DEPTH_STENCIL_DESC ds_desc = ZI; ds_desc.DepthEnable = 0; ds_desc.StencilEnable = 0; String vs = 
DataFromResource(desc.vs.resource); String ps = DataFromResource(desc.ps.resource); D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = ZI; pso_desc.pRootSignature = g->bindless_rootsig; pso_desc.VS.pShaderBytecode = vs.text; pso_desc.VS.BytecodeLength = vs.len; pso_desc.PS.pShaderBytecode = ps.text; pso_desc.PS.BytecodeLength = ps.len; pso_desc.RasterizerState = raster_desc; pso_desc.BlendState = blend_desc; pso_desc.DepthStencilState = ds_desc; pso_desc.PrimitiveTopologyType = desc.topology_type; pso_desc.SampleMask = UINT_MAX; pso_desc.SampleDesc.Count = 1; pso_desc.SampleDesc.Quality = 0; for (i32 i = 0; i < (i32)countof(desc.render_target_formats); ++i) { StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc.render_target_formats)); DXGI_FORMAT format = GPU_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[i]); if (format != DXGI_FORMAT_UNKNOWN) { pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format; } else { break; } } hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); if (FAILED(hr)) { error_str = Lit("Failed to create pipeline state object"); ok = 0; } } else if (ok) { String cs = DataFromResource(desc.cs.resource); D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = ZI; pso_desc.pRootSignature = g->bindless_rootsig; pso_desc.CS.pShaderBytecode = cs.text; pso_desc.CS.BytecodeLength = cs.len; hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso); if (FAILED(hr)) { error_str = Lit("Failed to create pipeline state object"); ok = 0; } } pipeline->pso = pso; pipeline->error = error_str; pipeline->ok = 1; } GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc) { GPU_D12_SharedState *g = &GPU_D12_shared_state; u64 hash = RandU64FromSeed(HashFnv64(Fnv64Basis, StringFromStruct(&desc))); GPU_D12_Pipeline *pipeline = 0; b32 is_pipeline_new = 0; GPU_D12_PipelineBin *bin = &g->pipeline_bins[hash % countof(g->pipeline_bins)]; { { Lock lock = 
LockS(&bin->mutex); for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin) { if (pipeline->hash == hash) break; } Unlock(&lock); } if (!pipeline) { Lock lock = LockE(&bin->mutex); for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin) { if (pipeline->hash == hash) break; } if (!pipeline) { Arena *perm = PermArena(); PushAlign(perm, CachelineSize); pipeline = PushStruct(perm, GPU_D12_Pipeline); pipeline->desc = desc; pipeline->hash = hash; is_pipeline_new = 1; PushAlign(perm, CachelineSize); StackPushN(bin->first, pipeline, next_in_bin); } Unlock(&lock); } } if (is_pipeline_new) { RunJob(GPU_D12_LoadPipeline, .fence = &pipeline->ready_fence, .sig.pipeline = pipeline); } YieldOnFence(&pipeline->ready_fence, 1); return pipeline; } //////////////////////////////// //~ Queue operations GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind) { GPU_D12_SharedState *g = &GPU_D12_shared_state; return g->queues[kind]; } //////////////////////////////// //~ Descriptor operations GPU_D12_Descriptor *GPU_D12_AcquireDescriptor(GPU_D12_DescriptorHeap *heap) { GPU_D12_Descriptor *d = 0; u32 index = 0; D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI; { Lock lock = LockE(&heap->mutex); if (heap->first_free) { d = heap->first_free; heap->first_free = d->next_free; handle = d->handle; index = d->index; } else { if (heap->allocated_count >= heap->max_count) { Panic(Lit("Max descriptors reached in heap")); } d = PushStructNoZero(heap->arena, GPU_D12_Descriptor); index = heap->allocated_count++; handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); } Unlock(&lock); } ZeroStruct(d); d->heap = heap; d->handle = handle; d->index = index; return d; } void GPU_D12_ReleaseDescriptor(GPU_D12_Descriptor *descriptor) { GPU_D12_DescriptorHeap *heap = descriptor->heap; Lock lock = LockE(&heap->mutex); { descriptor->next_free = heap->first_free; heap->first_free = descriptor; } Unlock(&lock); } //////////////////////////////// //~ Raw command list 
GPU_D12_RawCommandList *GPU_D12_BeginRawCommandList(GPU_QueueKind queue_kind) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); /* Pull first completed command list from queue if ready */ GPU_D12_RawCommandList *cl = ZI; { Lock lock = LockE(&queue->submit_mutex); { u64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence); cl = queue->first_submitted_cl; if (cl && cl->submit_fence_target <= completed) { QueuePop(queue->first_submitted_cl, queue->last_submitted_cl); } else { cl = 0; } } Unlock(&lock); } /* Allocate new command list if none are available */ if (!cl) { Arena *perm = PermArena(); { PushAlign(perm, CachelineSize); cl = PushStruct(perm, GPU_D12_RawCommandList); PushAlign(perm, CachelineSize); } cl->queue = queue; HRESULT hr = ID3D12Device_CreateCommandAllocator(g->device, queue->desc.d3d_type, &IID_ID3D12CommandAllocator, (void **)&cl->ca); if (FAILED(hr)) { Panic(Lit("Failed to create command allocator")); } hr = ID3D12Device_CreateCommandList(g->device, 0, queue->desc.d3d_type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl); if (FAILED(hr)) { Panic(Lit("Failed to create command list")); } hr = ID3D12GraphicsCommandList_Close(cl->cl); if (FAILED(hr)) { Panic(Lit("Failed to close command list during initialization")); } } /* Reset command list */ { HRESULT hr = ID3D12CommandAllocator_Reset(cl->ca); if (FAILED(hr)) { Panic(Lit("Failed to reset command allocator")); } hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0); if (FAILED(hr)) { Panic(Lit("Failed to reset command list")); } } return cl; } u64 GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl) { GPU_D12_Queue *queue = cl->queue; /* Close */ { __profn("Close DX12 command list"); HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl); if (FAILED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to close command list before execution")); } } /* Submit */ u64 target = 0; { __profn("Execute"); Lock lock = 
LockE(&queue->submit_mutex); { target = ++queue->submit_fence_target; cl->submit_fence_target = target; /* Execute */ ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->cl); ID3D12CommandQueue_Signal(queue->d3d_queue, queue->submit_fence, target); /* Append */ QueuePush(queue->first_submitted_cl, queue->last_submitted_cl, cl); } Unlock(&lock); } return target; } //////////////////////////////// //~ Swapchain helpers void GPU_D12_InitSwapchainResources(GPU_D12_Swapchain *swapchain) { GPU_D12_SharedState *g = &GPU_D12_shared_state; for (u32 i = 0; i < countof(swapchain->buffers); ++i) { ID3D12Resource *resource = 0; HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, i, &IID_ID3D12Resource, (void **)&resource); if (FAILED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to get swapchain buffer")); } GPU_D12_SwapchainBuffer *sb = &swapchain->buffers[i]; ZeroStruct(sb); sb->swapchain = swapchain; sb->d3d_resource = resource; sb->rtv_descriptor = GPU_D12_AcquireDescriptor(g->rtv_heap); sb->state = D3D12_RESOURCE_STATE_COMMON; ID3D12Device_CreateRenderTargetView(g->device, sb->d3d_resource, 0, sb->rtv_descriptor->handle); } } GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, Vec2I32 resolution) { __prof; GPU_D12_SharedState *g = &GPU_D12_shared_state; resolution.x = MaxI32(resolution.x, 1); resolution.y = MaxI32(resolution.y, 1); b32 should_rebuild = !EqVec2I32(swapchain->resolution, resolution); if (should_rebuild) { HRESULT hr = 0; GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct); /* Lock direct queue submissions (in case any write to backbuffer) */ /* TODO: Less overkill approach - Only flush GPU_D12_BlitToSwapchain since we know it's the only operation targeting backbuffer */ Lock lock = LockE(&queue->submit_mutex); //DEBUGBREAKABLE; //Lock lock = LockE(&g->global_command_list_record_mutex); { /* Flush direct queue */ //ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, 
++cq->submit_fence_target); { HANDLE event = CreateEvent(0, 0, 0, 0); ID3D12Fence_SetEventOnCompletion(queue->submit_fence, queue->submit_fence_target, event); WaitForSingleObject(event, INFINITE); CloseHandle(event); } /* Release buffers */ for (u32 i = 0; i < countof(swapchain->buffers); ++i) { GPU_D12_SwapchainBuffer *sb = &swapchain->buffers[i]; GPU_D12_ReleaseDescriptor(sb->rtv_descriptor); ID3D12Resource_Release(sb->d3d_resource); } /* Resize buffers */ hr = IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, GPU_D12_SwapchainFlags); if (FAILED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to resize swapchain")); } } Unlock(&lock); GPU_D12_InitSwapchainResources(swapchain); swapchain->resolution = resolution; } u32 backbuffer_index = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->swapchain); return &swapchain->buffers[backbuffer_index]; } i64 GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *texture, Vec2I32 dst_pos) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Swapchain *swapchain = dst->swapchain; GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(GPU_QueueKind_Direct); ID3D12GraphicsCommandList *rcl = dx12_cl->cl; D3D12_RESOURCE_STATES old_texture_state = texture->state; { u32 barriers_count = 0; D3D12_RESOURCE_BARRIER rbs[2] = ZI; /* Transition backbuffer to RENDER_TARGET */ { D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; rb->Transition.pResource = dst->d3d_resource; rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; rb->Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; rb->Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; } ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); } /* Clear */ { f32 clear_color[4] = ZI; ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, dst->rtv_descriptor->handle, clear_color, 0, 0); } { u32 barriers_count = 0; 
D3D12_RESOURCE_BARRIER rbs[2] = ZI; /* Transition backbuffer to COPY_DEST */ { D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; rb->Transition.pResource = dst->d3d_resource; rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; rb->Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; } /* Transition texture to COPY_SRC */ if (texture->state != D3D12_RESOURCE_STATE_COPY_SOURCE) { D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; rb->Transition.pResource = texture->d3d_resource; rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; rb->Transition.StateBefore = texture->state; rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; texture->state = rb->Transition.StateAfter; } ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); } /* Copy */ { D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; dst_loc.pResource = dst->d3d_resource; dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dst_loc.SubresourceIndex = 0; D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; src_loc.pResource = texture->d3d_resource; src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; src_loc.SubresourceIndex = 0; Vec2I32 dst_size = swapchain->resolution; Vec2I32 src_size = VEC2I32(texture->desc.texture.size.x, texture->desc.texture.size.y); i32 dst_left = dst_pos.x; i32 dst_top = dst_pos.y; i32 src_left = 0; i32 src_top = 0; i32 src_right = src_size.x; i32 src_bottom = src_size.y; /* Clamp copy src & dst */ if (dst_left < 0) { src_left -= dst_left; dst_left = 0; } if (dst_top < 0) { src_top -= dst_top; dst_top = 0; } if (dst_left + (src_left + src_right) > dst_size.x) { src_right -= (dst_left + (src_left + src_right)) - dst_size.x; } if (dst_top + (src_top + src_bottom) > dst_size.y) { src_bottom -= (dst_top + (src_top + src_bottom)) - dst_size.y; } if (src_left < src_right 
&& src_bottom > src_top) { D3D12_BOX src_box = ZI; src_box.left = src_left; src_box.top = src_top; src_box.right = src_right; src_box.bottom = src_bottom; src_box.back = 1; ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, dst_left, dst_top, 0, &src_loc, &src_box); } } { u32 barriers_count = 0; D3D12_RESOURCE_BARRIER rbs[2] = ZI; /* Transition backbuffer to PRESENT */ { D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; rb->Transition.pResource = dst->d3d_resource; rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; rb->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; rb->Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; } /* Transition texture to original state */ if (texture->state != old_texture_state) { D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; rb->Transition.pResource = texture->d3d_resource; rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; rb->Transition.StateBefore = texture->state; rb->Transition.StateAfter = old_texture_state; texture->state = rb->Transition.StateAfter; } ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); } i64 fence_target = GPU_D12_EndRawCommandList(dx12_cl); return fence_target; } //////////////////////////////// //~ Queue sync job JobDef(GPU_D12_StartQueueSync, _, __) { GPU_D12_SharedState *g = &GPU_D12_shared_state; HANDLE queue_fences_events[GPU_NumQueues] = ZI; i64 queue_fences_seen[GPU_NumQueues] = ZI; for (i32 i = 0; i < countof(queue_fences_events); ++i) { queue_fences_events[i] = CreateEvent(0, 0, 1, 0); queue_fences_seen[i] = -1; } for (;;) { WaitForMultipleObjects(countof(queue_fences_events), queue_fences_events, 0, INFINITE); for (GPU_QueueKind queue_kind = 0; queue_kind < GPU_NumQueues; ++queue_kind) { GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); i64 last_seen = queue_fences_seen[queue_kind]; i64 completed = 
ID3D12Fence_GetCompletedValue(queue->submit_fence); if (completed > last_seen) { SetFence(&queue->sync_fence, completed); queue_fences_seen[queue_kind] = completed; ID3D12Fence_SetEventOnCompletion(queue->submit_fence, completed + 1, queue_fences_events[queue_kind]); } } } } //////////////////////////////// //~ @hookdef Startup hook void GPU_Startup(void) { GPU_D12_Startup(); } //////////////////////////////// //~ @hookdecl Fence hooks Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind) { GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); return &queue->sync_fence; } void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value) { GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a); GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b); ID3D12Fence *b_fence = queue_b->submit_fence; ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value); } //////////////////////////////// //~ @hookdef Rasterizer helper hooks GPU_Viewport GPU_ViewportFromRect(Rect rect) { GPU_Viewport viewport = ZI; viewport.top_left_x = rect.x; viewport.top_left_y = rect.y; viewport.width = rect.width; viewport.height = rect.height; viewport.min_depth = 0.0f; viewport.max_depth = 1.0f; return viewport; } GPU_Scissor GPU_ScissorFromRect(Rect rect) { GPU_Scissor scissor = ZI; scissor.left = rect.x; scissor.top = rect.y; scissor.right = rect.x + rect.width; scissor.bottom = rect.y + rect.height; return scissor; } //////////////////////////////// //~ @hookdef Resource hooks GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Resource *r = 0; if (desc.kind == GPU_ResourceKind_Unknown) { Panic(Lit("Unknown gpu resource type")); } u64 buffer_size = 0; if (desc.kind == GPU_ResourceKind_Buffer) { desc.buffer.stride = MaxU32(desc.buffer.stride, 1); buffer_size = MaxU64(AlignU64Pow2(desc.buffer.count * desc.buffer.stride), Kibi(64)); } u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size); /* 
Grab reusable */ { u64 bin_index = reuse_hash % countof(g->resource_reuse_bins); GPU_D12_ResourceReuseListBin *bin = &g->resource_reuse_bins[bin_index]; { Lock lock = LockE(&bin->mutex); { GPU_D12_ResourceReuseList *list = bin->first; for (; list; list = list->next) { if (list->hash == reuse_hash) break; } if (list) { r = list->first; list->first = r->next_free; if (!list->first) { DllRemove(bin->first, bin->last, list); StackPush(bin->first_free, list); list->prev = 0; } r->next_free = 0; } } Unlock(&lock); } } /* Grab from free list */ if (!r) { { Lock lock = LockE(&g->free_resources_mutex); r = g->first_free_resource; if (r) { g->first_free_resource = r->next_free; } Unlock(&lock); } if (r) { ZeroStruct(r); } } /* Push new */ if (!r) { Arena *perm = PermArena(); PushAlign(perm, CachelineSize); r = PushStruct(perm, GPU_D12_Resource); PushAlign(perm, CachelineSize); } /* Create d3d resource */ if (!r->d3d_resource) { switch (desc.kind) { case GPU_ResourceKind_Sampler: break; /* Buffer */ case GPU_ResourceKind_Buffer: { D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; D3D12_HEAP_PROPERTIES heap_props = { .Type = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_HEAP_TYPE_UPLOAD : desc.buffer.heap_kind == GPU_HeapKind_Download ? D3D12_HEAP_TYPE_READBACK : D3D12_HEAP_TYPE_DEFAULT }; Assert(!(desc.flags & GPU_ResourceFlag_Renderable)); D3D12_RESOURCE_DESC d3d_desc = ZI; d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; d3d_desc.Format = DXGI_FORMAT_UNKNOWN; d3d_desc.Alignment = 0; d3d_desc.Width = buffer_size; d3d_desc.Height = 1; d3d_desc.DepthOrArraySize = 1; d3d_desc.MipLevels = 1; d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Quality = 0; d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_Writable); r->state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? 
D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST; HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, r->state, 0, &IID_ID3D12Resource, (void **)&r->d3d_resource); if (FAILED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to create buffer resource")); } r->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->d3d_resource); } break; /* Texture */ case GPU_ResourceKind_Texture1D: case GPU_ResourceKind_Texture2D: case GPU_ResourceKind_Texture3D: { D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT }; D3D12_RESOURCE_DESC d3d_desc = ZI; d3d_desc.Dimension = desc.kind == GPU_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : desc.kind == GPU_ResourceKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D : D3D12_RESOURCE_DIMENSION_TEXTURE3D; d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; d3d_desc.Format = GPU_D12_DxgiFormatFromGpuFormat(desc.texture.format); d3d_desc.Alignment = 0; d3d_desc.Width = desc.texture.size.x; d3d_desc.Height = desc.texture.size.y; d3d_desc.DepthOrArraySize = desc.texture.size.z; d3d_desc.MipLevels = (desc.flags & GPU_ResourceFlag_MaxMipLevels) ? 0 : MaxI32(desc.texture.mip_levels, 1); d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Quality = 0; d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_Writable); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_Renderable); r->state = D3D12_RESOURCE_STATE_COPY_DEST; D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } }; clear_value.Color[0] = desc.clear_color.x; clear_value.Color[1] = desc.clear_color.y; clear_value.Color[2] = desc.clear_color.z; clear_value.Color[3] = desc.clear_color.w; D3D12_CLEAR_VALUE *clear_value_ptr = d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? 
&clear_value : 0; HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, r->state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->d3d_resource); if (FAILED(hr)) { /* TODO: Don't panic */ Panic(Lit("Failed to create buffer resource")); } } break; } } /* Create texture srv descriptor */ if (desc.kind == GPU_ResourceKind_Texture1D || desc.kind == GPU_ResourceKind_Texture2D || desc.kind == GPU_ResourceKind_Texture3D) { if (!r->srv_descriptor) { r->srv_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap); } ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, 0, r->srv_descriptor->handle); } /* Create buffer srv descriptor */ if (desc.kind == GPU_ResourceKind_Buffer && desc.buffer.heap_kind != GPU_HeapKind_Download && desc.buffer.count > 0) { if (!r->srv_descriptor) { r->srv_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap); } D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = ZI; srv_desc.Format = DXGI_FORMAT_UNKNOWN; srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; srv_desc.Buffer.FirstElement = 0; srv_desc.Buffer.NumElements = desc.buffer.count; srv_desc.Buffer.StructureByteStride = desc.buffer.stride; srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, &srv_desc, r->srv_descriptor->handle); } /* Create uav descriptor */ if (desc.flags & GPU_ResourceFlag_Writable) { if (!r->uav_descriptor) { r->uav_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap); } ID3D12Device_CreateUnorderedAccessView(g->device, r->d3d_resource, 0, 0, r->uav_descriptor->handle); } /* Create rtv descriptor */ if (desc.flags & GPU_ResourceFlag_Renderable) { if (!r->rtv_descriptor) { r->rtv_descriptor = GPU_D12_AcquireDescriptor(g->rtv_heap); } ID3D12Device_CreateRenderTargetView(g->device, r->d3d_resource, 0, r->rtv_descriptor->handle); } /* Create sampler descriptor */ if 
/* Sampler resources: build a D3D12 sampler straight from the desc fields
   (enum values are assumed layout-compatible with the D3D12 enums — the casts
   below rely on that; TODO confirm the mapping is 1:1). */
(desc.kind == GPU_ResourceKind_Sampler) { if (!r->sampler_descriptor) { r->sampler_descriptor = GPU_D12_AcquireDescriptor(g->sampler_heap); } D3D12_SAMPLER_DESC d3d_desc = ZI; d3d_desc.Filter = (D3D12_FILTER)desc.sampler.filter; d3d_desc.AddressU = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.x; d3d_desc.AddressV = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.y; d3d_desc.AddressW = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.z; d3d_desc.MipLODBias = desc.sampler.mip_lod_bias; d3d_desc.MaxAnisotropy = MaxU32(desc.sampler.max_anisotropy, 1); d3d_desc.ComparisonFunc = (D3D12_COMPARISON_FUNC)desc.sampler.comparison; d3d_desc.BorderColor[0] = desc.sampler.border_color.x; d3d_desc.BorderColor[1] = desc.sampler.border_color.y; d3d_desc.BorderColor[2] = desc.sampler.border_color.z; d3d_desc.BorderColor[3] = desc.sampler.border_color.w; d3d_desc.MinLOD = desc.sampler.min_lod; d3d_desc.MaxLOD = desc.sampler.max_lod;
/* Defaults: zero address modes (unset) become CLAMP; an infinite MaxLOD is
   clamped to the largest float D3D12 accepts. (The doubled `;;` are harmless
   empty statements.) */
if (d3d_desc.AddressU == 0) d3d_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;; if (d3d_desc.AddressV == 0) d3d_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;; if (d3d_desc.AddressW == 0) d3d_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;; if (d3d_desc.MaxLOD >= F32Infinity) { d3d_desc.MaxLOD = D3D12_FLOAT32_MAX; } ID3D12Device_CreateSampler(g->device, &d3d_desc, r->sampler_descriptor->handle); } r->desc = desc; r->buffer_size = buffer_size; return (GPU_Resource *)r; }
/* GPU_ReleaseResource: release all bindless descriptors, then either park the
   D3D resource in a hash-binned reuse list (GPU_ReleaseFlag_Reuse) so a later
   acquire with an identical desc can recycle it, or release the D3D resource
   and return the wrapper struct to the global free list. Descriptors are
   dropped even on the reuse path; the acquire path re-creates them lazily. */
void GPU_ReleaseResource(GPU_Resource *gpu_resource, GPU_ReleaseFlag flags) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource; if (r->srv_descriptor) { GPU_D12_ReleaseDescriptor(r->srv_descriptor); r->srv_descriptor = 0; } if (r->uav_descriptor) { GPU_D12_ReleaseDescriptor(r->uav_descriptor); r->uav_descriptor = 0; } if (r->rtv_descriptor) { GPU_D12_ReleaseDescriptor(r->rtv_descriptor); r->rtv_descriptor = 0; } if (r->sampler_descriptor) { GPU_D12_ReleaseDescriptor(r->sampler_descriptor); r->sampler_descriptor = 0; } if (flags & GPU_ReleaseFlag_Reuse) {
/* Reuse path: find (or allocate, cacheline-padded) the reuse list for this
   desc hash under the bin's mutex, then push the resource onto it. */
GPU_ResourceDesc desc = r->desc; u64 buffer_size = r->buffer_size; u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size); u64 bin_index = reuse_hash % countof(g->resource_reuse_bins); GPU_D12_ResourceReuseListBin *bin = &g->resource_reuse_bins[bin_index]; { Lock lock = LockE(&bin->mutex); { GPU_D12_ResourceReuseList *list = bin->first; for (; list; list = list->next) { if (list->hash == reuse_hash) break; } if (!list) { list = bin->first_free; if (list) { bin->first_free = list->next; } else { Arena *perm = PermArena(); PushAlign(perm, CachelineSize); list = PushStruct(perm, GPU_D12_ResourceReuseList); PushAlign(perm, CachelineSize); } list->hash = reuse_hash; DllPushBack(bin->first, bin->last, list); } StackPushN(list->first, r, next_free); } Unlock(&lock); } } else {
/* Destroy path: only kinds with an actual D3D resource release one (samplers
   have no ID3D12Resource), then recycle the wrapper struct. */
switch (r->desc.kind) { case GPU_ResourceKind_Buffer: case GPU_ResourceKind_Texture1D: case GPU_ResourceKind_Texture2D: case GPU_ResourceKind_Texture3D: { ID3D12Resource_Release(r->d3d_resource); } } Lock lock = LockE(&g->free_resources_mutex); r->next_free = g->first_free_resource; g->first_free_resource = r; Unlock(&lock); } }
/* GPU_GetReadableId: bindless SRV descriptor index, or U32Max if absent. */
u32 GPU_GetReadableId(GPU_Resource *gpu_resource) { u32 result = U32Max; GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource; if (r && r->srv_descriptor) { result = r->srv_descriptor->index; } return result; }
/* GPU_GetWritableId: bindless UAV descriptor index, or U32Max if absent. */
u32 GPU_GetWritableId(GPU_Resource *gpu_resource) { u32 result = U32Max; GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource; if (r && r->uav_descriptor) { result = r->uav_descriptor->index; } return result; }
/* GPU_GetSamplerId: bindless sampler descriptor index, or U32Max if absent. */
u32 GPU_GetSamplerId(GPU_Resource *gpu_resource) { u32 result = U32Max; GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource; if (r && r->sampler_descriptor) { result = r->sampler_descriptor->index; } return result; }
/* GPU_GetTextureSize2D: xy of the desc's texture size (no null check — caller
   must pass a valid texture resource). */
Vec2I32 GPU_GetTextureSize2D(GPU_Resource *gpu_resource) { GPU_D12_Resource *resource = (GPU_D12_Resource *)gpu_resource; return VEC2I32(resource->desc.texture.size.x, resource->desc.texture.size.y); }
Vec3I32 GPU_GetTextureSize3D(GPU_Resource *gpu_resource) { GPU_D12_Resource *resource = (GPU_D12_Resource *)gpu_resource; return resource->desc.texture.size; } u64 GPU_GetFootprintSize(GPU_Resource *gpu_resource) { GPU_D12_SharedState *g = &GPU_D12_shared_state; D3D12_RESOURCE_DESC desc = ZI; D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; ID3D12Resource_GetDesc(((GPU_D12_Resource *)gpu_resource)->d3d_resource, &desc); u64 footprint_size = 0; u64 upload_row_size = 0; u32 upload_num_rows = 0; ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &footprint_size); return footprint_size; } //////////////////////////////// //~ @hookdef Command list hooks GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue_kind) { GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId()); Arena *perm = PermArena(); GPU_D12_CommandList *cl = f->first_free_command_list; if (cl) { StackPop(f->first_free_command_list); ZeroStruct(cl); } else { cl = PushStruct(perm, GPU_D12_CommandList); } cl->queue_kind = queue_kind; return (GPU_CommandList *)cl; } i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId()); GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; GPU_QueueKind queue_kind = cl->queue_kind; GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); TempArena scratch = BeginScratchNoConflict(); GPU_D12_Resource *slotted_render_targets[GPU_MaxRenderTargets] = ZI; GPU_D12_Resource *bound_render_targets[GPU_MaxRenderTargets] = ZI; /* Begin dx12 command list */ GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(queue_kind); ID3D12GraphicsCommandList *rcl = dx12_cl->cl; b32 graphics_rootsig_set = 0; b32 compute_rootsig_set = 0; b32 descriptor_heaps_set = 0; GPU_D12_Pipeline *bound_pipeline = 0; /* Process gpu commands into dx12 commands */ { GPU_D12_Command *cmd = cl->first; while (cmd) { switch 
(cmd->kind) { default: break;
//- Resource barrier
/* Consecutive barrier commands are coalesced into one ResourceBarrier call.
   A fresh generation counter tags each resource touched in this batch so a
   resource gets at most one barrier per batch. */
case GPU_D12_CommandKind_TransitionToSrv: case GPU_D12_CommandKind_TransitionToUav: case GPU_D12_CommandKind_TransitionToRtv: case GPU_D12_CommandKind_TransitionToCopySrc: case GPU_D12_CommandKind_TransitionToCopyDst: case GPU_D12_CommandKind_FlushUav: { u64 barrier_gen = 1 + Atomic64FetchAdd(&g->resource_barrier_gen.v, 1);
/* Build barriers batch list */
Struct(TmpBarrier) { TmpBarrier *next; GPU_D12_Resource *r; }; u32 max_barriers_count = 0; TmpBarrier *first_barrier = 0; TmpBarrier *last_barrier = 0; while (cmd && (cmd->kind == GPU_D12_CommandKind_TransitionToSrv || cmd->kind == GPU_D12_CommandKind_TransitionToUav || cmd->kind == GPU_D12_CommandKind_TransitionToRtv || cmd->kind == GPU_D12_CommandKind_TransitionToCopySrc || cmd->kind == GPU_D12_CommandKind_TransitionToCopyDst || cmd->kind == GPU_D12_CommandKind_FlushUav)) { D3D12_RESOURCE_BARRIER_TYPE type = ZI; D3D12_RESOURCE_STATES state_after = ZI; GPU_D12_Resource *resource = cmd->barrier.resource; switch (cmd->kind) { case GPU_D12_CommandKind_TransitionToSrv: { type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; state_after = D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE; } break; case GPU_D12_CommandKind_TransitionToUav: { type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; state_after = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; } break; case GPU_D12_CommandKind_TransitionToRtv: { type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; state_after = D3D12_RESOURCE_STATE_RENDER_TARGET;
/* RTV transitions also record which render-target slot this resource fills,
   consumed later by the Rasterize case. */
i32 slot = cmd->barrier.rt_slot; if (slot >= 0 && slot < countof(slotted_render_targets)) { slotted_render_targets[slot] = resource; } } break; case GPU_D12_CommandKind_TransitionToCopySrc: { type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; state_after = D3D12_RESOURCE_STATE_COPY_SOURCE; } break; case GPU_D12_CommandKind_TransitionToCopyDst: { type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; state_after = D3D12_RESOURCE_STATE_COPY_DEST; } break; case GPU_D12_CommandKind_FlushUav: { type = D3D12_RESOURCE_BARRIER_TYPE_UAV; } break; } b32 skip = 0; if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV && resource->barrier_gen == barrier_gen) { /* Skip UAV transitions on resources that already have transition in the batch */ skip = 1; } if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && resource->barrier_state_after == state_after) { /* Skip transitions into existing state */ skip = 1; } if (!skip) { resource->barrier_type = type; resource->barrier_state_after = state_after; if (resource->barrier_gen != barrier_gen) { TmpBarrier *b = PushStruct(scratch.arena, TmpBarrier); resource->barrier_gen = barrier_gen; b->r = resource; QueuePush(first_barrier, last_barrier, b); ++max_barriers_count; } } cmd = cmd->next; }
/* Submit batched barriers */
/* FIXME: Transitions from UAV -> UAV should insert UAV barrier */
u32 barriers_count = 0; D3D12_RESOURCE_BARRIER *rbs = PushStructs(scratch.arena, D3D12_RESOURCE_BARRIER, max_barriers_count); for (TmpBarrier *b = first_barrier; b; b = b->next) { GPU_D12_Resource *resource = b->r; D3D12_RESOURCE_BARRIER_TYPE type = resource->barrier_type; D3D12_RESOURCE_STATES state_before = resource->state; D3D12_RESOURCE_STATES state_after = resource->barrier_state_after;
/* Drop no-op transitions (before == after); UAV barriers always emit. */
if (!(type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && state_before == state_after)) { D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; rb->Type = resource->barrier_type; if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION) { rb->Transition.pResource = resource->d3d_resource; rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; rb->Transition.StateBefore = state_before; rb->Transition.StateAfter = state_after; resource->state = state_after; } else if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_UAV) { rb->UAV.pResource = resource->d3d_resource; } } } if (barriers_count > 0) { ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); } } break;
//- Clear rtv
case GPU_D12_CommandKind_ClearRtv: { GPU_D12_Resource *resource =
cmd->clear.resource; Assert(resource->state == D3D12_RESOURCE_STATE_RENDER_TARGET); f32 clear_color[4] = ZI; clear_color[0] = resource->desc.clear_color.x; clear_color[1] = resource->desc.clear_color.y; clear_color[2] = resource->desc.clear_color.z; clear_color[3] = resource->desc.clear_color.w; ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, resource->rtv_descriptor->handle, clear_color, 0, 0); cmd = cmd->next; } break; //- Copy resource case GPU_D12_CommandKind_Copy: { GPU_D12_Resource *dst = cmd->copy.dst; GPU_D12_Resource *src = cmd->copy.src; D3D12_RESOURCE_DESC dst_desc = ZI; D3D12_RESOURCE_DESC src_desc = ZI; ID3D12Resource_GetDesc(dst->d3d_resource, &dst_desc); ID3D12Resource_GetDesc(src->d3d_resource, &src_desc); if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { /* Copy buffer -> buffer */ u64 dst_len = dst->desc.buffer.count * dst->desc.buffer.stride; u64 src_len = src->desc.buffer.count * src->desc.buffer.stride; u64 cpy_len = MinU64(dst_len, src_len); if (cpy_len > 0) { ID3D12GraphicsCommandList_CopyBufferRegion(rcl, dst->d3d_resource, 0, src->d3d_resource, 0, cpy_len); } } else if (src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { /* Copy buffer -> texture */ D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_placed_footprint = ZI; ID3D12Device_GetCopyableFootprints(g->device, &dst_desc, 0, 1, 0, &dst_placed_footprint, 0, 0, 0); D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; dst_loc.pResource = dst->d3d_resource; dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; dst_loc.SubresourceIndex = 0; D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; src_loc.pResource = src->d3d_resource; src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; src_loc.PlacedFootprint = dst_placed_footprint; ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, 0, 0, 0, &src_loc, 0); } else if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) { /* Copy texture -> buffer */ /* TODO */ Assert(0); } else if 
(dst_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER) { /* Copy texture -> texture */ /* TODO */ Assert(0); } cmd = cmd->next; } break;
//- Dispatch Vs/Ps shader
/* Build/lookup a PSO keyed on shaders + topology type + current render-target
   formats, then set all draw state and issue an indexed instanced draw. */
case GPU_D12_CommandKind_Rasterize: { GPU_D12_Pipeline *pipeline = 0; { GPU_D12_PipelineDesc pipeline_desc = ZI; pipeline_desc.vs = cmd->rasterize.vs; pipeline_desc.ps = cmd->rasterize.ps; { pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; switch (cmd->rasterize.mode) { default: Assert(0); break; case GPU_RasterizeMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; case GPU_RasterizeMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; case GPU_RasterizeMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; case GPU_RasterizeMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; case GPU_RasterizeMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; } }
/* RT formats come from the slots populated by earlier TransitionToRtv cmds. */
for (u32 i = 0; i < cmd->rasterize.rts_count; ++i) { GPU_D12_Resource *r = slotted_render_targets[i]; if (r) { pipeline_desc.render_target_formats[i] = r->desc.texture.format; } else { Assert(0); /* No bound render target in slot */ pipeline_desc.render_target_formats[i] = GPU_Format_Unknown; } } pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); } if (pipeline && cmd->rasterize.index_buffer->desc.buffer.count > 0) {
/* Set descriptor heaps */
if (!descriptor_heaps_set) { ID3D12DescriptorHeap *heaps[] = { g->cbv_srv_uav_heap->d3d_heap, g->sampler_heap->d3d_heap }; ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); descriptor_heaps_set = 1; }
/* Bind rootsig */
if (!graphics_rootsig_set) { ID3D12GraphicsCommandList_SetGraphicsRootSignature(rcl, g->bindless_rootsig); graphics_rootsig_set = 1; }
/* Bind pipeline */
if (pipeline != bound_pipeline) { ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); bound_pipeline = pipeline; }
/* Fill signature: the whole shader signature blob is pushed as root 32-bit
   constants at root parameter 0 (bindless rootsig). */
/* TODO: Only upload dirty */
{ u32 sig_size = cmd->rasterize.sig_size; void *sig = cmd->rasterize.sig; u32 num32bit = sig_size / 4; ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(rcl, 0, num32bit, sig, 0); }
/* Set rasterizer state */ /* TODO: Only set dirty */
{ D3D12_RECT scissor = ZI; scissor.left = cmd->rasterize.scissor.left; scissor.top = cmd->rasterize.scissor.top; scissor.right = cmd->rasterize.scissor.right; scissor.bottom = cmd->rasterize.scissor.bottom; D3D12_VIEWPORT viewport = ZI; viewport.TopLeftX = cmd->rasterize.viewport.top_left_x; viewport.TopLeftY = cmd->rasterize.viewport.top_left_y; viewport.Width = cmd->rasterize.viewport.width; viewport.Height = cmd->rasterize.viewport.height; viewport.MinDepth = cmd->rasterize.viewport.min_depth; viewport.MaxDepth = cmd->rasterize.viewport.max_depth; ID3D12GraphicsCommandList_RSSetScissorRects(rcl, 1, &scissor); ID3D12GraphicsCommandList_RSSetViewports(rcl, 1, &viewport); }
/* Set topology */ /* TODO: Only set dirty */
{ D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; switch (cmd->rasterize.mode) { default: Assert(0); break; case GPU_RasterizeMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; case GPU_RasterizeMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; case GPU_RasterizeMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; case GPU_RasterizeMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; case GPU_RasterizeMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; } ID3D12GraphicsCommandList_IASetPrimitiveTopology(rcl, topology); }
/* Set index buffer */ /* TODO: Only set dirty */
u32 indices_count = 0; { GPU_D12_Resource *indices = cmd->rasterize.index_buffer; D3D12_INDEX_BUFFER_VIEW ibv = ZI; ibv.BufferLocation = indices->buffer_gpu_address;
/* NOTE(review): DXGI_FORMAT_R16_UINT/R32_UINT are already DXGI formats — the
   GPU_D12_DxgiFormatFromGpuFormat() wrapper here expects a GPU_Format; it is
   an identity cast so behavior is fine, but the call should be dropped for
   clarity. */
if (indices->desc.buffer.stride == 2) { ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R16_UINT); } else { Assert(indices->desc.buffer.stride == 4); ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R32_UINT); } ibv.SizeInBytes = indices->desc.buffer.count * indices->desc.buffer.stride; indices_count = indices->desc.buffer.count; ID3D12GraphicsCommandList_IASetIndexBuffer(rcl, &ibv); }
/* Bind render targets */
/* NOTE(review): `target->rtv_descriptor->handle` dereferences without a null
   check; an unfilled slot would crash here in release builds (debug relies on
   the Assert in the pipeline-desc loop above) — TODO confirm intended. */
{ b32 om_dirty = 0; D3D12_CPU_DESCRIPTOR_HANDLE rtvs[countof(bound_render_targets)] = ZI; for (u32 i = 0; i < cmd->rasterize.rts_count; ++i) { GPU_D12_Resource *target = slotted_render_targets[i]; if (bound_render_targets[i] != target) { bound_render_targets[i] = target; om_dirty = 1; } rtvs[i] = target->rtv_descriptor->handle; } if (om_dirty) { ID3D12GraphicsCommandList_OMSetRenderTargets(rcl, cmd->rasterize.rts_count, rtvs, 0, 0); } }
/* Dispatch */
ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); } cmd = cmd->next; } break;
//- Dispatch compute shader
case GPU_D12_CommandKind_Compute: { GPU_D12_Pipeline *pipeline = 0; { GPU_D12_PipelineDesc pipeline_desc = ZI; pipeline_desc.cs = cmd->compute.cs; pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); } if (pipeline) {
/* Set descriptor heaps */
if (!descriptor_heaps_set) { ID3D12DescriptorHeap *heaps[] = { g->cbv_srv_uav_heap->d3d_heap, g->sampler_heap->d3d_heap }; ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); descriptor_heaps_set = 1; }
/* Bind rootsig */
if (!compute_rootsig_set) { ID3D12GraphicsCommandList_SetComputeRootSignature(rcl, g->bindless_rootsig); compute_rootsig_set = 1; }
/* Bind pipeline */
if (pipeline != bound_pipeline) { ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); bound_pipeline = pipeline; }
/* Fill signature */ /* TODO: Only upload dirty */
{ u32 sig_size = cmd->compute.sig_size; void *sig = cmd->compute.sig; u32 num32bit = sig_size / 4;
ID3D12GraphicsCommandList_SetComputeRoot32BitConstants(rcl, 0, num32bit, sig, 0); } /* Dispatch */ ID3D12GraphicsCommandList_Dispatch(rcl, cmd->compute.num_threads_x, cmd->compute.num_threads_y, cmd->compute.num_threads_z); } cmd = cmd->next; } break; } } } /* End dx12 command list */ u64 fence_target = GPU_D12_EndRawCommandList(dx12_cl); /* Free commands */ if (cl->last) { cl->last->next = f->first_free_command; f->first_free_command = cl->first; } /* Free command list */ StackPush(f->first_free_command_list, cl); EndScratch(scratch); return fence_target; } //////////////////////////////// //~ @hookdef Profiling helper hooks void GPU_ProfN(GPU_CommandList *cl, String name) { /* TODO */ } //////////////////////////////// //~ @hookdef Barrier hooks void GPU_TransitionToReadable(GPU_CommandList *cl, GPU_Resource *resource) { GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); cmd->kind = GPU_D12_CommandKind_TransitionToSrv; cmd->barrier.resource = (GPU_D12_Resource *)resource; } void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource) { GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); cmd->kind = GPU_D12_CommandKind_TransitionToUav; cmd->barrier.resource = (GPU_D12_Resource *)resource; } void GPU_TransitionToRenderable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot) { GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); cmd->kind = GPU_D12_CommandKind_TransitionToRtv; cmd->barrier.resource = (GPU_D12_Resource *)resource; cmd->barrier.rt_slot = slot; } void GPU_TransitionToCopySrc(GPU_CommandList *cl, GPU_Resource *resource) { GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); cmd->kind = GPU_D12_CommandKind_TransitionToCopySrc; cmd->barrier.resource = (GPU_D12_Resource *)resource; } void GPU_TransitionToCopyDst(GPU_CommandList *cl, GPU_Resource *resource) { GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); cmd->kind = GPU_D12_CommandKind_TransitionToCopyDst; 
cmd->barrier.resource = (GPU_D12_Resource *)resource; } void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource) { GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); cmd->kind = GPU_D12_CommandKind_FlushUav; cmd->barrier.resource = (GPU_D12_Resource *)resource; } //////////////////////////////// //~ @hookdef Dispatch hooks void GPU_ClearRenderable(GPU_CommandList *gpu_cl, GPU_Resource *resource) { GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CommandKind_ClearRtv; cmd->clear.resource = (GPU_D12_Resource *)resource; } void GPU_Rasterize_(GPU_CommandList *gpu_cl, u32 sig_size, void *sig, VertexShader vs, PixelShader ps, u32 rts_count, GPU_Viewport viewport, GPU_Scissor scissor, u32 instances_count, GPU_Resource *index_buffer, GPU_RasterizeMode mode) { GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CommandKind_Rasterize; Assert(sig_size <= sizeof(cmd->rasterize.sig)); cmd->rasterize.sig_size = MinU32(sizeof(cmd->rasterize.sig), sig_size); CopyBytes(cmd->rasterize.sig, sig, cmd->rasterize.sig_size); cmd->rasterize.vs = vs; cmd->rasterize.ps = ps; cmd->rasterize.rts_count = rts_count; Assert(rts_count < GPU_MaxRenderTargets); cmd->rasterize.viewport = viewport; cmd->rasterize.scissor = scissor; cmd->rasterize.instances_count = instances_count; cmd->rasterize.index_buffer = (GPU_D12_Resource *)index_buffer; cmd->rasterize.mode = mode; } void GPU_Compute_(GPU_CommandList *gpu_cl, u32 sig_size, void *sig, ComputeShader cs, u32 num_threads_x, u32 num_threads_y, u32 num_threads_z) { GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CommandKind_Compute; Assert(sig_size <= sizeof(cmd->compute.sig)); cmd->compute.sig_size = MinU32(sizeof(cmd->compute.sig), sig_size); CopyBytes(cmd->compute.sig, sig, cmd->compute.sig_size); 
cmd->compute.cs = cs; cmd->compute.num_threads_x = num_threads_x; cmd->compute.num_threads_y = num_threads_y; cmd->compute.num_threads_z = num_threads_z; } //////////////////////////////// //~ @hookdef Copy hooks void GPU_CopyResource(GPU_CommandList *gpu_cl, GPU_Resource *gpu_dst, GPU_Resource *gpu_src) { GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; GPU_D12_Resource *dst = (GPU_D12_Resource *)gpu_dst; GPU_D12_Resource *src = (GPU_D12_Resource *)gpu_src; GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CommandKind_Copy; cmd->copy.dst = dst; cmd->copy.src = src; } //////////////////////////////// //~ @hookdef Map hooks GPU_Mapped GPU_Map(GPU_Resource *gpu_r) { GPU_Mapped result = ZI; result.resource = gpu_r; GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_r; D3D12_RANGE read_range = ZI; HRESULT hr = ID3D12Resource_Map(r->d3d_resource, 0, &read_range, &result.mem); if (FAILED(hr) || !result.mem) { /* TODO: Don't panic */ Panic(Lit("Failed to map command buffer resource")); } return result; } void GPU_Unmap(GPU_Mapped m) { GPU_D12_Resource *r = (GPU_D12_Resource *)m.resource; ID3D12Resource_Unmap(r->d3d_resource, 0, 0); } void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference) { GPU_D12_SharedState *g = &GPU_D12_shared_state; D3D12_RESOURCE_DESC desc = ZI; ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc); u64 upload_size = 0; u64 upload_row_size = 0; u32 upload_num_rows = 0; D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI; ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size); D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint; { D3D12_RANGE read_range = ZI; u8 *dst_base = (u8 *)dst + placed_footprint.Offset; u8 *src_base = src; u32 z_size = upload_row_size * upload_num_rows; b32 src_overflow = 0; for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z) { u32 z_offset 
= z * z_size; for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y) { u8 *dst_row = dst_base + y * footprint.RowPitch + z_offset; u8 *src_row = src_base + y * upload_row_size + z_offset; CopyBytes(dst_row, src_row, upload_row_size); } } } } //////////////////////////////// //~ @hookdef Memory info hooks GPU_MemoryInfo GPU_QueryMemoryInfo(void) { /* TODO */ return (GPU_MemoryInfo) ZI; } //////////////////////////////// //~ @hookdef Swapchain hooks GPU_Swapchain *GPU_AcquireSwapchain(P_Window *window, GPU_Format format, Vec2I32 size) { GPU_D12_SharedState *g = &GPU_D12_shared_state; HRESULT hr = 0; HWND hwnd = (HWND)P_GetInternalWindowHandle(window); GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct); GPU_D12_Swapchain *swapchain = 0; { Lock lock = LockE(&g->free_swapchains_mutex); { swapchain = g->first_free_swapchain; if (swapchain) { g->first_free_swapchain = swapchain->next; } } Unlock(&lock); } if (!swapchain) { Arena *perm = PermArena(); PushAlign(perm, CachelineSize); swapchain = PushStructNoZero(perm, GPU_D12_Swapchain); PushAlign(perm, CachelineSize); } ZeroStruct(swapchain); swapchain->format = format; /* Create swapchain1 */ IDXGISwapChain1 *swapchain1 = 0; { DXGI_SWAP_CHAIN_DESC1 desc = ZI; desc.Format = GPU_D12_DxgiFormatFromGpuFormat(format); desc.Width = size.x; desc.Height = size.y; desc.SampleDesc.Count = 1; desc.SampleDesc.Quality = 0; desc.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT; desc.BufferCount = GPU_D12_SwapchainBufferCount; desc.Scaling = DXGI_SCALING_NONE; desc.Flags = GPU_D12_SwapchainFlags; desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; hr = IDXGIFactory2_CreateSwapChainForHwnd(g->factory, (IUnknown *)queue->d3d_queue, hwnd, &desc, 0, 0, &swapchain1); if (FAILED(hr)) { Panic(Lit("Failed to create IDXGISwapChain1")); } } /* Upgrade to swapchain3 */ hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void 
**)&swapchain->swapchain); if (FAILED(hr)) { Panic(Lit("Failed to create IDXGISwapChain3")); }
/* Create waitable object: with frame-latency pacing enabled, grab the
   waitable handle DXGI signals when a new frame may begin. */
#if GPU_D12_FrameLatency > 0
IDXGISwapChain3_SetMaximumFrameLatency(swapchain->swapchain, GPU_D12_FrameLatency); swapchain->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->swapchain); Assert(swapchain->waitable);
#endif
/* Disable Alt+Enter changing monitor resolution to match window size */
IDXGIFactory_MakeWindowAssociation(g->factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
/* The swapchain1 interface is no longer needed after the QueryInterface
   upgrade took its own reference. */
IDXGISwapChain1_Release(swapchain1); swapchain->window_hwnd = hwnd; GPU_D12_InitSwapchainResources(swapchain); return (GPU_Swapchain *)swapchain; } void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain) { /* TODO */ } void GPU_YieldOnSwapchain(GPU_Swapchain *swapchain) { /* TODO */ }
/* GPU_PresentSwapchain: resize/advance the swapchain buffers if needed, blit
   the texture into the current backbuffer, and present. Returns the fence
   value of the blit submission (0 if the blit was skipped). */
i64 GPU_PresentSwapchain(GPU_Swapchain *gpu_swapchain, GPU_Resource *gpu_texture, Vec2I32 backbuffer_size, Vec2I32 dst, i32 vsync) { GPU_D12_Swapchain *swapchain = (GPU_D12_Swapchain *)gpu_swapchain; GPU_D12_Resource *texture = (GPU_D12_Resource *)gpu_texture; GPU_D12_SwapchainBuffer *swapchain_buffer = GPU_D12_UpdateSwapchain(swapchain, backbuffer_size); D3D12_RESOURCE_DESC src_desc = ZI; D3D12_RESOURCE_DESC dst_desc = ZI; ID3D12Resource_GetDesc(texture->d3d_resource, &src_desc); ID3D12Resource_GetDesc(swapchain_buffer->d3d_resource, &dst_desc); b32 is_blitable = src_desc.Dimension == dst_desc.Dimension && src_desc.SampleDesc.Count == dst_desc.SampleDesc.Count && src_desc.SampleDesc.Quality == dst_desc.SampleDesc.Quality; Assert(is_blitable == 1); /* Texture resource must be similar enough to backbuffer resource to blit */ i64 fence_target = 0; if (is_blitable) {
/* Blit */
fence_target = GPU_D12_BlitToSwapchain(swapchain_buffer, texture, dst);
/* Tearing is only legal when unsynced presents are allowed and vsync is off. */
u32 present_flags = 0; if (GPU_D12_TearingIsAllowed && vsync == 0) { present_flags |= DXGI_PRESENT_ALLOW_TEARING; }
/* Present */
{ __profn("Present"); HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags); if (!SUCCEEDED(hr)) { Assert(0); } } } return fence_target; }