2707 lines
108 KiB
C
2707 lines
108 KiB
C
/* Singleton backend state for the D3D12 GPU layer (device, queues, descriptor
 * heaps, pipeline cache). ZI appears to be the project's zero-initializer. */
GPU_D12_SharedState GPU_D12_shared_state = ZI;
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookimpl Startup
|
|
|
|
void GPU_Startup(void)
|
|
{
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
TempArena scratch = BeginScratchNoConflict();
|
|
Arena *perm = PermArena();
|
|
|
|
//////////////////////////////
|
|
//- Initialize device
|
|
|
|
{
|
|
HRESULT hr = 0;
|
|
|
|
/* Enable debug layer */
|
|
u32 dxgi_factory_flags = 0;
|
|
#if GPU_DEBUG
|
|
{
|
|
ID3D12Debug *debug_controller0 = 0;
|
|
{
|
|
hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to create ID3D12Debug0"));
|
|
}
|
|
ID3D12Debug_EnableDebugLayer(debug_controller0);
|
|
#if GPU_DEBUG_VALIDATION
|
|
{
|
|
ID3D12Debug1 *debug_controller1 = 0;
|
|
{
|
|
hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to create ID3D12Debug1"));
|
|
}
|
|
ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, 1);
|
|
}
|
|
ID3D12Debug_Release(debug_controller1);
|
|
}
|
|
#endif
|
|
}
|
|
ID3D12Debug_Release(debug_controller0);
|
|
dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG;
|
|
}
|
|
#endif
|
|
|
|
/* Create factory */
|
|
{
|
|
hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&g->factory);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to initialize DXGI factory"));
|
|
}
|
|
}
|
|
|
|
/* Create device */
|
|
{
|
|
IDXGIAdapter3 *adapter = 0;
|
|
ID3D12Device10 *device = 0;
|
|
String error = Lit("Could not initialize GPU device.");
|
|
String first_gpu_name = ZI;
|
|
u32 adapter_index = 0;
|
|
b32 skip = 0; /* For iGPU testing */
|
|
for (;;)
|
|
{
|
|
{
|
|
hr = IDXGIFactory6_EnumAdapterByGpuPreference(g->factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter3, (void **)&adapter);
|
|
}
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
DXGI_ADAPTER_DESC1 desc;
|
|
IDXGIAdapter3_GetDesc1(adapter, &desc);
|
|
if (first_gpu_name.len == 0)
|
|
{
|
|
first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description);
|
|
}
|
|
{
|
|
/* TODO: Verify feature support:
|
|
* - HighestShaderModel >= D3D_SHADER_MODEL_6_6
|
|
* - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3
|
|
* - EnhancedBarriersSupported == 1
|
|
*/
|
|
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device);
|
|
}
|
|
if (SUCCEEDED(hr) && !skip)
|
|
{
|
|
break;
|
|
}
|
|
skip = 0;
|
|
ID3D12Device_Release(device);
|
|
IDXGIAdapter3_Release(adapter);
|
|
adapter = 0;
|
|
device = 0;
|
|
++adapter_index;
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
if (!device)
|
|
{
|
|
if (first_gpu_name.len > 0)
|
|
{
|
|
error = StringF(scratch.arena,
|
|
"Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.",
|
|
FmtString(first_gpu_name));
|
|
}
|
|
Panic(error);
|
|
}
|
|
g->adapter = adapter;
|
|
g->device = device;
|
|
}
|
|
|
|
/* Enable debug layer breaks */
|
|
{
|
|
#if GPU_DEBUG
|
|
/* Enable D3D12 Debug break */
|
|
{
|
|
ID3D12InfoQueue *info = 0;
|
|
hr = ID3D12Device_QueryInterface(g->device, &IID_ID3D12InfoQueue, (void **)&info);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to query ID3D12Device interface"));
|
|
}
|
|
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1);
|
|
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1);
|
|
ID3D12InfoQueue_Release(info);
|
|
}
|
|
/* Enable DXGI Debug break */
|
|
{
|
|
IDXGIInfoQueue *dxgi_info = 0;
|
|
hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to get DXGI debug interface"));
|
|
}
|
|
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1);
|
|
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1);
|
|
IDXGIInfoQueue_Release(dxgi_info);
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
//////////////////////////////
|
|
//- Initialize command queues
|
|
|
|
{
|
|
GPU_D12_CommandQueueDesc descs[] = {
|
|
{ .type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH },
|
|
{ .type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL },
|
|
{ .type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL },
|
|
};
|
|
for (u32 i = 0; i < MinU32(countof(descs), countof(g->queues)); ++i)
|
|
{
|
|
GPU_D12_CommandQueueDesc desc = descs[i];
|
|
D3D12_COMMAND_QUEUE_DESC d3d_desc = { .Type = desc.type, .Priority = desc.priority };
|
|
GPU_D12_Queue *queue = &g->queues[i];
|
|
queue->desc = desc;
|
|
HRESULT hr = ID3D12Device_CreateCommandQueue(g->device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->d3d_queue);
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&queue->commit_fence);
|
|
}
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to create GPU Command Queue"));
|
|
}
|
|
}
|
|
}
|
|
|
|
//////////////////////////////
|
|
//- Initialize descriptor heaps
|
|
|
|
{
|
|
Struct(Dx12HeapDesc) { D3D12_DESCRIPTOR_HEAP_TYPE type; D3D12_DESCRIPTOR_HEAP_FLAGS flags; u64 max; };
|
|
Dx12HeapDesc descs[GPU_D12_DescriptorHeapKind_Count] = {
|
|
[GPU_D12_DescriptorHeapKind_CbvSrvUav] = {
|
|
.type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
|
|
.flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
|
|
.max = GPU_D12_MaxCbvSrvUavDescriptors,
|
|
},
|
|
[GPU_D12_DescriptorHeapKind_Rtv] = {
|
|
.type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
|
|
.flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
|
|
.max = GPU_D12_MaxRtvDescriptors,
|
|
},
|
|
[GPU_D12_DescriptorHeapKind_Sampler] = {
|
|
.type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
|
|
.flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
|
|
.max = GPU_D12_MaxSamplerDescriptors,
|
|
},
|
|
};
|
|
for (GPU_D12_DescriptorHeapKind kind = 0; kind < countof(descs); ++kind)
|
|
{
|
|
Dx12HeapDesc desc = descs[kind];
|
|
GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[kind];
|
|
heap->descriptors_arena = AcquireArena(Gibi(1));
|
|
|
|
heap->type = desc.type;
|
|
heap->max_count = desc.max;
|
|
heap->descriptor_size = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, desc.type);
|
|
|
|
D3D12_DESCRIPTOR_HEAP_DESC d3d_desc = ZI;
|
|
d3d_desc.Type = desc.type;
|
|
d3d_desc.Flags = desc.flags;
|
|
d3d_desc.NumDescriptors = desc.max;
|
|
|
|
HRESULT hr = 0;
|
|
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
hr = ID3D12Device_CreateDescriptorHeap(g->device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap);
|
|
}
|
|
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle);
|
|
}
|
|
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
/* Push an empty descriptor at index 0, so that a handle with a value of 0 always represents nil */
|
|
GPU_D12_Arena *gpu_perm = GPU_D12_ArenaFromHandle(GPU_PermArena());
|
|
GPU_D12_Descriptor *nil_descriptor = GPU_D12_PushDescriptor(gpu_perm, kind, 0);
|
|
Assert(nil_descriptor->index == 0);
|
|
}
|
|
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to create descriptor heap"));
|
|
}
|
|
}
|
|
}
|
|
|
|
//////////////////////////////
|
|
//- Initialize bindless root signature
|
|
|
|
{
|
|
HRESULT hr = 0;
|
|
|
|
/* Serialize root signature */
|
|
ID3D10Blob *blob = 0;
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
D3D12_ROOT_PARAMETER params[MaxShaderConstants] = ZI;
|
|
for (i32 slot = 0; slot < MaxShaderConstants; ++slot)
|
|
{
|
|
D3D12_ROOT_PARAMETER *param = ¶ms[slot];
|
|
param->ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
|
param->ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
|
param->Constants.ShaderRegister = slot;
|
|
param->Constants.RegisterSpace = 0;
|
|
param->Constants.Num32BitValues = 1;
|
|
}
|
|
|
|
D3D12_ROOT_SIGNATURE_DESC desc = ZI;
|
|
desc.NumParameters = countof(params);
|
|
desc.pParameters = params;
|
|
desc.NumStaticSamplers = 0;
|
|
desc.pStaticSamplers = 0;
|
|
desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED;
|
|
|
|
hr = D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, 0);
|
|
}
|
|
|
|
/* Create root signature */
|
|
ID3D12RootSignature *rootsig = 0;
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), &IID_ID3D12RootSignature, (void **)&rootsig);
|
|
}
|
|
g->bindless_rootsig = rootsig;
|
|
|
|
if (blob)
|
|
{
|
|
ID3D10Blob_Release(blob);
|
|
}
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to create root signature"));
|
|
}
|
|
}
|
|
|
|
EndScratch(scratch);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Helpers
|
|
|
|
/* Recover the backend arena from an opaque handle; the handle stores the
 * pointer directly in .v. */
GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle)
{
	GPU_D12_Arena *arena = (GPU_D12_Arena *)handle.v;
	return arena;
}
|
|
|
|
/* Recover the backend command list from an opaque handle. */
GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle)
{
	GPU_D12_CmdList *cmd_list = (GPU_D12_CmdList *)handle.v;
	return cmd_list;
}
|
|
|
|
/* Recover the backend resource from an opaque handle. */
GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle)
{
	GPU_D12_Resource *resource = (GPU_D12_Resource *)handle.v;
	return resource;
}
|
|
|
|
/* Recover the backend swapchain from an opaque handle. */
GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle)
{
	GPU_D12_Swapchain *swapchain = (GPU_D12_Swapchain *)handle.v;
	return swapchain;
}
|
|
|
|
/* Translate a GPU_Format to DXGI_FORMAT. The cast implies the two enums share
 * numeric values — assumption based on this file; confirm in the enum def. */
DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format)
{
	DXGI_FORMAT dxgi_format = (DXGI_FORMAT)format;
	return dxgi_format;
}
|
|
|
|
/* Translate a GPU_Stage bitmask into the equivalent D3D12_BARRIER_SYNC mask.
 * GPU_Stage_All maps to the catch-all SYNC_ALL; otherwise each stage bit is
 * translated individually. */
D3D12_BARRIER_SYNC GPU_D12_BarrierSyncFromStages(GPU_Stage stages)
{
	if (stages == GPU_Stage_All)
	{
		return D3D12_BARRIER_SYNC_ALL;
	}

	D3D12_BARRIER_SYNC sync = 0;
	if (AnyBit(stages, GPU_Stage_ComputeShading)) { sync |= D3D12_BARRIER_SYNC_COMPUTE_SHADING; }
	if (AnyBit(stages, GPU_Stage_IndexAssembly))  { sync |= D3D12_BARRIER_SYNC_INDEX_INPUT; }
	if (AnyBit(stages, GPU_Stage_VertexShading))  { sync |= D3D12_BARRIER_SYNC_VERTEX_SHADING; }
	if (AnyBit(stages, GPU_Stage_PixelShading))   { sync |= D3D12_BARRIER_SYNC_PIXEL_SHADING; }
	if (AnyBit(stages, GPU_Stage_DepthStencil))   { sync |= D3D12_BARRIER_SYNC_DEPTH_STENCIL; }
	if (AnyBit(stages, GPU_Stage_RenderTarget))   { sync |= D3D12_BARRIER_SYNC_RENDER_TARGET; }
	if (AnyBit(stages, GPU_Stage_Copy))           { sync |= D3D12_BARRIER_SYNC_COPY; }
	if (AnyBit(stages, GPU_Stage_Indirect))       { sync |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT; }
	return sync;
}
|
|
|
|
/* Translate a GPU_Access bitmask into a D3D12_BARRIER_ACCESS mask.
 * No access bits -> NO_ACCESS; the full mask -> COMMON; otherwise each bit
 * is translated individually. */
D3D12_BARRIER_ACCESS GPU_D12_BarrierAccessFromAccesses(GPU_Access accesses)
{
	if (accesses == 0)
	{
		return D3D12_BARRIER_ACCESS_NO_ACCESS;
	}
	if (accesses == GPU_Access_All)
	{
		return D3D12_BARRIER_ACCESS_COMMON;
	}

	D3D12_BARRIER_ACCESS access = 0;
	if (AnyBit(accesses, GPU_Access_ShaderReadWrite))   { access |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS; }
	if (AnyBit(accesses, GPU_Access_ShaderRead))        { access |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE; }
	if (AnyBit(accesses, GPU_Access_CopyWrite))         { access |= D3D12_BARRIER_ACCESS_COPY_DEST; }
	if (AnyBit(accesses, GPU_Access_CopyRead))          { access |= D3D12_BARRIER_ACCESS_COPY_SOURCE; }
	if (AnyBit(accesses, GPU_Access_IndexBuffer))       { access |= D3D12_BARRIER_ACCESS_INDEX_BUFFER; }
	if (AnyBit(accesses, GPU_Access_IndirectArgument))  { access |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT; }
	if (AnyBit(accesses, GPU_Access_DepthStencilRead))  { access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ; }
	if (AnyBit(accesses, GPU_Access_DepthStencilWrite)) { access |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE; }
	if (AnyBit(accesses, GPU_Access_RenderTargetWrite)) { access |= D3D12_BARRIER_ACCESS_RENDER_TARGET; }
	return access;
}
|
|
|
|
/* Translate a GPU_Layout into the matching D3D12 enhanced-barrier layout via a
 * static lookup table. Unlisted GPU_Layout values map to the zero entry.
 * FIX: removed the stray ';' after the closing brace (invalid at file scope
 * in ISO C before C23; rejected under -pedantic). */
D3D12_BARRIER_LAYOUT GPU_D12_BarrierLayoutFromLayout(GPU_Layout layout)
{
	PERSIST Readonly D3D12_BARRIER_LAYOUT translate[] = {
		[GPU_Layout_Undefined] = D3D12_BARRIER_LAYOUT_UNDEFINED,
		[GPU_Layout_Simultaneous] = D3D12_BARRIER_LAYOUT_COMMON,
		[GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present] = D3D12_BARRIER_LAYOUT_COMMON,
		[GPU_Layout_DirectComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS,
		[GPU_Layout_DirectComputeQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_GENERIC_READ,
		[GPU_Layout_DirectComputeQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_SHADER_RESOURCE,
		[GPU_Layout_DirectComputeQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COPY_SOURCE,
		[GPU_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON,
		[GPU_Layout_DirectQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS,
		[GPU_Layout_DirectQueue_ShaderRead_CopyRead_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ,
		[GPU_Layout_DirectQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE,
		[GPU_Layout_DirectQueue_CopyRead] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE,
		[GPU_Layout_DirectQueue_DepthStencilRead_DepthStencilWrite] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE,
		[GPU_Layout_DirectQueue_DepthStencilRead] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ,
		[GPU_Layout_DirectQueue_RenderTargetWrite] = D3D12_BARRIER_LAYOUT_RENDER_TARGET,
		[GPU_Layout_ComputeQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON,
		[GPU_Layout_ComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS,
		[GPU_Layout_ComputeQueue_ShaderRead_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ,
		[GPU_Layout_ComputeQueue_ShaderRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE,
		[GPU_Layout_ComputeQueue_CopyRead] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE,
	};
	return translate[layout];
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Pipeline
|
|
|
|
/* Look up (or lazily create) a pipeline-state object for `desc`. The cache is
 * a hash table of per-bin singly linked lists; lookup takes a shared lock,
 * insertion takes an exclusive lock (double-checked). The PSO itself is built
 * OUTSIDE the lock by whichever thread inserted the entry.
 * NOTE(review): the entry is pushed into the bin before ->pso/->ok are filled
 * in, so a concurrent lookup can observe a pipeline with pso == 0 — confirm
 * callers tolerate this or move publication after creation. */
GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc)
{
	GPU_D12_SharedState *g = &GPU_D12_shared_state;
	/* Hash the raw bytes of the desc; RandU64FromSeed scrambles the FNV hash. */
	u64 hash = RandU64FromSeed(HashFnv64(Fnv64Basis, StringFromStruct(&desc)));

	/* Fetch pipeline from cache */
	GPU_D12_Pipeline *pipeline = 0;
	b32 is_pipeline_new = 0;
	GPU_D12_PipelineBin *bin = &g->pipeline_bins[hash % countof(g->pipeline_bins)];
	{
		/* Fast path: shared lock, search the bin. */
		{
			Lock lock = LockS(&bin->mutex);
			for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin)
			{
				if (pipeline->hash == hash) break;
			}
			Unlock(&lock);
		}
		/* Slow path: exclusive lock, re-search (another thread may have inserted
		 * between the two locks), then insert if still missing. */
		if (!pipeline)
		{
			Lock lock = LockE(&bin->mutex);
			for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin)
			{
				if (pipeline->hash == hash) break;
			}
			if (!pipeline)
			{
				Arena *perm = PermArena();
				/* Cacheline padding around the struct — presumably to avoid false
				 * sharing between bins. */
				PushAlign(perm, CachelineSize);
				pipeline = PushStruct(perm, GPU_D12_Pipeline);
				pipeline->desc = desc;
				pipeline->hash = hash;
				is_pipeline_new = 1;
				PushAlign(perm, CachelineSize);
				SllStackPushN(bin->first, pipeline, next_in_bin);
			}
			Unlock(&lock);
		}
	}

	/* Create pipeline (only by the inserting thread, outside the bin lock) */
	if (is_pipeline_new)
	{
		HRESULT hr = 0;
		b32 ok = 1;
		String error_str = ZI;

		/* Create PSO: graphics if a VS or PS is present, otherwise compute. */
		ID3D12PipelineState *pso = 0;
		if (ok && (!IsResourceNil(desc.vs.resource) || !IsResourceNil(desc.ps.resource)))
		{
			D3D12_RASTERIZER_DESC raster_desc = ZI;
			if (desc.is_wireframe)
			{
				raster_desc.FillMode = D3D12_FILL_MODE_WIREFRAME;
			}
			else
			{
				raster_desc.FillMode = D3D12_FILL_MODE_SOLID;
			}
			raster_desc.CullMode = D3D12_CULL_MODE_NONE;
			raster_desc.FrontCounterClockwise = 0;
			raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
			raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
			raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
			raster_desc.DepthClipEnable = 1;
			raster_desc.MultisampleEnable = 0;
			raster_desc.AntialiasedLineEnable = 0;
			raster_desc.ForcedSampleCount = 0;
			raster_desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;

			/* Fixed premultiplied-style alpha blend on RT 0 (src-alpha over). */
			D3D12_BLEND_DESC blend_desc = ZI;
			blend_desc.AlphaToCoverageEnable = 0;
			blend_desc.IndependentBlendEnable = 0;
			blend_desc.RenderTarget[0].BlendEnable = 1;
			blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
			blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
			blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
			blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE;
			blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
			blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
			blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;

			/* Depth/stencil disabled for all graphics pipelines here. */
			D3D12_DEPTH_STENCIL_DESC ds_desc = ZI;
			ds_desc.DepthEnable = 0;
			ds_desc.StencilEnable = 0;

			String vs = DataFromResource(desc.vs.resource);
			String ps = DataFromResource(desc.ps.resource);

			D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = ZI;
			pso_desc.pRootSignature = g->bindless_rootsig;
			pso_desc.VS.pShaderBytecode = vs.text;
			pso_desc.VS.BytecodeLength = vs.len;
			pso_desc.PS.pShaderBytecode = ps.text;
			pso_desc.PS.BytecodeLength = ps.len;
			pso_desc.RasterizerState = raster_desc;
			pso_desc.BlendState = blend_desc;
			pso_desc.DepthStencilState = ds_desc;
			pso_desc.PrimitiveTopologyType = desc.topology_type;
			pso_desc.SampleMask = UINT_MAX;
			pso_desc.SampleDesc.Count = 1;
			pso_desc.SampleDesc.Quality = 0;
			/* Copy render-target formats until the first UNKNOWN sentinel. */
			for (i32 i = 0; i < (i32)countof(desc.render_target_formats); ++i)
			{
				StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc.render_target_formats));
				DXGI_FORMAT format = GPU_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[i]);
				if (format != DXGI_FORMAT_UNKNOWN)
				{
					pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format;
				}
				else
				{
					break;
				}
			}
			hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
			if (FAILED(hr))
			{
				error_str = Lit("Failed to create graphics pipeline");
				ok = 0;
			}
		}
		else if (ok)
		{
			String cs = DataFromResource(desc.cs.resource);

			D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = ZI;
			pso_desc.pRootSignature = g->bindless_rootsig;
			pso_desc.CS.pShaderBytecode = cs.text;
			pso_desc.CS.BytecodeLength = cs.len;
			hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
			if (FAILED(hr))
			{
				error_str = Lit("Failed to create compute pipeline");
				ok = 0;
			}
		}

		if (!ok)
		{
			/* TODO: Don't panic */
			Panic(error_str);
		}

		pipeline->pso = pso;
		pipeline->error = error_str;
		pipeline->ok = ok;
	}

	return pipeline;
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Queue
|
|
|
|
/* Queue kinds index the shared queue table directly. */
GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind)
{
	return &GPU_D12_shared_state.queues[kind];
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Raw command list
|
|
|
|
/* Return a command list ready for recording on the given queue: recycles the
 * oldest committed list whose fence target the GPU has passed, or allocates a
 * new allocator/list pair. The list is reset and open on return. Panics on
 * any D3D12 failure. */
GPU_D12_RawCommandList *GPU_D12_PrepareRawCommandList(GPU_QueueKind queue_kind)
{
	GPU_D12_SharedState *g = &GPU_D12_shared_state;
	GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);

	/* Try to pull first completed command list from queue */
	GPU_D12_RawCommandList *cl = ZI;
	{
		Lock lock = LockE(&queue->commit_mutex);
		{
			/* Lists complete in commit order, so only the queue head needs to be
			 * tested against the fence's completed value. */
			u64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence);
			cl = queue->first_committed_cl;
			if (cl && cl->commit_fence_target <= completed)
			{
				SllQueuePop(queue->first_committed_cl, queue->last_committed_cl);
			}
			else
			{
				cl = 0;
			}
		}
		Unlock(&lock);
	}

	/* Allocate new command list if none are available */
	if (!cl)
	{
		Arena *perm = PermArena();
		{
			/* Cacheline padding — presumably to avoid false sharing. */
			PushAlign(perm, CachelineSize);
			cl = PushStruct(perm, GPU_D12_RawCommandList);
			PushAlign(perm, CachelineSize);
		}
		cl->queue = queue;

		HRESULT hr = 0;
		{
			if (SUCCEEDED(hr))
			{
				hr = ID3D12Device_CreateCommandAllocator(g->device, queue->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->d3d_ca);
			}

			if (SUCCEEDED(hr))
			{
				hr = ID3D12Device_CreateCommandList(g->device, 0, queue->desc.type, cl->d3d_ca, 0, &IID_ID3D12GraphicsCommandList7, (void **)&cl->d3d_cl);
			}

			/* Lists are created open; close so the unconditional Reset below is
			 * valid for both new and recycled lists. */
			if (SUCCEEDED(hr))
			{
				hr = ID3D12GraphicsCommandList_Close(cl->d3d_cl);
			}

			/* Initialize Direct queue CPU-only descriptors */
			if (SUCCEEDED(hr) && queue_kind == GPU_QueueKind_Direct)
			{
				GPU_D12_Arena *gpu_perm = GPU_D12_ArenaFromHandle(GPU_PermArena());
				for (u32 i = 0; i < countof(cl->rtv_descriptors); ++i)
				{
					cl->rtv_descriptors[i] = GPU_D12_PushDescriptor(gpu_perm, GPU_D12_DescriptorHeapKind_Rtv, 0);
				}
				cl->rtv_clear_descriptor = GPU_D12_PushDescriptor(gpu_perm, GPU_D12_DescriptorHeapKind_Rtv, 0);
			}
		}

		if (FAILED(hr))
		{
			Panic(Lit("Failed to create command list"));
		}
	}

	/* Reset command list (allocator first, then the list against it) */
	{
		HRESULT hr = 0;
		{
			if (SUCCEEDED(hr))
			{
				hr = ID3D12CommandAllocator_Reset(cl->d3d_ca);
			}

			if (SUCCEEDED(hr))
			{
				hr = ID3D12GraphicsCommandList_Reset(cl->d3d_cl, cl->d3d_ca, 0);
			}
		}

		if (FAILED(hr))
		{
			Panic(Lit("Failed to reset command list"));
		}
	}

	return cl;
}
|
|
|
|
/* Close, execute, and fence-tag a recorded command list, then append it to its
 * queue's in-flight list so PrepareRawCommandList can recycle it once the GPU
 * passes the fence target. */
void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl)
{
	GPU_D12_Queue *queue = cl->queue;

	/* Close */
	{
		HRESULT hr = ID3D12GraphicsCommandList_Close(cl->d3d_cl);
		if (FAILED(hr))
		{
			/* TODO: Don't panic */
			Panic(Lit("Failed to close command list before execution"));
		}
	}

	/* Commit — the whole sequence (target bump, execute, signal, append) is
	 * under the commit mutex so fence targets increase in queue order. */
	{
		Lock lock = LockE(&queue->commit_mutex);
		{
			u64 target = ++queue->commit_fence_target;
			cl->commit_fence_target = target;

			/* Execute */
			ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->d3d_cl);
			/* Signal after execute so the fence reaching `target` implies this
			 * list's work is done. */
			ID3D12CommandQueue_Signal(queue->d3d_queue, queue->commit_fence, target);

			/* Append */
			SllQueuePush(queue->first_committed_cl, queue->last_committed_cl, cl);
		}
		Unlock(&lock);
	}
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookimpl Arena
|
|
|
|
GPU_ArenaHandle GPU_AcquireArena(void)
|
|
{
|
|
GPU_D12_Arena *gpu_arena = 0;
|
|
{
|
|
Arena *perm = PermArena();
|
|
PushAlign(perm, CachelineSize);
|
|
gpu_arena = PushStruct(perm, GPU_D12_Arena);
|
|
PushAlign(perm, CachelineSize);
|
|
}
|
|
gpu_arena->arena = AcquireArena(Gibi(1));
|
|
|
|
return GPU_D12_MakeHandle(GPU_ArenaHandle, gpu_arena);
|
|
}
|
|
|
|
/* Release a GPU arena. Not implemented yet: the backing Arena, the D3D12
 * resource heap, and any descriptors owned by the arena are currently leaked. */
void GPU_ReleaseArena(GPU_ArenaHandle arena)
{
	/* TODO */
	(void)arena; /* silence unused-parameter warnings until implemented */
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Resource helpers
|
|
|
|
/* Index into a descriptor heap's bookkeeping array. Descriptors live in a
 * contiguous arena, so the lookup is base + index. */
GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_kind, u32 index)
{
	GPU_D12_DescriptorHeap *heap = &GPU_D12_shared_state.descriptor_heaps[heap_kind];
	GPU_D12_Descriptor *base = ArenaFirst(heap->descriptors_arena, GPU_D12_Descriptor);
	return base + index;
}
|
|
|
|
/* Allocate a descriptor slot in the given heap. With forced == 0 the slot is
 * chosen freely: first try to recycle one of the arena's committed descriptors
 * whose owning queue's fence has passed, then the heap free list, then grow the
 * heap. With forced != 0 the caller demands that exact slot index; intermediate
 * slots created by growth are parked on the free list, and it is a panic if the
 * forced slot is already occupied. The returned descriptor is zeroed and has
 * heap/index/cpu-handle filled in. */
GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_DescriptorHeapKind heap_kind, u32 forced)
{
	GPU_D12_SharedState *g = &GPU_D12_shared_state;
	GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[heap_kind];

	GPU_D12_Descriptor *descriptor = 0;

	/* Grab completed descriptor from arena */
	if (forced == 0)
	{
		GPU_D12_DescriptorList *descriptors_by_queue = gpu_arena->committed_descriptors_by_heap_and_queue[heap_kind];
		for (GPU_QueueKind queue_kind = 0; !descriptor && queue_kind < GPU_NumQueues; ++queue_kind)
		{
			GPU_D12_DescriptorList *descriptors = &descriptors_by_queue[queue_kind];
			/* Only the list head is considered; presumably lists are kept in
			 * commit order so the head completes first. */
			descriptor = descriptors->first;
			if (descriptor)
			{
				GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
				u64 queue_commit_completion = ID3D12Fence_GetCompletedValue(queue->commit_fence);
				if (queue_commit_completion >= descriptor->queue_commit_target)
				{
					/* Descriptor no longer in use by gpu, reuse it */
					DllQueueRemove(descriptors->first, descriptors->last, descriptor);
				}
				else
				{
					/* Descriptor may still be in use by gpu */
					descriptor = 0;
				}
			}
		}
	}

	/* Allocate new descriptor from heap */
	u32 index = forced;
	if (!descriptor)
	{
		Lock lock = LockE(&heap->mutex);
		{
			if (index == 0)
			{
				/* Free-choice allocation: pop the free list, else grow the arena. */
				if (heap->first_free)
				{
					descriptor = heap->first_free;
					DllStackRemove(heap->first_free, descriptor);
					index = descriptor->index;
				}
				else
				{
					u32 descriptors_count = ArenaCount(heap->descriptors_arena, GPU_D12_Descriptor);
					if (descriptors_count >= heap->max_count)
					{
						Panic(Lit("Max descriptors reached in heap"));
					}
					descriptor = PushStruct(heap->descriptors_arena, GPU_D12_Descriptor);
					index = descriptors_count;
				}
			}
			else
			{
				/* Forced-index allocation. */
				if (index >= heap->max_count)
				{
					Panic(Lit("Max descriptors reached in heap"));
				}

				/* Push descriptors if index slot is past end of heap */
				u32 descriptors_count = ArenaCount(heap->descriptors_arena, GPU_D12_Descriptor);
				if (index >= descriptors_count)
				{
					u32 pushed_count = index - descriptors_count + 1;
					PushStructs(heap->descriptors_arena, GPU_D12_Descriptor, pushed_count);
					for (u32 pushed_index = descriptors_count; pushed_index < descriptors_count + pushed_count; ++pushed_index)
					{
						GPU_D12_Descriptor *pushed = &(ArenaFirst(heap->descriptors_arena, GPU_D12_Descriptor)[pushed_index]);
						if (pushed_index < index)
						{
							/* Filler slots below the forced index go to the free list,
							 * pre-initialized so they are usable when popped later. */
							pushed->heap = heap;
							pushed->index = pushed_index;
							pushed->handle.ptr = heap->start_handle.ptr + (pushed_index * heap->descriptor_size);
							DllStackPush(heap->first_free, pushed);
						}
						else
						{
							descriptor = pushed;
						}
					}
				}

				/* Search free list for freed descriptor with matching index */
				if (!descriptor)
				{
					for (GPU_D12_Descriptor *n = heap->first_free; n; n = n->next)
					{
						if (n->index == index)
						{
							DllStackRemove(heap->first_free, n);
							descriptor = n;
							break;
						}
					}
				}

				/* Forced slot exists but is neither new nor free: it is occupied. */
				if (!descriptor)
				{
					Arena *perm = PermArena();
					Panic(StringF(perm, "Tried to force push a GPU pointer into slot %F, but a descriptor already exists there (current heap count: %F)", FmtUint(index), FmtUint(ArenaCount(heap->descriptors_arena, GPU_D12_Descriptor))));
				}
			}
		}
		Unlock(&lock);
	}

	/* Initialize descriptor handle */
	ZeroStruct(descriptor);
	descriptor->heap = heap;
	descriptor->index = index;
	descriptor->handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size);

	return descriptor;
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookimpl Resource
|
|
|
|
//- Resource creation
|
|
|
|
/* Create a GPU buffer resource inside the arena's placed-resource heap,
 * lazily creating the heap on first use. Returns an opaque resource handle.
 * FIX: the HRESULT from CreatePlacedResource2 was previously ignored — on
 * failure d3d_resource stayed NULL and GetGPUVirtualAddress(NULL) crashed;
 * it is now checked and panics, matching the file's other creation paths. */
GPU_ResourceHandle GPU_PushBufferResource(GPU_ArenaHandle arena_handle, GPU_BufferResourceDesc desc)
{
	GPU_D12_SharedState *g = &GPU_D12_shared_state;
	GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle);

	/* Create resource heap */
	if (!gpu_arena->d3d_resource_heap)
	{
		/* FIXME: Dynamic size */
		D3D12_HEAP_DESC d3d_desc = ZI;
		d3d_desc.SizeInBytes = Mebi(64);
		d3d_desc.Flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
		d3d_desc.Flags |= D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; /* TODO: Remove this and support tier 1 resource heaps */
		d3d_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;

		ID3D12Heap *heap = 0;
		HRESULT hr = ID3D12Device_CreateHeap(g->device, &d3d_desc, &IID_ID3D12Heap, (void **)&heap);
		if (FAILED(hr))
		{
			/* TODO: Don't panic */
			Panic(Lit("Failed to create D3D12 resource heap"));
		}

		gpu_arena->d3d_resource_heap = heap;
		gpu_arena->heap_size = d3d_desc.SizeInBytes;
	}

	/* Create d3d resource */
	ID3D12Resource *d3d_resource = 0;
	/* Buffers must be non-empty and 4-byte aligned for raw/structured views. */
	u64 aligned_size = AlignU64(MaxU64(desc.size, 1), 4);
	{
		D3D12_RESOURCE_DESC1 d3d_desc = ZI;
		d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
		d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
		d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
		d3d_desc.Width = aligned_size;
		d3d_desc.Height = 1;
		d3d_desc.DepthOrArraySize = 1;
		d3d_desc.MipLevels = 1;
		d3d_desc.SampleDesc.Count = 1;
		d3d_desc.SampleDesc.Quality = 0;
		d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_AllowShaderReadWrite);

		/* Ask the driver for the placed-resource footprint. */
		u64 alloc_size = 0;
		u64 alloc_align = 0;
		{
			D3D12_RESOURCE_ALLOCATION_INFO alloc_info = ZI;
			ID3D12Device_GetResourceAllocationInfo(g->device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc);
			alloc_size = alloc_info.SizeInBytes;
			alloc_align = alloc_info.Alignment;
		}

		/* Linear sub-allocation from the arena's heap. */
		u64 alloc_pos = AlignU64(gpu_arena->heap_pos, alloc_align);
		if (alloc_pos + alloc_size > gpu_arena->heap_size)
		{
			Panic(Lit("Gpu arena overflow"));
		}
		gpu_arena->heap_pos = alloc_pos + alloc_size;

		HRESULT hr = ID3D12Device10_CreatePlacedResource2(g->device,
		                                                  gpu_arena->d3d_resource_heap,
		                                                  alloc_pos,
		                                                  &d3d_desc,
		                                                  D3D12_BARRIER_LAYOUT_UNDEFINED,
		                                                  0,
		                                                  0,
		                                                  0,
		                                                  &IID_ID3D12Resource,
		                                                  (void **)&d3d_resource);
		if (FAILED(hr))
		{
			/* TODO: Don't panic */
			Panic(Lit("Failed to create D3D12 buffer resource"));
		}
	}

	GPU_D12_Resource *resource = PushStruct(gpu_arena->arena, GPU_D12_Resource);
	resource->d3d_resource = d3d_resource;
	/* Globally unique, monotonically increasing id (starts at 1; 0 is nil). */
	resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;
	resource->flags = desc.flags;

	resource->buffer_size = desc.size;
	resource->buffer_size_aligned = aligned_size;
	resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource);

	return GPU_D12_MakeHandle(GPU_ResourceHandle, resource);
}
|
|
|
|
/* Create a GPU texture resource (1D/2D/3D) inside the arena's placed-resource
 * heap, lazily creating the heap on first use. Render targets get an optimized
 * clear value from desc.clear_color.
 * FIX: the HRESULT from CreatePlacedResource2 was previously ignored — on
 * failure the NULL resource would be handed to callers; now checked + panic. */
GPU_ResourceHandle GPU_PushTextureResource(GPU_ArenaHandle arena_handle, GPU_TextureResourceDesc desc)
{
	GPU_D12_SharedState *g = &GPU_D12_shared_state;
	GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle);
	D3D12_BARRIER_LAYOUT initial_layout = GPU_D12_BarrierLayoutFromLayout(desc.initial_layout);

	/* Create resource heap */
	if (!gpu_arena->d3d_resource_heap)
	{
		/* FIXME: Dynamic size */
		D3D12_HEAP_DESC d3d_desc = ZI;
		d3d_desc.SizeInBytes = Mebi(64);
		d3d_desc.Flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
		d3d_desc.Flags |= D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; /* TODO: Remove this and support tier 1 resource heaps */
		d3d_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;

		ID3D12Heap *heap = 0;
		HRESULT hr = ID3D12Device_CreateHeap(g->device, &d3d_desc, &IID_ID3D12Heap, (void **)&heap);
		if (FAILED(hr))
		{
			/* TODO: Don't panic */
			Panic(Lit("Failed to create D3D12 resource heap"));
		}

		gpu_arena->d3d_resource_heap = heap;
		gpu_arena->heap_size = d3d_desc.SizeInBytes;
	}

	/* Create d3d resource */
	ID3D12Resource *d3d_resource = 0;
	{
		D3D12_RESOURCE_DESC1 d3d_desc = ZI;
		d3d_desc.Dimension = desc.kind == GPU_TextureKind_1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :
		                     desc.kind == GPU_TextureKind_2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D :
		                                                       D3D12_RESOURCE_DIMENSION_TEXTURE3D;
		d3d_desc.Format = GPU_D12_DxgiFormatFromGpuFormat(desc.format);
		/* Clamp dims/mips so a zeroed desc still yields a valid 1x1x1 texture. */
		d3d_desc.Width = MaxI32(desc.dims.x, 1);
		d3d_desc.Height = MaxI32(desc.dims.y, 1);
		d3d_desc.DepthOrArraySize = MaxI32(desc.dims.z, 1);
		d3d_desc.MipLevels = MaxI32(desc.mip_levels, 1);
		d3d_desc.SampleDesc.Count = 1;
		d3d_desc.SampleDesc.Quality = 0;
		d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_AllowShaderReadWrite);
		d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_AllowRenderTarget);
		d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(desc.flags, GPU_ResourceFlag_AllowDepthStencil);

		/* Ask the driver for the placed-resource footprint. */
		u64 alloc_size = 0;
		u64 alloc_align = 0;
		{
			D3D12_RESOURCE_ALLOCATION_INFO alloc_info = ZI;
			ID3D12Device_GetResourceAllocationInfo(g->device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc);
			alloc_size = alloc_info.SizeInBytes;
			alloc_align = alloc_info.Alignment;
		}

		/* Linear sub-allocation from the arena's heap. */
		u64 alloc_pos = AlignU64(gpu_arena->heap_pos, alloc_align);
		if (alloc_pos + alloc_size > gpu_arena->heap_size)
		{
			Panic(Lit("Gpu arena overflow"));
		}
		gpu_arena->heap_pos = alloc_pos + alloc_size;

		/* Optimized clear value; only legal to pass for RT-capable textures. */
		D3D12_CLEAR_VALUE clear_value = {
			.Color[0] = desc.clear_color.x,
			.Color[1] = desc.clear_color.y,
			.Color[2] = desc.clear_color.z,
			.Color[3] = desc.clear_color.w,
			.Format = d3d_desc.Format
		};

		HRESULT hr = ID3D12Device10_CreatePlacedResource2(g->device,
		                                                  gpu_arena->d3d_resource_heap,
		                                                  alloc_pos,
		                                                  &d3d_desc,
		                                                  initial_layout,
		                                                  (d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? &clear_value : 0,
		                                                  0,
		                                                  0,
		                                                  &IID_ID3D12Resource,
		                                                  (void **)&d3d_resource);
		if (FAILED(hr))
		{
			/* TODO: Don't panic */
			Panic(Lit("Failed to create D3D12 texture resource"));
		}
	}

	GPU_D12_Resource *resource = PushStruct(gpu_arena->arena, GPU_D12_Resource);
	resource->d3d_resource = d3d_resource;
	/* Globally unique, monotonically increasing id (starts at 1; 0 is nil). */
	resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;
	resource->flags = desc.flags;

	resource->is_texture = 1;
	resource->texture_format = desc.format;
	resource->texture_dims = desc.dims;
	resource->texture_mip_levels = desc.mip_levels;
	resource->texture_layout = initial_layout;

	return GPU_D12_MakeHandle(GPU_ResourceHandle, resource);
}
|
|
|
|
/* Record a sampler in the arena. Samplers have no backing ID3D12Resource;
 * only the desc and a fresh unique id are stored. */
GPU_ResourceHandle GPU_PushSamplerResource(GPU_ArenaHandle arena_handle, GPU_SamplerResourceDesc desc)
{
	GPU_D12_SharedState *g = &GPU_D12_shared_state;
	GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle);

	GPU_D12_Resource *resource = PushStruct(gpu_arena->arena, GPU_D12_Resource);
	resource->sampler_desc = desc;
	/* Ids start at 1 so 0 can stand for nil. */
	resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;

	return GPU_D12_MakeHandle(GPU_ResourceHandle, resource);
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookimpl Shader-accessible pointer
|
|
|
|
/* Create a shader-visible descriptor ("pointer") for `resource_handle` in the
 * bindless heap of `gpu_arena`, and return its bindless index.
 *
 * The view kind is derived from pointer_desc.kind:
 *  - (RW)StructuredBuffer / (RW)ByteAddressBuffer -> buffer SRV/UAV
 *  - SamplerState                                 -> sampler
 *  - everything else                              -> texture SRV/UAV
 *
 * Fixes vs previous version:
 *  - Raw (ByteAddress) views must have StructureByteStride == 0 when
 *    Format is DXGI_FORMAT_R32_TYPELESS with the RAW flag; the old code left
 *    the structured stride in place, which the D3D12 debug layer rejects.
 *  - Guard the element-count division against element_size == 0 (UB).
 */
u32 GPU_PushPointer(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, GPU_PointerDesc pointer_desc)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle);
    GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle);

    ShaderHandleKind kind = pointer_desc.kind;

    b32 is_buffer = kind == ShaderHandleKind_StructuredBuffer ||
                    kind == ShaderHandleKind_RWStructuredBuffer ||
                    kind == ShaderHandleKind_ByteAddressBuffer ||
                    kind == ShaderHandleKind_RWByteAddressBuffer;

    b32 is_sampler = kind == ShaderHandleKind_SamplerState;

    b32 is_texture = !is_buffer && !is_sampler;

    b32 is_raw = kind == ShaderHandleKind_ByteAddressBuffer ||
                 kind == ShaderHandleKind_RWByteAddressBuffer;

    b32 is_uav = kind == ShaderHandleKind_RWStructuredBuffer ||
                 kind == ShaderHandleKind_RWByteAddressBuffer ||
                 kind == ShaderHandleKind_RWTexture1D ||
                 kind == ShaderHandleKind_RWTexture2D ||
                 kind == ShaderHandleKind_RWTexture3D;

    GPU_D12_Descriptor *descriptor = 0;
    if (is_buffer)
    {
        descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav, pointer_desc.forced);
        u64 buffer_size_aligned = resource->buffer_size_aligned;
        /* Guard against element_size == 0: a zero-element view is created
         * below instead of dividing by zero.
         * NOTE(review): for raw views callers are expected to pass
         * element_size == 4 so that FirstElement/NumElements are in 32-bit
         * units as D3D12 requires — confirm at call sites. */
        u64 num_elements_in_buffer = pointer_desc.element_size ? buffer_size_aligned / pointer_desc.element_size : 0;
        u64 num_elements_after_offset = num_elements_in_buffer > pointer_desc.element_offset ? num_elements_in_buffer - pointer_desc.element_offset : 0;
        if (num_elements_after_offset > 0)
        {
            if (is_uav)
            {
                D3D12_UNORDERED_ACCESS_VIEW_DESC desc = ZI;
                {
                    desc.Format = DXGI_FORMAT_UNKNOWN;
                    desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
                    desc.Buffer.FirstElement = pointer_desc.element_offset;
                    desc.Buffer.NumElements = num_elements_after_offset;
                    desc.Buffer.StructureByteStride = pointer_desc.element_size;
                    desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
                }
                if (is_raw)
                {
                    /* Raw views: R32_TYPELESS + RAW flag require a zero
                     * structure stride, otherwise the desc is invalid. */
                    desc.Format = DXGI_FORMAT_R32_TYPELESS;
                    desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
                    desc.Buffer.StructureByteStride = 0;
                }
                ID3D12Device_CreateUnorderedAccessView(g->device, resource->d3d_resource, 0, &desc, descriptor->handle);
            }
            else
            {
                D3D12_SHADER_RESOURCE_VIEW_DESC desc = ZI;
                {
                    desc.Format = DXGI_FORMAT_UNKNOWN;
                    desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
                    desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
                    desc.Buffer.FirstElement = pointer_desc.element_offset;
                    desc.Buffer.NumElements = num_elements_after_offset;
                    desc.Buffer.StructureByteStride = pointer_desc.element_size;
                    desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
                }
                if (is_raw)
                {
                    /* Raw views: R32_TYPELESS + RAW flag require a zero
                     * structure stride, otherwise the desc is invalid. */
                    desc.Format = DXGI_FORMAT_R32_TYPELESS;
                    desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW;
                    desc.Buffer.StructureByteStride = 0;
                }
                ID3D12Device_CreateShaderResourceView(g->device, resource->d3d_resource, &desc, descriptor->handle);
            }
        }
    }
    else if (is_texture)
    {
        /* Textures use null descs: view format/dimension inferred from the
         * resource by the runtime. */
        descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav, pointer_desc.forced);
        if (is_uav)
        {
            ID3D12Device_CreateUnorderedAccessView(g->device, resource->d3d_resource, 0, 0, descriptor->handle);
        }
        else
        {
            ID3D12Device_CreateShaderResourceView(g->device, resource->d3d_resource, 0, descriptor->handle);
        }
    }
    else if (is_sampler)
    {
        descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_Sampler, pointer_desc.forced);
        GPU_SamplerResourceDesc sampler_desc = resource->sampler_desc;
        D3D12_SAMPLER_DESC d3d_desc = ZI;
        {
            d3d_desc.Filter = (D3D12_FILTER)sampler_desc.filter;
            d3d_desc.AddressU = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.x;
            d3d_desc.AddressV = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.y;
            d3d_desc.AddressW = (D3D12_TEXTURE_ADDRESS_MODE)sampler_desc.z;
            d3d_desc.MipLODBias = sampler_desc.mip_lod_bias;
            d3d_desc.MaxAnisotropy = MaxU32(sampler_desc.max_anisotropy, 1);
            d3d_desc.ComparisonFunc = (D3D12_COMPARISON_FUNC)sampler_desc.comparison;
            d3d_desc.BorderColor[0] = sampler_desc.border_color.x;
            d3d_desc.BorderColor[1] = sampler_desc.border_color.y;
            d3d_desc.BorderColor[2] = sampler_desc.border_color.z;
            d3d_desc.BorderColor[3] = sampler_desc.border_color.w;
            d3d_desc.MinLOD = sampler_desc.min_lod;
            d3d_desc.MaxLOD = sampler_desc.max_lod;
        }
        /* Zero address modes (zero-initialized descs) default to CLAMP;
         * 0 is not a valid D3D12_TEXTURE_ADDRESS_MODE. */
        if (d3d_desc.AddressU == 0) d3d_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
        if (d3d_desc.AddressV == 0) d3d_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
        if (d3d_desc.AddressW == 0) d3d_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
        if (d3d_desc.MaxLOD >= F32Infinity)
        {
            d3d_desc.MaxLOD = D3D12_FLOAT32_MAX;
        }
        ID3D12Device_CreateSampler(g->device, &d3d_desc, descriptor->handle);
    }

    /* is_buffer / is_texture / is_sampler are exhaustive, so descriptor is
     * always assigned by one of the branches above. */
    return descriptor->index;
}
|
|
|
|
//- Count
|
|
|
|
u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer)
{
    /* Byte size of the buffer as recorded at creation. */
    return GPU_D12_ResourceFromHandle(buffer)->buffer_size;
}
|
|
|
|
i32 GPU_Count1D(GPU_ResourceHandle texture)
{
    /* Width (x extent) of a 1D texture. */
    return GPU_D12_ResourceFromHandle(texture)->texture_dims.x;
}
|
|
|
|
Vec2I32 GPU_Count2D(GPU_ResourceHandle texture)
{
    /* Width/height (x, y extents) of a 2D texture. */
    Vec3I32 dims = GPU_D12_ResourceFromHandle(texture)->texture_dims;
    return VEC2I32(dims.x, dims.y);
}
|
|
|
|
Vec3I32 GPU_Count3D(GPU_ResourceHandle texture)
{
    /* Full x/y/z extents of a texture. */
    return GPU_D12_ResourceFromHandle(texture)->texture_dims;
}
|
|
|
|
i32 GPU_CountWidth(GPU_ResourceHandle texture)
{
    /* Texture width (x extent). */
    return GPU_D12_ResourceFromHandle(texture)->texture_dims.x;
}
|
|
|
|
i32 GPU_CountHeight(GPU_ResourceHandle texture)
{
    /* Texture height (y extent). */
    return GPU_D12_ResourceFromHandle(texture)->texture_dims.y;
}
|
|
|
|
i32 GPU_CountDepth(GPU_ResourceHandle texture)
{
    /* Texture depth (z extent). */
    return GPU_D12_ResourceFromHandle(texture)->texture_dims.z;
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ Command helpers
|
|
|
|
/* Reserve one command slot on command list `cl` and return a pointer to it.
 * The caller fills in the returned GPU_D12_Cmd.
 *
 * Commands live in fixed-capacity chunks (GPU_D12_CmdsPerChunk each) queued
 * on the list. A new chunk is obtained, in order of preference, from:
 *   1. the list's current last chunk (if it still has room),
 *   2. the global free-chunk list (under g->free_cmd_chunks_mutex),
 *   3. a fresh allocation from the permanent arena.
 */
GPU_D12_Cmd *GPU_D12_PushCmd(GPU_D12_CmdList *cl)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;

    /* Grab chunk */
    GPU_D12_CmdChunk *chunk = cl->last_cmd_chunk;
    {
        /* Current chunk is full -> need a different one. */
        if (chunk && chunk->cmds_count >= GPU_D12_CmdsPerChunk)
        {
            chunk = 0;
        }
        if (!chunk)
        {
            /* Try to recycle a chunk from the shared free list. */
            Lock lock = LockE(&g->free_cmd_chunks_mutex);
            {
                chunk = g->first_free_cmd_chunk;
                if (chunk)
                {
                    g->first_free_cmd_chunk = chunk->next;
                }
            }
            Unlock(&lock);
            if (chunk)
            {
                /* Reset the recycled chunk, but keep its cmds backing
                 * storage (allocated once, reused forever). */
                GPU_D12_Cmd *cmds = chunk->cmds;
                ZeroStruct(chunk);
                chunk->cmds = cmds;
            }
        }
        if (!chunk)
        {
            /* Free list was empty: allocate a brand-new chunk. Chunks are
             * never freed back to the arena; they cycle through the free
             * list instead. */
            Arena *perm = PermArena();
            chunk = PushStruct(perm, GPU_D12_CmdChunk);
            chunk->cmds = PushStructsNoZero(perm, GPU_D12_Cmd, GPU_D12_CmdsPerChunk);
        }
        /* Append the (new or recycled) chunk to this list's chunk queue. */
        if (chunk != cl->last_cmd_chunk)
        {
            SllQueuePush(cl->first_cmd_chunk, cl->last_cmd_chunk, chunk);
        }
    }

    /* Push cmd to chunk */
    GPU_D12_Cmd *cmd = &chunk->cmds[chunk->cmds_count++];
    ++cl->cmds_count;
    return cmd;
}
|
|
|
|
GPU_D12_Cmd *GPU_D12_PushConstCmd(GPU_D12_CmdList *cl, i32 slot, void *v)
{
    /* Record a root-constant update: the 4 bytes at *v, destined for
     * constant slot `slot`. */
    GPU_D12_Cmd *result = GPU_D12_PushCmd(cl);
    result->kind = GPU_D12_CmdKind_Constant;
    result->constant.slot = slot;
    CopyBytes(&result->constant.value, v, 4);
    return result;
}
|
|
|
|
/* Reserve a CPU-writable staging region of `size` bytes on the queue that
 * `cl` targets. Regions come from a per-queue upload heap; a region becomes
 * reusable once the queue's commit fence passes the region's
 * completion_target. The returned region's completion_target is set to
 * I64Max (in-flight) and is expected to be lowered at submission time.
 *
 * Fixes vs previous version:
 *  - When merging a completed region into its predecessor, the successor's
 *    prev pointer was left dangling (pointing at the freed node); it is now
 *    repaired, keeping the circular doubly-linked region list consistent.
 *  - A newly created staging heap was never stored back into
 *    queue->staging_heap, so every call leaked a fresh committed resource
 *    and region recycling never ran; the heap is now recorded on the queue.
 */
GPU_D12_StagingRegionNode *GPU_D12_PushStagingRegion(GPU_D12_CmdList *cl, u64 size)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_QueueKind queue_kind = cl->queue_kind;
    GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
    GPU_D12_StagingRegionNode *result = 0;

    Lock lock = LockE(&queue->staging_mutex);
    {
        GPU_D12_StagingHeap *heap = queue->staging_heap;
        i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence);

        /* Find first completed region with matching size.
         * For each region in heap:
         * - If region size > size, split off a smaller region & use it
         *
         * - If region size < size, try to merge with next completed region
         *
         * - If no available completed region with eligible size, queue the
         * current heap for deletion & create a new heap
         * with larger size
         */

        /* FIXME: Region completion target should be atomic, and initialized to
         * u64/i64 max until cl submission actually sets value */

        /* Find region with large enough size */
        GPU_D12_StagingRegionNode *match = 0;
        if (heap && heap->size >= size)
        {
            GPU_D12_StagingRegionNode *r = heap->head_region_node;
            for (;;)
            {
                b32 is_completed = completed >= Atomic64Fetch(&r->completion_target);
                if (is_completed)
                {
                    GPU_D12_StagingRegionNode *next = r->next;
                    /* A region spans from its pos to the next node's pos;
                     * the region owning the tail of the heap wraps to
                     * heap->size. */
                    u64 region_size = 0;
                    if (next->pos > r->pos)
                    {
                        region_size = next->pos - r->pos;
                    }
                    else
                    {
                        region_size = heap->size - r->pos;
                    }

                    if (region_size < size)
                    {
                        GPU_D12_StagingRegionNode *prev = r->prev;
                        b32 prev_is_completed = completed >= Atomic64Fetch(&prev->completion_target);
                        if (prev_is_completed && prev->pos < r->pos)
                        {
                            /* Merge with previous region & retry.
                             * Fix: also repair next->prev, which previously
                             * kept pointing at the freed node. */
                            prev->next = next;
                            next->prev = prev;
                            SllStackPush(heap->first_free_region_node, r);
                            r = prev;
                        }
                        else
                        {
                            /* Continue to next region */
                            r = next;
                        }
                    }
                    else
                    {
                        /* Found matching region */
                        match = r;
                        break;
                    }
                }
                else
                {
                    /* No large-enough completed region found */
                    break;
                }
            }
        }

        /* Create new heap if no match found */
        if (!match)
        {
            /* Queue old heap for deletion */
            u64 new_heap_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64));
            if (heap)
            {
                /* FIXME: Queue for deletion here */
                new_heap_size = MaxU64(new_heap_size, heap->size * 2);
                heap = 0;
            }

            /* Create new heap */
            {
                Arena *arena = AcquireArena(Gibi(1));
                heap = PushStruct(arena, GPU_D12_StagingHeap);
                heap->arena = arena;
                heap->size = new_heap_size;

                /* Create backing upload heap resource */
                ID3D12Resource *d3d_resource = 0;
                {
                    D3D12_RESOURCE_DESC d3d_desc = ZI;
                    d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
                    d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
                    d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
                    d3d_desc.Alignment = 0;
                    d3d_desc.Width = new_heap_size;
                    d3d_desc.Height = 1;
                    d3d_desc.DepthOrArraySize = 1;
                    d3d_desc.MipLevels = 1;
                    d3d_desc.SampleDesc.Count = 1;
                    d3d_desc.SampleDesc.Quality = 0;

                    D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
                    HRESULT hr = ID3D12Device_CreateCommittedResource(g->device,
                                                                      &heap_props,
                                                                      D3D12_HEAP_FLAG_CREATE_NOT_ZEROED,
                                                                      &d3d_desc,
                                                                      D3D12_RESOURCE_STATE_COMMON,
                                                                      0,
                                                                      &IID_ID3D12Resource,
                                                                      (void **)&d3d_resource);
                    if (!SUCCEEDED(hr))
                    {
                        /* TODO: Don't panic */
                        Panic(Lit("Failed to create upload heap"));
                    }
                }
                heap->resource.d3d_resource = d3d_resource;
                heap->resource.uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;
                heap->resource.buffer_size = new_heap_size;
                heap->resource.buffer_size_aligned = new_heap_size;
                heap->resource.buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource);

                /* Map (persistently; upload heaps stay mapped for their
                 * lifetime). Empty read range: CPU will not read back. */
                {
                    D3D12_RANGE read_range = ZI;
                    HRESULT hr = ID3D12Resource_Map(d3d_resource, 0, &read_range, &heap->mapped);
                    if (!SUCCEEDED(hr))
                    {
                        /* TODO: Don't panic */
                        Panic(Lit("Failed to map upload heap"));
                    }
                }
            }

            /* Create initial region (single node, circularly linked to
             * itself, covering the whole heap). */
            match = PushStruct(heap->arena, GPU_D12_StagingRegionNode);
            match->heap = heap;
            match->next = match;
            match->prev = match;
            heap->head_region_node = match;

            /* Fix: remember the new heap on the queue so later calls can
             * recycle its regions instead of allocating again. */
            queue->staging_heap = heap;
        }

        /* Split extra region space */
        {
            GPU_D12_StagingRegionNode *next = match->next;
            u64 region_size = 0;
            if (next->pos > match->pos)
            {
                region_size = next->pos - match->pos;
            }
            else
            {
                region_size = heap->size - match->pos;
            }

            if (region_size > size)
            {
                /* Carve the unused tail into a new node inserted after
                 * `match`; reuse a free node if available. */
                GPU_D12_StagingRegionNode *new_next = heap->first_free_region_node;
                if (new_next)
                {
                    SllStackPop(heap->first_free_region_node);
                }
                else
                {
                    new_next = PushStruct(heap->arena, GPU_D12_StagingRegionNode);
                }
                new_next->next = next;
                new_next->prev = match;
                next->prev = new_next;
                match->next = new_next;

                new_next->heap = heap;
                new_next->pos = match->pos + size;
            }
        }

        /* Mark in-flight until submission assigns a real fence target. */
        Atomic64Set(&match->completion_target, I64Max);
        result = match;
    }
    Unlock(&lock);

    return result;
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookimpl Command
|
|
|
|
//- Command list
|
|
|
|
GPU_CommandListHandle GPU_PrepareCommandList(GPU_QueueKind queue)
{
    /* Pop a recycled command list off the shared free list, or allocate a
     * fresh one from the permanent arena when the free list is empty.
     * Lists are never destroyed; GPU_CommitCommandListEx returns them here. */
    GPU_D12_SharedState *shared = &GPU_D12_shared_state;
    GPU_D12_CmdList *list = 0;

    Lock lock = LockE(&shared->free_cmd_lists_mutex);
    list = shared->first_free_cmd_list;
    if (list)
    {
        shared->first_free_cmd_list = list->next;
        ZeroStruct(list);
    }
    else
    {
        list = PushStruct(PermArena(), GPU_D12_CmdList);
    }
    Unlock(&lock);

    list->queue_kind = queue;

    return GPU_D12_MakeHandle(GPU_CommandListHandle, list);
}
|
|
|
|
/* Translate the recorded GPU_D12_Cmds on `cl_handle` into D3D12 calls on a
 * raw command list and commit it to the target queue, then recycle the
 * command list. State (pipeline, root constants, viewport, scissor,
 * topology, index buffer, render targets) is cached and only re-set when it
 * actually changes.
 *
 * Fixes vs previous version:
 *  - The IsAsyncCompute constant computed
 *    `queue_kind == queue_kind == GPU_QueueKind_AsyncCompute`, which parses
 *    as `(queue_kind == queue_kind) == ...` i.e. `1 == enum`, not the
 *    intended async-compute test.
 *  - The chunk-free loop relinked chunk->next while the loop step still read
 *    chunk->next, leaking all but the first chunk (or looping forever when
 *    the free list was non-empty); the next pointer is now captured first.
 *  - bound_primitive_topology was never updated after
 *    IASetPrimitiveTopology, so the topology cache never took effect.
 *
 * NOTE(review): fence_ops_count/fence_ops are currently unused here —
 * presumably handled by GPU_D12_CommitRawCommandList or pending work;
 * confirm.
 */
void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, u64 fence_ops_count, GPU_FenceOp *fence_ops)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
    GPU_QueueKind queue_kind = cl->queue_kind;
    GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
    TempArena scratch = BeginScratchNoConflict();

    /* Begin dx12 command list */
    GPU_D12_RawCommandList *rcl = GPU_D12_PrepareRawCommandList(queue_kind);
    ID3D12GraphicsCommandList7 *d3d_cl = rcl->d3d_cl;

    /* Pipeline state */
    b32 graphics_rootsig_set = 0;
    b32 compute_rootsig_set = 0;
    b32 descriptor_heaps_set = 0;
    GPU_D12_Pipeline *bound_pipeline = 0;

    /* Constants state: slotted_constants holds the current desired values;
     * bound_* track what has actually been set on the command list per
     * bind point (U64Max = never set, forces the first write through). */
    u64 slotted_constants[MaxShaderConstants];
    u64 bound_compute_constants[MaxShaderConstants];
    u64 bound_graphics_constants[MaxShaderConstants];
    for (i32 i = 0; i < countof(slotted_constants); ++i) { slotted_constants[i] = 0; } /* Zero initialze all constant slots */
    for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; }
    for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; }

    /* Fix: was `queue_kind == queue_kind == GPU_QueueKind_AsyncCompute` */
    slotted_constants[MaxShaderConstants - 1] = (queue_kind == GPU_QueueKind_AsyncCompute); /* IsAsyncCompute constant */

    /* Rasterizer state */
    D3D12_VIEWPORT bound_viewport = ZI;
    D3D12_RECT bound_scissor = ZI;
    D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1;
    D3D12_INDEX_BUFFER_VIEW bound_ibv = ZI;
    u64 bound_render_target_uids[GPU_MaxRenderTargets] = ZI;
    u64 bound_render_clear_target_uid = 0;

    /* Flatten command chunks */
    u64 cmds_count = 0;
    GPU_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, GPU_D12_Cmd, cl->cmds_count);
    {
        /* Flatten command chunks */
        {
            for (GPU_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next)
            {
                for (u64 cmd_chunk_idx = 0; cmd_chunk_idx < chunk->cmds_count; ++cmd_chunk_idx)
                {
                    cmds[cmds_count++] = chunk->cmds[cmd_chunk_idx];
                }
            }
        }
        /* Free command chunks.
         * Fix: capture chunk->next before relinking the chunk onto the
         * free list — the old loop step read the freshly overwritten
         * next pointer. */
        {
            Lock lock = LockE(&g->free_cmd_chunks_mutex);
            {
                for (GPU_D12_CmdChunk *chunk = cl->first_cmd_chunk, *next = 0; chunk; chunk = next)
                {
                    next = chunk->next;
                    chunk->next = g->first_free_cmd_chunk;
                    g->first_free_cmd_chunk = chunk;
                }
            }
            Unlock(&lock);
        }
    }

    /* Batch barrier cmds: runs of barrier commands separated only by
     * non-interrupting commands (constants) form one batch; the last
     * barrier of each batch is tagged so the main pass knows when to
     * submit the accumulated barrier groups. */
    {
        u64 cmd_idx = 0;
        u64 batch_gen = 0;
        GPU_D12_Cmd *prev_barrier_cmd = 0;
        while (cmd_idx < cmds_count)
        {
            GPU_D12_Cmd *cmd = &cmds[cmd_idx];
            switch (cmd->kind)
            {
                /* Batch-interrupting cmds */
                default:
                {
                    cmd_idx += 1;
                    batch_gen += 1;
                } break;

                /* Non-batch-interrupting cmds */
                case GPU_D12_CmdKind_Constant:
                {
                    cmd_idx += 1;
                } break;

                case GPU_D12_CmdKind_Barrier:
                {
                    /* Determine 'before' state from lookup */
                    if (prev_barrier_cmd)
                    {
                        if (prev_barrier_cmd->barrier.batch_gen != batch_gen)
                        {
                            /* This barrier is part of new batch */
                            prev_barrier_cmd->barrier.is_end_of_batch = 1;
                        }
                    }
                    cmd->barrier.batch_gen = batch_gen;
                    prev_barrier_cmd = cmd;
                    cmd_idx += 1;
                } break;
            }
        }

        if (prev_barrier_cmd)
        {
            prev_barrier_cmd->barrier.is_end_of_batch = 1;
        }
    }

    /* Process gpu commands into dx12 commands */
    {
        u64 batch_barrier_idx_start = 0;
        u64 batch_barrier_idx_opl = 0; /* One past last */

        u64 cmd_idx = 0;
        while (cmd_idx < cmds_count)
        {
            GPU_D12_Cmd *cmd = &cmds[cmd_idx];
            switch (cmd->kind)
            {
                default:
                {
                    cmd_idx += 1;
                } break;

                //- Constant
                case GPU_D12_CmdKind_Constant:
                {
                    i32 slot = cmd->constant.slot;
                    u32 value = cmd->constant.value;
                    if (slot >= 0 && slot < countof(slotted_constants))
                    {
                        slotted_constants[slot] = value;
                    }
                    cmd_idx += 1;
                } break;

                //- Barrier

                case GPU_D12_CmdKind_Barrier:
                {
                    batch_barrier_idx_opl = cmd_idx + 1;

                    /* Submit batched barriers */
                    if (cmd->barrier.is_end_of_batch)
                    {
                        /* Build barriers */
                        u64 buffer_barriers_count = 0;
                        u64 texture_barriers_count = 0;
                        u64 global_barriers_count = 0;
                        D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
                        D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
                        D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_barrier_idx_opl - batch_barrier_idx_start));
                        for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < batch_barrier_idx_opl; ++barrier_cmd_idx)
                        {
                            GPU_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx];
                            if (barrier_cmd->kind == GPU_D12_CmdKind_Barrier)
                            {
                                GPU_BarrierDesc desc = barrier_cmd->barrier.desc;
                                GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(desc.resource);
                                D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;

                                /* Translate gpu barrier kind -> d3d barrier fields */
                                D3D12_BARRIER_SYNC sync_before = GPU_D12_BarrierSyncFromStages(desc.sync_prev);
                                D3D12_BARRIER_SYNC sync_after = GPU_D12_BarrierSyncFromStages(desc.sync_next);
                                D3D12_BARRIER_ACCESS access_before = GPU_D12_BarrierAccessFromAccesses(desc.access_prev);
                                D3D12_BARRIER_ACCESS access_after = GPU_D12_BarrierAccessFromAccesses(desc.access_next);
                                D3D12_BARRIER_LAYOUT layout_before = resource->texture_layout;
                                D3D12_BARRIER_LAYOUT layout_after = resource->texture_layout;
                                if (desc.layout != GPU_Layout_NoChange)
                                {
                                    layout_after = GPU_D12_BarrierLayoutFromLayout(desc.layout);
                                    resource->texture_layout = layout_after;
                                }

                                /* Build barrier */
                                switch (barrier_type)
                                {
                                    case D3D12_BARRIER_TYPE_BUFFER:
                                    {
                                        D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++];
                                        barrier->SyncBefore = sync_before;
                                        barrier->SyncAfter = sync_after;
                                        barrier->AccessBefore = access_before;
                                        barrier->AccessAfter = access_after;
                                        barrier->pResource = resource->d3d_resource;
                                        barrier->Offset = 0;
                                        barrier->Size = U64Max;
                                    } break;

                                    case D3D12_BARRIER_TYPE_TEXTURE:
                                    {
                                        D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++];
                                        barrier->SyncBefore = sync_before;
                                        barrier->SyncAfter = sync_after;
                                        barrier->AccessBefore = access_before;
                                        barrier->AccessAfter = access_after;
                                        barrier->LayoutBefore = layout_before;
                                        barrier->LayoutAfter = layout_after;
                                        barrier->pResource = resource->d3d_resource;
                                        barrier->Subresources.IndexOrFirstMipLevel = 0xffffffff;
                                    } break;

                                    case D3D12_BARRIER_TYPE_GLOBAL:
                                    {
                                        D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++];
                                        barrier->SyncBefore = sync_before;
                                        barrier->SyncAfter = sync_after;
                                        barrier->AccessBefore = access_before;
                                        barrier->AccessAfter = access_after;
                                    } break;
                                }
                            }
                        }

                        /* Dispatch barriers */
                        {
                            u32 barrier_groups_count = 0;
                            D3D12_BARRIER_GROUP barrier_groups[3] = ZI;
                            if (buffer_barriers_count > 0)
                            {
                                D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++];
                                group->Type = D3D12_BARRIER_TYPE_BUFFER;
                                group->NumBarriers = buffer_barriers_count;
                                group->pBufferBarriers = buffer_barriers;
                            }
                            if (texture_barriers_count > 0)
                            {
                                D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++];
                                group->Type = D3D12_BARRIER_TYPE_TEXTURE;
                                group->NumBarriers = texture_barriers_count;
                                group->pTextureBarriers = texture_barriers;
                            }
                            if (global_barriers_count > 0)
                            {
                                D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++];
                                group->Type = D3D12_BARRIER_TYPE_GLOBAL;
                                group->NumBarriers = global_barriers_count;
                                group->pGlobalBarriers = global_barriers;
                            }
                            if (barrier_groups_count > 0)
                            {
                                ID3D12GraphicsCommandList7_Barrier(d3d_cl, barrier_groups_count, barrier_groups);
                            }
                        }

                        batch_barrier_idx_start = cmd_idx + 1;
                    }

                    cmd_idx += 1;
                } break;

                //- Copy bytes

                case GPU_D12_CmdKind_CopyBytes:
                {
                    u64 src_offset = cmd->copy_bytes.src_copy_range.min;
                    u64 copy_size = cmd->copy_bytes.src_copy_range.max - cmd->copy_bytes.src_copy_range.min;
                    ID3D12GraphicsCommandList_CopyBufferRegion(d3d_cl,
                                                               cmd->copy_bytes.dst->d3d_resource,
                                                               cmd->copy_bytes.dst_offset,
                                                               cmd->copy_bytes.src->d3d_resource,
                                                               src_offset,
                                                               copy_size);
                    cmd_idx += 1;
                } break;

                //- Copy texels

                case GPU_D12_CmdKind_CopyTexels:
                {
                    GPU_D12_Resource *dst = cmd->copy_texels.dst;
                    GPU_D12_Resource *src = cmd->copy_texels.src;
                    D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc;
                    D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc;
                    Vec3I32 dst_offset = cmd->copy_texels.dst_offset;
                    Rng3I32 src_copy_range = cmd->copy_texels.src_copy_range;

                    D3D12_BOX src_box = ZI;
                    {
                        src_box.left = src_copy_range.p0.x;
                        src_box.top = src_copy_range.p0.y;
                        src_box.front = src_copy_range.p0.z;
                        src_box.right = src_copy_range.p1.x;
                        src_box.bottom = src_copy_range.p1.y;
                        src_box.back = src_copy_range.p1.z;
                    }

                    if (dst->flags & GPU_ResourceFlag_AllowDepthStencil)
                    {
                        /* Depth-stencil textures must have src box & dst offset set to 0
                         * https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12graphicscommandlist-copytextureregion
                         */
                        ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, 0, 0, 0, &src_loc, 0);
                    }
                    else
                    {
                        ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, &src_box);
                    }

                    cmd_idx += 1;
                } break;

                //- Compute

                case GPU_D12_CmdKind_Compute:
                {
                    GPU_D12_Pipeline *pipeline = 0;
                    {
                        GPU_D12_PipelineDesc pipeline_desc = ZI;
                        pipeline_desc.cs = cmd->compute.cs;
                        pipeline = GPU_D12_PipelineFromDesc(pipeline_desc);
                    }

                    if (pipeline)
                    {
                        /* Set descriptor heaps (once per command list) */
                        if (!descriptor_heaps_set)
                        {
                            ID3D12DescriptorHeap *heaps[] = {
                                g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap,
                                g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap,
                            };
                            ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps);
                            descriptor_heaps_set = 1;
                        }

                        /* Bind rootsig */
                        if (!compute_rootsig_set)
                        {
                            ID3D12GraphicsCommandList_SetComputeRootSignature(d3d_cl, g->bindless_rootsig);
                            compute_rootsig_set = 1;
                        }

                        /* Bind pipeline */
                        if (pipeline != bound_pipeline)
                        {
                            ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso);
                            bound_pipeline = pipeline;
                        }

                        /* Update root constants */
                        for (i32 slot = 0; slot < MaxShaderConstants; ++slot)
                        {
                            if (bound_compute_constants[slot] != slotted_constants[slot])
                            {
                                ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0);
                                bound_compute_constants[slot] = slotted_constants[slot];
                            }
                        }

                        /* Dispatch */
                        ID3D12GraphicsCommandList_Dispatch(d3d_cl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z);
                    }

                    cmd_idx += 1;
                } break;

                //- Rasterize

                case GPU_D12_CmdKind_Rasterize:
                {
                    GPU_D12_Pipeline *pipeline = 0;
                    {
                        GPU_D12_PipelineDesc pipeline_desc = ZI;
                        pipeline_desc.vs = cmd->rasterize.vs;
                        pipeline_desc.ps = cmd->rasterize.ps;
                        {
                            pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED;
                            switch (cmd->rasterize.mode)
                            {
                                default: Assert(0); break;
                                case GPU_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break;
                                case GPU_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break;
                                case GPU_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break;
                                case GPU_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
                                case GPU_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
                                case GPU_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
                                case GPU_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
                            }
                        }
                        if (cmd->rasterize.mode == GPU_RasterMode_WireTriangleList || cmd->rasterize.mode == GPU_RasterMode_WireTriangleStrip)
                        {
                            pipeline_desc.is_wireframe = 1;
                        }
                        for (u32 i = 0; i < countof(cmd->rasterize.render_targets); ++i)
                        {
                            GPU_D12_Resource *rt = cmd->rasterize.render_targets[i];
                            if (rt)
                            {
                                pipeline_desc.render_target_formats[i] = rt->texture_format;
                            }
                            else
                            {
                                pipeline_desc.render_target_formats[i] = GPU_Format_Unknown;
                            }
                        }
                        pipeline = GPU_D12_PipelineFromDesc(pipeline_desc);
                    }

                    /* Create ibv */
                    u32 indices_count = 0;
                    D3D12_INDEX_BUFFER_VIEW ibv = ZI;
                    {
                        GPU_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc;
                        if (desc.index_count > 0)
                        {
                            GPU_D12_Resource *index_buffer_resource = GPU_D12_ResourceFromHandle(desc.resource);
                            ibv.BufferLocation = index_buffer_resource->buffer_gpu_address;
                            ibv.SizeInBytes = desc.index_size * desc.index_count;
                            if (desc.index_size == 2)
                            {
                                ibv.Format = DXGI_FORMAT_R16_UINT;
                                indices_count = ibv.SizeInBytes / 2;
                            }
                            else if (desc.index_size == 4)
                            {
                                ibv.Format = DXGI_FORMAT_R32_UINT;
                                indices_count = ibv.SizeInBytes / 4;
                            }
                            else
                            {
                                Assert(0); /* Invalid index size */
                            }
                        }
                    }

                    /* Prepare & dispatch */
                    if (pipeline && indices_count > 0)
                    {
                        /* Set descriptor heaps (once per command list) */
                        if (!descriptor_heaps_set)
                        {
                            ID3D12DescriptorHeap *heaps[] = {
                                g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap,
                                g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap,
                            };
                            ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps);
                            descriptor_heaps_set = 1;
                        }

                        /* Bind rootsig */
                        if (!graphics_rootsig_set)
                        {
                            ID3D12GraphicsCommandList_SetGraphicsRootSignature(d3d_cl, g->bindless_rootsig);
                            graphics_rootsig_set = 1;
                        }

                        /* Bind pipeline */
                        if (pipeline != bound_pipeline)
                        {
                            ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso);
                            bound_pipeline = pipeline;
                        }

                        /* Update root constants */
                        for (i32 slot = 0; slot < MaxShaderConstants; ++slot)
                        {
                            if (bound_graphics_constants[slot] != slotted_constants[slot])
                            {
                                ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0);
                                bound_graphics_constants[slot] = slotted_constants[slot];
                            }
                        }

                        /* Set viewport */
                        {
                            D3D12_VIEWPORT viewport = ZI;
                            {
                                Rng3 range = cmd->rasterize.viewport;
                                viewport.TopLeftX = range.p0.x;
                                viewport.TopLeftY = range.p0.y;
                                viewport.Width = range.p1.x - range.p0.x;
                                viewport.Height = range.p1.y - range.p0.y;
                                viewport.MinDepth = range.p0.z;
                                viewport.MaxDepth = range.p1.z;
                            }
                            if (!MatchStruct(&viewport, &bound_viewport))
                            {
                                bound_viewport = viewport;
                                ID3D12GraphicsCommandList_RSSetViewports(d3d_cl, 1, &viewport);
                            }
                        }

                        /* Set scissor */
                        {
                            D3D12_RECT scissor = ZI;
                            {
                                Rng2 range = cmd->rasterize.scissor;
                                scissor.left = range.p0.x;
                                scissor.top = range.p0.y;
                                scissor.right = range.p1.x;
                                scissor.bottom = range.p1.y;
                            }
                            if (!MatchStruct(&scissor, &bound_scissor))
                            {
                                bound_scissor = scissor;
                                ID3D12GraphicsCommandList_RSSetScissorRects(d3d_cl, 1, &scissor);
                            }
                        }

                        /* Set topology */
                        {
                            D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
                            switch (cmd->rasterize.mode)
                            {
                                default: Assert(0); break;
                                case GPU_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break;
                                case GPU_RasterMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break;
                                case GPU_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break;
                                case GPU_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break;
                                case GPU_RasterMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break;
                                case GPU_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break;
                                case GPU_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break;
                            }
                            if (topology != bound_primitive_topology)
                            {
                                ID3D12GraphicsCommandList_IASetPrimitiveTopology(d3d_cl, topology);
                                /* Fix: cache was never updated, so the
                                 * topology was redundantly re-set on every
                                 * draw. */
                                bound_primitive_topology = topology;
                            }
                        }

                        /* Set index buffer */
                        if (!MatchStruct(&ibv, &bound_ibv))
                        {
                            ID3D12GraphicsCommandList_IASetIndexBuffer(d3d_cl, &ibv);
                            bound_ibv = ibv;
                        }

                        /* Bind render targets */
                        {
                            b32 om_dirty = 0;
                            u32 rtvs_count = 0;
                            for (u32 i = 0; i < countof(cmd->rasterize.render_targets); ++i)
                            {
                                GPU_D12_Resource *rt = cmd->rasterize.render_targets[i];
                                if (rt)
                                {
                                    if (bound_render_target_uids[i] != rt->uid)
                                    {
                                        GPU_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i];
                                        ID3D12Device_CreateRenderTargetView(g->device, rt->d3d_resource, 0, rtv_descriptor->handle);
                                        bound_render_target_uids[i] = rt->uid;
                                        om_dirty = 1;
                                    }
                                    ++rtvs_count;
                                }
                                else
                                {
                                    break;
                                }
                            }
                            if (om_dirty)
                            {
                                D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[GPU_MaxRenderTargets] = ZI;
                                for (u32 i = 0; i < rtvs_count; ++i)
                                {
                                    rtv_handles[i] = rcl->rtv_descriptors[i]->handle;
                                }
                                ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0);
                            }
                        }

                        /* Dispatch */
                        ID3D12GraphicsCommandList_DrawIndexedInstanced(d3d_cl, indices_count, cmd->rasterize.instances_count, 0, 0, 0);
                    }

                    cmd_idx += 1;
                } break;

                //- Clear rtv

                case GPU_D12_CmdKind_ClearRtv:
                {
                    GPU_D12_Resource *rt = cmd->clear_rtv.render_target;
                    f32 clear_color[4] = ZI;
                    {
                        clear_color[0] = cmd->clear_rtv.color.x;
                        clear_color[1] = cmd->clear_rtv.color.y;
                        clear_color[2] = cmd->clear_rtv.color.z;
                        clear_color[3] = cmd->clear_rtv.color.w;
                    }
                    D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->handle;
                    if (bound_render_clear_target_uid != rt->uid)
                    {
                        ID3D12Device_CreateRenderTargetView(g->device, rt->d3d_resource, 0, rtv_handle);
                        bound_render_clear_target_uid = rt->uid;
                    }
                    ID3D12GraphicsCommandList_ClearRenderTargetView(d3d_cl, rtv_handle, clear_color, 0, 0);
                    cmd_idx += 1;
                } break;
            }
        }
    }

    /* End dx12 command list */
    GPU_D12_CommitRawCommandList(rcl);

    /* Free command list */
    {
        Lock lock = LockE(&g->free_cmd_lists_mutex);
        {
            cl->next = g->first_free_cmd_list;
            g->first_free_cmd_list = cl;
        }
        Unlock(&lock);
    }

    EndScratch(scratch);
}
|
|
|
|
//- Arena
|
|
|
|
void GPU_ResetArena(GPU_CommandListHandle cl_handle, GPU_ArenaHandle arena_handle)
{
    /* Rewind a GPU arena so its heap memory can be handed out again from
       position zero. Nothing is released yet — see FIXMEs below. */
    GPU_D12_Arena *arena = GPU_D12_ArenaFromHandle(arena_handle);

    /* TODO */
    /* FIXME: Move descriptors into committed lists */
    /* FIXME: Release id3d12 resource com object references */
    arena->heap_pos = 0;
}
|
|
|
|
//- Cpu -> Gpu copy
|
|
|
|
void GPU_CopyCpuToBuffer(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range)
{
    /* Stages src[src_copy_range.min .. src_copy_range.max) into a CPU-visible
       staging region, then records a GPU buffer->buffer copy into dst at
       dst_offset. */
    GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
    u64 copy_size = src_copy_range.max - src_copy_range.min;

    /* Copy the caller's bytes into the mapped staging heap. */
    GPU_D12_StagingRegionNode *region = GPU_D12_PushStagingRegion(cl, copy_size);
    CopyBytes((u8 *)region->heap->mapped + region->pos, (u8 *)src + src_copy_range.min, copy_size);

    /* BUGFIX: RngU64 is a [min, max) byte range (copy_size above is computed
       as max - min), so the staging range must end at region->pos + copy_size.
       The old code passed RNGU64(region->pos, copy_size), which yields a
       negative/wrong size whenever the region does not start at offset 0. */
    GPU_CopyBufferToBuffer(cl_handle,
                           dst_handle,
                           dst_offset,
                           GPU_D12_MakeHandle(GPU_ResourceHandle, &region->heap->resource),
                           RNGU64(region->pos, region->pos + copy_size));
}
|
|
|
|
void GPU_CopyCpuToTexture(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range)
{
    /* Stages CPU texel data into an upload region laid out per the device's
       copyable-footprint rules (rows padded to RowPitch), then records a
       buffer->texture copy into dst. */
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
    GPU_D12_Resource *dst = GPU_D12_ResourceFromHandle(dst_handle);
    Assert(dst->is_texture);

    /* Extent of the region actually being uploaded. */
    Vec3I32 staged_dims = ZI;
    {
        staged_dims.x = src_copy_range.p1.x - src_copy_range.p0.x;
        staged_dims.y = src_copy_range.p1.y - src_copy_range.p0.y;
        staged_dims.z = src_copy_range.p1.z - src_copy_range.p0.z;
    }

    /* Grab footprint info for a texture with the staged dimensions but the
       destination's format/layout. */
    u64 footprint_rows_count = 0;
    u64 footprint_row_size = 0;
    u64 footprint_size = 0;
    D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI;
    {
        D3D12_RESOURCE_DESC src_desc = ZI;
        {
            ID3D12Resource_GetDesc(dst->d3d_resource, &src_desc);
            src_desc.Width = staged_dims.x;
            src_desc.Height = staged_dims.y;
            src_desc.DepthOrArraySize = staged_dims.z;
        }
        ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &footprint, (u32 *)&footprint_rows_count, &footprint_row_size, &footprint_size);
    }

    /* Fill staging buffer row by row. */
    GPU_D12_StagingRegionNode *region = GPU_D12_PushStagingRegion(cl, footprint_size);
    {
        u8 *dst_base = (u8 *)region->heap->mapped + region->pos + footprint.Offset;
        u8 *src_base = (u8 *)src;

        /* BUGFIX: staging slices are RowPitch-padded while the CPU source is
           tightly packed, so the two sides advance by different slice pitches.
           The old code used the packed pitch for both and iterated src_dims.z,
           which mis-placed slices and could overrun the staging region (sized
           from staged_dims) for 3D uploads. Pitch math is done in u64 to avoid
           the previous u32 truncation on large surfaces. */
        u64 dst_slice_pitch = (u64)footprint.Footprint.RowPitch * footprint_rows_count;
        u64 src_slice_pitch = footprint_row_size * footprint_rows_count;

        /* NOTE(review): `src` is assumed tightly packed at staged_dims with
           src_copy_range.p0 == 0 — src_dims/p0 are not used to derive the
           source row pitch or a starting offset; confirm against callers. */
        for (i32 z = 0; z < staged_dims.z; ++z)
        {
            for (u64 y = 0; y < footprint_rows_count; ++y)
            {
                u8 *dst_row = dst_base + y * footprint.Footprint.RowPitch + (u64)z * dst_slice_pitch;
                u8 *src_row = src_base + y * footprint_row_size + (u64)z * src_slice_pitch;
                CopyBytes(dst_row, src_row, footprint_row_size);
            }
        }
    }

    GPU_CopyBufferToTexture(cl_handle,
                            dst_handle, dst_offset,
                            GPU_D12_MakeHandle(GPU_ResourceHandle, &region->heap->resource), staged_dims,
                            RNG3I32(VEC3I32(0, 0, 0), staged_dims));
}
|
|
|
|
//- Gpu <-> Gpu copy
|
|
|
|
void GPU_CopyBufferToBuffer(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, u64 dst_offset, GPU_ResourceHandle src_handle, RngU64 src_copy_range)
{
    /* Record a GPU-side buffer->buffer byte copy on the command list; the
       copy itself executes when the list is committed. */
    GPU_D12_CmdList *list = GPU_D12_CmdListFromHandle(cl_handle);

    GPU_D12_Cmd *copy = GPU_D12_PushCmd(list);
    copy->kind = GPU_D12_CmdKind_CopyBytes;
    copy->copy_bytes.dst = GPU_D12_ResourceFromHandle(dst_handle);
    copy->copy_bytes.src = GPU_D12_ResourceFromHandle(src_handle);
    copy->copy_bytes.dst_offset = dst_offset;
    copy->copy_bytes.src_copy_range = src_copy_range;
}
|
|
|
|
void GPU_CopyBufferToTexture(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, GPU_ResourceHandle src_handle, Vec3I32 src_dims, Rng3I32 src_copy_range)
{
    /* Record a copy from a linear (buffer) resource into a texture. The
       buffer side is described by a placed footprint computed from the
       destination texture's desc, overridden to src_dims. */
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_CmdList *list = GPU_D12_CmdListFromHandle(cl_handle);
    GPU_D12_Resource *dst_tex = GPU_D12_ResourceFromHandle(dst_handle);
    GPU_D12_Resource *src_buf = GPU_D12_ResourceFromHandle(src_handle);
    Assert(dst_tex->is_texture);
    Assert(!src_buf->is_texture);

    /* Ask the device how linear data of these dimensions must be laid out. */
    D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI;
    {
        D3D12_RESOURCE_DESC desc = ZI;
        ID3D12Resource_GetDesc(dst_tex->d3d_resource, &desc);
        desc.Width = src_dims.x;
        desc.Height = src_dims.y;
        desc.DepthOrArraySize = src_dims.z;
        ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &footprint, 0, 0, 0);
    }

    /* Destination is subresource 0 of the texture; source is the placed
       footprint within the buffer. */
    D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI;
    dst_loc.pResource = dst_tex->d3d_resource;
    dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
    dst_loc.SubresourceIndex = 0;

    D3D12_TEXTURE_COPY_LOCATION src_loc = ZI;
    src_loc.pResource = src_buf->d3d_resource;
    src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
    src_loc.PlacedFootprint = footprint;

    GPU_D12_Cmd *copy = GPU_D12_PushCmd(list);
    copy->kind = GPU_D12_CmdKind_CopyTexels;
    copy->copy_texels.dst = dst_tex;
    copy->copy_texels.src = src_buf;
    copy->copy_texels.dst_loc = dst_loc;
    copy->copy_texels.src_loc = src_loc;
    copy->copy_texels.dst_offset = dst_offset;
    copy->copy_texels.src_copy_range = src_copy_range;
}
|
|
|
|
void GPU_CopyTextureToTexture(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, GPU_ResourceHandle src_handle, Rng3I32 src_copy_range)
{
    /* Record a texture->texture copy (subresource 0 to subresource 0) of
       src_copy_range texels into dst at dst_offset. */
    GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
    GPU_D12_Resource *dst = GPU_D12_ResourceFromHandle(dst_handle);
    GPU_D12_Resource *src = GPU_D12_ResourceFromHandle(src_handle);
    Assert(dst->is_texture);
    Assert(src->is_texture);

    D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI;
    D3D12_TEXTURE_COPY_LOCATION src_loc = ZI;
    {
        dst_loc.pResource = dst->d3d_resource;
        dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
        dst_loc.SubresourceIndex = 0;
    }
    {
        /* BUGFIX: the source location must reference the *source* resource.
           It previously pointed at dst->d3d_resource, which made every
           texture->texture copy a self-copy of the destination. */
        src_loc.pResource = src->d3d_resource;
        src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
        src_loc.SubresourceIndex = 0;
    }

    GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
    cmd->kind = GPU_D12_CmdKind_CopyTexels;
    cmd->copy_texels.dst = dst;
    cmd->copy_texels.src = src;
    cmd->copy_texels.dst_loc = dst_loc;
    cmd->copy_texels.src_loc = src_loc;
    cmd->copy_texels.dst_offset = dst_offset;
    cmd->copy_texels.src_copy_range = src_copy_range;
}
|
|
|
|
void GPU_CopyTextureToBuffer(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, GPU_ResourceHandle src_handle, Rng3I32 src_copy_range)
{
    /* Texture -> buffer readback is not implemented yet; any call is a hard
       programming error until the TODO below is resolved. */
    /* TODO */
    Assert(0);
}
|
|
|
|
//- Constant
|
|
|
|
void GPU_SetConstant_(GPU_CommandListHandle cl_handle, i32 slot, void *src_32bit, u32 size)
{
    /* Record a single 32-bit constant write for the given slot. At most four
       bytes of *src_32bit are captured into the command. */
    GPU_D12_CmdList *list = GPU_D12_CmdListFromHandle(cl_handle);

    GPU_D12_Cmd *set = GPU_D12_PushCmd(list);
    set->kind = GPU_D12_CmdKind_Constant;
    set->constant.slot = slot;
    CopyBytes(&set->constant.value, src_32bit, MinU32(size, 4));
}
|
|
|
|
//- Barrier
|
|
|
|
void GPU_Sync(GPU_CommandListHandle cl_handle, GPU_BarrierDesc desc)
{
    /* Record a barrier command; the barrier is translated to D3D12 when the
       list is executed. */
    GPU_D12_CmdList *list = GPU_D12_CmdListFromHandle(cl_handle);
    GPU_D12_Cmd *barrier = GPU_D12_PushCmd(list);
    barrier->kind = GPU_D12_CmdKind_Barrier;
    barrier->barrier.desc = desc;
}
|
|
|
|
//- Compute
|
|
|
|
void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 groups)
{
    /* Record a compute dispatch of `groups` thread groups using shader `cs`. */
    GPU_D12_CmdList *list = GPU_D12_CmdListFromHandle(cl_handle);
    GPU_D12_Cmd *dispatch = GPU_D12_PushCmd(list);
    dispatch->kind = GPU_D12_CmdKind_Compute;
    dispatch->compute.cs = cs;
    dispatch->compute.groups = groups;
}
|
|
|
|
//- Rasterize
|
|
|
|
void GPU_Rasterize(GPU_CommandListHandle cl_handle,
|
|
VertexShader vs, PixelShader ps,
|
|
u32 instances_count, GPU_IndexBufferDesc index_buffer,
|
|
u32 render_targets_count, GPU_ResourceHandle *render_targets,
|
|
Rng3 viewport, Rng2 scissor,
|
|
GPU_RasterMode mode)
|
|
{
|
|
GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
|
|
GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
|
|
cmd->kind = GPU_D12_CmdKind_Rasterize;
|
|
cmd->rasterize.vs = vs;
|
|
cmd->rasterize.ps = ps;
|
|
cmd->rasterize.instances_count = instances_count;
|
|
cmd->rasterize.index_buffer_desc = index_buffer;
|
|
for (u32 i = 0; i < MinU32(render_targets_count, GPU_MaxRenderTargets); ++i)
|
|
{
|
|
cmd->rasterize.render_targets[i] = GPU_D12_ResourceFromHandle(render_targets[i]);
|
|
}
|
|
cmd->rasterize.viewport = viewport;
|
|
cmd->rasterize.scissor = scissor;
|
|
cmd->rasterize.mode = mode;
|
|
}
|
|
|
|
//- Clear
|
|
|
|
void GPU_ClearRenderTarget(GPU_CommandListHandle cl_handle, GPU_ResourceHandle resource_handle, Vec4 color)
{
    /* Record a render-target clear to the given color. */
    GPU_D12_CmdList *list = GPU_D12_CmdListFromHandle(cl_handle);
    GPU_D12_Cmd *clear = GPU_D12_PushCmd(list);
    clear->kind = GPU_D12_CmdKind_ClearRtv;
    clear->clear_rtv.render_target = GPU_D12_ResourceFromHandle(resource_handle);
    clear->clear_rtv.color = color;
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookimpl Queue synchronization
|
|
|
|
void GPU_SyncQueue(GPU_QueueKind completion_queue_kind, GPU_QueueKind waiter_queue_kind)
{
    /* Make the waiter queue wait (GPU-side) until the completion queue has
       reached its most recently committed fence value. A queue never needs
       to wait on itself. */
    if (completion_queue_kind == waiter_queue_kind)
    {
        return;
    }

    GPU_D12_Queue *completion_queue = GPU_D12_QueueFromKind(completion_queue_kind);
    GPU_D12_Queue *waiter_queue = GPU_D12_QueueFromKind(waiter_queue_kind);
    ID3D12Fence *d3d_fence = completion_queue->commit_fence;

    /* Snapshot the commit target under a shared lock; commits may race. */
    u64 fence_target = 0;
    {
        Lock lock = LockS(&completion_queue->commit_mutex);
        fence_target = completion_queue->commit_fence_target;
        Unlock(&lock);
    }

    /* Skip the queue wait entirely when the fence has already passed it. */
    if (ID3D12Fence_GetCompletedValue(d3d_fence) < fence_target)
    {
        ID3D12CommandQueue_Wait(waiter_queue->d3d_queue, d3d_fence, fence_target);
    }
}
|
|
|
|
void GPU_SyncOtherQueues(GPU_QueueKind completion_queue_kind)
{
    /* Make every other queue wait (GPU-side) on the completion queue's most
       recently committed fence value. No-op when multi-queue is disabled. */
    if (!GPU_IsMultiQueueEnabled)
    {
        return;
    }

    GPU_D12_Queue *completion_queue = GPU_D12_QueueFromKind(completion_queue_kind);
    ID3D12Fence *d3d_fence = completion_queue->commit_fence;

    /* Snapshot the commit target under a shared lock; commits may race. */
    u64 fence_target = 0;
    {
        Lock lock = LockS(&completion_queue->commit_mutex);
        fence_target = completion_queue->commit_fence_target;
        Unlock(&lock);
    }

    /* Only issue queue waits when the fence has not yet reached the target. */
    if (ID3D12Fence_GetCompletedValue(d3d_fence) < fence_target)
    {
        for (GPU_QueueKind waiter_kind = 0; waiter_kind < GPU_NumQueues; ++waiter_kind)
        {
            if (waiter_kind == completion_queue_kind)
            {
                continue;
            }
            GPU_D12_Queue *waiter_queue = GPU_D12_QueueFromKind(waiter_kind);
            ID3D12CommandQueue_Wait(waiter_queue->d3d_queue, d3d_fence, fence_target);
        }
    }
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookimpl Map hooks
|
|
|
|
// GPU_Mapped GPU_Map(GPU_Resource *gpu_r)
|
|
// {
|
|
// GPU_Mapped result = ZI;
|
|
// result.resource = gpu_r;
|
|
// GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_r;
|
|
// D3D12_RANGE read_range = ZI;
|
|
// HRESULT hr = ID3D12Resource_Map(r->d3d_resource, 0, &read_range, &result.mem);
|
|
// if (FAILED(hr) || !result.mem)
|
|
// {
|
|
// /* TODO: Don't panic */
|
|
// Panic(Lit("Failed to map command buffer resource"));
|
|
// }
|
|
// return result;
|
|
// }
|
|
|
|
// void GPU_Unmap(GPU_Mapped m)
|
|
// {
|
|
// GPU_D12_Resource *r = (GPU_D12_Resource *)m.resource;
|
|
// ID3D12Resource_Unmap(r->d3d_resource, 0, 0);
|
|
// }
|
|
|
|
// void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference)
|
|
// {
|
|
// GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
|
|
// D3D12_RESOURCE_DESC desc = ZI;
|
|
// ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc);
|
|
|
|
// u64 upload_size = 0;
|
|
// u64 upload_row_size = 0;
|
|
// u32 upload_num_rows = 0;
|
|
// D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI;
|
|
// ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size);
|
|
// D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint;
|
|
|
|
// {
|
|
// D3D12_RANGE read_range = ZI;
|
|
// u8 *dst_base = (u8 *)dst + placed_footprint.Offset;
|
|
// u8 *src_base = src;
|
|
|
|
// u32 z_size = upload_row_size * upload_num_rows;
|
|
|
|
// b32 src_overflow = 0;
|
|
// for (u32 z = 0; !src_overflow && z < desc.DepthOrArraySize; ++z)
|
|
// {
|
|
// u32 z_offset = z * z_size;
|
|
// for (u32 y = 0; !src_overflow && y < upload_num_rows; ++y)
|
|
// {
|
|
// u8 *dst_row = dst_base + y * footprint.RowPitch + z_offset;
|
|
// u8 *src_row = src_base + y * upload_row_size + z_offset;
|
|
// CopyBytes(dst_row, src_row, upload_row_size);
|
|
// }
|
|
// }
|
|
// }
|
|
// }
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookimpl Statistics
|
|
|
|
GPU_Stats GPU_QueryStats(void)
|
|
{
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
GPU_Stats result = ZI;
|
|
{
|
|
DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
|
|
IDXGIAdapter3_QueryVideoMemoryInfo(g->adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info);
|
|
result.local_committed = info.CurrentUsage;
|
|
result.local_budget = info.Budget;
|
|
}
|
|
{
|
|
DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
|
|
IDXGIAdapter3_QueryVideoMemoryInfo(g->adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info);
|
|
result.non_local_budget = info.Budget;
|
|
result.non_local_committed = info.CurrentUsage;
|
|
}
|
|
result.driver_resources_allocated = Atomic64Fetch(&g->driver_resources_allocated);
|
|
result.driver_descriptors_allocated = Atomic64Fetch(&g->driver_descriptors_allocated);
|
|
return result;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//~ @hookimpl Swapchain
|
|
|
|
GPU_SwapchainHandle GPU_AcquireSwapchain(u64 os_window_handle)
{
    /* Allocate a swapchain record for the given OS window. The underlying
       DXGI swapchain is created lazily on first GPU_PrepareBackbuffer. */
    Arena *perm = PermArena();
    GPU_D12_Swapchain *swapchain = PushStruct(perm, GPU_D12_Swapchain);
    swapchain->window_hwnd = (HWND)os_window_handle;
    return GPU_D12_MakeHandle(GPU_SwapchainHandle, swapchain);
}
|
|
|
|
void GPU_ReleaseSwapchain(GPU_SwapchainHandle swapchain_handle)
{
    /* Not implemented: the swapchain record (and its D3D objects) currently
       live for the lifetime of the process. */
    /* TODO */
}
|
|
|
|
GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, GPU_Format format, Vec2I32 size)
{
    /* Return the current backbuffer for this swapchain, lazily creating the
       DXGI swapchain on first use and resizing/reformatting the backbuffers
       whenever the requested size or format changes. Blocks until a
       backbuffer is available when frame-latency waiting is enabled. */
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Swapchain *swapchain = GPU_D12_SwapchainFromHandle(swapchain_handle);
    /* Clamp to 1x1: DXGI rejects zero-sized buffers (minimized windows). */
    size = VEC2I32(MaxI32(size.x, 1), MaxI32(size.y, 1));
    GPU_D12_Queue *direct_queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);

    /* Initialize swapchain */
    if (!swapchain->d3d_swapchain)
    {
        /* hr threads through every creation step below; a failure in any step
           skips the remaining SUCCEEDED(hr)-guarded steps and panics at the end. */
        HRESULT hr = 0;

        /* Create d3d swapchain */
        {
            IDXGISwapChain3 *swapchain3 = 0;
            {
                /* Create swapchain1 */
                IDXGISwapChain1 *swapchain1 = 0;
                if (SUCCEEDED(hr))
                {
                    DXGI_SWAP_CHAIN_DESC1 desc = ZI;
                    desc.Format = GPU_D12_DxgiFormatFromGpuFormat(format);
                    desc.Width = size.x;
                    desc.Height = size.y;
                    desc.SampleDesc.Count = 1;
                    desc.SampleDesc.Quality = 0;
                    desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
                    desc.BufferCount = GPU_D12_SwapchainBufferCount;
                    desc.Scaling = DXGI_SCALING_NONE;
                    desc.Flags = GPU_D12_SwapchainFlags;
                    desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
                    desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
                    /* D3D12 swapchains are created against a command queue,
                       not the device. */
                    hr = IDXGIFactory2_CreateSwapChainForHwnd(g->factory, (IUnknown *)direct_queue->d3d_queue, swapchain->window_hwnd, &desc, 0, 0, &swapchain1);
                }

                /* Upgrade to swapchain3 */
                if (SUCCEEDED(hr))
                {
                    hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain3);
                    /* QueryInterface AddRef'd swapchain3; drop the original ref. */
                    IDXGISwapChain1_Release(swapchain1);
                }

            }
            /* On failure swapchain3 stays 0 and we fall through to the panic below. */
            swapchain->d3d_swapchain = swapchain3;
            swapchain->backbuffers_format = format;
            swapchain->backbuffers_resolution = size;
        }

        /* Create waitable object */
        {
            /* NOTE(review): the frame-latency waitable object is only valid
               when GPU_D12_SwapchainFlags includes
               DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT — confirm. */
            HANDLE waitable = 0;
            if (SUCCEEDED(hr) && GPU_D12_FrameLatency > 0)
            {
                hr = IDXGISwapChain3_SetMaximumFrameLatency(swapchain->d3d_swapchain, GPU_D12_FrameLatency);
                waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->d3d_swapchain);
            }
            swapchain->waitable = waitable;
        }

        /* Create present fence */
        {
            /* Fence + event pair used to block the CPU on in-flight presents
               (see the resize and timeout paths below). */
            HANDLE present_event = 0;
            ID3D12Fence *present_fence = 0;
            if (SUCCEEDED(hr))
            {
                present_event = CreateEvent(0, 0, 0, 0);
                hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&present_fence);
            }
            swapchain->present_fence = present_fence;
            swapchain->present_event = present_event;
        }

        /* Disable Alt+Enter */
        IDXGIFactory_MakeWindowAssociation(g->factory, swapchain->window_hwnd, DXGI_MWA_NO_ALT_ENTER);

        if (FAILED(hr))
        {
            Panic(Lit("Failed to create swapchain"));
        }
    }

    /* Resize backbuffers */
    if (!MatchVec2I32(swapchain->backbuffers_resolution, size) || swapchain->backbuffers_format != format)
    {
        HRESULT hr = 0;

        /* Wait for any previous backbuffer commands to finish */
        {
            /* ResizeBuffers requires all outstanding backbuffer references to
               be released and the GPU to be done with them. */
            ID3D12Fence_SetEventOnCompletion(swapchain->present_fence, swapchain->present_fence_target, swapchain->present_event);
            WaitForSingleObject(swapchain->present_event, INFINITE);
        }

        /* Release backbuffers */
        for (u32 i = 0; i < countof(swapchain->backbuffers); ++i)
        {
            GPU_D12_Resource *backbuffer = &swapchain->backbuffers[i];
            if (backbuffer->d3d_resource)
            {
                ID3D12Resource_Release(backbuffer->d3d_resource);
                backbuffer->d3d_resource = 0;
            }
        }

        /* Resize buffers */
        /* BufferCount 0 and DXGI_FORMAT_UNKNOWN preserve the existing count
           and format; only the dimensions change here. */
        hr = IDXGISwapChain_ResizeBuffers(swapchain->d3d_swapchain, 0, size.x, size.y, DXGI_FORMAT_UNKNOWN, GPU_D12_SwapchainFlags);
        if (FAILED(hr))
        {
            /* TODO: Don't panic */
            Panic(Lit("Failed to resize swapchain"));
        }
    }

    /* Initialize backbuffers */
    {
        /* (Re)wrap each swapchain buffer in a GPU_D12_Resource; only slots
           whose d3d_resource was released (or never fetched) are refreshed. */
        for (u32 i = 0; i < countof(swapchain->backbuffers); ++i)
        {
            GPU_D12_Resource *backbuffer = &swapchain->backbuffers[i];
            if (!backbuffer->d3d_resource)
            {
                ID3D12Resource *d3d_resource = 0;
                HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->d3d_swapchain, i, &IID_ID3D12Resource, (void **)&d3d_resource);
                if (FAILED(hr))
                {
                    /* TODO: Don't panic */
                    Panic(Lit("Failed to get swapchain buffer"));
                }
                ZeroStruct(backbuffer);
                backbuffer->d3d_resource = d3d_resource;
                /* Fresh uid so stale cached RTVs keyed on uid get invalidated. */
                backbuffer->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;
                backbuffer->flags = GPU_ResourceFlag_AllowRenderTarget;

                backbuffer->is_texture = 1;
                backbuffer->texture_format = format;
                backbuffer->texture_dims = VEC3I32(size.x, size.y, 1);
                backbuffer->texture_mip_levels = 1;
                /* Swapchain buffers start (and must end each frame) in PRESENT. */
                backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT;
                backbuffer->swapchain = swapchain;
            }
        }
        swapchain->backbuffers_format = format;
        swapchain->backbuffers_resolution = size;
    }

    /* Wait for available backbuffer */
    if (swapchain->waitable)
    {
        /* 500ms cap on the latency wait; on timeout fall back to waiting for
           the last signaled present fence instead. */
        DWORD wait_result = WaitForSingleObject(swapchain->waitable, 500);
        if (wait_result == WAIT_TIMEOUT)
        {
            ID3D12Fence_SetEventOnCompletion(swapchain->present_fence, swapchain->present_fence_target, swapchain->present_event);
            WaitForSingleObject(swapchain->present_event, INFINITE);
        }
    }

    /* Grab current backbuffer */
    GPU_D12_Resource *cur_backbuffer = 0;
    {
        u32 backbuffer_idx = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->d3d_swapchain);
        cur_backbuffer = &swapchain->backbuffers[backbuffer_idx];
    }

    return GPU_D12_MakeHandle(GPU_ResourceHandle, cur_backbuffer);
}
|
|
|
|
void GPU_CommitBackbuffer(GPU_ResourceHandle backbuffer_handle, i32 vsync)
{
    /* Present the backbuffer's swapchain, then signal its present fence on
       the direct queue so later resizes/acquires can wait for this frame. */
    GPU_D12_Resource *bb = GPU_D12_ResourceFromHandle(backbuffer_handle);
    GPU_D12_Swapchain *sc = bb->swapchain;
    GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);

    /* Tearing is only legal when vsync is off and the adapter allows it. */
    u32 flags = 0;
    if (GPU_D12_TearingIsAllowed && vsync == 0)
    {
        flags |= DXGI_PRESENT_ALLOW_TEARING;
    }

    /* Present */
    {
        HRESULT present_hr = IDXGISwapChain3_Present(sc->d3d_swapchain, vsync, flags);
        if (!SUCCEEDED(present_hr))
        {
            Assert(0);
        }
    }

    if (vsync != 0 && !(flags & DXGI_PRESENT_ALLOW_TEARING))
    {
        /* FIXME: Don't flush in fullscreen mode? */
        // DwmFlush();
    }

    /* Bump and signal the per-swapchain present fence from the direct queue. */
    {
        u64 target = ++sc->present_fence_target;
        ID3D12CommandQueue_Signal(queue->d3d_queue, sc->present_fence, target);
    }
}
|