2179 lines
81 KiB
C
2179 lines
81 KiB
C
// Single process-wide state for the D3D12 backend (device, queues, heaps,
// pipeline cache, resource reuse bins). Zero-initialized at load time.
GPU_D12_SharedState GPU_D12_shared_state = ZI;
|
|
|
|
////////////////////////////////
|
|
//~ Helpers
|
|
|
|
// Returns the per-fiber D3D12 state for `fiber_id`, lazily allocating it
// from the permanent arena the first time the fiber is seen.
GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_FiberState **slot = &g->fiber_states[fiber_id];
    if (*slot == 0)
    {
        Arena *perm = PermArena();
        *slot = PushStruct(perm, GPU_D12_FiberState);
    }
    return *slot;
}
|
|
|
|
// Converts a GPU_Format to its DXGI_FORMAT equivalent via direct cast.
// NOTE(review): this assumes GPU_Format's enumerator values mirror
// DXGI_FORMAT 1:1 — confirm against the GPU_Format declaration.
DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format)
{
    return (DXGI_FORMAT)format;
}
|
|
|
|
// Appends a zeroed command node to the end of `cl`. Nodes are recycled from
// the calling fiber's free list when available; otherwise a new node is
// allocated (uninitialized) from the permanent arena and zeroed here.
GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl)
{
    GPU_D12_FiberState *fiber = GPU_D12_FiberStateFromId(FiberId());
    Arena *perm = PermArena();

    GPU_D12_Command *cmd = fiber->first_free_command;
    if (!cmd)
    {
        cmd = PushStructNoZero(perm, GPU_D12_Command);
    }
    else
    {
        // Pop the recycled node off the fiber-local free list.
        fiber->first_free_command = cmd->next;
    }

    ZeroStruct(cmd);
    QueuePush(cl->first, cl->last, cmd);
    cl->count += 1;
    return cmd;
}
|
|
|
|
// Computes the reuse-bin hash for a resource description. Two resources with
// the same hash are considered interchangeable for recycling purposes.
// Textures hash format, mips, clear color, and size; buffers hash heap kind
// and the rounded allocation size (`buffer_size`). The seed order matches the
// established hash chain and must not be reordered.
u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc, u64 buffer_size)
{
    u64 hash = RandU64FromSeeds(desc.kind, desc.flags);
    switch (desc.kind)
    {
        case GPU_ResourceKind_Texture1D:
        case GPU_ResourceKind_Texture2D:
        case GPU_ResourceKind_Texture3D:
        {
            hash = RandU64FromSeeds(hash, desc.texture.format);
            hash = RandU64FromSeeds(hash, desc.texture.mip_levels);
            hash = RandU64FromSeeds(hash, desc.clear_color.x);
            hash = RandU64FromSeeds(hash, desc.clear_color.y);
            hash = RandU64FromSeeds(hash, desc.clear_color.z);
            hash = RandU64FromSeeds(hash, desc.clear_color.w);
            hash = RandU64FromSeeds(hash, desc.texture.size.x);
            hash = RandU64FromSeeds(hash, desc.texture.size.y);
            hash = RandU64FromSeeds(hash, desc.texture.size.z);
        } break;

        case GPU_ResourceKind_Buffer:
        {
            hash = RandU64FromSeeds(hash, desc.buffer.heap_kind);
            hash = RandU64FromSeeds(hash, buffer_size);
        } break;

        default: break;
    }
    return hash;
}
|
|
|
|
////////////////////////////////
|
|
//~ Startup
|
|
|
|
// One-time startup of the D3D12 backend. Order matters: device first, then
// the four command queues (initialized in parallel jobs), then descriptor
// heaps, the bindless root signature, and finally the dedicated queue-sync
// job that mirrors D3D12 fence completions into engine fences.
void GPU_D12_Startup(void)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;

    /* Init device */
    GPU_D12_InitDevice();

    /* Init queues */
    {
        GPU_D12_QueueDesc descs[] = {
            {.kind = GPU_QueueKind_Direct, .d3d_type = D3D12_COMMAND_LIST_TYPE_DIRECT, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Direct queue") },
            {.kind = GPU_QueueKind_Compute, .d3d_type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Compute queue") },
            {.kind = GPU_QueueKind_Copy, .d3d_type = D3D12_COMMAND_LIST_TYPE_COPY, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .dbg_name = Lit("Copy queue") },
            {.kind = GPU_QueueKind_BackgroundCopy, .d3d_type = D3D12_COMMAND_LIST_TYPE_COPY, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Background copy queue") }
        };
        /* One GPU_D12_InitQueue job instance per queue kind; block here until
           every queue (and its submit fence) exists before touching heaps. */
        u32 job_count = 0; Fence job_fence = ZI;
        job_count += RunJob(GPU_D12_InitQueue, .count = GPU_NumQueues, .sig.descs = descs, .fence = &job_fence);
        YieldOnFence(&job_fence, job_count);
    }

    /* Init descriptor heaps: the CBV/SRV/UAV and sampler heaps are
       shader-visible (bindless); the RTV heap is CPU-only. */
    g->cbv_srv_uav_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                                                     D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
                                                     GPU_D12_MaxCbvSrvUavDescriptors,
                                                     ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV));

    g->sampler_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
                                                 D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
                                                 GPU_D12_MaxSamplerDescriptors,
                                                 ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER));

    g->rtv_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
                                             D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
                                             GPU_D12_MaxRtvDescriptors,
                                             ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV));

    /* Init rootsig */
    GPU_D12_InitRootsig();

    /* Start queue sync job: runs forever on a dedicated thread. */
    RunJob(GPU_D12_StartQueueSync, .pool = JobPool_Hyper, .flags = JobFlag_Dedicated);
}
|
|
|
|
////////////////////////////////
|
|
//~ Initialization
|
|
|
|
//- Device initialization
|
|
|
|
void GPU_D12_InitDevice(void)
|
|
{
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
TempArena scratch = BeginScratchNoConflict();
|
|
HRESULT hr = 0;
|
|
|
|
/* Enable debug layer */
|
|
u32 dxgi_factory_flags = 0;
|
|
#if GPU_DEBUG
|
|
{
|
|
__profn("Enable debug layer");
|
|
ID3D12Debug *debug_controller0 = 0;
|
|
hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to create ID3D12Debug0"));
|
|
}
|
|
|
|
ID3D12Debug1 *debug_controller1 = 0;
|
|
hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to create ID3D12Debug1"));
|
|
}
|
|
|
|
ID3D12Debug_EnableDebugLayer(debug_controller0);
|
|
|
|
/* FIXME: Enable this */
|
|
// ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, 1);
|
|
|
|
ID3D12Debug_Release(debug_controller1);
|
|
ID3D12Debug_Release(debug_controller0);
|
|
dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG;
|
|
}
|
|
#endif
|
|
|
|
/* Create factory */
|
|
{
|
|
__profn("Create factory");
|
|
hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&g->factory);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to initialize DXGI factory"));
|
|
}
|
|
}
|
|
|
|
/* Create device */
|
|
{
|
|
__profn("Create device");
|
|
IDXGIAdapter1 *adapter = 0;
|
|
ID3D12Device *device = 0;
|
|
String error = Lit("Could not initialize GPU device.");
|
|
String first_gpu_name = ZI;
|
|
u32 adapter_index = 0;
|
|
b32 skip = 0; /* For debugging iGPU */
|
|
for (;;)
|
|
{
|
|
{
|
|
hr = IDXGIFactory6_EnumAdapterByGpuPreference(g->factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter);
|
|
}
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
DXGI_ADAPTER_DESC1 desc;
|
|
IDXGIAdapter1_GetDesc1(adapter, &desc);
|
|
if (first_gpu_name.len == 0)
|
|
{
|
|
first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description);
|
|
}
|
|
{
|
|
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device);
|
|
}
|
|
if (SUCCEEDED(hr) && !skip)
|
|
{
|
|
break;
|
|
}
|
|
skip = 0;
|
|
ID3D12Device_Release(device);
|
|
IDXGIAdapter1_Release(adapter);
|
|
adapter = 0;
|
|
device = 0;
|
|
++adapter_index;
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
if (!device)
|
|
{
|
|
if (first_gpu_name.len > 0)
|
|
{
|
|
error = StringF(scratch.arena,
|
|
"Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.",
|
|
FmtString(first_gpu_name));
|
|
}
|
|
Panic(error);
|
|
}
|
|
g->adapter = adapter;
|
|
g->device = device;
|
|
}
|
|
|
|
#if GPU_DEBUG
|
|
/* Enable D3D12 Debug break */
|
|
{
|
|
__profn("Enable d3d12 debug break");
|
|
ID3D12InfoQueue *info = 0;
|
|
hr = ID3D12Device_QueryInterface(g->device, &IID_ID3D12InfoQueue, (void **)&info);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to query ID3D12Device interface"));
|
|
}
|
|
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1);
|
|
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1);
|
|
ID3D12InfoQueue_Release(info);
|
|
}
|
|
|
|
/* Enable DXGI Debug break */
|
|
{
|
|
__profn("Enable dxgi debug break");
|
|
IDXGIInfoQueue *dxgi_info = 0;
|
|
hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to get DXGI debug interface"));
|
|
}
|
|
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1);
|
|
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1);
|
|
IDXGIInfoQueue_Release(dxgi_info);
|
|
}
|
|
#endif
|
|
|
|
EndScratch(scratch);
|
|
}
|
|
|
|
//- Queue initialization
|
|
|
|
JobDef(GPU_D12_InitQueue, sig, id)
|
|
{
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
GPU_D12_QueueDesc desc = sig->descs[id];
|
|
Arena *perm = PermArena();
|
|
HRESULT hr = 0;
|
|
|
|
GPU_D12_Queue *queue = 0;
|
|
{
|
|
PushAlign(perm, CachelineSize);
|
|
queue = PushStruct(perm, GPU_D12_Queue);
|
|
PushAlign(perm, CachelineSize);
|
|
}
|
|
queue->desc = desc;
|
|
|
|
D3D12_COMMAND_QUEUE_DESC d3d_desc = ZI;
|
|
d3d_desc.Type = desc.d3d_type;
|
|
d3d_desc.Priority = desc.d3d_priority;
|
|
hr = ID3D12Device_CreateCommandQueue(g->device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->d3d_queue);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to create command queue"));
|
|
}
|
|
|
|
hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&queue->submit_fence);
|
|
if (FAILED(hr))
|
|
{
|
|
Panic(Lit("Failed to create command queue fence"));
|
|
}
|
|
|
|
g->queues[desc.kind] = queue;
|
|
}
|
|
|
|
//- Heap initialization
|
|
|
|
// Creates a descriptor heap wrapper: its own arena (for descriptor nodes),
// the underlying D3D12 heap of `max_descs` slots, and the cached CPU base
// handle. `desc_size` is the device's handle increment for this heap type.
GPU_D12_DescriptorHeap *GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIPTOR_HEAP_FLAGS flags, u32 max_descs, u32 desc_size)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;

    /* The heap owns its arena; the wrapper struct lives in it too. */
    Arena *arena = AcquireArena(Gibi(64));
    GPU_D12_DescriptorHeap *heap = PushStruct(arena, GPU_D12_DescriptorHeap);
    heap->arena           = arena;
    heap->type            = type;
    heap->max_count       = max_descs;
    heap->descriptor_size = desc_size;

    D3D12_DESCRIPTOR_HEAP_DESC d3d_desc = ZI;
    d3d_desc.Type           = type;
    d3d_desc.Flags          = flags;
    d3d_desc.NumDescriptors = max_descs;
    HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap);
    if (FAILED(hr))
    {
        Panic(Lit("Failed to create CPU descriptor heap"));
    }

    /* Cache the CPU base handle; slot handles are derived by offsetting. */
    ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle);
    return heap;
}
|
|
|
|
//- Rootsig initialization
|
|
|
|
void GPU_D12_InitRootsig(void)
|
|
{
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
b32 ok = 1;
|
|
HRESULT hr = 0;
|
|
String error_str = ZI;
|
|
|
|
/* Serialize root signature */
|
|
ID3D10Blob *blob = 0;
|
|
if (ok)
|
|
{
|
|
__profn("Create root signature");
|
|
|
|
D3D12_ROOT_PARAMETER param = ZI;
|
|
param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
|
param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
|
param.Constants.ShaderRegister = 0;
|
|
param.Constants.RegisterSpace = 0;
|
|
param.Constants.Num32BitValues = 64;
|
|
|
|
D3D12_ROOT_SIGNATURE_DESC desc = ZI;
|
|
desc.NumParameters = 1;
|
|
desc.pParameters = ¶m;
|
|
desc.NumStaticSamplers = 0;
|
|
desc.pStaticSamplers = 0;
|
|
desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED;
|
|
|
|
hr = D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, 0);
|
|
if (FAILED(hr))
|
|
{
|
|
error_str = Lit("Failed to serialize root signature");
|
|
ok = 0;
|
|
}
|
|
}
|
|
|
|
/* Create root signature */
|
|
ID3D12RootSignature *rootsig = 0;
|
|
if (ok)
|
|
{
|
|
__profn("Create root signature");
|
|
|
|
hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), &IID_ID3D12RootSignature, (void **)&rootsig);
|
|
if (FAILED(hr))
|
|
{
|
|
error_str = Lit("Failed to create root signature");
|
|
ok = 0;
|
|
}
|
|
}
|
|
|
|
if (blob)
|
|
{
|
|
ID3D10Blob_Release(blob);
|
|
}
|
|
|
|
g->bindless_rootsig = rootsig;
|
|
if (!ok)
|
|
{
|
|
Panic(error_str);
|
|
}
|
|
}
|
|
|
|
////////////////////////////////
|
|
//~ Pipeline operations
|
|
|
|
JobDef(GPU_D12_LoadPipeline, sig, _)
|
|
{
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
GPU_D12_Pipeline *pipeline = sig->pipeline;
|
|
GPU_D12_PipelineDesc desc = pipeline->desc;
|
|
|
|
HRESULT hr = 0;
|
|
b32 ok = 1;
|
|
String error_str = ZI;
|
|
|
|
/* Create PSO */
|
|
ID3D12PipelineState *pso = 0;
|
|
if (ok && (!IsResourceNil(desc.vs.resource) != 0 || !IsResourceNil(desc.ps.resource)))
|
|
{
|
|
D3D12_RASTERIZER_DESC raster_desc = ZI;
|
|
raster_desc.FillMode = D3D12_FILL_MODE_SOLID;
|
|
raster_desc.CullMode = D3D12_CULL_MODE_NONE;
|
|
raster_desc.FrontCounterClockwise = 0;
|
|
raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
|
|
raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
|
|
raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
|
|
raster_desc.DepthClipEnable = 1;
|
|
raster_desc.MultisampleEnable = 0;
|
|
raster_desc.AntialiasedLineEnable = 0;
|
|
raster_desc.ForcedSampleCount = 0;
|
|
raster_desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;
|
|
|
|
D3D12_BLEND_DESC blend_desc = ZI;
|
|
blend_desc.AlphaToCoverageEnable = 0;
|
|
blend_desc.IndependentBlendEnable = 0;
|
|
blend_desc.RenderTarget[0].BlendEnable = 1;
|
|
blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
|
|
blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
|
|
blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
|
|
blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE;
|
|
blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
|
|
blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
|
|
blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
|
|
|
|
D3D12_DEPTH_STENCIL_DESC ds_desc = ZI;
|
|
ds_desc.DepthEnable = 0;
|
|
ds_desc.StencilEnable = 0;
|
|
|
|
String vs = DataFromResource(desc.vs.resource);
|
|
String ps = DataFromResource(desc.ps.resource);
|
|
|
|
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = ZI;
|
|
pso_desc.pRootSignature = g->bindless_rootsig;
|
|
pso_desc.VS.pShaderBytecode = vs.text;
|
|
pso_desc.VS.BytecodeLength = vs.len;
|
|
pso_desc.PS.pShaderBytecode = ps.text;
|
|
pso_desc.PS.BytecodeLength = ps.len;
|
|
pso_desc.RasterizerState = raster_desc;
|
|
pso_desc.BlendState = blend_desc;
|
|
pso_desc.DepthStencilState = ds_desc;
|
|
pso_desc.PrimitiveTopologyType = desc.topology_type;
|
|
pso_desc.SampleMask = UINT_MAX;
|
|
pso_desc.SampleDesc.Count = 1;
|
|
pso_desc.SampleDesc.Quality = 0;
|
|
for (i32 i = 0; i < (i32)countof(desc.render_target_formats); ++i)
|
|
{
|
|
StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc.render_target_formats));
|
|
DXGI_FORMAT format = GPU_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[i]);
|
|
if (format != DXGI_FORMAT_UNKNOWN)
|
|
{
|
|
pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format;
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
|
|
if (FAILED(hr))
|
|
{
|
|
error_str = Lit("Failed to create pipeline state object");
|
|
ok = 0;
|
|
}
|
|
}
|
|
else if (ok)
|
|
{
|
|
String cs = DataFromResource(desc.cs.resource);
|
|
|
|
D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = ZI;
|
|
pso_desc.pRootSignature = g->bindless_rootsig;
|
|
pso_desc.CS.pShaderBytecode = cs.text;
|
|
pso_desc.CS.BytecodeLength = cs.len;
|
|
hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
|
|
if (FAILED(hr))
|
|
{
|
|
error_str = Lit("Failed to create pipeline state object");
|
|
ok = 0;
|
|
}
|
|
}
|
|
|
|
pipeline->pso = pso;
|
|
pipeline->error = error_str;
|
|
pipeline->ok = 1;
|
|
}
|
|
|
|
// Returns the pipeline for `desc`, keyed by a hash of the raw desc bytes.
// Lookup uses a double-checked pattern on the hash bin: a shared lock for
// the common cache-hit path, then an exclusive lock that re-checks before
// inserting, so concurrent callers with the same desc agree on one entry.
// A newly inserted pipeline is compiled asynchronously by
// GPU_D12_LoadPipeline; every caller yields on ready_fence, so this always
// returns a pipeline whose load has completed (check pipeline->ok/error).
GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    // Hash the whole desc struct by value.
    // NOTE(review): assumes GPU_D12_PipelineDesc has no uninitialized
    // padding bytes that would perturb the hash — confirm.
    u64 hash = RandU64FromSeed(HashFnv64(Fnv64Basis, StringFromStruct(&desc)));

    GPU_D12_Pipeline *pipeline = 0;
    b32 is_pipeline_new = 0;
    GPU_D12_PipelineBin *bin = &g->pipeline_bins[hash % countof(g->pipeline_bins)];
    {
        // Fast path: shared (read) lock, scan the bin chain.
        {
            Lock lock = LockS(&bin->mutex);
            for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin)
            {
                if (pipeline->hash == hash) break;
            }
            Unlock(&lock);
        }
        // Slow path: exclusive lock, re-check (another thread may have
        // inserted between the two locks), then insert.
        if (!pipeline)
        {
            Lock lock = LockE(&bin->mutex);
            for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin)
            {
                if (pipeline->hash == hash) break;
            }
            if (!pipeline)
            {
                Arena *perm = PermArena();
                // Cacheline-isolate the pipeline struct.
                PushAlign(perm, CachelineSize);
                pipeline = PushStruct(perm, GPU_D12_Pipeline);
                pipeline->desc = desc;
                pipeline->hash = hash;
                is_pipeline_new = 1;
                PushAlign(perm, CachelineSize);
                StackPushN(bin->first, pipeline, next_in_bin);
            }
            Unlock(&lock);
        }
    }

    // Only the inserting thread kicks off the load job; everyone waits on
    // the same ready_fence.
    if (is_pipeline_new)
    {
        RunJob(GPU_D12_LoadPipeline, .fence = &pipeline->ready_fence, .sig.pipeline = pipeline);
    }
    YieldOnFence(&pipeline->ready_fence, 1);

    return pipeline;
}
|
|
|
|
////////////////////////////////
|
|
//~ Queue operations
|
|
|
|
// Looks up the backend queue object for a queue kind. Queues are created
// during startup, so this is a plain table read.
GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind)
{
    return GPU_D12_shared_state.queues[kind];
}
|
|
|
|
////////////////////////////////
|
|
//~ Descriptor operations
|
|
|
|
// Acquires a descriptor slot from `heap`. Under the heap's exclusive lock it
// either recycles a node from the free list (keeping its slot index/handle)
// or allocates a new slot at the end of the heap, panicking when full.
// The node is zeroed and re-initialized outside the lock.
GPU_D12_Descriptor *GPU_D12_AcquireDescriptor(GPU_D12_DescriptorHeap *heap)
{
    GPU_D12_Descriptor *d = 0;
    u32 index = 0;
    D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI;
    {
        Lock lock = LockE(&heap->mutex);
        if (heap->first_free)
        {
            // Recycle: pop from the free list and capture its slot before
            // the struct is zeroed below.
            d = heap->first_free;
            heap->first_free = d->next_free;
            handle = d->handle;
            index = d->index;
        }
        else
        {
            if (heap->allocated_count >= heap->max_count)
            {
                Panic(Lit("Max descriptors reached in heap"));
            }
            // Fresh slot: next sequential index; handle is the heap base
            // plus index * increment size.
            d = PushStructNoZero(heap->arena, GPU_D12_Descriptor);
            index = heap->allocated_count++;
            handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size);
        }
        Unlock(&lock);
    }
    // Reset outside the lock; slot identity was captured above.
    ZeroStruct(d);
    d->heap = heap;
    d->handle = handle;
    d->index = index;
    return d;
}
|
|
|
|
// Returns a descriptor to its owning heap's free list so its slot can be
// recycled by a later GPU_D12_AcquireDescriptor.
void GPU_D12_ReleaseDescriptor(GPU_D12_Descriptor *descriptor)
{
    GPU_D12_DescriptorHeap *heap = descriptor->heap;
    Lock lock = LockE(&heap->mutex);
    descriptor->next_free = heap->first_free;
    heap->first_free = descriptor;
    Unlock(&lock);
}
|
|
|
|
////////////////////////////////
|
|
//~ Raw command list
|
|
|
|
// Begins recording on a raw D3D12 command list for the given queue kind.
// Recycles the oldest submitted list whose fence target the GPU has already
// passed; otherwise creates a fresh allocator + list (created closed, per
// D3D12 convention, so the common reset path below applies to both cases).
// The returned list is reset and ready to record.
GPU_D12_RawCommandList *GPU_D12_BeginRawCommandList(GPU_QueueKind queue_kind)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);

    /* Pull first completed command list from queue if ready */
    GPU_D12_RawCommandList *cl = ZI;
    {
        Lock lock = LockE(&queue->submit_mutex);
        {
            u64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence);
            // Submitted lists are FIFO, so only the head can be the oldest;
            // if it hasn't completed, nothing has.
            cl = queue->first_submitted_cl;
            if (cl && cl->submit_fence_target <= completed)
            {
                QueuePop(queue->first_submitted_cl, queue->last_submitted_cl);
            }
            else
            {
                cl = 0;
            }
        }
        Unlock(&lock);
    }

    /* Allocate new command list if none are available */
    if (!cl)
    {
        Arena *perm = PermArena();
        {
            // Cacheline-isolate the struct to avoid false sharing.
            PushAlign(perm, CachelineSize);
            cl = PushStruct(perm, GPU_D12_RawCommandList);
            PushAlign(perm, CachelineSize);
        }
        cl->queue = queue;

        HRESULT hr = ID3D12Device_CreateCommandAllocator(g->device, queue->desc.d3d_type, &IID_ID3D12CommandAllocator, (void **)&cl->ca);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to create command allocator"));
        }

        hr = ID3D12Device_CreateCommandList(g->device, 0, queue->desc.d3d_type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to create command list"));
        }

        // Command lists are created in the recording state; close it so the
        // unified reset path below works for new and recycled lists alike.
        hr = ID3D12GraphicsCommandList_Close(cl->cl);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to close command list during initialization"));
        }
    }

    /* Reset command list */
    {
        HRESULT hr = ID3D12CommandAllocator_Reset(cl->ca);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to reset command allocator"));
        }

        hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to reset command list"));
        }
    }

    return cl;
}
|
|
|
|
// Closes and submits a raw command list to its queue. Under the submit
// mutex: bumps the queue's fence target, executes the list, signals the
// fence, and appends the list to the submitted queue (FIFO, recycled by
// GPU_D12_BeginRawCommandList once the fence passes the target).
// Returns the fence value that marks this submission's completion.
u64 GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl)
{
    GPU_D12_Queue *queue = cl->queue;

    /* Close */
    {
        __profn("Close DX12 command list");
        HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl);
        if (FAILED(hr))
        {
            /* TODO: Don't panic */
            Panic(Lit("Failed to close command list before execution"));
        }
    }

    /* Submit */
    u64 target = 0;
    {
        __profn("Execute");
        // The lock keeps target allocation, execution, signal, and queue
        // append atomic so submitted-list order matches fence order.
        Lock lock = LockE(&queue->submit_mutex);
        {
            target = ++queue->submit_fence_target;
            cl->submit_fence_target = target;
            /* Execute */
            ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->cl);
            ID3D12CommandQueue_Signal(queue->d3d_queue, queue->submit_fence, target);
            /* Append */
            QueuePush(queue->first_submitted_cl, queue->last_submitted_cl, cl);
        }
        Unlock(&lock);
    }

    return target;
}
|
|
|
|
////////////////////////////////
|
|
//~ Swapchain helpers
|
|
|
|
// (Re)binds the swapchain's backbuffers: fetches each D3D12 resource from
// the DXGI swapchain, acquires an RTV slot for it, and creates the render
// target view. Buffers start in the COMMON resource state.
void GPU_D12_InitSwapchainResources(GPU_D12_Swapchain *swapchain)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    for (u32 buffer_index = 0; buffer_index < countof(swapchain->buffers); ++buffer_index)
    {
        ID3D12Resource *d3d_resource = 0;
        HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, buffer_index, &IID_ID3D12Resource, (void **)&d3d_resource);
        if (FAILED(hr))
        {
            /* TODO: Don't panic */
            Panic(Lit("Failed to get swapchain buffer"));
        }

        GPU_D12_SwapchainBuffer *buffer = &swapchain->buffers[buffer_index];
        ZeroStruct(buffer);
        buffer->swapchain      = swapchain;
        buffer->d3d_resource   = d3d_resource;
        buffer->rtv_descriptor = GPU_D12_AcquireDescriptor(g->rtv_heap);
        buffer->state          = D3D12_RESOURCE_STATE_COMMON;
        ID3D12Device_CreateRenderTargetView(g->device, buffer->d3d_resource, 0, buffer->rtv_descriptor->handle);
    }
}
|
|
|
|
// Resizes the swapchain to `resolution` if it changed (clamped to at least
// 1x1), then returns the current backbuffer. A resize requires draining the
// direct queue (backbuffer may be in flight), releasing all buffer views and
// resources, calling ResizeBuffers, and rebuilding the buffer wrappers.
GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, Vec2I32 resolution)
{
    __prof;
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    // DXGI rejects zero-sized swapchains (e.g. minimized window).
    resolution.x = MaxI32(resolution.x, 1);
    resolution.y = MaxI32(resolution.y, 1);
    b32 should_rebuild = !EqVec2I32(swapchain->resolution, resolution);
    if (should_rebuild)
    {
        HRESULT hr = 0;
        GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);
        /* Lock direct queue submissions (in case any write to backbuffer) */
        /* TODO: Less overkill approach - Only flush GPU_D12_BlitToSwapchain since we know it's the only operation targeting backbuffer */
        Lock lock = LockE(&queue->submit_mutex);
        //DEBUGBREAKABLE;
        //Lock lock = LockE(&g->global_command_list_record_mutex);
        {
            /* Flush direct queue */
            //ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, ++cq->submit_fence_target);
            {
                // Block this thread until the GPU reaches the last submitted
                // fence target on the direct queue.
                // NOTE(review): CreateEvent's return is not checked here.
                HANDLE event = CreateEvent(0, 0, 0, 0);
                ID3D12Fence_SetEventOnCompletion(queue->submit_fence, queue->submit_fence_target, event);
                WaitForSingleObject(event, INFINITE);
                CloseHandle(event);
            }

            /* Release buffers — ResizeBuffers fails if any backbuffer
               reference is still alive. */
            for (u32 i = 0; i < countof(swapchain->buffers); ++i)
            {
                GPU_D12_SwapchainBuffer *sb = &swapchain->buffers[i];
                GPU_D12_ReleaseDescriptor(sb->rtv_descriptor);
                ID3D12Resource_Release(sb->d3d_resource);
            }

            /* Resize buffers (0 = keep buffer count, UNKNOWN = keep format). */
            hr = IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, GPU_D12_SwapchainFlags);
            if (FAILED(hr))
            {
                /* TODO: Don't panic */
                Panic(Lit("Failed to resize swapchain"));
            }
        }
        Unlock(&lock);

        GPU_D12_InitSwapchainResources(swapchain);

        swapchain->resolution = resolution;
    }

    u32 backbuffer_index = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->swapchain);
    return &swapchain->buffers[backbuffer_index];
}
|
|
|
|
// Records and submits a direct-queue command list that clears the backbuffer
// `dst` to black, then copies `texture` into it at `dst_pos` (clamped to the
// backbuffer). Barrier choreography: PRESENT -> RENDER_TARGET (clear) ->
// COPY_DEST (+ texture -> COPY_SOURCE) -> PRESENT (+ texture restored to its
// original state). Returns the direct-queue fence target for this submission.
i64 GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *texture, Vec2I32 dst_pos)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;

    GPU_D12_Swapchain *swapchain = dst->swapchain;
    GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(GPU_QueueKind_Direct);
    ID3D12GraphicsCommandList *rcl = dx12_cl->cl;
    // Remembered so the texture can be transitioned back at the end.
    D3D12_RESOURCE_STATES old_texture_state = texture->state;

    {
        u32 barriers_count = 0;
        D3D12_RESOURCE_BARRIER rbs[2] = ZI;
        /* Transition backbuffer to RENDER_TARGET */
        {
            D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
            rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
            rb->Transition.pResource = dst->d3d_resource;
            rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
            rb->Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT;
            rb->Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
        }
        ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs);
    }

    /* Clear to opaque black (all-zero color). */
    {
        f32 clear_color[4] = ZI;
        ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, dst->rtv_descriptor->handle, clear_color, 0, 0);
    }

    {
        u32 barriers_count = 0;
        D3D12_RESOURCE_BARRIER rbs[2] = ZI;
        /* Transition backbuffer to COPY_DEST */
        {
            D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
            rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
            rb->Transition.pResource = dst->d3d_resource;
            rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
            rb->Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET;
            rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
        }
        /* Transition texture to COPY_SRC (skipped if already there) */
        if (texture->state != D3D12_RESOURCE_STATE_COPY_SOURCE)
        {
            D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
            rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
            rb->Transition.pResource = texture->d3d_resource;
            rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
            rb->Transition.StateBefore = texture->state;
            rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
            texture->state = rb->Transition.StateAfter;
        }
        ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs);
    }

    /* Copy */
    {
        D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI;
        dst_loc.pResource = dst->d3d_resource;
        dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
        dst_loc.SubresourceIndex = 0;

        D3D12_TEXTURE_COPY_LOCATION src_loc = ZI;
        src_loc.pResource = texture->d3d_resource;
        src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
        src_loc.SubresourceIndex = 0;

        Vec2I32 dst_size = swapchain->resolution;
        Vec2I32 src_size = VEC2I32(texture->desc.texture.size.x, texture->desc.texture.size.y);

        i32 dst_left = dst_pos.x;
        i32 dst_top = dst_pos.y;

        i32 src_left = 0;
        i32 src_top = 0;
        i32 src_right = src_size.x;
        i32 src_bottom = src_size.y;

        /* Clamp copy src & dst */
        // Negative destination: shift the source window right/down instead.
        if (dst_left < 0)
        {
            src_left -= dst_left;
            dst_left = 0;
        }
        if (dst_top < 0)
        {
            src_top -= dst_top;
            dst_top = 0;
        }
        // Clamp the right/bottom edges to the backbuffer.
        // NOTE(review): the copy width is (src_right - src_left), so
        // `(src_left + src_right)` overestimates the extent whenever
        // src_left > 0 (negative dst_pos), clamping more than necessary.
        // Conservative (never out of bounds) but possibly under-copies —
        // confirm whether `src_right - src_left` was intended.
        if (dst_left + (src_left + src_right) > dst_size.x)
        {
            src_right -= (dst_left + (src_left + src_right)) - dst_size.x;
        }
        if (dst_top + (src_top + src_bottom) > dst_size.y)
        {
            src_bottom -= (dst_top + (src_top + src_bottom)) - dst_size.y;
        }

        // Only issue the copy if a non-empty region remains after clamping.
        if (src_left < src_right && src_bottom > src_top)
        {
            D3D12_BOX src_box = ZI;
            src_box.left = src_left;
            src_box.top = src_top;
            src_box.right = src_right;
            src_box.bottom = src_bottom;
            src_box.back = 1;
            ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, dst_left, dst_top, 0, &src_loc, &src_box);
        }
    }

    {
        u32 barriers_count = 0;
        D3D12_RESOURCE_BARRIER rbs[2] = ZI;
        /* Transition backbuffer to PRESENT */
        {
            D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
            rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
            rb->Transition.pResource = dst->d3d_resource;
            rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
            rb->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
            rb->Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT;
        }
        /* Transition texture to original state */
        if (texture->state != old_texture_state)
        {
            D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
            rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
            rb->Transition.pResource = texture->d3d_resource;
            rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
            rb->Transition.StateBefore = texture->state;
            rb->Transition.StateAfter = old_texture_state;
            texture->state = rb->Transition.StateAfter;
        }
        ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs);
    }

    i64 fence_target = GPU_D12_EndRawCommandList(dx12_cl);
    return fence_target;
}
|
|
|
|
////////////////////////////////
|
|
//~ Queue sync job
|
|
|
|
// Dedicated job (never returns): mirrors each queue's D3D12 submit-fence
// progress into its engine-side sync_fence so fibers can YieldOnFence on
// GPU completion. For each queue it tracks the last value seen, publishes
// any new completed value, and re-arms a Win32 event for the next value
// (completed + 1). Events start signaled so the first loop iteration runs.
JobDef(GPU_D12_StartQueueSync, _, __)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    HANDLE queue_fences_events[GPU_NumQueues] = ZI;
    i64 queue_fences_seen[GPU_NumQueues] = ZI;
    for (i32 i = 0; i < countof(queue_fences_events); ++i)
    {
        // Auto-reset event, initially signaled (third arg = 1).
        queue_fences_events[i] = CreateEvent(0, 0, 1, 0);
        // -1 so a completed value of 0 still counts as progress.
        queue_fences_seen[i] = -1;
    }
    for (;;)
    {
        // Wake when ANY queue's armed event fires (bWaitAll = 0).
        WaitForMultipleObjects(countof(queue_fences_events), queue_fences_events, 0, INFINITE);
        for (GPU_QueueKind queue_kind = 0; queue_kind < GPU_NumQueues; ++queue_kind)
        {
            GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
            i64 last_seen = queue_fences_seen[queue_kind];
            i64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence);
            if (completed > last_seen)
            {
                // Publish progress to waiters, then re-arm for the next
                // fence value on this queue.
                SetFence(&queue->sync_fence, completed);
                queue_fences_seen[queue_kind] = completed;
                ID3D12Fence_SetEventOnCompletion(queue->submit_fence, completed + 1, queue_fences_events[queue_kind]);
            }
        }
    }
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdef Startup hook
|
|
|
|
// @hookdef backend dispatch: routes the generic GPU startup hook to the
// D3D12 implementation.
void GPU_Startup(void)
{
    GPU_D12_Startup();
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdecl Fence hooks
|
|
|
|
// Returns the engine-side sync fence for a queue kind. This fence is driven
// by the GPU_D12_StartQueueSync job as the GPU completes submissions.
Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind)
{
    return &GPU_D12_QueueFromKind(queue_kind)->sync_fence;
}
|
|
|
|
// Makes queue `a` wait (GPU-side, non-blocking for the CPU) until queue
// `b`'s submit fence reaches `b_target_fence_value`.
void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value)
{
    GPU_D12_Queue *waiting_queue = GPU_D12_QueueFromKind(a);
    GPU_D12_Queue *signal_queue = GPU_D12_QueueFromKind(b);
    ID3D12CommandQueue_Wait(waiting_queue->d3d_queue, signal_queue->submit_fence, b_target_fence_value);
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdef Rasterizer helper hooks
|
|
|
|
// Builds a viewport covering `rect` with the full default depth range [0, 1].
GPU_Viewport GPU_ViewportFromRect(Rect rect)
{
    GPU_Viewport viewport = ZI;
    viewport.top_left_x = rect.x;
    viewport.top_left_y = rect.y;
    viewport.width      = rect.width;
    viewport.height     = rect.height;
    viewport.min_depth  = 0.0f;
    viewport.max_depth  = 1.0f;
    return viewport;
}
|
|
|
|
// Builds a scissor rectangle from `rect`, converting position + extent into
// the left/top/right/bottom edge form scissors use.
GPU_Scissor GPU_ScissorFromRect(Rect rect)
{
    GPU_Scissor scissor = ZI;
    scissor.left   = rect.x;
    scissor.top    = rect.y;
    scissor.right  = rect.x + rect.width;
    scissor.bottom = rect.y + rect.height;
    return scissor;
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdef Resource hooks
|
|
|
|
/* Acquires a GPU resource matching `desc`.
   Acquisition order: (1) a compatible resource from the per-hash reuse bins
   (d3d resource and descriptors preserved), (2) a recycled struct from the
   global free list (zeroed, so the d3d resource is recreated), (3) a fresh
   cacheline-aligned struct from the permanent arena.
   Fixes vs. previous revision: texture-creation failure now reports a
   texture (not "buffer") error; stray double semicolons removed. */
GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Resource *r = 0;

    if (desc.kind == GPU_ResourceKind_Unknown)
    {
        Panic(Lit("Unknown gpu resource type"));
    }

    /* Buffers are sized up-front: stride clamped to >= 1, total size aligned
       and clamped to a 64 KiB minimum. */
    u64 buffer_size = 0;
    if (desc.kind == GPU_ResourceKind_Buffer)
    {
        desc.buffer.stride = MaxU32(desc.buffer.stride, 1);
        buffer_size = MaxU64(AlignU64Pow2(desc.buffer.count * desc.buffer.stride), Kibi(64));
    }

    u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size);
    /* Grab reusable */
    {
        u64 bin_index = reuse_hash % countof(g->resource_reuse_bins);
        GPU_D12_ResourceReuseListBin *bin = &g->resource_reuse_bins[bin_index];
        {
            Lock lock = LockE(&bin->mutex);
            {
                GPU_D12_ResourceReuseList *list = bin->first;
                for (; list; list = list->next)
                {
                    if (list->hash == reuse_hash) break;
                }
                if (list)
                {
                    r = list->first;
                    list->first = r->next_free;
                    /* Empty lists are moved onto the bin's free-list for reuse. */
                    if (!list->first)
                    {
                        DllRemove(bin->first, bin->last, list);
                        StackPush(bin->first_free, list);
                        list->prev = 0;
                    }
                    r->next_free = 0;
                }
            }
            Unlock(&lock);
        }
    }

    /* Grab from free list */
    if (!r)
    {
        {
            Lock lock = LockE(&g->free_resources_mutex);
            r = g->first_free_resource;
            if (r)
            {
                g->first_free_resource = r->next_free;
            }
            Unlock(&lock);
        }
        if (r)
        {
            /* Free-list entries carry stale state; zero so the d3d resource
               and descriptors are recreated below. */
            ZeroStruct(r);
        }
    }

    /* Push new */
    if (!r)
    {
        Arena *perm = PermArena();
        PushAlign(perm, CachelineSize);
        r = PushStruct(perm, GPU_D12_Resource);
        PushAlign(perm, CachelineSize);
    }

    /* Create d3d resource (skipped when reusing one from a reuse bin) */
    if (!r->d3d_resource)
    {
        switch (desc.kind)
        {
            case GPU_ResourceKind_Sampler: break; /* samplers are descriptor-only */

            /* Buffer */
            case GPU_ResourceKind_Buffer:
            {
                D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
                D3D12_HEAP_PROPERTIES heap_props = {
                    .Type = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_HEAP_TYPE_UPLOAD
                          : desc.buffer.heap_kind == GPU_HeapKind_Download ? D3D12_HEAP_TYPE_READBACK
                          : D3D12_HEAP_TYPE_DEFAULT
                };
                Assert(!(desc.flags & GPU_ResourceFlag_Renderable)); /* buffers cannot be render targets */
                D3D12_RESOURCE_DESC d3d_desc = ZI;
                d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
                d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
                d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
                d3d_desc.Alignment = 0;
                d3d_desc.Width = buffer_size;
                d3d_desc.Height = 1;
                d3d_desc.DepthOrArraySize = 1;
                d3d_desc.MipLevels = 1;
                d3d_desc.SampleDesc.Count = 1;
                d3d_desc.SampleDesc.Quality = 0;
                d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_Writable);
                r->state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST;
                HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, r->state, 0, &IID_ID3D12Resource, (void **)&r->d3d_resource);
                if (FAILED(hr))
                {
                    /* TODO: Don't panic */
                    Panic(Lit("Failed to create buffer resource"));
                }
                r->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->d3d_resource);
            } break;

            /* Texture */
            case GPU_ResourceKind_Texture1D:
            case GPU_ResourceKind_Texture2D:
            case GPU_ResourceKind_Texture3D:
            {
                D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
                D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
                D3D12_RESOURCE_DESC d3d_desc = ZI;
                d3d_desc.Dimension = desc.kind == GPU_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D
                                   : desc.kind == GPU_ResourceKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D
                                   : D3D12_RESOURCE_DIMENSION_TEXTURE3D;
                d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
                d3d_desc.Format = GPU_D12_DxgiFormatFromGpuFormat(desc.texture.format);
                d3d_desc.Alignment = 0;
                d3d_desc.Width = desc.texture.size.x;
                d3d_desc.Height = desc.texture.size.y;
                d3d_desc.DepthOrArraySize = desc.texture.size.z;
                /* MipLevels == 0 asks D3D12 to generate the full mip chain. */
                d3d_desc.MipLevels = (desc.flags & GPU_ResourceFlag_MaxMipLevels) ? 0 : MaxI32(desc.texture.mip_levels, 1);
                d3d_desc.SampleDesc.Count = 1;
                d3d_desc.SampleDesc.Quality = 0;
                d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_Writable);
                d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_Renderable);
                r->state = D3D12_RESOURCE_STATE_COPY_DEST;
                /* Optimized clear value is only legal on render targets. */
                D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } };
                clear_value.Color[0] = desc.clear_color.x;
                clear_value.Color[1] = desc.clear_color.y;
                clear_value.Color[2] = desc.clear_color.z;
                clear_value.Color[3] = desc.clear_color.w;
                D3D12_CLEAR_VALUE *clear_value_ptr = d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0;
                HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, r->state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->d3d_resource);
                if (FAILED(hr))
                {
                    /* TODO: Don't panic */
                    Panic(Lit("Failed to create texture resource"));
                }
            } break;
        }
    }

    /* Create texture srv descriptor */
    if (desc.kind == GPU_ResourceKind_Texture1D
        || desc.kind == GPU_ResourceKind_Texture2D
        || desc.kind == GPU_ResourceKind_Texture3D)
    {
        if (!r->srv_descriptor)
        {
            r->srv_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap);
        }
        ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, 0, r->srv_descriptor->handle);
    }

    /* Create buffer srv descriptor (structured-buffer view; readback buffers
       are never shader-visible) */
    if (desc.kind == GPU_ResourceKind_Buffer
        && desc.buffer.heap_kind != GPU_HeapKind_Download
        && desc.buffer.count > 0)
    {
        if (!r->srv_descriptor)
        {
            r->srv_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap);
        }
        D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = ZI;
        srv_desc.Format = DXGI_FORMAT_UNKNOWN;
        srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
        srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
        srv_desc.Buffer.FirstElement = 0;
        srv_desc.Buffer.NumElements = desc.buffer.count;
        srv_desc.Buffer.StructureByteStride = desc.buffer.stride;
        srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
        ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, &srv_desc, r->srv_descriptor->handle);
    }

    /* Create uav descriptor */
    if (desc.flags & GPU_ResourceFlag_Writable)
    {
        if (!r->uav_descriptor)
        {
            r->uav_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap);
        }
        ID3D12Device_CreateUnorderedAccessView(g->device, r->d3d_resource, 0, 0, r->uav_descriptor->handle);
    }

    /* Create rtv descriptor */
    if (desc.flags & GPU_ResourceFlag_Renderable)
    {
        if (!r->rtv_descriptor)
        {
            r->rtv_descriptor = GPU_D12_AcquireDescriptor(g->rtv_heap);
        }
        ID3D12Device_CreateRenderTargetView(g->device, r->d3d_resource, 0, r->rtv_descriptor->handle);
    }

    /* Create sampler descriptor */
    if (desc.kind == GPU_ResourceKind_Sampler)
    {
        if (!r->sampler_descriptor)
        {
            r->sampler_descriptor = GPU_D12_AcquireDescriptor(g->sampler_heap);
        }
        D3D12_SAMPLER_DESC d3d_desc = ZI;
        d3d_desc.Filter = (D3D12_FILTER)desc.sampler.filter;
        d3d_desc.AddressU = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.x;
        d3d_desc.AddressV = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.y;
        d3d_desc.AddressW = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.z;
        d3d_desc.MipLODBias = desc.sampler.mip_lod_bias;
        d3d_desc.MaxAnisotropy = MaxU32(desc.sampler.max_anisotropy, 1);
        d3d_desc.ComparisonFunc = (D3D12_COMPARISON_FUNC)desc.sampler.comparison;
        d3d_desc.BorderColor[0] = desc.sampler.border_color.x;
        d3d_desc.BorderColor[1] = desc.sampler.border_color.y;
        d3d_desc.BorderColor[2] = desc.sampler.border_color.z;
        d3d_desc.BorderColor[3] = desc.sampler.border_color.w;
        d3d_desc.MinLOD = desc.sampler.min_lod;
        d3d_desc.MaxLOD = desc.sampler.max_lod;

        /* Defaults: zero address mode means "unspecified" -> clamp */
        if (d3d_desc.AddressU == 0) d3d_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
        if (d3d_desc.AddressV == 0) d3d_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
        if (d3d_desc.AddressW == 0) d3d_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
        if (d3d_desc.MaxLOD >= F32Infinity)
        {
            d3d_desc.MaxLOD = D3D12_FLOAT32_MAX;
        }
        ID3D12Device_CreateSampler(g->device, &d3d_desc, r->sampler_descriptor->handle);
    }

    r->desc = desc;
    r->buffer_size = buffer_size;

    return (GPU_Resource *)r;
}
|
|
|
|
/* Releases a resource previously returned by GPU_AcquireResource.
   Descriptors are always returned to their heaps. With GPU_ReleaseFlag_Reuse
   the struct (with its live d3d resource) is parked in the hash-matched
   reuse bin; otherwise the d3d resource is released and the struct goes on
   the global free list.
   NOTE(review): the caller must guarantee the GPU is done with the resource
   before releasing — no fence tracking happens here; confirm with callers. */
void GPU_ReleaseResource(GPU_Resource *gpu_resource, GPU_ReleaseFlag flags)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;

    /* Descriptors are returned unconditionally; reacquisition recreates views. */
    if (r->srv_descriptor)
    {
        GPU_D12_ReleaseDescriptor(r->srv_descriptor);
        r->srv_descriptor = 0;
    }
    if (r->uav_descriptor)
    {
        GPU_D12_ReleaseDescriptor(r->uav_descriptor);
        r->uav_descriptor = 0;
    }
    if (r->rtv_descriptor)
    {
        GPU_D12_ReleaseDescriptor(r->rtv_descriptor);
        r->rtv_descriptor = 0;
    }
    if (r->sampler_descriptor)
    {
        GPU_D12_ReleaseDescriptor(r->sampler_descriptor);
        r->sampler_descriptor = 0;
    }

    if (flags & GPU_ReleaseFlag_Reuse)
    {
        /* Same hash as acquisition, so a future GPU_AcquireResource with an
           equivalent desc finds this resource in the same bin/list. */
        GPU_ResourceDesc desc = r->desc;
        u64 buffer_size = r->buffer_size;
        u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size);
        u64 bin_index = reuse_hash % countof(g->resource_reuse_bins);
        GPU_D12_ResourceReuseListBin *bin = &g->resource_reuse_bins[bin_index];
        {
            Lock lock = LockE(&bin->mutex);
            {
                /* Find (or create) the per-hash list within the bin. */
                GPU_D12_ResourceReuseList *list = bin->first;
                for (; list; list = list->next)
                {
                    if (list->hash == reuse_hash) break;
                }
                if (!list)
                {
                    /* Recycle a retired list node before allocating a new one. */
                    list = bin->first_free;
                    if (list)
                    {
                        bin->first_free = list->next;
                    }
                    else
                    {
                        Arena *perm = PermArena();
                        PushAlign(perm, CachelineSize);
                        list = PushStruct(perm, GPU_D12_ResourceReuseList);
                        PushAlign(perm, CachelineSize);
                    }
                    list->hash = reuse_hash;
                    DllPushBack(bin->first, bin->last, list);
                }
                StackPushN(list->first, r, next_free);
            }
            Unlock(&lock);
        }
    }
    else
    {
        /* No reuse: drop the underlying d3d resource (samplers have none —
           hence no default case) and recycle the struct itself. */
        switch (r->desc.kind)
        {
            case GPU_ResourceKind_Buffer:
            case GPU_ResourceKind_Texture1D:
            case GPU_ResourceKind_Texture2D:
            case GPU_ResourceKind_Texture3D:
            {
                ID3D12Resource_Release(r->d3d_resource);
            }
        }
        Lock lock = LockE(&g->free_resources_mutex);
        r->next_free = g->first_free_resource;
        g->first_free_resource = r;
        Unlock(&lock);
    }

}
|
|
|
|
/* Returns the bindless SRV heap index for `gpu_resource`, or U32Max when the
   resource is null or has no SRV descriptor. */
u32 GPU_GetReadableId(GPU_Resource *gpu_resource)
{
    GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
    return (r && r->srv_descriptor) ? r->srv_descriptor->index : U32Max;
}
|
|
|
|
/* Returns the bindless UAV heap index for `gpu_resource`, or U32Max when the
   resource is null or has no UAV descriptor. */
u32 GPU_GetWritableId(GPU_Resource *gpu_resource)
{
    GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
    return (r && r->uav_descriptor) ? r->uav_descriptor->index : U32Max;
}
|
|
|
|
/* Returns the sampler heap index for `gpu_resource`, or U32Max when the
   resource is null or has no sampler descriptor. */
u32 GPU_GetSamplerId(GPU_Resource *gpu_resource)
{
    GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
    return (r && r->sampler_descriptor) ? r->sampler_descriptor->index : U32Max;
}
|
|
|
|
/* Returns the (x, y) extent recorded in the texture's acquisition desc. */
Vec2I32 GPU_GetTextureSize2D(GPU_Resource *gpu_resource)
{
    Vec3I32 size = ((GPU_D12_Resource *)gpu_resource)->desc.texture.size;
    return VEC2I32(size.x, size.y);
}
|
|
|
|
/* Returns the full (x, y, z) extent recorded in the texture's acquisition desc. */
Vec3I32 GPU_GetTextureSize3D(GPU_Resource *gpu_resource)
{
    GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
    return r->desc.texture.size;
}
|
|
|
|
/* Queries D3D12 for the total byte size of a staging copy of subresource 0
   of `gpu_resource` (the row-pitch-aligned copyable footprint). */
u64 GPU_GetFootprintSize(GPU_Resource *gpu_resource)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;

    D3D12_RESOURCE_DESC d3d_desc = ZI;
    ID3D12Resource_GetDesc(r->d3d_resource, &d3d_desc);

    D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI;
    u64 total_size = 0;
    u64 row_size = 0;
    u32 num_rows = 0;
    ID3D12Device_GetCopyableFootprints(g->device, &d3d_desc, 0, 1, 0, &footprint, &num_rows, &row_size, &total_size);
    return total_size;
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdef Command list hooks
|
|
|
|
/* Begins recording a command list targeting `queue_kind`.
   Reuses a list from the fiber-local free list when possible; otherwise
   allocates one from the permanent arena.
   Fix: PermArena() is now only fetched on the allocation path instead of
   unconditionally. */
GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue_kind)
{
    GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId());
    GPU_D12_CommandList *cl = f->first_free_command_list;
    if (cl)
    {
        StackPop(f->first_free_command_list);
        ZeroStruct(cl);
    }
    else
    {
        Arena *perm = PermArena();
        cl = PushStruct(perm, GPU_D12_CommandList);
    }
    cl->queue_kind = queue_kind;
    return (GPU_CommandList *)cl;
}
|
|
|
|
/* Translates the recorded high-level command list into a raw D3D12 command
   list, submits it, recycles the command nodes and list onto the fiber's
   free lists, and returns the queue fence value that signals completion.
   Fixes vs. previous revision: index-buffer formats are assigned the DXGI
   constants directly instead of being laundered through
   GPU_D12_DxgiFormatFromGpuFormat (which expects a GPU_Format); the index
   buffer pointer is null-checked before dereference. */
i64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId());
    GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl;
    GPU_QueueKind queue_kind = cl->queue_kind;
    GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
    TempArena scratch = BeginScratchNoConflict();

    /* Render-target slot state carried across commands while replaying. */
    GPU_D12_Resource *slotted_render_targets[GPU_MaxRenderTargets] = ZI;
    GPU_D12_Resource *bound_render_targets[GPU_MaxRenderTargets] = ZI;

    /* Begin dx12 command list */
    GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(queue_kind);
    ID3D12GraphicsCommandList *rcl = dx12_cl->cl;

    /* Lazily-set per-list D3D12 state (set once on first use). */
    b32 graphics_rootsig_set = 0;
    b32 compute_rootsig_set = 0;
    b32 descriptor_heaps_set = 0;
    GPU_D12_Pipeline *bound_pipeline = 0;

    /* Process gpu commands into dx12 commands */
    {
        GPU_D12_Command *cmd = cl->first;
        while (cmd)
        {
            switch (cmd->kind)
            {
                default: break;

                //- Resource barrier: consume the whole contiguous run of
                //  barrier commands and submit them as one batched call
                case GPU_D12_CommandKind_TransitionToSrv:
                case GPU_D12_CommandKind_TransitionToUav:
                case GPU_D12_CommandKind_TransitionToRtv:
                case GPU_D12_CommandKind_TransitionToCopySrc:
                case GPU_D12_CommandKind_TransitionToCopyDst:
                case GPU_D12_CommandKind_FlushUav:
                {
                    /* Generation stamp dedupes resources within this batch. */
                    u64 barrier_gen = 1 + Atomic64FetchAdd(&g->resource_barrier_gen.v, 1);

                    /* Build barriers batch list */
                    Struct(TmpBarrier) { TmpBarrier *next; GPU_D12_Resource *r; };
                    u32 max_barriers_count = 0;
                    TmpBarrier *first_barrier = 0;
                    TmpBarrier *last_barrier = 0;
                    while (cmd && (cmd->kind == GPU_D12_CommandKind_TransitionToSrv
                                   || cmd->kind == GPU_D12_CommandKind_TransitionToUav
                                   || cmd->kind == GPU_D12_CommandKind_TransitionToRtv
                                   || cmd->kind == GPU_D12_CommandKind_TransitionToCopySrc
                                   || cmd->kind == GPU_D12_CommandKind_TransitionToCopyDst
                                   || cmd->kind == GPU_D12_CommandKind_FlushUav))
                    {
                        D3D12_RESOURCE_BARRIER_TYPE type = ZI;
                        D3D12_RESOURCE_STATES state_after = ZI;
                        GPU_D12_Resource *resource = cmd->barrier.resource;

                        switch (cmd->kind)
                        {
                            default: break;
                            case GPU_D12_CommandKind_TransitionToSrv:
                            {
                                type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                                state_after = D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE;
                            } break;
                            case GPU_D12_CommandKind_TransitionToUav:
                            {
                                type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                                state_after = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
                            } break;
                            case GPU_D12_CommandKind_TransitionToRtv:
                            {
                                type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                                state_after = D3D12_RESOURCE_STATE_RENDER_TARGET;
                                /* RTV transitions also bind the target to a slot
                                   for subsequent rasterize commands. */
                                i32 slot = cmd->barrier.rt_slot;
                                if (slot >= 0 && slot < countof(slotted_render_targets))
                                {
                                    slotted_render_targets[slot] = resource;
                                }
                            } break;
                            case GPU_D12_CommandKind_TransitionToCopySrc:
                            {
                                type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                                state_after = D3D12_RESOURCE_STATE_COPY_SOURCE;
                            } break;
                            case GPU_D12_CommandKind_TransitionToCopyDst:
                            {
                                type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                                state_after = D3D12_RESOURCE_STATE_COPY_DEST;
                            } break;
                            case GPU_D12_CommandKind_FlushUav:
                            {
                                type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
                            } break;
                        }

                        b32 skip = 0;
                        if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV && resource->barrier_gen == barrier_gen)
                        {
                            /* Skip UAV transitions on resources that already have transition in the batch */
                            skip = 1;
                        }
                        if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && resource->barrier_state_after == state_after)
                        {
                            /* Skip transitions into existing state */
                            skip = 1;
                        }

                        if (!skip)
                        {
                            resource->barrier_type = type;
                            resource->barrier_state_after = state_after;
                            if (resource->barrier_gen != barrier_gen)
                            {
                                TmpBarrier *b = PushStruct(scratch.arena, TmpBarrier);
                                resource->barrier_gen = barrier_gen;
                                b->r = resource;
                                QueuePush(first_barrier, last_barrier, b);
                                ++max_barriers_count;
                            }
                        }

                        cmd = cmd->next;
                    }

                    /* Submit batched barriers */
                    /* FIXME: Transitions from UAV -> UAV should insert UAV barrier */
                    u32 barriers_count = 0;
                    D3D12_RESOURCE_BARRIER *rbs = PushStructs(scratch.arena, D3D12_RESOURCE_BARRIER, max_barriers_count);
                    for (TmpBarrier *b = first_barrier; b; b = b->next)
                    {
                        GPU_D12_Resource *resource = b->r;
                        D3D12_RESOURCE_BARRIER_TYPE type = resource->barrier_type;
                        D3D12_RESOURCE_STATES state_before = resource->state;
                        D3D12_RESOURCE_STATES state_after = resource->barrier_state_after;
                        /* Drop transitions that ended up as no-ops. */
                        if (!(type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && state_before == state_after))
                        {
                            D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
                            rb->Type = resource->barrier_type;
                            if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION)
                            {
                                rb->Transition.pResource = resource->d3d_resource;
                                rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
                                rb->Transition.StateBefore = state_before;
                                rb->Transition.StateAfter = state_after;
                                resource->state = state_after;
                            }
                            else if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_UAV)
                            {
                                rb->UAV.pResource = resource->d3d_resource;
                            }
                        }
                    }
                    if (barriers_count > 0)
                    {
                        ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs);
                    }
                } break;

                //- Clear rtv
                case GPU_D12_CommandKind_ClearRtv:
                {
                    GPU_D12_Resource *resource = cmd->clear.resource;
                    Assert(resource->state == D3D12_RESOURCE_STATE_RENDER_TARGET);
                    f32 clear_color[4] = ZI;
                    clear_color[0] = resource->desc.clear_color.x;
                    clear_color[1] = resource->desc.clear_color.y;
                    clear_color[2] = resource->desc.clear_color.z;
                    clear_color[3] = resource->desc.clear_color.w;
                    ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, resource->rtv_descriptor->handle, clear_color, 0, 0);
                    cmd = cmd->next;
                } break;

                //- Copy resource
                case GPU_D12_CommandKind_Copy:
                {
                    GPU_D12_Resource *dst = cmd->copy.dst;
                    GPU_D12_Resource *src = cmd->copy.src;

                    D3D12_RESOURCE_DESC dst_desc = ZI;
                    D3D12_RESOURCE_DESC src_desc = ZI;
                    ID3D12Resource_GetDesc(dst->d3d_resource, &dst_desc);
                    ID3D12Resource_GetDesc(src->d3d_resource, &src_desc);

                    if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
                    { /* Copy buffer -> buffer */
                        u64 dst_len = dst->desc.buffer.count * dst->desc.buffer.stride;
                        u64 src_len = src->desc.buffer.count * src->desc.buffer.stride;
                        u64 cpy_len = MinU64(dst_len, src_len);
                        if (cpy_len > 0)
                        {
                            ID3D12GraphicsCommandList_CopyBufferRegion(rcl, dst->d3d_resource, 0, src->d3d_resource, 0, cpy_len);
                        }
                    }
                    else if (src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
                    { /* Copy buffer -> texture */
                        /* The source buffer is interpreted through the
                           destination texture's copyable footprint. */
                        D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_placed_footprint = ZI;
                        ID3D12Device_GetCopyableFootprints(g->device, &dst_desc, 0, 1, 0, &dst_placed_footprint, 0, 0, 0);

                        D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI;
                        dst_loc.pResource = dst->d3d_resource;
                        dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
                        dst_loc.SubresourceIndex = 0;

                        D3D12_TEXTURE_COPY_LOCATION src_loc = ZI;
                        src_loc.pResource = src->d3d_resource;
                        src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
                        src_loc.PlacedFootprint = dst_placed_footprint;

                        ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, 0, 0, 0, &src_loc, 0);
                    }
                    else if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
                    { /* Copy texture -> buffer */
                        /* TODO */
                        Assert(0);
                    }
                    else if (dst_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER)
                    { /* Copy texture -> texture */
                        /* TODO */
                        Assert(0);
                    }

                    cmd = cmd->next;
                } break;

                //- Dispatch Vs/Ps shader
                case GPU_D12_CommandKind_Rasterize:
                {
                    GPU_D12_Pipeline *pipeline = 0;
                    {
                        GPU_D12_PipelineDesc pipeline_desc = ZI;
                        pipeline_desc.vs = cmd->rasterize.vs;
                        pipeline_desc.ps = cmd->rasterize.ps;
                        {
                            pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED;
                            switch (cmd->rasterize.mode)
                            {
                                default: Assert(0); break;
                                case GPU_RasterizeMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break;
                                case GPU_RasterizeMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break;
                                case GPU_RasterizeMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break;
                                case GPU_RasterizeMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
                                case GPU_RasterizeMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
                            }
                        }
                        /* Pipeline formats come from the currently slotted render targets. */
                        for (u32 i = 0; i < cmd->rasterize.rts_count; ++i)
                        {
                            GPU_D12_Resource *r = slotted_render_targets[i];
                            if (r)
                            {
                                pipeline_desc.render_target_formats[i] = r->desc.texture.format;
                            }
                            else
                            {
                                Assert(0); /* No bound render target in slot */
                                pipeline_desc.render_target_formats[i] = GPU_Format_Unknown;
                            }
                        }
                        pipeline = GPU_D12_PipelineFromDesc(pipeline_desc);
                    }

                    if (pipeline
                        && cmd->rasterize.index_buffer
                        && cmd->rasterize.index_buffer->desc.buffer.count > 0)
                    {
                        /* Set descriptor heaps */
                        if (!descriptor_heaps_set)
                        {
                            ID3D12DescriptorHeap *heaps[] = { g->cbv_srv_uav_heap->d3d_heap, g->sampler_heap->d3d_heap };
                            ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps);
                            descriptor_heaps_set = 1;
                        }

                        /* Bind rootsig */
                        if (!graphics_rootsig_set)
                        {
                            ID3D12GraphicsCommandList_SetGraphicsRootSignature(rcl, g->bindless_rootsig);
                            graphics_rootsig_set = 1;
                        }

                        /* Bind pipeline */
                        if (pipeline != bound_pipeline)
                        {
                            ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso);
                            bound_pipeline = pipeline;
                        }

                        /* Fill signature */
                        /* TODO: Only upload dirty */
                        {
                            u32 sig_size = cmd->rasterize.sig_size;
                            void *sig = cmd->rasterize.sig;
                            u32 num32bit = sig_size / 4;
                            ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(rcl, 0, num32bit, sig, 0);
                        }

                        /* Set rasterizer state */
                        /* TODO: Only set dirty */
                        {
                            D3D12_RECT scissor = ZI;
                            scissor.left = cmd->rasterize.scissor.left;
                            scissor.top = cmd->rasterize.scissor.top;
                            scissor.right = cmd->rasterize.scissor.right;
                            scissor.bottom = cmd->rasterize.scissor.bottom;
                            D3D12_VIEWPORT viewport = ZI;
                            viewport.TopLeftX = cmd->rasterize.viewport.top_left_x;
                            viewport.TopLeftY = cmd->rasterize.viewport.top_left_y;
                            viewport.Width = cmd->rasterize.viewport.width;
                            viewport.Height = cmd->rasterize.viewport.height;
                            viewport.MinDepth = cmd->rasterize.viewport.min_depth;
                            viewport.MaxDepth = cmd->rasterize.viewport.max_depth;
                            ID3D12GraphicsCommandList_RSSetScissorRects(rcl, 1, &scissor);
                            ID3D12GraphicsCommandList_RSSetViewports(rcl, 1, &viewport);
                        }

                        /* Set topology */
                        /* TODO: Only set dirty */
                        {
                            D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
                            switch (cmd->rasterize.mode)
                            {
                                default: Assert(0); break;
                                case GPU_RasterizeMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break;
                                case GPU_RasterizeMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break;
                                case GPU_RasterizeMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break;
                                case GPU_RasterizeMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break;
                                case GPU_RasterizeMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break;
                            }
                            ID3D12GraphicsCommandList_IASetPrimitiveTopology(rcl, topology);
                        }

                        /* Set index buffer */
                        /* TODO: Only set dirty */
                        u32 indices_count = 0;
                        {
                            GPU_D12_Resource *indices = cmd->rasterize.index_buffer;
                            D3D12_INDEX_BUFFER_VIEW ibv = ZI;
                            ibv.BufferLocation = indices->buffer_gpu_address;
                            /* These are already DXGI formats; no GPU_Format conversion applies. */
                            if (indices->desc.buffer.stride == 2)
                            {
                                ibv.Format = DXGI_FORMAT_R16_UINT;
                            }
                            else
                            {
                                Assert(indices->desc.buffer.stride == 4);
                                ibv.Format = DXGI_FORMAT_R32_UINT;
                            }
                            ibv.SizeInBytes = indices->desc.buffer.count * indices->desc.buffer.stride;
                            indices_count = indices->desc.buffer.count;
                            ID3D12GraphicsCommandList_IASetIndexBuffer(rcl, &ibv);
                        }

                        /* Bind render targets (only when the bound set changed) */
                        {
                            b32 om_dirty = 0;
                            D3D12_CPU_DESCRIPTOR_HANDLE rtvs[countof(bound_render_targets)] = ZI;
                            for (u32 i = 0; i < cmd->rasterize.rts_count; ++i)
                            {
                                GPU_D12_Resource *target = slotted_render_targets[i];
                                if (bound_render_targets[i] != target)
                                {
                                    bound_render_targets[i] = target;
                                    om_dirty = 1;
                                }
                                rtvs[i] = target->rtv_descriptor->handle;
                            }
                            if (om_dirty)
                            {
                                ID3D12GraphicsCommandList_OMSetRenderTargets(rcl, cmd->rasterize.rts_count, rtvs, 0, 0);
                            }
                        }

                        /* Dispatch */
                        ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, indices_count, cmd->rasterize.instances_count, 0, 0, 0);
                    }

                    cmd = cmd->next;
                } break;

                //- Dispatch compute shader
                case GPU_D12_CommandKind_Compute:
                {
                    GPU_D12_Pipeline *pipeline = 0;
                    {
                        GPU_D12_PipelineDesc pipeline_desc = ZI;
                        pipeline_desc.cs = cmd->compute.cs;
                        pipeline = GPU_D12_PipelineFromDesc(pipeline_desc);
                    }

                    if (pipeline)
                    {
                        /* Set descriptor heaps */
                        if (!descriptor_heaps_set)
                        {
                            ID3D12DescriptorHeap *heaps[] = { g->cbv_srv_uav_heap->d3d_heap, g->sampler_heap->d3d_heap };
                            ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps);
                            descriptor_heaps_set = 1;
                        }

                        /* Bind rootsig */
                        if (!compute_rootsig_set)
                        {
                            ID3D12GraphicsCommandList_SetComputeRootSignature(rcl, g->bindless_rootsig);
                            compute_rootsig_set = 1;
                        }

                        /* Bind pipeline */
                        if (pipeline != bound_pipeline)
                        {
                            ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso);
                            bound_pipeline = pipeline;
                        }

                        /* Fill signature */
                        /* TODO: Only upload dirty */
                        {
                            u32 sig_size = cmd->compute.sig_size;
                            void *sig = cmd->compute.sig;
                            u32 num32bit = sig_size / 4;
                            ID3D12GraphicsCommandList_SetComputeRoot32BitConstants(rcl, 0, num32bit, sig, 0);
                        }

                        /* Dispatch */
                        ID3D12GraphicsCommandList_Dispatch(rcl, cmd->compute.num_threads_x, cmd->compute.num_threads_y, cmd->compute.num_threads_z);
                    }

                    cmd = cmd->next;
                } break;
            }
        }
    }

    /* End dx12 command list */
    u64 fence_target = GPU_D12_EndRawCommandList(dx12_cl);

    /* Free commands */
    if (cl->last)
    {
        cl->last->next = f->first_free_command;
        f->first_free_command = cl->first;
    }

    /* Free command list */
    StackPush(f->first_free_command_list, cl);

    EndScratch(scratch);
    return fence_target;
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdef Profiling helper hooks
|
|
|
|
/* Emits a named profiling marker into the command list.
   NOTE: not implemented for the D3D12 backend yet — currently a no-op. */
void GPU_ProfN(GPU_CommandList *cl, String name)
{
    /* TODO */
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdef Barrier hooks
|
|
|
|
/* Records a barrier transitioning `resource` into the shader-readable (SRV) state. */
void GPU_TransitionToReadable(GPU_CommandList *cl, GPU_Resource *resource)
{
    GPU_D12_Command *c = GPU_D12_PushCmd((GPU_D12_CommandList *)cl);
    c->kind = GPU_D12_CommandKind_TransitionToSrv;
    c->barrier.resource = (GPU_D12_Resource *)resource;
}
|
|
|
|
/* Records a barrier transitioning `resource` into the unordered-access (UAV) state. */
void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource)
{
    GPU_D12_Command *c = GPU_D12_PushCmd((GPU_D12_CommandList *)cl);
    c->kind = GPU_D12_CommandKind_TransitionToUav;
    c->barrier.resource = (GPU_D12_Resource *)resource;
}
|
|
|
|
/* Records a barrier transitioning `resource` into the render-target state and
   slots it into render-target slot `slot` for subsequent rasterize commands. */
void GPU_TransitionToRenderable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot)
{
    GPU_D12_Command *c = GPU_D12_PushCmd((GPU_D12_CommandList *)cl);
    c->kind = GPU_D12_CommandKind_TransitionToRtv;
    c->barrier.resource = (GPU_D12_Resource *)resource;
    c->barrier.rt_slot = slot;
}
|
|
|
|
/* Records a barrier transitioning `resource` into the copy-source state. */
void GPU_TransitionToCopySrc(GPU_CommandList *cl, GPU_Resource *resource)
{
    GPU_D12_Command *c = GPU_D12_PushCmd((GPU_D12_CommandList *)cl);
    c->kind = GPU_D12_CommandKind_TransitionToCopySrc;
    c->barrier.resource = (GPU_D12_Resource *)resource;
}
|
|
|
|
/* Records a barrier transitioning `resource` into the copy-destination state. */
void GPU_TransitionToCopyDst(GPU_CommandList *cl, GPU_Resource *resource)
{
    GPU_D12_Command *c = GPU_D12_PushCmd((GPU_D12_CommandList *)cl);
    c->kind = GPU_D12_CommandKind_TransitionToCopyDst;
    c->barrier.resource = (GPU_D12_Resource *)resource;
}
|
|
|
|
/* Records a UAV barrier on `resource`, ordering prior UAV writes before later reads. */
void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource)
{
    GPU_D12_Command *c = GPU_D12_PushCmd((GPU_D12_CommandList *)cl);
    c->kind = GPU_D12_CommandKind_FlushUav;
    c->barrier.resource = (GPU_D12_Resource *)resource;
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdef Dispatch hooks
|
|
|
|
/* Records a render-target clear; the clear color comes from the color baked
   into the resource's acquisition desc. */
void GPU_ClearRenderable(GPU_CommandList *gpu_cl, GPU_Resource *resource)
{
    GPU_D12_Command *c = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
    c->kind = GPU_D12_CommandKind_ClearRtv;
    c->clear.resource = (GPU_D12_Resource *)resource;
}
|
|
|
|
void GPU_Rasterize_(GPU_CommandList *gpu_cl,
|
|
u32 sig_size,
|
|
void *sig,
|
|
VertexShader vs,
|
|
PixelShader ps,
|
|
u32 rts_count,
|
|
GPU_Viewport viewport,
|
|
GPU_Scissor scissor,
|
|
u32 instances_count,
|
|
GPU_Resource *index_buffer,
|
|
GPU_RasterizeMode mode)
|
|
{
|
|
GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl;
|
|
GPU_D12_Command *cmd = GPU_D12_PushCmd(cl);
|
|
cmd->kind = GPU_D12_CommandKind_Rasterize;
|
|
Assert(sig_size <= sizeof(cmd->rasterize.sig));
|
|
cmd->rasterize.sig_size = MinU32(sizeof(cmd->rasterize.sig), sig_size);
|
|
CopyBytes(cmd->rasterize.sig, sig, cmd->rasterize.sig_size);
|
|
cmd->rasterize.vs = vs;
|
|
cmd->rasterize.ps = ps;
|
|
cmd->rasterize.rts_count = rts_count;
|
|
Assert(rts_count < GPU_MaxRenderTargets);
|
|
cmd->rasterize.viewport = viewport;
|
|
cmd->rasterize.scissor = scissor;
|
|
cmd->rasterize.instances_count = instances_count;
|
|
cmd->rasterize.index_buffer = (GPU_D12_Resource *)index_buffer;
|
|
cmd->rasterize.mode = mode;
|
|
}
|
|
|
|
void GPU_Compute_(GPU_CommandList *gpu_cl,
|
|
u32 sig_size,
|
|
void *sig,
|
|
ComputeShader cs,
|
|
u32 num_threads_x,
|
|
u32 num_threads_y,
|
|
u32 num_threads_z)
|
|
{
|
|
GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl;
|
|
GPU_D12_Command *cmd = GPU_D12_PushCmd(cl);
|
|
cmd->kind = GPU_D12_CommandKind_Compute;
|
|
Assert(sig_size <= sizeof(cmd->compute.sig));
|
|
cmd->compute.sig_size = MinU32(sizeof(cmd->compute.sig), sig_size);
|
|
CopyBytes(cmd->compute.sig, sig, cmd->compute.sig_size);
|
|
cmd->compute.cs = cs;
|
|
cmd->compute.num_threads_x = num_threads_x;
|
|
cmd->compute.num_threads_y = num_threads_y;
|
|
cmd->compute.num_threads_z = num_threads_z;
|
|
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdef Copy hooks
|
|
|
|
void GPU_CopyResource(GPU_CommandList *gpu_cl, GPU_Resource *gpu_dst, GPU_Resource *gpu_src)
{
    /* Record a deferred whole-resource copy from `gpu_src` to `gpu_dst`. */
    GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl;
    GPU_D12_Command *copy_cmd = GPU_D12_PushCmd(cl);
    copy_cmd->kind = GPU_D12_CommandKind_Copy;
    copy_cmd->copy.dst = (GPU_D12_Resource *)gpu_dst;
    copy_cmd->copy.src = (GPU_D12_Resource *)gpu_src;
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdef Map hooks
|
|
|
|
GPU_Mapped GPU_Map(GPU_Resource *gpu_r)
{
    /* Map subresource 0 of the resource for CPU access. An empty read
       range is passed, declaring that the CPU will not read back the
       existing contents. Panics on failure (TODO: don't panic). */
    GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_r;
    GPU_Mapped result = ZI;
    result.resource = gpu_r;
    D3D12_RANGE read_range = ZI;
    HRESULT hr = ID3D12Resource_Map(r->d3d_resource, 0, &read_range, &result.mem);
    b32 mapped_ok = !FAILED(hr) && result.mem != 0;
    if (!mapped_ok)
    {
        /* TODO: Don't panic */
        Panic(Lit("Failed to map command buffer resource"));
    }
    return result;
}
|
|
|
|
void GPU_Unmap(GPU_Mapped m)
{
    /* Unmap subresource 0; a null written-range means the whole mapped
       region may have been written. */
    GPU_D12_Resource *mapped_resource = (GPU_D12_Resource *)m.resource;
    ID3D12Resource_Unmap(mapped_resource->d3d_resource, 0, 0);
}
|
|
|
|
void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference)
{
    /* Copy tightly-packed texel data from `src` into `dst`, laid out with
       the driver-required upload footprint of subresource 0 of
       `footprint_reference`. `src` is assumed row-major with no row or
       slice padding; `dst` points at the base of the upload allocation
       (the footprint's Offset is applied here). */
    GPU_D12_SharedState *g = &GPU_D12_shared_state;

    D3D12_RESOURCE_DESC desc = ZI;
    ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc);

    u64 upload_size = 0;
    u64 upload_row_size = 0;
    u32 upload_num_rows = 0;
    D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI;
    ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size);
    D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint;

    {
        u8 *dst_base = (u8 *)dst + placed_footprint.Offset;
        u8 *src_base = (u8 *)src;

        /* The two layouts have different slice strides: the source is
           tightly packed, while the destination advances by the driver's
           RowPitch per row. (Previously the packed source slice size was
           used for both, corrupting 3D/array destinations whenever
           RowPitch != upload_row_size.) 64-bit math avoids truncating
           large slices. */
        u64 src_slice_size = upload_row_size * (u64)upload_num_rows;
        u64 dst_slice_size = (u64)footprint.RowPitch * (u64)upload_num_rows;

        for (u32 z = 0; z < desc.DepthOrArraySize; ++z)
        {
            u8 *dst_slice = dst_base + z * dst_slice_size;
            u8 *src_slice = src_base + z * src_slice_size;
            for (u32 y = 0; y < upload_num_rows; ++y)
            {
                u8 *dst_row = dst_slice + y * footprint.RowPitch;
                u8 *src_row = src_slice + y * upload_row_size;
                CopyBytes(dst_row, src_row, upload_row_size);
            }
        }
    }
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdef Memory info hooks
|
|
|
|
GPU_MemoryInfo GPU_QueryMemoryInfo(void)
|
|
{
|
|
/* TODO */
|
|
return (GPU_MemoryInfo) ZI;
|
|
}
|
|
|
|
////////////////////////////////
|
|
//~ @hookdef Swapchain hooks
|
|
|
|
GPU_Swapchain *GPU_AcquireSwapchain(P_Window *window, GPU_Format format, Vec2I32 size)
{
    /* Create (or recycle) a swapchain for `window` with the given pixel
       format and initial backbuffer size. Panics on DXGI creation failure.
       The returned pointer is a GPU_D12_Swapchain cast to the opaque
       public type. */
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    HRESULT hr = 0;
    HWND hwnd = (HWND)P_GetInternalWindowHandle(window);
    GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);

    /* Pop a recycled wrapper off the shared free list if one is available. */
    GPU_D12_Swapchain *swapchain = 0;
    {
        Lock lock = LockE(&g->free_swapchains_mutex);
        {
            swapchain = g->first_free_swapchain;
            if (swapchain)
            {
                g->first_free_swapchain = swapchain->next;
            }
        }
        Unlock(&lock);
    }
    if (!swapchain)
    {
        /* No free wrapper: allocate a cacheline-aligned one from the
           permanent arena (alignment pads both sides to avoid false
           sharing with neighboring allocations). */
        Arena *perm = PermArena();
        PushAlign(perm, CachelineSize);
        swapchain = PushStructNoZero(perm, GPU_D12_Swapchain);
        PushAlign(perm, CachelineSize);
    }
    ZeroStruct(swapchain);
    swapchain->format = format;

    /* Create swapchain1 */
    IDXGISwapChain1 *swapchain1 = 0;
    {
        DXGI_SWAP_CHAIN_DESC1 desc = ZI;
        desc.Format = GPU_D12_DxgiFormatFromGpuFormat(format);
        desc.Width = size.x;
        desc.Height = size.y;
        desc.SampleDesc.Count = 1;
        desc.SampleDesc.Quality = 0;
        desc.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT;
        desc.BufferCount = GPU_D12_SwapchainBufferCount;
        desc.Scaling = DXGI_SCALING_NONE;
        desc.Flags = GPU_D12_SwapchainFlags;
        desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
        desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
        hr = IDXGIFactory2_CreateSwapChainForHwnd(g->factory, (IUnknown *)queue->d3d_queue, hwnd, &desc, 0, 0, &swapchain1);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to create IDXGISwapChain1"));
        }
    }

    /* Upgrade to swapchain3 */
    hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain->swapchain);
    if (FAILED(hr))
    {
        Panic(Lit("Failed to create IDXGISwapChain3"));
    }

    /* Create waitable object */
#if GPU_D12_FrameLatency > 0
    IDXGISwapChain3_SetMaximumFrameLatency(swapchain->swapchain, GPU_D12_FrameLatency);
    swapchain->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->swapchain);
    Assert(swapchain->waitable);
#endif

    /* Disable Alt+Enter changing monitor resolution to match window size */
    IDXGIFactory_MakeWindowAssociation(g->factory, hwnd, DXGI_MWA_NO_ALT_ENTER);

    /* Release the intermediate interface; swapchain->swapchain holds its
       own reference from QueryInterface. */
    IDXGISwapChain1_Release(swapchain1);
    swapchain->window_hwnd = hwnd;

    GPU_D12_InitSwapchainResources(swapchain);

    return (GPU_Swapchain *)swapchain;
}
|
|
|
|
void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain)
{
    /* TODO: release DXGI resources and return the wrapper to the shared
       free list (see GPU_AcquireSwapchain's recycling path). Currently a
       no-op stub. */
}
|
|
|
|
void GPU_YieldOnSwapchain(GPU_Swapchain *swapchain)
{
    /* TODO: block on the swapchain's frame-latency waitable object
       (acquired in GPU_AcquireSwapchain when GPU_D12_FrameLatency > 0).
       Currently a no-op stub. */
}
|
|
|
|
i64 GPU_PresentSwapchain(GPU_Swapchain *gpu_swapchain, GPU_Resource *gpu_texture, Vec2I32 backbuffer_size, Vec2I32 dst, i32 vsync)
{
    /* Blit `gpu_texture` into the current backbuffer and present.
       Returns the fence target of the blit submission (0 if the blit
       could not be performed). */
    GPU_D12_Swapchain *swapchain = (GPU_D12_Swapchain *)gpu_swapchain;
    GPU_D12_Resource *texture = (GPU_D12_Resource *)gpu_texture;
    GPU_D12_SwapchainBuffer *swapchain_buffer = GPU_D12_UpdateSwapchain(swapchain, backbuffer_size);

    /* Compare source and backbuffer descriptions to verify they are
       compatible for a blit. */
    D3D12_RESOURCE_DESC src_desc = ZI;
    D3D12_RESOURCE_DESC dst_desc = ZI;
    ID3D12Resource_GetDesc(texture->d3d_resource, &src_desc);
    ID3D12Resource_GetDesc(swapchain_buffer->d3d_resource, &dst_desc);

    b32 is_blitable = (src_desc.Dimension == dst_desc.Dimension &&
                       src_desc.SampleDesc.Count == dst_desc.SampleDesc.Count &&
                       src_desc.SampleDesc.Quality == dst_desc.SampleDesc.Quality);
    Assert(is_blitable == 1); /* Texture resource must be similar enough to backbuffer resource to blit */

    i64 fence_target = 0;
    if (is_blitable)
    {
        /* Blit */
        fence_target = GPU_D12_BlitToSwapchain(swapchain_buffer, texture, dst);

        /* Tearing is only legal when the system supports it and vsync is off. */
        u32 present_flags = (GPU_D12_TearingIsAllowed && vsync == 0) ? DXGI_PRESENT_ALLOW_TEARING : 0;

        /* Present */
        {
            __profn("Present");
            HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags);
            if (!SUCCEEDED(hr))
            {
                Assert(0);
            }
        }
    }

    return fence_target;
}
|