power_play/src/gpu/gpu_dx12/gpu_dx12.c
2025-09-23 19:19:21 -05:00

2179 lines
81 KiB
C

GPU_D12_SharedState GPU_D12_shared_state = ZI;
////////////////////////////////
//~ Helpers
GPU_D12_FiberState *GPU_D12_FiberStateFromId(i16 fiber_id)
{
    /* Lazily allocate the per-fiber state slot; once created it lives for the
     * rest of the program on the permanent arena. */
    GPU_D12_SharedState *shared = &GPU_D12_shared_state;
    GPU_D12_FiberState **slot = &shared->fiber_states[fiber_id];
    if (*slot == 0)
    {
        *slot = PushStruct(PermArena(), GPU_D12_FiberState);
    }
    return *slot;
}
/* GPU_Format values are defined to alias DXGI_FORMAT one-to-one, so the
 * conversion is a plain cast. */
DXGI_FORMAT GPU_D12_DxgiFormatFromGpuFormat(GPU_Format format)
{
    DXGI_FORMAT result = (DXGI_FORMAT)format;
    return result;
}
GPU_D12_Command *GPU_D12_PushCmd(GPU_D12_CommandList *cl)
{
    /* Recycle a command node from this fiber's free list when possible,
     * otherwise carve a fresh one out of the permanent arena. Either way the
     * node is zeroed before being appended to `cl`. */
    GPU_D12_FiberState *fiber = GPU_D12_FiberStateFromId(FiberId());
    GPU_D12_Command *cmd = fiber->first_free_command;
    if (cmd != 0)
    {
        fiber->first_free_command = cmd->next;
    }
    else
    {
        cmd = PushStructNoZero(PermArena(), GPU_D12_Command);
    }
    ZeroStruct(cmd);
    QueuePush(cl->first, cl->last, cmd);
    cl->count += 1;
    return cmd;
}
/* Fold the fields that determine physical-resource compatibility into one
 * hash; resources with equal hashes can be transparently reused for each
 * other. The seed chain order is load-bearing: acquire and release must
 * compute identical values, so do not reorder the folds. */
u64 GPU_D12_ReuseHashFromResourceDesc(GPU_ResourceDesc desc, u64 buffer_size)
{
    u64 hash = RandU64FromSeeds(desc.kind, desc.flags);
    switch (desc.kind)
    {
    case GPU_ResourceKind_Texture1D:
    case GPU_ResourceKind_Texture2D:
    case GPU_ResourceKind_Texture3D:
    {
        /* Textures: format, mip count, clear color (affects the committed
         * clear value), and full 3d size. */
        hash = RandU64FromSeeds(hash, desc.texture.format);
        hash = RandU64FromSeeds(hash, desc.texture.mip_levels);
        hash = RandU64FromSeeds(hash, desc.clear_color.x);
        hash = RandU64FromSeeds(hash, desc.clear_color.y);
        hash = RandU64FromSeeds(hash, desc.clear_color.z);
        hash = RandU64FromSeeds(hash, desc.clear_color.w);
        hash = RandU64FromSeeds(hash, desc.texture.size.x);
        hash = RandU64FromSeeds(hash, desc.texture.size.y);
        hash = RandU64FromSeeds(hash, desc.texture.size.z);
    } break;
    case GPU_ResourceKind_Buffer:
    {
        /* Buffers: heap kind and the already-rounded physical size. */
        hash = RandU64FromSeeds(hash, desc.buffer.heap_kind);
        hash = RandU64FromSeeds(hash, buffer_size);
    } break;
    default: break;
    }
    return hash;
}
////////////////////////////////
//~ Startup
/* Bring up the whole DX12 backend. Order matters: device first, then the
 * command queues (created in parallel jobs), then the descriptor heaps (which
 * need the device for handle increment sizes), then the bindless root
 * signature, and finally the dedicated job that mirrors d3d fence completions
 * into engine fences. */
void GPU_D12_Startup(void)
{
GPU_D12_SharedState *g = &GPU_D12_shared_state;
/* Init device */
GPU_D12_InitDevice();
/* Init queues: one job per desc entry (`.count = GPU_NumQueues` fans the job
 * out); block here until all queues are published into g->queues[] since
 * everything below assumes they exist. */
{
GPU_D12_QueueDesc descs[] = {
{.kind = GPU_QueueKind_Direct, .d3d_type = D3D12_COMMAND_LIST_TYPE_DIRECT, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Direct queue") },
{.kind = GPU_QueueKind_Compute, .d3d_type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Compute queue") },
{.kind = GPU_QueueKind_Copy, .d3d_type = D3D12_COMMAND_LIST_TYPE_COPY, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .dbg_name = Lit("Copy queue") },
{.kind = GPU_QueueKind_BackgroundCopy, .d3d_type = D3D12_COMMAND_LIST_TYPE_COPY, .d3d_priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Background copy queue") }
};
u32 job_count = 0; Fence job_fence = ZI;
job_count += RunJob(GPU_D12_InitQueue, .count = GPU_NumQueues, .sig.descs = descs, .fence = &job_fence);
YieldOnFence(&job_fence, job_count);
}
/* Init descriptor heaps: CBV/SRV/UAV and sampler heaps are shader-visible
 * (bindless indexing); the RTV heap is CPU-only. */
g->cbv_srv_uav_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
GPU_D12_MaxCbvSrvUavDescriptors,
ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV));
g->sampler_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
GPU_D12_MaxSamplerDescriptors,
ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER));
g->rtv_heap = GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_RTV,
D3D12_DESCRIPTOR_HEAP_FLAG_NONE,
GPU_D12_MaxRtvDescriptors,
ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV));
/* Init rootsig */
GPU_D12_InitRootsig();
/* Start queue sync job: runs forever on a dedicated thread (see
 * GPU_D12_StartQueueSync). */
RunJob(GPU_D12_StartQueueSync, .pool = JobPool_Hyper, .flags = JobFlag_Dedicated);
}
////////////////////////////////
//~ Initialization
//- Device initialization
/* Create the DXGI factory and pick the most performant adapter that supports
 * D3D_FEATURE_LEVEL_12_0. In GPU_DEBUG builds, also enables the D3D12 debug
 * layer and configures both the d3d12 and dxgi info queues to break on
 * corruption/error messages. Panics on any unrecoverable failure. */
void GPU_D12_InitDevice(void)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    TempArena scratch = BeginScratchNoConflict();
    HRESULT hr = 0;
    /* Enable debug layer */
    u32 dxgi_factory_flags = 0;
#if GPU_DEBUG
    {
        __profn("Enable debug layer");
        ID3D12Debug *debug_controller0 = 0;
        hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to create ID3D12Debug0"));
        }
        ID3D12Debug1 *debug_controller1 = 0;
        hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to create ID3D12Debug1"));
        }
        ID3D12Debug_EnableDebugLayer(debug_controller0);
        /* FIXME: Enable this */
        // ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, 1);
        /* BUGFIX: release the ID3D12Debug1 pointer through its own interface
         * macro (same vtable slot, but the old ID3D12Debug_Release call was
         * type-mismatched). */
        ID3D12Debug1_Release(debug_controller1);
        ID3D12Debug_Release(debug_controller0);
        dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG;
    }
#endif
    /* Create factory */
    {
        __profn("Create factory");
        hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&g->factory);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to initialize DXGI factory"));
        }
    }
    /* Create device */
    {
        __profn("Create device");
        IDXGIAdapter1 *adapter = 0;
        ID3D12Device *device = 0;
        String error = Lit("Could not initialize GPU device.");
        String first_gpu_name = ZI;
        u32 adapter_index = 0;
        b32 skip = 0; /* For debugging iGPU */
        /* Walk adapters from most to least performant; keep the first one that
         * yields a FL 12.0 device. */
        for (;;)
        {
            hr = IDXGIFactory6_EnumAdapterByGpuPreference(g->factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter);
            if (FAILED(hr))
            {
                break; /* Ran out of adapters. */
            }
            DXGI_ADAPTER_DESC1 desc;
            IDXGIAdapter1_GetDesc1(adapter, &desc);
            if (first_gpu_name.len == 0)
            {
                /* Remember the preferred adapter's name for the error message. */
                first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description);
            }
            hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device);
            if (SUCCEEDED(hr) && !skip)
            {
                break;
            }
            skip = 0;
            /* BUGFIX: only release the device if it was actually created.
             * D3D12CreateDevice can fail and leave `device` null, and calling
             * Release on a null COM pointer dereferences a null vtable. */
            if (device)
            {
                ID3D12Device_Release(device);
            }
            IDXGIAdapter1_Release(adapter);
            adapter = 0;
            device = 0;
            ++adapter_index;
        }
        if (!device)
        {
            if (first_gpu_name.len > 0)
            {
                error = StringF(scratch.arena,
                                "Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.",
                                FmtString(first_gpu_name));
            }
            Panic(error);
        }
        g->adapter = adapter;
        g->device = device;
    }
#if GPU_DEBUG
    /* Enable D3D12 Debug break */
    {
        __profn("Enable d3d12 debug break");
        ID3D12InfoQueue *info = 0;
        hr = ID3D12Device_QueryInterface(g->device, &IID_ID3D12InfoQueue, (void **)&info);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to query ID3D12Device interface"));
        }
        ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1);
        ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1);
        ID3D12InfoQueue_Release(info);
    }
    /* Enable DXGI Debug break */
    {
        __profn("Enable dxgi debug break");
        IDXGIInfoQueue *dxgi_info = 0;
        hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info);
        if (FAILED(hr))
        {
            Panic(Lit("Failed to get DXGI debug interface"));
        }
        IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1);
        IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1);
        IDXGIInfoQueue_Release(dxgi_info);
    }
#endif
    EndScratch(scratch);
}
//- Queue initialization
/* Job: create the one D3D12 command queue + submit fence described by
 * sig->descs[id] and publish it into the shared queue table. */
JobDef(GPU_D12_InitQueue, sig, id)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    Arena *perm = PermArena();
    GPU_D12_QueueDesc desc = sig->descs[id];
    /* Pad to a cacheline on both sides so queue structs don't false-share. */
    PushAlign(perm, CachelineSize);
    GPU_D12_Queue *queue = PushStruct(perm, GPU_D12_Queue);
    PushAlign(perm, CachelineSize);
    queue->desc = desc;
    D3D12_COMMAND_QUEUE_DESC d3d_desc = ZI;
    d3d_desc.Type = desc.d3d_type;
    d3d_desc.Priority = desc.d3d_priority;
    HRESULT hr = ID3D12Device_CreateCommandQueue(g->device, &d3d_desc, &IID_ID3D12CommandQueue, (void **)&queue->d3d_queue);
    if (FAILED(hr))
    {
        Panic(Lit("Failed to create command queue"));
    }
    /* Submit fence starts at 0 with no flags; incremented per submission. */
    hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&queue->submit_fence);
    if (FAILED(hr))
    {
        Panic(Lit("Failed to create command queue fence"));
    }
    g->queues[desc.kind] = queue;
}
//- Heap initialization
/* Create a descriptor-heap wrapper backed by its own arena. Descriptors are
 * handed out from `start_handle` in `desc_size` increments (see
 * GPU_D12_AcquireDescriptor). Panics if the d3d heap can't be created. */
GPU_D12_DescriptorHeap *GPU_D12_InitDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIPTOR_HEAP_FLAGS flags, u32 max_descs, u32 desc_size)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    Arena *arena = AcquireArena(Gibi(64));
    GPU_D12_DescriptorHeap *heap = PushStruct(arena, GPU_D12_DescriptorHeap);
    heap->arena = arena;
    heap->type = type;
    heap->max_count = max_descs;
    heap->descriptor_size = desc_size;
    D3D12_DESCRIPTOR_HEAP_DESC d3d_desc = {
        .Type = type,
        .Flags = flags,
        .NumDescriptors = max_descs,
    };
    HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &d3d_desc, &IID_ID3D12DescriptorHeap, (void **)&heap->d3d_heap);
    if (FAILED(hr))
    {
        Panic(Lit("Failed to create CPU descriptor heap"));
    }
    /* C binding variant: the handle is returned through an out parameter. */
    ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap->d3d_heap, &heap->start_handle);
    return heap;
}
//- Rootsig initialization
/* Create the single bindless root signature: 64 x 32-bit root constants at
 * (b0, space0), with the CBV/SRV/UAV and sampler heaps directly indexed from
 * shaders. Panics on failure. */
void GPU_D12_InitRootsig(void)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    b32 ok = 1;
    HRESULT hr = 0;
    String error_str = ZI;
    /* Serialize root signature */
    ID3D10Blob *blob = 0;
    if (ok)
    {
        /* BUGFIX: this profile label previously said "Create root signature"
         * (copy-pasted from the block below), mislabeling this zone. */
        __profn("Serialize root signature");
        D3D12_ROOT_PARAMETER param = ZI;
        param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
        param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
        param.Constants.ShaderRegister = 0;
        param.Constants.RegisterSpace = 0;
        param.Constants.Num32BitValues = 64;
        D3D12_ROOT_SIGNATURE_DESC desc = ZI;
        desc.NumParameters = 1;
        desc.pParameters = &param;
        desc.NumStaticSamplers = 0;
        desc.pStaticSamplers = 0;
        desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED | D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED;
        hr = D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1, &blob, 0);
        if (FAILED(hr))
        {
            error_str = Lit("Failed to serialize root signature");
            ok = 0;
        }
    }
    /* Create root signature */
    ID3D12RootSignature *rootsig = 0;
    if (ok)
    {
        __profn("Create root signature");
        hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), &IID_ID3D12RootSignature, (void **)&rootsig);
        if (FAILED(hr))
        {
            error_str = Lit("Failed to create root signature");
            ok = 0;
        }
    }
    if (blob)
    {
        ID3D10Blob_Release(blob);
    }
    g->bindless_rootsig = rootsig;
    if (!ok)
    {
        Panic(error_str);
    }
}
////////////////////////////////
//~ Pipeline operations
/* Job: compile a PSO for sig->pipeline. A pipeline with a VS or PS shader is
 * built as a graphics PSO (alpha blending, no depth, no culling); otherwise
 * it is built as a compute PSO from the CS shader. Results (pso, error, ok)
 * are written back to the pipeline; completion is signaled via the job fence
 * set up by the caller. */
JobDef(GPU_D12_LoadPipeline, sig, _)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Pipeline *pipeline = sig->pipeline;
    GPU_D12_PipelineDesc desc = pipeline->desc;
    HRESULT hr = 0;
    b32 ok = 1;
    String error_str = ZI;
    /* Create PSO */
    ID3D12PipelineState *pso = 0;
    if (ok && (!IsResourceNil(desc.vs.resource) || !IsResourceNil(desc.ps.resource)))
    {
        /* Graphics pipeline: fixed raster/blend/depth state. */
        D3D12_RASTERIZER_DESC raster_desc = ZI;
        raster_desc.FillMode = D3D12_FILL_MODE_SOLID;
        raster_desc.CullMode = D3D12_CULL_MODE_NONE;
        raster_desc.FrontCounterClockwise = 0;
        raster_desc.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
        raster_desc.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
        raster_desc.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
        raster_desc.DepthClipEnable = 1;
        raster_desc.MultisampleEnable = 0;
        raster_desc.AntialiasedLineEnable = 0;
        raster_desc.ForcedSampleCount = 0;
        raster_desc.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;
        /* Standard premultiplied-style alpha blend on RT0. */
        D3D12_BLEND_DESC blend_desc = ZI;
        blend_desc.AlphaToCoverageEnable = 0;
        blend_desc.IndependentBlendEnable = 0;
        blend_desc.RenderTarget[0].BlendEnable = 1;
        blend_desc.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_ALPHA;
        blend_desc.RenderTarget[0].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
        blend_desc.RenderTarget[0].BlendOp = D3D12_BLEND_OP_ADD;
        blend_desc.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_ONE;
        blend_desc.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
        blend_desc.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
        blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
        D3D12_DEPTH_STENCIL_DESC ds_desc = ZI;
        ds_desc.DepthEnable = 0;
        ds_desc.StencilEnable = 0;
        String vs = DataFromResource(desc.vs.resource);
        String ps = DataFromResource(desc.ps.resource);
        D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = ZI;
        pso_desc.pRootSignature = g->bindless_rootsig;
        pso_desc.VS.pShaderBytecode = vs.text;
        pso_desc.VS.BytecodeLength = vs.len;
        pso_desc.PS.pShaderBytecode = ps.text;
        pso_desc.PS.BytecodeLength = ps.len;
        pso_desc.RasterizerState = raster_desc;
        pso_desc.BlendState = blend_desc;
        pso_desc.DepthStencilState = ds_desc;
        pso_desc.PrimitiveTopologyType = desc.topology_type;
        pso_desc.SampleMask = UINT_MAX;
        pso_desc.SampleDesc.Count = 1;
        pso_desc.SampleDesc.Quality = 0;
        /* Collect render target formats until the first UNKNOWN sentinel. */
        for (i32 i = 0; i < (i32)countof(desc.render_target_formats); ++i)
        {
            StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc.render_target_formats));
            DXGI_FORMAT format = GPU_D12_DxgiFormatFromGpuFormat(desc.render_target_formats[i]);
            if (format == DXGI_FORMAT_UNKNOWN)
            {
                break;
            }
            /* BUGFIX: bound the write — the StaticAssert above only guarantees
             * the desc array is at least as large as RTVFormats, so without
             * this guard a fully-populated desc could write past RTVFormats. */
            if (pso_desc.NumRenderTargets >= countof(pso_desc.RTVFormats))
            {
                break;
            }
            pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format;
        }
        hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
        if (FAILED(hr))
        {
            error_str = Lit("Failed to create pipeline state object");
            ok = 0;
        }
    }
    else if (ok)
    {
        /* Compute pipeline. */
        String cs = DataFromResource(desc.cs.resource);
        D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = ZI;
        pso_desc.pRootSignature = g->bindless_rootsig;
        pso_desc.CS.pShaderBytecode = cs.text;
        pso_desc.CS.BytecodeLength = cs.len;
        hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
        if (FAILED(hr))
        {
            error_str = Lit("Failed to create pipeline state object");
            ok = 0;
        }
    }
    pipeline->pso = pso;
    pipeline->error = error_str;
    /* BUGFIX: report the actual outcome — this previously hardcoded 1, so a
     * failed PSO compile still advertised success despite `error` being set. */
    pipeline->ok = ok;
}
/* Look up (or lazily create) the cached pipeline for `desc`. The cache is a
 * hash table of bins; each bin uses a double-checked pattern: an optimistic
 * shared-lock lookup first, then an exclusive-lock re-check before inserting.
 * Only the thread that inserts the entry kicks the load job; all callers
 * yield on ready_fence before returning.
 * NOTE(review): assumes ready_fence stays satisfied after the first load so
 * hits on existing pipelines don't block — confirm Fence semantics. */
GPU_D12_Pipeline *GPU_D12_PipelineFromDesc(GPU_D12_PipelineDesc desc)
{
GPU_D12_SharedState *g = &GPU_D12_shared_state;
/* Hash the entire desc struct by value. */
u64 hash = RandU64FromSeed(HashFnv64(Fnv64Basis, StringFromStruct(&desc)));
GPU_D12_Pipeline *pipeline = 0;
b32 is_pipeline_new = 0;
GPU_D12_PipelineBin *bin = &g->pipeline_bins[hash % countof(g->pipeline_bins)];
{
/* Fast path: shared lock, read-only scan of the bin's chain. */
{
Lock lock = LockS(&bin->mutex);
for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin)
{
if (pipeline->hash == hash) break;
}
Unlock(&lock);
}
/* Slow path: exclusive lock, re-scan (another thread may have inserted
 * between the two locks), then insert if still missing. */
if (!pipeline)
{
Lock lock = LockE(&bin->mutex);
for (pipeline = bin->first; pipeline; pipeline = pipeline->next_in_bin)
{
if (pipeline->hash == hash) break;
}
if (!pipeline)
{
Arena *perm = PermArena();
PushAlign(perm, CachelineSize);
pipeline = PushStruct(perm, GPU_D12_Pipeline);
pipeline->desc = desc;
pipeline->hash = hash;
is_pipeline_new = 1;
PushAlign(perm, CachelineSize);
StackPushN(bin->first, pipeline, next_in_bin);
}
Unlock(&lock);
}
}
/* Only the inserting thread starts the load job; duplicates would double-
 * signal the fence. */
if (is_pipeline_new)
{
RunJob(GPU_D12_LoadPipeline, .fence = &pipeline->ready_fence, .sig.pipeline = pipeline);
}
YieldOnFence(&pipeline->ready_fence, 1);
return pipeline;
}
////////////////////////////////
//~ Queue operations
/* Map an engine queue kind to the backend queue created at startup. */
GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind)
{
    GPU_D12_Queue *result = GPU_D12_shared_state.queues[kind];
    return result;
}
////////////////////////////////
//~ Descriptor operations
/* Hand out a descriptor slot from `heap`: recycle from the free list when
 * possible, otherwise carve a brand-new index out of the d3d heap. Panics
 * when the heap is exhausted. Thread-safe via the heap mutex. */
GPU_D12_Descriptor *GPU_D12_AcquireDescriptor(GPU_D12_DescriptorHeap *heap)
{
    GPU_D12_Descriptor *d = 0;
    u32 index = 0;
    D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI;
    Lock lock = LockE(&heap->mutex);
    d = heap->first_free;
    if (d != 0)
    {
        /* Reuse a released slot; keep its existing handle/index. */
        heap->first_free = d->next_free;
        handle = d->handle;
        index = d->index;
    }
    else
    {
        if (heap->allocated_count >= heap->max_count)
        {
            Panic(Lit("Max descriptors reached in heap"));
        }
        d = PushStructNoZero(heap->arena, GPU_D12_Descriptor);
        index = heap->allocated_count++;
        /* Handles are laid out linearly from the heap start. */
        handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size);
    }
    Unlock(&lock);
    /* Reset the node outside the lock; handle/index were captured above. */
    ZeroStruct(d);
    d->heap = heap;
    d->handle = handle;
    d->index = index;
    return d;
}
/* Return a descriptor to its owning heap's free list (LIFO); the underlying
 * d3d heap slot is recycled by the next GPU_D12_AcquireDescriptor call. */
void GPU_D12_ReleaseDescriptor(GPU_D12_Descriptor *descriptor)
{
    GPU_D12_DescriptorHeap *heap = descriptor->heap;
    Lock lock = LockE(&heap->mutex);
    descriptor->next_free = heap->first_free;
    heap->first_free = descriptor;
    Unlock(&lock);
}
////////////////////////////////
//~ Raw command list
/* Return an open, reset command list for `queue_kind`. Recycles the oldest
 * previously-submitted list whose fence target the GPU has already passed;
 * otherwise allocates a fresh allocator + list pair. The returned list is
 * ready to record into; pair with GPU_D12_EndRawCommandList. */
GPU_D12_RawCommandList *GPU_D12_BeginRawCommandList(GPU_QueueKind queue_kind)
{
GPU_D12_SharedState *g = &GPU_D12_shared_state;
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
/* Pull first completed command list from queue if ready. The submitted list
 * is FIFO in fence order, so only the head can be the oldest/completed one. */
GPU_D12_RawCommandList *cl = ZI;
{
Lock lock = LockE(&queue->submit_mutex);
{
u64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence);
cl = queue->first_submitted_cl;
if (cl && cl->submit_fence_target <= completed)
{
QueuePop(queue->first_submitted_cl, queue->last_submitted_cl);
}
else
{
cl = 0;
}
}
Unlock(&lock);
}
/* Allocate new command list if none are available */
if (!cl)
{
Arena *perm = PermArena();
{
/* Cacheline-pad the struct to avoid false sharing across queues/threads. */
PushAlign(perm, CachelineSize);
cl = PushStruct(perm, GPU_D12_RawCommandList);
PushAlign(perm, CachelineSize);
}
cl->queue = queue;
HRESULT hr = ID3D12Device_CreateCommandAllocator(g->device, queue->desc.d3d_type, &IID_ID3D12CommandAllocator, (void **)&cl->ca);
if (FAILED(hr))
{
Panic(Lit("Failed to create command allocator"));
}
hr = ID3D12Device_CreateCommandList(g->device, 0, queue->desc.d3d_type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl);
if (FAILED(hr))
{
Panic(Lit("Failed to create command list"));
}
/* Command lists are created in the recording state; close immediately so
 * the unified reset path below applies to both fresh and recycled lists. */
hr = ID3D12GraphicsCommandList_Close(cl->cl);
if (FAILED(hr))
{
Panic(Lit("Failed to close command list during initialization"));
}
}
/* Reset command list (allocator first, then the list against it). */
{
HRESULT hr = ID3D12CommandAllocator_Reset(cl->ca);
if (FAILED(hr))
{
Panic(Lit("Failed to reset command allocator"));
}
hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0);
if (FAILED(hr))
{
Panic(Lit("Failed to reset command list"));
}
}
return cl;
}
/* Close and submit `cl` on its queue. Returns the fence value that will be
 * signaled when the GPU finishes this submission; callers can wait on it via
 * the queue's submit_fence. The fence bump, execute, signal, and append to
 * the submitted list all happen under submit_mutex so fence targets stay in
 * FIFO order with the submitted-list order (BeginRawCommandList relies on
 * that to recycle only completed lists). */
u64 GPU_D12_EndRawCommandList(GPU_D12_RawCommandList *cl)
{
GPU_D12_Queue *queue = cl->queue;
/* Close */
{
__profn("Close DX12 command list");
HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl);
if (FAILED(hr))
{
/* TODO: Don't panic */
Panic(Lit("Failed to close command list before execution"));
}
}
/* Submit */
u64 target = 0;
{
__profn("Execute");
Lock lock = LockE(&queue->submit_mutex);
{
target = ++queue->submit_fence_target;
cl->submit_fence_target = target;
/* Execute */
ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->cl);
ID3D12CommandQueue_Signal(queue->d3d_queue, queue->submit_fence, target);
/* Append so BeginRawCommandList can recycle this list once the GPU
 * passes `target`. */
QueuePush(queue->first_submitted_cl, queue->last_submitted_cl, cl);
}
Unlock(&lock);
}
return target;
}
////////////////////////////////
//~ Swapchain helpers
/* (Re)bind every backbuffer of `swapchain`: fetch the d3d resource, grab an
 * RTV descriptor, and create the render-target view. Buffers start in the
 * COMMON state. Called after creation and after every resize. */
void GPU_D12_InitSwapchainResources(GPU_D12_Swapchain *swapchain)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    for (u32 buffer_index = 0; buffer_index < countof(swapchain->buffers); ++buffer_index)
    {
        ID3D12Resource *resource = 0;
        HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, buffer_index, &IID_ID3D12Resource, (void **)&resource);
        if (FAILED(hr))
        {
            /* TODO: Don't panic */
            Panic(Lit("Failed to get swapchain buffer"));
        }
        GPU_D12_SwapchainBuffer *sb = &swapchain->buffers[buffer_index];
        ZeroStruct(sb);
        sb->swapchain = swapchain;
        sb->d3d_resource = resource;
        sb->rtv_descriptor = GPU_D12_AcquireDescriptor(g->rtv_heap);
        sb->state = D3D12_RESOURCE_STATE_COMMON;
        ID3D12Device_CreateRenderTargetView(g->device, sb->d3d_resource, 0, sb->rtv_descriptor->handle);
    }
}
/* Resize the swapchain if `resolution` changed (clamped to at least 1x1) and
 * return the current backbuffer. A rebuild must happen with no backbuffer
 * work in flight, so the direct queue's submit mutex is held — blocking new
 * submissions — while the CPU waits for the queue's last signaled fence
 * value, releases the old buffers, and resizes. */
GPU_D12_SwapchainBuffer *GPU_D12_UpdateSwapchain(GPU_D12_Swapchain *swapchain, Vec2I32 resolution)
{
__prof;
GPU_D12_SharedState *g = &GPU_D12_shared_state;
resolution.x = MaxI32(resolution.x, 1);
resolution.y = MaxI32(resolution.y, 1);
b32 should_rebuild = !EqVec2I32(swapchain->resolution, resolution);
if (should_rebuild)
{
HRESULT hr = 0;
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);
/* Lock direct queue submissions (in case any write to backbuffer) */
/* TODO: Less overkill approach - Only flush GPU_D12_BlitToSwapchain since we know it's the only operation targeting backbuffer */
Lock lock = LockE(&queue->submit_mutex);
//DEBUGBREAKABLE;
//Lock lock = LockE(&g->global_command_list_record_mutex);
{
/* Flush direct queue: block this thread until the GPU reaches the last
 * fence value submitted so far. NOTE(review): reads submit_fence_target
 * under the same mutex EndRawCommandList increments it under, so the
 * value is stable here. */
//ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, ++cq->submit_fence_target);
{
HANDLE event = CreateEvent(0, 0, 0, 0);
ID3D12Fence_SetEventOnCompletion(queue->submit_fence, queue->submit_fence_target, event);
WaitForSingleObject(event, INFINITE);
CloseHandle(event);
}
/* Release buffers: ResizeBuffers requires all backbuffer references
 * dropped first. */
for (u32 i = 0; i < countof(swapchain->buffers); ++i)
{
GPU_D12_SwapchainBuffer *sb = &swapchain->buffers[i];
GPU_D12_ReleaseDescriptor(sb->rtv_descriptor);
ID3D12Resource_Release(sb->d3d_resource);
}
/* Resize buffers: 0/UNKNOWN preserve the existing buffer count and format. */
hr = IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, GPU_D12_SwapchainFlags);
if (FAILED(hr))
{
/* TODO: Don't panic */
Panic(Lit("Failed to resize swapchain"));
}
}
Unlock(&lock);
/* Re-fetch the new buffers and recreate their RTVs. */
GPU_D12_InitSwapchainResources(swapchain);
swapchain->resolution = resolution;
}
u32 backbuffer_index = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->swapchain);
return &swapchain->buffers[backbuffer_index];
}
/* Clear the backbuffer `dst` and copy `texture` (subresource 0 only) into it
 * at `dst_pos`, clipping the copy to both the source texture and the
 * backbuffer. Records and submits a direct-queue command list; returns the
 * submit fence target for the submission. `texture` is transitioned to
 * COPY_SOURCE for the copy and restored to its original state afterward;
 * texture->state is kept in sync. */
i64 GPU_D12_BlitToSwapchain(GPU_D12_SwapchainBuffer *dst, GPU_D12_Resource *texture, Vec2I32 dst_pos)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Swapchain *swapchain = dst->swapchain;
    GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(GPU_QueueKind_Direct);
    ID3D12GraphicsCommandList *rcl = dx12_cl->cl;
    D3D12_RESOURCE_STATES old_texture_state = texture->state;
    {
        u32 barriers_count = 0;
        D3D12_RESOURCE_BARRIER rbs[2] = ZI;
        /* Transition backbuffer to RENDER_TARGET */
        {
            D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
            rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
            rb->Transition.pResource = dst->d3d_resource;
            rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
            rb->Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT;
            rb->Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
        }
        ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs);
    }
    /* Clear to black */
    {
        f32 clear_color[4] = ZI;
        ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, dst->rtv_descriptor->handle, clear_color, 0, 0);
    }
    {
        u32 barriers_count = 0;
        D3D12_RESOURCE_BARRIER rbs[2] = ZI;
        /* Transition backbuffer to COPY_DEST */
        {
            D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
            rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
            rb->Transition.pResource = dst->d3d_resource;
            rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
            rb->Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET;
            rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST;
        }
        /* Transition texture to COPY_SRC */
        if (texture->state != D3D12_RESOURCE_STATE_COPY_SOURCE)
        {
            D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
            rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
            rb->Transition.pResource = texture->d3d_resource;
            rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
            rb->Transition.StateBefore = texture->state;
            rb->Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE;
            texture->state = rb->Transition.StateAfter;
        }
        ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs);
    }
    /* Copy */
    {
        D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI;
        dst_loc.pResource = dst->d3d_resource;
        dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
        dst_loc.SubresourceIndex = 0;
        D3D12_TEXTURE_COPY_LOCATION src_loc = ZI;
        src_loc.pResource = texture->d3d_resource;
        src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
        src_loc.SubresourceIndex = 0;
        Vec2I32 dst_size = swapchain->resolution;
        Vec2I32 src_size = VEC2I32(texture->desc.texture.size.x, texture->desc.texture.size.y);
        i32 dst_left = dst_pos.x;
        i32 dst_top = dst_pos.y;
        i32 src_left = 0;
        i32 src_top = 0;
        i32 src_right = src_size.x;
        i32 src_bottom = src_size.y;
        /* Clamp copy src & dst so the region lies inside both surfaces. */
        if (dst_left < 0)
        {
            src_left -= dst_left;
            dst_left = 0;
        }
        if (dst_top < 0)
        {
            src_top -= dst_top;
            dst_top = 0;
        }
        /* BUGFIX: the copied extent is (src_right - src_left), not
         * (src_left + src_right); the old expression over-clamped the right/
         * bottom edge whenever the blit was shifted off the left/top edge
         * (src_left/src_top > 0) while also overflowing the far edge. */
        if (dst_left + (src_right - src_left) > dst_size.x)
        {
            src_right -= (dst_left + (src_right - src_left)) - dst_size.x;
        }
        if (dst_top + (src_bottom - src_top) > dst_size.y)
        {
            src_bottom -= (dst_top + (src_bottom - src_top)) - dst_size.y;
        }
        /* Skip the copy entirely when the clipped region is empty. */
        if (src_left < src_right && src_bottom > src_top)
        {
            D3D12_BOX src_box = ZI;
            src_box.left = src_left;
            src_box.top = src_top;
            src_box.right = src_right;
            src_box.bottom = src_bottom;
            src_box.back = 1;
            ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, dst_left, dst_top, 0, &src_loc, &src_box);
        }
    }
    {
        u32 barriers_count = 0;
        D3D12_RESOURCE_BARRIER rbs[2] = ZI;
        /* Transition backbuffer to PRESENT */
        {
            D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
            rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
            rb->Transition.pResource = dst->d3d_resource;
            rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
            rb->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
            rb->Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT;
        }
        /* Transition texture back to its original state */
        if (texture->state != old_texture_state)
        {
            D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
            rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
            rb->Transition.pResource = texture->d3d_resource;
            rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
            rb->Transition.StateBefore = texture->state;
            rb->Transition.StateAfter = old_texture_state;
            texture->state = rb->Transition.StateAfter;
        }
        ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs);
    }
    i64 fence_target = GPU_D12_EndRawCommandList(dx12_cl);
    return fence_target;
}
////////////////////////////////
//~ Queue sync job
/* Dedicated job that never returns: mirrors each queue's d3d submit-fence
 * completions into the engine-side sync_fence so fibers can YieldOnFence on
 * GPU progress without polling. One auto-reset Win32 event per queue; events
 * start signaled (initial state 1) so the first loop iteration runs
 * unconditionally and arms them. */
JobDef(GPU_D12_StartQueueSync, _, __)
{
GPU_D12_SharedState *g = &GPU_D12_shared_state;
HANDLE queue_fences_events[GPU_NumQueues] = ZI;
i64 queue_fences_seen[GPU_NumQueues] = ZI;
for (i32 i = 0; i < countof(queue_fences_events); ++i)
{
/* CreateEvent(security=0, manual_reset=0, initial_state=1, name=0) */
queue_fences_events[i] = CreateEvent(0, 0, 1, 0);
queue_fences_seen[i] = -1;
}
for (;;)
{
/* Wake when ANY queue's event fires (wait_all = 0). */
WaitForMultipleObjects(countof(queue_fences_events), queue_fences_events, 0, INFINITE);
for (GPU_QueueKind queue_kind = 0; queue_kind < GPU_NumQueues; ++queue_kind)
{
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
i64 last_seen = queue_fences_seen[queue_kind];
i64 completed = ID3D12Fence_GetCompletedValue(queue->submit_fence);
if (completed > last_seen)
{
/* Publish GPU progress to the engine fence, then re-arm the event for
 * the NEXT completion (completed + 1). */
SetFence(&queue->sync_fence, completed);
queue_fences_seen[queue_kind] = completed;
ID3D12Fence_SetEventOnCompletion(queue->submit_fence, completed + 1, queue_fences_events[queue_kind]);
}
}
}
}
////////////////////////////////
//~ @hookdef Startup hook
/* Backend-agnostic startup hook: forwards to the DX12 backend. */
void GPU_Startup(void)
{
GPU_D12_Startup();
}
////////////////////////////////
//~ @hookdecl Fence hooks
/* Engine-side fence tracking GPU completion for the given queue; advanced by
 * the GPU_D12_StartQueueSync job. */
Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind)
{
    return &GPU_D12_QueueFromKind(queue_kind)->sync_fence;
}
/* Make queue `a` wait (GPU-side, non-blocking for the CPU) until queue `b`'s
 * submit fence reaches `b_target_fence_value`. */
void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value)
{
    GPU_D12_Queue *waiting_queue = GPU_D12_QueueFromKind(a);
    GPU_D12_Queue *signaling_queue = GPU_D12_QueueFromKind(b);
    ID3D12CommandQueue_Wait(waiting_queue->d3d_queue, signaling_queue->submit_fence, b_target_fence_value);
}
////////////////////////////////
//~ @hookdef Rasterizer helper hooks
/* Build a full-depth-range ([0,1]) viewport covering `rect`. */
GPU_Viewport GPU_ViewportFromRect(Rect rect)
{
    GPU_Viewport viewport = {
        .top_left_x = rect.x,
        .top_left_y = rect.y,
        .width = rect.width,
        .height = rect.height,
        .min_depth = 0.0f,
        .max_depth = 1.0f,
    };
    return viewport;
}
/* Build a scissor from `rect`: left/top edge plus width/height as the
 * exclusive right/bottom edge. */
GPU_Scissor GPU_ScissorFromRect(Rect rect)
{
    GPU_Scissor scissor = {
        .left = rect.x,
        .top = rect.y,
        .right = rect.x + rect.width,
        .bottom = rect.y + rect.height,
    };
    return scissor;
}
////////////////////////////////
//~ @hookdef Resource hooks
/* @hookdef — acquire a GPU resource matching `desc`.
 * Resolution order:
 *   1. reuse bin: a resource parked by GPU_ReleaseResource(.., Reuse) with a
 *      compatible desc hash still owns a live d3d resource we adopt wholesale;
 *   2. free list: a zeroed node (no live d3d resource);
 *   3. fresh cacheline-padded node from the permanent arena.
 * Then the d3d resource is created if missing, followed by whichever
 * descriptors the kind/flags require. Panics on unknown kind or d3d failure. */
GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Resource *r = 0;
    if (desc.kind == GPU_ResourceKind_Unknown)
    {
        Panic(Lit("Unknown gpu resource type"));
    }
    /* Compute the physical buffer size up front; it participates in the reuse
     * hash so differently-sized buffers never alias. */
    u64 buffer_size = 0;
    if (desc.kind == GPU_ResourceKind_Buffer)
    {
        desc.buffer.stride = MaxU32(desc.buffer.stride, 1);
        /* NOTE(review): AlignU64Pow2 presumably rounds up to the next power of
         * two — confirm; minimum allocation is 64 KiB either way. */
        buffer_size = MaxU64(AlignU64Pow2(desc.buffer.count * desc.buffer.stride), Kibi(64));
    }
    u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size);
    /* Grab reusable */
    {
        u64 bin_index = reuse_hash % countof(g->resource_reuse_bins);
        GPU_D12_ResourceReuseListBin *bin = &g->resource_reuse_bins[bin_index];
        {
            Lock lock = LockE(&bin->mutex);
            {
                GPU_D12_ResourceReuseList *list = bin->first;
                for (; list; list = list->next)
                {
                    if (list->hash == reuse_hash) break;
                }
                if (list)
                {
                    r = list->first;
                    list->first = r->next_free;
                    if (!list->first)
                    {
                        /* List drained: recycle the list node itself. */
                        DllRemove(bin->first, bin->last, list);
                        StackPush(bin->first_free, list);
                        list->prev = 0;
                    }
                    r->next_free = 0;
                }
            }
            Unlock(&lock);
        }
    }
    /* Grab from free list (node reuse only — no live d3d resource) */
    if (!r)
    {
        {
            Lock lock = LockE(&g->free_resources_mutex);
            r = g->first_free_resource;
            if (r)
            {
                g->first_free_resource = r->next_free;
            }
            Unlock(&lock);
        }
        if (r)
        {
            ZeroStruct(r);
        }
    }
    /* Push new */
    if (!r)
    {
        Arena *perm = PermArena();
        PushAlign(perm, CachelineSize);
        r = PushStruct(perm, GPU_D12_Resource);
        PushAlign(perm, CachelineSize);
    }
    /* Create d3d resource (skipped when adopted from the reuse path, which
     * keeps the previous committed resource alive). */
    if (!r->d3d_resource)
    {
        switch (desc.kind)
        {
        case GPU_ResourceKind_Sampler: break; /* Samplers are descriptor-only. */
        /* Buffer */
        case GPU_ResourceKind_Buffer:
        {
            D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
            D3D12_HEAP_PROPERTIES heap_props = {
                .Type = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_HEAP_TYPE_UPLOAD
                : desc.buffer.heap_kind == GPU_HeapKind_Download ? D3D12_HEAP_TYPE_READBACK
                : D3D12_HEAP_TYPE_DEFAULT
            };
            Assert(!(desc.flags & GPU_ResourceFlag_Renderable)); /* Buffers can't be render targets. */
            D3D12_RESOURCE_DESC d3d_desc = ZI;
            d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
            d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
            d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
            d3d_desc.Alignment = 0;
            d3d_desc.Width = buffer_size;
            d3d_desc.Height = 1;
            d3d_desc.DepthOrArraySize = 1;
            d3d_desc.MipLevels = 1;
            d3d_desc.SampleDesc.Count = 1;
            d3d_desc.SampleDesc.Quality = 0;
            /* Branchless flag select: the multiply is 0 or 1. */
            d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_Writable);
            /* Upload heaps must start GENERIC_READ; everything else starts as
             * a copy destination. */
            r->state = desc.buffer.heap_kind == GPU_HeapKind_Upload ? D3D12_RESOURCE_STATE_GENERIC_READ : D3D12_RESOURCE_STATE_COPY_DEST;
            HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, r->state, 0, &IID_ID3D12Resource, (void **)&r->d3d_resource);
            if (FAILED(hr))
            {
                /* TODO: Don't panic */
                Panic(Lit("Failed to create buffer resource"));
            }
            r->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->d3d_resource);
        } break;
        /* Texture */
        case GPU_ResourceKind_Texture1D:
        case GPU_ResourceKind_Texture2D:
        case GPU_ResourceKind_Texture3D:
        {
            D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
            D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
            D3D12_RESOURCE_DESC d3d_desc = ZI;
            d3d_desc.Dimension = desc.kind == GPU_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D
            : desc.kind == GPU_ResourceKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D
            : D3D12_RESOURCE_DIMENSION_TEXTURE3D;
            d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
            d3d_desc.Format = GPU_D12_DxgiFormatFromGpuFormat(desc.texture.format);
            d3d_desc.Alignment = 0;
            d3d_desc.Width = desc.texture.size.x;
            d3d_desc.Height = desc.texture.size.y;
            d3d_desc.DepthOrArraySize = desc.texture.size.z;
            /* MipLevels == 0 asks d3d for the full mip chain. */
            d3d_desc.MipLevels = (desc.flags & GPU_ResourceFlag_MaxMipLevels) ? 0 : MaxI32(desc.texture.mip_levels, 1);
            d3d_desc.SampleDesc.Count = 1;
            d3d_desc.SampleDesc.Quality = 0;
            d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * !!(desc.flags & GPU_ResourceFlag_Writable);
            d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * !!(desc.flags & GPU_ResourceFlag_Renderable);
            r->state = D3D12_RESOURCE_STATE_COPY_DEST;
            /* Optimized clear values are only legal on render targets. */
            D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } };
            clear_value.Color[0] = desc.clear_color.x;
            clear_value.Color[1] = desc.clear_color.y;
            clear_value.Color[2] = desc.clear_color.z;
            clear_value.Color[3] = desc.clear_color.w;
            D3D12_CLEAR_VALUE *clear_value_ptr = d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0;
            HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &d3d_desc, r->state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->d3d_resource);
            if (FAILED(hr))
            {
                /* TODO: Don't panic */
                /* BUGFIX: message previously said "buffer resource" (copy-paste
                 * from the buffer path above). */
                Panic(Lit("Failed to create texture resource"));
            }
        } break;
        }
    }
    /* Create texture srv descriptor */
    if (desc.kind == GPU_ResourceKind_Texture1D
        || desc.kind == GPU_ResourceKind_Texture2D
        || desc.kind == GPU_ResourceKind_Texture3D)
    {
        if (!r->srv_descriptor)
        {
            r->srv_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap);
        }
        /* Null view desc: inherit format/dimension from the resource. */
        ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, 0, r->srv_descriptor->handle);
    }
    /* Create buffer srv descriptor (structured buffer view; download heaps are
     * CPU-readback only and never shader-visible) */
    if (desc.kind == GPU_ResourceKind_Buffer
        && desc.buffer.heap_kind != GPU_HeapKind_Download
        && desc.buffer.count > 0)
    {
        if (!r->srv_descriptor)
        {
            r->srv_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap);
        }
        D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = ZI;
        srv_desc.Format = DXGI_FORMAT_UNKNOWN;
        srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
        srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
        srv_desc.Buffer.FirstElement = 0;
        srv_desc.Buffer.NumElements = desc.buffer.count;
        srv_desc.Buffer.StructureByteStride = desc.buffer.stride;
        srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
        ID3D12Device_CreateShaderResourceView(g->device, r->d3d_resource, &srv_desc, r->srv_descriptor->handle);
    }
    /* Create uav descriptor */
    if (desc.flags & GPU_ResourceFlag_Writable)
    {
        if (!r->uav_descriptor)
        {
            r->uav_descriptor = GPU_D12_AcquireDescriptor(g->cbv_srv_uav_heap);
        }
        ID3D12Device_CreateUnorderedAccessView(g->device, r->d3d_resource, 0, 0, r->uav_descriptor->handle);
    }
    /* Create rtv descriptor */
    if (desc.flags & GPU_ResourceFlag_Renderable)
    {
        if (!r->rtv_descriptor)
        {
            r->rtv_descriptor = GPU_D12_AcquireDescriptor(g->rtv_heap);
        }
        ID3D12Device_CreateRenderTargetView(g->device, r->d3d_resource, 0, r->rtv_descriptor->handle);
    }
    /* Create sampler descriptor */
    if (desc.kind == GPU_ResourceKind_Sampler)
    {
        if (!r->sampler_descriptor)
        {
            r->sampler_descriptor = GPU_D12_AcquireDescriptor(g->sampler_heap);
        }
        D3D12_SAMPLER_DESC d3d_desc = ZI;
        d3d_desc.Filter = (D3D12_FILTER)desc.sampler.filter;
        d3d_desc.AddressU = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.x;
        d3d_desc.AddressV = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.y;
        d3d_desc.AddressW = (D3D12_TEXTURE_ADDRESS_MODE)desc.sampler.z;
        d3d_desc.MipLODBias = desc.sampler.mip_lod_bias;
        d3d_desc.MaxAnisotropy = MaxU32(desc.sampler.max_anisotropy, 1);
        d3d_desc.ComparisonFunc = (D3D12_COMPARISON_FUNC)desc.sampler.comparison;
        d3d_desc.BorderColor[0] = desc.sampler.border_color.x;
        d3d_desc.BorderColor[1] = desc.sampler.border_color.y;
        d3d_desc.BorderColor[2] = desc.sampler.border_color.z;
        d3d_desc.BorderColor[3] = desc.sampler.border_color.w;
        d3d_desc.MinLOD = desc.sampler.min_lod;
        d3d_desc.MaxLOD = desc.sampler.max_lod;
        /* Defaults: zero-valued address modes mean "unspecified" → clamp.
         * (Also drops the stray double semicolons that were here.) */
        if (d3d_desc.AddressU == 0) d3d_desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
        if (d3d_desc.AddressV == 0) d3d_desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
        if (d3d_desc.AddressW == 0) d3d_desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
        if (d3d_desc.MaxLOD >= F32Infinity)
        {
            d3d_desc.MaxLOD = D3D12_FLOAT32_MAX;
        }
        ID3D12Device_CreateSampler(g->device, &d3d_desc, r->sampler_descriptor->handle);
    }
    /* Stash the (possibly normalized) desc and physical size so release can
     * recompute the same reuse hash. */
    r->desc = desc;
    r->buffer_size = buffer_size;
    return (GPU_Resource *)r;
}
void GPU_ReleaseResource(GPU_Resource *gpu_resource, GPU_ReleaseFlag flags)
{
  /* Releases a resource acquired from this backend.
     All view/sampler descriptors are released unconditionally. Then either:
     - GPU_ReleaseFlag_Reuse: the wrapper (with its live d3d_resource) is parked
       on a hash-bucketed reuse list keyed by GPU_D12_ReuseHashFromResourceDesc,
       so a later acquire with an identical desc can skip D3D12 re-creation; or
     - otherwise: the underlying ID3D12Resource is released and the wrapper
       struct is pushed onto the shared free list. */
  GPU_D12_SharedState *g = &GPU_D12_shared_state;
  GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
  /* Release bindless descriptors back to their heaps; cleared so a reused
     resource re-acquires fresh descriptors on next acquire. */
  if (r->srv_descriptor)
  {
    GPU_D12_ReleaseDescriptor(r->srv_descriptor);
    r->srv_descriptor = 0;
  }
  if (r->uav_descriptor)
  {
    GPU_D12_ReleaseDescriptor(r->uav_descriptor);
    r->uav_descriptor = 0;
  }
  if (r->rtv_descriptor)
  {
    GPU_D12_ReleaseDescriptor(r->rtv_descriptor);
    r->rtv_descriptor = 0;
  }
  if (r->sampler_descriptor)
  {
    GPU_D12_ReleaseDescriptor(r->sampler_descriptor);
    r->sampler_descriptor = 0;
  }
  if (flags & GPU_ReleaseFlag_Reuse)
  {
    GPU_ResourceDesc desc = r->desc;
    u64 buffer_size = r->buffer_size;
    u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size);
    u64 bin_index = reuse_hash % countof(g->resource_reuse_bins);
    GPU_D12_ResourceReuseListBin *bin = &g->resource_reuse_bins[bin_index];
    {
      Lock lock = LockE(&bin->mutex);
      {
        /* Find (or create) the per-hash list inside this bucket. */
        GPU_D12_ResourceReuseList *list = bin->first;
        for (; list; list = list->next)
        {
          if (list->hash == reuse_hash) break;
        }
        if (!list)
        {
          /* Reuse a freed list node if available, else carve a new one from
             the permanent arena (cacheline-aligned to avoid false sharing). */
          list = bin->first_free;
          if (list)
          {
            bin->first_free = list->next;
          }
          else
          {
            Arena *perm = PermArena();
            PushAlign(perm, CachelineSize);
            list = PushStruct(perm, GPU_D12_ResourceReuseList);
            PushAlign(perm, CachelineSize);
          }
          list->hash = reuse_hash;
          DllPushBack(bin->first, bin->last, list);
        }
        StackPushN(list->first, r, next_free);
      }
      Unlock(&lock);
    }
  }
  else
  {
    /* Only these kinds own an ID3D12Resource; samplers are descriptor-only.
       NOTE(review): switch has no default/break — relies on these being the
       only kinds that reach this path with a live d3d_resource. */
    switch (r->desc.kind)
    {
      case GPU_ResourceKind_Buffer:
      case GPU_ResourceKind_Texture1D:
      case GPU_ResourceKind_Texture2D:
      case GPU_ResourceKind_Texture3D:
      {
        ID3D12Resource_Release(r->d3d_resource);
      }
    }
    /* Return the wrapper struct to the global free list. */
    Lock lock = LockE(&g->free_resources_mutex);
    r->next_free = g->first_free_resource;
    g->first_free_resource = r;
    Unlock(&lock);
  }
}
u32 GPU_GetReadableId(GPU_Resource *gpu_resource)
{
  /* Bindless SRV descriptor index for this resource; U32Max when the resource
     is null or has no SRV descriptor. */
  GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
  if (r && r->srv_descriptor)
  {
    return r->srv_descriptor->index;
  }
  return U32Max;
}
u32 GPU_GetWritableId(GPU_Resource *gpu_resource)
{
  /* Bindless UAV descriptor index for this resource; U32Max when the resource
     is null or has no UAV descriptor. */
  GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
  if (r && r->uav_descriptor)
  {
    return r->uav_descriptor->index;
  }
  return U32Max;
}
u32 GPU_GetSamplerId(GPU_Resource *gpu_resource)
{
  /* Bindless sampler descriptor index for this resource; U32Max when the
     resource is null or has no sampler descriptor. */
  GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
  if (r && r->sampler_descriptor)
  {
    return r->sampler_descriptor->index;
  }
  return U32Max;
}
Vec2I32 GPU_GetTextureSize2D(GPU_Resource *gpu_resource)
{
  /* Width/height of the texture as recorded in its creation desc. */
  GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
  Vec3I32 size = r->desc.texture.size;
  return VEC2I32(size.x, size.y);
}
Vec3I32 GPU_GetTextureSize3D(GPU_Resource *gpu_resource)
{
  /* Full 3D size of the texture as recorded in its creation desc. */
  GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
  return r->desc.texture.size;
}
u64 GPU_GetFootprintSize(GPU_Resource *gpu_resource)
{
  /* Total byte size of the upload footprint for subresource 0, as reported by
     ID3D12Device::GetCopyableFootprints (includes row-pitch padding). */
  GPU_D12_SharedState *g = &GPU_D12_shared_state;
  GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_resource;
  D3D12_RESOURCE_DESC desc = ZI;
  ID3D12Resource_GetDesc(r->d3d_resource, &desc);
  D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed = ZI;
  u64 total_size = 0;
  u64 row_size = 0;
  u32 num_rows = 0;
  ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed, &num_rows, &row_size, &total_size);
  return total_size;
}
////////////////////////////////
//~ @hookdef Command list hooks
GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue_kind)
{
  /* Begins recording a command list for `queue_kind`: pops a zeroed list off
     the fiber-local free list when one exists, otherwise allocates a fresh
     (zeroed) one from the permanent arena. */
  GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId());
  GPU_D12_CommandList *cl = f->first_free_command_list;
  if (!cl)
  {
    Arena *perm = PermArena();
    cl = PushStruct(perm, GPU_D12_CommandList);
  }
  else
  {
    StackPop(f->first_free_command_list);
    ZeroStruct(cl);
  }
  cl->queue_kind = queue_kind;
  return (GPU_CommandList *)cl;
}
i64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
{
  /* Finishes a recorded command list: translates the queued backend-agnostic
     GPU_D12_Command nodes into D3D12 calls on a raw command list, submits it,
     recycles the command nodes and the list onto fiber-local free lists, and
     returns the fence value that signals completion of this submission. */
  GPU_D12_SharedState *g = &GPU_D12_shared_state;
  GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId());
  GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl;
  GPU_QueueKind queue_kind = cl->queue_kind;
  /* NOTE(review): `queue` is not referenced again in this function. */
  GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
  TempArena scratch = BeginScratchNoConflict();
  /* Render targets named by TransitionToRtv slots (what SHOULD be bound) vs
     what OMSetRenderTargets has actually been called with so far. */
  GPU_D12_Resource *slotted_render_targets[GPU_MaxRenderTargets] = ZI;
  GPU_D12_Resource *bound_render_targets[GPU_MaxRenderTargets] = ZI;
  /* Begin dx12 command list */
  GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(queue_kind);
  ID3D12GraphicsCommandList *rcl = dx12_cl->cl;
  /* Lazily-set state, so root signatures/heaps are bound at most once. */
  b32 graphics_rootsig_set = 0;
  b32 compute_rootsig_set = 0;
  b32 descriptor_heaps_set = 0;
  GPU_D12_Pipeline *bound_pipeline = 0;
  /* Process gpu commands into dx12 commands */
  {
    GPU_D12_Command *cmd = cl->first;
    while (cmd)
    {
      switch (cmd->kind)
      {
        default: break;
        //- Resource barrier
        case GPU_D12_CommandKind_TransitionToSrv:
        case GPU_D12_CommandKind_TransitionToUav:
        case GPU_D12_CommandKind_TransitionToRtv:
        case GPU_D12_CommandKind_TransitionToCopySrc:
        case GPU_D12_CommandKind_TransitionToCopyDst:
        case GPU_D12_CommandKind_FlushUav:
        {
          /* Consume the whole run of consecutive barrier commands and submit
             them as one ResourceBarrier batch. A globally-unique generation
             tag on each resource deduplicates: at most one barrier per
             resource per batch. */
          u64 barrier_gen = 1 + Atomic64FetchAdd(&g->resource_barrier_gen.v, 1);
          /* Build barriers batch list */
          Struct(TmpBarrier) { TmpBarrier *next; GPU_D12_Resource *r; };
          u32 max_barriers_count = 0;
          TmpBarrier *first_barrier = 0;
          TmpBarrier *last_barrier = 0;
          while (cmd && (cmd->kind == GPU_D12_CommandKind_TransitionToSrv
                         || cmd->kind == GPU_D12_CommandKind_TransitionToUav
                         || cmd->kind == GPU_D12_CommandKind_TransitionToRtv
                         || cmd->kind == GPU_D12_CommandKind_TransitionToCopySrc
                         || cmd->kind == GPU_D12_CommandKind_TransitionToCopyDst
                         || cmd->kind == GPU_D12_CommandKind_FlushUav))
          {
            D3D12_RESOURCE_BARRIER_TYPE type = ZI;
            D3D12_RESOURCE_STATES state_after = ZI;
            GPU_D12_Resource *resource = cmd->barrier.resource;
            switch (cmd->kind)
            {
              case GPU_D12_CommandKind_TransitionToSrv:
              {
                type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                state_after = D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE;
              } break;
              case GPU_D12_CommandKind_TransitionToUav:
              {
                type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                state_after = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
              } break;
              case GPU_D12_CommandKind_TransitionToRtv:
              {
                type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                state_after = D3D12_RESOURCE_STATE_RENDER_TARGET;
                /* TransitionToRtv doubles as "bind to render-target slot". */
                i32 slot = cmd->barrier.rt_slot;
                if (slot >= 0 && slot < countof(slotted_render_targets))
                {
                  slotted_render_targets[slot] = resource;
                }
              } break;
              case GPU_D12_CommandKind_TransitionToCopySrc:
              {
                type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                state_after = D3D12_RESOURCE_STATE_COPY_SOURCE;
              } break;
              case GPU_D12_CommandKind_TransitionToCopyDst:
              {
                type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                state_after = D3D12_RESOURCE_STATE_COPY_DEST;
              } break;
              case GPU_D12_CommandKind_FlushUav:
              {
                type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
              } break;
            }
            b32 skip = 0;
            if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV && resource->barrier_gen == barrier_gen)
            {
              /* Skip UAV transitions on resources that already have transition in the batch */
              skip = 1;
            }
            if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && resource->barrier_state_after == state_after)
            {
              /* Skip transitions into existing state */
              skip = 1;
            }
            if (!skip)
            {
              resource->barrier_type = type;
              resource->barrier_state_after = state_after;
              if (resource->barrier_gen != barrier_gen)
              {
                /* First barrier for this resource in this batch. */
                TmpBarrier *b = PushStruct(scratch.arena, TmpBarrier);
                resource->barrier_gen = barrier_gen;
                b->r = resource;
                QueuePush(first_barrier, last_barrier, b);
                ++max_barriers_count;
              }
            }
            cmd = cmd->next;
          }
          /* Submit batched barriers */
          /* FIXME: Transitions from UAV -> UAV should insert UAV barrier */
          u32 barriers_count = 0;
          D3D12_RESOURCE_BARRIER *rbs = PushStructs(scratch.arena, D3D12_RESOURCE_BARRIER, max_barriers_count);
          for (TmpBarrier *b = first_barrier; b; b = b->next)
          {
            GPU_D12_Resource *resource = b->r;
            D3D12_RESOURCE_BARRIER_TYPE type = resource->barrier_type;
            D3D12_RESOURCE_STATES state_before = resource->state;
            D3D12_RESOURCE_STATES state_after = resource->barrier_state_after;
            /* Drop no-op transitions (already in the target state). */
            if (!(type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && state_before == state_after))
            {
              D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
              rb->Type = resource->barrier_type;
              if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION)
              {
                rb->Transition.pResource = resource->d3d_resource;
                rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
                rb->Transition.StateBefore = state_before;
                rb->Transition.StateAfter = state_after;
                resource->state = state_after;
              }
              else if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_UAV)
              {
                rb->UAV.pResource = resource->d3d_resource;
              }
            }
          }
          if (barriers_count > 0)
          {
            ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs);
          }
        } break;
        //- Clear rtv
        case GPU_D12_CommandKind_ClearRtv:
        {
          /* Clear color is baked into the resource's creation desc. Resource
             must already be in RENDER_TARGET state (via TransitionToRtv). */
          GPU_D12_Resource *resource = cmd->clear.resource;
          Assert(resource->state == D3D12_RESOURCE_STATE_RENDER_TARGET);
          f32 clear_color[4] = ZI;
          clear_color[0] = resource->desc.clear_color.x;
          clear_color[1] = resource->desc.clear_color.y;
          clear_color[2] = resource->desc.clear_color.z;
          clear_color[3] = resource->desc.clear_color.w;
          ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, resource->rtv_descriptor->handle, clear_color, 0, 0);
          cmd = cmd->next;
        } break;
        //- Copy resource
        case GPU_D12_CommandKind_Copy:
        {
          GPU_D12_Resource *dst = cmd->copy.dst;
          GPU_D12_Resource *src = cmd->copy.src;
          D3D12_RESOURCE_DESC dst_desc = ZI;
          D3D12_RESOURCE_DESC src_desc = ZI;
          ID3D12Resource_GetDesc(dst->d3d_resource, &dst_desc);
          ID3D12Resource_GetDesc(src->d3d_resource, &src_desc);
          if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
          { /* Copy buffer -> buffer */
            /* Copy only the overlap of the two logical buffer sizes. */
            u64 dst_len = dst->desc.buffer.count * dst->desc.buffer.stride;
            u64 src_len = src->desc.buffer.count * src->desc.buffer.stride;
            u64 cpy_len = MinU64(dst_len, src_len);
            if (cpy_len > 0)
            {
              ID3D12GraphicsCommandList_CopyBufferRegion(rcl, dst->d3d_resource, 0, src->d3d_resource, 0, cpy_len);
            }
          }
          else if (src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
          { /* Copy buffer -> texture */
            /* Source buffer is interpreted with the destination texture's
               subresource-0 placed footprint (upload-style copy). */
            D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_placed_footprint = ZI;
            ID3D12Device_GetCopyableFootprints(g->device, &dst_desc, 0, 1, 0, &dst_placed_footprint, 0, 0, 0);
            D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI;
            dst_loc.pResource = dst->d3d_resource;
            dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
            dst_loc.SubresourceIndex = 0;
            D3D12_TEXTURE_COPY_LOCATION src_loc = ZI;
            src_loc.pResource = src->d3d_resource;
            src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
            src_loc.PlacedFootprint = dst_placed_footprint;
            ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, 0, 0, 0, &src_loc, 0);
          }
          else if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
          { /* Copy texture -> buffer */
            /* TODO */
            Assert(0);
          }
          else if (dst_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER)
          { /* Copy texture -> texture */
            /* TODO */
            Assert(0);
          }
          cmd = cmd->next;
        } break;
        //- Dispatch Vs/Ps shader
        case GPU_D12_CommandKind_Rasterize:
        {
          /* Resolve (or build) a PSO keyed by shaders, topology class, and the
             formats of the currently-slotted render targets. */
          GPU_D12_Pipeline *pipeline = 0;
          {
            GPU_D12_PipelineDesc pipeline_desc = ZI;
            pipeline_desc.vs = cmd->rasterize.vs;
            pipeline_desc.ps = cmd->rasterize.ps;
            {
              pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED;
              switch (cmd->rasterize.mode)
              {
                default: Assert(0); break;
                case GPU_RasterizeMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break;
                case GPU_RasterizeMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break;
                case GPU_RasterizeMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break;
                case GPU_RasterizeMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
                case GPU_RasterizeMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break;
              }
            }
            for (u32 i = 0; i < cmd->rasterize.rts_count; ++i)
            {
              GPU_D12_Resource *r = slotted_render_targets[i];
              if (r)
              {
                pipeline_desc.render_target_formats[i] = r->desc.texture.format;
              }
              else
              {
                Assert(0); /* No bound render target in slot */
                pipeline_desc.render_target_formats[i] = GPU_Format_Unknown;
              }
            }
            pipeline = GPU_D12_PipelineFromDesc(pipeline_desc);
          }
          /* NOTE(review): index_buffer is dereferenced without a null check;
             a Rasterize command with no index buffer would crash here. */
          if (pipeline
              && cmd->rasterize.index_buffer->desc.buffer.count > 0)
          {
            /* Set descriptor heaps */
            if (!descriptor_heaps_set)
            {
              ID3D12DescriptorHeap *heaps[] = { g->cbv_srv_uav_heap->d3d_heap, g->sampler_heap->d3d_heap };
              ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps);
              descriptor_heaps_set = 1;
            }
            /* Bind rootsig */
            if (!graphics_rootsig_set)
            {
              ID3D12GraphicsCommandList_SetGraphicsRootSignature(rcl, g->bindless_rootsig);
              graphics_rootsig_set = 1;
            }
            /* Bind pipeline */
            if (pipeline != bound_pipeline)
            {
              ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso);
              bound_pipeline = pipeline;
            }
            /* Fill signature */
            /* TODO: Only upload dirty */
            {
              /* The signature blob is uploaded as root 32-bit constants. */
              u32 sig_size = cmd->rasterize.sig_size;
              void *sig = cmd->rasterize.sig;
              u32 num32bit = sig_size / 4;
              ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(rcl, 0, num32bit, sig, 0);
            }
            /* Set rasterizer state */
            /* TODO: Only set dirty */
            {
              D3D12_RECT scissor = ZI;
              scissor.left = cmd->rasterize.scissor.left;
              scissor.top = cmd->rasterize.scissor.top;
              scissor.right = cmd->rasterize.scissor.right;
              scissor.bottom = cmd->rasterize.scissor.bottom;
              D3D12_VIEWPORT viewport = ZI;
              viewport.TopLeftX = cmd->rasterize.viewport.top_left_x;
              viewport.TopLeftY = cmd->rasterize.viewport.top_left_y;
              viewport.Width = cmd->rasterize.viewport.width;
              viewport.Height = cmd->rasterize.viewport.height;
              viewport.MinDepth = cmd->rasterize.viewport.min_depth;
              viewport.MaxDepth = cmd->rasterize.viewport.max_depth;
              ID3D12GraphicsCommandList_RSSetScissorRects(rcl, 1, &scissor);
              ID3D12GraphicsCommandList_RSSetViewports(rcl, 1, &viewport);
            }
            /* Set topology */
            /* TODO: Only set dirty */
            {
              D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
              switch (cmd->rasterize.mode)
              {
                default: Assert(0); break;
                case GPU_RasterizeMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break;
                case GPU_RasterizeMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break;
                case GPU_RasterizeMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break;
                case GPU_RasterizeMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break;
                case GPU_RasterizeMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break;
              }
              ID3D12GraphicsCommandList_IASetPrimitiveTopology(rcl, topology);
            }
            /* Set index buffer */
            /* TODO: Only set dirty */
            u32 indices_count = 0;
            {
              GPU_D12_Resource *indices = cmd->rasterize.index_buffer;
              D3D12_INDEX_BUFFER_VIEW ibv = ZI;
              ibv.BufferLocation = indices->buffer_gpu_address;
              /* NOTE(review): a DXGI_FORMAT is passed where the converter takes
                 a GPU_Format; harmless today because the conversion is a raw
                 cast, but the call is semantically redundant — confirm. */
              if (indices->desc.buffer.stride == 2)
              {
                ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R16_UINT);
              }
              else
              {
                Assert(indices->desc.buffer.stride == 4);
                ibv.Format = GPU_D12_DxgiFormatFromGpuFormat(DXGI_FORMAT_R32_UINT);
              }
              ibv.SizeInBytes = indices->desc.buffer.count * indices->desc.buffer.stride;
              indices_count = indices->desc.buffer.count;
              ID3D12GraphicsCommandList_IASetIndexBuffer(rcl, &ibv);
            }
            /* Bind render targets */
            {
              /* Only re-issue OMSetRenderTargets when the slotted set changed.
                 NOTE(review): a null slot entry would be dereferenced here. */
              b32 om_dirty = 0;
              D3D12_CPU_DESCRIPTOR_HANDLE rtvs[countof(bound_render_targets)] = ZI;
              for (u32 i = 0; i < cmd->rasterize.rts_count; ++i)
              {
                GPU_D12_Resource *target = slotted_render_targets[i];
                if (bound_render_targets[i] != target)
                {
                  bound_render_targets[i] = target;
                  om_dirty = 1;
                }
                rtvs[i] = target->rtv_descriptor->handle;
              }
              if (om_dirty)
              {
                ID3D12GraphicsCommandList_OMSetRenderTargets(rcl, cmd->rasterize.rts_count, rtvs, 0, 0);
              }
            }
            /* Dispatch */
            ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, indices_count, cmd->rasterize.instances_count, 0, 0, 0);
          }
          cmd = cmd->next;
        } break;
        //- Dispatch compute shader
        case GPU_D12_CommandKind_Compute:
        {
          GPU_D12_Pipeline *pipeline = 0;
          {
            GPU_D12_PipelineDesc pipeline_desc = ZI;
            pipeline_desc.cs = cmd->compute.cs;
            pipeline = GPU_D12_PipelineFromDesc(pipeline_desc);
          }
          if (pipeline)
          {
            /* Set descriptor heaps */
            if (!descriptor_heaps_set)
            {
              ID3D12DescriptorHeap *heaps[] = { g->cbv_srv_uav_heap->d3d_heap, g->sampler_heap->d3d_heap };
              ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps);
              descriptor_heaps_set = 1;
            }
            /* Bind rootsig */
            if (!compute_rootsig_set)
            {
              ID3D12GraphicsCommandList_SetComputeRootSignature(rcl, g->bindless_rootsig);
              compute_rootsig_set = 1;
            }
            /* Bind pipeline */
            if (pipeline != bound_pipeline)
            {
              ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso);
              bound_pipeline = pipeline;
            }
            /* Fill signature */
            /* TODO: Only upload dirty */
            {
              u32 sig_size = cmd->compute.sig_size;
              void *sig = cmd->compute.sig;
              u32 num32bit = sig_size / 4;
              ID3D12GraphicsCommandList_SetComputeRoot32BitConstants(rcl, 0, num32bit, sig, 0);
            }
            /* Dispatch */
            ID3D12GraphicsCommandList_Dispatch(rcl, cmd->compute.num_threads_x, cmd->compute.num_threads_y, cmd->compute.num_threads_z);
          }
          cmd = cmd->next;
        } break;
      }
    }
  }
  /* End dx12 command list */
  u64 fence_target = GPU_D12_EndRawCommandList(dx12_cl);
  /* Free commands */
  /* Splice the whole command chain onto the fiber-local command free list. */
  if (cl->last)
  {
    cl->last->next = f->first_free_command;
    f->first_free_command = cl->first;
  }
  /* Free command list */
  StackPush(f->first_free_command_list, cl);
  EndScratch(scratch);
  return fence_target;
}
////////////////////////////////
//~ @hookdef Profiling helper hooks
void GPU_ProfN(GPU_CommandList *cl, String name)
{
  /* TODO: record a named GPU profiling marker into `cl`; currently a no-op. */
}
////////////////////////////////
//~ @hookdef Barrier hooks
void GPU_TransitionToReadable(GPU_CommandList *cl, GPU_Resource *resource)
{
  /* Record a barrier moving `resource` into the shader-readable (SRV) state. */
  GPU_D12_CommandList *d12_cl = (GPU_D12_CommandList *)cl;
  GPU_D12_Resource *d12_resource = (GPU_D12_Resource *)resource;
  GPU_D12_Command *cmd = GPU_D12_PushCmd(d12_cl);
  cmd->barrier.resource = d12_resource;
  cmd->kind = GPU_D12_CommandKind_TransitionToSrv;
}
void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource)
{
  /* Record a barrier moving `resource` into the unordered-access (UAV) state. */
  GPU_D12_CommandList *d12_cl = (GPU_D12_CommandList *)cl;
  GPU_D12_Resource *d12_resource = (GPU_D12_Resource *)resource;
  GPU_D12_Command *cmd = GPU_D12_PushCmd(d12_cl);
  cmd->barrier.resource = d12_resource;
  cmd->kind = GPU_D12_CommandKind_TransitionToUav;
}
void GPU_TransitionToRenderable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot)
{
  /* Record a barrier moving `resource` into the render-target state, and
     associate it with render-target slot `slot` for subsequent draws. */
  GPU_D12_CommandList *d12_cl = (GPU_D12_CommandList *)cl;
  GPU_D12_Resource *d12_resource = (GPU_D12_Resource *)resource;
  GPU_D12_Command *cmd = GPU_D12_PushCmd(d12_cl);
  cmd->barrier.resource = d12_resource;
  cmd->barrier.rt_slot = slot;
  cmd->kind = GPU_D12_CommandKind_TransitionToRtv;
}
void GPU_TransitionToCopySrc(GPU_CommandList *cl, GPU_Resource *resource)
{
  /* Record a barrier moving `resource` into the copy-source state. */
  GPU_D12_CommandList *d12_cl = (GPU_D12_CommandList *)cl;
  GPU_D12_Resource *d12_resource = (GPU_D12_Resource *)resource;
  GPU_D12_Command *cmd = GPU_D12_PushCmd(d12_cl);
  cmd->barrier.resource = d12_resource;
  cmd->kind = GPU_D12_CommandKind_TransitionToCopySrc;
}
void GPU_TransitionToCopyDst(GPU_CommandList *cl, GPU_Resource *resource)
{
  /* Record a barrier moving `resource` into the copy-destination state. */
  GPU_D12_CommandList *d12_cl = (GPU_D12_CommandList *)cl;
  GPU_D12_Resource *d12_resource = (GPU_D12_Resource *)resource;
  GPU_D12_Command *cmd = GPU_D12_PushCmd(d12_cl);
  cmd->barrier.resource = d12_resource;
  cmd->kind = GPU_D12_CommandKind_TransitionToCopyDst;
}
void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource)
{
  /* Record a UAV flush barrier for `resource` (orders successive UAV work). */
  GPU_D12_CommandList *d12_cl = (GPU_D12_CommandList *)cl;
  GPU_D12_Resource *d12_resource = (GPU_D12_Resource *)resource;
  GPU_D12_Command *cmd = GPU_D12_PushCmd(d12_cl);
  cmd->barrier.resource = d12_resource;
  cmd->kind = GPU_D12_CommandKind_FlushUav;
}
////////////////////////////////
//~ @hookdef Dispatch hooks
void GPU_ClearRenderable(GPU_CommandList *gpu_cl, GPU_Resource *resource)
{
  /* Record a clear-render-target command; the clear color used at execution
     time comes from the resource's creation desc. */
  GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
  cmd->clear.resource = (GPU_D12_Resource *)resource;
  cmd->kind = GPU_D12_CommandKind_ClearRtv;
}
void GPU_Rasterize_(GPU_CommandList *gpu_cl,
                    u32 sig_size,
                    void *sig,
                    VertexShader vs,
                    PixelShader ps,
                    u32 rts_count,
                    GPU_Viewport viewport,
                    GPU_Scissor scissor,
                    u32 instances_count,
                    GPU_Resource *index_buffer,
                    GPU_RasterizeMode mode)
{
  /* Records an indexed, instanced rasterize (draw) command.
     The signature blob `sig` is copied inline into the command, clamped to the
     inline capacity (an Assert fires if it would not fit). Render targets for
     slots [0, rts_count) must have been bound via GPU_TransitionToRenderable
     before this command is executed. */
  GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl;
  GPU_D12_Command *cmd = GPU_D12_PushCmd(cl);
  cmd->kind = GPU_D12_CommandKind_Rasterize;
  Assert(sig_size <= sizeof(cmd->rasterize.sig));
  cmd->rasterize.sig_size = MinU32(sizeof(cmd->rasterize.sig), sig_size);
  CopyBytes(cmd->rasterize.sig, sig, cmd->rasterize.sig_size);
  cmd->rasterize.vs = vs;
  cmd->rasterize.ps = ps;
  /* All GPU_MaxRenderTargets slots are addressable, so rts_count equal to the
     maximum is valid (was `<`, an off-by-one that rejected a full RT set). */
  Assert(rts_count <= GPU_MaxRenderTargets);
  cmd->rasterize.rts_count = rts_count;
  cmd->rasterize.viewport = viewport;
  cmd->rasterize.scissor = scissor;
  cmd->rasterize.instances_count = instances_count;
  cmd->rasterize.index_buffer = (GPU_D12_Resource *)index_buffer;
  cmd->rasterize.mode = mode;
}
void GPU_Compute_(GPU_CommandList *gpu_cl,
                  u32 sig_size,
                  void *sig,
                  ComputeShader cs,
                  u32 num_threads_x,
                  u32 num_threads_y,
                  u32 num_threads_z)
{
  /* Records a compute-dispatch command. The signature blob `sig` is copied
     inline into the command, clamped to the inline capacity (an Assert fires
     if it would not fit). */
  GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
  cmd->kind = GPU_D12_CommandKind_Compute;
  Assert(sig_size <= sizeof(cmd->compute.sig));
  u32 copy_size = MinU32(sizeof(cmd->compute.sig), sig_size);
  cmd->compute.sig_size = copy_size;
  CopyBytes(cmd->compute.sig, sig, copy_size);
  cmd->compute.cs = cs;
  cmd->compute.num_threads_x = num_threads_x;
  cmd->compute.num_threads_y = num_threads_y;
  cmd->compute.num_threads_z = num_threads_z;
}
////////////////////////////////
//~ @hookdef Copy hooks
void GPU_CopyResource(GPU_CommandList *gpu_cl, GPU_Resource *gpu_dst, GPU_Resource *gpu_src)
{
  /* Records a copy command from `gpu_src` to `gpu_dst`; the buffer/texture
     copy variant is resolved at execution time from the resource descs. */
  GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)gpu_cl);
  cmd->kind = GPU_D12_CommandKind_Copy;
  cmd->copy.dst = (GPU_D12_Resource *)gpu_dst;
  cmd->copy.src = (GPU_D12_Resource *)gpu_src;
}
////////////////////////////////
//~ @hookdef Map hooks
GPU_Mapped GPU_Map(GPU_Resource *gpu_r)
{
  /* Maps subresource 0 of the resource for CPU access. The zeroed read range
     tells D3D12 the CPU will not read from the mapping. Panics on failure. */
  GPU_D12_Resource *r = (GPU_D12_Resource *)gpu_r;
  GPU_Mapped result = ZI;
  result.resource = gpu_r;
  D3D12_RANGE read_range = ZI;
  HRESULT hr = ID3D12Resource_Map(r->d3d_resource, 0, &read_range, &result.mem);
  b32 ok = SUCCEEDED(hr) && result.mem != 0;
  if (!ok)
  {
    /* TODO: Don't panic */
    Panic(Lit("Failed to map command buffer resource"));
  }
  return result;
}
void GPU_Unmap(GPU_Mapped m)
{
  /* Unmaps subresource 0; the null written-range tells D3D12 the CPU may have
     written the entire mapping. */
  ID3D12Resource_Unmap(((GPU_D12_Resource *)m.resource)->d3d_resource, 0, 0);
}
void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference)
{
  /* Copies tightly-packed texel rows from `src` into `dst` laid out as the
     upload footprint of `footprint_reference`'s subresource 0 (rows padded to
     the device's RowPitch).
     Fixes vs. original: the destination slice offset previously used the
     *source* tight pitch (upload_row_size * num_rows) instead of the padded
     RowPitch * num_rows, corrupting copies whenever DepthOrArraySize > 1 and
     RowPitch != row size; slice strides are now computed in u64 (was u32,
     which could truncate for large textures); the dead `src_overflow` flag
     (checked but never set) and the unused read_range local are removed. */
  GPU_D12_SharedState *g = &GPU_D12_shared_state;
  D3D12_RESOURCE_DESC desc = ZI;
  ID3D12Resource_GetDesc(((GPU_D12_Resource *)footprint_reference)->d3d_resource, &desc);
  u64 upload_size = 0;
  u64 upload_row_size = 0;
  u32 upload_num_rows = 0;
  D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI;
  ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size);
  D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint;
  {
    u8 *dst_base = (u8 *)dst + placed_footprint.Offset;
    u8 *src_base = (u8 *)src;
    /* Slice pitches: src rows are tightly packed; dst rows are RowPitch apart,
       so the dst slice pitch is RowPitch * NumRows (D3D12 convention). */
    u64 src_z_size = upload_row_size * (u64)upload_num_rows;
    u64 dst_z_size = (u64)footprint.RowPitch * (u64)upload_num_rows;
    for (u32 z = 0; z < desc.DepthOrArraySize; ++z)
    {
      u8 *dst_slice = dst_base + z * dst_z_size;
      u8 *src_slice = src_base + z * src_z_size;
      for (u32 y = 0; y < upload_num_rows; ++y)
      {
        u8 *dst_row = dst_slice + y * (u64)footprint.RowPitch;
        u8 *src_row = src_slice + y * upload_row_size;
        CopyBytes(dst_row, src_row, upload_row_size);
      }
    }
  }
}
////////////////////////////////
//~ @hookdef Memory info hooks
GPU_MemoryInfo GPU_QueryMemoryInfo(void)
{
/* TODO */
return (GPU_MemoryInfo) ZI;
}
////////////////////////////////
//~ @hookdef Swapchain hooks
GPU_Swapchain *GPU_AcquireSwapchain(P_Window *window, GPU_Format format, Vec2I32 size)
{
  /* Creates (or recycles) a swapchain wrapper for `window`:
     - pops a wrapper from the shared free list or allocates a cacheline-
       aligned one from the permanent arena;
     - creates a flip-discard IDXGISwapChain1 on the direct queue and upgrades
       it to IDXGISwapChain3;
     - optionally sets up the frame-latency waitable object;
     - disables DXGI's Alt+Enter handling and initializes backbuffer resources.
     Panics on swapchain creation failure. */
  GPU_D12_SharedState *g = &GPU_D12_shared_state;
  HRESULT hr = 0;
  HWND hwnd = (HWND)P_GetInternalWindowHandle(window);
  GPU_D12_Queue *queue = GPU_D12_QueueFromKind(GPU_QueueKind_Direct);
  GPU_D12_Swapchain *swapchain = 0;
  /* Try to recycle a previously released wrapper. */
  {
    Lock lock = LockE(&g->free_swapchains_mutex);
    {
      swapchain = g->first_free_swapchain;
      if (swapchain)
      {
        g->first_free_swapchain = swapchain->next;
      }
    }
    Unlock(&lock);
  }
  if (!swapchain)
  {
    Arena *perm = PermArena();
    PushAlign(perm, CachelineSize);
    swapchain = PushStructNoZero(perm, GPU_D12_Swapchain);
    PushAlign(perm, CachelineSize);
  }
  ZeroStruct(swapchain);
  swapchain->format = format;
  /* Create swapchain1 */
  IDXGISwapChain1 *swapchain1 = 0;
  {
    DXGI_SWAP_CHAIN_DESC1 desc = ZI;
    desc.Format = GPU_D12_DxgiFormatFromGpuFormat(format);
    desc.Width = size.x;
    desc.Height = size.y;
    desc.SampleDesc.Count = 1;
    desc.SampleDesc.Quality = 0;
    /* SHADER_INPUT so backbuffers can also be sampled, not only rendered to. */
    desc.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT;
    desc.BufferCount = GPU_D12_SwapchainBufferCount;
    desc.Scaling = DXGI_SCALING_NONE;
    desc.Flags = GPU_D12_SwapchainFlags;
    desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
    desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
    hr = IDXGIFactory2_CreateSwapChainForHwnd(g->factory, (IUnknown *)queue->d3d_queue, hwnd, &desc, 0, 0, &swapchain1);
    if (FAILED(hr))
    {
      Panic(Lit("Failed to create IDXGISwapChain1"));
    }
  }
  /* Upgrade to swapchain3 */
  hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain->swapchain);
  if (FAILED(hr))
  {
    Panic(Lit("Failed to create IDXGISwapChain3"));
  }
  /* Create waitable object */
#if GPU_D12_FrameLatency > 0
  IDXGISwapChain3_SetMaximumFrameLatency(swapchain->swapchain, GPU_D12_FrameLatency);
  swapchain->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->swapchain);
  Assert(swapchain->waitable);
#endif
  /* Disable Alt+Enter changing monitor resolution to match window size */
  IDXGIFactory_MakeWindowAssociation(g->factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
  /* The swapchain3 QueryInterface holds its own reference; drop the 1.x one. */
  IDXGISwapChain1_Release(swapchain1);
  swapchain->window_hwnd = hwnd;
  GPU_D12_InitSwapchainResources(swapchain);
  return (GPU_Swapchain *)swapchain;
}
void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain)
{
  /* TODO: release swapchain COM objects/resources and return the wrapper to
     the shared free list; currently a no-op. */
}
void GPU_YieldOnSwapchain(GPU_Swapchain *swapchain)
{
  /* TODO: block/yield on the swapchain's frame-latency waitable object;
     currently a no-op. */
}
i64 GPU_PresentSwapchain(GPU_Swapchain *gpu_swapchain, GPU_Resource *gpu_texture, Vec2I32 backbuffer_size, Vec2I32 dst, i32 vsync)
{
  /* Blits `gpu_texture` onto the current backbuffer (resizing the swapchain to
     `backbuffer_size` if needed via GPU_D12_UpdateSwapchain) and presents.
     Returns the fence value of the blit submission, or 0 when the texture is
     not blit-compatible with the backbuffer. `vsync` is the DXGI present sync
     interval; 0 with tearing allowed presents with DXGI_PRESENT_ALLOW_TEARING. */
  GPU_D12_Swapchain *swapchain = (GPU_D12_Swapchain *)gpu_swapchain;
  GPU_D12_Resource *texture = (GPU_D12_Resource *)gpu_texture;
  GPU_D12_SwapchainBuffer *swapchain_buffer = GPU_D12_UpdateSwapchain(swapchain, backbuffer_size);
  /* The source texture must match the backbuffer's dimension and sample
     layout for the blit to be valid. */
  D3D12_RESOURCE_DESC src_desc = ZI;
  D3D12_RESOURCE_DESC dst_desc = ZI;
  ID3D12Resource_GetDesc(texture->d3d_resource, &src_desc);
  ID3D12Resource_GetDesc(swapchain_buffer->d3d_resource, &dst_desc);
  b32 is_blitable = src_desc.Dimension == dst_desc.Dimension
    && src_desc.SampleDesc.Count == dst_desc.SampleDesc.Count
    && src_desc.SampleDesc.Quality == dst_desc.SampleDesc.Quality;
  Assert(is_blitable == 1); /* Texture resource must be similar enough to backbuffer resource to blit */
  i64 fence_target = 0;
  if (is_blitable)
  {
    /* Blit */
    fence_target = GPU_D12_BlitToSwapchain(swapchain_buffer, texture, dst);
    u32 present_flags = 0;
    if (GPU_D12_TearingIsAllowed && vsync == 0)
    {
      present_flags |= DXGI_PRESENT_ALLOW_TEARING;
    }
    /* Present */
    {
      __profn("Present");
      HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags);
      if (!SUCCEEDED(hr))
      {
        Assert(0);
      }
    }
  }
  return fence_target;
}