3444 lines
128 KiB
C
3444 lines
128 KiB
C
/* Global backend state for the D3D12 GPU layer; zero-initialized here and
 * populated once by GPU_Startup(). */
GPU_D12_SharedState GPU_D12_shared_state = ZI;
|
|
|
|
////////////////////////////////
|
|
//~ Windows libs
|
|
|
|
#pragma comment(lib, "d3d12")
|
|
#pragma comment(lib, "dxgi")
|
|
#pragma comment(lib, "dxguid")
|
|
#pragma comment(lib, "d3dcompiler")
|
|
|
|
#if ProfilingGpu
|
|
/* For RegOpenKeyEx */
|
|
# include <winreg.h>
|
|
# pragma comment(lib, "advapi32")
|
|
#endif
|
|
|
|
/* ========================== *
|
|
* Startup
|
|
* ========================== */
|
|
|
|
void GPU_Startup(void)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
if (Atomic32FetchTestSet(&g->initialized, 0, 1) != 0)
|
|
{
|
|
P_Panic(Lit("GP layer already initialized"));
|
|
}
|
|
|
|
/* Initialize command descriptor heaps pool */
|
|
g->command_descriptor_heaps_arena = AllocArena(Gibi(64));
|
|
|
|
/* Initialize command buffers pool */
|
|
g->command_buffers_arena = AllocArena(Gibi(64));
|
|
g->command_buffers_dict = InitDict(g->command_buffers_arena, 4096);
|
|
|
|
/* Initialize resources pool */
|
|
g->resources_arena = AllocArena(Gibi(64));
|
|
|
|
/* Initialize swapchains pool */
|
|
g->swapchains_arena = AllocArena(Gibi(64));
|
|
|
|
/* Initialize pipeline cache */
|
|
g->pipelines_arena = AllocArena(Gibi(64));
|
|
g->pipeline_descs = InitDict(g->pipelines_arena, 1024);
|
|
g->top_pipelines = InitDict(g->pipelines_arena, 1024);
|
|
g->top_successful_pipelines = InitDict(g->pipelines_arena, 1024);
|
|
|
|
/* Initialize fenced releases queue */
|
|
g->fenced_releases_arena = AllocArena(Gibi(64));
|
|
|
|
/* Initialize embedded shader archive */
|
|
String embedded_data = INC_GetDxcTar();
|
|
if (embedded_data.len <= 0)
|
|
{
|
|
P_Panic(Lit("No embedded shaders found"));
|
|
}
|
|
g->dxc_archive = TAR_ArchiveFromString(g->pipelines_arena, embedded_data, Lit(""));
|
|
|
|
/* Initialize dx12 */
|
|
/* TODO: Parallelize phases */
|
|
dx12_init_device();
|
|
dx12_init_objects();
|
|
dx12_init_pipelines();
|
|
dx12_init_noise();
|
|
|
|
/* Register callbacks */
|
|
#if RESOURCE_RELOADING
|
|
W_RegisterCallback(pipeline_watch_callback);
|
|
#endif
|
|
P_OnExit(gp_shutdown);
|
|
|
|
/* Start evictor job */
|
|
P_Run(1, dx12_evictor_job, 0, P_Pool_Background, P_Priority_Low, &g->evictor_job_counter);
|
|
}
|
|
|
|
P_ExitFuncDef(gp_shutdown)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
#if 0
|
|
/* Release objects to make live object reporting less noisy */
|
|
//IDXGISwapChain3_Release(g->swapchain);
|
|
for (u32 i = 0; i < countof(g->command_queues); ++i)
|
|
{
|
|
struct command_queue *cq = g->command_queues[i];
|
|
cmomand_queue_release(cq);
|
|
}
|
|
ID3D12Device_Release(g->device);
|
|
#else
|
|
(UNUSED)command_queue_release;
|
|
#endif
|
|
|
|
{
|
|
P_Lock lock = P_LockE(&g->evictor_wake_mutex);
|
|
g->evictor_shutdown = 1;
|
|
P_SignalCv(&g->evictor_wake_cv, I32Max);
|
|
P_Unlock(&lock);
|
|
}
|
|
P_WaitOnCounter(&g->evictor_job_counter);
|
|
}
|
|
|
|
/* ========================== *
|
|
* Dx12 device initialization
|
|
* ========================== */
|
|
|
|
/* Fatal-error helper for device bring-up: formats `error` into a user-facing
 * message and panics.
 * NOTE(review): P_Panic presumably does not return, making the EndScratch
 * below unreachable — confirm P_Panic's contract. Harmless either way since
 * the process is terminating. */
void dx12_init_error(String error)
{
	TempArena scratch = BeginScratchNoConflict();
	String msg = StringFormat(scratch.arena, Lit("Failed to initialize DirectX 12.\n\n%F"), FmtString(error));
	P_Panic(msg);
	EndScratch(scratch);
}
|
|
|
|
void dx12_init_device(void)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
TempArena scratch = BeginScratchNoConflict();
|
|
HRESULT hr = 0;
|
|
|
|
/* Enable debug layer */
|
|
u32 dxgi_factory_flags = 0;
|
|
#if DX12_DEBUG
|
|
{
|
|
__profn("Enable debug layer");
|
|
ID3D12Debug *debug_controller0 = 0;
|
|
hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0);
|
|
if (FAILED(hr))
|
|
{
|
|
dx12_init_error(Lit("Failed to create ID3D12Debug0"));
|
|
}
|
|
|
|
ID3D12Debug1 *debug_controller1 = 0;
|
|
hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1);
|
|
if (FAILED(hr))
|
|
{
|
|
dx12_init_error(Lit("Failed to create ID3D12Debug1"));
|
|
}
|
|
|
|
ID3D12Debug_EnableDebugLayer(debug_controller0);
|
|
|
|
/* FIXME: Enable this */
|
|
//ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, 1);
|
|
|
|
ID3D12Debug_Release(debug_controller1);
|
|
ID3D12Debug_Release(debug_controller0);
|
|
dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG;
|
|
}
|
|
#endif
|
|
|
|
/* Create factory */
|
|
{
|
|
__profn("Create factory");
|
|
hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&g->factory);
|
|
if (FAILED(hr))
|
|
{
|
|
dx12_init_error(Lit("Failed to initialize DXGI factory"));
|
|
}
|
|
}
|
|
|
|
/* Create device */
|
|
{
|
|
__profn("Create device");
|
|
IDXGIAdapter1 *adapter = 0;
|
|
ID3D12Device *device = 0;
|
|
String error = Lit("Could not initialize GPU device.");
|
|
String first_gpu_name = ZI;
|
|
u32 adapter_index = 0;
|
|
b32 skip = 0; /* For debugging iGPU */
|
|
for (;;)
|
|
{
|
|
{
|
|
hr = IDXGIFactory6_EnumAdapterByGpuPreference(g->factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter);
|
|
}
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
DXGI_ADAPTER_DESC1 desc;
|
|
IDXGIAdapter1_GetDesc1(adapter, &desc);
|
|
if (first_gpu_name.len == 0)
|
|
{
|
|
first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description);
|
|
}
|
|
{
|
|
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device);
|
|
}
|
|
if (SUCCEEDED(hr) && !skip)
|
|
{
|
|
break;
|
|
}
|
|
skip = 0;
|
|
ID3D12Device_Release(device);
|
|
IDXGIAdapter1_Release(adapter);
|
|
adapter = 0;
|
|
device = 0;
|
|
++adapter_index;
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
if (!device)
|
|
{
|
|
if (first_gpu_name.len > 0)
|
|
{
|
|
String fmt = Lit("Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.");
|
|
error = StringFormat(scratch.arena, fmt, FmtString(first_gpu_name));
|
|
}
|
|
dx12_init_error(error);
|
|
}
|
|
g->adapter = adapter;
|
|
g->device = device;
|
|
}
|
|
|
|
#if DX12_DEBUG
|
|
/* Enable D3D12 Debug break */
|
|
{
|
|
__profn("Enable d3d12 debug break");
|
|
ID3D12InfoQueue *info = 0;
|
|
hr = ID3D12Device_QueryInterface(g->device, &IID_ID3D12InfoQueue, (void **)&info);
|
|
if (FAILED(hr))
|
|
{
|
|
dx12_init_error(Lit("Failed to query ID3D12Device interface"));
|
|
}
|
|
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1);
|
|
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1);
|
|
ID3D12InfoQueue_Release(info);
|
|
}
|
|
|
|
/* Enable DXGI Debug break */
|
|
{
|
|
__profn("Enable dxgi debug break");
|
|
IDXGIInfoQueue *dxgi_info = 0;
|
|
hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info);
|
|
if (FAILED(hr))
|
|
{
|
|
dx12_init_error(Lit("Failed to get DXGI debug interface"));
|
|
}
|
|
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1);
|
|
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1);
|
|
IDXGIInfoQueue_Release(dxgi_info);
|
|
}
|
|
#endif
|
|
|
|
#if ProfilingGpu && ProfilingGpuStablePowerState
|
|
/* Enable stable power state */
|
|
{
|
|
__profn("Set stable power state");
|
|
b32 success = 1;
|
|
HKEY key = 0;
|
|
success = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, KEY_READ, &key) == ERROR_SUCCESS;
|
|
if (success)
|
|
{
|
|
DWORD value = ZI;
|
|
DWORD dword_size = sizeof(DWORD);
|
|
success = RegQueryValueExW(key, L"AllowDevelopmentWithoutDevLicense", 0, 0, (LPBYTE)&value, &dword_size) == ERROR_SUCCESS;
|
|
RegCloseKey(key);
|
|
if (success)
|
|
{
|
|
success = value != 0;
|
|
}
|
|
}
|
|
P_LogInfoF("D3D12 profiling is enabled, attempting to set stable power state (this will increase GPU timing stability at the cost of performance)");
|
|
if (success)
|
|
{
|
|
P_LogInfoF("Machine is in developer mode, calling ID3D12Device::SetStablePowerState");
|
|
hr = ID3D12Device_SetStablePowerState(g->device, 1);
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
P_LogInfoF("ID3D12Device::SetStablePowerState succeeded");
|
|
}
|
|
else
|
|
{
|
|
success = 0;
|
|
P_LogErrorF("ID3D12Device::SetStablePowerState failed");
|
|
}
|
|
}
|
|
else
|
|
{
|
|
P_LogWarningF("Machine is not in developer mode, cannot call ID3D12Device::SetStablePowerState");
|
|
}
|
|
if (!success)
|
|
{
|
|
P_LogWarningF("Profiling is enabled, but ID3D12Device::SetStablePowerState could not be called. This means that GPU timing may be unreliable.");
|
|
}
|
|
}
|
|
#endif
|
|
|
|
EndScratch(scratch);
|
|
}
|
|
|
|
/* ========================== *
|
|
* Dx12 object initialization
|
|
* ========================== */
|
|
|
|
void dx12_init_objects(void)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
|
|
/* Initialize desc sizes */
|
|
g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
|
|
g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
|
|
g->desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = ID3D12Device_GetDescriptorHandleIncrementSize(g->device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
|
|
|
|
/* Initialize desc counts */
|
|
g->desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
|
|
g->desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = DX12_NUM_RTV_DESCRIPTORS;
|
|
|
|
/* Create global descriptor heaps */
|
|
g->cbv_srv_uav_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
|
g->rtv_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
|
|
|
|
/* Create command queues */
|
|
{
|
|
__profn("Allocate command queues");
|
|
struct command_queue_desc params[] = {
|
|
{.type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Direct queue") },
|
|
{.type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Compute queue") },
|
|
{.type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .dbg_name = Lit("Copyqueue") },
|
|
{.type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Background copy queue") }
|
|
};
|
|
struct command_queue_alloc_job_sig sig = ZI;
|
|
sig.descs_in = params;
|
|
sig.cqs_out = g->command_queues;
|
|
{
|
|
P_Counter counter = ZI;
|
|
P_Run(DX12_NUM_QUEUES, command_queue_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
|
|
P_WaitOnCounter(&counter);
|
|
}
|
|
#if ProfilingIsEnabled
|
|
{
|
|
/* Initialize serially for consistent order in profiler */
|
|
__profn("Initialize command queue profiling contexts");
|
|
for (i32 i = 0; i < DX12_NUM_QUEUES; ++i)
|
|
{
|
|
struct command_queue *cq = g->command_queues[i];
|
|
String dbg_name = params[i].dbg_name;
|
|
__prof_dx12_ctx_alloc(cq->prof, g->device, cq->cq, dbg_name.text, dbg_name.len);
|
|
(UNUSED)dbg_name;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/* ========================== *
|
|
* Dx12 pipeline initialization
|
|
* ========================== */
|
|
|
|
void pipeline_register(u64 num_pipelines, struct pipeline **pipelines);
|
|
|
|
/* Registers the built-in pipeline descriptions, compiles all of them in
 * parallel via jobs, logs per-pipeline results (warning box on failure), and
 * publishes the results through pipeline_register(). */
void dx12_init_pipelines(void)
{
	__prof;
	GPU_D12_SharedState *g = &GPU_D12_shared_state;
	TempArena scratch = BeginScratchNoConflict();

	/* Register pipeline descs. Each desc lives in the pipelines arena and is
	 * keyed in pipeline_descs by the FNV-64 hash of its name. A desc with no
	 * rtvs set is treated as compute-only downstream (format == UNKNOWN). */
	{
		/* Material pipeline: two blended RTVs (R8G8B8A8_UNORM + R16G16B16A16_FLOAT). */
		{
			struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
			desc->name = Lit("kernel_material");
			desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
			desc->rtvs[0].blending = 1;
			desc->rtvs[1].format = DXGI_FORMAT_R16G16B16A16_FLOAT;
			desc->rtvs[1].blending = 1;
			SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
		}
		/* Flood pipeline (no RTVs declared) */
		{
			struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
			desc->name = Lit("kernel_flood");
			SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
		}
		/* Shade pipeline (no RTVs declared) */
		{
			struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
			desc->name = Lit("kernel_shade");
			SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
		}
		/* Shape pipeline: single blended R8G8B8A8_UNORM RTV */
		{
			struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
			desc->name = Lit("kernel_shape");
			desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
			desc->rtvs[0].blending = 1;
			SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
		}
		/* UI pipeline: single blended R8G8B8A8_UNORM RTV */
		{
			struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
			desc->name = Lit("kernel_ui");
			desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
			desc->rtvs[0].blending = 1;
			SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
		}
		/* Blit pipeline: single blended R8G8B8A8_UNORM RTV */
		{
			struct pipeline_desc *desc = PushStruct(g->pipelines_arena, struct pipeline_desc);
			desc->name = Lit("kernel_blit");
			desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
			desc->rtvs[0].blending = 1;
			SetDictValue(g->pipelines_arena, g->pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
		}
	}

	/* Compile pipelines. Snapshot the descs into a contiguous scratch array:
	 * NOTE(review): this relies on consecutive PushStruct calls on the same
	 * arena being contiguous starting at the PushDry mark — confirm that is
	 * part of the arena contract. */
	u32 num_pipelines = 0;
	struct pipeline_desc *descs = PushDry(scratch.arena, struct pipeline_desc);
	for (DictEntry *entry = g->pipeline_descs->first; entry; entry = entry->next)
	{
		struct pipeline_desc *desc = (struct pipeline_desc *)entry->value;
		*PushStruct(scratch.arena, struct pipeline_desc) = *desc;
		++num_pipelines;
	}
	struct pipeline **pipelines = PushStructs(scratch.arena, struct pipeline *, num_pipelines);
	{
		/* One pipeline_alloc_job per desc; wait for all to complete. */
		__profn("Allocate pipelines");
		struct pipeline_alloc_job_sig sig = ZI;
		sig.descs_in = descs;
		sig.pipelines_out = pipelines;
		P_Counter counter = ZI;
		P_Run(num_pipelines, pipeline_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
		P_WaitOnCounter(&counter);
	}
	/* Report results: successes are logged (with warnings if any); failures
	 * are logged and surfaced to the user via a message box. */
	for (u32 i = 0; i < num_pipelines; ++i)
	{
		struct pipeline *pipeline = pipelines[i];
		if (pipeline->success)
		{
			P_LogSuccessF("Successfully compiled pipeline \"%F\" in %F seconds", FmtString(pipeline->name), FmtFloat(SecondsFromNs(pipeline->compilation_time_ns)));
			if (pipeline->error.len)
			{
				String msg = StringFormat(scratch.arena, Lit("Warning while compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(pipeline->error));
				P_LogWarning(msg);
			}
		}
		else
		{
			String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
			String msg = StringFormat(scratch.arena, Lit("Error initializing pipeline \"%F\":\n\n%F"), FmtString(pipeline->name), FmtString(error));
			P_LogError(msg);
			P_MessageBox(P_MessageBoxKind_Warning, msg);
		}
	}
	/* Publish all pipelines (including failed ones) into the shared caches. */
	pipeline_register(num_pipelines, pipelines);

	EndScratch(scratch);
}
|
|
|
|
/* ========================== *
|
|
* Noise texture initialization
|
|
* ========================== */
|
|
|
|
/* Loads the embedded blue-noise 3D texture resource, validates its size,
 * creates a TEXTURE3D GPU resource plus SRV for it, and uploads the texel
 * data via an upload job. Panics if the resource is missing or mis-sized. */
void dx12_init_noise(void)
{
	GPU_D12_SharedState *g = &GPU_D12_shared_state;
	TempArena scratch = BeginScratchNoConflict();

	{
		String noise_res_name = Lit("noise_128x128x64_16.dat");
		RES_Resource noise_res = RES_OpenResource(noise_res_name);
		DXGI_FORMAT format = DXGI_FORMAT_R16_UINT;
		/* width * height * depth * 2 bytes per texel (R16). */
		u32 expected_size = K_BLUE_NOISE_TEX_WIDTH * K_BLUE_NOISE_TEX_HEIGHT * K_BLUE_NOISE_TEX_DEPTH * 2;
		if (RES_ResourceExists(&noise_res))
		{
			String data = RES_GetResourceData(&noise_res);
			if (data.len != expected_size)
			{
				P_Panic(StringFormat(scratch.arena,
				                     Lit("Noise texture has unexpected size for a %Fx%Fx%F texture (expected %F, got %F)"),
				                     FmtUint(K_BLUE_NOISE_TEX_WIDTH), FmtUint(K_BLUE_NOISE_TEX_HEIGHT), FmtUint(K_BLUE_NOISE_TEX_DEPTH),
				                     FmtUint(expected_size), FmtUint(data.len)));
			}
			{
				/* Default (GPU-local) heap; page property/pool must be UNKNOWN
				 * for D3D12_HEAP_TYPE_DEFAULT. */
				D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
				heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
				heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;

				/* Skip zero-clearing: the whole texture is overwritten below. */
				D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;

				D3D12_RESOURCE_DESC desc = ZI;
				desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D;

				desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
				desc.Format = format;
				desc.Alignment = 0;
				desc.Width = K_BLUE_NOISE_TEX_WIDTH;
				desc.Height = K_BLUE_NOISE_TEX_HEIGHT;
				desc.DepthOrArraySize = K_BLUE_NOISE_TEX_DEPTH;
				desc.MipLevels = 1;
				desc.SampleDesc.Count = 1;
				desc.SampleDesc.Quality = 0;

				/* Created in COPY_DEST state so the upload below can write it. */
				struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, D3D12_RESOURCE_STATE_COPY_DEST);
				r->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
				ID3D12Device_CreateShaderResourceView(g->device, r->resource, 0, r->srv_descriptor->handle);

				/* Upload texture (synchronous: wait for the job to finish). */
				{
					P_Counter counter = ZI;
					struct dx12_upload_job_sig sig = ZI;
					sig.resource = r;
					sig.data = data.text;
					P_Run(1, dx12_upload_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
					P_WaitOnCounter(&counter);
				}
			}
		}
		else
		{
			P_Panic(StringFormat(scratch.arena, Lit("Noise resource \"%F\" not found"), FmtString(noise_res_name)));
		}
		RES_CloseResource(&noise_res);
	}

	EndScratch(scratch);
}
|
|
|
|
/* ========================== *
|
|
* Shader compilation
|
|
* ========================== */
|
|
|
|
#if RESOURCE_RELOADING
|
|
|
|
P_JobDef(shader_compile_job, job)
|
|
{
|
|
__prof;
|
|
struct shader_compile_job_sig *sig = job.sig;
|
|
Arena *arena = sig->arena;
|
|
struct shader_compile_desc *desc = &sig->descs[job.id];
|
|
struct shader_compile_result *result = &sig->results[job.id];
|
|
|
|
TempArena scratch = BeginScratch(arena);
|
|
{
|
|
i64 start_ns = P_TimeNs();
|
|
DXC_Result dxc_result = ZI;
|
|
{
|
|
__profn("Compile shader");
|
|
P_LogInfoF("Compiling shader \"%F:%F\"", FmtString(desc->friendly_name), FmtString(desc->entry));
|
|
/* NOTE: `DXC_ARGS` is supplied by build system at compile time */
|
|
char *dxc_args_cstr = Stringize(DXC_ARGS);
|
|
String dxc_args_str = StringFromCstrNoLimit(dxc_args_cstr);
|
|
StringArray dxc_args_array = SplitString(scratch.arena, dxc_args_str, Lit(" "));
|
|
String shader_args[] = {
|
|
desc->friendly_name,
|
|
Lit("-E"), desc->entry,
|
|
Lit("-T"), desc->target,
|
|
};
|
|
u32 num_args = countof(shader_args) + dxc_args_array.count;
|
|
String *args = PushStructs(scratch.arena, String, num_args);
|
|
for (u32 i = 0; i < countof(shader_args); ++i)
|
|
{
|
|
args[i] = shader_args[i];
|
|
}
|
|
for (u32 i = 0; i < dxc_args_array.count; ++i)
|
|
{
|
|
args[i + countof(shader_args)] = dxc_args_array.strings[i];
|
|
}
|
|
dxc_result = DXC_Compile(arena, desc->src, num_args, args);
|
|
}
|
|
result->success = dxc_result.success;
|
|
result->dxc = dxc_result.dxc;
|
|
result->errors = dxc_result.errors;
|
|
result->elapsed_ns = P_TimeNs() - start_ns;
|
|
|
|
}
|
|
EndScratch(scratch);
|
|
}
|
|
|
|
#endif
|
|
|
|
/* ========================== *
|
|
* Pipeline
|
|
* ========================== */
|
|
|
|
P_JobDef(pipeline_alloc_job, job)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
struct pipeline_alloc_job_sig *sig = job.sig;
|
|
struct pipeline_desc *desc = &sig->descs_in[job.id];
|
|
struct pipeline **pipelines_out = sig->pipelines_out;
|
|
|
|
struct pipeline *pipeline = 0;
|
|
{
|
|
P_Lock lock = P_LockE(&g->pipelines_mutex);
|
|
if (g->first_free_pipeline)
|
|
{
|
|
pipeline = g->first_free_pipeline;
|
|
g->first_free_pipeline = pipeline->next;
|
|
}
|
|
else
|
|
{
|
|
pipeline = PushStructNoZero(g->pipelines_arena, struct pipeline);
|
|
}
|
|
P_Unlock(&lock);
|
|
}
|
|
ZeroStruct(pipeline);
|
|
pipelines_out[job.id] = pipeline;
|
|
pipeline->desc = *desc;
|
|
pipeline->name = desc->name;
|
|
pipeline->hash = HashFnv64(Fnv64Basis, pipeline->name);
|
|
|
|
TempArena scratch = BeginScratchNoConflict();
|
|
{
|
|
i64 start_ns = P_TimeNs();
|
|
String pipeline_name = pipeline->name;
|
|
P_LogInfoF("Loading pipeline \"%F\"", FmtString(pipeline_name));
|
|
b32 success = 1;
|
|
HRESULT hr = 0;
|
|
|
|
String error_str = ZI;
|
|
|
|
String vs_dxc = desc->vs_dxc.len > 0 ? desc->vs_dxc : TAR_EntryFromName(&g->dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".vs")))->data;
|
|
String ps_dxc = desc->ps_dxc.len > 0 ? desc->ps_dxc : TAR_EntryFromName(&g->dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".ps")))->data;
|
|
String cs_dxc = desc->cs_dxc.len > 0 ? desc->cs_dxc : TAR_EntryFromName(&g->dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".cs")))->data;
|
|
if (success && vs_dxc.len > 0 && ps_dxc.len <= 0)
|
|
{
|
|
error_str = Lit("Pipeline has vertex shader without pixel shader");
|
|
success = 0;
|
|
}
|
|
if (success && vs_dxc.len <= 0 && ps_dxc.len > 0)
|
|
{
|
|
error_str = Lit("Pipeline has pixel shader without vertex shader");
|
|
success = 0;
|
|
}
|
|
if (success && cs_dxc.len > 0 && (vs_dxc.len > 0 || ps_dxc.len > 0))
|
|
{
|
|
error_str = Lit("Pipeline has a compute shader with a vertex/pixel shader");
|
|
success = 0;
|
|
}
|
|
if (success && cs_dxc.len <= 0 && vs_dxc.len <= 0 && ps_dxc.len <= 0)
|
|
{
|
|
error_str = Lit("Pipeline has no shaders");
|
|
success = 0;
|
|
}
|
|
|
|
ID3D10Blob *vs_blob = 0;
|
|
ID3D10Blob *ps_blob = 0;
|
|
ID3D10Blob *cs_blob = 0;
|
|
if (success && vs_dxc.len > 0)
|
|
{
|
|
hr = D3DCreateBlob(vs_dxc.len, &vs_blob);
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
CopyBytes(ID3D10Blob_GetBufferPointer(vs_blob), vs_dxc.text, vs_dxc.len);
|
|
}
|
|
else
|
|
{
|
|
error_str = Lit("Failed to create vertex shader blob");
|
|
success = 0;
|
|
}
|
|
}
|
|
if (success && ps_dxc.len > 0)
|
|
{
|
|
hr = D3DCreateBlob(ps_dxc.len, &ps_blob);
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
CopyBytes(ID3D10Blob_GetBufferPointer(ps_blob), ps_dxc.text, ps_dxc.len);
|
|
}
|
|
else
|
|
{
|
|
error_str = Lit("Failed to create pixel shader blob");
|
|
success = 0;
|
|
}
|
|
}
|
|
if (success && cs_dxc.len > 0)
|
|
{
|
|
hr = D3DCreateBlob(cs_dxc.len, &cs_blob);
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
CopyBytes(ID3D10Blob_GetBufferPointer(cs_blob), cs_dxc.text, cs_dxc.len);
|
|
}
|
|
else
|
|
{
|
|
error_str = Lit("Failed to create compute shader blob");
|
|
success = 0;
|
|
}
|
|
}
|
|
|
|
/* Get root signature blob
|
|
* NOTE: This isn't necessary for creating the root signature (since it
|
|
* could reuse the shader blob), however we'd like to verify that the
|
|
* root signature exists and matches between vs & ps shaders. */
|
|
ID3D10Blob *rootsig_blob = 0;
|
|
if (success)
|
|
{
|
|
__profn("Validate root signatures");
|
|
if (cs_dxc.len > 0)
|
|
{
|
|
u32 cs_rootsig_data_len = 0;
|
|
ID3D10Blob *cs_rootsig_blob = 0;
|
|
D3DGetBlobPart(ID3D10Blob_GetBufferPointer(cs_blob), ID3D10Blob_GetBufferSize(cs_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &cs_rootsig_blob);
|
|
if (cs_rootsig_blob)
|
|
{
|
|
cs_rootsig_data_len = ID3D10Blob_GetBufferSize(cs_rootsig_blob);
|
|
}
|
|
if (cs_rootsig_data_len == 0)
|
|
{
|
|
success = 0;
|
|
error_str = Lit("Compute shader is missing root signature");
|
|
}
|
|
else
|
|
{
|
|
rootsig_blob = cs_rootsig_blob;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
char *vs_rootsig_data = 0;
|
|
char *ps_rootsig_data = 0;
|
|
u32 vs_rootsig_data_len = 0;
|
|
u32 ps_rootsig_data_len = 0;
|
|
ID3D10Blob *vs_rootsig_blob = 0;
|
|
ID3D10Blob *ps_rootsig_blob = 0;
|
|
D3DGetBlobPart(ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &vs_rootsig_blob);
|
|
D3DGetBlobPart(ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &ps_rootsig_blob);
|
|
if (vs_rootsig_blob)
|
|
{
|
|
vs_rootsig_data = ID3D10Blob_GetBufferPointer(vs_rootsig_blob);
|
|
vs_rootsig_data_len = ID3D10Blob_GetBufferSize(vs_rootsig_blob);
|
|
}
|
|
if (ps_rootsig_blob)
|
|
{
|
|
ps_rootsig_data = ID3D10Blob_GetBufferPointer(ps_rootsig_blob);
|
|
ps_rootsig_data_len = ID3D10Blob_GetBufferSize(ps_rootsig_blob);
|
|
}
|
|
if (vs_rootsig_data_len == 0)
|
|
{
|
|
success = 0;
|
|
error_str = Lit("Vertex shader is missing root signature");
|
|
}
|
|
else if (ps_rootsig_data_len == 0)
|
|
{
|
|
success = 0;
|
|
error_str = Lit("Pixel shader is missing root signature");
|
|
}
|
|
else if (vs_rootsig_data_len != ps_rootsig_data_len || !EqBytes(vs_rootsig_data, ps_rootsig_data, vs_rootsig_data_len))
|
|
{
|
|
success = 0;
|
|
error_str = Lit("Root signature mismatch between vertex and pixel shader");
|
|
}
|
|
else
|
|
{
|
|
rootsig_blob = vs_rootsig_blob;
|
|
}
|
|
if (ps_rootsig_blob)
|
|
{
|
|
ID3D10Blob_Release(ps_rootsig_blob);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Create root signature */
|
|
ID3D12RootSignature *rootsig = 0;
|
|
if (success)
|
|
{
|
|
__profn("Create root signature");
|
|
hr = ID3D12Device_CreateRootSignature(g->device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig);
|
|
if (FAILED(hr))
|
|
{
|
|
error_str = Lit("Failed to create root signature");
|
|
success = 0;
|
|
}
|
|
}
|
|
|
|
/* Create PSO */
|
|
ID3D12PipelineState *pso = 0;
|
|
if (success)
|
|
{
|
|
if (cs_dxc.len > 0)
|
|
{
|
|
__profn("Create compute PSO");
|
|
D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = { 0 };
|
|
pso_desc.pRootSignature = rootsig;
|
|
pso_desc.CS.pShaderBytecode = ID3D10Blob_GetBufferPointer(cs_blob);
|
|
pso_desc.CS.BytecodeLength = ID3D10Blob_GetBufferSize(cs_blob);
|
|
hr = ID3D12Device_CreateComputePipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
|
|
}
|
|
else
|
|
{
|
|
__profn("Create graphics PSO");
|
|
|
|
/* Default rasterizer state */
|
|
D3D12_RASTERIZER_DESC raster_desc = {
|
|
.FillMode = D3D12_FILL_MODE_SOLID,
|
|
.CullMode = D3D12_CULL_MODE_NONE,
|
|
.FrontCounterClockwise = 0,
|
|
.DepthBias = D3D12_DEFAULT_DEPTH_BIAS,
|
|
.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP,
|
|
.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
|
|
.DepthClipEnable = 1,
|
|
.MultisampleEnable = 0,
|
|
.AntialiasedLineEnable = 0,
|
|
.ForcedSampleCount = 0,
|
|
.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF
|
|
};
|
|
|
|
/* Empty input layout */
|
|
D3D12_INPUT_LAYOUT_DESC input_layout_desc = ZI;
|
|
|
|
/* Blend state */
|
|
D3D12_BLEND_DESC blend_desc = {
|
|
.AlphaToCoverageEnable = 0,
|
|
.IndependentBlendEnable = 1
|
|
};
|
|
for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i)
|
|
{
|
|
StaticAssert(countof(blend_desc.RenderTarget) <= countof(desc->rtvs));
|
|
if (desc->rtvs[i].format != DXGI_FORMAT_UNKNOWN)
|
|
{
|
|
b32 blending_enabled = desc->rtvs[i].blending;
|
|
blend_desc.RenderTarget[i].BlendEnable = blending_enabled;
|
|
blend_desc.RenderTarget[i].SrcBlend = D3D12_BLEND_SRC_ALPHA;
|
|
blend_desc.RenderTarget[i].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
|
|
blend_desc.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD;
|
|
blend_desc.RenderTarget[i].SrcBlendAlpha = D3D12_BLEND_ONE;
|
|
blend_desc.RenderTarget[i].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
|
|
blend_desc.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD;
|
|
blend_desc.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Disable depth stencil */
|
|
D3D12_DEPTH_STENCIL_DESC depth_stencil_desc = {
|
|
.DepthEnable = 0,
|
|
.StencilEnable = 0
|
|
};
|
|
|
|
/* PSO */
|
|
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { 0 };
|
|
pso_desc.pRootSignature = rootsig;
|
|
pso_desc.VS.pShaderBytecode = ID3D10Blob_GetBufferPointer(vs_blob);
|
|
pso_desc.VS.BytecodeLength = ID3D10Blob_GetBufferSize(vs_blob);
|
|
pso_desc.PS.pShaderBytecode = ID3D10Blob_GetBufferPointer(ps_blob);
|
|
pso_desc.PS.BytecodeLength = ID3D10Blob_GetBufferSize(ps_blob);
|
|
pso_desc.BlendState = blend_desc;
|
|
pso_desc.SampleMask = UINT_MAX;
|
|
pso_desc.RasterizerState = raster_desc;
|
|
pso_desc.DepthStencilState = depth_stencil_desc;
|
|
pso_desc.InputLayout = input_layout_desc;
|
|
pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
|
|
for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i)
|
|
{
|
|
StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc->rtvs));
|
|
DXGI_FORMAT format = desc->rtvs[i].format;
|
|
if (format != DXGI_FORMAT_UNKNOWN)
|
|
{
|
|
pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format;
|
|
}
|
|
else
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
pso_desc.SampleDesc.Count = 1;
|
|
pso_desc.SampleDesc.Quality = 0;
|
|
hr = ID3D12Device_CreateGraphicsPipelineState(g->device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
|
|
}
|
|
if (FAILED(hr))
|
|
{
|
|
error_str = Lit("Failed to create pipeline state object");
|
|
success = 0;
|
|
}
|
|
}
|
|
|
|
/* Parse errors */
|
|
if (!success && error_str.len <= 0)
|
|
{
|
|
error_str = Lit("Unknown error");
|
|
}
|
|
|
|
pipeline->pso = pso;
|
|
pipeline->rootsig = rootsig;
|
|
pipeline->compilation_time_ns = P_TimeNs() - start_ns;
|
|
pipeline->success = success;
|
|
pipeline->is_gfx = cs_dxc.len == 0;
|
|
pipeline->error = error_str;
|
|
|
|
if (rootsig_blob)
|
|
{
|
|
ID3D10Blob_Release(rootsig_blob);
|
|
}
|
|
if (vs_blob)
|
|
{
|
|
ID3D10Blob_Release(vs_blob);
|
|
}
|
|
if (ps_blob)
|
|
{
|
|
ID3D10Blob_Release(ps_blob);
|
|
}
|
|
if (cs_blob)
|
|
{
|
|
ID3D10Blob_Release(cs_blob);
|
|
}
|
|
}
|
|
EndScratch(scratch);
|
|
}
|
|
|
|
void pipeline_release_now(struct pipeline *pipeline)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
if (pipeline->pso)
|
|
{
|
|
ID3D12PipelineState_Release(pipeline->pso);
|
|
}
|
|
P_Lock lock = P_LockE(&g->pipelines_mutex);
|
|
{
|
|
pipeline->next = g->first_free_pipeline;
|
|
g->first_free_pipeline = pipeline;
|
|
}
|
|
P_Unlock(&lock);
|
|
}
|
|
|
|
/* ========================== *
|
|
* Pipeline cache
|
|
* ========================== */
|
|
|
|
struct pipeline_scope *pipeline_scope_begin(void)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
struct pipeline_scope *scope = 0;
|
|
{
|
|
P_Lock lock = P_LockE(&g->pipelines_mutex);
|
|
if (g->first_free_pipeline_scope)
|
|
{
|
|
scope = g->first_free_pipeline_scope;
|
|
g->first_free_pipeline_scope = scope->next_free;
|
|
}
|
|
P_Unlock(&lock);
|
|
}
|
|
Arena *arena = 0;
|
|
if (scope)
|
|
{
|
|
arena = scope->arena;
|
|
}
|
|
else
|
|
{
|
|
arena = AllocArena(Mebi(64));
|
|
}
|
|
ResetArena(arena);
|
|
scope = PushStruct(arena, struct pipeline_scope);
|
|
scope->arena = arena;
|
|
scope->refs = InitDict(scope->arena, 64);
|
|
return scope;
|
|
}
|
|
|
|
void pipeline_scope_end(struct pipeline_scope *scope)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
P_Lock lock = P_LockE(&g->pipelines_mutex);
|
|
{
|
|
for (DictEntry *entry = scope->refs->first; entry; entry = entry->next)
|
|
{
|
|
struct pipeline *pipeline = (struct pipeline *)entry->value;
|
|
if (--pipeline->refcount <= 0)
|
|
{
|
|
fenced_release(pipeline, FENCED_RELEASE_KIND_PIPELINE);
|
|
}
|
|
}
|
|
scope->next_free = g->first_free_pipeline_scope;
|
|
g->first_free_pipeline_scope = scope;
|
|
}
|
|
P_Unlock(&lock);
|
|
}
|
|
|
|
/* Sentinel returned by pipeline_from_name() when no matching successful
 * pipeline exists; never mutated or released. */
Readonly struct pipeline g_nil_pipeline = ZI;
|
|
/* Looks up a pipeline by name for use within `scope`. First checks the
 * scope-local reference cache; on a miss, takes a reference from the shared
 * top_successful_pipelines dict (under the pipelines mutex) and records it in
 * the scope so pipeline_scope_end() can drop it. Returns &g_nil_pipeline when
 * no successful pipeline with that name exists. */
struct pipeline *pipeline_from_name(struct pipeline_scope *scope, String name)
{
	__prof;
	GPU_D12_SharedState *g = &GPU_D12_shared_state;
	u64 hash = HashFnv64(Fnv64Basis, name);

	/* Fast path: already referenced by this scope. */
	struct pipeline *found = (struct pipeline *)DictValueFromHash(scope->refs, hash);
	if (!found)
	{
		/* Slow path: borrow a reference from the shared cache. */
		{
			P_Lock lock = P_LockE(&g->pipelines_mutex);
			found = (struct pipeline *)DictValueFromHash(g->top_successful_pipelines, hash);
			if (found)
			{
				++found->refcount;
			}
			P_Unlock(&lock);
		}
		if (found)
		{
			/* Record the reference so scope_end can release it. */
			SetDictValue(scope->arena, scope->refs, hash, (u64)found);
		}
	}

	return found ? found : &g_nil_pipeline;
}
|
|
|
|
/* Publishes freshly (re)compiled pipelines into the global caches. Every
 * pipeline goes into top_pipelines; pipelines that compiled successfully
 * also go into top_successful_pipelines (the dict pipeline_from_name
 * reads). A displaced pipeline loses the cache's reference and is
 * fence-released once fully unreferenced. */
void pipeline_register(u64 num_pipelines, struct pipeline **pipelines)
{
    __prof;
    GPU_D12_SharedState *g = &GPU_D12_shared_state;

    P_Lock lock = P_LockE(&g->pipelines_mutex);
    for (u64 pipeline_idx = 0; pipeline_idx < num_pipelines; ++pipeline_idx)
    {
        struct pipeline *pipeline = pipelines[pipeline_idx];
        u64 hash = pipeline->hash;

        /* Replace in the "latest compile attempt" dict */
        {
            struct pipeline *displaced = (struct pipeline *)DictValueFromHash(g->top_pipelines, hash);
            if (displaced)
            {
                if (--displaced->refcount <= 0)
                {
                    fenced_release(displaced, FENCED_RELEASE_KIND_PIPELINE);
                }
            }
            ++pipeline->refcount;
            SetDictValue(g->pipelines_arena, g->top_pipelines, hash, (u64)pipeline);
        }

        /* Replace in the "latest successful compile" dict */
        if (pipeline->success)
        {
            struct pipeline *displaced = (struct pipeline *)DictValueFromHash(g->top_successful_pipelines, hash);
            if (displaced)
            {
                if (--displaced->refcount <= 0)
                {
                    fenced_release(displaced, FENCED_RELEASE_KIND_PIPELINE);
                }
            }
            ++pipeline->refcount;
            SetDictValue(g->pipelines_arena, g->top_successful_pipelines, hash, (u64)pipeline);
        }
    }
    P_Unlock(&lock);
}
|
|
|
|
#if RESOURCE_RELOADING
|
|
W_CallbackFuncDef(pipeline_watch_callback, name)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
TempArena scratch = BeginScratchNoConflict();
|
|
|
|
String rst_extension = Lit(".rst");
|
|
String knl_extension = Lit(".knl");
|
|
|
|
b32 is_src = StringStartsWith(name, Lit("src/"));
|
|
b32 is_rs = is_src && StringEndsWith(name, rst_extension);
|
|
b32 is_cs = is_src && !is_rs && StringEndsWith(name, knl_extension);
|
|
b32 success = 0;
|
|
|
|
/* Recompile shaders */
|
|
String pipeline_name = ZI;
|
|
String friendly_name = ZI;
|
|
i32 num_shaders = 0;
|
|
struct shader_compile_desc *shader_descs = 0;
|
|
struct shader_compile_result *shader_results = 0;
|
|
if (is_rs || is_cs)
|
|
{
|
|
P_LogDebugF("Change detected in shader source file \"%F\", recompiling...", FmtString(name));
|
|
success = 1;
|
|
P_File file = P_OpenFileReadWait(name);
|
|
String data = P_ReadFile(scratch.arena, file);
|
|
{
|
|
friendly_name = name;
|
|
StringArray split = SplitString(scratch.arena, friendly_name, Lit("src/"));
|
|
friendly_name = split.count > 0 ? CatString(scratch.arena, Lit("src/"), split.strings[split.count - 1]) : friendly_name;
|
|
}
|
|
{
|
|
pipeline_name = name;
|
|
StringArray split = SplitString(scratch.arena, pipeline_name, Lit("/"));
|
|
pipeline_name = split.count > 0 ? split.strings[split.count - 1] : pipeline_name;
|
|
split = SplitString(scratch.arena, pipeline_name, Lit("."));
|
|
pipeline_name = split.count > 1 ? split.strings[split.count - 2] : pipeline_name;
|
|
}
|
|
{
|
|
struct shader_compile_job_sig sig = ZI;
|
|
sig.arena = scratch.arena;
|
|
if (is_rs)
|
|
{
|
|
num_shaders = 2;
|
|
shader_descs = PushStructs(scratch.arena, struct shader_compile_desc, num_shaders);
|
|
shader_results = PushStructs(scratch.arena, struct shader_compile_result, num_shaders);
|
|
sig.descs = shader_descs;
|
|
sig.results = shader_results;
|
|
sig.descs[0].src = data;
|
|
sig.descs[0].friendly_name = friendly_name;
|
|
sig.descs[0].entry = Lit("vs");
|
|
sig.descs[0].target = Lit("vs_6_6");
|
|
sig.descs[1].src = data;
|
|
sig.descs[1].friendly_name = friendly_name;
|
|
sig.descs[1].entry = Lit("ps");
|
|
sig.descs[1].target = Lit("ps_6_6");
|
|
}
|
|
else if (is_cs)
|
|
{
|
|
num_shaders = 1;
|
|
shader_descs = PushStructs(scratch.arena, struct shader_compile_desc, num_shaders);
|
|
shader_results = PushStructs(scratch.arena, struct shader_compile_result, num_shaders);
|
|
sig.descs = shader_descs;
|
|
sig.results = shader_results;
|
|
sig.descs[0].src = data;
|
|
sig.descs[0].friendly_name = friendly_name;
|
|
sig.descs[0].entry = Lit("cs");
|
|
sig.descs[0].target = Lit("cs_6_6");
|
|
}
|
|
{
|
|
P_Counter counter = ZI;
|
|
P_Run(num_shaders, shader_compile_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
|
|
P_WaitOnCounter(&counter);
|
|
}
|
|
}
|
|
P_CloseFIle(file);
|
|
}
|
|
|
|
|
|
for (i32 i = 0; i < num_shaders; ++i)
|
|
{
|
|
struct shader_compile_desc *desc = &shader_descs[i];
|
|
struct shader_compile_result *result = &shader_results[i];
|
|
if (result->success)
|
|
{
|
|
P_LogSuccessF("Finished compiling shader \"%F:%F\" in %F seconds", FmtString(desc->friendly_name), FmtString(desc->entry), FmtFloat(SecondsFromNs(result->elapsed_ns)));
|
|
if (result->errors.len > 0)
|
|
{
|
|
String msg = result->errors;
|
|
P_LogWarning(msg);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
String msg = result->errors;
|
|
P_LogError(msg);
|
|
success = 0;
|
|
}
|
|
}
|
|
|
|
if (success)
|
|
{
|
|
/* Create pipeline descs */
|
|
u32 num_pipelines = 0;
|
|
struct pipeline_desc *pipeline_descs = PushDry(scratch.arena, struct pipeline_desc);
|
|
for (DictEntry *entry = g->pipeline_descs->first; entry; entry = entry->next)
|
|
{
|
|
struct pipeline_desc *pipeline_desc = (struct pipeline_desc *)entry->value;
|
|
struct pipeline_desc new_pipeline_desc = *pipeline_desc;
|
|
if (EqString(pipeline_desc->name, pipeline_name))
|
|
{
|
|
if (is_rs)
|
|
{
|
|
new_pipeline_desc.vs_dxc = shader_results[0].dxc;
|
|
new_pipeline_desc.ps_dxc = shader_results[1].dxc;
|
|
}
|
|
else if (is_cs)
|
|
{
|
|
new_pipeline_desc.cs_dxc = shader_results[0].dxc;
|
|
}
|
|
*PushStructNoZero(scratch.arena, struct pipeline_desc) = new_pipeline_desc;
|
|
++num_pipelines;
|
|
}
|
|
}
|
|
|
|
/* Recompile dirty pipelines */
|
|
if (num_pipelines > 0)
|
|
{
|
|
__profn("Compile dirty pipelines");
|
|
struct pipeline **pipelines = PushStructs(scratch.arena, struct pipeline *, num_pipelines);
|
|
{
|
|
struct pipeline_alloc_job_sig sig = ZI;
|
|
sig.descs_in = pipeline_descs;
|
|
sig.pipelines_out = pipelines;
|
|
P_Counter counter = ZI;
|
|
P_Run(num_pipelines, pipeline_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
|
|
P_WaitOnCounter(&counter);
|
|
}
|
|
{
|
|
P_Lock lock = P_LockS(&g->pipelines_mutex);
|
|
for (u32 i = 0; i < num_pipelines; ++i)
|
|
{
|
|
struct pipeline *pipeline = pipelines[i];
|
|
if (pipeline->success)
|
|
{
|
|
P_LogSuccessF("Successfully compiled pipeline \"%F\" in %F seconds", FmtString(pipeline->name), FmtFloat(SecondsFromNs(pipeline->compilation_time_ns)));
|
|
if (pipeline->error.len > 0)
|
|
{
|
|
String msg = StringFormat(scratch.arena, Lit("Warning while compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(pipeline->error));
|
|
P_LogWarning(msg);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
{
|
|
String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
|
|
String msg = StringFormat(scratch.arena, Lit("Error compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(error));
|
|
P_LogError(msg);
|
|
}
|
|
struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(g->top_successful_pipelines, pipeline->hash);
|
|
if (!old_pipeline)
|
|
{
|
|
/* If no previously successful pipeline exists, then show a message box rather than logging since logs may not be visible to user */
|
|
String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
|
|
String msg = StringFormat(scratch.arena, Lit("Error compiling pipeline \"%F\":\n\n%F"), FmtString(pipeline->name), FmtString(error));
|
|
P_MessageBox(P_MessageBoxKind_Warning, msg);
|
|
}
|
|
|
|
}
|
|
}
|
|
P_Unlock(&lock);
|
|
}
|
|
pipeline_register(num_pipelines, pipelines);
|
|
}
|
|
}
|
|
|
|
EndScratch(scratch);
|
|
}
|
|
#endif
|
|
|
|
/* ========================== *
|
|
* Descriptor
|
|
* ========================== */
|
|
|
|
struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh)
|
|
{
|
|
__prof;
|
|
struct descriptor *d = 0;
|
|
u32 index = 0;
|
|
D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI;
|
|
{
|
|
P_Lock lock = P_LockE(&dh->mutex);
|
|
if (dh->first_free_descriptor)
|
|
{
|
|
d = dh->first_free_descriptor;
|
|
dh->first_free_descriptor = d->next_free;
|
|
handle = d->handle;
|
|
index = d->index;
|
|
}
|
|
else
|
|
{
|
|
if (dh->num_descriptors_reserved >= dh->num_descriptors_capacity)
|
|
{
|
|
P_Panic(Lit("Max descriptors reached in heap"));
|
|
}
|
|
d = PushStructNoZero(dh->arena, struct descriptor);
|
|
index = dh->num_descriptors_reserved++;
|
|
handle.ptr = dh->handle.ptr + (index * dh->descriptor_size);
|
|
}
|
|
P_Unlock(&lock);
|
|
}
|
|
ZeroStruct(d);
|
|
d->heap = dh;
|
|
d->handle = handle;
|
|
d->index = index;
|
|
return d;
|
|
}
|
|
|
|
void descriptor_release(struct descriptor *descriptor)
|
|
{
|
|
struct cpu_descriptor_heap *dh = descriptor->heap;
|
|
P_Lock lock = P_LockE(&dh->mutex);
|
|
{
|
|
descriptor->next_free = dh->first_free_descriptor;
|
|
dh->first_free_descriptor = descriptor;
|
|
}
|
|
P_Unlock(&lock);
|
|
}
|
|
|
|
/* ========================== *
|
|
* CPU descriptor heap
|
|
* ========================== */
|
|
|
|
struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
struct cpu_descriptor_heap *dh = 0;
|
|
{
|
|
Arena *arena = AllocArena(Mebi(64));
|
|
dh = PushStruct(arena, struct cpu_descriptor_heap);
|
|
dh->arena = arena;
|
|
}
|
|
|
|
u32 num_descriptors = 0;
|
|
u32 descriptor_size = 0;
|
|
if (type < (i32)countof(g->desc_counts) && type < (i32)countof(g->desc_sizes))
|
|
{
|
|
num_descriptors = g->desc_counts[type];
|
|
descriptor_size = g->desc_sizes[type];
|
|
}
|
|
if (num_descriptors == 0 || descriptor_size == 0)
|
|
{
|
|
P_Panic(Lit("Unsupported CPU descriptor type"));
|
|
}
|
|
dh->num_descriptors_capacity = num_descriptors;
|
|
dh->descriptor_size = descriptor_size;
|
|
|
|
D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
|
|
desc.Type = type;
|
|
desc.NumDescriptors = num_descriptors;
|
|
HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &desc, &IID_ID3D12DescriptorHeap, (void **)&dh->heap);
|
|
if (FAILED(hr))
|
|
{
|
|
P_Panic(Lit("Failed to create CPU descriptor heap"));
|
|
}
|
|
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(dh->heap, &dh->handle);
|
|
|
|
return dh;
|
|
}
|
|
|
|
#if 0
|
|
void cpu_descriptor_heap_release(struct cpu_descriptor_heap *dh)
|
|
{
|
|
/* TODO */
|
|
(UNUSED)dh;
|
|
}
|
|
#endif
|
|
|
|
/* ========================== *
|
|
* Fenced release
|
|
* ========================== */
|
|
|
|
/* Queues `data` (interpreted per `kind`: pipeline, resource, ...) for
 * deferred destruction. The current submit-fence targets of every command
 * queue are captured so the evictor only frees the object once all queues
 * have passed those targets, i.e. once the GPU can no longer reference it.
 * Safe to call from any thread. */
void fenced_release(void *data, enum fenced_release_kind kind)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    struct fenced_release_data fr = ZI;
    fr.kind = kind;
    fr.ptr = data;

    u64 fr_targets[countof(g->fenced_release_targets)] = ZI;

    /* Read current fence target values from command queues */
    /* NOTE(review): loop bound is countof(g->command_queues) while the
       array is sized countof(g->fenced_release_targets) — assumes the two
       counts match; confirm in the shared-state declaration. */
    for (u32 i = 0; i < countof(g->command_queues); ++i)
    {
        struct command_queue *cq = g->command_queues[i];
        P_Lock lock = P_LockS(&cq->submit_fence_mutex);
        {
            fr_targets[i] = cq->submit_fence_target;
        }
        P_Unlock(&lock);
    }

    /* PushStruct data to release queue */
    {
        P_Lock lock = P_LockE(&g->fenced_releases_mutex);
        {
            *PushStruct(g->fenced_releases_arena, struct fenced_release_data) = fr;
            /* NOTE(review): this overwrites a single shared targets array
               rather than storing targets per queued entry — earlier pending
               releases are deferred to the newest (largest) targets. That is
               conservative (never frees too early) but presumably
               intentional; verify against the evictor implementation. */
            CopyBytes(g->fenced_release_targets, fr_targets, sizeof(fr_targets));
        }
        P_Unlock(&lock);
    }

    /* Wake evictor */
    {
        P_Lock lock = P_LockE(&g->evictor_wake_mutex);
        {
            ++g->evictor_wake_gen;
            P_SignalCv(&g->evictor_wake_cv, I32Max);
        }
        P_Unlock(&lock);
    }
}
|
|
|
|
/* ========================== *
|
|
* Resource
|
|
* ========================== */
|
|
|
|
/* Allocates (or recycles) a dx12_resource wrapper and creates a committed
 * D3D12 resource behind it. Render-target resources get an optimized clear
 * value (transparent black in the resource's own format); buffers cache
 * their GPU virtual address. Panics on creation failure (TODO: don't). */
struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state)
{
    __prof;
    GPU_D12_SharedState *g = &GPU_D12_shared_state;

    /* Grab a wrapper struct: free list first, shared arena otherwise */
    struct dx12_resource *r = 0;
    {
        P_Lock free_list_lock = P_LockE(&g->resources_mutex);
        r = g->first_free_resource;
        if (r)
        {
            g->first_free_resource = r->next_free;
        }
        else
        {
            r = PushStructNoZero(g->resources_arena, struct dx12_resource);
        }
        P_Unlock(&free_list_lock);
    }
    ZeroStruct(r);

    /* Only render targets may carry an optimized clear value */
    b32 is_render_target = (desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) != 0;
    D3D12_CLEAR_VALUE clear_value = { .Format = desc.Format, .Color = { 0 } };
    D3D12_CLEAR_VALUE *clear_value_ptr = is_render_target ? &clear_value : 0;

    HRESULT hr = ID3D12Device_CreateCommittedResource(g->device, &heap_props, heap_flags, &desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->resource);
    if (FAILED(hr))
    {
        /* TODO: Don't panic */
        P_Panic(Lit("Failed to create resource"));
    }

    r->state = initial_state;

    /* Buffers are addressed directly by GPU virtual address in shaders */
    if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
    {
        r->gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->resource);
    }
    return r;
}
|
|
|
|
void dx12_resource_release_now(struct dx12_resource *t)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
|
|
/* Release descriptors */
|
|
/* TODO: Batch lock heaps */
|
|
if (t->cbv_descriptor)
|
|
{
|
|
descriptor_release(t->cbv_descriptor);
|
|
}
|
|
if (t->srv_descriptor)
|
|
{
|
|
descriptor_release(t->srv_descriptor);
|
|
}
|
|
if (t->uav_descriptor)
|
|
{
|
|
descriptor_release(t->uav_descriptor);
|
|
}
|
|
if (t->rtv_descriptor)
|
|
{
|
|
descriptor_release(t->rtv_descriptor);
|
|
}
|
|
|
|
/* Release resource */
|
|
ID3D12Resource_Release(t->resource);
|
|
|
|
/* Add to free list */
|
|
P_Lock lock = P_LockE(&g->resources_mutex);
|
|
t->next_free = g->first_free_resource;
|
|
g->first_free_resource = t;
|
|
P_Unlock(&lock);
|
|
}
|
|
|
|
/* Public release entry point: destruction is deferred via the fenced
 * release queue until every in-flight GPU submission has retired. */
void GPU_ReleaseResource(GPU_Resource *resource)
{
    fenced_release((struct dx12_resource *)resource, FENCED_RELEASE_KIND_RESOURCE);
}
|
|
|
|
/* ========================== *
|
|
* Resource barrier
|
|
* ========================== */
|
|
|
|
/* Records resource barriers onto `cl`. Transition barriers are emitted only
 * when the tracked state actually changes (the wrapper's tracked state is
 * updated in the same pass); UAV barriers are emitted unconditionally.
 * Unknown barrier types assert. */
void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_descs, struct dx12_resource_barrier_desc *descs)
{
    __prof;
    TempArena scratch = BeginScratchNoConflict();

    struct D3D12_RESOURCE_BARRIER *rbs = PushStructsNoZero(scratch.arena, struct D3D12_RESOURCE_BARRIER, num_descs);
    i32 num_rbs = 0;
    for (i32 desc_idx = 0; desc_idx < num_descs; ++desc_idx)
    {
        struct dx12_resource_barrier_desc *desc = &descs[desc_idx];
        struct dx12_resource *resource = desc->resource;
        switch (desc->type)
        {
            case D3D12_RESOURCE_BARRIER_TYPE_TRANSITION:
            {
                enum D3D12_RESOURCE_STATES old_state = resource->state;
                enum D3D12_RESOURCE_STATES new_state = desc->new_state;
                /* Skip no-op transitions */
                if (new_state == old_state)
                {
                    break;
                }
                struct D3D12_RESOURCE_BARRIER *rb = &rbs[num_rbs++];
                ZeroStruct(rb);
                rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                rb->Flags = 0;
                rb->Transition.pResource = resource->resource;
                rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
                rb->Transition.StateBefore = old_state;
                rb->Transition.StateAfter = new_state;
                resource->state = new_state;
            } break;

            case D3D12_RESOURCE_BARRIER_TYPE_UAV:
            {
                struct D3D12_RESOURCE_BARRIER *rb = &rbs[num_rbs++];
                ZeroStruct(rb);
                rb->Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
                rb->Flags = 0;
                rb->UAV.pResource = resource->resource;
            } break;

            default:
            {
                /* Unknown barrier type */
                Assert(0);
            } break;
        }
    }

    /* Record all collected barriers in one call */
    if (num_rbs > 0)
    {
        ID3D12GraphicsCommandList_ResourceBarrier(cl, num_rbs, rbs);
    }

    EndScratch(scratch);
}
|
|
|
|
/* ========================== *
|
|
* Command queue
|
|
* ========================== */
|
|
|
|
struct command_list_pool *command_list_pool_alloc(struct command_queue *cq);
|
|
|
|
P_JobDef(command_queue_alloc_job, job)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
struct command_queue_alloc_job_sig *sig = job.sig;
|
|
struct command_queue_desc *desc = &sig->descs_in[job.id];
|
|
{
|
|
struct command_queue *cq = 0;
|
|
{
|
|
Arena *arena = AllocArena(Gibi(64));
|
|
cq = PushStruct(arena, struct command_queue);
|
|
cq->arena = arena;
|
|
}
|
|
cq->desc = *desc;
|
|
|
|
D3D12_COMMAND_QUEUE_DESC dx12_desc = ZI;
|
|
dx12_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
|
|
dx12_desc.Type = desc->type;
|
|
dx12_desc.Priority = desc->priority;
|
|
HRESULT hr = ID3D12Device_CreateCommandQueue(g->device, &dx12_desc, &IID_ID3D12CommandQueue, (void **)&cq->cq);
|
|
if (FAILED(hr))
|
|
{
|
|
P_Panic(Lit("Failed to create command queue"));
|
|
}
|
|
|
|
hr = ID3D12Device_CreateFence(g->device, 0, 0, &IID_ID3D12Fence, (void **)&cq->submit_fence);
|
|
if (FAILED(hr))
|
|
{
|
|
P_Panic(Lit("Failed to create command queue fence"));
|
|
}
|
|
|
|
cq->cl_pool = command_list_pool_alloc(cq);
|
|
|
|
sig->cqs_out[job.id] = cq;
|
|
}
|
|
}
|
|
|
|
/* Tears down a command queue. Currently a stub: queues live for the process
 * lifetime, so nothing is released yet (see TODO and the disabled call). */
void command_queue_release(struct command_queue *cq)
{
    __prof;
    /* TODO */
    (UNUSED)cq;
    //ID3D12CommandQueue_Release(cq->cq);
}
|
|
|
|
/* ========================== *
|
|
* Command list
|
|
* ========================== */
|
|
|
|
struct command_list_pool *command_list_pool_alloc(struct command_queue *cq)
|
|
{
|
|
struct command_list_pool *pool = 0;
|
|
{
|
|
Arena *arena = AllocArena(Gibi(64));
|
|
pool = PushStruct(arena, struct command_list_pool);
|
|
pool->arena = arena;
|
|
}
|
|
pool->cq = cq;
|
|
return pool;
|
|
}
|
|
|
|
/* Acquires a command list from `pool`, ready for recording. Reuses the first
 * previously submitted list whose fence target the GPU has passed (keeping
 * its D3D12 list + allocator); otherwise allocates a new one. Takes a shared
 * hold on the global record mutex, released by command_list_close. */
struct command_list *command_list_open(struct command_list_pool *pool)
{
    __prof;
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    struct command_queue *cq = pool->cq;
    /* Snapshot of GPU progress on this queue, used to test reusability */
    u64 completed_fence_value = ID3D12Fence_GetCompletedValue(cq->submit_fence);

    struct command_list *cl = 0;
    struct ID3D12GraphicsCommandList *old_cl = 0;
    struct ID3D12CommandAllocator *old_ca = 0;
    {
        P_Lock lock = P_LockE(&pool->mutex);
        /* Find first command list ready for reuse */
        for (struct command_list *tmp = pool->first_submitted_command_list; tmp; tmp = tmp->next_submitted)
        {
            if (completed_fence_value >= tmp->submitted_fence_target)
            {
                cl = tmp;
                break;
            }
        }
        if (cl)
        {
            /* Remove from submitted list */
            /* NOTE(review): unlinking reads cl->prev_submitted — correct
               only if the append site (command_list_close) maintains it. */
            old_cl = cl->cl;
            old_ca = cl->ca;
            struct command_list *prev = cl->prev_submitted;
            struct command_list *next = cl->next_submitted;
            if (prev)
            {
                prev->next_submitted = next;
            }
            else
            {
                pool->first_submitted_command_list = next;
            }
            if (next)
            {
                next->prev_submitted = prev;
            }
            else
            {
                pool->last_submitted_command_list = prev;
            }
        }
        else
        {
            /* Nothing retired yet: allocate a fresh struct from the pool arena */
            cl = PushStructNoZero(pool->arena, struct command_list);
        }
        P_Unlock(&lock);
    }
    ZeroStruct(cl);
    cl->cq = cq;
    cl->pool = pool;
    /* Held (shared) for the entire recording; released in command_list_close */
    cl->global_record_lock = P_LockS(&g->global_command_list_record_mutex);

    HRESULT hr = 0;
    if (old_cl)
    {
        /* Reuse the recycled D3D12 objects */
        cl->cl = old_cl;
        cl->ca = old_ca;
    }
    else
    {
        hr = ID3D12Device_CreateCommandAllocator(g->device, cq->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca);
        if (FAILED(hr))
        {
            P_Panic(Lit("Failed to create command allocator"));
        }

        hr = ID3D12Device_CreateCommandList(g->device, 0, cq->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl);
        if (FAILED(hr))
        {
            P_Panic(Lit("Failed to create command list"));
        }

        /* New lists start in the recording state; close so the common reset
           path below can reopen new and recycled lists uniformly */
        hr = ID3D12GraphicsCommandList_Close(cl->cl);
        if (FAILED(hr))
        {
            P_Panic(Lit("Failed to close command list during initialization"));
        }
    }

    /* Reset */
    hr = ID3D12CommandAllocator_Reset(cl->ca);
    if (FAILED(hr))
    {
        P_Panic(Lit("Failed to reset command allocator"));
    }

    hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0);
    if (FAILED(hr))
    {
        P_Panic(Lit("Failed to reset command list"));
    }

    return cl;
}
|
|
|
|
/* TODO: Allow multiple command list submissions */
|
|
u64 command_list_close(struct command_list *cl)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
struct command_queue *cq = cl->cq;
|
|
struct command_list_pool *pool = cl->pool;
|
|
|
|
/* Close */
|
|
{
|
|
__profn("Close DX12 command list");
|
|
HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl);
|
|
if (FAILED(hr))
|
|
{
|
|
/* TODO: Don't panic */
|
|
P_Panic(Lit("Failed to close command list before execution"));
|
|
}
|
|
}
|
|
|
|
/* Submit */
|
|
u64 submit_fence_target = 0;
|
|
{
|
|
__profn("Execute");
|
|
P_Lock submit_lock = P_LockS(&g->global_submit_mutex);
|
|
P_Lock fence_lock = P_LockE(&cq->submit_fence_mutex);
|
|
{
|
|
submit_fence_target = ++cq->submit_fence_target;
|
|
ID3D12CommandQueue_ExecuteCommandLists(cq->cq, 1, (ID3D12CommandList **)&cl->cl);
|
|
ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, submit_fence_target);
|
|
}
|
|
P_Unlock(&fence_lock);
|
|
P_Unlock(&submit_lock);
|
|
}
|
|
|
|
/* Add descriptor heaps to submitted list */
|
|
{
|
|
P_Lock lock = P_LockE(&g->command_descriptor_heaps_mutex);
|
|
for (struct command_descriptor_heap *cdh = cl->first_command_descriptor_heap; cdh; cdh = cdh->next_in_command_list)
|
|
{
|
|
cdh->submitted_cq = cq;
|
|
cdh->submitted_fence_target = submit_fence_target;
|
|
if (g->last_submitted_command_descriptor_heap)
|
|
{
|
|
g->last_submitted_command_descriptor_heap->next_submitted = cdh;
|
|
}
|
|
else
|
|
{
|
|
g->first_submitted_command_descriptor_heap = cdh;
|
|
}
|
|
g->last_submitted_command_descriptor_heap = cdh;
|
|
}
|
|
P_Unlock(&lock);
|
|
}
|
|
|
|
/* Add command buffers to submitted list */
|
|
{
|
|
P_Lock lock = P_LockE(&g->command_buffers_mutex);
|
|
for (struct command_buffer *cb = cl->first_command_buffer; cb; cb = cb->next_in_command_list)
|
|
{
|
|
struct command_buffer_group *group = cb->group;
|
|
cb->submitted_cq = cq;
|
|
cb->submitted_fence_target = submit_fence_target;
|
|
if (group->last_submitted)
|
|
{
|
|
group->last_submitted->next_submitted = cb;
|
|
}
|
|
else
|
|
{
|
|
group->first_submitted = cb;
|
|
}
|
|
group->last_submitted = cb;
|
|
}
|
|
P_Unlock(&lock);
|
|
}
|
|
|
|
/* Add command list to pool submitted list */
|
|
P_Unlock(&cl->global_record_lock);
|
|
cl->submitted_fence_target = submit_fence_target;
|
|
{
|
|
P_Lock lock = P_LockE(&pool->mutex);
|
|
if (pool->last_submitted_command_list)
|
|
{
|
|
pool->last_submitted_command_list->next_submitted = cl;
|
|
}
|
|
else
|
|
{
|
|
pool->first_submitted_command_list = cl;
|
|
}
|
|
pool->last_submitted_command_list = cl;
|
|
P_Unlock(&lock);
|
|
}
|
|
|
|
return submit_fence_target;
|
|
}
|
|
|
|
/* ========================== *
|
|
* Command descriptor heap (GPU / shader visible descriptor heap)
|
|
* ========================== */
|
|
|
|
/* Allocates (or recycles) a shader-visible descriptor heap for this command
 * list and snapshots the CPU-side CBV/SRV/UAV heap's descriptors into it.
 * The heap is linked onto the command list and retired onto the global
 * submitted list by command_list_close. Returns the GPU heap wrapper. */
struct command_descriptor_heap *command_list_push_descriptor_heap(struct command_list *cl, struct cpu_descriptor_heap *dh_cpu)
{
    __prof;
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    Assert(dh_cpu->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); /* Src heap must have expected type */

    /* Allocate GPU heap */
    struct command_descriptor_heap *cdh = 0;
    ID3D12DescriptorHeap *old_heap = 0;
    D3D12_CPU_DESCRIPTOR_HANDLE old_start_cpu_handle = ZI;
    D3D12_GPU_DESCRIPTOR_HANDLE old_start_gpu_handle = ZI;
    {
        P_Lock lock = P_LockE(&g->command_descriptor_heaps_mutex);
        /* Find first heap ready for reuse */
        for (struct command_descriptor_heap *tmp = g->first_submitted_command_descriptor_heap; tmp; tmp = tmp->next_submitted)
        {
            /* TODO: Cache completed fence values */
            u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence);
            if (completed_fence_value >= tmp->submitted_fence_target)
            {
                cdh = tmp;
                break;
            }
        }
        if (cdh)
        {
            /* Remove from submitted list */
            /* NOTE(review): unlinking reads cdh->prev_submitted — correct
               only if the append site (command_list_close) maintains it. */
            old_heap = cdh->heap;
            old_start_cpu_handle = cdh->start_cpu_handle;
            old_start_gpu_handle = cdh->start_gpu_handle;
            struct command_descriptor_heap *prev = cdh->prev_submitted;
            struct command_descriptor_heap *next = cdh->next_submitted;
            if (prev)
            {
                prev->next_submitted = next;
            }
            else
            {
                g->first_submitted_command_descriptor_heap = next;
            }
            if (next)
            {
                next->prev_submitted = prev;
            }
            else
            {
                g->last_submitted_command_descriptor_heap = prev;
            }
        }
        else
        {
            /* No available heap available for reuse, allocate new */
            cdh = PushStructNoZero(g->command_descriptor_heaps_arena, struct command_descriptor_heap);
        }
        P_Unlock(&lock);
    }
    ZeroStruct(cdh);

    if (old_heap)
    {
        /* Recycled: reuse the existing D3D12 heap and cached handles */
        cdh->heap = old_heap;
        cdh->start_cpu_handle = old_start_cpu_handle;
        cdh->start_gpu_handle = old_start_gpu_handle;
    }
    else
    {
        /* Fresh: create a shader-visible heap of fixed capacity */
        D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
        desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
        desc.NumDescriptors = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
        desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
        HRESULT hr = ID3D12Device_CreateDescriptorHeap(g->device, &desc, &IID_ID3D12DescriptorHeap, (void **)&cdh->heap);
        if (FAILED(hr))
        {
            P_Panic(Lit("Failed to create GPU descriptor heap"));
        }
        ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cdh->heap, &cdh->start_cpu_handle);
        ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(cdh->heap, &cdh->start_gpu_handle);
    }

    /* CopyCPU heap */
    /* Snapshot every reserved descriptor from the CPU heap into the GPU heap */
    {
        P_Lock lock = P_LockS(&dh_cpu->mutex);
        ID3D12Device_CopyDescriptorsSimple(g->device, dh_cpu->num_descriptors_reserved, cdh->start_cpu_handle, dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
        P_Unlock(&lock);
    }

    /* Insert into command list */
    cdh->next_in_command_list = cl->first_command_descriptor_heap;
    cl->first_command_descriptor_heap = cdh;

    return cdh;
}
|
|
|
|
/* ========================== *
|
|
* Command buffer
|
|
* ========================== */
|
|
|
|
/* Hashes a power-of-two bucket size into a dict key for command buffer
 * group lookup (RandU64FromSeed serves as an integer mixing function). */
u64 command_buffer_hash_from_size(u64 size)
{
    return RandU64FromSeed(size);
}
|
|
|
|
/* Rounds `v` up to the next power of two (returns v when it already is
 * one). align_up_pow2(0) == 0, and values above 2^63 wrap to 0 — matching
 * the original unrolled bit-smearing implementation exactly. */
u64 align_up_pow2(u64 v)
{
    if (v == 0)
    {
        return 0;
    }
    /* Smear the highest set bit of (v - 1) into all lower bits, then add one */
    u64 x = v - 1;
    for (u64 shift = 1; shift < 64; shift <<= 1)
    {
        x |= x >> shift;
    }
    return x + 1;
}
|
|
|
|
/* Pushes `count` elements of `elems` into a transient upload buffer on
 * command list `cl`; a null `elems` yields a zero-length push (stride 1). */
#define command_list_push_buffer(cl, count, elems) _command_list_push_buffer((cl), count * ((elems) ? sizeof(*(elems)) : 0), (elems), (elems) ? sizeof(*(elems)) : 1)
/* Uploads `data_len` bytes (element stride `data_stride`) into a pooled
 * upload-heap buffer and links it onto `cl`. Buffers are pooled in
 * power-of-two size groups; a submitted buffer is reused once its
 * submission's fence passes. A structured-buffer SRV matching this push's
 * element layout is (re)created on every push. */
struct command_buffer *_command_list_push_buffer(struct command_list *cl, u64 data_len, void *data, u64 data_stride)
{
    __prof;
    GPU_D12_SharedState *g = &GPU_D12_shared_state;

    /* Data length should be a multiple of stride */
    Assert(data_len % data_stride == 0);

    /* Determine size: power-of-two bucket, clamped to the pool minimum */
    u64 size = MaxU64(DX12_COMMAND_BUFFER_MIN_SIZE, align_up_pow2(data_len));

    /* Allocate buffer */
    struct command_buffer_group *cb_group = 0;
    struct command_buffer *cb = 0;
    struct dx12_resource *r = 0;
    {
        P_Lock lock = P_LockE(&g->command_buffers_mutex);

        /* Find (or lazily create) the pool group for this bucket size */
        {
            u64 group_hash = command_buffer_hash_from_size(size);
            DictEntry *cb_group_entry = EnsureDictEntry(g->command_buffers_arena, g->command_buffers_dict, group_hash);
            cb_group = (struct command_buffer_group *)cb_group_entry->value;
            if (!cb_group)
            {
                /* Create group */
                cb_group = PushStruct(g->command_buffers_arena, struct command_buffer_group);
                cb_group_entry->value = (u64)cb_group;
            }
        }
        /* Find first command buffer ready for reuse */
        for (struct command_buffer *tmp = cb_group->first_submitted; tmp; tmp = tmp->next_submitted)
        {
            /* TODO: Cache completed fence values */
            u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence);
            if (completed_fence_value >= tmp->submitted_fence_target)
            {
                cb = tmp;
                break;
            }
        }
        if (cb)
        {
            /* Remove from submitted list */
            /* NOTE(review): unlinking reads cb->prev_submitted — correct
               only if the append site (command_list_close) maintains it. */
            r = cb->resource;
            struct command_buffer *prev = cb->prev_submitted;
            struct command_buffer *next = cb->next_submitted;
            if (prev)
            {
                prev->next_submitted = next;
            }
            else
            {
                cb_group->first_submitted = next;
            }
            if (next)
            {
                next->prev_submitted = prev;
            }
            else
            {
                cb_group->last_submitted = prev;
            }
        }
        else
        {
            /* Allocate new */
            cb = PushStructNoZero(g->command_buffers_arena, struct command_buffer);
        }
        P_Unlock(&lock);
    }
    ZeroStruct(cb);
    cb->group = cb_group;
    cb->size = data_len; /* logical payload size; backing resource is `size` bytes */

    /* Create upload heap */
    /* Only when no recycled buffer supplied a resource of this bucket size */
    if (!r)
    {
        D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
        heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
        heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;

        D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;

        D3D12_RESOURCE_DESC desc = ZI;
        desc.Flags = 0;
        desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
        desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
        desc.Format = DXGI_FORMAT_UNKNOWN;
        desc.Alignment = 0;
        desc.Width = size;
        desc.Height = 1;
        desc.DepthOrArraySize = 1;
        desc.MipLevels = 1;
        desc.SampleDesc.Count = 1;
        desc.SampleDesc.Quality = 0;
        D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;

        r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state);
        r->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
    }
    cb->resource = r;

    /* (Re)create the structured-buffer SRV for this push's element layout */
    {
        struct D3D12_SHADER_RESOURCE_VIEW_DESC desc = ZI;
        desc.Format = DXGI_FORMAT_UNKNOWN;
        desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
        desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
        desc.Buffer.FirstElement = 0;
        desc.Buffer.NumElements = MaxU32(data_len / data_stride, 1);
        desc.Buffer.StructureByteStride = data_stride;
        desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
        ID3D12Device_CreateShaderResourceView(g->device, r->resource, &desc, r->srv_descriptor->handle);
    }

    /* Write data to resource */
    {
        D3D12_RANGE read_range = ZI; /* empty read range: CPU will not read back */
        void *dst = 0;
        HRESULT hr = ID3D12Resource_Map(cb->resource->resource, 0, &read_range, &dst);
        if (FAILED(hr) || !dst)
        {
            /* TODO: Don't panic */
            P_Panic(Lit("Failed to map command buffer resource"));
        }
        CopyBytes(dst, data, data_len);
        ID3D12Resource_Unmap(cb->resource->resource, 0, 0);
    }

    /* Insert into command list */
    cb->next_in_command_list = cl->first_command_buffer;
    cl->first_command_buffer = cb;

    return cb;
}
|
|
|
|
/* ========================== *
|
|
* Wait job
|
|
* ========================== */
|
|
|
|
P_JobDef(dx12_wait_fence_job, job)
|
|
{
|
|
__prof;
|
|
struct dx12_wait_fence_job_sig *sig = job.sig;
|
|
ID3D12Fence *fence = sig->fence;
|
|
u64 target = sig->target;
|
|
if (ID3D12Fence_GetCompletedValue(fence) < target)
|
|
{
|
|
/* TODO: Pool events */
|
|
HANDLE event = CreateEvent(0, 0, 0, 0);
|
|
ID3D12Fence_SetEventOnCompletion(sig->fence, sig->target, event);
|
|
WaitForSingleObject(event, INFINITE);
|
|
CloseHandle(event);
|
|
}
|
|
}
|
|
|
|
/* ========================== *
|
|
* Texture
|
|
* ========================== */
|
|
|
|
/* Allocate a 2D texture in a default heap, create its SRV (and, when
 * GP_TEXTURE_FLAG_TARGETABLE is set, UAV + RTV), and optionally upload
 * `initial_data` synchronously. The texture is left in COPY_DEST state. */
GPU_Resource *GPU_AllocTexture(GPU_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data)
{
    __prof;
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    if (size.x <= 0 || size.y <= 0)
    {
        P_Panic(Lit("Tried to create texture with dimension <= 0"));
    }

    /* Translate the GP format enum into the matching DXGI format */
    LocalPersist const DXGI_FORMAT formats[] = {
        [GP_TEXTURE_FORMAT_R8_UNORM] = DXGI_FORMAT_R8_UNORM,
        [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM] = DXGI_FORMAT_R8G8B8A8_UNORM,
        [GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB,
        [GP_TEXTURE_FORMAT_R16G16B16A16_FLOAT] = DXGI_FORMAT_R16G16B16A16_FLOAT
    };

    DXGI_FORMAT dxgi_format = ZI;
    if (format >= 0 && format < (i32)countof(formats))
    {
        dxgi_format = formats[format];
    }
    /* Reject format 0 AND any value that did not map to a known DXGI format.
     * (Previously only format == 0 panicked; out-of-range formats fell through
     * with DXGI_FORMAT_UNKNOWN and failed later at resource creation.) */
    if (format == 0 || dxgi_format == DXGI_FORMAT_UNKNOWN)
    {
        P_Panic(Lit("Tried to create texture with unknown format"));
    }

    D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
    heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
    heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;

    D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;

    D3D12_RESOURCE_DESC desc = ZI;
    desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
    desc.Format = dxgi_format;
    desc.Alignment = 0;
    desc.Width = size.x;
    desc.Height = size.y;
    desc.DepthOrArraySize = 1;
    desc.MipLevels = 1;
    desc.SampleDesc.Count = 1;
    desc.SampleDesc.Quality = 0;

    /* BUGFIX: the allow-RT/UAV flags must be on the desc BEFORE the resource
     * is created. The original code OR'd them into `desc` after
     * dx12_resource_alloc, which had no effect on the already-created
     * resource, so the RTV/UAV views below were created on a resource that
     * did not permit render-target or unordered-access usage. */
    if (flags & GP_TEXTURE_FLAG_TARGETABLE)
    {
        desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
    }

    /* Start in COPY_DEST so the optional upload below can write directly */
    D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;

    struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state);
    r->texture_size = size;
    r->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
    ID3D12Device_CreateShaderResourceView(g->device, r->resource, 0, r->srv_descriptor->handle);
    if (flags & GP_TEXTURE_FLAG_TARGETABLE)
    {
        r->uav_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
        r->rtv_descriptor = descriptor_alloc(g->rtv_heap);
        ID3D12Device_CreateUnorderedAccessView(g->device, r->resource, 0, 0, r->uav_descriptor->handle);
        ID3D12Device_CreateRenderTargetView(g->device, r->resource, 0, r->rtv_descriptor->handle);
    }

    /* Upload texture */
    if (initial_data)
    {
        /* TODO: Make wait optional */
        P_Counter counter = ZI;
        struct dx12_upload_job_sig sig = ZI;
        sig.resource = r;
        sig.data = initial_data;
        P_Run(1, dx12_upload_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
        P_WaitOnCounter(&counter);
    }

    return (GPU_Resource *)r;
}
|
|
|
|
/* Return the texture's dimensions. The size is cached on the resource at
 * allocation time, so no device query is needed. */
Vec2I32 GPU_GetTextureSize(GPU_Resource *resource)
{
    return ((struct dx12_resource *)resource)->texture_size;
}
|
|
|
|
/* ========================== *
|
|
* Upload
|
|
* ========================== */
|
|
|
|
/* Job: synchronously upload `sig->data` into texture `sig->resource`.
 * Stages the pixels through a transient upload-heap buffer, records a
 * CopyTextureRegion on the background copy queue, blocks until the GPU has
 * consumed the staging buffer, then releases it. */
P_JobDef(dx12_upload_job, job)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    struct dx12_upload_job_sig *sig = job.sig;
    struct dx12_resource *r = sig->resource;
    void *data = sig->data;

    /* Destination must already be in COPY_DEST (GPU_AllocTexture creates it that way) */
    Assert(r->state == D3D12_RESOURCE_STATE_COPY_DEST);

    D3D12_RESOURCE_DESC desc = ZI;
    ID3D12Resource_GetDesc(r->resource, &desc);

    {
        /* Ask the device how subresource 0 must be laid out in the upload
         * buffer: padded row pitch, row count, and total byte size. */
        u64 upload_size = 0;
        u64 upload_row_size = 0;
        u32 upload_num_rows = 0;
        D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI;
        ID3D12Device_GetCopyableFootprints(g->device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size);
        D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint;

        /* Create upload heap */
        struct dx12_resource *upload = 0;
        {
            D3D12_HEAP_PROPERTIES upload_heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
            upload_heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
            upload_heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;

            D3D12_HEAP_FLAGS upload_heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;

            /* Upload heaps hold buffers, so describe a row-major buffer of the
             * footprint's total size regardless of the destination dimension. */
            D3D12_RESOURCE_DESC upload_desc = ZI;
            upload_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
            upload_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
            upload_desc.Format = DXGI_FORMAT_UNKNOWN;
            upload_desc.Alignment = 0;
            upload_desc.Width = upload_size;
            upload_desc.Height = 1;
            upload_desc.DepthOrArraySize = 1;
            upload_desc.MipLevels = 1;
            upload_desc.SampleDesc.Count = 1;
            upload_desc.SampleDesc.Quality = 0;
            /* GENERIC_READ is the required initial state for upload-heap resources */
            D3D12_RESOURCE_STATES upload_initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;

            upload = dx12_resource_alloc(upload_heap_props, upload_heap_flags, upload_desc, upload_initial_state);
        }

        struct command_queue *cq = g->command_queues[DX12_QUEUE_COPY_BACKGROUND];
        struct command_list *cl = command_list_open(cq->cl_pool);
        {
            /* Copy to upload heap */
            {
                D3D12_RANGE read_range = ZI; /* empty range: CPU will not read back */
                void *mapped = 0;
                HRESULT hr = ID3D12Resource_Map(upload->resource, 0, &read_range, &mapped);
                if (FAILED(hr) || !mapped)
                {
                    /* TODO: Don't panic */
                    P_Panic(Lit("Failed to map texture upload resource"));
                }
                u8 *dst = (u8 *)mapped + placed_footprint.Offset;
                u8 *src = data;

                /* Bytes per tightly-packed slice. NOTE(review): upload_row_size
                 * is u64 but z_size is u32 -- this truncates for slices >= 4 GiB;
                 * confirm upload sizes stay below that. */
                u32 z_size = upload_row_size * upload_num_rows;

                /* Copy row by row: source rows are tightly packed, destination
                 * rows are padded to footprint.RowPitch. NOTE(review): for z > 0
                 * the dst offset reuses the tightly-packed z_size instead of
                 * RowPitch * num_rows; harmless while DepthOrArraySize == 1 --
                 * verify before uploading array/3D textures. */
                for (u32 z = 0; z < desc.DepthOrArraySize; ++z)
                {
                    u32 z_offset = z * z_size;
                    for (u32 y = 0; y < upload_num_rows; ++y)
                    {
                        CopyBytes(dst + y * footprint.RowPitch + z_offset, src + y * upload_row_size + z_offset, upload_row_size);
                    }
                }
                ID3D12Resource_Unmap(upload->resource, 0, 0);
            }

            /* Copy from upload heap to texture */
            {
                __profnc_dx12(cl->cq->prof, cl->cl, "Upload texture", Rgb32F(0.2, 0.5, 0.2));
                /* Destination addressed by subresource index ... */
                D3D12_TEXTURE_COPY_LOCATION dst_loc = {
                    .pResource = r->resource,
                    .Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
                    .SubresourceIndex = 0,
                };

                /* ... source addressed by the placed footprint inside the buffer */
                D3D12_TEXTURE_COPY_LOCATION src_loc = {
                    .pResource = upload->resource,
                    .Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
                    .PlacedFootprint = placed_footprint,
                };

                ID3D12GraphicsCommandList_CopyTextureRegion(cl->cl, &dst_loc, 0, 0, 0, &src_loc, 0);
            }
        } u64 fence_target = command_list_close(cl);

        /* Wait on fence so we know it's safe to release upload heap */
        if (ID3D12Fence_GetCompletedValue(cq->submit_fence) < fence_target)
        {
            struct dx12_wait_fence_job_sig wait_sig = ZI;
            wait_sig.fence = cq->submit_fence;
            wait_sig.target = fence_target;
            P_Counter counter = ZI;
            /* Park the blocking wait on a floating, low-priority worker */
            P_Run(1, dx12_wait_fence_job, &wait_sig, P_Pool_Floating, P_Priority_Low, &counter);
            P_WaitOnCounter(&counter);
        }

        /* Release upload heap now */
        dx12_resource_release_now(upload);
    }
}
|
|
|
|
/* ========================== *
|
|
* Run utils
|
|
* ========================== */
|
|
|
|
void command_list_set_pipeline(struct command_list *cl, struct pipeline *pipeline)
|
|
{
|
|
ID3D12GraphicsCommandList_SetPipelineState(cl->cl, pipeline->pso);
|
|
if (pipeline->is_gfx)
|
|
{
|
|
ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, pipeline->rootsig);
|
|
}
|
|
else
|
|
{
|
|
ID3D12GraphicsCommandList_SetComputeRootSignature(cl->cl, pipeline->rootsig);
|
|
}
|
|
cl->cur_pipeline = pipeline;
|
|
}
|
|
|
|
/* Upload a root-constant struct (`src`, `size` bytes) to root parameter 0 of
 * the currently bound pipeline, on the graphics or compute bind point as
 * appropriate. `size` must be 16-byte padded and at most 256 bytes. */
void command_list_set_sig(struct command_list *cl, void *src, u32 size)
{
    __prof;
    Assert(size % 16 == 0); /* Root constant structs must pad to 16 bytes */
    Assert(size <= 256); /* Only 64 32-bit root constants allowed in signature */
    u32 num32bit = size / 4;
    /* Set all constants in one API call instead of looping one
     * SetRoot32BitConstant call per 32-bit value (up to 64 calls). The batch
     * variant copies num32bit values starting at destination offset 0, which
     * is exactly what the original loop produced. */
    if (cl->cur_pipeline->is_gfx)
    {
        ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(cl->cl, 0, num32bit, src, 0);
    }
    else
    {
        ID3D12GraphicsCommandList_SetComputeRoot32BitConstants(cl->cl, 0, num32bit, src, 0);
    }
}
|
|
|
|
/* Build a D3D12 viewport covering the rect with the full [0, 1] depth range. */
struct D3D12_VIEWPORT viewport_from_rect(Rect r)
{
    struct D3D12_VIEWPORT vp = {
        .TopLeftX = r.x,
        .TopLeftY = r.y,
        .Width = r.width,
        .Height = r.height,
        .MinDepth = 0.0f,
        .MaxDepth = 1.0f,
    };
    return vp;
}
|
|
|
|
/* Convert a rect (origin + extent) into a D3D12 scissor (edge coordinates). */
D3D12_RECT scissor_from_rect(Rect r)
{
    D3D12_RECT sc = {
        .left = r.x,
        .top = r.y,
        .right = r.x + r.width,
        .bottom = r.y + r.height,
    };
    return sc;
}
|
|
|
|
/* Describe the whole command buffer as a vertex buffer with the given stride. */
D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer *cb, u32 vertex_size)
{
    D3D12_VERTEX_BUFFER_VIEW view = {
        .BufferLocation = cb->resource->gpu_address,
        .SizeInBytes = cb->size,
        .StrideInBytes = vertex_size,
    };
    return view;
}
|
|
|
|
/* Describe the whole command buffer as an index buffer of the given format. */
D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *cb, DXGI_FORMAT format)
{
    D3D12_INDEX_BUFFER_VIEW view = {
        .BufferLocation = cb->resource->gpu_address,
        .Format = format,
        .SizeInBytes = cb->size,
    };
    return view;
}
|
|
|
|
/* Allocate a default-heap 2D gbuffer texture usable as SRV, UAV, and render
 * target, creating one descriptor of each kind for it. */
struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, Vec2I32 size, D3D12_RESOURCE_STATES initial_state)
{
    __prof;
    GPU_D12_SharedState *g = &GPU_D12_shared_state;

    D3D12_HEAP_PROPERTIES heap_props = {
        .Type = D3D12_HEAP_TYPE_DEFAULT,
        .CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
        .MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
    };
    D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;

    D3D12_RESOURCE_DESC desc = {
        .Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
        .Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
        .Format = format,
        .Alignment = 0,
        .Width = size.x,
        .Height = size.y,
        .DepthOrArraySize = 1,
        .MipLevels = 1,
        .SampleDesc = { .Count = 1, .Quality = 0 },
        .Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
    };

    struct dx12_resource *res = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state);

    /* One view of each kind so the gbuffer can be sampled, written, and targeted */
    res->srv_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
    res->uav_descriptor = descriptor_alloc(g->cbv_srv_uav_heap);
    res->rtv_descriptor = descriptor_alloc(g->rtv_heap);
    ID3D12Device_CreateShaderResourceView(g->device, res->resource, 0, res->srv_descriptor->handle);
    ID3D12Device_CreateUnorderedAccessView(g->device, res->resource, 0, 0, res->uav_descriptor->handle);
    ID3D12Device_CreateRenderTargetView(g->device, res->resource, 0, res->rtv_descriptor->handle);

    res->texture_size = size;
    return res;
}
|
|
|
|
/* Calculate the view projection matrix */
|
|
/* Compose an orthographic projection (top-left origin, depth [-1, 1]) with
 * the view transform into a single view-projection matrix. */
Inline Mat4x4 calculate_vp(Xform view, f32 viewport_width, f32 viewport_height)
{
    Mat4x4 ortho = Mat4x4FromOrtho(0.0, viewport_width, viewport_height, 0.0, -1.0, 1.0);
    return MulMat4x4(ortho, Mat4x4FromXform(view));
}
|
|
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descriptor *descriptor, struct command_descriptor_heap *cdh)
|
|
{
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
struct D3D12_GPU_DESCRIPTOR_HANDLE result = ZI;
|
|
result.ptr = cdh->start_gpu_handle.ptr + descriptor->index * g->desc_sizes[descriptor->heap->type];
|
|
return result;
|
|
}
|
|
|
|
/* ========================== *
|
|
* Render sig
|
|
* ========================== */
|
|
|
|
struct render_sig *render_sig_alloc(void)
|
|
{
|
|
__prof;
|
|
struct render_sig *sig = 0;
|
|
{
|
|
Arena *arena = AllocArena(Mebi(64));
|
|
sig = PushStruct(arena, struct render_sig);
|
|
sig->arena = arena;
|
|
}
|
|
|
|
sig->material_instance_descs_arena = AllocArena(Gibi(1));
|
|
sig->material_grid_descs_arena = AllocArena(Gibi(1));
|
|
sig->ui_rect_instance_descs_arena = AllocArena(Gibi(1));
|
|
sig->ui_shape_verts_arena = AllocArena(Gibi(1));
|
|
sig->ui_shape_indices_arena = AllocArena(Gibi(1));
|
|
|
|
return sig;
|
|
}
|
|
|
|
void render_sig_reset(struct render_sig *sig)
|
|
{
|
|
__prof;
|
|
|
|
/* Reset material instances */
|
|
sig->num_material_instance_descs = 0;
|
|
ResetArena(sig->material_instance_descs_arena);
|
|
|
|
/* Reset UI rect instances */
|
|
sig->num_ui_rect_instance_descs = 0;
|
|
ResetArena(sig->ui_rect_instance_descs_arena);
|
|
|
|
/* Reset shapes */
|
|
ResetArena(sig->ui_shape_verts_arena);
|
|
ResetArena(sig->ui_shape_indices_arena);
|
|
|
|
/* Reset grids */
|
|
sig->num_material_grid_descs = 0;
|
|
ResetArena(sig->material_grid_descs_arena);
|
|
}
|
|
|
|
GPU_RenderSig *GPU_AllocRenderSig(void)
|
|
{
|
|
__prof;
|
|
struct render_sig *sig = render_sig_alloc();
|
|
return (GPU_RenderSig *)sig;
|
|
}
|
|
|
|
/* Append one render command to the sig's per-kind stream.
 * Returns a 1-based id for kinds that produce one (materials, UI rects,
 * grids) and 0 otherwise (including a null sig). The id returned by
 * PUSH_GRID is what DRAW_MATERIAL passes back as `grid_cmd_id`. */
u32 GPU_PushRenderCmd(GPU_RenderSig *render_sig, GPU_RenderCmdDesc *cmd_desc)
{
    u32 ret = 0;
    struct render_sig *sig = (struct render_sig *)render_sig;
    if (sig)
    {
        switch (cmd_desc->kind)
        {
            default: break;

            case GP_RENDER_CMD_KIND_DRAW_MATERIAL:
            {
                struct dx12_resource *texture = (struct dx12_resource *)cmd_desc->material.texture;
                struct material_instance_desc *instance_desc = PushStruct(sig->material_instance_descs_arena, struct material_instance_desc);
                instance_desc->xf = cmd_desc->material.xf;
                /* 0xFFFFFFFF marks "no texture" for the shader */
                instance_desc->texture_id = texture ? texture->srv_descriptor->index : 0xFFFFFFFF;
                instance_desc->clip = cmd_desc->material.clip;
                instance_desc->tint = cmd_desc->material.tint;
                instance_desc->is_light = cmd_desc->material.is_light;
                instance_desc->light_emittance = cmd_desc->material.light_emittance;
                /* Convert 1-based grid command id to 0-based index.
                 * NOTE(review): a grid_cmd_id of 0 underflows here; presumably
                 * that wraps to a "no grid" sentinel -- confirm field type. */
                instance_desc->grid_id = cmd_desc->material.grid_cmd_id - 1;
                ret = ++sig->num_material_instance_descs;
            } break;

            case GP_RENDER_CMD_KIND_DRAW_UI_RECT:
            {
                struct dx12_resource *texture = (struct dx12_resource *)cmd_desc->ui_rect.texture;
                struct ui_rect_instance_desc *instance_desc = PushStruct(sig->ui_rect_instance_descs_arena, struct ui_rect_instance_desc);
                instance_desc->xf = cmd_desc->ui_rect.xf;
                /* 0xFFFFFFFF marks "no texture" for the shader */
                instance_desc->texture_id = texture ? texture->srv_descriptor->index : 0xFFFFFFFF;
                instance_desc->clip = cmd_desc->ui_rect.clip;
                instance_desc->tint = cmd_desc->ui_rect.tint;
                ret = ++sig->num_ui_rect_instance_descs;
            } break;

            case GP_RENDER_CMD_KIND_DRAW_UI_SHAPE:
            {
                /* Append this shape's vertices and indices to the shared shape
                 * streams; indices are rebased so every shape draws out of one
                 * big vertex buffer. NOTE(review): this kind never assigns ret,
                 * so callers always get 0 -- looks intentional (shapes carry no
                 * per-command id); confirm. */
                u32 color = cmd_desc->ui_shape.color;
                K_ShapeVert *verts = PushStructsNoZero(sig->ui_shape_verts_arena, K_ShapeVert, cmd_desc->ui_shape.vertices.count);
                u32 *indices = PushStructsNoZero(sig->ui_shape_indices_arena, u32, cmd_desc->ui_shape.indices.count);
                for (u32 i = 0; i < cmd_desc->ui_shape.vertices.count; ++i)
                {
                    K_ShapeVert *v = &verts[i];
                    v->pos = cmd_desc->ui_shape.vertices.points[i];
                    v->color_srgb = color;
                }
                /* Index of this shape's first vertex within the whole arena */
                u32 vert_offset = verts - (K_ShapeVert *)ArenaBase(sig->ui_shape_verts_arena);
                for (u32 i = 0; i < cmd_desc->ui_shape.indices.count; ++i)
                {
                    indices[i] = cmd_desc->ui_shape.indices.indices[i] + vert_offset;
                }
            } break;

            case GP_RENDER_CMD_KIND_PUSH_GRID:
            {
                struct material_grid_desc *grid_desc = PushStruct(sig->material_grid_descs_arena, struct material_grid_desc);
                grid_desc->line_thickness = cmd_desc->grid.line_thickness;
                grid_desc->line_spacing = cmd_desc->grid.line_spacing;
                grid_desc->offset = cmd_desc->grid.offset;
                grid_desc->bg0_color = cmd_desc->grid.bg0_color;
                grid_desc->bg1_color = cmd_desc->grid.bg1_color;
                grid_desc->line_color = cmd_desc->grid.line_color;
                grid_desc->x_color = cmd_desc->grid.x_color;
                grid_desc->y_color = cmd_desc->grid.y_color;
                ret = ++sig->num_material_grid_descs;
            } break;
        }
    }
    return ret;
}
|
|
|
|
/* ========================== *
|
|
* Render
|
|
* ========================== */
|
|
|
|
GPU_Resource *GPU_RunRender(GPU_RenderSig *gp_render_sig, GPU_RenderParams params)
|
|
{
|
|
__prof;
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
TempArena scratch = BeginScratchNoConflict();
|
|
struct render_sig *rsig = (struct render_sig *)gp_render_sig;
|
|
++rsig->frame_index;
|
|
|
|
Vec2I32 ui_size = VEC2I32(MaxI32(params.ui_size.x, 1), MaxI32(params.ui_size.y, 1));
|
|
Vec2I32 render_size = VEC2I32(MaxI32(params.render_size.x, 1), MaxI32(params.render_size.y, 1));
|
|
Xform world_to_render_xf = params.world_to_render_xf;
|
|
Xform render_to_ui_xf = params.render_to_ui_xf;
|
|
|
|
Rect ui_viewport = RectFromVec2(VEC2(0, 0), VEC2(ui_size.x, ui_size.y));
|
|
Rect render_viewport = RectFromVec2(VEC2(0, 0), VEC2(render_size.x, render_size.y));
|
|
|
|
|
|
/* Allocate render buffers */
|
|
if (rsig->shade_target && !EqVec2I32(render_size, rsig->shade_target->texture_size))
|
|
{
|
|
__profn("Release sig resources");
|
|
fenced_release(rsig->albedo, FENCED_RELEASE_KIND_RESOURCE);
|
|
fenced_release(rsig->emittance, FENCED_RELEASE_KIND_RESOURCE);
|
|
fenced_release(rsig->emittance_flood_read, FENCED_RELEASE_KIND_RESOURCE);
|
|
fenced_release(rsig->emittance_flood_target, FENCED_RELEASE_KIND_RESOURCE);
|
|
fenced_release(rsig->shade_read, FENCED_RELEASE_KIND_RESOURCE);
|
|
fenced_release(rsig->shade_target, FENCED_RELEASE_KIND_RESOURCE);
|
|
rsig->shade_target = 0;
|
|
}
|
|
if (!rsig->shade_target)
|
|
{
|
|
__profn("Allocate sig resources");
|
|
rsig->albedo = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, render_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
|
|
rsig->emittance = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, render_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
|
|
rsig->emittance_flood_read = gbuff_alloc(DXGI_FORMAT_R16G16_UINT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
|
rsig->emittance_flood_target = gbuff_alloc(DXGI_FORMAT_R16G16_UINT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
|
rsig->shade_read = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
|
rsig->shade_target = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
|
}
|
|
|
|
/* Allocate ui buffers */
|
|
if (rsig->ui_target && !EqVec2I32(ui_size, rsig->ui_target->texture_size))
|
|
{
|
|
fenced_release(rsig->ui_target, FENCED_RELEASE_KIND_RESOURCE);
|
|
rsig->ui_target = 0;
|
|
}
|
|
if (!rsig->ui_target)
|
|
{
|
|
rsig->ui_target = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, ui_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
|
|
}
|
|
|
|
struct pipeline_scope *pipeline_scope = pipeline_scope_begin();
|
|
struct pipeline *material_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_material"));
|
|
struct pipeline *flood_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_flood"));
|
|
struct pipeline *shade_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_shade"));
|
|
struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_blit"));
|
|
struct pipeline *ui_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_ui"));
|
|
struct pipeline *shape_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_shape"));
|
|
struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT];
|
|
struct command_list *cl = command_list_open(cq->cl_pool);
|
|
{
|
|
__profn("Run render");
|
|
__profnc_dx12(cl->cq->prof, cl->cl, "Run render", Rgb32F(0.5, 0.2, 0.2));
|
|
Mat4x4 world_to_render_vp_matrix = calculate_vp(world_to_render_xf, render_viewport.width, render_viewport.height);
|
|
Mat4x4 ui_vp_matrix = calculate_vp(XformIdentity, ui_viewport.width, ui_viewport.height);
|
|
Mat4x4 blit_vp_matrix = ZI;
|
|
{
|
|
Xform xf = render_to_ui_xf;
|
|
xf = ScaleXform(xf, VEC2(render_size.x, render_size.y));
|
|
xf = TranslateXform(xf, VEC2(0.5, 0.5));
|
|
blit_vp_matrix = calculate_vp(xf, ui_viewport.width, ui_viewport.height);
|
|
}
|
|
|
|
/* Upload dummy vert & index buffer */
|
|
/* TODO: Make these static */
|
|
/* Dummy vertex buffer */
|
|
LocalPersist u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
|
|
struct command_buffer *dummy_vertex_buffer = command_list_push_buffer(cl, 0, (u8 *)0);
|
|
struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, countof(quad_indices), quad_indices);
|
|
|
|
/* Process sig data into uploadable data */
|
|
K_MaterialInstance *material_instances = PushStructsNoZero(scratch.arena, K_MaterialInstance, rsig->num_material_instance_descs);
|
|
K_UiInstance *ui_rect_instances = PushStructsNoZero(scratch.arena, K_UiInstance, rsig->num_ui_rect_instance_descs);
|
|
K_MaterialGrid *grids = PushStructsNoZero(scratch.arena, K_MaterialGrid, rsig->num_material_grid_descs);
|
|
{
|
|
__profn("Process sig data");
|
|
|
|
/* Process material instances */
|
|
{
|
|
__profn("Process material instances");
|
|
for (u32 i = 0; i < rsig->num_material_instance_descs; ++i)
|
|
{
|
|
struct material_instance_desc *desc = &((struct material_instance_desc *)ArenaBase(rsig->material_instance_descs_arena))[i];
|
|
K_MaterialInstance *instance = &material_instances[i];
|
|
instance->tex_nurid = desc->texture_id;
|
|
instance->grid_id = desc->grid_id;
|
|
instance->xf = desc->xf;
|
|
instance->uv0 = desc->clip.p0;
|
|
instance->uv1 = desc->clip.p1;
|
|
instance->tint_srgb = desc->tint;
|
|
instance->is_light = desc->is_light;
|
|
instance->light_emittance_srgb = desc->light_emittance;
|
|
}
|
|
}
|
|
|
|
/* Process ui rect instances */
|
|
{
|
|
__profn("Process ui rect instances");
|
|
for (u32 i = 0; i < rsig->num_ui_rect_instance_descs; ++i)
|
|
{
|
|
struct ui_rect_instance_desc *desc = &((struct ui_rect_instance_desc *)ArenaBase(rsig->ui_rect_instance_descs_arena))[i];
|
|
K_UiInstance *instance = &ui_rect_instances[i];
|
|
instance->tex_nurid = desc->texture_id;
|
|
instance->xf = desc->xf;
|
|
instance->uv0 = desc->clip.p0;
|
|
instance->uv1 = desc->clip.p1;
|
|
instance->tint_srgb = desc->tint;
|
|
}
|
|
}
|
|
|
|
/* Process grids */
|
|
{
|
|
__profn("Process grids");
|
|
for (u32 i = 0; i < rsig->num_material_grid_descs; ++i)
|
|
{
|
|
struct material_grid_desc *desc = &((struct material_grid_desc *)ArenaBase(rsig->material_grid_descs_arena))[i];
|
|
K_MaterialGrid *grid = &grids[i];
|
|
grid->line_thickness = desc->line_thickness;
|
|
grid->line_spacing = desc->line_spacing;
|
|
grid->offset = desc->offset;
|
|
grid->bg0_srgb = desc->bg0_color;
|
|
grid->bg1_srgb = desc->bg1_color;
|
|
grid->line_srgb = desc->line_color;
|
|
grid->x_srgb = desc->x_color;
|
|
grid->y_srgb = desc->y_color;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Upload buffers */
|
|
u64 num_ui_shape_verts = rsig->ui_shape_verts_arena->pos / sizeof(K_ShapeVert);
|
|
u64 num_ui_shape_indices = rsig->ui_shape_indices_arena->pos / sizeof(u32);
|
|
struct command_buffer *material_instance_buffer = command_list_push_buffer(cl, rsig->num_material_instance_descs, material_instances);
|
|
struct command_buffer *ui_rect_instance_buffer = command_list_push_buffer(cl, rsig->num_ui_rect_instance_descs, ui_rect_instances);
|
|
struct command_buffer *ui_shape_verts_buffer = command_list_push_buffer(cl, num_ui_shape_verts, (K_ShapeVert *)ArenaBase(rsig->ui_shape_verts_arena));
|
|
struct command_buffer *ui_shape_indices_buffer = command_list_push_buffer(cl, num_ui_shape_indices, (u32 *)ArenaBase(rsig->ui_shape_indices_arena));
|
|
struct command_buffer *grid_buffer = command_list_push_buffer(cl, rsig->num_material_grid_descs, grids);
|
|
|
|
/* Upload descriptor heap */
|
|
struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, g->cbv_srv_uav_heap);
|
|
ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap };
|
|
ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps);
|
|
|
|
/* Prep for material pass */
|
|
{
|
|
/* Barrier */
|
|
{
|
|
struct dx12_resource_barrier_desc barriers[] = {
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->albedo, D3D12_RESOURCE_STATE_RENDER_TARGET },
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance, D3D12_RESOURCE_STATE_RENDER_TARGET }
|
|
};
|
|
D3D12_CPU_DESCRIPTOR_HANDLE rtvs[] = {
|
|
rsig->albedo->rtv_descriptor->handle,
|
|
rsig->emittance->rtv_descriptor->handle,
|
|
};
|
|
dx12_resource_barriers(cl->cl, countof(barriers), barriers);
|
|
ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, countof(rtvs), rtvs, 0, 0);
|
|
}
|
|
/* Clear */
|
|
{
|
|
__profn("Clear gbuffers");
|
|
__profnc_dx12(cl->cq->prof, cl->cl, "Clear gbuffers", Rgb32F(0.5, 0.2, 0.2));
|
|
f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, rsig->albedo->rtv_descriptor->handle, clear_color, 0, 0);
|
|
ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, rsig->emittance->rtv_descriptor->handle, clear_color, 0, 0);
|
|
}
|
|
}
|
|
|
|
/* Material pass */
|
|
if (material_pipeline->success)
|
|
{
|
|
__profn("Material pass");
|
|
__profnc_dx12(cl->cq->prof, cl->cl, "Material pass", Rgb32F(0.5, 0.2, 0.2));
|
|
|
|
/* Bind pipeline */
|
|
command_list_set_pipeline(cl, material_pipeline);
|
|
|
|
/* Set Rasterizer State */
|
|
D3D12_VIEWPORT viewport = viewport_from_rect(render_viewport);
|
|
D3D12_RECT scissor = scissor_from_rect(render_viewport);
|
|
ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
|
|
ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
|
|
|
|
/* Set sig */
|
|
K_MaterialSig sig = ZI;
|
|
sig.projection = world_to_render_vp_matrix;
|
|
sig.instances_urid = material_instance_buffer->resource->srv_descriptor->index;
|
|
sig.grids_urid = grid_buffer->resource->srv_descriptor->index;
|
|
command_list_set_sig(cl, &sig, sizeof(sig));
|
|
|
|
/* Draw */
|
|
u32 instance_count = material_instance_buffer->size / sizeof(K_MaterialInstance);
|
|
D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
|
|
D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
|
|
ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
|
ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
|
|
ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
|
|
ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, instance_count, 0, 0, 0);
|
|
}
|
|
|
|
/* Prep for flood pass */
|
|
{
|
|
/* Barrier */
|
|
{
|
|
struct dx12_resource_barrier_desc barriers[] = {
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
|
|
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance_flood_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->emittance_flood_read, 0 },
|
|
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance_flood_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }
|
|
};
|
|
dx12_resource_barriers(cl->cl, countof(barriers), barriers);
|
|
}
|
|
}
|
|
|
|
/* Flood pass */
|
|
if (flood_pipeline->success && !params.effects_disabled)
|
|
{
|
|
__profn("Flood pass");
|
|
__profnc_dx12(cl->cq->prof, cl->cl, "Flood pass", Rgb32F(0.5, 0.2, 0.2));
|
|
|
|
/* Bind pipeline */
|
|
command_list_set_pipeline(cl, flood_pipeline);
|
|
|
|
i32 step_length = -1;
|
|
|
|
/* TODO: Remove this */
|
|
u64 max_steps = GetGstat(GSTAT_DEBUG_STEPS);
|
|
u64 step = 0;
|
|
while (step_length != 0 && step < max_steps)
|
|
{
|
|
__profn("Flood step");
|
|
__profnc_dx12(cl->cq->prof, cl->cl, "Flood step", Rgb32F(0.5, 0.2, 0.2));
|
|
|
|
/* UAV barrier */
|
|
{
|
|
struct dx12_resource_barrier_desc barriers[] = {
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->emittance_flood_read, 0 }
|
|
};
|
|
dx12_resource_barriers(cl->cl, countof(barriers), barriers);
|
|
}
|
|
|
|
/* Set sig */
|
|
K_FloodSig sig = ZI;
|
|
sig.step_len = step_length;
|
|
sig.emittance_tex_urid = rsig->emittance->srv_descriptor->index;
|
|
sig.read_flood_tex_urid = rsig->emittance_flood_read->uav_descriptor->index;
|
|
sig.target_flood_tex_urid = rsig->emittance_flood_target->uav_descriptor->index;
|
|
sig.tex_width = render_size.x;
|
|
sig.tex_height = render_size.y;
|
|
command_list_set_sig(cl, &sig, sizeof(sig));
|
|
|
|
/* Dispatch */
|
|
ID3D12GraphicsCommandList_Dispatch(cl->cl, (render_size.x + 7) / 8, (render_size.y + 7) / 8, 1);
|
|
|
|
/* Swap buffers */
|
|
struct dx12_resource *swp = rsig->emittance_flood_read;
|
|
rsig->emittance_flood_read = rsig->emittance_flood_target;
|
|
rsig->emittance_flood_target = swp;
|
|
|
|
/* Update step */
|
|
if (step_length == -1)
|
|
{
|
|
step_length = MaxI32(render_size.x, render_size.y) / 2;
|
|
}
|
|
else
|
|
{
|
|
step_length /= 2;
|
|
}
|
|
++step;
|
|
}
|
|
}
|
|
|
|
/* Prep for shade pass */
|
|
{
|
|
/* Barrier */
|
|
{
|
|
struct dx12_resource_barrier_desc barriers[] = {
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->albedo, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
|
|
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance_flood_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->emittance_flood_read, 0 },
|
|
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->shade_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->shade_read, 0 },
|
|
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->shade_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }
|
|
};
|
|
dx12_resource_barriers(cl->cl, countof(barriers), barriers);
|
|
}
|
|
/* Clear */
|
|
{
|
|
__profn("Clear shade target");
|
|
__profnc_dx12(cl->cq->prof, cl->cl, "Clear shade target", Rgb32F(0.5, 0.2, 0.2));
|
|
f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(cl->cl, gpu_handle_from_descriptor(rsig->shade_target->uav_descriptor, descriptor_heap), rsig->shade_target->uav_descriptor->handle, rsig->shade_target->resource, clear_color, 0, 0);
|
|
}
|
|
}
|
|
|
|
/* Shade pass */
|
|
if (shade_pipeline->success)
|
|
{
|
|
__profn("Shade pass");
|
|
__profnc_dx12(cl->cq->prof, cl->cl, "Shade pass", Rgb32F(0.5, 0.2, 0.2));
|
|
|
|
/* Bind pipeline */
|
|
command_list_set_pipeline(cl, shade_pipeline);
|
|
|
|
u32 shade_flags = K_SHADE_FLAG_NONE;
|
|
if (params.effects_disabled)
|
|
{
|
|
shade_flags |= K_SHADE_FLAG_DISABLE_EFFECTS;
|
|
}
|
|
|
|
/* Set sig */
|
|
K_ShadeSig sig = ZI;
|
|
sig.flags = shade_flags;
|
|
sig.tex_width = render_size.x;
|
|
sig.tex_height = render_size.y;
|
|
sig.frame_seed = VEC4I32((u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF),
|
|
(u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF),
|
|
(u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF),
|
|
(u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF));
|
|
sig.frame_index = rsig->frame_index;
|
|
sig.camera_offset = world_to_render_xf.og;
|
|
sig.albedo_tex_urid = rsig->albedo->srv_descriptor->index;
|
|
sig.emittance_tex_urid = rsig->emittance->srv_descriptor->index;
|
|
sig.emittance_flood_tex_urid = rsig->emittance_flood_read->srv_descriptor->index;
|
|
sig.read_tex_urid = rsig->shade_read->uav_descriptor->index;
|
|
sig.target_tex_urid = rsig->shade_target->uav_descriptor->index;
|
|
command_list_set_sig(cl, &sig, sizeof(sig));
|
|
|
|
/* Dispatch */
|
|
ID3D12GraphicsCommandList_Dispatch(cl->cl, (render_size.x + 7) / 8, (render_size.y + 7) / 8, 1);
|
|
|
|
/* Swap */
|
|
struct dx12_resource *swp = rsig->shade_read;
|
|
rsig->shade_read = rsig->shade_target;
|
|
rsig->shade_target = swp;
|
|
}
|
|
|
|
/* Prep for UI pass */
|
|
{
|
|
/* Barrier */
|
|
{
|
|
struct dx12_resource_barrier_desc barriers[] = {
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->shade_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->shade_read, 0 },
|
|
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->ui_target, D3D12_RESOURCE_STATE_RENDER_TARGET }
|
|
};
|
|
dx12_resource_barriers(cl->cl, countof(barriers), barriers);
|
|
ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &rsig->ui_target->rtv_descriptor->handle, 0, 0);
|
|
}
|
|
/* Clear */
|
|
{
|
|
__profn("Clear ui target");
|
|
__profnc_dx12(cl->cq->prof, cl->cl, "Clear ui target", Rgb32F(0.5, 0.2, 0.2));
|
|
f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
|
|
ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, rsig->ui_target->rtv_descriptor->handle, clear_color, 0, 0);
|
|
}
|
|
}
|
|
|
|
/* UI blit pass */
|
|
if (blit_pipeline->success)
|
|
{
|
|
__profn("UI blit pass");
|
|
__profnc_dx12(cl->cq->prof, cl->cl, "UI blit pass", Rgb32F(0.5, 0.2, 0.2));
|
|
|
|
/* Bind pipeline */
|
|
command_list_set_pipeline(cl, blit_pipeline);
|
|
|
|
/* Set Rasterizer State */
|
|
D3D12_VIEWPORT viewport = viewport_from_rect(ui_viewport);
|
|
D3D12_RECT scissor = scissor_from_rect(ui_viewport);
|
|
ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
|
|
ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
|
|
|
|
/* Set sig */
|
|
K_BlitSig sig = ZI;
|
|
sig.projection = blit_vp_matrix;
|
|
sig.flags = K_BLIT_FLAG_TONE_MAP | K_BLIT_FLAG_GAMMA_CORRECT;
|
|
sig.exposure = 2.0;
|
|
sig.gamma = (f32)2.2;
|
|
sig.tex_urid = rsig->shade_read->uav_descriptor->index;
|
|
command_list_set_sig(cl, &sig, sizeof(sig));
|
|
|
|
/* Draw */
|
|
D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
|
|
D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
|
|
ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
|
ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
|
|
ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
|
|
ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0);
|
|
}
|
|
|
|
/* UI rect pass */
|
|
if (ui_pipeline->success)
|
|
{
|
|
__profn("UI rect pass");
|
|
__profnc_dx12(cl->cq->prof, cl->cl, "UI rect pass", Rgb32F(0.5, 0.2, 0.2));
|
|
|
|
/* Bind pipeline */
|
|
command_list_set_pipeline(cl, ui_pipeline);
|
|
|
|
/* Set Rasterizer State */
|
|
D3D12_VIEWPORT viewport = viewport_from_rect(ui_viewport);
|
|
D3D12_RECT scissor = scissor_from_rect(ui_viewport);
|
|
ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
|
|
ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
|
|
|
|
/* Set sig */
|
|
K_UiSig sig = ZI;
|
|
sig.projection = ui_vp_matrix;
|
|
sig.instances_urid = ui_rect_instance_buffer->resource->srv_descriptor->index;
|
|
command_list_set_sig(cl, &sig, sizeof(sig));
|
|
|
|
/* Draw */
|
|
u32 instance_count = ui_rect_instance_buffer->size / sizeof(K_UiInstance);
|
|
D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
|
|
D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
|
|
ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
|
ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
|
|
ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
|
|
ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, instance_count, 0, 0, 0);
|
|
}
|
|
|
|
/* UI shape pass */
|
|
if (shape_pipeline->success)
|
|
{
|
|
__profn("UI shape pass");
|
|
__profnc_dx12(cl->cq->prof, cl->cl, "UI shape pass", Rgb32F(0.5, 0.2, 0.2));
|
|
|
|
/* Bind pipeline */
|
|
command_list_set_pipeline(cl, shape_pipeline);
|
|
|
|
/* Set Rasterizer State */
|
|
D3D12_VIEWPORT viewport = viewport_from_rect(ui_viewport);
|
|
D3D12_RECT scissor = scissor_from_rect(ui_viewport);
|
|
ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
|
|
ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
|
|
|
|
/* Set sig */
|
|
K_ShapeSig sig = ZI;
|
|
sig.projection = ui_vp_matrix;
|
|
sig.verts_urid = ui_shape_verts_buffer->resource->srv_descriptor->index;
|
|
command_list_set_sig(cl, &sig, sizeof(sig));
|
|
|
|
/* Draw */
|
|
u32 index_count = ui_shape_indices_buffer->size / sizeof(u32);
|
|
D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
|
|
D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(ui_shape_indices_buffer, DXGI_FORMAT_R32_UINT);
|
|
ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
|
ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
|
|
ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
|
|
ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, index_count, 1, 0, 0, 0);
|
|
}
|
|
}
|
|
command_list_close(cl);
|
|
pipeline_scope_end(pipeline_scope);
|
|
|
|
render_sig_reset(rsig);
|
|
EndScratch(scratch);
|
|
|
|
return (GPU_Resource *)rsig->ui_target;
|
|
}
|
|
|
|
/* ========================== *
|
|
* Memory info
|
|
* ========================== */
|
|
|
|
GPU_MemoryInfo GPU_QueryMemoryInfo(void)
|
|
{
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
GPU_MemoryInfo result = ZI;
|
|
|
|
HRESULT hr = 0;
|
|
IDXGIAdapter3 *dxgiAdapter3 = 0;
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
hr = IDXGIAdapter_QueryInterface(g->adapter, &IID_IDXGIAdapter3, (void **)&dxgiAdapter3);
|
|
}
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
|
|
IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info);
|
|
result.local_used = info.CurrentUsage;
|
|
result.local_budget = info.Budget;
|
|
}
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
|
|
IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info);
|
|
result.non_local_used = info.CurrentUsage;
|
|
result.non_local_budget = info.Budget;
|
|
}
|
|
if (dxgiAdapter3)
|
|
{
|
|
IDXGIAdapter_Release(dxgiAdapter3);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/* ========================== *
|
|
* Swapchain
|
|
* ========================== */
|
|
|
|
void swapchain_init_resources(struct swapchain *swapchain)
|
|
{
|
|
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
for (u32 i = 0; i < countof(swapchain->buffers); ++i)
|
|
{
|
|
ID3D12Resource *resource = 0;
|
|
HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, i, &IID_ID3D12Resource, (void **)&resource);
|
|
if (FAILED(hr))
|
|
{
|
|
/* TODO: Don't panic */
|
|
P_Panic(Lit("Failed to get swapchain buffer"));
|
|
}
|
|
struct swapchain_buffer *sb = &swapchain->buffers[i];
|
|
ZeroStruct(sb);
|
|
sb->swapchain = swapchain;
|
|
sb->resource = resource;
|
|
sb->rtv_descriptor = descriptor_alloc(g->rtv_heap);
|
|
sb->state = D3D12_RESOURCE_STATE_COMMON;
|
|
ID3D12Device_CreateRenderTargetView(g->device, sb->resource, 0, sb->rtv_descriptor->handle);
|
|
}
|
|
}
|
|
|
|
/* Creates a flip-model swapchain for `window` at `resolution` on the direct
 * queue, upgrades it to IDXGISwapChain3, optionally sets up the frame-latency
 * waitable object, and initializes the backbuffer resources.
 *
 * The returned pointer aliases an internal `struct swapchain` pooled in
 * g->swapchains_arena; release via GPU_ReleaseSwapchain. Panics on swapchain
 * creation failure (TODO elsewhere suggests panics should become soft errors). */
GPU_Swapchain *GPU_AllocSwapchain(P_Window *window, Vec2I32 resolution)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    HRESULT hr = 0;
    HWND hwnd = (HWND)P_GetInternalWindowHandle(window);
    struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT];

    /* Acquire a swapchain struct: reuse a freed one, else push a new one */
    struct swapchain *swapchain = 0;
    {
        P_Lock lock = P_LockE(&g->swapchains_mutex);
        if (g->first_free_swapchain)
        {
            swapchain = g->first_free_swapchain;
            g->first_free_swapchain = swapchain->next_free;
        }
        else
        {
            swapchain = PushStruct(g->swapchains_arena, struct swapchain);
        }
        P_Unlock(&lock);
    }
    /* FIX: a struct recycled off the free list still carried state from its
     * previous life (resolution, waitable, buffer slots, next_free). Clear it
     * unconditionally so a reused swapchain behaves exactly like a fresh one —
     * previously a stale `resolution` could make update_swapchain skip its
     * first rebuild, and a stale `waitable` could be waited on after release. */
    ZeroStruct(swapchain);

    /* Create swapchain1 */
    IDXGISwapChain1 *swapchain1 = 0;
    {
        DXGI_SWAP_CHAIN_DESC1 desc = ZI;
        desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
        desc.Width = resolution.x;
        desc.Height = resolution.y;
        desc.SampleDesc.Count = 1;
        desc.SampleDesc.Quality = 0;
        desc.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT;
        desc.BufferCount = DX12_SWAPCHAIN_BUFFER_COUNT;
        desc.Scaling = DXGI_SCALING_NONE;
        desc.Flags = DX12_SWAPCHAIN_FLAGS;
        desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
        desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
        hr = IDXGIFactory2_CreateSwapChainForHwnd(g->factory, (IUnknown *)cq->cq, hwnd, &desc, 0, 0, &swapchain1);
        if (FAILED(hr))
        {
            P_Panic(Lit("Failed to create IDXGISwapChain1"));
        }
    }

    /* Upgrade to swapchain3 */
    hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain->swapchain);
    if (FAILED(hr))
    {
        P_Panic(Lit("Failed to create IDXGISwapChain3"));
    }

    /* Create waitable object (used by GPU_WaitOnSwapchain to cap CPU-ahead latency) */
#if DX12_WAIT_FRAME_LATENCY > 0
    IDXGISwapChain3_SetMaximumFrameLatency(swapchain->swapchain, DX12_WAIT_FRAME_LATENCY);
    swapchain->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->swapchain);
    Assert(swapchain->waitable);
#endif

    /* Disable Alt+Enter changing monitor resolution to match window size */
    IDXGIFactory_MakeWindowAssociation(g->factory, hwnd, DXGI_MWA_NO_ALT_ENTER);

    /* The swapchain3 interface holds its own reference; drop the creation one */
    IDXGISwapChain1_Release(swapchain1);
    swapchain->hwnd = hwnd;

    swapchain_init_resources(swapchain);

    return (GPU_Swapchain *)swapchain;
}
|
|
|
|
/* Releases a swapchain allocated by GPU_AllocSwapchain. */
void GPU_ReleaseSwapchain(GPU_Swapchain *gp_swapchain)
{
    /* TODO: Tear down the swapchain (release the buffers/descriptors, close the
     * waitable handle, return the struct to g->first_free_swapchain). Currently
     * a no-op, so swapchains are never reclaimed. */
    (UNUSED)gp_swapchain;
}
|
|
|
|
/* Blocks until the swapchain's frame-latency waitable object signals, pacing
 * the CPU so it never runs more than DX12_WAIT_FRAME_LATENCY frames ahead of
 * presentation. No-op when the build disables frame-latency waiting. */
void GPU_WaitOnSwapchain(GPU_Swapchain *gp_swapchain)
{
#if DX12_WAIT_FRAME_LATENCY > 0
    struct swapchain *swapchain = (struct swapchain *)gp_swapchain;
    if (swapchain->waitable)
    {
        /* 1000 ms timeout so a wedged compositor can't hang the frame loop;
         * alertable (third arg = 1) so queued APCs can run during the wait.
         * The return value is intentionally ignored: on timeout we just
         * proceed with the frame. */
        WaitForSingleObjectEx(swapchain->waitable, 1000, 1);
    }
#else
    (UNUSED)gp_swapchain;
#endif
}
|
|
|
|
/* Ensures the swapchain's backbuffers match `resolution` (rebuilding them if
 * not) and returns the current backbuffer to render into this frame.
 *
 * The rebuild path must: (1) block new direct-queue submissions, (2) drain all
 * previously submitted direct-queue work (the GPU may still be writing the
 * backbuffers), (3) drop every backbuffer reference (ResizeBuffers fails if any
 * reference is outstanding), then (4) resize and re-create the views. The
 * statement order here is load-bearing. */
struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, Vec2I32 resolution)
{
    __prof;
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    /* Clamp to 1x1: zero-sized backbuffers (e.g. a minimized window) are invalid */
    resolution.x = MaxI32(resolution.x, 1);
    resolution.y = MaxI32(resolution.y, 1);
    b32 should_rebuild = !EqVec2I32(swapchain->resolution, resolution);
    if (should_rebuild)
    {
        HRESULT hr = 0;
        struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT];
        /* Lock direct queue submissions (in case any write to backbuffer) */
        /* TODO: Less overkill approach - Only flush present_blit since we know it's the only operation targeting backbuffer */
        P_Lock lock = P_LockE(&cq->submit_fence_mutex);
        //DEBUGBREAKABLE;
        //P_Lock lock = P_LockE(&g->global_command_list_record_mutex);
        {
            /* Flush direct queue */
            //ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, ++cq->submit_fence_target);
            {
                /* Block until the last submitted direct-queue work retires.
                 * NOTE(review): CreateEvent's result is not checked; on failure
                 * WaitForSingleObject(NULL) returns immediately and the flush
                 * silently does nothing — TODO confirm/handle. */
                HANDLE event = CreateEvent(0, 0, 0, 0);
                ID3D12Fence_SetEventOnCompletion(cq->submit_fence, cq->submit_fence_target, event);
                WaitForSingleObject(event, INFINITE);
                CloseHandle(event);
            }

            /* Release buffers: every backbuffer reference must be dropped before
             * ResizeBuffers, or it fails with outstanding-reference errors. */
            for (u32 i = 0; i < countof(swapchain->buffers); ++i)
            {
                struct swapchain_buffer *sb = &swapchain->buffers[i];
                descriptor_release(sb->rtv_descriptor);
                ID3D12Resource_Release(sb->resource);
            }

            /* Resize buffers: BufferCount=0 and DXGI_FORMAT_UNKNOWN keep the
             * existing buffer count and format. */
            hr = IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, DX12_SWAPCHAIN_FLAGS);
            if (FAILED(hr))
            {
                /* TODO: Don't panic */
                P_Panic(Lit("Failed to resize swapchain"));
            }
        }
        P_Unlock(&lock);

        /* Re-acquire the resized buffers and their RTVs */
        swapchain_init_resources(swapchain);

        swapchain->resolution = resolution;
    }

    u32 backbuffer_index = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->swapchain);
    return &swapchain->buffers[backbuffer_index];
}
|
|
|
|
/* ========================== *
|
|
* Present
|
|
* ========================== */
|
|
|
|
/* Records and submits a fullscreen textured-quad draw that blits `src`
 * (transformed by `src_xf`) into the backbuffer `dst`, transitioning dst to
 * RENDER_TARGET before the draw and back to PRESENT afterwards. No-op if the
 * "kernel_blit" pipeline failed to compile (e.g. shader error during live
 * reload). */
void present_blit(struct swapchain_buffer *dst, struct dx12_resource *src, Xform src_xf)
{
    __prof;
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    struct pipeline_scope *pipeline_scope = pipeline_scope_begin();
    struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_blit"));
    if (blit_pipeline->success)
    {
        struct command_queue *cq = g->command_queues[DX12_QUEUE_DIRECT];
        struct command_list *cl = command_list_open(cq->cl_pool);
        {
            __profn("Present blit");
            __profnc_dx12(cl->cq->prof, cl->cl, "Present blit", Rgb32F(0.5, 0.2, 0.2));
            struct swapchain *swapchain = dst->swapchain;

            /* Upload dummy vert & index buffer */
            /* TODO: Make these static */
            /* Dummy vertex buffer */
            /* NOTE(review): countof(quad_indices) passes the element count (6);
             * if command_list_push_buffer expects a byte size this under-uploads
             * the 12-byte u16 index buffer — verify against its definition. */
            LocalPersist u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
            struct command_buffer *dummy_vertex_buffer = command_list_push_buffer(cl, 0, (u8 *)0);
            struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, countof(quad_indices), quad_indices);

            /* Upload descriptor heap */
            struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, g->cbv_srv_uav_heap);
            ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap };
            ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps);

            /* Full-backbuffer viewport/scissor */
            Rect viewport_rect = RectFromVec2(VEC2(0, 0), VEC2(swapchain->resolution.x, swapchain->resolution.y));
            D3D12_VIEWPORT viewport = viewport_from_rect(viewport_rect);
            D3D12_RECT scissor = scissor_from_rect(viewport_rect);

            /* Build the view-projection matrix: scale the unit quad to the source
             * texture's size, then center it (quad origin is its corner). */
            Mat4x4 vp_matrix = ZI;
            {
                Xform xf = src_xf;
                xf = ScaleXform(xf, VEC2(src->texture_size.x, src->texture_size.y));
                xf = TranslateXform(xf, VEC2(0.5, 0.5));
                vp_matrix = calculate_vp(xf, viewport.Width, viewport.Height);
            }

            /* Transition dst to render target */
            {
                struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
                rtb.pResource = dst->resource;
                rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
                rtb.StateBefore = dst->state;
                rtb.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
                struct D3D12_RESOURCE_BARRIER rb = ZI;
                rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                rb.Flags = 0;
                rb.Transition = rtb;
                ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb);
                dst->state = rtb.StateAfter;
            }
            ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &dst->rtv_descriptor->handle, 0, 0);

            /* Clear */
            f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
            ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, dst->rtv_descriptor->handle, clear_color, 0, 0);

            /* Bind pipeline */
            command_list_set_pipeline(cl, blit_pipeline);

            /* Set Rasterizer State */
            ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
            ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);

            /* Set sig: no tone map / gamma here, just a raw copy of src */
            K_BlitSig sig = ZI;
            sig.projection = vp_matrix;
            sig.flags = K_BLIT_FLAG_NONE;
            sig.tex_urid = src->srv_descriptor->index;
            command_list_set_sig(cl, &sig, sizeof(sig));

            /* Draw: one quad; vertices are synthesized in the shader from the
             * index stream, hence the zero-sized dummy vertex buffer. */
            D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
            D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
            ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
            ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
            ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
            ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0);

            /* Transition dst to presentable */
            {
                struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
                rtb.pResource = dst->resource;
                rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
                rtb.StateBefore = dst->state;
                rtb.StateAfter = D3D12_RESOURCE_STATE_PRESENT;
                struct D3D12_RESOURCE_BARRIER rb = ZI;
                rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                rb.Flags = 0;
                rb.Transition = rtb;
                ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb);
                dst->state = rtb.StateAfter;
            }
        }
        command_list_close(cl);
    }
    pipeline_scope_end(pipeline_scope);
}
|
|
|
|
/* Presents `texture` to the window: resizes the swapchain if needed, blits the
 * texture (transformed by `texture_xf`) into the current backbuffer, then
 * presents. `vsync` is the DXGI sync interval (0 = uncapped, tearing allowed
 * when the build supports it). Under ProfilingGpu, also marks the end of the
 * GPU frame on every queue and collects resolved timestamps. */
void GPU_PresentSwapchain(GPU_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, GPU_Resource *texture, Xform texture_xf, i32 vsync)
{
    __prof;
    struct swapchain *swapchain = (struct swapchain *)gp_swapchain;
    struct swapchain_buffer *swapchain_buffer = update_swapchain(swapchain, backbuffer_resolution);
    struct dx12_resource *texture_resource = (struct dx12_resource *)texture;

    /* Blit */
    present_blit(swapchain_buffer, texture_resource, texture_xf);

    u32 present_flags = 0;
    if (vsync == 0)
    {
        /* Tearing is only legal if the swapchain was created tearing-capable;
         * DX12_ALLOW_TEARING is 0/1 so the multiply masks the flag. */
        present_flags |= (DXGI_PRESENT_ALLOW_TEARING * DX12_ALLOW_TEARING);
    }

    /* Present */
    {
        __profn("Present");
        HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags);
        if (!SUCCEEDED(hr))
        {
            Assert(0);
        }
    }

#if ProfilingGpu
    /* FIX: `g` was referenced below but never declared in this function, which
     * broke the build whenever ProfilingGpu was enabled. */
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    {
        __profframe(0);

        __profn("Mark queue frames");
        /* Lock because frame marks shouldn't occur while command lists are recording */
        P_Lock lock = P_LockE(&g->global_command_list_record_mutex);
        for (u32 i = 0; i < countof(g->command_queues); ++i)
        {
            {
                struct command_queue *cq = g->command_queues[i];
                __prof_dx12_new_frame(cq->prof);
            }
        }
        P_Unlock(&lock);
    }
    {
        __profn("Collect queues");
        for (u32 i = 0; i < countof(g->command_queues); ++i)
        {
            struct command_queue *cq = g->command_queues[i];
            __prof_dx12_collect(cq->prof);
        }
    }
#endif
}
|
|
|
|
/* ========================== *
|
|
* Evictor job
|
|
* ========================== */
|
|
|
|
/* Long-running job that retires fenced releases: resources/pipelines queued in
 * g->fenced_releases_arena may still be referenced by in-flight GPU work, so
 * each run (1) snapshots the queued release records and the per-queue fence
 * targets under the lock, (2) waits until every queue's submit fence reaches
 * its target, then (3) actually frees the objects. Between runs it sleeps on
 * evictor_wake_cv until woken or told to shut down. */
P_JobDef(dx12_evictor_job, _)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    /* Cached last-observed fence values per queue, so already-passed fences
     * don't require a kernel query every run. */
    u64 completed_targets[DX12_NUM_QUEUES] = ZI;

    b32 shutdown = 0;
    while (!shutdown)
    {
        {
            __profn("Dx12 evictor run");
            TempArena scratch = BeginScratchNoConflict();
            u64 targets[countof(completed_targets)] = ZI;

            /* Copy queued data: snapshot the release records into scratch and
             * reset the shared arena, so producers can keep queueing while we
             * wait on fences below without holding the lock. */
            u32 num_fenced_releases = 0;
            struct fenced_release_data *fenced_releases = 0;
            {
                __profn("Copyqueued releases");
                P_Lock lock = P_LockE(&g->fenced_releases_mutex);
                num_fenced_releases = g->fenced_releases_arena->pos / sizeof(struct fenced_release_data);
                fenced_releases = PushStructsNoZero(scratch.arena, struct fenced_release_data, num_fenced_releases);
                CopyBytes(fenced_releases, ArenaBase(g->fenced_releases_arena), g->fenced_releases_arena->pos);
                ResetArena(g->fenced_releases_arena);
                CopyBytes(targets, g->fenced_release_targets, sizeof(targets));
                P_Unlock(&lock);
            }

            /* Wait until fences reach target */
            {
                __profn("Check fences");
                for (u32 i = 0; i < countof(targets); ++i)
                {
                    while (completed_targets[i] < targets[i])
                    {
                        struct command_queue *cq = g->command_queues[i];
                        completed_targets[i] = ID3D12Fence_GetCompletedValue(cq->submit_fence);
                        if (completed_targets[i] < targets[i])
                        {
                            __profn("Wait on fence");
                            {
                                /* Block on a helper job rather than spinning;
                                 * the outer while re-reads the fence afterwards. */
                                struct dx12_wait_fence_job_sig sig = ZI;
                                sig.fence = cq->submit_fence;
                                sig.target = targets[i];
                                {
                                    P_Counter counter = ZI;
                                    P_Run(1, dx12_wait_fence_job, &sig, P_Pool_Floating, P_Priority_Low, &counter);
                                    P_WaitOnCounter(&counter);
                                }
                            }
                        }
                    }
                }
            }

            /* Process releases: safe now, the GPU is past every queued target */
            for (u32 i = 0; i < num_fenced_releases; ++i)
            {
                struct fenced_release_data *fr = &fenced_releases[i];
                switch (fr->kind)
                {
                    default:
                    {
                        /* Unknown handle type */
                        Assert(0);
                    } break;

                    case FENCED_RELEASE_KIND_RESOURCE:
                    {
                        struct dx12_resource *resource = (struct dx12_resource *)fr->ptr;
                        dx12_resource_release_now(resource);
                    } break;

                    case FENCED_RELEASE_KIND_PIPELINE:
                    {
                        struct pipeline *pipeline = (struct pipeline *)fr->ptr;
                        pipeline_release_now(pipeline);
                    } break;
                }
            }
            EndScratch(scratch);
        }
        /* Sleep until woken (new releases queued) or shutdown requested; the
         * wake generation is consumed here so spurious CV wakeups loop back. */
        P_Lock lock = P_LockE(&g->evictor_wake_mutex);
        {
            while (!g->evictor_shutdown && g->evictor_wake_gen == 0)
            {
                P_WaitOnCv(&g->evictor_wake_cv, &lock);
            }
            shutdown = g->evictor_shutdown;
            g->evictor_wake_gen = 0;
        }
        P_Unlock(&lock);
    }
}
|