power_play/src/gp/gp_core_dx12.c
2025-07-30 18:28:23 -05:00

3577 lines
135 KiB
C

#pragma warning(push, 0)
# define UNICODE
# define COBJMACROS
# include <Windows.h>
# include <d3d12.h>
# include <dxgidebug.h>
# include <dxgi1_6.h>
# include <combaseapi.h>
# include <d3dcompiler.h>
#pragma warning(pop)
#pragma comment(lib, "d3d12")
#pragma comment(lib, "dxgi")
#pragma comment(lib, "dxguid")
#pragma comment(lib, "d3dcompiler")
#if ProfilingIsEnabled_GPU
/* For RegOpenKeyEx */
# include <winreg.h>
# pragma comment(lib, "advapi32")
#endif
#define DX12_ALLOW_TEARING 1
#define DX12_WAIT_FRAME_LATENCY 1
#define DX12_SWAPCHAIN_FLAGS (((DX12_ALLOW_TEARING != 0) * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | ((DX12_WAIT_FRAME_LATENCY != 0) * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT))
#define DX12_SWAPCHAIN_BUFFER_COUNT (4)
/* Arbitrary limits */
#define DX12_NUM_CBV_SRV_UAV_DESCRIPTORS (1024 * 64)
#define DX12_NUM_RTV_DESCRIPTORS (1024 * 1)
#define DX12_COMMAND_BUFFER_MIN_SIZE (1024 * 64)
#define DX12_MULTI_QUEUE !ProfilingIsEnabled
#if DX12_MULTI_QUEUE
# define DX12_QUEUE_DIRECT 0
# define DX12_QUEUE_COMPUTE 1
# define DX12_QUEUE_COPY 2
# define DX12_QUEUE_COPY_BACKGROUND 3
# define DX12_NUM_QUEUES 4
#else
# define DX12_QUEUE_DIRECT 0
# define DX12_QUEUE_COMPUTE 0
# define DX12_QUEUE_COPY 0
# define DX12_QUEUE_COPY_BACKGROUND 0
# define DX12_NUM_QUEUES 1
#endif
#if RtcIsEnabled
# define DX12_DEBUG 1
# define DX12_SHADER_DEBUG 1
#else
# define DX12_DEBUG 0
# define DX12_SHADER_DEBUG 0
#endif
/* ========================== *
* internal structs
* ========================== */
/* Identifies a shader entry point: source file plus entry function name. */
struct shader_desc {
String file;
String func;
};
/* Per-render-target description for a graphics pipeline. */
struct pipeline_rtv_desc {
DXGI_FORMAT format;  /* DXGI_FORMAT_UNKNOWN (0) means slot unused */
b32 blending;        /* enable alpha blending on this RTV */
};
/* Immutable creation parameters for one pipeline (graphics or compute). */
struct pipeline_desc {
String name;
/* If a dxc string is set, then it will be used directly instead of looking up dxc from archive using pipeline name */
String vs_dxc;
String ps_dxc;
String cs_dxc;
struct pipeline_rtv_desc rtvs[8]; /* up to 8 simultaneous render targets (D3D12 limit) */
};
/* A compiled pipeline: PSO + root signature plus compilation diagnostics.
 * Instances are pooled/freelisted via `next` under G.pipelines_mutex. */
struct pipeline {
String name;
u64 hash;                 /* FNV-64 hash of name, used as dictionary key */
b32 success;              /* nonzero if PSO creation succeeded */
b32 is_gfx;               /* graphics (vs/ps) vs compute (cs) pipeline */
String error;             /* compile/creation error or warning text, may be empty */
i64 compilation_time_ns;
/* Lock global pipelines mutex when accessing */
i64 refcount;
ID3D12PipelineState *pso;
ID3D12RootSignature *rootsig;
struct pipeline_desc desc; /* copy of the desc this pipeline was built from */
struct pipeline *next;     /* freelist / chain link */
};
/* Linked list node carrying one pipeline error message. */
struct pipeline_error {
String msg;
struct pipeline_error *next;
};
/* Linked list node naming a shader include file (for hot-reload tracking). */
struct pipeline_include {
String name;
u64 name_hash;
struct pipeline_include *next;
};
/* Scoped set of pipeline references; pooled via next_free. */
struct pipeline_scope {
Arena *arena;
Dict *refs;
struct pipeline_scope *next_free;
};
/* Creation parameters for a D3D12 command queue. */
struct command_queue_desc {
enum D3D12_COMMAND_LIST_TYPE type;
enum D3D12_COMMAND_QUEUE_PRIORITY priority;
String dbg_name; /* human-readable name for debug/profiling tools */
};
/* Wraps an ID3D12CommandQueue with a submission fence and a pool of
 * reusable command lists. Fence target is guarded by submit_fence_mutex. */
struct command_queue {
struct command_queue_desc desc;
ID3D12CommandQueue *cq;
Arena *arena;
P_Mutex submit_fence_mutex;   /* guards submit_fence_target */
u64 submit_fence_target;      /* last fence value signaled on submit */
ID3D12Fence *submit_fence;
struct command_list_pool *cl_pool;
#if ProfilingIsEnabled_GPU
__prof_dx12_ctx(prof);        /* GPU profiler context for this queue */
#endif
};
/* Pool of command lists owned by one queue; submitted lists are kept on a
 * doubly linked list until their fence target is reached, then recycled. */
struct command_list_pool {
struct command_queue *cq;
Arena *arena;
P_Mutex mutex; /* guards the submitted list below */
struct command_list *first_submitted_command_list;
struct command_list *last_submitted_command_list;
};
/* One recording unit: allocator + graphics command list, plus the transient
 * descriptor heaps and upload buffers attached during recording. */
struct command_list {
struct command_queue *cq;
struct command_list_pool *pool;
struct ID3D12CommandAllocator *ca;
struct ID3D12GraphicsCommandList *cl;
P_Lock global_record_lock;      /* held while recording (see G.global_command_list_record_mutex) */
struct pipeline *cur_pipeline;  /* pipeline currently bound on this list */
struct command_descriptor_heap *first_command_descriptor_heap;
struct command_buffer *first_command_buffer;
u64 submitted_fence_target;     /* fence value after which this list may be reused */
struct command_list *prev_submitted;
struct command_list *next_submitted;
};
/* Shader-visible descriptor heap attached to a command list for one
 * submission; recycled once submitted_fence_target is reached. */
struct command_descriptor_heap {
D3D12_DESCRIPTOR_HEAP_TYPE type;
ID3D12DescriptorHeap *heap;
D3D12_CPU_DESCRIPTOR_HANDLE start_cpu_handle;
D3D12_GPU_DESCRIPTOR_HANDLE start_gpu_handle;
struct command_descriptor_heap *next_in_command_list;
u64 submitted_fence_target;
struct command_queue *submitted_cq; /* queue whose fence gates recycling */
struct command_descriptor_heap *prev_submitted;
struct command_descriptor_heap *next_submitted;
};
/* Transient GPU buffer (vertex/index/upload) attached to a command list;
 * grouped by size class and recycled on fence completion. */
struct command_buffer {
struct command_buffer_group *group;
u64 size;
struct dx12_resource *resource;
D3D12_VERTEX_BUFFER_VIEW vbv;
D3D12_INDEX_BUFFER_VIEW Ibv;
struct command_buffer *next_in_command_list;
u64 submitted_fence_target;
struct command_queue *submitted_cq;
struct command_buffer *prev_submitted;
struct command_buffer *next_submitted;
};
/* Bucket of submitted command buffers sharing a size class. */
struct command_buffer_group {
struct command_buffer *first_submitted;
struct command_buffer *last_submitted;
};
/* One slot in a CPU descriptor heap; freelisted via next_free. */
struct descriptor {
struct cpu_descriptor_heap *heap;
u32 index;                       /* slot index within the owning heap */
D3D12_CPU_DESCRIPTOR_HANDLE handle;
struct descriptor *next_free;
};
/* A tracked ID3D12Resource plus its views and current transition state. */
struct dx12_resource {
enum D3D12_RESOURCE_STATES state; /* last known D3D12 resource state */
ID3D12Resource *resource;
struct descriptor *cbv_descriptor;
struct descriptor *srv_descriptor;
struct descriptor *uav_descriptor;
struct descriptor *rtv_descriptor;
D3D12_GPU_VIRTUAL_ADDRESS gpu_address; /* NOTE: 0 for textures */
Vec2I32 texture_size;
struct dx12_resource *next_free;  /* pool freelist link */
};
/* One backbuffer of a swapchain with its RTV and transition state. */
struct swapchain_buffer {
struct swapchain *swapchain;
ID3D12Resource *resource;
struct descriptor *rtv_descriptor;
D3D12_RESOURCE_STATES state;
};
/* Per-window swapchain; `waitable` is the frame-latency waitable object
 * (see DX12_WAIT_FRAME_LATENCY). Pooled via next_free. */
struct swapchain {
IDXGISwapChain3 *swapchain;
HWND hwnd;
HANDLE waitable;
Vec2I32 resolution;
struct swapchain_buffer buffers[DX12_SWAPCHAIN_BUFFER_COUNT];
struct swapchain *next_free;
};
/* Bump-allocated, mutex-guarded CPU-visible descriptor heap with a
 * freelist for returned descriptors. */
struct cpu_descriptor_heap {
enum D3D12_DESCRIPTOR_HEAP_TYPE type;
Arena *arena;
P_Mutex mutex; /* guards reservation counter and freelist */
u32 descriptor_size;            /* device increment size for this heap type */
u32 num_descriptors_reserved;
u32 num_descriptors_capacity;
struct descriptor *first_free_descriptor;
ID3D12DescriptorHeap *heap;
struct D3D12_CPU_DESCRIPTOR_HANDLE handle; /* CPU handle of slot 0 */
};
/* Kind tag for deferred (fence-gated) releases. */
enum fenced_release_kind {
FENCED_RELEASE_KIND_NONE,
FENCED_RELEASE_KIND_RESOURCE,
FENCED_RELEASE_KIND_PIPELINE
};
/* Queue entry: object to release once all queues pass their fence targets. */
struct fenced_release_data {
enum fenced_release_kind kind;
void *ptr;
};
/* ========================== *
* internal procs
* ========================== */
/* Process-exit cleanup handler (registered in gp_startup). */
internal P_ExitFuncDef(gp_shutdown);
/* Startup phases: device/adapter, core objects, pipelines, noise texture. */
internal void dx12_init_device(void);
internal void dx12_init_objects(void);
internal void dx12_init_pipelines(void);
internal void dx12_init_noise(void);
internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type);
internal void command_queue_release(struct command_queue *cq);
/* Background job that recycles fence-completed GPU objects. */
internal P_JobDef(dx12_evictor_job, _);
internal void fenced_release(void *data, enum fenced_release_kind kind);
internal struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state);
internal struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh);
/* Job signatures: `*_in` arrays are indexed by job.id, results land in `*_out`. */
struct command_queue_alloc_job_sig { struct command_queue_desc *descs_in; struct command_queue **cqs_out; };
internal P_JobDef(command_queue_alloc_job, job);
struct pipeline_alloc_job_sig { struct pipeline_desc *descs_in; struct pipeline **pipelines_out; };
internal P_JobDef(pipeline_alloc_job, job);
struct dx12_upload_job_sig { struct dx12_resource *resource; void *data; };
internal P_JobDef(dx12_upload_job, job);
#if RESOURCE_RELOADING
/* Hot-reload callback invoked when a watched shader file changes. */
internal WATCH_CALLBACK_FUNC_DEF(pipeline_watch_callback, name);
#endif
/* ========================== *
* Global state
* ========================== */
/* All global state for the DX12 backend. Each sub-pool pairs an arena with
 * a mutex; arenas are never freed, objects are recycled via freelists. */
Global struct {
Atomic32 initialized; /* 0 -> 1 exactly once in gp_startup */
/* Descriptor heaps pool */
P_Mutex command_descriptor_heaps_mutex;
Arena *command_descriptor_heaps_arena;
struct command_descriptor_heap *first_submitted_command_descriptor_heap;
struct command_descriptor_heap *last_submitted_command_descriptor_heap;
/* Command buffers pool */
P_Mutex command_buffers_mutex;
Arena *command_buffers_arena;
Dict *command_buffers_dict;
/* Resources pool */
P_Mutex resources_mutex;
Arena *resources_arena;
struct dx12_resource *first_free_resource;
/* Swapchains pool */
P_Mutex swapchains_mutex;
Arena *swapchains_arena;
struct swapchain *first_free_swapchain;
/* Shader bytecode archive */
struct tar_archive dxc_archive; /* parsed from embedded data at startup */
/* Pipeline cache */
P_Mutex pipelines_mutex;
Arena *pipelines_arena;
struct pipeline *first_free_pipeline;
Dict *pipeline_descs;
Dict *top_pipelines; /* Latest pipelines */
Dict *top_successful_pipelines; /* Latest pipelines that successfully compiled */
struct pipeline_scope *first_free_pipeline_scope;
/* Fenced release queue */
P_Mutex fenced_releases_mutex;
Arena *fenced_releases_arena;
u64 fenced_release_targets[DX12_NUM_QUEUES]; /* per-queue fence values gating releases */
/* Factory */
IDXGIFactory6 *factory;
/* Adapter */
IDXGIAdapter1 *adapter;
/* Device */
ID3D12Device *device;
/* Descriptor sizes */
u32 desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];  /* device handle increment per heap type */
u32 desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES]; /* capacity per heap type */
/* Global descriptor heaps */
struct cpu_descriptor_heap *cbv_srv_uav_heap;
struct cpu_descriptor_heap *rtv_heap;
/* Command queues */
P_Mutex global_command_list_record_mutex;
P_Mutex global_submit_mutex;
struct command_queue *command_queues[DX12_NUM_QUEUES];
/* Evictor job */
P_Counter evictor_job_counter;
P_Cv evictor_wake_cv;
P_Mutex evictor_wake_mutex;
i64 evictor_wake_gen;  /* bumped to wake the evictor */
b32 evictor_shutdown;  /* set under evictor_wake_mutex at shutdown */
} G = ZI, DebugAlias(G, G_gp_dx12);
/* ========================== *
* Startup
* ========================== */
/* One-time GP layer startup: allocates all global pools, parses the embedded
 * shader archive, then runs the DX12 init phases in dependency order
 * (device -> objects -> pipelines -> noise). Panics on double-init or any
 * unrecoverable failure. Must be called before any other gp_* function. */
void gp_startup(void)
{
__prof;
/* Atomically flip the initialized flag; a second caller panics. */
if (Atomic32FetchTestSet(&G.initialized, 0, 1) != 0) {
P_Panic(Lit("GP layer already initialized"));
}
/* Initialize command descriptor heaps pool */
G.command_descriptor_heaps_arena = AllocArena(Gibi(64));
/* Initialize command buffers pool */
G.command_buffers_arena = AllocArena(Gibi(64));
G.command_buffers_dict = InitDict(G.command_buffers_arena, 4096);
/* Initialize resources pool */
G.resources_arena = AllocArena(Gibi(64));
/* Initialize swapchains pool */
G.swapchains_arena = AllocArena(Gibi(64));
/* Initialize pipeline cache */
G.pipelines_arena = AllocArena(Gibi(64));
G.pipeline_descs = InitDict(G.pipelines_arena, 1024);
G.top_pipelines = InitDict(G.pipelines_arena, 1024);
G.top_successful_pipelines = InitDict(G.pipelines_arena, 1024);
/* Initialize fenced releases queue */
G.fenced_releases_arena = AllocArena(Gibi(64));
/* Initialize embedded shader archive */
String embedded_data = INC_GetDxcTar();
if (embedded_data.len <= 0) {
P_Panic(Lit("No embedded shaders found"));
}
G.dxc_archive = tar_parse(G.pipelines_arena, embedded_data, Lit(""));
/* Initialize dx12 */
/* TODO: Parallelize phases */
dx12_init_device();
dx12_init_objects();
dx12_init_pipelines();
dx12_init_noise();
/* Register callbacks */
#if RESOURCE_RELOADING
watch_register_callback(pipeline_watch_callback);
#endif
P_OnExit(gp_shutdown);
/* Start evictor job */
P_Run(1, dx12_evictor_job, 0, P_Pool_Background, P_Priority_Low, &G.evictor_job_counter);
}
/* Process-exit handler: signals the evictor job to stop and waits for it.
 * D3D objects are intentionally leaked at exit; the #if 0 path only exists
 * to quiet live-object reporting while debugging. */
internal P_ExitFuncDef(gp_shutdown)
{
__prof;
#if 0
/* Release objects to make live object reporting less noisy */
//IDXGISwapChain3_Release(G.swapchain);
for (u32 i = 0; i < countof(G.command_queues); ++i) {
struct command_queue *cq = G.command_queues[i];
command_queue_release(cq); /* was misspelled `cmomand_queue_release`; would not compile if re-enabled */
}
ID3D12Device_Release(G.device);
#else
(UNUSED)command_queue_release;
#endif
/* Set the shutdown flag and signal under the mutex so the evictor cannot
 * miss the wakeup between its flag check and its wait. */
{
P_Lock lock = P_LockE(&G.evictor_wake_mutex);
G.evictor_shutdown = 1;
P_SignalCv(&G.evictor_wake_cv, I32Max);
P_Unlock(&lock);
}
P_WaitOnCounter(&G.evictor_job_counter);
}
/* ========================== *
* Dx12 device initialization
* ========================== */
/* Wrap `error` in a standard initialization-failure message and abort
 * via P_Panic. Scratch arena is formally released after the panic call
 * for symmetry, matching the original control flow. */
internal void dx12_init_error(String error)
{
TempArena scratch = BeginScratchNoConflict();
String panic_msg = StringFormat(scratch.arena,
Lit("Failed to initialize DirectX 12.\n\n%F"),
FmtString(error));
P_Panic(panic_msg);
EndScratch(scratch);
}
/* Create the DXGI factory, pick the highest-performance adapter that
 * supports D3D_FEATURE_LEVEL_12_0, and create the ID3D12Device. In debug
 * builds also enables the D3D12/DXGI debug layers and break-on-error.
 * Panics (via dx12_init_error) on any failure. */
internal void dx12_init_device(void)
{
__prof;
TempArena scratch = BeginScratchNoConflict();
HRESULT hr = 0;
/* Enable debug layer */
u32 dxgi_factory_flags = 0;
#if DX12_DEBUG
{
__profn("Enable debug layer");
ID3D12Debug *debug_controller0 = 0;
hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0);
if (FAILED(hr)) {
dx12_init_error(Lit("Failed to create ID3D12Debug0"));
}
ID3D12Debug1 *debug_controller1 = 0;
hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1);
if (FAILED(hr)) {
dx12_init_error(Lit("Failed to create ID3D12Debug1"));
}
ID3D12Debug_EnableDebugLayer(debug_controller0);
/* FIXME: Enable this */
//ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, 1);
/* Release through the interface-correct macro (was ID3D12Debug_Release,
 * which happened to work only because Release lives in IUnknown). */
ID3D12Debug1_Release(debug_controller1);
ID3D12Debug_Release(debug_controller0);
dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG;
}
#endif
/* Create factory */
{
__profn("Create factory");
hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G.factory);
if (FAILED(hr)) {
dx12_init_error(Lit("Failed to initialize DXGI factory"));
}
}
/* Create device */
{
__profn("Create device");
IDXGIAdapter1 *adapter = 0;
ID3D12Device *device = 0;
String error = Lit("Could not initialize GPU device.");
String first_gpu_name = ZI;
u32 adapter_index = 0;
b32 skip = 0; /* For debugging iGPU */
/* Walk adapters in high-performance order until device creation succeeds. */
for (;;) {
{
hr = IDXGIFactory6_EnumAdapterByGpuPreference(G.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter);
}
if (SUCCEEDED(hr)) {
DXGI_ADAPTER_DESC1 desc;
IDXGIAdapter1_GetDesc1(adapter, &desc);
if (first_gpu_name.len == 0) {
/* Remember the first (preferred) adapter name for error reporting. */
first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description);
}
{
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device);
}
if (SUCCEEDED(hr) && !skip) {
break;
}
skip = 0;
/* BUGFIX: D3D12CreateDevice leaves `device` NULL on failure; calling
 * Release through the COM vtable on NULL would dereference it. */
if (device) {
ID3D12Device_Release(device);
}
IDXGIAdapter1_Release(adapter);
adapter = 0;
device = 0;
++adapter_index;
} else {
break;
}
}
if (!device) {
if (first_gpu_name.len > 0) {
String fmt = Lit("Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.");
error = StringFormat(scratch.arena, fmt, FmtString(first_gpu_name));
}
dx12_init_error(error);
}
G.adapter = adapter;
G.device = device;
}
#if DX12_DEBUG
/* Enable D3D12 Debug break */
{
__profn("Enable d3d12 debug break");
ID3D12InfoQueue *info = 0;
hr = ID3D12Device_QueryInterface(G.device, &IID_ID3D12InfoQueue, (void **)&info);
if (FAILED(hr)) {
dx12_init_error(Lit("Failed to query ID3D12Device interface"));
}
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1);
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1);
ID3D12InfoQueue_Release(info);
}
/* Enable DXGI Debug break */
{
__profn("Enable dxgi debug break");
IDXGIInfoQueue *dxgi_info = 0;
hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info);
if (FAILED(hr)) {
dx12_init_error(Lit("Failed to get DXGI debug interface"));
}
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1);
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1);
IDXGIInfoQueue_Release(dxgi_info);
}
#endif
#if ProfilingIsEnabled_GPU && ProfilingIsEnabled_GPU_STABLE_POWER_STATE
/* Enable stable power state (requires Windows developer mode). This trades
 * performance for stable GPU clocks so GPU timings are comparable. */
{
__profn("Set stable power state");
b32 success = 1;
HKEY key = 0;
success = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, KEY_READ, &key) == ERROR_SUCCESS;
if (success) {
DWORD value = ZI;
DWORD dword_size = sizeof(DWORD);
success = RegQueryValueExW(key, L"AllowDevelopmentWithoutDevLicense", 0, 0, (LPBYTE)&value, &dword_size) == ERROR_SUCCESS;
RegCloseKey(key);
if (success) {
success = value != 0;
}
}
P_LogInfoF("D3D12 profiling is enabled, attempting to set stable power state (this will increase GPU timing stability at the cost of performance)");
if (success) {
P_LogInfoF("Machine is in developer mode, calling ID3D12Device::SetStablePowerState");
hr = ID3D12Device_SetStablePowerState(G.device, 1);
if (SUCCEEDED(hr)) {
P_LogInfoF("ID3D12Device::SetStablePowerState succeeded");
} else {
success = 0;
P_LogErrorF("ID3D12Device::SetStablePowerState failed");
}
} else {
P_LogWarningF("Machine is not in developer mode, cannot call ID3D12Device::SetStablePowerState");
}
if (!success) {
P_LogWarningF("Profiling is enabled, but ID3D12Device::SetStablePowerState could not be called. This means that GPU timing may be unreliable.");
}
}
#endif
EndScratch(scratch);
}
/* ========================== *
* Dx12 object initialization
* ========================== */
/* Query per-type descriptor increment sizes, create the global CPU
 * descriptor heaps, and allocate all command queues (in parallel jobs).
 * Requires G.device (dx12_init_device must run first). */
internal void dx12_init_objects(void)
{
__prof;
/* Initialize desc sizes */
G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
/* Initialize desc counts */
G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = DX12_NUM_RTV_DESCRIPTORS;
/* Create global descriptor heaps */
G.cbv_srv_uav_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
G.rtv_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
/* Create command queues */
{
__profn("Allocate command queues");
/* NOTE: order matches DX12_QUEUE_* indices; only the first
 * DX12_NUM_QUEUES entries are used (1 in single-queue builds). */
struct command_queue_desc params[] = {
{.type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Direct queue") },
{.type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Compute queue") },
/* Fixed debug-name typo: was "Copyqueue" */
{.type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .dbg_name = Lit("Copy queue") },
{.type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = Lit("Background copy queue") }
};
struct command_queue_alloc_job_sig sig = ZI;
sig.descs_in = params;
sig.cqs_out = G.command_queues;
{
P_Counter counter = ZI;
P_Run(DX12_NUM_QUEUES, command_queue_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
P_WaitOnCounter(&counter);
}
#if ProfilingIsEnabled
{
/* Initialize serially for consistent order in profiler */
__profn("Initialize command queue profiling contexts");
for (i32 i = 0; i < DX12_NUM_QUEUES; ++i) {
struct command_queue *cq = G.command_queues[i];
String dbg_name = params[i].dbg_name;
__prof_dx12_ctx_alloc(cq->prof, G.device, cq->cq, dbg_name.text, dbg_name.len);
(UNUSED)dbg_name;
}
}
#endif
}
}
/* ========================== *
* Dx12 pipeline initialization
* ========================== */
internal void pipeline_register(u64 num_pipelines, struct pipeline **pipelines);
/* Register the built-in pipeline descriptions, compile them all in
 * parallel jobs, log per-pipeline results, and publish them through
 * pipeline_register. Compile failures show a message box but do not abort. */
internal void dx12_init_pipelines(void)
{
__prof;
TempArena scratch = BeginScratchNoConflict();
/* Register pipeline descs */
{
/* Material pipeline */
{
struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
desc->name = Lit("kernel_material");
desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
desc->rtvs[0].blending = 1;
desc->rtvs[1].format = DXGI_FORMAT_R16G16B16A16_FLOAT;
desc->rtvs[1].blending = 1;
SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
}
/* Flood pipeline */
{
struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
desc->name = Lit("kernel_flood");
SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
}
/* Shade pipeline */
{
struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
desc->name = Lit("kernel_shade");
SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
}
/* Shape pipeline */
{
struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
desc->name = Lit("kernel_shape");
desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
desc->rtvs[0].blending = 1;
SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
}
/* UI pipeline */
{
struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
desc->name = Lit("kernel_ui");
desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
desc->rtvs[0].blending = 1;
SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
}
/* Blit pipeline */
{
struct pipeline_desc *desc = PushStruct(G.pipelines_arena, struct pipeline_desc);
desc->name = Lit("kernel_blit");
desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
desc->rtvs[0].blending = 1;
SetDictValue(G.pipelines_arena, G.pipeline_descs, HashFnv64(Fnv64Basis, desc->name), (u64)desc);
}
}
/* Compile pipelines */
u32 num_pipelines = 0;
/* NOTE(review): `descs` is the address of the next arena allocation; the
 * loop below appears to rely on consecutive PushStruct calls on the same
 * arena being contiguous so `descs` ends up pointing at a packed array —
 * confirm this arena guarantee before restructuring. */
struct pipeline_desc *descs = PushDry(scratch.arena, struct pipeline_desc);
for (DictEntry *entry = G.pipeline_descs->first; entry; entry = entry->next) {
struct pipeline_desc *desc = (struct pipeline_desc *)entry->value;
*PushStruct(scratch.arena, struct pipeline_desc) = *desc;
++num_pipelines;
}
struct pipeline **pipelines = PushStructs(scratch.arena, struct pipeline *, num_pipelines);
{
__profn("Allocate pipelines");
struct pipeline_alloc_job_sig sig = ZI;
sig.descs_in = descs;
sig.pipelines_out = pipelines;
P_Counter counter = ZI;
P_Run(num_pipelines, pipeline_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
P_WaitOnCounter(&counter);
}
/* Report results: warnings on success, error + message box on failure. */
for (u32 i = 0; i < num_pipelines; ++i) {
struct pipeline *pipeline = pipelines[i];
if (pipeline->success) {
P_LogSuccessF("Successfully compiled pipeline \"%F\" in %F seconds", FmtString(pipeline->name), FmtFloat(SecondsFromNs(pipeline->compilation_time_ns)));
if (pipeline->error.len) {
String msg = StringFormat(scratch.arena, Lit("Warning while compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(pipeline->error));
P_LogWarning(msg);
}
} else {
String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
String msg = StringFormat(scratch.arena, Lit("Error initializing pipeline \"%F\":\n\n%F"), FmtString(pipeline->name), FmtString(error));
P_LogError(msg);
P_MessageBox(P_MessageBoxKind_Warning, msg);
}
}
pipeline_register(num_pipelines, pipelines);
EndScratch(scratch);
}
/* ========================== *
* Noise texture initialization
* ========================== */
/* Load the embedded blue-noise resource, validate its size against the
 * expected R16 3D texture dimensions, create the GPU texture + SRV, and
 * upload the data via a blocking upload job. Panics on missing resource
 * or size mismatch. */
internal void dx12_init_noise(void)
{
TempArena scratch = BeginScratchNoConflict();
{
String noise_res_name = Lit("noise_128x128x64_16.dat");
R_Resource noise_res = resource_open(noise_res_name);
DXGI_FORMAT format = DXGI_FORMAT_R16_UINT;
//u32 expected_size = K_BLUE_NOISE_TEX_WIDTH * K_BLUE_NOISE_TEX_HEIGHT * K_BLUE_NOISE_TEX_DEPTH * 2;
/* 2 bytes per texel (R16) across the full 3D volume. */
u32 expected_size = K_BLUE_NOISE_TEX_WIDTH * K_BLUE_NOISE_TEX_HEIGHT * K_BLUE_NOISE_TEX_DEPTH * 2;
if (resource_exists(&noise_res)) {
String data = resource_get_data(&noise_res);
if (data.len != expected_size) {
P_Panic(StringFormat(scratch.arena,
Lit("Noise texture has unexpected size for a %Fx%Fx%F texture (expected %F, got %F)"),
FmtUint(K_BLUE_NOISE_TEX_WIDTH), FmtUint(K_BLUE_NOISE_TEX_HEIGHT), FmtUint(K_BLUE_NOISE_TEX_DEPTH),
FmtUint(expected_size), FmtUint(data.len)));
}
{
/* Default-heap 3D texture, created in COPY_DEST state for the upload. */
D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
D3D12_RESOURCE_DESC desc = ZI;
desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D;
desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
desc.Format = format;
desc.Alignment = 0;
desc.Width = K_BLUE_NOISE_TEX_WIDTH;
desc.Height = K_BLUE_NOISE_TEX_HEIGHT;
desc.DepthOrArraySize = K_BLUE_NOISE_TEX_DEPTH;
desc.MipLevels = 1;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, D3D12_RESOURCE_STATE_COPY_DEST);
r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle);
/* Upload texture */
{
/* Blocking: sig lives on this stack frame, so we must wait before returning. */
P_Counter counter = ZI;
struct dx12_upload_job_sig sig = ZI;
sig.resource = r;
sig.data = data.text;
P_Run(1, dx12_upload_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
P_WaitOnCounter(&counter);
}
}
} else {
P_Panic(StringFormat(scratch.arena, Lit("Noise resource \"%F\" not found"), FmtString(noise_res_name)));
}
resource_close(&noise_res);
}
EndScratch(scratch);
}
/* ========================== *
* Shader compilation
* ========================== */
#if RESOURCE_RELOADING
/* Input for one shader compilation: HLSL source plus dxc -E/-T arguments. */
struct shader_compile_desc {
String src;           /* HLSL source text */
String friendly_name; /* display name, passed as first dxc argument */
String entry;         /* entry point (-E) */
String target;        /* target profile (-T), e.g. vs_6_x / ps_6_x / cs_6_x */
};
/* Output of one shader compilation. */
struct shader_compile_result {
i64 elapsed_ns;  /* wall-clock compile duration */
String dxc;      /* compiled bytecode (valid when success) */
String errors;   /* compiler diagnostics, may be non-empty even on success */
b32 success;
};
/* Job signature: descs/results are parallel arrays indexed by job.id;
 * results are allocated from `arena`. */
struct shader_compile_job_sig {
Arena *arena;
struct shader_compile_desc *descs;
struct shader_compile_result *results;
};
/* Job body: compile one shader (descs[job.id]) with dxc and record the
 * outcome in results[job.id]. The dxc argument list is the fixed
 * name/-E/-T triple followed by the build-system-supplied DXC_ARGS,
 * split on spaces. Output strings are allocated from sig->arena. */
internal P_JobDef(shader_compile_job, job)
{
    __prof;
    struct shader_compile_job_sig *sig = job.sig;
    Arena *arena = sig->arena;
    struct shader_compile_desc *in = &sig->descs[job.id];
    struct shader_compile_result *out = &sig->results[job.id];
    TempArena scratch = BeginScratch(arena);
    i64 t0 = P_TimeNs();
    DXC_Result compiled = ZI;
    {
        __profn("Compile shader");
        P_LogInfoF("Compiling shader \"%F:%F\"", FmtString(in->friendly_name), FmtString(in->entry));
        /* NOTE: `DXC_ARGS` is supplied by build system at compile time */
        char *build_args_cstr = Stringize(DXC_ARGS);
        String build_args = StringFromCstrNoLimit(build_args_cstr);
        StringArray extra_args = SplitString(scratch.arena, build_args, Lit(" "));
        String fixed_args[] = {
            in->friendly_name,
            Lit("-E"), in->entry,
            Lit("-T"), in->target,
        };
        u32 total_args = countof(fixed_args) + extra_args.count;
        String *all_args = PushStructs(scratch.arena, String, total_args);
        u32 cursor = 0;
        for (u32 i = 0; i < countof(fixed_args); ++i) {
            all_args[cursor++] = fixed_args[i];
        }
        for (u32 i = 0; i < extra_args.count; ++i) {
            all_args[cursor++] = extra_args.strings[i];
        }
        compiled = DXC_Compile(arena, in->src, total_args, all_args);
    }
    out->success = compiled.success;
    out->dxc = compiled.dxc;
    out->errors = compiled.errors;
    out->elapsed_ns = P_TimeNs() - t0;
    EndScratch(scratch);
}
#endif
/* ========================== *
* Pipeline
* ========================== */
internal P_JobDef(pipeline_alloc_job, job)
{
__prof;
struct pipeline_alloc_job_sig *sig = job.sig;
struct pipeline_desc *desc = &sig->descs_in[job.id];
struct pipeline **pipelines_out = sig->pipelines_out;
struct pipeline *pipeline = 0;
{
P_Lock lock = P_LockE(&G.pipelines_mutex);
if (G.first_free_pipeline) {
pipeline = G.first_free_pipeline;
G.first_free_pipeline = pipeline->next;
} else {
pipeline = PushStructNoZero(G.pipelines_arena, struct pipeline);
}
P_Unlock(&lock);
}
ZeroStruct(pipeline);
pipelines_out[job.id] = pipeline;
pipeline->desc = *desc;
pipeline->name = desc->name;
pipeline->hash = HashFnv64(Fnv64Basis, pipeline->name);
TempArena scratch = BeginScratchNoConflict();
{
i64 start_ns = P_TimeNs();
String pipeline_name = pipeline->name;
P_LogInfoF("Loading pipeline \"%F\"", FmtString(pipeline_name));
b32 success = 1;
HRESULT hr = 0;
String error_str = ZI;
String vs_dxc = desc->vs_dxc.len > 0 ? desc->vs_dxc : tar_get(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".vs")))->data;
String ps_dxc = desc->ps_dxc.len > 0 ? desc->ps_dxc : tar_get(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".ps")))->data;
String cs_dxc = desc->cs_dxc.len > 0 ? desc->cs_dxc : tar_get(&G.dxc_archive, CatString(scratch.arena, pipeline_name, Lit(".cs")))->data;
if (success && vs_dxc.len > 0 && ps_dxc.len <= 0) {
error_str = Lit("Pipeline has vertex shader without pixel shader");
success = 0;
}
if (success && vs_dxc.len <= 0 && ps_dxc.len > 0) {
error_str = Lit("Pipeline has pixel shader without vertex shader");
success = 0;
}
if (success && cs_dxc.len > 0 && (vs_dxc.len > 0 || ps_dxc.len > 0)) {
error_str = Lit("Pipeline has a compute shader with a vertex/pixel shader");
success = 0;
}
if (success && cs_dxc.len <= 0 && vs_dxc.len <= 0 && ps_dxc.len <= 0) {
error_str = Lit("Pipeline has no shaders");
success = 0;
}
ID3D10Blob *vs_blob = 0;
ID3D10Blob *ps_blob = 0;
ID3D10Blob *cs_blob = 0;
if (success && vs_dxc.len > 0) {
hr = D3DCreateBlob(vs_dxc.len, &vs_blob);
if (SUCCEEDED(hr)) {
CopyBytes(ID3D10Blob_GetBufferPointer(vs_blob), vs_dxc.text, vs_dxc.len);
} else {
error_str = Lit("Failed to create vertex shader blob");
success = 0;
}
}
if (success && ps_dxc.len > 0) {
hr = D3DCreateBlob(ps_dxc.len, &ps_blob);
if (SUCCEEDED(hr)) {
CopyBytes(ID3D10Blob_GetBufferPointer(ps_blob), ps_dxc.text, ps_dxc.len);
} else {
error_str = Lit("Failed to create pixel shader blob");
success = 0;
}
}
if (success && cs_dxc.len > 0) {
hr = D3DCreateBlob(cs_dxc.len, &cs_blob);
if (SUCCEEDED(hr)) {
CopyBytes(ID3D10Blob_GetBufferPointer(cs_blob), cs_dxc.text, cs_dxc.len);
} else {
error_str = Lit("Failed to create compute shader blob");
success = 0;
}
}
/* Get root signature blob
* NOTE: This isn't necessary for creating the root signature (since it
* could reuse the shader blob), however we'd like to verify that the
* root signature exists and matches between vs & ps shaders. */
ID3D10Blob *rootsig_blob = 0;
if (success) {
__profn("Validate root signatures");
if (cs_dxc.len > 0) {
u32 cs_rootsig_data_len = 0;
ID3D10Blob *cs_rootsig_blob = 0;
D3DGetBlobPart(ID3D10Blob_GetBufferPointer(cs_blob), ID3D10Blob_GetBufferSize(cs_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &cs_rootsig_blob);
if (cs_rootsig_blob) {
cs_rootsig_data_len = ID3D10Blob_GetBufferSize(cs_rootsig_blob);
}
if (cs_rootsig_data_len == 0) {
success = 0;
error_str = Lit("Compute shader is missing root signature");
} else {
rootsig_blob = cs_rootsig_blob;
}
} else {
char *vs_rootsig_data = 0;
char *ps_rootsig_data = 0;
u32 vs_rootsig_data_len = 0;
u32 ps_rootsig_data_len = 0;
ID3D10Blob *vs_rootsig_blob = 0;
ID3D10Blob *ps_rootsig_blob = 0;
D3DGetBlobPart(ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &vs_rootsig_blob);
D3DGetBlobPart(ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob), D3D_BLOB_ROOT_SIGNATURE, 0, &ps_rootsig_blob);
if (vs_rootsig_blob) {
vs_rootsig_data = ID3D10Blob_GetBufferPointer(vs_rootsig_blob);
vs_rootsig_data_len = ID3D10Blob_GetBufferSize(vs_rootsig_blob);
}
if (ps_rootsig_blob) {
ps_rootsig_data = ID3D10Blob_GetBufferPointer(ps_rootsig_blob);
ps_rootsig_data_len = ID3D10Blob_GetBufferSize(ps_rootsig_blob);
}
if (vs_rootsig_data_len == 0) {
success = 0;
error_str = Lit("Vertex shader is missing root signature");
} else if (ps_rootsig_data_len == 0) {
success = 0;
error_str = Lit("Pixel shader is missing root signature");
} else if (vs_rootsig_data_len != ps_rootsig_data_len || !EqBytes(vs_rootsig_data, ps_rootsig_data, vs_rootsig_data_len)) {
success = 0;
error_str = Lit("Root signature mismatch between vertex and pixel shader");
} else {
rootsig_blob = vs_rootsig_blob;
}
if (ps_rootsig_blob) {
ID3D10Blob_Release(ps_rootsig_blob);
}
}
}
/* Create root signature */
ID3D12RootSignature *rootsig = 0;
if (success) {
__profn("Create root signature");
hr = ID3D12Device_CreateRootSignature(G.device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig);
if (FAILED(hr)) {
error_str = Lit("Failed to create root signature");
success = 0;
}
}
/* Create PSO */
ID3D12PipelineState *pso = 0;
if (success) {
if (cs_dxc.len > 0) {
__profn("Create compute PSO");
D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = { 0 };
pso_desc.pRootSignature = rootsig;
pso_desc.CS.pShaderBytecode = ID3D10Blob_GetBufferPointer(cs_blob);
pso_desc.CS.BytecodeLength = ID3D10Blob_GetBufferSize(cs_blob);
hr = ID3D12Device_CreateComputePipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
} else {
__profn("Create graphics PSO");
/* Default rasterizer state */
D3D12_RASTERIZER_DESC raster_desc = {
.FillMode = D3D12_FILL_MODE_SOLID,
.CullMode = D3D12_CULL_MODE_NONE,
.FrontCounterClockwise = 0,
.DepthBias = D3D12_DEFAULT_DEPTH_BIAS,
.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP,
.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
.DepthClipEnable = 1,
.MultisampleEnable = 0,
.AntialiasedLineEnable = 0,
.ForcedSampleCount = 0,
.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF
};
/* Empty input layout */
D3D12_INPUT_LAYOUT_DESC input_layout_desc = ZI;
/* Blend state */
D3D12_BLEND_DESC blend_desc = {
.AlphaToCoverageEnable = 0,
.IndependentBlendEnable = 1
};
for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) {
StaticAssert(countof(blend_desc.RenderTarget) <= countof(desc->rtvs));
if (desc->rtvs[i].format != DXGI_FORMAT_UNKNOWN) {
b32 blending_enabled = desc->rtvs[i].blending;
blend_desc.RenderTarget[i].BlendEnable = blending_enabled;
blend_desc.RenderTarget[i].SrcBlend = D3D12_BLEND_SRC_ALPHA;
blend_desc.RenderTarget[i].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
blend_desc.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD;
blend_desc.RenderTarget[i].SrcBlendAlpha = D3D12_BLEND_ONE;
blend_desc.RenderTarget[i].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
blend_desc.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD;
blend_desc.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
} else {
break;
}
}
/* Disable depth stencil */
D3D12_DEPTH_STENCIL_DESC depth_stencil_desc = {
.DepthEnable = 0,
.StencilEnable = 0
};
/* PSO */
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { 0 };
pso_desc.pRootSignature = rootsig;
pso_desc.VS.pShaderBytecode = ID3D10Blob_GetBufferPointer(vs_blob);
pso_desc.VS.BytecodeLength = ID3D10Blob_GetBufferSize(vs_blob);
pso_desc.PS.pShaderBytecode = ID3D10Blob_GetBufferPointer(ps_blob);
pso_desc.PS.BytecodeLength = ID3D10Blob_GetBufferSize(ps_blob);
pso_desc.BlendState = blend_desc;
pso_desc.SampleMask = UINT_MAX;
pso_desc.RasterizerState = raster_desc;
pso_desc.DepthStencilState = depth_stencil_desc;
pso_desc.InputLayout = input_layout_desc;
pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) {
StaticAssert(countof(pso_desc.RTVFormats) <= countof(desc->rtvs));
DXGI_FORMAT format = desc->rtvs[i].format;
if (format != DXGI_FORMAT_UNKNOWN) {
pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format;
} else {
break;
}
}
pso_desc.SampleDesc.Count = 1;
pso_desc.SampleDesc.Quality = 0;
hr = ID3D12Device_CreateGraphicsPipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
}
if (FAILED(hr)) {
error_str = Lit("Failed to create pipeline state object");
success = 0;
}
}
/* Parse errors */
if (!success && error_str.len <= 0) {
error_str = Lit("Unknown error");
}
pipeline->pso = pso;
pipeline->rootsig = rootsig;
pipeline->compilation_time_ns = P_TimeNs() - start_ns;
pipeline->success = success;
pipeline->is_gfx = cs_dxc.len == 0;
pipeline->error = error_str;
if (rootsig_blob) {
ID3D10Blob_Release(rootsig_blob);
}
if (vs_blob) {
ID3D10Blob_Release(vs_blob);
}
if (ps_blob) {
ID3D10Blob_Release(ps_blob);
}
if (cs_blob) {
ID3D10Blob_Release(cs_blob);
}
}
EndScratch(scratch);
}
internal void pipeline_release_now(struct pipeline *pipeline)
{
	__prof;
	/* Immediately destroy the GPU objects owned by `pipeline` and recycle
	 * the struct. Must only be called once the GPU can no longer reference
	 * the pipeline (callers route through fenced_release for that).
	 * FIX: the root signature created alongside the PSO (see pipeline
	 * compilation) was previously never released, leaking one
	 * ID3D12RootSignature reference per pipeline release/hot-reload. */
	if (pipeline->pso) {
		ID3D12PipelineState_Release(pipeline->pso);
	}
	if (pipeline->rootsig) {
		ID3D12RootSignature_Release(pipeline->rootsig);
	}
	/* Return the struct to the global free list for reuse */
	P_Lock lock = P_LockE(&G.pipelines_mutex);
	{
		pipeline->next = G.first_free_pipeline;
		G.first_free_pipeline = pipeline;
	}
	P_Unlock(&lock);
}
/* ========================== *
* Pipeline cache
* ========================== */
internal struct pipeline_scope *pipeline_scope_begin(void)
{
	__prof;
	/* Open a pipeline reference scope. Pipelines looked up through this
	 * scope hold a reference until pipeline_scope_end is called. */
	struct pipeline_scope *recycled = 0;
	{
		/* Try to pop a previously-freed scope off the global free list */
		P_Lock lock = P_LockE(&G.pipelines_mutex);
		recycled = G.first_free_pipeline_scope;
		if (recycled) {
			G.first_free_pipeline_scope = recycled->next_free;
		}
		P_Unlock(&lock);
	}
	/* Reuse the recycled scope's arena, or allocate a fresh one */
	Arena *arena = recycled ? recycled->arena : AllocArena(Mebi(64));
	/* The scope struct itself lives in its arena, so rebuild it from a
	 * clean slate after the reset */
	ResetArena(arena);
	struct pipeline_scope *scope = PushStruct(arena, struct pipeline_scope);
	scope->arena = arena;
	scope->refs = InitDict(scope->arena, 64);
	return scope;
}
internal void pipeline_scope_end(struct pipeline_scope *scope)
{
	__prof;
	/* Close a pipeline reference scope: drop one reference on every
	 * pipeline the scope cached, fence-releasing any that hit zero, then
	 * push the scope onto the global free list for reuse. */
	P_Lock lock = P_LockE(&G.pipelines_mutex);
	{
		DictEntry *entry = scope->refs->first;
		while (entry) {
			struct pipeline *pipeline = (struct pipeline *)entry->value;
			--pipeline->refcount;
			if (pipeline->refcount <= 0) {
				/* GPU may still be using it; defer actual destruction */
				fenced_release(pipeline, FENCED_RELEASE_KIND_PIPELINE);
			}
			entry = entry->next;
		}
		scope->next_free = G.first_free_pipeline_scope;
		G.first_free_pipeline_scope = scope;
	}
	P_Unlock(&lock);
}
/* Sentinel returned when no successful pipeline exists for a name;
 * zero-initialized, so its `success` field is 0 */
internal Readonly struct pipeline g_nil_pipeline = ZI;
internal struct pipeline *pipeline_from_name(struct pipeline_scope *scope, String name)
{
	__prof;
	/* Resolve a pipeline by name for use within `scope`. The first lookup
	 * per scope takes one reference (dropped in pipeline_scope_end);
	 * repeat lookups hit the scope-local cache. Returns &g_nil_pipeline
	 * if no successful pipeline with this name is registered. */
	struct pipeline *result = &g_nil_pipeline;
	u64 hash = HashFnv64(Fnv64Basis, name);
	/* Fast path: this scope already references the pipeline */
	struct pipeline *tmp = (struct pipeline *)DictValueFromHash(scope->refs, hash);
	if (tmp) {
		result = tmp;
	} else {
		/* Slow path: look up the latest successful pipeline globally and
		 * take a reference while holding the pipelines mutex */
		{
			P_Lock lock = P_LockE(&G.pipelines_mutex);
			tmp = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, hash);
			if (tmp) {
				++tmp->refcount;
			}
			P_Unlock(&lock);
		}
		if (tmp) {
			/* Cache in the scope so the ref is dropped at scope end */
			SetDictValue(scope->arena, scope->refs, hash, (u64)tmp);
			result = tmp;
		}
	}
	return result;
}
internal void pipeline_register(u64 num_pipelines, struct pipeline **pipelines)
{
	__prof;
	/* Publish freshly-compiled pipelines to the global caches.
	 * G.top_pipelines always tracks the newest compile per pipeline hash
	 * (successful or not); G.top_successful_pipelines only tracks compiles
	 * that succeeded, so a failed hot-reload keeps serving the last good
	 * pipeline. Each dict slot owns one reference; displaced entries have
	 * their reference dropped and are fence-released at zero. */
	P_Lock lock = P_LockE(&G.pipelines_mutex);
	{
		for (u64 i = 0; i < num_pipelines; ++i) {
			struct pipeline *pipeline = pipelines[i];
			u64 hash = pipeline->hash;
			/* Insert into top dict */
			{
				struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_pipelines, hash);
				if (old_pipeline && --old_pipeline->refcount <= 0) {
					fenced_release(old_pipeline, FENCED_RELEASE_KIND_PIPELINE);
				}
				SetDictValue(G.pipelines_arena, G.top_pipelines, hash, (u64)pipeline);
				++pipeline->refcount;
			}
			/* Insert into success dict */
			if (pipeline->success) {
				struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, hash);
				if (old_pipeline && --old_pipeline->refcount <= 0) {
					fenced_release(old_pipeline, FENCED_RELEASE_KIND_PIPELINE);
				}
				SetDictValue(G.pipelines_arena, G.top_successful_pipelines, hash, (u64)pipeline);
				++pipeline->refcount;
			}
		}
	}
	P_Unlock(&lock);
}
#if RESOURCE_RELOADING
internal WATCH_CALLBACK_FUNC_DEF(pipeline_watch_callback, name)
{
	__prof;
	/* File-watcher callback for shader hot-reload: when a shader source
	 * file under src/ changes, recompile its shaders, rebuild every
	 * registered pipeline desc with the matching name, and re-register the
	 * resulting pipelines so subsequent lookups pick them up. */
	TempArena scratch = BeginScratchNoConflict();
	String rst_extension = Lit(".rst");
	String knl_extension = Lit(".knl");
	/* .rst => raster source (vs + ps entries), .knl => compute kernel */
	b32 is_src = StringStartsWith(name, Lit("src/"));
	b32 is_rs = is_src && StringEndsWith(name, rst_extension);
	b32 is_cs = is_src && !is_rs && StringEndsWith(name, knl_extension);
	b32 success = 0;
	/* Recompile shaders */
	String pipeline_name = ZI;
	String friendly_name = ZI;
	i32 num_shaders = 0;
	struct shader_compile_desc *shader_descs = 0;
	struct shader_compile_result *shader_results = 0;
	if (is_rs || is_cs) {
		P_LogDebugF("Change detected in shader source file \"%F\", recompiling...", FmtString(name));
		success = 1;
		P_File file = P_OpenFileReadWait(name);
		String data = P_ReadFile(scratch.arena, file);
		/* Friendly name: path trimmed to start at the last "src/" */
		{
			friendly_name = name;
			StringArray split = SplitString(scratch.arena, friendly_name, Lit("src/"));
			friendly_name = split.count > 0 ? CatString(scratch.arena, Lit("src/"), split.strings[split.count - 1]) : friendly_name;
		}
		/* Pipeline name: file stem (basename without its extension) */
		{
			pipeline_name = name;
			StringArray split = SplitString(scratch.arena, pipeline_name, Lit("/"));
			pipeline_name = split.count > 0 ? split.strings[split.count - 1] : pipeline_name;
			split = SplitString(scratch.arena, pipeline_name, Lit("."));
			pipeline_name = split.count > 1 ? split.strings[split.count - 2] : pipeline_name;
		}
		/* Kick parallel shader compile jobs and wait for completion */
		{
			struct shader_compile_job_sig sig = ZI;
			sig.arena = scratch.arena;
			if (is_rs) {
				/* Raster source: compile vertex + pixel from the same file */
				num_shaders = 2;
				shader_descs = PushStructs(scratch.arena, struct shader_compile_desc, num_shaders);
				shader_results = PushStructs(scratch.arena, struct shader_compile_result, num_shaders);
				sig.descs = shader_descs;
				sig.results = shader_results;
				sig.descs[0].src = data;
				sig.descs[0].friendly_name = friendly_name;
				sig.descs[0].entry = Lit("vs");
				sig.descs[0].target = Lit("vs_6_6");
				sig.descs[1].src = data;
				sig.descs[1].friendly_name = friendly_name;
				sig.descs[1].entry = Lit("ps");
				sig.descs[1].target = Lit("ps_6_6");
			} else if (is_cs) {
				/* Kernel source: single compute shader */
				num_shaders = 1;
				shader_descs = PushStructs(scratch.arena, struct shader_compile_desc, num_shaders);
				shader_results = PushStructs(scratch.arena, struct shader_compile_result, num_shaders);
				sig.descs = shader_descs;
				sig.results = shader_results;
				sig.descs[0].src = data;
				sig.descs[0].friendly_name = friendly_name;
				sig.descs[0].entry = Lit("cs");
				sig.descs[0].target = Lit("cs_6_6");
			}
			{
				P_Counter counter = ZI;
				P_Run(num_shaders, shader_compile_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
				P_WaitOnCounter(&counter);
			}
		}
		P_CloseFIle(file);
	}
	/* Report per-shader results; any failure aborts the pipeline rebuild */
	for (i32 i = 0; i < num_shaders; ++i) {
		struct shader_compile_desc *desc = &shader_descs[i];
		struct shader_compile_result *result = &shader_results[i];
		if (result->success) {
			P_LogSuccessF("Finished compiling shader \"%F:%F\" in %F seconds", FmtString(desc->friendly_name), FmtString(desc->entry), FmtFloat(SecondsFromNs(result->elapsed_ns)));
			if (result->errors.len > 0) {
				/* Compile succeeded but produced warnings */
				String msg = result->errors;
				P_LogWarning(msg);
			}
		} else {
			String msg = result->errors;
			P_LogError(msg);
			success = 0;
		}
	}
	if (success) {
		/* Create pipeline descs */
		u32 num_pipelines = 0;
		/* Descs are pushed contiguously after this marker on the scratch arena */
		struct pipeline_desc *pipeline_descs = PushDry(scratch.arena, struct pipeline_desc);
		for (DictEntry *entry = G.pipeline_descs->first; entry; entry = entry->next) {
			struct pipeline_desc *pipeline_desc = (struct pipeline_desc *)entry->value;
			struct pipeline_desc new_pipeline_desc = *pipeline_desc;
			if (EqString(pipeline_desc->name, pipeline_name)) {
				/* Swap in the freshly compiled bytecode */
				if (is_rs) {
					new_pipeline_desc.vs_dxc = shader_results[0].dxc;
					new_pipeline_desc.ps_dxc = shader_results[1].dxc;
				} else if (is_cs) {
					new_pipeline_desc.cs_dxc = shader_results[0].dxc;
				}
				*PushStructNoZero(scratch.arena, struct pipeline_desc) = new_pipeline_desc;
				++num_pipelines;
			}
		}
		/* Recompile dirty pipelines */
		if (num_pipelines > 0) {
			__profn("Compile dirty pipelines");
			struct pipeline **pipelines = PushStructs(scratch.arena, struct pipeline *, num_pipelines);
			{
				struct pipeline_alloc_job_sig sig = ZI;
				sig.descs_in = pipeline_descs;
				sig.pipelines_out = pipelines;
				P_Counter counter = ZI;
				P_Run(num_pipelines, pipeline_alloc_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
				P_WaitOnCounter(&counter);
			}
			/* Report results; shared lock is enough since we only read the
			 * success dict here */
			{
				P_Lock lock = P_LockS(&G.pipelines_mutex);
				for (u32 i = 0; i < num_pipelines; ++i) {
					struct pipeline *pipeline = pipelines[i];
					if (pipeline->success) {
						P_LogSuccessF("Successfully compiled pipeline \"%F\" in %F seconds", FmtString(pipeline->name), FmtFloat(SecondsFromNs(pipeline->compilation_time_ns)));
						if (pipeline->error.len > 0) {
							String msg = StringFormat(scratch.arena, Lit("Warning while compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(pipeline->error));
							P_LogWarning(msg);
						}
					} else {
						{
							String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
							String msg = StringFormat(scratch.arena, Lit("Error compiling pipeline \"%F\":\n%F"), FmtString(pipeline->name), FmtString(error));
							P_LogError(msg);
						}
						struct pipeline *old_pipeline = (struct pipeline *)DictValueFromHash(G.top_successful_pipelines, pipeline->hash);
						if (!old_pipeline) {
							/* If no previously successful pipeline exists, then show a message box rather than logging since logs may not be visible to user */
							String error = pipeline->error.len > 0 ? pipeline->error : Lit("Unknown error");
							String msg = StringFormat(scratch.arena, Lit("Error compiling pipeline \"%F\":\n\n%F"), FmtString(pipeline->name), FmtString(error));
							P_MessageBox(P_MessageBoxKind_Warning, msg);
						}
					}
				}
				P_Unlock(&lock);
			}
			pipeline_register(num_pipelines, pipelines);
		}
	}
	EndScratch(scratch);
}
#endif
/* ========================== *
* Descriptor
* ========================== */
internal struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh)
{
	__prof;
	/* Allocate one CPU descriptor slot from `dh`, preferring the free list
	 * and otherwise reserving the next unused slot. Panics when the heap
	 * is exhausted. Returns a zeroed struct with only its heap
	 * back-pointer, handle, and slot index populated. */
	struct descriptor *d = 0;
	u32 index = 0;
	D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI;
	{
		P_Lock lock = P_LockE(&dh->mutex);
		if (dh->first_free_descriptor) {
			/* Reuse a released slot; keep its existing handle/index */
			d = dh->first_free_descriptor;
			dh->first_free_descriptor = d->next_free;
			handle = d->handle;
			index = d->index;
		} else {
			if (dh->num_descriptors_reserved >= dh->num_descriptors_capacity) {
				P_Panic(Lit("Max descriptors reached in heap"));
			}
			d = PushStructNoZero(dh->arena, struct descriptor);
			index = dh->num_descriptors_reserved++;
			/* Handle is heap start plus slot offset */
			handle.ptr = dh->handle.ptr + (index * dh->descriptor_size);
		}
		P_Unlock(&lock);
	}
	/* Zero outside the lock; handle/index were saved above and restored
	 * here so ZeroStruct doesn't wipe them */
	ZeroStruct(d);
	d->heap = dh;
	d->handle = handle;
	d->index = index;
	return d;
}
internal void descriptor_release(struct descriptor *descriptor)
{
	/* Return the descriptor slot to its owning heap's free list; the slot
	 * (handle + index) is reused by a future descriptor_alloc. */
	struct cpu_descriptor_heap *dh = descriptor->heap;
	P_Lock lock = P_LockE(&dh->mutex);
	descriptor->next_free = dh->first_free_descriptor;
	dh->first_free_descriptor = descriptor;
	P_Unlock(&lock);
}
/* ========================== *
* CPU descriptor heap
* ========================== */
internal struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type)
{
	__prof;
	/* Create a CPU-visible (non-shader-visible) descriptor heap of the
	 * given type, sized from the global per-type count/size tables.
	 * Panics on any failure. */
	struct cpu_descriptor_heap *dh = 0;
	{
		Arena *arena = AllocArena(Mebi(64));
		dh = PushStruct(arena, struct cpu_descriptor_heap);
		dh->arena = arena;
	}
	/* Look up capacity and per-descriptor stride for this heap type */
	u32 num_descriptors = 0;
	u32 descriptor_size = 0;
	if (type < (i32)countof(G.desc_counts) && type < (i32)countof(G.desc_sizes)) {
		num_descriptors = G.desc_counts[type];
		descriptor_size = G.desc_sizes[type];
	}
	if (num_descriptors == 0 || descriptor_size == 0) {
		P_Panic(Lit("Unsupported CPU descriptor type"));
	}
	/* FIX: record the heap type. Previously the field was left zeroed,
	 * which made the type assertion in command_list_push_descriptor_heap
	 * pass vacuously for every heap (CBV_SRV_UAV happens to be 0). */
	dh->type = type;
	dh->num_descriptors_capacity = num_descriptors;
	dh->descriptor_size = descriptor_size;
	D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
	desc.Type = type;
	desc.NumDescriptors = num_descriptors;
	HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&dh->heap);
	if (FAILED(hr)) {
		P_Panic(Lit("Failed to create CPU descriptor heap"));
	}
	ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(dh->heap, &dh->handle);
	return dh;
}
#if 0
internal void cpu_descriptor_heap_release(struct cpu_descriptor_heap *dh)
{
/* TODO */
(UNUSED)dh;
}
#endif
/* ========================== *
* Fenced release
* ========================== */
internal void fenced_release(void *data, enum fenced_release_kind kind)
{
	/* Queue `data` for deferred destruction. Snapshots the current
	 * submit-fence target of every command queue; the evictor thread frees
	 * the object once all queues have passed those targets. */
	struct fenced_release_data fr = ZI;
	fr.kind = kind;
	fr.ptr = data;
	u64 fr_targets[countof(G.fenced_release_targets)] = ZI;
	/* Read current fence target values from command queues */
	for (u32 i = 0; i < countof(G.command_queues); ++i) {
		struct command_queue *cq = G.command_queues[i];
		P_Lock lock = P_LockS(&cq->submit_fence_mutex);
		{
			fr_targets[i] = cq->submit_fence_target;
		}
		P_Unlock(&lock);
	}
	/* PushStruct data to release queue */
	{
		P_Lock lock = P_LockE(&G.fenced_releases_mutex);
		{
			*PushStruct(G.fenced_releases_arena, struct fenced_release_data) = fr;
			/* NOTE(review): this overwrites the single shared target array
			 * for ALL pending releases with the newest snapshot. That is
			 * conservative (targets only grow, so older entries just wait
			 * longer) but looks intentional — confirm with the evictor. */
			CopyBytes(G.fenced_release_targets, fr_targets, sizeof(fr_targets));
		}
		P_Unlock(&lock);
	}
	/* Wake evictor */
	{
		P_Lock lock = P_LockE(&G.evictor_wake_mutex);
		{
			++G.evictor_wake_gen;
			P_SignalCv(&G.evictor_wake_cv, I32Max);
		}
		P_Unlock(&lock);
	}
}
/* ========================== *
* Resource
* ========================== */
internal struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state)
{
	__prof;
	/* Create a committed D3D12 resource wrapped in a recycled bookkeeping
	 * struct (free list under G.resources_mutex). Panics on creation
	 * failure (see TODO below). */
	struct dx12_resource *r = 0;
	{
		P_Lock lock = P_LockE(&G.resources_mutex);
		if (G.first_free_resource) {
			r = G.first_free_resource;
			G.first_free_resource = r->next_free;
		} else {
			r = PushStructNoZero(G.resources_arena, struct dx12_resource);
		}
		P_Unlock(&lock);
	}
	ZeroStruct(r);
	/* Render targets get an optimized clear value of all-zero color in the
	 * resource's own format; other resources must pass NULL */
	D3D12_CLEAR_VALUE clear_value = { .Format = desc.Format, .Color = { 0 } };
	D3D12_CLEAR_VALUE *clear_value_ptr = desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0;
	HRESULT hr = ID3D12Device_CreateCommittedResource(G.device, &heap_props, heap_flags, &desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->resource);
	if (FAILED(hr)) {
		/* TODO: Don't panic */
		P_Panic(Lit("Failed to create resource"));
	}
	/* Track the state CPU-side for the barrier helper */
	r->state = initial_state;
	/* Only buffers have a stable GPU virtual address */
	if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) {
		r->gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->resource);
	}
	return r;
}
internal void dx12_resource_release_now(struct dx12_resource *t)
{
__prof;
/* Release descriptors */
/* TODO: Batch lock heaps */
if (t->cbv_descriptor) {
descriptor_release(t->cbv_descriptor);
}
if (t->srv_descriptor) {
descriptor_release(t->srv_descriptor);
}
if (t->uav_descriptor) {
descriptor_release(t->uav_descriptor);
}
if (t->rtv_descriptor) {
descriptor_release(t->rtv_descriptor);
}
/* Release resource */
ID3D12Resource_Release(t->resource);
/* Add to free list */
P_Lock lock = P_LockE(&G.resources_mutex);
t->next_free = G.first_free_resource;
G.first_free_resource = t;
P_Unlock(&lock);
}
void gp_resource_release(G_Resource *resource)
{
	/* Public release entry point: destruction is deferred until every
	 * command queue has retired work that might reference the resource. */
	fenced_release((struct dx12_resource *)resource, FENCED_RELEASE_KIND_RESOURCE);
}
/* ========================== *
* Resource barrier
* ========================== */
/* One requested barrier; translated into a D3D12_RESOURCE_BARRIER by
 * dx12_resource_barriers */
struct dx12_resource_barrier_desc {
	enum D3D12_RESOURCE_BARRIER_TYPE type;
	struct dx12_resource *resource;
	enum D3D12_RESOURCE_STATES new_state; /* 0 if type != D3D12_RESOURCE_BARRIER_TYPE_TRANSITION */
};
internal void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_descs, struct dx12_resource_barrier_desc *descs)
{
	__prof;
	/* Translate high-level barrier descs into D3D12 barriers, skipping
	 * transitions that would be no-ops, and record them in one call.
	 * Updates each resource's CPU-side state tracking as it goes. */
	TempArena scratch = BeginScratchNoConflict();
	struct D3D12_RESOURCE_BARRIER *barriers = PushStructsNoZero(scratch.arena, struct D3D12_RESOURCE_BARRIER, num_descs);
	i32 count = 0;
	for (i32 desc_idx = 0; desc_idx < num_descs; ++desc_idx) {
		struct dx12_resource_barrier_desc *desc = &descs[desc_idx];
		struct dx12_resource *resource = desc->resource;
		switch (desc->type) {
		case D3D12_RESOURCE_BARRIER_TYPE_TRANSITION: {
			enum D3D12_RESOURCE_STATES before = resource->state;
			enum D3D12_RESOURCE_STATES after = desc->new_state;
			/* Identical before/after transitions are invalid in D3D12 */
			if (before != after) {
				struct D3D12_RESOURCE_BARRIER *barrier = &barriers[count++];
				ZeroStruct(barrier);
				barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
				barrier->Flags = 0;
				barrier->Transition.pResource = resource->resource;
				barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
				barrier->Transition.StateBefore = before;
				barrier->Transition.StateAfter = after;
				/* Keep CPU-side tracking in sync for future transitions */
				resource->state = after;
			}
		} break;
		case D3D12_RESOURCE_BARRIER_TYPE_UAV: {
			struct D3D12_RESOURCE_BARRIER *barrier = &barriers[count++];
			ZeroStruct(barrier);
			barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
			barrier->Flags = 0;
			barrier->UAV.pResource = resource->resource;
		} break;
		default: {
			/* Unknown barrier type */
			Assert(0);
		} break;
		}
	}
	if (count > 0) {
		ID3D12GraphicsCommandList_ResourceBarrier(cl, count, barriers);
	}
	EndScratch(scratch);
}
/* ========================== *
* Command queue
* ========================== */
internal struct command_list_pool *command_list_pool_alloc(struct command_queue *cq);
internal P_JobDef(command_queue_alloc_job, job)
{
	__prof;
	/* Job body: create one command queue — with its submit fence and a
	 * command list pool — from descs_in[job.id], and publish it to
	 * cqs_out[job.id]. Panics on any creation failure. */
	struct command_queue_alloc_job_sig *sig = job.sig;
	struct command_queue_desc *desc = &sig->descs_in[job.id];
	{
		struct command_queue *cq = 0;
		{
			Arena *arena = AllocArena(Gibi(64));
			cq = PushStruct(arena, struct command_queue);
			cq->arena = arena;
		}
		cq->desc = *desc;
		D3D12_COMMAND_QUEUE_DESC dx12_desc = ZI;
		dx12_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
		dx12_desc.Type = desc->type;
		dx12_desc.Priority = desc->priority;
		HRESULT hr = ID3D12Device_CreateCommandQueue(G.device, &dx12_desc, &IID_ID3D12CommandQueue, (void **)&cq->cq);
		if (FAILED(hr)) {
			P_Panic(Lit("Failed to create command queue"));
		}
		/* Fence starts at 0; each submission signals an incremented target */
		hr = ID3D12Device_CreateFence(G.device, 0, 0, &IID_ID3D12Fence, (void **)&cq->submit_fence);
		if (FAILED(hr)) {
			P_Panic(Lit("Failed to create command queue fence"));
		}
		cq->cl_pool = command_list_pool_alloc(cq);
		sig->cqs_out[job.id] = cq;
	}
}
internal void command_queue_release(struct command_queue *cq)
{
	__prof;
	/* TODO: tear down the queue, its submit fence, and its command list
	 * pool; currently a stub */
	(UNUSED)cq;
	//ID3D12CommandQueue_Release(cq->cq);
}
/* ========================== *
* Command list
* ========================== */
internal struct command_list_pool *command_list_pool_alloc(struct command_queue *cq)
{
	/* Create an empty command list pool bound to `cq`. The pool owns its
	 * arena; command lists allocated from it are recycled once the GPU has
	 * retired the submission they belong to. */
	Arena *arena = AllocArena(Gibi(64));
	struct command_list_pool *pool = PushStruct(arena, struct command_list_pool);
	pool->arena = arena;
	pool->cq = cq;
	return pool;
}
internal struct command_list *command_list_open(struct command_list_pool *pool)
{
	__prof;
	/* Acquire a command list from `pool`, ready for recording. Reuses the
	 * first submitted list whose GPU work has completed (per the queue's
	 * submit fence); otherwise allocates a fresh allocator + list pair.
	 * Takes a shared hold on the global record mutex, released in
	 * command_list_close. */
	struct command_queue *cq = pool->cq;
	u64 completed_fence_value = ID3D12Fence_GetCompletedValue(cq->submit_fence);
	struct command_list *cl = 0;
	struct ID3D12GraphicsCommandList *old_cl = 0;
	struct ID3D12CommandAllocator *old_ca = 0;
	{
		P_Lock lock = P_LockE(&pool->mutex);
		/* Find first command list ready for reuse */
		for (struct command_list *tmp = pool->first_submitted_command_list; tmp; tmp = tmp->next_submitted) {
			if (completed_fence_value >= tmp->submitted_fence_target) {
				cl = tmp;
				break;
			}
		}
		if (cl) {
			/* Remove from submitted list */
			old_cl = cl->cl;
			old_ca = cl->ca;
			struct command_list *prev = cl->prev_submitted;
			struct command_list *next = cl->next_submitted;
			if (prev) {
				prev->next_submitted = next;
			} else {
				pool->first_submitted_command_list = next;
			}
			if (next) {
				next->prev_submitted = prev;
			} else {
				pool->last_submitted_command_list = prev;
			}
		} else {
			cl = PushStructNoZero(pool->arena, struct command_list);
		}
		P_Unlock(&lock);
	}
	ZeroStruct(cl);
	cl->cq = cq;
	cl->pool = pool;
	/* Held (shared) for the duration of recording */
	cl->global_record_lock = P_LockS(&G.global_command_list_record_mutex);
	HRESULT hr = 0;
	if (old_cl) {
		/* Reuse the D3D12 objects salvaged from the recycled list */
		cl->cl = old_cl;
		cl->ca = old_ca;
	} else {
		hr = ID3D12Device_CreateCommandAllocator(G.device, cq->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca);
		if (FAILED(hr)) {
			P_Panic(Lit("Failed to create command allocator"));
		}
		hr = ID3D12Device_CreateCommandList(G.device, 0, cq->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl);
		if (FAILED(hr)) {
			P_Panic(Lit("Failed to create command list"));
		}
		/* New lists are created in the recording state; close so the reset
		 * below behaves the same for new and reused lists */
		hr = ID3D12GraphicsCommandList_Close(cl->cl);
		if (FAILED(hr)) {
			P_Panic(Lit("Failed to close command list during initialization"));
		}
	}
	/* Reset */
	hr = ID3D12CommandAllocator_Reset(cl->ca);
	if (FAILED(hr)) {
		P_Panic(Lit("Failed to reset command allocator"));
	}
	hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0);
	if (FAILED(hr)) {
		P_Panic(Lit("Failed to reset command list"));
	}
	return cl;
}
/* TODO: Allow multiple command list submissions */
internal u64 command_list_close(struct command_list *cl)
{
	__prof;
	/* Close and submit `cl` to its queue, then move its transient objects
	 * (shader-visible descriptor heaps, upload command buffers, and the
	 * list itself) onto "submitted" lists tagged with the new fence
	 * target so they can be recycled once the GPU passes it. Returns the
	 * fence target value signaled for this submission.
	 * FIX: the append paths below previously never set `prev_submitted`
	 * on the appended node (nodes are ZeroStruct'ed on acquisition, so it
	 * was always NULL). The removal paths treat a NULL prev as "node is
	 * the list head" and overwrite the list's first pointer — so removing
	 * a non-head node truncated the list, silently dropping earlier
	 * entries from the recycle lists. Readiness is not a prefix across
	 * multiple queues for the global heap list and the per-group buffer
	 * lists, so middle-node removals do occur; set prev on append. */
	struct command_queue *cq = cl->cq;
	struct command_list_pool *pool = cl->pool;
	/* Close */
	{
		__profn("Close DX12 command list");
		HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl);
		if (FAILED(hr)) {
			/* TODO: Don't panic */
			P_Panic(Lit("Failed to close command list before execution"));
		}
	}
	/* Submit */
	u64 submit_fence_target = 0;
	{
		__profn("Execute");
		/* Shared global submit lock + exclusive per-queue fence lock keep
		 * the execute/signal pair atomic w.r.t. fence-target readers */
		P_Lock submit_lock = P_LockS(&G.global_submit_mutex);
		P_Lock fence_lock = P_LockE(&cq->submit_fence_mutex);
		{
			submit_fence_target = ++cq->submit_fence_target;
			ID3D12CommandQueue_ExecuteCommandLists(cq->cq, 1, (ID3D12CommandList **)&cl->cl);
			ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, submit_fence_target);
		}
		P_Unlock(&fence_lock);
		P_Unlock(&submit_lock);
	}
	/* Add descriptor heaps to submitted list */
	{
		P_Lock lock = P_LockE(&G.command_descriptor_heaps_mutex);
		for (struct command_descriptor_heap *cdh = cl->first_command_descriptor_heap; cdh; cdh = cdh->next_in_command_list) {
			cdh->submitted_cq = cq;
			cdh->submitted_fence_target = submit_fence_target;
			cdh->prev_submitted = G.last_submitted_command_descriptor_heap;
			if (G.last_submitted_command_descriptor_heap) {
				G.last_submitted_command_descriptor_heap->next_submitted = cdh;
			} else {
				G.first_submitted_command_descriptor_heap = cdh;
			}
			G.last_submitted_command_descriptor_heap = cdh;
		}
		P_Unlock(&lock);
	}
	/* Add command buffers to submitted list */
	{
		P_Lock lock = P_LockE(&G.command_buffers_mutex);
		for (struct command_buffer *cb = cl->first_command_buffer; cb; cb = cb->next_in_command_list) {
			struct command_buffer_group *group = cb->group;
			cb->submitted_cq = cq;
			cb->submitted_fence_target = submit_fence_target;
			cb->prev_submitted = group->last_submitted;
			if (group->last_submitted) {
				group->last_submitted->next_submitted = cb;
			} else {
				group->first_submitted = cb;
			}
			group->last_submitted = cb;
		}
		P_Unlock(&lock);
	}
	/* Add command list to pool submitted list */
	P_Unlock(&cl->global_record_lock);
	cl->submitted_fence_target = submit_fence_target;
	{
		P_Lock lock = P_LockE(&pool->mutex);
		cl->prev_submitted = pool->last_submitted_command_list;
		if (pool->last_submitted_command_list) {
			pool->last_submitted_command_list->next_submitted = cl;
		} else {
			pool->first_submitted_command_list = cl;
		}
		pool->last_submitted_command_list = cl;
		P_Unlock(&lock);
	}
	return submit_fence_target;
}
/* ========================== *
* Command descriptor heap (GPU / shader visible descriptor heap)
* ========================== */
internal struct command_descriptor_heap *command_list_push_descriptor_heap(struct command_list *cl, struct cpu_descriptor_heap *dh_cpu)
{
	__prof;
	/* Snapshot the CPU-side descriptor heap into a shader-visible GPU heap
	 * owned by this command list. GPU heaps are recycled once the GPU has
	 * retired the submission they were last attached to. */
	Assert(dh_cpu->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); /* Src heap must have expected type */
	/* Allocate GPU heap */
	struct command_descriptor_heap *cdh = 0;
	ID3D12DescriptorHeap *old_heap = 0;
	D3D12_CPU_DESCRIPTOR_HANDLE old_start_cpu_handle = ZI;
	D3D12_GPU_DESCRIPTOR_HANDLE old_start_gpu_handle = ZI;
	{
		P_Lock lock = P_LockE(&G.command_descriptor_heaps_mutex);
		/* Find first heap ready for reuse */
		for (struct command_descriptor_heap *tmp = G.first_submitted_command_descriptor_heap; tmp; tmp = tmp->next_submitted) {
			/* TODO: Cache completed fence values */
			u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence);
			if (completed_fence_value >= tmp->submitted_fence_target) {
				cdh = tmp;
				break;
			}
		}
		if (cdh) {
			/* Remove from submitted list */
			old_heap = cdh->heap;
			old_start_cpu_handle = cdh->start_cpu_handle;
			old_start_gpu_handle = cdh->start_gpu_handle;
			struct command_descriptor_heap *prev = cdh->prev_submitted;
			struct command_descriptor_heap *next = cdh->next_submitted;
			if (prev) {
				prev->next_submitted = next;
			} else {
				G.first_submitted_command_descriptor_heap = next;
			}
			if (next) {
				next->prev_submitted = prev;
			} else {
				G.last_submitted_command_descriptor_heap = prev;
			}
		} else {
			/* No available heap available for reuse, allocate new */
			cdh = PushStructNoZero(G.command_descriptor_heaps_arena, struct command_descriptor_heap);
		}
		P_Unlock(&lock);
	}
	ZeroStruct(cdh);
	if (old_heap) {
		/* Reuse the recycled D3D12 heap and its cached start handles */
		cdh->heap = old_heap;
		cdh->start_cpu_handle = old_start_cpu_handle;
		cdh->start_gpu_handle = old_start_gpu_handle;
	} else {
		D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
		desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
		desc.NumDescriptors = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
		desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
		HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&cdh->heap);
		if (FAILED(hr)) {
			P_Panic(Lit("Failed to create GPU descriptor heap"));
		}
		ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cdh->heap, &cdh->start_cpu_handle);
		ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(cdh->heap, &cdh->start_gpu_handle);
	}
	/* CopyCPU heap */
	{
		/* Shared lock: copying only reads the CPU heap's reserved range */
		P_Lock lock = P_LockS(&dh_cpu->mutex);
		ID3D12Device_CopyDescriptorsSimple(G.device, dh_cpu->num_descriptors_reserved, cdh->start_cpu_handle, dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
		P_Unlock(&lock);
	}
	/* Insert into command list */
	cdh->next_in_command_list = cl->first_command_descriptor_heap;
	cl->first_command_descriptor_heap = cdh;
	return cdh;
}
/* ========================== *
* Command buffer
* ========================== */
internal u64 command_buffer_hash_from_size(u64 size)
{
	/* Scramble the bucket capacity so command-buffer groups spread evenly
	 * across the dict's slots */
	return RandU64FromSeed(size);
}
internal u64 align_up_pow2(u64 v)
{
	/* Round `v` up to the next power of two. 0 maps to 0 and exact powers
	 * of two map to themselves. Classic bit-smearing: propagate the top
	 * set bit of (v - 1) into every lower bit, then add one. */
	if (v == 0) {
		return 0;
	}
	u64 x = v - 1;
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
	x |= x >> 32;
	return x + 1;
}
/* Convenience wrapper: derives byte length and stride from the element
 * pointer's type; tolerates a NULL `elems` by using length 0 / stride 1 */
#define command_list_push_buffer(cl, count, elems) _command_list_push_buffer((cl), count * ((elems) ? sizeof(*(elems)) : 0), (elems), (elems) ? sizeof(*(elems)) : 1)
internal struct command_buffer *_command_list_push_buffer(struct command_list *cl, u64 data_len, void *data, u64 data_stride)
{
	__prof;
	/* Upload `data` into a pooled upload-heap buffer exposed to shaders as
	 * a structured-buffer SRV, and attach it to `cl` so it is recycled
	 * once the GPU retires this command list's submission. Buffers are
	 * pooled in groups keyed by power-of-two capacity. */
	/* Data length should be a multiple of stride */
	Assert(data_len % data_stride == 0);
	/* Determine size */
	u64 size = MaxU64(DX12_COMMAND_BUFFER_MIN_SIZE, align_up_pow2(data_len));
	/* Allocate buffer */
	struct command_buffer_group *cb_group = 0;
	struct command_buffer *cb = 0;
	struct dx12_resource *r = 0;
	{
		P_Lock lock = P_LockE(&G.command_buffers_mutex);
		{
			/* Find (or create) the pool group for this capacity */
			u64 group_hash = command_buffer_hash_from_size(size);
			DictEntry *cb_group_entry = EnsureDictEntry(G.command_buffers_arena, G.command_buffers_dict, group_hash);
			cb_group = (struct command_buffer_group *)cb_group_entry->value;
			if (!cb_group) {
				/* Create group */
				cb_group = PushStruct(G.command_buffers_arena, struct command_buffer_group);
				cb_group_entry->value = (u64)cb_group;
			}
		}
		/* Find first command buffer ready for reuse */
		for (struct command_buffer *tmp = cb_group->first_submitted; tmp; tmp = tmp->next_submitted) {
			/* TODO: Cache completed fence values */
			u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence);
			if (completed_fence_value >= tmp->submitted_fence_target) {
				cb = tmp;
				break;
			}
		}
		if (cb) {
			/* Remove from submitted list; keep its D3D12 resource */
			r = cb->resource;
			struct command_buffer *prev = cb->prev_submitted;
			struct command_buffer *next = cb->next_submitted;
			if (prev) {
				prev->next_submitted = next;
			} else {
				cb_group->first_submitted = next;
			}
			if (next) {
				next->prev_submitted = prev;
			} else {
				cb_group->last_submitted = prev;
			}
		} else {
			/* Allocate new */
			cb = PushStructNoZero(G.command_buffers_arena, struct command_buffer);
		}
		P_Unlock(&lock);
	}
	ZeroStruct(cb);
	cb->group = cb_group;
	cb->size = data_len;
	/* Create upload heap */
	if (!r) {
		D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
		heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
		heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
		D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
		D3D12_RESOURCE_DESC desc = ZI;
		desc.Flags = 0;
		desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
		desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
		desc.Format = DXGI_FORMAT_UNKNOWN;
		desc.Alignment = 0;
		desc.Width = size;
		desc.Height = 1;
		desc.DepthOrArraySize = 1;
		desc.MipLevels = 1;
		desc.SampleDesc.Count = 1;
		desc.SampleDesc.Quality = 0;
		/* Upload-heap resources must start in GENERIC_READ */
		D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;
		r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state);
		r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
	}
	cb->resource = r;
	/* (Re)write the SRV: the element count/stride differ per push even
	 * when the underlying resource is reused */
	{
		struct D3D12_SHADER_RESOURCE_VIEW_DESC desc = ZI;
		desc.Format = DXGI_FORMAT_UNKNOWN;
		desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
		desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
		desc.Buffer.FirstElement = 0;
		/* NOTE(review): data_len/data_stride is u64 narrowed through
		 * MaxU32 into the u32 NumElements — fine while buffers stay below
		 * 4G elements; confirm that's an accepted limit */
		desc.Buffer.NumElements = MaxU32(data_len / data_stride, 1);
		desc.Buffer.StructureByteStride = data_stride;
		desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE;
		ID3D12Device_CreateShaderResourceView(G.device, r->resource, &desc, r->srv_descriptor->handle);
	}
	/* Write data to resource */
	{
		/* Empty read range: CPU will not read the mapped memory */
		D3D12_RANGE read_range = ZI;
		void *dst = 0;
		HRESULT hr = ID3D12Resource_Map(cb->resource->resource, 0, &read_range, &dst);
		if (FAILED(hr) || !dst) {
			/* TODO: Don't panic */
			P_Panic(Lit("Failed to map command buffer resource"));
		}
		CopyBytes(dst, data, data_len);
		ID3D12Resource_Unmap(cb->resource->resource, 0, 0);
	}
	/* Insert into command list */
	cb->next_in_command_list = cl->first_command_buffer;
	cl->first_command_buffer = cb;
	return cb;
}
/* ========================== *
* Wait job
* ========================== */
/* Arguments for dx12_wait_fence_job: block until `fence` reaches `target`. */
struct dx12_wait_fence_job_sig {
	ID3D12Fence *fence; /* fence to wait on (not owned by the job) */
	u64 target;         /* completed value that satisfies the wait */
};
/* Job body: block the calling job worker until the fence reaches the target
 * value. Cheap early-out when the fence has already passed the target. */
internal P_JobDef(dx12_wait_fence_job, job)
{
	__prof;
	struct dx12_wait_fence_job_sig *sig = job.sig;
	ID3D12Fence *fence = sig->fence;
	u64 target = sig->target;
	if (ID3D12Fence_GetCompletedValue(fence) < target) {
		/* TODO: Pool events */
		HANDLE event = CreateEvent(0, 0, 0, 0);
		if (event) {
			ID3D12Fence_SetEventOnCompletion(fence, target, event);
			WaitForSingleObject(event, INFINITE);
			CloseHandle(event);
		} else {
			/* CreateEvent failed (was previously unchecked, which skipped the
			 * wait entirely): with a null event handle SetEventOnCompletion
			 * blocks synchronously until the fence reaches `target`. */
			ID3D12Fence_SetEventOnCompletion(fence, target, 0);
		}
	}
}
/* ========================== *
* Texture
* ========================== */
/* Allocate a GPU texture in the default heap, create its SRV (and UAV/RTV when
 * GP_TEXTURE_FLAG_TARGETABLE is set), and optionally upload `initial_data`
 * synchronously via the background copy queue. Panics on bad size/format. */
G_Resource *gp_texture_alloc(G_TextureFormat format, u32 flags, Vec2I32 size, void *initial_data)
{
	__prof;
	if (size.x <= 0 || size.y <= 0) {
		P_Panic(Lit("Tried to create texture with dimension <= 0"));
	}
	/* Map the API texture format onto its DXGI equivalent */
	LocalPersist const DXGI_FORMAT formats[] = {
		[GP_TEXTURE_FORMAT_R8_UNORM] = DXGI_FORMAT_R8_UNORM,
		[GP_TEXTURE_FORMAT_R8G8B8A8_UNORM] = DXGI_FORMAT_R8G8B8A8_UNORM,
		[GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB,
		[GP_TEXTURE_FORMAT_R16G16B16A16_FLOAT] = DXGI_FORMAT_R16G16B16A16_FLOAT
	};
	DXGI_FORMAT dxgi_format = ZI;
	if (format >= 0 && format < (i32)countof(formats)) {
		dxgi_format = formats[format];
	}
	/* BUGFIX: previously only format == 0 was rejected; an out-of-range format
	 * left dxgi_format as DXGI_FORMAT_UNKNOWN (0) and fell through. Checking
	 * the resolved format covers both cases. */
	if (dxgi_format == DXGI_FORMAT_UNKNOWN) {
		P_Panic(Lit("Tried to create texture with unknown format"));
	}
	D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
	heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
	heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
	D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
	D3D12_RESOURCE_DESC desc = ZI;
	desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
	desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
	desc.Format = dxgi_format;
	desc.Alignment = 0;
	desc.Width = size.x;
	desc.Height = size.y;
	desc.DepthOrArraySize = 1;
	desc.MipLevels = 1;
	desc.SampleDesc.Count = 1;
	desc.SampleDesc.Quality = 0;
	/* BUGFIX: resource flags must be set BEFORE the resource is created;
	 * previously they were ORed into desc after dx12_resource_alloc, so the
	 * UAV/RTV below were created on a resource that did not allow them. */
	if (flags & GP_TEXTURE_FLAG_TARGETABLE) {
		desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
	}
	D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
	struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state);
	r->texture_size = size;
	r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
	ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle);
	if (flags & GP_TEXTURE_FLAG_TARGETABLE) {
		r->uav_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
		r->rtv_descriptor = descriptor_alloc(G.rtv_heap);
		ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, 0, 0, r->uav_descriptor->handle);
		ID3D12Device_CreateRenderTargetView(G.device, r->resource, 0, r->rtv_descriptor->handle);
	}
	/* Upload texture */
	if (initial_data) {
		/* TODO: Make wait optional */
		P_Counter counter = ZI;
		struct dx12_upload_job_sig sig = ZI;
		sig.resource = r;
		sig.data = initial_data;
		P_Run(1, dx12_upload_job, &sig, P_Pool_Inherit, P_Priority_Inherit, &counter);
		P_WaitOnCounter(&counter);
	}
	return (G_Resource *)r;
}
/* Return the pixel dimensions cached on the texture at creation time. */
Vec2I32 gp_texture_get_size(G_Resource *resource)
{
	/* The opaque public handle is really a dx12_resource. */
	return ((struct dx12_resource *)resource)->texture_size;
}
/* ========================== *
* Upload
* ========================== */
/* Job body: upload sig->data into sig->resource (which must be in
 * COPY_DEST state) via a transient upload-heap buffer on the background
 * copy queue, then block until the GPU copy completes so the upload heap
 * can be released. */
internal P_JobDef(dx12_upload_job, job)
{
	struct dx12_upload_job_sig *sig = job.sig;
	struct dx12_resource *r = sig->resource;
	void *data = sig->data;
	Assert(r->state == D3D12_RESOURCE_STATE_COPY_DEST);
	D3D12_RESOURCE_DESC desc = ZI;
	ID3D12Resource_GetDesc(r->resource, &desc);
	{
		u64 upload_size = 0;
		u64 upload_row_size = 0;
		u32 upload_num_rows = 0;
		D3D12_PLACED_SUBRESOURCE_FOOTPRINT placed_footprint = ZI;
		ID3D12Device_GetCopyableFootprints(G.device, &desc, 0, 1, 0, &placed_footprint, &upload_num_rows, &upload_row_size, &upload_size);
		D3D12_SUBRESOURCE_FOOTPRINT footprint = placed_footprint.Footprint;
		/* Create upload heap */
		struct dx12_resource *upload = 0;
		{
			D3D12_HEAP_PROPERTIES upload_heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
			upload_heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
			upload_heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
			D3D12_HEAP_FLAGS upload_heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
			D3D12_RESOURCE_DESC upload_desc = ZI;
			upload_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
			upload_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
			upload_desc.Format = DXGI_FORMAT_UNKNOWN;
			upload_desc.Alignment = 0;
			upload_desc.Width = upload_size;
			upload_desc.Height = 1;
			upload_desc.DepthOrArraySize = 1;
			upload_desc.MipLevels = 1;
			upload_desc.SampleDesc.Count = 1;
			upload_desc.SampleDesc.Quality = 0;
			D3D12_RESOURCE_STATES upload_initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;
			upload = dx12_resource_alloc(upload_heap_props, upload_heap_flags, upload_desc, upload_initial_state);
		}
		struct command_queue *cq = G.command_queues[DX12_QUEUE_COPY_BACKGROUND];
		struct command_list *cl = command_list_open(cq->cl_pool);
		{
			/* Copy to upload heap */
			{
				D3D12_RANGE read_range = ZI;
				void *mapped = 0;
				HRESULT hr = ID3D12Resource_Map(upload->resource, 0, &read_range, &mapped);
				if (FAILED(hr) || !mapped) {
					/* TODO: Don't panic */
					P_Panic(Lit("Failed to map texture upload resource"));
				}
				u8 *dst = (u8 *)mapped + placed_footprint.Offset;
				u8 *src = data;
				/* BUGFIX: destination slices advance by the aligned RowPitch
				 * while source slices advance by the tightly-packed row size;
				 * the old code applied one packed (and u32-truncated) z offset
				 * to BOTH sides, corrupting uploads whenever
				 * RowPitch != upload_row_size and DepthOrArraySize > 1. */
				u64 dst_slice_size = (u64)footprint.RowPitch * upload_num_rows;
				u64 src_slice_size = upload_row_size * upload_num_rows;
				for (u32 z = 0; z < desc.DepthOrArraySize; ++z) {
					u8 *dst_slice = dst + z * dst_slice_size;
					u8 *src_slice = src + z * src_slice_size;
					for (u32 y = 0; y < upload_num_rows; ++y) {
						CopyBytes(dst_slice + y * footprint.RowPitch, src_slice + y * upload_row_size, upload_row_size);
					}
				}
				ID3D12Resource_Unmap(upload->resource, 0, 0);
			}
			/* Copy from upload heap to texture */
			{
				__profnc_dx12(cl->cq->prof, cl->cl, "Upload texture", Rgb32F(0.2, 0.5, 0.2));
				D3D12_TEXTURE_COPY_LOCATION dst_loc = {
					.pResource = r->resource,
					.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
					.SubresourceIndex = 0,
				};
				D3D12_TEXTURE_COPY_LOCATION src_loc = {
					.pResource = upload->resource,
					.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
					.PlacedFootprint = placed_footprint,
				};
				ID3D12GraphicsCommandList_CopyTextureRegion(cl->cl, &dst_loc, 0, 0, 0, &src_loc, 0);
			}
		} u64 fence_target = command_list_close(cl);
		/* Wait on fence so we know it's safe to release upload heap */
		if (ID3D12Fence_GetCompletedValue(cq->submit_fence) < fence_target) {
			struct dx12_wait_fence_job_sig wait_sig = ZI;
			wait_sig.fence = cq->submit_fence;
			wait_sig.target = fence_target;
			P_Counter counter = ZI;
			P_Run(1, dx12_wait_fence_job, &wait_sig, P_Pool_Floating, P_Priority_Low, &counter);
			P_WaitOnCounter(&counter);
		}
		/* Release upload heap now */
		dx12_resource_release_now(upload);
	}
}
/* ========================== *
* Run utils
* ========================== */
/* Bind a pipeline's PSO and its root signature on the matching bind point,
 * and remember it as the command list's current pipeline. */
internal void command_list_set_pipeline(struct command_list *cl, struct pipeline *pipeline)
{
	ID3D12GraphicsCommandList_SetPipelineState(cl->cl, pipeline->pso);
	if (!pipeline->is_gfx) {
		ID3D12GraphicsCommandList_SetComputeRootSignature(cl->cl, pipeline->rootsig);
	} else {
		ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, pipeline->rootsig);
	}
	cl->cur_pipeline = pipeline;
}
/* Upload a root-constant "sig" struct to root parameter 0 of the current
 * pipeline. `size` is in bytes and must be a multiple of 16 and <= 256. */
internal void command_list_set_sig(struct command_list *cl, void *src, u32 size)
{
	__prof;
	Assert(size % 16 == 0); /* Root constant structs must pad to 16 bytes */
	Assert(size <= 256); /* Only 64 32-bit root constants allowed in signature */
	u32 num32bit = size / 4;
	/* Upload all constants in one call instead of issuing one
	 * SetRoot32BitConstant (plus a CopyBytes) per dword. */
	if (cl->cur_pipeline->is_gfx) {
		ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(cl->cl, 0, num32bit, src, 0);
	} else {
		ID3D12GraphicsCommandList_SetComputeRoot32BitConstants(cl->cl, 0, num32bit, src, 0);
	}
}
/* Build a full-depth-range D3D12 viewport covering rect `r`. */
internal struct D3D12_VIEWPORT viewport_from_rect(Rect r)
{
	struct D3D12_VIEWPORT vp = {
		.TopLeftX = r.x,
		.TopLeftY = r.y,
		.Width = r.width,
		.Height = r.height,
		.MinDepth = 0.0f,
		.MaxDepth = 1.0f,
	};
	return vp;
}
/* Build a D3D12 scissor rect (left/top/right/bottom) from rect `r`. */
internal D3D12_RECT scissor_from_rect(Rect r)
{
	D3D12_RECT rc = {
		.left = r.x,
		.top = r.y,
		.right = r.x + r.width,
		.bottom = r.y + r.height,
	};
	return rc;
}
/* Make a vertex buffer view over a command buffer's whole resource. */
internal D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer *cb, u32 vertex_size)
{
	D3D12_VERTEX_BUFFER_VIEW view = {
		.BufferLocation = cb->resource->gpu_address,
		.SizeInBytes = cb->size,
		.StrideInBytes = vertex_size,
	};
	return view;
}
/* Make an index buffer view over a command buffer's whole resource. */
internal D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *cb, DXGI_FORMAT format)
{
	D3D12_INDEX_BUFFER_VIEW view = {
		.BufferLocation = cb->resource->gpu_address,
		.Format = format,
		.SizeInBytes = cb->size,
	};
	return view;
}
/* Allocate a default-heap 2D gbuffer texture usable as SRV, UAV and render
 * target, and create one descriptor of each kind for it. */
internal struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, Vec2I32 size, D3D12_RESOURCE_STATES initial_state)
{
	__prof;
	D3D12_HEAP_PROPERTIES heap_props = {
		.Type = D3D12_HEAP_TYPE_DEFAULT,
		.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
		.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
	};
	D3D12_RESOURCE_DESC desc = {
		.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
		.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
		.Format = format,
		.Alignment = 0,
		.Width = size.x,
		.Height = size.y,
		.DepthOrArraySize = 1,
		.MipLevels = 1,
		.SampleDesc = { .Count = 1, .Quality = 0 },
		.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
	};
	struct dx12_resource *r = dx12_resource_alloc(heap_props, D3D12_HEAP_FLAG_CREATE_NOT_ZEROED, desc, initial_state);
	r->texture_size = size;
	/* One view of each kind off the shared descriptor heaps */
	r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
	r->uav_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
	r->rtv_descriptor = descriptor_alloc(G.rtv_heap);
	ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle);
	ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, 0, 0, r->uav_descriptor->handle);
	ID3D12Device_CreateRenderTargetView(G.device, r->resource, 0, r->rtv_descriptor->handle);
	return r;
}
/* Calculate the view-projection matrix: a top-left-origin orthographic
 * projection over the viewport composed with the view transform. */
Inline Mat4x4 calculate_vp(Xform view, f32 viewport_width, f32 viewport_height)
{
	Mat4x4 ortho = Mat4x4FromOrtho(0.0, viewport_width, viewport_height, 0.0, -1.0, 1.0);
	return MulMat4x4(ortho, Mat4x4FromXform(view));
}
/* GPU-visible handle for a descriptor: heap base plus index times the
 * per-heap-type descriptor increment. */
internal D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descriptor *descriptor, struct command_descriptor_heap *cdh)
{
	struct D3D12_GPU_DESCRIPTOR_HANDLE handle = {
		.ptr = cdh->start_gpu_handle.ptr + descriptor->index * G.desc_sizes[descriptor->heap->type],
	};
	return handle;
}
/* ========================== *
* Render sig
* ========================== */
/* Per-frame render submission state: command descriptors accumulated via
 * gp_push_render_cmd, plus the GPU resources the render passes target. */
struct render_sig {
	Arena *arena; /* arena this struct itself lives in */
	RandState rand; /* RNG state used for per-frame shade seeds */
	u32 frame_index; /* incremented once per gp_run_render call */
	/* Material instances (count + packed material_instance_desc array) */
	u32 num_material_instance_descs;
	Arena *material_instance_descs_arena;
	/* Ui instances (count + packed ui_rect_instance_desc array) */
	u32 num_ui_rect_instance_descs;
	Arena *ui_rect_instance_descs_arena;
	/* UI shapes (counts are derived from arena positions) */
	Arena *ui_shape_verts_arena; /* k_shape_vert array */
	Arena *ui_shape_indices_arena; /* u32 index array */
	/* Grids (count + packed material_grid_desc array) */
	u32 num_material_grid_descs;
	Arena *material_grid_descs_arena;
	/* Resources — (re)allocated in gp_run_render when sizes change */
	struct dx12_resource *albedo; /* gbuffer, R8G8B8A8_UNORM at render_size */
	struct dx12_resource *emittance; /* gbuffer, R16G16B16A16_FLOAT at render_size */
	struct dx12_resource *emittance_flood_read; /* flood ping buffer, R16G16_UINT */
	struct dx12_resource *emittance_flood_target; /* flood pong buffer, R16G16_UINT */
	struct dx12_resource *shade_read; /* shade ping buffer, R16G16B16A16_FLOAT */
	struct dx12_resource *shade_target; /* shade pong buffer, R16G16B16A16_FLOAT */
	struct dx12_resource *ui_target; /* UI render target, R8G8B8A8_UNORM at ui_size */
};
/* CPU-side description of one material (world-space) draw instance. */
struct material_instance_desc {
	Xform xf; /* instance transform */
	u32 texture_id; /* SRV descriptor index, or 0xFFFFFFFF when untextured */
	ClipRect clip; /* uv clip rect (p0/p1 corners) */
	u32 tint; /* packed sRGB tint color */
	b32 is_light; /* nonzero when the instance emits light */
	Vec3 light_emittance; /* sRGB emittance when is_light */
	u32 grid_id; /* push-grid cmd id minus one (0xFFFFFFFF when the id was 0) */
};
/* CPU-side description of one UI rect draw instance. */
struct ui_rect_instance_desc {
	Xform xf; /* instance transform */
	u32 texture_id; /* SRV descriptor index, or 0xFFFFFFFF when untextured */
	ClipRect clip; /* uv clip rect (p0/p1 corners) */
	u32 tint; /* packed sRGB tint color */
};
/* CPU-side description of one background grid pushed via PUSH_GRID. */
struct material_grid_desc {
	f32 line_thickness;
	f32 line_spacing;
	Vec2 offset;
	u32 bg0_color; /* packed sRGB colors */
	u32 bg1_color;
	u32 line_color;
	u32 x_color; /* axis colors */
	u32 y_color;
};
/* Allocate a render_sig inside its own arena, plus one growable arena per
 * command category. */
internal struct render_sig *render_sig_alloc(void)
{
	__prof;
	Arena *sig_arena = AllocArena(Mebi(64));
	struct render_sig *sig = PushStruct(sig_arena, struct render_sig);
	sig->arena = sig_arena;
	sig->material_instance_descs_arena = AllocArena(Gibi(1));
	sig->material_grid_descs_arena = AllocArena(Gibi(1));
	sig->ui_rect_instance_descs_arena = AllocArena(Gibi(1));
	sig->ui_shape_verts_arena = AllocArena(Gibi(1));
	sig->ui_shape_indices_arena = AllocArena(Gibi(1));
	return sig;
}
/* Reset all per-frame command storage: zero the counts and rewind every
 * category arena. Allocated GPU resources are untouched. */
internal void render_sig_reset(struct render_sig *sig)
{
	__prof;
	sig->num_material_instance_descs = 0;
	sig->num_ui_rect_instance_descs = 0;
	sig->num_material_grid_descs = 0;
	ResetArena(sig->material_instance_descs_arena);
	ResetArena(sig->ui_rect_instance_descs_arena);
	ResetArena(sig->ui_shape_verts_arena);
	ResetArena(sig->ui_shape_indices_arena);
	ResetArena(sig->material_grid_descs_arena);
}
/* Public wrapper: allocate a render sig and hand it back as an opaque handle. */
G_RenderSig *gp_render_sig_alloc(void)
{
	__prof;
	return (G_RenderSig *)render_sig_alloc();
}
/* Record one render command into the sig's per-category arenas. Returns a
 * 1-based command id for MATERIAL / UI_RECT / PUSH_GRID commands, and 0 for
 * shape draws, unknown kinds, or a null sig. */
u32 gp_push_render_cmd(G_RenderSig *render_sig, G_RenderCmdDesc *cmd_desc)
{
	struct render_sig *sig = (struct render_sig *)render_sig;
	u32 cmd_id = 0;
	if (!sig) {
		return cmd_id;
	}
	switch (cmd_desc->kind) {
	default: break;
	case GP_RENDER_CMD_KIND_DRAW_MATERIAL:
	{
		struct dx12_resource *tex = (struct dx12_resource *)cmd_desc->material.texture;
		struct material_instance_desc *d = PushStruct(sig->material_instance_descs_arena, struct material_instance_desc);
		d->xf = cmd_desc->material.xf;
		d->texture_id = tex ? tex->srv_descriptor->index : 0xFFFFFFFF;
		d->clip = cmd_desc->material.clip;
		d->tint = cmd_desc->material.tint;
		d->is_light = cmd_desc->material.is_light;
		d->light_emittance = cmd_desc->material.light_emittance;
		d->grid_id = cmd_desc->material.grid_cmd_id - 1;
		cmd_id = ++sig->num_material_instance_descs;
	} break;
	case GP_RENDER_CMD_KIND_DRAW_UI_RECT:
	{
		struct dx12_resource *tex = (struct dx12_resource *)cmd_desc->ui_rect.texture;
		struct ui_rect_instance_desc *d = PushStruct(sig->ui_rect_instance_descs_arena, struct ui_rect_instance_desc);
		d->xf = cmd_desc->ui_rect.xf;
		d->texture_id = tex ? tex->srv_descriptor->index : 0xFFFFFFFF;
		d->clip = cmd_desc->ui_rect.clip;
		d->tint = cmd_desc->ui_rect.tint;
		cmd_id = ++sig->num_ui_rect_instance_descs;
	} break;
	case GP_RENDER_CMD_KIND_DRAW_UI_SHAPE:
	{
		/* Shape draws accumulate geometry only; no command id is produced. */
		u32 num_verts = cmd_desc->ui_shape.vertices.count;
		u32 num_indices = cmd_desc->ui_shape.indices.count;
		u32 packed_color = K_UintFromU32(cmd_desc->ui_shape.color);
		struct k_shape_vert *verts = PushStructsNoZero(sig->ui_shape_verts_arena, struct k_shape_vert, num_verts);
		u32 *indices = PushStructsNoZero(sig->ui_shape_indices_arena, u32, num_indices);
		/* Index of this batch's first vertex within the whole verts arena,
		 * used to rebase the caller's local indices. */
		u32 base_vert = (u32)(verts - (struct k_shape_vert *)ArenaBase(sig->ui_shape_verts_arena));
		for (u32 i = 0; i < num_verts; ++i) {
			verts[i].pos = K_Float2FromV2(cmd_desc->ui_shape.vertices.points[i]);
			verts[i].color_srgb = packed_color;
		}
		for (u32 i = 0; i < num_indices; ++i) {
			indices[i] = cmd_desc->ui_shape.indices.indices[i] + base_vert;
		}
	} break;
	case GP_RENDER_CMD_KIND_PUSH_GRID:
	{
		struct material_grid_desc *d = PushStruct(sig->material_grid_descs_arena, struct material_grid_desc);
		d->line_thickness = cmd_desc->grid.line_thickness;
		d->line_spacing = cmd_desc->grid.line_spacing;
		d->offset = cmd_desc->grid.offset;
		d->bg0_color = cmd_desc->grid.bg0_color;
		d->bg1_color = cmd_desc->grid.bg1_color;
		d->line_color = cmd_desc->grid.line_color;
		d->x_color = cmd_desc->grid.x_color;
		d->y_color = cmd_desc->grid.y_color;
		cmd_id = ++sig->num_material_grid_descs;
	} break;
	}
	return cmd_id;
}
/* ========================== *
* Render
* ========================== */
G_Resource *gp_run_render(G_RenderSig *gp_render_sig, G_RenderParams params)
{
__prof;
TempArena scratch = BeginScratchNoConflict();
struct render_sig *rsig = (struct render_sig *)gp_render_sig;
++rsig->frame_index;
Vec2I32 ui_size = VEC2I32(MaxI32(params.ui_size.x, 1), MaxI32(params.ui_size.y, 1));
Vec2I32 render_size = VEC2I32(MaxI32(params.render_size.x, 1), MaxI32(params.render_size.y, 1));
Xform world_to_render_xf = params.world_to_render_xf;
Xform render_to_ui_xf = params.render_to_ui_xf;
Rect ui_viewport = RectFromVec2(VEC2(0, 0), VEC2(ui_size.x, ui_size.y));
Rect render_viewport = RectFromVec2(VEC2(0, 0), VEC2(render_size.x, render_size.y));
/* Allocate render buffers */
if (rsig->shade_target && !EqVec2I32(render_size, rsig->shade_target->texture_size)) {
__profn("Release sig resources");
fenced_release(rsig->albedo, FENCED_RELEASE_KIND_RESOURCE);
fenced_release(rsig->emittance, FENCED_RELEASE_KIND_RESOURCE);
fenced_release(rsig->emittance_flood_read, FENCED_RELEASE_KIND_RESOURCE);
fenced_release(rsig->emittance_flood_target, FENCED_RELEASE_KIND_RESOURCE);
fenced_release(rsig->shade_read, FENCED_RELEASE_KIND_RESOURCE);
fenced_release(rsig->shade_target, FENCED_RELEASE_KIND_RESOURCE);
rsig->shade_target = 0;
}
if (!rsig->shade_target) {
__profn("Allocate sig resources");
rsig->albedo = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, render_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
rsig->emittance = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, render_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
rsig->emittance_flood_read = gbuff_alloc(DXGI_FORMAT_R16G16_UINT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
rsig->emittance_flood_target = gbuff_alloc(DXGI_FORMAT_R16G16_UINT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
rsig->shade_read = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
rsig->shade_target = gbuff_alloc(DXGI_FORMAT_R16G16B16A16_FLOAT, render_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
}
/* Allocate ui buffers */
if (rsig->ui_target && !EqVec2I32(ui_size, rsig->ui_target->texture_size)) {
fenced_release(rsig->ui_target, FENCED_RELEASE_KIND_RESOURCE);
rsig->ui_target = 0;
}
if (!rsig->ui_target) {
rsig->ui_target = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, ui_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
}
struct pipeline_scope *pipeline_scope = pipeline_scope_begin();
struct pipeline *material_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_material"));
struct pipeline *flood_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_flood"));
struct pipeline *shade_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_shade"));
struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_blit"));
struct pipeline *ui_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_ui"));
struct pipeline *shape_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_shape"));
struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
struct command_list *cl = command_list_open(cq->cl_pool);
{
__profn("Run render");
__profnc_dx12(cl->cq->prof, cl->cl, "Run render", Rgb32F(0.5, 0.2, 0.2));
Mat4x4 world_to_render_vp_matrix = calculate_vp(world_to_render_xf, render_viewport.width, render_viewport.height);
Mat4x4 ui_vp_matrix = calculate_vp(XformIdentity, ui_viewport.width, ui_viewport.height);
Mat4x4 blit_vp_matrix = ZI;
{
Xform xf = render_to_ui_xf;
xf = ScaleXform(xf, VEC2(render_size.x, render_size.y));
xf = TranslateXform(xf, VEC2(0.5, 0.5));
blit_vp_matrix = calculate_vp(xf, ui_viewport.width, ui_viewport.height);
}
/* Upload dummmy vert & index buffer */
/* TODO: Make these static */
/* Dummy vertex buffer */
LocalPersist u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
struct command_buffer *dummy_vertex_buffer = command_list_push_buffer(cl, 0, (u8 *)0);
struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, countof(quad_indices), quad_indices);
/* Process sig data into uploadable data */
struct k_material_instance *material_instances = PushStructsNoZero(scratch.arena, struct k_material_instance, rsig->num_material_instance_descs);
struct k_ui_instance *ui_rect_instances = PushStructsNoZero(scratch.arena, struct k_ui_instance, rsig->num_ui_rect_instance_descs);
struct k_material_grid *grids = PushStructsNoZero(scratch.arena, struct k_material_grid, rsig->num_material_grid_descs);
{
__profn("Process sig data");
/* Process material instances */
{
__profn("Process material instances");
for (u32 i = 0; i < rsig->num_material_instance_descs; ++i) {
struct material_instance_desc *desc = &((struct material_instance_desc *)ArenaBase(rsig->material_instance_descs_arena))[i];
struct k_material_instance *instance = &material_instances[i];
instance->tex_nurid = K_UintFromU32(desc->texture_id);
instance->grid_id = K_UintFromU32(desc->grid_id);
instance->xf = K_Float2x3FromXform(desc->xf);
instance->uv0 = K_Float2FromV2(desc->clip.p0);
instance->uv1 = K_Float2FromV2(desc->clip.p1);
instance->tint_srgb = K_UintFromU32(desc->tint);
instance->is_light = K_UintFromU32(desc->is_light);
instance->light_emittance_srgb = K_Float3FromV3(desc->light_emittance);
}
}
/* Process ui rect instances */
{
__profn("Process ui rect instances");
for (u32 i = 0; i < rsig->num_ui_rect_instance_descs; ++i) {
struct ui_rect_instance_desc *desc = &((struct ui_rect_instance_desc *)ArenaBase(rsig->ui_rect_instance_descs_arena))[i];
struct k_ui_instance *instance = &ui_rect_instances[i];
instance->tex_nurid = K_UintFromU32(desc->texture_id);
instance->xf = K_Float2x3FromXform(desc->xf);
instance->uv0 = K_Float2FromV2(desc->clip.p0);
instance->uv1 = K_Float2FromV2(desc->clip.p1);
instance->tint_srgb = K_UintFromU32(desc->tint);
}
}
/* Process grids */
{
__profn("Process grids");
for (u32 i = 0; i < rsig->num_material_grid_descs; ++i) {
struct material_grid_desc *desc = &((struct material_grid_desc *)ArenaBase(rsig->material_grid_descs_arena))[i];
struct k_material_grid *grid = &grids[i];
grid->line_thickness = K_FloatFromF32(desc->line_thickness);
grid->line_spacing = K_FloatFromF32(desc->line_spacing);
grid->offset = K_Float2FromV2(desc->offset);
grid->bg0_srgb = K_UintFromU32(desc->bg0_color);
grid->bg1_srgb = K_UintFromU32(desc->bg1_color);
grid->line_srgb = K_UintFromU32(desc->line_color);
grid->x_srgb = K_UintFromU32(desc->x_color);
grid->y_srgb = K_UintFromU32(desc->y_color);
}
}
}
/* Upload buffers */
u64 num_ui_shape_verts = rsig->ui_shape_verts_arena->pos / sizeof(struct k_shape_vert);
u64 num_ui_shape_indices = rsig->ui_shape_indices_arena->pos / sizeof(u32);
struct command_buffer *material_instance_buffer = command_list_push_buffer(cl, rsig->num_material_instance_descs, material_instances);
struct command_buffer *ui_rect_instance_buffer = command_list_push_buffer(cl, rsig->num_ui_rect_instance_descs, ui_rect_instances);
struct command_buffer *ui_shape_verts_buffer = command_list_push_buffer(cl, num_ui_shape_verts, (struct k_shape_vert *)ArenaBase(rsig->ui_shape_verts_arena));
struct command_buffer *ui_shape_indices_buffer = command_list_push_buffer(cl, num_ui_shape_indices, (u32 *)ArenaBase(rsig->ui_shape_indices_arena));
struct command_buffer *grid_buffer = command_list_push_buffer(cl, rsig->num_material_grid_descs, grids);
/* Upload descriptor heap */
struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap);
ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap };
ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps);
/* Prep for material pass */
{
/* Barrier */
{
struct dx12_resource_barrier_desc barriers[] = {
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->albedo, D3D12_RESOURCE_STATE_RENDER_TARGET },
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance, D3D12_RESOURCE_STATE_RENDER_TARGET }
};
D3D12_CPU_DESCRIPTOR_HANDLE rtvs[] = {
rsig->albedo->rtv_descriptor->handle,
rsig->emittance->rtv_descriptor->handle,
};
dx12_resource_barriers(cl->cl, countof(barriers), barriers);
ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, countof(rtvs), rtvs, 0, 0);
}
/* Clear */
{
__profn("Clear gbuffers");
__profnc_dx12(cl->cq->prof, cl->cl, "Clear gbuffers", Rgb32F(0.5, 0.2, 0.2));
f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, rsig->albedo->rtv_descriptor->handle, clear_color, 0, 0);
ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, rsig->emittance->rtv_descriptor->handle, clear_color, 0, 0);
}
}
/* Material pass */
if (material_pipeline->success) {
__profn("Material pass");
__profnc_dx12(cl->cq->prof, cl->cl, "Material pass", Rgb32F(0.5, 0.2, 0.2));
/* Bind pipeline */
command_list_set_pipeline(cl, material_pipeline);
/* Set Rasterizer State */
D3D12_VIEWPORT viewport = viewport_from_rect(render_viewport);
D3D12_RECT scissor = scissor_from_rect(render_viewport);
ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
/* Set sig */
struct k_material_sig sig = ZI;
sig.projection = K_Float4x4FromMat4x4(world_to_render_vp_matrix);
sig.instances_urid = K_UintFromU32(material_instance_buffer->resource->srv_descriptor->index);
sig.grids_urid = K_UintFromU32(grid_buffer->resource->srv_descriptor->index);
command_list_set_sig(cl, &sig, sizeof(sig));
/* Draw */
u32 instance_count = material_instance_buffer->size / sizeof(struct k_material_instance);
D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, instance_count, 0, 0, 0);
}
/* Prep for flood pass */
{
/* Barrier */
{
struct dx12_resource_barrier_desc barriers[] = {
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance_flood_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
{ D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->emittance_flood_read, 0 },
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance_flood_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }
};
dx12_resource_barriers(cl->cl, countof(barriers), barriers);
}
}
/* Flood pass */
if (flood_pipeline->success && !params.effects_disabled) {
__profn("Flood pass");
__profnc_dx12(cl->cq->prof, cl->cl, "Flood pass", Rgb32F(0.5, 0.2, 0.2));
/* Bind pipeline */
command_list_set_pipeline(cl, flood_pipeline);
i32 step_length = -1;
/* TODO: Remove this */
u64 max_steps = GetGstat(GSTAT_DEBUG_STEPS);
u64 step = 0;
while (step_length != 0 && step < max_steps) {
__profn("Flood step");
__profnc_dx12(cl->cq->prof, cl->cl, "Flood step", Rgb32F(0.5, 0.2, 0.2));
/* UAV barrier */
{
struct dx12_resource_barrier_desc barriers[] = {
{ D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->emittance_flood_read, 0 }
};
dx12_resource_barriers(cl->cl, countof(barriers), barriers);
}
/* Set sig */
struct k_flood_sig sig = ZI;
sig.step_len = K_IntFromI32(step_length);
sig.emittance_tex_urid = K_UintFromU32(rsig->emittance->srv_descriptor->index);
sig.read_flood_tex_urid = K_UintFromU32(rsig->emittance_flood_read->uav_descriptor->index);
sig.target_flood_tex_urid = K_UintFromU32(rsig->emittance_flood_target->uav_descriptor->index);
sig.tex_width = K_UintFromU32(render_size.x);
sig.tex_height = K_UintFromU32(render_size.y);
command_list_set_sig(cl, &sig, sizeof(sig));
/* Dispatch */
ID3D12GraphicsCommandList_Dispatch(cl->cl, (render_size.x + 7) / 8, (render_size.y + 7) / 8, 1);
/* Swap buffers */
struct dx12_resource *swp = rsig->emittance_flood_read;
rsig->emittance_flood_read = rsig->emittance_flood_target;
rsig->emittance_flood_target = swp;
/* Update step */
if (step_length == -1) {
step_length = MaxI32(render_size.x, render_size.y) / 2;
} else {
step_length /= 2;
}
++step;
}
}
/* Prep for shade pass */
{
/* Barrier */
{
struct dx12_resource_barrier_desc barriers[] = {
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->albedo, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->emittance_flood_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
{ D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->emittance_flood_read, 0 },
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->shade_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
{ D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->shade_read, 0 },
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->shade_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }
};
dx12_resource_barriers(cl->cl, countof(barriers), barriers);
}
/* Clear */
{
__profn("Clear shade target");
__profnc_dx12(cl->cq->prof, cl->cl, "Clear shade target", Rgb32F(0.5, 0.2, 0.2));
f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(cl->cl, gpu_handle_from_descriptor(rsig->shade_target->uav_descriptor, descriptor_heap), rsig->shade_target->uav_descriptor->handle, rsig->shade_target->resource, clear_color, 0, 0);
}
}
/* Shade pass */
if (shade_pipeline->success) {
__profn("Shade pass");
__profnc_dx12(cl->cq->prof, cl->cl, "Shade pass", Rgb32F(0.5, 0.2, 0.2));
/* Bind pipeline */
command_list_set_pipeline(cl, shade_pipeline);
u32 shade_flags = K_SHADE_FLAG_NONE;
if (params.effects_disabled) {
shade_flags |= K_SHADE_FLAG_DISABLE_EFFECTS;
}
/* Set sig */
struct k_shade_sig sig = ZI;
sig.flags = K_UintFromU32(shade_flags);
sig.tex_width = K_UintFromU32(render_size.x);
sig.tex_height = K_UintFromU32(render_size.y);
sig.frame_seed = K_Uint4FromU32((u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF),
(u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF),
(u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF),
(u32)(RandU64FromState(&rsig->rand) & 0xFFFFFFFF));
sig.frame_index = K_UintFromU32(rsig->frame_index);
sig.camera_offset = K_Float2FromV2(world_to_render_xf.og);
sig.albedo_tex_urid = K_UintFromU32(rsig->albedo->srv_descriptor->index);
sig.emittance_tex_urid = K_UintFromU32(rsig->emittance->srv_descriptor->index);
sig.emittance_flood_tex_urid = K_UintFromU32(rsig->emittance_flood_read->srv_descriptor->index);
sig.read_tex_urid = K_UintFromU32(rsig->shade_read->uav_descriptor->index);
sig.target_tex_urid = K_UintFromU32(rsig->shade_target->uav_descriptor->index);
command_list_set_sig(cl, &sig, sizeof(sig));
/* Dispatch */
ID3D12GraphicsCommandList_Dispatch(cl->cl, (render_size.x + 7) / 8, (render_size.y + 7) / 8, 1);
/* Swap */
struct dx12_resource *swp = rsig->shade_read;
rsig->shade_read = rsig->shade_target;
rsig->shade_target = swp;
}
/* Prep for UI pass */
{
/* Barrier */
{
struct dx12_resource_barrier_desc barriers[] = {
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->shade_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
{ D3D12_RESOURCE_BARRIER_TYPE_UAV, rsig->shade_read, 0 },
{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, rsig->ui_target, D3D12_RESOURCE_STATE_RENDER_TARGET }
};
dx12_resource_barriers(cl->cl, countof(barriers), barriers);
ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &rsig->ui_target->rtv_descriptor->handle, 0, 0);
}
/* Clear */
{
__profn("Clear ui target");
__profnc_dx12(cl->cq->prof, cl->cl, "Clear ui target", Rgb32F(0.5, 0.2, 0.2));
f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, rsig->ui_target->rtv_descriptor->handle, clear_color, 0, 0);
}
}
/* UI blit pass */
if (blit_pipeline->success) {
__profn("UI blit pass");
__profnc_dx12(cl->cq->prof, cl->cl, "UI blit pass", Rgb32F(0.5, 0.2, 0.2));
/* Bind pipeline */
command_list_set_pipeline(cl, blit_pipeline);
/* Set Rasterizer State */
D3D12_VIEWPORT viewport = viewport_from_rect(ui_viewport);
D3D12_RECT scissor = scissor_from_rect(ui_viewport);
ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
/* Set sig */
struct k_blit_sig sig = ZI;
sig.projection = K_Float4x4FromMat4x4(blit_vp_matrix);
sig.flags = K_UintFromU32(K_BLIT_FLAG_TONE_MAP | K_BLIT_FLAG_GAMMA_CORRECT);
sig.exposure = K_FloatFromF32(2.0);
sig.gamma = K_FloatFromF32((f32)2.2);
sig.tex_urid = K_UintFromU32(rsig->shade_read->uav_descriptor->index);
command_list_set_sig(cl, &sig, sizeof(sig));
/* Draw */
D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0);
}
/* UI rect pass */
if (ui_pipeline->success) {
__profn("UI rect pass");
__profnc_dx12(cl->cq->prof, cl->cl, "UI rect pass", Rgb32F(0.5, 0.2, 0.2));
/* Bind pipeline */
command_list_set_pipeline(cl, ui_pipeline);
/* Set Rasterizer State */
D3D12_VIEWPORT viewport = viewport_from_rect(ui_viewport);
D3D12_RECT scissor = scissor_from_rect(ui_viewport);
ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
/* Set sig */
struct k_ui_sig sig = ZI;
sig.projection = K_Float4x4FromMat4x4(ui_vp_matrix);
sig.instances_urid = K_UintFromU32(ui_rect_instance_buffer->resource->srv_descriptor->index);
command_list_set_sig(cl, &sig, sizeof(sig));
/* Draw */
u32 instance_count = ui_rect_instance_buffer->size / sizeof(struct k_ui_instance);
D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, instance_count, 0, 0, 0);
}
/* UI shape pass */
if (shape_pipeline->success) {
__profn("UI shape pass");
__profnc_dx12(cl->cq->prof, cl->cl, "UI shape pass", Rgb32F(0.5, 0.2, 0.2));
/* Bind pipeline */
command_list_set_pipeline(cl, shape_pipeline);
/* Set Rasterizer State */
D3D12_VIEWPORT viewport = viewport_from_rect(ui_viewport);
D3D12_RECT scissor = scissor_from_rect(ui_viewport);
ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
/* Set sig */
struct k_shape_sig sig = ZI;
sig.projection = K_Float4x4FromMat4x4(ui_vp_matrix);
sig.verts_urid = K_UintFromU32(ui_shape_verts_buffer->resource->srv_descriptor->index);
command_list_set_sig(cl, &sig, sizeof(sig));
/* Draw */
u32 index_count = ui_shape_indices_buffer->size / sizeof(u32);
D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(ui_shape_indices_buffer, DXGI_FORMAT_R32_UINT);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, index_count, 1, 0, 0, 0);
}
}
command_list_close(cl);
pipeline_scope_end(pipeline_scope);
render_sig_reset(rsig);
EndScratch(scratch);
return (G_Resource *)rsig->ui_target;
}
/* ========================== *
* Memory info
* ========================== */
G_MemoryInfo gp_query_memory_info(void)
{
    /* Query current usage and OS-granted budget for the adapter's local
     * (VRAM) and non-local (shared system) memory segments.
     * Any segment whose query fails is reported as zero.
     * NOTE: node index 0 — single-adapter assumption. */
    G_MemoryInfo result = ZI;
    IDXGIAdapter3 *dxgiAdapter3 = 0;
    HRESULT hr = IDXGIAdapter_QueryInterface(G.adapter, &IID_IDXGIAdapter3, (void **)&dxgiAdapter3);
    if (SUCCEEDED(hr)) {
        {
            struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
            /* Check the query result: on failure, leave the segment zeroed
             * rather than reporting stale/uninitialized values */
            if (SUCCEEDED(IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info))) {
                result.local_used = info.CurrentUsage;
                result.local_budget = info.Budget;
            }
        }
        {
            struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
            if (SUCCEEDED(IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info))) {
                result.non_local_used = info.CurrentUsage;
                result.non_local_budget = info.Budget;
            }
        }
        /* Release through the matching interface macro (the IDXGIAdapter_
         * variant takes an IDXGIAdapter*, causing a pointer-type mismatch) */
        IDXGIAdapter3_Release(dxgiAdapter3);
    }
    return result;
}
/* ========================== *
* Swapchain
* ========================== */
internal void swapchain_init_resources(struct swapchain *swapchain)
{
    /* Fetch each backbuffer resource from the swapchain and create a
     * render-target view for it. Buffers start in the COMMON state. */
    for (u32 buffer_index = 0; buffer_index < countof(swapchain->buffers); ++buffer_index) {
        ID3D12Resource *backbuffer = 0;
        if (FAILED(IDXGISwapChain3_GetBuffer(swapchain->swapchain, buffer_index, &IID_ID3D12Resource, (void **)&backbuffer))) {
            /* TODO: Don't panic */
            P_Panic(Lit("Failed to get swapchain buffer"));
        }
        struct swapchain_buffer *buffer = &swapchain->buffers[buffer_index];
        ZeroStruct(buffer);
        buffer->swapchain = swapchain;
        buffer->resource = backbuffer;
        buffer->rtv_descriptor = descriptor_alloc(G.rtv_heap);
        buffer->state = D3D12_RESOURCE_STATE_COMMON;
        ID3D12Device_CreateRenderTargetView(G.device, buffer->resource, 0, buffer->rtv_descriptor->handle);
    }
}
G_Swapchain *gp_swapchain_alloc(P_Window *window, Vec2I32 resolution)
{
    /* Create a flip-model swapchain for the given window, targeting the
     * direct queue. The slot comes from a free list (recycled) or the
     * swapchains arena (fresh). */
    HRESULT hr = 0;
    HWND hwnd = (HWND)P_GetInternalWindowHandle(window);
    struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
    struct swapchain *swapchain = 0;
    {
        P_Lock lock = P_LockE(&G.swapchains_mutex);
        if (G.first_free_swapchain) {
            swapchain = G.first_free_swapchain;
            G.first_free_swapchain = swapchain->next_free;
        } else {
            swapchain = PushStruct(G.swapchains_arena, struct swapchain);
        }
        P_Unlock(&lock);
    }
    /* Clear stale state from a recycled slot: a leftover resolution would make
     * update_swapchain skip a required rebuild, and leftover waitable/next_free
     * pointers would dangle. (Fresh arena pushes are already zeroed, so this is
     * a no-op for them.) */
    ZeroStruct(swapchain);
    /* Create swapchain1 */
    IDXGISwapChain1 *swapchain1 = 0;
    {
        DXGI_SWAP_CHAIN_DESC1 desc = ZI;
        desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
        desc.Width = resolution.x;
        desc.Height = resolution.y;
        desc.SampleDesc.Count = 1;
        desc.SampleDesc.Quality = 0;
        desc.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT;
        desc.BufferCount = DX12_SWAPCHAIN_BUFFER_COUNT;
        desc.Scaling = DXGI_SCALING_NONE;
        desc.Flags = DX12_SWAPCHAIN_FLAGS;
        desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
        desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
        hr = IDXGIFactory2_CreateSwapChainForHwnd(G.factory, (IUnknown *)cq->cq, hwnd, &desc, 0, 0, &swapchain1);
        if (FAILED(hr)) {
            P_Panic(Lit("Failed to create IDXGISwapChain1"));
        }
    }
    /* Upgrade to swapchain3 */
    hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain->swapchain);
    if (FAILED(hr)) {
        P_Panic(Lit("Failed to create IDXGISwapChain3"));
    }
    /* Create waitable object */
#if DX12_WAIT_FRAME_LATENCY > 0
    IDXGISwapChain3_SetMaximumFrameLatency(swapchain->swapchain, DX12_WAIT_FRAME_LATENCY);
    swapchain->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->swapchain);
    Assert(swapchain->waitable);
#endif
    /* Disable Alt+Enter changing monitor resolution to match window size */
    IDXGIFactory_MakeWindowAssociation(G.factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
    IDXGISwapChain1_Release(swapchain1);
    swapchain->hwnd = hwnd;
    swapchain_init_resources(swapchain);
    return (G_Swapchain *)swapchain;
}
void gp_swapchain_release(G_Swapchain *gp_swapchain)
{
    /* TODO: Unimplemented — should release buffers/RTVs/waitable and return
     * the slot to G.first_free_swapchain; until then swapchains leak. */
    (UNUSED)gp_swapchain;
}
void gp_swapchain_wait(G_Swapchain *gp_swapchain)
{
#if DX12_WAIT_FRAME_LATENCY > 0
    /* Block on the frame-latency waitable (alertable, 1 second cap) until
     * the swapchain allows another frame to be queued. */
    struct swapchain *swapchain = (struct swapchain *)gp_swapchain;
    HANDLE waitable = swapchain->waitable;
    if (!waitable) {
        return;
    }
    WaitForSingleObjectEx(waitable, 1000, 1);
#else
    (UNUSED)gp_swapchain;
#endif
}
internal struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, Vec2I32 resolution)
{
    /* Resize the swapchain if the requested resolution changed, then return
     * the current backbuffer. Resolution is clamped to at least 1x1 (a
     * minimized window can report 0, which ResizeBuffers rejects). */
    __prof;
    resolution.x = MaxI32(resolution.x, 1);
    resolution.y = MaxI32(resolution.y, 1);
    b32 should_rebuild = !EqVec2I32(swapchain->resolution, resolution);
    if (should_rebuild) {
        HRESULT hr = 0;
        struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
        /* Lock direct queue submissions (in case any write to backbuffer) */
        /* TODO: Less overkill approach - Only flush present_blit since we know it's the only operation targeting backbuffer */
        P_Lock lock = P_LockE(&cq->submit_fence_mutex);
        //DEBUGBREAKABLE;
        //P_Lock lock = P_LockE(&G.global_command_list_record_mutex);
        {
            /* Flush direct queue */
            //ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, ++cq->submit_fence_target);
            {
                /* Wait until all submitted direct-queue work completes; all
                 * swapchain buffer references must be dropped before
                 * ResizeBuffers or it fails */
                HANDLE event = CreateEvent(0, 0, 0, 0);
                ID3D12Fence_SetEventOnCompletion(cq->submit_fence, cq->submit_fence_target, event);
                WaitForSingleObject(event, INFINITE);
                CloseHandle(event);
            }
            /* Release buffers */
            for (u32 i = 0; i < countof(swapchain->buffers); ++i) {
                struct swapchain_buffer *sb = &swapchain->buffers[i];
                descriptor_release(sb->rtv_descriptor);
                ID3D12Resource_Release(sb->resource);
            }
            /* Resize buffers (0 buffer count / UNKNOWN format preserve the
             * existing values; flags must match creation flags) */
            hr = IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, DX12_SWAPCHAIN_FLAGS);
            if (FAILED(hr)) {
                /* TODO: Don't panic */
                P_Panic(Lit("Failed to resize swapchain"));
            }
        }
        P_Unlock(&lock);
        /* Re-acquire buffers and recreate their RTVs at the new size */
        swapchain_init_resources(swapchain);
        swapchain->resolution = resolution;
    }
    u32 backbuffer_index = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->swapchain);
    return &swapchain->buffers[backbuffer_index];
}
/* ========================== *
* Present
* ========================== */
internal void present_blit(struct swapchain_buffer *dst, struct dx12_resource *src, Xform src_xf)
{
    /* Record and close a direct-queue command list that draws `src` (a
     * textured quad transformed by `src_xf`) onto the swapchain backbuffer
     * `dst`, transitioning dst RENDER_TARGET -> PRESENT around the draw.
     * Silently does nothing if the blit pipeline failed to build. */
    __prof;
    struct pipeline_scope *pipeline_scope = pipeline_scope_begin();
    struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, Lit("kernel_blit"));
    if (blit_pipeline->success) {
        struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
        struct command_list *cl = command_list_open(cq->cl_pool);
        {
            __profn("Present blit");
            __profnc_dx12(cl->cq->prof, cl->cl, "Present blit", Rgb32F(0.5, 0.2, 0.2));
            struct swapchain *swapchain = dst->swapchain;
            /* Upload dummy vert & index buffer */
            /* TODO: Make these static */
            /* Dummy vertex buffer (zero-sized; verts are synthesized in the
             * shader, but the IA stage still wants a bound VB) */
            LocalPersist u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
            struct command_buffer *dummy_vertex_buffer = command_list_push_buffer(cl, 0, (u8 *)0);
            struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, countof(quad_indices), quad_indices);
            /* Upload descriptor heap */
            struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap);
            ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap };
            ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps);
            Rect viewport_rect = RectFromVec2(VEC2(0, 0), VEC2(swapchain->resolution.x, swapchain->resolution.y));
            D3D12_VIEWPORT viewport = viewport_from_rect(viewport_rect);
            D3D12_RECT scissor = scissor_from_rect(viewport_rect);
            /* Build the view-projection matrix: scale the unit quad to the
             * source texture size, center it, then apply the caller transform */
            Mat4x4 vp_matrix = ZI;
            {
                Xform xf = src_xf;
                xf = ScaleXform(xf, VEC2(src->texture_size.x, src->texture_size.y));
                xf = TranslateXform(xf, VEC2(0.5, 0.5));
                vp_matrix = calculate_vp(xf, viewport.Width, viewport.Height);
            }
            /* Transition dst to render target */
            {
                struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
                rtb.pResource = dst->resource;
                rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
                rtb.StateBefore = dst->state;
                rtb.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
                struct D3D12_RESOURCE_BARRIER rb = ZI;
                rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                rb.Flags = 0;
                rb.Transition = rtb;
                ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb);
                dst->state = rtb.StateAfter;
            }
            ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &dst->rtv_descriptor->handle, 0, 0);
            /* Clear */
            f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
            ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, dst->rtv_descriptor->handle, clear_color, 0, 0);
            /* Bind pipeline */
            command_list_set_pipeline(cl, blit_pipeline);
            /* Set Rasterizer State */
            ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
            ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
            /* Set sig (no tone map / gamma here — src is assumed
             * display-ready; compare the UI blit pass which sets both) */
            struct k_blit_sig sig = ZI;
            sig.projection = K_Float4x4FromMat4x4(vp_matrix);
            sig.flags = K_UintFromU32(K_BLIT_FLAG_NONE);
            sig.tex_urid = K_UintFromU32(src->srv_descriptor->index);
            command_list_set_sig(cl, &sig, sizeof(sig));
            /* Draw one indexed quad (6 indices) */
            D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
            D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
            ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
            ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
            ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
            ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0);
            /* Transition dst to presentable */
            {
                struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
                rtb.pResource = dst->resource;
                rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
                rtb.StateBefore = dst->state;
                rtb.StateAfter = D3D12_RESOURCE_STATE_PRESENT;
                struct D3D12_RESOURCE_BARRIER rb = ZI;
                rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
                rb.Flags = 0;
                rb.Transition = rtb;
                ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb);
                dst->state = rtb.StateAfter;
            }
        }
        command_list_close(cl);
    }
    pipeline_scope_end(pipeline_scope);
}
void gp_present(G_Swapchain *gp_swapchain, Vec2I32 backbuffer_resolution, G_Resource *texture, Xform texture_xf, i32 vsync)
{
    /* Resize/acquire the backbuffer, blit `texture` onto it, and present.
     * `vsync` is passed straight through as the DXGI sync interval; 0 allows
     * tearing (if DX12_ALLOW_TEARING is enabled). When GPU profiling is on,
     * also marks a new profiler frame on every queue and collects results. */
    __prof;
    struct swapchain *swapchain = (struct swapchain *)gp_swapchain;
    struct swapchain_buffer *swapchain_buffer = update_swapchain(swapchain, backbuffer_resolution);
    struct dx12_resource *texture_resource = (struct dx12_resource *)texture;
    /* Blit */
    present_blit(swapchain_buffer, texture_resource, texture_xf);
    u32 present_flags = 0;
    if (vsync == 0) {
        /* ALLOW_TEARING is only legal with sync interval 0 */
        present_flags |= (DXGI_PRESENT_ALLOW_TEARING * DX12_ALLOW_TEARING);
    }
    /* Present */
    {
        __profn("Present");
        HRESULT hr = IDXGISwapChain3_Present(swapchain->swapchain, vsync, present_flags);
        if (!SUCCEEDED(hr)) {
            Assert(0);
        }
    }
#if ProfilingIsEnabled_GPU
    {
        __profframe(0);
        __profn("Mark queue frames");
        /* Lock because frame marks shouldn't occur while command lists are recording */
        P_Lock lock = P_LockE(&G.global_command_list_record_mutex);
        for (u32 i = 0; i < countof(G.command_queues); ++i) {
            {
                struct command_queue *cq = G.command_queues[i];
                __prof_dx12_new_frame(cq->prof);
            }
        }
        P_Unlock(&lock);
    }
    {
        __profn("Collect queues");
        for (u32 i = 0; i < countof(G.command_queues); ++i) {
            struct command_queue *cq = G.command_queues[i];
            __prof_dx12_collect(cq->prof);
        }
    }
#endif
}
/* ========================== *
* Evictor thread
* ========================== */
internal P_JobDef(dx12_evictor_job, _)
{
    /* Background job that destroys GPU objects only after the GPU is done
     * with them. Each pass: snapshot the queued releases and per-queue fence
     * targets under lock, wait for every queue's submit fence to reach its
     * target, then perform the releases. Sleeps on a condition variable
     * between passes; exits when G.evictor_shutdown is set. */
    (UNUSED)_;
    /* Last observed completed fence value per queue (monotonic cache so we
     * only poll/wait on fences that haven't caught up yet) */
    u64 completed_targets[DX12_NUM_QUEUES] = ZI;
    b32 shutdown = 0;
    while (!shutdown) {
        {
            __profn("Dx12 evictor run");
            TempArena scratch = BeginScratchNoConflict();
            u64 targets[countof(completed_targets)] = ZI;
            /* Copy queued data */
            u32 num_fenced_releases = 0;
            struct fenced_release_data *fenced_releases = 0;
            {
                __profn("Copy queued releases");
                /* Snapshot the pending release list and fence targets into
                 * scratch so the producer side is blocked only briefly */
                P_Lock lock = P_LockE(&G.fenced_releases_mutex);
                num_fenced_releases = G.fenced_releases_arena->pos / sizeof(struct fenced_release_data);
                fenced_releases = PushStructsNoZero(scratch.arena, struct fenced_release_data, num_fenced_releases);
                CopyBytes(fenced_releases, ArenaBase(G.fenced_releases_arena), G.fenced_releases_arena->pos);
                ResetArena(G.fenced_releases_arena);
                CopyBytes(targets, G.fenced_release_targets, sizeof(targets));
                P_Unlock(&lock);
            }
            /* Wait until fences reach target */
            {
                __profn("Check fences");
                for (u32 i = 0; i < countof(targets); ++i) {
                    while (completed_targets[i] < targets[i]) {
                        struct command_queue *cq = G.command_queues[i];
                        completed_targets[i] = ID3D12Fence_GetCompletedValue(cq->submit_fence);
                        if (completed_targets[i] < targets[i]) {
                            __profn("Wait on fence");
                            {
                                /* Fence not there yet — do a blocking wait on
                                 * a floating low-priority job rather than
                                 * spinning */
                                struct dx12_wait_fence_job_sig sig = ZI;
                                sig.fence = cq->submit_fence;
                                sig.target = targets[i];
                                {
                                    P_Counter counter = ZI;
                                    P_Run(1, dx12_wait_fence_job, &sig, P_Pool_Floating, P_Priority_Low, &counter);
                                    P_WaitOnCounter(&counter);
                                }
                            }
                        }
                    }
                }
            }
            /* Process releases (safe now: all queues have passed the fence
             * targets recorded when these releases were enqueued) */
            for (u32 i = 0; i < num_fenced_releases; ++i) {
                struct fenced_release_data *fr = &fenced_releases[i];
                switch (fr->kind) {
                default:
                {
                    /* Unknown handle type */
                    Assert(0);
                } break;
                case FENCED_RELEASE_KIND_RESOURCE:
                {
                    struct dx12_resource *resource = (struct dx12_resource *)fr->ptr;
                    dx12_resource_release_now(resource);
                } break;
                case FENCED_RELEASE_KIND_PIPELINE:
                {
                    struct pipeline *pipeline = (struct pipeline *)fr->ptr;
                    pipeline_release_now(pipeline);
                } break;
                }
            }
            EndScratch(scratch);
        }
        /* Sleep until woken (wake_gen acts as a consumed wake flag; the loop
         * guards against spurious condition-variable wakeups) */
        P_Lock lock = P_LockE(&G.evictor_wake_mutex);
        {
            while (!G.evictor_shutdown && G.evictor_wake_gen == 0) {
                P_WaitOnCv(&G.evictor_wake_cv, &lock);
            }
            shutdown = G.evictor_shutdown;
            G.evictor_wake_gen = 0;
        }
        P_Unlock(&lock);
    }
}