power_play/src/gp_dx12.c
2025-07-17 19:48:30 -05:00

3427 lines
128 KiB
C

#include "gp.h"
#include "sys.h"
#include "arena.h"
#include "memory.h"
#include "string.h"
#include "app.h"
#include "log.h"
#include "resource.h"
#include "atomic.h"
#include "util.h"
#include "rand.h"
#include "sprite.h"
#include "gstat.h"
#include "snc.h"
/* Include common shader types */
#define SH_CPU 1
#include "../res/sh/sh_common.h"
#pragma warning(push, 0)
# define UNICODE
# define COBJMACROS
# include <Windows.h>
# include <d3d12.h>
# include <dxgidebug.h>
# include <dxgi1_6.h>
# include <combaseapi.h>
# include <d3dcompiler.h>
#pragma warning(pop)
#pragma comment(lib, "d3d12")
#pragma comment(lib, "dxgi")
#pragma comment(lib, "dxguid")
#pragma comment(lib, "d3dcompiler")
#if PROFILING_D3D
/* For RegOpenKeyEx */
# include <winreg.h>
# pragma comment(lib, "advapi32")
#endif
/* Swapchain feature toggles (0/1) */
#define DX12_ALLOW_TEARING 1
#define DX12_WAIT_FRAME_LATENCY 1
/* Multiplying by the 0/1 toggle yields either the flag or 0 */
#define DX12_SWAPCHAIN_FLAGS ((DX12_ALLOW_TEARING * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | (DX12_WAIT_FRAME_LATENCY * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT))
#define DX12_SWAPCHAIN_BUFFER_COUNT (4)
/* Arbitrary limits */
#define DX12_NUM_CBV_SRV_UAV_DESCRIPTORS (1024 * 64)
#define DX12_NUM_RTV_DESCRIPTORS (1024 * 1)
#define DX12_COMMAND_BUFFER_MIN_SIZE (1024 * 64)
/* When profiling, collapse all logical queues onto a single direct queue so
 * GPU timestamps share one timeline */
#define DX12_MULTI_QUEUE !PROFILING
#if DX12_MULTI_QUEUE
# define DX12_QUEUE_DIRECT 0
# define DX12_QUEUE_COMPUTE 1
# define DX12_QUEUE_COPY 2
# define DX12_QUEUE_COPY_BACKGROUND 3
# define DX12_NUM_QUEUES 4
#else
/* All queue indices alias the single direct queue */
# define DX12_QUEUE_DIRECT 0
# define DX12_QUEUE_COMPUTE 0
# define DX12_QUEUE_COPY 0
# define DX12_QUEUE_COPY_BACKGROUND 0
# define DX12_NUM_QUEUES 1
#endif
/* Debug layer + shader debug info follow the RTC (runtime-checks) build flag */
#if RTC
# define DX12_DEBUG 1
# define DX12_SHADER_DEBUG 1
#else
# define DX12_DEBUG 0
# define DX12_SHADER_DEBUG 0
#endif
/* Source file + entry point pair identifying a single shader stage */
struct shader_desc {
    struct string file; /* Path under the resource root, e.g. "sh/material.hlsl" */
    struct string func; /* Entry point name, e.g. "vs" / "ps" / "cs" */
};
/* Per-render-target state for a graphics pipeline */
struct pipeline_rtv_desc {
    DXGI_FORMAT format;
    b32 blending; /* Non-zero enables blending on this target */
};
/* Immutable description of a pipeline; either cs, or vs+ps, is populated */
struct pipeline_desc {
    struct string name;
    struct shader_desc cs;
    struct shader_desc vs;
    struct shader_desc ps;
    D3D12_INPUT_ELEMENT_DESC ia[8]; /* Input-assembler layout; unused tail entries are zero */
    struct pipeline_rtv_desc rtvs[8]; /* Render targets; unused tail entries are zero */
};
/* A compiled pipeline instance.  Lives in its own arena so the whole object
 * (errors, dependency dict, name copy) can be released as one unit. */
struct pipeline {
    b32 success; /* 0 when compilation failed; see first_error */
    struct arena *arena;
    struct string name;
    u64 hash; /* FNV-64 of name; key into the global pipeline dicts */
    struct pipeline_error *first_error;
    struct pipeline_error *last_error;
    i64 compilation_time_ns;
    /* Dict with shader source & included file names as keys */
    struct dict *dependencies;
    /* Lock global pipelines mutex when accessing */
    i64 refcount;
    ID3D12PipelineState *pso;
    ID3D12RootSignature *rootsig;
    struct pipeline_desc desc;
};
/* Singly-linked list node for a compilation error message */
struct pipeline_error {
    struct string msg;
    struct pipeline_error *next;
};
/* Singly-linked list node recording an #include dependency by name */
struct pipeline_include {
    struct string name;
    u64 name_hash;
    struct pipeline_include *next;
};
/* Scoped set of pipeline references; recycled via next_free */
struct pipeline_scope {
    struct arena *arena;
    struct dict *refs;
    struct pipeline_scope *next_free;
};
/* Creation parameters for a command queue */
struct command_queue_desc {
    enum D3D12_COMMAND_LIST_TYPE type;
    enum D3D12_COMMAND_QUEUE_PRIORITY priority;
    struct string dbg_name;
};
/* Wraps an ID3D12CommandQueue together with its submission fence and a pool
 * of reusable command lists */
struct command_queue {
    struct command_queue_desc desc;
    ID3D12CommandQueue *cq;
    struct arena *arena;
    struct snc_mutex submit_fence_mutex;
    u64 submit_fence_target; /* Latest fence value; guard with submit_fence_mutex */
    ID3D12Fence *submit_fence;
    struct command_list_pool *cl_pool;
#if PROFILING_D3D
    __prof_dx12_ctx(prof);
#endif
};
/* Pool of submitted command lists belonging to one queue, recycled once
 * their fence target has been reached */
struct command_list_pool {
    struct command_queue *cq;
    struct arena *arena;
    struct snc_mutex mutex;
    struct command_list *first_submitted_command_list;
    struct command_list *last_submitted_command_list;
};
/* An allocator + graphics command list pair, plus the transient descriptor
 * heaps and buffers attached while recording */
struct command_list {
    struct command_queue *cq;
    struct command_list_pool *pool;
    struct ID3D12CommandAllocator *ca;
    struct ID3D12GraphicsCommandList *cl;
    struct snc_lock global_record_lock;
    struct command_descriptor_heap *first_command_descriptor_heap;
    struct command_buffer *first_command_buffer;
    u64 submitted_fence_target; /* Fence value that must pass before reuse */
    struct command_list *prev_submitted;
    struct command_list *next_submitted;
};
/* Shader-visible descriptor heap attached to a command list for one
 * recording; recycled via the global submitted list once its fence passes */
struct command_descriptor_heap {
    D3D12_DESCRIPTOR_HEAP_TYPE type;
    ID3D12DescriptorHeap *heap;
    D3D12_CPU_DESCRIPTOR_HANDLE start_cpu_handle;
    D3D12_GPU_DESCRIPTOR_HANDLE start_gpu_handle;
    struct command_descriptor_heap *next_in_command_list;
    u64 submitted_fence_target;
    struct command_queue *submitted_cq;
    struct command_descriptor_heap *prev_submitted;
    struct command_descriptor_heap *next_submitted;
};
/* Transient GPU buffer recorded into a command list */
struct command_buffer {
    struct command_buffer_group *group;
    u64 size;
    struct dx12_resource *resource;
    D3D12_VERTEX_BUFFER_VIEW vbv;
    /* NOTE(review): capitalized 'Ibv' is inconsistent with 'vbv'; renaming
     * would touch every user of this field — confirm before changing */
    D3D12_INDEX_BUFFER_VIEW Ibv;
    struct command_buffer *next_in_command_list;
    u64 submitted_fence_target;
    struct command_queue *submitted_cq;
    struct command_buffer *prev_submitted;
    struct command_buffer *next_submitted;
};
/* Bucket of submitted command buffers awaiting recycling */
struct command_buffer_group {
    struct command_buffer *first_submitted;
    struct command_buffer *last_submitted;
};
/* One slot in a CPU descriptor heap; recycled via next_free */
struct descriptor {
    struct cpu_descriptor_heap *heap;
    u32 index; /* Slot index within the owning heap */
    D3D12_CPU_DESCRIPTOR_HANDLE handle;
    struct descriptor *next_free;
};
/* Tracked GPU resource plus its views and last known state */
struct dx12_resource {
    enum D3D12_RESOURCE_STATES state; /* For resource-barrier tracking */
    ID3D12Resource *resource;
    struct descriptor *cbv_descriptor;
    struct descriptor *srv_descriptor;
    struct descriptor *uav_descriptor;
    struct descriptor *rtv_descriptor;
    D3D12_GPU_VIRTUAL_ADDRESS gpu_address; /* NOTE: 0 for textures */
    struct v2i32 texture_size;
    struct dx12_resource *next_free;
};
/* One backbuffer of a swapchain */
struct swapchain_buffer {
    struct swapchain *swapchain;
    ID3D12Resource *resource;
    struct descriptor *rtv_descriptor;
    D3D12_RESOURCE_STATES state;
};
/* Per-window swapchain; recycled via next_free */
struct swapchain {
    IDXGISwapChain3 *swapchain;
    HWND hwnd;
    HANDLE waitable; /* Frame-latency waitable object (DX12_WAIT_FRAME_LATENCY) */
    struct v2i32 resolution;
    struct swapchain_buffer buffers[DX12_SWAPCHAIN_BUFFER_COUNT];
    struct swapchain *next_free;
};
/* CPU-side descriptor heap with a free list of slots */
struct cpu_descriptor_heap {
    enum D3D12_DESCRIPTOR_HEAP_TYPE type;
    struct arena *arena;
    struct snc_mutex mutex;
    u32 descriptor_size; /* Handle increment size for this heap type */
    u32 num_descriptors_reserved;
    u32 num_descriptors_capacity;
    struct descriptor *first_free_descriptor;
    ID3D12DescriptorHeap *heap;
    struct D3D12_CPU_DESCRIPTOR_HANDLE handle; /* CPU handle of slot 0 */
};
/* What a deferred (fence-gated) release refers to */
enum fenced_release_kind {
    FENCED_RELEASE_KIND_NONE,
    FENCED_RELEASE_KIND_RESOURCE,
    FENCED_RELEASE_KIND_PIPELINE
};
/* Queue entry for an object released once the GPU passes a fence value */
struct fenced_release_data {
    enum fenced_release_kind kind;
    void *ptr;
};
/* ========================== *
* Global state
* ========================== */
/* All mutable state of the GP layer lives in this single global, initialized
 * once by gp_startup.  Each pool has its own mutex + arena; lock the matching
 * mutex before touching a pool's lists/dicts. */
GLOBAL struct {
    struct atomic32 initialized; /* 0 -> 1 exactly once in gp_startup */
    /* Descriptor heaps pool */
    struct snc_mutex command_descriptor_heaps_mutex;
    struct arena *command_descriptor_heaps_arena;
    struct command_descriptor_heap *first_submitted_command_descriptor_heap;
    struct command_descriptor_heap *last_submitted_command_descriptor_heap;
    /* Command buffers pool */
    struct snc_mutex command_buffers_mutex;
    struct arena *command_buffers_arena;
    struct dict *command_buffers_dict;
    /* Resources pool */
    struct snc_mutex resources_mutex;
    struct arena *resources_arena;
    struct dx12_resource *first_free_resource;
    /* Swapchains pool */
    struct snc_mutex swapchains_mutex;
    struct arena *swapchains_arena;
    struct swapchain *first_free_swapchain;
    /* Pipeline cache */
    struct snc_mutex pipelines_mutex;
    struct arena *pipelines_arena;
    struct dict *pipeline_descs;
    struct dict *top_pipelines; /* Latest pipelines */
    struct dict *top_successful_pipelines; /* Latest pipelines that successfully compiled */
    struct pipeline_scope *first_free_pipeline_scope;
    /* Fenced release queue */
    struct snc_mutex fenced_releases_mutex;
    struct arena *fenced_releases_arena;
    u64 fenced_release_targets[DX12_NUM_QUEUES];
    /* Factory */
    IDXGIFactory6 *factory;
    /* Adapter */
    IDXGIAdapter1 *adapter;
    /* Device */
    ID3D12Device *device;
    /* Descriptor sizes (per-type handle increment, queried from the device) */
    u32 desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
    u32 desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
    /* Global descriptor heaps */
    struct cpu_descriptor_heap *cbv_srv_uav_heap;
    struct cpu_descriptor_heap *rtv_heap;
    /* Command queues */
    struct snc_mutex global_command_list_record_mutex;
    struct snc_mutex global_submit_mutex;
    struct command_queue *command_queues[DX12_NUM_QUEUES];
    /* Evictor job (background recycling of fenced objects) */
    struct snc_counter evictor_job_counter;
    struct snc_cv evictor_wake_cv;
    struct snc_mutex evictor_wake_mutex;
    i64 evictor_wake_gen;
    b32 evictor_shutdown; /* Set under evictor_wake_mutex by gp_shutdown */
} G = ZI, DEBUG_ALIAS(G, G_gp_dx12);
/* ========================== *
* Startup
* ========================== */
/* Forward declarations for startup/shutdown plumbing */
INTERNAL SYS_EXIT_FUNC(gp_shutdown);
INTERNAL void dx12_init_device(void);
INTERNAL void dx12_init_objects(void);
INTERNAL void dx12_init_pipelines(void);
INTERNAL struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type);
INTERNAL void command_queue_release(struct command_queue *cq);
INTERNAL SYS_JOB_DEF(dx12_evictor_job, _);
INTERNAL void fenced_release(void *data, enum fenced_release_kind kind);
/* Job signatures: *_in arrays are indexed by job id; results land in *_out */
struct command_queue_alloc_job_sig { struct command_queue_desc *descs_in; struct command_queue **cqs_out; };
INTERNAL SYS_JOB_DEF(command_queue_alloc_job, job);
struct pipeline_alloc_job_sig { struct pipeline_desc *descs_in; struct pipeline **pipelines_out; };
INTERNAL SYS_JOB_DEF(pipeline_alloc_job, job);
#if RESOURCE_RELOADING
INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(pipeline_resource_watch_callback, name);
#endif
/* One-time initialization of the GP (graphics) layer: allocates the global
 * pools, creates the DX12 device and objects, compiles the initial pipelines,
 * registers the shutdown/reload callbacks, and starts the background evictor
 * job.  Panics if called a second time. */
void gp_startup(void)
{
    __prof;
    if (atomic32_fetch_test_set(&G.initialized, 0, 1) != 0) {
        sys_panic(LIT("GP layer already initialized"));
    }
    /* Initialize command descriptor heaps pool */
    /* NOTE(review): GIBI(64)-sized arenas are presumably address-space
     * reservations, not committed memory — confirm arena_alloc semantics */
    G.command_descriptor_heaps_arena = arena_alloc(GIBI(64));
    /* Initialize command buffers pool */
    G.command_buffers_arena = arena_alloc(GIBI(64));
    G.command_buffers_dict = dict_init(G.command_buffers_arena, 4096);
    /* Initialize resources pool */
    G.resources_arena = arena_alloc(GIBI(64));
    /* Initialize swapchains pool */
    G.swapchains_arena = arena_alloc(GIBI(64));
    /* Initialize pipeline cache */
    G.pipelines_arena = arena_alloc(GIBI(64));
    G.pipeline_descs = dict_init(G.pipelines_arena, 1024);
    G.top_pipelines = dict_init(G.pipelines_arena, 1024);
    G.top_successful_pipelines = dict_init(G.pipelines_arena, 1024);
    /* Initialize fenced releases queue */
    G.fenced_releases_arena = arena_alloc(GIBI(64));
    /* Initialize dx12 */
    dx12_init_device();
    dx12_init_objects();
    dx12_init_pipelines();
    /* Register callbacks */
#if RESOURCE_RELOADING
    resource_register_watch_callback(pipeline_resource_watch_callback);
#endif
    sys_on_exit(gp_shutdown);
    /* Start evictor job; joined in gp_shutdown via evictor_job_counter */
    sys_run(1, dx12_evictor_job, 0, SYS_POOL_BACKGROUND, SYS_PRIORITY_LOW, &G.evictor_job_counter);
}
/* Process-exit hook registered by gp_startup.  Signals the evictor job to
 * stop and waits for it.  Explicit release of D3D objects is compiled out
 * (#if 0) because the process is exiting anyway; the block is kept only to
 * quiet live-object reports while debugging. */
INTERNAL SYS_EXIT_FUNC(gp_shutdown)
{
    __prof;
#if 0
    /* Release objects to make live object reporting less noisy */
    //IDXGISwapChain3_Release(G.swapchain);
    for (u32 i = 0; i < countof(G.command_queues); ++i) {
        struct command_queue *cq = G.command_queues[i];
        /* Fixed typo: was 'cmomand_queue_release', which would fail to
         * compile if this block were ever re-enabled */
        command_queue_release(cq);
    }
    ID3D12Device_Release(G.device);
#else
    (UNUSED)command_queue_release;
#endif
    /* Wake the evictor under its mutex so it observes the shutdown flag */
    {
        struct snc_lock lock = snc_lock_e(&G.evictor_wake_mutex);
        G.evictor_shutdown = 1;
        snc_cv_signal(&G.evictor_wake_cv, I32_MAX);
        snc_unlock(&lock);
    }
    /* Block until the evictor job has fully exited */
    snc_counter_wait(&G.evictor_job_counter);
}
/* ========================== *
* Dx12 device initialization
* ========================== */
/* Formats a fatal initialization message and panics.  The trailing
 * scratch_end is presumably unreachable (sys_panic should not return) but
 * harmless — NOTE(review): confirm sys_panic never returns. */
INTERNAL void dx12_init_error(struct string error)
{
    struct arena_temp scratch = scratch_begin_no_conflict();
    struct string msg = string_format(scratch.arena, LIT("Failed to initialize DirectX 12.\n\n%F"), FMT_STR(error));
    sys_panic(msg);
    scratch_end(scratch);
}
INTERNAL void dx12_init_device(void)
{
__prof;
struct arena_temp scratch = scratch_begin_no_conflict();
HRESULT hr = 0;
/* Enable debug layer */
u32 dxgi_factory_flags = 0;
#if DX12_DEBUG
{
__profn("Enable debug layer");
ID3D12Debug *debug_controller0 = 0;
hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create ID3D12Debug0"));
}
ID3D12Debug1 *debug_controller1 = 0;
hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create ID3D12Debug1"));
}
ID3D12Debug_EnableDebugLayer(debug_controller0);
/* FIXME: Enable this */
//ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, 1);
ID3D12Debug_Release(debug_controller1);
ID3D12Debug_Release(debug_controller0);
dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG;
}
#endif
/* Create factory */
{
__profn("Create factory");
hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G.factory);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to initialize DXGI factory"));
}
}
/* Create device */
{
__profn("Create device");
IDXGIAdapter1 *adapter = 0;
ID3D12Device *device = 0;
struct string error = LIT("Could not initialize GPU device.");
struct string first_gpu_name = ZI;
u32 adapter_index = 0;
for (;;) {
{
hr = IDXGIFactory6_EnumAdapterByGpuPreference(G.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter);
}
if (SUCCEEDED(hr)) {
DXGI_ADAPTER_DESC1 desc;
IDXGIAdapter1_GetDesc1(adapter, &desc);
if (first_gpu_name.len == 0) {
first_gpu_name = string_from_wstr_no_limit(scratch.arena, desc.Description);
}
{
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device);
}
if (SUCCEEDED(hr)) {
break;
}
ID3D12Device_Release(device);
IDXGIAdapter1_Release(adapter);
adapter = 0;
device = 0;
++adapter_index;
} else {
break;
}
}
if (!device) {
if (first_gpu_name.len > 0) {
struct string fmt = LIT("Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.");
error = string_format(scratch.arena, fmt, FMT_STR(first_gpu_name));
}
dx12_init_error(error);
}
G.adapter = adapter;
G.device = device;
}
#if DX12_DEBUG
/* Enable D3D12 Debug break */
{
__profn("Enable d3d12 debug break");
ID3D12InfoQueue *info = 0;
hr = ID3D12Device_QueryInterface(G.device, &IID_ID3D12InfoQueue, (void **)&info);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to query ID3D12Device interface"));
}
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, 1);
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, 1);
ID3D12InfoQueue_Release(info);
}
/* Enable DXGI Debug break */
{
__profn("Enable dxgi debug break");
IDXGIInfoQueue *dxgi_info = 0;
hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to get DXGI debug interface"));
}
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, 1);
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, 1);
IDXGIInfoQueue_Release(dxgi_info);
}
#endif
#if PROFILING_D3D
/* Enable stable power state */
{
__profn("Set stable power state");
b32 success = 1;
HKEY key = 0;
success = RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\AppModelUnlock", 0, KEY_READ, &key) == ERROR_SUCCESS;
if (success) {
DWORD value = ZI;
DWORD dword_size = sizeof(DWORD);
success = RegQueryValueExW(key, L"AllowDevelopmentWithoutDevLicense", 0, 0, (LPBYTE)&value, &dword_size) == ERROR_SUCCESS;
RegCloseKey(key);
if (success) {
success = value != 0;
}
}
logf_info("D3D12 profiling is enabled, attempting to set stable power state (this will increase GPU timing stability at the cost of performance)");
if (success) {
logf_info("Machine is in developer mode, calling ID3D12Device::SetStablePowerState");
hr = ID3D12Device_SetStablePowerState(G.device, 1);
if (SUCCEEDED(hr)) {
logf_info("ID3D12Device::SetStablePowerState succeeded");
} else {
success = 0;
logf_error("ID3D12Device::SetStablePowerState failed");
}
} else {
logf_warning("Machine is not in developer mode, cannot call ID3D12Device::SetStablePowerState");
}
if (!success) {
logf_warning("Profiling is enabled, but ID3D12Device::SetStablePowerState could not be called. This means that GPU timing may be unreliable.");
}
}
#endif
scratch_end(scratch);
}
/* ========================== *
* Dx12 object initialization
* ========================== */
/* Queries descriptor increment sizes, creates the global CPU descriptor
 * heaps, and allocates the command queues (in parallel via jobs). */
INTERNAL void dx12_init_objects(void)
{
    __prof;
    /* Initialize desc sizes */
    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
    G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
    /* Initialize desc counts (sampler/DSV heaps are not created here) */
    G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
    G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = DX12_NUM_RTV_DESCRIPTORS;
    /* Create global descriptor heaps */
    G.cbv_srv_uav_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
    G.rtv_heap = cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
    /* Create command queues.  With DX12_MULTI_QUEUE off, only the first
     * (direct) desc is used since DX12_NUM_QUEUES is 1. */
    {
        __profn("Allocate command queues");
        struct command_queue_desc params[] = {
            { .type = D3D12_COMMAND_LIST_TYPE_DIRECT, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = LIT("Direct queue") },
            { .type = D3D12_COMMAND_LIST_TYPE_COMPUTE, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = LIT("Compute queue") },
            { .type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH, .dbg_name = LIT("Copy queue") },
            { .type = D3D12_COMMAND_LIST_TYPE_COPY, .priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL, .dbg_name = LIT("Background copy queue") }
        };
        struct command_queue_alloc_job_sig sig = ZI;
        sig.descs_in = params;
        sig.cqs_out = G.command_queues;
        {
            struct snc_counter counter = ZI;
            sys_run(DX12_NUM_QUEUES, command_queue_alloc_job, &sig, SYS_POOL_INHERIT, SYS_PRIORITY_INHERIT, &counter);
            snc_counter_wait(&counter);
        }
#if PROFILING
        {
            /* Initialize serially for consistent order in profiler */
            /* NOTE(review): cq->prof is declared under #if PROFILING_D3D but
             * this block is guarded by #if PROFILING — if PROFILING can be on
             * without PROFILING_D3D this should not compile; presumably the
             * macros imply each other — confirm */
            __profn("Initialize command queue profiling contexts");
            for (i32 i = 0; i < DX12_NUM_QUEUES; ++i) {
                struct command_queue *cq = G.command_queues[i];
                struct string dbg_name = params[i].dbg_name;
                __prof_dx12_ctx_alloc(cq->prof, G.device, cq->cq, dbg_name.text, dbg_name.len);
                (UNUSED)dbg_name;
            }
        }
#endif
    }
}
/* ========================== *
* Dx12 pipeline initialization
* ========================== */
INTERNAL void pipeline_register(u64 num_pipelines, struct pipeline **pipelines);
/* Registers the built-in pipeline descriptions in G.pipeline_descs, then
 * compiles them all in parallel and reports any failures via log + message
 * box (failures are non-fatal; the pipeline is still registered). */
INTERNAL void dx12_init_pipelines(void)
{
    __prof;
    struct arena_temp scratch = scratch_begin_no_conflict();
    /* Register pipeline descs (keyed by FNV-64 of the pipeline name) */
    {
        /* Material pipeline: vs/ps pair, two blended RGBA8 targets */
        {
            struct pipeline_desc *desc = arena_push(G.pipelines_arena, struct pipeline_desc);
            desc->name = LIT("material");
            desc->vs.file = LIT("sh/material.hlsl");
            desc->ps.file = LIT("sh/material.hlsl");
            desc->vs.func = LIT("vs");
            desc->ps.func = LIT("ps");
            desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
            desc->rtvs[0].blending = 1;
            desc->rtvs[1].format = DXGI_FORMAT_R8G8B8A8_UNORM;
            desc->rtvs[1].blending = 1;
            dict_set(G.pipelines_arena, G.pipeline_descs, hash_fnv64(HASH_FNV64_BASIS, desc->name), (u64)desc);
        }
        /* Flood pipeline: compute only */
        {
            struct pipeline_desc *desc = arena_push(G.pipelines_arena, struct pipeline_desc);
            desc->name = LIT("flood");
            desc->cs.file = LIT("sh/flood.hlsl");
            desc->cs.func = LIT("cs");
            dict_set(G.pipelines_arena, G.pipeline_descs, hash_fnv64(HASH_FNV64_BASIS, desc->name), (u64)desc);
        }
        /* Shade pipeline: compute only */
        {
            struct pipeline_desc *desc = arena_push(G.pipelines_arena, struct pipeline_desc);
            desc->name = LIT("shade");
            desc->cs.file = LIT("sh/shade.hlsl");
            desc->cs.func = LIT("cs");
            dict_set(G.pipelines_arena, G.pipeline_descs, hash_fnv64(HASH_FNV64_BASIS, desc->name), (u64)desc);
        }
        /* Shape pipeline: vs/ps with a vertex input layout */
        {
            struct pipeline_desc *desc = arena_push(G.pipelines_arena, struct pipeline_desc);
            desc->name = LIT("shape");
            desc->vs.file = LIT("sh/shape.hlsl");
            desc->ps.file = LIT("sh/shape.hlsl");
            desc->vs.func = LIT("vs");
            desc->ps.func = LIT("ps");
            desc->ia[0] = (D3D12_INPUT_ELEMENT_DESC) { "pos", 0, DXGI_FORMAT_R32G32_FLOAT, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 };
            desc->ia[1] = (D3D12_INPUT_ELEMENT_DESC) { "color_srgb", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, D3D12_APPEND_ALIGNED_ELEMENT, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0 };
            desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
            desc->rtvs[0].blending = 1;
            dict_set(G.pipelines_arena, G.pipeline_descs, hash_fnv64(HASH_FNV64_BASIS, desc->name), (u64)desc);
        }
        /* Blit pipeline */
        {
            struct pipeline_desc *desc = arena_push(G.pipelines_arena, struct pipeline_desc);
            desc->name = LIT("blit");
            desc->vs.file = LIT("sh/blit.hlsl");
            desc->ps.file = LIT("sh/blit.hlsl");
            desc->vs.func = LIT("vs");
            desc->ps.func = LIT("ps");
            desc->rtvs[0].format = DXGI_FORMAT_R8G8B8A8_UNORM;
            desc->rtvs[0].blending = 1;
            dict_set(G.pipelines_arena, G.pipeline_descs, hash_fnv64(HASH_FNV64_BASIS, desc->name), (u64)desc);
        }
    }
    /* Compile pipelines.  Snapshot the registered descs into a contiguous
     * scratch array.  NOTE(review): this relies on consecutive arena_push
     * calls after arena_push_dry returning contiguous slots — confirm the
     * arena guarantees this. */
    u32 num_pipelines = 0;
    struct pipeline_desc *descs = arena_push_dry(scratch.arena, struct pipeline_desc);
    for (struct dict_entry *entry = G.pipeline_descs->first; entry; entry = entry->next) {
        struct pipeline_desc *desc = (struct pipeline_desc *)entry->value;
        *arena_push(scratch.arena, struct pipeline_desc) = *desc;
        ++num_pipelines;
    }
    struct pipeline **pipelines = arena_push_array(scratch.arena, struct pipeline *, num_pipelines);
    {
        __profn("Allocate pipelines");
        struct pipeline_alloc_job_sig sig = ZI;
        sig.descs_in = descs;
        sig.pipelines_out = pipelines;
        struct snc_counter counter = ZI;
        sys_run(num_pipelines, pipeline_alloc_job, &sig, SYS_POOL_INHERIT, SYS_PRIORITY_INHERIT, &counter);
        snc_counter_wait(&counter);
    }
    /* Report failures; only the first error per pipeline is shown */
    for (u32 i = 0; i < num_pipelines; ++i) {
        struct pipeline *pipeline = pipelines[i];
        if (!pipeline->success) {
            struct string error = pipeline->first_error ? pipeline->first_error->msg : LIT("Unknown error");
            struct string msg = string_format(scratch.arena, LIT("Error initializing pipeline \"%F\":\n\n%F"), FMT_STR(pipeline->name), FMT_STR(error));
            log_error(msg);
            sys_message_box(SYS_MESSAGE_BOX_KIND_WARNING, msg);
        }
    }
    pipeline_register(num_pipelines, pipelines);
    scratch_end(scratch);
}
/* ========================== *
* Shader compilation
* ========================== */
/* ID3DInclude implementation that resolves shader #includes through the
 * resource system and records each included file as a dependency of the
 * pipeline being compiled.  d3d_handler must stay the first member so the
 * ID3DInclude* callbacks can be cast back to this struct. */
struct dx12_include_handler {
    ID3DInclude d3d_handler; /* Must be first: cast target for callbacks */
    ID3DIncludeVtbl vtbl;
    struct pipeline *pipeline;
    struct snc_mutex pipeline_mutex; /* Guards pipeline->dependencies writes */
    u64 num_open_resources;
    struct resource open_resources[1024];
};
/* ID3DInclude::Open callback: resolves an #include by name through the
 * resource system, records the file as a dependency of the pipeline (for
 * hot reload), and hands the file contents back to the compiler.  Returns
 * E_FAIL when the resource does not exist.  The open_resources array is
 * only touched from here and Close, which D3DCompile presumably invokes
 * on the calling thread only — hence no lock around the counter. */
INTERNAL HRESULT dx12_include_open(ID3DInclude *d3d_handler, D3D_INCLUDE_TYPE include_type, LPCSTR name_cstr, LPCVOID parent_data, LPCVOID *data_out, UINT *data_len_out)
{
    __prof;
    (UNUSED)include_type;
    (UNUSED)parent_data;
    HRESULT result = E_FAIL;
    struct dx12_include_handler *handler = (struct dx12_include_handler *)d3d_handler;
    struct string name = string_from_cstr_no_limit((char *)name_cstr);
    u64 hash = hash_fnv64(HASH_FNV64_BASIS, name);
    if (handler->num_open_resources >= countof(handler->open_resources)) {
        /* Fixed message typo: was "resource oversig" */
        sys_panic(LIT("Dx12 include handler resource overflow"));
    }
    /* Record the dependency under the pipeline mutex */
    struct snc_lock lock = snc_lock_e(&handler->pipeline_mutex);
    {
        struct pipeline *pipeline = handler->pipeline;
        dict_set(pipeline->arena, pipeline->dependencies, hash, 1);
    }
    snc_unlock(&lock);
    struct resource *res = &handler->open_resources[handler->num_open_resources++];
    *res = resource_open(name);
    if (resource_exists(res)) {
        struct string data = resource_get_data(res);
        *data_out = data.text;
        *data_len_out = data.len;
        result = S_OK;
    }
    return result;
}
/* ID3DInclude::Close callback.  Ignores 'data' and closes ALL currently open
 * resources rather than only the matching one; resetting the counter makes
 * subsequent Close calls no-ops, so nothing is double-closed.
 * NOTE(review): this assumes no include's data is read by the compiler after
 * a sibling's Close — appears to hold for D3DCompile, but confirm. */
INTERNAL HRESULT dx12_include_close(ID3DInclude *d3d_handler, LPCVOID data)
{
    __prof;
    (UNUSED)data;
    struct dx12_include_handler *handler = (struct dx12_include_handler *)d3d_handler;
    for (u64 i = 0; i < handler->num_open_resources; ++i) {
        struct resource *res = &handler->open_resources[i];
        resource_close(res);
    }
    handler->num_open_resources = 0;
    return S_OK;
}
/* Allocates an include handler on the given arena and wires its embedded
 * vtable so D3DCompile's #include callbacks land in dx12_include_open/close,
 * recording dependencies on the given pipeline. */
INTERNAL struct dx12_include_handler *dx12_include_handler_alloc(struct arena *arena, struct pipeline *pipeline)
{
    __prof;
    struct dx12_include_handler *result = arena_push(arena, struct dx12_include_handler);
    result->vtbl.Open = dx12_include_open;
    result->vtbl.Close = dx12_include_close;
    result->d3d_handler.lpVtbl = &result->vtbl;
    result->pipeline = pipeline;
    return result;
}
/* Releases an include handler after compilation.  All resources should have
 * been closed by dx12_include_close already; any leftovers trip the ASSERT
 * in debug builds and are closed here as a safety net in release builds. */
INTERNAL void dx12_include_handler_release(struct dx12_include_handler *handler)
{
    __prof;
    for (u64 i = 0; i < handler->num_open_resources; ++i) {
        ASSERT(0); /* Resource should have been closed by handler by now */
        struct resource *res = &handler->open_resources[i];
        resource_close(res);
    }
    handler->num_open_resources = 0;
}
/* Which shader stage a compile job targets.
 * NOTE(review): enum tag says "job" but the constants say "TASK" — renaming
 * either would touch all users; flagged for a follow-up cleanup. */
enum shader_compile_job_kind {
    SHADER_COMPILE_TASK_KIND_VS,
    SHADER_COMPILE_TASK_KIND_PS,
    SHADER_COMPILE_TASK_KIND_CS
};
/* Per-stage compile parameters and results (filled in by shader_compile_job) */
struct shader_compile_job_param {
    /* In */
    enum shader_compile_job_kind kind;
    struct pipeline *pipeline;
    struct shader_desc shader_desc;
    struct resource *shader_res; /* Already-opened shader source */
    /* Out */
    b32 success;
    ID3DBlob *blob; /* Compiled bytecode (valid when success) */
    ID3DBlob *error_blob; /* Compiler messages, if any */
    i64 elapsed; /* Wall-clock ns spent compiling */
};
/* Job signature: one param pointer per job id */
struct shader_compile_job_sig {
    struct shader_compile_job_param **params;
};
/* TODO: Compile shaders offline w/ dxc for performance & language features */
INTERNAL SYS_JOB_DEF(shader_compile_job, job)
{
__prof;
struct shader_compile_job_sig *sig = job.sig;
struct shader_compile_job_param *param = sig->params[job.id];
enum shader_compile_job_kind kind = param->kind;
struct pipeline *pipeline = param->pipeline;
struct shader_desc shader_desc = param->shader_desc;
struct resource *shader_res = param->shader_res;
struct arena_temp scratch = scratch_begin_no_conflict();
{
i64 start_ns = sys_time_ns();
b32 success = 0;
ID3DBlob *blob = 0;
ID3DBlob *error_blob = 0;
if (resource_exists(shader_res)) {
struct dx12_include_handler *include_handler = dx12_include_handler_alloc(scratch.arena, pipeline);
char *func_cstr = cstr_from_string(scratch.arena, shader_desc.func);
u32 d3d_compile_flags = D3DCOMPILE_ENABLE_UNBOUNDED_DESCRIPTOR_TABLES;
#if DX12_SHADER_DEBUG
d3d_compile_flags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_ENABLE_STRICTNESS;
#else
d3d_compile_flags |= D3DCOMPILE_OPTIMIZATION_LEVEL3;
#endif
/* Compile shader */
{
__profn("Compile shader");
struct string shader_src = resource_get_data(shader_res);
logf_info("Compiling shader \"%F:%F\"", FMT_STR(shader_desc.file), FMT_STR(shader_desc.func));
/* Compile shader */
struct string friendly_name = string_cat(scratch.arena, LIT("res/"), shader_desc.file);
char *friendly_name_cstr = cstr_from_string(scratch.arena, friendly_name);
char *target = 0;
switch (kind) {
case SHADER_COMPILE_TASK_KIND_VS:
{
target = "vs_5_1";
} break;
case SHADER_COMPILE_TASK_KIND_PS:
{
target = "ps_5_1";
} break;
case SHADER_COMPILE_TASK_KIND_CS:
{
target = "cs_5_1";
} break;
}
D3D_SHADER_MACRO defines[] = {
{ "SH_CPU", "0" },
{ 0, 0 }
};
HRESULT hr = D3DCompile(shader_src.text, shader_src.len, friendly_name_cstr, defines, (ID3DInclude *)include_handler, func_cstr, target, d3d_compile_flags, 0, &blob, &error_blob);
success = SUCCEEDED(hr) && !error_blob;
}
dx12_include_handler_release(include_handler);
}
#if 0
if (success) {
logf_success("Finished compiling shader \"%F\" in %F seconds", FMT_STR(src_name), FMT_FLOAT(SECONDS_FROM_NS(sys_time_ns() - start_ns)));
}
#endif
param->success = success;
param->blob = blob;
param->error_blob = error_blob;
param->elapsed = sys_time_ns() - start_ns;
}
scratch_end(scratch);
}
/* ========================== *
* Pipeline
* ========================== */
INTERNAL SYS_JOB_DEF(pipeline_alloc_job, job)
{
__prof;
struct pipeline_alloc_job_sig *sig = job.sig;
struct pipeline_desc *desc = &sig->descs_in[job.id];
struct pipeline **pipelines_out = sig->pipelines_out;
struct pipeline *pipeline = 0;
{
struct arena *pipeline_arena = arena_alloc(MEBI(64));
pipeline = arena_push(pipeline_arena, struct pipeline);
pipeline->arena = pipeline_arena;
pipelines_out[job.id] = pipeline;
}
pipeline->desc = *desc;
pipeline->name = string_copy(pipeline->arena, desc->name);
pipeline->hash = hash_fnv64(HASH_FNV64_BASIS, pipeline->name);
pipeline->dependencies = dict_init(pipeline->arena, 64);
struct arena_temp scratch = scratch_begin_no_conflict();
{
i64 start_ns = sys_time_ns();
struct string pipeline_name = pipeline->name;
logf_info("Loading pipeline \"%F\"", FMT_STR(pipeline_name));
b32 success = 1;
HRESULT hr = 0;
struct string error_str = LIT("Unknown error");
b32 has_cs = desc->cs.file.len > 0;
b32 ps_res_is_shared = string_eq(desc->vs.file, desc->ps.file);
struct resource cs_res = ZI;
struct resource vs_res = ZI;
struct resource ps_res = ZI;
if (has_cs) {
cs_res = resource_open(desc->cs.file);
if (desc->vs.file.len > 0 || desc->ps.file.len > 0) {
error_str = LIT("Pipeline desc contains both compute and vs/ps shader");
success = 0;
}
} else {
vs_res = resource_open(desc->vs.file);
ps_res = vs_res;
if (!ps_res_is_shared) {
ps_res = resource_open(desc->ps.file);
}
if (desc->vs.file.len <= 0 || desc->ps.file.len <= 0) {
error_str = LIT("Pipeline desc is missing shaders");
success = 0;
}
}
if (success) {
if (has_cs) {
dict_set(pipeline->arena, pipeline->dependencies, hash_fnv64(HASH_FNV64_BASIS, desc->cs.file), 1);
} else {
dict_set(pipeline->arena, pipeline->dependencies, hash_fnv64(HASH_FNV64_BASIS, desc->vs.file), 1);
dict_set(pipeline->arena, pipeline->dependencies, hash_fnv64(HASH_FNV64_BASIS, desc->ps.file), 1);
}
}
if (success) {
if (has_cs) {
if (!resource_exists(&cs_res)) {
error_str = string_format(scratch.arena, LIT("Compute shader source \"%F\" not found"), FMT_STR(desc->vs.file));
success = 0;
}
} else {
if (!resource_exists(&vs_res)) {
error_str = string_format(scratch.arena, LIT("Vertex shader source \"%F\" not found"), FMT_STR(desc->vs.file));
success = 0;
} else if (!resource_exists(&ps_res)) {
error_str = string_format(scratch.arena, LIT("Pixel shader source \"%F\" not found"), FMT_STR(desc->ps.file));
success = 0;
}
}
}
struct shader_compile_job_param vs = ZI;
vs.kind = SHADER_COMPILE_TASK_KIND_VS;
vs.pipeline = pipeline;
vs.shader_desc = desc->vs;
vs.shader_res = &vs_res;
struct shader_compile_job_param ps = ZI;
ps.kind = SHADER_COMPILE_TASK_KIND_PS;
ps.pipeline = pipeline;
ps.shader_desc = desc->ps;
ps.shader_res = &ps_res;
struct shader_compile_job_param cs = ZI;
cs.kind = SHADER_COMPILE_TASK_KIND_CS;
cs.pipeline = pipeline;
cs.shader_desc = desc->cs;
cs.shader_res = &cs_res;
/* Compile shaders */
if (success) {
if (has_cs) {
struct shader_compile_job_param *params[] = { &cs };
struct shader_compile_job_sig comp_sig = { .params = params };
struct snc_counter counter = ZI;
sys_run(countof(params), shader_compile_job, &comp_sig, SYS_POOL_INHERIT, SYS_PRIORITY_INHERIT, &counter);
snc_counter_wait(&counter);
success = cs.success;
} else {
struct shader_compile_job_param *params[] = { &vs, &ps };
struct shader_compile_job_sig comp_sig = { .params = params };
struct snc_counter counter = ZI;
sys_run(countof(params), shader_compile_job, &comp_sig, SYS_POOL_INHERIT, SYS_PRIORITY_INHERIT, &counter);
snc_counter_wait(&counter);
success = vs.success && ps.success;
}
}
/* Get root signature blob
* NOTE: This isn't necessary for creating the root signature (since it
* could reuse the shader blob), however we'd like to verify that the
* root signature exists and matches between shaders. */
ID3D10Blob *rootsig_blob = 0;
if (success) {
__profn("Validate root signatures");
if (has_cs) {
u32 cs_rootsig_data_len = 0;
ID3D10Blob *cs_rootsig_blob = 0;
D3DGetBlobPart(ID3D10Blob_GetBufferPointer(cs.blob), ID3D10Blob_GetBufferSize(cs.blob), D3D_BLOB_ROOT_SIGNATURE, 0, &cs_rootsig_blob);
if (cs_rootsig_blob) {
cs_rootsig_data_len = ID3D10Blob_GetBufferSize(cs_rootsig_blob);
}
if (cs_rootsig_data_len == 0) {
success = 0;
error_str = LIT("Vertex shader is missing root signature");
} else {
rootsig_blob = cs_rootsig_blob;
}
} else {
char *vs_rootsig_data = 0;
char *ps_rootsig_data = 0;
u32 vs_rootsig_data_len = 0;
u32 ps_rootsig_data_len = 0;
ID3D10Blob *vs_rootsig_blob = 0;
ID3D10Blob *ps_rootsig_blob = 0;
D3DGetBlobPart(ID3D10Blob_GetBufferPointer(vs.blob), ID3D10Blob_GetBufferSize(vs.blob), D3D_BLOB_ROOT_SIGNATURE, 0, &vs_rootsig_blob);
D3DGetBlobPart(ID3D10Blob_GetBufferPointer(ps.blob), ID3D10Blob_GetBufferSize(ps.blob), D3D_BLOB_ROOT_SIGNATURE, 0, &ps_rootsig_blob);
if (vs_rootsig_blob) {
vs_rootsig_data = ID3D10Blob_GetBufferPointer(vs_rootsig_blob);
vs_rootsig_data_len = ID3D10Blob_GetBufferSize(vs_rootsig_blob);
}
if (ps_rootsig_blob) {
ps_rootsig_data = ID3D10Blob_GetBufferPointer(ps_rootsig_blob);
ps_rootsig_data_len = ID3D10Blob_GetBufferSize(ps_rootsig_blob);
}
if (vs_rootsig_data_len == 0) {
success = 0;
error_str = LIT("Vertex shader is missing root signature");
} else if (ps_rootsig_data_len == 0) {
success = 0;
error_str = LIT("Pixel shader is missing root signature");
} else if (vs_rootsig_data_len != ps_rootsig_data_len || !MEMEQ(vs_rootsig_data, ps_rootsig_data, vs_rootsig_data_len)) {
success = 0;
error_str = LIT("Root signature mismatch between vertex and pixel shader");
} else {
rootsig_blob = vs_rootsig_blob;
}
if (ps_rootsig_blob) {
ID3D10Blob_Release(ps_rootsig_blob);
}
}
}
/* Create root signature */
ID3D12RootSignature *rootsig = 0;
if (success) {
__profn("Create root signature");
hr = ID3D12Device_CreateRootSignature(G.device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig);
if (FAILED(hr)) {
error_str = LIT("Failed to create root signature");
success = 0;
}
}
/* Create PSO */
ID3D12PipelineState *pso = 0;
if (success) {
if (has_cs) {
__profn("Create compute PSO");
D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = { 0 };
pso_desc.pRootSignature = rootsig;
if (cs.success) {
pso_desc.CS.pShaderBytecode = ID3D10Blob_GetBufferPointer(cs.blob);
pso_desc.CS.BytecodeLength = ID3D10Blob_GetBufferSize(cs.blob);
}
hr = ID3D12Device_CreateComputePipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
} else {
__profn("Create graphics PSO");
/* Default rasterizer state */
D3D12_RASTERIZER_DESC raster_desc = {
.FillMode = D3D12_FILL_MODE_SOLID,
.CullMode = D3D12_CULL_MODE_NONE,
.FrontCounterClockwise = 0,
.DepthBias = D3D12_DEFAULT_DEPTH_BIAS,
.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP,
.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
.DepthClipEnable = 1,
.MultisampleEnable = 0,
.AntialiasedLineEnable = 0,
.ForcedSampleCount = 0,
.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF
};
/* Input layout */
u32 num_input_layout_elements = 0;
for (u32 i = 0; i < countof(desc->ia); ++i) {
if (desc->ia[i].SemanticName == 0) {
break;
}
++num_input_layout_elements;
}
D3D12_INPUT_LAYOUT_DESC input_layout_desc = {
.pInputElementDescs = desc->ia,
.NumElements = num_input_layout_elements
};
/* Blend state */
D3D12_BLEND_DESC blend_desc = {
.AlphaToCoverageEnable = 0,
.IndependentBlendEnable = 1
};
for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) {
STATIC_ASSERT(countof(blend_desc.RenderTarget) <= countof(desc->rtvs));
if (desc->rtvs[i].format != DXGI_FORMAT_UNKNOWN) {
b32 blending_enabled = desc->rtvs[i].blending;
blend_desc.RenderTarget[i].BlendEnable = blending_enabled;
blend_desc.RenderTarget[i].SrcBlend = D3D12_BLEND_SRC_ALPHA;
blend_desc.RenderTarget[i].DestBlend = D3D12_BLEND_INV_SRC_ALPHA;
blend_desc.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD;
blend_desc.RenderTarget[i].SrcBlendAlpha = D3D12_BLEND_ONE;
blend_desc.RenderTarget[i].DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA;
blend_desc.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD;
blend_desc.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
} else {
break;
}
}
/* Disable depth stencil */
D3D12_DEPTH_STENCIL_DESC depth_stencil_desc = {
.DepthEnable = 0,
.StencilEnable = 0
};
/* PSO */
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { 0 };
pso_desc.pRootSignature = rootsig;
if (vs.success) {
pso_desc.VS.pShaderBytecode = ID3D10Blob_GetBufferPointer(vs.blob);
pso_desc.VS.BytecodeLength = ID3D10Blob_GetBufferSize(vs.blob);
}
if (ps.success) {
pso_desc.PS.pShaderBytecode = ID3D10Blob_GetBufferPointer(ps.blob);
pso_desc.PS.BytecodeLength = ID3D10Blob_GetBufferSize(ps.blob);
}
pso_desc.BlendState = blend_desc;
pso_desc.SampleMask = UINT_MAX;
pso_desc.RasterizerState = raster_desc;
pso_desc.DepthStencilState = depth_stencil_desc;
pso_desc.InputLayout = input_layout_desc;
pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
for (i32 i = 0; i < (i32)countof(desc->rtvs); ++i) {
STATIC_ASSERT(countof(pso_desc.RTVFormats) <= countof(desc->rtvs));
DXGI_FORMAT format = desc->rtvs[i].format;
if (format != DXGI_FORMAT_UNKNOWN) {
pso_desc.RTVFormats[pso_desc.NumRenderTargets++] = format;
} else {
break;
}
}
pso_desc.SampleDesc.Count = 1;
pso_desc.SampleDesc.Quality = 0;
hr = ID3D12Device_CreateGraphicsPipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
}
if (FAILED(hr)) {
error_str = LIT("Failed to create pipeline state object");
success = 0;
}
}
/* Copy error */
if (!success) {
ID3D10Blob *error_blob = cs.error_blob ? cs.error_blob : (vs.error_blob ? vs.error_blob : ps.error_blob);
if (error_blob) {
u64 error_blob_cstr_len = ID3D10Blob_GetBufferSize(error_blob);
char *error_blob_cstr = (char *)ID3D10Blob_GetBufferPointer(error_blob);
struct string error_blob_str = string_copy(scratch.arena, string_from_cstr(error_blob_cstr, error_blob_cstr_len));
if (string_ends_with(error_blob_str, LIT("\n"))) {
/* Remove trailing newline */
error_blob_str.len -= 1;
}
if (error_blob_str.len > 0) {
error_str = error_blob_str;
}
}
struct pipeline_error *error = arena_push(pipeline->arena, struct pipeline_error);
error->msg = string_copy(pipeline->arena, error_str);
if (pipeline->last_error) {
pipeline->last_error->next = error;
} else {
pipeline->first_error = error;
}
pipeline->last_error = error;
}
pipeline->pso = pso;
pipeline->rootsig = rootsig;
pipeline->compilation_time_ns = sys_time_ns() - start_ns;
pipeline->success = success;
if (has_cs) {
resource_close(&cs_res);
} else {
resource_close(&vs_res);
if (!ps_res_is_shared) {
resource_close(&ps_res);
}
}
if (rootsig_blob) {
ID3D10Blob_Release(rootsig_blob);
}
if (vs.blob) {
ID3D10Blob_Release(vs.blob);
}
if (vs.error_blob) {
ID3D10Blob_Release(vs.error_blob);
}
if (ps.blob) {
ID3D10Blob_Release(ps.blob);
}
if (ps.error_blob) {
ID3D10Blob_Release(ps.error_blob);
}
}
scratch_end(scratch);
}
INTERNAL void pipeline_release_now(struct pipeline *pipeline)
{
	__prof;
	/* Immediately destroy a pipeline's GPU objects and its backing arena.
	 * Callers must guarantee the GPU has retired all work using this PSO
	 * (normally reached through the fenced-release path). */
	if (pipeline->pso) {
		ID3D12PipelineState_Release(pipeline->pso);
	}
	/* BUGFIX: the root signature created during pipeline compilation was
	 * never released, leaking one reference per (re)compile. D3D12 dedupes
	 * identical root signatures by ref-counting the same object, so
	 * balancing the CreateRootSignature reference here is correct. */
	if (pipeline->rootsig) {
		ID3D12RootSignature_Release(pipeline->rootsig);
	}
	arena_release(pipeline->arena);
}
/* ========================== *
* Pipeline cache
* ========================== */
INTERNAL struct pipeline_scope *pipeline_scope_begin(void)
{
	__prof;
	/* Try to recycle a previously released scope from the global free list. */
	struct pipeline_scope *recycled = 0;
	{
		struct snc_lock lock = snc_lock_e(&G.pipelines_mutex);
		recycled = G.first_free_pipeline_scope;
		if (recycled) {
			G.first_free_pipeline_scope = recycled->next_free;
		}
		snc_unlock(&lock);
	}
	/* Reuse the recycled scope's arena, or allocate a fresh one. */
	struct arena *arena = recycled ? recycled->arena : arena_alloc(MEBI(64));
	/* The scope header lives at the base of its own arena; resetting the
	 * arena discards the stale header along with all per-scope data. */
	arena_reset(arena);
	struct pipeline_scope *scope = arena_push(arena, struct pipeline_scope);
	scope->arena = arena;
	scope->refs = dict_init(scope->arena, 64);
	return scope;
}
INTERNAL void pipeline_scope_end(struct pipeline_scope *scope)
{
	__prof;
	struct snc_lock lock = snc_lock_e(&G.pipelines_mutex);
	{
		/* Drop this scope's reference on every pipeline it pinned. */
		struct dict_entry *entry = scope->refs->first;
		while (entry) {
			struct pipeline *p = (struct pipeline *)entry->value;
			if (--p->refcount <= 0) {
				fenced_release(p, FENCED_RELEASE_KIND_PIPELINE);
			}
			entry = entry->next;
		}
		/* Return the scope (and its arena) to the free list for reuse. */
		scope->next_free = G.first_free_pipeline_scope;
		G.first_free_pipeline_scope = scope;
	}
	snc_unlock(&lock);
}
INTERNAL READONLY struct pipeline g_nil_pipeline = ZI;
INTERNAL struct pipeline *pipeline_from_name(struct pipeline_scope *scope, struct string name)
{
__prof;
struct pipeline *res = &g_nil_pipeline;
u64 hash = hash_fnv64(HASH_FNV64_BASIS, name);
struct pipeline *tmp = dict_get(scope->refs, hash);
if (tmp) {
res = tmp;
} else {
{
struct snc_lock lock = snc_lock_e(&G.pipelines_mutex);
tmp = dict_get(G.top_successful_pipelines, hash);
if (tmp) {
++tmp->refcount;
}
snc_unlock(&lock);
}
if (tmp) {
dict_set(scope->arena, scope->refs, hash, (u64)tmp);
res = tmp;
}
}
return res;
}
INTERNAL void pipeline_register(u64 num_pipelines, struct pipeline **pipelines)
{
	__prof;
	/* Publish freshly compiled pipelines into the global caches, displacing
	 * (and unref'ing) any previous entries with the same hash. */
	struct snc_lock lock = snc_lock_e(&G.pipelines_mutex);
	{
		for (u64 idx = 0; idx < num_pipelines; ++idx) {
			struct pipeline *p = pipelines[idx];
			u64 hash = p->hash;
			/* All-pipelines dict */
			struct pipeline *displaced = (struct pipeline *)dict_get(G.top_pipelines, hash);
			if (displaced && --displaced->refcount <= 0) {
				fenced_release(displaced, FENCED_RELEASE_KIND_PIPELINE);
			}
			dict_set(G.pipelines_arena, G.top_pipelines, hash, (u64)p);
			++p->refcount;
			/* Successful compiles additionally enter the success dict
			 * (carrying a second reference). */
			if (p->success) {
				displaced = (struct pipeline *)dict_get(G.top_successful_pipelines, hash);
				if (displaced && --displaced->refcount <= 0) {
					fenced_release(displaced, FENCED_RELEASE_KIND_PIPELINE);
				}
				dict_set(G.pipelines_arena, G.top_successful_pipelines, hash, (u64)p);
				++p->refcount;
			}
		}
	}
	snc_unlock(&lock);
}
#if RESOURCE_RELOADING
/* File-watch callback: a shader source file changed on disk.
 * Finds every registered pipeline that depends on the file, recompiles them
 * in parallel, reports results, and republishes the new pipelines. */
INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(pipeline_resource_watch_callback, name)
{
	__prof;
	struct arena_temp scratch = scratch_begin_no_conflict();
	/* Find dirty pipelines */
	u64 hash = hash_fnv64(HASH_FNV64_BASIS, name);
	u32 num_pipelines = 0;
	/* NOTE(review): arena_push_dry appears to capture the current arena
	 * position without allocating, so the descs pushed in the loop below
	 * form a contiguous array starting here — confirm against arena.h. */
	struct pipeline_desc *pipeline_descs = arena_push_dry(scratch.arena, struct pipeline_desc);
	{
		/* Shared lock: we only read the pipeline dict while collecting descs. */
		struct snc_lock lock = snc_lock_s(&G.pipelines_mutex);
		for (struct dict_entry *entry = G.top_pipelines->first; entry; entry = entry->next) {
			struct pipeline *pipeline = (struct pipeline *)entry->value;
			if (dict_get(pipeline->dependencies, hash) == 1) {
				logf_debug("Change detected in shader source file \"%F\", recompiling pipeline \"%F\"", FMT_STR(name), FMT_STR(pipeline->name));
				*arena_push(scratch.arena, struct pipeline_desc) = pipeline->desc;
				++num_pipelines;
			}
		}
		snc_unlock(&lock);
	}
	/* Recompile dirty pipelines */
	if (num_pipelines > 0) {
		__profn("Compile dirty pipelines");
		struct pipeline **pipelines = arena_push_array(scratch.arena, struct pipeline *, num_pipelines);
		/* Fan the compiles out to the job system and wait for completion. */
		{
			struct pipeline_alloc_job_sig sig = ZI;
			sig.descs_in = pipeline_descs;
			sig.pipelines_out = pipelines;
			struct snc_counter counter = ZI;
			sys_run(num_pipelines, pipeline_alloc_job, &sig, SYS_POOL_INHERIT, SYS_PRIORITY_INHERIT, &counter);
			snc_counter_wait(&counter);
		}
		/* Report per-pipeline results (shared lock: only reads the success dict). */
		{
			struct snc_lock lock = snc_lock_s(&G.pipelines_mutex);
			for (u32 i = 0; i < num_pipelines; ++i) {
				struct pipeline *pipeline = pipelines[i];
				if (pipeline->success) {
					logf_success("Successfully compiled pipeline \"%F\" in %F seconds", FMT_STR(pipeline->name), FMT_FLOAT(SECONDS_FROM_NS(pipeline->compilation_time_ns)));
				} else {
					{
						struct string error = pipeline->first_error ? pipeline->first_error->msg : LIT("Unknown error");
						struct string msg = string_format(scratch.arena, LIT("Error compiling pipeline \"%F\":\n%F"), FMT_STR(pipeline->name), FMT_STR(error));
						log_error(msg);
					}
					struct pipeline *old_pipeline = dict_get(G.top_successful_pipelines, pipeline->hash);
					if (!old_pipeline) {
						/* If no previously successful pipeline exists, then show a message box rather than logging since logs may not be visible to user */
						struct string error = pipeline->first_error ? pipeline->first_error->msg : LIT("Unknown error");
						struct string msg = string_format(scratch.arena, LIT("Error compiling pipeline \"%F\":\n\n%F"), FMT_STR(pipeline->name), FMT_STR(error));
						sys_message_box(SYS_MESSAGE_BOX_KIND_WARNING, msg);
					}
				}
			}
			snc_unlock(&lock);
		}
		/* Publish results (success or failure) into the global caches. */
		pipeline_register(num_pipelines, pipelines);
	}
	scratch_end(scratch);
}
#endif
/* ========================== *
* Descriptor
* ========================== */
/* Allocate one CPU descriptor slot from the heap, preferring the free list
 * over carving a new slot. Panics when the heap is exhausted. */
INTERNAL struct descriptor *descriptor_alloc(struct cpu_descriptor_heap *dh)
{
	__prof;
	struct descriptor *slot = 0;
	u32 slot_index = 0;
	D3D12_CPU_DESCRIPTOR_HANDLE slot_handle = ZI;
	{
		struct snc_lock lock = snc_lock_e(&dh->mutex);
		slot = dh->first_free_descriptor;
		if (slot) {
			/* Recycle: keep the handle/index the slot already owns. */
			dh->first_free_descriptor = slot->next_free;
			slot_handle = slot->handle;
			slot_index = slot->index;
		} else {
			/* Carve a brand new slot out of the heap. */
			if (dh->num_descriptors_reserved >= dh->num_descriptors_capacity) {
				sys_panic(LIT("Max descriptors reached in heap"));
			}
			slot = arena_push_no_zero(dh->arena, struct descriptor);
			slot_index = dh->num_descriptors_reserved++;
			slot_handle.ptr = dh->handle.ptr + (slot_index * dh->descriptor_size);
		}
		snc_unlock(&lock);
	}
	/* (Re)initialize outside the lock. */
	MEMZERO_STRUCT(slot);
	slot->heap = dh;
	slot->handle = slot_handle;
	slot->index = slot_index;
	return slot;
}
/* Return a descriptor slot to its owning heap's free list. */
INTERNAL void descriptor_release(struct descriptor *descriptor)
{
	struct cpu_descriptor_heap *heap = descriptor->heap;
	struct snc_lock lock = snc_lock_e(&heap->mutex);
	descriptor->next_free = heap->first_free_descriptor;
	heap->first_free_descriptor = descriptor;
	snc_unlock(&lock);
}
/* ========================== *
* CPU descriptor heap
* ========================== */
/* Create a CPU-visible (non-shader-visible) descriptor heap of the given
 * type, sized from the global per-type count/size tables. Panics on any
 * failure, since descriptor heaps are created once at startup. */
INTERNAL struct cpu_descriptor_heap *cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type)
{
	__prof;
	struct cpu_descriptor_heap *dh = 0;
	{
		/* The heap struct lives at the base of its own arena. */
		struct arena *arena = arena_alloc(MEBI(64));
		dh = arena_push(arena, struct cpu_descriptor_heap);
		dh->arena = arena;
	}
	u32 num_descriptors = 0;
	u32 descriptor_size = 0;
	if (type < (i32)countof(G.desc_counts) && type < (i32)countof(G.desc_sizes)) {
		num_descriptors = G.desc_counts[type];
		descriptor_size = G.desc_sizes[type];
	}
	if (num_descriptors == 0 || descriptor_size == 0) {
		sys_panic(LIT("Unsupported CPU descriptor type"));
	}
	/* BUGFIX: record the heap type. command_list_push_descriptor_heap
	 * asserts on dh->type, which was previously left zero-initialized and
	 * only passed by coincidence because
	 * D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV == 0. */
	dh->type = type;
	dh->num_descriptors_capacity = num_descriptors;
	dh->descriptor_size = descriptor_size;
	D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
	desc.Type = type;
	desc.NumDescriptors = num_descriptors;
	HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&dh->heap);
	if (FAILED(hr)) {
		sys_panic(LIT("Failed to create CPU descriptor heap"));
	}
	ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(dh->heap, &dh->handle);
	return dh;
}
#if 0
INTERNAL void cpu_descriptor_heap_release(struct cpu_descriptor_heap *dh)
{
/* TODO */
(UNUSED)dh;
}
#endif
/* ========================== *
* Sig
* ========================== */
/* Per-frame command recording state backed by growable arenas.
 * Recycled through a free list (see next_free). */
struct sig {
	struct arena *arena; /* owns this struct */
	/* Material instances */
	u32 num_material_instance_descs;
	struct arena *material_instance_descs_arena; /* array of struct material_instance_desc */
	/* Grids */
	u32 num_material_grid_descs;
	struct arena *material_grid_descs_arena; /* array of struct material_grid_desc */
	/* Shapes */
	struct arena *shape_verts_arena;   /* array of struct sh_shape_vert */
	struct arena *shape_indices_arena; /* array of u32, rebased to the vert arena */
	/* Resources */
	struct v2i32 old_size;
	struct dx12_resource *albedo;
	struct dx12_resource *emittance;
	struct dx12_resource *emittance_flood_a;
	struct dx12_resource *emittance_flood_b;
	struct sig *next_free;
};
/* One recorded GP_CMD_KIND_DRAW_MATERIAL command. */
struct material_instance_desc {
	struct xform xf;
	struct sprite_tag sprite;
	struct dx12_resource *texture;
	struct clip_rect clip;
	u32 tint;
	u32 emittance;
	i32 grid_id; /* 0-based index into the grid descs; -1 = no grid (see gp_push_cmd) */
};
/* One recorded GP_CMD_KIND_PUSH_GRID command (copied verbatim from the
 * public gp_cmd_desc grid fields). */
struct material_grid_desc {
	f32 line_thickness;
	f32 line_spacing;
	struct v2 offset;
	u32 bg0_color;
	u32 bg1_color;
	u32 line_color;
	u32 x_color;
	u32 y_color;
};
/* Allocate a sig with its per-stream recording arenas. */
INTERNAL struct sig *sig_alloc(void)
{
	__prof;
	/* The sig header lives at the base of its own arena. */
	struct arena *arena = arena_alloc(MEBI(64));
	struct sig *sig = arena_push(arena, struct sig);
	sig->arena = arena;
	/* Growable storage for each recorded command stream. */
	sig->material_instance_descs_arena = arena_alloc(GIBI(1));
	sig->material_grid_descs_arena = arena_alloc(GIBI(1));
	sig->shape_verts_arena = arena_alloc(GIBI(1));
	sig->shape_indices_arena = arena_alloc(GIBI(1));
	return sig;
}
/* Clear all recorded commands so the sig can record a fresh frame. */
INTERNAL void sig_reset(struct sig *sig)
{
	__prof;
	/* Material instances */
	arena_reset(sig->material_instance_descs_arena);
	sig->num_material_instance_descs = 0;
	/* Grids */
	arena_reset(sig->material_grid_descs_arena);
	sig->num_material_grid_descs = 0;
	/* Shape geometry */
	arena_reset(sig->shape_verts_arena);
	arena_reset(sig->shape_indices_arena);
}
struct gp_sig *gp_sig_alloc(void)
{
	__prof;
	/* The public gp_sig handle is an opaque alias for struct sig. */
	return (struct gp_sig *)sig_alloc();
}
/* Record one command into the sig's per-stream arenas.
 * Returns a 1-based command id for material and grid commands; returns 0
 * for shape commands, unknown kinds, or a null sig. */
i32 gp_push_cmd(struct gp_sig *gp_sig, struct gp_cmd_desc *cmd_desc)
{
	i32 ret = 0;
	struct sig *sig = (struct sig *)gp_sig;
	if (sig) {
		switch (cmd_desc->kind) {
		default: break;
		case GP_CMD_KIND_DRAW_MATERIAL:
		{
			struct material_instance_desc *instance_desc = arena_push(sig->material_instance_descs_arena, struct material_instance_desc);
			instance_desc->xf = cmd_desc->material.xf;
			instance_desc->sprite = cmd_desc->material.sprite;
			instance_desc->texture = (struct dx12_resource *)cmd_desc->material.texture;
			instance_desc->clip = cmd_desc->material.clip;
			instance_desc->tint = cmd_desc->material.tint;
			instance_desc->emittance = cmd_desc->material.emittance;
			/* grid_cmd_id is the 1-based id returned by GP_CMD_KIND_PUSH_GRID
			 * (0 = none); store as a 0-based index (-1 = none). */
			instance_desc->grid_id = cmd_desc->material.grid_cmd_id - 1;
			ret = ++sig->num_material_instance_descs;
		} break;
		case GP_CMD_KIND_DRAW_SHAPE:
		{
			u32 color = cmd_desc->shape.color;
			struct sh_shape_vert *verts = arena_push_array_no_zero(sig->shape_verts_arena, struct sh_shape_vert, cmd_desc->shape.vertices.count);
			u32 *indices = arena_push_array_no_zero(sig->shape_indices_arena, u32, cmd_desc->shape.indices.count);
			for (u32 i = 0; i < cmd_desc->shape.vertices.count; ++i) {
				struct sh_shape_vert *v = &verts[i];
				v->pos = sh_float2_from_v2(cmd_desc->shape.vertices.points[i]);
				v->color_srgb = sh_uint_from_u32(color);
			}
			/* All shape commands share one vertex arena; rebase this command's
			 * indices by its first vertex's position within that arena. */
			u32 vert_offset = verts - (struct sh_shape_vert *)arena_base(sig->shape_verts_arena);
			for (u32 i = 0; i < cmd_desc->shape.indices.count; ++i) {
				indices[i] = cmd_desc->shape.indices.indices[i] + vert_offset;
			}
		} break;
		case GP_CMD_KIND_PUSH_GRID:
		{
			struct material_grid_desc *grid_desc = arena_push(sig->material_grid_descs_arena, struct material_grid_desc);
			grid_desc->line_thickness = cmd_desc->grid.line_thickness;
			grid_desc->line_spacing = cmd_desc->grid.line_spacing;
			grid_desc->offset = cmd_desc->grid.offset;
			grid_desc->bg0_color = cmd_desc->grid.bg0_color;
			grid_desc->bg1_color = cmd_desc->grid.bg1_color;
			grid_desc->line_color = cmd_desc->grid.line_color;
			grid_desc->x_color = cmd_desc->grid.x_color;
			grid_desc->y_color = cmd_desc->grid.y_color;
			ret = ++sig->num_material_grid_descs;
		} break;
		}
	}
	return ret;
}
/* ========================== *
* Fenced release
* ========================== */
INTERNAL void fenced_release(void *data, enum fenced_release_kind kind)
{
struct fenced_release_data fr = ZI;
fr.kind = kind;
fr.ptr = data;
u64 fr_targets[countof(G.fenced_release_targets)] = ZI;
/* Read current fence target values from command queues */
for (u32 i = 0; i < countof(G.command_queues); ++i) {
struct command_queue *cq = G.command_queues[i];
struct snc_lock lock = snc_lock_s(&cq->submit_fence_mutex);
{
fr_targets[i] = cq->submit_fence_target;
}
snc_unlock(&lock);
}
/* Push data to release queue */
{
struct snc_lock lock = snc_lock_e(&G.fenced_releases_mutex);
{
*arena_push(G.fenced_releases_arena, struct fenced_release_data) = fr;
MEMCPY(G.fenced_release_targets, fr_targets, sizeof(fr_targets));
}
snc_unlock(&lock);
}
/* Wake evictor */
{
struct snc_lock lock = snc_lock_e(&G.evictor_wake_mutex);
{
++G.evictor_wake_gen;
snc_cv_signal(&G.evictor_wake_cv, I32_MAX);
}
snc_unlock(&lock);
}
}
/* ========================== *
* Resource
* ========================== */
/* Which descriptor views to create for a dx12_resource at allocation time.
 * NOTE: flag values start at bit 1; bit 0 is unused. */
enum dx12_resource_view_flags {
	DX12_RESOURCE_VIEW_FLAG_NONE = 0,
	DX12_RESOURCE_VIEW_FLAG_CBV = (1 << 1), /* constant buffer view */
	DX12_RESOURCE_VIEW_FLAG_SRV = (1 << 2), /* shader resource view */
	DX12_RESOURCE_VIEW_FLAG_UAV = (1 << 3), /* unordered access view */
	DX12_RESOURCE_VIEW_FLAG_RTV = (1 << 4)  /* render target view */
};
/* Create a committed D3D12 resource plus any requested descriptor views
 * (see enum dx12_resource_view_flags). Wrapper structs are recycled through
 * a global free list. Panics on creation failure (TODO below). */
INTERNAL struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state, i32 view_flags)
{
	__prof;
	struct dx12_resource *r = 0;
	{
		struct snc_lock lock = snc_lock_e(&G.resources_mutex);
		if (G.first_free_resource) {
			r = G.first_free_resource;
			G.first_free_resource = r->next_free;
		} else {
			r = arena_push_no_zero(G.resources_arena, struct dx12_resource);
		}
		snc_unlock(&lock);
	}
	MEMZERO_STRUCT(r);
	/* Render targets get an optimized clear value of transparent black. */
	D3D12_CLEAR_VALUE clear_value = { .Format = desc.Format, .Color = { 0 } };
	D3D12_CLEAR_VALUE *clear_value_ptr = desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET ? &clear_value : 0;
	HRESULT hr = ID3D12Device_CreateCommittedResource(G.device, &heap_props, heap_flags, &desc, initial_state, clear_value_ptr, &IID_ID3D12Resource, (void **)&r->resource);
	if (FAILED(hr)) {
		/* TODO: Don't panic */
		sys_panic(LIT("Failed to create resource"));
	}
	r->state = initial_state;
	if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) {
		r->gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->resource);
	}
	if (view_flags & DX12_RESOURCE_VIEW_FLAG_CBV) {
		r->cbv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
		D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = ZI;
		cbv_desc.BufferLocation = r->gpu_address;
		/* BUGFIX (resolves old FIXME): size the view to the actual buffer
		 * width (CBVs only apply to buffers, so desc.Width is the byte size),
		 * rounded up to the required 256-byte multiple, instead of a
		 * hard-coded 64 KiB that could extend past the end of the resource. */
		cbv_desc.SizeInBytes = (u32)((desc.Width + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1) & ~(u64)(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1));
		ID3D12Device_CreateConstantBufferView(G.device, &cbv_desc, r->cbv_descriptor->handle);
	}
	if (view_flags & DX12_RESOURCE_VIEW_FLAG_SRV) {
		r->srv_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
		ID3D12Device_CreateShaderResourceView(G.device, r->resource, 0, r->srv_descriptor->handle);
	}
	if (view_flags & DX12_RESOURCE_VIEW_FLAG_UAV) {
		r->uav_descriptor = descriptor_alloc(G.cbv_srv_uav_heap);
		ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, 0, 0, r->uav_descriptor->handle);
	}
	if (view_flags & DX12_RESOURCE_VIEW_FLAG_RTV) {
		r->rtv_descriptor = descriptor_alloc(G.rtv_heap);
		ID3D12Device_CreateRenderTargetView(G.device, r->resource, 0, r->rtv_descriptor->handle);
	}
	return r;
}
INTERNAL void dx12_resource_release_now(struct dx12_resource *t)
{
__prof;
/* Release descriptors */
/* TODO: Batch lock heaps */
if (t->cbv_descriptor) {
descriptor_release(t->cbv_descriptor);
}
if (t->srv_descriptor) {
descriptor_release(t->srv_descriptor);
}
if (t->uav_descriptor) {
descriptor_release(t->uav_descriptor);
}
if (t->rtv_descriptor) {
descriptor_release(t->rtv_descriptor);
}
/* Release resource */
ID3D12Resource_Release(t->resource);
/* Add to free list */
struct snc_lock lock = snc_lock_e(&G.resources_mutex);
t->next_free = G.first_free_resource;
G.first_free_resource = t;
snc_unlock(&lock);
}
void gp_resource_release(struct gp_resource *resource)
{
	/* Defer destruction until all submitted GPU work has retired. */
	fenced_release((struct dx12_resource *)resource, FENCED_RELEASE_KIND_RESOURCE);
}
/* ========================== *
* Resource barrier
* ========================== */
/* High-level description of a single barrier for dx12_resource_barriers. */
struct dx12_resource_barrier_desc {
	enum D3D12_RESOURCE_BARRIER_TYPE type; /* TRANSITION or UAV */
	struct dx12_resource *resource;
	enum D3D12_RESOURCE_STATES new_state; /* 0 if type != D3D12_RESOURCE_BARRIER_TYPE_TRANSITION */
};
/* Translate high-level barrier descs into D3D12 barriers and record them on
 * the command list. Transitions whose resource is already in the target
 * state are skipped; the tracked state is updated as a side effect. */
INTERNAL void dx12_resource_barriers(ID3D12GraphicsCommandList *cl, i32 num_barriers, struct dx12_resource_barrier_desc *descs)
{
	__prof;
	struct arena_temp scratch = scratch_begin_no_conflict();
	struct D3D12_RESOURCE_BARRIER *rbs = arena_push_array_no_zero(scratch.arena, struct D3D12_RESOURCE_BARRIER, num_barriers);
	i32 num_rbs = 0;
	for (i32 i = 0; i < num_barriers; ++i) {
		struct dx12_resource *resource = descs[i].resource;
		switch (descs[i].type) {
		case D3D12_RESOURCE_BARRIER_TYPE_TRANSITION:
		{
			enum D3D12_RESOURCE_STATES old_state = resource->state;
			enum D3D12_RESOURCE_STATES new_state = descs[i].new_state;
			if (new_state != old_state) {
				struct D3D12_RESOURCE_BARRIER *rb = &rbs[num_rbs++];
				MEMZERO_STRUCT(rb);
				rb->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
				rb->Flags = 0;
				rb->Transition.pResource = resource->resource;
				rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
				rb->Transition.StateBefore = old_state;
				rb->Transition.StateAfter = new_state;
				resource->state = new_state;
			}
		} break;
		case D3D12_RESOURCE_BARRIER_TYPE_UAV:
		{
			struct D3D12_RESOURCE_BARRIER *rb = &rbs[num_rbs++];
			MEMZERO_STRUCT(rb);
			rb->Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
			rb->Flags = 0;
			rb->UAV.pResource = resource->resource;
		} break;
		default:
			/* Unknown barrier */
			ASSERT(0);
			break;
		}
	}
	if (num_rbs > 0) {
		ID3D12GraphicsCommandList_ResourceBarrier(cl, num_rbs, rbs);
	}
	scratch_end(scratch);
}
/* ========================== *
* Command queue
* ========================== */
INTERNAL struct command_list_pool *command_list_pool_alloc(struct command_queue *cq);
/* Job: create one command queue (D3D12 queue + submit fence + list pool)
 * from sig->descs_in[job.id] and publish it in sig->cqs_out[job.id]. */
INTERNAL SYS_JOB_DEF(command_queue_alloc_job, job)
{
	__prof;
	struct command_queue_alloc_job_sig *sig = job.sig;
	struct command_queue_desc *desc = &sig->descs_in[job.id];
	/* The queue struct lives at the base of its own arena. */
	struct arena *arena = arena_alloc(GIBI(64));
	struct command_queue *cq = arena_push(arena, struct command_queue);
	cq->arena = arena;
	cq->desc = *desc;
	/* D3D12 queue */
	D3D12_COMMAND_QUEUE_DESC dx12_desc = ZI;
	dx12_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
	dx12_desc.Type = desc->type;
	dx12_desc.Priority = desc->priority;
	HRESULT hr = ID3D12Device_CreateCommandQueue(G.device, &dx12_desc, &IID_ID3D12CommandQueue, (void **)&cq->cq);
	if (FAILED(hr)) {
		sys_panic(LIT("Failed to create command queue"));
	}
	/* Fence tracking submissions on this queue */
	hr = ID3D12Device_CreateFence(G.device, 0, 0, &IID_ID3D12Fence, (void **)&cq->submit_fence);
	if (FAILED(hr)) {
		sys_panic(LIT("Failed to create command queue fence"));
	}
	cq->cl_pool = command_list_pool_alloc(cq);
	sig->cqs_out[job.id] = cq;
}
/* Stub: queue teardown is not implemented yet.
 * TODO: release cq->cq, cq->submit_fence, the command list pool, and the
 * backing arena once shutdown ordering is sorted out. */
INTERNAL void command_queue_release(struct command_queue *cq)
{
	__prof;
	(UNUSED)cq;
	//ID3D12CommandQueue_Release(cq->cq);
}
/* ========================== *
* Command list
* ========================== */
/* Create an empty command list pool bound to the given queue. */
INTERNAL struct command_list_pool *command_list_pool_alloc(struct command_queue *cq)
{
	/* The pool struct lives at the base of its own arena. */
	struct arena *arena = arena_alloc(GIBI(64));
	struct command_list_pool *pool = arena_push(arena, struct command_list_pool);
	pool->arena = arena;
	pool->cq = cq;
	return pool;
}
/* Acquire a command list from the pool and reset it ready for recording.
 * Recycles the first previously submitted list whose GPU work has completed
 * (per the queue's submit fence); otherwise allocates new D3D12 objects.
 * Takes the global record lock shared for the whole recording; it is
 * released in command_list_close. */
INTERNAL struct command_list *command_list_open(struct command_list_pool *pool)
{
	__prof;
	struct command_queue *cq = pool->cq;
	u64 completed_fence_value = ID3D12Fence_GetCompletedValue(cq->submit_fence);
	struct command_list *cl = 0;
	struct ID3D12GraphicsCommandList *old_cl = 0;
	struct ID3D12CommandAllocator *old_ca = 0;
	{
		struct snc_lock lock = snc_lock_e(&pool->mutex);
		/* Find first command list ready for reuse */
		for (struct command_list *tmp = pool->first_submitted_command_list; tmp; tmp = tmp->next_submitted) {
			if (completed_fence_value >= tmp->submitted_fence_target) {
				cl = tmp;
				break;
			}
		}
		if (cl) {
			/* Remove from submitted list (doubly linked); keep the D3D12
			 * objects so they can be reset and reused below. */
			old_cl = cl->cl;
			old_ca = cl->ca;
			struct command_list *prev = cl->prev_submitted;
			struct command_list *next = cl->next_submitted;
			if (prev) {
				prev->next_submitted = next;
			} else {
				pool->first_submitted_command_list = next;
			}
			if (next) {
				next->prev_submitted = prev;
			} else {
				pool->last_submitted_command_list = prev;
			}
		} else {
			cl = arena_push_no_zero(pool->arena, struct command_list);
		}
		snc_unlock(&lock);
	}
	MEMZERO_STRUCT(cl);
	cl->cq = cq;
	cl->pool = pool;
	/* NOTE(review): held shared for the whole recording — presumably an
	 * exclusive acquire elsewhere quiesces all recording threads; confirm. */
	cl->global_record_lock = snc_lock_s(&G.global_command_list_record_mutex);
	HRESULT hr = 0;
	if (old_cl) {
		cl->cl = old_cl;
		cl->ca = old_ca;
	} else {
		hr = ID3D12Device_CreateCommandAllocator(G.device, cq->desc.type, &IID_ID3D12CommandAllocator, (void **)&cl->ca);
		if (FAILED(hr)) {
			sys_panic(LIT("Failed to create command allocator"));
		}
		hr = ID3D12Device_CreateCommandList(G.device, 0, cq->desc.type, cl->ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->cl);
		if (FAILED(hr)) {
			sys_panic(LIT("Failed to create command list"));
		}
		/* New lists start in the recording state; close so the common reset
		 * path below applies to both new and recycled lists. */
		hr = ID3D12GraphicsCommandList_Close(cl->cl);
		if (FAILED(hr)) {
			sys_panic(LIT("Failed to close command list during initialization"));
		}
	}
	/* Reset */
	hr = ID3D12CommandAllocator_Reset(cl->ca);
	if (FAILED(hr)) {
		sys_panic(LIT("Failed to reset command allocator"));
	}
	hr = ID3D12GraphicsCommandList_Reset(cl->cl, cl->ca, 0);
	if (FAILED(hr)) {
		sys_panic(LIT("Failed to reset command list"));
	}
	return cl;
}
/* TODO: Allow multiple command list submissions */
/* Close and submit a command list on its queue.
 * Returns the fence target signaled for this submission; everything the
 * list referenced (descriptor heaps, command buffers, the list itself) is
 * filed on "submitted" lists keyed by that target so it can be recycled
 * once the GPU passes it. Releases the shared record lock taken in
 * command_list_open. */
INTERNAL u64 command_list_close(struct command_list *cl)
{
	__prof;
	struct command_queue *cq = cl->cq;
	struct command_list_pool *pool = cl->pool;
	/* Close */
	{
		__profn("Close DX12 command list");
		HRESULT hr = ID3D12GraphicsCommandList_Close(cl->cl);
		if (FAILED(hr)) {
			/* TODO: Don't panic */
			sys_panic(LIT("Failed to close command list before execution"));
		}
	}
	/* Submit: global submit lock (shared) outside, per-queue fence lock
	 * (exclusive) inside, so the target increment and the Execute/Signal
	 * pair stay atomic with respect to other submitters on this queue. */
	u64 submit_fence_target = 0;
	{
		__profn("Execute");
		struct snc_lock submit_lock = snc_lock_s(&G.global_submit_mutex);
		struct snc_lock fence_lock = snc_lock_e(&cq->submit_fence_mutex);
		{
			submit_fence_target = ++cq->submit_fence_target;
			ID3D12CommandQueue_ExecuteCommandLists(cq->cq, 1, (ID3D12CommandList **)&cl->cl);
			ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, submit_fence_target);
		}
		snc_unlock(&fence_lock);
		snc_unlock(&submit_lock);
	}
	/* Add descriptor heaps to submitted list */
	{
		struct snc_lock lock = snc_lock_e(&G.command_descriptor_heaps_mutex);
		for (struct command_descriptor_heap *cdh = cl->first_command_descriptor_heap; cdh; cdh = cdh->next_in_command_list) {
			cdh->submitted_cq = cq;
			cdh->submitted_fence_target = submit_fence_target;
			if (G.last_submitted_command_descriptor_heap) {
				G.last_submitted_command_descriptor_heap->next_submitted = cdh;
			} else {
				G.first_submitted_command_descriptor_heap = cdh;
			}
			G.last_submitted_command_descriptor_heap = cdh;
		}
		snc_unlock(&lock);
	}
	/* Add command buffers to submitted list (per size-bucketed group) */
	{
		struct snc_lock lock = snc_lock_e(&G.command_buffers_mutex);
		for (struct command_buffer *cb = cl->first_command_buffer; cb; cb = cb->next_in_command_list) {
			struct command_buffer_group *group = cb->group;
			cb->submitted_cq = cq;
			cb->submitted_fence_target = submit_fence_target;
			if (group->last_submitted) {
				group->last_submitted->next_submitted = cb;
			} else {
				group->first_submitted = cb;
			}
			group->last_submitted = cb;
		}
		snc_unlock(&lock);
	}
	/* Add command list to pool submitted list */
	snc_unlock(&cl->global_record_lock);
	cl->submitted_fence_target = submit_fence_target;
	{
		struct snc_lock lock = snc_lock_e(&pool->mutex);
		if (pool->last_submitted_command_list) {
			pool->last_submitted_command_list->next_submitted = cl;
		} else {
			pool->first_submitted_command_list = cl;
		}
		pool->last_submitted_command_list = cl;
		snc_unlock(&lock);
	}
	return submit_fence_target;
}
/* ========================== *
* Command descriptor heap (GPU / shader visible descriptor heap)
* ========================== */
/* Attach a shader-visible (GPU) descriptor heap to the command list,
 * populated with a snapshot of the given CPU heap's descriptors.
 * Recycles the first previously submitted GPU heap whose fence has passed;
 * otherwise creates a new shader-visible heap of fixed capacity. */
INTERNAL struct command_descriptor_heap *command_list_push_descriptor_heap(struct command_list *cl, struct cpu_descriptor_heap *dh_cpu)
{
	__prof;
	ASSERT(dh_cpu->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); /* Src heap must have expected type */
	/* Allocate GPU heap */
	struct command_descriptor_heap *cdh = 0;
	ID3D12DescriptorHeap *old_heap = 0;
	D3D12_CPU_DESCRIPTOR_HANDLE old_start_cpu_handle = ZI;
	D3D12_GPU_DESCRIPTOR_HANDLE old_start_gpu_handle = ZI;
	{
		struct snc_lock lock = snc_lock_e(&G.command_descriptor_heaps_mutex);
		/* Find first heap ready for reuse */
		for (struct command_descriptor_heap *tmp = G.first_submitted_command_descriptor_heap; tmp; tmp = tmp->next_submitted) {
			/* TODO: Cache completed fence values */
			u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence);
			if (completed_fence_value >= tmp->submitted_fence_target) {
				cdh = tmp;
				break;
			}
		}
		if (cdh) {
			/* Remove from submitted list (doubly linked); keep the D3D12
			 * heap and its cached handles for reuse below. */
			old_heap = cdh->heap;
			old_start_cpu_handle = cdh->start_cpu_handle;
			old_start_gpu_handle = cdh->start_gpu_handle;
			struct command_descriptor_heap *prev = cdh->prev_submitted;
			struct command_descriptor_heap *next = cdh->next_submitted;
			if (prev) {
				prev->next_submitted = next;
			} else {
				G.first_submitted_command_descriptor_heap = next;
			}
			if (next) {
				next->prev_submitted = prev;
			} else {
				G.last_submitted_command_descriptor_heap = prev;
			}
		} else {
			/* No available heap available for reuse, allocate new */
			cdh = arena_push_no_zero(G.command_descriptor_heaps_arena, struct command_descriptor_heap);
		}
		snc_unlock(&lock);
	}
	MEMZERO_STRUCT(cdh);
	if (old_heap) {
		cdh->heap = old_heap;
		cdh->start_cpu_handle = old_start_cpu_handle;
		cdh->start_gpu_handle = old_start_gpu_handle;
	} else {
		D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
		desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
		desc.NumDescriptors = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
		desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
		HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&cdh->heap);
		if (FAILED(hr)) {
			sys_panic(LIT("Failed to create GPU descriptor heap"));
		}
		ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cdh->heap, &cdh->start_cpu_handle);
		ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(cdh->heap, &cdh->start_gpu_handle);
	}
	/* Copy CPU heap (shared lock: only reads the source heap's descriptors) */
	{
		struct snc_lock lock = snc_lock_s(&dh_cpu->mutex);
		ID3D12Device_CopyDescriptorsSimple(G.device, dh_cpu->num_descriptors_reserved, cdh->start_cpu_handle, dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
		snc_unlock(&lock);
	}
	/* Insert into command list */
	cdh->next_in_command_list = cl->first_command_descriptor_heap;
	cl->first_command_descriptor_heap = cdh;
	return cdh;
}
/* ========================== *
* Command buffer
* ========================== */
/* Map a command-buffer size class to a dictionary hash by scrambling the
 * size through the PRNG seed function. */
INTERNAL u64 command_buffer_hash_from_size(u64 size)
{
	return rand_u64_from_seed(size);
}
/* Round v up to the next power of two (v itself if already a power of two;
 * 0 maps to 0). Classic bit-smearing: fill all bits below the top set bit,
 * then add one. */
INTERNAL u64 align_up_pow2(u64 v)
{
	u64 res = 0;
	if (v > 0) {
		res = v - 1;
		/* Smear the highest set bit into every lower position */
		for (u32 shift = 1; shift < 64; shift <<= 1) {
			res |= res >> shift;
		}
		++res;
	}
	return res;
}
/* Acquire an upload-heap command buffer large enough for `data`, copy the
 * bytes into it, and link it onto `cl`'s per-command-list buffer chain.
 * Buffers are pooled per power-of-two size class; a buffer is reused only
 * once the submit fence of its previous command queue shows that submission
 * completed on the GPU. Panics if the upload resource cannot be mapped. */
INTERNAL struct command_buffer *command_list_push_buffer(struct command_list *cl, struct string data)
{
	__prof;
	/* Determine size */
	/* Round up to a power-of-two size class with a floor, so small requests
	 * share classes and reuse hits are frequent. */
	u64 size = max_u64(DX12_COMMAND_BUFFER_MIN_SIZE, align_up_pow2(data.len));
	/* Allocate buffer */
	struct command_buffer_group *cb_group = 0;
	struct command_buffer *cb = 0;
	struct dx12_resource *resource = 0;
	{
		struct snc_lock lock = snc_lock_e(&G.command_buffers_mutex);
		{
			/* Each size class has its own group, keyed by a hash of the size */
			u64 group_hash = command_buffer_hash_from_size(size);
			struct dict_entry *cb_group_entry = dict_ensure_entry(G.command_buffers_arena, G.command_buffers_dict, group_hash);
			cb_group = cb_group_entry->value;
			if (!cb_group) {
				/* Create group */
				cb_group = arena_push(G.command_buffers_arena, struct command_buffer_group);
				cb_group_entry->value = (u64)cb_group;
			}
		}
		/* Find first command buffer ready for reuse */
		for (struct command_buffer *tmp = cb_group->first_submitted; tmp; tmp = tmp->next_submitted) {
			/* TODO: Cache completed fence values */
			u64 completed_fence_value = ID3D12Fence_GetCompletedValue(tmp->submitted_cq->submit_fence);
			if (completed_fence_value >= tmp->submitted_fence_target) {
				cb = tmp;
				break;
			}
		}
		if (cb) {
			/* Remove from submitted list */
			/* Keep the GPU resource so it can be reused without reallocation */
			resource = cb->resource;
			struct command_buffer *prev = cb->prev_submitted;
			struct command_buffer *next = cb->next_submitted;
			if (prev) {
				prev->next_submitted = next;
			} else {
				cb_group->first_submitted = next;
			}
			if (next) {
				next->prev_submitted = prev;
			} else {
				cb_group->last_submitted = prev;
			}
		} else {
			/* Allocate new */
			cb = arena_push_no_zero(G.command_buffers_arena, struct command_buffer);
		}
		snc_unlock(&lock);
	}
	/* Reset bookkeeping; the saved `resource`, if any, is reattached below */
	MEMZERO_STRUCT(cb);
	cb->group = cb_group;
	cb->size = data.len;
	/* Create upload heap */
	if (resource) {
		cb->resource = resource;
	} else {
		enum dx12_resource_view_flags view_flags = DX12_RESOURCE_VIEW_FLAG_NONE;
		D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
		heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
		heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
		D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
		D3D12_RESOURCE_DESC desc = ZI;
		desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
		desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
		desc.Format = DXGI_FORMAT_UNKNOWN;
		desc.Alignment = 0;
		/* Resource is sized by class (not data.len) so it can be reused */
		desc.Width = size;
		desc.Height = 1;
		desc.DepthOrArraySize = 1;
		desc.MipLevels = 1;
		desc.SampleDesc.Count = 1;
		desc.SampleDesc.Quality = 0;
		D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;
		cb->resource = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state, view_flags);
	}
	/* Copy data to resource */
	{
		/* Empty read range: CPU will not read back from the upload heap */
		D3D12_RANGE read_range = ZI;
		void *dst = 0;
		HRESULT hr = ID3D12Resource_Map(cb->resource->resource, 0, &read_range, &dst);
		if (FAILED(hr) || !dst) {
			/* TODO: Don't panic */
			sys_panic(LIT("Failed to map command buffer resource"));
		}
		MEMCPY(dst, data.text, data.len);
		ID3D12Resource_Unmap(cb->resource->resource, 0, 0);
	}
	/* Insert into command list */
	cb->next_in_command_list = cl->first_command_buffer;
	cl->first_command_buffer = cb;
	return cb;
}
/* ========================== *
* Util
* ========================== */
/* Upload a root-constant struct to graphics root parameter 0.
 * `size` is in bytes and must be a multiple of 4 (root constants are
 * addressed in 32-bit units). */
INTERNAL void command_list_set_graphics_root_constant(struct command_list *cl, void *src, u32 size)
{
	__prof;
	if (size % 4 == 0) {
		u32 num32bit = size / 4;
		/* Set all values with one batched call instead of one API call per
		 * 32-bit constant; D3D12 copies num32bit dwords from src starting
		 * at destination offset 0. */
		ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(cl->cl, 0, num32bit, src, 0);
	} else {
		/* Root constant structs must pad to 32 bits */
		ASSERT(0);
	}
}
/* Upload a root-constant struct to compute root parameter 0.
 * `size` is in bytes and must be a multiple of 4 (root constants are
 * addressed in 32-bit units). */
INTERNAL void command_list_set_compute_root_constant(struct command_list *cl, void *src, u32 size)
{
	__prof;
	if (size % 4 == 0) {
		u32 num32bit = size / 4;
		/* Set all values with one batched call instead of one API call per
		 * 32-bit constant; D3D12 copies num32bit dwords from src starting
		 * at destination offset 0. */
		ID3D12GraphicsCommandList_SetComputeRoot32BitConstants(cl->cl, 0, num32bit, src, 0);
	} else {
		/* Root constant structs must pad to 32 bits */
		ASSERT(0);
	}
}
INTERNAL struct D3D12_VIEWPORT viewport_from_rect(struct rect r)
{
struct D3D12_VIEWPORT viewport = ZI;
viewport.TopLeftX = r.x;
viewport.TopLeftY = r.y;
viewport.Width = r.width;
viewport.Height = r.height;
viewport.MinDepth = 0.0f;
viewport.MaxDepth = 1.0f;
return viewport;
}
/* Convert an x/y/width/height rect into a left/top/right/bottom scissor. */
INTERNAL D3D12_RECT scissor_from_rect(struct rect r)
{
	D3D12_RECT scissor = {
		.left = r.x,
		.top = r.y,
		.right = r.x + r.width,
		.bottom = r.y + r.height,
	};
	return scissor;
}
/* Describe a command buffer's upload resource as a vertex buffer view.
 * SizeInBytes is the pushed data length (cb->size), not the pooled
 * resource's class size. */
INTERNAL D3D12_VERTEX_BUFFER_VIEW vbv_from_command_buffer(struct command_buffer *cb, u32 vertex_size)
{
	D3D12_VERTEX_BUFFER_VIEW vbv = {
		.BufferLocation = cb->resource->gpu_address,
		.SizeInBytes = cb->size,
		.StrideInBytes = vertex_size,
	};
	return vbv;
}
/* Describe a command buffer's upload resource as an index buffer view
 * with the given index format. */
INTERNAL D3D12_INDEX_BUFFER_VIEW ibv_from_command_buffer(struct command_buffer *cb, DXGI_FORMAT format)
{
	D3D12_INDEX_BUFFER_VIEW ibv = {
		.BufferLocation = cb->resource->gpu_address,
		.Format = format,
		.SizeInBytes = cb->size,
	};
	return ibv;
}
/* ========================== *
* Wait job
* ========================== */
/* Signature for dx12_wait_fence_job: block until fence reaches target. */
struct dx12_wait_fence_job_sig {
	ID3D12Fence *fence;
	u64 target;
};
/* Job that blocks until the given fence has reached the target value.
 * Intended to park a worker on a GPU wait without spinning. */
INTERNAL SYS_JOB_DEF(dx12_wait_fence_job, job)
{
	__prof;
	struct dx12_wait_fence_job_sig *sig = job.sig;
	ID3D12Fence *fence = sig->fence;
	u64 target = sig->target;
	if (ID3D12Fence_GetCompletedValue(fence) < target) {
		/* TODO: Pool events */
		HANDLE event = CreateEvent(0, 0, 0, 0);
		/* Per the D3D12 docs, a null event handle makes SetEventOnCompletion
		 * itself block until the fence reaches the value, so a failed
		 * CreateEvent still yields a correct (synchronous) wait instead of
		 * the previous WaitForSingleObject/CloseHandle on a null handle. */
		ID3D12Fence_SetEventOnCompletion(fence, target, event);
		if (event) {
			WaitForSingleObject(event, INFINITE);
			CloseHandle(event);
		}
	}
}
/* ========================== *
* Texture
* ========================== */
/* Allocate a 2D texture in a default heap, optionally uploading initial
 * pixel data through a temporary upload heap on the background copy queue.
 * Panics on invalid dimensions, unknown format, or map failure.
 * When initial_data is non-null, the call blocks until the GPU copy has
 * completed (see the wait job at the end). */
struct gp_resource *gp_texture_alloc(enum gp_texture_format format, u32 flags, struct v2i32 size, void *initial_data)
{
	__prof;
	if (size.x <= 0 || size.y <= 0) {
		sys_panic(LIT("Tried to create texture with dimension <= 0"));
	}
	struct dxgi_format_info { DXGI_FORMAT format; u32 size; };
	LOCAL_PERSIST const struct dxgi_format_info formats[] = {
		[GP_TEXTURE_FORMAT_R8G8B8A8_UNORM] = { DXGI_FORMAT_R8G8B8A8_UNORM, 4 },
		[GP_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = { DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, 4 }
	};
	DXGI_FORMAT dxgi_format = ZI;
	u32 pixel_size = 0;
	/* Lower bound added: a negative enum value would previously pass the
	 * signed compare and index out of bounds. */
	if (format > 0 && format < (i32)countof(formats)) {
		dxgi_format = formats[format].format;
		pixel_size = formats[format].size;
	}
	/* Validate by the looked-up values, not just `format == 0`: the old
	 * check let out-of-range formats slip through with a zeroed
	 * format/pixel_size. */
	if (dxgi_format == 0 || pixel_size == 0) {
		sys_panic(LIT("Tried to create texture with unknown format"));
	}
	enum dx12_resource_view_flags view_flags = DX12_RESOURCE_VIEW_FLAG_SRV;
	D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
	heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
	heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
	D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
	D3D12_RESOURCE_DESC desc = ZI;
	desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
	desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
	desc.Format = dxgi_format;
	desc.Alignment = 0;
	desc.Width = size.x;
	desc.Height = size.y;
	desc.DepthOrArraySize = 1;
	desc.MipLevels = 1;
	desc.SampleDesc.Count = 1;
	desc.SampleDesc.Quality = 0;
	if (flags & GP_TEXTURE_FLAG_TARGETABLE) {
		desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
		view_flags |= DX12_RESOURCE_VIEW_FLAG_RTV | DX12_RESOURCE_VIEW_FLAG_UAV;
	}
	/* Start in COPY_DEST so the upload below needs no barrier */
	D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
	struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state, view_flags);
	r->texture_size = size;
	/* Upload texture */
	if (initial_data) {
		u64 upload_size = 0;
		u64 upload_row_size = 0;
		u32 upload_num_rows = 0;
		D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI;
		ID3D12Device_GetCopyableFootprints(G.device, &desc, 0, 1, 0, &footprint, &upload_num_rows, &upload_row_size, &upload_size);
		/* Create temp upload heap */
		struct dx12_resource *upload = 0;
		{
			enum dx12_resource_view_flags upload_view_flags = DX12_RESOURCE_VIEW_FLAG_NONE;
			D3D12_HEAP_PROPERTIES upload_heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
			upload_heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
			upload_heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
			D3D12_HEAP_FLAGS upload_heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
			D3D12_RESOURCE_DESC upload_desc = ZI;
			upload_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
			upload_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
			upload_desc.Format = DXGI_FORMAT_UNKNOWN;
			upload_desc.Alignment = 0;
			upload_desc.Width = upload_size;
			upload_desc.Height = 1;
			upload_desc.DepthOrArraySize = 1;
			upload_desc.MipLevels = 1;
			upload_desc.SampleDesc.Count = 1;
			upload_desc.SampleDesc.Quality = 0;
			D3D12_RESOURCE_STATES upload_initial_state = D3D12_RESOURCE_STATE_GENERIC_READ;
			/* FIXME: Release */
			upload = dx12_resource_alloc(upload_heap_props, upload_heap_flags, upload_desc, upload_initial_state, upload_view_flags);
			/* Copy to upload heap */
			/* FIXME: Copy based on footprint */
			{
				D3D12_RANGE read_range = ZI;
				void *mapped = 0;
				HRESULT hr = ID3D12Resource_Map(upload->resource, 0, &read_range, &mapped);
				if (FAILED(hr) || !mapped) {
					/* TODO: Don't panic */
					sys_panic(LIT("Failed to map texture upload resource"));
				}
				u8 *dst = (u8 *)mapped + footprint.Offset;
				u8 *src = initial_data;
				/* Row-by-row copy: source is tightly packed, destination
				 * rows are padded to the footprint's RowPitch */
				for (u32 y = 0; y < upload_num_rows; ++y) {
					MEMCPY(dst + y * footprint.Footprint.RowPitch, src + y * size.x * pixel_size, size.x * pixel_size);
				}
				ID3D12Resource_Unmap(upload->resource, 0, 0);
			}
		}
		/* Copy from upload heap to texture */
		struct command_queue *cq = G.command_queues[DX12_QUEUE_COPY_BACKGROUND];
		struct command_list *cl = command_list_open(cq->cl_pool);
		{
			__profnc_dx12(cl->cq->prof, cl->cl, "Upload texture", RGB32_F(0.2, 0.5, 0.2));
			D3D12_TEXTURE_COPY_LOCATION dst_loc = {
				.pResource = r->resource,
				.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
				.SubresourceIndex = 0,
			};
			D3D12_TEXTURE_COPY_LOCATION src_loc = {
				.pResource = upload->resource,
				.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
				.PlacedFootprint = footprint,
			};
			ID3D12GraphicsCommandList_CopyTextureRegion(cl->cl, &dst_loc, 0, 0, 0, &src_loc, 0);
		}
		u64 fence_target = command_list_close(cl);
		/* Submit wait job */
		/* TODO: Make wait optional */
		if (ID3D12Fence_GetCompletedValue(cq->submit_fence) < fence_target) {
			struct dx12_wait_fence_job_sig sig = ZI;
			sig.fence = cq->submit_fence;
			sig.target = fence_target;
			struct snc_counter counter = ZI;
			sys_run(1, dx12_wait_fence_job, &sig, SYS_POOL_FLOATING, SYS_PRIORITY_LOW, &counter);
			snc_counter_wait(&counter);
		}
	}
	return (struct gp_resource *)r;
}
/* Return the cached dimensions stored on the backing dx12 resource. */
struct v2i32 gp_texture_get_size(struct gp_resource *resource)
{
	return ((struct dx12_resource *)resource)->texture_size;
}
/* ========================== *
* Run
* ========================== */
/* Allocate a gbuffer texture: a default-heap 2D texture usable as SRV,
 * UAV and render target, created in the given initial state. */
INTERNAL struct dx12_resource *gbuff_alloc(DXGI_FORMAT format, struct v2i32 size, D3D12_RESOURCE_STATES initial_state)
{
	__prof;
	D3D12_HEAP_PROPERTIES heap_props = {
		.Type = D3D12_HEAP_TYPE_DEFAULT,
		.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
		.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN,
	};
	D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
	D3D12_RESOURCE_DESC desc = {
		.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D,
		.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN,
		.Format = format,
		.Alignment = 0,
		.Width = size.x,
		.Height = size.y,
		.DepthOrArraySize = 1,
		.MipLevels = 1,
		.SampleDesc = { .Count = 1, .Quality = 0 },
		.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
	};
	enum dx12_resource_view_flags view_flags = DX12_RESOURCE_VIEW_FLAG_SRV | DX12_RESOURCE_VIEW_FLAG_UAV | DX12_RESOURCE_VIEW_FLAG_RTV;
	struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state, view_flags);
	r->texture_size = size;
	return r;
}
/* Calculate the view projection matrix */
/* Calculate the view-projection matrix: a top-left-origin orthographic
 * projection (z in [-1, 1]) composed with the camera transform. */
INLINE struct mat4x4 calculate_vp(struct xform view, f32 viewport_width, f32 viewport_height)
{
	struct mat4x4 ortho = mat4x4_from_ortho(0.0, viewport_width, viewport_height, 0.0, -1.0, 1.0);
	return mat4x4_mul(ortho, mat4x4_from_xform(view));
}
/* Translate a descriptor's index into a GPU handle inside the
 * shader-visible heap copy, using the cached per-type increment size. */
INTERNAL D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_from_descriptor(struct descriptor *descriptor, struct command_descriptor_heap *cdh)
{
	struct D3D12_GPU_DESCRIPTOR_HANDLE res = {
		.ptr = cdh->start_gpu_handle.ptr + descriptor->index * G.desc_sizes[descriptor->heap->type],
	};
	return res;
}
/* Execute the renderer for one sig: material gbuffer pass, jump-flood
 * emittance propagation, deferred shade, then immediate-mode shape
 * geometry - all recorded into one direct-queue command list.
 * Gbuffers are (re)allocated whenever the draw target size changes. */
void gp_run(struct gp_run_params params)
{
	__prof;
	struct arena_temp scratch = scratch_begin_no_conflict();
	struct sig *sig = (struct sig *)params.sig;
	struct dx12_resource *final_target = (struct dx12_resource *)params.draw_target;
	struct v2i32 final_target_size = final_target->texture_size;
	/* Allocate resources */
	if (!v2i32_eq(sig->old_size, final_target_size)) {
		__profn("Allocate buffers");
		/* Release buffers */
		/* TODO: Batch release */
		if (sig->albedo) {
			fenced_release(sig->albedo, FENCED_RELEASE_KIND_RESOURCE);
			fenced_release(sig->emittance, FENCED_RELEASE_KIND_RESOURCE);
			fenced_release(sig->emittance_flood_a, FENCED_RELEASE_KIND_RESOURCE);
			fenced_release(sig->emittance_flood_b, FENCED_RELEASE_KIND_RESOURCE);
		}
		/* Alloc buffers */
		sig->albedo = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
		sig->emittance = gbuff_alloc(DXGI_FORMAT_R8G8B8A8_UNORM, final_target_size, D3D12_RESOURCE_STATE_RENDER_TARGET);
		sig->emittance_flood_a = gbuff_alloc(DXGI_FORMAT_R32_UINT, final_target_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
		sig->emittance_flood_b = gbuff_alloc(DXGI_FORMAT_R32_UINT, final_target_size, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
	}
	struct sprite_scope *sprite_scope = sprite_scope_begin();
	struct pipeline_scope *pipeline_scope = pipeline_scope_begin();
	struct pipeline *material_pipeline = pipeline_from_name(pipeline_scope, LIT("material"));
	struct pipeline *flood_pipeline = pipeline_from_name(pipeline_scope, LIT("flood"));
	struct pipeline *shade_pipeline = pipeline_from_name(pipeline_scope, LIT("shade"));
	struct pipeline *shape_pipeline = pipeline_from_name(pipeline_scope, LIT("shape"));
	struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
	struct command_list *cl = command_list_open(cq->cl_pool);
	{
		__profnc_dx12(cl->cq->prof, cl->cl, "Run", RGB32_F(0.5, 0.2, 0.2));
		struct mat4x4 vp_matrix = calculate_vp(params.draw_target_view, params.draw_target_viewport.width, params.draw_target_viewport.height);
		/* Upload dummmy vert & index buffer */
		/* TODO: Make these static */
		/* Dummy vertex buffer */
		LOCAL_PERSIST u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
		struct command_buffer *dummy_vertex_buffer = command_list_push_buffer(cl, STRING(0, 0));
		struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, STRING_FROM_ARRAY(quad_indices));
		/* Process sig data into uploadable data */
		struct sh_material_instance *material_instances = arena_push_array_no_zero(scratch.arena, struct sh_material_instance, sig->num_material_instance_descs);
		struct sh_material_grid *grids = arena_push_array_no_zero(scratch.arena, struct sh_material_grid, sig->num_material_grid_descs);
		{
			__profn("Process sig data");
			/* Process material instances */
			{
				__profn("Process material instances");
				for (u32 i = 0; i < sig->num_material_instance_descs; ++i) {
					struct material_instance_desc *desc = &((struct material_instance_desc *)arena_base(sig->material_instance_descs_arena))[i];
					struct sh_material_instance *instance = &material_instances[i];
					/* -1 means "no texture" on the shader side */
					i32 texture_id = -1;
					if (desc->texture != 0) {
						texture_id = desc->texture->srv_descriptor->index;
					} else if (desc->sprite.hash != 0) {
						struct sprite_texture *st = sprite_texture_from_tag_async(sprite_scope, desc->sprite);
						struct dx12_resource *texture = (struct dx12_resource *)st->gp_texture;
						if (texture) {
							texture_id = texture->srv_descriptor->index;
						}
					}
					instance->tex_nurid = sh_int_from_i32(texture_id);
					instance->grid_id = sh_int_from_i32(desc->grid_id);
					instance->xf = sh_float2x3_from_xform(desc->xf);
					instance->uv0 = sh_float2_from_v2(desc->clip.p0);
					instance->uv1 = sh_float2_from_v2(desc->clip.p1);
					instance->tint_srgb = sh_uint_from_u32(desc->tint);
					instance->emittance_srgb = sh_uint_from_u32(desc->emittance);
				}
			}
			/* Process grids */
			{
				__profn("Process grids");
				for (u32 i = 0; i < sig->num_material_grid_descs; ++i) {
					struct material_grid_desc *desc = &((struct material_grid_desc *)arena_base(sig->material_grid_descs_arena))[i];
					struct sh_material_grid *grid = &grids[i];
					grid->line_thickness = sh_float_from_f32(desc->line_thickness);
					grid->line_spacing = sh_float_from_f32(desc->line_spacing);
					grid->offset = sh_float2_from_v2(desc->offset);
					grid->bg0_srgb = sh_uint_from_u32(desc->bg0_color);
					grid->bg1_srgb = sh_uint_from_u32(desc->bg1_color);
					grid->line_srgb = sh_uint_from_u32(desc->line_color);
					grid->x_srgb = sh_uint_from_u32(desc->x_color);
					grid->y_srgb = sh_uint_from_u32(desc->y_color);
				}
			}
		}
		/* Upload buffers */
		struct command_buffer *material_instance_buffer = command_list_push_buffer(cl, STRING(sizeof(*material_instances) * sig->num_material_instance_descs, (u8 *)material_instances));
		struct command_buffer *grid_buffer = command_list_push_buffer(cl, STRING(sizeof(*grids) * sig->num_material_grid_descs, (u8 *)grids));
		struct command_buffer *shape_verts_buffer = command_list_push_buffer(cl, STRING_FROM_ARENA(sig->shape_verts_arena));
		struct command_buffer *shape_indices_buffer = command_list_push_buffer(cl, STRING_FROM_ARENA(sig->shape_indices_arena));
		/* Upload descriptor heap */
		struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap);
		ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap };
		ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps);
		/* Material pass */
		{
			__profn("Material pass");
			__profnc_dx12(cl->cq->prof, cl->cl, "Material pass", RGB32_F(0.5, 0.2, 0.2));
			/* Bind gbuffers */
			{
				struct dx12_resource_barrier_desc barriers[] = {
					{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->albedo, D3D12_RESOURCE_STATE_RENDER_TARGET },
					{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance, D3D12_RESOURCE_STATE_RENDER_TARGET }
				};
				D3D12_CPU_DESCRIPTOR_HANDLE rtvs[] = {
					sig->albedo->rtv_descriptor->handle,
					sig->emittance->rtv_descriptor->handle,
				};
				dx12_resource_barriers(cl->cl, countof(barriers), barriers);
				ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, countof(rtvs), rtvs, 0, 0);
			}
			/* Clear gbuffers */
			{
				__profn("Clear gbuffers");
				__profnc_dx12(cl->cq->prof, cl->cl, "Clear gbuffers", RGB32_F(0.5, 0.2, 0.2));
				f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
				ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, sig->albedo->rtv_descriptor->handle, clear_color, 0, 0);
				ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, sig->emittance->rtv_descriptor->handle, clear_color, 0, 0);
			}
			/* Dispatch */
			if (material_pipeline->success) {
				__profn("Material pass run");
				__profnc_dx12(cl->cq->prof, cl->cl, "Material pass run", RGB32_F(0.5, 0.2, 0.2));
				/* Bind pipeline */
				ID3D12GraphicsCommandList_SetPipelineState(cl->cl, material_pipeline->pso);
				ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, material_pipeline->rootsig);
				/* Set Rasterizer State */
				D3D12_VIEWPORT viewport = viewport_from_rect(params.draw_target_viewport);
				D3D12_RECT scissor = scissor_from_rect(params.draw_target_viewport);
				ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
				ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
				/* Set constants */
				struct sh_material_constants constants = ZI;
				constants.projection = sh_float4x4_from_mat4x4(vp_matrix);
				/* Set parameters */
				command_list_set_graphics_root_constant(cl, &constants, sizeof(constants));
				ID3D12GraphicsCommandList_SetGraphicsRootDescriptorTable(cl->cl, 1, descriptor_heap->start_gpu_handle);
				ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(cl->cl, 2, material_instance_buffer->resource->gpu_address);
				ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(cl->cl, 3, grid_buffer->resource->gpu_address);
				/* Draw: one instanced quad per material instance */
				u32 instance_count = material_instance_buffer->size / sizeof(struct sh_material_instance);
				D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
				D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
				ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
				ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
				ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
				ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, instance_count, 0, 0, 0);
			}
		}
		/* Flood pass (jump flood over the emittance gbuffer) */
		struct dx12_resource *emittance_flood_read = sig->emittance_flood_a;
		/* BUG FIX: write must start on the B buffer. Previously both read
		 * and write pointed at emittance_flood_a, so the ping-pong swap
		 * below exchanged two equal pointers, buffer B was never used, and
		 * every flood step read from and wrote to the same texture. */
		struct dx12_resource *emittance_flood_write = sig->emittance_flood_b;
		{
			__profn("Flood pass");
			__profnc_dx12(cl->cq->prof, cl->cl, "Flood pass", RGB32_F(0.5, 0.2, 0.2));
			/* Transition emittance & emittance flood */
			{
				struct dx12_resource_barrier_desc barriers[] = {
					{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
					{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, emittance_flood_read, D3D12_RESOURCE_STATE_UNORDERED_ACCESS },
					{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, emittance_flood_write, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }
				};
				dx12_resource_barriers(cl->cl, countof(barriers), barriers);
			}
			/* Dispatch */
			if (flood_pipeline->success) {
				/* Bind pipeline */
				ID3D12GraphicsCommandList_SetPipelineState(cl->cl, flood_pipeline->pso);
				ID3D12GraphicsCommandList_SetComputeRootSignature(cl->cl, flood_pipeline->rootsig);
				/* step_length == -1 marks the init step; the first real step
				 * length is derived from the target size below */
				i32 step_length = -1;
				/* TODO: Remove this */
				u64 max_steps = gstat_get(GSTAT_DEBUG_STEPS);
				u64 step = 0;
				while (step_length != 0 && step < max_steps) {
					__profn("Flood step");
					__profnc_dx12(cl->cq->prof, cl->cl, "Flood step", RGB32_F(0.5, 0.2, 0.2));
					/* UAV barrier: previous step's writes become visible */
					{
						struct dx12_resource_barrier_desc barriers[] = {
							{ D3D12_RESOURCE_BARRIER_TYPE_UAV, emittance_flood_read, 0 }
						};
						dx12_resource_barriers(cl->cl, countof(barriers), barriers);
					}
					/* Set constants */
					struct sh_flood_constants constants = ZI;
					constants.step_len = sh_int_from_i32(step_length);
					constants.emittance_tex_urid = sh_uint_from_u32(sig->emittance->srv_descriptor->index);
					constants.read_flood_tex_urid = sh_uint_from_u32(emittance_flood_read->uav_descriptor->index);
					constants.write_flood_tex_urid = sh_uint_from_u32(emittance_flood_write->uav_descriptor->index);
					constants.tex_width = sh_uint_from_u32(final_target_size.x);
					constants.tex_height = sh_uint_from_u32(final_target_size.y);
					/* Set parameters */
					command_list_set_compute_root_constant(cl, &constants, sizeof(constants));
					ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(cl->cl, 1, descriptor_heap->start_gpu_handle);
					ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(cl->cl, 2, descriptor_heap->start_gpu_handle);
					/* Dispatch (8x8 threadgroups, rounded up) */
					ID3D12GraphicsCommandList_Dispatch(cl->cl, (final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1);
					/* Swap buffers */
					struct dx12_resource *swp = emittance_flood_read;
					emittance_flood_read = emittance_flood_write;
					emittance_flood_write = swp;
					/* Update step */
					if (step_length == -1) {
						step_length = max_i32(final_target_size.x, final_target_size.y) / 2;
						//step_length = 16;
					} else {
						step_length /= 2;
					}
					++step;
				}
			}
		}
		/* Shade pass */
		{
			__profn("Shade pass");
			__profnc_dx12(cl->cq->prof, cl->cl, "Shade pass", RGB32_F(0.5, 0.2, 0.2));
			/* Transition gbuffers & final target */
			{
				struct dx12_resource_barrier_desc barriers[] = {
					{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->albedo, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
					{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, sig->emittance, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
					{ D3D12_RESOURCE_BARRIER_TYPE_UAV, emittance_flood_read, 0 },
					{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, emittance_flood_read, D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE },
					{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, final_target, D3D12_RESOURCE_STATE_UNORDERED_ACCESS }
				};
				dx12_resource_barriers(cl->cl, countof(barriers), barriers);
			}
			/* Clear final target */
			if (params.clear_target) {
				__profn("Clear target");
				__profnc_dx12(cl->cq->prof, cl->cl, "Clear target", RGB32_F(0.5, 0.2, 0.2));
				f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
				ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(cl->cl, gpu_handle_from_descriptor(final_target->uav_descriptor, descriptor_heap), final_target->uav_descriptor->handle, final_target->resource, clear_color, 0, 0);
			}
			/* Dispatch */
			if (shade_pipeline->success) {
				__profn("Shade pass run");
				__profnc_dx12(cl->cq->prof, cl->cl, "Shade pass run", RGB32_F(0.5, 0.2, 0.2));
				/* Bind pipeline */
				ID3D12GraphicsCommandList_SetPipelineState(cl->cl, shade_pipeline->pso);
				ID3D12GraphicsCommandList_SetComputeRootSignature(cl->cl, shade_pipeline->rootsig);
				/* Set constants (emittance_flood_read holds the final flood
				 * result after the last swap) */
				struct sh_shade_constants constants = ZI;
				constants.albedo_tex_urid = sh_uint_from_u32(sig->albedo->srv_descriptor->index);
				constants.emittance_tex_urid = sh_uint_from_u32(sig->emittance->srv_descriptor->index);
				constants.emittance_flood_tex_urid = sh_uint_from_u32(emittance_flood_read->srv_descriptor->index);
				constants.write_tex_urid = sh_uint_from_u32(final_target->uav_descriptor->index);
				constants.tex_width = sh_uint_from_u32(final_target_size.x);
				constants.tex_height = sh_uint_from_u32(final_target_size.y);
				/* Set parameters */
				command_list_set_compute_root_constant(cl, &constants, sizeof(constants));
				ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(cl->cl, 1, descriptor_heap->start_gpu_handle);
				ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(cl->cl, 2, descriptor_heap->start_gpu_handle);
				ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(cl->cl, 3, descriptor_heap->start_gpu_handle);
				/* Dispatch */
				ID3D12GraphicsCommandList_Dispatch(cl->cl, (final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1);
			}
		}
		/* Shape pass */
		{
			__profn("Shape pass");
			__profnc_dx12(cl->cq->prof, cl->cl, "Shape pass", RGB32_F(0.5, 0.2, 0.2));
			/* Bind final target as RTV */
			{
				struct dx12_resource_barrier_desc barriers[] = {
					{ D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, final_target, D3D12_RESOURCE_STATE_RENDER_TARGET },
				};
				dx12_resource_barriers(cl->cl, countof(barriers), barriers);
				ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &final_target->rtv_descriptor->handle, 0, 0);
			}
			/* Dispatch */
			if (shape_pipeline->success) {
				__profn("Shape pass run");
				__profnc_dx12(cl->cq->prof, cl->cl, "Shape pass run", RGB32_F(0.5, 0.2, 0.2));
				/* Bind pipeline */
				ID3D12GraphicsCommandList_SetPipelineState(cl->cl, shape_pipeline->pso);
				ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, shape_pipeline->rootsig);
				/* Set Rasterizer State */
				D3D12_VIEWPORT viewport = viewport_from_rect(params.draw_target_viewport);
				D3D12_RECT scissor = scissor_from_rect(params.draw_target_viewport);
				ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
				ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
				/* Set constants */
				struct sh_shape_constants constants = ZI;
				constants.projection = sh_float4x4_from_mat4x4(vp_matrix);
				/* Set parameters */
				command_list_set_graphics_root_constant(cl, &constants, sizeof(constants));
				/* Draw */
				u32 index_count = shape_indices_buffer->size / sizeof(u32);
				D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(shape_verts_buffer, sizeof(struct sh_shape_vert));
				D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(shape_indices_buffer, DXGI_FORMAT_R32_UINT);
				ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
				ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
				ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
				ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, index_count, 1, 0, 0, 0);
			}
		}
	}
	command_list_close(cl);
	pipeline_scope_end(pipeline_scope);
	sprite_scope_end(sprite_scope);
	sig->old_size = final_target_size;
	sig_reset(sig);
	scratch_end(scratch);
}
/* ========================== *
* Memory info
* ========================== */
/* Query current video memory usage and budget for the active adapter.
 * Returns zeros for any segment whose query fails (previously the results
 * of QueryVideoMemoryInfo were ignored, so a failed query could report the
 * zero-initialized struct as valid without distinguishing the cases). */
struct gp_memory_info gp_query_memory_info(void)
{
	struct gp_memory_info res = ZI;
	IDXGIAdapter3 *dxgiAdapter3 = 0;
	HRESULT hr = IDXGIAdapter_QueryInterface(G.adapter, &IID_IDXGIAdapter3, (void **)&dxgiAdapter3);
	if (SUCCEEDED(hr)) {
		struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
		if (SUCCEEDED(IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info))) {
			res.local_used = info.CurrentUsage;
			res.local_budget = info.Budget;
		}
	}
	if (SUCCEEDED(hr)) {
		struct DXGI_QUERY_VIDEO_MEMORY_INFO info = ZI;
		if (SUCCEEDED(IDXGIAdapter3_QueryVideoMemoryInfo(dxgiAdapter3, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info))) {
			res.non_local_used = info.CurrentUsage;
			res.non_local_budget = info.Budget;
		}
	}
	if (dxgiAdapter3) {
		/* Release through the interface that was queried (same vtable slot
		 * as IDXGIAdapter_Release, but keeps the COM macros consistent). */
		IDXGIAdapter3_Release(dxgiAdapter3);
	}
	return res;
}
/* ========================== *
* Swapchain
* ========================== */
/* Fetch each backbuffer from the swapchain and wrap it in a
 * swapchain_buffer with a freshly allocated RTV. Panics if a buffer
 * cannot be retrieved. */
INTERNAL void swapchain_init_resources(struct swapchain *swapchain)
{
	for (u32 buffer_index = 0; buffer_index < countof(swapchain->buffers); ++buffer_index) {
		struct swapchain_buffer *sb = &swapchain->buffers[buffer_index];
		ID3D12Resource *resource = 0;
		HRESULT hr = IDXGISwapChain3_GetBuffer(swapchain->swapchain, buffer_index, &IID_ID3D12Resource, (void **)&resource);
		if (FAILED(hr)) {
			/* TODO: Don't panic */
			sys_panic(LIT("Failed to get swapchain buffer"));
		}
		MEMZERO_STRUCT(sb);
		sb->swapchain = swapchain;
		sb->resource = resource;
		sb->state = D3D12_RESOURCE_STATE_COMMON;
		sb->rtv_descriptor = descriptor_alloc(G.rtv_heap);
		ID3D12Device_CreateRenderTargetView(G.device, resource, 0, sb->rtv_descriptor->handle);
	}
}
/* Create a flip-model swapchain for the given window on the direct queue.
 * Swapchain structs are pooled on a free list; the DXGI objects and RTVs
 * are created fresh each call. Panics on swapchain creation failure. */
struct gp_swapchain *gp_swapchain_alloc(struct sys_window *window, struct v2i32 resolution)
{
	HRESULT hr = 0;
	HWND hwnd = (HWND)sys_window_get_internal_handle(window);
	struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
	struct swapchain *swapchain = 0;
	{
		struct snc_lock lock = snc_lock_e(&G.swapchains_mutex);
		if (G.first_free_swapchain) {
			/* NOTE(review): a struct reused from the free list is not
			 * zeroed here; stale fields (e.g. resolution) survive into the
			 * new swapchain - confirm this is intentional. */
			swapchain = G.first_free_swapchain;
			G.first_free_swapchain = swapchain->next_free;
		} else {
			/* arena_push zero-initializes the new struct */
			swapchain = arena_push(G.swapchains_arena, struct swapchain);
		}
		snc_unlock(&lock);
	}
	/* Create swapchain1 */
	IDXGISwapChain1 *swapchain1 = 0;
	{
		DXGI_SWAP_CHAIN_DESC1 desc = ZI;
		desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
		desc.Width = resolution.x;
		desc.Height = resolution.y;
		desc.SampleDesc.Count = 1;
		desc.SampleDesc.Quality = 0;
		desc.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT;
		desc.BufferCount = DX12_SWAPCHAIN_BUFFER_COUNT;
		desc.Scaling = DXGI_SCALING_NONE;
		desc.Flags = DX12_SWAPCHAIN_FLAGS;
		desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE;
		desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
		hr = IDXGIFactory2_CreateSwapChainForHwnd(G.factory, (IUnknown *)cq->cq, hwnd, &desc, 0, 0, &swapchain1);
		if (FAILED(hr)) {
			sys_panic(LIT("Failed to create IDXGISwapChain1"));
		}
	}
	/* Upgrade to swapchain3 */
	hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&swapchain->swapchain);
	if (FAILED(hr)) {
		sys_panic(LIT("Failed to create IDXGISwapChain3"));
	}
	/* Create waitable object */
	/* Latency of 1 keeps the CPU at most one frame ahead of presentation */
	#if DX12_WAIT_FRAME_LATENCY
	IDXGISwapChain3_SetMaximumFrameLatency(swapchain->swapchain, 1);
	swapchain->waitable = IDXGISwapChain2_GetFrameLatencyWaitableObject(swapchain->swapchain);
	ASSERT(swapchain->waitable);
	#endif
	/* Disable Alt+Enter changing monitor resolution to match window size */
	IDXGIFactory_MakeWindowAssociation(G.factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
	/* Drop the intermediate interface; the IDXGISwapChain3 ref keeps the
	 * underlying object alive */
	IDXGISwapChain1_Release(swapchain1);
	swapchain->hwnd = hwnd;
	swapchain_init_resources(swapchain);
	return (struct gp_swapchain *)swapchain;
}
/* Release a swapchain previously returned by gp_swapchain_alloc.
 * Currently unimplemented: the IDXGISwapChain3, its waitable handle, the
 * backbuffer RTV descriptors, and the swapchain slot are all leaked until
 * the TODO below is addressed (the slot should return to G.first_free_swapchain). */
void gp_swapchain_release(struct gp_swapchain *gp_swapchain)
{
	/* TODO */
	(UNUSED)gp_swapchain;
}
void gp_swapchain_wait(struct gp_swapchain *gp_swapchain)
{
struct swapchain *swapchain = (struct swapchain *)gp_swapchain;
if (swapchain->waitable) {
WaitForSingleObjectEx(swapchain->waitable, 1000, 1);
}
}
/* Ensure the swapchain buffers match `resolution` (rebuilding them if the size
 * changed) and return the buffer to render into this frame.
 * A rebuild flushes the direct queue, releases the per-buffer RTVs/resources,
 * calls ResizeBuffers, and re-grabs the buffers via swapchain_init_resources. */
INTERNAL struct swapchain_buffer *update_swapchain(struct swapchain *swapchain, struct v2i32 resolution)
{
	__prof;
	/* DXGI rejects zero-sized buffers; clamp to at least 1x1 (minimized windows). */
	resolution.x = max_i32(resolution.x, 1);
	resolution.y = max_i32(resolution.y, 1);
	b32 should_rebuild = !v2i32_eq(swapchain->resolution, resolution);
	if (should_rebuild) {
		HRESULT hr = 0;
		struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
		/* Lock direct queue submissions (in case any write to backbuffer) */
		/* TODO: Less overkill approach - Only flush present_blit since we know it's the only operation targeting backbuffer */
		struct snc_lock lock = snc_lock_e(&cq->submit_fence_mutex);
		//DEBUGBREAKABLE;
		//struct snc_lock lock = snc_lock_e(&G.global_command_list_record_mutex);
		{
			/* Flush direct queue */
			//ID3D12CommandQueue_Signal(cq->cq, cq->submit_fence, ++cq->submit_fence_target);
			{
				/* Block until the submit fence reaches its current target.
				 * NOTE(review): no Signal is issued here (see the commented-out
				 * line above) — this relies on the normal submit path having
				 * already queued a signal for submit_fence_target; confirm.
				 * NOTE(review): CreateEvent/SetEventOnCompletion results are
				 * unchecked; a NULL event would make SetEventOnCompletion block
				 * synchronously instead — verify that is acceptable. */
				HANDLE event = CreateEvent(0, 0, 0, 0);
				ID3D12Fence_SetEventOnCompletion(cq->submit_fence, cq->submit_fence_target, event);
				WaitForSingleObject(event, INFINITE);
				CloseHandle(event);
			}
			/* Release buffers (RTV descriptor slots and our backbuffer refs) —
			 * required before ResizeBuffers, which demands all outstanding
			 * buffer references be dropped. */
			for (u32 i = 0; i < countof(swapchain->buffers); ++i) {
				struct swapchain_buffer *sb = &swapchain->buffers[i];
				descriptor_release(sb->rtv_descriptor);
				ID3D12Resource_Release(sb->resource);
			}
			/* Resize buffers: 0 buffer count and DXGI_FORMAT_UNKNOWN keep the
			 * existing count/format; flags must match the creation flags. */
			hr = IDXGISwapChain_ResizeBuffers(swapchain->swapchain, 0, resolution.x, resolution.y, DXGI_FORMAT_UNKNOWN, DX12_SWAPCHAIN_FLAGS);
			if (FAILED(hr)) {
				/* TODO: Don't panic */
				sys_panic(LIT("Failed to resize swapchain"));
			}
		}
		snc_unlock(&lock);
		/* Re-acquire the resized buffers and rebuild their RTVs. */
		swapchain_init_resources(swapchain);
		swapchain->resolution = resolution;
	}
	/* Hand back the buffer DXGI says we should render into next. */
	u32 backbuffer_index = IDXGISwapChain3_GetCurrentBackBufferIndex(swapchain->swapchain);
	return &swapchain->buffers[backbuffer_index];
}
/* ========================== *
* Present
* ========================== */
/* Record and submit a fullscreen textured-quad blit of `src` into the
 * swapchain backbuffer `dst`, applying transform `src_xf` and the "blit"
 * pipeline's gamma correction. Transitions `dst` to RENDER_TARGET for the
 * draw and back to PRESENT afterwards.
 * If the "blit" pipeline failed to build, nothing is recorded: `dst` keeps
 * its previous contents and resource state. */
INTERNAL void present_blit(struct swapchain_buffer *dst, struct dx12_resource *src, struct xform src_xf)
{
	__prof;
	struct pipeline_scope *pipeline_scope = pipeline_scope_begin();
	struct pipeline *blit_pipeline = pipeline_from_name(pipeline_scope, LIT("blit"));
	if (blit_pipeline->success) {
		struct command_queue *cq = G.command_queues[DX12_QUEUE_DIRECT];
		struct command_list *cl = command_list_open(cq->cl_pool);
		{
			__profnc_dx12(cl->cq->prof, cl->cl, "Blit", RGB32_F(0.5, 0.2, 0.2));
			struct swapchain *swapchain = dst->swapchain;
			/* Upload dummmy vert & index buffer */
			/* TODO: Make these static */
			/* Dummy vertex buffer (empty: vertices are synthesized in the shader;
			 * only the quad index buffer carries real data). */
			LOCAL_PERSIST u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
			struct command_buffer *dummy_vertex_buffer = command_list_push_buffer(cl, STRING(0, 0));
			struct command_buffer *quad_index_buffer = command_list_push_buffer(cl, STRING_FROM_ARRAY(quad_indices));
			/* Upload descriptor heap */
			struct command_descriptor_heap *descriptor_heap = command_list_push_descriptor_heap(cl, G.cbv_srv_uav_heap);
			ID3D12DescriptorHeap *heaps[] = { descriptor_heap->heap };
			ID3D12GraphicsCommandList_SetDescriptorHeaps(cl->cl, countof(heaps), heaps);
			/* Viewport/scissor cover the whole backbuffer. */
			struct rect viewport_rect = RECT_FROM_V2(V2(0, 0), V2(swapchain->resolution.x, swapchain->resolution.y));
			D3D12_VIEWPORT viewport = viewport_from_rect(viewport_rect);
			D3D12_RECT scissor = scissor_from_rect(viewport_rect);
			struct mat4x4 vp_matrix = calculate_vp(src_xf, viewport.Width, viewport.Height);
			/* Transition dst to render target */
			{
				struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
				rtb.pResource = dst->resource;
				rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
				rtb.StateBefore = dst->state;
				rtb.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET;
				struct D3D12_RESOURCE_BARRIER rb = ZI;
				rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
				rb.Flags = 0;
				rb.Transition = rtb;
				ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb);
				/* Track the new state CPU-side so the next barrier's StateBefore is right. */
				dst->state = rtb.StateAfter;
			}
			ID3D12GraphicsCommandList_OMSetRenderTargets(cl->cl, 1, &dst->rtv_descriptor->handle, 0, 0);
			/* Clear to transparent black before drawing. */
			f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
			ID3D12GraphicsCommandList_ClearRenderTargetView(cl->cl, dst->rtv_descriptor->handle, clear_color, 0, 0);
			/* Bind pipeline */
			ID3D12GraphicsCommandList_SetPipelineState(cl->cl, blit_pipeline->pso);
			ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl->cl, blit_pipeline->rootsig);
			/* Set Rasterizer State */
			ID3D12GraphicsCommandList_RSSetViewports(cl->cl, 1, &viewport);
			ID3D12GraphicsCommandList_RSSetScissorRects(cl->cl, 1, &scissor);
			/* Set constants: projection, bindless SRV index of the source
			 * texture, and gamma 2.2 for the shader's gamma correction. */
			struct sh_blit_constants constants = ZI;
			constants.projection = sh_float4x4_from_mat4x4(vp_matrix);
			constants.tex_urid = sh_uint_from_u32(src->srv_descriptor->index);
			constants.gamma = sh_float_from_f32(2.2);
			/* Set parameters (root constants at param 0, descriptor table at param 1). */
			command_list_set_graphics_root_constant(cl, &constants, sizeof(constants));
			ID3D12GraphicsCommandList_SetGraphicsRootDescriptorTable(cl->cl, 1, descriptor_heap->start_gpu_handle);
			/* Draw one indexed quad (two triangles). */
			D3D12_VERTEX_BUFFER_VIEW vbv = vbv_from_command_buffer(dummy_vertex_buffer, 0);
			D3D12_INDEX_BUFFER_VIEW ibv = ibv_from_command_buffer(quad_index_buffer, DXGI_FORMAT_R16_UINT);
			ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl->cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
			ID3D12GraphicsCommandList_IASetVertexBuffers(cl->cl, 0, 1, &vbv);
			ID3D12GraphicsCommandList_IASetIndexBuffer(cl->cl, &ibv);
			ID3D12GraphicsCommandList_DrawIndexedInstanced(cl->cl, 6, 1, 0, 0, 0);
			/* Transition dst to presentable */
			{
				struct D3D12_RESOURCE_TRANSITION_BARRIER rtb = ZI;
				rtb.pResource = dst->resource;
				rtb.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
				rtb.StateBefore = dst->state;
				rtb.StateAfter = D3D12_RESOURCE_STATE_PRESENT;
				struct D3D12_RESOURCE_BARRIER rb = ZI;
				rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
				rb.Flags = 0;
				rb.Transition = rtb;
				ID3D12GraphicsCommandList_ResourceBarrier(cl->cl, 1, &rb);
				dst->state = rtb.StateAfter;
			}
		}
		/* Close submits the recorded work to the queue (see command_list_close). */
		command_list_close(cl);
	}
	pipeline_scope_end(pipeline_scope);
}
/* Present one frame: blit `texture` (transformed by `texture_xf`) into the
 * swapchain backbuffer at `backbuffer_resolution`, then flip.
 * `vsync` is the sync interval handed straight to Present(); 0 additionally
 * allows tearing when the swapchain was built with the tearing flag.
 * With PROFILING_D3D, also marks and collects per-queue GPU profiler frames. */
void gp_present(struct gp_swapchain *gp_swapchain, struct v2i32 backbuffer_resolution, struct gp_resource *texture, struct xform texture_xf, i32 vsync)
{
	__prof;
	struct swapchain *sc = (struct swapchain *)gp_swapchain;
	struct dx12_resource *src = (struct dx12_resource *)texture;
	/* Resize (if needed) and fetch the buffer we render into this frame. */
	struct swapchain_buffer *backbuffer = update_swapchain(sc, backbuffer_resolution);
	/* Record + submit the fullscreen blit into the backbuffer. */
	present_blit(backbuffer, src, texture_xf);
	/* Tearing is only legal on an unsynchronized present. */
	u32 present_flags = (vsync == 0) ? (DXGI_PRESENT_ALLOW_TEARING * DX12_ALLOW_TEARING) : 0;
	/* Present */
	{
		__profn("Present");
		HRESULT hr = IDXGISwapChain3_Present(sc->swapchain, vsync, present_flags);
		if (FAILED(hr)) {
			ASSERT(0);
		}
	}
#if PROFILING_D3D
	{
		__profframe(0);
		__profn("Mark queue frames");
		/* Lock because frame marks shouldn't occur while command lists are recording */
		struct snc_lock lock = snc_lock_e(&G.global_command_list_record_mutex);
		for (u32 qi = 0; qi < countof(G.command_queues); ++qi) {
			__prof_dx12_new_frame(G.command_queues[qi]->prof);
		}
		snc_unlock(&lock);
	}
	{
		__profn("Collect queues");
		for (u32 qi = 0; qi < countof(G.command_queues); ++qi) {
			__prof_dx12_collect(G.command_queues[qi]->prof);
		}
	}
#endif
}
/* ========================== *
* Evictor thread
* ========================== */
/* Background job that performs deferred (fence-gated) releases.
 * Each pass: snapshot the queued release list and per-queue fence targets
 * under fenced_releases_mutex, wait until every queue's submit fence has
 * reached its snapshotted target (so the GPU is done with the objects), then
 * destroy the queued resources/pipelines. Between passes it sleeps on
 * evictor_wake_cv until woken (evictor_wake_gen) or asked to shut down. */
INTERNAL SYS_JOB_DEF(dx12_evictor_job, _)
{
	(UNUSED)_;
	/* Cache of the last observed completed value per queue, to avoid
	 * re-querying fences that are already known to be far enough along. */
	u64 completed_targets[DX12_NUM_QUEUES] = ZI;
	b32 shutdown = 0;
	while (!shutdown) {
		{
			__profn("Dx12 evictor run");
			struct arena_temp scratch = scratch_begin_no_conflict();
			u64 targets[countof(completed_targets)] = ZI;
			/* Copy queued data */
			u32 num_fenced_releases = 0;
			struct fenced_release_data *fenced_releases = 0;
			{
				/* Snapshot and drain the shared release queue under the lock so
				 * producers can keep appending while we wait/process below. */
				__profn("Copy queued releases");
				struct snc_lock lock = snc_lock_e(&G.fenced_releases_mutex);
				num_fenced_releases = G.fenced_releases_arena->pos / sizeof(struct fenced_release_data);
				fenced_releases = arena_push_array_no_zero(scratch.arena, struct fenced_release_data, num_fenced_releases);
				MEMCPY(fenced_releases, arena_base(G.fenced_releases_arena), G.fenced_releases_arena->pos);
				arena_reset(G.fenced_releases_arena);
				MEMCPY(targets, G.fenced_release_targets, sizeof(targets));
				snc_unlock(&lock);
			}
			/* Wait until fences reach target */
			{
				__profn("Check fences");
				for (u32 i = 0; i < countof(targets); ++i) {
					while (completed_targets[i] < targets[i]) {
						struct command_queue *cq = G.command_queues[i];
						/* Re-poll the fence; only block if it is still behind. */
						completed_targets[i] = ID3D12Fence_GetCompletedValue(cq->submit_fence);
						if (completed_targets[i] < targets[i]) {
							__profn("Wait on fence");
							{
								/* Hand the blocking wait to a low-priority floating
								 * job and sleep on its counter, then loop to re-poll. */
								struct dx12_wait_fence_job_sig sig = ZI;
								sig.fence = cq->submit_fence;
								sig.target = targets[i];
								{
									struct snc_counter counter = ZI;
									sys_run(1, dx12_wait_fence_job, &sig, SYS_POOL_FLOATING, SYS_PRIORITY_LOW, &counter);
									snc_counter_wait(&counter);
								}
							}
						}
					}
				}
			}
			/* Process releases (safe now: all queues have passed their targets). */
			for (u32 i = 0; i < num_fenced_releases; ++i) {
				struct fenced_release_data *fr = &fenced_releases[i];
				switch (fr->kind) {
				default:
				{
					/* Unknown handle type */
					ASSERT(0);
				} break;
				case FENCED_RELEASE_KIND_RESOURCE:
				{
					struct dx12_resource *resource = (struct dx12_resource *)fr->ptr;
					dx12_resource_release_now(resource);
				} break;
				case FENCED_RELEASE_KIND_PIPELINE:
				{
					struct pipeline *pipeline = (struct pipeline *)fr->ptr;
					pipeline_release_now(pipeline);
				} break;
				}
			}
			scratch_end(scratch);
		}
		/* Sleep until woken: wake_gen is a consumed "work pending" flag,
		 * evictor_shutdown ends the job. Both are checked under the mutex to
		 * avoid missing a wake between the test and the cv wait. */
		struct snc_lock lock = snc_lock_e(&G.evictor_wake_mutex);
		{
			while (!G.evictor_shutdown && G.evictor_wake_gen == 0) {
				snc_cv_wait(&G.evictor_wake_cv, &lock);
			}
			shutdown = G.evictor_shutdown;
			G.evictor_wake_gen = 0;
		}
		snc_unlock(&lock);
	}
}