/* power_play/src/gpu_dx12.c — Direct3D 12 GPU backend (C, ~1852 lines, 62 KiB) */
#if DX12_TEST
#include "gpu.h"
#include "sys.h"
#include "arena.h"
#include "memory.h"
#include "string.h"
#include "scratch.h"
#include "app.h"
#include "work.h"
#include "log.h"
#include "resource.h"
#pragma warning(push, 0)
# define UNICODE
# define COBJMACROS
# include <Windows.h>
# include <d3d12.h>
# include <dxgidebug.h>
# include <dxgi1_6.h>
# include <combaseapi.h>
# include <d3dcompiler.h>
#pragma warning(pop)
#pragma comment(lib, "d3d12")
#pragma comment(lib, "dxgi")
#pragma comment(lib, "dxguid")
#pragma comment(lib, "d3dcompiler")
#define SH_CPU 1
//#define DX12_WAIT_FRAME_LATENCY 1
//#define DX12_SWAPCHAIN_FLAGS ((DX12_ALLOW_TEARING * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) | (DX12_WAIT_FRAME_LATENCY * DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT))
#define DX12_ALLOW_TEARING 1
#define DX12_SWAPCHAIN_FLAGS (DX12_ALLOW_TEARING * DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING)
#define DX12_SWAPCHAIN_BUFFER_COUNT (3)
#define DX12_SWAPCHAIN_FORMAT (DXGI_FORMAT_R8G8B8A8_UNORM)
//#define DX12_SWAPCHAIN_RTV_FORMAT (DXGI_FORMAT_R8G8B8A8_UNORM_SRGB)
/* Arbitrary limits */
#define DX12_NUM_CBV_SRV_UAV_DESCRIPTORS (1024 * 64)
#define DX12_NUM_RTV_DESCRIPTORS (1024 * 1)
#if RTC
# define DX12_DEBUG 1
# define DX12_SHADER_DEBUG 1
#else
# define DX12_DEBUG 0
# define DX12_SHADER_DEBUG 0
#endif
struct shader_desc {
char *file;
char *func;
};
/* Static description of a graphics pipeline: a name plus VS/PS entry points. */
struct pipeline_desc {
char *name; /* Human-readable name, used for logging */
struct shader_desc vs; /* Vertex shader source location */
struct shader_desc ps; /* Pixel shader source location */
u32 flags; /* NOTE(review): not read by any visible code — confirm intended meaning */
};
/* A compiled, ready-to-bind graphics pipeline. */
struct pipeline {
struct pipeline_desc desc; /* Description this pipeline was built from */
ID3D12PipelineState *pso; /* Compiled pipeline state object; NULL on load failure */
ID3D12RootSignature *rootsig; /* Root signature taken from the shader blobs; NULL on load failure */
};
/* Output of one pipeline load task: the pipeline plus timing and error text. */
struct pipeline_result {
struct pipeline pipeline; /* Valid only when errors_text_len == 0 */
i64 elapsed; /* Load duration in nanoseconds */
u64 errors_text_len; /* Bytes used in errors_text; 0 means success */
u8 errors_text[KILOBYTE(16)]; /* Truncated error message (not NUL-terminated) */
};
/* Wrapper for a pipeline error message.
 * NOTE(review): not referenced by any code visible in this file. */
struct pipeline_error {
struct string msg;
};
/* One CPU descriptor allocated out of a dx12_cpu_descriptor_heap; recycled
 * through the heap's free list. */
struct dx12_descriptor {
struct dx12_cpu_descriptor_heap *heap; /* Owning heap */
D3D12_CPU_DESCRIPTOR_HANDLE handle; /* CPU handle within the owning heap */
struct dx12_descriptor *next_free; /* Intrusive free-list link */
};
/* A GPU resource (buffer or texture) plus its tracked state and cached views. */
struct dx12_resource {
ID3D12Resource *resource; /* Underlying D3D12 resource */
enum D3D12_RESOURCE_STATES state; /* Tracked resource state — presumably used for transition barriers; confirm */
#if 0
D3D12_CPU_DESCRIPTOR_HANDLE cbv_handle;
D3D12_CPU_DESCRIPTOR_HANDLE srv_handle;
D3D12_CPU_DESCRIPTOR_HANDLE uav_handle;
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
#else
struct dx12_descriptor *cbv_descriptor; /* CBV, if created */
struct dx12_descriptor *srv_descriptor; /* SRV, if created */
struct dx12_descriptor *uav_descriptor; /* UAV, if created */
struct dx12_descriptor *rtv_descriptor; /* RTV, if created */
#endif
D3D12_GPU_VIRTUAL_ADDRESS gpu_address; /* NOTE: 0 for textures */
struct dx12_resource *next_free; /* Intrusive free-list link (resources pool) */
};
/* CPU-side descriptor heap: a fixed-capacity ID3D12DescriptorHeap fronted by
 * an arena of dx12_descriptor records plus a free list for recycling. */
struct dx12_cpu_descriptor_heap {
enum D3D12_DESCRIPTOR_HEAP_TYPE type; /* Heap type (CBV_SRV_UAV, RTV, ...) */
struct arena arena; /* Backing storage for dx12_descriptor records */
struct sys_mutex mutex; /* Synchronizes access to this heap */
u32 descriptor_size; /* Handle increment size for this heap type */
u32 num_descriptors_reserved; /* High-water mark of descriptors handed out */
u32 num_descriptors_capacity; /* Total descriptors in `heap` */
struct dx12_descriptor *first_free_descriptor; /* Recycled descriptors */
ID3D12DescriptorHeap *heap; /* Underlying D3D12 heap */
struct D3D12_CPU_DESCRIPTOR_HANDLE handle; /* CPU handle of the heap's first slot */
};
/* Shader-visible descriptor heap, pooled globally and recycled once the GPU
 * has finished with it (gated on free_fence). */
struct dx12_gpu_descriptor_heap {
D3D12_DESCRIPTOR_HEAP_TYPE type; /* Heap type */
ID3D12DescriptorHeap *heap; /* Underlying shader-visible heap */
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle; /* Heap start, CPU side (for descriptor copies) */
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle; /* Heap start, GPU side (for binding) */
/* If free_fence < free_fence_value, then descriptor heap is in use by the GPU */
ID3D12Fence *free_fence;
u64 free_fence_value;
struct dx12_gpu_descriptor_heap *prev_free; /* Doubly-linked free-list links */
struct dx12_gpu_descriptor_heap *next_free;
};
/* Discriminates what a handle-table entry's data pointer refers to. */
enum dx12_handle_kind {
DX12_HANDLE_KIND_NONE, /* Unused/invalid entry */
DX12_HANDLE_KIND_RESOURCE, /* data is a struct dx12_resource * */
DX12_HANDLE_KIND_PLAN,
DX12_HANDLE_KIND_DISPATCH_STATE,
NUM_DX12_HANDLE_KINDS
};
/* One slot in the handle table. `gen` is bumped on every allocation and
 * release, so a gpu_handle carrying an old generation is detectably stale. */
struct dx12_handle_entry {
enum dx12_handle_kind kind; /* What `data` points to */
u64 gen; /* Generation counter; must equal gpu_handle.gen to resolve */
u64 idx; /* This entry's index within the table */
void *data; /* Kind-specific payload */
struct dx12_handle_entry *next_free; /* Intrusive free-list link */
};
/* ========================== *
* Global state
* ========================== */
/* All global state for the DX12 backend. Zero-initialized; populated by
 * gpu_startup() and the dx12_init_* functions. The pools are guarded by
 * their own mutexes; the remainder is written once at startup. */
GLOBAL struct {
/* Handles pool: generation-checked entries mapping gpu_handle -> data */
struct sys_mutex handle_entries_mutex;
struct arena handle_entries_arena;
struct dx12_handle_entry *first_free_handle_entry;
u64 num_handle_entries_reserved;
/* Descriptor heaps pool (freed heaps are fence-gated, see dx12_gpu_descriptor_heap) */
struct sys_mutex gpu_descriptor_heaps_mutex;
struct arena gpu_descriptor_heaps_arena;
struct dx12_gpu_descriptor_heap *first_free_gpu_descriptor_heap;
struct dx12_gpu_descriptor_heap *last_free_gpu_descriptor_heap;
/* Resources pool */
struct sys_mutex resources_mutex;
struct arena resources_arena;
struct dx12_resource *first_free_resource;
/* FIXME: Remove this (testing) */
struct pipeline test_pipeline;
/* Factory */
IDXGIFactory6 *factory;
/* Device */
ID3D12Device *device;
/* Descriptor sizes/counts, indexed by D3D12_DESCRIPTOR_HEAP_TYPE */
u32 desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
u32 desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
/* Global descriptor heaps */
struct dx12_cpu_descriptor_heap *cbv_srv_uav_heap;
struct dx12_cpu_descriptor_heap *rtv_heap;
/* Command queues */
/* TODO: Add optional mode to route everything to direct queue */
ID3D12CommandQueue *cq_direct;
ID3D12CommandQueue *cq_compute;
ID3D12CommandQueue *cq_copy_critical;
ID3D12CommandQueue *cq_copy_background;
/* Swapchain */
u32 swapchain_frame_index; /* Current back-buffer index */
ID3D12CommandAllocator *swapchain_ca;
IDXGISwapChain3 *swapchain;
ID3D12DescriptorHeap *swapchain_rtv_heap; /* One RTV per back buffer */
ID3D12Resource *swapchain_buffers[DX12_SWAPCHAIN_BUFFER_COUNT];
/* Dummy vertex buffer */
/* NOTE(review): these fields are only referenced from disabled (#if 0) code,
 * which assigns a nonexistent G.quad_index_buffer — reconcile before enabling. */
struct dx12_resource *dummy_vertex_buffer;
struct dx12_resource *dummy_index_buffer;
D3D12_VERTEX_BUFFER_VIEW dummy_vertex_buffer_view;
D3D12_INDEX_BUFFER_VIEW quad_index_buffer_view;
} G = ZI, DEBUG_ALIAS(G, G_gpu_dx12);
/* ========================== *
* Startup
* ========================== */
INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(gpu_shutdown);
INTERNAL struct dx12_cpu_descriptor_heap *dx12_cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type);
INTERNAL void dx12_init_device(void);
INTERNAL void dx12_init_objects(void);
INTERNAL void dx12_init_swapchain(struct sys_window *window);
INTERNAL void dx12_init_pipelines(void);
/* Bring up the DX12 backend: allocate the global pools, create the device,
 * core objects, swapchain and pipelines, then register the exit callback.
 * `work_sr` is currently unused. Returns a (currently empty) receipt. */
struct gpu_startup_receipt gpu_startup(struct work_startup_receipt *work_sr, struct sys_window *window)
{
__prof;
(UNUSED)work_sr;
/* Initialize handles pool */
G.handle_entries_mutex = sys_mutex_alloc();
G.handle_entries_arena = arena_alloc(GIGABYTE(64));
/* Initialize gpu descriptor heaps pool */
G.gpu_descriptor_heaps_mutex = sys_mutex_alloc();
G.gpu_descriptor_heaps_arena = arena_alloc(GIGABYTE(64));
/* Initialize resources pool */
G.resources_mutex = sys_mutex_alloc();
G.resources_arena = arena_alloc(GIGABYTE(64));
/* Initialize dx12 (order matters: device -> objects -> swapchain -> pipelines) */
dx12_init_device();
dx12_init_objects();
dx12_init_swapchain(window);
dx12_init_pipelines();
/* Init dummy buffers */
/* NOTE(review): the disabled block below still uses D3D11 types/APIs and
 * references identifiers that do not exist here (size, flags,
 * G.quad_index_buffer, dx11_buffer_alloc) — it will not compile if
 * re-enabled as-is; it needs porting to the DX12 resource path. */
#if 0
{
LOCAL_PERSIST const DXGI_FORMAT formats[] = {
[GPU_TEXTURE_FORMAT_R8G8B8A8_UNORM] = DXGI_FORMAT_R8G8B8A8_UNORM,
[GPU_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB
};
enum dx12_resource_view_flags view_flags = DX12_RESOURCE_VIEW_FLAG_SRV;
D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
D3D12_RESOURCE_DESC desc = ZI;
desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
desc.Alignment = 0;
desc.Width = size.x;
desc.Height = size.y;
desc.DepthOrArraySize = 1;
desc.MipLevels = 1;
desc.SampleDesc.Count = 1;
desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
if (flags & GPU_TEXTURE_FLAG_TARGETABLE) {
desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
view_flags |= DX12_RESOURCE_VIEW_FLAG_RTV;
}
D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state, view_flags);
//G.dummy_vertex_buffer = dx12_resource_alloc(
/* Dummy vertex buffer */
u8 dummy_data[16] = ZI;
D3D11_BUFFER_DESC vdesc = ZI;
vdesc.Usage = D3D11_USAGE_IMMUTABLE;
vdesc.ByteWidth = sizeof(dummy_data);
vdesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
D3D11_SUBRESOURCE_DATA dummy_data_subres = ZI;
dummy_data_subres.pSysMem = dummy_data;
G.dummy_vertex_buffer = dx11_buffer_alloc(vdesc, &dummy_data_subres);
/* Quad index buffer */
LOCAL_PERSIST u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
D3D11_BUFFER_DESC idesc = ZI;
idesc.Usage = D3D11_USAGE_IMMUTABLE;
idesc.ByteWidth = sizeof(quad_indices);
idesc.BindFlags = D3D11_BIND_INDEX_BUFFER;
D3D11_SUBRESOURCE_DATA idata = ZI;
idata.pSysMem = quad_indices;
G.quad_index_buffer = dx11_buffer_alloc(idesc, &idata);
}
#endif
/* Register callbacks */
app_register_exit_callback(gpu_shutdown);
struct gpu_startup_receipt res = ZI;
return res;
}
/* App-exit callback. In debug builds, releases the long-lived D3D objects so
 * live-object reporting is less noisy; in non-debug builds nothing is
 * released (the process is exiting anyway).
 * NOTE(review): G.factory and the cpu descriptor heaps are not released here,
 * so they will still appear in live-object reports — confirm intended. */
INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(gpu_shutdown)
{
__prof;
#if DX12_DEBUG
/* Release objects to make live object reporting less noisy */
for (u64 i = 0; i < ARRAY_COUNT(G.swapchain_buffers); ++i) {
ID3D12Resource_Release(G.swapchain_buffers[i]);
}
ID3D12DescriptorHeap_Release(G.swapchain_rtv_heap);
IDXGISwapChain3_Release(G.swapchain);
ID3D12CommandAllocator_Release(G.swapchain_ca);
ID3D12CommandQueue_Release(G.cq_copy_background);
ID3D12CommandQueue_Release(G.cq_copy_critical);
ID3D12CommandQueue_Release(G.cq_compute);
ID3D12CommandQueue_Release(G.cq_direct);
ID3D12Device_Release(G.device);
#endif
}
/* ========================== *
* Handle
* ========================== */
INTERNAL void dx12_resource_release(struct dx12_resource *t);
/* Allocate a handle-table entry of `kind` pointing at `data` and return a
 * generation-checked handle to it.
 *
 * Entries are recycled from the free list when possible, otherwise pushed
 * onto the backing arena. Each (re)allocation bumps `gen`, so a
 * zero-initialized gpu_handle (gen 0, idx 0) never resolves.
 *
 * FIX: slot 0 is now reserved as the null-handle slot. handle_get_entry()
 * rejects idx == 0, so previously the very first entry ever allocated
 * received idx 0 and could never be resolved through its own handle. */
INTERNAL struct gpu_handle handle_alloc(enum dx12_handle_kind kind, void *data)
{
    u64 old_gen = 0;
    u64 idx = 0;
    struct dx12_handle_entry *entry = NULL;
    {
        struct sys_lock lock = sys_mutex_lock_e(&G.handle_entries_mutex);
        if (G.first_free_handle_entry) {
            entry = G.first_free_handle_entry;
            G.first_free_handle_entry = entry->next_free;
            old_gen = entry->gen;
            idx = entry->idx;
        } else {
            if (G.num_handle_entries_reserved == 0) {
                /* Burn slot 0 so idx 0 stays the invalid/null handle. The slot
                 * is never read (handle_get_entry requires idx > 0). */
                struct dx12_handle_entry *null_entry = arena_push_no_zero(&G.handle_entries_arena, struct dx12_handle_entry);
                MEMZERO_STRUCT(null_entry);
                G.num_handle_entries_reserved = 1;
            }
            entry = arena_push_no_zero(&G.handle_entries_arena, struct dx12_handle_entry);
            idx = G.num_handle_entries_reserved++;
        }
        sys_mutex_unlock(&lock);
    }
    MEMZERO_STRUCT(entry);
    entry->kind = kind;
    entry->gen = old_gen + 1; /* Invalidate any handles from a prior life of this slot */
    entry->idx = idx;
    entry->data = data;
    struct gpu_handle res = ZI;
    res.gen = entry->gen;
    res.idx = entry->idx;
    return res;
}
/* Resolve `handle` to its table entry, or NULL when the index is out of
 * range (idx 0 is the reserved null handle) or the generation is stale.
 * The caller must hold the handle-entries mutex (shared or exclusive);
 * `lock` is asserted against it. */
INTERNAL struct dx12_handle_entry *handle_get_entry(struct gpu_handle handle, struct sys_lock *lock)
{
    sys_assert_locked_e_or_s(lock, &G.handle_entries_mutex);
    if (handle.idx == 0 || handle.idx >= G.num_handle_entries_reserved) {
        return NULL;
    }
    struct dx12_handle_entry *entries = (struct dx12_handle_entry *)G.handle_entries_arena.base;
    struct dx12_handle_entry *entry = &entries[handle.idx];
    return (entry->gen == handle.gen) ? entry : NULL;
}
INTERNAL void *handle_get_data(struct gpu_handle handle, enum dx12_handle_kind kind)
{
void *data = NULL;
struct sys_lock lock = sys_mutex_lock_s(&G.handle_entries_mutex);
{
struct dx12_handle_entry *entry = handle_get_entry(handle, &lock);
data = entry->data;
#if RTC
/* Handle should match expected kind */
ASSERT(entry->kind == kind);
#endif
}
sys_mutex_unlock(&lock);
return data;
}
/* TODO: The GPU api should ensure that resources freed by the caller will not cause issues on the GPU (via fencing),
* however the caller is responsible for managing resource lifetimes on the CPU side (e.g. using sprites w/ sprite scopes
* to ensure freed textures aren't being used in pending command lists. */
/* Release the handle table entry for `handle` and free its backing data.
 * A stale or invalid handle is a no-op.
 * FIX: the generation bump and free-list push ran unconditionally after the
 * `if (entry)` guard, dereferencing NULL for stale/invalid handles; they now
 * run only when the entry resolved. */
void gpu_release(struct gpu_handle handle)
{
    enum dx12_handle_kind kind = 0;
    void *data = NULL;
    /* Release handle entry */
    struct sys_lock lock = sys_mutex_lock_e(&G.handle_entries_mutex);
    {
        struct dx12_handle_entry *entry = handle_get_entry(handle, &lock);
        if (entry) {
            kind = entry->kind;
            data = entry->data;
            /* Invalidate outstanding copies of this handle, then recycle the slot */
            ++entry->gen;
            entry->next_free = G.first_free_handle_entry;
            G.first_free_handle_entry = entry;
        }
    }
    sys_mutex_unlock(&lock);
    /* Release data outside the lock */
    if (data) {
        switch (kind) {
        default: break;
        case DX12_HANDLE_KIND_RESOURCE:
        {
            dx12_resource_release(data);
        } break;
        }
    }
}
/* ========================== *
* Dx12 device initialization
* ========================== */
INTERNAL void dx12_init_error(struct string error)
{
struct arena_temp scratch = scratch_begin_no_conflict();
struct string msg = string_format(scratch.arena, LIT("Failed to initialize DirectX 12.\n\n%F"), FMT_STR(error));
sys_panic(msg);
scratch_end(scratch);
}
INTERNAL void dx12_init_device(void)
{
__prof;
struct arena_temp scratch = scratch_begin_no_conflict();
HRESULT hr = 0;
/* Enable debug layer */
u32 dxgi_factory_flags = 0;
#if DX12_DEBUG
{
ID3D12Debug *debug_controller0 = NULL;
hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create ID3D12Debug0"));
}
ID3D12Debug1 *debug_controller1 = NULL;
hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create ID3D12Debug1"));
}
ID3D12Debug_EnableDebugLayer(debug_controller0);
/* FIXME: Enable this */
//ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, true);
ID3D12Debug_Release(debug_controller1);
ID3D12Debug_Release(debug_controller0);
dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG;
}
#endif
/* Create factory */
hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G.factory);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to initialize DXGI factory"));
}
/* Create device */
{
ID3D12Device *device = NULL;
struct string error = LIT("Could not initialize GPU device.");
struct string first_gpu_name = ZI;
u32 adapter_index = 0;
while (true) {
IDXGIAdapter1 *adapter = NULL;
hr = IDXGIFactory6_EnumAdapterByGpuPreference(G.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter1, (void **)&adapter);
if (SUCCEEDED(hr)) {
DXGI_ADAPTER_DESC1 desc;
IDXGIAdapter1_GetDesc1(adapter, &desc);
if (first_gpu_name.len == 0) {
first_gpu_name = string_from_wstr_no_limit(scratch.arena, desc.Description);
}
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device);
if (SUCCEEDED(hr)) {
IDXGIAdapter1_Release(adapter);
adapter = NULL;
break;
}
ID3D12Device_Release(device);
IDXGIAdapter1_Release(adapter);
adapter = NULL;
device = NULL;
++adapter_index;
} else {
break;
}
}
if (!device) {
if (first_gpu_name.len > 0) {
struct string fmt = LIT("Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.");
error = string_format(scratch.arena, fmt, FMT_STR(first_gpu_name));
}
dx12_init_error(error);
}
G.device = device;
}
#if DX12_DEBUG
/* Enable D3D12 Debug break */
{
ID3D12InfoQueue *info = NULL;
hr = ID3D12Device_QueryInterface(G.device, &IID_ID3D12InfoQueue, (void **)&info);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to query ID3D12Device interface"));
}
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE);
ID3D12InfoQueue_SetBreakOnSeverity(info, D3D12_MESSAGE_SEVERITY_ERROR, TRUE);
ID3D12InfoQueue_Release(info);
}
/* Enable DXGI Debug break */
{
IDXGIInfoQueue *dxgi_info = NULL;
hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgi_info);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to get DXGI debug interface"));
}
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, TRUE);
IDXGIInfoQueue_SetBreakOnSeverity(dxgi_info, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, TRUE);
IDXGIInfoQueue_Release(dxgi_info);
}
#endif
scratch_end(scratch);
}
/* ========================== *
* Dx12 object initialization
* ========================== */
INTERNAL void dx12_init_objects(void)
{
HRESULT hr = 0;
/* Initialize desc sizes */
G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_DSV] = ID3D12Device_GetDescriptorHandleIncrementSize(G.device, D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
/* Initialize desc counts */
G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV] = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
G.desc_counts[D3D12_DESCRIPTOR_HEAP_TYPE_RTV] = DX12_NUM_RTV_DESCRIPTORS;
/* Create global descriptor heaps */
G.cbv_srv_uav_heap = dx12_cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
G.rtv_heap = dx12_cpu_descriptor_heap_alloc(D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
/* Create direct command queue */
{
D3D12_COMMAND_QUEUE_DESC desc = ZI;
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
hr = ID3D12Device_CreateCommandQueue(G.device, &desc, &IID_ID3D12CommandQueue, (void **)&G.cq_direct);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create direct command queue"));
}
}
/* Create compute command queue */
{
D3D12_COMMAND_QUEUE_DESC desc = ZI;
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
hr = ID3D12Device_CreateCommandQueue(G.device, &desc, &IID_ID3D12CommandQueue, (void **)&G.cq_compute);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create compute command queue"));
}
}
/* Create critical copy command queue */
{
D3D12_COMMAND_QUEUE_DESC desc = ZI;
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
desc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_HIGH;
hr = ID3D12Device_CreateCommandQueue(G.device, &desc, &IID_ID3D12CommandQueue, (void **)&G.cq_copy_critical);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create critical copy command queue"));
}
}
/* Create background copy command queue */
{
D3D12_COMMAND_QUEUE_DESC desc = ZI;
desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
desc.Type = D3D12_COMMAND_LIST_TYPE_COPY;
hr = ID3D12Device_CreateCommandQueue(G.device, &desc, &IID_ID3D12CommandQueue, (void **)&G.cq_copy_background);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create background copy command queue"));
}
}
}
/* ========================== *
* Dx12 swapchain initialization
* ========================== */
/* Create the swapchain (flip-discard, DX12_SWAPCHAIN_BUFFER_COUNT buffers on
 * the direct queue), its command allocator, and one RTV per back buffer in a
 * dedicated RTV heap. Panics via dx12_init_error on any failure. */
INTERNAL void dx12_init_swapchain(struct sys_window *window)
{
HRESULT hr = 0;
/* Create swapchain command allocator */
{
hr = ID3D12Device_CreateCommandAllocator(G.device, D3D12_COMMAND_LIST_TYPE_DIRECT, &IID_ID3D12CommandAllocator, (void **)&G.swapchain_ca);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create swapchain command allocator"));
}
}
/* Create swapchain */
{
HWND hwnd = (HWND)sys_window_get_internal_handle(window);
/* Width/Height left zero: CreateSwapChainForHwnd then sizes the buffers
 * from the window's client area */
DXGI_SWAP_CHAIN_DESC1 desc = {
.Format = DX12_SWAPCHAIN_FORMAT,
.SampleDesc = { 1, 0 },
.BufferUsage = DXGI_USAGE_SHADER_INPUT | DXGI_USAGE_RENDER_TARGET_OUTPUT,
.BufferCount = DX12_SWAPCHAIN_BUFFER_COUNT,
.Scaling = DXGI_SCALING_NONE,
.Flags = DX12_SWAPCHAIN_FLAGS,
.AlphaMode = DXGI_ALPHA_MODE_IGNORE,
.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD
};
/* Create swapchain1 */
IDXGISwapChain1 *swapchain1 = NULL;
hr = IDXGIFactory2_CreateSwapChainForHwnd(G.factory, (IUnknown *)G.cq_direct, hwnd, &desc, NULL, NULL, &swapchain1);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create IDXGISwapChain1"));
}
/* Upgrade to swapchain3 (needed for GetCurrentBackBufferIndex) */
hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain3, (void **)&G.swapchain);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create IDXGISwapChain3"));
}
/* Disable Alt+Enter changing monitor resolution to match window size */
IDXGIFactory_MakeWindowAssociation(G.factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
/* Get initial frame index */
G.swapchain_frame_index = IDXGISwapChain3_GetCurrentBackBufferIndex(G.swapchain);
IDXGISwapChain1_Release(swapchain1);
}
/* Create swapchain RTV heap (CPU-only; RTV heaps are never shader visible) */
{
D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
desc.NumDescriptors = DX12_SWAPCHAIN_BUFFER_COUNT;
desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV;
desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&G.swapchain_rtv_heap);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to create swapchain RTV heap"));
}
}
/* Create swapchain RTVs (one per back buffer, consecutive slots in the heap) */
{
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = ZI;
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(G.swapchain_rtv_heap, &rtv_handle);
for (u32 i = 0; i < DX12_SWAPCHAIN_BUFFER_COUNT; ++i) {
hr = IDXGISwapChain3_GetBuffer(G.swapchain, i, &IID_ID3D12Resource, (void **)&G.swapchain_buffers[i]);
if (FAILED(hr)) {
dx12_init_error(LIT("Failed to get swapchain buffer"));
}
ID3D12Device_CreateRenderTargetView(G.device, G.swapchain_buffers[i], NULL, rtv_handle);
rtv_handle.ptr += G.desc_sizes[D3D12_DESCRIPTOR_HEAP_TYPE_RTV];
}
}
}
/* ========================== *
* Dx12 pipeline initialization
* ========================== */
/* TDOO: Rename 'mesh shader' to 'triangle shader' or something */
/* TODO: Move shader structs into shared C-HLSL header file */
/* ============= */
/* Mesh pipeline */
/* ============= */
/* Material pipeline */
/* Per-draw constants for the material pipeline; packed so the layout is
 * intended to match the HLSL side (see shared-header TODO above). */
PACK(struct fx_material_constant {
struct mat4x4 vp; /* View-projection matrix */
u32 instance_offset; /* First fx_material_instance index for this draw — presumably; confirm against shader */
});
/* Per-instance data for the material pipeline; packed to match the HLSL layout. */
PACK(struct fx_material_instance {
struct xform xf; /* Instance transform */
struct v2 uv0; /* Texture coordinate 0 (presumably UV rect min — confirm against shader) */
struct v2 uv1; /* Texture coordinate 1 (presumably UV rect max — confirm against shader) */
u32 tint_srgb; /* Packed tint color, presumably sRGB-encoded */
f32 emittance; /* Emissive intensity */
});
/* ============= */
/* Grid pipeline */
/* ============= */
/* Init pipelines */
INTERNAL struct pipeline_result *pipeline_alloc_from_descs(struct arena *arena, u64 num_pipelines, struct pipeline_desc *descs);
INTERNAL void pipeline_release(struct pipeline *pipeline);
/* Build every pipeline described in the table below (currently only the
 * "material" pipeline) and stash the results; panics with the compiler's
 * error text on failure. */
INTERNAL void dx12_init_pipelines(void)
{
__prof;
struct arena_temp scratch = scratch_begin_no_conflict();
struct pipeline_desc pipeline_descs[] = {
/* Material pipeline */
{
.name = "material",
.vs = { "sh/material.hlsl", "vs" },
.ps = { "sh/material.hlsl", "ps" }
}
};
struct pipeline_result *results = pipeline_alloc_from_descs(scratch.arena, ARRAY_COUNT(pipeline_descs), pipeline_descs);
for (u64 i = 0; i < ARRAY_COUNT(pipeline_descs); ++i) {
struct pipeline_result *result = &results[i];
if (result->errors_text_len > 0) {
struct string msg = STRING(result->errors_text_len, result->errors_text);
sys_panic(msg);
/* NOTE(review): sys_panic presumably does not return, which would make
 * this release unreachable — confirm, or release before panicking. */
pipeline_release(&result->pipeline);
} else {
/* FIXME: remove this */
G.test_pipeline = result->pipeline;
}
}
scratch_end(scratch);
}
/* ========================== *
* Shader compilation
* ========================== */
/* ID3DInclude implementation backed by the resource system, used while
 * compiling shaders with D3DCompile. The code assumes at most one include
 * file is open at any time. */
struct dx12_include_handler {
ID3DInclude d3d_handler; /* COM-style base passed to D3DCompile; lpVtbl must point at a stable vtable */
ID3DIncludeVtbl vtbl; /* Vtable storage */
struct pipeline *pipeline; /* Pipeline this compilation belongs to */
b32 has_open_resource; /* True while `res` is open between Open/Close */
struct resource res; /* Currently open include resource */
};
/* ID3DInclude::Open — resolve `name_cstr` through the resource system and
 * return its bytes to the shader compiler. Returns S_OK with the data on
 * success, E_FAIL when the resource does not exist.
 * FIX: the panic message said "Dx11"; this is the Dx12 backend. */
INTERNAL HRESULT dx12_include_open(ID3DInclude *d3d_handler, D3D_INCLUDE_TYPE include_type, LPCSTR name_cstr, LPCVOID parent_data, LPCVOID *data_out, UINT *data_len_out)
{
    __prof;
    (UNUSED)include_type;
    (UNUSED)parent_data;
    HRESULT result = E_FAIL;
    struct dx12_include_handler *handler = (struct dx12_include_handler *)d3d_handler;
    struct string name = string_from_cstr_no_limit((char *)name_cstr);
    /* Only one include may be open at a time (Close must pair each Open) */
    if (handler->has_open_resource) {
        sys_panic(LIT("Dx12 include handler somehow already has a resource open"));
    }
    struct resource res = resource_open(name);
    if (resource_exists(&res)) {
        handler->res = res;
        handler->has_open_resource = true;
        struct string data = resource_get_data(&res);
        *data_out = data.text;
        *data_len_out = data.len;
        result = S_OK;
    }
#if 0
#if RESOURCE_RELOADING
    shader_add_include(&G.shader_info[handler->shader->kind], name);
#endif
#endif
    return result;
}
/* ID3DInclude::Close — close the resource opened by the matching Open call,
 * if one is still open. Always reports success to the compiler. */
INTERNAL HRESULT dx12_include_close(ID3DInclude *d3d_handler, LPCVOID data)
{
    __prof;
    (UNUSED)data;
    struct dx12_include_handler *handler = (struct dx12_include_handler *)d3d_handler;
    if (!handler->has_open_resource) {
        return S_OK;
    }
    resource_close(&handler->res);
    handler->has_open_resource = false;
    return S_OK;
}
INTERNAL struct dx12_include_handler dx12_include_handler_alloc(struct pipeline *pipeline)
{
struct dx12_include_handler handler = ZI;
handler.d3d_handler.lpVtbl = &handler.vtbl;
handler.vtbl.Open = dx12_include_open;
handler.vtbl.Close = dx12_include_close;
handler.pipeline = pipeline;
return handler;
}
/* Tear down an include handler. Every Open should already have been paired
 * with a Close by the compiler; a still-open resource indicates a bug, but
 * it is closed anyway to avoid leaking. */
INTERNAL void dx12_include_handler_release(struct dx12_include_handler *handler)
{
    if (!handler->has_open_resource) {
        return;
    }
    ASSERT(false); /* Resource should have been closed by handler by now */
    resource_close(&handler->res);
}
/* Which shader stage a shader_compile_task compiles; selects the target profile. */
enum shader_compile_task_kind {
SHADER_COMPILE_TASK_KIND_VS, /* Vertex shader (target vs_5_1) */
SHADER_COMPILE_TASK_KIND_PS /* Pixel shader (target ps_5_1) */
};
/* In/out argument block for shader_compile_task. The caller owns (and must
 * Release) `blob` and `error_blob` after the task completes. */
struct shader_compile_task_arg {
/* In */
enum shader_compile_task_kind kind; /* Stage to compile */
struct pipeline *pipeline; /* Pipeline being built (passed to the include handler) */
struct shader_desc shader_desc; /* Source file + entry point */
struct resource *shader_res; /* Open source resource; must stay valid for the task's duration */
/* Out */
b32 success; /* True when compilation succeeded with no error blob */
ID3DBlob *blob; /* Compiled bytecode (may be NULL on failure) */
ID3DBlob *error_blob; /* Compiler messages (NULL when none) */
i64 elapsed; /* Compile duration in nanoseconds */
};
/* TODO: Compile shaders offline w/ dxc for performance & language features like static_assert */
/* Worker task: compile one shader (VS or PS) from `shader_res` with
 * D3DCompile, routing #include through the resource system. Writes success,
 * blob, error_blob and elapsed back into the argument block; the caller owns
 * and releases both blobs. */
INTERNAL WORK_TASK_FUNC_DEF(shader_compile_task, comp_arg_raw)
{
__prof;
struct shader_compile_task_arg *comp_arg = (struct shader_compile_task_arg *)comp_arg_raw;
enum shader_compile_task_kind kind = comp_arg->kind;
struct pipeline *pipeline = comp_arg->pipeline;
struct shader_desc shader_desc = comp_arg->shader_desc;
struct resource *shader_res = comp_arg->shader_res;
struct arena_temp scratch = scratch_begin_no_conflict();
{
i64 start_ns = sys_time_ns();
b32 success = false;
ID3DBlob *blob = NULL;
ID3DBlob *error_blob = NULL;
struct string file_name = string_from_cstr_no_limit(shader_desc.file);
struct string func_name = string_from_cstr_no_limit(shader_desc.func);
if (resource_exists(shader_res)) {
struct dx12_include_handler include_handler = dx12_include_handler_alloc(pipeline);
u32 d3d_compile_flags = 0;
#if DX12_SHADER_DEBUG
d3d_compile_flags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_ENABLE_STRICTNESS;
#else
d3d_compile_flags |= D3DCOMPILE_OPTIMIZATION_LEVEL3;
#endif
/* Compile shader */
{
struct string shader_src = resource_get_data(shader_res);
logf_info("Compiling shader \"%F:%F\"", FMT_STR(file_name), FMT_STR(func_name));
/* Prefix with "res/" so compiler diagnostics show a project-relative path */
struct string friendly_name = string_cat(scratch.arena, LIT("res/"), file_name);
char *friendly_name_cstr = cstr_from_string(scratch.arena, friendly_name);
/* Shader-model 5.1 target chosen by task kind */
char *target = NULL;
switch (kind) {
case SHADER_COMPILE_TASK_KIND_VS:
{
target = "vs_5_1";
} break;
case SHADER_COMPILE_TASK_KIND_PS:
{
target = "ps_5_1";
} break;
}
/* SH_CPU=0 lets shared C/HLSL headers detect they are compiling as HLSL */
D3D_SHADER_MACRO defines[] = {
{ "SH_CPU", "0" },
{ NULL, NULL }
};
HRESULT hr = D3DCompile(shader_src.text, shader_src.len, friendly_name_cstr, defines, (ID3DInclude *)&include_handler, shader_desc.func, target, d3d_compile_flags, 0, &blob, &error_blob);
/* NOTE(review): a non-NULL error_blob counts as failure even when hr
 * succeeds — D3DCompile can emit warnings alongside success, so this is
 * effectively warnings-as-errors; confirm that policy is intended. */
success = SUCCEEDED(hr) && !error_blob;
}
dx12_include_handler_release(&include_handler);
}
#if 0
if (success) {
logf_success("Finished compiling shader \"%F\" in %F seconds", FMT_STR(src_name), FMT_FLOAT(SECONDS_FROM_NS(sys_time_ns() - start_ns)));
}
#endif
comp_arg->success = success;
comp_arg->blob = blob;
comp_arg->error_blob = error_blob;
comp_arg->elapsed = sys_time_ns() - start_ns;
}
scratch_end(scratch);
}
/* ========================== *
* Pipeline
* ========================== */
/* Argument block for pipeline_load_task: where to build the pipeline and
 * where to record the outcome. Both must outlive the task. */
struct pipeline_load_task_arg {
struct pipeline *pipeline; /* Pipeline to build (desc must already be set) */
struct pipeline_result *result; /* Receives timing and error text */
};
/* Worker task: build one graphics pipeline.
 * Steps: open the VS/PS source resources (shared when both stages come from
 * the same file), compile both shaders in parallel, verify both embed
 * identical root signatures, then create the ID3D12RootSignature and PSO.
 * Outputs: pipeline->pso/rootsig (NULL on failure), result->elapsed, and on
 * failure a truncated error message in result->errors_text.
 * FIX: the VS root-signature blob is now released when it is NOT promoted to
 * rootsig_blob; the missing/mismatched-root-signature error paths leaked it. */
INTERNAL WORK_TASK_FUNC_DEF(pipeline_load_task, load_arg_raw)
{
    __prof;
    struct pipeline_load_task_arg *load_arg = (struct pipeline_load_task_arg *)load_arg_raw;
    struct pipeline *pipeline = load_arg->pipeline;
    struct pipeline_desc desc = pipeline->desc;
    struct pipeline_result *result = load_arg->result;
    struct arena_temp scratch = scratch_begin_no_conflict();
    {
        i64 start_ns = sys_time_ns();
        struct string pipeline_name = string_from_cstr_no_limit(desc.name);
        logf_info("Loading pipeline \"%F\"", FMT_STR(pipeline_name));
        b32 success = true;
        HRESULT hr = 0;
        struct string error_str = LIT("Unknown error");
        /* Open shader sources; VS and PS may share a single resource */
        struct string vs_filename = string_from_cstr_no_limit(desc.vs.file);
        struct string ps_filename = string_from_cstr_no_limit(desc.ps.file);
        b32 ps_res_is_shared = string_eq(vs_filename, ps_filename);
        struct resource vs_res = resource_open(vs_filename);
        struct resource ps_res = vs_res;
        if (!ps_res_is_shared) {
            ps_res = resource_open(ps_filename);
        }
        if (success) {
            if (!resource_exists(&vs_res)) {
                error_str = string_format(scratch.arena, LIT("Shader source \"%F\" not found"), FMT_STR(vs_filename));
                success = false;
            } else if (!resource_exists(&ps_res)) {
                error_str = string_format(scratch.arena, LIT("Shader source \"%F\" not found"), FMT_STR(ps_filename));
                success = false;
            }
        }
        struct shader_compile_task_arg vs = ZI;
        vs.kind = SHADER_COMPILE_TASK_KIND_VS;
        vs.pipeline = pipeline;
        vs.shader_desc = desc.vs;
        vs.shader_res = &vs_res;
        struct shader_compile_task_arg ps = ZI;
        ps.kind = SHADER_COMPILE_TASK_KIND_PS;
        ps.pipeline = pipeline;
        ps.shader_desc = desc.ps;
        ps.shader_res = &ps_res;
        /* Compile both shaders in parallel via the work system */
        if (success) {
            struct work_slate ws = work_slate_begin();
            work_slate_push_task(&ws, shader_compile_task, &vs);
            work_slate_push_task(&ws, shader_compile_task, &ps);
            struct work_handle work = work_slate_end_and_help(&ws, WORK_PRIORITY_HIGH);
            work_wait(work);
            success = vs.success && ps.success;
        }
        /* Get root signature blob
         * NOTE: This isn't necessary for creating the root signature (since it
         * could reuse the shader blob), however we'd like to verify that the
         * root signature exists and matches between shaders. */
        ID3D10Blob *rootsig_blob = NULL;
        if (success) {
            __profscope(Validate root signatures);
            char *vs_rootsig_data = NULL;
            char *ps_rootsig_data = NULL;
            u32 vs_rootsig_data_len = 0;
            u32 ps_rootsig_data_len = 0;
            ID3D10Blob *vs_rootsig_blob = NULL;
            ID3D10Blob *ps_rootsig_blob = NULL;
            D3DGetBlobPart(ID3D10Blob_GetBufferPointer(vs.blob), ID3D10Blob_GetBufferSize(vs.blob), D3D_BLOB_ROOT_SIGNATURE, 0, &vs_rootsig_blob);
            D3DGetBlobPart(ID3D10Blob_GetBufferPointer(ps.blob), ID3D10Blob_GetBufferSize(ps.blob), D3D_BLOB_ROOT_SIGNATURE, 0, &ps_rootsig_blob);
            if (vs_rootsig_blob) {
                vs_rootsig_data = ID3D10Blob_GetBufferPointer(vs_rootsig_blob);
                vs_rootsig_data_len = ID3D10Blob_GetBufferSize(vs_rootsig_blob);
            }
            if (ps_rootsig_blob) {
                ps_rootsig_data = ID3D10Blob_GetBufferPointer(ps_rootsig_blob);
                ps_rootsig_data_len = ID3D10Blob_GetBufferSize(ps_rootsig_blob);
            }
            if (vs_rootsig_data_len == 0) {
                success = false;
                error_str = LIT("Vertex shader is missing root signature");
            } else if (ps_rootsig_data_len == 0) {
                success = false;
                error_str = LIT("Pixel shader is missing root signature");
            } else if (vs_rootsig_data_len != ps_rootsig_data_len || !MEMEQ(vs_rootsig_data, ps_rootsig_data, vs_rootsig_data_len)) {
                success = false;
                error_str = LIT("Root signature mismatch between vertex and pixel shader");
            } else {
                /* Promote the VS blob; it is released at the end of the function */
                rootsig_blob = vs_rootsig_blob;
            }
            if (ps_rootsig_blob) {
                ID3D10Blob_Release(ps_rootsig_blob);
            }
            /* FIX: release the VS blob when it was not promoted (leaked before) */
            if (vs_rootsig_blob && vs_rootsig_blob != rootsig_blob) {
                ID3D10Blob_Release(vs_rootsig_blob);
            }
        }
        /* Create root signature */
        ID3D12RootSignature *rootsig = NULL;
        if (success) {
            __profscope(Create root signature);
            hr = ID3D12Device_CreateRootSignature(G.device, 0, ID3D10Blob_GetBufferPointer(rootsig_blob), ID3D10Blob_GetBufferSize(rootsig_blob), &IID_ID3D12RootSignature, (void **)&rootsig);
            if (FAILED(hr)) {
                error_str = LIT("Failed to create root signature");
                success = false;
            }
        }
        /* Create PSO */
        ID3D12PipelineState *pso = NULL;
        if (success) {
            __profscope(Create PSO);
            /* Default rasterizer state */
            D3D12_RASTERIZER_DESC raster_desc = {
                .FillMode = D3D12_FILL_MODE_SOLID,
                .CullMode = D3D12_CULL_MODE_BACK,
                .FrontCounterClockwise = FALSE,
                .DepthBias = D3D12_DEFAULT_DEPTH_BIAS,
                .DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP,
                .SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS,
                .DepthClipEnable = TRUE,
                .MultisampleEnable = FALSE,
                .AntialiasedLineEnable = FALSE,
                .ForcedSampleCount = 0,
                .ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF
            };
            /* No input layout */
            D3D12_INPUT_LAYOUT_DESC input_layout_desc = {
                .pInputElementDescs = NULL,
                .NumElements = 0
            };
            /* Opaque blend state */
            D3D12_BLEND_DESC blend_desc = {
                .AlphaToCoverageEnable = FALSE,
                .IndependentBlendEnable = FALSE
            };
            blend_desc.RenderTarget[0].BlendEnable = FALSE;
            blend_desc.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL;
            /* Disable depth stencil */
            D3D12_DEPTH_STENCIL_DESC depth_stencil_desc = {
                .DepthEnable = FALSE,
                .StencilEnable = FALSE
            };
            /* PSO */
            D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc = { 0 };
            pso_desc.pRootSignature = rootsig;
            if (vs.success) {
                pso_desc.VS.pShaderBytecode = ID3D10Blob_GetBufferPointer(vs.blob);
                pso_desc.VS.BytecodeLength = ID3D10Blob_GetBufferSize(vs.blob);
            }
            if (ps.success) {
                pso_desc.PS.pShaderBytecode = ID3D10Blob_GetBufferPointer(ps.blob);
                pso_desc.PS.BytecodeLength = ID3D10Blob_GetBufferSize(ps.blob);
            }
            pso_desc.BlendState = blend_desc;
            pso_desc.SampleMask = UINT_MAX;
            pso_desc.RasterizerState = raster_desc;
            pso_desc.DepthStencilState = depth_stencil_desc;
            pso_desc.InputLayout = input_layout_desc;
            pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
            pso_desc.NumRenderTargets = 1;
            pso_desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
            pso_desc.SampleDesc.Count = 1;
            hr = ID3D12Device_CreateGraphicsPipelineState(G.device, &pso_desc, &IID_ID3D12PipelineState, (void **)&pso);
            if (FAILED(hr)) {
                error_str = LIT("Failed to create pipeline state object");
                success = false;
            }
        }
        /* Copy error message, preferring the compiler's error blob when present */
        if (!success) {
            ID3D10Blob *error_blob = vs.error_blob ? vs.error_blob : ps.error_blob;
            if (error_blob) {
                u64 error_blob_cstr_len = ID3D10Blob_GetBufferSize(error_blob);
                char *error_blob_cstr = (char *)ID3D10Blob_GetBufferPointer(error_blob);
                struct string error_blob_str = string_copy(scratch.arena, string_from_cstr(error_blob_cstr, error_blob_cstr_len));
                if (string_ends_with(error_blob_str, LIT("\n"))) {
                    /* Remove trailing newline */
                    error_blob_str.len -= 1;
                }
                if (error_blob_str.len > 0) {
                    error_str = error_blob_str;
                }
            }
            /* Truncate into the fixed-size result buffer */
            result->errors_text_len = min_u64(error_str.len, ARRAY_COUNT(result->errors_text));
            MEMCPY(result->errors_text, error_str.text, result->errors_text_len);
        }
        pipeline->pso = pso;
        pipeline->rootsig = rootsig;
        result->elapsed = sys_time_ns() - start_ns;
        /* Cleanup: close resources and release all blobs */
        resource_close(&vs_res);
        if (!ps_res_is_shared) {
            resource_close(&ps_res);
        }
        if (rootsig_blob) {
            ID3D10Blob_Release(rootsig_blob);
        }
        if (vs.blob) {
            ID3D10Blob_Release(vs.blob);
        }
        if (vs.error_blob) {
            ID3D10Blob_Release(vs.error_blob);
        }
        if (ps.blob) {
            ID3D10Blob_Release(ps.blob);
        }
        if (ps.error_blob) {
            ID3D10Blob_Release(ps.error_blob);
        }
    }
    scratch_end(scratch);
}
/* Allocate `num_pipelines` pipeline results from `arena` and compile them in
 * parallel on the work system. Blocks until every pipeline load task has
 * completed; returns the result array (one entry per desc, same order). */
INTERNAL struct pipeline_result *pipeline_alloc_from_descs(struct arena *arena, u64 num_pipelines, struct pipeline_desc *descs)
{
	__prof;
	struct pipeline_result *results = arena_push_array(arena, struct pipeline_result, num_pipelines);
	struct pipeline_load_task_arg *task_args = arena_push_array(arena, struct pipeline_load_task_arg, num_pipelines);
	/* Load pipelines */
	struct work_slate ws = work_slate_begin();
	for (u64 i = 0; i < num_pipelines; ++i) {
		struct pipeline_result *result = &results[i];
		/* BUGFIX: was `&results->pipeline`, which always aliased slot 0 so
		 * every task overwrote the first pipeline. Index per iteration. */
		struct pipeline *pipeline = &result->pipeline;
		pipeline->desc = descs[i];
		struct pipeline_load_task_arg *arg = &task_args[i];
		arg->pipeline = pipeline;
		arg->result = result;
		work_slate_push_task(&ws, pipeline_load_task, arg);
	}
	struct work_handle work = work_slate_end_and_help(&ws, WORK_PRIORITY_HIGH);
	work_wait(work);
	return results;
}
INTERNAL void pipeline_release(struct pipeline *pipeline)
{
__prof;
if (pipeline->pso) {
ID3D12PipelineState_Release(pipeline->pso);
}
}
#if 1
/* ========================== *
* CPU descriptor heap
* ========================== */
/* Allocate a CPU-visible (non-shader-visible) descriptor heap of `type`,
 * backed by its own arena. Capacity and descriptor stride come from the
 * per-type tables in G; panics on unsupported types or creation failure. */
INTERNAL struct dx12_cpu_descriptor_heap *dx12_cpu_descriptor_heap_alloc(enum D3D12_DESCRIPTOR_HEAP_TYPE type)
{
	struct dx12_cpu_descriptor_heap *dh = NULL;
	{
		/* The heap struct lives inside its own arena so descriptors can be
		 * pushed onto it later. */
		struct arena arena = arena_alloc(MEGABYTE(64));
		dh = arena_push(&arena, struct dx12_cpu_descriptor_heap);
		dh->arena = arena;
	}
	dh->mutex = sys_mutex_alloc();
	u32 num_descriptors = 0;
	u32 descriptor_size = 0;
	if (type < (i32)ARRAY_COUNT(G.desc_counts) && type < (i32)ARRAY_COUNT(G.desc_sizes)) {
		num_descriptors = G.desc_counts[type];
		descriptor_size = G.desc_sizes[type];
	}
	if (num_descriptors == 0 || descriptor_size == 0) {
		sys_panic(LIT("Unsupported CPU descriptor type"));
	}
	/* BUGFIX: record the heap type. dx12_gpu_descriptor_heap_alloc asserts
	 * on dh->type; previously it was never set and only passed because
	 * CBV_SRV_UAV happens to be 0. */
	dh->type = type;
	dh->num_descriptors_capacity = num_descriptors;
	dh->descriptor_size = descriptor_size;
	D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
	desc.Type = type;
	desc.NumDescriptors = num_descriptors;
	HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&dh->heap);
	if (FAILED(hr)) {
		sys_panic(LIT("Failed to create CPU descriptor heap"));
	}
	ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(dh->heap, &dh->handle);
	return dh;
}
#if 0
/* Disabled: CPU descriptor heaps currently live for the process lifetime. */
INTERNAL void dx12_cpu_descriptor_heap_release(struct dx12_cpu_descriptor_heap *dh)
{
	/* TODO */
	(UNUSED)dh;
}
#endif
/* ========================== *
* Descriptor
* ========================== */
/* Allocate one descriptor slot from CPU heap `dh`. Reuses a node from the
 * heap's free list when available, otherwise reserves the next slot in the
 * heap (panics when capacity is exhausted). Thread-safe via dh->mutex. */
INTERNAL struct dx12_descriptor *dx12_descriptor_alloc(struct dx12_cpu_descriptor_heap *dh)
{
	struct dx12_descriptor *d = NULL;
	D3D12_CPU_DESCRIPTOR_HANDLE handle = ZI;
	{
		struct sys_lock lock = sys_mutex_lock_e(&dh->mutex);
		if (dh->first_free_descriptor) {
			d = dh->first_free_descriptor;
			/* BUGFIX: pop the node off the free list. Previously the head
			 * was never advanced, so the same descriptor was handed out to
			 * every subsequent caller. */
			dh->first_free_descriptor = d->next_free;
			handle = d->handle;
		} else {
			if (dh->num_descriptors_reserved >= dh->num_descriptors_capacity) {
				sys_panic(LIT("Max descriptors reached in heap"));
			}
			d = arena_push_no_zero(&dh->arena, struct dx12_descriptor);
			handle.ptr = dh->handle.ptr + (dh->num_descriptors_reserved * dh->descriptor_size);
			++dh->num_descriptors_reserved;
		}
		sys_mutex_unlock(&lock);
	}
	MEMZERO_STRUCT(d);
	d->heap = dh;
	d->handle = handle;
	return d;
}
/* ========================== *
* GPU (shader visible) descriptor heap
* ========================== */
/* Acquire a shader-visible CBV/SRV/UAV heap and populate it with a snapshot
 * of the CPU heap `dh_cpu`. Prefers reusing a heap from the global free list
 * whose fence indicates the GPU is done with it; otherwise creates a new heap
 * plus its reuse fence. Returns the heap ready for SetDescriptorHeaps. */
INTERNAL struct dx12_gpu_descriptor_heap *dx12_gpu_descriptor_heap_alloc(struct dx12_cpu_descriptor_heap *dh_cpu)
{
	ASSERT(dh_cpu->type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); /* Src heap must have expected type */
	/* Allocate GPU heap */
	struct dx12_gpu_descriptor_heap *dh_gpu = NULL;
	ID3D12DescriptorHeap *heap = NULL;
	D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle = ZI;
	D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle = ZI;
	ID3D12Fence *free_fence = NULL;
	u64 free_fence_value = 0;
	{
		struct sys_lock lock = sys_mutex_lock_e(&G.gpu_descriptor_heaps_mutex);
		/* Find first free & ready heap for reuse */
		/* FIXME: Rather than storing fence per heap, store & increment fence per queue and check against it */
		for (struct dx12_gpu_descriptor_heap *tmp = G.first_free_gpu_descriptor_heap; tmp; tmp = tmp->next_free) {
			if (ID3D12Fence_GetCompletedValue(tmp->free_fence) >= tmp->free_fence_value) {
				dh_gpu = tmp;
				break;
			}
		}
		if (dh_gpu) {
			/* BUGFIX: keep the heap the scan found. Previously dh_gpu was
			 * overwritten with the list head here, which may still be in
			 * flight on the GPU and broke the unlink below. */
			heap = dh_gpu->heap;
			cpu_handle = dh_gpu->cpu_handle;
			gpu_handle = dh_gpu->gpu_handle;
			free_fence = dh_gpu->free_fence;
			free_fence_value = dh_gpu->free_fence_value;
			/* Remove from free list */
			struct dx12_gpu_descriptor_heap *prev = dh_gpu->prev_free;
			struct dx12_gpu_descriptor_heap *next = dh_gpu->next_free;
			if (prev) {
				prev->next_free = next;
			} else {
				G.first_free_gpu_descriptor_heap = next;
			}
			if (next) {
				next->prev_free = prev;
			} else {
				G.last_free_gpu_descriptor_heap = prev;
			}
		} else {
			/* No available heap available for reuse, allocate new */
			dh_gpu = arena_push_no_zero(&G.gpu_descriptor_heaps_arena, struct dx12_gpu_descriptor_heap);
		}
		sys_mutex_unlock(&lock);
	}
	MEMZERO_STRUCT(dh_gpu);
	if (!heap) {
		D3D12_DESCRIPTOR_HEAP_DESC desc = ZI;
		desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
		desc.NumDescriptors = DX12_NUM_CBV_SRV_UAV_DESCRIPTORS;
		desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
		HRESULT hr = ID3D12Device_CreateDescriptorHeap(G.device, &desc, &IID_ID3D12DescriptorHeap, (void **)&heap);
		if (FAILED(hr)) {
			sys_panic(LIT("Failed to create GPU descriptor heap"));
		}
		ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap, &cpu_handle);
		ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(heap, &gpu_handle);
		hr = ID3D12Device_CreateFence(G.device, 0, 0, &IID_ID3D12Fence, (void **)&free_fence);
		if (FAILED(hr)) {
			sys_panic(LIT("Failed to create GPU descriptor heap fence"));
		}
	}
	dh_gpu->heap = heap;
	dh_gpu->cpu_handle = cpu_handle;
	dh_gpu->gpu_handle = gpu_handle;
	dh_gpu->free_fence = free_fence;
	dh_gpu->free_fence_value = free_fence_value;
	/* Copy CPU heap */
	{
		struct sys_lock lock = sys_mutex_lock_s(&dh_cpu->mutex);
		ID3D12Device_CopyDescriptorsSimple(G.device, dh_cpu->num_descriptors_reserved, dh_gpu->cpu_handle, dh_cpu->handle, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
		sys_mutex_unlock(&lock);
	}
	return dh_gpu;
}
/* Return GPU heap `dh` to the global free list. A fence signal is queued on
 * `cq` so the allocator can tell when the GPU has finished with the heap and
 * it becomes safe to reuse. */
INTERNAL void dx12_gpu_descriptor_heap_release(struct dx12_gpu_descriptor_heap *dh, ID3D12CommandQueue *cq)
{
	/* Queue fence signal */
	++dh->free_fence_value;
	ID3D12CommandQueue_Signal(cq, dh->free_fence, dh->free_fence_value);
	/* Append to free list tail.
	 * BUGFIX: the old code both prepended (dh->next_free = head) and
	 * appended (tail->next_free = dh), then set head AND tail to dh,
	 * corrupting the list; it also never set prev_free, which the
	 * alloc-side unlink relies on. */
	struct sys_lock lock = sys_mutex_lock_e(&G.gpu_descriptor_heaps_mutex);
	dh->next_free = NULL;
	dh->prev_free = G.last_free_gpu_descriptor_heap;
	if (G.last_free_gpu_descriptor_heap) {
		G.last_free_gpu_descriptor_heap->next_free = dh;
	} else {
		G.first_free_gpu_descriptor_heap = dh;
	}
	G.last_free_gpu_descriptor_heap = dh;
	sys_mutex_unlock(&lock);
}
/* ========================== *
* Plan
* ========================== */
/* Command plan placeholder; currently only a free-list link, no payload. */
struct dx12_plan {
	struct dx12_plan *next_free;
};
/* Stub: plan allocation is not implemented yet; always returns NULL. */
INTERNAL struct dx12_plan *dx12_plan_alloc(void)
{
	return NULL;
}
/* Public API: wrap a (currently NULL) dx12 plan in an opaque gpu_handle. */
struct gpu_handle gpu_plan_alloc(void)
{
	struct dx12_plan *plan = dx12_plan_alloc();
	return handle_alloc(DX12_HANDLE_KIND_PLAN, plan);
}
/* Stub: recording commands into a plan is not implemented in the DX12
 * backend yet; arguments are intentionally ignored. */
void gpu_push_cmd(struct gpu_handle gpu_plan, struct gpu_cmd_params params)
{
	(UNUSED)gpu_plan;
	(UNUSED)params;
}
/* Stub: plan submission is not implemented in the DX12 backend yet. */
void gpu_submit_plan(struct gpu_handle gpu_plan)
{
	(UNUSED)gpu_plan;
}
/* ========================== *
* Resource
* ========================== */
/* Which descriptor views dx12_resource_alloc should create for a resource.
 * NOTE(review): bit 0 (1 << 0) is unused; flags start at bit 1. */
enum dx12_resource_view_flags {
	DX12_RESOURCE_VIEW_FLAG_NONE = 0,
	DX12_RESOURCE_VIEW_FLAG_CBV = (1 << 1),
	DX12_RESOURCE_VIEW_FLAG_SRV = (1 << 2),
	DX12_RESOURCE_VIEW_FLAG_UAV = (1 << 3),
	DX12_RESOURCE_VIEW_FLAG_RTV = (1 << 4)
};
/* Create a committed resource and the descriptor views requested via
 * `view_flags`. The tracking struct is reused from the global free list when
 * possible. Panics on creation failure (TODO per original note). */
INTERNAL struct dx12_resource *dx12_resource_alloc(D3D12_HEAP_PROPERTIES heap_props, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_DESC desc, D3D12_RESOURCE_STATES initial_state, enum dx12_resource_view_flags view_flags)
{
	struct dx12_resource *r = NULL;
	{
		struct sys_lock lock = sys_mutex_lock_e(&G.resources_mutex);
		if (G.first_free_resource) {
			r = G.first_free_resource;
			G.first_free_resource = r->next_free;
		} else {
			r = arena_push_no_zero(&G.resources_arena, struct dx12_resource);
		}
		sys_mutex_unlock(&lock);
	}
	MEMZERO_STRUCT(r);
	HRESULT hr = ID3D12Device_CreateCommittedResource(G.device, &heap_props, heap_flags, &desc, initial_state, NULL, &IID_ID3D12Resource, (void **)&r->resource);
	if (FAILED(hr)) {
		/* TODO: Don't panic */
		sys_panic(LIT("Failed to create resource"));
	}
	r->state = initial_state;
	if (desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) {
		/* GPU VA only exists for buffers */
		r->gpu_address = ID3D12Resource_GetGPUVirtualAddress(r->resource);
	}
	if (view_flags & DX12_RESOURCE_VIEW_FLAG_CBV) {
		r->cbv_descriptor = dx12_descriptor_alloc(G.cbv_srv_uav_heap);
		D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = ZI;
		cbv_desc.BufferLocation = r->gpu_address;
		/* CBV SizeInBytes must be a multiple of 256 (D3D12 constant buffer
		 * placement alignment): use the actual buffer width rounded up,
		 * replacing the previous hard-coded 64 KiB placeholder (FIXME). */
		cbv_desc.SizeInBytes = (UINT)((desc.Width + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1) & ~(UINT64)(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT - 1));
		ID3D12Device_CreateConstantBufferView(G.device, &cbv_desc, r->cbv_descriptor->handle);
	}
	if (view_flags & DX12_RESOURCE_VIEW_FLAG_SRV) {
		r->srv_descriptor = dx12_descriptor_alloc(G.cbv_srv_uav_heap);
		ID3D12Device_CreateShaderResourceView(G.device, r->resource, NULL, r->srv_descriptor->handle);
	}
	if (view_flags & DX12_RESOURCE_VIEW_FLAG_UAV) {
		r->uav_descriptor = dx12_descriptor_alloc(G.cbv_srv_uav_heap);
		ID3D12Device_CreateUnorderedAccessView(G.device, r->resource, NULL, NULL, r->uav_descriptor->handle);
	}
	if (view_flags & DX12_RESOURCE_VIEW_FLAG_RTV) {
		r->rtv_descriptor = dx12_descriptor_alloc(G.rtv_heap);
		ID3D12Device_CreateRenderTargetView(G.device, r->resource, NULL, r->rtv_descriptor->handle);
	}
	return r;
}
/* Transition `resource` to `state` on command list `cl`, recording the
 * barrier only when the state actually changes. Returns the state the
 * resource was in before the call so the caller can restore it later. */
INTERNAL enum D3D12_RESOURCE_STATES dx12_resource_barrier(ID3D12GraphicsCommandList *cl, struct dx12_resource *resource, enum D3D12_RESOURCE_STATES state)
{
	enum D3D12_RESOURCE_STATES prev_state = resource->state;
	if (prev_state != state) {
		struct D3D12_RESOURCE_BARRIER barrier = ZI;
		barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
		barrier.Flags = 0;
		barrier.Transition.pResource = resource->resource;
		barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
		barrier.Transition.StateBefore = prev_state;
		barrier.Transition.StateAfter = state;
		ID3D12GraphicsCommandList_ResourceBarrier(cl, 1, &barrier);
		resource->state = state;
	}
	return prev_state;
}
/* Stub: resource release is not implemented yet.
 * NOTE(review): this leaks the ID3D12Resource, its descriptors, and the
 * tracking struct (never returned to G.first_free_resource) — TODO. */
INTERNAL void dx12_resource_release(struct dx12_resource *t)
{
	(UNUSED)t;
}
/* Create a 2D texture of `size` with an SRV (plus an RTV when the
 * GPU_TEXTURE_FLAG_TARGETABLE flag is set) and wrap it in a gpu_handle.
 * `initial_data` upload is not implemented yet (resource is left in
 * COPY_DEST state for a future upload path). */
struct gpu_handle gpu_texture_alloc(enum gpu_texture_format format, u32 flags, struct v2i32 size, void *initial_data)
{
	LOCAL_PERSIST const DXGI_FORMAT formats[] = {
		[GPU_TEXTURE_FORMAT_R8G8B8A8_UNORM] = DXGI_FORMAT_R8G8B8A8_UNORM,
		[GPU_TEXTURE_FORMAT_R8G8B8A8_UNORM_SRGB] = DXGI_FORMAT_R8G8B8A8_UNORM_SRGB
	};
	DXGI_FORMAT dxgi_format = 0;
	if (format < (i32)ARRAY_COUNT(formats)) {
		dxgi_format = formats[format];
	}
	/* BUGFIX: validate the mapped DXGI format (0 == DXGI_FORMAT_UNKNOWN),
	 * not the input enum. The old `format == 0` check let out-of-range
	 * formats through with DXGI_FORMAT_UNKNOWN and could reject a valid
	 * enum value 0. */
	if (dxgi_format == 0) {
		/* TODO: Don't panic */
		sys_panic(LIT("Tried to create texture with unknown format"));
	}
	enum dx12_resource_view_flags view_flags = DX12_RESOURCE_VIEW_FLAG_SRV;
	D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
	heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
	heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
	D3D12_HEAP_FLAGS heap_flags = D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
	D3D12_RESOURCE_DESC desc = ZI;
	desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
	desc.Alignment = 0;
	desc.Width = size.x;
	desc.Height = size.y;
	desc.DepthOrArraySize = 1;
	desc.MipLevels = 1;
	desc.Format = dxgi_format;
	desc.SampleDesc.Count = 1;
	desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
	if (flags & GPU_TEXTURE_FLAG_TARGETABLE) {
		desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
		view_flags |= DX12_RESOURCE_VIEW_FLAG_RTV;
	}
	D3D12_RESOURCE_STATES initial_state = D3D12_RESOURCE_STATE_COPY_DEST;
	struct dx12_resource *r = dx12_resource_alloc(heap_props, heap_flags, desc, initial_state, view_flags);
	/* FIXME: initial_data is accepted but never uploaded. */
	(UNUSED)initial_data;
	return handle_alloc(DX12_HANDLE_KIND_RESOURCE, r);
}
/* Stub: texture clear is not implemented in the DX12 backend yet. */
void gpu_texture_clear(struct gpu_handle target_resource, u32 clear_color)
{
	(UNUSED)target_resource;
	(UNUSED)clear_color;
}
/* Stub: always returns a zero size; texture size query not implemented. */
struct v2i32 gpu_texture_get_size(struct gpu_handle resource)
{
	(UNUSED)resource;
	struct v2i32 res = ZI;
	return res;
}
/* ========================== *
* Dispatch
* ========================== */
/* TODO: Move command list off of dispatch state */
/* Per-dispatch-state backing: owning arena plus the direct-queue command
 * allocator/list pair used by gpu_dispatch. */
struct dx12_dispatch_state {
	struct arena arena;
	ID3D12CommandAllocator *ca_direct;
	ID3D12GraphicsCommandList *cl_direct;
};
/* Allocate a dispatch state with its own arena and a direct command
 * allocator + command list pair. Panics on any D3D12 creation failure. */
INTERNAL struct dx12_dispatch_state *dx12_dispatch_state_alloc(void)
{
	/* The state struct lives inside the arena it owns. */
	struct arena arena = arena_alloc(MEGABYTE(64));
	struct dx12_dispatch_state *state = arena_push(&arena, struct dx12_dispatch_state);
	state->arena = arena;
	if (FAILED(ID3D12Device_CreateCommandAllocator(G.device, D3D12_COMMAND_LIST_TYPE_DIRECT, &IID_ID3D12CommandAllocator, (void **)&state->ca_direct))) {
		sys_panic(LIT("Failed to create command allocator"));
	}
	if (FAILED(ID3D12Device_CreateCommandList(G.device, 0, D3D12_COMMAND_LIST_TYPE_DIRECT, state->ca_direct, NULL, &IID_ID3D12GraphicsCommandList, (void **)&state->cl_direct))) {
		sys_panic(LIT("Failed to create command list"));
	}
	/* Command lists are created in the recording state; close it so the
	 * first dispatch can Reset() it. */
	if (FAILED(ID3D12GraphicsCommandList_Close(state->cl_direct))) {
		sys_panic(LIT("Failed to close command list during initialization"));
	}
	return state;
}
/* Public API: allocate a dispatch state and wrap it in an opaque handle. */
struct gpu_handle gpu_dispatch_state_alloc(void)
{
	struct dx12_dispatch_state *ds = dx12_dispatch_state_alloc();
	return handle_alloc(DX12_HANDLE_KIND_DISPATCH_STATE, ds);
}
/* Record one frame's material pass into the dispatch state's direct command
 * list, drawing instanced quads into params.draw_target.
 * WIP: the plan handle is resolved but unused, instance_count is 0, and the
 * constant buffer bind is TODO.
 * NOTE(review): the command list is closed but never submitted via
 * ExecuteCommandLists before the descriptor heap's fence is signaled —
 * confirm submission happens elsewhere, otherwise this renders nothing and
 * the fence signal completes immediately. */
void gpu_dispatch(struct gpu_handle gpu_dispatch_state, struct gpu_dispatch_params params)
{
	HRESULT hr = 0;
	/* Viewport */
	struct rect viewport = params.draw_target_viewport;
	struct D3D12_VIEWPORT d3d12_viewport = ZI;
	d3d12_viewport.TopLeftX = viewport.x;
	d3d12_viewport.TopLeftY = viewport.y;
	d3d12_viewport.Width = viewport.width;
	d3d12_viewport.Height = viewport.height;
	d3d12_viewport.MinDepth = 0.0f;
	d3d12_viewport.MaxDepth = 1.0f;
	/* Scissor */
	D3D12_RECT d3d12_scissor = ZI;
	d3d12_scissor.left = viewport.x;
	d3d12_scissor.top = viewport.y;
	d3d12_scissor.right = viewport.x + viewport.width;
	d3d12_scissor.bottom = viewport.y + viewport.height;
	struct dx12_dispatch_state *dispatch_state = handle_get_data(gpu_dispatch_state, DX12_HANDLE_KIND_DISPATCH_STATE);
	struct dx12_plan *plan = handle_get_data(params.plan, DX12_HANDLE_KIND_PLAN);
	struct dx12_resource *target = handle_get_data(params.draw_target, DX12_HANDLE_KIND_RESOURCE);
	ID3D12CommandQueue *cq = G.cq_direct;
	ID3D12CommandAllocator *ca = dispatch_state->ca_direct;
	ID3D12GraphicsCommandList *cl = dispatch_state->cl_direct;
	/* FIXME: Use fence to ensure command allocator has finished execution on GPU before resetting */
	hr = ID3D12CommandAllocator_Reset(ca);
	if (FAILED(hr)) {
		sys_panic(LIT("Failed to reset command allocator"));
	}
	hr = ID3D12GraphicsCommandList_Reset(cl, ca, NULL);
	if (FAILED(hr)) {
		sys_panic(LIT("Failed to reset command list"));
	}
	/* Create temporary srv heap */
	struct dx12_gpu_descriptor_heap *temp_descriptor_heap = dx12_gpu_descriptor_heap_alloc(G.cbv_srv_uav_heap);
	/* Material pass */
	{
		u32 instance_count = 0;
		(UNUSED)plan;
		//struct pipeline *pipeline = dx12_get_pipeline(pipeline_scope, LIT("material"));
		struct pipeline *pipeline = &G.test_pipeline;
		/* Bind pipeline */
		ID3D12GraphicsCommandList_SetPipelineState(cl, pipeline->pso);
		ID3D12GraphicsCommandList_SetGraphicsRootSignature(cl, pipeline->rootsig);
		/* Bind constant buffer */
		/* TODO */
		/* Bind srv heap */
		ID3D12DescriptorHeap *heaps[] = { temp_descriptor_heap->heap };
		ID3D12GraphicsCommandList_SetDescriptorHeaps(cl, ARRAY_COUNT(heaps), heaps);
		/* Root parameter index 2 — presumably the descriptor table slot in
		 * the root signature; verify against the HLSL root signature. */
		ID3D12GraphicsCommandList_SetGraphicsRootDescriptorTable(cl, 2, temp_descriptor_heap->gpu_handle);
		/* Setup Rasterizer State */
		ID3D12GraphicsCommandList_RSSetViewports(cl, 1, &d3d12_viewport);
		ID3D12GraphicsCommandList_RSSetScissorRects(cl, 1, &d3d12_scissor);
		/* Transition render target */
		enum D3D12_RESOURCE_STATES old_state = dx12_resource_barrier(cl, target, D3D12_RESOURCE_STATE_RENDER_TARGET);
		ID3D12GraphicsCommandList_OMSetRenderTargets(cl, 1, &target->rtv_descriptor->handle, false, NULL);
		//f32 clear_color[] = { 0.0f, 0.0f, 0.0f, 0.0f };
		//ID3D12GraphicsCommandList_ClearRenderTargetView(cl, rtvHandle, clearColor, 0, nullptr);
		/* Draw */
		ID3D12GraphicsCommandList_IASetPrimitiveTopology(cl, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
		ID3D12GraphicsCommandList_IASetVertexBuffers(cl, 0, 1, &G.dummy_vertex_buffer_view);
		ID3D12GraphicsCommandList_IASetIndexBuffer(cl, &G.quad_index_buffer_view);
		/* instance_count is still 0 here, so this records a no-op draw. */
		ID3D12GraphicsCommandList_DrawIndexedInstanced(cl, 6, instance_count, 0, 0, 0);
		/* Reset render target */
		dx12_resource_barrier(cl, target, old_state);
	}
	/* Execute command list */
	hr = ID3D12GraphicsCommandList_Close(cl);
	if (FAILED(hr)) {
		sys_panic(LIT("Failed to close command list before execution"));
	}
	/* Queues the heap's reuse fence on cq (see NOTE above re: submission). */
	dx12_gpu_descriptor_heap_release(temp_descriptor_heap, cq);
#if 0
	__prof;
	struct sprite_scope *sprite_scope = sprite_scope_begin();
	struct dx11_dispatch_state *state = (struct dx11_dispatch_state *)gpu_dispatch_state.v;
	struct rect viewport = params.draw_target_viewport;
	/* Set viewport */
	D3D11_VIEWPORT d3d11_viewport = ZI;
	d3d11_viewport.Width = viewport.width;
	d3d11_viewport.Height = viewport.height;
	d3d11_viewport.MinDepth = 0.0f;
	d3d11_viewport.MaxDepth = 1.0f;
	d3d11_viewport.TopLeftX = viewport.x;
	d3d11_viewport.TopLeftY = viewport.y;
	ID3D11DeviceContext_RSSetViewports(G.devcon, 1, &d3d11_viewport);
	struct dx12_resource *final_tex = (struct dx12_resource *)params.draw_target.v;
	struct v2i32 final_tex_size = final_tex->size;
	/* Texture pass */
	{
		__profscope(Texture pass);
		struct dx11_shader *shader = &G.shaders[DX11_SHADER_KIND_TEXTURE];
		if (shader->valid) {
			struct dx12_resource *texture = NULL;
			if (cmd->texture.texture.v) {
				/* Load texture if handle is set */
				texture = (struct dx12_resource *)cmd->texture.texture.v;
			} else if (cmd->texture.sprite.hash) {
				/* Otherwise load sprite */
				struct sprite_texture *sprite_texture = sprite_texture_from_tag_async(sprite_scope, cmd->texture.sprite);
				if (sprite_texture->loaded) {
					texture = (struct dx12_resource *)sprite_texture->texture.v;
				}
			}
			if (texture && texture->srv) {
				struct dx11_buffer *instance_buffer = list->buffers.texture.instance_buffer;
				u32 instance_offset = cmd->texture.instance_offset;
				u32 instance_count = cmd->texture.instance_count;
				/* Bind shader */
				ID3D11DeviceContext_VSSetShader(G.devcon, shader->vs, 0, 0);
				ID3D11DeviceContext_PSSetShader(G.devcon, shader->ps, 0, 0);
				/* Fill & bind constant buffer */
				{
					struct dx11_texture_uniform *uniform = dx11_buffer_push(constant_buffer, sizeof(struct dx11_texture_uniform));
					uniform->vp = vp_matrix;
					uniform->instance_offset = instance_offset;
					dx11_buffer_submit(constant_buffer);
				}
				ID3D11DeviceContext_VSSetConstantBuffers(G.devcon, 0, 1, &constant_buffer->gpu_buffer);
				ID3D11DeviceContext_PSSetConstantBuffers(G.devcon, 0, 1, &constant_buffer->gpu_buffer);
				/* Bind dummy vertex buffer */
				u32 zero = 0;
				ID3D11DeviceContext_IASetVertexBuffers(G.devcon, 0, 1, &G.dummy_vertex_buffer->gpu_buffer, &zero, &zero);
				ID3D11DeviceContext_IASetIndexBuffer(G.devcon, G.quad_index_buffer->gpu_buffer, DXGI_FORMAT_R16_UINT, zero);
				/* Bind SRVs */
				ID3D11ShaderResourceView *srvs[] = { instance_buffer->srv, texture->srv };
				ID3D11DeviceContext_VSSetShaderResources(G.devcon, 0, ARRAY_COUNT(srvs), srvs);
				ID3D11DeviceContext_PSSetShaderResources(G.devcon, 0, ARRAY_COUNT(srvs), srvs);
				/* Bind RTVs */
				ID3D11RenderTargetView *rtvs[] = { final_tex->rtv };
				ID3D11DeviceContext_OMSetRenderTargets(G.devcon, ARRAY_COUNT(rtvs), rtvs, NULL);
				/* Draw */
				ID3D11DeviceContext_IASetPrimitiveTopology(G.devcon, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
				ID3D11DeviceContext_DrawIndexedInstanced(G.devcon, 6, instance_count, 0, 0, 0);
				/* Unbind */
				dx11_unbind(DX11_UNBIND_VS | DX11_UNBIND_PS | DX11_UNBIND_CBUFF | DX11_UNBIND_VBUFF | DX11_UNBIND_IBUFF | DX11_UNBIND_SRV | DX11_UNBIND_RTV);
			}
		}
	}
	sprite_scope_end(sprite_scope);
#endif
}
#else
/* Stub used when the dispatch implementation above is compiled out. */
void gpu_dispatch(struct gpu_handle gpu_dispatch_state, struct gpu_dispatch_params params)
{
	(UNUSED)gpu_dispatch_state;
	(UNUSED)params;
	(UNUSED)handle_get_data;
}
#endif
/* ========================== *
* Backbuffer
* ========================== */
/* Stub: backbuffer (re)creation not implemented; returns a null handle. */
struct gpu_handle gpu_recreate_backbuffer(struct v2i32 size)
{
	(UNUSED)size;
	struct gpu_handle res = ZI;
	return res;
}
/* Stub: present not implemented; vsync request is ignored. */
void gpu_present(i32 vsync)
{
	(UNUSED)vsync;
}
#endif