/*
 * power_play/src/renderer_d3d11.c — Direct3D 11 renderer backend.
 */
#include "renderer.h"
#include "sys.h"
#include "memory.h"
#include "arena.h"
#include "scratch.h"
#include "string.h"
#include "math.h"
#include "inc.h"
#include "tar.h"
#include "sprite.h"
#pragma warning(push, 0)
# define UNICODE
# define CINTERFACE
# define COBJMACROS
# include <Windows.h>
# include <d3d11.h>
# include <d3dcompiler.h>
# include <dxgidebug.h>
# include <dxgi1_3.h>
#pragma warning(pop)
#pragma comment(lib, "d3d11")
#pragma comment(lib, "dxguid")
#pragma comment(lib, "d3dcompiler")
#define MAX_CANVASES 1024
/* FIXME: Enable this and resolve unreleased references */
//#define D3D11_DEBUG RTC
#define D3D11_DEBUG 0
/* A compiled shader pair (vertex + pixel) plus the input layout matching its
 * vertex format. One instance exists per shader_kind in G.shaders. */
struct dx11_shader {
    enum shader_kind kind;
    u32 vertex_size;                 /* Size in bytes of one vertex for this shader */
    ID3D11InputLayout *input_layout;
    ID3D11VertexShader *vs;
    ID3D11PixelShader *ps;
};
/* CPU mirror of the VS constant buffer contents (currently only the
 * view-projection matrix). Its size must be a multiple of 16 bytes. */
struct dx11_constant_buffer_data {
    struct mat4x4 vp; /* View-projection matrix */
};
/* Per-shader geometry buffer for a canvas: CPU-side staging arrays backed by
 * arenas, mirrored into dynamic GPU vertex/index buffers that grow on demand. */
struct dx11_buffer {
    u32 vertex_count;
    u32 index_count;
    u8 *cpu_vertex_buffer;  /* Array of homogeneous vertices (size depends on shader) */
    vidx *cpu_index_buffer; /* Array of vertex indices into cpu_vertex_buffer */
    struct arena vertex_arena;  /* Backing storage for cpu_vertex_buffer */
    struct arena index_arena;   /* Backing storage for cpu_index_buffer */
    u32 gpu_vertex_buffer_capacity; /* In vertices, not bytes */
    u32 gpu_index_buffer_capacity;  /* In indices, not bytes */
    ID3D11Buffer *gpu_vertex_buffer;
    ID3D11Buffer *gpu_index_buffer;
};
/* One batched draw command: a shader + texture/sprite binding plus the slice
 * of the per-shader buffer (offset/count) that belongs to this command.
 * Commands form a singly-linked list inside a cmd_store. */
struct renderer_cmd {
    struct dx11_shader *shader;
    struct renderer_handle texture_handle; /* Overrides sprite */
    struct sprite_tag sprite;
    /* Associated buffer data */
    u32 vertex_count;
    u32 index_count;
    u32 vertex_offset; /* First vertex of this cmd within the shared buffer */
    u32 index_offset;  /* First index of this cmd within the shared buffer */
    b32 offsets_set;   /* Offsets are recorded lazily on the first vertex push */
    struct renderer_cmd *next;
};
/* Linked list of draw commands with its backing arena. Canvases keep two:
 * one being recorded (CPU) and one being drawn (GPU), swapped each frame. */
struct cmd_store {
    struct renderer_cmd *cmd_first;
    struct renderer_cmd *cmd_last;
    struct arena arena;
};
/* A render target abstraction: per-shader geometry buffers, double-buffered
 * command lists, and a view transform. Lives in G.canvases (sparse array). */
struct renderer_canvas {
    struct dx11_buffer buffers[NUM_SHADERS]; /* Indexed by shader_kind; [0] (SHADER_NONE) unused */
    struct cmd_store cpu_cmd_store; /* Commands currently being recorded */
    struct cmd_store gpu_cmd_store; /* Commands uploaded for drawing */
    struct xform view;              /* Camera/view transform applied at present time */
    b32 valid; /* False if uninitialized (in sparse array) */
};
INTERNAL void renderer_capture_image_for_profiler(f32 width, f32 height);
/* ========================== *
* Global state
* ========================== */
/* One slot in the handle store. `gen` increments on every alloc/release so
 * stale handles (old generation) fail to resolve. */
struct handle_slot {
    u64 idx;  /* Slot index (stable for the slot's lifetime) */
    u64 gen;  /* Current generation; a handle matches only if its gen equals this */
    void *data;
    struct handle_slot *next_free; /* Intrusive free-list link */
};
/* Generational handle store. Slots are allocated linearly from `arena`
 * (so `array` stays a contiguous view of all slots) and recycled via an
 * intrusive free list. Mutations are guarded by `mutex`. */
struct handle_store {
    struct sys_mutex mutex;
    struct arena arena;
    struct handle_slot *head_free; /* Free list of recyclable slots */
    struct handle_slot *array;     /* Aliases arena.base: dense slot array */
    u64 count;                     /* Total slots ever allocated (array length) */
};
/* Static description of a shader: its HLSL source path inside the embedded
 * tar archive, vertex stride, and D3D11 input layout. */
struct dx11_shader_desc {
    char *name_cstr; /* Path of the HLSL source within the shaders archive */
    u32 vertex_size; /* Bytes per vertex */
    D3D11_INPUT_ELEMENT_DESC input_layout_desc[8]; /* NULL terminated array */
};
/* All renderer backend state (single instance, zero-initialized). */
GLOBAL struct {
    struct arena arena;                 /* Long-lived backend allocations */
    struct tar_archive shaders_archive; /* Tar archive including shader sources */
    ID3D11Device *dev;
    ID3D11DeviceContext *devcon;
    IDXGISwapChain1 *swapchain;
    ID3D11RenderTargetView *backbuffer_view;
    /* Here for caching/comparison */
    struct v2 backbuffer_size;
    struct rect viewport;
    /* Fixed pipeline state, created once in renderer_startup */
    ID3D11BlendState *blend_state;
    ID3D11RasterizerState *rasterizer_state;
    ID3D11DepthStencilState *depth_stencil_state;
    ID3D11SamplerState *sampler_state;
    ID3D11Buffer *vs_constant_buffer; /* Dynamic; holds dx11_constant_buffer_data */
    struct handle_store handle_store; /* Texture (SRV) handles */
    /* Sparse array (canvas.valid) */
    struct renderer_canvas canvases[MAX_CANVASES];
    struct dx11_shader shaders[NUM_SHADERS];           /* Indexed by shader_kind; [0] unused */
    struct dx11_shader_desc shader_info[NUM_SHADERS];  /* Filled by init_shader_table */
} G = ZI, DEBUG_ALIAS(G, G_renderer_d3d11);
/* ========================== *
* Util
* ========================== */
/* Calculate the view projection matrix */
/* Build the combined view-projection matrix for a viewport of the given
 * size, using a top-left-origin orthographic projection (y grows downward,
 * z clipped to [-1, 1]). */
INLINE struct mat4x4 calculate_vp(struct xform view, f32 viewport_width, f32 viewport_height)
{
    struct mat4x4 view_matrix = mat4x4_from_xform(view);
    struct mat4x4 ortho = mat4x4_from_ortho(0.0, viewport_width, viewport_height, 0.0, -1.0, 1.0);
    return mat4x4_mul(ortho, view_matrix);
}
/* Upload the view-projection matrix into the dynamic VS constant buffer via
 * Map(WRITE_DISCARD)/Unmap. Asserts and bails early if the map fails. */
INTERNAL void send_constant_buffer_data(ID3D11Buffer *buffer, struct mat4x4 vp)
{
    D3D11_MAPPED_SUBRESOURCE ms;
    if (ID3D11DeviceContext_Map(G.devcon, (ID3D11Resource *)buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &ms) != S_OK) {
        ASSERT(false);
        return;
    }
    struct dx11_constant_buffer_data *data = (struct dx11_constant_buffer_data *)ms.pData;
    MEMCPY(&data->vp, &vp, sizeof(vp));
    ID3D11DeviceContext_Unmap(G.devcon, (ID3D11Resource *)buffer, 0);
}
/* ========================== *
* Handle
* ========================== */
/* Handle layout
 * bits 0-31:  Index
 * bits 32-63: Generation */
#define HANDLE_IDX_MASK 0x00000000FFFFFFFF
#define HANDLE_GEN_MASK 0xFFFFFFFF00000000
#define HANDLE_IDX_MAX (U32_MAX)
#define HANDLE_GEN_MAX (U32_MAX)
#define HANDLE_CREATE(idx, gen) ((struct renderer_handle) { .v[0] = (u64)(gen) << 32 | ((u64)(idx) & 0xFFFFFFFF) } )
#define HANDLE_IDX(handle) ((u32)((handle).v[0] & HANDLE_IDX_MASK))
#define HANDLE_GEN(handle) ((u32)(((handle).v[0] & HANDLE_GEN_MASK) >> 32))
INTERNAL struct renderer_handle handle_alloc(void *data)
{
__prof;
struct handle_store *store = &G.handle_store;
struct handle_slot *slot = NULL;
struct sys_lock lock = sys_mutex_lock_e(&store->mutex);
{
if (store->head_free) {
/* Take first from free list */
slot = store->head_free;
store->head_free = slot->next_free;
slot->next_free = NULL;
++slot->gen;
} else {
/* Or push onto arena */
if (store->count + 1 >= HANDLE_IDX_MAX) {
sys_panic(STR("Maximum renderer handles exceeded"));
}
slot = arena_push_zero(&store->arena, struct handle_slot);
slot->idx = store->count;
slot->gen = 1;
++store->count;
}
slot->data = data;
}
sys_mutex_unlock(&lock);
struct renderer_handle handle = HANDLE_CREATE(slot->idx, slot->gen);
return handle;
}
/* Release a handle, recycling its slot onto the free list and bumping the
 * slot's generation so the released handle (and any copies) immediately go
 * stale. Releasing a stale or out-of-bounds handle asserts in debug builds
 * and is otherwise a no-op. */
INTERNAL void handle_release(struct renderer_handle handle)
{
    __prof;
    struct handle_store *store = &G.handle_store;
    u32 idx = HANDLE_IDX(handle);
    u32 gen = HANDLE_GEN(handle);
    struct sys_lock lock = sys_mutex_lock_e(&store->mutex);
    {
        if (idx < store->count) {
            struct handle_slot *slot = &store->array[idx];
            if (slot->gen == gen) {
                /* Insert into free list */
                if (gen + 1 < HANDLE_GEN_MAX) {
                    slot->next_free = store->head_free;
                    store->head_free = slot;
                } else {
                    /* Maximum generations exceeded. Not a runtime error since it
                     * shouldn't cause issues in practice (just can't recycle this handle).
                     * Still probably means there's a problem in the code. */
                    ASSERT(false);
                }
                /* Invalidate all outstanding copies of this handle */
                ++slot->gen;
            } else {
                /* Tried to release handle not in store (non-matching generation) */
                ASSERT(false);
            }
        } else {
            /* Tried to release out-of-bounds handle */
            ASSERT(false);
        }
    }
    sys_mutex_unlock(&lock);
}
/* Resolve a handle to its stored data pointer, or NULL when the handle is
 * stale (generation mismatch) or out of bounds.
 * NOTE(review): reads the store without taking store->mutex, unlike
 * handle_alloc/handle_release — presumably relies on slots never moving in
 * the arena and callers not racing release of the same handle; confirm. */
INTERNAL void *handle_data(struct renderer_handle handle)
{
    __prof;
    void *data = NULL;
    struct handle_store *store = &G.handle_store;
    u32 idx = HANDLE_IDX(handle);
    u32 gen = HANDLE_GEN(handle);
    if (idx < store->count) {
        struct handle_slot *slot = &store->array[idx];
        if (slot->gen == gen) {
            data = slot->data;
        }
    }
    return data;
}
/* Two handles are equal iff both index and generation match; the packed
 * 64-bit representation makes that a single word compare. */
INTERNAL b32 handle_eq(struct renderer_handle h1, struct renderer_handle h2)
{
    CT_ASSERT(sizeof(struct renderer_handle) == 8);
    b32 equal = (h1.v[0] == h2.v[0]);
    return equal;
}
/* A zero-valued handle (index 0, generation 0) is the nil handle; valid
 * slots always start at generation 1, so this never collides. */
INTERNAL b32 handle_is_nil(struct renderer_handle h)
{
    return 0 == h.v[0];
}
/* ========================== *
* Shader
* ========================== */
/* TODO: don't do fatal error, just don't use shader */
/* Report a shader compilation failure and abort.
 * Callers only reach this after D3DCompile FAILED, so this must never
 * return: the output bytecode blob is uninitialized on failure and using it
 * would be undefined behavior. The original silently returned when
 * `error_blob` was NULL (an HRESULT failure with no diagnostic messages),
 * letting the caller continue with garbage — now we panic either way. */
INTERNAL void process_shader_compilation_error(ID3DBlob *error_blob)
{
    struct temp_arena scratch = scratch_begin_no_conflict();
    struct string error_prefix = string_copy(scratch.arena, STR("Failed to compile shader:\n"));
    if (error_blob) {
        char *compile_error_cstr = (char *)ID3D10Blob_GetBufferPointer(error_blob);
        struct string error_msg = string_cat(scratch.arena, error_prefix, string_from_cstr(compile_error_cstr));
        sys_panic(error_msg);
    }
    /* No diagnostics available; the compile still failed, so abort. */
    sys_panic(error_prefix);
    scratch_end(scratch); /* Unreachable; kept for symmetry */
}
/* Populate the shader descriptor table: HLSL source path (within the tar
 * archive), vertex stride, and D3D11 input layout per shader kind.
 * Only SHADER_TEXTURE is currently described; other entries stay zeroed. */
INTERNAL void init_shader_table(void)
{
    MEMZERO_ARRAY(G.shader_info);
    G.shader_info[SHADER_TEXTURE] = (struct dx11_shader_desc) {
        "shaders/texture.hlsl",
        sizeof(struct texture_shader_vertex),
        {
            { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, FIELD_OFFSETOF(struct texture_shader_vertex, pos), D3D11_INPUT_PER_VERTEX_DATA, 0 },
            { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, FIELD_OFFSETOF(struct texture_shader_vertex, uv), D3D11_INPUT_PER_VERTEX_DATA, 0 },
            { "COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, FIELD_OFFSETOF(struct texture_shader_vertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0 }
        }
    };
}
/* Compile the HLSL source for `kind` (from the embedded tar archive) and
 * create the VS/PS pair plus the matching input layout on `shader`.
 * Panics on missing source or compile failure.
 * Fix: `error_blob` is now initialized and released after each successful
 * compile — D3DCompile may return a non-NULL message blob even on success,
 * which the original leaked. */
INTERNAL void shader_init(struct dx11_shader *shader, enum shader_kind kind)
{
    __prof;
    MEMZERO_STRUCT(shader);
    struct temp_arena scratch = scratch_begin_no_conflict();
    const struct dx11_shader_desc *shader_desc = &G.shader_info[kind];
    shader->kind = kind;
    shader->vertex_size = shader_desc->vertex_size;
    u32 flags = D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS;
#if D3D11_DEBUG
    flags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
#else
    flags |= D3DCOMPILE_OPTIMIZATION_LEVEL3;
#endif
    /* Compile shader */
    ID3DBlob *vs_blob, *ps_blob;
    {
        struct string name = string_from_cstr(shader_desc->name_cstr);
        struct tar_entry *tar_entry = tar_get(&G.shaders_archive, name);
        if (!tar_entry) {
            sys_panic(string_format(scratch.arena,
                STR("Could not find shader \"%F\""),
                FMT_STR(name)));
        }
        struct buffer shader_src = tar_entry->buff;
        /* Compile shader */
        /* TODO: pre-compile shaders w/ FXC? */
        ID3DBlob *error_blob = NULL;
        HRESULT v_res = D3DCompile(shader_src.data, shader_src.size, NULL, NULL, NULL, "vs_main", "vs_5_0", flags, 0, &vs_blob, &error_blob);
        if (FAILED(v_res)) {
            process_shader_compilation_error(error_blob);
        }
        /* On success D3DCompile may still hand back a message blob; don't leak it */
        if (error_blob) {
            ID3D10Blob_Release(error_blob);
            error_blob = NULL;
        }
        HRESULT p_res = D3DCompile(shader_src.data, shader_src.size, NULL, NULL, NULL, "ps_main", "ps_5_0", flags, 0, &ps_blob, &error_blob);
        if (FAILED(p_res)) {
            process_shader_compilation_error(error_blob);
        }
        if (error_blob) {
            ID3D10Blob_Release(error_blob);
        }
    }
    /* Get number of device layout elements from NULL terminated array */
    u32 elem_count = 0;
    for (; elem_count < ARRAY_COUNT(shader_desc->input_layout_desc); ++elem_count) {
        const D3D11_INPUT_ELEMENT_DESC *d = &shader_desc->input_layout_desc[elem_count];
        if (d->SemanticName == NULL) {
            break;
        }
    }
    /* Create device layout (validated against the VS bytecode signature) */
    ID3D11Device_CreateInputLayout(G.dev, shader_desc->input_layout_desc, elem_count, ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), &shader->input_layout);
    /* Create shader */
    ID3D11Device_CreateVertexShader(G.dev, ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), NULL, &shader->vs);
    ID3D11Device_CreatePixelShader(G.dev, ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob), NULL, &shader->ps);
    ID3D10Blob_Release(vs_blob);
    ID3D10Blob_Release(ps_blob);
    scratch_end(scratch);
}
/* ========================== *
* Startup
* ========================== */
/* Initialize the D3D11 renderer: device + context, swap chain for `window`,
 * fixed pipeline state (blend/depth-stencil/rasterizer/sampler), the VS
 * constant buffer, and all shaders. Must be called once before any other
 * renderer_* function. Panics on unrecoverable failure.
 * Fix: the D3D11_DEBUG branch had `u32 flags = D3D11_CREATE_DEVICE_DEBUG : 0;`
 * — a leftover ternary that was a syntax error whenever D3D11_DEBUG != 0. */
struct renderer_startup_receipt renderer_startup(struct sys_window *window)
{
    __profscope(initializing_d3d11);
    G.arena = arena_alloc(GIGABYTE(64));
    /* Allocate store (slots live linearly in the arena; `array` aliases its base) */
    G.handle_store.arena = arena_alloc(GIGABYTE(64));
    G.handle_store.array = (struct handle_slot *)G.handle_store.arena.base;
    G.handle_store.mutex = sys_mutex_alloc();
    /* Load shader archive */
    struct buffer embedded_data = inc_shaders_tar();
    if (embedded_data.size > 0) {
        G.shaders_archive = tar_parse(&G.arena, embedded_data, STR("shaders/"));
    }
    /* Initialize shader table */
    init_shader_table();
    HRESULT hr;
    ID3D11Device *device;
    ID3D11DeviceContext *context;
    IDXGISwapChain1 *swapchain;
    /* Create D3D11 device & context */
    {
#if D3D11_DEBUG
        u32 flags = D3D11_CREATE_DEVICE_DEBUG;
#else
        u32 flags = 0;
#endif
        D3D_FEATURE_LEVEL levels[] = { D3D_FEATURE_LEVEL_11_0 };
        hr = D3D11CreateDevice(
            NULL,
            D3D_DRIVER_TYPE_HARDWARE,
            NULL,
            flags,
            levels,
            ARRAY_COUNT(levels),
            D3D11_SDK_VERSION,
            &device,
            NULL,
            &context
        );
        ASSERT(SUCCEEDED(hr));
    }
#if D3D11_DEBUG
    /* Enable debug break on API errors */
    {
        ID3D11InfoQueue *info;
        ID3D11Device_QueryInterface(device, &IID_ID3D11InfoQueue, (void **)&info);
        ID3D11InfoQueue_SetBreakOnSeverity(info, D3D11_MESSAGE_SEVERITY_CORRUPTION, TRUE);
        ID3D11InfoQueue_SetBreakOnSeverity(info, D3D11_MESSAGE_SEVERITY_ERROR, TRUE);
        ID3D11InfoQueue_Release(info);
    }
    /* Enable debug break for DXGI too */
    {
        IDXGIInfoQueue *dxgiInfo;
        hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgiInfo);
        ASSERT(SUCCEEDED(hr));
        IDXGIInfoQueue_SetBreakOnSeverity(dxgiInfo, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, TRUE);
        IDXGIInfoQueue_SetBreakOnSeverity(dxgiInfo, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, TRUE);
        IDXGIInfoQueue_Release(dxgiInfo);
    }
#endif
    /* Create swap chain */
    {
        HWND hwnd = (HWND)sys_window_get_internal_handle(window);
        /* Get DXGI device from D3D11 device */
        IDXGIDevice *dxgiDevice;
        hr = ID3D11Device_QueryInterface(device, &IID_IDXGIDevice, (void **)&dxgiDevice);
        ASSERT(SUCCEEDED(hr));
        /* Get DXGI adapter from DXGI device */
        IDXGIAdapter *dxgiAdapter;
        hr = IDXGIDevice_GetAdapter(dxgiDevice, &dxgiAdapter);
        ASSERT(SUCCEEDED(hr));
        /* Get DXGI factory from DXGI adapter */
        IDXGIFactory2 *factory;
        hr = IDXGIAdapter_GetParent(dxgiAdapter, &IID_IDXGIFactory2, (void **)&factory);
        ASSERT(SUCCEEDED(hr));
        DXGI_SWAP_CHAIN_DESC1 desc = {
            .Format = DXGI_FORMAT_R8G8B8A8_UNORM,
            .SampleDesc = { 1, 0 },
            .BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT,
            .BufferCount = 2,
            .Scaling = DXGI_SCALING_NONE,
            /* Use more efficient FLIP presentation model.
             * Windows 10 allows to use DXGI_SWAP_EFFECT_FLIP_DISCARD
             * For Windows 8 compatibility use DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL
             * For Windows 7 compatibility use DXGI_SWAP_EFFECT_DISCARD
             */
            .SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD,
        };
        hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown *)device, hwnd, &desc, NULL, NULL, &swapchain);
        ASSERT(SUCCEEDED(hr));
        /* Disable Alt+Enter changing monitor resolution to match window size */
        IDXGIFactory2_MakeWindowAssociation(factory, hwnd, DXGI_MWA_NO_ALT_ENTER);
        IDXGIFactory2_Release(factory);
        IDXGIAdapter_Release(dxgiAdapter);
        IDXGIDevice_Release(dxgiDevice);
    }
    if (!SUCCEEDED(hr) || !device || !context || !swapchain) {
        /* Renderer initialization failure */
        /* TODO: Better message */
        sys_panic(STR("Failed to initialize renderer"));
    }
    G.dev = device;
    G.devcon = context;
    G.swapchain = swapchain;
    /* Create the blending setup */
    {
        __profscope(create_blend_state);
        const f32 blend_factor[4] = { 0.f, 0.f, 0.f, 0.f };
        /* TODO: Actually go over these (just want alpha blending/transparency) */
        D3D11_BLEND_DESC desc = {
            .AlphaToCoverageEnable = false,
            .RenderTarget[0].BlendEnable = true,
            .RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA,
            .RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA,
            .RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD,
            .RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE,
            .RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA,
            .RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD,
            .RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL
        };
        /* FIXME: Free this? */
        ID3D11Device_CreateBlendState(G.dev, &desc, &G.blend_state);
        ID3D11DeviceContext_OMSetBlendState(G.devcon, G.blend_state, blend_factor, 0xffffffff);
    }
    /* Create depth-stencil State */
    {
        __profscope(create_depth_stencil_state);
        /* TODO: Actually go over these (copied from elsewhere) */
        D3D11_DEPTH_STENCIL_DESC desc = ZI;
        desc.DepthEnable = false;
        desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
        desc.DepthFunc = D3D11_COMPARISON_ALWAYS;
        desc.StencilEnable = false;
        desc.FrontFace.StencilFailOp = desc.FrontFace.StencilDepthFailOp = desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP;
        desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS;
        desc.BackFace = desc.FrontFace;
        /* FIXME: Free this? */
        ID3D11Device_CreateDepthStencilState(G.dev, &desc, &G.depth_stencil_state);
        ID3D11DeviceContext_OMSetDepthStencilState(G.devcon, G.depth_stencil_state, 0);
    }
    /* Create the rasterizer state */
    {
        __profscope(create_rasterizer_state);
        D3D11_RASTERIZER_DESC desc = {
            .FillMode = D3D11_FILL_SOLID,
            .CullMode = D3D11_CULL_NONE,
            //.ScissorEnable = true,
            .DepthClipEnable = true
        };
        /* FIXME: Free this? */
        ID3D11Device_CreateRasterizerState(G.dev, &desc, &G.rasterizer_state);
        ID3D11DeviceContext_RSSetState(G.devcon, G.rasterizer_state);
    }
    /* Create the sampler state */
    {
        __profscope(create_sampler_state);
        D3D11_SAMPLER_DESC desc = {
            //.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR,
            .Filter = D3D11_FILTER_MIN_MAG_MIP_POINT,
            .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
            .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
            .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
            .MaxAnisotropy = 1,
            //.ComparisonFunc = D3D11_COMPARISON_ALWAYS,
            .MaxLOD = D3D11_FLOAT32_MAX
        };
        /* FIXME: Free this? */
        ID3D11Device_CreateSamplerState(G.dev, &desc, &G.sampler_state);
        ID3D11DeviceContext_PSSetSamplers(G.devcon, 0, 1, &G.sampler_state);
    }
    /* Create the constant buffer */
    {
        __profscope(create_const_buffer);
        D3D11_BUFFER_DESC desc = {
            .ByteWidth = sizeof(struct dx11_constant_buffer_data),
            .Usage = D3D11_USAGE_DYNAMIC,
            .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
            .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
            .MiscFlags = 0
        };
        ID3D11Device_CreateBuffer(G.dev, &desc, NULL, &G.vs_constant_buffer);
        /* Constant buffer ByteWidth must be a multiple of 16 */
        ASSERT(desc.ByteWidth % 16 == 0);
    }
    /* Init shaders, skipping index 0 (SHADER_NONE) */
    for (u32 i = 1; i < NUM_SHADERS; ++i) {
        /* Create shader */
        shader_init(&G.shaders[i], i);
    }
    return (struct renderer_startup_receipt) { 0 };
}
/* ========================== *
* Canvas
* ========================== */
struct renderer_canvas *renderer_canvas_alloc(void)
{
struct renderer_canvas *canvas = NULL;
for (u32 i = 0; i < MAX_CANVASES; ++i) {
if (!G.canvases[i].valid) {
canvas = &G.canvases[i];
break;
}
}
if (!canvas) {
sys_panic(STR("Max renderer canvases reached"));
return NULL;
}
MEMZERO_STRUCT(canvas);
canvas->cpu_cmd_store.arena = arena_alloc(GIGABYTE(8));
canvas->gpu_cmd_store.arena = arena_alloc(GIGABYTE(8));
canvas->view = xform_from_trs(TRS());
canvas->valid = true;
/* Initialize buffers, skipping index 0 (SHADER_NONE) */
for (u32 i = 1; i < ARRAY_COUNT(canvas->buffers); ++i) {
struct dx11_buffer *buffer = &canvas->buffers[i];
buffer->vertex_arena = arena_alloc(GIGABYTE(8));
buffer->index_arena = arena_alloc(GIGABYTE(8));
buffer->cpu_vertex_buffer = arena_dry_push(&buffer->vertex_arena, u8);
buffer->cpu_index_buffer = arena_dry_push(&buffer->index_arena, vidx);
}
return canvas;
}
/* Release a canvas: mark its sparse-array slot free for reuse and tear down
 * its command-store and per-shader geometry arenas. */
void renderer_canvas_release(struct renderer_canvas *canvas)
{
    canvas->valid = false;
    /* Per-shader buffers; index 0 (SHADER_NONE) was never initialized */
    for (u32 shader_idx = 1; shader_idx < ARRAY_COUNT(canvas->buffers); ++shader_idx) {
        struct dx11_buffer *buf = &canvas->buffers[shader_idx];
        arena_release(&buf->vertex_arena);
        arena_release(&buf->index_arena);
        /* FIXME: Clear GPU buffers */
    }
    arena_release(&canvas->cpu_cmd_store.arena);
    arena_release(&canvas->gpu_cmd_store.arena);
}
/* Set the canvas' view transform; takes effect at the next present (it is
 * folded into the VP matrix in renderer_canvas_present). */
void renderer_canvas_set_view(struct renderer_canvas *canvas, struct xform view)
{
    canvas->view = view;
}
/* Reserve room for `vertices_count` vertices and `indices_count` indices in
 * the CPU buffer of the canvas' current (last) draw command, outputting
 * pointers to the reserved, uninitialized regions for the caller to fill.
 * `*vertices_out` is raw bytes: one element is shader->vertex_size bytes.
 * Returns the command-relative index of the first reserved vertex; indices
 * the caller writes must be relative to the command (the draw call passes
 * cmd->vertex_offset as the base vertex). Requires an active draw command
 * (see renderer_canvas_ensure_texture_cmd); asserts and returns 0 otherwise. */
u32 renderer_canvas_push_vertices(struct renderer_canvas *canvas, u8 **vertices_out, vidx **indices_out, u32 vertices_count, u32 indices_count)
{
    struct renderer_cmd *cmd = canvas->cpu_cmd_store.cmd_last;
    if (!cmd) {
        /* Tried to draw to canvas with no active draw cmd */
        ASSERT(false);
        return 0;
    }
    struct dx11_shader *shader = cmd->shader;
    struct dx11_buffer *buffer = &canvas->buffers[shader->kind];
    if (!cmd->offsets_set) {
        /* First push for this cmd: record where its slice begins in the shared buffer */
        cmd->vertex_offset = buffer->vertex_count;
        cmd->index_offset = buffer->index_count;
        cmd->offsets_set = true;
    }
    u32 first_vertex_index = cmd->vertex_count;
    cmd->vertex_count += vertices_count;
    cmd->index_count += indices_count;
    buffer->vertex_count += vertices_count;
    buffer->index_count += indices_count;
    *vertices_out = arena_push_array(&buffer->vertex_arena, u8, shader->vertex_size * vertices_count);
    *indices_out = arena_push_array(&buffer->index_arena, vidx, indices_count);
    return first_vertex_index;
}
/* Ensure the canvas' latest CPU draw command matches `params` (texture
 * shader, same texture handle, same sprite) so subsequent vertex pushes are
 * batched into it; append a fresh command when any parameter differs. */
void renderer_canvas_ensure_texture_cmd(struct renderer_canvas *canvas, struct texture_shader_parameters params)
{
    struct renderer_cmd *last_cmd = canvas->cpu_cmd_store.cmd_last;
    if (!last_cmd || last_cmd->shader->kind != SHADER_TEXTURE ||
        !handle_eq(last_cmd->texture_handle, params.texture_handle) ||
        !sprite_tag_eq(last_cmd->sprite, params.sprite)) {
        /* Command parameters are not the same, insert new command */
        struct renderer_cmd *cmd = arena_push(&canvas->cpu_cmd_store.arena, struct renderer_cmd);
        *cmd = (struct renderer_cmd){
            .shader = &G.shaders[SHADER_TEXTURE],
            .texture_handle = params.texture_handle,
            .sprite = params.sprite
        };
        /* Append to the singly-linked command list */
        if (!canvas->cpu_cmd_store.cmd_first) {
            canvas->cpu_cmd_store.cmd_first = cmd;
        } else {
            last_cmd->next = cmd;
        }
        canvas->cpu_cmd_store.cmd_last = cmd;
    }
}
/* ========================== *
* Send canvas to GPU
* ========================== */
/* Upload the canvas' recorded CPU geometry to the GPU and swap the command
 * stores, making the recorded commands the ones drawn at present time.
 * GPU buffers are created lazily and regrown (with headroom) when the CPU
 * data outgrows them; CPU buffers and command recording are then reset.
 * NOTE(review): Map/CreateBuffer HRESULTs are unchecked (see TODOs); a
 * failed map would leave pData invalid for the MEMCPY. */
void renderer_canvas_send_to_gpu(struct renderer_canvas *canvas)
{
    __prof;
    /* Create / grow vertex buffers */
    for (u32 i = 1; i < ARRAY_COUNT(canvas->buffers); ++i) {
        struct dx11_buffer *buffer = &canvas->buffers[i];
        struct dx11_shader *shader = &G.shaders[i];
        u32 vertex_size = shader->vertex_size;
        u32 index_size = sizeof(vidx);
        /* Nothing recorded for this shader this frame */
        if (buffer->vertex_count == 0 || buffer->index_count == 0) {
            continue;
        }
        if (!buffer->gpu_vertex_buffer || buffer->gpu_vertex_buffer_capacity < buffer->vertex_count) {
            /* +5000 headroom to avoid regrowing every frame */
            buffer->gpu_vertex_buffer_capacity = buffer->vertex_count + 5000;
            D3D11_BUFFER_DESC desc = {
                .Usage = D3D11_USAGE_DYNAMIC,
                .ByteWidth = buffer->gpu_vertex_buffer_capacity * vertex_size,
                .BindFlags = D3D11_BIND_VERTEX_BUFFER,
                .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE
            };
            /* TODO: Assert res >= 0 (success) */
            ID3D11Device_CreateBuffer(G.dev, &desc, NULL, &buffer->gpu_vertex_buffer);
        }
        /* Create / grow index buffer */
        if (!buffer->gpu_index_buffer || buffer->gpu_index_buffer_capacity < buffer->index_count) {
            buffer->gpu_index_buffer_capacity = buffer->index_count + 5000;
            D3D11_BUFFER_DESC desc = {
                .Usage = D3D11_USAGE_DYNAMIC,
                .ByteWidth = buffer->gpu_index_buffer_capacity * index_size,
                .BindFlags = D3D11_BIND_INDEX_BUFFER,
                .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE
            };
            /* TODO: Assert res >= 0 (success) */
            ID3D11Device_CreateBuffer(G.dev, &desc, NULL, &buffer->gpu_index_buffer);
        }
        /* Copy data to GPU */
        D3D11_MAPPED_SUBRESOURCE vtx_resource, idx_resource;
        ID3D11DeviceContext_Map(G.devcon, (ID3D11Resource *)buffer->gpu_vertex_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &vtx_resource);
        ID3D11DeviceContext_Map(G.devcon, (ID3D11Resource *)buffer->gpu_index_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &idx_resource);
        MEMCPY(vtx_resource.pData, buffer->cpu_vertex_buffer, buffer->vertex_count * vertex_size);
        MEMCPY(idx_resource.pData, buffer->cpu_index_buffer, buffer->index_count * index_size);
        ID3D11DeviceContext_Unmap(G.devcon, (ID3D11Resource *)buffer->gpu_vertex_buffer, 0);
        ID3D11DeviceContext_Unmap(G.devcon, (ID3D11Resource *)buffer->gpu_index_buffer, 0);
        /* Reset CPU buffers */
        buffer->vertex_count = 0;
        buffer->index_count = 0;
        arena_reset(&buffer->vertex_arena);
        arena_reset(&buffer->index_arena);
    }
    /* Swap CPU cmds to GPU store (arenas swap with them, keeping cmds alive) */
    struct cmd_store temp = canvas->gpu_cmd_store;
    canvas->gpu_cmd_store = canvas->cpu_cmd_store;
    canvas->cpu_cmd_store = temp;
    /* Reset CPU cmds */
    canvas->cpu_cmd_store.cmd_first = NULL;
    canvas->cpu_cmd_store.cmd_last = NULL;
    arena_reset(&canvas->cpu_cmd_store.arena);
}
/* ========================== *
* Present canvas
* ========================== */
/* Recreate the backbuffer render target view after a size change. The old
 * RTV must be released first: ResizeBuffers fails while buffer references
 * are outstanding. Then the swap chain buffers are resized and buffer 0 is
 * re-wrapped in a new RTV.
 * NOTE(review): HRESULTs are unchecked (see TODO); a failed GetBuffer would
 * pass NULL on to CreateRenderTargetView/Release. */
INTERNAL void resize_backbuffer(struct v2 size)
{
    __prof;
    /* TODO: error handling */
    /* Release all outstanding references to the swap chain's buffers. */
    if (G.backbuffer_view) {
        ID3D11RenderTargetView_Release(G.backbuffer_view);
    }
    IDXGISwapChain_ResizeBuffers(G.swapchain, 0, (UINT)size.x, (UINT)size.y, DXGI_FORMAT_UNKNOWN, 0);
    /* Get buffer and create a render-target-view. */
    ID3D11Texture2D *backbuffer_texture = NULL;
    IDXGISwapChain_GetBuffer(G.swapchain, 0, &IID_ID3D11Texture2D, (LPVOID *)&backbuffer_texture);
    ID3D11Device_CreateRenderTargetView(G.dev, (ID3D11Resource *)backbuffer_texture, NULL, &G.backbuffer_view);
    ID3D11Texture2D_Release(backbuffer_texture);
}
INTERNAL void resize_viewport(struct rect viewport)
{
D3D11_VIEWPORT d3d11_viewport = {
.Width = viewport.width,
.Height = viewport.height,
.MinDepth = 0.0f,
.MaxDepth = 1.0f,
.TopLeftX = viewport.x,
.TopLeftY = viewport.y
};
ID3D11DeviceContext_RSSetViewports(G.devcon, 1, &d3d11_viewport);
}
/* TODO: Lock canvas or at least global state? (in-case multi-threaded present).
* Another option is to store a separate device on each canvas (need to
* research if that is smart first).
*
* I'm thinking we may also just need to lock texture modification access while presenting */
/* Draw all canvases' uploaded (gpu_cmd_store) command lists to the
 * backbuffer and present. Resizes the backbuffer/viewport if they changed,
 * clears to opaque black, then walks each canvas' commands, binding
 * shader/texture/buffers only when they differ from the previous command.
 * Commands whose sprite texture hasn't finished loading are skipped this
 * frame. `vsync` is passed straight through as the Present sync interval. */
void renderer_canvas_present(struct renderer_canvas **canvases, u32 canvases_count, struct v2 screen_size, struct rect viewport, i32 vsync, struct sprite_scope *sprite_scope)
{
    __prof;
    /* Resize back buffer */
    if (!v2_eq(G.backbuffer_size, screen_size)) {
        resize_backbuffer(screen_size);
        G.backbuffer_size = screen_size;
    }
    if (!rect_eq(G.viewport, viewport)) {
        resize_viewport(viewport);
        G.viewport = viewport;
    }
    ID3D11DeviceContext_OMSetRenderTargets(G.devcon, 1, &G.backbuffer_view, NULL);
    /* Clear back buffer */
    f32 clear_color[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
    ID3D11DeviceContext_ClearRenderTargetView(G.devcon, G.backbuffer_view, clear_color);
    /* Set draw mode */
    ID3D11DeviceContext_IASetPrimitiveTopology(G.devcon, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
    for (u32 i = 0; i < canvases_count; ++i) {
        struct renderer_canvas *canvas = canvases[i];
        /* Fill and set constant buffer
         * NOTE: We're only doing this once per canvas, rather than once per draw call since
         * the only constant right now is VP. */
        struct mat4x4 vp_matrix = calculate_vp(canvas->view, viewport.width, viewport.height);
        send_constant_buffer_data(G.vs_constant_buffer, vp_matrix);
        ID3D11DeviceContext_VSSetConstantBuffers(G.devcon, 0, 1, &G.vs_constant_buffer);
        /* Track last bindings to skip redundant state changes */
        struct dx11_shader *last_shader = NULL;
        struct renderer_handle last_texture_handle = ZI;
        for (struct renderer_cmd *cmd = canvas->gpu_cmd_store.cmd_first; cmd; cmd = cmd->next) {
            struct dx11_shader *shader = cmd->shader;
            struct dx11_buffer *buffer = &canvas->buffers[shader->kind];
            b32 texture_loaded;
            struct renderer_handle texture_handle;
            /* An explicit texture handle overrides the sprite lookup */
            if (handle_is_nil(cmd->texture_handle)) {
                struct sprite_texture *sprite_texture = sprite_texture_from_tag_async(sprite_scope, cmd->sprite);
                texture_loaded = sprite_texture->loaded;
                texture_handle = sprite_texture->renderer_handle;
            } else {
                texture_loaded = true;
                texture_handle = cmd->texture_handle;
            }
            if (texture_loaded) {
                /* Activate shader */
                if (shader != last_shader) {
                    ID3D11DeviceContext_VSSetShader(G.devcon, shader->vs, 0, 0);
                    ID3D11DeviceContext_PSSetShader(G.devcon, shader->ps, 0, 0);
                    ID3D11DeviceContext_IASetInputLayout(G.devcon, shader->input_layout);
                    last_shader = shader;
                }
                /* FIXME: what if texture_srv is 0? will this unset it correctly? */
                /* Activate texture */
                if (!handle_eq(texture_handle, last_texture_handle)) {
                    ID3D11ShaderResourceView *texture_srv = handle_data(texture_handle);
                    ID3D11DeviceContext_PSSetShaderResources(G.devcon, 0, 1, &texture_srv);
                    last_texture_handle = texture_handle;
                }
                u32 vertex_offset = cmd->vertex_offset;
                u32 index_offset = cmd->index_offset;
                u32 index_count = cmd->index_count;
                /* Activate buffer */
                u32 zero = 0;
                UINT vertex_stride = shader->vertex_size;
                ID3D11DeviceContext_IASetVertexBuffers(G.devcon, 0, 1, &buffer->gpu_vertex_buffer, &vertex_stride, &zero);
                ID3D11DeviceContext_IASetIndexBuffer(G.devcon, buffer->gpu_index_buffer, DXGI_FORMAT_R32_UINT, zero);
                /* Draw (vertex_offset is the base vertex added to each index) */
                ID3D11DeviceContext_DrawIndexed(G.devcon, index_count, index_offset, vertex_offset);
            }
        }
    }
    /* Present */
    {
        __profscope(IDXGISwapchain_Present);
        IDXGISwapChain1_Present(G.swapchain, vsync, 0);
        __profframe(0);
    }
    renderer_capture_image_for_profiler(viewport.width, viewport.height);
}
/* ========================== *
* Texture
* ========================== */
/* Create an immutable-content RGBA8 GPU texture from `data` and return a
 * handle whose payload is the texture's shader resource view. The texture
 * object itself is released after SRV creation (the SRV holds a reference),
 * so renderer_texture_release only needs to release the SRV.
 * NOTE(review): on CreateTexture2D failure this asserts and still allocates
 * a handle wrapping NULL — callers get a non-nil but unresolvable handle. */
struct renderer_handle renderer_texture_alloc(struct image_rgba data)
{
    __prof;
    /* Create texture */
    ID3D11Texture2D *texture = NULL;
    D3D11_TEXTURE2D_DESC desc = {
        .Width = data.width,
        .Height = data.height,
        .MipLevels = 1,
        .ArraySize = 1,
        .Format = DXGI_FORMAT_R8G8B8A8_UNORM,
        .SampleDesc.Count = 1,
        .Usage = D3D11_USAGE_DEFAULT,
        .BindFlags = D3D11_BIND_SHADER_RESOURCE,
        .CPUAccessFlags = 0
    };
    D3D11_SUBRESOURCE_DATA subresource_data = {
        .pSysMem = data.pixels,
        .SysMemPitch = data.width * 4, /* Tightly packed RGBA8 rows */
        .SysMemSlicePitch = 0
    };
    ID3D11Device_CreateTexture2D(G.dev, &desc, &subresource_data, &texture);
    /* Create srv */
    ID3D11ShaderResourceView *texture_srv = NULL;
    if (texture) {
        D3D11_SHADER_RESOURCE_VIEW_DESC shader_resource_view_desc = {
            .Format = desc.Format,
            .ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D,
            .Texture2D.MipLevels = desc.MipLevels,
            .Texture2D.MostDetailedMip = 0
        };
        ID3D11Device_CreateShaderResourceView(G.dev, (ID3D11Resource *)texture, &shader_resource_view_desc, &texture_srv);
        /* SRV keeps the texture alive; drop our direct reference */
        ID3D11Texture2D_Release(texture);
    }
    ASSERT(texture_srv != NULL);
    struct renderer_handle handle = handle_alloc(texture_srv);
    return handle;
}
void renderer_texture_release(struct renderer_handle handle)
{
__prof;
ID3D11ShaderResourceView *texture_srv = handle_data(handle);
if (texture_srv) {
ID3D11ShaderResourceView_Release(texture_srv);
}
handle_release(handle);
}
/* True when `handle` is the nil (zero) handle, i.e. no texture. */
b32 renderer_texture_is_nil(struct renderer_handle handle)
{
    return handle_is_nil(handle);
}
/* ========================== *
* Profiling frame capture
* ========================== */
/* FIXME: enable this */
#if PROFILING && PROFILING_CAPTURE_FRAME_IMAGE
#define CAP_WIDTH 320
#define CAP_HEIGHT 180
/* One staging-texture slot in the profiler's rolling capture window. */
struct prof_cap {
    ID3D11Texture2D *texture; /* CPU-readable staging copy of the backbuffer */
    struct v2 size;           /* Backbuffer size at capture time */
};
/* Capture a downsampled (CAP_WIDTH x CAP_HEIGHT) copy of the backbuffer for
 * the profiler. Copies into a rolling window of staging textures and only
 * reads the oldest one back, so the Map never stalls waiting on the copy
 * (the image shown lags by ARRAY_COUNT(staging_caps)-1 frames).
 * NOTE(review): uses function-local statics — assumes a single present
 * thread; confirm if presentation ever becomes multi-threaded. */
INTERNAL void renderer_capture_image_for_profiler(f32 width, f32 height)
{
    __prof;
    /* A rolling window of staging textures is used. This is because trying to
     * map a texture immediately after copying the resource will cause the map
     * to hang while it waits for the copy to finish.
     *
     * At the time of writing this code, 5 textures seems to be the sweet spot
     * for performance.
     */
    static struct prof_cap staging_caps[5] = ZI;
    static u32 cap_index = 0;
    static b32 ready_to_read = false;
    ID3D11Texture2D *backbuffer = NULL;
    IDXGISwapChain_GetBuffer(G.swapchain, 0, &IID_ID3D11Texture2D, (LPVOID *)&backbuffer);
    struct prof_cap *write_cap = &staging_caps[cap_index];
    *write_cap = (struct prof_cap) { .size = V2(width, height) };
    {
        /* Match the backbuffer's format/size but make it CPU-readable staging */
        D3D11_TEXTURE2D_DESC staging_desc;
        ID3D11Texture2D_GetDesc(backbuffer, &staging_desc);
        staging_desc.Usage = D3D11_USAGE_STAGING;
        staging_desc.BindFlags = 0;
        staging_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
        ID3D11Device_CreateTexture2D(G.dev, &staging_desc, NULL, &write_cap->texture);
    }
    ID3D11DeviceContext_CopyResource(G.devcon, (ID3D11Resource *)write_cap->texture, (ID3D11Resource *)backbuffer);
    ID3D11Texture2D_Release(backbuffer);
    ++cap_index;
    if (cap_index >= ARRAY_COUNT(staging_caps)) {
        cap_index = 0;
        ready_to_read = true; /* The window is full; oldest capture is now safe to map */
    }
    if (ready_to_read) {
        /* Read back the oldest capture (next slot to be overwritten) */
        struct prof_cap *read_cap = &staging_caps[cap_index];
        {
            D3D11_MAPPED_SUBRESOURCE res;
            ID3D11DeviceContext_Map(G.devcon, (ID3D11Resource *)read_cap->texture, 0, D3D11_MAP_READ, 0, &res);
            u32 final_width = CAP_WIDTH;
            u32 final_height = CAP_HEIGHT;
            /* Nearest-neighbor downsample step per destination pixel */
            f32 width_frequency = (f32)read_cap->size.x / (f32)final_width;
            f32 height_frequency = (f32)read_cap->size.y / (f32)final_height;
            {
                struct temp_arena scratch = scratch_begin_no_conflict();
                u32 *source = res.pData;
                u32 *dest = arena_push_array(scratch.arena, u32, final_width * final_height);
                u32 pitch = res.RowPitch / 4; /* Row pitch in 32-bit pixels, not bytes */
                for (u32 y = 0; y < final_height; ++y) {
                    for (u32 x = 0; x < final_width; ++x) {
                        u32 *pixel = &dest[x + (y * final_width)];
                        u64 source_x = (u64)(width_frequency * (f32)x);
                        u64 source_y = (u64)(height_frequency * (f32)y);
                        *pixel = source[source_x + (source_y * pitch)];
                    }
                }
                {
                    __profscope(prof_frame_image);
                    __profframeimage(dest, (u16)final_width, (u16)final_height, ARRAY_COUNT(staging_caps) - 1, false);
                }
                scratch_end(scratch);
            }
            ID3D11DeviceContext_Unmap(G.devcon, (ID3D11Resource *)read_cap->texture, 0);
        }
        ID3D11Texture2D_Release(read_cap->texture);
    }
}
#else
/* No-op stub used when profiler frame capture is compiled out. */
INTERNAL void renderer_capture_image_for_profiler(f32 width, f32 height)
{
    (UNUSED)width;
    (UNUSED)height;
}
#endif