#include "renderer.h"
#include "sys.h"
#include "app.h"
#include "memory.h"
#include "arena.h"
#include "scratch.h"
#include "texture.h"
#include "string.h"
#include "math.h"
#include "inc.h"
#include "tar.h"
/* NOTE(review): the targets of the bare #include directives below were lost
 * (source mangling). They have been restored to the headers this file
 * demonstrably uses (D3DCompile, IDXGIFactory2, IDXGIInfoQueue, core D3D11
 * types) — confirm against the original file. */
#include <stddef.h>

#pragma warning(push)
#pragma warning(disable : 4201)
#pragma warning(disable : 4115)
#define CINTERFACE
#define COBJMACROS
#include <d3d11.h>
#include <dxgi1_3.h>
#include <d3dcompiler.h>
#include <dxgidebug.h>
#undef CINTERFACE
#undef COBJMACROS
#pragma warning(pop)

#define MAX_CANVASES 1024

/* FIXME: Enable this and resolve unreleased references */
//#define D3D11_DEBUG RTC
#define D3D11_DEBUG 0

/* A compiled shader pair plus the input layout describing its vertex format. */
struct dx11_shader {
	enum shader_kind kind;
	u32 vertex_size;                 /* Size in bytes of one vertex for this shader */
	ID3D11InputLayout *input_layout;
	ID3D11VertexShader *vs;
	ID3D11PixelShader *ps;
};

/* CPU-side mirror of the VS constant buffer (must stay 16-byte padded). */
struct dx11_constant_buffer_data {
	struct mat4x4 vp;                /* View-projection matrix */
};

/* Per-shader geometry staging: CPU arenas that are flushed into (growable)
 * GPU dynamic buffers once per frame. */
struct dx11_buffer {
	u32 vertex_count;
	u32 index_count;
	u8 *cpu_vertex_buffer;           /* Array of homogeneous vertices (size depends on shader) */
	vidx *cpu_index_buffer;          /* Array of vertex indices into cpu_vertex_buffer */
	struct arena vertex_arena;
	struct arena index_arena;
	u32 gpu_vertex_buffer_capacity;  /* In vertices, not bytes */
	u32 gpu_index_buffer_capacity;   /* In indices, not bytes */
	ID3D11Buffer *gpu_vertex_buffer;
	ID3D11Buffer *gpu_index_buffer;
};

/* One recorded draw: a shader + texture binding and a range into the
 * corresponding dx11_buffer. Commands form a singly linked list. */
struct renderer_cmd {
	struct dx11_shader *shader;
	struct renderer_handle texture;
	/* Associated buffer data */
	u32 vertex_count;
	u32 index_count;
	u32 vertex_offset;
	u32 index_offset;
	b32 offsets_set;                 /* Offsets are captured lazily on first vertex push */
	struct renderer_cmd *next;
};

/* Linked list of commands backed by an arena (reset wholesale each frame). */
struct cmd_store {
	struct renderer_cmd *cmd_first;
	struct renderer_cmd *cmd_last;
	struct arena arena;
};

/* A recording surface: CPU commands are recorded into cpu_cmd_store, then
 * swapped into gpu_cmd_store by renderer_canvas_send_to_gpu. */
struct renderer_canvas {
	struct dx11_buffer buffers[NUM_SHADERS];
	struct cmd_store cpu_cmd_store;
	struct cmd_store gpu_cmd_store;
	struct xform view;
	b32 valid;                       /* False if uninitialized (in sparse array) */
};

INTERNAL void renderer_capture_image_for_profiler(f32 width, f32 height);

/* ==========================
 * Global state
 * ========================== */

/* Generational handle slot; slots live contiguously in the store's arena. */
struct handle_slot {
	u64 idx;
	u64 gen;
	void *data;
	struct handle_slot *next_free;
};

struct handle_store {
	struct sys_mutex mutex;          /* Guards alloc/release (reads are unlocked) */
	struct arena arena;
	struct handle_slot *head_free;   /* Free list of recycled slots */
	struct handle_slot *array;       /* Alias of arena.base: slots indexed by idx */
	u64 count;
};

struct dx11_shader_desc {
	char *name_cstr;                 /* Path inside the shaders tar archive */
	u32 vertex_size;
	D3D11_INPUT_ELEMENT_DESC input_layout_desc[8]; /* NULL terminated array */
};

GLOBAL struct {
	b32 initialized;
	struct arena arena;
	struct tar_archive shaders_archive; /* Tar archive including shader sources */

	ID3D11Device *dev;
	ID3D11DeviceContext *devcon;
	IDXGISwapChain1 *swapchain;
	ID3D11RenderTargetView *backbuffer_view;

	/* Here for caching/comparison */
	struct v2 backbuffer_size;
	struct rect viewport;

	ID3D11BlendState *blend_state;
	ID3D11RasterizerState *rasterizer_state;
	ID3D11DepthStencilState *depth_stencil_state;
	ID3D11SamplerState *sampler_state;
	ID3D11Buffer *vs_constant_buffer;

	struct handle_store handle_store;

	/* Sparse array (canvas.valid) */
	struct renderer_canvas canvases[MAX_CANVASES];

	struct dx11_shader shaders[NUM_SHADERS];
	struct dx11_shader_desc shader_info[NUM_SHADERS];
} L = { 0 }, DEBUG_LVAR(L_renderer_d3d11);

/* ==========================
 * Util
 * ========================== */

/* Calculate the view projection matrix (y-down orthographic screen space). */
INLINE struct mat4x4
calculate_vp(struct xform view, f32 viewport_width, f32 viewport_height)
{
	struct mat4x4 projection = mat4x4_from_ortho(0.0, viewport_width, viewport_height, 0.0, -1.0, 1.0);
	struct mat4x4 view4x4 = mat4x4_from_xform(view);
	return mat4x4_mul(projection, view4x4);
}

/* Upload the VP matrix into a dynamic constant buffer via map-discard. */
INTERNAL void
send_constant_buffer_data(ID3D11Buffer *buffer, struct mat4x4 vp)
{
	D3D11_MAPPED_SUBRESOURCE ms;
	if (ID3D11DeviceContext_Map(L.devcon, (ID3D11Resource *)buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &ms) != S_OK) {
		ASSERT(false);
		return;
	}
	struct dx11_constant_buffer_data *data = (struct dx11_constant_buffer_data *)ms.pData;
	MEMCPY(&data->vp, &vp, sizeof(vp));
	ID3D11DeviceContext_Unmap(L.devcon, (ID3D11Resource *)buffer, 0);
}

/* ==========================
 * Handle
 * ========================== */

/* Handle layout
 * bits 0-31:  Index
 * bits 32-63: Generation
 */
#define HANDLE_IDX_MASK 0x00000000FFFFFFFF
#define HANDLE_GEN_MASK 0xFFFFFFFF00000000
#define HANDLE_IDX_MAX (U32_MAX)
#define HANDLE_GEN_MAX (U32_MAX)
#define HANDLE_CREATE(idx, gen) ((struct renderer_handle) { .v[0] = (u64)(gen) << 32 | ((u64)(idx) & 0xFFFFFFFF) } )
#define HANDLE_IDX(handle) ((u32)((handle).v[0] & HANDLE_IDX_MASK))
#define HANDLE_GEN(handle) ((u32)(((handle).v[0] & HANDLE_GEN_MASK) >> 32))

/* Allocate a generational handle wrapping `data`. Recycles a free slot if one
 * exists, otherwise pushes a fresh slot (idx = count) onto the store arena. */
INTERNAL struct renderer_handle
handle_alloc(void *data)
{
	__prof;
	struct handle_store *store = &L.handle_store;
	struct handle_slot *slot = NULL;
	sys_mutex_lock(&store->mutex);
	{
		if (store->head_free) {
			/* Take first from free list */
			slot = store->head_free;
			store->head_free = slot->next_free;
			slot->next_free = NULL;
		} else {
			/* Or push onto arena */
			if (store->count + 1 >= HANDLE_IDX_MAX) {
				sys_panic(STR("Maximum renderer handles exceeded"));
			}
			slot = arena_push_zero(&store->arena, struct handle_slot);
			slot->idx = store->count;
			slot->gen = 1; /* gen 0 is reserved so a zeroed handle is never valid */
			++store->count;
		}
		slot->data = data;
	}
	sys_mutex_unlock(&store->mutex);
	struct renderer_handle handle = HANDLE_CREATE(slot->idx, slot->gen);
	return handle;
}

/* Invalidate a handle: bump the slot's generation (so stale handles stop
 * resolving) and return the slot to the free list for reuse. */
INTERNAL void
handle_release(struct renderer_handle handle)
{
	__prof;
	struct handle_store *store = &L.handle_store;
	u32 idx = HANDLE_IDX(handle);
	u32 gen = HANDLE_GEN(handle);
	sys_mutex_lock(&store->mutex);
	{
		if (idx < store->count) {
			struct handle_slot *slot = &store->array[idx];
			if (slot->gen == gen) {
				/* Insert into free list */
				if (gen + 1 < HANDLE_GEN_MAX) {
					slot->next_free = store->head_free;
					store->head_free = slot;
				} else {
					/* Maximum generations exceeded. Not a runtime error since it
					 * shouldn't cause issues in practice (just can't recycle this handle).
					 * Still probably means there's a problem in the code. */
					ASSERT(false);
				}
				++slot->gen;
			} else {
				/* Tried to release handle not in store (non-matching generation) */
				ASSERT(false);
			}
		} else {
			/* Tried to release out-of-bounds handle */
			ASSERT(false);
		}
	}
	sys_mutex_unlock(&store->mutex);
}

/* Resolve a handle to its payload, or NULL if stale/invalid.
 * NOTE(review): reads slot state without taking store->mutex — presumably
 * relies on benign races; confirm this is intentional. */
INTERNAL void *
handle_data(struct renderer_handle handle)
{
	__prof;
	void *data = NULL;
	struct handle_store *store = &L.handle_store;
	u32 idx = HANDLE_IDX(handle);
	u32 gen = HANDLE_GEN(handle);
	if (idx < store->count) {
		struct handle_slot *slot = &store->array[idx];
		if (slot->gen == gen) {
			data = slot->data;
		}
	}
	return data;
}

INTERNAL b32
handle_eq(struct renderer_handle h1, struct renderer_handle h2)
{
	return h1.v[0] == h2.v[0];
}

/* ==========================
 * Shader
 * ========================== */

/* Panic with the HLSL compiler's error text (if any).
 * TODO: don't do fatal error, just don't use shader */
INTERNAL void
process_shader_compilation_error(ID3DBlob *error_blob)
{
	struct temp_arena scratch = scratch_begin_no_conflict();
	struct string error_prefix = string_copy(scratch.arena, STR("Failed to compile shader:\n"));
	if (error_blob) {
		char *compile_error_cstr = (char *)ID3D10Blob_GetBufferPointer(error_blob);
		struct string error_msg = string_cat(scratch.arena, error_prefix, string_from_cstr(compile_error_cstr));
		sys_panic(error_msg);
	}
	scratch_end(scratch);
}

/* Populate the static shader descriptor table (source path, vertex size and
 * input layout per shader kind). */
INTERNAL void
init_shader_table(void)
{
	MEMZERO_ARRAY(L.shader_info);
	L.shader_info[SHADER_TEXTURE] = (struct dx11_shader_desc) {
		"shaders/texture.hlsl",
		sizeof(struct texture_shader_vertex),
		{
			{ "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT,   0, FIELD_OFFSETOF(struct texture_shader_vertex, pos),   D3D11_INPUT_PER_VERTEX_DATA, 0 },
			{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT,   0, FIELD_OFFSETOF(struct texture_shader_vertex, uv),    D3D11_INPUT_PER_VERTEX_DATA, 0 },
			{ "COLOR",    0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, FIELD_OFFSETOF(struct texture_shader_vertex, color), D3D11_INPUT_PER_VERTEX_DATA, 0 }
		}
	};
}

/* Compile the HLSL source for `kind` from the embedded tar archive and create
 * the vertex/pixel shaders plus the matching input layout. Panics on failure. */
INTERNAL void
shader_init(struct dx11_shader *shader, enum shader_kind kind)
{
	__prof;
	MEMZERO_STRUCT(shader);
	struct temp_arena scratch = scratch_begin_no_conflict();

	const struct dx11_shader_desc *shader_desc = &L.shader_info[kind];
	shader->kind = kind;
	shader->vertex_size = shader_desc->vertex_size;

	u32 flags = D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS;
#if D3D11_DEBUG
	flags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION;
#else
	flags |= D3DCOMPILE_OPTIMIZATION_LEVEL3;
#endif

	/* Compile shader */
	ID3DBlob *vs_blob, *ps_blob;
	{
		struct string name = string_from_cstr(shader_desc->name_cstr);
		struct tar_entry *tar_entry = tar_get(&L.shaders_archive, name);
		if (!tar_entry) {
			sys_panic(string_format(scratch.arena, STR("Could not find shader \"%F\""), FMT_STR(name)));
		}
		struct buffer shader_src = tar_entry->buff;

		/* Compile shader */
		/* TODO: pre-compile shaders w/ FXC? */
		ID3DBlob *error_blob;
		HRESULT v_res = D3DCompile(shader_src.data, shader_src.size, NULL, NULL, NULL,
				"vs_main", "vs_5_0", flags, 0, &vs_blob, &error_blob);
		if (FAILED(v_res)) {
			process_shader_compilation_error(error_blob);
		}
		HRESULT p_res = D3DCompile(shader_src.data, shader_src.size, NULL, NULL, NULL,
				"ps_main", "ps_5_0", flags, 0, &ps_blob, &error_blob);
		if (FAILED(p_res)) {
			process_shader_compilation_error(error_blob);
		}
	}

	/* Get number of device layout elements from NULL terminated array */
	u32 elem_count = 0;
	for (; elem_count < ARRAY_COUNT(shader_desc->input_layout_desc); ++elem_count) {
		const D3D11_INPUT_ELEMENT_DESC *d = &shader_desc->input_layout_desc[elem_count];
		if (d->SemanticName == NULL) {
			break;
		}
	}

	/* Create device layout */
	ID3D11Device_CreateInputLayout(L.dev, shader_desc->input_layout_desc, elem_count,
			ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
			&shader->input_layout);

	/* Create shader */
	ID3D11Device_CreateVertexShader(L.dev, ID3D10Blob_GetBufferPointer(vs_blob),
			ID3D10Blob_GetBufferSize(vs_blob), NULL, &shader->vs);
	ID3D11Device_CreatePixelShader(L.dev, ID3D10Blob_GetBufferPointer(ps_blob),
			ID3D10Blob_GetBufferSize(ps_blob), NULL, &shader->ps);

	ID3D10Blob_Release(vs_blob);
	ID3D10Blob_Release(ps_blob);
	scratch_end(scratch);
}

/* ==========================
 * Startup
 * ========================== */

/* Initialize the D3D11 device, swap chain, fixed pipeline state and shaders.
 * Must be called once before any other renderer_* function. */
void
renderer_startup(struct sys_window *window)
{
	__profscope(initializing_d3d11);

	L.arena = arena_alloc(GIGABYTE(64));

	/* Allocate store */
	L.handle_store.arena = arena_alloc(GIGABYTE(64));
	L.handle_store.array = (struct handle_slot *)L.handle_store.arena.base;
	L.handle_store.mutex = sys_mutex_alloc();

	/* Load shader archive */
	struct buffer embedded_data = inc_shaders_tar();
	if (embedded_data.size > 0) {
		L.shaders_archive = tar_parse(&L.arena, embedded_data, STR("shaders/"));
	}

	/* Initialize shader table */
	init_shader_table();

	HRESULT hr;
	ID3D11Device *device;
	ID3D11DeviceContext *context;
	IDXGISwapChain1 *swapchain;

	/* Create D3D11 device & context */
	{
#if D3D11_DEBUG
		/* FIX: was "D3D11_CREATE_DEVICE_DEBUG : 0" — stray ternary remnant
		 * that did not compile when D3D11_DEBUG was enabled. */
		u32 flags = D3D11_CREATE_DEVICE_DEBUG;
#else
		u32 flags = 0;
#endif
		D3D_FEATURE_LEVEL levels[] = { D3D_FEATURE_LEVEL_11_0 };
		hr = D3D11CreateDevice(
			NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags,
			levels, ARRAY_COUNT(levels), D3D11_SDK_VERSION,
			&device, NULL, &context
		);
		ASSERT(SUCCEEDED(hr));
	}

#if D3D11_DEBUG
	/* Enable debug break on API errors */
	{
		ID3D11InfoQueue *info;
		ID3D11Device_QueryInterface(device, &IID_ID3D11InfoQueue, (void **)&info);
		ID3D11InfoQueue_SetBreakOnSeverity(info, D3D11_MESSAGE_SEVERITY_CORRUPTION, TRUE);
		ID3D11InfoQueue_SetBreakOnSeverity(info, D3D11_MESSAGE_SEVERITY_ERROR, TRUE);
		ID3D11InfoQueue_Release(info);
	}
	/* Enable debug break for DXGI too */
	{
		IDXGIInfoQueue *dxgiInfo;
		hr = DXGIGetDebugInterface1(0, &IID_IDXGIInfoQueue, (void **)&dxgiInfo);
		ASSERT(SUCCEEDED(hr));
		IDXGIInfoQueue_SetBreakOnSeverity(dxgiInfo, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, TRUE);
		IDXGIInfoQueue_SetBreakOnSeverity(dxgiInfo, DXGI_DEBUG_ALL, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, TRUE);
		IDXGIInfoQueue_Release(dxgiInfo);
	}
#endif

	/* Create swap chain */
	{
		HWND hwnd = (HWND)sys_window_get_internal_handle(window);

		/* Get DXGI device from D3D11 device */
		IDXGIDevice *dxgiDevice;
		hr = ID3D11Device_QueryInterface(device, &IID_IDXGIDevice, (void **)&dxgiDevice);
		ASSERT(SUCCEEDED(hr));

		/* Get DXGI adapter from DXGI device */
		IDXGIAdapter *dxgiAdapter;
		hr = IDXGIDevice_GetAdapter(dxgiDevice, &dxgiAdapter);
		ASSERT(SUCCEEDED(hr));

		/* Get DXGI factory from DXGI adapter */
		IDXGIFactory2 *factory;
		hr = IDXGIAdapter_GetParent(dxgiAdapter, &IID_IDXGIFactory2, (void **)&factory);
		ASSERT(SUCCEEDED(hr));

		/* Width/Height left zero: sized automatically from the window. */
		DXGI_SWAP_CHAIN_DESC1 desc = {
			.Format = DXGI_FORMAT_R8G8B8A8_UNORM,
			.SampleDesc = { 1, 0 },
			.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT,
			.BufferCount = 2,
			.Scaling = DXGI_SCALING_NONE,
			/* Use more efficient FLIP presentation model.
			 * Windows 10 allows to use DXGI_SWAP_EFFECT_FLIP_DISCARD
			 * For Windows 8 compatibility use DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL
			 * For Windows 7 compatibility use DXGI_SWAP_EFFECT_DISCARD */
			.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD,
		};
		hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown *)device, hwnd, &desc, NULL, NULL, &swapchain);
		ASSERT(SUCCEEDED(hr));

		/* Disable Alt+Enter changing monitor resolution to match window size */
		IDXGIFactory_MakeWindowAssociation(factory, hwnd, DXGI_MWA_NO_ALT_ENTER);

		IDXGIFactory2_Release(factory);
		IDXGIAdapter_Release(dxgiAdapter);
		IDXGIDevice_Release(dxgiDevice);
	}

	if (!SUCCEEDED(hr) || !device || !context || !swapchain) {
		/* Renderer initialization failure */
		/* TODO: Better message */
		sys_panic(STR("Failed to initialize renderer"));
	}

	L.dev = device;
	L.devcon = context;
	L.swapchain = swapchain;

	/* Create the blending setup */
	{
		__profscope(create_blend_state);
		const f32 blend_factor[4] = { 0.f, 0.f, 0.f, 0.f };
		/* TODO: Actually go over these (just want alpha blending/transparency) */
		D3D11_BLEND_DESC desc = {
			.AlphaToCoverageEnable = false,
			.RenderTarget[0].BlendEnable = true,
			.RenderTarget[0].SrcBlend = D3D11_BLEND_SRC_ALPHA,
			.RenderTarget[0].DestBlend = D3D11_BLEND_INV_SRC_ALPHA,
			.RenderTarget[0].BlendOp = D3D11_BLEND_OP_ADD,
			.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE,
			.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_INV_SRC_ALPHA,
			.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD,
			.RenderTarget[0].RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL
		};
		/* FIXME: Free this? */
		ID3D11Device_CreateBlendState(L.dev, &desc, &L.blend_state);
		ID3D11DeviceContext_OMSetBlendState(L.devcon, L.blend_state, blend_factor, 0xffffffff);
	}

	/* Create depth-stencil State */
	{
		__profscope(create_depth_stencil_state);
		/* TODO: Actually go over these (copied from elsewhere) */
		D3D11_DEPTH_STENCIL_DESC desc = { 0 };
		desc.DepthEnable = false;
		desc.DepthWriteMask = D3D11_DEPTH_WRITE_MASK_ALL;
		desc.DepthFunc = D3D11_COMPARISON_ALWAYS;
		desc.StencilEnable = false;
		desc.FrontFace.StencilFailOp = desc.FrontFace.StencilDepthFailOp = desc.FrontFace.StencilPassOp = D3D11_STENCIL_OP_KEEP;
		desc.FrontFace.StencilFunc = D3D11_COMPARISON_ALWAYS;
		desc.BackFace = desc.FrontFace;
		/* FIXME: Free this? */
		ID3D11Device_CreateDepthStencilState(L.dev, &desc, &L.depth_stencil_state);
		ID3D11DeviceContext_OMSetDepthStencilState(L.devcon, L.depth_stencil_state, 0);
	}

	/* Create the rasterizer state */
	{
		__profscope(create_rasterizer_state);
		D3D11_RASTERIZER_DESC desc = {
			.FillMode = D3D11_FILL_SOLID,
			.CullMode = D3D11_CULL_NONE,
			//.ScissorEnable = true,
			.DepthClipEnable = true
		};
		/* FIXME: Free this? */
		ID3D11Device_CreateRasterizerState(L.dev, &desc, &L.rasterizer_state);
		ID3D11DeviceContext_RSSetState(L.devcon, L.rasterizer_state);
	}

	/* Create the sampler state */
	{
		__profscope(create_sampler_state);
		D3D11_SAMPLER_DESC desc = {
			//.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR,
			.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT,
			.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
			.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
			.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
			.MaxAnisotropy = 1,
			//.ComparisonFunc = D3D11_COMPARISON_ALWAYS,
			.MaxLOD = D3D11_FLOAT32_MAX
		};
		/* FIXME: Free this? */
		ID3D11Device_CreateSamplerState(L.dev, &desc, &L.sampler_state);
		ID3D11DeviceContext_PSSetSamplers(L.devcon, 0, 1, &L.sampler_state);
	}

	/* Create the constant buffer */
	{
		__profscope(create_const_buffer);
		D3D11_BUFFER_DESC desc = {
			.ByteWidth = sizeof(struct dx11_constant_buffer_data),
			.Usage = D3D11_USAGE_DYNAMIC,
			.BindFlags = D3D11_BIND_CONSTANT_BUFFER,
			.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
			.MiscFlags = 0
		};
		ID3D11Device_CreateBuffer(L.dev, &desc, NULL, &L.vs_constant_buffer);
		/* Apparently ByteWidth needs to be in multiples of 16? */
		ASSERT(desc.ByteWidth % 16 == 0);
	}

	/* Init shaders, skipping index 0 (SHADER_NONE) */
	for (u32 i = 1; i < NUM_SHADERS; ++i) {
		/* Create shader */
		shader_init(&L.shaders[i], i);
	}

	WRITE_BARRIER();
	L.initialized = true;
}

/* ==========================
 * Canvas
 * ========================== */

/* Claim the first invalid slot in the global canvas array and initialize its
 * arenas and per-shader buffers. Panics when all slots are in use. */
struct renderer_canvas *
renderer_canvas_alloc(void)
{
	struct renderer_canvas *canvas = NULL;
	for (u32 i = 0; i < MAX_CANVASES; ++i) {
		if (!L.canvases[i].valid) {
			canvas = &L.canvases[i];
			break;
		}
	}
	if (!canvas) {
		sys_panic(STR("Max renderer canvases reached"));
		return NULL;
	}

	MEMZERO_STRUCT(canvas);
	canvas->cpu_cmd_store.arena = arena_alloc(GIGABYTE(8));
	canvas->gpu_cmd_store.arena = arena_alloc(GIGABYTE(8));
	canvas->view = xform_from_trs(TRS());
	canvas->valid = true;

	/* Initialize buffers, skipping index 0 (SHADER_NONE) */
	for (u32 i = 1; i < ARRAY_COUNT(canvas->buffers); ++i) {
		struct dx11_buffer *buffer = &canvas->buffers[i];
		buffer->vertex_arena = arena_alloc(GIGABYTE(8));
		buffer->index_arena = arena_alloc(GIGABYTE(8));
		/* Dry pushes record the arena base as the CPU buffer start. */
		buffer->cpu_vertex_buffer = arena_dry_push(&buffer->vertex_arena, u8);
		buffer->cpu_index_buffer = arena_dry_push(&buffer->index_arena, vidx);
	}
	return canvas;
}

/* Mark a canvas slot free and release its CPU-side arenas. */
void
renderer_canvas_release(struct renderer_canvas *canvas)
{
	canvas->valid = false;
	arena_release(&canvas->cpu_cmd_store.arena);
	arena_release(&canvas->gpu_cmd_store.arena);

	/* Destroy buffers, skipping index 0 (SHADER_NONE) */
	for (u32 i = 1; i < ARRAY_COUNT(canvas->buffers); ++i) {
		struct dx11_buffer *buffer = &canvas->buffers[i];
		arena_release(&buffer->vertex_arena);
		arena_release(&buffer->index_arena);
		/* FIXME: Clear GPU buffers */
	}
}

void
renderer_canvas_set_view(struct renderer_canvas *canvas, struct xform view)
{
	canvas->view = view;
}

/* Reserve space for `vertices_count` vertices and `indices_count` indices in
 * the buffer of the current (last) command's shader. The caller writes the
 * data through *vertices_out / *indices_out. Returns the index of the first
 * reserved vertex relative to the command's vertex range. */
u32
renderer_canvas_push_vertices(struct renderer_canvas *canvas, u8 **vertices_out,
		vidx **indices_out, u32 vertices_count, u32 indices_count)
{
	struct renderer_cmd *cmd = canvas->cpu_cmd_store.cmd_last;
	if (!cmd) {
		/* Tried to draw to canvas with no active draw cmd */
		ASSERT(false);
		return 0;
	}

	struct dx11_shader *shader = cmd->shader;
	struct dx11_buffer *buffer = &canvas->buffers[shader->kind];

	/* First push for this command: record where its range starts. */
	if (!cmd->offsets_set) {
		cmd->vertex_offset = buffer->vertex_count;
		cmd->index_offset = buffer->index_count;
		cmd->offsets_set = true;
	}

	u32 first_vertex_index = cmd->vertex_count;
	cmd->vertex_count += vertices_count;
	cmd->index_count += indices_count;
	buffer->vertex_count += vertices_count;
	buffer->index_count += indices_count;

	*vertices_out = arena_push_array(&buffer->vertex_arena, u8, shader->vertex_size * vertices_count);
	*indices_out = arena_push_array(&buffer->index_arena, vidx, indices_count);
	return first_vertex_index;
}

/* Ensure the canvas's last command is a SHADER_TEXTURE draw with the given
 * texture, appending a new command if not — this is how consecutive draws with
 * identical state get batched into one command. */
void
renderer_canvas_ensure_texture_cmd(struct renderer_canvas *canvas, struct texture_shader_parameters params)
{
	struct renderer_cmd *last_cmd = canvas->cpu_cmd_store.cmd_last;
	if (!last_cmd || last_cmd->shader->kind != SHADER_TEXTURE || !handle_eq(last_cmd->texture, params.texture)) {
		/* Command parameters are not the same, insert new command */
		struct renderer_cmd *cmd = arena_push(&canvas->cpu_cmd_store.arena, struct renderer_cmd);
		*cmd = (struct renderer_cmd){
			.shader = &L.shaders[SHADER_TEXTURE],
			.texture = params.texture
		};
		if (!canvas->cpu_cmd_store.cmd_first) {
			canvas->cpu_cmd_store.cmd_first = cmd;
		} else {
			last_cmd->next = cmd;
		}
		canvas->cpu_cmd_store.cmd_last = cmd;
	}
}

/* ==========================
 * Send canvas to GPU
 * ========================== */

/* Flush every non-empty per-shader CPU buffer into its (grow-on-demand)
 * dynamic GPU buffer, then swap the CPU command list into the GPU store and
 * reset the CPU side for the next frame of recording. */
void
renderer_canvas_send_to_gpu(struct renderer_canvas *canvas)
{
	__prof;

	/* Create / grow vertex buffers */
	for (u32 i = 1; i < ARRAY_COUNT(canvas->buffers); ++i) {
		struct dx11_buffer *buffer = &canvas->buffers[i];
		struct dx11_shader *shader = &L.shaders[i];
		u32 vertex_size = shader->vertex_size;
		u32 index_size = sizeof(vidx);

		if (buffer->vertex_count == 0 || buffer->index_count == 0) {
			continue;
		}

		if (!buffer->gpu_vertex_buffer || buffer->gpu_vertex_buffer_capacity < buffer->vertex_count) {
			/* +5000 slack to avoid re-creating the buffer on small growth */
			buffer->gpu_vertex_buffer_capacity = buffer->vertex_count + 5000;
			D3D11_BUFFER_DESC desc = {
				.Usage = D3D11_USAGE_DYNAMIC,
				.ByteWidth = buffer->gpu_vertex_buffer_capacity * vertex_size,
				.BindFlags = D3D11_BIND_VERTEX_BUFFER,
				.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE
			};
			/* TODO: Assert res >= 0 (success) */
			ID3D11Device_CreateBuffer(L.dev, &desc, NULL, &buffer->gpu_vertex_buffer);
		}

		/* Create / grow index buffer */
		if (!buffer->gpu_index_buffer || buffer->gpu_index_buffer_capacity < buffer->index_count) {
			buffer->gpu_index_buffer_capacity = buffer->index_count + 5000;
			D3D11_BUFFER_DESC desc = {
				.Usage = D3D11_USAGE_DYNAMIC,
				.ByteWidth = buffer->gpu_index_buffer_capacity * index_size,
				.BindFlags = D3D11_BIND_INDEX_BUFFER,
				.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE
			};
			/* TODO: Assert res >= 0 (success) */
			ID3D11Device_CreateBuffer(L.dev, &desc, NULL, &buffer->gpu_index_buffer);
		}

		/* Copy data to GPU.
		 * NOTE(review): Map results are unchecked; a failed map would make the
		 * MEMCPYs below write through garbage pointers — consider asserting. */
		D3D11_MAPPED_SUBRESOURCE vtx_resource, idx_resource;
		ID3D11DeviceContext_Map(L.devcon, (ID3D11Resource *)buffer->gpu_vertex_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &vtx_resource);
		ID3D11DeviceContext_Map(L.devcon, (ID3D11Resource *)buffer->gpu_index_buffer, 0, D3D11_MAP_WRITE_DISCARD, 0, &idx_resource);
		MEMCPY(vtx_resource.pData, buffer->cpu_vertex_buffer, buffer->vertex_count * vertex_size);
		MEMCPY(idx_resource.pData, buffer->cpu_index_buffer, buffer->index_count * index_size);
		ID3D11DeviceContext_Unmap(L.devcon, (ID3D11Resource *)buffer->gpu_vertex_buffer, 0);
		ID3D11DeviceContext_Unmap(L.devcon, (ID3D11Resource *)buffer->gpu_index_buffer, 0);

		/* Reset CPU buffers */
		buffer->vertex_count = 0;
		buffer->index_count = 0;
		arena_reset(&buffer->vertex_arena);
		arena_reset(&buffer->index_arena);
	}

	/* Swap CPU cmds to GPU store */
	struct cmd_store temp = canvas->gpu_cmd_store;
	canvas->gpu_cmd_store = canvas->cpu_cmd_store;
	canvas->cpu_cmd_store = temp;

	/* Reset CPU cmds */
	canvas->cpu_cmd_store.cmd_first = NULL;
	canvas->cpu_cmd_store.cmd_last = NULL;
	arena_reset(&canvas->cpu_cmd_store.arena);
}

/* ==========================
 * Present canvas
 * ========================== */

/* Resize the swap chain and rebuild the cached backbuffer RTV. */
INTERNAL void
resize_backbuffer(struct v2 size)
{
	__prof;
	/* TODO: error handling */

	/* Release all outstanding references to the swap chain's buffers. */
	if (L.backbuffer_view) {
		ID3D11RenderTargetView_Release(L.backbuffer_view);
	}
	IDXGISwapChain_ResizeBuffers(L.swapchain, 0, (UINT)size.x, (UINT)size.y, DXGI_FORMAT_UNKNOWN, 0);

	/* Get buffer and create a render-target-view. */
	ID3D11Texture2D *backbuffer_texture = NULL;
	IDXGISwapChain_GetBuffer(L.swapchain, 0, &IID_ID3D11Texture2D, (LPVOID *)&backbuffer_texture);
	ID3D11Device_CreateRenderTargetView(L.dev, (ID3D11Resource *)backbuffer_texture, NULL, &L.backbuffer_view);
	ID3D11Texture2D_Release(backbuffer_texture);
}

INTERNAL void
resize_viewport(struct rect viewport)
{
	D3D11_VIEWPORT d3d11_viewport = {
		.Width = viewport.width,
		.Height = viewport.height,
		.MinDepth = 0.0f,
		.MaxDepth = 1.0f,
		.TopLeftX = viewport.x,
		.TopLeftY = viewport.y
	};
	ID3D11DeviceContext_RSSetViewports(L.devcon, 1, &d3d11_viewport);
}

/* TODO: Lock canvas or at least global state? (in-case multi-threaded present).
 * Another option is to store a separate device on each canvas (need to
 * research if that is smart first).
 *
 * I'm thinking we may also just need to lock texture modification access while presenting */

/* Replay the GPU command lists of all given canvases into the backbuffer and
 * present. Shader and texture bindings are cached across commands to avoid
 * redundant state changes. */
void
renderer_canvas_present(struct renderer_canvas **canvases, u32 canvases_count,
		struct v2 screen_size, struct rect viewport, i32 vsync)
{
	__prof;

	/* Resize back buffer */
	if (!v2_eq(L.backbuffer_size, screen_size)) {
		resize_backbuffer(screen_size);
		L.backbuffer_size = screen_size;
	}
	if (!rect_eq(L.viewport, viewport)) {
		resize_viewport(viewport);
		L.viewport = viewport;
	}

	ID3D11DeviceContext_OMSetRenderTargets(L.devcon, 1, &L.backbuffer_view, NULL);

	/* Clear back buffer */
	f32 clear_color[4] = { 0.0f, 0.0f, 0.0f, 1.0f };
	ID3D11DeviceContext_ClearRenderTargetView(L.devcon, L.backbuffer_view, clear_color);

	/* Set draw mode */
	ID3D11DeviceContext_IASetPrimitiveTopology(L.devcon, D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);

	for (u32 i = 0; i < canvases_count; ++i) {
		struct renderer_canvas *canvas = canvases[i];

		/* Fill and set constant buffer
		 * NOTE: We're only doing this once per canvas, rather than once per draw call since
		 * the only constant right now is VP. */
		struct mat4x4 vp_matrix = calculate_vp(canvas->view, viewport.width, viewport.height);
		send_constant_buffer_data(L.vs_constant_buffer, vp_matrix);
		ID3D11DeviceContext_VSSetConstantBuffers(L.devcon, 0, 1, &L.vs_constant_buffer);

		struct dx11_shader *last_shader = NULL;
		struct renderer_handle last_texture_handle = { 0 };
		for (struct renderer_cmd *cmd = canvas->gpu_cmd_store.cmd_first; cmd; cmd = cmd->next) {
			struct dx11_shader *shader = cmd->shader;
			struct dx11_buffer *buffer = &canvas->buffers[shader->kind];
			struct renderer_handle texture_handle = cmd->texture;

			/* Activate shader */
			if (shader != last_shader) {
				ID3D11DeviceContext_VSSetShader(L.devcon, shader->vs, 0, 0);
				ID3D11DeviceContext_PSSetShader(L.devcon, shader->ps, 0, 0);
				ID3D11DeviceContext_IASetInputLayout(L.devcon, shader->input_layout);
				last_shader = shader;
			}

			/* FIXME: what if texture_srv is 0? will this unset it correctly? */
			/* Activate texture */
			if (!handle_eq(texture_handle, last_texture_handle)) {
				ID3D11ShaderResourceView *texture_srv = handle_data(texture_handle);
				ID3D11DeviceContext_PSSetShaderResources(L.devcon, 0, 1, &texture_srv);
				last_texture_handle = texture_handle;
			}

			u32 vertex_offset = cmd->vertex_offset;
			u32 index_offset = cmd->index_offset;
			u32 index_count = cmd->index_count;

			/* Activate buffer */
			u32 zero = 0;
			UINT vertex_stride = shader->vertex_size;
			ID3D11DeviceContext_IASetVertexBuffers(L.devcon, 0, 1, &buffer->gpu_vertex_buffer, &vertex_stride, &zero);
			ID3D11DeviceContext_IASetIndexBuffer(L.devcon, buffer->gpu_index_buffer, DXGI_FORMAT_R32_UINT, zero);

			/* Draw */
			ID3D11DeviceContext_DrawIndexed(L.devcon, index_count, index_offset, vertex_offset);
		}
	}

	/* Present */
	{
		__profscope(IDXGISwapchain_Present);
		IDXGISwapChain1_Present(L.swapchain, vsync, 0);
		__profframe(0);
	}

	renderer_capture_image_for_profiler(viewport.width, viewport.height);
}

/* ==========================
 * Texture
 * ========================== */

/* Upload an RGBA8 image as an immutable-usage-free default texture and return
 * a handle wrapping its shader resource view. */
struct renderer_handle
renderer_texture_alloc(struct image_rgba data)
{
	__prof;

	/* Create texture */
	ID3D11Texture2D *texture = NULL;
	D3D11_TEXTURE2D_DESC desc = {
		.Width = data.width,
		.Height = data.height,
		.MipLevels = 1,
		.ArraySize = 1,
		.Format = DXGI_FORMAT_R8G8B8A8_UNORM,
		.SampleDesc.Count = 1,
		.Usage = D3D11_USAGE_DEFAULT,
		.BindFlags = D3D11_BIND_SHADER_RESOURCE,
		.CPUAccessFlags = 0
	};
	D3D11_SUBRESOURCE_DATA subresource_data = {
		.pSysMem = data.pixels,
		.SysMemPitch = data.width * 4, /* 4 bytes per RGBA8 pixel */
		.SysMemSlicePitch = 0
	};
	ID3D11Device_CreateTexture2D(L.dev, &desc, &subresource_data, &texture);

	/* Create srv */
	ID3D11ShaderResourceView *texture_srv = NULL;
	if (texture) {
		D3D11_SHADER_RESOURCE_VIEW_DESC shader_resource_view_desc = {
			.Format = desc.Format,
			.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D,
			.Texture2D.MipLevels = desc.MipLevels,
			.Texture2D.MostDetailedMip = 0
		};
		ID3D11Device_CreateShaderResourceView(L.dev, (ID3D11Resource *)texture, &shader_resource_view_desc, &texture_srv);
		/* The SRV holds its own reference; drop ours. */
		ID3D11Texture2D_Release(texture);
	}
	ASSERT(texture_srv != NULL);

	struct renderer_handle handle = handle_alloc(texture_srv);
	return handle;
}

/* Release the SRV behind a texture handle and invalidate the handle. */
void
renderer_texture_release(struct renderer_handle handle)
{
	__prof;
	ID3D11ShaderResourceView *texture_srv = handle_data(handle);
	if (texture_srv) {
		ID3D11ShaderResourceView_Release(texture_srv);
	}
	handle_release(handle);
}

/* ==========================
 * Profiling frame capture
 * ========================== */

/* FIXME: enable this */
#if PROFILING && PROFILING_CAPTURE_FRAME_IMAGE

#define CAP_WIDTH 320
#define CAP_HEIGHT 180

struct prof_cap {
	ID3D11Texture2D *texture;
	struct v2 size;
};

/* Copy the backbuffer into a staging texture and, once the rolling window has
 * filled, read back the oldest capture, downsample it to CAP_WIDTH x
 * CAP_HEIGHT by point sampling, and hand it to the profiler. */
INTERNAL void
renderer_capture_image_for_profiler(f32 width, f32 height)
{
	__prof;

	/* A rolling window of staging textures is used. This is because trying to
	 * map a texture immediately after copying the resource will cause the map
	 * to hang while it waits for the copy to finish.
	 *
	 * At the time of writing this code, 5 textures seems to be the sweet spot
	 * for performance. */
	static struct prof_cap staging_caps[5] = { 0 };
	static u32 cap_index = 0;
	static b32 ready_to_read = false;

	ID3D11Texture2D *backbuffer = NULL;
	IDXGISwapChain_GetBuffer(L.swapchain, 0, &IID_ID3D11Texture2D, (LPVOID *)&backbuffer);

	struct prof_cap *write_cap = &staging_caps[cap_index];
	*write_cap = (struct prof_cap) { .size = V2(width, height) };
	{
		D3D11_TEXTURE2D_DESC staging_desc;
		ID3D11Texture2D_GetDesc(backbuffer, &staging_desc);
		staging_desc.Usage = D3D11_USAGE_STAGING;
		staging_desc.BindFlags = 0;
		staging_desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
		ID3D11Device_CreateTexture2D(L.dev, &staging_desc, NULL, &write_cap->texture);
	}
	ID3D11DeviceContext_CopyResource(L.devcon, (ID3D11Resource *)write_cap->texture, (ID3D11Resource *)backbuffer);
	ID3D11Texture2D_Release(backbuffer);

	++cap_index;
	if (cap_index >= ARRAY_COUNT(staging_caps)) {
		cap_index = 0;
		ready_to_read = true;
	}

	if (ready_to_read) {
		/* cap_index now points at the oldest capture in the window. */
		struct prof_cap *read_cap = &staging_caps[cap_index];
		{
			D3D11_MAPPED_SUBRESOURCE res;
			ID3D11DeviceContext_Map(L.devcon, (ID3D11Resource *)read_cap->texture, 0, D3D11_MAP_READ, 0, &res);

			u32 final_width = CAP_WIDTH;
			u32 final_height = CAP_HEIGHT;
			f32 width_frequency = (f32)read_cap->size.x / (f32)final_width;
			f32 height_frequency = (f32)read_cap->size.y / (f32)final_height;
			{
				struct temp_arena scratch = scratch_begin_no_conflict();
				u32 *source = res.pData;
				u32 *dest = arena_push_array(scratch.arena, u32, final_width * final_height);
				u32 pitch = res.RowPitch / 4; /* Row pitch in pixels, not bytes */
				for (u32 y = 0; y < final_height; ++y) {
					for (u32 x = 0; x < final_width; ++x) {
						u32 *pixel = &dest[x + (y * final_width)];
						u64 source_x = (u64)(width_frequency * (f32)x);
						u64 source_y = (u64)(height_frequency * (f32)y);
						*pixel = source[source_x + (source_y * pitch)];
					}
				}
				{
					__profscope(prof_frame_image);
					__profframeimage(dest, (u16)final_width, (u16)final_height, ARRAY_COUNT(staging_caps) - 1, false);
				}
				scratch_end(scratch);
			}
			ID3D11DeviceContext_Unmap(L.devcon, (ID3D11Resource *)read_cap->texture, 0);
		}
		ID3D11Texture2D_Release(read_cap->texture);
	}
}

#else

INTERNAL void
renderer_capture_image_for_profiler(f32 width, f32 height)
{
	(UNUSED)width;
	(UNUSED)height;
}

#endif