From 0551148ae882e05a42ef97515eea30f35e90d191 Mon Sep 17 00:00:00 2001 From: jacob Date: Sat, 7 Jun 2025 16:03:27 -0500 Subject: [PATCH] dx12 shader compilation setup --- src/app.c | 11 +- src/gpu.h | 3 +- src/gpu_dx11.c | 3 +- src/gpu_dx12.c | 397 ++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 405 insertions(+), 9 deletions(-) diff --git a/src/app.c b/src/app.c index 1a8fc966..f5a2d46e 100644 --- a/src/app.c +++ b/src/app.c @@ -245,14 +245,15 @@ void app_entry_point(struct string args_str) { /* FIXME: Switch this on to utilize all cores. Only decreasing worker count for testing purposes. */ #if !PROFILING && !RTC - /* 1. User thread + /* Ideally these layers should have cores "reserved" for them + * 1. User thread * 2. Sim thread - * 3. Playback thread + * 3. Audio mixing/playback thread */ - u32 num_reserved_cores = 3; + i32 num_reserved_cores = 3; i32 min_worker_count = 2; - i32 max_worker_count = 512; + i32 max_worker_count = 128; i32 target_worker_count = (i32)sys_num_logical_processors() - num_reserved_cores; worker_count = (u32)clamp_i32(target_worker_count, min_worker_count, max_worker_count); #endif @@ -325,8 +326,8 @@ void app_entry_point(struct string args_str) struct sock_startup_receipt sock_sr = sock_startup(); struct host_startup_receipt host_sr = host_startup(&sock_sr); struct resource_startup_receipt resource_sr = resource_startup(); - struct gpu_startup_receipt gpu_sr = gpu_startup(&window); struct work_startup_receipt work_sr = work_startup(worker_count); + struct gpu_startup_receipt gpu_sr = gpu_startup(&work_sr, &window); struct asset_cache_startup_receipt asset_cache_sr = asset_cache_startup(&work_sr); struct ttf_startup_receipt ttf_sr = ttf_startup(); struct font_startup_receipt font_sr = font_startup(&work_sr, &gpu_sr, &asset_cache_sr, &ttf_sr, &resource_sr); diff --git a/src/gpu.h b/src/gpu.h index 0d400891..d31447ce 100644 --- a/src/gpu.h +++ b/src/gpu.h @@ -2,13 +2,14 @@ #define GPU_H struct sys_window; +struct 
work_startup_receipt; /* ========================== * * Startup * ========================== */ struct gpu_startup_receipt { i32 _; }; -struct gpu_startup_receipt gpu_startup(struct sys_window *window); +struct gpu_startup_receipt gpu_startup(struct work_startup_receipt *work_sr, struct sys_window *window); /* ========================== * * Handle diff --git a/src/gpu_dx11.c b/src/gpu_dx11.c index 9caccd3c..40664a5b 100644 --- a/src/gpu_dx11.c +++ b/src/gpu_dx11.c @@ -273,9 +273,10 @@ INTERNAL struct dx11_buffer *dx11_buffer_alloc(struct D3D11_BUFFER_DESC desc, D3 INTERNAL RESOURCE_WATCH_CALLBACK_FUNC_DEF(shader_resource_watch_callback, name); #endif -struct gpu_startup_receipt gpu_startup(struct sys_window *window) +struct gpu_startup_receipt gpu_startup(struct work_startup_receipt *work_sr, struct sys_window *window) { __prof; + (UNUSED)work_sr; G.arena = arena_alloc(GIGABYTE(64)); diff --git a/src/gpu_dx12.c b/src/gpu_dx12.c index df04a6b9..babb3ff7 100644 --- a/src/gpu_dx12.c +++ b/src/gpu_dx12.c @@ -7,6 +7,9 @@ #include "string.h" #include "scratch.h" #include "app.h" +#include "work.h" +#include "log.h" +#include "resource.h" #pragma warning(push, 0) # define UNICODE @@ -16,11 +19,13 @@ # include # include # include +# include #pragma warning(pop) #pragma comment(lib, "d3d12") #pragma comment(lib, "dxgi") #pragma comment(lib, "dxguid") +#pragma comment(lib, "d3dcompiler") #define DX12_WAIT_FRAME_LATENCY 1 #define DX12_ALLOW_TEARING 1 @@ -38,6 +43,31 @@ # define DX12_SHADER_DEBUG 0 #endif +enum dx12_shader_desc_flags { + DX12_SHADER_DESC_FLAG_NONE = 0, + DX12_SHADER_DESC_FLAG_VS = (1 << 0), + DX12_SHADER_DESC_FLAG_PS = (1 << 1) +}; + +struct dx12_shader_desc { + char *name; + u32 flags; +}; + +struct dx12_shader { + struct dx12_shader_desc desc; +}; + +struct dx12_shader_result { + struct dx12_shader shader; + u64 errors_text_len; + u8 errors_text[KILOBYTE(16)]; +}; + +struct dx12_shader_error { + struct string msg; +}; + enum dx12_handle_kind { 
DX12_HANDLE_KIND_NONE, DX12_HANDLE_KIND_TEXTURE, @@ -85,7 +115,6 @@ GLOBAL struct { IDXGISwapChain3 *swapchain; ID3D12DescriptorHeap *swapchain_rtv_heap; ID3D12Resource *swapchain_rtvs[DX12_SWAPCHAIN_BUFFER_COUNT]; - } G = ZI, DEBUG_ALIAS(G, G_gpu_dx12); /* ========================== * @@ -96,8 +125,11 @@ INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(gpu_shutdown); INTERNAL void dx12_init_base(struct sys_window *window); INTERNAL void dx12_init_shaders(void); -struct gpu_startup_receipt gpu_startup(struct sys_window *window) +struct gpu_startup_receipt gpu_startup(struct work_startup_receipt *work_sr, struct sys_window *window) { + __prof; + (UNUSED)work_sr; + /* Initialize handles pool */ G.handle_entries_mutex = sys_mutex_alloc(); G.handle_entries_arena = arena_alloc(GIGABYTE(64)); @@ -115,6 +147,7 @@ struct gpu_startup_receipt gpu_startup(struct sys_window *window) INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(gpu_shutdown) { + __prof; #if DX12_DEBUG /* Release objects to make live object reporting less noisy */ for (u64 i = 0; i < ARRAY_COUNT(G.swapchain_rtvs); ++i) { @@ -371,8 +404,368 @@ INTERNAL void dx12_init_base(struct sys_window *window) * Dx12 shader initialization * ========================== */ +/* TODO: Rename 'mesh shader' to 'triangle shader' or something */ +/* TODO: Move shader structs into shared file */ + +/* ============= */ +/* Mesh shader */ + +/* ============= */ +/* Texture shader */ + +PACK(struct dx12_texture_shader_uniform { + struct mat4x4 vp; + u32 instance_offset; +}); + +PACK(struct dx12_texture_shader_instance { + struct xform xf; + struct v2 uv0; + struct v2 uv1; + u32 tint_srgb; + f32 emittance; +}); + +/* ============= */ +/* Grid shader */ + +/* ============= */ +/* Init shaders */ + +INTERNAL struct dx12_shader_result *shader_alloc_from_descs(struct arena *arena, u64 num_shaders, struct dx12_shader_desc *descs); +INTERNAL void shader_release(struct dx12_shader *shader); + INTERNAL void dx12_init_shaders(void) { + __prof; + struct temp_arena
scratch = scratch_begin_no_conflict(); + struct dx12_shader_desc shader_descs[] = { + /* Texture shader */ + { + .name = "shaders/texture.hlsl", + .flags = DX12_SHADER_DESC_FLAG_VS | DX12_SHADER_DESC_FLAG_PS + } + }; + + struct dx12_shader_result *results = shader_alloc_from_descs(scratch.arena, ARRAY_COUNT(shader_descs), shader_descs); + for (u64 i = 0; i < ARRAY_COUNT(shader_descs); ++i) { + struct dx12_shader_result *result = &results[i]; + if (result->errors_text_len <= 0) { + /* TODO */ + } else { + shader_release(&result->shader); + } + } + + scratch_end(scratch); +} + +/* ========================== * + * Shader include handler + * ========================== */ + +struct dx12_include_handler { + ID3DInclude d3d_handler; + ID3DIncludeVtbl vtbl; + struct dx12_shader *shader; + b32 has_open_resource; + struct resource res; +}; + +INTERNAL HRESULT dx12_include_open(ID3DInclude *d3d_handler, D3D_INCLUDE_TYPE include_type, LPCSTR name_cstr, LPCVOID parent_data, LPCVOID *data_out, UINT *data_len_out) +{ + __prof; + (UNUSED)include_type; + (UNUSED)parent_data; + HRESULT result = E_FAIL; + struct dx12_include_handler *handler = (struct dx12_include_handler *)d3d_handler; + struct string name = string_from_cstr_no_limit((char *)name_cstr); + + if (handler->has_open_resource) { + sys_panic(LIT("Dx12 include handler somehow already has a resource open")); + } + + struct resource res = resource_open(name); + if (resource_exists(&res)) { + handler->res = res; + handler->has_open_resource = true; /* Stays open until dx12_include_close, since data_out points into the resource's data */ + struct string data = resource_get_data(&res); + *data_out = data.text; + *data_len_out = data.len; + result = S_OK; + } + +#if 0 +#if RESOURCE_RELOADING + shader_add_include(&G.shader_info[handler->shader->kind], name); +#endif +#endif + + return result; +} + +INTERNAL HRESULT dx12_include_close(ID3DInclude *d3d_handler, LPCVOID data) +{ + __prof; + (UNUSED)data; + struct dx12_include_handler *handler = (struct dx12_include_handler *)d3d_handler; + if (handler->has_open_resource)
{ + resource_close(&handler->res); + handler->has_open_resource = false; + } + return S_OK; +} + +INTERNAL struct dx12_include_handler dx12_include_handler_alloc(struct dx12_shader *shader) +{ + static ID3DIncludeVtbl shared_vtbl = { .Open = dx12_include_open, .Close = dx12_include_close }; /* Static storage: the handler is returned by value, so lpVtbl must not point at a local of this function */ + struct dx12_include_handler handler = ZI; + handler.vtbl = shared_vtbl; + handler.d3d_handler.lpVtbl = &shared_vtbl; + handler.shader = shader; + return handler; +} + +INTERNAL void dx12_include_handler_release(struct dx12_include_handler *handler) +{ + if (handler->has_open_resource) { + ASSERT(false); /* Resource should have been closed by handler by now */ + resource_close(&handler->res); + } +} + +/* ========================== * + * Shader compilation + * ========================== */ + + /* TODO: Compile shaders offline w/ dxc */ + +enum shader_compile_task_kind { + SHADER_COMPILE_TASK_KIND_VS, + SHADER_COMPILE_TASK_KIND_PS +}; + +struct shader_compile_task_arg { + /* In */ + enum shader_compile_task_kind kind; + struct dx12_shader *shader; + struct resource *src_res; + + /* Out */ + b32 success; + ID3DBlob *blob; + ID3DBlob *error_blob; +}; + +struct shader_load_task_arg { + struct dx12_shader *shader; + struct dx12_shader_result *result; +}; + +INTERNAL WORK_TASK_FUNC_DEF(shader_compile_task, comp_arg_raw) +{ + __prof; + struct shader_compile_task_arg *comp_arg = (struct shader_compile_task_arg *)comp_arg_raw; + struct dx12_shader *shader = comp_arg->shader; + struct string shader_name = string_from_cstr_no_limit(shader->desc.name); + enum shader_compile_task_kind kind = comp_arg->kind; + struct resource *src_res = comp_arg->src_res; + + struct temp_arena scratch = scratch_begin_no_conflict(); + { + b32 success = false; + ID3DBlob *blob = NULL; + ID3DBlob *error_blob = NULL; + struct dx12_include_handler include_handler = dx12_include_handler_alloc(shader); + + if (resource_exists(src_res)) { + #if 0 + #if RESOURCE_RELOADING + shader_reset_includes(shader_desc); + #endif + #endif + + u32 d3d_compile_flags = 0; +
#if DX12_SHADER_DEBUG + d3d_compile_flags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_ENABLE_STRICTNESS; + #else + d3d_compile_flags |= D3DCOMPILE_OPTIMIZATION_LEVEL3; + #endif + + /* Compile shader */ + { + struct string shader_src = resource_get_data(src_res); + logf_info("Compiling shader \"%F\"", FMT_STR(shader_name)); + /* Compile shader */ + struct string friendly_name = string_cat(scratch.arena, LIT("res/"), shader_name); + char *friendly_name_cstr = cstr_from_string(scratch.arena, friendly_name); + char *entry_point = NULL; + char *target = NULL; + switch (kind) { + case SHADER_COMPILE_TASK_KIND_VS: + { + entry_point = "vs_main"; + target = "vs_5_1"; + } break; + + case SHADER_COMPILE_TASK_KIND_PS: + { + entry_point = "ps_main"; + target = "ps_5_1"; + } break; + } + HRESULT hr = D3DCompile(shader_src.text, shader_src.len, friendly_name_cstr, NULL, (ID3DInclude *)&include_handler, entry_point, target, d3d_compile_flags, 0, &blob, &error_blob); + success = SUCCEEDED(hr) && !error_blob; + } + + #if 0 + if (success) { + /* Get number of device layout elements from NULL terminated array */ + u32 elem_count = 0; + for (; elem_count < ARRAY_COUNT(shader_desc->input_layout_desc); ++elem_count) { + const D3D11_INPUT_ELEMENT_DESC *d = &shader_desc->input_layout_desc[elem_count]; + if (d->SemanticName == NULL) { + break; + } + } + + /* Create device layout */ + if (elem_count > 0) { + HRESULT hr = ID3D11Device_CreateInputLayout(G.dev, shader_desc->input_layout_desc, elem_count, ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), &shader->input_layout); + if (!SUCCEEDED(hr)) { + success = false; + error_str = LIT("Failed to create input layout"); + } + } + } + #endif + } + + #if 0 + if (success) { + logf_success("Finished compiling shader \"%F\" in %F seconds", FMT_STR(shader_name), FMT_FLOAT(SECONDS_FROM_NS(sys_time_ns() - start_ns))); + } + #endif + + comp_arg->success = success; + comp_arg->blob = blob; + comp_arg->error_blob 
= error_blob; + + #if 0 + shader->valid = true; + #endif + + dx12_include_handler_release(&include_handler); + } + scratch_end(scratch); +} + +INTERNAL WORK_TASK_FUNC_DEF(shader_load_task, load_arg_raw) +{ + __prof; + struct shader_load_task_arg *load_arg = (struct shader_load_task_arg *)load_arg_raw; + struct dx12_shader *shader = load_arg->shader; + struct dx12_shader_desc desc = shader->desc; + struct dx12_shader_result *result = load_arg->result; + + struct temp_arena scratch = scratch_begin_no_conflict(); + { + struct string shader_name = string_from_cstr_no_limit(desc.name); + logf_info("Loading shader '%F'", FMT_STR(shader_name)); + struct resource src_res = resource_open(shader_name); + (UNUSED)result; + + struct shader_compile_task_arg vs = ZI; + vs.kind = SHADER_COMPILE_TASK_KIND_VS; + vs.src_res = &src_res; + vs.shader = shader; + + struct shader_compile_task_arg ps = ZI; + ps.kind = SHADER_COMPILE_TASK_KIND_PS; + ps.src_res = &src_res; + ps.shader = shader; + + struct work_slate ws = work_slate_begin(); + if (desc.flags & DX12_SHADER_DESC_FLAG_VS) { + work_slate_push_task(&ws, shader_compile_task, &vs); + } + if (desc.flags & DX12_SHADER_DESC_FLAG_PS) { + work_slate_push_task(&ws, shader_compile_task, &ps); + } + struct work_handle work = work_slate_end_and_help(&ws, WORK_PRIORITY_NORMAL); + work_wait(work); + + b32 success = (!(desc.flags & DX12_SHADER_DESC_FLAG_VS) || vs.success) && (!(desc.flags & DX12_SHADER_DESC_FLAG_PS) || ps.success); /* Only stages named in desc.flags were dispatched; a stage that never ran stays zero-initialised and must not count as a failure */ + if (success) { + } else { + struct string error_str = LIT("Unknown error"); + ID3D10Blob *error_blob = vs.error_blob ?
vs.error_blob : ps.error_blob; + if (error_blob) { + u64 error_blob_cstr_len = ID3D10Blob_GetBufferSize(error_blob); + char *error_blob_cstr = (char *)ID3D10Blob_GetBufferPointer(error_blob); + struct string error_blob_str = string_copy(scratch.arena, string_from_cstr(error_blob_cstr, error_blob_cstr_len)); + if (string_ends_with(error_blob_str, LIT("\n"))) { + /* Remove trailing newline */ + error_blob_str.len -= 1; + } + if (error_blob_str.len > 0) { + error_str = error_blob_str; + } + } + result->errors_text_len = (error_str.len < ARRAY_COUNT(result->errors_text)) ? error_str.len : ARRAY_COUNT(result->errors_text); /* Clamp to the fixed-size buffer so the copy neither overruns errors_text nor reads past error_str */ + MEMCPY(result->errors_text, error_str.text, result->errors_text_len); + } + + if (vs.blob) { + ID3D10Blob_Release(vs.blob); + } + if (vs.error_blob) { + ID3D10Blob_Release(vs.error_blob); + } + if (ps.blob) { + ID3D10Blob_Release(ps.blob); + } + if (ps.error_blob) { + ID3D10Blob_Release(ps.error_blob); + } + resource_close(&src_res); + } + scratch_end(scratch); +} + +INTERNAL struct dx12_shader_result *shader_alloc_from_descs(struct arena *arena, u64 num_shaders, struct dx12_shader_desc *descs) +{ + __prof; + struct dx12_shader_result *results = arena_push_array(arena, struct dx12_shader_result, num_shaders); + struct shader_load_task_arg *task_args = arena_push_array(arena, struct shader_load_task_arg, num_shaders); + struct sys_mutex arena_mutex = sys_mutex_alloc(); + + /* Create & dispatch work */ + struct work_slate ws = work_slate_begin(); + for (u64 i = 0; i < num_shaders; ++i) { + struct dx12_shader_result *result = &results[i]; + + struct dx12_shader *shader = &result->shader; /* Use this slot's shader; results->shader would alias slot 0 for every task */ + shader->desc = descs[i]; + + struct shader_load_task_arg *arg = &task_args[i]; + arg->shader = shader; + arg->result = result; + + work_slate_push_task(&ws, shader_load_task, arg); + } + struct work_handle work = work_slate_end_and_help(&ws, WORK_PRIORITY_NORMAL); + work_wait(work); + + sys_mutex_release(&arena_mutex); + return results; +} + +INTERNAL void shader_release(struct dx12_shader *shader) +{ +
__prof; + /* TODO */ + (UNUSED)shader; } /* ========================== *