From 3707db9d94dea4f437dcfa2743c8fd7f9b14610e Mon Sep 17 00:00:00 2001 From: jacob Date: Fri, 6 Mar 2026 21:20:43 -0800 Subject: [PATCH] implement GPU zones w/ optional AMD GPU Service & Pix Event runtimes --- src/base/base.cgh | 36 ++- src/base/base_win32/base_win32.c | 60 +++- src/gpu/gpu_core.h | 7 +- src/gpu/gpu_dx12/gpu_dx12_core.c | 461 +++++++++++++++---------------- src/gpu/gpu_dx12/gpu_dx12_core.h | 117 ++++++-- src/meta/meta.c | 3 +- src/pp/pp_vis/pp_vis_core.c | 18 +- src/ui/ui_core.c | 4 + 8 files changed, 423 insertions(+), 283 deletions(-) diff --git a/src/base/base.cgh b/src/base/base.cgh index b40dff4c..74ae8247 100644 --- a/src/base/base.cgh +++ b/src/base/base.cgh @@ -251,6 +251,9 @@ #define FALLTHROUGH #endif +//- Defer +#define DeferFor(begin, end) for(i32 __defer_loop__ = ((begin), 0); !__defer_loop__; __defer_loop__ = 1, (end)) + //- Preprocessor concatenation #define CAT1(a, b) a ## b #define CAT(a, b) CAT1(a, b) @@ -730,7 +733,7 @@ Struct(CpuTopologyInfo) #endif //////////////////////////////////////////////////////////// -//~ Shader linkage types +//~ Shader linkage Struct(VertexShaderDesc) { ResourceKey resource; u32 x, y, z; }; Struct(PixelShaderDesc) { ResourceKey resource; u32 x, y, z; }; @@ -762,6 +765,36 @@ Struct(ComputeShaderDesc) { ResourceKey resource; u32 x, y, z; }; #define DeclPixelShader(name, resource_hash) #endif +//////////////////////////////////////////////////////////// +//~ Dynamic api linkage + +#if IsCpu + Struct(ApiProcDesc) + { + String name; + void **addr_ptr; + }; + + Struct(ApiDesc) + { + String path; + u64 procs_count; + ApiProcDesc *procs; + }; + + #define DeclApiProcVarX(_name, _return_type, _signature) _return_type (*_name) _signature; + #define DeclApiProcDescX(_name, _return_type, _signature) { .name = CompLit(Stringize(_name)), .addr_ptr = (void **)&_name }, + #define DeclApiFromXList(api_name, xlist, api_path) \ + xlist(DeclApiProcVarX) \ + Global Readonly ApiProcDesc CAT(ApiProcs__,api_name)[] = { 
xlist(DeclApiProcDescX) }; \ + Global Readonly ApiDesc api_name = { \ + .path = CompLit(api_path), \ + .procs_count = countof(CAT(ApiProcs__,api_name)), \ + .procs = CAT(ApiProcs__,api_name), \ + } \ + /* ----------------------------------------------------------------------------------- */ +#endif + //////////////////////////////////////////////////////////// //~ Exit callback types @@ -798,6 +831,7 @@ Inline u64 MixU64s(u64 seed_a, u64 seed_b) String GetEngineDirectory(void); String GetLibsDirectory(void); String GetAppDirectory(void); + void LoadApi(ApiDesc api); void Echo(String msg); b32 Panic(String msg); b32 DebugBreakPrompt(String title, String msg); diff --git a/src/base/base_win32/base_win32.c b/src/base/base_win32/base_win32.c index 96d3f370..e0c47929 100644 --- a/src/base/base_win32/base_win32.c +++ b/src/base/base_win32/base_win32.c @@ -63,6 +63,41 @@ String GetAppDirectory(void) return W32.app_dir_path; } +void LoadApi(ApiDesc api) +{ + TempArena scratch = BeginScratchNoConflict(); + { + // Load module + HMODULE module = 0; + { + wchar_t *api_path_wstr = WstrFromString(scratch.arena, api.path); + module = LoadLibraryW(api_path_wstr); + if (!module) + { + Panic(StringF(scratch.arena, "Module '%F' not found", FmtString(api.path))); + } + } + // Load procs + for (u64 proc_idx = 0; proc_idx < api.procs_count; ++proc_idx) + { + ApiProcDesc proc = api.procs[proc_idx]; + char *proc_name_cstr = CstrFromString(scratch.arena, proc.name); + void *addr = (void *)GetProcAddress(module, proc_name_cstr); + if (!addr) + { + Panic(StringF( + scratch.arena, + "Failed to load '%F' - '%F' does not exist in the module", + FmtString(api.path), + FmtString(proc.name) + )); + } + *proc.addr_ptr = addr; + } + } + EndScratch(scratch); +} + void Echo(String msg) { HANDLE console_handle = GetStdHandle(STD_OUTPUT_HANDLE); @@ -350,7 +385,7 @@ String W32_StringFromError(Arena *arena, i32 code) 0, 0 ); - result = StringFromWstr(arena, msg_wstr, msg_len); + result = 
TrimWhitespace(StringFromWstr(arena, msg_wstr, msg_len)); if (msg_wstr) { LocalFree(msg_wstr); @@ -636,22 +671,14 @@ i32 W32_Main(void) } else { - err = SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS); - if (err != ERROR_SUCCESS) - { - LogErrorF( - "Failed call to SetDefaultDllDirectories: %F", - FmtString(W32_StringFromError(perm, err)) - ); - } - else + if (SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS)) { String libs_path = PathFromString(perm, StringF(perm, "%F/libs/", FmtString(W32.engine_dir_path)), '\\'); wchar_t *libs_path_wstr = WstrFromString(perm, libs_path); DLL_DIRECTORY_COOKIE cookie = AddDllDirectory(libs_path_wstr); - err = GetLastError(); - if (!cookie || err != ERROR_SUCCESS) + if (!cookie) { + err = GetLastError(); LogErrorF( "Failed to add libs directory to dll search path at \"%F\": %F", FmtString(path), @@ -659,6 +686,14 @@ i32 W32_Main(void) ); } } + else + { + err = GetLastError(); + LogErrorF( + "Failed call to SetDefaultDllDirectories: %F", + FmtString(W32_StringFromError(perm, err)) + ); + } } } @@ -725,6 +760,7 @@ i32 W32_Main(void) // Wait for exit start or panic if (!Atomic32Fetch(&W32.panicking)) { + LogInfoF("Startup time: %Fs", FmtFloat(SecondsFromNs(TimeNs()), .p = 3)); HANDLE handles[] = { W32.exit_event, W32.panic_event, diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index c99ada48..73059600 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -587,10 +587,11 @@ void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size void G_Sync(G_CommandListHandle cl); void G_SyncLayout(G_CommandListHandle cl, G_ResourceHandle resource, G_Layout layout); -//- Event +//- Zone -void G_BeginEvent(G_CommandListHandle cl, String name); -void G_EndEvent(G_CommandListHandle cl); +void G_PushZone(G_CommandListHandle cl, char *name_lit_cstr); +void G_PopZone(G_CommandListHandle cl); +#define G_ZoneDF(cl, name_lit_cstr) DeferFor(G_PushZone((cl), (name_lit_cstr)), G_PopZone(cl)) //- Cpu -> Gpu 
staged copy diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index d29e6fcd..09309c79 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -10,47 +10,54 @@ void G_Bootstrap(void) Arena *perm = PermArena(); // NOTE: Nsight seems to have trouble attaching when independent devices are enabled - G_D12.independent_devices_enabled = !CommandlineArgFromName(Lit("no-d3d12-independent-devices")).exists; G_D12.validation_layer_enabled = CommandlineArgFromName(Lit("gpu-debug-validation")).exists; G_D12.debug_layer_enabled = G_D12.validation_layer_enabled || CommandlineArgFromName(Lit("gpu-debug")).exists; - G_D12.events_enabled = IsDeveloperModeEnabled; - - if (G_D12.independent_devices_enabled && IsRunningInWine()) - { - // NOTE: Independent devices only supported in newer versions of Proton, which just ignores them anyway - LogInfoF("Wine detected, disabling D3D12 independent devices"); - G_D12.independent_devices_enabled = 0; - } - - LogInfoF("D3D12 independent devices enabled: %F", FmtSint(G_D12.independent_devices_enabled)); - LogInfoF("D3D12 events enabled: %F", FmtSint(G_D12.events_enabled)); - LogInfoF("D3D12 debug layer enabled: %F", FmtSint(G_D12.debug_layer_enabled)); - LogInfoF("D3D12 validation layer enabled: %F", FmtSint(G_D12.validation_layer_enabled)); ////////////////////////////// - //- Load pix event runtime + //- Enable Pix event runtime - if (G_D12.events_enabled) + if (CommandlineArgFromName(Lit("pix")).exists) { - HMODULE pix = LoadLibraryW(L"WinPixEventRuntime"); - if (pix) + LoadApi(G_D12_PixApi); + LogInfoF("Pix runtime loaded"); + G_D12.pix_enabled = 1; + } + + ////////////////////////////// + //- Enable AMD GPU Service runtime + + if (CommandlineArgFromName(Lit("ags")).exists) + { + LoadApi(G_D12_AgsApi); + i32 ags_version = agsGetVersionNumber(); + i32 err = agsInitialize(ags_version, 0, &G_D12.ags_ctx, 0); + if (err == 0 && G_D12.ags_ctx) { - G_D12_PixBeginEventOnCommandList = 
(G_D12_PixBeginEventOnCommandListFunc *)GetProcAddress(pix, "PIXBeginEventOnCommandList"); - G_D12_PixEndEventOnCommandList = (G_D12_PixEndEventOnCommandListFunc *)GetProcAddress(pix, "PIXEndEventOnCommandList"); - G_D12_PixSetMarkerOnCommandList = (G_D12_PixSetMarkerOnCommandListFunc *)GetProcAddress(pix, "PIXSetMarkerOnCommandList"); - if (!G_D12_PixBeginEventOnCommandList && !G_D12_PixEndEventOnCommandList && !G_D12_PixSetMarkerOnCommandList) - { - LogErrorF("Failed to retrieve pix procedures"); - } - else - { - LogInfoF("Pix enabled"); - G_D12.pix_enabled = 1; - } + LogInfoF("AMD GPU Service runtime loaded"); + G_D12.ags_enabled = 1; } else { - LogInfoF("Pix event runtime not found"); + Panic(StringF(perm, "Failed to initialize AMD GPU Service with version %F", FmtSint(ags_version))); + } + } + + ////////////////////////////// + //- Enable independent devices + + if (!CommandlineArgFromName(Lit("no-d3d12-independent-devices")).exists) /* the "no-" flag opts out; enabled unless it is passed */ + { + G_D12.independent_devices_enabled = 1; + if (IsRunningInWine()) + { + // NOTE: Independent devices only supported in newer versions of Proton, which just ignores them anyway + LogInfoF("Wine detected, disabling D3D12 independent devices"); + G_D12.independent_devices_enabled = 0; + } + if (G_D12.ags_enabled) + { + LogInfoF("AMD GPU Service enabled, disabling D3D12 independent devices "); + G_D12.independent_devices_enabled = 0; } } @@ -158,7 +165,7 @@ void G_Bootstrap(void) { HRESULT hr = 0; - // Create dxgi factory + //- Create dxgi factory { u32 dxgi_factory_flags = 0; if (G_D12.debug_layer_enabled) @@ -172,135 +179,155 @@ void G_Bootstrap(void) } } - // Create device + //- Fetch adapter + String adapter_name = Zi; { IDXGIAdapter3 *adapter = 0; - ID3D12Device10 *device = 0; - String adapter_name = Zi; - String error = Lit("Failed to initialize D3D12 device"); - u32 adapter_index = 0; - b32 done = 0; - i32 skips = 0; // For iGPU testing - while (!done) { - hr = IDXGIFactory6_EnumAdapterByGpuPreference(G_D12.dxgi_factory, 
adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter3, (void **)&adapter); - if (SUCCEEDED(hr)) + i32 target_adapter_idx = 0; + for (i32 adapter_idx = target_adapter_idx; adapter_idx >= 0; --adapter_idx) { + hr = IDXGIFactory6_EnumAdapterByGpuPreference(G_D12.dxgi_factory, adapter_idx, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter3, (void **)&adapter); + if (SUCCEEDED(hr)) { - DXGI_ADAPTER_DESC1 desc; - IDXGIAdapter3_GetDesc1(adapter, &desc); - adapter_name = StringFromWstrNoLimit(scratch.arena, desc.Description); - LogInfoF("D3D12 adapter name: '%F'", FmtString(adapter_name)); + break; } - if (skips <= 0) + if (adapter) { IDXGIAdapter3_Release(adapter); } /* adapter may still be null on the failure path; avoid a null vtable call */ + adapter = 0; + } + } + if (!adapter) + { + Panic(Lit("Failed to locate D3D12 adapter")); + } + { + DXGI_ADAPTER_DESC1 desc; + IDXGIAdapter3_GetDesc1(adapter, &desc); + adapter_name = StringFromWstrNoLimit(scratch.arena, desc.Description); + LogInfoF("D3D12 adapter: '%F'", FmtString(adapter_name)); + } + G_D12.adapter = adapter; + } + + //- Create device + { + const IID *iid = &IID_ID3D12Device10; + D3D_FEATURE_LEVEL feature_level = D3D_FEATURE_LEVEL_12_0; + String feature_level_name = Lit("D3D_FEATURE_LEVEL_12_0"); + + ID3D12Device10 *device = 0; + String error = Lit("Failed to initialize D3D12 device"); + if (G_D12.independent_devices_enabled) + { + LogInfoF("Creating independent D3D12 device..."); + hr = ID3D12DeviceFactory_CreateDevice(G_D12.independent.device_factory, (IUnknown *)G_D12.adapter, feature_level, iid, (void **)&device); + } + else + { + if (G_D12.ags_enabled) + { + LogInfoF("Creating D3D12 device using the AMD GPU Service runtime..."); + G_D12_AgsDeviceResult ags_device_result = Zi; { - if (G_D12.independent_devices_enabled) - { - hr = ID3D12DeviceFactory_CreateDevice(G_D12.independent.device_factory, (IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device); - } - else - { - hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, 
&IID_ID3D12Device10, (void **)&device); - } - done = 1; + G_D12_AgsDeviceParams ags_device_params = { + .adapter = (IDXGIAdapter *)G_D12.adapter, + .iid = *iid, + .feature_level = feature_level + }; + G_D12_AgsExtensionParams ags_extension_params = { + .app_name_wstr = WstrFromString(scratch.arena, GetAppName()), + .engine_name_wstr = L"Cabin", + .app_version = 1, + .engine_version = 1, + }; + agsDriverExtensionsDX12_CreateDevice(G_D12.ags_ctx, &ags_device_params, &ags_extension_params, &ags_device_result); } - else + device = (ID3D12Device10 *)ags_device_result.device; + if (!(ags_device_result.extensions & G_D12_AgsExtensionFlag_UserMarkers)) { - skips -= 1; - adapter_index += 1; - if (device) - { - ID3D12Device_Release(device); - } - if (adapter) - { - IDXGIAdapter3_Release(adapter); - } - adapter = 0; - device = 0; + LogWarningF("AMD GPU Service markers not supported, disabling"); + G_D12.ags_enabled = 0; } } else { - done = 1; + LogInfoF("Creating D3D12 device..."); + hr = D3D12CreateDevice((IUnknown *)G_D12.adapter, feature_level, iid, (void **)&device); } } - if (!device || !SUCCEEDED(hr)) { - if (adapter_name.len > 0) - { - error = StringF( - scratch.arena, - "Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.", - FmtString(adapter_name) - ); - } + error = StringF( + scratch.arena, + "Could not initialize device '%F' with feature level %F. 
Ensure that the device is capable and drivers are up to date.", + FmtString(adapter_name), /* fills the device '%F' slot */ + FmtString(feature_level_name) /* fills the feature level %F slot */ + ); Panic(error); } - - if (device) - { - StringList missing = Zi; - { - D3D12_FEATURE_DATA_SHADER_MODEL shader_model = { D3D_SHADER_MODEL_6_6 }; - D3D12_FEATURE_DATA_D3D12_OPTIONS options = Zi; - D3D12_FEATURE_DATA_D3D12_OPTIONS9 options9 = Zi; - D3D12_FEATURE_DATA_D3D12_OPTIONS11 options11 = Zi; - D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = Zi; - { - ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model)); - ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); - ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS9, &options9, sizeof(options9)); - ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS11, &options11, sizeof(options11)); - ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12)); - } - - if (shader_model.HighestShaderModel < D3D_SHADER_MODEL_6_6) - { - PushStringToList(scratch.arena, &missing, Lit(" - Shader model 6.6")); - } - if (options.ResourceBindingTier < D3D12_RESOURCE_BINDING_TIER_3) - { - PushStringToList(scratch.arena, &missing, Lit(" - Resource binding tier 3")); - } - // if (!options.DoublePrecisionFloatShaderOps) - // { - // PushStringToList(scratch.arena, &missing, Lit(" - Double precision shader ops")); - // } - // if (!options9.AtomicInt64OnTypedResourceSupported) - // { - // PushStringToList(scratch.arena, &missing, Lit(" - 64-bit atomics on typed resources")); - // } - // if (!options11.AtomicInt64OnDescriptorHeapResourceSupported) - // { - // PushStringToList(scratch.arena, &missing, Lit(" - 64-bit atomics on descriptor heap resources")); - // } - if (!options12.EnhancedBarriersSupported) - { - PushStringToList(scratch.arena, &missing, Lit(" - Enhanced barriers")); - } - } - if (missing.count > 0) - { - String msg = StringF( - 
scratch.arena, - "Could not intiialize D3D12\n\n" - "The driver for device '%F' does not support the following feature(s):\n\n" - "%F\n\n" - "Ensure drivers are up to date and the device is capable.", - FmtString(adapter_name), - FmtString(StringFromList(scratch.arena, missing, Lit("\n"))) - ); - Panic(msg); - } - } - - G_D12.dxgi_adapter = adapter; + LogInfoF("Device created"); G_D12.device = device; } - // Enable debug layer callbacks + //- Validate device capability + if (G_D12.device) + { + StringList missing = Zi; + { + D3D12_FEATURE_DATA_SHADER_MODEL shader_model = { D3D_SHADER_MODEL_6_6 }; + D3D12_FEATURE_DATA_D3D12_OPTIONS options = Zi; + D3D12_FEATURE_DATA_D3D12_OPTIONS9 options9 = Zi; + D3D12_FEATURE_DATA_D3D12_OPTIONS11 options11 = Zi; + D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = Zi; + { + ID3D12Device_CheckFeatureSupport(G_D12.device, D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model)); + ID3D12Device_CheckFeatureSupport(G_D12.device, D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); + ID3D12Device_CheckFeatureSupport(G_D12.device, D3D12_FEATURE_D3D12_OPTIONS9, &options9, sizeof(options9)); + ID3D12Device_CheckFeatureSupport(G_D12.device, D3D12_FEATURE_D3D12_OPTIONS11, &options11, sizeof(options11)); + ID3D12Device_CheckFeatureSupport(G_D12.device, D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12)); + } + + if (shader_model.HighestShaderModel < D3D_SHADER_MODEL_6_6) + { + PushStringToList(scratch.arena, &missing, Lit(" - Shader model 6.6")); + } + if (options.ResourceBindingTier < D3D12_RESOURCE_BINDING_TIER_3) + { + PushStringToList(scratch.arena, &missing, Lit(" - Resource binding tier 3")); + } + // if (!options.DoublePrecisionFloatShaderOps) + // { + // PushStringToList(scratch.arena, &missing, Lit(" - Double precision shader ops")); + // } + // if (!options9.AtomicInt64OnTypedResourceSupported) + // { + // PushStringToList(scratch.arena, &missing, Lit(" - 64-bit atomics on typed resources")); + // } + // if 
(!options11.AtomicInt64OnDescriptorHeapResourceSupported) + // { + // PushStringToList(scratch.arena, &missing, Lit(" - 64-bit atomics on descriptor heap resources")); + // } + if (!options12.EnhancedBarriersSupported) + { + PushStringToList(scratch.arena, &missing, Lit(" - Enhanced barriers")); + } + } + if (missing.count > 0) + { + String msg = StringF( + scratch.arena, + "Could not intiialize D3D12\n\n" + "The driver for device '%F' does not support the following feature(s):\n\n" + "%F\n\n" + "Ensure drivers are up to date and the device is capable.", + FmtString(adapter_name), + FmtString(StringFromList(scratch.arena, missing, Lit("\n"))) + ); + Panic(msg); + } + } + + //- Enable debug layer callbacks if (G_D12.debug_layer_enabled) { // D3D12 debug @@ -337,7 +364,7 @@ void G_Bootstrap(void) } } - // Retrieve device configuration + //- Retrieve device configuration if (G_D12.independent_devices_enabled) { hr = ID3D12Device_QueryInterface(G_D12.device, &IID_ID3D12DeviceConfiguration, (void **)&G_D12.independent.device_config); @@ -1851,57 +1878,11 @@ void *G_HostPointerFromResource(G_ResourceHandle resource_handle) G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl) { - // Grab chunk - G_D12_CmdChunk *chunk = cl->last_cmd_chunk; - { - if (chunk && chunk->cmds_count >= G_D12_CmdsPerChunk) - { - chunk = 0; - } - if (!chunk) - { - Lock lock = LockE(&G_D12.free_cmd_chunks_mutex); - { - chunk = G_D12.first_free_cmd_chunk; - if (chunk) - { - G_D12.first_free_cmd_chunk = chunk->next; - } - } - Unlock(&lock); - if (chunk) - { - G_D12_Cmd *cmds = chunk->cmds; - ZeroStruct(chunk); - chunk->cmds = cmds; - } - } - if (!chunk) - { - Arena *perm = PermArena(); - chunk = PushStruct(perm, G_D12_CmdChunk); - chunk->cmds = PushStructsNoZero(perm, G_D12_Cmd, G_D12_CmdsPerChunk); - } - if (chunk != cl->last_cmd_chunk) - { - SllQueuePush(cl->first_cmd_chunk, cl->last_cmd_chunk, chunk); - } - } - // Push cmd to chunk - G_D12_Cmd *cmd = &chunk->cmds[chunk->cmds_count++]; - ZeroStruct(cmd); + 
G_D12_CmdNode *cmd_node = PushStruct(cl->arena, G_D12_CmdNode); + SllQueuePush(cl->first_cmd_node, cl->last_cmd_node, cmd_node); ++cl->cmds_count; - return cmd; -} - -G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v) -{ - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Constant; - cmd->constant.slot = slot; - CopyBytes(&cmd->constant.value, v, 4); - return cmd; + return &cmd_node->cmd; } G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) @@ -2320,15 +2301,24 @@ G_CommandListHandle G_PrepareCommandList(G_QueueKind queue) if (cl) { G_D12.first_free_cmd_list = cl->next; - ZeroStruct(cl); - } - else - { - Arena *perm = PermArena(); - cl = PushStruct(perm, G_D12_CmdList); } } Unlock(&lock); + + if (cl) + { + Arena *old_arena = cl->arena; + ZeroStruct(cl); + cl->arena = old_arena; + } + else + { + Arena *perm = PermArena(); + cl = PushStruct(perm, G_D12_CmdList); + cl->arena = AcquireArena(Mebi(16)); + } + + ResetArena(cl->arena); cl->queue_kind = queue; return G_D12_MakeHandle(G_CommandListHandle, cl); @@ -2341,36 +2331,6 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); TempArena scratch = BeginScratchNoConflict(); - // Flatten command chunks - u64 cmds_count = 0; - G_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, G_D12_Cmd, cl->cmds_count); - { - // Flatten command chunks - { - for (G_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next) - { - for (u64 cmd_chunk_idx = 0; cmd_chunk_idx < chunk->cmds_count; ++cmd_chunk_idx) - { - cmds[cmds_count++] = chunk->cmds[cmd_chunk_idx]; - } - } - } - // Free command chunks - { - Lock lock = LockE(&G_D12.free_cmd_chunks_mutex); - { - G_D12_CmdChunk *chunk = cl->first_cmd_chunk; - while (chunk) - { - G_D12_CmdChunk *next = chunk->next; - G_D12.first_free_cmd_chunk = chunk; - chunk = next; - } - } - Unlock(&lock); - } - } - ////////////////////////////// //- Build batches @@ -2381,15 +2341,15 
@@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) { G_D12_CmdBatch *batch = PushStruct(scratch.arena, G_D12_CmdBatch); - for (u64 cmd_idx = 0; cmd_idx < cmds_count; ++cmd_idx) + for (G_D12_CmdNode *cmd_node = cl->first_cmd_node; cmd_node; cmd_node = cmd_node->next) { - G_D12_Cmd *cmd = &cmds[cmd_idx]; + G_D12_Cmd *cmd = &cmd_node->cmd; G_D12_CmdKind cmd_kind = cmd->kind; if ( cmd_kind != G_D12_CmdKind_Barrier && cmd_kind != G_D12_CmdKind_Constant && - cmd_kind != G_D12_CmdKind_Event + cmd_kind != G_D12_CmdKind_Zone ) { batch->contains_hazard = 1; @@ -2702,6 +2662,35 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) } } break; + //- Event + + case G_D12_CmdKind_Zone: + { + if (cmd->zone.push) + { + if (G_D12.pix_enabled) + { + u64 color = (u64)cmd->zone.name_lit_cstr; + PIXBeginEventOnCommandList((ID3D12GraphicsCommandList *)d3d_cl, color, cmd->zone.name_lit_cstr); + } + if (G_D12.ags_enabled) + { + agsDriverExtensionsDX12_PushMarker(G_D12.ags_ctx, (ID3D12GraphicsCommandList *)d3d_cl, cmd->zone.name_lit_cstr); + } + } + else + { + if (G_D12.pix_enabled) + { + PIXEndEventOnCommandList((ID3D12GraphicsCommandList *)d3d_cl); + } + if (G_D12.ags_enabled) + { + agsDriverExtensionsDX12_PopMarker(G_D12.ags_ctx, (ID3D12GraphicsCommandList *)d3d_cl); + } + } + } break; + //- Copy bytes case G_D12_CmdKind_CopyBytes: @@ -3178,20 +3167,22 @@ void G_SyncLayout(G_CommandListHandle cl_handle, G_ResourceHandle resource_handl cmd->barrier.acquire = layout == G_Layout_Exclusive; } -//- Event +//- Zone -void G_BeginEvent(G_CommandListHandle cl_handle, String name) +void G_PushZone(G_CommandListHandle cl_handle, char *name_lit_cstr) { G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Event; + cmd->kind = G_D12_CmdKind_Zone; + cmd->zone.name_lit_cstr = name_lit_cstr; + cmd->zone.push = 1; } -void G_EndEvent(G_CommandListHandle cl_handle) +void G_PopZone(G_CommandListHandle cl_handle) { G_D12_CmdList *cl 
= G_D12_CmdListFromHandle(cl_handle); G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Event; + cmd->kind = G_D12_CmdKind_Zone; } //- Cpu -> Gpu staged copy @@ -3570,13 +3561,13 @@ G_Stats G_QueryStats(void) G_Stats result = Zi; { DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi; - IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.dxgi_adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info); + IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info); result.device_committed = info.CurrentUsage; result.device_budget = info.Budget; } { DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi; - IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.dxgi_adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info); + IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info); result.host_budget = info.Budget; result.host_committed = info.CurrentUsage; } diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index a23313ed..9bd61c0e 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -10,14 +10,6 @@ #pragma comment(lib, "d3d12") #pragma comment(lib, "dxgi") -//- Pix -typedef void(WINAPI* G_D12_PixBeginEventOnCommandListFunc)(ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString); -typedef void(WINAPI* G_D12_PixEndEventOnCommandListFunc)(ID3D12GraphicsCommandList* commandList); -typedef void(WINAPI* G_D12_PixSetMarkerOnCommandListFunc)(ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString); -G_D12_PixBeginEventOnCommandListFunc *G_D12_PixBeginEventOnCommandList; -G_D12_PixEndEventOnCommandListFunc *G_D12_PixEndEventOnCommandList; -G_D12_PixSetMarkerOnCommandListFunc *G_D12_PixSetMarkerOnCommandList; - //////////////////////////////////////////////////////////// //~ Tweakable definitions @@ -303,14 +295,12 @@ Struct(G_D12_ReleasableList) //////////////////////////////////////////////////////////// //~ Command list types -#define 
G_D12_CmdsPerChunk 256 - Enum(G_D12_CmdKind) { G_D12_CmdKind_None, G_D12_CmdKind_Constant, G_D12_CmdKind_Barrier, - G_D12_CmdKind_Event, + G_D12_CmdKind_Zone, G_D12_CmdKind_CopyBytes, G_D12_CmdKind_CopyTexels, G_D12_CmdKind_Compute, @@ -336,6 +326,12 @@ Struct(G_D12_Cmd) b32 acquire; } barrier; + struct + { + char *name_lit_cstr; + b32 push; + } zone; + struct { G_D12_Resource *dst; @@ -386,18 +382,17 @@ Struct(G_D12_Cmd) }; }; -Struct(G_D12_CmdChunk) +Struct(G_D12_CmdNode) { - G_D12_CmdChunk *next; - struct G_D12_CmdList *cl; - G_D12_Cmd *cmds; - u64 cmds_count; + G_D12_CmdNode *next; + G_D12_Cmd cmd; }; Struct(G_D12_CmdList) { G_D12_CmdList *next; G_QueueKind queue_kind; + Arena *arena; G_D12_DescriptorList reset_descriptors; G_D12_ReleasableList releases; @@ -405,10 +400,9 @@ Struct(G_D12_CmdList) G_D12_StagingRegionNode *first_staging_region; G_D12_StagingRegionNode *last_staging_region; - G_D12_CmdChunk *first_cmd_chunk; - G_D12_CmdChunk *last_cmd_chunk; - u64 chunks_count; u64 cmds_count; + G_D12_CmdNode *first_cmd_node; + G_D12_CmdNode *last_cmd_node; }; //////////////////////////////////////////////////////////// @@ -519,6 +513,51 @@ Struct(G_D12_Swapchain) G_D12_Resource backbuffers[G_D12_SwapchainBufferCount]; }; +//////////////////////////////////////////////////////////// +//~ AMD GPU Service ABI types + +Struct(G_D12_AgsContext); + +Enum(G_D12_AgsExtensionFlag) +{ + G_D12_AgsExtensionFlag_None = 0, + G_D12_AgsExtensionFlag_Intrinsics16 = (1 << 0), + G_D12_AgsExtensionFlag_Intrinsics17 = (1 << 1), + G_D12_AgsExtensionFlag_UserMarkers = (1 << 2), + G_D12_AgsExtensionFlag_AppRegistration = (1 << 3), + G_D12_AgsExtensionFlag_UAVBindSlot = (1 << 4), + G_D12_AgsExtensionFlag_Intrinsics19 = (1 << 5), + G_D12_AgsExtensionFlag_BaseVertex = (1 << 6), + G_D12_AgsExtensionFlag_BaseInstance = (1 << 7), + G_D12_AgsExtensionFlag_GetWaveSize = (1 << 8), + G_D12_AgsExtensionFlag_FloatConversion = (1 << 9), + G_D12_AgsExtensionFlag_ReadLaneAt = (1 << 10), + 
G_D12_AgsExtensionFlag_RayHitToken = (1 << 11), + G_D12_AgsExtensionFlag_ShaderClock = (1 << 12), +}; + +Struct(G_D12_AgsDeviceParams) +{ + IDXGIAdapter *adapter; + IID iid; + D3D_FEATURE_LEVEL feature_level; +}; + +Struct(G_D12_AgsExtensionParams) +{ + wchar_t *app_name_wstr; + wchar_t *engine_name_wstr; + u32 app_version; + u32 engine_version; + u32 uav_slot; +}; + +Struct(G_D12_AgsDeviceResult) +{ + ID3D12Device *device; + G_D12_AgsExtensionFlag extensions; +}; + //////////////////////////////////////////////////////////// //~ State types @@ -530,12 +569,15 @@ Struct(G_D12_AsyncCtx) Struct(G_D12_Ctx) { - IsolatedAtomic64 resource_creation_gen; b32 independent_devices_enabled; b32 debug_layer_enabled; b32 validation_layer_enabled; - b32 events_enabled; + b32 pix_enabled; + b32 ags_enabled; + G_D12_AgsContext *ags_ctx; + + IsolatedAtomic64 resource_creation_gen; // Stats Atomic64 arenas_count; @@ -560,10 +602,6 @@ Struct(G_D12_Ctx) Mutex free_cmd_lists_mutex; G_D12_CmdList *first_free_cmd_list; - // Command chunks - Mutex free_cmd_chunks_mutex; - G_D12_CmdChunk *first_free_cmd_chunk; - // Swapchains Mutex free_swapchains_mutex; G_D12_Swapchain *first_free_swapchain; @@ -578,7 +616,7 @@ Struct(G_D12_Ctx) // Device IDXGIFactory6 *dxgi_factory; - IDXGIAdapter3 *dxgi_adapter; + IDXGIAdapter3 *adapter; ID3D12Device10 *device; // Release-queue @@ -599,6 +637,32 @@ Struct(G_D12_ThreadLocalCtx) extern G_D12_Ctx G_D12; extern ThreadLocal G_D12_ThreadLocalCtx G_D12_tl; +//////////////////////////////////////////////////////////// +//~ Pix debug marker API + +#define G_D12_PixApiXList(X) \ + X(PIXBeginEventOnCommandList, void, (ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString)) \ + X(PIXEndEventOnCommandList, void, (ID3D12GraphicsCommandList* commandList)) \ + X(PIXSetMarkerOnCommandList, void, (ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString)) \ +/* 
------------------------------------------------------------------------------------------------------------------- */ + +DeclApiFromXList(G_D12_PixApi, G_D12_PixApiXList, "WinPixEventRuntime.dll"); + +//////////////////////////////////////////////////////////// +//~ AMD GPU Service debug marker API + +#define G_D12_AgsApiXList(X) \ + X(agsGetVersionNumber, i32, (void)) \ + X(agsInitialize, i32, (int agsVersion, const void *config, G_D12_AgsContext **context, void *gpu_info)) \ + X(agsDriverExtensionsDX12_CreateDevice, i32, (G_D12_AgsContext *context, const G_D12_AgsDeviceParams *creation_params, const G_D12_AgsExtensionParams *extension_params, G_D12_AgsDeviceResult *returned_params)) \ + X(agsDriverExtensionsDX12_PushMarker, i32, (G_D12_AgsContext *context, ID3D12GraphicsCommandList *command_list, const char *data)) \ + X(agsDriverExtensionsDX12_PopMarker, i32, (G_D12_AgsContext *context, ID3D12GraphicsCommandList *command_list)) \ + X(agsDriverExtensionsDX12_SetMarker, i32, (G_D12_AgsContext *context, ID3D12GraphicsCommandList *command_list, const char *data)) \ + X(agsDriverExtensionsDX12_SetMarker, i32, (G_D12_AgsContext *context, ID3D12GraphicsCommandList *command_list, const char *data)) \ +/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */ + +DeclApiFromXList(G_D12_AgsApi, G_D12_AgsApiXList, "amd_ags_x64.dll"); + //////////////////////////////////////////////////////////// //~ Helpers @@ -649,7 +713,6 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH //~ Command helpers G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl); -G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v); G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size); //////////////////////////////////////////////////////////// diff --git a/src/meta/meta.c 
b/src/meta/meta.c index cebfba0a..7c4a67f6 100644 --- a/src/meta/meta.c +++ b/src/meta/meta.c @@ -502,7 +502,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane) PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4702")); // unreachable code PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4305")); // 'initializing': truncation from 'double' to 'f32' - // PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4152")); // nonstandard extension, function/data pointer conversion in expression + PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4152")); // nonstandard extension, function/data pointer conversion in expression // PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4127")); // conditional expression is constant // PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4820")); // bytes padding added after data member @@ -568,6 +568,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane) { PushStringToList(perm, &cp.flags_dxc, Lit("-O3")); PushStringToList(perm, &cp.flags_dxc, Lit("-HV 202x")); // 202x makes numeric literals less weird + PushStringToList(perm, &cp.flags_dxc, Lit("-Ges")); // Strict mode // TODO: Export debug info separately for release builds PushStringToList(perm, &cp.flags_dxc, Lit("-Zi -Qembed_debug")); diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index d02988ab..b3d42d88 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -5279,7 +5279,7 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Initialization pass - G_BeginEvent(cl, Lit("Testing")); + G_ZoneDF(cl, "Init") { // Prepare shade G_Compute2D(cl, V_PrepareShadeCS, frame->shade_dims); @@ -5324,11 +5324,11 @@ void V_TickForever(WaveLaneCtx *lane) G_Sync(cl); } - G_EndEvent(cl); ////////////////////////////// //- Quads & emitters pass + G_ZoneDF(cl, "Quads & emitters") { G_ClearRenderTarget(cl, albedo_target, VEC4(0, 0, 0, 0), 0); @@ -5352,6 +5352,7 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Particle 
simulation pass + G_ZoneDF(cl, "Particle sim") { // Simulate particles G_Compute(cl, V_SimParticlesCS, V_ParticlesCap); @@ -5365,6 +5366,7 @@ void V_TickForever(WaveLaneCtx *lane) // TODO: Remove this + G_ZoneDF(cl, "Shade") if (0) { G_Compute2D(cl, V_ShadeCS, frame->shade_dims); @@ -5375,6 +5377,7 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Composite pass + G_ZoneDF(cl, "Composite") { G_Compute2D(cl, V_CompositeCS, frame->screen_dims); @@ -5394,6 +5397,7 @@ void V_TickForever(WaveLaneCtx *lane) // the first mip index in the bloom mip chain //- Downsample + G_ZoneDF(cl, "Bloom down") for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx) { Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); @@ -5405,6 +5409,7 @@ void V_TickForever(WaveLaneCtx *lane) } //- Upsample passes + G_ZoneDF(cl, "Bloom up") for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx) { Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); @@ -5419,6 +5424,7 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Finalization pass + G_ZoneDF(cl, "Finalize") { G_Compute2D(cl, V_FinalizeCS, frame->screen_dims); @@ -5428,6 +5434,7 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Debug shapes pass + G_ZoneDF(cl, "Debug shapes") if (dvert_idxs_ib.count > 0) { G_Draw( @@ -5515,8 +5522,6 @@ void V_TickForever(WaveLaneCtx *lane) ResetArena(P_tl.out_msgs_arena); ZeroStruct(&P_tl.out_msgs); - - ////////////////////////////// //- End frame @@ -5525,6 +5530,11 @@ void V_TickForever(WaveLaneCtx *lane) i32 vsync = !!TweakBool("Vsync", 1); vsync = 1; UI_EndFrame(ui_frame, vsync); + + if (frame->tick == 1) + { + LogInfoF("Time to first frame: %Fs", FmtFloat(SecondsFromNs(TimeNs()), .p = 3)); + } } FetchAddFence(&V.shutdown_complete, 1); diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index f65f8180..60e5e96f 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -1751,6 +1751,7 @@ void
UI_EndFrame(UI_Frame *frame, i32 vsync) //- Clear pass + G_ZoneDF(UI.cl, "UI clear") { G_ClearRenderTarget(UI.cl, draw_target, VEC4(0, 0, 0, 0), 0); } @@ -1760,6 +1761,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) if (rects_count > 0) { // Render rects + G_ZoneDF(UI.cl, "UI rects") G_Draw( UI.cl, UI_DRectVS, UI_DRectPS, @@ -1770,6 +1772,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) ); // Render rect wireframes + G_ZoneDF(UI.cl, "UI debug rects") if (AnyBit(frame->frame_flags, UI_FrameFlag_Debug)) { G_SetConstant(UI.cl, UI_GpuConst_DebugDraw, 1); @@ -1788,6 +1791,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) G_SyncLayout(UI.cl, backbuffer, G_Layout_Exclusive); { + G_ZoneDF(UI.cl, "UI blit to backbuffer") G_Draw( UI.cl, UI_BlitVS, UI_BlitPS,