implement GPU zones w/ optional AMD GPU Service & Pix Event runtimes

This commit is contained in:
jacob 2026-03-06 21:20:43 -08:00
parent 707755e503
commit 3707db9d94
8 changed files with 423 additions and 283 deletions

View File

@ -251,6 +251,9 @@
#define FALLTHROUGH
#endif
//- Defer
// Evaluates `begin` once, runs the statement that follows the macro once, then evaluates `end`.
// Implemented as a single-iteration `for`: `begin` sits in the init clause (comma-paired with
// the 0 that initializes the loop flag) and `end` sits in the increment clause.
// NOTE: Because `end` lives in the increment clause, leaving the body through `break`, `return`
// or `goto` skips `end`. A `continue` in the body still reaches it.
#define DeferFor(begin, end) for(i32 __defer_loop__ = ((begin), 0); !__defer_loop__; __defer_loop__ = 1, (end))
//- Preprocessor concatenation
#define CAT1(a, b) a ## b
#define CAT(a, b) CAT1(a, b)
@ -730,7 +733,7 @@ Struct(CpuTopologyInfo)
#endif
////////////////////////////////////////////////////////////
//~ Shader linkage types
//~ Shader linkage
Struct(VertexShaderDesc) { ResourceKey resource; u32 x, y, z; };
Struct(PixelShaderDesc) { ResourceKey resource; u32 x, y, z; };
@ -762,6 +765,36 @@ Struct(ComputeShaderDesc) { ResourceKey resource; u32 x, y, z; };
#define DeclPixelShader(name, resource_hash)
#endif
////////////////////////////////////////////////////////////
//~ Dynamic api linkage
#if IsCpu
// Describes a single procedure to resolve from a dynamically loaded module.
Struct(ApiProcDesc)
{
// Name of the procedure to look up in the module
String name;
// Location that receives the resolved procedure address
void **addr_ptr;
};
// Describes a dynamically loaded module together with the procedures to resolve from it.
Struct(ApiDesc)
{
// Path of the module to load
String path;
// Number of entries in `procs`
u64 procs_count;
// Table of procedures to resolve from the module
ApiProcDesc *procs;
};
// X-list callback: declares a function pointer variable named `_name` with the given
// return type and parenthesized signature.
#define DeclApiProcVarX(_name, _return_type, _signature) _return_type (*_name) _signature;
// X-list callback: emits one ApiProcDesc initializer that pairs the stringized procedure
// name with the address of the function pointer variable of the same name.
#define DeclApiProcDescX(_name, _return_type, _signature) { .name = CompLit(Stringize(_name)), .addr_ptr = (void **)&_name },
// Declares a dynamically linked API from an X-list. Expands to:
//  - one function pointer variable per X-list entry
//  - a table named `ApiProcs__<api_name>` holding one ApiProcDesc per entry
//  - an ApiDesc named `api_name` that carries `api_path` and points at that table
// The expansion does not end with a semicolon; the use site supplies it.
#define DeclApiFromXList(api_name, xlist, api_path) \
xlist(DeclApiProcVarX) \
Global Readonly ApiProcDesc CAT(ApiProcs__,api_name)[] = { xlist(DeclApiProcDescX) }; \
Global Readonly ApiDesc api_name = { \
.path = CompLit(api_path), \
.procs_count = countof(CAT(ApiProcs__,api_name)), \
.procs = CAT(ApiProcs__,api_name), \
} \
/* ----------------------------------------------------------------------------------- */
#endif
////////////////////////////////////////////////////////////
//~ Exit callback types
@ -798,6 +831,7 @@ Inline u64 MixU64s(u64 seed_a, u64 seed_b)
String GetEngineDirectory(void);
String GetLibsDirectory(void);
String GetAppDirectory(void);
void LoadApi(ApiDesc api);
void Echo(String msg);
b32 Panic(String msg);
b32 DebugBreakPrompt(String title, String msg);

View File

@ -63,6 +63,41 @@ String GetAppDirectory(void)
return W32.app_dir_path;
}
// Loads the module at `api.path` and resolves every procedure listed in `api.procs`,
// writing each resolved address through the entry's `addr_ptr`.
//
// Panics if the module cannot be loaded, or if a listed procedure cannot be found in it.
// The module handle is intentionally not released: the stored addresses point into it.
void LoadApi(ApiDesc api)
{
  TempArena scratch = BeginScratchNoConflict();
  {
    // Load module
    HMODULE module = 0;
    {
      wchar_t *api_path_wstr = WstrFromString(scratch.arena, api.path);
      module = LoadLibraryW(api_path_wstr);
      if (!module)
      {
        Panic(StringF(scratch.arena, "Module '%F' not found", FmtString(api.path)));
      }
    }

    // Load procs
    //
    // Panic() has a return value, so control may come back here after a failed load.
    // Only resolve procedures against a valid module; querying a null handle would
    // raise one misleading "does not exist in the module" panic per procedure.
    if (module)
    {
      for (u64 proc_idx = 0; proc_idx < api.procs_count; ++proc_idx)
      {
        ApiProcDesc proc = api.procs[proc_idx];
        char *proc_name_cstr = CstrFromString(scratch.arena, proc.name);
        void *addr = (void *)GetProcAddress(module, proc_name_cstr);
        if (!addr)
        {
          Panic(StringF(
            scratch.arena,
            "Failed to load '%F' - '%F' does not exist in the module",
            FmtString(api.path),
            FmtString(proc.name)
          ));
        }
        *proc.addr_ptr = addr;
      }
    }
  }
  EndScratch(scratch);
}
void Echo(String msg)
{
HANDLE console_handle = GetStdHandle(STD_OUTPUT_HANDLE);
@ -350,7 +385,7 @@ String W32_StringFromError(Arena *arena, i32 code)
0,
0
);
result = StringFromWstr(arena, msg_wstr, msg_len);
result = TrimWhitespace(StringFromWstr(arena, msg_wstr, msg_len));
if (msg_wstr)
{
LocalFree(msg_wstr);
@ -636,22 +671,14 @@ i32 W32_Main(void)
}
else
{
err = SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS);
if (err != ERROR_SUCCESS)
{
LogErrorF(
"Failed call to SetDefaultDllDirectories: %F",
FmtString(W32_StringFromError(perm, err))
);
}
else
if (SetDefaultDllDirectories(LOAD_LIBRARY_SEARCH_DEFAULT_DIRS))
{
String libs_path = PathFromString(perm, StringF(perm, "%F/libs/", FmtString(W32.engine_dir_path)), '\\');
wchar_t *libs_path_wstr = WstrFromString(perm, libs_path);
DLL_DIRECTORY_COOKIE cookie = AddDllDirectory(libs_path_wstr);
err = GetLastError();
if (!cookie || err != ERROR_SUCCESS)
if (!cookie)
{
err = GetLastError();
LogErrorF(
"Failed to add libs directory to dll search path at \"%F\": %F",
FmtString(path),
@ -659,6 +686,14 @@ i32 W32_Main(void)
);
}
}
else
{
err = GetLastError();
LogErrorF(
"Failed call to SetDefaultDllDirectories: %F",
FmtString(W32_StringFromError(perm, err))
);
}
}
}
@ -725,6 +760,7 @@ i32 W32_Main(void)
// Wait for exit start or panic
if (!Atomic32Fetch(&W32.panicking))
{
LogInfoF("Startup time: %Fs", FmtFloat(SecondsFromNs(TimeNs()), .p = 3));
HANDLE handles[] = {
W32.exit_event,
W32.panic_event,

View File

@ -587,10 +587,11 @@ void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size
void G_Sync(G_CommandListHandle cl);
void G_SyncLayout(G_CommandListHandle cl, G_ResourceHandle resource, G_Layout layout);
//- Event
//- Zone
void G_BeginEvent(G_CommandListHandle cl, String name);
void G_EndEvent(G_CommandListHandle cl);
void G_PushZone(G_CommandListHandle cl, char *name_lit_cstr);
void G_PopZone(G_CommandListHandle cl);
// Scoped zone: pushes a zone named `name_lit_cstr` onto `cl`, runs the statement that follows
// the macro, then pops the zone.
// NOTE: Expands to DeferFor, so leaving the statement through `break`, `return` or `goto`
// skips the pop. `cl` is evaluated twice (once for the push, once for the pop).
#define G_ZoneDF(cl, name_lit_cstr) DeferFor(G_PushZone((cl), (name_lit_cstr)), G_PopZone(cl))
//- Cpu -> Gpu staged copy

View File

@ -10,47 +10,54 @@ void G_Bootstrap(void)
Arena *perm = PermArena();
// NOTE: Nsight seems to have trouble attaching when independent devices are enabled
G_D12.independent_devices_enabled = !CommandlineArgFromName(Lit("no-d3d12-independent-devices")).exists;
G_D12.validation_layer_enabled = CommandlineArgFromName(Lit("gpu-debug-validation")).exists;
G_D12.debug_layer_enabled = G_D12.validation_layer_enabled || CommandlineArgFromName(Lit("gpu-debug")).exists;
G_D12.events_enabled = IsDeveloperModeEnabled;
if (G_D12.independent_devices_enabled && IsRunningInWine())
//////////////////////////////
//- Enable Pix event runtime
if (CommandlineArgFromName(Lit("pix")).exists)
{
LoadApi(G_D12_PixApi);
LogInfoF("Pix runtime loaded");
G_D12.pix_enabled = 1;
}
//////////////////////////////
//- Enable AMD GPU Service runtime
if (CommandlineArgFromName(Lit("ags")).exists)
{
LoadApi(G_D12_AgsApi);
i32 ags_version = agsGetVersionNumber();
i32 err = agsInitialize(ags_version, 0, &G_D12.ags_ctx, 0);
if (err == 0 && G_D12.ags_ctx)
{
LogInfoF("AMD GPU Service runtime loaded");
G_D12.ags_enabled = 1;
}
else
{
Panic(StringF(perm, "Failed to initialize AMD GPU Service with version %F", FmtSint(ags_version)));
}
}
//////////////////////////////
//- Enable independent devices
if (CommandlineArgFromName(Lit("no-d3d12-independent-devices")).exists)
{
G_D12.independent_devices_enabled = 1;
if (IsRunningInWine())
{
// NOTE: Independent devices only supported in newer versions of Proton, which just ignores them anyway
LogInfoF("Wine detected, disabling D3D12 independent devices");
G_D12.independent_devices_enabled = 0;
}
LogInfoF("D3D12 independent devices enabled: %F", FmtSint(G_D12.independent_devices_enabled));
LogInfoF("D3D12 events enabled: %F", FmtSint(G_D12.events_enabled));
LogInfoF("D3D12 debug layer enabled: %F", FmtSint(G_D12.debug_layer_enabled));
LogInfoF("D3D12 validation layer enabled: %F", FmtSint(G_D12.validation_layer_enabled));
//////////////////////////////
//- Load pix event runtime
if (G_D12.events_enabled)
if (G_D12.ags_enabled)
{
HMODULE pix = LoadLibraryW(L"WinPixEventRuntime");
if (pix)
{
G_D12_PixBeginEventOnCommandList = (G_D12_PixBeginEventOnCommandListFunc *)GetProcAddress(pix, "PIXBeginEventOnCommandList");
G_D12_PixEndEventOnCommandList = (G_D12_PixEndEventOnCommandListFunc *)GetProcAddress(pix, "PIXEndEventOnCommandList");
G_D12_PixSetMarkerOnCommandList = (G_D12_PixSetMarkerOnCommandListFunc *)GetProcAddress(pix, "PIXSetMarkerOnCommandList");
if (!G_D12_PixBeginEventOnCommandList && !G_D12_PixEndEventOnCommandList && !G_D12_PixSetMarkerOnCommandList)
{
LogErrorF("Failed to retrieve pix procedures");
}
else
{
LogInfoF("Pix enabled");
G_D12.pix_enabled = 1;
}
}
else
{
LogInfoF("Pix event runtime not found");
LogInfoF("AMD GPU Service enabled, disabling D3D12 independent devices ");
G_D12.independent_devices_enabled = 0;
}
}
@ -158,7 +165,7 @@ void G_Bootstrap(void)
{
HRESULT hr = 0;
// Create dxgi factory
//- Create dxgi factory
{
u32 dxgi_factory_flags = 0;
if (G_D12.debug_layer_enabled)
@ -172,74 +179,98 @@ void G_Bootstrap(void)
}
}
// Create device
//- Fetch adapter
String adapter_name = Zi;
{
IDXGIAdapter3 *adapter = 0;
ID3D12Device10 *device = 0;
String adapter_name = Zi;
String error = Lit("Failed to initialize D3D12 device");
u32 adapter_index = 0;
b32 done = 0;
i32 skips = 0; // For iGPU testing
while (!done)
{
hr = IDXGIFactory6_EnumAdapterByGpuPreference(G_D12.dxgi_factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter3, (void **)&adapter);
i32 target_adapter_idx = 0;
for (i32 adapter_idx = target_adapter_idx; adapter_idx >= 0; --adapter_idx)
{
hr = IDXGIFactory6_EnumAdapterByGpuPreference(G_D12.dxgi_factory, adapter_idx, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter3, (void **)&adapter);
if (SUCCEEDED(hr))
{
break;
}
IDXGIAdapter3_Release(adapter);
adapter = 0;
}
}
if (!adapter)
{
Panic(Lit("Failed to locate D3D12 adapter"));
}
{
DXGI_ADAPTER_DESC1 desc;
IDXGIAdapter3_GetDesc1(adapter, &desc);
adapter_name = StringFromWstrNoLimit(scratch.arena, desc.Description);
LogInfoF("D3D12 adapter name: '%F'", FmtString(adapter_name));
}
if (skips <= 0)
{
if (G_D12.independent_devices_enabled)
{
hr = ID3D12DeviceFactory_CreateDevice(G_D12.independent.device_factory, (IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device);
}
else
{
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device);
}
done = 1;
}
else
{
skips -= 1;
adapter_index += 1;
if (device)
{
ID3D12Device_Release(device);
}
if (adapter)
{
IDXGIAdapter3_Release(adapter);
}
adapter = 0;
device = 0;
}
}
else
{
done = 1;
LogInfoF("D3D12 adapter: '%F'", FmtString(adapter_name));
}
G_D12.adapter = adapter;
}
if (!device || !SUCCEEDED(hr))
//- Create device
{
if (adapter_name.len > 0)
const IID *iid = &IID_ID3D12Device10;
D3D_FEATURE_LEVEL feature_level = D3D_FEATURE_LEVEL_12_0;
String feature_level_name = Lit("D3D_FEATURE_LEVEL_12_0");
ID3D12Device10 *device = 0;
String error = Lit("Failed to initialize D3D12 device");
if (G_D12.independent_devices_enabled)
{
LogInfoF("Creating independent D3D12 device...");
hr = ID3D12DeviceFactory_CreateDevice(G_D12.independent.device_factory, (IUnknown *)G_D12.adapter, feature_level, iid, (void **)&device);
}
else
{
if (G_D12.ags_enabled)
{
LogInfoF("Creating D3D12 device using the AMD GPU Service runtime...");
G_D12_AgsDeviceResult ags_device_result = Zi;
{
G_D12_AgsDeviceParams ags_device_params = {
.adapter = (IDXGIAdapter *)G_D12.adapter,
.iid = *iid,
.feature_level = feature_level
};
G_D12_AgsExtensionParams ags_extension_params = {
.app_name_wstr = WstrFromString(scratch.arena, GetAppName()),
.engine_name_wstr = L"Cabin",
.app_version = 1,
.engine_version = 1,
};
agsDriverExtensionsDX12_CreateDevice(G_D12.ags_ctx, &ags_device_params, &ags_extension_params, &ags_device_result);
}
device = (ID3D12Device10 *)ags_device_result.device;
if (!(ags_device_result.extensions & G_D12_AgsExtensionFlag_UserMarkers))
{
LogWarningF("AMD GPU Service markers not supported, disabling");
G_D12.ags_enabled = 0;
}
}
else
{
LogInfoF("Creating D3D12 device...");
hr = D3D12CreateDevice((IUnknown *)G_D12.adapter, feature_level, iid, (void **)&device);
}
}
if (!device || !SUCCEEDED(hr))
{
error = StringF(
scratch.arena,
"Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.",
"Could not initialize device '%F' with feature level %F. Ensure that the device is capable and drivers are up to date.",
FmtString(feature_level_name),
FmtString(adapter_name)
);
}
Panic(error);
}
LogInfoF("Device created");
G_D12.device = device;
}
if (device)
//- Validate device capability
if (G_D12.device)
{
StringList missing = Zi;
{
@ -249,11 +280,11 @@ void G_Bootstrap(void)
D3D12_FEATURE_DATA_D3D12_OPTIONS11 options11 = Zi;
D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = Zi;
{
ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model));
ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options));
ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS9, &options9, sizeof(options9));
ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS11, &options11, sizeof(options11));
ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12));
ID3D12Device_CheckFeatureSupport(G_D12.device, D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model));
ID3D12Device_CheckFeatureSupport(G_D12.device, D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options));
ID3D12Device_CheckFeatureSupport(G_D12.device, D3D12_FEATURE_D3D12_OPTIONS9, &options9, sizeof(options9));
ID3D12Device_CheckFeatureSupport(G_D12.device, D3D12_FEATURE_D3D12_OPTIONS11, &options11, sizeof(options11));
ID3D12Device_CheckFeatureSupport(G_D12.device, D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12));
}
if (shader_model.HighestShaderModel < D3D_SHADER_MODEL_6_6)
@ -296,11 +327,7 @@ void G_Bootstrap(void)
}
}
G_D12.dxgi_adapter = adapter;
G_D12.device = device;
}
// Enable debug layer callbacks
//- Enable debug layer callbacks
if (G_D12.debug_layer_enabled)
{
// D3D12 debug
@ -337,7 +364,7 @@ void G_Bootstrap(void)
}
}
// Retrieve device configuration
//- Retrieve device configuration
if (G_D12.independent_devices_enabled)
{
hr = ID3D12Device_QueryInterface(G_D12.device, &IID_ID3D12DeviceConfiguration, (void **)&G_D12.independent.device_config);
@ -1851,57 +1878,11 @@ void *G_HostPointerFromResource(G_ResourceHandle resource_handle)
G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl)
{
// Grab chunk
G_D12_CmdChunk *chunk = cl->last_cmd_chunk;
{
if (chunk && chunk->cmds_count >= G_D12_CmdsPerChunk)
{
chunk = 0;
}
if (!chunk)
{
Lock lock = LockE(&G_D12.free_cmd_chunks_mutex);
{
chunk = G_D12.first_free_cmd_chunk;
if (chunk)
{
G_D12.first_free_cmd_chunk = chunk->next;
}
}
Unlock(&lock);
if (chunk)
{
G_D12_Cmd *cmds = chunk->cmds;
ZeroStruct(chunk);
chunk->cmds = cmds;
}
}
if (!chunk)
{
Arena *perm = PermArena();
chunk = PushStruct(perm, G_D12_CmdChunk);
chunk->cmds = PushStructsNoZero(perm, G_D12_Cmd, G_D12_CmdsPerChunk);
}
if (chunk != cl->last_cmd_chunk)
{
SllQueuePush(cl->first_cmd_chunk, cl->last_cmd_chunk, chunk);
}
}
// Push cmd to chunk
G_D12_Cmd *cmd = &chunk->cmds[chunk->cmds_count++];
ZeroStruct(cmd);
G_D12_CmdNode *cmd_node = PushStruct(cl->arena, G_D12_CmdNode);
SllQueuePush(cl->first_cmd_node, cl->last_cmd_node, cmd_node);
++cl->cmds_count;
return cmd;
}
G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v)
{
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
cmd->kind = G_D12_CmdKind_Constant;
cmd->constant.slot = slot;
CopyBytes(&cmd->constant.value, v, 4);
return cmd;
return &cmd_node->cmd;
}
G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
@ -2320,15 +2301,24 @@ G_CommandListHandle G_PrepareCommandList(G_QueueKind queue)
if (cl)
{
G_D12.first_free_cmd_list = cl->next;
}
}
Unlock(&lock);
if (cl)
{
Arena *old_arena = cl->arena;
ZeroStruct(cl);
cl->arena = old_arena;
}
else
{
Arena *perm = PermArena();
cl = PushStruct(perm, G_D12_CmdList);
cl->arena = AcquireArena(Mebi(16));
}
}
Unlock(&lock);
ResetArena(cl->arena);
cl->queue_kind = queue;
return G_D12_MakeHandle(G_CommandListHandle, cl);
@ -2341,36 +2331,6 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
TempArena scratch = BeginScratchNoConflict();
// Flatten command chunks
u64 cmds_count = 0;
G_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, G_D12_Cmd, cl->cmds_count);
{
// Flatten command chunks
{
for (G_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next)
{
for (u64 cmd_chunk_idx = 0; cmd_chunk_idx < chunk->cmds_count; ++cmd_chunk_idx)
{
cmds[cmds_count++] = chunk->cmds[cmd_chunk_idx];
}
}
}
// Free command chunks
{
Lock lock = LockE(&G_D12.free_cmd_chunks_mutex);
{
G_D12_CmdChunk *chunk = cl->first_cmd_chunk;
while (chunk)
{
G_D12_CmdChunk *next = chunk->next;
G_D12.first_free_cmd_chunk = chunk;
chunk = next;
}
}
Unlock(&lock);
}
}
//////////////////////////////
//- Build batches
@ -2381,15 +2341,15 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
{
G_D12_CmdBatch *batch = PushStruct(scratch.arena, G_D12_CmdBatch);
for (u64 cmd_idx = 0; cmd_idx < cmds_count; ++cmd_idx)
for (G_D12_CmdNode *cmd_node = cl->first_cmd_node; cmd_node; cmd_node = cmd_node->next)
{
G_D12_Cmd *cmd = &cmds[cmd_idx];
G_D12_Cmd *cmd = &cmd_node->cmd;
G_D12_CmdKind cmd_kind = cmd->kind;
if (
cmd_kind != G_D12_CmdKind_Barrier &&
cmd_kind != G_D12_CmdKind_Constant &&
cmd_kind != G_D12_CmdKind_Event
cmd_kind != G_D12_CmdKind_Zone
)
{
batch->contains_hazard = 1;
@ -2702,6 +2662,35 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
}
} break;
//- Event
case G_D12_CmdKind_Zone:
{
if (cmd->zone.push)
{
if (G_D12.pix_enabled)
{
u64 color = (u64)cmd->zone.name_lit_cstr;
PIXBeginEventOnCommandList((ID3D12GraphicsCommandList *)d3d_cl, color, cmd->zone.name_lit_cstr);
}
if (G_D12.ags_enabled)
{
agsDriverExtensionsDX12_PushMarker(G_D12.ags_ctx, (ID3D12GraphicsCommandList *)d3d_cl, cmd->zone.name_lit_cstr);
}
}
else
{
if (G_D12.pix_enabled)
{
PIXEndEventOnCommandList((ID3D12GraphicsCommandList *)d3d_cl);
}
if (G_D12.ags_enabled)
{
agsDriverExtensionsDX12_PopMarker(G_D12.ags_ctx, (ID3D12GraphicsCommandList *)d3d_cl);
}
}
} break;
//- Copy bytes
case G_D12_CmdKind_CopyBytes:
@ -3178,20 +3167,22 @@ void G_SyncLayout(G_CommandListHandle cl_handle, G_ResourceHandle resource_handl
cmd->barrier.acquire = layout == G_Layout_Exclusive;
}
//- Event
//- Zone
void G_BeginEvent(G_CommandListHandle cl_handle, String name)
void G_PushZone(G_CommandListHandle cl_handle, char *name_lit_cstr)
{
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
cmd->kind = G_D12_CmdKind_Event;
cmd->kind = G_D12_CmdKind_Zone;
cmd->zone.name_lit_cstr = name_lit_cstr;
cmd->zone.push = 1;
}
void G_EndEvent(G_CommandListHandle cl_handle)
void G_PopZone(G_CommandListHandle cl_handle)
{
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
cmd->kind = G_D12_CmdKind_Event;
cmd->kind = G_D12_CmdKind_Zone;
}
//- Cpu -> Gpu staged copy
@ -3570,13 +3561,13 @@ G_Stats G_QueryStats(void)
G_Stats result = Zi;
{
DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi;
IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.dxgi_adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info);
IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info);
result.device_committed = info.CurrentUsage;
result.device_budget = info.Budget;
}
{
DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi;
IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.dxgi_adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info);
IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info);
result.host_budget = info.Budget;
result.host_committed = info.CurrentUsage;
}

View File

@ -10,14 +10,6 @@
#pragma comment(lib, "d3d12")
#pragma comment(lib, "dxgi")
//- Pix
typedef void(WINAPI* G_D12_PixBeginEventOnCommandListFunc)(ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString);
typedef void(WINAPI* G_D12_PixEndEventOnCommandListFunc)(ID3D12GraphicsCommandList* commandList);
typedef void(WINAPI* G_D12_PixSetMarkerOnCommandListFunc)(ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString);
G_D12_PixBeginEventOnCommandListFunc *G_D12_PixBeginEventOnCommandList;
G_D12_PixEndEventOnCommandListFunc *G_D12_PixEndEventOnCommandList;
G_D12_PixSetMarkerOnCommandListFunc *G_D12_PixSetMarkerOnCommandList;
////////////////////////////////////////////////////////////
//~ Tweakable definitions
@ -303,14 +295,12 @@ Struct(G_D12_ReleasableList)
////////////////////////////////////////////////////////////
//~ Command list types
#define G_D12_CmdsPerChunk 256
Enum(G_D12_CmdKind)
{
G_D12_CmdKind_None,
G_D12_CmdKind_Constant,
G_D12_CmdKind_Barrier,
G_D12_CmdKind_Event,
G_D12_CmdKind_Zone,
G_D12_CmdKind_CopyBytes,
G_D12_CmdKind_CopyTexels,
G_D12_CmdKind_Compute,
@ -336,6 +326,12 @@ Struct(G_D12_Cmd)
b32 acquire;
} barrier;
struct
{
char *name_lit_cstr;
b32 push;
} zone;
struct
{
G_D12_Resource *dst;
@ -386,18 +382,17 @@ Struct(G_D12_Cmd)
};
};
Struct(G_D12_CmdChunk)
Struct(G_D12_CmdNode)
{
G_D12_CmdChunk *next;
struct G_D12_CmdList *cl;
G_D12_Cmd *cmds;
u64 cmds_count;
G_D12_CmdNode *next;
G_D12_Cmd cmd;
};
Struct(G_D12_CmdList)
{
G_D12_CmdList *next;
G_QueueKind queue_kind;
Arena *arena;
G_D12_DescriptorList reset_descriptors;
G_D12_ReleasableList releases;
@ -405,10 +400,9 @@ Struct(G_D12_CmdList)
G_D12_StagingRegionNode *first_staging_region;
G_D12_StagingRegionNode *last_staging_region;
G_D12_CmdChunk *first_cmd_chunk;
G_D12_CmdChunk *last_cmd_chunk;
u64 chunks_count;
u64 cmds_count;
G_D12_CmdNode *first_cmd_node;
G_D12_CmdNode *last_cmd_node;
};
////////////////////////////////////////////////////////////
@ -519,6 +513,51 @@ Struct(G_D12_Swapchain)
G_D12_Resource backbuffers[G_D12_SwapchainBufferCount];
};
////////////////////////////////////////////////////////////
//~ AMD GPU Service ABI types
Struct(G_D12_AgsContext);
// Bit flags stored in G_D12_AgsDeviceResult.extensions.
// NOTE(review): the bit positions presumably mirror the extension-support bits declared by the
// AMD GPU Service SDK header - verify against that header before adding or reordering values.
Enum(G_D12_AgsExtensionFlag)
{
G_D12_AgsExtensionFlag_None = 0,
G_D12_AgsExtensionFlag_Intrinsics16 = (1 << 0),
G_D12_AgsExtensionFlag_Intrinsics17 = (1 << 1),
G_D12_AgsExtensionFlag_UserMarkers = (1 << 2),
G_D12_AgsExtensionFlag_AppRegistration = (1 << 3),
G_D12_AgsExtensionFlag_UAVBindSlot = (1 << 4),
G_D12_AgsExtensionFlag_Intrinsics19 = (1 << 5),
G_D12_AgsExtensionFlag_BaseVertex = (1 << 6),
G_D12_AgsExtensionFlag_BaseInstance = (1 << 7),
G_D12_AgsExtensionFlag_GetWaveSize = (1 << 8),
G_D12_AgsExtensionFlag_FloatConversion = (1 << 9),
G_D12_AgsExtensionFlag_ReadLaneAt = (1 << 10),
G_D12_AgsExtensionFlag_RayHitToken = (1 << 11),
G_D12_AgsExtensionFlag_ShaderClock = (1 << 12),
};
// Device creation parameters passed to agsDriverExtensionsDX12_CreateDevice.
// NOTE(review): field order and types presumably have to match the SDK's own struct layout,
// since this is passed by pointer across the DLL boundary - verify against the SDK header.
Struct(G_D12_AgsDeviceParams)
{
// Adapter to create the device on
IDXGIAdapter *adapter;
// Interface id of the requested device interface
IID iid;
// Feature level requested for the device
D3D_FEATURE_LEVEL feature_level;
};
// Extension parameters passed to agsDriverExtensionsDX12_CreateDevice.
// NOTE(review): field order and types presumably have to match the SDK's own struct layout,
// since this is passed by pointer across the DLL boundary - verify against the SDK header.
Struct(G_D12_AgsExtensionParams)
{
// Application name as a wide string
wchar_t *app_name_wstr;
// Engine name as a wide string
wchar_t *engine_name_wstr;
u32 app_version;
u32 engine_version;
// NOTE(review): presumably the UAV slot reserved for driver extensions - confirm against the SDK docs
u32 uav_slot;
};
// Output of agsDriverExtensionsDX12_CreateDevice.
// NOTE(review): field order and types presumably have to match the SDK's own struct layout,
// since this is filled in by the DLL through a pointer - verify against the SDK header.
Struct(G_D12_AgsDeviceResult)
{
// Created device
ID3D12Device *device;
// Combination of G_D12_AgsExtensionFlag bits
G_D12_AgsExtensionFlag extensions;
};
////////////////////////////////////////////////////////////
//~ State types
@ -530,12 +569,15 @@ Struct(G_D12_AsyncCtx)
Struct(G_D12_Ctx)
{
IsolatedAtomic64 resource_creation_gen;
b32 independent_devices_enabled;
b32 debug_layer_enabled;
b32 validation_layer_enabled;
b32 events_enabled;
b32 pix_enabled;
b32 ags_enabled;
G_D12_AgsContext *ags_ctx;
IsolatedAtomic64 resource_creation_gen;
// Stats
Atomic64 arenas_count;
@ -560,10 +602,6 @@ Struct(G_D12_Ctx)
Mutex free_cmd_lists_mutex;
G_D12_CmdList *first_free_cmd_list;
// Command chunks
Mutex free_cmd_chunks_mutex;
G_D12_CmdChunk *first_free_cmd_chunk;
// Swapchains
Mutex free_swapchains_mutex;
G_D12_Swapchain *first_free_swapchain;
@ -578,7 +616,7 @@ Struct(G_D12_Ctx)
// Device
IDXGIFactory6 *dxgi_factory;
IDXGIAdapter3 *dxgi_adapter;
IDXGIAdapter3 *adapter;
ID3D12Device10 *device;
// Release-queue
@ -599,6 +637,32 @@ Struct(G_D12_ThreadLocalCtx)
extern G_D12_Ctx G_D12;
extern ThreadLocal G_D12_ThreadLocalCtx G_D12_tl;
////////////////////////////////////////////////////////////
//~ Pix debug marker API
// Procedures resolved from the Pix event runtime module.
// Each entry is (procedure name, return type, parenthesized signature).
#define G_D12_PixApiXList(X) \
X(PIXBeginEventOnCommandList, void, (ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString)) \
X(PIXEndEventOnCommandList, void, (ID3D12GraphicsCommandList* commandList)) \
X(PIXSetMarkerOnCommandList, void, (ID3D12GraphicsCommandList* commandList, UINT64 color, _In_ PCSTR formatString)) \
/* ------------------------------------------------------------------------------------------------------------------- */
// Declares the function pointers listed above plus the `G_D12_PixApi` descriptor used to load them.
DeclApiFromXList(G_D12_PixApi, G_D12_PixApiXList, "WinPixEventRuntime.dll");
////////////////////////////////////////////////////////////
//~ AMD GPU Service debug marker API
// Procedures resolved from the AMD GPU Service module.
// Each entry is (procedure name, return type, parenthesized signature).
// Every procedure must be listed exactly once: each entry expands to both a function pointer
// declaration and an entry in the load table.
#define G_D12_AgsApiXList(X) \
  X(agsGetVersionNumber, i32, (void)) \
  X(agsInitialize, i32, (int agsVersion, const void *config, G_D12_AgsContext **context, void *gpu_info)) \
  X(agsDriverExtensionsDX12_CreateDevice, i32, (G_D12_AgsContext *context, const G_D12_AgsDeviceParams *creation_params, const G_D12_AgsExtensionParams *extension_params, G_D12_AgsDeviceResult *returned_params)) \
  X(agsDriverExtensionsDX12_PushMarker, i32, (G_D12_AgsContext *context, ID3D12GraphicsCommandList *command_list, const char *data)) \
  X(agsDriverExtensionsDX12_PopMarker, i32, (G_D12_AgsContext *context, ID3D12GraphicsCommandList *command_list)) \
  X(agsDriverExtensionsDX12_SetMarker, i32, (G_D12_AgsContext *context, ID3D12GraphicsCommandList *command_list, const char *data)) \
  /* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- */
// Declares the function pointers listed above plus the `G_D12_AgsApi` descriptor used to load them.
DeclApiFromXList(G_D12_AgsApi, G_D12_AgsApiXList, "amd_ags_x64.dll");
////////////////////////////////////////////////////////////
//~ Helpers
@ -649,7 +713,6 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
//~ Command helpers
G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl);
G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v);
G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size);
////////////////////////////////////////////////////////////

View File

@ -502,7 +502,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4702")); // unreachable code
PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4305")); // 'initializing': truncation from 'double' to 'f32'
// PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4152")); // nonstandard extension, function/data pointer conversion in expression
PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4152")); // nonstandard extension, function/data pointer conversion in expression
// PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4127")); // conditional expression is constant
// PushStringToList(perm, &cp.warnings_msvc, Lit("-wd4820")); // bytes padding added after data member
@ -568,6 +568,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
{
PushStringToList(perm, &cp.flags_dxc, Lit("-O3"));
PushStringToList(perm, &cp.flags_dxc, Lit("-HV 202x")); // 202x makes numeric literals less weird
PushStringToList(perm, &cp.flags_dxc, Lit("-Ges")); // Strict mode
// TODO: Export debug info separately for release builds
PushStringToList(perm, &cp.flags_dxc, Lit("-Zi -Qembed_debug"));

View File

@ -5279,7 +5279,7 @@ void V_TickForever(WaveLaneCtx *lane)
//////////////////////////////
//- Initialization pass
G_BeginEvent(cl, Lit("Testing"));
G_ZoneDF(cl, "Init")
{
// Prepare shade
G_Compute2D(cl, V_PrepareShadeCS, frame->shade_dims);
@ -5324,11 +5324,11 @@ void V_TickForever(WaveLaneCtx *lane)
G_Sync(cl);
}
G_EndEvent(cl);
//////////////////////////////
//- Quads & emitters pass
G_ZoneDF(cl, "Quads & emitters")
{
G_ClearRenderTarget(cl, albedo_target, VEC4(0, 0, 0, 0), 0);
@ -5352,6 +5352,7 @@ void V_TickForever(WaveLaneCtx *lane)
//////////////////////////////
//- Particle simulation pass
G_ZoneDF(cl, "Particle sim")
{
// Simulate particles
G_Compute(cl, V_SimParticlesCS, V_ParticlesCap);
@ -5365,6 +5366,7 @@ void V_TickForever(WaveLaneCtx *lane)
// TODO: Remove this
G_ZoneDF(cl, "Shade")
if (0)
{
G_Compute2D(cl, V_ShadeCS, frame->shade_dims);
@ -5375,6 +5377,7 @@ void V_TickForever(WaveLaneCtx *lane)
//////////////////////////////
//- Composite pass
G_ZoneDF(cl, "Composite")
{
G_Compute2D(cl, V_CompositeCS, frame->screen_dims);
@ -5394,6 +5397,7 @@ void V_TickForever(WaveLaneCtx *lane)
// the first mip index in the bloom mip chain
//- Downsample
G_ZoneDF(cl, "Bloom up")
for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx)
{
Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
@ -5405,6 +5409,7 @@ void V_TickForever(WaveLaneCtx *lane)
}
//- Upsample passes
G_ZoneDF(cl, "Bloom down")
for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx)
{
Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx);
@ -5419,6 +5424,7 @@ void V_TickForever(WaveLaneCtx *lane)
//////////////////////////////
//- Finalization pass
G_ZoneDF(cl, "Finalize")
{
G_Compute2D(cl, V_FinalizeCS, frame->screen_dims);
@ -5428,6 +5434,7 @@ void V_TickForever(WaveLaneCtx *lane)
//////////////////////////////
//- Debug shapes pass
G_ZoneDF(cl, "Debug shapes")
if (dvert_idxs_ib.count > 0)
{
G_Draw(
@ -5515,8 +5522,6 @@ void V_TickForever(WaveLaneCtx *lane)
ResetArena(P_tl.out_msgs_arena);
ZeroStruct(&P_tl.out_msgs);
//////////////////////////////
//- End frame
@ -5525,6 +5530,11 @@ void V_TickForever(WaveLaneCtx *lane)
i32 vsync = !!TweakBool("Vsync", 1);
vsync = 1;
UI_EndFrame(ui_frame, vsync);
if (frame->tick == 1)
{
LogInfoF("Time to first frame: %Fs", FmtFloat(SecondsFromNs(TimeNs()), .p = 3));
}
}
FetchAddFence(&V.shutdown_complete, 1);

View File

@ -1751,6 +1751,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
//- Clear pass
G_ZoneDF(UI.cl, "UI clear")
{
G_ClearRenderTarget(UI.cl, draw_target, VEC4(0, 0, 0, 0), 0);
}
@ -1760,6 +1761,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
if (rects_count > 0)
{
// Render rects
G_ZoneDF(UI.cl, "UI rects")
G_Draw(
UI.cl,
UI_DRectVS, UI_DRectPS,
@ -1770,6 +1772,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
);
// Render rect wireframes
G_ZoneDF(UI.cl, "UI debug rects")
if (AnyBit(frame->frame_flags, UI_FrameFlag_Debug))
{
G_SetConstant(UI.cl, UI_GpuConst_DebugDraw, 1);
@ -1788,6 +1791,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
G_SyncLayout(UI.cl, backbuffer, G_Layout_Exclusive);
{
G_ZoneDF(UI.cl, "UI blit to backbuffer")
G_Draw(
UI.cl,
UI_BlitVS, UI_BlitPS,