switch dx12 from using placed resources to committed resources for now. add async gpu resource eviction.

This commit is contained in:
jacob 2026-01-03 23:29:18 -06:00
parent 3744bf25a2
commit 235cf72018
22 changed files with 551 additions and 395 deletions

BIN
ppswap/pp_sim.swp.swp Normal file

Binary file not shown.

BIN
ppswap/pp_vis.swp.swp Normal file

Binary file not shown.

View File

@ -699,6 +699,17 @@
}; };
#endif #endif
////////////////////////////////////////////////////////////
//~ Debug types
#if IsLanguageC
// A captured stack trace: `count` valid return addresses in `frames`,
// innermost frame first, with a fixed capacity of 32 entries.
// Entries past `count` are left uninitialized by CaptureCallstack.
Struct(Callstack)
{
u64 count;
void *frames[32];
};
#endif
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Shader linkage types //~ Shader linkage types
@ -757,7 +768,6 @@
} }
#endif #endif
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Core api //~ @hookdecl Core api
@ -765,6 +775,7 @@
StringList GetRawCommandline(void); StringList GetRawCommandline(void);
void Echo(String msg); void Echo(String msg);
b32 Panic(String msg); b32 Panic(String msg);
Callstack CaptureCallstack(u64 skip_frames);
b32 IsRunningInDebugger(void); b32 IsRunningInDebugger(void);
i64 TimeNs(void); i64 TimeNs(void);
void TrueRand(String buffer); void TrueRand(String buffer);

View File

@ -51,11 +51,16 @@ void AsyncWorkerEntryPoint(WaveLaneCtx *lane)
{ {
// Wait for signal // Wait for signal
{ {
i64 passive_timeout_ns = NsFromSeconds(0.25);
i64 now_ns = TimeNs();
i64 passive_run_at_ns = now_ns + passive_timeout_ns;
i64 cur_signal = Atomic64Fetch(&Base.async.signal.v); i64 cur_signal = Atomic64Fetch(&Base.async.signal.v);
while (cur_signal <= w->last_seen_signal) while (cur_signal <= w->last_seen_signal && (passive_run_at_ns - now_ns) > 1000000)
{ {
FutexYieldNeq(&Base.async.signal.v, &cur_signal, sizeof(cur_signal)); FutexYieldNeq(&Base.async.signal.v, &cur_signal, sizeof(cur_signal), passive_run_at_ns - now_ns);
cur_signal = Atomic64Fetch(&Base.async.signal.v); cur_signal = Atomic64Fetch(&Base.async.signal.v);
now_ns = TimeNs();
} }
w->last_seen_signal = cur_signal; w->last_seen_signal = cur_signal;
} }
@ -76,6 +81,7 @@ void AsyncWorkerEntryPoint(WaveLaneCtx *lane)
} }
} }
WaveSync(lane); WaveSync(lane);
////////////////////////////// //////////////////////////////

View File

@ -2,7 +2,7 @@
//~ Async types //~ Async types
Struct(AsyncFrameLaneCtx); Struct(AsyncFrameLaneCtx);
typedef void AsyncTickCallbackFunc(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame); typedef void AsyncTickCallbackFunc(WaveLaneCtx *lane, AsyncFrameLaneCtx *async_lane_frame_ctx);
Struct(AsyncTickCallback) Struct(AsyncTickCallback)
{ {

View File

@ -4,7 +4,7 @@
// Similar to Win32 WaitOnAddress & WakeByAddressAll // Similar to Win32 WaitOnAddress & WakeByAddressAll
// i.e. - Spurious wait until value at address != cmp // i.e. - Spurious wait until value at address != cmp
void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size); void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns);
void FutexWakeNeq(void *addr); void FutexWakeNeq(void *addr);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -16,5 +16,5 @@ void FutexWakeNeq(void *addr);
// wake when the futex progresses past the specified target value, rather than // wake when the futex progresses past the specified target value, rather than
// wake every time the futex is modified. // wake every time the futex is modified.
void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size); void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns);
void FutexWakeGte(void *addr); void FutexWakeGte(void *addr);

View File

@ -13,10 +13,6 @@ Struct(GstatCtx)
Atomic64Padded ArenaMemoryCommitted; Atomic64Padded ArenaMemoryCommitted;
Atomic64Padded ArenaMemoryReserved; Atomic64Padded ArenaMemoryReserved;
Atomic64Padded NumGpuArenas;
Atomic64Padded DedicatedGpuArenaMemoryCommitted;
Atomic64Padded SharedGpuArenaMemoryCommitted;
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////

View File

@ -154,6 +154,7 @@ String StringFromFloat(Arena *arena, f64 src, u32 precision)
if (c == '0') if (c == '0')
{ {
result.len -= 1; result.len -= 1;
PopBytesNoCopy(arena, 1);
} }
else else
{ {

View File

@ -48,7 +48,7 @@ Lock ExclusiveLockEx(Mutex *m, i32 spin)
} }
else else
{ {
FutexYieldNeq(&m->v, &v, 4); FutexYieldNeq(&m->v, &v, 4, I64Max);
spin_cnt = 0; spin_cnt = 0;
} }
} }
@ -94,7 +94,7 @@ Lock SharedLockEx(Mutex *m, i32 spin)
} }
else else
{ {
FutexYieldNeq(&m->v, &v, 4); FutexYieldNeq(&m->v, &v, 4, I64Max);
spin_cnt = 0; spin_cnt = 0;
} }
} }
@ -143,7 +143,7 @@ void YieldOnCv(Cv *cv, Lock *l)
{ {
Unlock(l); Unlock(l);
{ {
FutexYieldNeq(&cv->wake_gen, &old_wake_gen, sizeof(old_wake_gen)); FutexYieldNeq(&cv->wake_gen, &old_wake_gen, sizeof(old_wake_gen), I64Max);
} }
if (exclusive) if (exclusive)
{ {
@ -195,8 +195,37 @@ i64 YieldOnFence(Fence *fence, i64 target)
i64 v = Atomic64Fetch(&fence->v.v); i64 v = Atomic64Fetch(&fence->v.v);
while (v < target) while (v < target)
{ {
FutexYieldGte(&fence->v.v, &v, sizeof(v)); FutexYieldGte(&fence->v.v, &v, sizeof(v), I64Max);
v = Atomic64Fetch(&fence->v.v); v = Atomic64Fetch(&fence->v.v);
} }
return v; return v;
} }
////////////////////////////////////////////////////////////
//~ Lazy init
// Elects exactly one caller as the initializer for a lazily-initialized
// resource. Returns 1 to the single thread that wins the 0 -> 1 transition
// (that thread must call EndLazyInit when done); all other callers spin
// until the barrier reaches state 2 (initialized) and then return 0.
// NOTE(review): assumes Atomic32FetchTestSet(v, 0, 1) is a compare-and-swap
// returning the prior value, and that these atomics provide acquire/release
// ordering — confirm against the atomic primitive definitions.
b32 BeginLazyInit(LazyInitBarrier *barrier)
{
b32 is_initializer = 0;
Atomic32 *v = &barrier->v.v;
// Fast path: already initialized (state 2) — fall through, return 0.
if (Atomic32Fetch(v) != 2)
{
// Race to claim the initializer role: 0 (untouched) -> 1 (initializing).
if (Atomic32FetchTestSet(v, 0, 1) == 0)
{
is_initializer = 1;
}
else
{
// Lost the race (or observed state 1): busy-wait until the winner
// publishes state 2 via EndLazyInit.
while (Atomic32Fetch(v) != 2)
{
_mm_pause(); // x86 spin-wait hint; eases contention/power in the loop
}
}
}
return is_initializer;
}
// Marks initialization complete (state 2), releasing any threads spinning
// in BeginLazyInit. Must be called exactly once, by the thread that got a
// nonzero return from BeginLazyInit.
void EndLazyInit(LazyInitBarrier *barrier)
{
Atomic32Set(&barrier->v.v, 2);
}

View File

@ -39,6 +39,17 @@ Struct(Fence)
Atomic64Padded v; Atomic64Padded v;
}; };
////////////////////////////////////////////////////////////
//~ Lazy init types
// One-shot initialization barrier driven by BeginLazyInit/EndLazyInit;
// the padded atomic holds the three-state lifecycle below.
Struct(LazyInitBarrier)
{
// 0 = untouched
// 1 = initializing
// 2 = initialized
Atomic32Padded v;
};
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Mutex //~ Mutex
@ -74,3 +85,9 @@ i64 FetchSetFence(Fence *fence, i64 x);
i64 FetchAddFence(Fence *fence, i64 x); i64 FetchAddFence(Fence *fence, i64 x);
i64 YieldOnFence(Fence *fence, i64 target); i64 YieldOnFence(Fence *fence, i64 target);
////////////////////////////////////////////////////////////
//~ Lazy init
b32 BeginLazyInit(LazyInitBarrier *barrier);
void EndLazyInit(LazyInitBarrier *barrier);

View File

@ -27,7 +27,7 @@ void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count)
} }
else else
{ {
FutexYieldNeq(&wave->sync_gen.v, &sync_gen, sizeof(sync_gen)); FutexYieldNeq(&wave->sync_gen.v, &sync_gen, sizeof(sync_gen), I64Max);
} }
} }
} }
@ -62,7 +62,7 @@ void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broad
} }
else else
{ {
FutexYieldNeq(&wave->ack_gen.v, &ack_gen, sizeof(ack_gen)); FutexYieldNeq(&wave->ack_gen.v, &ack_gen, sizeof(ack_gen), I64Max);
} }
} }
} }
@ -82,7 +82,7 @@ void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broad
} }
else else
{ {
FutexYieldNeq(&wave->broadcast_gen.v, &seen_broadcast_gen, sizeof(seen_broadcast_gen)); FutexYieldNeq(&wave->broadcast_gen.v, &seen_broadcast_gen, sizeof(seen_broadcast_gen), I64Max);
} }
} }
} }

View File

@ -85,6 +85,18 @@ b32 Panic(String msg)
return 0; return 0;
} }
// Win32 implementation: captures up to countof(result.frames) return
// addresses from the current thread's stack via CaptureStackBackTrace.
// The `1 +` skips this function's own frame in addition to the caller's
// requested `skip_frames`. Entries beyond result.count stay uninitialized.
Callstack CaptureCallstack(u64 skip_frames)
{
Callstack result;
result.count = CaptureStackBackTrace(
1 + skip_frames,
countof(result.frames),
result.frames,
0 // BackTraceHash
);
return result;
}
b32 IsRunningInDebugger(void) b32 IsRunningInDebugger(void)
{ {
return IsDebuggerPresent(); return IsDebuggerPresent();

View File

@ -43,6 +43,7 @@
#pragma comment(lib, "synchronization") #pragma comment(lib, "synchronization")
#pragma comment(lib, "avrt") #pragma comment(lib, "avrt")
#pragma comment(lib, "ws2_32.lib") #pragma comment(lib, "ws2_32.lib")
#pragma comment(lib, "advapi32.lib")
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Embedded data iter types //~ Embedded data iter types

View File

@ -1,9 +1,21 @@
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookimpl Not-equal futex ops //~ @hookimpl Not-equal futex ops
void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size) void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns)
{ {
WaitOnAddress(addr, cmp, cmp_size, INFINITE); if (timeout_ns > 0)
{
DWORD timeout_ms;
if (timeout_ns >= 3153600000000000000ull) // ~100 years
{
timeout_ms = INFINITE;
}
else
{
timeout_ms = (DWORD)(SecondsFromNs(timeout_ns) * 1000.0);
}
WaitOnAddress(addr, cmp, cmp_size, timeout_ms);
}
} }
void FutexWakeNeq(void *addr) void FutexWakeNeq(void *addr)
@ -14,10 +26,10 @@ void FutexWakeNeq(void *addr)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookimpl Greater-than-or-equal futex ops //~ @hookimpl Greater-than-or-equal futex ops
void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size) void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns)
{ {
// TODO: Actually implement this. Just emulating via neq for now. // TODO: Actually implement this. Just emulating via neq for now.
FutexYieldNeq(addr, cmp, cmp_size); FutexYieldNeq(addr, cmp, cmp_size, timeout_ns);
} }
void FutexWakeGte(void *addr) void FutexWakeGte(void *addr)

View File

@ -217,9 +217,10 @@ GC_Run GC_RunFromString32(Arena *arena, String32 str32, GC_FontKey font, f32 fon
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Async //~ Async
void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame) void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame)
{ {
GC_AsyncCtx *async = &GC.async_ctx; GC_AsyncCtx *async = &GC.async_ctx;
Arena *frame_arena = base_async_lane_frame->arena;
////////////////////////////// //////////////////////////////
//- Begin tick //- Begin tick
@ -234,7 +235,7 @@ void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame)
{ {
// Pop cmds from submission queue // Pop cmds from submission queue
async->cmds.count = GC.submit.count; async->cmds.count = GC.submit.count;
async->cmds.v = PushStructsNoZero(frame->arena, GC_Cmd, GC.submit.count); async->cmds.v = PushStructsNoZero(frame_arena, GC_Cmd, GC.submit.count);
u64 cmd_idx = 0; u64 cmd_idx = 0;
for (GC_CmdNode *n = GC.submit.first; n; n = n->next) for (GC_CmdNode *n = GC.submit.first; n; n = n->next)
{ {
@ -267,7 +268,7 @@ void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame)
GC_Glyph *glyph = cmd->glyph; GC_Glyph *glyph = cmd->glyph;
ResourceKey resource = glyph->desc.font.r; ResourceKey resource = glyph->desc.font.r;
GC_GlyphDesc desc = glyph->desc; GC_GlyphDesc desc = glyph->desc;
TTF_GlyphResult ttf_result = TTF_RasterizeGlyphFromCodepoint(frame->arena, desc.codepoint, resource, desc.font_size); TTF_GlyphResult ttf_result = TTF_RasterizeGlyphFromCodepoint(frame_arena, desc.codepoint, resource, desc.font_size);
glyph->font_size = desc.font_size; glyph->font_size = desc.font_size;
glyph->font_ascent = ttf_result.font_ascent; glyph->font_ascent = ttf_result.font_ascent;
glyph->font_descent = ttf_result.font_descent; glyph->font_descent = ttf_result.font_descent;

View File

@ -160,4 +160,4 @@ GC_Run GC_RunFromString32(Arena *arena, String32 str32, GC_FontKey font, f32 fon
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Async //~ Async
void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame); void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame);

View File

@ -505,16 +505,14 @@ Struct(G_RenderTargetDesc)
Struct(G_Stats) Struct(G_Stats)
{ {
// Memory usage // Memory usage
u64 local_committed; u64 device_committed;
u64 local_budget; u64 device_budget;
u64 non_local_committed; u64 host_committed;
u64 non_local_budget; u64 host_budget;
// Resources // Other stats
u64 driver_resources_allocated; u64 arenas_count;
u64 driver_descriptors_allocated; u64 cumulative_nonreuse_count;
// TODO: Arena stats (committed, reserved, etc)
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -526,7 +524,7 @@ void G_Bootstrap(void);
//~ @hookdecl Arena //~ @hookdecl Arena
G_ArenaHandle G_AcquireArena(void); G_ArenaHandle G_AcquireArena(void);
void G_ReleaseArena(G_ArenaHandle arena); void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena);
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle); void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////

View File

@ -319,7 +319,6 @@ void G_Bootstrap(void)
} }
} }
} }
} }
////////////////////////////// //////////////////////////////
@ -334,6 +333,8 @@ void G_Bootstrap(void)
// DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind); // DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind);
// } // }
OnAsyncTick(G_D12_TickAsync);
DispatchWave(Lit("Gpu collection worker"), 1, G_D12_CollectionWorkerEntryPoint, 0); DispatchWave(Lit("Gpu collection worker"), 1, G_D12_CollectionWorkerEntryPoint, 0);
EndScratch(scratch); EndScratch(scratch);
@ -811,23 +812,21 @@ G_ArenaHandle G_AcquireArena(void)
} }
gpu_arena->arena = AcquireArena(Gibi(1)); gpu_arena->arena = AcquireArena(Gibi(1));
for (u64 heap_idx = 0; heap_idx < countof(gpu_arena->resource_heaps); ++heap_idx) Atomic64FetchAdd(&G_D12.arenas_count, 1);
{
gpu_arena->resource_heaps[heap_idx].kind = (G_D12_ResourceHeapKind)heap_idx;
}
AddGstat(NumGpuArenas, 1);
return G_D12_MakeHandle(G_ArenaHandle, gpu_arena); return G_D12_MakeHandle(G_ArenaHandle, gpu_arena);
} }
void G_ReleaseArena(G_ArenaHandle arena) void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena)
{ {
// TODO // TODO
// TODO: Unmap heaps // TODO: Release resources
// TODO: Update gstats // TODO: Update gstats
// TODO: Move this to actual release
// Atomic64FetchAdd(&G_D12.arenas_count, -1);
} }
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle) void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle)
@ -842,31 +841,20 @@ void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle)
void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena)
{ {
for (u64 heap_idx = 0; heap_idx < countof(gpu_arena->resource_heaps); ++heap_idx) // Move resources to reset list
if (gpu_arena->resources.first)
{ {
G_D12_ResourceHeap *heap = &gpu_arena->resource_heaps[heap_idx]; if (gpu_arena->reset_resources.last)
heap->pos = 0;
if (heap->resources.first)
{ {
for (G_D12_Resource *resource = heap->resources.first; resource; resource = resource->next) gpu_arena->reset_resources.last->next = gpu_arena->resources.first;
{
ID3D12Resource_Release(resource->d3d_resource);
}
if (gpu_arena->free_resources.last)
{
gpu_arena->free_resources.last->next = heap->resources.first;
}
else
{
gpu_arena->free_resources.first = heap->resources.first;
}
gpu_arena->free_resources.last = heap->resources.last;
gpu_arena->free_resources.count += heap->resources.count;
heap->resources.count = 0;
heap->resources.first = 0;
heap->resources.last = 0;
} }
else
{
gpu_arena->reset_resources.first = gpu_arena->resources.first;
}
gpu_arena->reset_resources.last = gpu_arena->resources.last;
gpu_arena->reset_resources.count += gpu_arena->resources.count;
ZeroStruct(&gpu_arena->resources);
} }
// Push descriptors to cl reset list // Push descriptors to cl reset list
@ -886,7 +874,6 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena)
gpu_arena->descriptors.first = 0; gpu_arena->descriptors.first = 0;
gpu_arena->descriptors.last = 0; gpu_arena->descriptors.last = 0;
} }
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -894,10 +881,10 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena)
G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle cl_handle, G_ResourceDesc desc) G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle cl_handle, G_ResourceDesc desc)
{ {
Arena *perm = PermArena();
G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle);
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
G_D12_Resource *resource = 0; G_D12_Resource *resource = 0;
HRESULT hr = 0;
b32 is_buffer = desc.kind == G_ResourceKind_Buffer; b32 is_buffer = desc.kind == G_ResourceKind_Buffer;
b32 is_texture = desc.kind == G_ResourceKind_Texture1D || b32 is_texture = desc.kind == G_ResourceKind_Texture1D ||
@ -908,295 +895,244 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
is_texture ? desc.texture.flags : is_texture ? desc.texture.flags :
desc.sampler.flags; desc.sampler.flags;
//////////////////////////////
//- Initialize heap info
D3D12_HEAP_FLAGS heap_flags = 0;
D3D12_HEAP_PROPERTIES heap_props = Zi;
b32 should_map = 0;
if (is_buffer || is_texture) if (is_buffer || is_texture)
{ {
////////////////////////////// G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu;
//- Initialize heap // Heap flags
if (flags & G_ResourceFlag_HostMemory)
G_D12_ResourceHeap *heap = 0;
if (is_buffer || is_texture)
{ {
G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu; heap_kind = G_D12_ResourceHeapKind_Cpu;
if (flags & G_ResourceFlag_HostMemory) if (flags & G_ResourceFlag_Uncached)
{ {
heap_kind = G_D12_ResourceHeapKind_Cpu; heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined;
if (flags & G_ResourceFlag_Uncached)
{
heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined;
}
}
heap = &gpu_arena->resource_heaps[heap_kind];
if (heap->d3d_heap == 0)
{
b32 is_mappable = 0;
// Initialize heap
// FIXME: Dynamic size
if (heap->d3d_heap == 0)
{
// Create d3d heap
{
D3D12_HEAP_DESC d3d_desc = Zi;
d3d_desc.SizeInBytes = Mebi(256);
if (heap_kind == G_D12_ResourceHeapKind_Cpu)
{
d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM;
d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
d3d_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
is_mappable = 1;
}
else if (heap_kind == G_D12_ResourceHeapKind_CpuWriteCombined)
{
d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM;
d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE;
d3d_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
is_mappable = 1;
}
else
{
d3d_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
}
d3d_desc.Flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
d3d_desc.Flags |= D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; // TODO: Remove this and support tier 1 resource heaps
hr = ID3D12Device_CreateHeap(G_D12.device, &d3d_desc, &IID_ID3D12Heap, (void **)&heap->d3d_heap);
heap->size = d3d_desc.SizeInBytes;
if (d3d_desc.Properties.Type == D3D12_HEAP_TYPE_DEFAULT)
{
AddGstat(DedicatedGpuArenaMemoryCommitted, heap->size);
}
else
{
AddGstat(SharedGpuArenaMemoryCommitted, heap->size);
}
}
// Map heap resource
if (is_mappable)
{
if (SUCCEEDED(hr))
{
D3D12_RESOURCE_DESC1 d3d_desc = Zi;
d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
d3d_desc.Width = heap->size;
d3d_desc.Height = 1;
d3d_desc.DepthOrArraySize = 1;
d3d_desc.MipLevels = 1;
d3d_desc.SampleDesc.Count = 1;
d3d_desc.SampleDesc.Quality = 0;
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
u64 alloc_size = 0;
u64 alloc_align = 0;
{
D3D12_RESOURCE_ALLOCATION_INFO alloc_info = Zi;
ID3D12Device_GetResourceAllocationInfo(G_D12.device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc);
alloc_size = alloc_info.SizeInBytes;
alloc_align = alloc_info.Alignment;
}
if (alloc_size > heap->size)
{
Panic(Lit("Gpu heap overflow"));
}
hr = ID3D12Device10_CreatePlacedResource2(
G_D12.device,
heap->d3d_heap,
0,
&d3d_desc,
D3D12_BARRIER_LAYOUT_UNDEFINED,
0,
0,
0,
&IID_ID3D12Resource,
(void **)&heap->d3d_mapped_resource
);
}
if (SUCCEEDED(hr))
{
D3D12_RANGE read_range = Zi;
hr = ID3D12Resource_Map(heap->d3d_mapped_resource, 0, &read_range, &heap->mapped);
}
}
if (!SUCCEEDED(hr))
{
// TODO: Don't panic
Panic(Lit("Failed to create D3D12 resource heap"));
}
}
} }
} }
heap_flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
////////////////////////////// // Heap props
//- Initialize d3d resource desc if (heap_kind == G_D12_ResourceHeapKind_Cpu)
D3D12_BARRIER_LAYOUT initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED;
D3D12_CLEAR_VALUE clear_value = Zi;
D3D12_RESOURCE_DESC1 d3d_desc = Zi;
{ {
if (is_buffer) heap_props.Type = D3D12_HEAP_TYPE_CUSTOM;
{ heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; should_map = 1;
d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
d3d_desc.Width = AlignU64(MaxU64(desc.buffer.size, 1), 4);
d3d_desc.Height = 1;
d3d_desc.DepthOrArraySize = 1;
d3d_desc.MipLevels = 1;
d3d_desc.SampleDesc.Count = 1;
d3d_desc.SampleDesc.Quality = 0;
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite);
}
if (is_texture)
{
initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout);
d3d_desc.Dimension = desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :
desc.kind == G_ResourceKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D :
D3D12_RESOURCE_DIMENSION_TEXTURE3D;
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
d3d_desc.Format = G_D12_DxgiFormatFromGpuFormat(desc.texture.format);
d3d_desc.Width = MaxI32(desc.texture.dims.x, 1);
d3d_desc.Height = MaxI32(desc.texture.dims.y, 1);
d3d_desc.DepthOrArraySize = MaxI32(desc.texture.dims.z, 1);
d3d_desc.MipLevels = MaxI32(desc.texture.mip_levels, 1);
d3d_desc.SampleDesc.Count = 1;
d3d_desc.SampleDesc.Quality = 0;
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(flags, G_ResourceFlag_AllowRenderTarget);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(flags, G_ResourceFlag_AllowDepthStencil);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS * (desc.texture.initial_layout == G_Layout_Simultaneous);
clear_value.Color[0] = desc.texture.clear_color.x,
clear_value.Color[1] = desc.texture.clear_color.y,
clear_value.Color[2] = desc.texture.clear_color.z,
clear_value.Color[3] = desc.texture.clear_color.w,
clear_value.Format = d3d_desc.Format;
}
} }
else if (heap_kind == G_D12_ResourceHeapKind_CpuWriteCombined)
u64 align_in_heap = 0;
u64 size_in_heap = 0;
{ {
D3D12_RESOURCE_ALLOCATION_INFO alloc_info = Zi; heap_props.Type = D3D12_HEAP_TYPE_CUSTOM;
ID3D12Device_GetResourceAllocationInfo(G_D12.device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc); heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE;
align_in_heap = alloc_info.Alignment; heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
size_in_heap = alloc_info.SizeInBytes; should_map = 1;
}
//////////////////////////////
//- Re-use existing resource
// u64 pos_in_heap = 0;
// ID3D12Resource *d3d_resource = 0;
// {
// resource = heap->first_reset_resource;
// if (resource)
// {
// SllQueuePop(heap->first_reset_resource, heap->last_reset_resource);
// --heap->reset_resources_count;
// b32 can_use = 1;
// can_use = can_use && resource->is_texture == is_texture;
// can_use = can_use && resource->size_in_heap >= size_in_heap;
// can_use = can_use && resource->pos_in_heap % align_in_heap == 0;
// if (can_use)
// {
// d3d_resource = resource->d3d_resource;
// pos_in_heap = resource->pos_in_heap;
// size_in_heap = resource->size_in_heap;
// heap->pos = resource->pos_in_heap + resource->size_in_heap;
// }
// else
// {
// // FIXME: Free d3d resource here?
// ZeroStruct(resource);
// }
// }
// if (!resource)
// {
// resource = PushStruct(gpu_arena->arena, G_D12_Resource);
// }
// }
u64 pos_in_heap = 0;
ID3D12Resource *d3d_resource = 0;
resource = gpu_arena->free_resources.first;
if (resource)
{
SllQueuePop(gpu_arena->free_resources.first, gpu_arena->free_resources.last);
--gpu_arena->free_resources.count;
ZeroStruct(resource);
} }
else else
{ {
resource = PushStruct(gpu_arena->arena, G_D12_Resource); heap_props.Type = D3D12_HEAP_TYPE_DEFAULT;
} }
//////////////////////////////
//- Create new d3d resource
if (!resource->d3d_resource)
{
pos_in_heap = heap->pos;
pos_in_heap = AlignU64(pos_in_heap, align_in_heap);
heap->pos = pos_in_heap + size_in_heap;
if (pos_in_heap + size_in_heap > heap->size)
{
Panic(Lit("Gpu arena overflow"));
}
hr = ID3D12Device10_CreatePlacedResource2(
G_D12.device,
heap->d3d_heap,
pos_in_heap,
&d3d_desc,
initial_layout,
(d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? &clear_value : 0,
0,
0,
&IID_ID3D12Resource,
(void **)&d3d_resource
);
}
//////////////////////////////
//- Insert resource
resource->heap = heap;
resource->pos_in_heap = pos_in_heap;
resource->size_in_heap = size_in_heap;
resource->d3d_resource = d3d_resource;
resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1;
resource->flags = flags;
if (is_buffer)
{
resource->buffer_size = desc.buffer.size;
resource->buffer_size_actual = d3d_desc.Width;
// TODO: Cache this
resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource);
}
if (is_texture)
{
resource->is_texture = 1;
resource->texture_format = desc.texture.format;
resource->texture_dims = desc.texture.dims;
resource->texture_mip_levels = d3d_desc.MipLevels;
resource->texture_layout = initial_layout;
}
SllQueuePush(heap->resources.first, heap->resources.last, resource);
++heap->resources.count;
} }
////////////////////////////// //////////////////////////////
//- Create sampler //- Initialize d3d resource desc
D3D12_BARRIER_LAYOUT d3d_initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED;
D3D12_CLEAR_VALUE clear_value = Zi;
D3D12_RESOURCE_DESC1 d3d_desc = Zi;
if (is_buffer)
{
u64 min_buffer_size = 1024;
d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
d3d_desc.Width = AlignU64(MaxU64(desc.buffer.size, min_buffer_size), 4);
d3d_desc.Height = 1;
d3d_desc.DepthOrArraySize = 1;
d3d_desc.MipLevels = 1;
d3d_desc.SampleDesc.Count = 1;
d3d_desc.SampleDesc.Quality = 0;
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite);
}
else if (is_texture)
{
d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout);
d3d_desc.Dimension =
desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :
desc.kind == G_ResourceKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D :
D3D12_RESOURCE_DIMENSION_TEXTURE3D;
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
d3d_desc.Format = G_D12_DxgiFormatFromGpuFormat(desc.texture.format);
d3d_desc.Width = MaxI32(desc.texture.dims.x, 1);
d3d_desc.Height = MaxI32(desc.texture.dims.y, 1);
d3d_desc.DepthOrArraySize = MaxI32(desc.texture.dims.z, 1);
d3d_desc.MipLevels = MaxI32(desc.texture.mip_levels, 1);
d3d_desc.SampleDesc.Count = 1;
d3d_desc.SampleDesc.Quality = 0;
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(flags, G_ResourceFlag_AllowRenderTarget);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(flags, G_ResourceFlag_AllowDepthStencil);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS * (desc.texture.initial_layout == G_Layout_Simultaneous);
clear_value.Color[0] = desc.texture.clear_color.x,
clear_value.Color[1] = desc.texture.clear_color.y,
clear_value.Color[2] = desc.texture.clear_color.z,
clear_value.Color[3] = desc.texture.clear_color.w,
clear_value.Format = d3d_desc.Format;
}
//////////////////////////////
//- Check for reset-resource reusability
// Pop reset resource
resource = gpu_arena->reset_resources.first;
b32 is_reusing = 0;
if (resource)
{
DllQueueRemove(gpu_arena->reset_resources.first, gpu_arena->reset_resources.last, resource);
--gpu_arena->reset_resources.count;
D3D12_RESOURCE_DESC1 reset_d3d_desc = resource->d3d_desc;
D3D12_RESOURCE_DESC1 compare_d3d_desc = reset_d3d_desc;
// Buffers can be reused if size fits
if (d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && reset_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
{
if (reset_d3d_desc.Width >= d3d_desc.Width)
{
compare_d3d_desc.Width = d3d_desc.Width;
}
}
// TODO: Less stringent reset constraints. We could even create textures as placed resources and reset their underlying heaps.
is_reusing = MatchStruct(&compare_d3d_desc, &d3d_desc);
if (!is_reusing)
{
// Push releasable to command list
{
G_D12_Releasable *release = 0;
{
Lock lock = LockE(&G_D12.free_releases_mutex);
{
release = G_D12.free_releases.first;
if (release)
{
SllQueuePop(G_D12.free_releases.first, G_D12.free_releases.last);
}
else
{
release = PushStructNoZero(perm, G_D12_Releasable);
}
}
Unlock(&lock);
}
ZeroStruct(release);
release->d3d_resource = resource->d3d_resource;
SllQueuePush(cl->releases.first, cl->releases.last, release);
}
ZeroStruct(resource);
}
}
else
{
resource = PushStruct(gpu_arena->arena, G_D12_Resource);
}
if (!is_reusing)
{
resource->d3d_desc = d3d_desc;
}
//////////////////////////////
//- Init resource
resource->flags = flags;
resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1;
if (is_buffer)
{
resource->buffer_size = desc.buffer.size;
resource->buffer_size_actual = d3d_desc.Width;
}
if (is_texture)
{
resource->is_texture = is_texture;
resource->texture_format = desc.texture.format;
resource->texture_dims = desc.texture.dims;
resource->texture_mip_levels = d3d_desc.MipLevels;
}
if (is_sampler) if (is_sampler)
{ {
resource = PushStruct(gpu_arena->arena, G_D12_Resource);
resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1;
resource->sampler_desc = desc.sampler; resource->sampler_desc = desc.sampler;
} }
DllQueuePush(gpu_arena->resources.first, gpu_arena->resources.last, resource);
++gpu_arena->resources.count;
//////////////////////////////
//- Allocate D3D12 resource
if ((is_buffer || is_texture) && !resource->d3d_resource)
{
D3D12_CLEAR_VALUE *clear_value_arg = 0;
if (d3d_desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))
{
clear_value_arg = &clear_value;
}
HRESULT hr = ID3D12Device10_CreateCommittedResource3(
G_D12.device,
&heap_props,
heap_flags,
&resource->d3d_desc,
d3d_initial_layout,
clear_value_arg,
0, // pProtectedSession
0, // NumCastableFormats
0, // pCastableFormats
&IID_ID3D12Resource,
(void **)&resource->d3d_resource
);
Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1);
if (is_texture)
{
resource->cmdlist_texture_layout = d3d_initial_layout;
}
if (!SUCCEEDED(hr))
{
// TODO: Don't panic
Panic(Lit("Failed to allocate D3D12 resource"));
}
if (is_buffer)
{
resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(resource->d3d_resource);
}
}
if (should_map && !resource->mapped)
{
D3D12_RANGE read_range = Zi;
HRESULT hr = ID3D12Resource_Map(resource->d3d_resource, 0, &read_range, &resource->mapped);
if (!SUCCEEDED(hr))
{
// TODO: Don't panic
Panic(Lit("Failed to map D3D12 resource"));
}
}
//////////////////////////////
//- Transition layout if reusing
if (is_reusing)
{
G_DumbMemoryLayoutSync(cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), desc.texture.initial_layout);
}
return G_D12_MakeHandle(G_ResourceHandle, resource); return G_D12_MakeHandle(G_ResourceHandle, resource);
} }
@ -1227,7 +1163,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
if (queue_commit_completion >= descriptor->completion_queue_target) if (queue_commit_completion >= descriptor->completion_queue_target)
{ {
// Descriptor no longer in use by gpu, reuse it // Descriptor no longer in use by gpu, reuse it
SllQueuePop(descriptors->first, descriptors->last); DllQueueRemove(descriptors->first, descriptors->last, descriptor);
--descriptors->count; --descriptors->count;
index = descriptor->index; index = descriptor->index;
} }
@ -1246,7 +1182,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
if (heap->first_free) if (heap->first_free)
{ {
descriptor = heap->first_free; descriptor = heap->first_free;
SllStackPop(heap->first_free); DllStackRemove(heap->first_free, descriptor);
index = descriptor->index; index = descriptor->index;
} }
else else
@ -1270,7 +1206,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
descriptor->handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); descriptor->handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size);
descriptor->heap = heap; descriptor->heap = heap;
SllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor); DllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor);
++gpu_arena->descriptors.count; ++gpu_arena->descriptors.count;
return descriptor; return descriptor;
@ -1462,8 +1398,7 @@ i32 G_CountDepth(G_ResourceHandle texture)
void *G_HostPointerFromResource(G_ResourceHandle resource_handle) void *G_HostPointerFromResource(G_ResourceHandle resource_handle)
{ {
G_D12_Resource *resource = G_D12_ResourceFromHandle(resource_handle); G_D12_Resource *resource = G_D12_ResourceFromHandle(resource_handle);
G_D12_ResourceHeap *heap = resource->heap; return resource->mapped;
return ((u8 *)heap->mapped) + resource->pos_in_heap;
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -1471,7 +1406,6 @@ void *G_HostPointerFromResource(G_ResourceHandle resource_handle)
G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl) G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl)
{ {
// Grab chunk // Grab chunk
G_D12_CmdChunk *chunk = cl->last_cmd_chunk; G_D12_CmdChunk *chunk = cl->last_cmd_chunk;
{ {
@ -1932,12 +1866,12 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
{ {
G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource);
barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
layout_before = resource->texture_layout; layout_before = resource->cmdlist_texture_layout;
layout_after = resource->texture_layout; layout_after = resource->cmdlist_texture_layout;
if (desc.layout != G_Layout_NoChange) if (desc.layout != G_Layout_NoChange)
{ {
layout_after = G_D12_BarrierLayoutFromLayout(desc.layout); layout_after = G_D12_BarrierLayoutFromLayout(desc.layout);
resource->texture_layout = layout_after; resource->cmdlist_texture_layout = layout_after;
} }
} }
@ -2396,7 +2330,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
n = next; n = next;
} }
// Attach completion info to descriptors // Attach completion info to reset descriptors
for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;) for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;)
{ {
G_D12_Descriptor *next = d->next; G_D12_Descriptor *next = d->next;
@ -2405,12 +2339,41 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
d->completion_queue_kind = queue_kind; d->completion_queue_kind = queue_kind;
d->completion_queue_target = completion_target; d->completion_queue_target = completion_target;
G_D12_DescriptorList *gpu_arena_reset_descriptors_list = &gpu_arena->reset_descriptors_by_heap[d->heap->kind]; G_D12_DescriptorList *gpu_arena_reset_descriptors_list = &gpu_arena->reset_descriptors_by_heap[d->heap->kind];
SllQueuePush(gpu_arena_reset_descriptors_list->first, gpu_arena_reset_descriptors_list->last, d); DllQueuePush(gpu_arena_reset_descriptors_list->first, gpu_arena_reset_descriptors_list->last, d);
++gpu_arena_reset_descriptors_list->count; ++gpu_arena_reset_descriptors_list->count;
} }
d = next; d = next;
} }
// Attach completion info to releasables & submit for release
if (cl->releases.first)
{
// Attach completion info
for (G_D12_Releasable *release = cl->releases.first; release; release = release->next)
{
release->completion_queue_kind = queue_kind;
release->completion_queue_target = completion_target;
}
 // Submit releases
Lock lock = LockE(&G_D12.pending_releases_mutex);
{
if (G_D12.pending_releases.last)
{
G_D12.pending_releases.last->next = cl->releases.last;
}
else
{
G_D12.pending_releases.first = cl->releases.last;
}
G_D12.pending_releases.last = cl->releases.last;
}
Unlock(&lock);
}
// // Attach completion info to resources // // Attach completion info to resources
// for (G_D12_Resource *r = cl->reset_resources.first; r;) // for (G_D12_Resource *r = cl->reset_resources.first; r;)
// { // {
@ -2421,7 +2384,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
// r->completion_queue_kind = queue->kind; // r->completion_queue_kind = queue->kind;
// r->completion_queue_target = completion_target; // r->completion_queue_target = completion_target;
// G_D12_ResourceList *heap_reset_resources_list = &heap->reset_resources; // G_D12_ResourceList *heap_reset_resources_list = &heap->reset_resources;
// SllQueuePush(heap_reset_resources_list->first, heap_reset_resourecs_list->last, r); // DllQueuePush(heap_reset_resources_list->first, heap_reset_resourecs_list->last, r);
// ++heap_reset_resources_list->count; // ++heap_reset_resources_list->count;
// } // }
// r = next; // r = next;
@ -2843,17 +2806,17 @@ G_Stats G_QueryStats(void)
{ {
DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi; DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi;
IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info); IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info);
result.local_committed = info.CurrentUsage; result.device_committed = info.CurrentUsage;
result.local_budget = info.Budget; result.device_budget = info.Budget;
} }
{ {
DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi; DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi;
IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info); IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info);
result.non_local_budget = info.Budget; result.host_budget = info.Budget;
result.non_local_committed = info.CurrentUsage; result.host_committed = info.CurrentUsage;
} }
result.driver_resources_allocated = Atomic64Fetch(&G_D12.driver_resources_allocated); result.arenas_count = Atomic64Fetch(&G_D12.arenas_count);
result.driver_descriptors_allocated = Atomic64Fetch(&G_D12.driver_descriptors_allocated); result.cumulative_nonreuse_count = Atomic64Fetch(&G_D12.cumulative_nonreuse_count);
return result; return result;
} }
@ -3003,15 +2966,17 @@ G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Forma
Panic(Lit("Failed to retrieve swapchain buffer")); Panic(Lit("Failed to retrieve swapchain buffer"));
} }
ZeroStruct(backbuffer); ZeroStruct(backbuffer);
backbuffer->d3d_resource = d3d_resource;
backbuffer->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1;
backbuffer->flags = G_ResourceFlag_AllowRenderTarget; backbuffer->flags = G_ResourceFlag_AllowRenderTarget;
backbuffer->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1;
ID3D12Resource_GetDesc(d3d_resource, (D3D12_RESOURCE_DESC *)&backbuffer->d3d_desc);
backbuffer->d3d_resource = d3d_resource;
backbuffer->is_texture = 1; backbuffer->is_texture = 1;
backbuffer->texture_format = format; backbuffer->texture_format = format;
backbuffer->texture_dims = VEC3I32(size.x, size.y, 1); backbuffer->texture_dims = VEC3I32(size.x, size.y, 1);
backbuffer->texture_mip_levels = 1; backbuffer->texture_mip_levels = 1;
backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT; backbuffer->cmdlist_texture_layout = D3D12_BARRIER_LAYOUT_PRESENT;
backbuffer->swapchain = swapchain; backbuffer->swapchain = swapchain;
} }
} }
@ -3086,9 +3051,6 @@ void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane)
// FIXME: Remove this // FIXME: Remove this
SleepSeconds(0.100); SleepSeconds(0.100);
// Copy print-buffers to readback // Copy print-buffers to readback
for (G_QueueKind queue_kind = 0; queue_kind < G_NumQueues; ++queue_kind) for (G_QueueKind queue_kind = 0; queue_kind < G_NumQueues; ++queue_kind)
{ {
@ -3279,3 +3241,83 @@ void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane)
} }
} }
} }
////////////////////////////////////////////////////////////
//~ Async
// Async worker tick for deferred GPU resource eviction.
//
// Drains the shared pending-release queue into a worker-local list, releases every
// D3D12 resource whose owning queue has reached its completion target, and recycles
// the spent releasable nodes onto the shared free list. Only lane 0 does the work;
// `base_async_lane_frame` is currently unused.
void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame)
{
 G_D12_AsyncCtx *async = &G_D12.async_ctx;
 // TODO: Investigate if we gain anything by going wide here (resource release might be exclusive driver-side)
 if (lane->idx == 0)
 {
  // Pop pending releases from the shared queue onto the worker-local list
  {
   Lock lock = LockE(&G_D12.pending_releases_mutex);
   {
    if (G_D12.pending_releases.first)
    {
     if (async->pending_releases.last)
     {
      async->pending_releases.last->next = G_D12.pending_releases.first;
     }
     else
     {
      async->pending_releases.first = G_D12.pending_releases.first;
     }
     async->pending_releases.last = G_D12.pending_releases.last;
     G_D12.pending_releases.first = 0;
     G_D12.pending_releases.last = 0;
    }
   }
   Unlock(&lock);
  }
  // Release resources until we reach an uncompleted one. The list is in submission
  // order, so the first release whose queue target has not been reached implies all
  // later ones are also still in flight — stop there.
  G_D12_Releasable *release = async->pending_releases.first;
  if (release)
  {
   G_QueueCompletions completions = G_CompletionTargetsFromQueues(G_QueueMask_All);
   while (release)
   {
    G_D12_Releasable *next = release->next;
    if (completions.v[release->completion_queue_kind] >= release->completion_queue_target)
    {
     SllQueuePop(async->pending_releases.first, async->pending_releases.last);
     if (release->d3d_resource)
     {
      ID3D12Resource_Release(release->d3d_resource);
     }
     SllQueuePush(async->free_releases.first, async->free_releases.last, release);
    }
    else
    {
     break;
    }
    release = next;
   }
  }
  // Push spent releasable nodes back onto the shared free list.
  // NOTE: guard on free_releases (the list we are flushing), not pending_releases —
  // the old guard could zero G_D12.free_releases.last while the shared list still
  // had entries, corrupting it.
  if (async->free_releases.first)
  {
   Lock lock = LockE(&G_D12.free_releases_mutex);
   {
    if (G_D12.free_releases.last)
    {
     G_D12.free_releases.last->next = async->free_releases.first;
    }
    else
    {
     G_D12.free_releases.first = async->free_releases.first;
    }
    G_D12.free_releases.last = async->free_releases.last;
    async->free_releases.first = 0;
    async->free_releases.last = 0;
   }
   Unlock(&lock);
  }
 }
}

View File

@ -63,26 +63,27 @@ Struct(G_D12_PipelineBin)
Struct(G_D12_Resource) Struct(G_D12_Resource)
{ {
G_D12_Resource *next; G_D12_Resource *next;
G_D12_Resource *prev;
struct G_D12_ResourceHeap *heap;
u64 pos_in_heap;
u64 size_in_heap;
ID3D12Resource *d3d_resource;
u64 uid;
G_ResourceFlag flags; G_ResourceFlag flags;
u64 uid;
// D3D12 resource
D3D12_RESOURCE_DESC1 d3d_desc;
ID3D12Resource *d3d_resource;
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
void *mapped;
// Buffer info // Buffer info
u64 buffer_size; u64 buffer_size;
u64 buffer_size_actual; u64 buffer_size_actual;
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
// Texture info // Texture info
b32 is_texture; b32 is_texture;
G_Format texture_format; G_Format texture_format;
Vec3I32 texture_dims; Vec3I32 texture_dims;
i32 texture_mip_levels; i32 texture_mip_levels;
D3D12_BARRIER_LAYOUT texture_layout; D3D12_BARRIER_LAYOUT cmdlist_texture_layout;
// Sampler info // Sampler info
G_SamplerDesc sampler_desc; G_SamplerDesc sampler_desc;
@ -128,6 +129,7 @@ Struct(G_D12_DescriptorHeap)
Struct(G_D12_Descriptor) Struct(G_D12_Descriptor)
{ {
G_D12_Descriptor *next; G_D12_Descriptor *next;
G_D12_Descriptor *prev;
struct G_D12_Arena *gpu_arena; struct G_D12_Arena *gpu_arena;
G_QueueKind completion_queue_kind; G_QueueKind completion_queue_kind;
@ -162,23 +164,6 @@ Enum(G_D12_ResourceHeapKind)
G_D12_ResourceHeapKind_COUNT G_D12_ResourceHeapKind_COUNT
}; };
Struct(G_D12_ResourceHeap)
{
G_D12_ResourceHeapKind kind;
struct GPU_D12_Arena *gpu_arena;
ID3D12Heap *d3d_heap;
ID3D12Resource *d3d_mapped_resource;
void *mapped;
G_D12_ResourceList resources;
G_D12_ResourceList reset_resources;
u64 pos;
u64 size;
};
Struct(G_D12_Arena) Struct(G_D12_Arena)
{ {
Arena *arena; Arena *arena;
@ -186,9 +171,9 @@ Struct(G_D12_Arena)
G_D12_DescriptorList descriptors; G_D12_DescriptorList descriptors;
G_D12_DescriptorList reset_descriptors_by_heap[G_D12_DescriptorHeapKind_COUNT]; G_D12_DescriptorList reset_descriptors_by_heap[G_D12_DescriptorHeapKind_COUNT];
G_D12_ResourceList free_resources; G_D12_ResourceList resources;
G_D12_ResourceList reset_resources;
G_D12_ResourceHeap resource_heaps[G_D12_ResourceHeapKind_COUNT]; // G_D12_ResourceList free_resources;
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -277,6 +262,25 @@ Struct(G_D12_RawCommandList)
G_D12_Descriptor *rtv_clear_descriptor; G_D12_Descriptor *rtv_clear_descriptor;
}; };
////////////////////////////////////////////////////////////
//~ Releasable types
// One deferred-release record for a D3D12 resource: the resource may only be
// Release()'d once the named queue reaches `completion_queue_target`.
Struct(G_D12_Releasable)
{
 // Singly-linked; nodes live on G_D12.pending_releases / G_D12.free_releases
 // and the per-cmdlist / per-async-worker lists
 G_D12_Releasable *next;
 // Queue + fence value that must complete before release is safe
 G_QueueKind completion_queue_kind;
 i64 completion_queue_target;
 // Resource to release once the target is met (may be null — checked before Release)
 ID3D12Resource *d3d_resource;
};

// Head/tail pair for a singly-linked list of releasables
Struct(G_D12_ReleasableList)
{
 G_D12_Releasable *first;
 G_D12_Releasable *last;
};
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Command list types //~ Command list types
@ -379,6 +383,8 @@ Struct(G_D12_CmdList)
G_QueueKind queue_kind; G_QueueKind queue_kind;
G_D12_DescriptorList reset_descriptors; G_D12_DescriptorList reset_descriptors;
G_D12_ReleasableList releases;
G_D12_StagingRegionNode *first_staging_region; G_D12_StagingRegionNode *first_staging_region;
G_D12_StagingRegionNode *last_staging_region; G_D12_StagingRegionNode *last_staging_region;
@ -410,11 +416,20 @@ Struct(G_D12_Swapchain)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ State types //~ State types
// State owned by the async eviction worker (see G_D12_TickAsync): releasables are
// pulled from G_D12.pending_releases, processed locally, then recycled back to
// G_D12.free_releases.
Struct(G_D12_AsyncCtx)
{
 G_D12_ReleasableList pending_releases; // popped from the shared queue, awaiting GPU completion
 G_D12_ReleasableList free_releases; // released nodes waiting to be flushed to the shared free list
};
Struct(G_D12_Ctx) Struct(G_D12_Ctx)
{ {
Atomic64Padded resource_creation_gen; Atomic64Padded resource_creation_gen;
// Stats // Stats
Atomic64 arenas_count;
Atomic64 cumulative_nonreuse_count;
Atomic64 driver_resources_allocated; Atomic64 driver_resources_allocated;
Atomic64 driver_descriptors_allocated; Atomic64 driver_descriptors_allocated;
@ -446,6 +461,15 @@ Struct(G_D12_Ctx)
IDXGIFactory6 *factory; IDXGIFactory6 *factory;
IDXGIAdapter3 *adapter; IDXGIAdapter3 *adapter;
ID3D12Device10 *device; ID3D12Device10 *device;
// Release-queue
Mutex pending_releases_mutex;
Mutex free_releases_mutex;
G_D12_ReleasableList pending_releases;
G_D12_ReleasableList free_releases;
// Async
G_D12_AsyncCtx async_ctx;
}; };
Struct(G_D12_ThreadLocalCtx) Struct(G_D12_ThreadLocalCtx)
@ -509,3 +533,8 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size);
//~ Collection worker //~ Collection worker
void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane); void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane);
////////////////////////////////////////////////////////////
//~ Async
void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame);

View File

@ -155,6 +155,7 @@ S_Shape S_MulXformShape(Xform xf, S_Shape shape)
Vec2 S_SupportPointFromShape(S_Shape shape, Vec2 dir) Vec2 S_SupportPointFromShape(S_Shape shape, Vec2 dir)
{ {
// FIXME: Properly handle rounded polygons
Vec2 result = Zi; Vec2 result = Zi;
Vec2 dir_norm = NormVec2(dir); Vec2 dir_norm = NormVec2(dir);
f32 max_dot = -Inf; f32 max_dot = -Inf;

View File

@ -237,7 +237,8 @@ V_WidgetTheme V_GetWidgetTheme(void)
theme.icon_font = UI_BuiltinIconFont(); theme.icon_font = UI_BuiltinIconFont();
// theme.font_size = 14; // theme.font_size = 14;
theme.font_size = TweakFloat("Font size", 14, 6, 50, .precision = 0); // theme.font_size = TweakFloat("Font size", 14, 6, 50, .precision = 0);
theme.font_size = TweakFloat("Font size", 14, 6, 50, .precision = 2);
theme.h1 = 2.00; theme.h1 = 2.00;
theme.h2 = 1.50; theme.h2 = 1.50;
theme.h3 = 1.25; theme.h3 = 1.25;
@ -1986,6 +1987,8 @@ void V_TickForever(WaveLaneCtx *lane)
////////////////////////////// //////////////////////////////
//- Build debug info UI //- Build debug info UI
G_Stats gpu_stats = G_QueryStats();
if (frame->show_console) if (frame->show_console)
{ {
UI_Key dbg_box = UI_KeyF("Debug box"); UI_Key dbg_box = UI_KeyF("Debug box");
@ -2043,9 +2046,10 @@ void V_TickForever(WaveLaneCtx *lane)
UI_BuildLabelF("GPU:"); UI_BuildLabelF("GPU:");
UI_Pop(FontSize); UI_Pop(FontSize);
} }
UI_BuildLabelF(" Arenas: %F", FmtSint(GetGstat(NumGpuArenas))); UI_BuildLabelF(" Arenas: %F", FmtUint(gpu_stats.arenas_count));
UI_BuildLabelF(" Dedicated arena memory committed: %F MiB", FmtFloat((f64)GetGstat(DedicatedGpuArenaMemoryCommitted) / 1024 / 1024)); UI_BuildLabelF(" Device memory usage: %F MiB", FmtFloat((f64)gpu_stats.device_committed / 1024 / 1024));
UI_BuildLabelF(" Shared arena memory committed: %F MiB", FmtFloat((f64)GetGstat(SharedGpuArenaMemoryCommitted) / 1024 / 1024)); UI_BuildLabelF(" Host memory usage: %F MiB", FmtFloat((f64)gpu_stats.host_committed / 1024 / 1024));
UI_BuildLabelF(" Non-reuse tally: %F", FmtUint(gpu_stats.cumulative_nonreuse_count));
} }
UI_BuildSpacer(UI_PIX(padding, 1), Axis_Y); UI_BuildSpacer(UI_PIX(padding, 1), Axis_Y);
} }

View File

@ -1720,8 +1720,6 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
////////////////////////////// //////////////////////////////
//- Dispatch shaders //- Dispatch shaders
G_DumbMemoryLayoutSync(frame->cl, draw_target, G_Layout_DirectQueue_RenderTargetWrite);
//- Clear pass //- Clear pass
{ {
G_ClearRenderTarget(frame->cl, draw_target, VEC4(0, 0, 0, 0)); G_ClearRenderTarget(frame->cl, draw_target, VEC4(0, 0, 0, 0));
@ -1729,8 +1727,6 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
//- Rect pass //- Rect pass
G_DumbMemoryLayoutSync(frame->cl, draw_target, G_Layout_DirectQueue_RenderTargetWrite);
if (rects_count > 0) if (rects_count > 0)
{ {
// Render rects // Render rects