switch dx12 from using placed resources to committed resources for now. add async gpu resource eviction.
This commit is contained in:
parent
3744bf25a2
commit
235cf72018
BIN
ppswap/pp_sim.swp.swp
Normal file
BIN
ppswap/pp_sim.swp.swp
Normal file
Binary file not shown.
BIN
ppswap/pp_vis.swp.swp
Normal file
BIN
ppswap/pp_vis.swp.swp
Normal file
Binary file not shown.
@ -699,6 +699,17 @@
|
||||
};
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Debug types
|
||||
|
||||
#if IsLanguageC
|
||||
// Fixed-capacity snapshot of a call stack, as produced by CaptureCallstack.
Struct(Callstack)
|
||||
{
|
||||
// Number of entries in `frames` that were written by the capture
u64 count;
|
||||
// Captured frame addresses; a single capture records at most 32 frames
void *frames[32];
|
||||
};
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Shader linkage types
|
||||
|
||||
@ -757,7 +768,6 @@
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Core api
|
||||
|
||||
@ -765,6 +775,7 @@
|
||||
StringList GetRawCommandline(void);
|
||||
void Echo(String msg);
|
||||
b32 Panic(String msg);
|
||||
Callstack CaptureCallstack(u64 skip_frames);
|
||||
b32 IsRunningInDebugger(void);
|
||||
i64 TimeNs(void);
|
||||
void TrueRand(String buffer);
|
||||
|
||||
@ -51,11 +51,16 @@ void AsyncWorkerEntryPoint(WaveLaneCtx *lane)
|
||||
{
|
||||
// Wait for signal
|
||||
{
|
||||
i64 passive_timeout_ns = NsFromSeconds(0.25);
|
||||
i64 now_ns = TimeNs();
|
||||
i64 passive_run_at_ns = now_ns + passive_timeout_ns;
|
||||
|
||||
i64 cur_signal = Atomic64Fetch(&Base.async.signal.v);
|
||||
while (cur_signal <= w->last_seen_signal)
|
||||
while (cur_signal <= w->last_seen_signal && (passive_run_at_ns - now_ns) > 1000000)
|
||||
{
|
||||
FutexYieldNeq(&Base.async.signal.v, &cur_signal, sizeof(cur_signal));
|
||||
FutexYieldNeq(&Base.async.signal.v, &cur_signal, sizeof(cur_signal), passive_run_at_ns - now_ns);
|
||||
cur_signal = Atomic64Fetch(&Base.async.signal.v);
|
||||
now_ns = TimeNs();
|
||||
}
|
||||
w->last_seen_signal = cur_signal;
|
||||
}
|
||||
@ -76,6 +81,7 @@ void AsyncWorkerEntryPoint(WaveLaneCtx *lane)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
WaveSync(lane);
|
||||
|
||||
//////////////////////////////
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
//~ Async types
|
||||
|
||||
Struct(AsyncFrameLaneCtx);
|
||||
typedef void AsyncTickCallbackFunc(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame);
|
||||
typedef void AsyncTickCallbackFunc(WaveLaneCtx *lane, AsyncFrameLaneCtx *async_lane_frame_ctx);
|
||||
|
||||
Struct(AsyncTickCallback)
|
||||
{
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
// Similar to Win32 WaitOnAddress & WakeByAddressAll
|
||||
// i.e. - Wait (with possible spurious wakeups) until value at address != cmp
|
||||
|
||||
void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size);
|
||||
void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns);
|
||||
void FutexWakeNeq(void *addr);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -16,5 +16,5 @@ void FutexWakeNeq(void *addr);
|
||||
// wake when the futex progresses past the specified target value, rather than
|
||||
// wake every time the futex is modified.
|
||||
|
||||
void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size);
|
||||
void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns);
|
||||
void FutexWakeGte(void *addr);
|
||||
|
||||
@ -13,10 +13,6 @@ Struct(GstatCtx)
|
||||
Atomic64Padded ArenaMemoryCommitted;
|
||||
Atomic64Padded ArenaMemoryReserved;
|
||||
|
||||
Atomic64Padded NumGpuArenas;
|
||||
Atomic64Padded DedicatedGpuArenaMemoryCommitted;
|
||||
Atomic64Padded SharedGpuArenaMemoryCommitted;
|
||||
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
@ -154,6 +154,7 @@ String StringFromFloat(Arena *arena, f64 src, u32 precision)
|
||||
if (c == '0')
|
||||
{
|
||||
result.len -= 1;
|
||||
PopBytesNoCopy(arena, 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@ -48,7 +48,7 @@ Lock ExclusiveLockEx(Mutex *m, i32 spin)
|
||||
}
|
||||
else
|
||||
{
|
||||
FutexYieldNeq(&m->v, &v, 4);
|
||||
FutexYieldNeq(&m->v, &v, 4, I64Max);
|
||||
spin_cnt = 0;
|
||||
}
|
||||
}
|
||||
@ -94,7 +94,7 @@ Lock SharedLockEx(Mutex *m, i32 spin)
|
||||
}
|
||||
else
|
||||
{
|
||||
FutexYieldNeq(&m->v, &v, 4);
|
||||
FutexYieldNeq(&m->v, &v, 4, I64Max);
|
||||
spin_cnt = 0;
|
||||
}
|
||||
}
|
||||
@ -143,7 +143,7 @@ void YieldOnCv(Cv *cv, Lock *l)
|
||||
{
|
||||
Unlock(l);
|
||||
{
|
||||
FutexYieldNeq(&cv->wake_gen, &old_wake_gen, sizeof(old_wake_gen));
|
||||
FutexYieldNeq(&cv->wake_gen, &old_wake_gen, sizeof(old_wake_gen), I64Max);
|
||||
}
|
||||
if (exclusive)
|
||||
{
|
||||
@ -195,8 +195,37 @@ i64 YieldOnFence(Fence *fence, i64 target)
|
||||
i64 v = Atomic64Fetch(&fence->v.v);
|
||||
while (v < target)
|
||||
{
|
||||
FutexYieldGte(&fence->v.v, &v, sizeof(v));
|
||||
FutexYieldGte(&fence->v.v, &v, sizeof(v), I64Max);
|
||||
v = Atomic64Fetch(&fence->v.v);
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Lazy init
|
||||
|
||||
// Enters a lazy-initialization barrier.
//
// Returns 1 to the single caller that moved the barrier from state 0
// (untouched) to state 1 (initializing); that caller is expected to perform
// the initialization and then call EndLazyInit. Every other caller returns 0,
// and only does so once the barrier has been observed in state 2 (initialized).
//
// NOTE(review): this relies on Atomic32FetchTestSet(v, 0, 1) behaving as a
// compare-and-swap that returns the previous value - confirm against its definition.
b32 BeginLazyInit(LazyInitBarrier *barrier)
{
  Atomic32 *state = &barrier->v.v;

  // Fast path: initialization already finished
  if (Atomic32Fetch(state) == 2)
  {
    return 0;
  }

  // Try to claim the initializer role
  if (Atomic32FetchTestSet(state, 0, 1) == 0)
  {
    return 1;
  }

  // Another caller is initializing; spin until it publishes completion
  for (;;)
  {
    if (Atomic32Fetch(state) == 2)
    {
      break;
    }
    _mm_pause();
  }
  return 0;
}
|
||||
|
||||
// Marks a lazy-initialization barrier as initialized by storing state 2.
// Callers spinning inside BeginLazyInit exit once they observe this value.
void EndLazyInit(LazyInitBarrier *barrier)
{
  Atomic32 *state = &barrier->v.v;
  Atomic32Set(state, 2);
}
|
||||
|
||||
@ -39,6 +39,17 @@ Struct(Fence)
|
||||
Atomic64Padded v;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Lazy init types
|
||||
|
||||
// State word shared between BeginLazyInit and EndLazyInit.
// BeginLazyInit moves it 0 -> 1 for the caller that becomes the initializer;
// EndLazyInit stores 2 to signal that initialization has finished.
Struct(LazyInitBarrier)
|
||||
{
|
||||
// 0 = untouched
|
||||
// 1 = initializing
|
||||
// 2 = initialized
|
||||
Atomic32Padded v;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Mutex
|
||||
|
||||
@ -74,3 +85,9 @@ i64 FetchSetFence(Fence *fence, i64 x);
|
||||
i64 FetchAddFence(Fence *fence, i64 x);
|
||||
|
||||
i64 YieldOnFence(Fence *fence, i64 target);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Lazy init
|
||||
|
||||
b32 BeginLazyInit(LazyInitBarrier *barrier);
|
||||
void EndLazyInit(LazyInitBarrier *barrier);
|
||||
|
||||
@ -27,7 +27,7 @@ void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count)
|
||||
}
|
||||
else
|
||||
{
|
||||
FutexYieldNeq(&wave->sync_gen.v, &sync_gen, sizeof(sync_gen));
|
||||
FutexYieldNeq(&wave->sync_gen.v, &sync_gen, sizeof(sync_gen), I64Max);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -62,7 +62,7 @@ void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broad
|
||||
}
|
||||
else
|
||||
{
|
||||
FutexYieldNeq(&wave->ack_gen.v, &ack_gen, sizeof(ack_gen));
|
||||
FutexYieldNeq(&wave->ack_gen.v, &ack_gen, sizeof(ack_gen), I64Max);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -82,7 +82,7 @@ void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broad
|
||||
}
|
||||
else
|
||||
{
|
||||
FutexYieldNeq(&wave->broadcast_gen.v, &seen_broadcast_gen, sizeof(seen_broadcast_gen));
|
||||
FutexYieldNeq(&wave->broadcast_gen.v, &seen_broadcast_gen, sizeof(seen_broadcast_gen), I64Max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -85,6 +85,18 @@ b32 Panic(String msg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Captures the calling thread's current call stack.
//
// skip_frames: number of additional frames to omit from the top of the stack.
//              This function's own frame is always omitted (the `1 +` below).
//
// Returns a Callstack whose `count` is the number of frames written to
// `frames`. Slots past `count` are zero.
Callstack CaptureCallstack(u64 skip_frames)
{
  // Zero-initialize so unused frame slots are null rather than stack garbage;
  // this keeps the returned value deterministic when it is copied or compared.
  Callstack result = {0};
  result.count = CaptureStackBackTrace(
    1 + skip_frames,
    countof(result.frames),
    result.frames,
    0 // BackTraceHash
  );
  return result;
}
|
||||
|
||||
b32 IsRunningInDebugger(void)
|
||||
{
|
||||
return IsDebuggerPresent();
|
||||
|
||||
@ -43,6 +43,7 @@
|
||||
#pragma comment(lib, "synchronization")
|
||||
#pragma comment(lib, "avrt")
|
||||
#pragma comment(lib, "ws2_32.lib")
|
||||
#pragma comment(lib, "advapi32.lib")
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Embedded data iter types
|
||||
|
||||
@ -1,9 +1,21 @@
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookimpl Not-equal futex ops
|
||||
|
||||
void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size)
|
||||
void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns)
|
||||
{
|
||||
WaitOnAddress(addr, cmp, cmp_size, INFINITE);
|
||||
if (timeout_ns > 0)
|
||||
{
|
||||
DWORD timeout_ms;
|
||||
if (timeout_ns >= 3153600000000000000ull) // ~100 years
|
||||
{
|
||||
timeout_ms = INFINITE;
|
||||
}
|
||||
else
|
||||
{
|
||||
timeout_ms = (DWORD)(SecondsFromNs(timeout_ns) * 1000.0);
|
||||
}
|
||||
WaitOnAddress(addr, cmp, cmp_size, timeout_ms);
|
||||
}
|
||||
}
|
||||
|
||||
void FutexWakeNeq(void *addr)
|
||||
@ -14,10 +26,10 @@ void FutexWakeNeq(void *addr)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookimpl Greater-than-or-equal futex ops
|
||||
|
||||
// Yields on the futex at `addr` for at most `timeout_ns` nanoseconds.
// This currently forwards straight to FutexYieldNeq, so it can return as soon
// as the value differs from `*cmp`, not only once a target value is reached;
// callers should re-check the value after it returns.
void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size)
|
||||
void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns)
|
||||
{
|
||||
// TODO: Actually implement this. Just emulating via neq for now.
|
||||
FutexYieldNeq(addr, cmp, cmp_size);
|
||||
FutexYieldNeq(addr, cmp, cmp_size, timeout_ns);
|
||||
}
|
||||
|
||||
void FutexWakeGte(void *addr)
|
||||
|
||||
@ -217,9 +217,10 @@ GC_Run GC_RunFromString32(Arena *arena, String32 str32, GC_FontKey font, f32 fon
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Async
|
||||
|
||||
void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame)
|
||||
void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame)
|
||||
{
|
||||
GC_AsyncCtx *async = &GC.async_ctx;
|
||||
Arena *frame_arena = base_async_lane_frame->arena;
|
||||
|
||||
//////////////////////////////
|
||||
//- Begin tick
|
||||
@ -234,7 +235,7 @@ void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame)
|
||||
{
|
||||
// Pop cmds from submission queue
|
||||
async->cmds.count = GC.submit.count;
|
||||
async->cmds.v = PushStructsNoZero(frame->arena, GC_Cmd, GC.submit.count);
|
||||
async->cmds.v = PushStructsNoZero(frame_arena, GC_Cmd, GC.submit.count);
|
||||
u64 cmd_idx = 0;
|
||||
for (GC_CmdNode *n = GC.submit.first; n; n = n->next)
|
||||
{
|
||||
@ -267,7 +268,7 @@ void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame)
|
||||
GC_Glyph *glyph = cmd->glyph;
|
||||
ResourceKey resource = glyph->desc.font.r;
|
||||
GC_GlyphDesc desc = glyph->desc;
|
||||
TTF_GlyphResult ttf_result = TTF_RasterizeGlyphFromCodepoint(frame->arena, desc.codepoint, resource, desc.font_size);
|
||||
TTF_GlyphResult ttf_result = TTF_RasterizeGlyphFromCodepoint(frame_arena, desc.codepoint, resource, desc.font_size);
|
||||
glyph->font_size = desc.font_size;
|
||||
glyph->font_ascent = ttf_result.font_ascent;
|
||||
glyph->font_descent = ttf_result.font_descent;
|
||||
|
||||
@ -160,4 +160,4 @@ GC_Run GC_RunFromString32(Arena *arena, String32 str32, GC_FontKey font, f32 fon
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Async
|
||||
|
||||
void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame);
|
||||
void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame);
|
||||
|
||||
@ -505,16 +505,14 @@ Struct(G_RenderTargetDesc)
|
||||
Struct(G_Stats)
|
||||
{
|
||||
// Memory usage
|
||||
u64 local_committed;
|
||||
u64 local_budget;
|
||||
u64 non_local_committed;
|
||||
u64 non_local_budget;
|
||||
u64 device_committed;
|
||||
u64 device_budget;
|
||||
u64 host_committed;
|
||||
u64 host_budget;
|
||||
|
||||
// Resources
|
||||
u64 driver_resources_allocated;
|
||||
u64 driver_descriptors_allocated;
|
||||
|
||||
// TODO: Arena stats (committed, reserved, etc)
|
||||
// Other stats
|
||||
u64 arenas_count;
|
||||
u64 cumulative_nonreuse_count;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -526,7 +524,7 @@ void G_Bootstrap(void);
|
||||
//~ @hookdecl Arena
|
||||
|
||||
G_ArenaHandle G_AcquireArena(void);
|
||||
void G_ReleaseArena(G_ArenaHandle arena);
|
||||
void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena);
|
||||
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
@ -319,7 +319,6 @@ void G_Bootstrap(void)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
@ -334,6 +333,8 @@ void G_Bootstrap(void)
|
||||
// DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind);
|
||||
// }
|
||||
|
||||
OnAsyncTick(G_D12_TickAsync);
|
||||
|
||||
DispatchWave(Lit("Gpu collection worker"), 1, G_D12_CollectionWorkerEntryPoint, 0);
|
||||
|
||||
EndScratch(scratch);
|
||||
@ -811,23 +812,21 @@ G_ArenaHandle G_AcquireArena(void)
|
||||
}
|
||||
gpu_arena->arena = AcquireArena(Gibi(1));
|
||||
|
||||
for (u64 heap_idx = 0; heap_idx < countof(gpu_arena->resource_heaps); ++heap_idx)
|
||||
{
|
||||
gpu_arena->resource_heaps[heap_idx].kind = (G_D12_ResourceHeapKind)heap_idx;
|
||||
}
|
||||
|
||||
AddGstat(NumGpuArenas, 1);
|
||||
Atomic64FetchAdd(&G_D12.arenas_count, 1);
|
||||
|
||||
return G_D12_MakeHandle(G_ArenaHandle, gpu_arena);
|
||||
}
|
||||
|
||||
// Releases a gpu arena.
// NOTE: Not implemented yet. The body contains no statements, so calling this
// frees nothing and G_D12.arenas_count is not decremented (see TODOs below).
void G_ReleaseArena(G_ArenaHandle arena)
|
||||
void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena)
|
||||
{
|
||||
// TODO
|
||||
|
||||
// TODO: Unmap heaps
|
||||
// TODO: Release resources
|
||||
|
||||
// TODO: Update gstats
|
||||
|
||||
// TODO: Move this to actual release
|
||||
// Atomic64FetchAdd(&G_D12.arenas_count, -1);
|
||||
}
|
||||
|
||||
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle)
|
||||
@ -842,31 +841,20 @@ void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle)
|
||||
|
||||
void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena)
|
||||
{
|
||||
for (u64 heap_idx = 0; heap_idx < countof(gpu_arena->resource_heaps); ++heap_idx)
|
||||
// Move resources to reset list
|
||||
if (gpu_arena->resources.first)
|
||||
{
|
||||
G_D12_ResourceHeap *heap = &gpu_arena->resource_heaps[heap_idx];
|
||||
heap->pos = 0;
|
||||
|
||||
if (heap->resources.first)
|
||||
if (gpu_arena->reset_resources.last)
|
||||
{
|
||||
for (G_D12_Resource *resource = heap->resources.first; resource; resource = resource->next)
|
||||
{
|
||||
ID3D12Resource_Release(resource->d3d_resource);
|
||||
}
|
||||
if (gpu_arena->free_resources.last)
|
||||
{
|
||||
gpu_arena->free_resources.last->next = heap->resources.first;
|
||||
gpu_arena->reset_resources.last->next = gpu_arena->resources.first;
|
||||
}
|
||||
else
|
||||
{
|
||||
gpu_arena->free_resources.first = heap->resources.first;
|
||||
}
|
||||
gpu_arena->free_resources.last = heap->resources.last;
|
||||
gpu_arena->free_resources.count += heap->resources.count;
|
||||
heap->resources.count = 0;
|
||||
heap->resources.first = 0;
|
||||
heap->resources.last = 0;
|
||||
gpu_arena->reset_resources.first = gpu_arena->resources.first;
|
||||
}
|
||||
gpu_arena->reset_resources.last = gpu_arena->resources.last;
|
||||
gpu_arena->reset_resources.count += gpu_arena->resources.count;
|
||||
ZeroStruct(&gpu_arena->resources);
|
||||
}
|
||||
|
||||
// Push descriptors to cl reset list
|
||||
@ -886,7 +874,6 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena)
|
||||
gpu_arena->descriptors.first = 0;
|
||||
gpu_arena->descriptors.last = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -894,10 +881,10 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena)
|
||||
|
||||
G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle cl_handle, G_ResourceDesc desc)
|
||||
{
|
||||
Arena *perm = PermArena();
|
||||
G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle);
|
||||
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
||||
G_D12_Resource *resource = 0;
|
||||
HRESULT hr = 0;
|
||||
|
||||
b32 is_buffer = desc.kind == G_ResourceKind_Buffer;
|
||||
b32 is_texture = desc.kind == G_ResourceKind_Texture1D ||
|
||||
@ -908,15 +895,16 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
|
||||
is_texture ? desc.texture.flags :
|
||||
desc.sampler.flags;
|
||||
|
||||
if (is_buffer || is_texture)
|
||||
{
|
||||
//////////////////////////////
|
||||
//- Initialize heap
|
||||
//- Initialize heap info
|
||||
|
||||
G_D12_ResourceHeap *heap = 0;
|
||||
D3D12_HEAP_FLAGS heap_flags = 0;
|
||||
D3D12_HEAP_PROPERTIES heap_props = Zi;
|
||||
b32 should_map = 0;
|
||||
if (is_buffer || is_texture)
|
||||
{
|
||||
G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu;
|
||||
// Heap flags
|
||||
if (flags & G_ResourceFlag_HostMemory)
|
||||
{
|
||||
heap_kind = G_D12_ResourceHeapKind_Cpu;
|
||||
@ -925,124 +913,41 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
|
||||
heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined;
|
||||
}
|
||||
}
|
||||
heap = &gpu_arena->resource_heaps[heap_kind];
|
||||
if (heap->d3d_heap == 0)
|
||||
{
|
||||
b32 is_mappable = 0;
|
||||
|
||||
// Initialize heap
|
||||
// FIXME: Dynamic size
|
||||
if (heap->d3d_heap == 0)
|
||||
{
|
||||
// Create d3d heap
|
||||
{
|
||||
D3D12_HEAP_DESC d3d_desc = Zi;
|
||||
d3d_desc.SizeInBytes = Mebi(256);
|
||||
heap_flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
|
||||
// Heap props
|
||||
if (heap_kind == G_D12_ResourceHeapKind_Cpu)
|
||||
{
|
||||
d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM;
|
||||
d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
|
||||
d3d_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
|
||||
is_mappable = 1;
|
||||
heap_props.Type = D3D12_HEAP_TYPE_CUSTOM;
|
||||
heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
|
||||
heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
|
||||
should_map = 1;
|
||||
}
|
||||
else if (heap_kind == G_D12_ResourceHeapKind_CpuWriteCombined)
|
||||
{
|
||||
d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM;
|
||||
d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE;
|
||||
d3d_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
|
||||
is_mappable = 1;
|
||||
heap_props.Type = D3D12_HEAP_TYPE_CUSTOM;
|
||||
heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE;
|
||||
heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
|
||||
should_map = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
d3d_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
}
|
||||
d3d_desc.Flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
|
||||
d3d_desc.Flags |= D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; // TODO: Remove this and support tier 1 resource heaps
|
||||
hr = ID3D12Device_CreateHeap(G_D12.device, &d3d_desc, &IID_ID3D12Heap, (void **)&heap->d3d_heap);
|
||||
heap->size = d3d_desc.SizeInBytes;
|
||||
if (d3d_desc.Properties.Type == D3D12_HEAP_TYPE_DEFAULT)
|
||||
{
|
||||
AddGstat(DedicatedGpuArenaMemoryCommitted, heap->size);
|
||||
}
|
||||
else
|
||||
{
|
||||
AddGstat(SharedGpuArenaMemoryCommitted, heap->size);
|
||||
}
|
||||
}
|
||||
|
||||
// Map heap resource
|
||||
if (is_mappable)
|
||||
{
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
D3D12_RESOURCE_DESC1 d3d_desc = Zi;
|
||||
d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
||||
d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
d3d_desc.Width = heap->size;
|
||||
d3d_desc.Height = 1;
|
||||
d3d_desc.DepthOrArraySize = 1;
|
||||
d3d_desc.MipLevels = 1;
|
||||
d3d_desc.SampleDesc.Count = 1;
|
||||
d3d_desc.SampleDesc.Quality = 0;
|
||||
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
|
||||
|
||||
u64 alloc_size = 0;
|
||||
u64 alloc_align = 0;
|
||||
{
|
||||
D3D12_RESOURCE_ALLOCATION_INFO alloc_info = Zi;
|
||||
ID3D12Device_GetResourceAllocationInfo(G_D12.device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc);
|
||||
alloc_size = alloc_info.SizeInBytes;
|
||||
alloc_align = alloc_info.Alignment;
|
||||
}
|
||||
|
||||
if (alloc_size > heap->size)
|
||||
{
|
||||
Panic(Lit("Gpu heap overflow"));
|
||||
}
|
||||
|
||||
hr = ID3D12Device10_CreatePlacedResource2(
|
||||
G_D12.device,
|
||||
heap->d3d_heap,
|
||||
0,
|
||||
&d3d_desc,
|
||||
D3D12_BARRIER_LAYOUT_UNDEFINED,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
&IID_ID3D12Resource,
|
||||
(void **)&heap->d3d_mapped_resource
|
||||
);
|
||||
}
|
||||
if (SUCCEEDED(hr))
|
||||
{
|
||||
D3D12_RANGE read_range = Zi;
|
||||
hr = ID3D12Resource_Map(heap->d3d_mapped_resource, 0, &read_range, &heap->mapped);
|
||||
}
|
||||
}
|
||||
|
||||
if (!SUCCEEDED(hr))
|
||||
{
|
||||
// TODO: Don't panic
|
||||
Panic(Lit("Failed to create D3D12 resource heap"));
|
||||
}
|
||||
}
|
||||
heap_props.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Initialize d3d resource desc
|
||||
|
||||
D3D12_BARRIER_LAYOUT initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED;
|
||||
D3D12_BARRIER_LAYOUT d3d_initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED;
|
||||
D3D12_CLEAR_VALUE clear_value = Zi;
|
||||
D3D12_RESOURCE_DESC1 d3d_desc = Zi;
|
||||
{
|
||||
if (is_buffer)
|
||||
{
|
||||
u64 min_buffer_size = 1024;
|
||||
d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
||||
d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
d3d_desc.Width = AlignU64(MaxU64(desc.buffer.size, 1), 4);
|
||||
d3d_desc.Width = AlignU64(MaxU64(desc.buffer.size, min_buffer_size), 4);
|
||||
d3d_desc.Height = 1;
|
||||
d3d_desc.DepthOrArraySize = 1;
|
||||
d3d_desc.MipLevels = 1;
|
||||
@ -1050,10 +955,11 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
|
||||
d3d_desc.SampleDesc.Quality = 0;
|
||||
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite);
|
||||
}
|
||||
if (is_texture)
|
||||
else if (is_texture)
|
||||
{
|
||||
initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout);
|
||||
d3d_desc.Dimension = desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :
|
||||
d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout);
|
||||
d3d_desc.Dimension =
|
||||
desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :
|
||||
desc.kind == G_ResourceKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D :
|
||||
D3D12_RESOURCE_DIMENSION_TEXTURE3D;
|
||||
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
||||
@ -1074,129 +980,159 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle
|
||||
clear_value.Color[3] = desc.texture.clear_color.w,
|
||||
clear_value.Format = d3d_desc.Format;
|
||||
}
|
||||
}
|
||||
|
||||
u64 align_in_heap = 0;
|
||||
u64 size_in_heap = 0;
|
||||
{
|
||||
D3D12_RESOURCE_ALLOCATION_INFO alloc_info = Zi;
|
||||
ID3D12Device_GetResourceAllocationInfo(G_D12.device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc);
|
||||
align_in_heap = alloc_info.Alignment;
|
||||
size_in_heap = alloc_info.SizeInBytes;
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Re-use existing resource
|
||||
//- Check for reset-resource reusability
|
||||
|
||||
// u64 pos_in_heap = 0;
|
||||
// ID3D12Resource *d3d_resource = 0;
|
||||
// {
|
||||
// resource = heap->first_reset_resource;
|
||||
// if (resource)
|
||||
// {
|
||||
// SllQueuePop(heap->first_reset_resource, heap->last_reset_resource);
|
||||
// --heap->reset_resources_count;
|
||||
// b32 can_use = 1;
|
||||
// can_use = can_use && resource->is_texture == is_texture;
|
||||
// can_use = can_use && resource->size_in_heap >= size_in_heap;
|
||||
// can_use = can_use && resource->pos_in_heap % align_in_heap == 0;
|
||||
// if (can_use)
|
||||
// {
|
||||
// d3d_resource = resource->d3d_resource;
|
||||
// pos_in_heap = resource->pos_in_heap;
|
||||
// size_in_heap = resource->size_in_heap;
|
||||
// heap->pos = resource->pos_in_heap + resource->size_in_heap;
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// // FIXME: Free d3d resource here?
|
||||
// ZeroStruct(resource);
|
||||
// }
|
||||
// }
|
||||
// if (!resource)
|
||||
// {
|
||||
// resource = PushStruct(gpu_arena->arena, G_D12_Resource);
|
||||
// }
|
||||
// }
|
||||
u64 pos_in_heap = 0;
|
||||
ID3D12Resource *d3d_resource = 0;
|
||||
resource = gpu_arena->free_resources.first;
|
||||
// Pop reset resource
|
||||
resource = gpu_arena->reset_resources.first;
|
||||
b32 is_reusing = 0;
|
||||
if (resource)
|
||||
{
|
||||
SllQueuePop(gpu_arena->free_resources.first, gpu_arena->free_resources.last);
|
||||
--gpu_arena->free_resources.count;
|
||||
DllQueueRemove(gpu_arena->reset_resources.first, gpu_arena->reset_resources.last, resource);
|
||||
--gpu_arena->reset_resources.count;
|
||||
|
||||
D3D12_RESOURCE_DESC1 reset_d3d_desc = resource->d3d_desc;
|
||||
D3D12_RESOURCE_DESC1 compare_d3d_desc = reset_d3d_desc;
|
||||
|
||||
// Buffers can be reused if size fits
|
||||
if (d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && reset_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER)
|
||||
{
|
||||
if (reset_d3d_desc.Width >= d3d_desc.Width)
|
||||
{
|
||||
compare_d3d_desc.Width = d3d_desc.Width;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Less stringent reset constraints. We could even create textures as placed resources and reset their underlying heaps.
|
||||
is_reusing = MatchStruct(&compare_d3d_desc, &d3d_desc);
|
||||
if (!is_reusing)
|
||||
{
|
||||
// Push releasable to command list
|
||||
{
|
||||
G_D12_Releasable *release = 0;
|
||||
{
|
||||
Lock lock = LockE(&G_D12.free_releases_mutex);
|
||||
{
|
||||
release = G_D12.free_releases.first;
|
||||
if (release)
|
||||
{
|
||||
SllQueuePop(G_D12.free_releases.first, G_D12.free_releases.last);
|
||||
}
|
||||
else
|
||||
{
|
||||
release = PushStructNoZero(perm, G_D12_Releasable);
|
||||
}
|
||||
}
|
||||
Unlock(&lock);
|
||||
}
|
||||
ZeroStruct(release);
|
||||
release->d3d_resource = resource->d3d_resource;
|
||||
SllQueuePush(cl->releases.first, cl->releases.last, release);
|
||||
}
|
||||
ZeroStruct(resource);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
resource = PushStruct(gpu_arena->arena, G_D12_Resource);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Create new d3d resource
|
||||
|
||||
if (!resource->d3d_resource)
|
||||
if (!is_reusing)
|
||||
{
|
||||
pos_in_heap = heap->pos;
|
||||
pos_in_heap = AlignU64(pos_in_heap, align_in_heap);
|
||||
heap->pos = pos_in_heap + size_in_heap;
|
||||
if (pos_in_heap + size_in_heap > heap->size)
|
||||
{
|
||||
Panic(Lit("Gpu arena overflow"));
|
||||
}
|
||||
hr = ID3D12Device10_CreatePlacedResource2(
|
||||
G_D12.device,
|
||||
heap->d3d_heap,
|
||||
pos_in_heap,
|
||||
&d3d_desc,
|
||||
initial_layout,
|
||||
(d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? &clear_value : 0,
|
||||
0,
|
||||
0,
|
||||
&IID_ID3D12Resource,
|
||||
(void **)&d3d_resource
|
||||
);
|
||||
resource->d3d_desc = d3d_desc;
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Insert resource
|
||||
//- Init resource
|
||||
|
||||
resource->heap = heap;
|
||||
resource->pos_in_heap = pos_in_heap;
|
||||
resource->size_in_heap = size_in_heap;
|
||||
resource->d3d_resource = d3d_resource;
|
||||
resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1;
|
||||
resource->flags = flags;
|
||||
resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1;
|
||||
|
||||
if (is_buffer)
|
||||
{
|
||||
resource->buffer_size = desc.buffer.size;
|
||||
resource->buffer_size_actual = d3d_desc.Width;
|
||||
// TODO: Cache this
|
||||
resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource);
|
||||
}
|
||||
|
||||
if (is_texture)
|
||||
{
|
||||
resource->is_texture = 1;
|
||||
resource->is_texture = is_texture;
|
||||
resource->texture_format = desc.texture.format;
|
||||
resource->texture_dims = desc.texture.dims;
|
||||
resource->texture_mip_levels = d3d_desc.MipLevels;
|
||||
resource->texture_layout = initial_layout;
|
||||
}
|
||||
|
||||
SllQueuePush(heap->resources.first, heap->resources.last, resource);
|
||||
++heap->resources.count;
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Create sampler
|
||||
|
||||
if (is_sampler)
|
||||
{
|
||||
resource = PushStruct(gpu_arena->arena, G_D12_Resource);
|
||||
resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1;
|
||||
resource->sampler_desc = desc.sampler;
|
||||
}
|
||||
|
||||
DllQueuePush(gpu_arena->resources.first, gpu_arena->resources.last, resource);
|
||||
++gpu_arena->resources.count;
|
||||
|
||||
//////////////////////////////
|
||||
//- Allocate D3D12 resource
|
||||
|
||||
if ((is_buffer || is_texture) && !resource->d3d_resource)
|
||||
{
|
||||
D3D12_CLEAR_VALUE *clear_value_arg = 0;
|
||||
if (d3d_desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))
|
||||
{
|
||||
clear_value_arg = &clear_value;
|
||||
}
|
||||
HRESULT hr = ID3D12Device10_CreateCommittedResource3(
|
||||
G_D12.device,
|
||||
&heap_props,
|
||||
heap_flags,
|
||||
&resource->d3d_desc,
|
||||
d3d_initial_layout,
|
||||
clear_value_arg,
|
||||
0, // pProtectedSession
|
||||
0, // NumCastableFormats
|
||||
0, // pCastableFormats
|
||||
&IID_ID3D12Resource,
|
||||
(void **)&resource->d3d_resource
|
||||
);
|
||||
Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1);
|
||||
|
||||
if (is_texture)
|
||||
{
|
||||
resource->cmdlist_texture_layout = d3d_initial_layout;
|
||||
}
|
||||
|
||||
if (!SUCCEEDED(hr))
|
||||
{
|
||||
// TODO: Don't panic
|
||||
Panic(Lit("Failed to allocate D3D12 resource"));
|
||||
}
|
||||
|
||||
if (is_buffer)
|
||||
{
|
||||
resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(resource->d3d_resource);
|
||||
}
|
||||
}
|
||||
|
||||
if (should_map && !resource->mapped)
|
||||
{
|
||||
D3D12_RANGE read_range = Zi;
|
||||
HRESULT hr = ID3D12Resource_Map(resource->d3d_resource, 0, &read_range, &resource->mapped);
|
||||
|
||||
if (!SUCCEEDED(hr))
|
||||
{
|
||||
// TODO: Don't panic
|
||||
Panic(Lit("Failed to map D3D12 resource"));
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Transition layout if reusing
|
||||
|
||||
if (is_reusing)
|
||||
{
|
||||
G_DumbMemoryLayoutSync(cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), desc.texture.initial_layout);
|
||||
}
|
||||
|
||||
return G_D12_MakeHandle(G_ResourceHandle, resource);
|
||||
}
|
||||
|
||||
@ -1227,7 +1163,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
|
||||
if (queue_commit_completion >= descriptor->completion_queue_target)
|
||||
{
|
||||
// Descriptor no longer in use by gpu, reuse it
|
||||
SllQueuePop(descriptors->first, descriptors->last);
|
||||
DllQueueRemove(descriptors->first, descriptors->last, descriptor);
|
||||
--descriptors->count;
|
||||
index = descriptor->index;
|
||||
}
|
||||
@ -1246,7 +1182,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
|
||||
if (heap->first_free)
|
||||
{
|
||||
descriptor = heap->first_free;
|
||||
SllStackPop(heap->first_free);
|
||||
DllStackRemove(heap->first_free, descriptor);
|
||||
index = descriptor->index;
|
||||
}
|
||||
else
|
||||
@ -1270,7 +1206,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
|
||||
descriptor->handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size);
|
||||
descriptor->heap = heap;
|
||||
|
||||
SllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor);
|
||||
DllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor);
|
||||
++gpu_arena->descriptors.count;
|
||||
|
||||
return descriptor;
|
||||
@ -1462,8 +1398,7 @@ i32 G_CountDepth(G_ResourceHandle texture)
|
||||
// Returns the host (CPU) pointer recorded for a resource.
//
// With committed resources the pointer lives directly on the resource: it is the
// value written into `resource->mapped` by ID3D12Resource_Map during resource
// creation. The former placed-heap lookup (heap->mapped + pos_in_heap) is gone;
// it shadowed this return and dereferenced a heap that committed resources do
// not appear to set.
//
// NOTE(review): for resources that were never mapped this presumably returns
// null — confirm that `mapped` is zero-initialized on every creation path.
void *G_HostPointerFromResource(G_ResourceHandle resource_handle)
{
  G_D12_Resource *resource = G_D12_ResourceFromHandle(resource_handle);
  return resource->mapped;
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -1471,7 +1406,6 @@ void *G_HostPointerFromResource(G_ResourceHandle resource_handle)
|
||||
|
||||
G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl)
|
||||
{
|
||||
|
||||
// Grab chunk
|
||||
G_D12_CmdChunk *chunk = cl->last_cmd_chunk;
|
||||
{
|
||||
@ -1932,12 +1866,12 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
||||
{
|
||||
G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource);
|
||||
barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
|
||||
layout_before = resource->texture_layout;
|
||||
layout_after = resource->texture_layout;
|
||||
layout_before = resource->cmdlist_texture_layout;
|
||||
layout_after = resource->cmdlist_texture_layout;
|
||||
if (desc.layout != G_Layout_NoChange)
|
||||
{
|
||||
layout_after = G_D12_BarrierLayoutFromLayout(desc.layout);
|
||||
resource->texture_layout = layout_after;
|
||||
resource->cmdlist_texture_layout = layout_after;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2396,7 +2330,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
||||
n = next;
|
||||
}
|
||||
|
||||
// Attach completion info to descriptors
|
||||
// Attach completion info to reset descriptors
|
||||
for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;)
|
||||
{
|
||||
G_D12_Descriptor *next = d->next;
|
||||
@ -2405,12 +2339,41 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
||||
d->completion_queue_kind = queue_kind;
|
||||
d->completion_queue_target = completion_target;
|
||||
G_D12_DescriptorList *gpu_arena_reset_descriptors_list = &gpu_arena->reset_descriptors_by_heap[d->heap->kind];
|
||||
SllQueuePush(gpu_arena_reset_descriptors_list->first, gpu_arena_reset_descriptors_list->last, d);
|
||||
DllQueuePush(gpu_arena_reset_descriptors_list->first, gpu_arena_reset_descriptors_list->last, d);
|
||||
++gpu_arena_reset_descriptors_list->count;
|
||||
}
|
||||
d = next;
|
||||
}
|
||||
|
||||
// Attach completion info to releasables & submit for release
|
||||
if (cl->releases.first)
|
||||
{
|
||||
// Attach completion info
|
||||
for (G_D12_Releasable *release = cl->releases.first; release; release = release->next)
|
||||
{
|
||||
release->completion_queue_kind = queue_kind;
|
||||
release->completion_queue_target = completion_target;
|
||||
}
|
||||
// Submit releases
|
||||
Lock lock = LockE(&G_D12.pending_releases_mutex);
|
||||
{
|
||||
if (G_D12.pending_releases.last)
|
||||
{
|
||||
G_D12.pending_releases.last->next = cl->releases.last;
|
||||
}
|
||||
else
|
||||
{
|
||||
G_D12.pending_releases.first = cl->releases.last;
|
||||
}
|
||||
G_D12.pending_releases.last = cl->releases.last;
|
||||
}
|
||||
Unlock(&lock);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// // Attach completion info to resources
|
||||
// for (G_D12_Resource *r = cl->reset_resources.first; r;)
|
||||
// {
|
||||
@ -2421,7 +2384,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
||||
// r->completion_queue_kind = queue->kind;
|
||||
// r->completion_queue_target = completion_target;
|
||||
// G_D12_ResourceList *heap_reset_resources_list = &heap->reset_resources;
|
||||
// SllQueuePush(heap_reset_resources_list->first, heap_reset_resourecs_list->last, r);
|
||||
// DllQueuePush(heap_reset_resources_list->first, heap_reset_resourecs_list->last, r);
|
||||
// ++heap_reset_resources_list->count;
|
||||
// }
|
||||
// r = next;
|
||||
@ -2843,17 +2806,17 @@ G_Stats G_QueryStats(void)
|
||||
{
|
||||
DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi;
|
||||
IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info);
|
||||
result.local_committed = info.CurrentUsage;
|
||||
result.local_budget = info.Budget;
|
||||
result.device_committed = info.CurrentUsage;
|
||||
result.device_budget = info.Budget;
|
||||
}
|
||||
{
|
||||
DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi;
|
||||
IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info);
|
||||
result.non_local_budget = info.Budget;
|
||||
result.non_local_committed = info.CurrentUsage;
|
||||
result.host_budget = info.Budget;
|
||||
result.host_committed = info.CurrentUsage;
|
||||
}
|
||||
result.driver_resources_allocated = Atomic64Fetch(&G_D12.driver_resources_allocated);
|
||||
result.driver_descriptors_allocated = Atomic64Fetch(&G_D12.driver_descriptors_allocated);
|
||||
result.arenas_count = Atomic64Fetch(&G_D12.arenas_count);
|
||||
result.cumulative_nonreuse_count = Atomic64Fetch(&G_D12.cumulative_nonreuse_count);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -3003,15 +2966,17 @@ G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Forma
|
||||
Panic(Lit("Failed to retrieve swapchain buffer"));
|
||||
}
|
||||
ZeroStruct(backbuffer);
|
||||
backbuffer->d3d_resource = d3d_resource;
|
||||
backbuffer->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1;
|
||||
backbuffer->flags = G_ResourceFlag_AllowRenderTarget;
|
||||
backbuffer->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1;
|
||||
|
||||
ID3D12Resource_GetDesc(d3d_resource, (D3D12_RESOURCE_DESC *)&backbuffer->d3d_desc);
|
||||
backbuffer->d3d_resource = d3d_resource;
|
||||
|
||||
backbuffer->is_texture = 1;
|
||||
backbuffer->texture_format = format;
|
||||
backbuffer->texture_dims = VEC3I32(size.x, size.y, 1);
|
||||
backbuffer->texture_mip_levels = 1;
|
||||
backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT;
|
||||
backbuffer->cmdlist_texture_layout = D3D12_BARRIER_LAYOUT_PRESENT;
|
||||
backbuffer->swapchain = swapchain;
|
||||
}
|
||||
}
|
||||
@ -3086,9 +3051,6 @@ void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane)
|
||||
// FIXME: Remove this
|
||||
SleepSeconds(0.100);
|
||||
|
||||
|
||||
|
||||
|
||||
// Copy print-buffers to readback
|
||||
for (G_QueueKind queue_kind = 0; queue_kind < G_NumQueues; ++queue_kind)
|
||||
{
|
||||
@ -3279,3 +3241,83 @@ void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Async
|
||||
|
||||
// Async tick for the D3D12 backend: retires deferred resource releases.
//
// Only lane 0 does any work. Each tick it:
//   1. Moves every node from the shared G_D12.pending_releases queue (guarded by
//      pending_releases_mutex) onto the async-local pending queue.
//   2. Walks the local pending queue from the front, releasing each node's
//      d3d_resource once the node's queue has reached its completion target,
//      and stops at the first node that has not completed yet.
//   3. Hands the retired nodes back to the shared G_D12.free_releases list
//      (guarded by free_releases_mutex).
//
// lane:                  wave lane context; lanes other than idx 0 return immediately.
// base_async_lane_frame: per-frame async context (only its arena is read, and that
//                        is currently unused).
void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame)
{
  G_D12_AsyncCtx *async = &G_D12.async_ctx;
  Arena *frame_arena = base_async_lane_frame->arena;

  // TODO: Investigate if we gain anything by going wide here (resource release might be exclusive driver-side)
  if (lane->idx == 0)
  {
    // Pop pending releases: splice the whole shared queue onto the tail of the
    // local one so the lock is held only for a few pointer writes.
    {
      Lock lock = LockE(&G_D12.pending_releases_mutex);
      {
        if (G_D12.pending_releases.first)
        {
          if (async->pending_releases.last)
          {
            async->pending_releases.last->next = G_D12.pending_releases.first;
          }
          else
          {
            async->pending_releases.first = G_D12.pending_releases.first;
          }
          async->pending_releases.last = G_D12.pending_releases.last;
          G_D12.pending_releases.first = 0;
          G_D12.pending_releases.last = 0;
        }
      }
      Unlock(&lock);
    }

    // Release resources until we reach an uncompleted one
    G_D12_Releasable *release = async->pending_releases.first;
    if (release)
    {
      G_QueueCompletions completions = G_CompletionTargetsFromQueues(G_QueueMask_All);
      while (release)
      {
        // Capture the successor first: the node is about to be moved onto the
        // free queue, which presumably rewrites its `next` link.
        G_D12_Releasable *next = release->next;
        if (completions.v[release->completion_queue_kind] >= release->completion_queue_target)
        {
          SllQueuePop(async->pending_releases.first, async->pending_releases.last);
          if (release->d3d_resource)
          {
            ID3D12Resource_Release(release->d3d_resource);
          }
          SllQueuePush(async->free_releases.first, async->free_releases.last, release);
        }
        else
        {
          break;
        }
        release = next;
      }
    }

    // Push releasable nodes to free list.
    // Guard on the local *free* queue: guarding on the pending queue would either
    // skip the hand-off when everything was retired, or splice an empty list and
    // null out the shared list's `last` pointer.
    if (async->free_releases.first)
    {
      Lock lock = LockE(&G_D12.free_releases_mutex);
      {
        if (G_D12.free_releases.last)
        {
          G_D12.free_releases.last->next = async->free_releases.first;
        }
        else
        {
          G_D12.free_releases.first = async->free_releases.first;
        }
        G_D12.free_releases.last = async->free_releases.last;
        async->free_releases.first = 0;
        async->free_releases.last = 0;
      }
      Unlock(&lock);
    }
  }
}
|
||||
|
||||
@ -63,26 +63,27 @@ Struct(G_D12_PipelineBin)
|
||||
Struct(G_D12_Resource)
|
||||
{
|
||||
G_D12_Resource *next;
|
||||
G_D12_Resource *prev;
|
||||
|
||||
struct G_D12_ResourceHeap *heap;
|
||||
u64 pos_in_heap;
|
||||
u64 size_in_heap;
|
||||
|
||||
ID3D12Resource *d3d_resource;
|
||||
u64 uid;
|
||||
G_ResourceFlag flags;
|
||||
u64 uid;
|
||||
|
||||
// D3D12 resource
|
||||
D3D12_RESOURCE_DESC1 d3d_desc;
|
||||
ID3D12Resource *d3d_resource;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
|
||||
void *mapped;
|
||||
|
||||
// Buffer info
|
||||
u64 buffer_size;
|
||||
u64 buffer_size_actual;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
|
||||
|
||||
// Texture info
|
||||
b32 is_texture;
|
||||
G_Format texture_format;
|
||||
Vec3I32 texture_dims;
|
||||
i32 texture_mip_levels;
|
||||
D3D12_BARRIER_LAYOUT texture_layout;
|
||||
D3D12_BARRIER_LAYOUT cmdlist_texture_layout;
|
||||
|
||||
// Sampler info
|
||||
G_SamplerDesc sampler_desc;
|
||||
@ -128,6 +129,7 @@ Struct(G_D12_DescriptorHeap)
|
||||
Struct(G_D12_Descriptor)
|
||||
{
|
||||
G_D12_Descriptor *next;
|
||||
G_D12_Descriptor *prev;
|
||||
|
||||
struct G_D12_Arena *gpu_arena;
|
||||
G_QueueKind completion_queue_kind;
|
||||
@ -162,23 +164,6 @@ Enum(G_D12_ResourceHeapKind)
|
||||
G_D12_ResourceHeapKind_COUNT
|
||||
};
|
||||
|
||||
Struct(G_D12_ResourceHeap)
|
||||
{
|
||||
G_D12_ResourceHeapKind kind;
|
||||
|
||||
struct GPU_D12_Arena *gpu_arena;
|
||||
|
||||
ID3D12Heap *d3d_heap;
|
||||
ID3D12Resource *d3d_mapped_resource;
|
||||
void *mapped;
|
||||
|
||||
G_D12_ResourceList resources;
|
||||
G_D12_ResourceList reset_resources;
|
||||
|
||||
u64 pos;
|
||||
u64 size;
|
||||
};
|
||||
|
||||
Struct(G_D12_Arena)
|
||||
{
|
||||
Arena *arena;
|
||||
@ -186,9 +171,9 @@ Struct(G_D12_Arena)
|
||||
G_D12_DescriptorList descriptors;
|
||||
G_D12_DescriptorList reset_descriptors_by_heap[G_D12_DescriptorHeapKind_COUNT];
|
||||
|
||||
G_D12_ResourceList free_resources;
|
||||
|
||||
G_D12_ResourceHeap resource_heaps[G_D12_ResourceHeapKind_COUNT];
|
||||
G_D12_ResourceList resources;
|
||||
G_D12_ResourceList reset_resources;
|
||||
// G_D12_ResourceList free_resources;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -277,6 +262,25 @@ Struct(G_D12_RawCommandList)
|
||||
G_D12_Descriptor *rtv_clear_descriptor;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Releasable types
|
||||
|
||||
Struct(G_D12_Releasable)
|
||||
{
|
||||
G_D12_Releasable *next;
|
||||
|
||||
G_QueueKind completion_queue_kind;
|
||||
i64 completion_queue_target;
|
||||
|
||||
ID3D12Resource *d3d_resource;
|
||||
};
|
||||
|
||||
Struct(G_D12_ReleasableList)
|
||||
{
|
||||
G_D12_Releasable *first;
|
||||
G_D12_Releasable *last;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Command list types
|
||||
|
||||
@ -379,6 +383,8 @@ Struct(G_D12_CmdList)
|
||||
G_QueueKind queue_kind;
|
||||
|
||||
G_D12_DescriptorList reset_descriptors;
|
||||
G_D12_ReleasableList releases;
|
||||
|
||||
G_D12_StagingRegionNode *first_staging_region;
|
||||
G_D12_StagingRegionNode *last_staging_region;
|
||||
|
||||
@ -410,11 +416,20 @@ Struct(G_D12_Swapchain)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ State types
|
||||
|
||||
Struct(G_D12_AsyncCtx)
|
||||
{
|
||||
G_D12_ReleasableList pending_releases;
|
||||
G_D12_ReleasableList free_releases;
|
||||
};
|
||||
|
||||
Struct(G_D12_Ctx)
|
||||
{
|
||||
Atomic64Padded resource_creation_gen;
|
||||
|
||||
// Stats
|
||||
Atomic64 arenas_count;
|
||||
Atomic64 cumulative_nonreuse_count;
|
||||
|
||||
Atomic64 driver_resources_allocated;
|
||||
Atomic64 driver_descriptors_allocated;
|
||||
|
||||
@ -446,6 +461,15 @@ Struct(G_D12_Ctx)
|
||||
IDXGIFactory6 *factory;
|
||||
IDXGIAdapter3 *adapter;
|
||||
ID3D12Device10 *device;
|
||||
|
||||
// Release-queue
|
||||
Mutex pending_releases_mutex;
|
||||
Mutex free_releases_mutex;
|
||||
G_D12_ReleasableList pending_releases;
|
||||
G_D12_ReleasableList free_releases;
|
||||
|
||||
// Async
|
||||
G_D12_AsyncCtx async_ctx;
|
||||
};
|
||||
|
||||
Struct(G_D12_ThreadLocalCtx)
|
||||
@ -509,3 +533,8 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size);
|
||||
//~ Collection worker
|
||||
|
||||
void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Async
|
||||
|
||||
void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame);
|
||||
|
||||
@ -155,6 +155,7 @@ S_Shape S_MulXformShape(Xform xf, S_Shape shape)
|
||||
|
||||
Vec2 S_SupportPointFromShape(S_Shape shape, Vec2 dir)
|
||||
{
|
||||
// FIXME: Properly handle rounded polygons
|
||||
Vec2 result = Zi;
|
||||
Vec2 dir_norm = NormVec2(dir);
|
||||
f32 max_dot = -Inf;
|
||||
|
||||
@ -237,7 +237,8 @@ V_WidgetTheme V_GetWidgetTheme(void)
|
||||
theme.icon_font = UI_BuiltinIconFont();
|
||||
|
||||
// theme.font_size = 14;
|
||||
theme.font_size = TweakFloat("Font size", 14, 6, 50, .precision = 0);
|
||||
// theme.font_size = TweakFloat("Font size", 14, 6, 50, .precision = 0);
|
||||
theme.font_size = TweakFloat("Font size", 14, 6, 50, .precision = 2);
|
||||
theme.h1 = 2.00;
|
||||
theme.h2 = 1.50;
|
||||
theme.h3 = 1.25;
|
||||
@ -1986,6 +1987,8 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
//////////////////////////////
|
||||
//- Build debug info UI
|
||||
|
||||
G_Stats gpu_stats = G_QueryStats();
|
||||
|
||||
if (frame->show_console)
|
||||
{
|
||||
UI_Key dbg_box = UI_KeyF("Debug box");
|
||||
@ -2043,9 +2046,10 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
UI_BuildLabelF("GPU:");
|
||||
UI_Pop(FontSize);
|
||||
}
|
||||
UI_BuildLabelF(" Arenas: %F", FmtSint(GetGstat(NumGpuArenas)));
|
||||
UI_BuildLabelF(" Dedicated arena memory committed: %F MiB", FmtFloat((f64)GetGstat(DedicatedGpuArenaMemoryCommitted) / 1024 / 1024));
|
||||
UI_BuildLabelF(" Shared arena memory committed: %F MiB", FmtFloat((f64)GetGstat(SharedGpuArenaMemoryCommitted) / 1024 / 1024));
|
||||
UI_BuildLabelF(" Arenas: %F", FmtUint(gpu_stats.arenas_count));
|
||||
UI_BuildLabelF(" Device memory usage: %F MiB", FmtFloat((f64)gpu_stats.device_committed / 1024 / 1024));
|
||||
UI_BuildLabelF(" Host memory usage: %F MiB", FmtFloat((f64)gpu_stats.host_committed / 1024 / 1024));
|
||||
UI_BuildLabelF(" Non-reuse tally: %F", FmtUint(gpu_stats.cumulative_nonreuse_count));
|
||||
}
|
||||
UI_BuildSpacer(UI_PIX(padding, 1), Axis_Y);
|
||||
}
|
||||
|
||||
@ -1720,8 +1720,6 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
|
||||
//////////////////////////////
|
||||
//- Dispatch shaders
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, draw_target, G_Layout_DirectQueue_RenderTargetWrite);
|
||||
|
||||
//- Clear pass
|
||||
{
|
||||
G_ClearRenderTarget(frame->cl, draw_target, VEC4(0, 0, 0, 0));
|
||||
@ -1729,8 +1727,6 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
|
||||
|
||||
//- Rect pass
|
||||
|
||||
G_DumbMemoryLayoutSync(frame->cl, draw_target, G_Layout_DirectQueue_RenderTargetWrite);
|
||||
|
||||
if (rects_count > 0)
|
||||
{
|
||||
// Render rects
|
||||
|
||||
Loading…
Reference in New Issue
Block a user