diff --git a/ppswap/pp_sim.swp.swp b/ppswap/pp_sim.swp.swp new file mode 100644 index 00000000..9d1caa5d Binary files /dev/null and b/ppswap/pp_sim.swp.swp differ diff --git a/ppswap/pp_vis.swp.swp b/ppswap/pp_vis.swp.swp new file mode 100644 index 00000000..f32a0de7 Binary files /dev/null and b/ppswap/pp_vis.swp.swp differ diff --git a/src/base/base.cgh b/src/base/base.cgh index bf5ce6f8..67759453 100644 --- a/src/base/base.cgh +++ b/src/base/base.cgh @@ -699,6 +699,17 @@ }; #endif +//////////////////////////////////////////////////////////// +//~ Debug types + +#if IsLanguageC + Struct(Callstack) + { + u64 count; + void *frames[32]; + }; +#endif + //////////////////////////////////////////////////////////// //~ Shader linkage types @@ -757,7 +768,6 @@ } #endif - //////////////////////////////////////////////////////////// //~ @hookdecl Core api @@ -765,6 +775,7 @@ StringList GetRawCommandline(void); void Echo(String msg); b32 Panic(String msg); + Callstack CaptureCallstack(u64 skip_frames); b32 IsRunningInDebugger(void); i64 TimeNs(void); void TrueRand(String buffer); diff --git a/src/base/base_async.c b/src/base/base_async.c index 830bc196..639b3099 100644 --- a/src/base/base_async.c +++ b/src/base/base_async.c @@ -51,11 +51,16 @@ void AsyncWorkerEntryPoint(WaveLaneCtx *lane) { // Wait for signal { + i64 passive_timeout_ns = NsFromSeconds(0.25); + i64 now_ns = TimeNs(); + i64 passive_run_at_ns = now_ns + passive_timeout_ns; + i64 cur_signal = Atomic64Fetch(&Base.async.signal.v); - while (cur_signal <= w->last_seen_signal) + while (cur_signal <= w->last_seen_signal && (passive_run_at_ns - now_ns) > 1000000) { - FutexYieldNeq(&Base.async.signal.v, &cur_signal, sizeof(cur_signal)); + FutexYieldNeq(&Base.async.signal.v, &cur_signal, sizeof(cur_signal), passive_run_at_ns - now_ns); cur_signal = Atomic64Fetch(&Base.async.signal.v); + now_ns = TimeNs(); } w->last_seen_signal = cur_signal; } @@ -76,6 +81,7 @@ void AsyncWorkerEntryPoint(WaveLaneCtx *lane) } } + 
WaveSync(lane); ////////////////////////////// diff --git a/src/base/base_async.h b/src/base/base_async.h index c93aea80..07e6f5c2 100644 --- a/src/base/base_async.h +++ b/src/base/base_async.h @@ -2,7 +2,7 @@ //~ Async types Struct(AsyncFrameLaneCtx); -typedef void AsyncTickCallbackFunc(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame); +typedef void AsyncTickCallbackFunc(WaveLaneCtx *lane, AsyncFrameLaneCtx *async_lane_frame_ctx); Struct(AsyncTickCallback) { diff --git a/src/base/base_futex.h b/src/base/base_futex.h index 2d59465a..fd22e672 100644 --- a/src/base/base_futex.h +++ b/src/base/base_futex.h @@ -4,7 +4,7 @@ // Similar to Win32 WaitOnAddress & WakeByAddressAll // i.e. - Suprious wait until value at address != cmp -void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size); +void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns); void FutexWakeNeq(void *addr); //////////////////////////////////////////////////////////// @@ -16,5 +16,5 @@ void FutexWakeNeq(void *addr); // wake when the futex progresses past the specified target value, rather than // wake every time the futex is modified. 
-void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size); +void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns); void FutexWakeGte(void *addr); diff --git a/src/base/base_gstat.h b/src/base/base_gstat.h index 3ea6faa7..fcc6f073 100644 --- a/src/base/base_gstat.h +++ b/src/base/base_gstat.h @@ -13,10 +13,6 @@ Struct(GstatCtx) Atomic64Padded ArenaMemoryCommitted; Atomic64Padded ArenaMemoryReserved; - Atomic64Padded NumGpuArenas; - Atomic64Padded DedicatedGpuArenaMemoryCommitted; - Atomic64Padded SharedGpuArenaMemoryCommitted; - }; //////////////////////////////////////////////////////////// diff --git a/src/base/base_string.c b/src/base/base_string.c index eb6b8473..1767e24a 100644 --- a/src/base/base_string.c +++ b/src/base/base_string.c @@ -154,6 +154,7 @@ String StringFromFloat(Arena *arena, f64 src, u32 precision) if (c == '0') { result.len -= 1; + PopBytesNoCopy(arena, 1); } else { diff --git a/src/base/base_sync.c b/src/base/base_sync.c index acd5fe00..31d8d60b 100644 --- a/src/base/base_sync.c +++ b/src/base/base_sync.c @@ -48,7 +48,7 @@ Lock ExclusiveLockEx(Mutex *m, i32 spin) } else { - FutexYieldNeq(&m->v, &v, 4); + FutexYieldNeq(&m->v, &v, 4, I64Max); spin_cnt = 0; } } @@ -94,7 +94,7 @@ Lock SharedLockEx(Mutex *m, i32 spin) } else { - FutexYieldNeq(&m->v, &v, 4); + FutexYieldNeq(&m->v, &v, 4, I64Max); spin_cnt = 0; } } @@ -143,7 +143,7 @@ void YieldOnCv(Cv *cv, Lock *l) { Unlock(l); { - FutexYieldNeq(&cv->wake_gen, &old_wake_gen, sizeof(old_wake_gen)); + FutexYieldNeq(&cv->wake_gen, &old_wake_gen, sizeof(old_wake_gen), I64Max); } if (exclusive) { @@ -195,8 +195,37 @@ i64 YieldOnFence(Fence *fence, i64 target) i64 v = Atomic64Fetch(&fence->v.v); while (v < target) { - FutexYieldGte(&fence->v.v, &v, sizeof(v)); + FutexYieldGte(&fence->v.v, &v, sizeof(v), I64Max); v = Atomic64Fetch(&fence->v.v); } return v; } + +//////////////////////////////////////////////////////////// +//~ Lazy init + +b32 BeginLazyInit(LazyInitBarrier 
*barrier) +{ + b32 is_initializer = 0; + Atomic32 *v = &barrier->v.v; + if (Atomic32Fetch(v) != 2) + { + if (Atomic32FetchTestSet(v, 0, 1) == 0) + { + is_initializer = 1; + } + else + { + while (Atomic32Fetch(v) != 2) + { + _mm_pause(); + } + } + } + return is_initializer; +} + +void EndLazyInit(LazyInitBarrier *barrier) +{ + Atomic32Set(&barrier->v.v, 2); +} diff --git a/src/base/base_sync.h b/src/base/base_sync.h index ff685453..91f4ec4d 100644 --- a/src/base/base_sync.h +++ b/src/base/base_sync.h @@ -39,6 +39,17 @@ Struct(Fence) Atomic64Padded v; }; +//////////////////////////////////////////////////////////// +//~ Lazy init types + +Struct(LazyInitBarrier) +{ + // 0 = untouched + // 1 = initializing + // 2 = initialized + Atomic32Padded v; +}; + //////////////////////////////////////////////////////////// //~ Mutex @@ -74,3 +85,9 @@ i64 FetchSetFence(Fence *fence, i64 x); i64 FetchAddFence(Fence *fence, i64 x); i64 YieldOnFence(Fence *fence, i64 target); + +//////////////////////////////////////////////////////////// +//~ Lazy init + +b32 BeginLazyInit(LazyInitBarrier *barrier); +void EndLazyInit(LazyInitBarrier *barrier); diff --git a/src/base/base_wave.c b/src/base/base_wave.c index b99c93c6..0f460a16 100644 --- a/src/base/base_wave.c +++ b/src/base/base_wave.c @@ -27,7 +27,7 @@ void WaveSyncEx(WaveLaneCtx *lane, u64 spin_count) } else { - FutexYieldNeq(&wave->sync_gen.v, &sync_gen, sizeof(sync_gen)); + FutexYieldNeq(&wave->sync_gen.v, &sync_gen, sizeof(sync_gen), I64Max); } } } @@ -62,7 +62,7 @@ void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broad } else { - FutexYieldNeq(&wave->ack_gen.v, &ack_gen, sizeof(ack_gen)); + FutexYieldNeq(&wave->ack_gen.v, &ack_gen, sizeof(ack_gen), I64Max); } } } @@ -82,7 +82,7 @@ void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broad } else { - FutexYieldNeq(&wave->broadcast_gen.v, &seen_broadcast_gen, sizeof(seen_broadcast_gen)); + FutexYieldNeq(&wave->broadcast_gen.v, 
&seen_broadcast_gen, sizeof(seen_broadcast_gen), I64Max); } } } diff --git a/src/base/base_win32/base_win32.c b/src/base/base_win32/base_win32.c index ca31ffb7..4c98354d 100644 --- a/src/base/base_win32/base_win32.c +++ b/src/base/base_win32/base_win32.c @@ -85,6 +85,18 @@ b32 Panic(String msg) return 0; } +Callstack CaptureCallstack(u64 skip_frames) +{ + Callstack result; + result.count = CaptureStackBackTrace( + 1 + skip_frames, + countof(result.frames), + result.frames, + 0 // BackTraceHash + ); + return result; +} + b32 IsRunningInDebugger(void) { return IsDebuggerPresent(); diff --git a/src/base/base_win32/base_win32.h b/src/base/base_win32/base_win32.h index 29d91cd6..f996b8fc 100644 --- a/src/base/base_win32/base_win32.h +++ b/src/base/base_win32/base_win32.h @@ -43,6 +43,7 @@ #pragma comment(lib, "synchronization") #pragma comment(lib, "avrt") #pragma comment(lib, "ws2_32.lib") +#pragma comment(lib, "advapi32.lib") //////////////////////////////////////////////////////////// //~ Embedded data iter types diff --git a/src/base/base_win32/base_win32_futex.c b/src/base/base_win32/base_win32_futex.c index 57407f17..90ef6add 100644 --- a/src/base/base_win32/base_win32_futex.c +++ b/src/base/base_win32/base_win32_futex.c @@ -1,9 +1,21 @@ //////////////////////////////////////////////////////////// //~ @hookimpl Not-equal futex ops -void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size) +void FutexYieldNeq(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns) { - WaitOnAddress(addr, cmp, cmp_size, INFINITE); + if (timeout_ns > 0) + { + DWORD timeout_ms; + if (timeout_ns >= 3153600000000000000ull) // ~100 years + { + timeout_ms = INFINITE; + } + else + { + timeout_ms = (DWORD)(SecondsFromNs(timeout_ns) * 1000.0); + } + WaitOnAddress(addr, cmp, cmp_size, timeout_ms); + } } void FutexWakeNeq(void *addr) @@ -14,10 +26,10 @@ void FutexWakeNeq(void *addr) //////////////////////////////////////////////////////////// //~ @hookimpl Greater-than-or-equal 
futex ops -void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size) +void FutexYieldGte(volatile void *addr, void *cmp, u8 cmp_size, i64 timeout_ns) { // TODO: Actually implement this. Just emulating via neq for now. - FutexYieldNeq(addr, cmp, cmp_size); + FutexYieldNeq(addr, cmp, cmp_size, timeout_ns); } void FutexWakeGte(void *addr) diff --git a/src/glyph_cache/glyph_cache.c b/src/glyph_cache/glyph_cache.c index 180ead43..9d8af52c 100644 --- a/src/glyph_cache/glyph_cache.c +++ b/src/glyph_cache/glyph_cache.c @@ -217,9 +217,10 @@ GC_Run GC_RunFromString32(Arena *arena, String32 str32, GC_FontKey font, f32 fon //////////////////////////////////////////////////////////// //~ Async -void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame) +void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame) { GC_AsyncCtx *async = &GC.async_ctx; + Arena *frame_arena = base_async_lane_frame->arena; ////////////////////////////// //- Begin tick @@ -234,7 +235,7 @@ void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame) { // Pop cmds from submission queue async->cmds.count = GC.submit.count; - async->cmds.v = PushStructsNoZero(frame->arena, GC_Cmd, GC.submit.count); + async->cmds.v = PushStructsNoZero(frame_arena, GC_Cmd, GC.submit.count); u64 cmd_idx = 0; for (GC_CmdNode *n = GC.submit.first; n; n = n->next) { @@ -267,7 +268,7 @@ void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame) GC_Glyph *glyph = cmd->glyph; ResourceKey resource = glyph->desc.font.r; GC_GlyphDesc desc = glyph->desc; - TTF_GlyphResult ttf_result = TTF_RasterizeGlyphFromCodepoint(frame->arena, desc.codepoint, resource, desc.font_size); + TTF_GlyphResult ttf_result = TTF_RasterizeGlyphFromCodepoint(frame_arena, desc.codepoint, resource, desc.font_size); glyph->font_size = desc.font_size; glyph->font_ascent = ttf_result.font_ascent; glyph->font_descent = ttf_result.font_descent; diff --git a/src/glyph_cache/glyph_cache.h b/src/glyph_cache/glyph_cache.h index 
edaeccb1..d39f3d05 100644 --- a/src/glyph_cache/glyph_cache.h +++ b/src/glyph_cache/glyph_cache.h @@ -160,4 +160,4 @@ GC_Run GC_RunFromString32(Arena *arena, String32 str32, GC_FontKey font, f32 fon //////////////////////////////////////////////////////////// //~ Async -void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *frame); +void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame); diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 0411155e..5cc5ca06 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -505,16 +505,14 @@ Struct(G_RenderTargetDesc) Struct(G_Stats) { // Memory usage - u64 local_committed; - u64 local_budget; - u64 non_local_committed; - u64 non_local_budget; + u64 device_committed; + u64 device_budget; + u64 host_committed; + u64 host_budget; - // Resources - u64 driver_resources_allocated; - u64 driver_descriptors_allocated; - - // TODO: Arena stats (committed, reserved, etc) + // Other stats + u64 arenas_count; + u64 cumulative_nonreuse_count; }; //////////////////////////////////////////////////////////// @@ -526,7 +524,7 @@ void G_Bootstrap(void); //~ @hookdecl Arena G_ArenaHandle G_AcquireArena(void); -void G_ReleaseArena(G_ArenaHandle arena); +void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena); void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle); //////////////////////////////////////////////////////////// diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index fa9867b8..d28d3468 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -319,7 +319,6 @@ void G_Bootstrap(void) } } } - } ////////////////////////////// @@ -334,6 +333,8 @@ void G_Bootstrap(void) // DispatchWave(name, 1, G_D12_WorkerEntry, (void *)(u64)kind); // } + OnAsyncTick(G_D12_TickAsync); + DispatchWave(Lit("Gpu collection worker"), 1, G_D12_CollectionWorkerEntryPoint, 0); EndScratch(scratch); @@ -811,23 +812,21 @@ 
G_ArenaHandle G_AcquireArena(void) } gpu_arena->arena = AcquireArena(Gibi(1)); - for (u64 heap_idx = 0; heap_idx < countof(gpu_arena->resource_heaps); ++heap_idx) - { - gpu_arena->resource_heaps[heap_idx].kind = (G_D12_ResourceHeapKind)heap_idx; - } - - AddGstat(NumGpuArenas, 1); + Atomic64FetchAdd(&G_D12.arenas_count, 1); return G_D12_MakeHandle(G_ArenaHandle, gpu_arena); } -void G_ReleaseArena(G_ArenaHandle arena) +void G_ReleaseArena(G_CommandListHandle cl_handle, G_ArenaHandle arena) { // TODO - // TODO: Unmap heaps + // TODO: Release resources // TODO: Update gstats + + // TODO: Move this to actual release + // Atomic64FetchAdd(&G_D12.arenas_count, -1); } void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle) @@ -842,31 +841,20 @@ void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle) void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) { - for (u64 heap_idx = 0; heap_idx < countof(gpu_arena->resource_heaps); ++heap_idx) + // Move resources to reset list + if (gpu_arena->resources.first) { - G_D12_ResourceHeap *heap = &gpu_arena->resource_heaps[heap_idx]; - heap->pos = 0; - - if (heap->resources.first) + if (gpu_arena->reset_resources.last) { - for (G_D12_Resource *resource = heap->resources.first; resource; resource = resource->next) - { - ID3D12Resource_Release(resource->d3d_resource); - } - if (gpu_arena->free_resources.last) - { - gpu_arena->free_resources.last->next = heap->resources.first; - } - else - { - gpu_arena->free_resources.first = heap->resources.first; - } - gpu_arena->free_resources.last = heap->resources.last; - gpu_arena->free_resources.count += heap->resources.count; - heap->resources.count = 0; - heap->resources.first = 0; - heap->resources.last = 0; + gpu_arena->reset_resources.last->next = gpu_arena->resources.first; } + else + { + gpu_arena->reset_resources.first = gpu_arena->resources.first; + } + gpu_arena->reset_resources.last = gpu_arena->resources.last; + 
gpu_arena->reset_resources.count += gpu_arena->resources.count; + ZeroStruct(&gpu_arena->resources); } // Push descriptors to cl reset list @@ -886,7 +874,6 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) gpu_arena->descriptors.first = 0; gpu_arena->descriptors.last = 0; } - } //////////////////////////////////////////////////////////// @@ -894,10 +881,10 @@ void G_D12_ResetArena(G_D12_CmdList *cl, G_D12_Arena *gpu_arena) G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle cl_handle, G_ResourceDesc desc) { + Arena *perm = PermArena(); G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_Resource *resource = 0; - HRESULT hr = 0; b32 is_buffer = desc.kind == G_ResourceKind_Buffer; b32 is_texture = desc.kind == G_ResourceKind_Texture1D || @@ -908,295 +895,244 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle is_texture ? desc.texture.flags : desc.sampler.flags; + ////////////////////////////// + //- Initialize heap info + + D3D12_HEAP_FLAGS heap_flags = 0; + D3D12_HEAP_PROPERTIES heap_props = Zi; + b32 should_map = 0; if (is_buffer || is_texture) { - ////////////////////////////// - //- Initialize heap - - G_D12_ResourceHeap *heap = 0; - if (is_buffer || is_texture) + G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu; + // Heap flags + if (flags & G_ResourceFlag_HostMemory) { - G_D12_ResourceHeapKind heap_kind = G_D12_ResourceHeapKind_Gpu; - if (flags & G_ResourceFlag_HostMemory) + heap_kind = G_D12_ResourceHeapKind_Cpu; + if (flags & G_ResourceFlag_Uncached) { - heap_kind = G_D12_ResourceHeapKind_Cpu; - if (flags & G_ResourceFlag_Uncached) - { - heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined; - } - } - heap = &gpu_arena->resource_heaps[heap_kind]; - if (heap->d3d_heap == 0) - { - b32 is_mappable = 0; - - // Initialize heap - // FIXME: Dynamic size - if (heap->d3d_heap == 0) - { - // Create d3d heap - 
{ - D3D12_HEAP_DESC d3d_desc = Zi; - d3d_desc.SizeInBytes = Mebi(256); - if (heap_kind == G_D12_ResourceHeapKind_Cpu) - { - d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM; - d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; - d3d_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; - is_mappable = 1; - } - else if (heap_kind == G_D12_ResourceHeapKind_CpuWriteCombined) - { - d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM; - d3d_desc.Properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; - d3d_desc.Properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; - is_mappable = 1; - } - else - { - d3d_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT; - } - d3d_desc.Flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; - d3d_desc.Flags |= D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; // TODO: Remove this and support tier 1 resource heaps - hr = ID3D12Device_CreateHeap(G_D12.device, &d3d_desc, &IID_ID3D12Heap, (void **)&heap->d3d_heap); - heap->size = d3d_desc.SizeInBytes; - if (d3d_desc.Properties.Type == D3D12_HEAP_TYPE_DEFAULT) - { - AddGstat(DedicatedGpuArenaMemoryCommitted, heap->size); - } - else - { - AddGstat(SharedGpuArenaMemoryCommitted, heap->size); - } - } - - // Map heap resource - if (is_mappable) - { - if (SUCCEEDED(hr)) - { - D3D12_RESOURCE_DESC1 d3d_desc = Zi; - d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - d3d_desc.Format = DXGI_FORMAT_UNKNOWN; - d3d_desc.Width = heap->size; - d3d_desc.Height = 1; - d3d_desc.DepthOrArraySize = 1; - d3d_desc.MipLevels = 1; - d3d_desc.SampleDesc.Count = 1; - d3d_desc.SampleDesc.Quality = 0; - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE; - - u64 alloc_size = 0; - u64 alloc_align = 0; - { - D3D12_RESOURCE_ALLOCATION_INFO alloc_info = Zi; - ID3D12Device_GetResourceAllocationInfo(G_D12.device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc); - alloc_size = alloc_info.SizeInBytes; - alloc_align = 
alloc_info.Alignment; - } - - if (alloc_size > heap->size) - { - Panic(Lit("Gpu heap overflow")); - } - - hr = ID3D12Device10_CreatePlacedResource2( - G_D12.device, - heap->d3d_heap, - 0, - &d3d_desc, - D3D12_BARRIER_LAYOUT_UNDEFINED, - 0, - 0, - 0, - &IID_ID3D12Resource, - (void **)&heap->d3d_mapped_resource - ); - } - if (SUCCEEDED(hr)) - { - D3D12_RANGE read_range = Zi; - hr = ID3D12Resource_Map(heap->d3d_mapped_resource, 0, &read_range, &heap->mapped); - } - } - - if (!SUCCEEDED(hr)) - { - // TODO: Don't panic - Panic(Lit("Failed to create D3D12 resource heap")); - } - } + heap_kind = G_D12_ResourceHeapKind_CpuWriteCombined; } } - - ////////////////////////////// - //- Initialize d3d resource desc - - D3D12_BARRIER_LAYOUT initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED; - D3D12_CLEAR_VALUE clear_value = Zi; - D3D12_RESOURCE_DESC1 d3d_desc = Zi; + heap_flags |= D3D12_HEAP_FLAG_CREATE_NOT_ZEROED; + // Heap props + if (heap_kind == G_D12_ResourceHeapKind_Cpu) { - if (is_buffer) - { - d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - d3d_desc.Format = DXGI_FORMAT_UNKNOWN; - d3d_desc.Width = AlignU64(MaxU64(desc.buffer.size, 1), 4); - d3d_desc.Height = 1; - d3d_desc.DepthOrArraySize = 1; - d3d_desc.MipLevels = 1; - d3d_desc.SampleDesc.Count = 1; - d3d_desc.SampleDesc.Quality = 0; - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); - } - if (is_texture) - { - initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout); - d3d_desc.Dimension = desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : - desc.kind == G_ResourceKind_Texture2D ? 
D3D12_RESOURCE_DIMENSION_TEXTURE2D : - D3D12_RESOURCE_DIMENSION_TEXTURE3D; - d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; - d3d_desc.Format = G_D12_DxgiFormatFromGpuFormat(desc.texture.format); - d3d_desc.Width = MaxI32(desc.texture.dims.x, 1); - d3d_desc.Height = MaxI32(desc.texture.dims.y, 1); - d3d_desc.DepthOrArraySize = MaxI32(desc.texture.dims.z, 1); - d3d_desc.MipLevels = MaxI32(desc.texture.mip_levels, 1); - d3d_desc.SampleDesc.Count = 1; - d3d_desc.SampleDesc.Quality = 0; - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(flags, G_ResourceFlag_AllowRenderTarget); - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(flags, G_ResourceFlag_AllowDepthStencil); - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS * (desc.texture.initial_layout == G_Layout_Simultaneous); - clear_value.Color[0] = desc.texture.clear_color.x, - clear_value.Color[1] = desc.texture.clear_color.y, - clear_value.Color[2] = desc.texture.clear_color.z, - clear_value.Color[3] = desc.texture.clear_color.w, - clear_value.Format = d3d_desc.Format; - } + heap_props.Type = D3D12_HEAP_TYPE_CUSTOM; + heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK; + heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; + should_map = 1; } - - u64 align_in_heap = 0; - u64 size_in_heap = 0; + else if (heap_kind == G_D12_ResourceHeapKind_CpuWriteCombined) { - D3D12_RESOURCE_ALLOCATION_INFO alloc_info = Zi; - ID3D12Device_GetResourceAllocationInfo(G_D12.device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc); - align_in_heap = alloc_info.Alignment; - size_in_heap = alloc_info.SizeInBytes; - } - - ////////////////////////////// - //- Re-use existing resource - - // u64 pos_in_heap = 0; - // ID3D12Resource *d3d_resource = 0; - // { - // resource = heap->first_reset_resource; - // if (resource) - // { - // 
SllQueuePop(heap->first_reset_resource, heap->last_reset_resource); - // --heap->reset_resources_count; - // b32 can_use = 1; - // can_use = can_use && resource->is_texture == is_texture; - // can_use = can_use && resource->size_in_heap >= size_in_heap; - // can_use = can_use && resource->pos_in_heap % align_in_heap == 0; - // if (can_use) - // { - // d3d_resource = resource->d3d_resource; - // pos_in_heap = resource->pos_in_heap; - // size_in_heap = resource->size_in_heap; - // heap->pos = resource->pos_in_heap + resource->size_in_heap; - // } - // else - // { - // // FIXME: Free d3d resource here? - // ZeroStruct(resource); - // } - // } - // if (!resource) - // { - // resource = PushStruct(gpu_arena->arena, G_D12_Resource); - // } - // } - u64 pos_in_heap = 0; - ID3D12Resource *d3d_resource = 0; - resource = gpu_arena->free_resources.first; - if (resource) - { - SllQueuePop(gpu_arena->free_resources.first, gpu_arena->free_resources.last); - --gpu_arena->free_resources.count; - ZeroStruct(resource); + heap_props.Type = D3D12_HEAP_TYPE_CUSTOM; + heap_props.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE; + heap_props.MemoryPoolPreference = D3D12_MEMORY_POOL_L0; + should_map = 1; } else { - resource = PushStruct(gpu_arena->arena, G_D12_Resource); + heap_props.Type = D3D12_HEAP_TYPE_DEFAULT; } - - ////////////////////////////// - //- Create new d3d resource - - if (!resource->d3d_resource) - { - pos_in_heap = heap->pos; - pos_in_heap = AlignU64(pos_in_heap, align_in_heap); - heap->pos = pos_in_heap + size_in_heap; - if (pos_in_heap + size_in_heap > heap->size) - { - Panic(Lit("Gpu arena overflow")); - } - hr = ID3D12Device10_CreatePlacedResource2( - G_D12.device, - heap->d3d_heap, - pos_in_heap, - &d3d_desc, - initial_layout, - (d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? 
&clear_value : 0, - 0, - 0, - &IID_ID3D12Resource, - (void **)&d3d_resource - ); - } - - ////////////////////////////// - //- Insert resource - - resource->heap = heap; - resource->pos_in_heap = pos_in_heap; - resource->size_in_heap = size_in_heap; - resource->d3d_resource = d3d_resource; - resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1; - resource->flags = flags; - if (is_buffer) - { - resource->buffer_size = desc.buffer.size; - resource->buffer_size_actual = d3d_desc.Width; - // TODO: Cache this - resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource); - } - if (is_texture) - { - resource->is_texture = 1; - resource->texture_format = desc.texture.format; - resource->texture_dims = desc.texture.dims; - resource->texture_mip_levels = d3d_desc.MipLevels; - resource->texture_layout = initial_layout; - } - - SllQueuePush(heap->resources.first, heap->resources.last, resource); - ++heap->resources.count; } ////////////////////////////// - //- Create sampler + //- Initialize d3d resource desc + + D3D12_BARRIER_LAYOUT d3d_initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED; + D3D12_CLEAR_VALUE clear_value = Zi; + D3D12_RESOURCE_DESC1 d3d_desc = Zi; + if (is_buffer) + { + u64 min_buffer_size = 1024; + d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + d3d_desc.Format = DXGI_FORMAT_UNKNOWN; + d3d_desc.Width = AlignU64(MaxU64(desc.buffer.size, min_buffer_size), 4); + d3d_desc.Height = 1; + d3d_desc.DepthOrArraySize = 1; + d3d_desc.MipLevels = 1; + d3d_desc.SampleDesc.Count = 1; + d3d_desc.SampleDesc.Quality = 0; + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); + } + else if (is_texture) + { + d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout); + d3d_desc.Dimension = + desc.kind == G_ResourceKind_Texture1D ? 
D3D12_RESOURCE_DIMENSION_TEXTURE1D : + desc.kind == G_ResourceKind_Texture2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D : + D3D12_RESOURCE_DIMENSION_TEXTURE3D; + d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + d3d_desc.Format = G_D12_DxgiFormatFromGpuFormat(desc.texture.format); + d3d_desc.Width = MaxI32(desc.texture.dims.x, 1); + d3d_desc.Height = MaxI32(desc.texture.dims.y, 1); + d3d_desc.DepthOrArraySize = MaxI32(desc.texture.dims.z, 1); + d3d_desc.MipLevels = MaxI32(desc.texture.mip_levels, 1); + d3d_desc.SampleDesc.Count = 1; + d3d_desc.SampleDesc.Quality = 0; + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(flags, G_ResourceFlag_AllowRenderTarget); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(flags, G_ResourceFlag_AllowDepthStencil); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS * (desc.texture.initial_layout == G_Layout_Simultaneous); + clear_value.Color[0] = desc.texture.clear_color.x, + clear_value.Color[1] = desc.texture.clear_color.y, + clear_value.Color[2] = desc.texture.clear_color.z, + clear_value.Color[3] = desc.texture.clear_color.w, + clear_value.Format = d3d_desc.Format; + } + + ////////////////////////////// + //- Check for reset-resource reusability + + // Pop reset resource + resource = gpu_arena->reset_resources.first; + b32 is_reusing = 0; + if (resource) + { + DllQueueRemove(gpu_arena->reset_resources.first, gpu_arena->reset_resources.last, resource); + --gpu_arena->reset_resources.count; + + D3D12_RESOURCE_DESC1 reset_d3d_desc = resource->d3d_desc; + D3D12_RESOURCE_DESC1 compare_d3d_desc = reset_d3d_desc; + + // Buffers can be reused if size fits + if (d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && reset_d3d_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + { + if (reset_d3d_desc.Width >= d3d_desc.Width) + { + compare_d3d_desc.Width = d3d_desc.Width; + } 
+ } + + // TODO: Less stringent reset constraints. We could even create textures as placed resources and reset their underlying heaps. + is_reusing = MatchStruct(&compare_d3d_desc, &d3d_desc); + if (!is_reusing) + { + // Push releasable to command list + { + G_D12_Releasable *release = 0; + { + Lock lock = LockE(&G_D12.free_releases_mutex); + { + release = G_D12.free_releases.first; + if (release) + { + SllQueuePop(G_D12.free_releases.first, G_D12.free_releases.last); + } + else + { + release = PushStructNoZero(perm, G_D12_Releasable); + } + } + Unlock(&lock); + } + ZeroStruct(release); + release->d3d_resource = resource->d3d_resource; + SllQueuePush(cl->releases.first, cl->releases.last, release); + } + ZeroStruct(resource); + } + } + else + { + resource = PushStruct(gpu_arena->arena, G_D12_Resource); + } + + if (!is_reusing) + { + resource->d3d_desc = d3d_desc; + } + + ////////////////////////////// + //- Init resource + + resource->flags = flags; + resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1; + + if (is_buffer) + { + resource->buffer_size = desc.buffer.size; + resource->buffer_size_actual = d3d_desc.Width; + } + + if (is_texture) + { + resource->is_texture = is_texture; + resource->texture_format = desc.texture.format; + resource->texture_dims = desc.texture.dims; + resource->texture_mip_levels = d3d_desc.MipLevels; + } if (is_sampler) { - resource = PushStruct(gpu_arena->arena, G_D12_Resource); - resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1; resource->sampler_desc = desc.sampler; } + DllQueuePush(gpu_arena->resources.first, gpu_arena->resources.last, resource); + ++gpu_arena->resources.count; + + ////////////////////////////// + //- Allocate D3D12 resource + + if ((is_buffer || is_texture) && !resource->d3d_resource) + { + D3D12_CLEAR_VALUE *clear_value_arg = 0; + if (d3d_desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) + { + clear_value_arg = &clear_value; + 
} + HRESULT hr = ID3D12Device10_CreateCommittedResource3( + G_D12.device, + &heap_props, + heap_flags, + &resource->d3d_desc, + d3d_initial_layout, + clear_value_arg, + 0, // pProtectedSession + 0, // NumCastableFormats + 0, // pCastableFormats + &IID_ID3D12Resource, + (void **)&resource->d3d_resource + ); + Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1); + + if (is_texture) + { + resource->cmdlist_texture_layout = d3d_initial_layout; + } + + if (!SUCCEEDED(hr)) + { + // TODO: Don't panic + Panic(Lit("Failed to allocate D3D12 resource")); + } + + if (is_buffer) + { + resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(resource->d3d_resource); + } + } + + if (should_map && !resource->mapped) + { + D3D12_RANGE read_range = Zi; + HRESULT hr = ID3D12Resource_Map(resource->d3d_resource, 0, &read_range, &resource->mapped); + + if (!SUCCEEDED(hr)) + { + // TODO: Don't panic + Panic(Lit("Failed to map D3D12 resource")); + } + } + + ////////////////////////////// + //- Transition layout if reusing + + if (is_reusing) + { + G_DumbMemoryLayoutSync(cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), desc.texture.initial_layout); + } + return G_D12_MakeHandle(G_ResourceHandle, resource); } @@ -1227,7 +1163,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH if (queue_commit_completion >= descriptor->completion_queue_target) { // Descriptor no longer in use by gpu, reuse it - SllQueuePop(descriptors->first, descriptors->last); + DllQueueRemove(descriptors->first, descriptors->last, descriptor); --descriptors->count; index = descriptor->index; } @@ -1246,7 +1182,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH if (heap->first_free) { descriptor = heap->first_free; - SllStackPop(heap->first_free); + DllStackRemove(heap->first_free, descriptor); index = descriptor->index; } else @@ -1270,7 +1206,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH 
descriptor->handle.ptr = heap->start_handle.ptr + (index * heap->descriptor_size); descriptor->heap = heap; - SllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor); + DllQueuePush(gpu_arena->descriptors.first, gpu_arena->descriptors.last, descriptor); ++gpu_arena->descriptors.count; return descriptor; @@ -1462,8 +1398,7 @@ i32 G_CountDepth(G_ResourceHandle texture) void *G_HostPointerFromResource(G_ResourceHandle resource_handle) { G_D12_Resource *resource = G_D12_ResourceFromHandle(resource_handle); - G_D12_ResourceHeap *heap = resource->heap; - return ((u8 *)heap->mapped) + resource->pos_in_heap; + return resource->mapped; } //////////////////////////////////////////////////////////// @@ -1471,7 +1406,6 @@ void *G_HostPointerFromResource(G_ResourceHandle resource_handle) G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl) { - // Grab chunk G_D12_CmdChunk *chunk = cl->last_cmd_chunk; { @@ -1932,12 +1866,12 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) { G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); barrier_type = resource->is_texture ? 
D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
-                layout_before = resource->texture_layout;
-                layout_after = resource->texture_layout;
+                layout_before = resource->cmdlist_texture_layout;
+                layout_after = resource->cmdlist_texture_layout;
                 if (desc.layout != G_Layout_NoChange)
                 {
                     layout_after = G_D12_BarrierLayoutFromLayout(desc.layout);
-                    resource->texture_layout = layout_after;
+                    resource->cmdlist_texture_layout = layout_after;
                 }
             }
@@ -2396,7 +2330,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
             n = next;
         }
-        // Attach completion info to descriptors
+        // Attach completion info to reset descriptors
         for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;)
         {
             G_D12_Descriptor *next = d->next;
@@ -2405,12 +2339,41 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
                 d->completion_queue_kind = queue_kind;
                 d->completion_queue_target = completion_target;
                 G_D12_DescriptorList *gpu_arena_reset_descriptors_list = &gpu_arena->reset_descriptors_by_heap[d->heap->kind];
-                SllQueuePush(gpu_arena_reset_descriptors_list->first, gpu_arena_reset_descriptors_list->last, d);
+                DllQueuePush(gpu_arena_reset_descriptors_list->first, gpu_arena_reset_descriptors_list->last, d);
                 ++gpu_arena_reset_descriptors_list->count;
             }
             d = next;
         }
+        // Attach completion info to releasables & submit for release
+        if (cl->releases.first)
+        {
+            // Attach completion info
+            for (G_D12_Releasable *release = cl->releases.first; release; release = release->next)
+            {
+                release->completion_queue_kind = queue_kind;
+                release->completion_queue_target = completion_target;
+            }
+            // Submit releases (splice the whole cl->releases list; link through .first so no nodes are dropped)
+            Lock lock = LockE(&G_D12.pending_releases_mutex);
+            {
+                if (G_D12.pending_releases.last)
+                {
+                    G_D12.pending_releases.last->next = cl->releases.first;
+                }
+                else
+                {
+                    G_D12.pending_releases.first = cl->releases.first;
+                }
+                G_D12.pending_releases.last = cl->releases.last;
+            }
+            Unlock(&lock);
+        }
+
+
+
+
+        // // Attach completion info to resources
+        // for (G_D12_Resource *r =
cl->reset_resources.first; r;) // { @@ -2421,7 +2384,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) // r->completion_queue_kind = queue->kind; // r->completion_queue_target = completion_target; // G_D12_ResourceList *heap_reset_resources_list = &heap->reset_resources; - // SllQueuePush(heap_reset_resources_list->first, heap_reset_resourecs_list->last, r); + // DllQueuePush(heap_reset_resources_list->first, heap_reset_resourecs_list->last, r); // ++heap_reset_resources_list->count; // } // r = next; @@ -2843,17 +2806,17 @@ G_Stats G_QueryStats(void) { DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi; IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_LOCAL, &info); - result.local_committed = info.CurrentUsage; - result.local_budget = info.Budget; + result.device_committed = info.CurrentUsage; + result.device_budget = info.Budget; } { DXGI_QUERY_VIDEO_MEMORY_INFO info = Zi; IDXGIAdapter3_QueryVideoMemoryInfo(G_D12.adapter, 0, DXGI_MEMORY_SEGMENT_GROUP_NON_LOCAL, &info); - result.non_local_budget = info.Budget; - result.non_local_committed = info.CurrentUsage; + result.host_budget = info.Budget; + result.host_committed = info.CurrentUsage; } - result.driver_resources_allocated = Atomic64Fetch(&G_D12.driver_resources_allocated); - result.driver_descriptors_allocated = Atomic64Fetch(&G_D12.driver_descriptors_allocated); + result.arenas_count = Atomic64Fetch(&G_D12.arenas_count); + result.cumulative_nonreuse_count = Atomic64Fetch(&G_D12.cumulative_nonreuse_count); return result; } @@ -3003,15 +2966,17 @@ G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Forma Panic(Lit("Failed to retrieve swapchain buffer")); } ZeroStruct(backbuffer); - backbuffer->d3d_resource = d3d_resource; - backbuffer->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1; backbuffer->flags = G_ResourceFlag_AllowRenderTarget; + backbuffer->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1; + + 
ID3D12Resource_GetDesc(d3d_resource, (D3D12_RESOURCE_DESC *)&backbuffer->d3d_desc); + backbuffer->d3d_resource = d3d_resource; backbuffer->is_texture = 1; backbuffer->texture_format = format; backbuffer->texture_dims = VEC3I32(size.x, size.y, 1); backbuffer->texture_mip_levels = 1; - backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT; + backbuffer->cmdlist_texture_layout = D3D12_BARRIER_LAYOUT_PRESENT; backbuffer->swapchain = swapchain; } } @@ -3086,9 +3051,6 @@ void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane) // FIXME: Remove this SleepSeconds(0.100); - - - // Copy print-buffers to readback for (G_QueueKind queue_kind = 0; queue_kind < G_NumQueues; ++queue_kind) { @@ -3279,3 +3241,83 @@ void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane) } } } + +//////////////////////////////////////////////////////////// +//~ Async + +void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame) +{ + G_D12_AsyncCtx *async = &G_D12.async_ctx; + Arena *frame_arena = base_async_lane_frame->arena; + + // TODO: Investigate if we gain anything by going wide here (resource release might be exclusive driver-side) + if (lane->idx == 0) + { + // Pop pending releases + { + Lock lock = LockE(&G_D12.pending_releases_mutex); + { + if (G_D12.pending_releases.first) + { + if (async->pending_releases.last) + { + async->pending_releases.last->next = G_D12.pending_releases.first; + } + else + { + async->pending_releases.first = G_D12.pending_releases.first; + } + async->pending_releases.last = G_D12.pending_releases.last; + G_D12.pending_releases.first = 0; + G_D12.pending_releases.last = 0; + } + } + Unlock(&lock); + } + + // Release resources until we reach an uncompleted one + G_D12_Releasable *release = async->pending_releases.first; + if (release) + { + G_QueueCompletions completions = G_CompletionTargetsFromQueues(G_QueueMask_All); + while (release) + { + G_D12_Releasable *next = release->next; + if (completions.v[release->completion_queue_kind] >= 
release->completion_queue_target)
+                {
+                    SllQueuePop(async->pending_releases.first, async->pending_releases.last);
+                    if (release->d3d_resource)
+                    {
+                        ID3D12Resource_Release(release->d3d_resource);
+                    }
+                    SllQueuePush(async->free_releases.first, async->free_releases.last, release);
+                }
+                else
+                {
+                    break;
+                }
+                release = next;
+            }
+        }
+
+        // Push releasable nodes to free list
+        if (async->free_releases.first)
+        {
+            Lock lock = LockE(&G_D12.free_releases_mutex);
+            {
+                if (G_D12.free_releases.last)
+                {
+                    G_D12.free_releases.last->next = async->free_releases.first;
+                }
+                else
+                {
+                    G_D12.free_releases.first = async->free_releases.first;
+                }
+                G_D12.free_releases.last = async->free_releases.last;
+                async->free_releases.first = 0;
+                async->free_releases.last = 0;
+            }
+            Unlock(&lock);
+        }
+    }
+}
diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h
index 0fd94459..7dd329d4 100644
--- a/src/gpu/gpu_dx12/gpu_dx12_core.h
+++ b/src/gpu/gpu_dx12/gpu_dx12_core.h
@@ -63,26 +63,27 @@ Struct(G_D12_PipelineBin)
 Struct(G_D12_Resource)
 {
     G_D12_Resource *next;
+    G_D12_Resource *prev;

-    struct G_D12_ResourceHeap *heap;
-    u64 pos_in_heap;
-    u64 size_in_heap;
-
-    ID3D12Resource *d3d_resource;
-    u64 uid;
     G_ResourceFlag flags;
+    u64 uid;
+
+    // D3D12 resource
+    D3D12_RESOURCE_DESC1 d3d_desc;
+    ID3D12Resource *d3d_resource;
+    D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;
+    void *mapped;

     // Buffer info
     u64 buffer_size;
     u64 buffer_size_actual;
-    D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address;

     // Texture info
     b32 is_texture;
     G_Format texture_format;
     Vec3I32 texture_dims;
     i32 texture_mip_levels;
-    D3D12_BARRIER_LAYOUT texture_layout;
+    D3D12_BARRIER_LAYOUT cmdlist_texture_layout;

     // Sampler info
     G_SamplerDesc sampler_desc;
@@ -128,6 +129,7 @@ Struct(G_D12_DescriptorHeap)
 Struct(G_D12_Descriptor)
 {
     G_D12_Descriptor *next;
+    G_D12_Descriptor *prev;

     struct G_D12_Arena *gpu_arena;
     G_QueueKind completion_queue_kind;
@@ -162,23 +164,6 @@ Enum(G_D12_ResourceHeapKind)
G_D12_ResourceHeapKind_COUNT }; -Struct(G_D12_ResourceHeap) -{ - G_D12_ResourceHeapKind kind; - - struct GPU_D12_Arena *gpu_arena; - - ID3D12Heap *d3d_heap; - ID3D12Resource *d3d_mapped_resource; - void *mapped; - - G_D12_ResourceList resources; - G_D12_ResourceList reset_resources; - - u64 pos; - u64 size; -}; - Struct(G_D12_Arena) { Arena *arena; @@ -186,9 +171,9 @@ Struct(G_D12_Arena) G_D12_DescriptorList descriptors; G_D12_DescriptorList reset_descriptors_by_heap[G_D12_DescriptorHeapKind_COUNT]; - G_D12_ResourceList free_resources; - - G_D12_ResourceHeap resource_heaps[G_D12_ResourceHeapKind_COUNT]; + G_D12_ResourceList resources; + G_D12_ResourceList reset_resources; + // G_D12_ResourceList free_resources; }; //////////////////////////////////////////////////////////// @@ -277,6 +262,25 @@ Struct(G_D12_RawCommandList) G_D12_Descriptor *rtv_clear_descriptor; }; +//////////////////////////////////////////////////////////// +//~ Releasable types + +Struct(G_D12_Releasable) +{ + G_D12_Releasable *next; + + G_QueueKind completion_queue_kind; + i64 completion_queue_target; + + ID3D12Resource *d3d_resource; +}; + +Struct(G_D12_ReleasableList) +{ + G_D12_Releasable *first; + G_D12_Releasable *last; +}; + //////////////////////////////////////////////////////////// //~ Command list types @@ -379,6 +383,8 @@ Struct(G_D12_CmdList) G_QueueKind queue_kind; G_D12_DescriptorList reset_descriptors; + G_D12_ReleasableList releases; + G_D12_StagingRegionNode *first_staging_region; G_D12_StagingRegionNode *last_staging_region; @@ -410,11 +416,20 @@ Struct(G_D12_Swapchain) //////////////////////////////////////////////////////////// //~ State types +Struct(G_D12_AsyncCtx) +{ + G_D12_ReleasableList pending_releases; + G_D12_ReleasableList free_releases; +}; + Struct(G_D12_Ctx) { Atomic64Padded resource_creation_gen; // Stats + Atomic64 arenas_count; + Atomic64 cumulative_nonreuse_count; + Atomic64 driver_resources_allocated; Atomic64 driver_descriptors_allocated; @@ -446,6 +461,15 
@@ Struct(G_D12_Ctx) IDXGIFactory6 *factory; IDXGIAdapter3 *adapter; ID3D12Device10 *device; + + // Release-queue + Mutex pending_releases_mutex; + Mutex free_releases_mutex; + G_D12_ReleasableList pending_releases; + G_D12_ReleasableList free_releases; + + // Async + G_D12_AsyncCtx async_ctx; }; Struct(G_D12_ThreadLocalCtx) @@ -509,3 +533,8 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size); //~ Collection worker void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane); + +//////////////////////////////////////////////////////////// +//~ Async + +void G_D12_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame); diff --git a/src/pp/pp_sim/pp_sim_core.c b/src/pp/pp_sim/pp_sim_core.c index 6bf337df..61988934 100644 --- a/src/pp/pp_sim/pp_sim_core.c +++ b/src/pp/pp_sim/pp_sim_core.c @@ -155,6 +155,7 @@ S_Shape S_MulXformShape(Xform xf, S_Shape shape) Vec2 S_SupportPointFromShape(S_Shape shape, Vec2 dir) { + // FIXME: Properly handle rounded polygons Vec2 result = Zi; Vec2 dir_norm = NormVec2(dir); f32 max_dot = -Inf; diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 1817074e..8b992c2a 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -237,7 +237,8 @@ V_WidgetTheme V_GetWidgetTheme(void) theme.icon_font = UI_BuiltinIconFont(); // theme.font_size = 14; - theme.font_size = TweakFloat("Font size", 14, 6, 50, .precision = 0); + // theme.font_size = TweakFloat("Font size", 14, 6, 50, .precision = 0); + theme.font_size = TweakFloat("Font size", 14, 6, 50, .precision = 2); theme.h1 = 2.00; theme.h2 = 1.50; theme.h3 = 1.25; @@ -1986,6 +1987,8 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Build debug info UI + G_Stats gpu_stats = G_QueryStats(); + if (frame->show_console) { UI_Key dbg_box = UI_KeyF("Debug box"); @@ -2043,9 +2046,10 @@ void V_TickForever(WaveLaneCtx *lane) UI_BuildLabelF("GPU:"); UI_Pop(FontSize); } - UI_BuildLabelF(" Arenas: %F", 
FmtSint(GetGstat(NumGpuArenas))); - UI_BuildLabelF(" Dedicated arena memory committed: %F MiB", FmtFloat((f64)GetGstat(DedicatedGpuArenaMemoryCommitted) / 1024 / 1024)); - UI_BuildLabelF(" Shared arena memory committed: %F MiB", FmtFloat((f64)GetGstat(SharedGpuArenaMemoryCommitted) / 1024 / 1024)); + UI_BuildLabelF(" Arenas: %F", FmtUint(gpu_stats.arenas_count)); + UI_BuildLabelF(" Device memory usage: %F MiB", FmtFloat((f64)gpu_stats.device_committed / 1024 / 1024)); + UI_BuildLabelF(" Host memory usage: %F MiB", FmtFloat((f64)gpu_stats.host_committed / 1024 / 1024)); + UI_BuildLabelF(" Non-reuse tally: %F", FmtUint(gpu_stats.cumulative_nonreuse_count)); } UI_BuildSpacer(UI_PIX(padding, 1), Axis_Y); } diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index e23416d7..7970cc27 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -1720,8 +1720,6 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) ////////////////////////////// //- Dispatch shaders - G_DumbMemoryLayoutSync(frame->cl, draw_target, G_Layout_DirectQueue_RenderTargetWrite); - //- Clear pass { G_ClearRenderTarget(frame->cl, draw_target, VEC4(0, 0, 0, 0)); @@ -1729,8 +1727,6 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) //- Rect pass - G_DumbMemoryLayoutSync(frame->cl, draw_target, G_Layout_DirectQueue_RenderTargetWrite); - if (rects_count > 0) { // Render rects