From 4ddc2f16666fcd3bed44d5880939b612686aad5b Mon Sep 17 00:00:00 2001 From: jacob Date: Sat, 13 Dec 2025 23:08:09 -0600 Subject: [PATCH] gpu staging ring working --- src/base/base_win32/base_win32.c | 10 +- src/gpu/gpu_core.h | 7 +- src/gpu/gpu_dx12/gpu_dx12_core.c | 513 +++++++++++++++++-------------- src/gpu/gpu_dx12/gpu_dx12_core.h | 26 +- 4 files changed, 312 insertions(+), 244 deletions(-) diff --git a/src/base/base_win32/base_win32.c b/src/base/base_win32/base_win32.c index e5c7392d..576769d3 100644 --- a/src/base/base_win32/base_win32.c +++ b/src/base/base_win32/base_win32.c @@ -166,7 +166,15 @@ CpuTopologyInfo GetCpuTopologyInfo(void) void SleepSeconds(f64 seconds) { - Sleep(seconds / 1000.0); + f64 ms = seconds * 1000.0; + if (ms > 4000000000) + { + Sleep(INFINITE); + } + else + { + Sleep((u32)ms); + } } //////////////////////////////////////////////////////////// diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 35b723f4..b3e0e63c 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -517,6 +517,7 @@ void G_Bootstrap(void); G_ArenaHandle G_AcquireArena(void); void G_ReleaseArena(G_ArenaHandle arena); +void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle); //////////////////////////////////////////////////////////// //~ @hookdecl Resource @@ -672,10 +673,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc); G_CommandListHandle G_PrepareCommandList(G_QueueKind queue); i64 G_CommitCommandList(G_CommandListHandle cl); -//- Arena - -void G_ResetArena(G_CommandListHandle cl, G_ArenaHandle arena); - //- Cpu -> Gpu copy void G_CopyCpuToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range); @@ -684,7 +681,7 @@ void G_CopyCpuToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 ds //- Gpu <-> Gpu copy void G_CopyBufferToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, G_ResourceHandle src, RngU64 src_copy_range); -void 
G_CopyBufferToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Vec3I32 src_dims, Rng3I32 src_copy_range); +void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset); void G_CopyTextureToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range); void G_CopyTextureToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range); diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index 4f79c43d..9fc27f01 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -771,6 +771,15 @@ G_ArenaHandle G_AcquireArena(void) void G_ReleaseArena(G_ArenaHandle arena) { /* TODO */ + + /* TODO: Unmap heaps */ +} + +void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle) +{ + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); + G_D12_ResetArena(cl, gpu_arena); } //////////////////////////////////////////////////////////// @@ -891,7 +900,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_ResourceDesc desc) /* Create d3d heap */ { D3D12_HEAP_DESC d3d_desc = ZI; - d3d_desc.SizeInBytes = Mebi(256); + d3d_desc.SizeInBytes = Mebi(512); if (heap_kind == G_D12_ResourceHeapKind_Cpu) { d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM; @@ -1468,6 +1477,8 @@ G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v) G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) { + size = AlignU64(size, MaxU64(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, 512)); + G_D12_SharedState *g = &G_D12_shared_state; G_QueueKind queue_kind = cl->queue_kind; G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); @@ -1475,17 +1486,18 @@ G_D12_StagingRegionNode 
*G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) Lock lock = LockE(&queue->staging_mutex); { - G_D12_StagingHeap *heap = queue->staging_heap; + G_D12_StagingRing *old_ring = 0; + G_D12_StagingRing *ring = queue->staging_ring; i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence); /* Find first completed region with matching size. - * For each region in heap: + * For each region in ring: * - If region size > size, split off a smaller region & use it * * - If region size < size, try to merge with next completed region * * - If no available completed region with eligible size, queue the - * current heap for deletion & create a new heap + * current ring for deletion & create a new ring * with larger size */ @@ -1497,15 +1509,15 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) /* Find region with large enough size */ G_D12_StagingRegionNode *match = 0; - if (heap && heap->size >= size) + if (ring && ring->size >= size) { - G_D12_StagingRegionNode *r = heap->head_region_node; + G_D12_StagingRegionNode *r = ring->head_region_node; for (;;) { + G_D12_StagingRegionNode *next = r->next; b32 is_completed = completed >= Atomic64Fetch(&r->completion_target); if (is_completed) { - G_D12_StagingRegionNode *next = r->next; u64 region_size = 0; if (next->pos > r->pos) { @@ -1513,24 +1525,40 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) } else { - region_size = heap->size - r->pos; + region_size = ring->size - r->pos; } if (region_size < size) { - G_D12_StagingRegionNode *prev = r->prev; - b32 prev_is_completed = completed >= Atomic64Fetch(&prev->completion_target); - if (prev_is_completed && prev->pos < r->pos) + b32 next_is_completed = completed >= Atomic64Fetch(&next->completion_target); + if (next_is_completed) { - /* Merge with previous region & retry */ - prev->next = next; - SllStackPush(heap->first_free_region_node, r); - r = prev; + if (next->pos > r->pos) + { + /* Merge with next region & 
retry */ + if (next == ring->head_region_node) + { + ring->head_region_node = r; + } + r->next = next->next; + r->next->prev = r; + SllStackPush(ring->first_free_region_node, next); + } + else + { + /* Wrap to beginning */ + r = next; + if (r == ring->head_region_node) + { + /* No large-enough completed region found */ + break; + } + } } else { - /* Continue to next region */ - r = next; + /* No large-enough completed region found */ + break; } } else @@ -1542,87 +1570,58 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) } else { - /* No large-enough completed region found */ - break; + /* Continue to next region */ + r = next; + if (r == ring->head_region_node) + { + /* No large-enough completed region found */ + break; + } } } } - /* Create new heap if no match found */ + /* Create new ring if no match found */ if (!match) { - /* Queue old heap for deletion */ - u64 new_heap_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64)); - if (heap) + /* Queue old ring for deletion */ + old_ring = ring; + ring = 0; + u64 new_ring_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64)); + // u64 new_ring_size = MaxU64(AlignU64ToNextPow2(size), Kibi(128)); + if (old_ring) { - /* FIXME: Queue for deletion here */ - new_heap_size = MaxU64(new_heap_size, heap->size * 2); - heap = 0; + new_ring_size = MaxU64(new_ring_size, old_ring->size * 2); } - /* Create new heap */ + /* Create new ring */ { Arena *arena = AcquireArena(Gibi(1)); - heap = PushStruct(arena, G_D12_StagingHeap); - heap->arena = arena; - heap->size = new_heap_size; + ring = PushStruct(arena, G_D12_StagingRing); + ring->arena = arena; + ring->size = new_ring_size; - /* Create backing upload heap resource */ - ID3D12Resource *d3d_resource = 0; - { - D3D12_RESOURCE_DESC d3d_desc = ZI; - d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; - d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; - d3d_desc.Format = DXGI_FORMAT_UNKNOWN; - d3d_desc.Alignment = 0; - d3d_desc.Width = new_heap_size; - 
d3d_desc.Height = 1; - d3d_desc.DepthOrArraySize = 1; - d3d_desc.MipLevels = 1; - d3d_desc.SampleDesc.Count = 1; - d3d_desc.SampleDesc.Quality = 0; + G_ArenaHandle gpu_arena_handle = G_AcquireArena(); + ring->gpu_arena = G_D12_ArenaFromHandle(gpu_arena_handle); - D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; - HRESULT hr = ID3D12Device_CreateCommittedResource( - g->device, - &heap_props, - D3D12_HEAP_FLAG_CREATE_NOT_ZEROED, - &d3d_desc, - D3D12_RESOURCE_STATE_COMMON, - 0, - &IID_ID3D12Resource, - (void **)&d3d_resource - ); - if (!SUCCEEDED(hr)) - { - /* TODO: Don't panic */ - Panic(Lit("Failed to create upload heap")); - } - } - heap->resource.d3d_resource = d3d_resource; - heap->resource.uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; - heap->resource.buffer_size = new_heap_size; - heap->resource.buffer_size_actual = new_heap_size; - heap->resource.buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource); - - /* Map */ - { - D3D12_RANGE read_range = ZI; - HRESULT hr = ID3D12Resource_Map(d3d_resource, 0, &read_range, &heap->mapped); - if (!SUCCEEDED(hr)) - { - /* TODO: Don't panic */ - Panic(Lit("Failed to map upload heap")); - } - } + G_ResourceHandle resource_handle = G_PushBuffer( + gpu_arena_handle, + u8, + new_ring_size, + .flags = G_ResourceFlag_HostMemory | G_ResourceFlag_Uncached + ); + ring->resource = G_D12_ResourceFromHandle(resource_handle); + ring->base = G_StructFromResource(resource_handle, u8); } /* Create initial region */ - match = PushStruct(heap->arena, G_D12_StagingRegionNode); - match->heap = heap; + match = PushStruct(ring->arena, G_D12_StagingRegionNode); + match->ring = ring; match->next = match; match->prev = match; - heap->head_region_node = match; + + /* FIXME: Remove this */ + queue->staging_ring = ring; } /* Split extra region space */ @@ -1635,35 +1634,46 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) } else { - region_size = heap->size - match->pos; 
+ region_size = ring->size - match->pos; } if (region_size > size) { - G_D12_StagingRegionNode *new_next = heap->first_free_region_node; + G_D12_StagingRegionNode *new_next = ring->first_free_region_node; if (new_next) { - SllStackPop(heap->first_free_region_node); + SllStackPop(ring->first_free_region_node); + ZeroStruct(new_next); } else { - new_next = PushStruct(heap->arena, G_D12_StagingRegionNode); + new_next = PushStruct(ring->arena, G_D12_StagingRegionNode); } new_next->next = next; new_next->prev = match; next->prev = new_next; match->next = new_next; - new_next->heap = heap; + new_next->ring = ring; new_next->pos = match->pos + size; } } + ring->head_region_node = match->next; + Atomic64Set(&match->completion_target, I64Max); result = match; + + if (old_ring) + { + /* FIXME: Queue old ring for deletion with command list */ + } } Unlock(&lock); + /* Add to command list */ + SllQueuePushN(cl->first_staging_region, cl->last_staging_region, result, next_in_command_list); + return result; } @@ -1960,14 +1970,16 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) case G_D12_CmdKind_CopyBytes: { - u64 src_offset = cmd->copy_bytes.src_copy_range.min; - u64 copy_size = cmd->copy_bytes.src_copy_range.max - cmd->copy_bytes.src_copy_range.min; - ID3D12GraphicsCommandList_CopyBufferRegion(d3d_cl, - cmd->copy_bytes.dst->d3d_resource, - cmd->copy_bytes.dst_offset, - cmd->copy_bytes.src->d3d_resource, - src_offset, - copy_size); + u64 src_offset = cmd->copy_bytes.src_range.min; + u64 copy_size = cmd->copy_bytes.src_range.max - cmd->copy_bytes.src_range.min; + ID3D12GraphicsCommandList_CopyBufferRegion( + d3d_cl, + cmd->copy_bytes.dst->d3d_resource, + cmd->copy_bytes.dst_offset, + cmd->copy_bytes.src->d3d_resource, + src_offset, + copy_size + ); cmd_idx += 1; } break; @@ -1979,17 +1991,22 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) G_D12_Resource *src = cmd->copy_texels.src; D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc; 
D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc; - Vec3I32 dst_offset = cmd->copy_texels.dst_offset; - Rng3I32 src_copy_range = cmd->copy_texels.src_copy_range; + Vec3I32 dst_offset = cmd->copy_texels.dst_texture_offset; + Rng3I32 src_range = cmd->copy_texels.src_texture_range; D3D12_BOX src_box = ZI; + D3D12_BOX *src_box_ptr = 0; { - src_box.left = src_copy_range.p0.x; - src_box.top = src_copy_range.p0.y; - src_box.front = src_copy_range.p0.z; - src_box.right = src_copy_range.p1.x; - src_box.bottom = src_copy_range.p1.y; - src_box.back = src_copy_range.p1.z; + src_box.left = src_range.p0.x; + src_box.top = src_range.p0.y; + src_box.front = src_range.p0.z; + src_box.right = src_range.p1.x; + src_box.bottom = src_range.p1.y; + src_box.back = src_range.p1.z; + if (src->is_texture) + { + src_box_ptr = &src_box; + } } if (dst->flags & G_ResourceFlag_AllowDepthStencil) @@ -2001,7 +2018,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) } else { - ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, &src_box); + ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, src_box_ptr); } cmd_idx += 1; @@ -2301,6 +2318,17 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) /* End dx12 command list */ i64 completion_target = G_D12_CommitRawCommandList(rcl); + /* Attach completion info to staging regions */ + for (G_D12_StagingRegionNode *n = cl->first_staging_region; n;) + { + G_D12_StagingRegionNode *next = n->next_in_command_list; + { + Atomic64Set(&n->completion_target, completion_target); + n->next_in_command_list = 0; + } + n = next; + } + /* Attach completion info to descriptors */ for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;) { @@ -2346,173 +2374,196 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) return completion_target; } -//- Arena - -void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle 
arena_handle) -{ - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle); - G_D12_ResetArena(cl, gpu_arena); -} - //- Cpu -> Gpu copy void G_CopyCpuToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range) { - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - u64 copy_size = src_copy_range.max - src_copy_range.min; - G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, copy_size); - CopyBytes((u8 *)region->heap->mapped + region->pos, (u8 *)src + src_copy_range.min, copy_size); - G_CopyBufferToBuffer(cl_handle, - dst_handle, - dst_offset, - G_D12_MakeHandle(G_ResourceHandle, &region->heap->resource), - RNGU64(region->pos, copy_size)); + if (src_copy_range.max > src_copy_range.min) + { + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + u64 copy_size = src_copy_range.max - src_copy_range.min; + G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, copy_size); + CopyBytes((u8 *)region->ring->base + region->pos, (u8 *)src + src_copy_range.min, copy_size); + G_CopyBufferToBuffer( + cl_handle, + dst_handle, + dst_offset, + G_D12_MakeHandle(G_ResourceHandle, region->ring->resource), + RNGU64(region->pos, region->pos + copy_size) + ); + } } void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range) { - G_D12_SharedState *g = &G_D12_shared_state; - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); - Assert(dst->is_texture); - Vec3I32 staged_dims = ZI; { staged_dims.x = src_copy_range.p1.x - src_copy_range.p0.x; staged_dims.y = src_copy_range.p1.y - src_copy_range.p0.y; staged_dims.z = src_copy_range.p1.z - src_copy_range.p0.z; } - - /* Grab footprint info */ - u64 footprint_rows_count = 0; - u64 footprint_row_size = 0; - u64 footprint_size = 0; - 
D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI; + if (staged_dims.x > 0 && staged_dims.y > 0 && staged_dims.z > 0) { - D3D12_RESOURCE_DESC src_desc = ZI; - { - ID3D12Resource_GetDesc(dst->d3d_resource, &src_desc); - src_desc.Width = staged_dims.x; - src_desc.Height = staged_dims.y; - src_desc.DepthOrArraySize = staged_dims.z; - } - ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &footprint, (u32 *)&footprint_rows_count, &footprint_row_size, &footprint_size); - } + G_D12_SharedState *g = &G_D12_shared_state; + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); + Assert(dst->is_texture); - /* Fill staging buffer */ - G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, footprint_size); - { - D3D12_RANGE read_range = ZI; - u8 *dst_base = (u8 *)region->heap->mapped + region->pos + footprint.Offset; - u8 *src_base = src; - u32 z_size = footprint_row_size * footprint_rows_count; - for (i32 z = 0; z < src_dims.z; ++z) + /* Grab footprint info */ + u64 footprint_rows_count = 0; + u64 footprint_row_size = 0; + u64 footprint_size = 0; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = ZI; { - u32 z_offset = z * z_size; - for (i32 y = 0; y < footprint_rows_count; ++y) + D3D12_RESOURCE_DESC src_desc = ZI; { - u8 *dst_row = dst_base + y * footprint.Footprint.RowPitch + z_offset; - u8 *src_row = src_base + y * footprint_row_size + z_offset; - CopyBytes(dst_row, src_row, footprint_row_size); + ID3D12Resource_GetDesc(dst->d3d_resource, &src_desc); + src_desc.Width = staged_dims.x; + src_desc.Height = staged_dims.y; + src_desc.DepthOrArraySize = staged_dims.z; + } + ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &footprint, (u32 *)&footprint_rows_count, &footprint_row_size, &footprint_size); + } + + G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, footprint_size); + footprint.Offset = region->pos; + + /* Fill staging buffer */ + { + D3D12_RANGE read_range = 
ZI; + u8 *src_base = src; + u8 *dst_base = (u8 *)region->ring->base + footprint.Offset; + u32 z_size = footprint_row_size * footprint_rows_count; + for (i32 z = 0; z < src_dims.z; ++z) + { + u32 z_offset = z * z_size; + for (i32 y = 0; y < footprint_rows_count; ++y) + { + u8 *src_row = src_base + y * footprint_row_size + z_offset; + u8 *dst_row = dst_base + y * footprint.Footprint.RowPitch + z_offset; + CopyBytes(dst_row, src_row, footprint_row_size); + } } } - } - G_CopyBufferToTexture(cl_handle, - dst_handle, dst_offset, - G_D12_MakeHandle(G_ResourceHandle, &region->heap->resource), staged_dims, - RNG3I32(VEC3I32(0, 0, 0), staged_dims)); + Rng3I32 dst_copy_range = ZI; + dst_copy_range.p0 = dst_offset; + dst_copy_range.p1.x = dst_copy_range.p0.x + staged_dims.x; + dst_copy_range.p1.y = dst_copy_range.p0.y + staged_dims.y; + dst_copy_range.p1.z = dst_copy_range.p0.z + staged_dims.z; + G_CopyBufferToTexture( + cl_handle, + dst_handle, dst_copy_range, + G_D12_MakeHandle(G_ResourceHandle, region->ring->resource), footprint.Offset + ); + } } //- Gpu <-> Gpu copy void G_CopyBufferToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, G_ResourceHandle src_handle, RngU64 src_copy_range) { - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_CopyBytes; - cmd->copy_bytes.src = G_D12_ResourceFromHandle(src_handle); - cmd->copy_bytes.dst = G_D12_ResourceFromHandle(dst_handle); - cmd->copy_bytes.dst_offset = dst_offset; - cmd->copy_bytes.src_copy_range = src_copy_range; + if (src_copy_range.max > src_copy_range.min) + { + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_CopyBytes; + cmd->copy_bytes.src = G_D12_ResourceFromHandle(src_handle); + cmd->copy_bytes.dst = G_D12_ResourceFromHandle(dst_handle); + cmd->copy_bytes.dst_offset = dst_offset; + cmd->copy_bytes.src_range = src_copy_range; + } } -void 
G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Vec3I32 src_dims, Rng3I32 src_copy_range) +void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset) { - G_D12_SharedState *g = &G_D12_shared_state; - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle); - G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); - Assert(!src->is_texture); - Assert(dst->is_texture); - - /* Grab footprint info */ - D3D12_PLACED_SUBRESOURCE_FOOTPRINT src_footprint = ZI; + Vec3I32 src_dims = ZI; { - D3D12_RESOURCE_DESC src_desc = ZI; + src_dims.x = dst_copy_range.p1.x - dst_copy_range.p0.x; + src_dims.y = dst_copy_range.p1.y - dst_copy_range.p0.y; + src_dims.z = dst_copy_range.p1.z - dst_copy_range.p0.z; + } + if (src_dims.x > 0 && src_dims.y > 0 && src_dims.z > 0) + { + G_D12_SharedState *g = &G_D12_shared_state; + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle); + G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); + Assert(!src->is_texture); + Assert(dst->is_texture); + + /* Grab footprint info */ + D3D12_PLACED_SUBRESOURCE_FOOTPRINT src_footprint = ZI; { - ID3D12Resource_GetDesc(dst->d3d_resource, &src_desc); - src_desc.Width = src_dims.x; - src_desc.Height = src_dims.y; - src_desc.DepthOrArraySize = src_dims.z; + D3D12_RESOURCE_DESC src_desc = ZI; + { + ID3D12Resource_GetDesc(dst->d3d_resource, &src_desc); + src_desc.Width = src_dims.x; + src_desc.Height = src_dims.y; + src_desc.DepthOrArraySize = src_dims.z; + } + ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &src_footprint, 0, 0, 0); + src_footprint.Offset = src_offset; } - ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &src_footprint, 0, 0, 0); - } - 
D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; - D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; - { - src_loc.pResource = src->d3d_resource; - src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - src_loc.PlacedFootprint = src_footprint; - } - { - dst_loc.pResource = dst->d3d_resource; - dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst_loc.SubresourceIndex = 0; - } + D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; + D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; + { + src_loc.pResource = src->d3d_resource; + src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + src_loc.PlacedFootprint = src_footprint; + } + { + dst_loc.pResource = dst->d3d_resource; + dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst_loc.SubresourceIndex = 0; + } - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_CopyTexels; - cmd->copy_texels.dst = dst; - cmd->copy_texels.src = src; - cmd->copy_texels.dst_loc = dst_loc; - cmd->copy_texels.src_loc = src_loc; - cmd->copy_texels.dst_offset = dst_offset; - cmd->copy_texels.src_copy_range = src_copy_range; + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_CopyTexels; + cmd->copy_texels.dst = dst; + cmd->copy_texels.src = src; + cmd->copy_texels.dst_loc = dst_loc; + cmd->copy_texels.src_loc = src_loc; + cmd->copy_texels.dst_texture_offset = dst_copy_range.p0; + } } void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range) { - G_D12_SharedState *g = &G_D12_shared_state; - G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle); - G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); - Assert(src->is_texture); - Assert(dst->is_texture); - - D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; - D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; + if (src_copy_range.p1.x > src_copy_range.p0.x && + src_copy_range.p1.y > src_copy_range.p0.y && + 
src_copy_range.p1.z > src_copy_range.p0.z) { - src_loc.pResource = dst->d3d_resource; - src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - src_loc.SubresourceIndex = 0; - } - { - dst_loc.pResource = dst->d3d_resource; - dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - dst_loc.SubresourceIndex = 0; - } + G_D12_SharedState *g = &G_D12_shared_state; + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle); + G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle); + Assert(src->is_texture); + Assert(dst->is_texture); - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_CopyTexels; - cmd->copy_texels.dst = dst; - cmd->copy_texels.src = src; - cmd->copy_texels.dst_loc = dst_loc; - cmd->copy_texels.src_loc = src_loc; - cmd->copy_texels.dst_offset = dst_offset; - cmd->copy_texels.src_copy_range = src_copy_range; + D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; + D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; + { + src_loc.pResource = dst->d3d_resource; + src_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + src_loc.SubresourceIndex = 0; + } + { + dst_loc.pResource = dst->d3d_resource; + dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst_loc.SubresourceIndex = 0; + } + + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_CopyTexels; + cmd->copy_texels.dst = dst; + cmd->copy_texels.src = src; + cmd->copy_texels.dst_loc = dst_loc; + cmd->copy_texels.src_loc = src_loc; + cmd->copy_texels.dst_texture_offset = dst_offset; + cmd->copy_texels.src_texture_range = src_copy_range; + } } void G_CopyTextureToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range) @@ -2949,6 +3000,12 @@ void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane) { for (;;) { + /* FIXME: Remove this */ + SleepSeconds(0.100); + + + + /* Copy print-buffers to readback */ for (G_QueueKind queue_kind = 0; 
queue_kind < G_NumQueues; ++queue_kind) { diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index 78770be8..a25f1bbd 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -191,14 +191,15 @@ Struct(G_D12_Arena) //////////////////////////////////////////////////////////// //~ Staging types -Struct(G_D12_StagingHeap) +Struct(G_D12_StagingRing) { Arena *arena; - - G_D12_Resource resource; - void *mapped; + G_D12_Arena *gpu_arena; u64 size; + G_D12_Resource *resource; + u8 *base; + struct G_D12_StagingRegionNode *head_region_node; struct G_D12_StagingRegionNode *first_free_region_node; @@ -206,12 +207,15 @@ Struct(G_D12_StagingHeap) Struct(G_D12_StagingRegionNode) { - G_D12_StagingHeap *heap; + G_D12_StagingRing *ring; - /* Heap links (requires heap lock to read) */ + /* Ring links (requires ring lock to read) */ G_D12_StagingRegionNode *prev; G_D12_StagingRegionNode *next; + /* Command list links */ + G_D12_StagingRegionNode *next_in_command_list; + /* Region info */ Atomic64 completion_target; u64 pos; @@ -247,7 +251,7 @@ Struct(G_D12_Queue) /* Staging heap */ Mutex staging_mutex; - G_D12_StagingHeap *staging_heap; + G_D12_StagingRing *staging_ring; Fence sync_fence; }; @@ -313,7 +317,7 @@ Struct(G_D12_Cmd) G_D12_Resource *dst; G_D12_Resource *src; u64 dst_offset; - RngU64 src_copy_range; + RngU64 src_range; } copy_bytes; struct @@ -322,8 +326,8 @@ Struct(G_D12_Cmd) G_D12_Resource *src; D3D12_TEXTURE_COPY_LOCATION dst_loc; D3D12_TEXTURE_COPY_LOCATION src_loc; - Vec3I32 dst_offset; - Rng3I32 src_copy_range; + Vec3I32 dst_texture_offset; + Rng3I32 src_texture_range; } copy_texels; struct @@ -366,6 +370,8 @@ Struct(G_D12_CmdList) G_QueueKind queue_kind; G_D12_DescriptorList reset_descriptors; + G_D12_StagingRegionNode *first_staging_region; + G_D12_StagingRegionNode *last_staging_region; G_D12_CmdChunk *first_cmd_chunk; G_D12_CmdChunk *last_cmd_chunk;