gpu staging ring working

This commit is contained in:
jacob 2025-12-13 23:08:09 -06:00
parent 6c3c7231ff
commit 4ddc2f1666
4 changed files with 312 additions and 244 deletions

View File

@ -166,7 +166,15 @@ CpuTopologyInfo GetCpuTopologyInfo(void)
void SleepSeconds(f64 seconds) void SleepSeconds(f64 seconds)
{ {
Sleep(seconds / 1000.0); f64 ms = seconds * 1000.0;
if (ms > 4000000000)
{
Sleep(INFINITE);
}
else
{
Sleep((u32)ms);
}
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////

View File

@ -517,6 +517,7 @@ void G_Bootstrap(void);
G_ArenaHandle G_AcquireArena(void); G_ArenaHandle G_AcquireArena(void);
void G_ReleaseArena(G_ArenaHandle arena); void G_ReleaseArena(G_ArenaHandle arena);
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Resource //~ @hookdecl Resource
@ -672,10 +673,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc);
G_CommandListHandle G_PrepareCommandList(G_QueueKind queue); G_CommandListHandle G_PrepareCommandList(G_QueueKind queue);
i64 G_CommitCommandList(G_CommandListHandle cl); i64 G_CommitCommandList(G_CommandListHandle cl);
//- Arena
void G_ResetArena(G_CommandListHandle cl, G_ArenaHandle arena);
//- Cpu -> Gpu copy //- Cpu -> Gpu copy
void G_CopyCpuToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range); void G_CopyCpuToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range);
@ -684,7 +681,7 @@ void G_CopyCpuToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 ds
//- Gpu <-> Gpu copy //- Gpu <-> Gpu copy
void G_CopyBufferToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, G_ResourceHandle src, RngU64 src_copy_range); void G_CopyBufferToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, G_ResourceHandle src, RngU64 src_copy_range);
void G_CopyBufferToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Vec3I32 src_dims, Rng3I32 src_copy_range); void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset);
void G_CopyTextureToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range); void G_CopyTextureToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range);
void G_CopyTextureToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range); void G_CopyTextureToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range);

View File

@ -771,6 +771,15 @@ G_ArenaHandle G_AcquireArena(void)
void G_ReleaseArena(G_ArenaHandle arena) void G_ReleaseArena(G_ArenaHandle arena)
{ {
/* TODO */ /* TODO */
/* TODO: Unmap heaps */
}
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle)
{
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle);
G_D12_ResetArena(cl, gpu_arena);
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -891,7 +900,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_ResourceDesc desc)
/* Create d3d heap */ /* Create d3d heap */
{ {
D3D12_HEAP_DESC d3d_desc = ZI; D3D12_HEAP_DESC d3d_desc = ZI;
d3d_desc.SizeInBytes = Mebi(256); d3d_desc.SizeInBytes = Mebi(512);
if (heap_kind == G_D12_ResourceHeapKind_Cpu) if (heap_kind == G_D12_ResourceHeapKind_Cpu)
{ {
d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM; d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM;
@ -1468,6 +1477,8 @@ G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v)
G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size) G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
{ {
size = AlignU64(size, MaxU64(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, 512));
G_D12_SharedState *g = &G_D12_shared_state; G_D12_SharedState *g = &G_D12_shared_state;
G_QueueKind queue_kind = cl->queue_kind; G_QueueKind queue_kind = cl->queue_kind;
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
@ -1475,17 +1486,18 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
Lock lock = LockE(&queue->staging_mutex); Lock lock = LockE(&queue->staging_mutex);
{ {
G_D12_StagingHeap *heap = queue->staging_heap; G_D12_StagingRing *old_ring = 0;
G_D12_StagingRing *ring = queue->staging_ring;
i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence); i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence);
/* Find first completed region with matching size. /* Find first completed region with matching size.
* For each region in heap: * For each region in ring:
* - If region size > size, split off a smaller region & use it * - If region size > size, split off a smaller region & use it
* *
* - If region size < size, try to merge with next completed region * - If region size < size, try to merge with next completed region
* *
* - If no available completed region with eligible size, queue the * - If no available completed region with eligible size, queue the
* current heap for deletion & create a new heap * current ring for deletion & create a new ring
* with larger size * with larger size
*/ */
@ -1497,15 +1509,15 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
/* Find region with large enough size */ /* Find region with large enough size */
G_D12_StagingRegionNode *match = 0; G_D12_StagingRegionNode *match = 0;
if (heap && heap->size >= size) if (ring && ring->size >= size)
{ {
G_D12_StagingRegionNode *r = heap->head_region_node; G_D12_StagingRegionNode *r = ring->head_region_node;
for (;;) for (;;)
{ {
G_D12_StagingRegionNode *next = r->next;
b32 is_completed = completed >= Atomic64Fetch(&r->completion_target); b32 is_completed = completed >= Atomic64Fetch(&r->completion_target);
if (is_completed) if (is_completed)
{ {
G_D12_StagingRegionNode *next = r->next;
u64 region_size = 0; u64 region_size = 0;
if (next->pos > r->pos) if (next->pos > r->pos)
{ {
@ -1513,24 +1525,40 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
} }
else else
{ {
region_size = heap->size - r->pos; region_size = ring->size - r->pos;
} }
if (region_size < size) if (region_size < size)
{ {
G_D12_StagingRegionNode *prev = r->prev; b32 next_is_completed = completed >= Atomic64Fetch(&next->completion_target);
b32 prev_is_completed = completed >= Atomic64Fetch(&prev->completion_target); if (next_is_completed)
if (prev_is_completed && prev->pos < r->pos)
{ {
/* Merge with previous region & retry */ if (next->pos > r->pos)
prev->next = next; {
SllStackPush(heap->first_free_region_node, r); /* Merge with next region & retry */
r = prev; if (next == ring->head_region_node)
{
ring->head_region_node = r;
}
r->next = next->next;
r->next->prev = r;
SllStackPush(ring->first_free_region_node, next);
} }
else else
{ {
/* Continue to next region */ /* Wrap to beginning */
r = next; r = next;
if (r == ring->head_region_node)
{
/* No large-enough completed region found */
break;
}
}
}
else
{
/* No large-enough completed region found */
break;
} }
} }
else else
@ -1541,88 +1569,59 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
} }
} }
else else
{
/* Continue to next region */
r = next;
if (r == ring->head_region_node)
{ {
/* No large-enough completed region found */ /* No large-enough completed region found */
break; break;
} }
} }
} }
}
/* Create new heap if no match found */ /* Create new ring if no match found */
if (!match) if (!match)
{ {
/* Queue old heap for deletion */ /* Queue old ring for deletion */
u64 new_heap_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64)); old_ring = ring;
if (heap) ring = 0;
u64 new_ring_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64));
// u64 new_ring_size = MaxU64(AlignU64ToNextPow2(size), Kibi(128));
if (old_ring)
{ {
/* FIXME: Queue for deletion here */ new_ring_size = MaxU64(new_ring_size, old_ring->size * 2);
new_heap_size = MaxU64(new_heap_size, heap->size * 2);
heap = 0;
} }
/* Create new heap */ /* Create new ring */
{ {
Arena *arena = AcquireArena(Gibi(1)); Arena *arena = AcquireArena(Gibi(1));
heap = PushStruct(arena, G_D12_StagingHeap); ring = PushStruct(arena, G_D12_StagingRing);
heap->arena = arena; ring->arena = arena;
heap->size = new_heap_size; ring->size = new_ring_size;
/* Create backing upload heap resource */ G_ArenaHandle gpu_arena_handle = G_AcquireArena();
ID3D12Resource *d3d_resource = 0; ring->gpu_arena = G_D12_ArenaFromHandle(gpu_arena_handle);
{
D3D12_RESOURCE_DESC d3d_desc = ZI;
d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
d3d_desc.Alignment = 0;
d3d_desc.Width = new_heap_size;
d3d_desc.Height = 1;
d3d_desc.DepthOrArraySize = 1;
d3d_desc.MipLevels = 1;
d3d_desc.SampleDesc.Count = 1;
d3d_desc.SampleDesc.Quality = 0;
D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; G_ResourceHandle resource_handle = G_PushBuffer(
HRESULT hr = ID3D12Device_CreateCommittedResource( gpu_arena_handle,
g->device, u8,
&heap_props, new_ring_size,
D3D12_HEAP_FLAG_CREATE_NOT_ZEROED, .flags = G_ResourceFlag_HostMemory | G_ResourceFlag_Uncached
&d3d_desc,
D3D12_RESOURCE_STATE_COMMON,
0,
&IID_ID3D12Resource,
(void **)&d3d_resource
); );
if (!SUCCEEDED(hr)) ring->resource = G_D12_ResourceFromHandle(resource_handle);
{ ring->base = G_StructFromResource(resource_handle, u8);
/* TODO: Don't panic */
Panic(Lit("Failed to create upload heap"));
}
}
heap->resource.d3d_resource = d3d_resource;
heap->resource.uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;
heap->resource.buffer_size = new_heap_size;
heap->resource.buffer_size_actual = new_heap_size;
heap->resource.buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource);
/* Map */
{
D3D12_RANGE read_range = ZI;
HRESULT hr = ID3D12Resource_Map(d3d_resource, 0, &read_range, &heap->mapped);
if (!SUCCEEDED(hr))
{
/* TODO: Don't panic */
Panic(Lit("Failed to map upload heap"));
}
}
} }
/* Create initial region */ /* Create initial region */
match = PushStruct(heap->arena, G_D12_StagingRegionNode); match = PushStruct(ring->arena, G_D12_StagingRegionNode);
match->heap = heap; match->ring = ring;
match->next = match; match->next = match;
match->prev = match; match->prev = match;
heap->head_region_node = match;
/* FIXME: Remove this */
queue->staging_ring = ring;
} }
/* Split extra region space */ /* Split extra region space */
@ -1635,35 +1634,46 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
} }
else else
{ {
region_size = heap->size - match->pos; region_size = ring->size - match->pos;
} }
if (region_size > size) if (region_size > size)
{ {
G_D12_StagingRegionNode *new_next = heap->first_free_region_node; G_D12_StagingRegionNode *new_next = ring->first_free_region_node;
if (new_next) if (new_next)
{ {
SllStackPop(heap->first_free_region_node); SllStackPop(ring->first_free_region_node);
ZeroStruct(new_next);
} }
else else
{ {
new_next = PushStruct(heap->arena, G_D12_StagingRegionNode); new_next = PushStruct(ring->arena, G_D12_StagingRegionNode);
} }
new_next->next = next; new_next->next = next;
new_next->prev = match; new_next->prev = match;
next->prev = new_next; next->prev = new_next;
match->next = new_next; match->next = new_next;
new_next->heap = heap; new_next->ring = ring;
new_next->pos = match->pos + size; new_next->pos = match->pos + size;
} }
} }
ring->head_region_node = match->next;
Atomic64Set(&match->completion_target, I64Max); Atomic64Set(&match->completion_target, I64Max);
result = match; result = match;
if (old_ring)
{
/* FIXME: Queue old ring for deletion with command list */
}
} }
Unlock(&lock); Unlock(&lock);
/* Add to command list */
SllQueuePushN(cl->first_staging_region, cl->last_staging_region, result, next_in_command_list);
return result; return result;
} }
@ -1960,14 +1970,16 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
case G_D12_CmdKind_CopyBytes: case G_D12_CmdKind_CopyBytes:
{ {
u64 src_offset = cmd->copy_bytes.src_copy_range.min; u64 src_offset = cmd->copy_bytes.src_range.min;
u64 copy_size = cmd->copy_bytes.src_copy_range.max - cmd->copy_bytes.src_copy_range.min; u64 copy_size = cmd->copy_bytes.src_range.max - cmd->copy_bytes.src_range.min;
ID3D12GraphicsCommandList_CopyBufferRegion(d3d_cl, ID3D12GraphicsCommandList_CopyBufferRegion(
d3d_cl,
cmd->copy_bytes.dst->d3d_resource, cmd->copy_bytes.dst->d3d_resource,
cmd->copy_bytes.dst_offset, cmd->copy_bytes.dst_offset,
cmd->copy_bytes.src->d3d_resource, cmd->copy_bytes.src->d3d_resource,
src_offset, src_offset,
copy_size); copy_size
);
cmd_idx += 1; cmd_idx += 1;
} break; } break;
@ -1979,17 +1991,22 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
G_D12_Resource *src = cmd->copy_texels.src; G_D12_Resource *src = cmd->copy_texels.src;
D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc; D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc;
D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc; D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc;
Vec3I32 dst_offset = cmd->copy_texels.dst_offset; Vec3I32 dst_offset = cmd->copy_texels.dst_texture_offset;
Rng3I32 src_copy_range = cmd->copy_texels.src_copy_range; Rng3I32 src_range = cmd->copy_texels.src_texture_range;
D3D12_BOX src_box = ZI; D3D12_BOX src_box = ZI;
D3D12_BOX *src_box_ptr = 0;
{ {
src_box.left = src_copy_range.p0.x; src_box.left = src_range.p0.x;
src_box.top = src_copy_range.p0.y; src_box.top = src_range.p0.y;
src_box.front = src_copy_range.p0.z; src_box.front = src_range.p0.z;
src_box.right = src_copy_range.p1.x; src_box.right = src_range.p1.x;
src_box.bottom = src_copy_range.p1.y; src_box.bottom = src_range.p1.y;
src_box.back = src_copy_range.p1.z; src_box.back = src_range.p1.z;
if (src->is_texture)
{
src_box_ptr = &src_box;
}
} }
if (dst->flags & G_ResourceFlag_AllowDepthStencil) if (dst->flags & G_ResourceFlag_AllowDepthStencil)
@ -2001,7 +2018,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
} }
else else
{ {
ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, &src_box); ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, src_box_ptr);
} }
cmd_idx += 1; cmd_idx += 1;
@ -2301,6 +2318,17 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
/* End dx12 command list */ /* End dx12 command list */
i64 completion_target = G_D12_CommitRawCommandList(rcl); i64 completion_target = G_D12_CommitRawCommandList(rcl);
/* Attach completion info to staging regions */
for (G_D12_StagingRegionNode *n = cl->first_staging_region; n;)
{
G_D12_StagingRegionNode *next = n->next_in_command_list;
{
Atomic64Set(&n->completion_target, completion_target);
n->next_in_command_list = 0;
}
n = next;
}
/* Attach completion info to descriptors */ /* Attach completion info to descriptors */
for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;) for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;)
{ {
@ -2346,43 +2374,40 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
return completion_target; return completion_target;
} }
//- Arena
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle)
{
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle);
G_D12_ResetArena(cl, gpu_arena);
}
//- Cpu -> Gpu copy //- Cpu -> Gpu copy
void G_CopyCpuToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range) void G_CopyCpuToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range)
{
if (src_copy_range.max > src_copy_range.min)
{ {
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
u64 copy_size = src_copy_range.max - src_copy_range.min; u64 copy_size = src_copy_range.max - src_copy_range.min;
G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, copy_size); G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, copy_size);
CopyBytes((u8 *)region->heap->mapped + region->pos, (u8 *)src + src_copy_range.min, copy_size); CopyBytes((u8 *)region->ring->base + region->pos, (u8 *)src + src_copy_range.min, copy_size);
G_CopyBufferToBuffer(cl_handle, G_CopyBufferToBuffer(
cl_handle,
dst_handle, dst_handle,
dst_offset, dst_offset,
G_D12_MakeHandle(G_ResourceHandle, &region->heap->resource), G_D12_MakeHandle(G_ResourceHandle, region->ring->resource),
RNGU64(region->pos, copy_size)); RNGU64(region->pos, region->pos + copy_size)
);
}
} }
void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range) void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range)
{ {
G_D12_SharedState *g = &G_D12_shared_state;
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle);
Assert(dst->is_texture);
Vec3I32 staged_dims = ZI; Vec3I32 staged_dims = ZI;
{ {
staged_dims.x = src_copy_range.p1.x - src_copy_range.p0.x; staged_dims.x = src_copy_range.p1.x - src_copy_range.p0.x;
staged_dims.y = src_copy_range.p1.y - src_copy_range.p0.y; staged_dims.y = src_copy_range.p1.y - src_copy_range.p0.y;
staged_dims.z = src_copy_range.p1.z - src_copy_range.p0.z; staged_dims.z = src_copy_range.p1.z - src_copy_range.p0.z;
} }
if (staged_dims.x > 0 && staged_dims.y > 0 && staged_dims.z > 0)
{
G_D12_SharedState *g = &G_D12_shared_state;
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle);
Assert(dst->is_texture);
/* Grab footprint info */ /* Grab footprint info */
u64 footprint_rows_count = 0; u64 footprint_rows_count = 0;
@ -2400,34 +2425,45 @@ void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_hand
ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &footprint, (u32 *)&footprint_rows_count, &footprint_row_size, &footprint_size); ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &footprint, (u32 *)&footprint_rows_count, &footprint_row_size, &footprint_size);
} }
/* Fill staging buffer */
G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, footprint_size); G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, footprint_size);
footprint.Offset = region->pos;
/* Fill staging buffer */
{ {
D3D12_RANGE read_range = ZI; D3D12_RANGE read_range = ZI;
u8 *dst_base = (u8 *)region->heap->mapped + region->pos + footprint.Offset;
u8 *src_base = src; u8 *src_base = src;
u8 *dst_base = (u8 *)region->ring->base + footprint.Offset;
u32 z_size = footprint_row_size * footprint_rows_count; u32 z_size = footprint_row_size * footprint_rows_count;
for (i32 z = 0; z < src_dims.z; ++z) for (i32 z = 0; z < src_dims.z; ++z)
{ {
u32 z_offset = z * z_size; u32 z_offset = z * z_size;
for (i32 y = 0; y < footprint_rows_count; ++y) for (i32 y = 0; y < footprint_rows_count; ++y)
{ {
u8 *dst_row = dst_base + y * footprint.Footprint.RowPitch + z_offset;
u8 *src_row = src_base + y * footprint_row_size + z_offset; u8 *src_row = src_base + y * footprint_row_size + z_offset;
u8 *dst_row = dst_base + y * footprint.Footprint.RowPitch + z_offset;
CopyBytes(dst_row, src_row, footprint_row_size); CopyBytes(dst_row, src_row, footprint_row_size);
} }
} }
} }
G_CopyBufferToTexture(cl_handle, Rng3I32 dst_copy_range = ZI;
dst_handle, dst_offset, dst_copy_range.p0 = dst_offset;
G_D12_MakeHandle(G_ResourceHandle, &region->heap->resource), staged_dims, dst_copy_range.p1.x = dst_copy_range.p0.x + staged_dims.x;
RNG3I32(VEC3I32(0, 0, 0), staged_dims)); dst_copy_range.p1.y = dst_copy_range.p0.y + staged_dims.y;
dst_copy_range.p1.z = dst_copy_range.p0.z + staged_dims.z;
G_CopyBufferToTexture(
cl_handle,
dst_handle, dst_copy_range,
G_D12_MakeHandle(G_ResourceHandle, region->ring->resource), footprint.Offset
);
}
} }
//- Gpu <-> Gpu copy //- Gpu <-> Gpu copy
void G_CopyBufferToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, G_ResourceHandle src_handle, RngU64 src_copy_range) void G_CopyBufferToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, G_ResourceHandle src_handle, RngU64 src_copy_range)
{
if (src_copy_range.max > src_copy_range.min)
{ {
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
G_D12_Cmd *cmd = G_D12_PushCmd(cl); G_D12_Cmd *cmd = G_D12_PushCmd(cl);
@ -2435,10 +2471,19 @@ void G_CopyBufferToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_ha
cmd->copy_bytes.src = G_D12_ResourceFromHandle(src_handle); cmd->copy_bytes.src = G_D12_ResourceFromHandle(src_handle);
cmd->copy_bytes.dst = G_D12_ResourceFromHandle(dst_handle); cmd->copy_bytes.dst = G_D12_ResourceFromHandle(dst_handle);
cmd->copy_bytes.dst_offset = dst_offset; cmd->copy_bytes.dst_offset = dst_offset;
cmd->copy_bytes.src_copy_range = src_copy_range; cmd->copy_bytes.src_range = src_copy_range;
}
} }
void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Vec3I32 src_dims, Rng3I32 src_copy_range) void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset)
{
Vec3I32 src_dims = ZI;
{
src_dims.x = dst_copy_range.p1.x - dst_copy_range.p0.x;
src_dims.y = dst_copy_range.p1.y - dst_copy_range.p0.y;
src_dims.z = dst_copy_range.p1.z - dst_copy_range.p0.z;
}
if (src_dims.x > 0 && src_dims.y > 0 && src_dims.z > 0)
{ {
G_D12_SharedState *g = &G_D12_shared_state; G_D12_SharedState *g = &G_D12_shared_state;
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
@ -2458,6 +2503,7 @@ void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_h
src_desc.DepthOrArraySize = src_dims.z; src_desc.DepthOrArraySize = src_dims.z;
} }
ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &src_footprint, 0, 0, 0); ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &src_footprint, 0, 0, 0);
src_footprint.Offset = src_offset;
} }
D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; D3D12_TEXTURE_COPY_LOCATION src_loc = ZI;
@ -2479,11 +2525,15 @@ void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_h
cmd->copy_texels.src = src; cmd->copy_texels.src = src;
cmd->copy_texels.dst_loc = dst_loc; cmd->copy_texels.dst_loc = dst_loc;
cmd->copy_texels.src_loc = src_loc; cmd->copy_texels.src_loc = src_loc;
cmd->copy_texels.dst_offset = dst_offset; cmd->copy_texels.dst_texture_offset = dst_copy_range.p0;
cmd->copy_texels.src_copy_range = src_copy_range; }
} }
void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range) void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range)
{
if (src_copy_range.p1.x > src_copy_range.p0.x &&
src_copy_range.p1.y > src_copy_range.p0.y &&
src_copy_range.p1.z > src_copy_range.p0.z)
{ {
G_D12_SharedState *g = &G_D12_shared_state; G_D12_SharedState *g = &G_D12_shared_state;
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
@ -2511,8 +2561,9 @@ void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_
cmd->copy_texels.src = src; cmd->copy_texels.src = src;
cmd->copy_texels.dst_loc = dst_loc; cmd->copy_texels.dst_loc = dst_loc;
cmd->copy_texels.src_loc = src_loc; cmd->copy_texels.src_loc = src_loc;
cmd->copy_texels.dst_offset = dst_offset; cmd->copy_texels.dst_texture_offset = dst_offset;
cmd->copy_texels.src_copy_range = src_copy_range; cmd->copy_texels.src_texture_range = src_copy_range;
}
} }
void G_CopyTextureToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range) void G_CopyTextureToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range)
@ -2949,6 +3000,12 @@ void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane)
{ {
for (;;) for (;;)
{ {
/* FIXME: Remove this */
SleepSeconds(0.100);
/* Copy print-buffers to readback */ /* Copy print-buffers to readback */
for (G_QueueKind queue_kind = 0; queue_kind < G_NumQueues; ++queue_kind) for (G_QueueKind queue_kind = 0; queue_kind < G_NumQueues; ++queue_kind)
{ {

View File

@ -191,14 +191,15 @@ Struct(G_D12_Arena)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Staging types //~ Staging types
Struct(G_D12_StagingHeap) Struct(G_D12_StagingRing)
{ {
Arena *arena; Arena *arena;
G_D12_Arena *gpu_arena;
G_D12_Resource resource;
void *mapped;
u64 size; u64 size;
G_D12_Resource *resource;
u8 *base;
struct G_D12_StagingRegionNode *head_region_node; struct G_D12_StagingRegionNode *head_region_node;
struct G_D12_StagingRegionNode *first_free_region_node; struct G_D12_StagingRegionNode *first_free_region_node;
@ -206,12 +207,15 @@ Struct(G_D12_StagingHeap)
Struct(G_D12_StagingRegionNode) Struct(G_D12_StagingRegionNode)
{ {
G_D12_StagingHeap *heap; G_D12_StagingRing *ring;
/* Heap links (requires heap lock to read) */ /* Ring links (requires ring lock to read) */
G_D12_StagingRegionNode *prev; G_D12_StagingRegionNode *prev;
G_D12_StagingRegionNode *next; G_D12_StagingRegionNode *next;
/* Command list links */
G_D12_StagingRegionNode *next_in_command_list;
/* Region info */ /* Region info */
Atomic64 completion_target; Atomic64 completion_target;
u64 pos; u64 pos;
@ -247,7 +251,7 @@ Struct(G_D12_Queue)
/* Staging heap */ /* Staging heap */
Mutex staging_mutex; Mutex staging_mutex;
G_D12_StagingHeap *staging_heap; G_D12_StagingRing *staging_ring;
Fence sync_fence; Fence sync_fence;
}; };
@ -313,7 +317,7 @@ Struct(G_D12_Cmd)
G_D12_Resource *dst; G_D12_Resource *dst;
G_D12_Resource *src; G_D12_Resource *src;
u64 dst_offset; u64 dst_offset;
RngU64 src_copy_range; RngU64 src_range;
} copy_bytes; } copy_bytes;
struct struct
@ -322,8 +326,8 @@ Struct(G_D12_Cmd)
G_D12_Resource *src; G_D12_Resource *src;
D3D12_TEXTURE_COPY_LOCATION dst_loc; D3D12_TEXTURE_COPY_LOCATION dst_loc;
D3D12_TEXTURE_COPY_LOCATION src_loc; D3D12_TEXTURE_COPY_LOCATION src_loc;
Vec3I32 dst_offset; Vec3I32 dst_texture_offset;
Rng3I32 src_copy_range; Rng3I32 src_texture_range;
} copy_texels; } copy_texels;
struct struct
@ -366,6 +370,8 @@ Struct(G_D12_CmdList)
G_QueueKind queue_kind; G_QueueKind queue_kind;
G_D12_DescriptorList reset_descriptors; G_D12_DescriptorList reset_descriptors;
G_D12_StagingRegionNode *first_staging_region;
G_D12_StagingRegionNode *last_staging_region;
G_D12_CmdChunk *first_cmd_chunk; G_D12_CmdChunk *first_cmd_chunk;
G_D12_CmdChunk *last_cmd_chunk; G_D12_CmdChunk *last_cmd_chunk;