gpu staging ring working
This commit is contained in:
parent
6c3c7231ff
commit
4ddc2f1666
@ -166,7 +166,15 @@ CpuTopologyInfo GetCpuTopologyInfo(void)
|
|||||||
|
|
||||||
void SleepSeconds(f64 seconds)
|
void SleepSeconds(f64 seconds)
|
||||||
{
|
{
|
||||||
Sleep(seconds / 1000.0);
|
f64 ms = seconds * 1000.0;
|
||||||
|
if (ms > 4000000000)
|
||||||
|
{
|
||||||
|
Sleep(INFINITE);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Sleep((u32)ms);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
|
|||||||
@ -517,6 +517,7 @@ void G_Bootstrap(void);
|
|||||||
|
|
||||||
G_ArenaHandle G_AcquireArena(void);
|
G_ArenaHandle G_AcquireArena(void);
|
||||||
void G_ReleaseArena(G_ArenaHandle arena);
|
void G_ReleaseArena(G_ArenaHandle arena);
|
||||||
|
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ @hookdecl Resource
|
//~ @hookdecl Resource
|
||||||
@ -672,10 +673,6 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc);
|
|||||||
G_CommandListHandle G_PrepareCommandList(G_QueueKind queue);
|
G_CommandListHandle G_PrepareCommandList(G_QueueKind queue);
|
||||||
i64 G_CommitCommandList(G_CommandListHandle cl);
|
i64 G_CommitCommandList(G_CommandListHandle cl);
|
||||||
|
|
||||||
//- Arena
|
|
||||||
|
|
||||||
void G_ResetArena(G_CommandListHandle cl, G_ArenaHandle arena);
|
|
||||||
|
|
||||||
//- Cpu -> Gpu copy
|
//- Cpu -> Gpu copy
|
||||||
|
|
||||||
void G_CopyCpuToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range);
|
void G_CopyCpuToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range);
|
||||||
@ -684,7 +681,7 @@ void G_CopyCpuToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 ds
|
|||||||
//- Gpu <-> Gpu copy
|
//- Gpu <-> Gpu copy
|
||||||
|
|
||||||
void G_CopyBufferToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, G_ResourceHandle src, RngU64 src_copy_range);
|
void G_CopyBufferToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, u64 dst_offset, G_ResourceHandle src, RngU64 src_copy_range);
|
||||||
void G_CopyBufferToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Vec3I32 src_dims, Rng3I32 src_copy_range);
|
void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset);
|
||||||
void G_CopyTextureToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range);
|
void G_CopyTextureToTexture(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range);
|
||||||
void G_CopyTextureToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range);
|
void G_CopyTextureToBuffer(G_CommandListHandle cl, G_ResourceHandle dst, Vec3I32 dst_offset, G_ResourceHandle src, Rng3I32 src_copy_range);
|
||||||
|
|
||||||
|
|||||||
@ -771,6 +771,15 @@ G_ArenaHandle G_AcquireArena(void)
|
|||||||
void G_ReleaseArena(G_ArenaHandle arena)
|
void G_ReleaseArena(G_ArenaHandle arena)
|
||||||
{
|
{
|
||||||
/* TODO */
|
/* TODO */
|
||||||
|
|
||||||
|
/* TODO: Unmap heaps */
|
||||||
|
}
|
||||||
|
|
||||||
|
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle)
|
||||||
|
{
|
||||||
|
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
||||||
|
G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle);
|
||||||
|
G_D12_ResetArena(cl, gpu_arena);
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
@ -891,7 +900,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_ResourceDesc desc)
|
|||||||
/* Create d3d heap */
|
/* Create d3d heap */
|
||||||
{
|
{
|
||||||
D3D12_HEAP_DESC d3d_desc = ZI;
|
D3D12_HEAP_DESC d3d_desc = ZI;
|
||||||
d3d_desc.SizeInBytes = Mebi(256);
|
d3d_desc.SizeInBytes = Mebi(512);
|
||||||
if (heap_kind == G_D12_ResourceHeapKind_Cpu)
|
if (heap_kind == G_D12_ResourceHeapKind_Cpu)
|
||||||
{
|
{
|
||||||
d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM;
|
d3d_desc.Properties.Type = D3D12_HEAP_TYPE_CUSTOM;
|
||||||
@ -1468,6 +1477,8 @@ G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v)
|
|||||||
|
|
||||||
G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
|
G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
|
||||||
{
|
{
|
||||||
|
size = AlignU64(size, MaxU64(D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT, 512));
|
||||||
|
|
||||||
G_D12_SharedState *g = &G_D12_shared_state;
|
G_D12_SharedState *g = &G_D12_shared_state;
|
||||||
G_QueueKind queue_kind = cl->queue_kind;
|
G_QueueKind queue_kind = cl->queue_kind;
|
||||||
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
|
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
|
||||||
@ -1475,17 +1486,18 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
|
|||||||
|
|
||||||
Lock lock = LockE(&queue->staging_mutex);
|
Lock lock = LockE(&queue->staging_mutex);
|
||||||
{
|
{
|
||||||
G_D12_StagingHeap *heap = queue->staging_heap;
|
G_D12_StagingRing *old_ring = 0;
|
||||||
|
G_D12_StagingRing *ring = queue->staging_ring;
|
||||||
i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence);
|
i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence);
|
||||||
|
|
||||||
/* Find first completed region with matching size.
|
/* Find first completed region with matching size.
|
||||||
* For each region in heap:
|
* For each region in ring:
|
||||||
* - If region size > size, split off a smaller region & use it
|
* - If region size > size, split off a smaller region & use it
|
||||||
*
|
*
|
||||||
* - If region size < size, try to merge with next completed region
|
* - If region size < size, try to merge with next completed region
|
||||||
*
|
*
|
||||||
* - If no available completed region with eligible size, queue the
|
* - If no available completed region with eligible size, queue the
|
||||||
* current heap for deletion & create a new heap
|
* current ring for deletion & create a new ring
|
||||||
* with larger size
|
* with larger size
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -1497,15 +1509,15 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
|
|||||||
|
|
||||||
/* Find region with large enough size */
|
/* Find region with large enough size */
|
||||||
G_D12_StagingRegionNode *match = 0;
|
G_D12_StagingRegionNode *match = 0;
|
||||||
if (heap && heap->size >= size)
|
if (ring && ring->size >= size)
|
||||||
{
|
{
|
||||||
G_D12_StagingRegionNode *r = heap->head_region_node;
|
G_D12_StagingRegionNode *r = ring->head_region_node;
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
|
G_D12_StagingRegionNode *next = r->next;
|
||||||
b32 is_completed = completed >= Atomic64Fetch(&r->completion_target);
|
b32 is_completed = completed >= Atomic64Fetch(&r->completion_target);
|
||||||
if (is_completed)
|
if (is_completed)
|
||||||
{
|
{
|
||||||
G_D12_StagingRegionNode *next = r->next;
|
|
||||||
u64 region_size = 0;
|
u64 region_size = 0;
|
||||||
if (next->pos > r->pos)
|
if (next->pos > r->pos)
|
||||||
{
|
{
|
||||||
@ -1513,24 +1525,40 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
region_size = heap->size - r->pos;
|
region_size = ring->size - r->pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (region_size < size)
|
if (region_size < size)
|
||||||
{
|
{
|
||||||
G_D12_StagingRegionNode *prev = r->prev;
|
b32 next_is_completed = completed >= Atomic64Fetch(&next->completion_target);
|
||||||
b32 prev_is_completed = completed >= Atomic64Fetch(&prev->completion_target);
|
if (next_is_completed)
|
||||||
if (prev_is_completed && prev->pos < r->pos)
|
|
||||||
{
|
{
|
||||||
/* Merge with previous region & retry */
|
if (next->pos > r->pos)
|
||||||
prev->next = next;
|
{
|
||||||
SllStackPush(heap->first_free_region_node, r);
|
/* Merge with next region & retry */
|
||||||
r = prev;
|
if (next == ring->head_region_node)
|
||||||
|
{
|
||||||
|
ring->head_region_node = r;
|
||||||
|
}
|
||||||
|
r->next = next->next;
|
||||||
|
r->next->prev = r;
|
||||||
|
SllStackPush(ring->first_free_region_node, next);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* Continue to next region */
|
/* Wrap to beginning */
|
||||||
r = next;
|
r = next;
|
||||||
|
if (r == ring->head_region_node)
|
||||||
|
{
|
||||||
|
/* No large-enough completed region found */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* No large-enough completed region found */
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -1541,88 +1569,59 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
|
/* Continue to next region */
|
||||||
|
r = next;
|
||||||
|
if (r == ring->head_region_node)
|
||||||
{
|
{
|
||||||
/* No large-enough completed region found */
|
/* No large-enough completed region found */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Create new heap if no match found */
|
/* Create new ring if no match found */
|
||||||
if (!match)
|
if (!match)
|
||||||
{
|
{
|
||||||
/* Queue old heap for deletion */
|
/* Queue old ring for deletion */
|
||||||
u64 new_heap_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64));
|
old_ring = ring;
|
||||||
if (heap)
|
ring = 0;
|
||||||
|
u64 new_ring_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64));
|
||||||
|
// u64 new_ring_size = MaxU64(AlignU64ToNextPow2(size), Kibi(128));
|
||||||
|
if (old_ring)
|
||||||
{
|
{
|
||||||
/* FIXME: Queue for deletion here */
|
new_ring_size = MaxU64(new_ring_size, old_ring->size * 2);
|
||||||
new_heap_size = MaxU64(new_heap_size, heap->size * 2);
|
|
||||||
heap = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create new heap */
|
/* Create new ring */
|
||||||
{
|
{
|
||||||
Arena *arena = AcquireArena(Gibi(1));
|
Arena *arena = AcquireArena(Gibi(1));
|
||||||
heap = PushStruct(arena, G_D12_StagingHeap);
|
ring = PushStruct(arena, G_D12_StagingRing);
|
||||||
heap->arena = arena;
|
ring->arena = arena;
|
||||||
heap->size = new_heap_size;
|
ring->size = new_ring_size;
|
||||||
|
|
||||||
/* Create backing upload heap resource */
|
G_ArenaHandle gpu_arena_handle = G_AcquireArena();
|
||||||
ID3D12Resource *d3d_resource = 0;
|
ring->gpu_arena = G_D12_ArenaFromHandle(gpu_arena_handle);
|
||||||
{
|
|
||||||
D3D12_RESOURCE_DESC d3d_desc = ZI;
|
|
||||||
d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
|
||||||
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
|
||||||
d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
|
|
||||||
d3d_desc.Alignment = 0;
|
|
||||||
d3d_desc.Width = new_heap_size;
|
|
||||||
d3d_desc.Height = 1;
|
|
||||||
d3d_desc.DepthOrArraySize = 1;
|
|
||||||
d3d_desc.MipLevels = 1;
|
|
||||||
d3d_desc.SampleDesc.Count = 1;
|
|
||||||
d3d_desc.SampleDesc.Quality = 0;
|
|
||||||
|
|
||||||
D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
|
G_ResourceHandle resource_handle = G_PushBuffer(
|
||||||
HRESULT hr = ID3D12Device_CreateCommittedResource(
|
gpu_arena_handle,
|
||||||
g->device,
|
u8,
|
||||||
&heap_props,
|
new_ring_size,
|
||||||
D3D12_HEAP_FLAG_CREATE_NOT_ZEROED,
|
.flags = G_ResourceFlag_HostMemory | G_ResourceFlag_Uncached
|
||||||
&d3d_desc,
|
|
||||||
D3D12_RESOURCE_STATE_COMMON,
|
|
||||||
0,
|
|
||||||
&IID_ID3D12Resource,
|
|
||||||
(void **)&d3d_resource
|
|
||||||
);
|
);
|
||||||
if (!SUCCEEDED(hr))
|
ring->resource = G_D12_ResourceFromHandle(resource_handle);
|
||||||
{
|
ring->base = G_StructFromResource(resource_handle, u8);
|
||||||
/* TODO: Don't panic */
|
|
||||||
Panic(Lit("Failed to create upload heap"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
heap->resource.d3d_resource = d3d_resource;
|
|
||||||
heap->resource.uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;
|
|
||||||
heap->resource.buffer_size = new_heap_size;
|
|
||||||
heap->resource.buffer_size_actual = new_heap_size;
|
|
||||||
heap->resource.buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource);
|
|
||||||
|
|
||||||
/* Map */
|
|
||||||
{
|
|
||||||
D3D12_RANGE read_range = ZI;
|
|
||||||
HRESULT hr = ID3D12Resource_Map(d3d_resource, 0, &read_range, &heap->mapped);
|
|
||||||
if (!SUCCEEDED(hr))
|
|
||||||
{
|
|
||||||
/* TODO: Don't panic */
|
|
||||||
Panic(Lit("Failed to map upload heap"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create initial region */
|
/* Create initial region */
|
||||||
match = PushStruct(heap->arena, G_D12_StagingRegionNode);
|
match = PushStruct(ring->arena, G_D12_StagingRegionNode);
|
||||||
match->heap = heap;
|
match->ring = ring;
|
||||||
match->next = match;
|
match->next = match;
|
||||||
match->prev = match;
|
match->prev = match;
|
||||||
heap->head_region_node = match;
|
|
||||||
|
/* FIXME: Remove this */
|
||||||
|
queue->staging_ring = ring;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Split extra region space */
|
/* Split extra region space */
|
||||||
@ -1635,35 +1634,46 @@ G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
region_size = heap->size - match->pos;
|
region_size = ring->size - match->pos;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (region_size > size)
|
if (region_size > size)
|
||||||
{
|
{
|
||||||
G_D12_StagingRegionNode *new_next = heap->first_free_region_node;
|
G_D12_StagingRegionNode *new_next = ring->first_free_region_node;
|
||||||
if (new_next)
|
if (new_next)
|
||||||
{
|
{
|
||||||
SllStackPop(heap->first_free_region_node);
|
SllStackPop(ring->first_free_region_node);
|
||||||
|
ZeroStruct(new_next);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
new_next = PushStruct(heap->arena, G_D12_StagingRegionNode);
|
new_next = PushStruct(ring->arena, G_D12_StagingRegionNode);
|
||||||
}
|
}
|
||||||
new_next->next = next;
|
new_next->next = next;
|
||||||
new_next->prev = match;
|
new_next->prev = match;
|
||||||
next->prev = new_next;
|
next->prev = new_next;
|
||||||
match->next = new_next;
|
match->next = new_next;
|
||||||
|
|
||||||
new_next->heap = heap;
|
new_next->ring = ring;
|
||||||
new_next->pos = match->pos + size;
|
new_next->pos = match->pos + size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ring->head_region_node = match->next;
|
||||||
|
|
||||||
Atomic64Set(&match->completion_target, I64Max);
|
Atomic64Set(&match->completion_target, I64Max);
|
||||||
result = match;
|
result = match;
|
||||||
|
|
||||||
|
if (old_ring)
|
||||||
|
{
|
||||||
|
/* FIXME: Queue old ring for deletion with command list */
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Unlock(&lock);
|
Unlock(&lock);
|
||||||
|
|
||||||
|
/* Add to command list */
|
||||||
|
SllQueuePushN(cl->first_staging_region, cl->last_staging_region, result, next_in_command_list);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1960,14 +1970,16 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
|||||||
|
|
||||||
case G_D12_CmdKind_CopyBytes:
|
case G_D12_CmdKind_CopyBytes:
|
||||||
{
|
{
|
||||||
u64 src_offset = cmd->copy_bytes.src_copy_range.min;
|
u64 src_offset = cmd->copy_bytes.src_range.min;
|
||||||
u64 copy_size = cmd->copy_bytes.src_copy_range.max - cmd->copy_bytes.src_copy_range.min;
|
u64 copy_size = cmd->copy_bytes.src_range.max - cmd->copy_bytes.src_range.min;
|
||||||
ID3D12GraphicsCommandList_CopyBufferRegion(d3d_cl,
|
ID3D12GraphicsCommandList_CopyBufferRegion(
|
||||||
|
d3d_cl,
|
||||||
cmd->copy_bytes.dst->d3d_resource,
|
cmd->copy_bytes.dst->d3d_resource,
|
||||||
cmd->copy_bytes.dst_offset,
|
cmd->copy_bytes.dst_offset,
|
||||||
cmd->copy_bytes.src->d3d_resource,
|
cmd->copy_bytes.src->d3d_resource,
|
||||||
src_offset,
|
src_offset,
|
||||||
copy_size);
|
copy_size
|
||||||
|
);
|
||||||
cmd_idx += 1;
|
cmd_idx += 1;
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
@ -1979,17 +1991,22 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
|||||||
G_D12_Resource *src = cmd->copy_texels.src;
|
G_D12_Resource *src = cmd->copy_texels.src;
|
||||||
D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc;
|
D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc;
|
||||||
D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc;
|
D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc;
|
||||||
Vec3I32 dst_offset = cmd->copy_texels.dst_offset;
|
Vec3I32 dst_offset = cmd->copy_texels.dst_texture_offset;
|
||||||
Rng3I32 src_copy_range = cmd->copy_texels.src_copy_range;
|
Rng3I32 src_range = cmd->copy_texels.src_texture_range;
|
||||||
|
|
||||||
D3D12_BOX src_box = ZI;
|
D3D12_BOX src_box = ZI;
|
||||||
|
D3D12_BOX *src_box_ptr = 0;
|
||||||
{
|
{
|
||||||
src_box.left = src_copy_range.p0.x;
|
src_box.left = src_range.p0.x;
|
||||||
src_box.top = src_copy_range.p0.y;
|
src_box.top = src_range.p0.y;
|
||||||
src_box.front = src_copy_range.p0.z;
|
src_box.front = src_range.p0.z;
|
||||||
src_box.right = src_copy_range.p1.x;
|
src_box.right = src_range.p1.x;
|
||||||
src_box.bottom = src_copy_range.p1.y;
|
src_box.bottom = src_range.p1.y;
|
||||||
src_box.back = src_copy_range.p1.z;
|
src_box.back = src_range.p1.z;
|
||||||
|
if (src->is_texture)
|
||||||
|
{
|
||||||
|
src_box_ptr = &src_box;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dst->flags & G_ResourceFlag_AllowDepthStencil)
|
if (dst->flags & G_ResourceFlag_AllowDepthStencil)
|
||||||
@ -2001,7 +2018,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, &src_box);
|
ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, src_box_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd_idx += 1;
|
cmd_idx += 1;
|
||||||
@ -2301,6 +2318,17 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
|||||||
/* End dx12 command list */
|
/* End dx12 command list */
|
||||||
i64 completion_target = G_D12_CommitRawCommandList(rcl);
|
i64 completion_target = G_D12_CommitRawCommandList(rcl);
|
||||||
|
|
||||||
|
/* Attach completion info to staging regions */
|
||||||
|
for (G_D12_StagingRegionNode *n = cl->first_staging_region; n;)
|
||||||
|
{
|
||||||
|
G_D12_StagingRegionNode *next = n->next_in_command_list;
|
||||||
|
{
|
||||||
|
Atomic64Set(&n->completion_target, completion_target);
|
||||||
|
n->next_in_command_list = 0;
|
||||||
|
}
|
||||||
|
n = next;
|
||||||
|
}
|
||||||
|
|
||||||
/* Attach completion info to descriptors */
|
/* Attach completion info to descriptors */
|
||||||
for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;)
|
for (G_D12_Descriptor *d = cl->reset_descriptors.first; d;)
|
||||||
{
|
{
|
||||||
@ -2346,43 +2374,40 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
|||||||
return completion_target;
|
return completion_target;
|
||||||
}
|
}
|
||||||
|
|
||||||
//- Arena
|
|
||||||
|
|
||||||
void G_ResetArena(G_CommandListHandle cl_handle, G_ArenaHandle arena_handle)
|
|
||||||
{
|
|
||||||
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
|
||||||
G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(arena_handle);
|
|
||||||
G_D12_ResetArena(cl, gpu_arena);
|
|
||||||
}
|
|
||||||
|
|
||||||
//- Cpu -> Gpu copy
|
//- Cpu -> Gpu copy
|
||||||
|
|
||||||
void G_CopyCpuToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range)
|
void G_CopyCpuToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range)
|
||||||
{
|
{
|
||||||
|
if (src_copy_range.max > src_copy_range.min)
|
||||||
|
{
|
||||||
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
||||||
u64 copy_size = src_copy_range.max - src_copy_range.min;
|
u64 copy_size = src_copy_range.max - src_copy_range.min;
|
||||||
G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, copy_size);
|
G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, copy_size);
|
||||||
CopyBytes((u8 *)region->heap->mapped + region->pos, (u8 *)src + src_copy_range.min, copy_size);
|
CopyBytes((u8 *)region->ring->base + region->pos, (u8 *)src + src_copy_range.min, copy_size);
|
||||||
G_CopyBufferToBuffer(cl_handle,
|
G_CopyBufferToBuffer(
|
||||||
|
cl_handle,
|
||||||
dst_handle,
|
dst_handle,
|
||||||
dst_offset,
|
dst_offset,
|
||||||
G_D12_MakeHandle(G_ResourceHandle, &region->heap->resource),
|
G_D12_MakeHandle(G_ResourceHandle, region->ring->resource),
|
||||||
RNGU64(region->pos, copy_size));
|
RNGU64(region->pos, region->pos + copy_size)
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range)
|
void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, void *src, Vec3I32 src_dims, Rng3I32 src_copy_range)
|
||||||
{
|
{
|
||||||
G_D12_SharedState *g = &G_D12_shared_state;
|
|
||||||
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
|
||||||
G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle);
|
|
||||||
Assert(dst->is_texture);
|
|
||||||
|
|
||||||
Vec3I32 staged_dims = ZI;
|
Vec3I32 staged_dims = ZI;
|
||||||
{
|
{
|
||||||
staged_dims.x = src_copy_range.p1.x - src_copy_range.p0.x;
|
staged_dims.x = src_copy_range.p1.x - src_copy_range.p0.x;
|
||||||
staged_dims.y = src_copy_range.p1.y - src_copy_range.p0.y;
|
staged_dims.y = src_copy_range.p1.y - src_copy_range.p0.y;
|
||||||
staged_dims.z = src_copy_range.p1.z - src_copy_range.p0.z;
|
staged_dims.z = src_copy_range.p1.z - src_copy_range.p0.z;
|
||||||
}
|
}
|
||||||
|
if (staged_dims.x > 0 && staged_dims.y > 0 && staged_dims.z > 0)
|
||||||
|
{
|
||||||
|
G_D12_SharedState *g = &G_D12_shared_state;
|
||||||
|
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
||||||
|
G_D12_Resource *dst = G_D12_ResourceFromHandle(dst_handle);
|
||||||
|
Assert(dst->is_texture);
|
||||||
|
|
||||||
/* Grab footprint info */
|
/* Grab footprint info */
|
||||||
u64 footprint_rows_count = 0;
|
u64 footprint_rows_count = 0;
|
||||||
@ -2400,46 +2425,66 @@ void G_CopyCpuToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_hand
|
|||||||
ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &footprint, (u32 *)&footprint_rows_count, &footprint_row_size, &footprint_size);
|
ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &footprint, (u32 *)&footprint_rows_count, &footprint_row_size, &footprint_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Fill staging buffer */
|
|
||||||
G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, footprint_size);
|
G_D12_StagingRegionNode *region = G_D12_PushStagingRegion(cl, footprint_size);
|
||||||
|
footprint.Offset = region->pos;
|
||||||
|
|
||||||
|
/* Fill staging buffer */
|
||||||
{
|
{
|
||||||
D3D12_RANGE read_range = ZI;
|
D3D12_RANGE read_range = ZI;
|
||||||
u8 *dst_base = (u8 *)region->heap->mapped + region->pos + footprint.Offset;
|
|
||||||
u8 *src_base = src;
|
u8 *src_base = src;
|
||||||
|
u8 *dst_base = (u8 *)region->ring->base + footprint.Offset;
|
||||||
u32 z_size = footprint_row_size * footprint_rows_count;
|
u32 z_size = footprint_row_size * footprint_rows_count;
|
||||||
for (i32 z = 0; z < src_dims.z; ++z)
|
for (i32 z = 0; z < src_dims.z; ++z)
|
||||||
{
|
{
|
||||||
u32 z_offset = z * z_size;
|
u32 z_offset = z * z_size;
|
||||||
for (i32 y = 0; y < footprint_rows_count; ++y)
|
for (i32 y = 0; y < footprint_rows_count; ++y)
|
||||||
{
|
{
|
||||||
u8 *dst_row = dst_base + y * footprint.Footprint.RowPitch + z_offset;
|
|
||||||
u8 *src_row = src_base + y * footprint_row_size + z_offset;
|
u8 *src_row = src_base + y * footprint_row_size + z_offset;
|
||||||
|
u8 *dst_row = dst_base + y * footprint.Footprint.RowPitch + z_offset;
|
||||||
CopyBytes(dst_row, src_row, footprint_row_size);
|
CopyBytes(dst_row, src_row, footprint_row_size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
G_CopyBufferToTexture(cl_handle,
|
Rng3I32 dst_copy_range = ZI;
|
||||||
dst_handle, dst_offset,
|
dst_copy_range.p0 = dst_offset;
|
||||||
G_D12_MakeHandle(G_ResourceHandle, &region->heap->resource), staged_dims,
|
dst_copy_range.p1.x = dst_copy_range.p0.x + staged_dims.x;
|
||||||
RNG3I32(VEC3I32(0, 0, 0), staged_dims));
|
dst_copy_range.p1.y = dst_copy_range.p0.y + staged_dims.y;
|
||||||
|
dst_copy_range.p1.z = dst_copy_range.p0.z + staged_dims.z;
|
||||||
|
G_CopyBufferToTexture(
|
||||||
|
cl_handle,
|
||||||
|
dst_handle, dst_copy_range,
|
||||||
|
G_D12_MakeHandle(G_ResourceHandle, region->ring->resource), footprint.Offset
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//- Gpu <-> Gpu copy
|
//- Gpu <-> Gpu copy
|
||||||
|
|
||||||
void G_CopyBufferToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, G_ResourceHandle src_handle, RngU64 src_copy_range)
|
void G_CopyBufferToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, u64 dst_offset, G_ResourceHandle src_handle, RngU64 src_copy_range)
|
||||||
{
|
{
|
||||||
|
if (src_copy_range.max > src_copy_range.min)
|
||||||
|
{
|
||||||
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
||||||
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
|
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
|
||||||
cmd->kind = G_D12_CmdKind_CopyBytes;
|
cmd->kind = G_D12_CmdKind_CopyBytes;
|
||||||
cmd->copy_bytes.src = G_D12_ResourceFromHandle(src_handle);
|
cmd->copy_bytes.src = G_D12_ResourceFromHandle(src_handle);
|
||||||
cmd->copy_bytes.dst = G_D12_ResourceFromHandle(dst_handle);
|
cmd->copy_bytes.dst = G_D12_ResourceFromHandle(dst_handle);
|
||||||
cmd->copy_bytes.dst_offset = dst_offset;
|
cmd->copy_bytes.dst_offset = dst_offset;
|
||||||
cmd->copy_bytes.src_copy_range = src_copy_range;
|
cmd->copy_bytes.src_range = src_copy_range;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Vec3I32 src_dims, Rng3I32 src_copy_range)
|
void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Rng3I32 dst_copy_range, G_ResourceHandle src_handle, u64 src_offset)
|
||||||
{
|
{
|
||||||
|
Vec3I32 src_dims = ZI;
|
||||||
|
{
|
||||||
|
src_dims.x = dst_copy_range.p1.x - dst_copy_range.p0.x;
|
||||||
|
src_dims.y = dst_copy_range.p1.y - dst_copy_range.p0.y;
|
||||||
|
src_dims.z = dst_copy_range.p1.z - dst_copy_range.p0.z;
|
||||||
|
}
|
||||||
|
if (src_dims.x > 0 && src_dims.y > 0 && src_dims.z > 0)
|
||||||
|
{
|
||||||
G_D12_SharedState *g = &G_D12_shared_state;
|
G_D12_SharedState *g = &G_D12_shared_state;
|
||||||
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
||||||
G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle);
|
G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle);
|
||||||
@ -2458,6 +2503,7 @@ void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_h
|
|||||||
src_desc.DepthOrArraySize = src_dims.z;
|
src_desc.DepthOrArraySize = src_dims.z;
|
||||||
}
|
}
|
||||||
ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &src_footprint, 0, 0, 0);
|
ID3D12Device_GetCopyableFootprints(g->device, &src_desc, 0, 1, 0, &src_footprint, 0, 0, 0);
|
||||||
|
src_footprint.Offset = src_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
D3D12_TEXTURE_COPY_LOCATION src_loc = ZI;
|
D3D12_TEXTURE_COPY_LOCATION src_loc = ZI;
|
||||||
@ -2479,12 +2525,16 @@ void G_CopyBufferToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_h
|
|||||||
cmd->copy_texels.src = src;
|
cmd->copy_texels.src = src;
|
||||||
cmd->copy_texels.dst_loc = dst_loc;
|
cmd->copy_texels.dst_loc = dst_loc;
|
||||||
cmd->copy_texels.src_loc = src_loc;
|
cmd->copy_texels.src_loc = src_loc;
|
||||||
cmd->copy_texels.dst_offset = dst_offset;
|
cmd->copy_texels.dst_texture_offset = dst_copy_range.p0;
|
||||||
cmd->copy_texels.src_copy_range = src_copy_range;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range)
|
void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range)
|
||||||
{
|
{
|
||||||
|
if (src_copy_range.p1.x > src_copy_range.p0.x &&
|
||||||
|
src_copy_range.p1.y > src_copy_range.p0.y &&
|
||||||
|
src_copy_range.p1.z > src_copy_range.p0.z)
|
||||||
|
{
|
||||||
G_D12_SharedState *g = &G_D12_shared_state;
|
G_D12_SharedState *g = &G_D12_shared_state;
|
||||||
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
||||||
G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle);
|
G_D12_Resource *src = G_D12_ResourceFromHandle(src_handle);
|
||||||
@ -2511,8 +2561,9 @@ void G_CopyTextureToTexture(G_CommandListHandle cl_handle, G_ResourceHandle dst_
|
|||||||
cmd->copy_texels.src = src;
|
cmd->copy_texels.src = src;
|
||||||
cmd->copy_texels.dst_loc = dst_loc;
|
cmd->copy_texels.dst_loc = dst_loc;
|
||||||
cmd->copy_texels.src_loc = src_loc;
|
cmd->copy_texels.src_loc = src_loc;
|
||||||
cmd->copy_texels.dst_offset = dst_offset;
|
cmd->copy_texels.dst_texture_offset = dst_offset;
|
||||||
cmd->copy_texels.src_copy_range = src_copy_range;
|
cmd->copy_texels.src_texture_range = src_copy_range;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void G_CopyTextureToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range)
|
void G_CopyTextureToBuffer(G_CommandListHandle cl_handle, G_ResourceHandle dst_handle, Vec3I32 dst_offset, G_ResourceHandle src_handle, Rng3I32 src_copy_range)
|
||||||
@ -2949,6 +3000,12 @@ void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane)
|
|||||||
{
|
{
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
|
/* FIXME: Remove this */
|
||||||
|
SleepSeconds(0.100);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Copy print-buffers to readback */
|
/* Copy print-buffers to readback */
|
||||||
for (G_QueueKind queue_kind = 0; queue_kind < G_NumQueues; ++queue_kind)
|
for (G_QueueKind queue_kind = 0; queue_kind < G_NumQueues; ++queue_kind)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -191,14 +191,15 @@ Struct(G_D12_Arena)
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Staging types
|
//~ Staging types
|
||||||
|
|
||||||
Struct(G_D12_StagingHeap)
|
Struct(G_D12_StagingRing)
|
||||||
{
|
{
|
||||||
Arena *arena;
|
Arena *arena;
|
||||||
|
G_D12_Arena *gpu_arena;
|
||||||
G_D12_Resource resource;
|
|
||||||
void *mapped;
|
|
||||||
u64 size;
|
u64 size;
|
||||||
|
|
||||||
|
G_D12_Resource *resource;
|
||||||
|
u8 *base;
|
||||||
|
|
||||||
struct G_D12_StagingRegionNode *head_region_node;
|
struct G_D12_StagingRegionNode *head_region_node;
|
||||||
struct G_D12_StagingRegionNode *first_free_region_node;
|
struct G_D12_StagingRegionNode *first_free_region_node;
|
||||||
|
|
||||||
@ -206,12 +207,15 @@ Struct(G_D12_StagingHeap)
|
|||||||
|
|
||||||
Struct(G_D12_StagingRegionNode)
|
Struct(G_D12_StagingRegionNode)
|
||||||
{
|
{
|
||||||
G_D12_StagingHeap *heap;
|
G_D12_StagingRing *ring;
|
||||||
|
|
||||||
/* Heap links (requires heap lock to read) */
|
/* Ring links (requires ring lock to read) */
|
||||||
G_D12_StagingRegionNode *prev;
|
G_D12_StagingRegionNode *prev;
|
||||||
G_D12_StagingRegionNode *next;
|
G_D12_StagingRegionNode *next;
|
||||||
|
|
||||||
|
/* Command list links */
|
||||||
|
G_D12_StagingRegionNode *next_in_command_list;
|
||||||
|
|
||||||
/* Region info */
|
/* Region info */
|
||||||
Atomic64 completion_target;
|
Atomic64 completion_target;
|
||||||
u64 pos;
|
u64 pos;
|
||||||
@ -247,7 +251,7 @@ Struct(G_D12_Queue)
|
|||||||
|
|
||||||
/* Staging heap */
|
/* Staging heap */
|
||||||
Mutex staging_mutex;
|
Mutex staging_mutex;
|
||||||
G_D12_StagingHeap *staging_heap;
|
G_D12_StagingRing *staging_ring;
|
||||||
|
|
||||||
Fence sync_fence;
|
Fence sync_fence;
|
||||||
};
|
};
|
||||||
@ -313,7 +317,7 @@ Struct(G_D12_Cmd)
|
|||||||
G_D12_Resource *dst;
|
G_D12_Resource *dst;
|
||||||
G_D12_Resource *src;
|
G_D12_Resource *src;
|
||||||
u64 dst_offset;
|
u64 dst_offset;
|
||||||
RngU64 src_copy_range;
|
RngU64 src_range;
|
||||||
} copy_bytes;
|
} copy_bytes;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
@ -322,8 +326,8 @@ Struct(G_D12_Cmd)
|
|||||||
G_D12_Resource *src;
|
G_D12_Resource *src;
|
||||||
D3D12_TEXTURE_COPY_LOCATION dst_loc;
|
D3D12_TEXTURE_COPY_LOCATION dst_loc;
|
||||||
D3D12_TEXTURE_COPY_LOCATION src_loc;
|
D3D12_TEXTURE_COPY_LOCATION src_loc;
|
||||||
Vec3I32 dst_offset;
|
Vec3I32 dst_texture_offset;
|
||||||
Rng3I32 src_copy_range;
|
Rng3I32 src_texture_range;
|
||||||
} copy_texels;
|
} copy_texels;
|
||||||
|
|
||||||
struct
|
struct
|
||||||
@ -366,6 +370,8 @@ Struct(G_D12_CmdList)
|
|||||||
G_QueueKind queue_kind;
|
G_QueueKind queue_kind;
|
||||||
|
|
||||||
G_D12_DescriptorList reset_descriptors;
|
G_D12_DescriptorList reset_descriptors;
|
||||||
|
G_D12_StagingRegionNode *first_staging_region;
|
||||||
|
G_D12_StagingRegionNode *last_staging_region;
|
||||||
|
|
||||||
G_D12_CmdChunk *first_cmd_chunk;
|
G_D12_CmdChunk *first_cmd_chunk;
|
||||||
G_D12_CmdChunk *last_cmd_chunk;
|
G_D12_CmdChunk *last_cmd_chunk;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user