gpu queue synchronization
This commit is contained in:
parent
3da749ef51
commit
bc17e94758
@ -21,10 +21,6 @@ void G_BootstrapCommon(void)
|
|||||||
g->quad_indices = G_IdxBuff16(quad_indices);
|
g->quad_indices = G_IdxBuff16(quad_indices);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: Init debug print queues */
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Init point sampler */
|
/* Init point sampler */
|
||||||
{
|
{
|
||||||
G_ResourceHandle pt_sampler = G_PushSampler(gpu_perm, (G_SamplerResourceDesc) { .filter = G_Filter_MinMagMipPoint });
|
G_ResourceHandle pt_sampler = G_PushSampler(gpu_perm, (G_SamplerResourceDesc) { .filter = G_Filter_MinMagMipPoint });
|
||||||
@ -55,7 +51,8 @@ void G_BootstrapCommon(void)
|
|||||||
}
|
}
|
||||||
G_CommitCommandList(cl);
|
G_CommitCommandList(cl);
|
||||||
|
|
||||||
G_SyncOtherQueues(G_QueueKind_Direct);
|
/* Barrier all queues until direct queue finishes initializing resources */
|
||||||
|
G_Sync(G_QueueMask_Direct, G_QueueMask_All);
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
|
|||||||
@ -18,17 +18,43 @@ Struct(G_SwapchainHandle) { u64 v; };
|
|||||||
|
|
||||||
Enum(G_QueueKind)
|
Enum(G_QueueKind)
|
||||||
{
|
{
|
||||||
#if G_IsMultiQueueEnabled
|
|
||||||
G_QueueKind_Direct = 0,
|
G_QueueKind_Direct = 0,
|
||||||
|
#if G_IsMultiQueueEnabled
|
||||||
G_QueueKind_AsyncCompute = 1,
|
G_QueueKind_AsyncCompute = 1,
|
||||||
G_QueueKind_AsyncCopy = 2,
|
G_QueueKind_AsyncCopy = 2,
|
||||||
G_NumQueues = 3
|
|
||||||
#else
|
#else
|
||||||
G_QueueKind_Direct = 0,
|
G_QueueKind_AsyncCompute = G_QueueKind_Direct,
|
||||||
G_QueueKind_AsyncCompute = 0,
|
G_QueueKind_AsyncCopy = G_QueueKind_Direct,
|
||||||
G_QueueKind_AsyncCopy = 0,
|
|
||||||
G_NumQueues = 1
|
|
||||||
#endif
|
#endif
|
||||||
|
G_NumQueues
|
||||||
|
};
|
||||||
|
|
||||||
|
Enum(G_QueueMask)
|
||||||
|
{
|
||||||
|
G_QueueMask_None = 0,
|
||||||
|
G_QueueMask_Direct = (1 << 0),
|
||||||
|
#if G_IsMultiQueueEnabled
|
||||||
|
G_QueueMask_AsyncCompute = (1 << 1),
|
||||||
|
G_QueueMask_AsyncCopy = (1 << 2),
|
||||||
|
#else
|
||||||
|
G_QueueMask_AsyncCompute = G_QueueMask_Direct,
|
||||||
|
G_QueueMask_AsyncCopy = G_QueueMask_Direct,
|
||||||
|
#endif
|
||||||
|
G_QueueMask_All = (0xFFFFFFFF >> (32 - G_NumQueues))
|
||||||
|
};
|
||||||
|
/* Converts a queue kind index into its single-bit queue mask (kind 0 -> bit 0, kind 1 -> bit 1, ...).
 * The argument is parenthesized so that expressions binding looser than `<<` (e.g. `a | b`, `c ? x : y`) expand correctly. */
#define G_MaskFromQueue(queue_kind) (1 << (queue_kind))
|
||||||
|
|
||||||
|
Struct(G_QueueCompletions)
|
||||||
|
{
|
||||||
|
i64 v[G_NumQueues]; /* Array of completions indexed by queue kind */
|
||||||
|
};
|
||||||
|
|
||||||
|
/* All waiters will wait until specified queues reach their value in the `completions` array */
|
||||||
|
Struct(G_QueueBarrierDesc)
|
||||||
|
{
|
||||||
|
G_QueueCompletions completions; /* Completions that waiters should wait for */
|
||||||
|
G_QueueMask wait_queues; /* Mask of queues that will wait for completions */
|
||||||
|
b32 wait_cpu; /* Will the cpu wait for completion */
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
@ -163,7 +189,7 @@ Enum(G_Format)
|
|||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Barrier types
|
//~ Memory sync types
|
||||||
|
|
||||||
Enum(G_Stage)
|
Enum(G_Stage)
|
||||||
{
|
{
|
||||||
@ -278,7 +304,7 @@ Enum(G_Layout)
|
|||||||
* - Necessary resource flushes will occur based on `access_prev` & `access_next`
|
* - Necessary resource flushes will occur based on `access_prev` & `access_next`
|
||||||
* - Texture layout will transition based on `layout` (if specified)
|
* - Texture layout will transition based on `layout` (if specified)
|
||||||
*/
|
*/
|
||||||
Struct(G_BarrierDesc)
|
Struct(G_MemoryBarrierDesc)
|
||||||
{
|
{
|
||||||
G_ResourceHandle resource;
|
G_ResourceHandle resource;
|
||||||
b32 is_global;
|
b32 is_global;
|
||||||
@ -459,25 +485,6 @@ Struct(G_IndexBufferDesc)
|
|||||||
u32 index_count;
|
u32 index_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
//~ Synchronization types
|
|
||||||
|
|
||||||
Enum(G_FenceOpKind)
|
|
||||||
{
|
|
||||||
G_FenceOpKind_Set,
|
|
||||||
G_FenceOpKind_Add,
|
|
||||||
};
|
|
||||||
|
|
||||||
Struct(G_FenceOp)
|
|
||||||
{
|
|
||||||
G_FenceOpKind kind;
|
|
||||||
Fence *fence;
|
|
||||||
i64 v;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define G_SetFence(_fence, _v) ((G_FenceOp) { .kind = G_FenceOpKind_Set, .fence = (_fence), .v = (_v) })
|
|
||||||
#define G_AddFence(_fence, _v) ((G_FenceOp) { .kind = G_FenceOpKind_Add, .fence = (_fence), .v = (_v) })
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Statistic types
|
//~ Statistic types
|
||||||
|
|
||||||
@ -641,9 +648,7 @@ u32 G_PushRef(G_ArenaHandle arena, G_ResourceHandle resource, G_RefDesc desc);
|
|||||||
//- Command list
|
//- Command list
|
||||||
|
|
||||||
G_CommandListHandle G_PrepareCommandList(G_QueueKind queue);
|
G_CommandListHandle G_PrepareCommandList(G_QueueKind queue);
|
||||||
void G_CommitCommandListEx(G_CommandListHandle cl, u64 fence_ops_count, G_FenceOp *fence_ops);
|
i64 G_CommitCommandList(G_CommandListHandle cl);
|
||||||
|
|
||||||
#define G_CommitCommandList(cl) G_CommitCommandListEx((cl), 0, 0)
|
|
||||||
|
|
||||||
//- Arena
|
//- Arena
|
||||||
|
|
||||||
@ -671,12 +676,12 @@ void G_SetConstant_(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size)
|
|||||||
G_SetConstant_((cl), (name), &__src, sizeof(__src)); \
|
G_SetConstant_((cl), (name), &__src, sizeof(__src)); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
//- Barrier
|
//- Memory sync
|
||||||
|
|
||||||
void G_Sync(G_CommandListHandle cl, G_BarrierDesc desc);
|
void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc);
|
||||||
|
|
||||||
#define G_MemorySync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \
|
#define G_MemorySync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \
|
||||||
G_Sync((_cl), (G_BarrierDesc) { \
|
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
|
||||||
.resource = (_resource), \
|
.resource = (_resource), \
|
||||||
.sync_prev = _sync_prev, \
|
.sync_prev = _sync_prev, \
|
||||||
.access_prev = _access_prev, \
|
.access_prev = _access_prev, \
|
||||||
@ -685,7 +690,7 @@ void G_Sync(G_CommandListHandle cl, G_BarrierDesc desc);
|
|||||||
})
|
})
|
||||||
|
|
||||||
#define G_MemoryLayoutSync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
|
#define G_MemoryLayoutSync(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
|
||||||
G_Sync((_cl), (G_BarrierDesc) { \
|
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
|
||||||
.resource = (_resource), \
|
.resource = (_resource), \
|
||||||
.sync_prev = _sync_prev, \
|
.sync_prev = _sync_prev, \
|
||||||
.access_prev = _access_prev, \
|
.access_prev = _access_prev, \
|
||||||
@ -695,7 +700,7 @@ void G_Sync(G_CommandListHandle cl, G_BarrierDesc desc);
|
|||||||
})
|
})
|
||||||
|
|
||||||
#define G_GlobalMemorySync(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
|
#define G_GlobalMemorySync(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
|
||||||
G_Sync((_cl), (G_BarrierDesc) { \
|
G_MemorySync((_cl), (G_MemoryBarrierDesc) { \
|
||||||
.is_global = 1, \
|
.is_global = 1, \
|
||||||
.sync_prev = _sync_prev, \
|
.sync_prev = _sync_prev, \
|
||||||
.access_prev = _access_prev, \
|
.access_prev = _access_prev, \
|
||||||
@ -730,13 +735,23 @@ void G_Rasterize(G_CommandListHandle cl,
|
|||||||
void G_ClearRenderTarget(G_CommandListHandle cl, G_ResourceHandle render_target, Vec4 color);
|
void G_ClearRenderTarget(G_CommandListHandle cl, G_ResourceHandle render_target, Vec4 color);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ @hookdecl Queue synchronization
|
//~ @hookdecl Synchronization
|
||||||
|
|
||||||
/* `waiter_queue` will block until `completion_queue` completes all submitted commands */
|
i64 G_CompletionValueFromQueue(G_QueueKind queue_kind);
|
||||||
void G_SyncQueue(G_QueueKind completion_queue, G_QueueKind waiter_queue);
|
i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind);
|
||||||
|
G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask);
|
||||||
|
G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask);
|
||||||
|
|
||||||
/* All queues will block until `completion_queue` completes all submitted commands */
|
void G_SyncEx(G_QueueBarrierDesc desc);
|
||||||
void G_SyncOtherQueues(G_QueueKind completion_queue);
|
|
||||||
|
#define G_Sync(completion_mask, ...) \
|
||||||
|
G_SyncEx((G_QueueBarrierDesc) { \
|
||||||
|
.completions = G_CompletionTargetsFromQueues(completion_mask), \
|
||||||
|
__VA_ARGS__ \
|
||||||
|
})
|
||||||
|
|
||||||
|
#define G_SyncGpu(completion_mask, wait_mask) G_Sync((completion_mask), .wait_queues = (wait_mask))
|
||||||
|
/* Makes the cpu wait until the queues in `completion_mask` reach their current completion targets.
 * Deliberately has no trailing semicolon: the caller supplies it, so the macro behaves like a
 * single statement (safe in an unbraced if/else) and can appear inside an expression. */
#define G_SyncCpu(completion_mask) G_Sync((completion_mask), .wait_cpu = 1)
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ @hookdecl Statistics
|
//~ @hookdecl Statistics
|
||||||
|
|||||||
@ -1,4 +1,5 @@
|
|||||||
G_D12_SharedState G_D12_shared_state = ZI;
|
G_D12_SharedState G_D12_shared_state = ZI;
|
||||||
|
ThreadLocal G_D12_ThreadLocalState G_D12_tl = ZI;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ @hookimpl Bootstrap
|
//~ @hookimpl Bootstrap
|
||||||
@ -293,21 +294,20 @@ void G_Bootstrap(void)
|
|||||||
/* Create debug print buffers */
|
/* Create debug print buffers */
|
||||||
if (GPU_SHADER_PRINT)
|
if (GPU_SHADER_PRINT)
|
||||||
{
|
{
|
||||||
u64 print_buffer_size = Mebi(64);
|
|
||||||
for (G_QueueKind kind = 0; kind < G_NumQueues; ++kind)
|
for (G_QueueKind kind = 0; kind < G_NumQueues; ++kind)
|
||||||
{
|
{
|
||||||
G_D12_Queue *queue = G_D12_QueueFromKind(kind);
|
G_D12_Queue *queue = G_D12_QueueFromKind(kind);
|
||||||
if (kind != G_QueueKind_AsyncCopy)
|
if (kind != G_QueueKind_AsyncCopy)
|
||||||
{
|
{
|
||||||
/* TODO: Don't create this in host memory. Just double buffer & do an async copy. */
|
|
||||||
G_ArenaHandle gpu_perm = G_PermArena();
|
G_ArenaHandle gpu_perm = G_PermArena();
|
||||||
queue->debug_print_buffer = G_PushBuffer(
|
queue->print_buffer_size = Mebi(64);
|
||||||
|
queue->print_buffer = G_PushBuffer(
|
||||||
gpu_perm,
|
gpu_perm,
|
||||||
u8,
|
u8,
|
||||||
print_buffer_size,
|
queue->print_buffer_size,
|
||||||
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_HostMemory
|
.flags = G_ResourceFlag_AllowShaderReadWrite
|
||||||
);
|
);
|
||||||
queue->debug_print_buffer_ref = G_PushRWByteAddressBufferRef(gpu_perm, queue->debug_print_buffer);
|
queue->print_buffer_ref = G_PushRWByteAddressBufferRef(gpu_perm, queue->print_buffer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -692,7 +692,7 @@ G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind)
|
|||||||
return cl;
|
return cl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void G_D12_CommitRawCommandList(G_D12_RawCommandList *cl)
|
i64 G_D12_CommitRawCommandList(G_D12_RawCommandList *cl)
|
||||||
{
|
{
|
||||||
G_D12_Queue *queue = cl->queue;
|
G_D12_Queue *queue = cl->queue;
|
||||||
|
|
||||||
@ -707,21 +707,23 @@ void G_D12_CommitRawCommandList(G_D12_RawCommandList *cl)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Commit */
|
/* Commit */
|
||||||
|
i64 completion = 0;
|
||||||
{
|
{
|
||||||
Lock lock = LockE(&queue->commit_mutex);
|
Lock lock = LockE(&queue->commit_mutex);
|
||||||
{
|
{
|
||||||
u64 target = ++queue->commit_fence_target;
|
completion = ++queue->commit_fence_target;
|
||||||
cl->commit_fence_target = target;
|
cl->commit_fence_target = completion;
|
||||||
|
|
||||||
/* Execute */
|
/* Execute */
|
||||||
ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->d3d_cl);
|
ID3D12CommandQueue_ExecuteCommandLists(queue->d3d_queue, 1, (ID3D12CommandList **)&cl->d3d_cl);
|
||||||
ID3D12CommandQueue_Signal(queue->d3d_queue, queue->commit_fence, target);
|
ID3D12CommandQueue_Signal(queue->d3d_queue, queue->commit_fence, completion);
|
||||||
|
|
||||||
/* Append */
|
/* Append */
|
||||||
SllQueuePush(queue->first_committed_cl, queue->last_committed_cl, cl);
|
SllQueuePush(queue->first_committed_cl, queue->last_committed_cl, cl);
|
||||||
}
|
}
|
||||||
Unlock(&lock);
|
Unlock(&lock);
|
||||||
}
|
}
|
||||||
|
return completion;
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
@ -1596,7 +1598,7 @@ G_CommandListHandle G_PrepareCommandList(G_QueueKind queue)
|
|||||||
return G_D12_MakeHandle(G_CommandListHandle, cl);
|
return G_D12_MakeHandle(G_CommandListHandle, cl);
|
||||||
}
|
}
|
||||||
|
|
||||||
void G_CommitCommandListEx(G_CommandListHandle cl_handle, u64 fence_ops_count, G_FenceOp *fence_ops)
|
i64 G_CommitCommandList(G_CommandListHandle cl_handle)
|
||||||
{
|
{
|
||||||
G_D12_SharedState *g = &G_D12_shared_state;
|
G_D12_SharedState *g = &G_D12_shared_state;
|
||||||
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
||||||
@ -1618,13 +1620,13 @@ void G_CommitCommandListEx(G_CommandListHandle cl_handle, u64 fence_ops_count, G
|
|||||||
u64 slotted_constants[G_NumConstants];
|
u64 slotted_constants[G_NumConstants];
|
||||||
u64 bound_compute_constants[G_NumConstants];
|
u64 bound_compute_constants[G_NumConstants];
|
||||||
u64 bound_graphics_constants[G_NumConstants];
|
u64 bound_graphics_constants[G_NumConstants];
|
||||||
for (i32 i = 0; i < countof(slotted_constants); ++i) { slotted_constants[i] = 0; } /* Zero initialze all constant slots */
|
for (i32 i = 0; i < countof(slotted_constants); ++i) { slotted_constants[i] = 0; } /* Zero-initialize all slots */
|
||||||
for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; }
|
for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; }
|
||||||
for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; }
|
for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; }
|
||||||
|
|
||||||
if (!G_IsRefNil(queue->debug_print_buffer_ref))
|
if (!G_IsRefNil(queue->print_buffer_ref))
|
||||||
{
|
{
|
||||||
slotted_constants[G_ShaderConst_DebugBufferRef] = queue->debug_print_buffer_ref.v;
|
slotted_constants[G_ShaderConst_DebugBufferRef] = queue->print_buffer_ref.v;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Rasterizer state */
|
/* Rasterizer state */
|
||||||
@ -1759,7 +1761,7 @@ void G_CommitCommandListEx(G_CommandListHandle cl_handle, u64 fence_ops_count, G
|
|||||||
G_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx];
|
G_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx];
|
||||||
if (barrier_cmd->kind == G_D12_CmdKind_Barrier)
|
if (barrier_cmd->kind == G_D12_CmdKind_Barrier)
|
||||||
{
|
{
|
||||||
G_BarrierDesc desc = barrier_cmd->barrier.desc;
|
G_MemoryBarrierDesc desc = barrier_cmd->barrier.desc;
|
||||||
G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource);
|
G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource);
|
||||||
D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
|
D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
|
||||||
|
|
||||||
@ -2196,7 +2198,7 @@ void G_CommitCommandListEx(G_CommandListHandle cl_handle, u64 fence_ops_count, G
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* End dx12 command list */
|
/* End dx12 command list */
|
||||||
G_D12_CommitRawCommandList(rcl);
|
i64 completion = G_D12_CommitRawCommandList(rcl);
|
||||||
|
|
||||||
/* Free command list */
|
/* Free command list */
|
||||||
{
|
{
|
||||||
@ -2209,6 +2211,7 @@ void G_CommitCommandListEx(G_CommandListHandle cl_handle, u64 fence_ops_count, G
|
|||||||
}
|
}
|
||||||
|
|
||||||
EndScratch(scratch);
|
EndScratch(scratch);
|
||||||
|
return completion;
|
||||||
}
|
}
|
||||||
|
|
||||||
//- Arena
|
//- Arena
|
||||||
@ -2406,9 +2409,9 @@ void G_SetConstant_(G_CommandListHandle cl_handle, i32 slot, void *src_32bit, u3
|
|||||||
CopyBytes(&cmd->constant.value, src_32bit, MinU32(size, 4));
|
CopyBytes(&cmd->constant.value, src_32bit, MinU32(size, 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
//- Barrier
|
//- Memory sync
|
||||||
|
|
||||||
void G_Sync(G_CommandListHandle cl_handle, G_BarrierDesc desc)
|
void G_MemorySyncEx(G_CommandListHandle cl_handle, G_MemoryBarrierDesc desc)
|
||||||
{
|
{
|
||||||
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
|
||||||
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
|
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
|
||||||
@ -2464,78 +2467,114 @@ void G_ClearRenderTarget(G_CommandListHandle cl_handle, G_ResourceHandle resourc
|
|||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ @hookimpl Queue synchronization
|
//~ @hookimpl Synchronization
|
||||||
|
|
||||||
void G_SyncQueue(G_QueueKind completion_queue_kind, G_QueueKind waiter_queue_kind)
|
i64 G_CompletionValueFromQueue(G_QueueKind queue_kind)
|
||||||
{
|
{
|
||||||
if (completion_queue_kind != waiter_queue_kind)
|
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
|
||||||
{
|
return ID3D12Fence_GetCompletedValue(queue->commit_fence);
|
||||||
G_D12_Queue *completion_queue = G_D12_QueueFromKind(completion_queue_kind);
|
|
||||||
G_D12_Queue *waiter_queue = G_D12_QueueFromKind(waiter_queue_kind);
|
|
||||||
ID3D12Fence *d3d_fence = completion_queue->commit_fence;
|
|
||||||
u64 fence_target = 0;
|
|
||||||
{
|
|
||||||
Lock lock = LockS(&completion_queue->commit_mutex);
|
|
||||||
fence_target = completion_queue->commit_fence_target;
|
|
||||||
Unlock(&lock);
|
|
||||||
}
|
|
||||||
if (ID3D12Fence_GetCompletedValue(d3d_fence) < fence_target)
|
|
||||||
{
|
|
||||||
ID3D12CommandQueue_Wait(waiter_queue->d3d_queue, d3d_fence, fence_target);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void G_SyncOtherQueues(G_QueueKind completion_queue_kind)
|
i64 G_CompletionTargetFromQueue(G_QueueKind queue_kind)
|
||||||
{
|
{
|
||||||
if (G_IsMultiQueueEnabled)
|
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
|
||||||
|
i64 target = 0;
|
||||||
|
{
|
||||||
|
Lock lock = LockS(&queue->commit_mutex);
|
||||||
|
target = queue->commit_fence_target;
|
||||||
|
Unlock(&lock);
|
||||||
|
}
|
||||||
|
return target;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns the completion value currently reached by each queue selected in `queue_mask`,
 * stored in an array indexed by queue kind. Entries for queues that are not selected keep
 * their ZI-initialized value. */
G_QueueCompletions G_CompletionValuesFromQueues(G_QueueMask queue_mask)
{
    G_QueueCompletions completions = ZI;
    for (G_QueueKind queue_kind = 0; queue_kind < G_NumQueues; ++queue_kind)
    {
        if (queue_mask & (1 << queue_kind))
        {
            /* Query the value the queue has reached, not its pending commit target
             * (that is what G_CompletionTargetsFromQueues reports) */
            completions.v[queue_kind] = G_CompletionValueFromQueue(queue_kind);
        }
    }
    return completions;
}
|
||||||
|
|
||||||
|
/* Collects the current completion target of every queue selected by `queue_mask`
 * into an array indexed by queue kind. Unselected entries are left as initialized by ZI. */
G_QueueCompletions G_CompletionTargetsFromQueues(G_QueueMask queue_mask)
{
    G_QueueCompletions result = ZI;
    for (G_QueueKind kind = 0; kind < G_NumQueues; ++kind)
    {
        /* Skip queues whose bit is not set in the mask */
        if ((queue_mask & (1 << kind)) == 0)
        {
            continue;
        }
        result.v[kind] = G_CompletionTargetFromQueue(kind);
    }
    return result;
}
|
||||||
|
|
||||||
|
void G_SyncEx(G_QueueBarrierDesc desc)
|
||||||
|
{
|
||||||
|
G_D12_SharedState *g = &G_D12_shared_state;
|
||||||
|
|
||||||
|
u64 fences_count = 0;
|
||||||
|
ID3D12Fence *fences[G_NumQueues] = ZI;
|
||||||
|
i64 fence_targets[G_NumQueues] = ZI;
|
||||||
|
|
||||||
|
/* Grab fences */
|
||||||
|
for (G_QueueKind completion_queue_kind = 0; completion_queue_kind < G_NumQueues; ++ completion_queue_kind)
|
||||||
{
|
{
|
||||||
G_D12_Queue *completion_queue = G_D12_QueueFromKind(completion_queue_kind);
|
G_D12_Queue *completion_queue = G_D12_QueueFromKind(completion_queue_kind);
|
||||||
ID3D12Fence *d3d_fence = completion_queue->commit_fence;
|
i64 target = desc.completions.v[completion_queue_kind];
|
||||||
u64 fence_target = 0;
|
if (target > 0)
|
||||||
{
|
{
|
||||||
Lock lock = LockS(&completion_queue->commit_mutex);
|
i64 fence_value = ID3D12Fence_GetCompletedValue(completion_queue->commit_fence);
|
||||||
fence_target = completion_queue->commit_fence_target;
|
if (fence_value < target)
|
||||||
Unlock(&lock);
|
|
||||||
}
|
|
||||||
if (ID3D12Fence_GetCompletedValue(d3d_fence) < fence_target)
|
|
||||||
{
|
|
||||||
for (G_QueueKind waiter_queue_kind = 0; waiter_queue_kind < G_NumQueues; ++waiter_queue_kind)
|
|
||||||
{
|
{
|
||||||
if (waiter_queue_kind != completion_queue_kind)
|
fences[fences_count] = completion_queue->commit_fence;
|
||||||
|
fence_targets[fences_count] = target;
|
||||||
|
fences_count += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Sync Queues */
|
||||||
|
for (G_QueueKind waiter_queue_kind = 0; waiter_queue_kind < G_NumQueues; ++ waiter_queue_kind)
|
||||||
|
{
|
||||||
|
if (desc.wait_queues & (1 << waiter_queue_kind))
|
||||||
|
{
|
||||||
|
G_D12_Queue *waiter_queue = G_D12_QueueFromKind(waiter_queue_kind);
|
||||||
|
for (u64 fence_idx = 0; fence_idx < fences_count; ++fence_idx)
|
||||||
|
{
|
||||||
|
ID3D12Fence *fence = fences[fence_idx];
|
||||||
|
if (waiter_queue->commit_fence != fence)
|
||||||
{
|
{
|
||||||
G_D12_Queue *waiter_queue = G_D12_QueueFromKind(waiter_queue_kind);
|
i64 target = fence_targets[fence_idx];
|
||||||
ID3D12CommandQueue_Wait(waiter_queue->d3d_queue, d3d_fence, fence_target);
|
ID3D12CommandQueue_Wait(waiter_queue->d3d_queue, fence, target);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Sync Cpu */
|
||||||
|
if (desc.wait_cpu && fences_count > 0)
|
||||||
|
{
|
||||||
|
if (G_D12_tl.sync_event == 0)
|
||||||
|
{
|
||||||
|
G_D12_tl.sync_event = CreateEvent(0, 0, 0, 0);
|
||||||
|
}
|
||||||
|
ID3D12Device1_SetEventOnMultipleFenceCompletion(
|
||||||
|
g->device,
|
||||||
|
fences,
|
||||||
|
(u64 *)fence_targets,
|
||||||
|
fences_count,
|
||||||
|
D3D12_MULTIPLE_FENCE_WAIT_FLAG_ALL,
|
||||||
|
G_D12_tl.sync_event
|
||||||
|
);
|
||||||
|
WaitForSingleObject(G_D12_tl.sync_event, INFINITE);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
|
||||||
//~ @hookimpl Map
|
|
||||||
|
|
||||||
// G_Mapped G_Map(G_Resource *gpu_r)
|
|
||||||
// {
|
|
||||||
// G_Mapped result = ZI;
|
|
||||||
// result.resource = gpu_r;
|
|
||||||
// G_D12_Resource *r = (G_D12_Resource *)gpu_r;
|
|
||||||
// D3D12_RANGE read_range = ZI;
|
|
||||||
// HRESULT hr = ID3D12Resource_Map(r->d3d_resource, 0, &read_range, &result.mem);
|
|
||||||
// if (FAILED(hr) || !result.mem)
|
|
||||||
// {
|
|
||||||
// /* TODO: Don't panic */
|
|
||||||
// Panic(Lit("Failed to map command buffer resource"));
|
|
||||||
// }
|
|
||||||
// return result;
|
|
||||||
// }
|
|
||||||
|
|
||||||
// void G_Unmap(G_Mapped m)
|
|
||||||
// {
|
|
||||||
// G_D12_Resource *r = (G_D12_Resource *)m.resource;
|
|
||||||
// ID3D12Resource_Unmap(r->d3d_resource, 0, 0);
|
|
||||||
// }
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ @hookimpl Statistics
|
//~ @hookimpl Statistics
|
||||||
|
|
||||||
@ -2786,26 +2825,36 @@ void G_D12_WorkerEntry(WaveLaneCtx *lane)
|
|||||||
G_QueueKind queue_kind = (G_QueueKind)lane->wave->udata;
|
G_QueueKind queue_kind = (G_QueueKind)lane->wave->udata;
|
||||||
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
|
G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind);
|
||||||
|
|
||||||
for (;;)
|
// if (queue->print_buffer_size > 0)
|
||||||
|
if (queue_kind == G_QueueKind_Direct)
|
||||||
{
|
{
|
||||||
/* FIXME: Remove this */
|
G_ArenaHandle gpu_perm = G_PermArena();
|
||||||
|
G_ResourceHandle readback_buff = G_PushBuffer(
|
||||||
|
gpu_perm,
|
||||||
|
u8,
|
||||||
|
queue->print_buffer_size,
|
||||||
|
.flags = G_ResourceFlag_HostMemory
|
||||||
|
);
|
||||||
|
|
||||||
Sleep(500);
|
for (;;)
|
||||||
|
|
||||||
G_ResourceHandle debug_print_buff = queue->debug_print_buffer;
|
|
||||||
G_D12_Resource *resource = G_D12_ResourceFromHandle(debug_print_buff);
|
|
||||||
|
|
||||||
if (!G_IsResourceNil(debug_print_buff))
|
|
||||||
{
|
{
|
||||||
u8 *base = G_StructFromResource(debug_print_buff, u8);
|
/* FIXME: Remove this */
|
||||||
u32 size = *((u32 *)base);
|
|
||||||
String text = STRING(size, base + 4);
|
|
||||||
|
|
||||||
if (queue_kind == G_QueueKind_Direct)
|
Sleep(500);
|
||||||
|
|
||||||
|
G_CommandListHandle cl = G_PrepareCommandList(queue_kind);
|
||||||
{
|
{
|
||||||
DEBUGBREAKABLE;
|
G_CopyBufferToBuffer(cl, readback_buff, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size));
|
||||||
}
|
}
|
||||||
}
|
i64 completion = G_CommitCommandList(cl);
|
||||||
|
|
||||||
|
G_SyncCpu(G_MaskFromQueue(queue_kind));
|
||||||
|
u32 size = *G_StructFromResource(readback_buff, u32);
|
||||||
|
u8 *text = G_StructFromResource(readback_buff, u8) + 4;
|
||||||
|
|
||||||
|
String s = STRING(size, text);
|
||||||
|
|
||||||
|
DEBUGBREAKABLE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -220,8 +220,9 @@ Struct(G_D12_Queue)
|
|||||||
u64 commit_fence_target;
|
u64 commit_fence_target;
|
||||||
|
|
||||||
/* Global resources */
|
/* Global resources */
|
||||||
G_ResourceHandle debug_print_buffer;
|
u64 print_buffer_size;
|
||||||
G_RWByteAddressBufferRef debug_print_buffer_ref;
|
G_ResourceHandle print_buffer;
|
||||||
|
G_RWByteAddressBufferRef print_buffer_ref;
|
||||||
|
|
||||||
/* Raw command lists */
|
/* Raw command lists */
|
||||||
struct G_D12_RawCommandList *first_committed_cl;
|
struct G_D12_RawCommandList *first_committed_cl;
|
||||||
@ -283,7 +284,7 @@ Struct(G_D12_Cmd)
|
|||||||
|
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
G_BarrierDesc desc;
|
G_MemoryBarrierDesc desc;
|
||||||
|
|
||||||
/* Post-batch data */
|
/* Post-batch data */
|
||||||
b32 is_end_of_batch;
|
b32 is_end_of_batch;
|
||||||
@ -413,6 +414,11 @@ Struct(G_D12_SharedState)
|
|||||||
ID3D12Device10 *device;
|
ID3D12Device10 *device;
|
||||||
} extern G_D12_shared_state;
|
} extern G_D12_shared_state;
|
||||||
|
|
||||||
|
Struct(G_D12_ThreadLocalState)
|
||||||
|
{
|
||||||
|
HANDLE sync_event;
|
||||||
|
} extern ThreadLocal G_D12_tl;
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Helpers
|
//~ Helpers
|
||||||
|
|
||||||
@ -453,7 +459,7 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
|
|||||||
//~ Raw command list
|
//~ Raw command list
|
||||||
|
|
||||||
G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind);
|
G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind);
|
||||||
void G_D12_CommitRawCommandList(G_D12_RawCommandList *cl);
|
i64 G_D12_CommitRawCommandList(G_D12_RawCommandList *cl);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Command helpers
|
//~ Command helpers
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user