access barrier batching

This commit is contained in:
jacob 2025-11-23 13:32:37 -06:00
parent 1144eef5b3
commit b9181ea200
3 changed files with 524 additions and 457 deletions

View File

@ -165,9 +165,10 @@ Enum(GPU_Format)
Enum(GPU_AccessKind)
{
GPU_AccessKind_None,
GPU_AccessKind_AnyRead,
GPU_AccessKind_AnyReadWrite,
GPU_AccessKind_CopyRead,
GPU_AccessKind_CopyWrite,
@ -205,7 +206,6 @@ Struct(GPU_BufferDesc)
{
u64 size;
GPU_BufferFlag flags;
GPU_AccessKind initial_access;
};
////////////////////////////////////////////////////////////
@ -386,7 +386,6 @@ GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc);
#define GPU_PushBuffer(arena, type, count, ...) GPU_PushBufferEx((arena), \
(GPU_BufferDesc) { \
.initial_access = GPU_AccessKind_AnyReadWrite, \
.size = sizeof(type) * (count), \
__VA_ARGS__ \
} \

View File

@ -1413,18 +1413,17 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_
D3D12_INDEX_BUFFER_VIEW bound_ibv = ZI;
D3D12_CPU_DESCRIPTOR_HANDLE bound_raster_targets[GPU_MaxRasterTargets] = ZI;
u64 cmds_count = cl->cmds_count;
GPU_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, GPU_D12_Cmd, cmds_count);
/* Flatten command chunks */
u64 cmds_count = 0;
GPU_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, GPU_D12_Cmd, cl->cmds_count);
{
/* Flatten command chunks */
{
u64 flattened_idx = 0;
for (GPU_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next)
{
for (u64 cmd_chunk_idx = 0; cmd_chunk_idx < chunk->cmds_count; ++cmd_chunk_idx)
{
cmds[flattened_idx] = chunk->cmds[cmd_chunk_idx];
++flattened_idx;
cmds[cmds_count++] = chunk->cmds[cmd_chunk_idx];
}
}
}
@ -1442,12 +1441,129 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_
}
}
/* Determine skippable access cmds & access cmd before kinds based on resource */
{
Struct(ResourceLookupNode)
{
ResourceLookupNode *next_in_list;
ResourceLookupNode *next_in_bin;
GPU_D12_Resource *resource;
GPU_D12_Cmd *last_access_cmd;
};
Struct(ResourceLookupBin)
{
ResourceLookupNode *first;
ResourceLookupNode *last;
};
u64 num_lookup_bins = MaxU64(64, AlignU64Pow2(cmds_count * 4));
ResourceLookupNode *first_resource_node = 0;
ResourceLookupNode *last_resource_node = 0;
ResourceLookupBin *lookup_bins = PushStructs(scratch.arena, ResourceLookupBin, num_lookup_bins);
GPU_D12_Cmd *final_access_cmd = 0;
u64 cmd_idx = 0;
u64 batch_gen = 0;
while (cmd_idx < cmds_count)
{
GPU_D12_Cmd *cmd = &cmds[cmd_idx];
switch (cmd->kind)
{
/* Non-batch-interrupting cmds */
case GPU_D12_CmdKind_Constant:
{
cmd_idx += 1;
} break;
/* Batch-interrupting cmds */
case GPU_D12_CmdKind_Copy:
case GPU_D12_CmdKind_Compute:
case GPU_D12_CmdKind_Rasterize:
case GPU_D12_CmdKind_ClearRtv:
{
/* TODO:
* - Only interrupt batch if cmd actually runs
* - e.g. Rasterize with empty idx buffer will not actually run
* - For non-shader interruptions, only interrupt batches for explicitly bound resources
* - e.g. Copy should only interrupt batches for supplied resources
*/
cmd_idx += 1;
batch_gen += 1;
} break;
case GPU_D12_CmdKind_Access:
{
GPU_D12_Resource *resource = cmd->access.resource;
/* Look up the last access command for this resource in the current command list */
ResourceLookupNode *lookup = 0;
{
u64 hash = RandU64FromSeed(resource->uid);
ResourceLookupBin *bin = &lookup_bins[hash % num_lookup_bins];
lookup = bin->first;
for (; lookup && lookup->resource->uid != resource->uid;)
{
lookup = lookup->next_in_bin;
}
if (!lookup)
{
lookup = PushStruct(scratch.arena, ResourceLookupNode);
lookup->resource = resource;
SllQueuePushN(bin->first, bin->last, lookup, next_in_bin);
SllQueuePushN(first_resource_node, last_resource_node, lookup, next_in_list);
}
}
/* Determine 'before' state from lookup */
if (lookup->last_access_cmd)
{
GPU_D12_Cmd *last_cmd = lookup->last_access_cmd;
if (last_cmd->access.batch_gen != batch_gen)
{
/* Access is part of new batch */
last_cmd->access.is_end_of_batch = 1;
cmd->access.before = last_cmd->access.after;
}
else
{
/* Last access cmd for this resource is in the same batch, merge them */
cmd->access.before = last_cmd->access.before;
last_cmd->skip = 1;
}
}
lookup->last_access_cmd = cmd;
cmd->access.batch_gen = batch_gen;
final_access_cmd = cmd;
cmd_idx += 1;
} break;
}
}
if (final_access_cmd)
{
final_access_cmd->access.is_end_of_batch = 1;
}
}
/* Process gpu commands into dx12 commands */
{
u64 batch_access_idx_start = 0;
u64 batch_access_idx_opl = 0; /* One past last */
u64 cmd_idx = 0;
while (cmd_idx < cmds_count)
{
GPU_D12_Cmd *cmd = &cmds[cmd_idx];
if (cmd->skip)
{
cmd_idx += 1;
}
else
{
switch (cmd->kind)
{
default:
@ -1455,147 +1571,139 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_
cmd_idx += 1;
} break;
//- Resource barrier
// case GPU_D12_CmdKind_TransitionToSrv:
// case GPU_D12_CmdKind_TransitionToUav:
// case GPU_D12_CmdKind_TransitionToRtv:
// case GPU_D12_CmdKind_TransitionToCopySrc:
// case GPU_D12_CmdKind_TransitionToCopyDst:
// case GPU_D12_CmdKind_FlushUav:
// {
// u64 barrier_gen = 1 + Atomic64FetchAdd(&g->resource_barrier_gen.v, 1);
case GPU_D12_CmdKind_Access:
{
batch_access_idx_opl = cmd_idx + 1;
// /* Build barriers batch list */
// Struct(TmpBarrier) { TmpBarrier *next; GPU_D12_Resource *r; };
// u32 max_barriers_count = 0;
// TmpBarrier *first_barrier = 0;
// TmpBarrier *last_barrier = 0;
// while (cmd && (cmd->kind == GPU_D12_CmdKind_TransitionToSrv
// || cmd->kind == GPU_D12_CmdKind_TransitionToUav
// || cmd->kind == GPU_D12_CmdKind_TransitionToRtv
// || cmd->kind == GPU_D12_CmdKind_TransitionToCopySrc
// || cmd->kind == GPU_D12_CmdKind_TransitionToCopyDst
// || cmd->kind == GPU_D12_CmdKind_FlushUav))
// {
// D3D12_RESOURCE_BARRIER_TYPE type = ZI;
// D3D12_RESOURCE_STATES state_after = ZI;
// GPU_D12_Resource *resource = cmd->barrier.resource;
/* Submit batched barriers */
if (cmd->access.is_end_of_batch)
{
/* Build barriers */
u64 buffer_barriers_count = 0;
u64 texture_barriers_count = 0;
u64 global_barriers_count = 0;
D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_access_idx_opl - batch_access_idx_start));
D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_access_idx_opl - batch_access_idx_start));
D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_access_idx_opl - batch_access_idx_start));
for (u64 access_cmd_idx = batch_access_idx_start; access_cmd_idx < batch_access_idx_opl; ++access_cmd_idx)
{
GPU_D12_Cmd *access_cmd = &cmds[access_cmd_idx];
if (access_cmd->kind == GPU_D12_CmdKind_Access && !access_cmd->skip)
{
GPU_D12_Resource *resource = access_cmd->access.resource;
D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER;
// switch (cmd->kind)
// {
// default: break;
// case GPU_D12_CmdKind_TransitionToSrv:
// {
/* Translate gpu access kind -> d3d barrier fields */
D3D12_BARRIER_SYNC d3d_syncs[2] = ZI;
D3D12_BARRIER_ACCESS d3d_accesses[2] = ZI;
D3D12_BARRIER_LAYOUT d3d_layouts[2] = ZI;
for (u32 i = 0; i < 2; ++i)
{
GPU_AccessKind access_kind = i == 0 ? access_cmd->access.before : access_cmd->access.after;
// type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
// state_after = D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE;
// } break;
// case GPU_D12_CmdKind_TransitionToUav:
// {
// type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
// state_after = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
// } break;
// case GPU_D12_CmdKind_TransitionToRtv:
// {
// type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
// state_after = D3D12_RESOURCE_STATE_RENDER_TARGET;
// i32 slot = cmd->barrier.rt_slot;
// if (slot >= 0 && slot < countof(slotted_raster_targets))
// {
// slotted_raster_targets[slot] = resource;
// }
// } break;
// case GPU_D12_CmdKind_TransitionToCopySrc:
// {
// type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
// state_after = D3D12_RESOURCE_STATE_COPY_SOURCE;
// } break;
// case GPU_D12_CmdKind_TransitionToCopyDst:
// {
// type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
// state_after = D3D12_RESOURCE_STATE_COPY_DEST;
// } break;
// case GPU_D12_CmdKind_FlushUav:
// {
// type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
// } break;
// }
switch (access_kind)
{
case GPU_AccessKind_None:
{
d3d_syncs[i] = D3D12_BARRIER_SYNC_NONE;
d3d_accesses[i] = D3D12_BARRIER_ACCESS_NO_ACCESS;
d3d_layouts[i] = resource->texture_layout;
} break;
// b32 skip = 0;
// /* Skip UAV transitions on resources that already have transition in the batch */
// if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV && resource->barrier_gen == barrier_gen)
// {
// skip = 1;
// }
// /* Skip redundant transitions */
// if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && ((resource->barrier_state_after & state_after) == state_after))
// {
// skip = 1;
// }
// /* Skip transitions that will occur via implicit promotion */
// if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && resource->state == D3D12_RESOURCE_STATE_COMMON &&
// (state_after != D3D12_RESOURCE_STATE_RENDER_TARGET &&
// state_after != D3D12_RESOURCE_STATE_DEPTH_WRITE &&
// state_after != D3D12_RESOURCE_STATE_UNORDERED_ACCESS &&
// state_after != D3D12_RESOURCE_STATE_RESOLVE_DEST &&
// state_after != D3D12_RESOURCE_STATE_PRESENT))
// {
// /* Skip transitions into existing state */
// skip = 1;
// }
case GPU_AccessKind_RasterTarget:
{
d3d_syncs[i] = D3D12_BARRIER_SYNC_RENDER_TARGET;
d3d_accesses[i] = D3D12_BARRIER_ACCESS_RENDER_TARGET;
d3d_layouts[i] = D3D12_BARRIER_LAYOUT_RENDER_TARGET;
} break;
// if (!skip)
// {
// resource->barrier_type = type;
// resource->barrier_state_after = state_after;
// if (resource->barrier_gen != barrier_gen)
// {
// TmpBarrier *b = PushStruct(scratch.arena, TmpBarrier);
// resource->barrier_gen = barrier_gen;
// b->r = resource;
// SllQueuePush(first_barrier, last_barrier, b);
// ++max_barriers_count;
// }
// }
case GPU_AccessKind_Present:
{
d3d_syncs[i] = D3D12_BARRIER_SYNC_NONE;
d3d_accesses[i] = D3D12_BARRIER_ACCESS_NO_ACCESS;
d3d_layouts[i] = D3D12_BARRIER_LAYOUT_PRESENT;
} break;
}
}
// cmd = cmd->next;
// }
/* Build barrier */
switch (barrier_type)
{
case D3D12_BARRIER_TYPE_BUFFER:
{
D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++];
barrier->SyncBefore = d3d_syncs[0];
barrier->SyncAfter = d3d_syncs[1];
barrier->AccessBefore = d3d_accesses[0];
barrier->AccessAfter = d3d_accesses[1];
barrier->pResource = resource->d3d_resource;
} break;
// /* Commit batched barriers */
// /* FIXME: Transitions from UAV -> UAV should insert UAV barrier */
// u32 barriers_count = 0;
// D3D12_RESOURCE_BARRIER *rbs = PushStructs(scratch.arena, D3D12_RESOURCE_BARRIER, max_barriers_count);
// for (TmpBarrier *b = first_barrier; b; b = b->next)
// {
// GPU_D12_Resource *resource = b->r;
// D3D12_RESOURCE_BARRIER_TYPE type = resource->barrier_type;
// D3D12_RESOURCE_STATES state_before = resource->state;
// D3D12_RESOURCE_STATES state_after = resource->barrier_state_after;
// if (!(type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && state_before == state_after))
// {
// D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++];
// rb->Type = resource->barrier_type;
// if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION)
// {
// rb->Transition.pResource = resource->d3d_resource;
// rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
// rb->Transition.StateBefore = state_before;
// rb->Transition.StateAfter = state_after;
// resource->state = state_after;
// }
// else if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_UAV)
// {
// rb->UAV.pResource = resource->d3d_resource;
// }
// }
// }
// if (barriers_count > 0)
// {
// ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs);
// }
// } break;
case D3D12_BARRIER_TYPE_TEXTURE:
{
D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++];
barrier->SyncBefore = d3d_syncs[0];
barrier->SyncAfter = d3d_syncs[1];
barrier->AccessBefore = d3d_accesses[0];
barrier->AccessAfter = d3d_accesses[1];
barrier->LayoutBefore = d3d_layouts[0];
barrier->LayoutAfter = d3d_layouts[1];
barrier->pResource = resource->d3d_resource;
barrier->Subresources.IndexOrFirstMipLevel = 0xffffffff;
resource->texture_layout = d3d_layouts[1];
} break;
case D3D12_BARRIER_TYPE_GLOBAL:
{
D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++];
barrier->SyncBefore = d3d_syncs[0];
barrier->SyncAfter = d3d_syncs[1];
barrier->AccessBefore = d3d_accesses[0];
barrier->AccessAfter = d3d_accesses[1];
} break;
}
}
}
/* Dispatch barriers */
{
u32 barrier_groups_count = 0;
D3D12_BARRIER_GROUP barrier_groups[3] = ZI;
if (buffer_barriers_count > 0)
{
D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++];
group->Type = D3D12_BARRIER_TYPE_BUFFER;
group->NumBarriers = buffer_barriers_count;
group->pBufferBarriers = buffer_barriers;
}
if (texture_barriers_count > 0)
{
D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++];
group->Type = D3D12_BARRIER_TYPE_TEXTURE;
group->NumBarriers = texture_barriers_count;
group->pTextureBarriers = texture_barriers;
}
if (global_barriers_count > 0)
{
D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++];
group->Type = D3D12_BARRIER_TYPE_GLOBAL;
group->NumBarriers = global_barriers_count;
group->pGlobalBarriers = global_barriers;
}
if (barrier_groups_count > 0)
{
ID3D12GraphicsCommandList7_Barrier(rcl, barrier_groups_count, barrier_groups);
}
}
batch_access_idx_start = cmd_idx + 1;
}
cmd_idx += 1;
} break;
//- Copy resource
// case GPU_D12_CmdKind_Copy:
// {
// GPU_D12_Resource *dst = cmd->copy.dst;
@ -1653,56 +1761,6 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_
// cmd_idx += 1;
// } break;
//- Access
case GPU_D12_CmdKind_Access:
{
/* FIXME: Batch */
/* FIXME: Remove hardcode test */
GPU_D12_Resource *resource = cmd->access.resource;
D3D12_TEXTURE_BARRIER d3d_barrier = ZI;
d3d_barrier.pResource = resource->d3d_resource;
d3d_barrier.Subresources.IndexOrFirstMipLevel = 0xffffffff;
switch (cmd->access.kind)
{
case GPU_AccessKind_RasterTarget:
{
d3d_barrier.SyncBefore = D3D12_BARRIER_SYNC_NONE;
d3d_barrier.SyncAfter = D3D12_BARRIER_SYNC_RENDER_TARGET;
d3d_barrier.AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS;
d3d_barrier.AccessAfter = D3D12_BARRIER_ACCESS_RENDER_TARGET;
d3d_barrier.LayoutBefore = resource->texture_layout;
d3d_barrier.LayoutAfter = D3D12_BARRIER_LAYOUT_RENDER_TARGET;
} break;
case GPU_AccessKind_Present:
{
d3d_barrier.SyncBefore = D3D12_BARRIER_SYNC_RENDER_TARGET;
d3d_barrier.SyncAfter = D3D12_BARRIER_SYNC_NONE;
d3d_barrier.AccessBefore = D3D12_BARRIER_ACCESS_RENDER_TARGET;
d3d_barrier.AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS;
d3d_barrier.LayoutBefore = resource->texture_layout;
d3d_barrier.LayoutAfter = D3D12_BARRIER_LAYOUT_PRESENT;
} break;
}
D3D12_BARRIER_GROUP d3d_barrier_group = ZI;
d3d_barrier_group.Type = D3D12_BARRIER_TYPE_TEXTURE;
d3d_barrier_group.NumBarriers = 1;
d3d_barrier_group.pTextureBarriers = &d3d_barrier;
ID3D12GraphicsCommandList7_Barrier(rcl, 1, &d3d_barrier_group);
resource->texture_layout = d3d_barrier.LayoutAfter;
cmd_idx += 1;
} break;
//- Dispatch compute shader
case GPU_D12_CmdKind_Compute:
{
@ -1748,6 +1806,7 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_
} break;
//- Dispatch Vs/Ps shader
case GPU_D12_CmdKind_Rasterize:
{
GPU_D12_Pipeline *pipeline = 0;
@ -1933,11 +1992,11 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_
} break;
//- Clear rtv
case GPU_D12_CmdKind_ClearRtv:
{
GPU_D12_Descriptor *descriptor = cmd->clear_rtv.rtv_descriptor;
GPU_D12_Resource *resource = descriptor->resource;
Assert(resource->texture_layout == D3D12_BARRIER_LAYOUT_RENDER_TARGET);
f32 clear_color[4] = ZI;
clear_color[0] = cmd->clear_rtv.color.x;
clear_color[1] = cmd->clear_rtv.color.y;
@ -1949,6 +2008,7 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_
}
}
}
}
/* End dx12 command list */
GPU_D12_CommitRawCommandList(dx12_cl);
@ -2049,7 +2109,7 @@ void GPU_SyncAccess(GPU_CommandListHandle cl_handle, GPU_ResourceHandle handle,
GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
cmd->kind = GPU_D12_CmdKind_Access;
cmd->access.kind = kind;
cmd->access.after = kind;
cmd->access.resource = GPU_D12_ResourceFromHandle(handle);
}
@ -2368,6 +2428,7 @@ GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, G
}
ZeroStruct(backbuffer);
backbuffer->d3d_resource = d3d_resource;
backbuffer->uid = Atomic64FetchAdd(&g->next_resource_uid.v, 1);
backbuffer->is_texture = 1;
backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT;
backbuffer->swapchain = swapchain;

View File

@ -116,6 +116,7 @@ Struct(GPU_D12_Resource)
{
GPU_D12_Resource *next_free;
ID3D12Resource *d3d_resource;
u64 uid;
/* Buffer info */
GPU_BufferDesc buffer_desc;
@ -198,12 +199,18 @@ Enum(GPU_D12_CmdKind)
Struct(GPU_D12_Cmd)
{
GPU_D12_CmdKind kind;
b32 skip;
union
{
struct
{
GPU_AccessKind kind;
GPU_AccessKind after;
GPU_D12_Resource *resource;
/* Post-batch data */
b32 is_end_of_batch;
u64 batch_gen;
GPU_AccessKind before;
} access;
struct
@ -298,7 +305,7 @@ Struct(GPU_D12_Swapchain)
Struct(GPU_D12_SharedState)
{
Atomic64Padded resource_barrier_gen;
Atomic64Padded next_resource_uid;
/* Stats */
Atomic64 driver_resources_allocated;