alias barriers

This commit is contained in:
jacob 2026-03-12 10:55:37 -05:00
parent fb8c4839e3
commit 80c43d2ebd
6 changed files with 349 additions and 292 deletions

View File

@ -21,7 +21,7 @@ Enum(G_Family)
{
G_Family_Graphics,
G_Family_Compute,
G_Family_Copy
G_Family_Common
};
#if G_IsMultiQueueEnabled
@ -60,6 +60,12 @@ Enum(G_Family)
// Bit mask with only the bit for `queue_kind` set.
// The argument is parenthesized so that compound expressions (e.g. `a & b`, `c ? x : y`)
// are shifted as a whole instead of being split by operator precedence.
#define G_QueueMaskFromKind(queue_kind) (1 << (queue_kind))
// Maps a queue kind to the queue family its textures belong to:
//   G_QueueKind_Direct       -> G_Family_Graphics
//   G_QueueKind_AsyncCompute -> G_Family_Compute
//   anything else            -> G_Family_Common
// The argument is parenthesized so compound expressions compare as a whole.
// NOTE: `queue_kind` may be evaluated twice; avoid arguments with side effects.
#define G_FamilyFromQueueKind(queue_kind) ( \
  (queue_kind) == G_QueueKind_Direct ? G_Family_Graphics : \
  (queue_kind) == G_QueueKind_AsyncCompute ? G_Family_Compute : \
  G_Family_Common \
)
Struct(G_QueueCompletions)
{
i64 v[G_QueueKind_COUNT];
@ -203,25 +209,6 @@ Enum(G_Format)
G_Format_COUNT = 192
};
////////////////////////////////////////////////////////////
//~ Texture layout types
Enum(G_TextureLayout)
{
// Supports present, shader-read, and copy-read/write in any queue family.
// Transitionable from G_TextureLayout_Family in non-copy queue.
G_TextureLayout_Common,
// Supports any access in the current queue family.
// Transitionable from G_TextureLayout_Common in non-copy queue.
G_TextureLayout_Family,
// Supports any read access with up to 1 write access to non overlapping regions from any queue.
// Cannot be transitioned to/from.
// Depth-stencil textures cannot use this layout.
G_TextureLayout_Simultaneous,
};
////////////////////////////////////////////////////////////
//~ Filter types
@ -301,6 +288,22 @@ Enum(G_ComparisonFunc)
G_ComparisonFunc_Always = 8
};
////////////////////////////////////////////////////////////
//~ Texture layout types
// Layout a texture is kept in between uses. Also passed as the initial layout
// when pushing a texture (e.g. the layout argument of G_PushTexture2D).
Enum(G_TextureLayout)
{
// Supports shader-read, and copy-read/write in any queue family.
G_TextureLayout_Common,
// Supports any access in the current queue family.
// G_SyncAcquireFamilyLayout / G_SyncReleaseFamilyLayout record the barriers that
// move a texture into / out of this layout on a command list.
G_TextureLayout_Family,
// Supports any read access with up to 1 write access to non overlapping regions from any queue.
// Depth-stencil textures cannot use this layout.
G_TextureLayout_Simultaneous,
};
////////////////////////////////////////////////////////////
//~ Memory types
@ -545,19 +548,20 @@ void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size
G_SetConstantEx((cl), (name), &__src, sizeof(__src)); \
} while (0)
//- Barrier
//- Sync
void G_Sync(G_CommandListHandle cl);
void G_SyncLayout(G_CommandListHandle cl, G_TextureRef resource, G_TextureLayout layout);
void G_SyncAcquireFamilyLayout(G_CommandListHandle cl, G_TextureRef texture);
void G_SyncReleaseFamilyLayout(G_CommandListHandle cl, G_TextureRef texture);
//- Zone
void G_PushZoneEx(G_CommandListHandle cl, String name_lit);
void G_PopZoneEx(G_CommandListHandle cl);
#define G_PushZone(cl, name_lit) G_PushZoneEx((cl), Lit(name_lit))
#define G_PushZone(cl, name_lit) G_PushZoneEx((cl), Lit(name_lit"\0"))
#define G_PopZone(cl) G_PopZoneEx(cl)
#define G_ZoneDF(cl, name_lit) DeferFor(G_PushZone((cl), (name_lit)), G_PopZone(cl))
#define G_ZoneDF(cl, name_lit) DeferFor(G_PushZone((cl), name_lit), G_PopZone(cl))
//- Cpu -> Gpu staged copy

View File

@ -1257,6 +1257,7 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle
G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(gpu_arena_handle);
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
G_QueueKind queue_kind = cl->queue_kind;
G_Family current_family = G_FamilyFromQueueKind(queue_kind);
G_D12_Resource *resource = 0;
b32 is_buffer = memory_desc.kind == G_MemoryKind_Buffer;
@ -1459,6 +1460,28 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle
//////////////////////////////
//- Allocate D3D12 resource
G_Family initial_family = G_Family_Common;
D3D12_BARRIER_LAYOUT d3d_initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED;
if (is_texture)
{
d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMMON;
if (memory_desc.texture.initial_layout == G_TextureLayout_Family)
{
initial_family = G_FamilyFromQueueKind(queue_kind);
switch (queue_kind)
{
case G_QueueKind_Direct: d3d_initial_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON; break;
case G_QueueKind_AsyncCompute: d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON; break;
}
}
}
if (queue_kind == G_QueueKind_AsyncCopy && Atomic32Fetch(&resource->texture_family) != initial_family)
{
// Copy queues cannot perform transitions, so resource can't be aliased
can_reuse = 0;
}
if (!can_reuse)
{
if (is_buffer || is_texture)
@ -1468,21 +1491,6 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle
{
clear_value_arg = &clear_value;
}
D3D12_BARRIER_LAYOUT d3d_initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED;
if (is_texture)
{
d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMMON;
if (memory_desc.texture.initial_layout == G_TextureLayout_Family)
{
switch (queue_kind)
{
case G_QueueKind_Direct: d3d_initial_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON; break;
case G_QueueKind_AsyncCompute: d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON; break;
}
}
}
HRESULT hr = ID3D12Device10_CreateCommittedResource3(
G_D12.device,
&heap_props,
@ -1504,17 +1512,6 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle
Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1);
resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, d3d_desc.MipLevels);
// Queue initial Rtv/Dsv discard
if (
!AnyBit(flags, G_MemoryFlag_Zero) &&
AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)
)
{
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
cmd->kind = G_D12_CmdKind_Discard;
cmd->discard.resource = resource;
}
}
if (is_buffer)
@ -1530,6 +1527,7 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle
resource->texture_format = memory_desc.texture.format;
resource->texture_dims = memory_desc.texture.dims;
resource->texture_mips = RNGI32(0, d3d_desc.MipLevels - 1);
Atomic32Set(&resource->texture_family, initial_family);
}
else if (is_sampler)
{
@ -1795,40 +1793,43 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle
resource->gpu_descriptor = descriptor;
}
//////////////////////////////
//- Transition intiial layout
//- Initial layout transition
// FIXME: On reset, transition resources to UNDEFINED + No access
// Then here, transition from undefined (w/ discard if rt/dsv)
if (
can_reuse &&
is_texture &&
memory_desc.texture.initial_layout == G_TextureLayout_Common &&
queue_kind != G_QueueKind_AsyncCopy
)
if (can_reuse && is_texture && queue_kind != G_QueueKind_AsyncCopy)
{
G_SyncLayout(cl_handle, G_MakeTextureRef(resource->gpu_descriptor->base_index), G_TextureLayout_Common);
G_Family old_family = Atomic32Fetch(&resource->texture_family);
if (old_family != initial_family)
{
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
cmd->kind = G_D12_CmdKind_Barrier;
cmd->barrier.texture = G_MakeTextureRef(resource->gpu_descriptor->base_index);
if (old_family != current_family && old_family != G_Family_Common)
{
cmd->barrier.alias = 1;
}
if (initial_family == G_Family_Common)
{
cmd->barrier.release = 1;
}
}
Atomic32Set(&resource->texture_family, initial_family);
}
//////////////////////////////
//- Queue initial Rtv/Dsv discard
if (
!can_reuse &&
!AnyBit(flags, G_MemoryFlag_Zero) &&
AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)
)
{
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
cmd->kind = G_D12_CmdKind_Discard;
cmd->discard.resource = resource;
G_Sync(cl_handle);
}
//////////////////////////////
//- Upload initial data
@ -1845,13 +1846,6 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle
return resource->gpu_descriptor->base_index;
}
//- Count
u64 G_CountStride(G_BufferRef buffer)
@ -2216,7 +2210,7 @@ void G_D12_InsertEvent(ID3D12GraphicsCommandList7 *d3d_cl, G_D12_EventKind kind,
////////////////////////////////////////////////////////////
//~ Tracking
void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resource *resource, RngI32 mips, G_D12_TrackedUsageKind usage_kind)
void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resource *resource, RngI32 mips, G_D12_TrackedUsageKind usage_kind, G_D12_Cmd *cmd)
{
b32 should_track = !AnyBit(resource->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS);
if (should_track)
@ -2272,6 +2266,7 @@ void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resourc
}
}
b32 should_alias = cmd->kind == G_D12_CmdKind_Barrier && cmd->barrier.alias;
for (i32 mip_idx = mips.min; mip_idx <= mips.max; ++mip_idx)
{
G_D12_TrackedMip *mip = &trn->mips[mip_idx];
@ -2305,7 +2300,19 @@ void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resourc
if (prev_mip)
{
prev_mip->usage = mip->prev_usage;
prev_mip->next_usage = mip->usage;
if (should_alias)
{
prev_mip->next_usage = G_D12_TrackedUsageKind_Undefined;
}
else
{
prev_mip->next_usage = mip->usage;
}
}
if (should_alias)
{
mip->prev_usage = G_D12_TrackedUsageKind_Undefined;
}
}
}
@ -2353,7 +2360,7 @@ G_D12_BarrierInfo G_D12_BarrierInfoFromBatch(G_D12_CmdBatch *batch, G_QueueKind
}
if (result.access == D3D12_BARRIER_ACCESS_NO_ACCESS)
{
result.sync = D3D12_BARRIER_SYNC_NONE;
// result.sync = D3D12_BARRIER_SYNC_NONE;
}
else
{
@ -2389,6 +2396,12 @@ G_D12_BarrierInfo G_D12_BarrierInfoFromUsageKind(G_D12_TrackedUsageKind usage_ki
result.layout = D3D12_BARRIER_LAYOUT_COMMON;
result.sync |= global.sync;
}
else if (usage_kind == G_D12_TrackedUsageKind_Undefined)
{
result.layout = D3D12_BARRIER_LAYOUT_UNDEFINED;
result.access = D3D12_BARRIER_ACCESS_NO_ACCESS;
result.sync |= global.sync;
}
else
{
result.layout = G_D12_CommonLayoutFromQueueKind(queue_kind);
@ -2401,7 +2414,7 @@ G_D12_BarrierInfo G_D12_BarrierInfoFromUsageKind(G_D12_TrackedUsageKind usage_ki
}
if (result.access == D3D12_BARRIER_ACCESS_NO_ACCESS)
{
result.sync = D3D12_BARRIER_SYNC_NONE;
// result.sync = D3D12_BARRIER_SYNC_NONE;
}
else
{
@ -2495,7 +2508,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
if (resource)
{
batch->contains_rtv = 1;
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(rt_desc->mip, rt_desc->mip), G_D12_TrackedUsageKind_RenderTarget);
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(rt_desc->mip, rt_desc->mip), G_D12_TrackedUsageKind_RenderTarget, cmd);
}
else
{
@ -2507,7 +2520,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
{
batch->contains_rtv = 1;
G_D12_Resource *resource = cmd->clear_rtv.resource;
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(cmd->clear_rtv.mip, cmd->clear_rtv.mip), G_D12_TrackedUsageKind_RenderTarget);
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(cmd->clear_rtv.mip, cmd->clear_rtv.mip), G_D12_TrackedUsageKind_RenderTarget, cmd);
}
else if (cmd_kind == G_D12_CmdKind_Discard)
{
@ -2517,12 +2530,12 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
if (is_rtv)
{
batch->contains_rtv = 1;
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_RenderTarget);
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_RenderTarget, cmd);
}
else if (is_dsv)
{
batch->contains_dsv_write = 1;
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_DepthStencilReadWrite);
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_DepthStencilReadWrite, cmd);
}
}
else if (cmd_kind == G_D12_CmdKind_CopyBytes || cmd_kind == G_D12_CmdKind_CopyTexels)
@ -2543,13 +2556,13 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
G_D12_Resource *resource = G_D12_ResourceFromTextureRef(cmd->barrier.texture);
if (resource)
{
if (cmd->barrier.acquire)
if (cmd->barrier.release)
{
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_Acquire);
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_Release, cmd);
}
else
{
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_Release);
G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_Acquire, cmd);
}
}
}
@ -2562,9 +2575,8 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
}
}
if (last_batch->first_tracked_resource)
{
// Final empty batch to implicitly decay tracked resources
// Final empty batch to allow for decaying of tracked resources
++batches_count;
batch = PushStruct(scratch.arena, G_D12_CmdBatch);
DllQueuePush(first_batch, last_batch, batch);
@ -2576,7 +2588,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
{
G_D12_CmdBatch *prev_batch = 0;
for (G_D12_CmdBatch *batch = first_batch; batch; batch = batch->next)
for (G_D12_CmdBatch *batch = first_batch; batch && batch != last_batch; batch = batch->next)
{
G_D12_CmdBatch *next_batch = batch->next;
@ -2602,7 +2614,11 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
}
// Push decay transition to next batch
if (next_batch && mip->next_usage == G_D12_TrackedUsageKind_Untracked)
if (
mip->usage != G_D12_TrackedUsageKind_Untracked &&
mip->usage != G_D12_TrackedUsageKind_Acquire &&
mip->next_usage == G_D12_TrackedUsageKind_Untracked
)
{
++next_batch->transitions_count;
G_D12_TransitionNode *tn = PushStruct(scratch.arena, G_D12_TransitionNode);
@ -2674,8 +2690,6 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
u64 texture_barriers_count = batch->transitions_count;
D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, texture_barriers_count);
b32 is_transition_batch = batch->cmds_count == 0;
G_D12_BarrierInfo global_after = G_D12_BarrierInfoFromBatch(batch, queue_kind);
G_D12_BarrierInfo global_before = Zi;
if (batch->prev)
@ -2685,13 +2699,11 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
if (batch == first_batch)
{
is_transition_batch = 1;
global_before.sync = D3D12_BARRIER_SYNC_NONE;
global_before.access = D3D12_BARRIER_ACCESS_NO_ACCESS;
}
if (batch == last_batch)
{
is_transition_batch = 1;
global_after.sync = D3D12_BARRIER_SYNC_NONE;
global_after.access = D3D12_BARRIER_ACCESS_NO_ACCESS;
}
@ -2706,8 +2718,6 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
G_D12_TrackedUsageKind new_usage = tn->new;
G_D12_BarrierInfo resource_before = G_D12_BarrierInfoFromUsageKind(old_usage, resource, global_before, queue_kind);
G_D12_BarrierInfo resource_after = G_D12_BarrierInfoFromUsageKind(new_usage, resource, global_after, queue_kind);
// D3D12_BARRIER_LAYOUT layout_before = G_D12_BarrierLayoutFromUsageKind(old_usage, queue_kind);
// D3D12_BARRIER_LAYOUT layout_after = G_D12_BarrierLayoutFromUsageKind(new_usage, queue_kind);
D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[barrier_idx];
{
@ -2722,6 +2732,27 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
barrier->Subresources.NumPlanes = 1;
barrier->Subresources.IndexOrFirstMipLevel = tn->mips.min;
barrier->Subresources.NumMipLevels = tn->mips.max - tn->mips.min + 1;
// if (
// barrier->LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED &&
// AnyBit(resource->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)
// )
// {
// barrier->Flags |= D3D12_TEXTURE_BARRIER_FLAG_DISCARD;
// }
if (
barrier->LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED
)
{
barrier->Flags |= D3D12_TEXTURE_BARRIER_FLAG_DISCARD;
}
}
++barrier_idx;
}
@ -2737,7 +2768,10 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle)
u32 barrier_groups_count = 0;
D3D12_BARRIER_GROUP barrier_groups[2] = Zi;
if (!is_transition_batch)
if (
(global_barrier.SyncBefore != D3D12_BARRIER_SYNC_NONE || global_barrier.AccessBefore != D3D12_BARRIER_ACCESS_NO_ACCESS) &&
(global_barrier.SyncAfter != D3D12_BARRIER_SYNC_NONE || global_barrier.AccessAfter != D3D12_BARRIER_ACCESS_NO_ACCESS)
)
{
D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++];
group->Type = D3D12_BARRIER_TYPE_GLOBAL;
@ -3213,7 +3247,7 @@ void G_SetConstantEx(G_CommandListHandle cl_handle, i32 slot, void *src_32bit, u
CopyBytes(&cmd->constant.value, src_32bit, MinU32(size, 4));
}
//- Barrier
//- Sync
void G_Sync(G_CommandListHandle cl_handle)
{
@ -3222,13 +3256,32 @@ void G_Sync(G_CommandListHandle cl_handle)
cmd->kind = G_D12_CmdKind_Barrier;
}
void G_SyncLayout(G_CommandListHandle cl_handle, G_TextureRef texture, G_TextureLayout layout)
void G_SyncAcquireFamilyLayout(G_CommandListHandle cl_handle, G_TextureRef texture)
{
G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
cmd->kind = G_D12_CmdKind_Barrier;
cmd->barrier.texture = texture;
cmd->barrier.acquire = layout == G_TextureLayout_Family;
if (cl->queue_kind != G_QueueKind_AsyncCopy)
{
G_D12_Resource *resource = G_D12_ResourceFromTextureRef(texture);
G_D12_Cmd *cmd = G_D12_PushCmd(cl);
cmd->kind = G_D12_CmdKind_Barrier;
cmd->barrier.texture = texture;
cmd->barrier.release = 0;
Atomic32Set(&resource->texture_family, G_FamilyFromQueueKind(cl->queue_kind));
}
}
// Records a barrier on `cl_handle` that releases `texture` from the current
// queue family's layout, and marks the resource as belonging to G_Family_Common.
//
// Does nothing on the async copy queue (copy queues cannot perform transitions).
// The barrier command is recorded with `release = 1`; at commit time that causes
// the texture's mips to be tracked with G_D12_TrackedUsageKind_Release.
//
// cl_handle: command list to record into.
// texture:   texture to release. If it does not resolve to a resource, the
//            barrier command is still recorded but no family is updated,
//            matching the null check done when the command list is committed.
void G_SyncReleaseFamilyLayout(G_CommandListHandle cl_handle, G_TextureRef texture)
{
  G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle);
  if (cl->queue_kind != G_QueueKind_AsyncCopy)
  {
    G_D12_Resource *resource = G_D12_ResourceFromTextureRef(texture);
    G_D12_Cmd *cmd = G_D12_PushCmd(cl);
    cmd->kind = G_D12_CmdKind_Barrier;
    cmd->barrier.texture = texture;
    cmd->barrier.release = 1;
    // The texture ref may not resolve to a live resource; only touch the
    // family tag when it does, instead of dereferencing a null pointer.
    if (resource)
    {
      Atomic32Set(&resource->texture_family, G_Family_Common);
    }
  }
}
//- Zone
@ -3832,10 +3885,8 @@ void G_CommitBackbuffer(G_BackbufferHandle backbuffer_handle, Vec2I32 dst_offset
G_D12_Resource *src_resource = G_D12_ResourceFromTextureRef(src);
G_D12_RawCommandList *rcl = G_D12_PrepareRawCommandList(G_QueueKind_Direct);
ID3D12GraphicsCommandList7 *d3d_cl = rcl->d3d_cl;
G_D12_InsertEvent(d3d_cl, G_D12_EventKind_PushZone, Lit("Copy to backbuffer"));
{
G_D12_InsertEvent(d3d_cl, G_D12_EventKind_Marker, Lit("Copy to backbuffer"));
D3D12_BOX src_box = Zi;
{
src_box.left = src_range.p0.x;
@ -3908,6 +3959,7 @@ void G_CommitBackbuffer(G_BackbufferHandle backbuffer_handle, Vec2I32 dst_offset
ID3D12GraphicsCommandList7_Barrier(d3d_cl, 1, &barrier_group);
}
}
G_D12_InsertEvent(d3d_cl, G_D12_EventKind_PopZone, Zstr);
G_D12_CommitRawCommandList(rcl);
}

View File

@ -93,6 +93,7 @@ Struct(G_D12_Resource)
G_Format texture_format;
Vec3I32 texture_dims;
RngI32 texture_mips;
Atomic32 texture_family;
// Sampler info
G_SamplerDesc sampler_desc;
@ -334,7 +335,8 @@ Struct(G_D12_Cmd)
struct
{
G_TextureRef texture;
b32 acquire;
b32 release;
b32 alias;
} barrier;
struct
@ -424,6 +426,7 @@ Struct(G_D12_CmdList)
Enum(G_D12_TrackedUsageKind)
{
G_D12_TrackedUsageKind_Untracked,
G_D12_TrackedUsageKind_Undefined,
G_D12_TrackedUsageKind_Acquire,
G_D12_TrackedUsageKind_DepthStencilRead,
G_D12_TrackedUsageKind_DepthStencilReadWrite,
@ -690,6 +693,7 @@ DeclApiFromXList(G_D12_AgsApi, G_D12_AgsApiXList, "amd_ags_x64.dll");
G_D12_Arena *G_D12_ArenaFromHandle(G_ArenaHandle handle);
G_D12_CmdList *G_D12_CmdListFromHandle(G_CommandListHandle handle);
G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle);
G_D12_Backbuffer *G_D12_BackbufferFromHandle(G_BackbufferHandle handle);
DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format);
D3D12_BARRIER_LAYOUT G_D12_CommonLayoutFromQueueKind(G_QueueKind queue_kind);
@ -747,7 +751,7 @@ void G_D12_InsertEvent(ID3D12GraphicsCommandList7 *d3d_cl, G_D12_EventKind kind,
////////////////////////////////////////////////////////////
//~ Tracking
void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resource *resource, RngI32 mips, G_D12_TrackedUsageKind usage_kind);
void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resource *resource, RngI32 mips, G_D12_TrackedUsageKind usage_kind, G_D12_Cmd *cmd);
G_D12_BarrierInfo G_D12_BarrierInfoFromBatch(G_D12_CmdBatch *batch, G_QueueKind queue_kind);
G_D12_BarrierInfo G_D12_BarrierInfoFromUsageKind(G_D12_TrackedUsageKind usage_kind, G_D12_Resource *resource, G_D12_BarrierInfo global, G_QueueKind queue_kind);

View File

@ -84,10 +84,10 @@ Enum(G_BasicSamplerKind)
////////////////////////////////////////////////////////////
//~ Index buffers
#define G_IB(_count, _buffer) ((G_IndexBufferDesc) { .count = (_count), .buffer = (_buffer), __VA_ARGS__ })
#define G_IB(_count, _buffer, ...) ((G_IndexBufferDesc) { .count = (_count), .buffer = (_buffer), __VA_ARGS__ })
Struct(G_IndexBufferDesc)
{
u64 count;
u32 count;
G_BufferRef buffer;
};

View File

@ -420,7 +420,6 @@ void V_TickForever(WaveLaneCtx *lane)
.flags = G_MemoryFlag_Zero,
.name = Lit("Tiles")
);
//- Init particle buffer
gpu_particles = G_PushStructs(
cl, gpu_perm,
@ -428,8 +427,7 @@ void V_TickForever(WaveLaneCtx *lane)
.flags = G_MemoryFlag_Zero,
.name = Lit("Particles")
);
//- Init particle textures
//- Init particle layers
for (V_ParticleLayer layer = 0; layer < V_ParticleLayer_COUNT; ++layer)
{
gpu_particle_cells[layer] = G_PushTexture2D(
@ -536,7 +534,7 @@ void V_TickForever(WaveLaneCtx *lane)
while (!shutdown)
{
shutdown = Atomic32Fetch(&V.shutdown);
P_tl.debug_draw_enabled = TweakBool("Vis debug draw", 0);
P_tl.debug_draw_enabled = TweakBool("Vis debug draw", 1);
//////////////////////////////
//- Begin frame
@ -2699,8 +2697,6 @@ void V_TickForever(WaveLaneCtx *lane)
Vec2 hit_entry_normal = bullet->hit_entry_normal;
Vec2 hit_entry_velocity = bullet->hit_entry_velocity;
V_DrawLine(start, end, Color_Cyan);
//////////////////////////////
//- Wall particles
@ -5067,168 +5063,169 @@ void V_TickForever(WaveLaneCtx *lane)
//////////////////////////////
//- Begin gpu frame
Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1));
Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y));
{
//////////////////////////////
//- Upload data to GPU
//- GPU upload pass
// Backdrop
frame->backdrop_parallax = TweakFloat("Backdrop parallax", 4, 0, 20);
G_ZoneDF(cl, "Setup")
{
SPR_SheetKey sheet = SPR_SheetKeyFromResource(ResourceKeyFromStore(&P_Resources, Lit("backdrop.ase")));
SPR_Sprite sprite = SPR_SpriteFromSheetEx(sheet, SPR_NilSpanKey, 0, SPR_SheetFlag_NoAtlas);
frame->backdrop_src = sprite.tex;
}
// Tiles
{
for (P_TileKind tile_kind = 0; tile_kind < P_TileKind_COUNT; ++tile_kind)
// Backdrop
frame->backdrop_parallax = TweakFloat("Backdrop parallax", 4, 0, 20);
{
SPR_Sprite tile_sprite = Zi;
{
String tile_name = P_NameFromTileKind(tile_kind);
String sheet_name = StringF(frame->arena, "tile/%F.ase", FmtString(tile_name));
ResourceKey sheet_resource = ResourceKeyFromStore(&P_Resources, sheet_name);
SPR_SheetKey sheet = SPR_SheetKeyFromResource(sheet_resource);
tile_sprite = SPR_SpriteFromSheet(sheet, SPR_NilSpanKey, 0);
}
V_TileDesc tile_desc = Zi;
{
tile_desc.tex = tile_sprite.tex;
tile_desc.tex_slice_uv = DivRng2Vec2(tile_sprite.tex_rect, tile_sprite.tex_dims);
}
frame->tile_descs[tile_kind] = tile_desc;
SPR_SheetKey sheet = SPR_SheetKeyFromResource(ResourceKeyFromStore(&P_Resources, Lit("backdrop.ase")));
SPR_Sprite sprite = SPR_SpriteFromSheetEx(sheet, SPR_NilSpanKey, 0, SPR_SheetFlag_NoAtlas);
frame->backdrop_src = sprite.tex;
}
}
// Upload tiles
if (frame->tiles_dirty)
{
// LogDebugF("Uploading tiles to gpu");
G_CopyCpuToTexture(
cl,
gpu_tiles, VEC3I32(0, 0, 0),
frame->local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1),
RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1))
);
}
// Screen texture
frame->screen = G_PushTexture2D(
cl, gpu_frame_arena,
G_TextureLayout_Family,
G_Format_R16G16B16A16_Float,
frame->screen_dims,
.flags = G_MemoryFlag_AllowTextureRW | G_MemoryFlag_AllowTextureDraw,
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
);
Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1));
Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y));
// Albedo texture
frame->albedo = G_PushTexture2D(
cl, gpu_frame_arena,
G_TextureLayout_Family,
G_Format_R16G16B16A16_Float,
frame->screen_dims,
.flags = G_MemoryFlag_AllowTextureDraw,
.name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick))
);
// Backdrop texture
frame->backdrop_chain = G_PushTexture2D(
cl, gpu_frame_arena,
G_TextureLayout_Family,
G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(frame->screen), 1),
.flags = G_MemoryFlag_AllowTextureRW,
.name = StringF(frame->arena, "Backdrop target [%F]", FmtSint(frame->tick)),
.max_mips = 4
);
// Bloom texture
// TODO: We can re-use backdrop mip chain for this
frame->bloom_chain = G_PushTexture2D(
cl, gpu_frame_arena,
G_TextureLayout_Family,
G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(frame->screen), 1),
.flags = G_MemoryFlag_AllowTextureRW,
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),
.max_mips = G_MaxMips
);
// Shade texture
frame->shade = G_PushTexture2D(
cl, gpu_frame_arena,
G_TextureLayout_Family,
G_Format_R16G16B16A16_Float,
frame->shade_dims,
.flags = G_MemoryFlag_AllowTextureRW,
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))
);
Rng3 shade_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->shade_dims.x, frame->shade_dims.y, 1));
Rng2 shade_scissor = RNG2(VEC2(shade_viewport.p0.x, shade_viewport.p0.y), VEC2(shade_viewport.p1.x, shade_viewport.p1.y));
// Quad buffers
u64 quads_count = ArenaCount(frame->quads_arena, V_Quad);
V_Quad *quads = ArenaFirst(frame->quads_arena, V_Quad);
frame->quads = G_PushStructsFromCpu(
cl, gpu_frame_arena,
quads, quads_count,
.name = StringF(frame->arena, "quads [%F]", FmtSint(frame->tick))
);
// Debug verts
u64 dverts_count = ArenaCount(frame->dverts_arena, V_DVert);
V_DVert *dverts = ArenaFirst(frame->dverts_arena, V_DVert);
frame->dverts = G_PushStructsFromCpu(
cl, gpu_frame_arena,
dverts, dverts_count,
.name = StringF(frame->arena, "dverts [%F]", FmtSint(frame->tick))
);
// Debug vert indices
frame->dvert_idxs.count = ArenaCount(frame->dvert_idxs_arena, u32);
frame->dvert_idxs.buffer = G_PushStructsFromCpu(
cl, gpu_frame_arena,
ArenaFirst(frame->dvert_idxs_arena, u32), frame->dvert_idxs.count,
.name = StringF(frame->arena, "dvert idxs [%F]", FmtSint(frame->tick))
);
// Particles
G_BufferRef gpu_emitters = Zi;
{
// Flatten emitters list
V_Emitter *flattened_emitters = PushStructsNoZero(frame->arena, V_Emitter, frame->emitters_count);
// Tiles
{
i64 emitter_idx = 0;
for (V_EmitterNode *en = frame->first_emitter_node; en; en = en->next)
for (P_TileKind tile_kind = 0; tile_kind < P_TileKind_COUNT; ++tile_kind)
{
flattened_emitters[emitter_idx] = en->emitter;
++emitter_idx;
SPR_Sprite tile_sprite = Zi;
{
String tile_name = P_NameFromTileKind(tile_kind);
String sheet_name = StringF(frame->arena, "tile/%F.ase", FmtString(tile_name));
ResourceKey sheet_resource = ResourceKeyFromStore(&P_Resources, sheet_name);
SPR_SheetKey sheet = SPR_SheetKeyFromResource(sheet_resource);
tile_sprite = SPR_SpriteFromSheet(sheet, SPR_NilSpanKey, 0);
}
V_TileDesc tile_desc = Zi;
{
tile_desc.tex = tile_sprite.tex;
tile_desc.tex_slice_uv = DivRng2Vec2(tile_sprite.tex_rect, tile_sprite.tex_dims);
}
frame->tile_descs[tile_kind] = tile_desc;
}
}
frame->emitters = G_PushStructsFromCpu(
// Upload tiles
if (frame->tiles_dirty)
{
// LogDebugF("Uploading tiles to gpu");
G_CopyCpuToTexture(
cl,
gpu_tiles, VEC3I32(0, 0, 0),
frame->local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1),
RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1))
);
}
// Screen texture
frame->screen = G_PushTexture2D(
cl, gpu_frame_arena,
flattened_emitters, frame->emitters_count,
.name = StringF(frame->arena, "emitters [%F]", FmtSint(frame->tick))
G_TextureLayout_Family,
G_Format_R16G16B16A16_Float,
frame->screen_dims,
.flags = G_MemoryFlag_AllowTextureRW | G_MemoryFlag_AllowTextureDraw,
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
);
// Albedo texture
frame->albedo = G_PushTexture2D(
cl, gpu_frame_arena,
G_TextureLayout_Family,
G_Format_R16G16B16A16_Float,
frame->screen_dims,
.flags = G_MemoryFlag_AllowTextureDraw,
.name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick))
);
// Backdrop texture
frame->backdrop_chain = G_PushTexture2D(
cl, gpu_frame_arena,
G_TextureLayout_Family,
G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(frame->screen), 1),
.flags = G_MemoryFlag_AllowTextureRW,
.name = StringF(frame->arena, "Backdrop target [%F]", FmtSint(frame->tick)),
.max_mips = 4
);
// Bloom texture
// TODO: We can re-use backdrop mip chain for this
frame->bloom_chain = G_PushTexture2D(
cl, gpu_frame_arena,
G_TextureLayout_Family,
G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(frame->screen), 1),
.flags = G_MemoryFlag_AllowTextureRW,
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),
.max_mips = G_MaxMips
);
// Shade texture
frame->shade = G_PushTexture2D(
cl, gpu_frame_arena,
G_TextureLayout_Family,
G_Format_R16G16B16A16_Float,
frame->shade_dims,
.flags = G_MemoryFlag_AllowTextureRW,
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))
);
Rng3 shade_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->shade_dims.x, frame->shade_dims.y, 1));
Rng2 shade_scissor = RNG2(VEC2(shade_viewport.p0.x, shade_viewport.p0.y), VEC2(shade_viewport.p1.x, shade_viewport.p1.y));
// Quad buffers
frame->quads_count = ArenaCount(frame->quads_arena, V_Quad);
frame->quads = G_PushStructsFromCpu(
cl, gpu_frame_arena,
ArenaFirst(frame->quads_arena, V_Quad), frame->quads_count,
.name = StringF(frame->arena, "quads [%F]", FmtSint(frame->tick))
);
// Debug verts
u64 dverts_count = ArenaCount(frame->dverts_arena, V_DVert);
V_DVert *dverts = ArenaFirst(frame->dverts_arena, V_DVert);
frame->dverts = G_PushStructsFromCpu(
cl, gpu_frame_arena,
dverts, dverts_count,
.name = StringF(frame->arena, "dverts [%F]", FmtSint(frame->tick))
);
// Debug vert indices
frame->dvert_idxs.count = ArenaCount(frame->dvert_idxs_arena, u32);
frame->dvert_idxs.buffer = G_PushStructsFromCpu(
cl, gpu_frame_arena,
ArenaFirst(frame->dvert_idxs_arena, u32), frame->dvert_idxs.count,
.name = StringF(frame->arena, "dvert idxs [%F]", FmtSint(frame->tick))
);
// Particles
G_BufferRef gpu_emitters = Zi;
{
// Flatten emitters list
V_Emitter *flattened_emitters = PushStructsNoZero(frame->arena, V_Emitter, frame->emitters_count);
{
i64 emitter_idx = 0;
for (V_EmitterNode *en = frame->first_emitter_node; en; en = en->next)
{
flattened_emitters[emitter_idx] = en->emitter;
++emitter_idx;
}
}
frame->emitters = G_PushStructsFromCpu(
cl, gpu_frame_arena,
flattened_emitters, frame->emitters_count,
.name = StringF(frame->arena, "emitters [%F]", FmtSint(frame->tick))
);
}
// Upload gpu frame
G_BufferRef gpu_frame = G_PushStructFromCpu(
cl, gpu_frame_arena,
&frame->shared_frame,
.name = StringF(frame->arena, "Gpu frame [%F]", FmtSint(frame->tick))
);
// Set initial constants
V_GpuFlag gpu_flags = V_GpuFlag_None;
G_SetConstant(cl, V_GpuConst_Flags, gpu_flags);
G_SetConstant(cl, V_GpuConst_Frame, gpu_frame);
G_SetConstant(cl, V_GpuConst_NoiseTex, G_BasicNoise3D());
}
// Upload gpu frame
G_BufferRef gpu_frame = G_PushStructFromCpu(
cl, gpu_frame_arena,
&frame->shared_frame,
.name = StringF(frame->arena, "Gpu frame [%F]", FmtSint(frame->tick))
);
// Set initial constants
V_GpuFlag gpu_flags = V_GpuFlag_None;
G_SetConstant(cl, V_GpuConst_Flags, gpu_flags);
G_SetConstant(cl, V_GpuConst_Frame, gpu_frame);
G_SetConstant(cl, V_GpuConst_NoiseTex, G_BasicNoise3D());
// Sync
G_Sync(cl);
//////////////////////////////
@ -5274,24 +5271,23 @@ void V_TickForever(WaveLaneCtx *lane)
G_Compute2D(cl, V_BackdropUpCS, up_dims);
G_Sync(cl);
}
}
}
G_Sync(cl);
}
G_Sync(cl);
//////////////////////////////
//- Quads & emitters pass
G_ZoneDF(cl, "Quads & emitters")
{
G_ClearRenderTarget(cl, frame->albedo, VEC4(0, 0, 0, 0), 0);
// Draw quads
G_ClearRenderTarget(cl, frame->albedo, VEC4(0, 0, 0, 0), 0);
G_Draw(
cl,
V_QuadVS, V_QuadPS,
quads_count, G_QuadIndices(),
frame->quads_count, G_QuadIndices(),
1, &G_RT(frame->albedo, G_BlendMode_CompositeStraightAlpha),
screen_viewport, screen_scissor,
G_DrawMode_TriangleList
@ -5299,11 +5295,10 @@ void V_TickForever(WaveLaneCtx *lane)
// Emit particles
G_Compute(cl, V_EmitParticlesCS, frame->emitters_count);
// Sync particles, occluders, & albedo
G_Sync(cl);
}
G_Sync(cl);
//////////////////////////////
//- Particle simulation pass
@ -5311,11 +5306,10 @@ void V_TickForever(WaveLaneCtx *lane)
{
// Simulate particles
G_Compute(cl, V_SimParticlesCS, V_ParticlesCap);
// Sync cells
G_Sync(cl);
}
G_Sync(cl);
//////////////////////////////
//- Shading pass
@ -5325,21 +5319,20 @@ void V_TickForever(WaveLaneCtx *lane)
if (0)
{
G_Compute2D(cl, V_ShadeCS, frame->shade_dims);
G_Sync(cl);
}
G_Sync(cl);
//////////////////////////////
//- Composite pass
G_ZoneDF(cl, "Composite")
{
G_Compute2D(cl, V_CompositeCS, frame->screen_dims);
// Sync screen tex
G_Sync(cl);
}
G_Sync(cl);
//////////////////////////////
//- Bloom passes
@ -5376,16 +5369,18 @@ void V_TickForever(WaveLaneCtx *lane)
}
}
G_SyncReleaseFamilyLayout(cl, frame->bloom_chain);
//////////////////////////////
//- Finalization pass
G_ZoneDF(cl, "Finalize")
{
G_Compute2D(cl, V_FinalizeCS, frame->screen_dims);
G_Sync(cl);
}
G_Sync(cl);
//////////////////////////////
//- Debug shapes pass

View File

@ -372,6 +372,8 @@ Struct(V_SharedFrame)
G_IndexBufferDesc dvert_idxs;
G_BufferRef dverts;
u32 quads_count;
G_BufferRef quads;
};