diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 29c6500e..2ed40d42 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -21,7 +21,7 @@ Enum(G_Family) { G_Family_Graphics, G_Family_Compute, - G_Family_Copy + G_Family_Common }; #if G_IsMultiQueueEnabled @@ -60,6 +60,12 @@ Enum(G_Family) #define G_QueueMaskFromKind(queue_kind) (1 << queue_kind) +#define G_FamilyFromQueueKind(queue_kind) ( \ + queue_kind == G_QueueKind_Direct ? G_Family_Graphics : \ + queue_kind == G_QueueKind_AsyncCompute ? G_Family_Compute : \ + G_Family_Common \ +) + Struct(G_QueueCompletions) { i64 v[G_QueueKind_COUNT]; @@ -203,25 +209,6 @@ Enum(G_Format) G_Format_COUNT = 192 }; -//////////////////////////////////////////////////////////// -//~ Texture layout types - -Enum(G_TextureLayout) -{ - // Supports present, shader-read, and copy-read/write in any queue family. - // Transitionable from G_TextureLayout_Family in non-copy queue. - G_TextureLayout_Common, - - // Supports any access in the current queue family. - // Transitionable from G_TextureLayout_Common in non-copy queue. - G_TextureLayout_Family, - - // Supports any read access with up to 1 write access to non overlapping regions from any queue. - // Cannot be transitioned to/from. - // Depth-stencil textures cannot use this layout. - G_TextureLayout_Simultaneous, -}; - //////////////////////////////////////////////////////////// //~ Filter types @@ -301,6 +288,22 @@ Enum(G_ComparisonFunc) G_ComparisonFunc_Always = 8 }; +//////////////////////////////////////////////////////////// +//~ Texture layout types + +Enum(G_TextureLayout) +{ + // Supports shader-read, and copy-read/write in any queue family. + G_TextureLayout_Common, + + // Supports any access in the current queue family. + G_TextureLayout_Family, + + // Supports any read access with up to 1 write access to non overlapping regions from any queue. + // Depth-stencil textures cannot use this layout. + G_TextureLayout_Simultaneous, +}; + //////////////////////////////////////////////////////////// //~ Memory types @@ -545,19 +548,20 @@ void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size G_SetConstantEx((cl), (name), &__src, sizeof(__src)); \ } while (0) -//- Barrier +//- Sync void G_Sync(G_CommandListHandle cl); -void G_SyncLayout(G_CommandListHandle cl, G_TextureRef resource, G_TextureLayout layout); +void G_SyncAcquireFamilyLayout(G_CommandListHandle cl, G_TextureRef texture); +void G_SyncReleaseFamilyLayout(G_CommandListHandle cl, G_TextureRef texture); //- Zone void G_PushZoneEx(G_CommandListHandle cl, String name_lit); void G_PopZoneEx(G_CommandListHandle cl); -#define G_PushZone(cl, name_lit) G_PushZoneEx((cl), Lit(name_lit)) +#define G_PushZone(cl, name_lit) G_PushZoneEx((cl), Lit(name_lit"\0")) #define G_PopZone(cl) G_PopZoneEx(cl) -#define G_ZoneDF(cl, name_lit) DeferFor(G_PushZone((cl), (name_lit)), G_PopZone(cl)) +#define G_ZoneDF(cl, name_lit) DeferFor(G_PushZone((cl), name_lit), G_PopZone(cl)) //- Cpu -> Gpu staged copy diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index 2c02239a..393ab128 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -1257,6 +1257,7 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle G_D12_Arena *gpu_arena = G_D12_ArenaFromHandle(gpu_arena_handle); G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_QueueKind queue_kind = cl->queue_kind; + G_Family current_family = G_FamilyFromQueueKind(queue_kind); G_D12_Resource *resource = 0; b32 is_buffer = memory_desc.kind == G_MemoryKind_Buffer; @@ -1459,6 +1460,28 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle ////////////////////////////// //- Allocate D3D12 resource + G_Family initial_family = G_Family_Common; + D3D12_BARRIER_LAYOUT d3d_initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED; + if (is_texture) + { + d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMMON; + if (memory_desc.texture.initial_layout == G_TextureLayout_Family) + { + initial_family = G_FamilyFromQueueKind(queue_kind); + switch (queue_kind) + { + case G_QueueKind_Direct: d3d_initial_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON; break; + case G_QueueKind_AsyncCompute: d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON; break; + } + } + } + + if (queue_kind == G_QueueKind_AsyncCopy && Atomic32Fetch(&resource->texture_family) != initial_family) + { + // Copy queues cannot perform transitions, so resource can't be aliased + can_reuse = 0; + } + if (!can_reuse) { if (is_buffer || is_texture) @@ -1468,21 +1491,6 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle { clear_value_arg = &clear_value; } - - D3D12_BARRIER_LAYOUT d3d_initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED; - if (is_texture) - { - d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMMON; - if (memory_desc.texture.initial_layout == G_TextureLayout_Family) - { - switch (queue_kind) - { - case G_QueueKind_Direct: d3d_initial_layout = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON; break; - case G_QueueKind_AsyncCompute: d3d_initial_layout = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON; break; - } - } - } - HRESULT hr = ID3D12Device10_CreateCommittedResource3( G_D12.device, &heap_props, @@ -1504,17 +1512,6 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1); resource->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, d3d_desc.MipLevels); - - // Queue initial Rtv/Dsv discard - if ( - !AnyBit(flags, G_MemoryFlag_Zero) && - AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) - ) - { - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Discard; - cmd->discard.resource = resource; - } } if (is_buffer) @@ -1530,6 +1527,7 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle resource->texture_format = memory_desc.texture.format; resource->texture_dims = memory_desc.texture.dims; resource->texture_mips = RNGI32(0, d3d_desc.MipLevels - 1); + Atomic32Set(&resource->texture_family, initial_family); } else if (is_sampler) { @@ -1795,40 +1793,43 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle resource->gpu_descriptor = descriptor; } - - - - - - - ////////////////////////////// - //- Transition intiial layout + //- Initial layout transition - - - // FIXME: On reset, transition resources to UNDEFINED + No access - // Then here, transition from undefined (w/ discard if rt/dsv) - - - - if ( - can_reuse && - is_texture && - memory_desc.texture.initial_layout == G_TextureLayout_Common && - queue_kind != G_QueueKind_AsyncCopy - ) + if (can_reuse && is_texture && queue_kind != G_QueueKind_AsyncCopy) { - G_SyncLayout(cl_handle, G_MakeTextureRef(resource->gpu_descriptor->base_index), G_TextureLayout_Common); + G_Family old_family = Atomic32Fetch(&resource->texture_family); + if (old_family != initial_family) + { + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Barrier; + cmd->barrier.texture = G_MakeTextureRef(resource->gpu_descriptor->base_index); + if (old_family != current_family && old_family != G_Family_Common) + { + cmd->barrier.alias = 1; + } + if (initial_family == G_Family_Common) + { + cmd->barrier.release = 1; + } + } + Atomic32Set(&resource->texture_family, initial_family); } + ////////////////////////////// + //- Queue initial Rtv/Dsv discard - - - - - - + if ( + !can_reuse && + !AnyBit(flags, G_MemoryFlag_Zero) && + AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) + ) + { + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Discard; + cmd->discard.resource = resource; + G_Sync(cl_handle); + } ////////////////////////////// //- Upload initial data @@ -1845,13 +1846,6 @@ G_BaseDescriptorIndex G_PushMemory(G_CommandListHandle cl_handle, G_ArenaHandle return resource->gpu_descriptor->base_index; } - - - - - - - //- Count u64 G_CountStride(G_BufferRef buffer) @@ -2216,7 +2210,7 @@ void G_D12_InsertEvent(ID3D12GraphicsCommandList7 *d3d_cl, G_D12_EventKind kind, //////////////////////////////////////////////////////////// //~ Tracking -void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resource *resource, RngI32 mips, G_D12_TrackedUsageKind usage_kind) +void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resource *resource, RngI32 mips, G_D12_TrackedUsageKind usage_kind, G_D12_Cmd *cmd) { b32 should_track = !AnyBit(resource->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS); if (should_track) @@ -2272,6 +2266,7 @@ void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resourc } } + b32 should_alias = cmd->kind == G_D12_CmdKind_Barrier && cmd->barrier.alias; for (i32 mip_idx = mips.min; mip_idx <= mips.max; ++mip_idx) { G_D12_TrackedMip *mip = &trn->mips[mip_idx]; @@ -2305,7 +2300,19 @@ void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resourc if (prev_mip) { prev_mip->usage = mip->prev_usage; - prev_mip->next_usage = mip->usage; + if (should_alias) + { + prev_mip->next_usage = G_D12_TrackedUsageKind_Undefined; + } + else + { + prev_mip->next_usage = mip->usage; + } + } + + if (should_alias) + { + mip->prev_usage = G_D12_TrackedUsageKind_Undefined; } } } @@ -2353,7 +2360,7 @@ G_D12_BarrierInfo G_D12_BarrierInfoFromBatch(G_D12_CmdBatch *batch, G_QueueKind } if (result.access == D3D12_BARRIER_ACCESS_NO_ACCESS) { - result.sync = D3D12_BARRIER_SYNC_NONE; + // result.sync = D3D12_BARRIER_SYNC_NONE; } else { @@ -2389,6 +2396,12 @@ G_D12_BarrierInfo G_D12_BarrierInfoFromUsageKind(G_D12_TrackedUsageKind usage_ki result.layout = D3D12_BARRIER_LAYOUT_COMMON; result.sync |= global.sync; } + else if (usage_kind == G_D12_TrackedUsageKind_Undefined) + { + result.layout = D3D12_BARRIER_LAYOUT_UNDEFINED; + result.access = D3D12_BARRIER_ACCESS_NO_ACCESS; + result.sync |= global.sync; + } else { result.layout = G_D12_CommonLayoutFromQueueKind(queue_kind); @@ -2401,7 +2414,7 @@ G_D12_BarrierInfo G_D12_BarrierInfoFromUsageKind(G_D12_TrackedUsageKind usage_ki } if (result.access == D3D12_BARRIER_ACCESS_NO_ACCESS) { - result.sync = D3D12_BARRIER_SYNC_NONE; + // result.sync = D3D12_BARRIER_SYNC_NONE; } else { @@ -2495,7 +2508,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) if (resource) { batch->contains_rtv = 1; - G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(rt_desc->mip, rt_desc->mip), G_D12_TrackedUsageKind_RenderTarget); + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(rt_desc->mip, rt_desc->mip), G_D12_TrackedUsageKind_RenderTarget, cmd); } else { @@ -2507,7 +2520,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) { batch->contains_rtv = 1; G_D12_Resource *resource = cmd->clear_rtv.resource; - G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(cmd->clear_rtv.mip, cmd->clear_rtv.mip), G_D12_TrackedUsageKind_RenderTarget); + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, RNGI32(cmd->clear_rtv.mip, cmd->clear_rtv.mip), G_D12_TrackedUsageKind_RenderTarget, cmd); } else if (cmd_kind == G_D12_CmdKind_Discard) { @@ -2517,12 +2530,12 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) if (is_rtv) { batch->contains_rtv = 1; - G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_RenderTarget); + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_RenderTarget, cmd); } else if (is_dsv) { batch->contains_dsv_write = 1; - G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_DepthStencilReadWrite); + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_DepthStencilReadWrite, cmd); } } else if (cmd_kind == G_D12_CmdKind_CopyBytes || cmd_kind == G_D12_CmdKind_CopyTexels) @@ -2543,13 +2556,13 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) G_D12_Resource *resource = G_D12_ResourceFromTextureRef(cmd->barrier.texture); if (resource) { - if (cmd->barrier.acquire) + if (cmd->barrier.release) { - G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_Acquire); + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_Release, cmd); } else { - G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_Release); + G_D12_UpdateTrackedUsage(scratch.arena, batch, resource, resource->texture_mips, G_D12_TrackedUsageKind_Acquire, cmd); } } } @@ -2562,9 +2575,8 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) } } - if (last_batch->first_tracked_resource) { - // Final empty batch to implicitly decay tracked resources + // Final empty batch to allow for decaying of tracked resources ++batches_count; batch = PushStruct(scratch.arena, G_D12_CmdBatch); DllQueuePush(first_batch, last_batch, batch); @@ -2576,7 +2588,7 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) { G_D12_CmdBatch *prev_batch = 0; - for (G_D12_CmdBatch *batch = first_batch; batch; batch = batch->next) + for (G_D12_CmdBatch *batch = first_batch; batch && batch != last_batch; batch = batch->next) { G_D12_CmdBatch *next_batch = batch->next; @@ -2602,7 +2614,11 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) } // Push decay transition to next batch - if (next_batch && mip->next_usage == G_D12_TrackedUsageKind_Untracked) + if ( + mip->usage != G_D12_TrackedUsageKind_Untracked && + mip->usage != G_D12_TrackedUsageKind_Acquire && + mip->next_usage == G_D12_TrackedUsageKind_Untracked + ) { ++next_batch->transitions_count; G_D12_TransitionNode *tn = PushStruct(scratch.arena, G_D12_TransitionNode); @@ -2674,8 +2690,6 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) u64 texture_barriers_count = batch->transitions_count; D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, texture_barriers_count); - b32 is_transition_batch = batch->cmds_count == 0; - G_D12_BarrierInfo global_after = G_D12_BarrierInfoFromBatch(batch, queue_kind); G_D12_BarrierInfo global_before = Zi; if (batch->prev) @@ -2685,13 +2699,11 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) if (batch == first_batch) { - is_transition_batch = 1; global_before.sync = D3D12_BARRIER_SYNC_NONE; global_before.access = D3D12_BARRIER_ACCESS_NO_ACCESS; } if (batch == last_batch) { - is_transition_batch = 1; global_after.sync = D3D12_BARRIER_SYNC_NONE; global_after.access = D3D12_BARRIER_ACCESS_NO_ACCESS; } @@ -2706,8 +2718,6 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) G_D12_TrackedUsageKind new_usage = tn->new; G_D12_BarrierInfo resource_before = G_D12_BarrierInfoFromUsageKind(old_usage, resource, global_before, queue_kind); G_D12_BarrierInfo resource_after = G_D12_BarrierInfoFromUsageKind(new_usage, resource, global_after, queue_kind); - // D3D12_BARRIER_LAYOUT layout_before = G_D12_BarrierLayoutFromUsageKind(old_usage, queue_kind); - // D3D12_BARRIER_LAYOUT layout_after = G_D12_BarrierLayoutFromUsageKind(new_usage, queue_kind); D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[barrier_idx]; { @@ -2722,6 +2732,27 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) barrier->Subresources.NumPlanes = 1; barrier->Subresources.IndexOrFirstMipLevel = tn->mips.min; barrier->Subresources.NumMipLevels = tn->mips.max - tn->mips.min + 1; + + + + // if ( + // barrier->LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED && + // AnyBit(resource->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) + // ) + // { + // barrier->Flags |= D3D12_TEXTURE_BARRIER_FLAG_DISCARD; + // } + + + if ( + barrier->LayoutBefore == D3D12_BARRIER_LAYOUT_UNDEFINED + ) + { + barrier->Flags |= D3D12_TEXTURE_BARRIER_FLAG_DISCARD; + } + + + } ++barrier_idx; } @@ -2737,7 +2768,10 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) u32 barrier_groups_count = 0; D3D12_BARRIER_GROUP barrier_groups[2] = Zi; - if (!is_transition_batch) + if ( + (global_barrier.SyncBefore != D3D12_BARRIER_SYNC_NONE || global_barrier.AccessBefore != D3D12_BARRIER_ACCESS_NO_ACCESS) && + (global_barrier.SyncAfter != D3D12_BARRIER_SYNC_NONE || global_barrier.AccessAfter != D3D12_BARRIER_ACCESS_NO_ACCESS) + ) { D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; group->Type = D3D12_BARRIER_TYPE_GLOBAL; @@ -3213,7 +3247,7 @@ void G_SetConstantEx(G_CommandListHandle cl_handle, i32 slot, void *src_32bit, u CopyBytes(&cmd->constant.value, src_32bit, MinU32(size, 4)); } -//- Barrier +//- Sync void G_Sync(G_CommandListHandle cl_handle) { @@ -3222,13 +3256,32 @@ void G_Sync(G_CommandListHandle cl_handle) cmd->kind = G_D12_CmdKind_Barrier; } -void G_SyncLayout(G_CommandListHandle cl_handle, G_TextureRef texture, G_TextureLayout layout) +void G_SyncAcquireFamilyLayout(G_CommandListHandle cl_handle, G_TextureRef texture) { G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); - G_D12_Cmd *cmd = G_D12_PushCmd(cl); - cmd->kind = G_D12_CmdKind_Barrier; - cmd->barrier.texture = texture; - cmd->barrier.acquire = layout == G_TextureLayout_Family; + if (cl->queue_kind != G_QueueKind_AsyncCopy) + { + G_D12_Resource *resource = G_D12_ResourceFromTextureRef(texture); + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Barrier; + cmd->barrier.texture = texture; + cmd->barrier.release = 0; + Atomic32Set(&resource->texture_family, G_FamilyFromQueueKind(cl->queue_kind)); + } +} + +void G_SyncReleaseFamilyLayout(G_CommandListHandle cl_handle, G_TextureRef texture) +{ + G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); + if (cl->queue_kind != G_QueueKind_AsyncCopy) + { + G_D12_Resource *resource = G_D12_ResourceFromTextureRef(texture); + G_D12_Cmd *cmd = G_D12_PushCmd(cl); + cmd->kind = G_D12_CmdKind_Barrier; + cmd->barrier.texture = texture; + cmd->barrier.release = 1; + Atomic32Set(&resource->texture_family, G_Family_Common); + } } //- Zone @@ -3832,10 +3885,8 @@ void G_CommitBackbuffer(G_BackbufferHandle backbuffer_handle, Vec2I32 dst_offset G_D12_Resource *src_resource = G_D12_ResourceFromTextureRef(src); G_D12_RawCommandList *rcl = G_D12_PrepareRawCommandList(G_QueueKind_Direct); ID3D12GraphicsCommandList7 *d3d_cl = rcl->d3d_cl; + G_D12_InsertEvent(d3d_cl, G_D12_EventKind_PushZone, Lit("Copy to backbuffer")); { - G_D12_InsertEvent(d3d_cl, G_D12_EventKind_Marker, Lit("Copy to backbuffer")); - - D3D12_BOX src_box = Zi; { src_box.left = src_range.p0.x; @@ -3908,6 +3959,7 @@ void G_CommitBackbuffer(G_BackbufferHandle backbuffer_handle, Vec2I32 dst_offset ID3D12GraphicsCommandList7_Barrier(d3d_cl, 1, &barrier_group); } } + G_D12_InsertEvent(d3d_cl, G_D12_EventKind_PopZone, Zstr); G_D12_CommitRawCommandList(rcl); } diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index 31fc4597..5da72de5 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -93,6 +93,7 @@ Struct(G_D12_Resource) G_Format texture_format; Vec3I32 texture_dims; RngI32 texture_mips; + Atomic32 texture_family; // Sampler info G_SamplerDesc sampler_desc; @@ -334,7 +335,8 @@ Struct(G_D12_Cmd) struct { G_TextureRef texture; - b32 acquire; + b32 release; + b32 alias; } barrier; struct @@ -424,6 +426,7 @@ Struct(G_D12_CmdList) Enum(G_D12_TrackedUsageKind) { G_D12_TrackedUsageKind_Untracked, + G_D12_TrackedUsageKind_Undefined, G_D12_TrackedUsageKind_Acquire, G_D12_TrackedUsageKind_DepthStencilRead, G_D12_TrackedUsageKind_DepthStencilReadWrite, @@ -690,6 +693,7 @@ DeclApiFromXList(G_D12_AgsApi, G_D12_AgsApiXList, "amd_ags_x64.dll"); G_D12_Arena *G_D12_ArenaFromHandle(G_ArenaHandle handle); G_D12_CmdList *G_D12_CmdListFromHandle(G_CommandListHandle handle); G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle); +G_D12_Backbuffer *G_D12_BackbufferFromHandle(G_BackbufferHandle handle); DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format); D3D12_BARRIER_LAYOUT G_D12_CommonLayoutFromQueueKind(G_QueueKind queue_kind); @@ -747,7 +751,7 @@ void G_D12_InsertEvent(ID3D12GraphicsCommandList7 *d3d_cl, G_D12_EventKind kind, //////////////////////////////////////////////////////////// //~ Tracking -void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resource *resource, RngI32 mips, G_D12_TrackedUsageKind usage_kind); +void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resource *resource, RngI32 mips, G_D12_TrackedUsageKind usage_kind, G_D12_Cmd *cmd); G_D12_BarrierInfo G_D12_BarrierInfoFromBatch(G_D12_CmdBatch *batch, G_QueueKind queue_kind); G_D12_BarrierInfo G_D12_BarrierInfoFromUsageKind(G_D12_TrackedUsageKind usage_kind, G_D12_Resource *resource, G_D12_BarrierInfo global, G_QueueKind queue_kind); diff --git a/src/gpu/gpu_shared.cgh b/src/gpu/gpu_shared.cgh index 5c867925..9401224b 100644 --- a/src/gpu/gpu_shared.cgh +++ b/src/gpu/gpu_shared.cgh @@ -84,10 +84,10 @@ Enum(G_BasicSamplerKind) //////////////////////////////////////////////////////////// //~ Index buffers -#define G_IB(_count, _buffer) ((G_IndexBufferDesc) { .count = (_count), .buffer = (_buffer), __VA_ARGS__ }) +#define G_IB(_count, _buffer, ...) ((G_IndexBufferDesc) { .count = (_count), .buffer = (_buffer), __VA_ARGS__ }) Struct(G_IndexBufferDesc) { - u64 count; + u32 count; G_BufferRef buffer; }; diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 682a4b3d..08b27a99 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -420,7 +420,6 @@ void V_TickForever(WaveLaneCtx *lane) .flags = G_MemoryFlag_Zero, .name = Lit("Tiles") ); - //- Init particle buffer gpu_particles = G_PushStructs( cl, gpu_perm, @@ -428,8 +427,7 @@ void V_TickForever(WaveLaneCtx *lane) .flags = G_MemoryFlag_Zero, .name = Lit("Particles") ); - - //- Init particle textures + //- Init particle layers for (V_ParticleLayer layer = 0; layer < V_ParticleLayer_COUNT; ++layer) { gpu_particle_cells[layer] = G_PushTexture2D( @@ -536,7 +534,7 @@ void V_TickForever(WaveLaneCtx *lane) while (!shutdown) { shutdown = Atomic32Fetch(&V.shutdown); - P_tl.debug_draw_enabled = TweakBool("Vis debug draw", 0); + P_tl.debug_draw_enabled = TweakBool("Vis debug draw", 1); ////////////////////////////// //- Begin frame @@ -2699,8 +2697,6 @@ void V_TickForever(WaveLaneCtx *lane) Vec2 hit_entry_normal = bullet->hit_entry_normal; Vec2 hit_entry_velocity = bullet->hit_entry_velocity; - V_DrawLine(start, end, Color_Cyan); - ////////////////////////////// //- Wall particles @@ -5067,168 +5063,169 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Begin gpu frame + Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1)); + Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y)); { ////////////////////////////// - //- Upload data to GPU + //- GPU upload pass - // Backdrop - frame->backdrop_parallax = TweakFloat("Backdrop parallax", 4, 0, 20); + G_ZoneDF(cl, "Setup") { - SPR_SheetKey sheet = SPR_SheetKeyFromResource(ResourceKeyFromStore(&P_Resources, Lit("backdrop.ase"))); - SPR_Sprite sprite = SPR_SpriteFromSheetEx(sheet, SPR_NilSpanKey, 0, SPR_SheetFlag_NoAtlas); - frame->backdrop_src = sprite.tex; - } - - // Tiles - { - for (P_TileKind tile_kind = 0; tile_kind < P_TileKind_COUNT; ++tile_kind) + // Backdrop + frame->backdrop_parallax = TweakFloat("Backdrop parallax", 4, 0, 20); { - SPR_Sprite tile_sprite = Zi; - { - String tile_name = P_NameFromTileKind(tile_kind); - String sheet_name = StringF(frame->arena, "tile/%F.ase", FmtString(tile_name)); - ResourceKey sheet_resource = ResourceKeyFromStore(&P_Resources, sheet_name); - SPR_SheetKey sheet = SPR_SheetKeyFromResource(sheet_resource); - tile_sprite = SPR_SpriteFromSheet(sheet, SPR_NilSpanKey, 0); - } - V_TileDesc tile_desc = Zi; - { - tile_desc.tex = tile_sprite.tex; - tile_desc.tex_slice_uv = DivRng2Vec2(tile_sprite.tex_rect, tile_sprite.tex_dims); - } - frame->tile_descs[tile_kind] = tile_desc; + SPR_SheetKey sheet = SPR_SheetKeyFromResource(ResourceKeyFromStore(&P_Resources, Lit("backdrop.ase"))); + SPR_Sprite sprite = SPR_SpriteFromSheetEx(sheet, SPR_NilSpanKey, 0, SPR_SheetFlag_NoAtlas); + frame->backdrop_src = sprite.tex; } - } - // Upload tiles - if (frame->tiles_dirty) - { - // LogDebugF("Uploading tiles to gpu"); - G_CopyCpuToTexture( - cl, - gpu_tiles, VEC3I32(0, 0, 0), - frame->local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1), - RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1)) - ); - } - - // Screen texture - frame->screen = G_PushTexture2D( - cl, gpu_frame_arena, - G_TextureLayout_Family, - G_Format_R16G16B16A16_Float, - frame->screen_dims, - .flags = G_MemoryFlag_AllowTextureRW | G_MemoryFlag_AllowTextureDraw, - .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)) - ); - Rng3 screen_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->screen_dims.x, frame->screen_dims.y, 1)); - Rng2 screen_scissor = RNG2(VEC2(screen_viewport.p0.x, screen_viewport.p0.y), VEC2(screen_viewport.p1.x, screen_viewport.p1.y)); - - // Albedo texture - frame->albedo = G_PushTexture2D( - cl, gpu_frame_arena, - G_TextureLayout_Family, - G_Format_R16G16B16A16_Float, - frame->screen_dims, - .flags = G_MemoryFlag_AllowTextureDraw, - .name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick)) - ); - - // Backdrop texture - frame->backdrop_chain = G_PushTexture2D( - cl, gpu_frame_arena, - G_TextureLayout_Family, - G_Format_R16G16B16A16_Float, - G_DimsFromMip2D(G_Count2D(frame->screen), 1), - .flags = G_MemoryFlag_AllowTextureRW, - .name = StringF(frame->arena, "Backdrop target [%F]", FmtSint(frame->tick)), - .max_mips = 4 - ); - - // Bloom texture - // TODO: We can re-use backdrop mip chain for this - frame->bloom_chain = G_PushTexture2D( - cl, gpu_frame_arena, - G_TextureLayout_Family, - G_Format_R16G16B16A16_Float, - G_DimsFromMip2D(G_Count2D(frame->screen), 1), - .flags = G_MemoryFlag_AllowTextureRW, - .name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)), - .max_mips = G_MaxMips - ); - - // Shade texture - frame->shade = G_PushTexture2D( - cl, gpu_frame_arena, - G_TextureLayout_Family, - G_Format_R16G16B16A16_Float, - frame->shade_dims, - .flags = G_MemoryFlag_AllowTextureRW, - .name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick)) - ); - Rng3 shade_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->shade_dims.x, frame->shade_dims.y, 1)); - Rng2 shade_scissor = RNG2(VEC2(shade_viewport.p0.x, shade_viewport.p0.y), VEC2(shade_viewport.p1.x, shade_viewport.p1.y)); - - // Quad buffers - u64 quads_count = ArenaCount(frame->quads_arena, V_Quad); - V_Quad *quads = ArenaFirst(frame->quads_arena, V_Quad); - frame->quads = G_PushStructsFromCpu( - cl, gpu_frame_arena, - quads, quads_count, - .name = StringF(frame->arena, "quads [%F]", FmtSint(frame->tick)) - ); - - // Debug verts - u64 dverts_count = ArenaCount(frame->dverts_arena, V_DVert); - V_DVert *dverts = ArenaFirst(frame->dverts_arena, V_DVert); - frame->dverts = G_PushStructsFromCpu( - cl, gpu_frame_arena, - dverts, dverts_count, - .name = StringF(frame->arena, "dverts [%F]", FmtSint(frame->tick)) - ); - - // Debug vert indices - frame->dvert_idxs.count = ArenaCount(frame->dvert_idxs_arena, u32); - frame->dvert_idxs.buffer = G_PushStructsFromCpu( - cl, gpu_frame_arena, - ArenaFirst(frame->dvert_idxs_arena, u32), frame->dvert_idxs.count, - .name = StringF(frame->arena, "dvert idxs [%F]", FmtSint(frame->tick)) - ); - - // Particles - G_BufferRef gpu_emitters = Zi; - { - // Flatten emitters list - V_Emitter *flattened_emitters = PushStructsNoZero(frame->arena, V_Emitter, frame->emitters_count); + // Tiles { - i64 emitter_idx = 0; - for (V_EmitterNode *en = frame->first_emitter_node; en; en = en->next) + for (P_TileKind tile_kind = 0; tile_kind < P_TileKind_COUNT; ++tile_kind) { - flattened_emitters[emitter_idx] = en->emitter; - ++emitter_idx; + SPR_Sprite tile_sprite = Zi; + { + String tile_name = P_NameFromTileKind(tile_kind); + String sheet_name = StringF(frame->arena, "tile/%F.ase", FmtString(tile_name)); + ResourceKey sheet_resource = ResourceKeyFromStore(&P_Resources, sheet_name); + SPR_SheetKey sheet = SPR_SheetKeyFromResource(sheet_resource); + tile_sprite = SPR_SpriteFromSheet(sheet, SPR_NilSpanKey, 0); + } + V_TileDesc tile_desc = Zi; + { + tile_desc.tex = tile_sprite.tex; + tile_desc.tex_slice_uv = DivRng2Vec2(tile_sprite.tex_rect, tile_sprite.tex_dims); + } + frame->tile_descs[tile_kind] = tile_desc; } } - frame->emitters = G_PushStructsFromCpu( + + // Upload tiles + if (frame->tiles_dirty) + { + // LogDebugF("Uploading tiles to gpu"); + G_CopyCpuToTexture( + cl, + gpu_tiles, VEC3I32(0, 0, 0), + frame->local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1), + RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1)) + ); + } + + // Screen texture + frame->screen = G_PushTexture2D( cl, gpu_frame_arena, - flattened_emitters, frame->emitters_count, - .name = StringF(frame->arena, "emitters [%F]", FmtSint(frame->tick)) + G_TextureLayout_Family, + G_Format_R16G16B16A16_Float, + frame->screen_dims, + .flags = G_MemoryFlag_AllowTextureRW | G_MemoryFlag_AllowTextureDraw, + .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)) ); + + // Albedo texture + frame->albedo = G_PushTexture2D( + cl, gpu_frame_arena, + G_TextureLayout_Family, + G_Format_R16G16B16A16_Float, + frame->screen_dims, + .flags = G_MemoryFlag_AllowTextureDraw, + .name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick)) + ); + + // Backdrop texture + frame->backdrop_chain = G_PushTexture2D( + cl, gpu_frame_arena, + G_TextureLayout_Family, + G_Format_R16G16B16A16_Float, + G_DimsFromMip2D(G_Count2D(frame->screen), 1), + .flags = G_MemoryFlag_AllowTextureRW, + .name = StringF(frame->arena, "Backdrop target [%F]", FmtSint(frame->tick)), + .max_mips = 4 + ); + + // Bloom texture + // TODO: We can re-use backdrop mip chain for this + frame->bloom_chain = G_PushTexture2D( + cl, gpu_frame_arena, + G_TextureLayout_Family, + G_Format_R16G16B16A16_Float, + G_DimsFromMip2D(G_Count2D(frame->screen), 1), + .flags = G_MemoryFlag_AllowTextureRW, + .name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)), + .max_mips = G_MaxMips + ); + + // Shade texture + frame->shade = G_PushTexture2D( + cl, gpu_frame_arena, + G_TextureLayout_Family, + G_Format_R16G16B16A16_Float, + frame->shade_dims, + .flags = G_MemoryFlag_AllowTextureRW, + .name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick)) + ); + Rng3 shade_viewport = RNG3(VEC3(0, 0, 0), VEC3(frame->shade_dims.x, frame->shade_dims.y, 1)); + Rng2 shade_scissor = RNG2(VEC2(shade_viewport.p0.x, shade_viewport.p0.y), VEC2(shade_viewport.p1.x, shade_viewport.p1.y)); + + // Quad buffers + frame->quads_count = ArenaCount(frame->quads_arena, V_Quad); + frame->quads = G_PushStructsFromCpu( + cl, gpu_frame_arena, + ArenaFirst(frame->quads_arena, V_Quad), frame->quads_count, + .name = StringF(frame->arena, "quads [%F]", FmtSint(frame->tick)) + ); + + // Debug verts + u64 dverts_count = ArenaCount(frame->dverts_arena, V_DVert); + V_DVert *dverts = ArenaFirst(frame->dverts_arena, V_DVert); + frame->dverts = G_PushStructsFromCpu( + cl, gpu_frame_arena, + dverts, dverts_count, + .name = StringF(frame->arena, "dverts [%F]", FmtSint(frame->tick)) + ); + + // Debug vert indices + frame->dvert_idxs.count = ArenaCount(frame->dvert_idxs_arena, u32); + frame->dvert_idxs.buffer = G_PushStructsFromCpu( + cl, gpu_frame_arena, + ArenaFirst(frame->dvert_idxs_arena, u32), frame->dvert_idxs.count, + .name = StringF(frame->arena, "dvert idxs [%F]", FmtSint(frame->tick)) + ); + + // Particles + G_BufferRef gpu_emitters = Zi; + { + // Flatten emitters list + V_Emitter *flattened_emitters = PushStructsNoZero(frame->arena, V_Emitter, frame->emitters_count); + { + i64 emitter_idx = 0; + for (V_EmitterNode *en = frame->first_emitter_node; en; en = en->next) + { + flattened_emitters[emitter_idx] = en->emitter; + ++emitter_idx; + } + } + frame->emitters = G_PushStructsFromCpu( + cl, gpu_frame_arena, + flattened_emitters, frame->emitters_count, + .name = StringF(frame->arena, "emitters [%F]", FmtSint(frame->tick)) + ); + } + + // Upload gpu frame + G_BufferRef gpu_frame = G_PushStructFromCpu( + cl, gpu_frame_arena, + &frame->shared_frame, + .name = StringF(frame->arena, "Gpu frame [%F]", FmtSint(frame->tick)) + ); + + // Set initial constants + V_GpuFlag gpu_flags = V_GpuFlag_None; + G_SetConstant(cl, V_GpuConst_Flags, gpu_flags); + G_SetConstant(cl, V_GpuConst_Frame, gpu_frame); + G_SetConstant(cl, V_GpuConst_NoiseTex, G_BasicNoise3D()); } - // Upload gpu frame - G_BufferRef gpu_frame = G_PushStructFromCpu( - cl, gpu_frame_arena, - &frame->shared_frame, - .name = StringF(frame->arena, "Gpu frame [%F]", FmtSint(frame->tick)) - ); - - // Set initial constants - V_GpuFlag gpu_flags = V_GpuFlag_None; - G_SetConstant(cl, V_GpuConst_Flags, gpu_flags); - G_SetConstant(cl, V_GpuConst_Frame, gpu_frame); - G_SetConstant(cl, V_GpuConst_NoiseTex, G_BasicNoise3D()); - - // Sync G_Sync(cl); ////////////////////////////// @@ -5274,24 +5271,23 @@ void V_TickForever(WaveLaneCtx *lane) G_Compute2D(cl, V_BackdropUpCS, up_dims); G_Sync(cl); - } + } } - - G_Sync(cl); } + G_Sync(cl); + ////////////////////////////// //- Quads & emitters pass G_ZoneDF(cl, "Quads & emitters") { - G_ClearRenderTarget(cl, frame->albedo, VEC4(0, 0, 0, 0), 0); - // Draw quads + G_ClearRenderTarget(cl, frame->albedo, VEC4(0, 0, 0, 0), 0); G_Draw( cl, V_QuadVS, V_QuadPS, - quads_count, G_QuadIndices(), + frame->quads_count, G_QuadIndices(), 1, &G_RT(frame->albedo, G_BlendMode_CompositeStraightAlpha), screen_viewport, screen_scissor, G_DrawMode_TriangleList @@ -5299,11 +5295,10 @@ void V_TickForever(WaveLaneCtx *lane) // Emit particles G_Compute(cl, V_EmitParticlesCS, frame->emitters_count); - - // Sync particles, occluders, & albedo - G_Sync(cl); } + G_Sync(cl); + ////////////////////////////// //- Particle simulation pass @@ -5311,11 +5306,10 @@ void V_TickForever(WaveLaneCtx *lane) { // Simulate particles G_Compute(cl, V_SimParticlesCS, V_ParticlesCap); - - // Sync cells - G_Sync(cl); } + G_Sync(cl); + ////////////////////////////// //- Shading pass @@ -5325,21 +5319,20 @@ void V_TickForever(WaveLaneCtx *lane) if (0) { G_Compute2D(cl, V_ShadeCS, frame->shade_dims); - - G_Sync(cl); } + G_Sync(cl); + ////////////////////////////// //- Composite pass G_ZoneDF(cl, "Composite") { G_Compute2D(cl, V_CompositeCS, frame->screen_dims); - - // Sync screen tex - G_Sync(cl); } + G_Sync(cl); + ////////////////////////////// //- Bloom passes @@ -5376,16 +5369,18 @@ void V_TickForever(WaveLaneCtx *lane) } } + G_SyncReleaseFamilyLayout(cl, frame->bloom_chain); + ////////////////////////////// //- Finalization pass G_ZoneDF(cl, "Finalize") { G_Compute2D(cl, V_FinalizeCS, frame->screen_dims); - - G_Sync(cl); } + G_Sync(cl); + ////////////////////////////// //- Debug shapes pass diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh index 64b3830a..c9a8e9d4 100644 --- a/src/pp/pp_vis/pp_vis_shared.cgh +++ b/src/pp/pp_vis/pp_vis_shared.cgh @@ -372,6 +372,8 @@ Struct(V_SharedFrame) G_IndexBufferDesc dvert_idxs; G_BufferRef dverts; + + u32 quads_count; G_BufferRef quads; };