replace granular gpu sync, access, & texture-layouts with queue-exclusive/shared layouts & tracked sync

This commit is contained in:
jacob 2026-03-05 07:07:00 -06:00
parent 0f296ac5fd
commit 3197f18495
14 changed files with 1124 additions and 1150 deletions

5
.gitattributes vendored
View File

@ -23,8 +23,3 @@
*.ttf filter=lfs diff=lfs merge=lfs -text *.ttf filter=lfs diff=lfs merge=lfs -text
*.mp3 filter=lfs diff=lfs merge=lfs -text *.mp3 filter=lfs diff=lfs merge=lfs -text
*.dat filter=lfs diff=lfs merge=lfs -text *.dat filter=lfs diff=lfs merge=lfs -text
##############################
#- Libraries
# vulkan/* filter=lfs diff=lfs merge=lfs -text

View File

@ -798,6 +798,7 @@ Inline u64 MixU64s(u64 seed_a, u64 seed_b)
String GetAppDirectory(void); String GetAppDirectory(void);
void Echo(String msg); void Echo(String msg);
b32 Panic(String msg); b32 Panic(String msg);
// Logs `msg` and shows a blocking warning message box. Returns non-zero when a debugger
// is attached and the user did not choose "Continue"; exits the process on "Cancel".
b32 DebugBreakPrompt(String title, String msg);
Callstack CaptureCallstack(u64 skip_frames); Callstack CaptureCallstack(u64 skip_frames);
b32 IsRunningInDebugger(void); b32 IsRunningInDebugger(void);
b32 IsRunningInWine(void); b32 IsRunningInWine(void);

View File

@ -96,6 +96,34 @@ b32 Panic(String msg)
return 0; return 0;
} }
// Logs `msg`, then shows a blocking warning message box with `title` as its caption.
//
// When a debugger is attached the box is created with MB_CANCELTRYCONTINUE;
// otherwise only the foreground/warning-icon flags are passed.
//
// - If the box returns IDCANCEL, the process is terminated with exit code 1.
// - Otherwise returns non-zero only when a debugger is attached and the result
//   was anything other than IDCONTINUE. Returns 0 when no debugger is attached.
b32 DebugBreakPrompt(String title, String msg)
{
  LogInfoF("[DEBUG BREAK PROMPT]: %F", FmtString(msg));

  // Wide-string copies live in scratch memory for the duration of the prompt
  TempArena scratch = BeginScratchNoConflict();
  wchar_t *wide_title = WstrFromString(scratch.arena, title);
  wchar_t *wide_msg = WstrFromString(scratch.arena, msg);

  // Only offer the cancel/try/continue choices when a debugger can act on them
  b32 debugger_attached = IsRunningInDebugger();
  u32 flags = MB_SETFOREGROUND | MB_ICONWARNING;
  if (debugger_attached)
  {
    flags |= MB_CANCELTRYCONTINUE;
  }

  i32 choice = MessageBoxExW(0, wide_msg, wide_title, flags, 0);
  if (choice == IDCANCEL)
  {
    ExitProcess(1);
  }

  i32 should_break = debugger_attached && choice != IDCONTINUE;
  EndScratch(scratch);
  return should_break;
}
Callstack CaptureCallstack(u64 skip_frames) Callstack CaptureCallstack(u64 skip_frames)
{ {
Callstack result; Callstack result;

View File

@ -27,5 +27,4 @@
@IncludeC gpu_common.c @IncludeC gpu_common.c
// @DefaultDownstream Win32 gpu_dx12 @DefaultDownstream Win32 gpu_dx12
@DefaultDownstream Win32 gpu_vk

View File

@ -189,112 +189,22 @@ Enum(G_Format)
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Memory sync types //~ Texture layout types
Enum(G_Stage)
{
G_Stage_None = 0,
// Compute stages
G_Stage_ComputeShading = (1 << 1),
// Draw stages
G_Stage_IndexAssembly = (1 << 2),
G_Stage_VertexShading = (1 << 3),
G_Stage_PixelShading = (1 << 4),
G_Stage_DepthStencil = (1 << 5),
G_Stage_RenderTarget = (1 << 6),
// Copy stages
G_Stage_Copy = (1 << 7),
// Indirect stages
G_Stage_Indirect = (1 << 8),
// Aggregate stages
G_Stage_Drawing = G_Stage_IndexAssembly |
G_Stage_VertexShading |
G_Stage_PixelShading |
G_Stage_DepthStencil |
G_Stage_RenderTarget,
G_Stage_Shading = G_Stage_ComputeShading |
G_Stage_VertexShading |
G_Stage_PixelShading,
G_Stage_All = 0xFFFFFFFF
};
Enum(G_Access)
{
G_Access_None = 0,
G_Access_ShaderReadWrite = (1 << 1),
G_Access_ShaderRead = (1 << 2),
G_Access_CopyWrite = (1 << 3),
G_Access_CopyRead = (1 << 4),
G_Access_DepthStencilRead = (1 << 5),
G_Access_DepthStencilWrite = (1 << 6),
G_Access_RenderTargetWrite = (1 << 7),
G_Access_IndexBuffer = (1 << 8),
G_Access_IndirectArgument = (1 << 9),
G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the stage specified in the barrier
};
Enum(G_Layout) Enum(G_Layout)
{ {
G_Layout_NoChange, // Supports any read access with up to 1 write access to non overlapping regions from any queue.
G_Layout_Undefined, // Cannot be transitioned to/from.
// Depth-stencil textures cannot use this layout.
G_Layout_Simultaneous,
////////////////////////////// // Supports present, shader-read, and copy-read/write in any queue kind.
//- Queue-agnostic // Transitionable from `G_Layout_Exclusive` in non-copy queue.
G_Layout_Common,
// Simultaneous layout allows a resource to be used on any queue with any // Supports any access in the current queue kind.
// access type (except depth-stencil). Resources cannot transition to/from // Transitionable from `G_Layout_Common` in non-copy queue.
// this layout, they must be created with it. Allows concurrent reads G_Layout_Exclusive,
// with up to 1 write to non-overlapping regions.
G_Layout_Simultaneous, // Any access except depth-stencil <-- D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS
G_Layout_Common, // ShaderRead/CopyRead/CopyWrite/Present <-- D3D12_BARRIER_LAYOUT_COMMON
//////////////////////////////
//- Direct queue
G_Layout_DirectQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON
G_Layout_DirectQueue_Read, // ShaderRead/CopyRead/DepthStencilRead <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ
G_Layout_DirectQueue_DepthStencil, // DepthStencilRead/DepthStencilWrite <-- D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE
G_Layout_DirectQueue_RenderTarget, // RenderTargetWrite <-- D3D12_BARRIER_LAYOUT_RENDER_TARGET
//////////////////////////////
//- Compute queue
G_Layout_ComputeQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON
//////////////////////////////
//- Direct & Compute queue
G_Layout_DirectComputeQueue_Read, // ShaderRead/CopyRead <-- D3D12_BARRIER_LAYOUT_GENERIC_READ
G_Layout_DirectComputeQueue_ShaderReadWrite, // ShaderReadWrite <-- D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS
G_Layout_DirectComputeQueue_CopyWrite, // CopyWrite <-- D3D12_BARRIER_LAYOUT_COPY_DEST
};
// Barrier will execute after stages specified by `stage_prev`, and before stages specified by `stage_next`.
// When barrier executes:
// - Necessary resource flushes will occur based on `access_prev` & `access_next`
// - Texture layout will transition based on `layout` (if specified)
Struct(G_MemoryBarrierDesc)
{
G_ResourceHandle resource;
b32 is_global;
G_Stage stage_prev;
G_Stage stage_next;
G_Access access_prev;
G_Access access_next;
G_Layout layout;
RngI32 mips; // Inclusive range of texture mip levels to sync
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -390,14 +300,14 @@ Enum(G_ResourceKind)
Enum(G_ResourceFlag) Enum(G_ResourceFlag)
{ {
G_ResourceFlag_None = 0, G_ResourceFlag_None = 0,
G_ResourceFlag_AllowShaderReadWrite = (1 << 0), G_ResourceFlag_AllowShaderReadWrite = (1 << 1),
G_ResourceFlag_AllowRenderTarget = (1 << 1), G_ResourceFlag_AllowRenderTarget = (1 << 2),
G_ResourceFlag_AllowDepthStencil = (1 << 2), G_ResourceFlag_AllowDepthStencil = (1 << 3),
G_ResourceFlag_ZeroMemory = (1 << 3), G_ResourceFlag_ZeroMemory = (1 << 4),
G_ResourceFlag_HostMemory = (1 << 4), // Resource will be mapped into the cpu's address space G_ResourceFlag_HostMemory = (1 << 5), // Resource will be mapped into the cpu's address space
G_ResourceFlag_Uncached = (1 << 5), // Cpu writes will be combined & reads will be uncached G_ResourceFlag_Uncached = (1 << 6), // Cpu writes will be combined & reads will be uncached
G_ResourceFlag_ForceNoReuse = (1 << 6), G_ResourceFlag_ForceNoReuse = (1 << 7),
}; };
Struct(G_BufferDesc) Struct(G_BufferDesc)
@ -454,18 +364,18 @@ Struct(G_RefDesc)
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Rasterization types //~ Draw types
Enum(G_RasterMode) Enum(G_DrawMode)
{ {
G_RasterMode_None, G_DrawMode_None,
G_RasterMode_PointList, G_DrawMode_PointList,
G_RasterMode_LineList, G_DrawMode_LineList,
G_RasterMode_LineStrip, G_DrawMode_LineStrip,
G_RasterMode_TriangleList, G_DrawMode_TriangleList,
G_RasterMode_TriangleStrip, G_DrawMode_TriangleStrip,
G_RasterMode_WireTriangleList, G_DrawMode_WireTriangleList,
G_RasterMode_WireTriangleStrip, G_DrawMode_WireTriangleStrip,
}; };
Enum(G_BlendMode) Enum(G_BlendMode)
@ -684,52 +594,10 @@ void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size
G_SetConstantEx((cl), (name), &__src, sizeof(__src)); \ G_SetConstantEx((cl), (name), &__src, sizeof(__src)); \
} while (0) } while (0)
//- Memory sync //- Sync
void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc); void G_Sync(G_CommandListHandle cl);
void G_SyncLayout(G_CommandListHandle cl, G_ResourceHandle resource, G_Layout layout);
#define G_MemorySync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, ...) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.resource = (_resource), \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.stage_next = _stage_next, \
.access_next = _access_next, \
.mips.max = G_MaxMips, \
__VA_ARGS__ \
})
#define G_MemoryLayoutSync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, _layout, ...) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.resource = (_resource), \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.stage_next = _stage_next, \
.access_next = _access_next, \
.layout = _layout, \
.mips.max = G_MaxMips, \
__VA_ARGS__ \
})
#define G_GlobalMemorySync(_cl, _stage_prev, _access_prev, _stage_next, _access_next, ...) \
G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \
.is_global = 1, \
.stage_prev = _stage_prev, \
.access_prev = _access_prev, \
.stage_next = _stage_next, \
.access_next = _access_next, \
.mips.max = G_MaxMips, \
__VA_ARGS__ \
})
#define G_DumbMemorySync(cl, resource, ...) \
G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__)
#define G_DumbMemoryLayoutSync(cl, resource, layout, ...) \
G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout), __VA_ARGS__)
#define G_DumbGlobalMemorySync(cl, ...) \
G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__)
//- Compute //- Compute
@ -739,25 +607,21 @@ void G_ComputeEx(G_CommandListHandle cl, ComputeShaderDesc cs, Vec3I32 threads);
#define G_Compute2D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, 1)) #define G_Compute2D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, 1))
#define G_Compute3D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, (threads).z)) #define G_Compute3D(cl, cs, threads) G_ComputeEx((cl), (cs), VEC3I32((threads).x, (threads).y, (threads).z))
//- Rasterize //- Draw
void G_Rasterize( void G_Draw(
G_CommandListHandle cl, G_CommandListHandle cl,
VertexShaderDesc vs, PixelShaderDesc ps, VertexShaderDesc vs, PixelShaderDesc ps,
u32 instances_count, G_IndexBufferDesc index_buffer, u32 instances_count, G_IndexBufferDesc index_buffer,
u32 render_targets_count, G_RenderTargetDesc *render_targets, u32 render_targets_count, G_RenderTargetDesc *render_targets,
Rng3 viewport, Rng2 scissor, Rng3 viewport, Rng2 scissor,
G_RasterMode raster_mode G_DrawMode draw_mode
); );
//- Clear //- Clear
void G_ClearRenderTarget(G_CommandListHandle cl, G_ResourceHandle render_target, Vec4 color, i32 mip); void G_ClearRenderTarget(G_CommandListHandle cl, G_ResourceHandle render_target, Vec4 color, i32 mip);
//- Log
void G_LogResource(G_CommandListHandle cl, G_ResourceHandle resource);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Queue synchronization //~ @hookdecl Queue synchronization

File diff suppressed because it is too large Load Diff

View File

@ -87,7 +87,6 @@ Struct(G_D12_Resource)
G_Format texture_format; G_Format texture_format;
Vec3I32 texture_dims; Vec3I32 texture_dims;
i32 texture_mips; i32 texture_mips;
D3D12_BARRIER_LAYOUT cmdlist_texture_layouts[G_D12_MaxMips];
// Sampler info // Sampler info
G_SamplerDesc sampler_desc; G_SamplerDesc sampler_desc;
@ -306,16 +305,14 @@ Enum(G_D12_CmdKind)
G_D12_CmdKind_CopyBytes, G_D12_CmdKind_CopyBytes,
G_D12_CmdKind_CopyTexels, G_D12_CmdKind_CopyTexels,
G_D12_CmdKind_Compute, G_D12_CmdKind_Compute,
G_D12_CmdKind_Rasterize, G_D12_CmdKind_Draw,
G_D12_CmdKind_ClearRtv, G_D12_CmdKind_ClearRtv,
G_D12_CmdKind_Log,
G_D12_CmdKind_Discard, G_D12_CmdKind_Discard,
}; };
Struct(G_D12_Cmd) Struct(G_D12_Cmd)
{ {
G_D12_CmdKind kind; G_D12_CmdKind kind;
b32 skip;
union union
{ {
struct struct
@ -326,11 +323,8 @@ Struct(G_D12_Cmd)
struct struct
{ {
G_MemoryBarrierDesc desc; G_D12_Resource *resource;
b32 to_exclusive;
// Post-batch data
b32 is_end_of_batch;
u64 batch_gen;
} barrier; } barrier;
struct struct
@ -366,20 +360,15 @@ Struct(G_D12_Cmd)
G_RenderTargetDesc render_target_descs[G_MaxRenderTargets]; G_RenderTargetDesc render_target_descs[G_MaxRenderTargets];
Rng3 viewport; Rng3 viewport;
Rng2 scissor; Rng2 scissor;
G_RasterMode raster_mode; G_DrawMode draw_mode;
} rasterize; } draw;
struct
{
G_D12_Resource *render_target;
Vec4 color;
i32 mip;
} clear_rtv;
struct struct
{ {
G_D12_Resource *resource; G_D12_Resource *resource;
} log; Vec4 color;
i32 mip;
} clear_rtv;
struct struct
{ {
@ -413,6 +402,86 @@ Struct(G_D12_CmdList)
u64 cmds_count; u64 cmds_count;
}; };
////////////////////////////////////////////////////////////
//~ Command batching types
// Fixed bin count for tracked-resource lookup.
// NOTE(review): presumably the length of `tracked_resource_bins` in G_D12_CmdBatch - confirm at the allocation site.
// TODO: Use a dynamic bin count, since the maximum number of tracked resources in the list is known at command list creation time
#define G_D12_TrackedResourceBinsCount 64
// Usage kinds recorded per resource mip while batching commands (stored in
// G_D12_TrackedMip and G_D12_TransitionNode). G_D12_BarrierLayoutFromUsageKind
// takes one of these together with a queue kind and returns a D3D12_BARRIER_LAYOUT.
// NOTE(review): the per-value notes below are inferred from the enumerator names -
// confirm against the batching implementation.
Enum(G_D12_TrackedUsageKind)
{
G_D12_TrackedUsageKind_Untracked, // First enumerator; presumably "no usage recorded yet"
G_D12_TrackedUsageKind_MakeExclusive, // Presumably a requested transition to G_Layout_Exclusive
G_D12_TrackedUsageKind_DepthStencilRead, // Presumably read-only depth-stencil use
G_D12_TrackedUsageKind_DepthStencilReadWrite, // Presumably read/write depth-stencil use
G_D12_TrackedUsageKind_RenderTarget, // Presumably use as a render target
G_D12_TrackedUsageKind_MakeCommon, // Presumably a requested transition to G_Layout_Common
};
// Singly-linked list node describing a usage change for a range of mips of one resource.
// G_D12_CmdBatch keeps a list of these (`first_transition` / `last_transition`).
Struct(G_D12_TransitionNode)
{
G_D12_TransitionNode *next; // Next transition in the list
G_D12_Resource *resource; // Resource being transitioned
G_D12_TrackedUsageKind old; // Usage before the transition
G_D12_TrackedUsageKind new; // Usage after the transition
RngI32 mips; // Mip range affected - NOTE(review): presumably inclusive, confirm
};
// Tracked usage for a single mip level; G_D12_TrackedResourceNode holds one per mip.
Struct(G_D12_TrackedMip)
{
G_D12_TrackedUsageKind prev_usage; // NOTE(review): presumably the usage recorded before `usage` was set - confirm
G_D12_TrackedUsageKind usage; // Most recently recorded usage
};
// Per-resource tracking entry with one G_D12_TrackedMip slot per possible mip level.
// The node carries two link fields: `next` and `next_in_bin`.
// NOTE(review): presumably `next` chains the batch-wide list in G_D12_CmdBatch and
// `next_in_bin` chains entries within a G_D12_TrackedResourceBin - confirm.
Struct(G_D12_TrackedResourceNode)
{
G_D12_TrackedResourceNode *next;
G_D12_TrackedResourceNode *next_in_bin;
u64 hash; // NOTE(review): presumably the hash used to select the bin - confirm how it is derived
G_D12_Resource *resource; // Resource this entry tracks
G_D12_TrackedMip mips[G_MaxMips]; // Tracked usage, indexed by mip level
};
// Head of a chain of tracked-resource nodes.
// NOTE(review): presumably a hash bucket linked through `next_in_bin` - confirm.
Struct(G_D12_TrackedResourceBin)
{
G_D12_TrackedResourceNode *first;
};
// Singly-linked list node referencing one command; G_D12_CmdBatch keeps a list of
// these (`first_cmd` / `last_cmd`).
Struct(G_D12_BatchedCmdNode)
{
G_D12_BatchedCmdNode *next; // Next command node in the batch
G_D12_Cmd *cmd; // Referenced command (pointer only; the command is stored elsewhere)
};
// A batch of commands plus the resource-usage tracking state gathered for it.
// Holds three first/last linked lists with counts (tracked resources, transitions,
// commands), a pointer to tracked-resource bins, and a set of `contains_*` flags.
// NOTE(review): how batches are split and when the flags are set is not visible
// here - confirm against the command list submission code.
Struct(G_D12_CmdBatch)
{
G_D12_CmdBatch *next; // Next batch in the list
// Tracked resources: list + bins
u64 tracked_resources_count;
G_D12_TrackedResourceNode *first_tracked_resource;
G_D12_TrackedResourceNode *last_tracked_resource;
G_D12_TrackedResourceBin *tracked_resource_bins; // NOTE(review): presumably G_D12_TrackedResourceBinsCount entries - confirm
// Usage transitions recorded for this batch
u64 transitions_count;
G_D12_TransitionNode *first_transition;
G_D12_TransitionNode *last_transition;
// Commands belonging to this batch
u64 cmds_count;
G_D12_BatchedCmdNode *first_cmd;
G_D12_BatchedCmdNode *last_cmd;
// Batch content flags - NOTE(review): presumably set as commands of each category are added; confirm
b32 contains_hazard;
b32 contains_compute_shader;
b32 contains_draw_shader;
b32 contains_rtv;
b32 contains_dsv;
b32 contains_indirect;
b32 contains_copy;
};
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Swapchain types //~ Swapchain types
@ -521,10 +590,7 @@ G_D12_Resource *G_D12_ResourceFromHandle(G_ResourceHandle handle);
G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle); G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle);
DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format); DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format);
D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages); D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromUsageKind(G_QueueKind queue_kind, G_D12_TrackedUsageKind usage_kind);
D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses);
D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout);
String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout);
void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip); void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip);
@ -565,6 +631,18 @@ G_D12_Descriptor *G_D12_PushDescriptor(G_D12_Arena *gpu_arena, G_D12_DescriptorH
G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl); G_D12_Cmd *G_D12_PushCmd(G_D12_CmdList *cl);
G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v); G_D12_Cmd *G_D12_PushConstCmd(G_D12_CmdList *cl, i32 slot, void *v);
G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size); G_D12_StagingRegionNode *G_D12_PushStagingRegion(G_D12_CmdList *cl, u64 size);
// NOTE(review): definition not visible here. Going by the signature, this presumably records
// `usage_kind` for the `mips` range of `resource` in `batch`, allocating from `arena` - confirm.
void G_D12_UpdateTrackedUsage(Arena *arena, G_D12_CmdBatch *batch, G_D12_Resource *resource, RngI32 mips, G_D12_TrackedUsageKind usage_kind);
////////////////////////////////////////////////////////////
//~ Debug
// Debug message callback taking a D3D12 message category, severity, id, a C-string
// description, and a user context pointer.
// NOTE(review): the parameter list has the shape of D3D12's message callback
// (D3D12MessageFunc); presumably registered through the D3D12 info queue - confirm
// at the registration site.
void G_D12_DebugCallback(
D3D12_MESSAGE_CATEGORY category,
D3D12_MESSAGE_SEVERITY severity,
D3D12_MESSAGE_ID id,
LPCSTR description_cstr,
void *context
);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Collection worker //~ Collection worker

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -426,7 +426,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R8_Uint, G_Format_R8_Uint,
tiles_dims, tiles_dims,
G_Layout_DirectQueue_General, G_Layout_Exclusive,
.flags = G_ResourceFlag_ZeroMemory, .flags = G_ResourceFlag_ZeroMemory,
.name = Lit("Tiles") .name = Lit("Tiles")
); );
@ -451,7 +451,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R32_Uint, G_Format_R32_Uint,
cells_dims, cells_dims,
G_Layout_DirectQueue_General, G_Layout_Exclusive,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(perm, "Particle cells - layer %F", FmtSint(layer)) .name = StringF(perm, "Particle cells - layer %F", FmtSint(layer))
); );
@ -464,7 +464,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R32_Uint, G_Format_R32_Uint,
cells_dims, cells_dims,
G_Layout_DirectQueue_General, G_Layout_Exclusive,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(perm, "Particle densities - layer %F", FmtSint(layer)) .name = StringF(perm, "Particle densities - layer %F", FmtSint(layer))
); );
@ -479,7 +479,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
cells_dims, cells_dims,
G_Layout_DirectQueue_General, G_Layout_Exclusive,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Stains") .name = Lit("Stains")
); );
@ -491,7 +491,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
cells_dims, cells_dims,
G_Layout_DirectQueue_General, G_Layout_Exclusive,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Dry stains") .name = Lit("Dry stains")
); );
@ -503,7 +503,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R32_Float, G_Format_R32_Float,
cells_dims, cells_dims,
G_Layout_DirectQueue_General, G_Layout_Exclusive,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Drynesses") .name = Lit("Drynesses")
); );
@ -515,7 +515,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_perm, cl, gpu_perm, cl,
G_Format_R32_Uint, G_Format_R32_Uint,
cells_dims, cells_dims,
G_Layout_DirectQueue_General, G_Layout_Exclusive,
.flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite,
.name = Lit("Occluders cells") .name = Lit("Occluders cells")
); );
@ -5153,7 +5153,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_frame_arena, cl, gpu_frame_arena, cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
frame->screen_dims, frame->screen_dims,
G_Layout_DirectQueue_General, G_Layout_Exclusive,
.flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)) .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick))
); );
@ -5166,7 +5166,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_frame_arena, cl, gpu_frame_arena, cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
frame->screen_dims, frame->screen_dims,
G_Layout_DirectQueue_RenderTarget, G_Layout_Exclusive,
.flags = G_ResourceFlag_AllowRenderTarget, .flags = G_ResourceFlag_AllowRenderTarget,
.name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick)) .name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick))
); );
@ -5177,7 +5177,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_frame_arena, cl, gpu_frame_arena, cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(screen_target), 1), G_DimsFromMip2D(G_Count2D(screen_target), 1),
G_Layout_DirectQueue_General, G_Layout_Exclusive,
.flags = G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(frame->arena, "Backdrop target [%F]", FmtSint(frame->tick)), .name = StringF(frame->arena, "Backdrop target [%F]", FmtSint(frame->tick)),
.max_mips = 4 .max_mips = 4
@ -5193,7 +5193,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_frame_arena, cl, gpu_frame_arena, cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
G_DimsFromMip2D(G_Count2D(screen_target), 1), G_DimsFromMip2D(G_Count2D(screen_target), 1),
G_Layout_DirectQueue_General, G_Layout_Exclusive,
.flags = G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)), .name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)),
.max_mips = G_MaxMips .max_mips = G_MaxMips
@ -5208,7 +5208,7 @@ void V_TickForever(WaveLaneCtx *lane)
gpu_frame_arena, cl, gpu_frame_arena, cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
frame->shade_dims, frame->shade_dims,
G_Layout_DirectQueue_General, G_Layout_Exclusive,
.flags = G_ResourceFlag_AllowShaderReadWrite, .flags = G_ResourceFlag_AllowShaderReadWrite,
.name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick)) .name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick))
); );
@ -5274,7 +5274,7 @@ void V_TickForever(WaveLaneCtx *lane)
G_SetConstant(cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture()); G_SetConstant(cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture());
// Sync // Sync
G_DumbGlobalMemorySync(cl); G_Sync(cl);
////////////////////////////// //////////////////////////////
//- Initialization pass //- Initialization pass
@ -5306,7 +5306,7 @@ void V_TickForever(WaveLaneCtx *lane)
G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx);
G_Compute2D(cl, V_BackdropDownCS, down_dims); G_Compute2D(cl, V_BackdropDownCS, down_dims);
G_DumbGlobalMemorySync(cl); G_Sync(cl);
} }
//- Upsample passes //- Upsample passes
@ -5317,11 +5317,11 @@ void V_TickForever(WaveLaneCtx *lane)
G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx);
G_Compute2D(cl, V_BackdropUpCS, up_dims); G_Compute2D(cl, V_BackdropUpCS, up_dims);
G_DumbGlobalMemorySync(cl); G_Sync(cl);
} }
} }
G_DumbGlobalMemorySync(cl); G_Sync(cl);
} }
////////////////////////////// //////////////////////////////
@ -5331,21 +5331,20 @@ void V_TickForever(WaveLaneCtx *lane)
G_ClearRenderTarget(cl, albedo_target, VEC4(0, 0, 0, 0), 0); G_ClearRenderTarget(cl, albedo_target, VEC4(0, 0, 0, 0), 0);
// Draw quads // Draw quads
G_Rasterize( G_Draw(
cl, cl,
V_QuadVS, V_QuadPS, V_QuadVS, V_QuadPS,
G_CountBuffer(quads_buff, V_Quad), G_QuadIndices(), G_CountBuffer(quads_buff, V_Quad), G_QuadIndices(),
1, &G_Rt(albedo_target, G_BlendMode_CompositeStraightAlpha), 1, &G_Rt(albedo_target, G_BlendMode_CompositeStraightAlpha),
screen_viewport, screen_scissor, screen_viewport, screen_scissor,
G_RasterMode_TriangleList G_DrawMode_TriangleList
); );
// Emit particles // Emit particles
G_Compute(cl, V_EmitParticlesCS, frame->emitters_count); G_Compute(cl, V_EmitParticlesCS, frame->emitters_count);
// Sync particles, occluders, & albedo // Sync particles, occluders, & albedo
G_DumbGlobalMemorySync(cl); G_Sync(cl);
G_DumbMemoryLayoutSync(cl, albedo_target, G_Layout_DirectQueue_General);
} }
////////////////////////////// //////////////////////////////
@ -5356,7 +5355,7 @@ void V_TickForever(WaveLaneCtx *lane)
G_Compute(cl, V_SimParticlesCS, V_ParticlesCap); G_Compute(cl, V_SimParticlesCS, V_ParticlesCap);
// Sync cells // Sync cells
G_DumbGlobalMemorySync(cl); G_Sync(cl);
} }
////////////////////////////// //////////////////////////////
@ -5368,7 +5367,7 @@ void V_TickForever(WaveLaneCtx *lane)
{ {
G_Compute2D(cl, V_ShadeCS, frame->shade_dims); G_Compute2D(cl, V_ShadeCS, frame->shade_dims);
G_DumbGlobalMemorySync(cl); G_Sync(cl);
} }
////////////////////////////// //////////////////////////////
@ -5378,7 +5377,7 @@ void V_TickForever(WaveLaneCtx *lane)
G_Compute2D(cl, V_CompositeCS, frame->screen_dims); G_Compute2D(cl, V_CompositeCS, frame->screen_dims);
// Sync screen tex // Sync screen tex
G_DumbGlobalMemorySync(cl); G_Sync(cl);
} }
////////////////////////////// //////////////////////////////
@ -5400,7 +5399,7 @@ void V_TickForever(WaveLaneCtx *lane)
G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx);
G_Compute2D(cl, V_BloomDownCS, down_dims); G_Compute2D(cl, V_BloomDownCS, down_dims);
G_DumbGlobalMemorySync(cl); G_Sync(cl);
} }
//- Upsample passes //- Upsample passes
@ -5411,7 +5410,7 @@ void V_TickForever(WaveLaneCtx *lane)
G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx); G_SetConstant(cl, V_GpuConst_MipIdx, mip_idx);
G_Compute2D(cl, V_BloomUpCS, up_dims); G_Compute2D(cl, V_BloomUpCS, up_dims);
G_DumbGlobalMemorySync(cl); G_Sync(cl);
} }
} }
@ -5421,7 +5420,7 @@ void V_TickForever(WaveLaneCtx *lane)
{ {
G_Compute2D(cl, V_FinalizeCS, frame->screen_dims); G_Compute2D(cl, V_FinalizeCS, frame->screen_dims);
G_DumbGlobalMemorySync(cl); G_Sync(cl);
} }
////////////////////////////// //////////////////////////////
@ -5429,18 +5428,14 @@ void V_TickForever(WaveLaneCtx *lane)
if (dvert_idxs_ib.count > 0) if (dvert_idxs_ib.count > 0)
{ {
G_DumbMemoryLayoutSync(cl, screen_target, G_Layout_DirectQueue_RenderTarget); G_Draw(
G_Rasterize(
cl, cl,
V_DVertVS, V_DVertPS, V_DVertVS, V_DVertPS,
1, dvert_idxs_ib, 1, dvert_idxs_ib,
1, &G_Rt(screen_target, G_BlendMode_CompositeStraightAlpha), 1, &G_Rt(screen_target, G_BlendMode_CompositeStraightAlpha),
screen_viewport, screen_scissor, screen_viewport, screen_scissor,
G_RasterMode_TriangleList G_DrawMode_TriangleList
); );
G_DumbMemoryLayoutSync(cl, screen_target, G_Layout_DirectQueue_General);
} }
////////////////////////////// //////////////////////////////

View File

@ -5,6 +5,8 @@ void PT_RunForever(WaveLaneCtx *lane)
for (;;) for (;;)
{ {
WND_Frame window_frame = WND_BeginFrame(G_Format_R16G16B16A16_Float, WND_BackbufferSizeMode_MatchWindow); WND_Frame window_frame = WND_BeginFrame(G_Format_R16G16B16A16_Float, WND_BackbufferSizeMode_MatchWindow);
G_ResourceHandle backbuffer = window_frame.backbuffer;
for (u64 cev_idx = 0; cev_idx < window_frame.controller_events.count; ++cev_idx) for (u64 cev_idx = 0; cev_idx < window_frame.controller_events.count; ++cev_idx)
{ {
ControllerEvent *cev = &window_frame.controller_events.events[cev_idx]; ControllerEvent *cev = &window_frame.controller_events.events[cev_idx];
@ -14,65 +16,57 @@ void PT_RunForever(WaveLaneCtx *lane)
} }
} }
G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct);
{ {
G_SyncLayout(cl, backbuffer, G_Layout_Exclusive);
Vec2I32 final_target_size = window_frame.draw_size;
G_ResourceHandle final_target_res = G_PushTexture2D(
gpu_frame_arena, cl,
G_Format_R16G16B16A16_Float,
final_target_size,
G_Layout_Exclusive,
.flags = G_ResourceFlag_AllowShaderReadWrite
);
G_Texture2DRef final_target = G_PushTexture2DRef(gpu_frame_arena, final_target_res);
// Prep test pass
{ {
G_CommandListHandle cl = G_PrepareCommandList(G_QueueKind_Direct); G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target);
{ G_SetConstant(cl, PT_ShaderConst_TestConst, 3.123);
Vec2I32 final_target_size = window_frame.draw_size; G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicSamplerFromKind(G_BasicSamplerKind_PointClamp));
G_ResourceHandle final_target_res = G_PushTexture2D( G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target);
gpu_frame_arena, cl, G_SetConstant(cl, PT_ShaderConst_NoiseTex, G_BasicNoiseTexture());
G_Format_R16G16B16A16_Float,
final_target_size,
G_Layout_DirectQueue_General,
.flags = G_ResourceFlag_AllowShaderReadWrite
);
G_Texture2DRef final_target = G_PushTexture2DRef(gpu_frame_arena, final_target_res);
// Prep test pass
{
G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target);
G_SetConstant(cl, PT_ShaderConst_TestConst, 3.123);
G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicSamplerFromKind(G_BasicSamplerKind_PointClamp));
G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target);
G_SetConstant(cl, PT_ShaderConst_NoiseTex, G_BasicNoiseTexture());
}
// Test pass
{
G_Compute2D(cl, PT_TestCS, final_target_size);
}
G_DumbMemorySync(cl, final_target_res);
// Prep blit pass
{
G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_DirectQueue_RenderTarget);
}
// Blit pass
{
G_Rasterize(
cl,
PT_BlitVS, PT_BlitPS,
1, G_QuadIndices(),
1, &G_Rt(window_frame.backbuffer, G_BlendMode_CompositeStraightAlpha),
G_ViewportFromTexture(window_frame.backbuffer), G_ScissorFromTexture(window_frame.backbuffer),
G_RasterMode_TriangleList
);
}
// Finalize backbuffer layout
{
G_DumbMemoryLayoutSync(cl, window_frame.backbuffer, G_Layout_Common);
}
// Reset
{
G_ResetArena(cl, gpu_frame_arena);
}
}
G_CommitCommandList(cl);
} }
// Test pass
{
G_Compute2D(cl, PT_TestCS, final_target_size);
G_Sync(cl);
}
// Blit pass
{
G_Draw(
cl,
PT_BlitVS, PT_BlitPS,
1, G_QuadIndices(),
1, &G_Rt(backbuffer, G_BlendMode_CompositeStraightAlpha),
G_ViewportFromTexture(backbuffer), G_ScissorFromTexture(backbuffer),
G_DrawMode_TriangleList
);
}
// Reset
{
G_ResetArena(cl, gpu_frame_arena);
}
G_SyncLayout(cl, backbuffer, G_Layout_Common);
} }
G_CommitCommandList(cl);
WND_EndFrame(window_frame, 1); WND_EndFrame(window_frame, 1);
} }
} }

View File

@ -1708,7 +1708,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
UI.gpu_frame_arena, UI.cl, UI.gpu_frame_arena, UI.cl,
G_Format_R16G16B16A16_Float, G_Format_R16G16B16A16_Float,
monitor_size, monitor_size,
G_Layout_DirectQueue_RenderTarget, G_Layout_Exclusive,
.flags = G_ResourceFlag_AllowRenderTarget, .flags = G_ResourceFlag_AllowRenderTarget,
.name = Lit("UI draw target") .name = Lit("UI draw target")
); );
@ -1740,11 +1740,11 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
); );
G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(UI.gpu_frame_arena, params_buff, UI_GpuParams); G_StructuredBufferRef params_ro = G_PushStructuredBufferRef(UI.gpu_frame_arena, params_buff, UI_GpuParams);
// Initial constants // Init constants
G_SetConstant(UI.cl, UI_GpuConst_Params, params_ro); G_SetConstant(UI.cl, UI_GpuConst_Params, params_ro);
// Sync // Sync
G_DumbGlobalMemorySync(UI.cl); G_Sync(UI.cl);
////////////////////////////// //////////////////////////////
//- Dispatch shaders //- Dispatch shaders
@ -1760,47 +1760,44 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync)
if (rects_count > 0) if (rects_count > 0)
{ {
// Render rects // Render rects
G_Rasterize( G_Draw(
UI.cl, UI.cl,
UI_DRectVS, UI_DRectPS, UI_DRectVS, UI_DRectPS,
rects_count, G_QuadIndices(), rects_count, G_QuadIndices(),
1, &G_Rt(draw_target, G_BlendMode_CompositePremultipliedAlpha), 1, &G_Rt(draw_target, G_BlendMode_CompositePremultipliedAlpha),
draw_viewport, draw_scissor, draw_viewport, draw_scissor,
G_RasterMode_TriangleList G_DrawMode_TriangleList
); );
// Render rect wireframes // Render rect wireframes
if (AnyBit(frame->frame_flags, UI_FrameFlag_Debug)) if (AnyBit(frame->frame_flags, UI_FrameFlag_Debug))
{ {
G_SetConstant(UI.cl, UI_GpuConst_DebugDraw, 1); G_SetConstant(UI.cl, UI_GpuConst_DebugDraw, 1);
G_Rasterize( G_Draw(
UI.cl, UI.cl,
UI_DRectVS, UI_DRectPS, UI_DRectVS, UI_DRectPS,
rects_count, G_QuadIndices(), rects_count, G_QuadIndices(),
1, &G_Rt(draw_target, G_BlendMode_CompositePremultipliedAlpha), 1, &G_Rt(draw_target, G_BlendMode_CompositePremultipliedAlpha),
draw_viewport, draw_scissor, draw_viewport, draw_scissor,
G_RasterMode_WireTriangleList G_DrawMode_WireTriangleList
); );
} }
} }
//- Backbuffer blit pass //- Backbuffer blit pass
G_DumbMemoryLayoutSync(UI.cl, draw_target, G_Layout_DirectQueue_Read); G_SyncLayout(UI.cl, backbuffer, G_Layout_Exclusive);
G_DumbMemoryLayoutSync(UI.cl, backbuffer, G_Layout_DirectQueue_RenderTarget);
{ {
G_Rasterize( G_Draw(
UI.cl, UI.cl,
UI_BlitVS, UI_BlitPS, UI_BlitVS, UI_BlitPS,
1, G_QuadIndices(), 1, G_QuadIndices(),
1, &G_Rt(backbuffer, G_BlendMode_Opaque), 1, &G_Rt(backbuffer, G_BlendMode_Opaque),
monitor_viewport, monitor_scissor, monitor_viewport, monitor_scissor,
G_RasterMode_TriangleList G_DrawMode_TriangleList
); );
} }
G_SyncLayout(UI.cl, backbuffer, G_Layout_Common);
G_DumbMemoryLayoutSync(UI.cl, backbuffer, G_Layout_Common);
} }
////////////////////////////// //////////////////////////////