From 98d849c3de26b2a5b2f292d4feab52e82ac79d88 Mon Sep 17 00:00:00 2001 From: jacob Date: Tue, 2 Dec 2025 16:19:10 -0600 Subject: [PATCH] assume non-uniform resource access in shaders by default --- src/base/base_gpu.h | 31 +++++++++--------------- src/base/base_math.h | 1 + src/base/base_util.h | 2 +- src/gpu/gpu_core.h | 24 +++++++++---------- src/gpu/gpu_dx12/gpu_dx12.c | 33 ++++++++++---------------- src/gpu/gpu_dx12/gpu_dx12.h | 2 +- src/meta/meta.c | 3 +++ src/proto/proto.c | 2 +- src/proto/proto_shaders.gpu | 13 ++++++---- src/proto/proto_shaders.h | 5 ++-- src/window/window_win32/window_win32.c | 2 +- 11 files changed, 56 insertions(+), 62 deletions(-) diff --git a/src/base/base_gpu.h b/src/base/base_gpu.h index 262723ed..bdc81ec3 100644 --- a/src/base/base_gpu.h +++ b/src/base/base_gpu.h @@ -24,27 +24,18 @@ typedef float4x4 Mat4x4; //////////////////////////////////////////////////////////// //~ Handle dereference -//- Uniform resource access -template StructuredBuffer StructuredBufferFromUniformHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[h.v]; } -template RWStructuredBuffer RWStructuredBufferFromUniformHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[h.v]; } -template Texture1D Texture1DFromUniformHandle(Texture1DHandle h) { return ResourceDescriptorHeap[h.v]; } -template RWTexture1D RWTexture1DFromUniformHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[h.v]; } -template Texture2D Texture2DFromUniformHandle(Texture2DHandle h) { return ResourceDescriptorHeap[h.v]; } -template RWTexture2D RWTexture2DFromUniformHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[h.v]; } -template Texture3D Texture3DFromUniformHandle(Texture3DHandle h) { return ResourceDescriptorHeap[h.v]; } -template RWTexture3D RWTexture3DFromUniformHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[h.v]; } -SamplerState SamplerStateFromUniformHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[h.v]; } +/* NOTE: Non-uniform resource access assumed as the default behavior */ +/* TODO: Add explicit "uniform" variants of handle deref operations for optimization on AMD devices */ -//- Non-uniform resource access -template StructuredBuffer StructuredBufferFromNonUniformHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template RWStructuredBuffer RWStructuredBufferFromNonUniformHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template Texture1D Texture1DFromNonUniformHandle(Texture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template RWTexture1D RWTexture1DFromNonUniformHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template Texture2D Texture2DFromNonUniformHandle(Texture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template RWTexture2D RWTexture2DFromNonUniformHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template Texture3D Texture3DFromNonUniformHandle(Texture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template RWTexture3D RWTexture3DFromNonUniformHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -SamplerState SamplerStateFromNonUniformHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template StructuredBuffer StructuredBufferFromHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template RWStructuredBuffer RWStructuredBufferFromHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template Texture1D Texture1DFromHandle(Texture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template RWTexture1D RWTexture1DFromHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template Texture2D Texture2DFromHandle(Texture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template RWTexture2D RWTexture2DFromHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template Texture3D Texture3DFromHandle(Texture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template RWTexture3D RWTexture3DFromHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +SamplerState SamplerStateFromHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[NonUniformResourceIndex(h.v)]; } //////////////////////////////////////////////////////////// //~ Texture dimension helpers diff --git a/src/base/base_math.h b/src/base/base_math.h index db7e106f..b7e3ffc2 100644 --- a/src/base/base_math.h +++ b/src/base/base_math.h @@ -284,6 +284,7 @@ f32 SrgbFromLinearF32(f32 lin); f32 LinearFromSrgbF32(f32 srgb); Vec4 LinearFromSrgb(Vec4 srgb); Vec4 SrgbFromLinear(Vec4 lin); +u32 LinearU32FromSrgb(Vec4 srgb); Vec4 BlendSrgb(Vec4 v0, Vec4 v1, f32 t); //////////////////////////////////////////////////////////// diff --git a/src/base/base_util.h b/src/base/base_util.h index dd11a0e6..0750ab46 100644 --- a/src/base/base_util.h +++ b/src/base/base_util.h @@ -60,7 +60,7 @@ Inline u64 HashFnv64(u64 seed, String s) } #define HashF(fmt_cstr, ...) HashF_(StringFromCstrNoLimit(fmt_cstr), __VA_ARGS__, FmtEnd) -u64 HashF_(String fmt, ...) +Inline u64 HashF_(String fmt, ...) { TempArena scratch = BeginScratchNoConflict(); u64 result = 0; diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index db5464e4..2c5cc2e8 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -614,6 +614,15 @@ void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc); .access_next = _access_next, \ }) +#define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \ + GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ + .is_global = 1, \ + .sync_prev = _sync_prev, \ + .sync_next = _sync_next, \ + .access_prev = _access_prev, \ + .access_next = _access_next, \ + }) + #define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \ GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ .resource = (_resource), \ @@ -624,24 +633,15 @@ void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc); .layout = _layout, \ }) -#define GPU_GlobalBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \ - GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ - .is_global = 1, \ - .sync_prev = _sync_prev, \ - .sync_next = _sync_next, \ - .access_prev = _access_prev, \ - .access_next = _access_next, \ - }) - #define GPU_DumbMemoryBarrier(_cl, _resource) \ GPU_MemoryBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) +#define GPU_DumbGlobalMemoryBarrier(_cl) \ + GPU_GlobalMemoryBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) + #define GPU_DumbLayoutBarrier(_cl, _resource, _layout) \ GPU_LayoutBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (_layout)) -#define GPU_DumbGlobalBarrier(_cl) \ - GPU_GlobalBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) - //- Compute void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups); diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index 60914303..fc711b2c 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -419,7 +419,7 @@ JobImpl(GPU_D12_LoadPipeline, sig, _) /* Create PSO */ ID3D12PipelineState *pso = 0; - if (ok && (!IsResourceNil(desc.vs.resource) != 0 || !IsResourceNil(desc.ps.resource))) + if (ok && (!IsResourceNil(desc.vs.resource) || !IsResourceNil(desc.ps.resource))) { D3D12_RASTERIZER_DESC raster_desc = ZI; if (desc.is_wireframe) @@ -736,19 +736,19 @@ void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl) //////////////////////////////////////////////////////////// //~ @hookimpl Fence hooks -Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind) -{ - GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); - return &queue->sync_fence; -} +// Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind) +// { +// GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); +// return &queue->sync_fence; +// } -void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value) -{ - GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a); - GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b); - ID3D12Fence *b_fence = queue_b->commit_fence; - ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value); -} +// void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value) +// { +// GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a); +// GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b); +// ID3D12Fence *b_fence = queue_b->commit_fence; +// ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value); +// } //////////////////////////////////////////////////////////// //~ @hookimpl Resource hooks @@ -2304,13 +2304,6 @@ GPU_Stats GPU_QueryStats(void) return result; } -GPU_Stats GPU_QuerySharedMemoryStats(void) -{ - GPU_D12_SharedState *g = &GPU_D12_shared_state; - GPU_Stats result = ZI; - return result; -} - //////////////////////////////////////////////////////////// //~ @hookimpl Swapchain diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index c4621c36..8f1efa99 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -339,7 +339,7 @@ Struct(GPU_D12_SharedState) //~ Helpers GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle); -GPU_D12_CmdList *GPU_D12_CommandListFromHandle(GPU_CommandListHandle handle); +GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle); GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle); GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle); diff --git a/src/meta/meta.c b/src/meta/meta.c index 69af812c..b682e1c7 100644 --- a/src/meta/meta.c +++ b/src/meta/meta.c @@ -989,6 +989,9 @@ JobImpl(Build, _, __) /* Disable warnings */ PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-initializer-overrides")); PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-microsoft-enum-forward-reference")); + PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-unused-variable")); + PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-unused-parameter")); + PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-incompatible-function-pointer-types")); } //- Dxc diff --git a/src/proto/proto.c b/src/proto/proto.c index 4e4e1638..f92b9a75 100644 --- a/src/proto/proto.c +++ b/src/proto/proto.c @@ -43,7 +43,6 @@ JobImpl(PR_RunForever, _sig, _id) /* Prep test pass */ { - final_target_rwhandle.v = 12; GPU_SetConstant(cl, PR_ShaderConst_TestTarget, final_target_rwhandle); GPU_SetConstant(cl, PR_ShaderConst_TestConst, 3.123); } @@ -89,6 +88,7 @@ JobImpl(PR_RunForever, _sig, _id) } } +void PR_Startup(void); void PR_Startup(void) { RunJob(PR_RunForever); diff --git a/src/proto/proto_shaders.gpu b/src/proto/proto_shaders.gpu index 60b367fd..c28e9a8e 100644 --- a/src/proto/proto_shaders.gpu +++ b/src/proto/proto_shaders.gpu @@ -1,15 +1,20 @@ //////////////////////////////////////////////////////////// //~ Test shader +Struct(TestStruct) +{ + i32 i; +}; + ComputeShader2D(PR_TestCS, 8, 8) { - RWTexture2D target_tex = RWTexture2DFromUniformHandle(PR_ShaderConst_TestTarget); + StructuredBuffer sb = StructuredBufferFromHandle(PR_ShaderConst_TestBuff); + + RWTexture2D target_tex = RWTexture2DFromHandle(PR_ShaderConst_TestTarget); Vec2U32 target_tex_size = Count2D(target_tex); - f32 testf = PR_ShaderConst_TestConst; - Vec2I32 id = SV_DispatchThreadID; - if ((id.x < target_tex_size.x && id.y < target_tex_size.y) || testf < 3) + if (id.x < target_tex_size.x && id.y < target_tex_size.y) { target_tex[id] = Vec4(0, 1, 0, 1); } diff --git a/src/proto/proto_shaders.h b/src/proto/proto_shaders.h index 56068190..35c5f9fe 100644 --- a/src/proto/proto_shaders.h +++ b/src/proto/proto_shaders.h @@ -1,5 +1,6 @@ //////////////////////////////////////////////////////////// //~ Constants -ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0); -ShaderConstant(f32, PR_ShaderConst_TestConst, 1); +ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0); +ShaderConstant(StructuredBufferHandle, PR_ShaderConst_TestBuff, 1); +ShaderConstant(f32, PR_ShaderConst_TestConst, 2); diff --git a/src/window/window_win32/window_win32.c b/src/window/window_win32/window_win32.c index e5a60024..c5961c09 100644 --- a/src/window/window_win32/window_win32.c +++ b/src/window/window_win32/window_win32.c @@ -201,7 +201,7 @@ LRESULT CALLBACK WND_W32_WindowProc(HWND hwnd, UINT msg, WPARAM wparam, LPARAM l //- Keyboard button case WM_SYSKEYUP: - case WM_SYSKEYDOWN:; + case WM_SYSKEYDOWN: case WM_KEYUP: case WM_KEYDOWN: {