assume non-uniform resource access in shaders by default

This commit is contained in:
jacob 2025-12-02 16:19:10 -06:00
parent 8fbcb004fb
commit 98d849c3de
11 changed files with 56 additions and 62 deletions

View File

@ -24,27 +24,18 @@ typedef float4x4 Mat4x4;
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Handle dereference //~ Handle dereference
//- Uniform resource access /* NOTE: Non-uniform resource access assumed as the default behavior */
template<typename T> StructuredBuffer<T> StructuredBufferFromUniformHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[h.v]; } /* TODO: Add explicit "uniform" variants of handle deref operations for optimization on AMD devices */
template<typename T> RWStructuredBuffer<T> RWStructuredBufferFromUniformHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> Texture1D<T> Texture1DFromUniformHandle(Texture1DHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> RWTexture1D<T> RWTexture1DFromUniformHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> Texture2D<T> Texture2DFromUniformHandle(Texture2DHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> RWTexture2D<T> RWTexture2DFromUniformHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> Texture3D<T> Texture3DFromUniformHandle(Texture3DHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> RWTexture3D<T> RWTexture3DFromUniformHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[h.v]; }
SamplerState SamplerStateFromUniformHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[h.v]; }
//- Non-uniform resource access template<typename T> StructuredBuffer<T> StructuredBufferFromHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> StructuredBuffer<T> StructuredBufferFromNonUniformHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } template<typename T> RWStructuredBuffer<T> RWStructuredBufferFromHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWStructuredBuffer<T> RWStructuredBufferFromNonUniformHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } template<typename T> Texture1D<T> Texture1DFromHandle(Texture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> Texture1D<T> Texture1DFromNonUniformHandle(Texture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } template<typename T> RWTexture1D<T> RWTexture1DFromHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWTexture1D<T> RWTexture1DFromNonUniformHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } template<typename T> Texture2D<T> Texture2DFromHandle(Texture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> Texture2D<T> Texture2DFromNonUniformHandle(Texture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } template<typename T> RWTexture2D<T> RWTexture2DFromHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWTexture2D<T> RWTexture2DFromNonUniformHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } template<typename T> Texture3D<T> Texture3DFromHandle(Texture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> Texture3D<T> Texture3DFromNonUniformHandle(Texture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } template<typename T> RWTexture3D<T> RWTexture3DFromHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWTexture3D<T> RWTexture3DFromNonUniformHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } SamplerState SamplerStateFromHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[NonUniformResourceIndex(h.v)]; }
SamplerState SamplerStateFromNonUniformHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[NonUniformResourceIndex(h.v)]; }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Texture dimension helpers //~ Texture dimension helpers

View File

@ -284,6 +284,7 @@ f32 SrgbFromLinearF32(f32 lin);
f32 LinearFromSrgbF32(f32 srgb); f32 LinearFromSrgbF32(f32 srgb);
Vec4 LinearFromSrgb(Vec4 srgb); Vec4 LinearFromSrgb(Vec4 srgb);
Vec4 SrgbFromLinear(Vec4 lin); Vec4 SrgbFromLinear(Vec4 lin);
u32 LinearU32FromSrgb(Vec4 srgb);
Vec4 BlendSrgb(Vec4 v0, Vec4 v1, f32 t); Vec4 BlendSrgb(Vec4 v0, Vec4 v1, f32 t);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////

View File

@ -60,7 +60,7 @@ Inline u64 HashFnv64(u64 seed, String s)
} }
#define HashF(fmt_cstr, ...) HashF_(StringFromCstrNoLimit(fmt_cstr), __VA_ARGS__, FmtEnd) #define HashF(fmt_cstr, ...) HashF_(StringFromCstrNoLimit(fmt_cstr), __VA_ARGS__, FmtEnd)
u64 HashF_(String fmt, ...) Inline u64 HashF_(String fmt, ...)
{ {
TempArena scratch = BeginScratchNoConflict(); TempArena scratch = BeginScratchNoConflict();
u64 result = 0; u64 result = 0;

View File

@ -614,6 +614,15 @@ void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc);
.access_next = _access_next, \ .access_next = _access_next, \
}) })
#define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
.is_global = 1, \
.sync_prev = _sync_prev, \
.sync_next = _sync_next, \
.access_prev = _access_prev, \
.access_next = _access_next, \
})
#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \ #define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
.resource = (_resource), \ .resource = (_resource), \
@ -624,24 +633,15 @@ void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc);
.layout = _layout, \ .layout = _layout, \
}) })
#define GPU_GlobalBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
.is_global = 1, \
.sync_prev = _sync_prev, \
.sync_next = _sync_next, \
.access_prev = _access_prev, \
.access_next = _access_next, \
})
#define GPU_DumbMemoryBarrier(_cl, _resource) \ #define GPU_DumbMemoryBarrier(_cl, _resource) \
GPU_MemoryBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) GPU_MemoryBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
#define GPU_DumbGlobalMemoryBarrier(_cl) \
GPU_GlobalMemoryBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
#define GPU_DumbLayoutBarrier(_cl, _resource, _layout) \ #define GPU_DumbLayoutBarrier(_cl, _resource, _layout) \
GPU_LayoutBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (_layout)) GPU_LayoutBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (_layout))
#define GPU_DumbGlobalBarrier(_cl) \
GPU_GlobalBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
//- Compute //- Compute
void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups); void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups);

View File

@ -419,7 +419,7 @@ JobImpl(GPU_D12_LoadPipeline, sig, _)
/* Create PSO */ /* Create PSO */
ID3D12PipelineState *pso = 0; ID3D12PipelineState *pso = 0;
if (ok && (!IsResourceNil(desc.vs.resource) != 0 || !IsResourceNil(desc.ps.resource))) if (ok && (!IsResourceNil(desc.vs.resource) || !IsResourceNil(desc.ps.resource)))
{ {
D3D12_RASTERIZER_DESC raster_desc = ZI; D3D12_RASTERIZER_DESC raster_desc = ZI;
if (desc.is_wireframe) if (desc.is_wireframe)
@ -736,19 +736,19 @@ void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookimpl Fence hooks //~ @hookimpl Fence hooks
Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind) // Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind)
{ // {
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); // GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
return &queue->sync_fence; // return &queue->sync_fence;
} // }
void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value) // void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value)
{ // {
GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a); // GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a);
GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b); // GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b);
ID3D12Fence *b_fence = queue_b->commit_fence; // ID3D12Fence *b_fence = queue_b->commit_fence;
ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value); // ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value);
} // }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookimpl Resource hooks //~ @hookimpl Resource hooks
@ -2304,13 +2304,6 @@ GPU_Stats GPU_QueryStats(void)
return result; return result;
} }
GPU_Stats GPU_QuerySharedMemoryStats(void)
{
GPU_D12_SharedState *g = &GPU_D12_shared_state;
GPU_Stats result = ZI;
return result;
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookimpl Swapchain //~ @hookimpl Swapchain

View File

@ -339,7 +339,7 @@ Struct(GPU_D12_SharedState)
//~ Helpers //~ Helpers
GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle); GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle);
GPU_D12_CmdList *GPU_D12_CommandListFromHandle(GPU_CommandListHandle handle); GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle);
GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle); GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle);
GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle); GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle);

View File

@ -989,6 +989,9 @@ JobImpl(Build, _, __)
/* Disable warnings */ /* Disable warnings */
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-initializer-overrides")); PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-initializer-overrides"));
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-microsoft-enum-forward-reference")); PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-microsoft-enum-forward-reference"));
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-unused-variable"));
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-unused-parameter"));
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-incompatible-function-pointer-types"));
} }
//- Dxc //- Dxc

View File

@ -43,7 +43,6 @@ JobImpl(PR_RunForever, _sig, _id)
/* Prep test pass */ /* Prep test pass */
{ {
final_target_rwhandle.v = 12;
GPU_SetConstant(cl, PR_ShaderConst_TestTarget, final_target_rwhandle); GPU_SetConstant(cl, PR_ShaderConst_TestTarget, final_target_rwhandle);
GPU_SetConstant(cl, PR_ShaderConst_TestConst, 3.123); GPU_SetConstant(cl, PR_ShaderConst_TestConst, 3.123);
} }
@ -89,6 +88,7 @@ JobImpl(PR_RunForever, _sig, _id)
} }
} }
void PR_Startup(void);
void PR_Startup(void) void PR_Startup(void)
{ {
RunJob(PR_RunForever); RunJob(PR_RunForever);

View File

@ -1,15 +1,20 @@
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Test shader //~ Test shader
Struct(TestStruct)
{
i32 i;
};
ComputeShader2D(PR_TestCS, 8, 8) ComputeShader2D(PR_TestCS, 8, 8)
{ {
RWTexture2D<Vec4> target_tex = RWTexture2DFromUniformHandle<Vec4>(PR_ShaderConst_TestTarget); StructuredBuffer<TestStruct> sb = StructuredBufferFromHandle<TestStruct>(PR_ShaderConst_TestBuff);
RWTexture2D<Vec4> target_tex = RWTexture2DFromHandle<Vec4>(PR_ShaderConst_TestTarget);
Vec2U32 target_tex_size = Count2D(target_tex); Vec2U32 target_tex_size = Count2D(target_tex);
f32 testf = PR_ShaderConst_TestConst;
Vec2I32 id = SV_DispatchThreadID; Vec2I32 id = SV_DispatchThreadID;
if ((id.x < target_tex_size.x && id.y < target_tex_size.y) || testf < 3) if (id.x < target_tex_size.x && id.y < target_tex_size.y)
{ {
target_tex[id] = Vec4(0, 1, 0, 1); target_tex[id] = Vec4(0, 1, 0, 1);
} }

View File

@ -1,5 +1,6 @@
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Constants //~ Constants
ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0); ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0);
ShaderConstant(f32, PR_ShaderConst_TestConst, 1); ShaderConstant(StructuredBufferHandle, PR_ShaderConst_TestBuff, 1);
ShaderConstant(f32, PR_ShaderConst_TestConst, 2);

View File

@ -201,7 +201,7 @@ LRESULT CALLBACK WND_W32_WindowProc(HWND hwnd, UINT msg, WPARAM wparam, LPARAM l
//- Keyboard button //- Keyboard button
case WM_SYSKEYUP: case WM_SYSKEYUP:
case WM_SYSKEYDOWN:; case WM_SYSKEYDOWN:
case WM_KEYUP: case WM_KEYUP:
case WM_KEYDOWN: case WM_KEYDOWN:
{ {