assume non-uniform resource access in shaders by default

This commit is contained in:
jacob 2025-12-02 16:19:10 -06:00
parent 8fbcb004fb
commit 98d849c3de
11 changed files with 56 additions and 62 deletions

View File

@ -24,27 +24,18 @@ typedef float4x4 Mat4x4;
////////////////////////////////////////////////////////////
//~ Handle dereference
//- Uniform resource access
/* Each helper indexes the descriptor heap directly with the handle's `v` field
 * (no NonUniformResourceIndex wrapper) and returns the entry as the resource
 * type named by the function. Buffer/texture handles read from
 * ResourceDescriptorHeap; the sampler handle reads from SamplerDescriptorHeap. */
template<typename T> StructuredBuffer<T> StructuredBufferFromUniformHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> RWStructuredBuffer<T> RWStructuredBufferFromUniformHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> Texture1D<T> Texture1DFromUniformHandle(Texture1DHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> RWTexture1D<T> RWTexture1DFromUniformHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> Texture2D<T> Texture2DFromUniformHandle(Texture2DHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> RWTexture2D<T> RWTexture2DFromUniformHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> Texture3D<T> Texture3DFromUniformHandle(Texture3DHandle h) { return ResourceDescriptorHeap[h.v]; }
template<typename T> RWTexture3D<T> RWTexture3DFromUniformHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[h.v]; }
SamplerState SamplerStateFromUniformHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[h.v]; }
/* NOTE: Non-uniform resource access assumed as the default behavior */
/* TODO: Add explicit "uniform" variants of handle deref operations for optimization on AMD devices */
//- Non-uniform resource access
/* Each helper wraps the handle's `v` field in NonUniformResourceIndex before
 * indexing the descriptor heap, and returns the entry as the resource type
 * named by the function. Buffer/texture handles read from
 * ResourceDescriptorHeap; the sampler handle reads from SamplerDescriptorHeap. */
template<typename T> StructuredBuffer<T> StructuredBufferFromNonUniformHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWStructuredBuffer<T> RWStructuredBufferFromNonUniformHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> Texture1D<T> Texture1DFromNonUniformHandle(Texture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWTexture1D<T> RWTexture1DFromNonUniformHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> Texture2D<T> Texture2DFromNonUniformHandle(Texture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWTexture2D<T> RWTexture2DFromNonUniformHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> Texture3D<T> Texture3DFromNonUniformHandle(Texture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWTexture3D<T> RWTexture3DFromNonUniformHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
SamplerState SamplerStateFromNonUniformHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[NonUniformResourceIndex(h.v)]; }
/* Handle dereference helpers. Each one wraps the handle's `v` field in
 * NonUniformResourceIndex, indexes the descriptor heap with it, and returns the
 * entry as the resource type named by the function. Buffer/texture handles read
 * from ResourceDescriptorHeap; the sampler handle reads from
 * SamplerDescriptorHeap. The element type T must be supplied explicitly by the
 * caller since it cannot be deduced from the handle argument. */
template<typename T> StructuredBuffer<T> StructuredBufferFromHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWStructuredBuffer<T> RWStructuredBufferFromHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> Texture1D<T> Texture1DFromHandle(Texture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWTexture1D<T> RWTexture1DFromHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> Texture2D<T> Texture2DFromHandle(Texture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWTexture2D<T> RWTexture2DFromHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> Texture3D<T> Texture3DFromHandle(Texture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
template<typename T> RWTexture3D<T> RWTexture3DFromHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
SamplerState SamplerStateFromHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[NonUniformResourceIndex(h.v)]; }
////////////////////////////////////////////////////////////
//~ Texture dimension helpers

View File

@ -284,6 +284,7 @@ f32 SrgbFromLinearF32(f32 lin);
f32 LinearFromSrgbF32(f32 srgb);
Vec4 LinearFromSrgb(Vec4 srgb);
Vec4 SrgbFromLinear(Vec4 lin);
u32 LinearU32FromSrgb(Vec4 srgb);
Vec4 BlendSrgb(Vec4 v0, Vec4 v1, f32 t);
////////////////////////////////////////////////////////////

View File

@ -60,7 +60,7 @@ Inline u64 HashFnv64(u64 seed, String s)
}
#define HashF(fmt_cstr, ...) HashF_(StringFromCstrNoLimit(fmt_cstr), __VA_ARGS__, FmtEnd)
u64 HashF_(String fmt, ...)
Inline u64 HashF_(String fmt, ...)
{
TempArena scratch = BeginScratchNoConflict();
u64 result = 0;

View File

@ -614,6 +614,15 @@ void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc);
.access_next = _access_next, \
})
/* Records a barrier on command list `_cl` by calling GPU_BarrierEx with a
 * GPU_BarrierDesc that has `is_global` set and no `resource` field specified.
 * The four sync/access arguments are forwarded unchanged into the descriptor
 * fields of the same name. */
#define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
.is_global = 1, \
.sync_prev = _sync_prev, \
.sync_next = _sync_next, \
.access_prev = _access_prev, \
.access_next = _access_next, \
})
#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
.resource = (_resource), \
@ -624,24 +633,15 @@ void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc);
.layout = _layout, \
})
/* Records a barrier on command list `_cl` by calling GPU_BarrierEx with a
 * GPU_BarrierDesc that has `is_global` set and no `resource` field specified.
 * The four sync/access arguments are forwarded unchanged into the descriptor
 * fields of the same name. */
#define GPU_GlobalBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
.is_global = 1, \
.sync_prev = _sync_prev, \
.sync_next = _sync_next, \
.access_prev = _access_prev, \
.access_next = _access_next, \
})
/* "Dumb" barrier shorthands: each forwards to the corresponding full barrier
 * macro with GPU_Stage_All / GPU_Access_All supplied for both the previous and
 * the next sync/access arguments, so the caller only names the command list
 * (plus the resource and/or layout where the underlying macro takes one). */
#define GPU_DumbMemoryBarrier(_cl, _resource) \
GPU_MemoryBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
#define GPU_DumbGlobalMemoryBarrier(_cl) \
GPU_GlobalMemoryBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
#define GPU_DumbLayoutBarrier(_cl, _resource, _layout) \
GPU_LayoutBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (_layout))
#define GPU_DumbGlobalBarrier(_cl) \
GPU_GlobalBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
//- Compute
void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups);

View File

@ -419,7 +419,7 @@ JobImpl(GPU_D12_LoadPipeline, sig, _)
/* Create PSO */
ID3D12PipelineState *pso = 0;
if (ok && (!IsResourceNil(desc.vs.resource) != 0 || !IsResourceNil(desc.ps.resource)))
if (ok && (!IsResourceNil(desc.vs.resource) || !IsResourceNil(desc.ps.resource)))
{
D3D12_RASTERIZER_DESC raster_desc = ZI;
if (desc.is_wireframe)
@ -736,19 +736,19 @@ void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl)
////////////////////////////////////////////////////////////
//~ @hookimpl Fence hooks
/* Looks up the D3D12 queue for `queue_kind` and returns a pointer to that
 * queue's `sync_fence` member. The pointer refers to storage inside the queue
 * object itself, not a copy. */
Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind)
{
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
return &queue->sync_fence;
}
// Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind)
// {
// GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
// return &queue->sync_fence;
// }
/* Makes queue `a` wait on queue `b`: issues ID3D12CommandQueue_Wait on queue
 * a's `d3d_queue`, passing queue b's `commit_fence` and `b_target_fence_value`
 * as the fence value to wait for. */
void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value)
{
GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a);
GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b);
/* Note: the wait targets b's commit_fence (not its sync_fence) */
ID3D12Fence *b_fence = queue_b->commit_fence;
ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value);
}
// void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value)
// {
// GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a);
// GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b);
// ID3D12Fence *b_fence = queue_b->commit_fence;
// ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value);
// }
////////////////////////////////////////////////////////////
//~ @hookimpl Resource hooks
@ -2304,13 +2304,6 @@ GPU_Stats GPU_QueryStats(void)
return result;
}
/* Stub: returns a GPU_Stats value exactly as initialized by ZI, without
 * filling in any fields. */
GPU_Stats GPU_QuerySharedMemoryStats(void)
{
/* NOTE(review): `g` is fetched but never read in this function */
GPU_D12_SharedState *g = &GPU_D12_shared_state;
GPU_Stats result = ZI;
return result;
}
////////////////////////////////////////////////////////////
//~ @hookimpl Swapchain

View File

@ -339,7 +339,7 @@ Struct(GPU_D12_SharedState)
//~ Helpers
GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle);
GPU_D12_CmdList *GPU_D12_CommandListFromHandle(GPU_CommandListHandle handle);
GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle);
GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle);
GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle);

View File

@ -989,6 +989,9 @@ JobImpl(Build, _, __)
/* Disable warnings */
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-initializer-overrides"));
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-microsoft-enum-forward-reference"));
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-unused-variable"));
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-unused-parameter"));
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-incompatible-function-pointer-types"));
}
//- Dxc

View File

@ -43,7 +43,6 @@ JobImpl(PR_RunForever, _sig, _id)
/* Prep test pass */
{
final_target_rwhandle.v = 12;
GPU_SetConstant(cl, PR_ShaderConst_TestTarget, final_target_rwhandle);
GPU_SetConstant(cl, PR_ShaderConst_TestConst, 3.123);
}
@ -89,6 +88,7 @@ JobImpl(PR_RunForever, _sig, _id)
}
}
void PR_Startup(void);
void PR_Startup(void)
{
RunJob(PR_RunForever);

View File

@ -1,15 +1,20 @@
////////////////////////////////////////////////////////////
//~ Test shader
/* Element type for the test structured buffer that PR_TestCS fetches via
 * StructuredBufferFromHandle<TestStruct>; holds a single i32. */
Struct(TestStruct)
{
i32 i;
};
ComputeShader2D(PR_TestCS, 8, 8)
{
RWTexture2D<Vec4> target_tex = RWTexture2DFromUniformHandle<Vec4>(PR_ShaderConst_TestTarget);
StructuredBuffer<TestStruct> sb = StructuredBufferFromHandle<TestStruct>(PR_ShaderConst_TestBuff);
RWTexture2D<Vec4> target_tex = RWTexture2DFromHandle<Vec4>(PR_ShaderConst_TestTarget);
Vec2U32 target_tex_size = Count2D(target_tex);
f32 testf = PR_ShaderConst_TestConst;
Vec2I32 id = SV_DispatchThreadID;
if ((id.x < target_tex_size.x && id.y < target_tex_size.y) || testf < 3)
if (id.x < target_tex_size.x && id.y < target_tex_size.y)
{
target_tex[id] = Vec4(0, 1, 0, 1);
}

View File

@ -1,5 +1,6 @@
////////////////////////////////////////////////////////////
//~ Constants
/* Test-pass shader constants: (type, name, index).
 * NOTE(review): the third argument is presumably the constant's slot index -
 * confirm against the ShaderConstant macro definition. */
ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0);
ShaderConstant(f32, PR_ShaderConst_TestConst, 1);
/* Test-pass shader constants: (type, name, index). PR_TestCS reads
 * TestTarget as its output texture handle, TestBuff as a structured-buffer
 * handle, and TestConst as an f32.
 * NOTE(review): the third argument is presumably the constant's slot index -
 * confirm against the ShaderConstant macro definition. */
ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0);
ShaderConstant(StructuredBufferHandle, PR_ShaderConst_TestBuff, 1);
ShaderConstant(f32, PR_ShaderConst_TestConst, 2);

View File

@ -201,7 +201,7 @@ LRESULT CALLBACK WND_W32_WindowProc(HWND hwnd, UINT msg, WPARAM wparam, LPARAM l
//- Keyboard button
case WM_SYSKEYUP:
case WM_SYSKEYDOWN:;
case WM_SYSKEYDOWN:
case WM_KEYUP:
case WM_KEYDOWN:
{