assume non-uniform resource access in shaders by default
This commit is contained in:
parent
8fbcb004fb
commit
98d849c3de
@ -24,27 +24,18 @@ typedef float4x4 Mat4x4;
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Handle dereference
|
//~ Handle dereference
|
||||||
|
|
||||||
//- Uniform resource access
|
/* NOTE: Non-uniform resource access assumed as the default behavior */
|
||||||
template<typename T> StructuredBuffer<T> StructuredBufferFromUniformHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[h.v]; }
|
/* TODO: Add explicit "uniform" variants of handle deref operations for optimization on AMD devices */
|
||||||
template<typename T> RWStructuredBuffer<T> RWStructuredBufferFromUniformHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[h.v]; }
|
|
||||||
template<typename T> Texture1D<T> Texture1DFromUniformHandle(Texture1DHandle h) { return ResourceDescriptorHeap[h.v]; }
|
|
||||||
template<typename T> RWTexture1D<T> RWTexture1DFromUniformHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[h.v]; }
|
|
||||||
template<typename T> Texture2D<T> Texture2DFromUniformHandle(Texture2DHandle h) { return ResourceDescriptorHeap[h.v]; }
|
|
||||||
template<typename T> RWTexture2D<T> RWTexture2DFromUniformHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[h.v]; }
|
|
||||||
template<typename T> Texture3D<T> Texture3DFromUniformHandle(Texture3DHandle h) { return ResourceDescriptorHeap[h.v]; }
|
|
||||||
template<typename T> RWTexture3D<T> RWTexture3DFromUniformHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[h.v]; }
|
|
||||||
SamplerState SamplerStateFromUniformHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[h.v]; }
|
|
||||||
|
|
||||||
//- Non-uniform resource access
|
template<typename T> StructuredBuffer<T> StructuredBufferFromHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
||||||
template<typename T> StructuredBuffer<T> StructuredBufferFromNonUniformHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
template<typename T> RWStructuredBuffer<T> RWStructuredBufferFromHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
||||||
template<typename T> RWStructuredBuffer<T> RWStructuredBufferFromNonUniformHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
template<typename T> Texture1D<T> Texture1DFromHandle(Texture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
||||||
template<typename T> Texture1D<T> Texture1DFromNonUniformHandle(Texture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
template<typename T> RWTexture1D<T> RWTexture1DFromHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
||||||
template<typename T> RWTexture1D<T> RWTexture1DFromNonUniformHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
template<typename T> Texture2D<T> Texture2DFromHandle(Texture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
||||||
template<typename T> Texture2D<T> Texture2DFromNonUniformHandle(Texture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
template<typename T> RWTexture2D<T> RWTexture2DFromHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
||||||
template<typename T> RWTexture2D<T> RWTexture2DFromNonUniformHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
template<typename T> Texture3D<T> Texture3DFromHandle(Texture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
||||||
template<typename T> Texture3D<T> Texture3DFromNonUniformHandle(Texture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
template<typename T> RWTexture3D<T> RWTexture3DFromHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
||||||
template<typename T> RWTexture3D<T> RWTexture3DFromNonUniformHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
SamplerState SamplerStateFromHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
||||||
SamplerState SamplerStateFromNonUniformHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[NonUniformResourceIndex(h.v)]; }
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Texture dimension helpers
|
//~ Texture dimension helpers
|
||||||
|
|||||||
@ -284,6 +284,7 @@ f32 SrgbFromLinearF32(f32 lin);
|
|||||||
f32 LinearFromSrgbF32(f32 srgb);
|
f32 LinearFromSrgbF32(f32 srgb);
|
||||||
Vec4 LinearFromSrgb(Vec4 srgb);
|
Vec4 LinearFromSrgb(Vec4 srgb);
|
||||||
Vec4 SrgbFromLinear(Vec4 lin);
|
Vec4 SrgbFromLinear(Vec4 lin);
|
||||||
|
u32 LinearU32FromSrgb(Vec4 srgb);
|
||||||
Vec4 BlendSrgb(Vec4 v0, Vec4 v1, f32 t);
|
Vec4 BlendSrgb(Vec4 v0, Vec4 v1, f32 t);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
|
|||||||
@ -60,7 +60,7 @@ Inline u64 HashFnv64(u64 seed, String s)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define HashF(fmt_cstr, ...) HashF_(StringFromCstrNoLimit(fmt_cstr), __VA_ARGS__, FmtEnd)
|
#define HashF(fmt_cstr, ...) HashF_(StringFromCstrNoLimit(fmt_cstr), __VA_ARGS__, FmtEnd)
|
||||||
u64 HashF_(String fmt, ...)
|
Inline u64 HashF_(String fmt, ...)
|
||||||
{
|
{
|
||||||
TempArena scratch = BeginScratchNoConflict();
|
TempArena scratch = BeginScratchNoConflict();
|
||||||
u64 result = 0;
|
u64 result = 0;
|
||||||
|
|||||||
@ -614,6 +614,15 @@ void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc);
|
|||||||
.access_next = _access_next, \
|
.access_next = _access_next, \
|
||||||
})
|
})
|
||||||
|
|
||||||
|
#define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
|
||||||
|
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
|
||||||
|
.is_global = 1, \
|
||||||
|
.sync_prev = _sync_prev, \
|
||||||
|
.sync_next = _sync_next, \
|
||||||
|
.access_prev = _access_prev, \
|
||||||
|
.access_next = _access_next, \
|
||||||
|
})
|
||||||
|
|
||||||
#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
|
#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
|
||||||
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
|
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
|
||||||
.resource = (_resource), \
|
.resource = (_resource), \
|
||||||
@ -624,24 +633,15 @@ void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc);
|
|||||||
.layout = _layout, \
|
.layout = _layout, \
|
||||||
})
|
})
|
||||||
|
|
||||||
#define GPU_GlobalBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
|
|
||||||
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
|
|
||||||
.is_global = 1, \
|
|
||||||
.sync_prev = _sync_prev, \
|
|
||||||
.sync_next = _sync_next, \
|
|
||||||
.access_prev = _access_prev, \
|
|
||||||
.access_next = _access_next, \
|
|
||||||
})
|
|
||||||
|
|
||||||
#define GPU_DumbMemoryBarrier(_cl, _resource) \
|
#define GPU_DumbMemoryBarrier(_cl, _resource) \
|
||||||
GPU_MemoryBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
|
GPU_MemoryBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
|
||||||
|
|
||||||
|
#define GPU_DumbGlobalMemoryBarrier(_cl) \
|
||||||
|
GPU_GlobalMemoryBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
|
||||||
|
|
||||||
#define GPU_DumbLayoutBarrier(_cl, _resource, _layout) \
|
#define GPU_DumbLayoutBarrier(_cl, _resource, _layout) \
|
||||||
GPU_LayoutBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (_layout))
|
GPU_LayoutBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (_layout))
|
||||||
|
|
||||||
#define GPU_DumbGlobalBarrier(_cl) \
|
|
||||||
GPU_GlobalBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
|
|
||||||
|
|
||||||
//- Compute
|
//- Compute
|
||||||
|
|
||||||
void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups);
|
void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups);
|
||||||
|
|||||||
@ -419,7 +419,7 @@ JobImpl(GPU_D12_LoadPipeline, sig, _)
|
|||||||
|
|
||||||
/* Create PSO */
|
/* Create PSO */
|
||||||
ID3D12PipelineState *pso = 0;
|
ID3D12PipelineState *pso = 0;
|
||||||
if (ok && (!IsResourceNil(desc.vs.resource) != 0 || !IsResourceNil(desc.ps.resource)))
|
if (ok && (!IsResourceNil(desc.vs.resource) || !IsResourceNil(desc.ps.resource)))
|
||||||
{
|
{
|
||||||
D3D12_RASTERIZER_DESC raster_desc = ZI;
|
D3D12_RASTERIZER_DESC raster_desc = ZI;
|
||||||
if (desc.is_wireframe)
|
if (desc.is_wireframe)
|
||||||
@ -736,19 +736,19 @@ void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl)
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ @hookimpl Fence hooks
|
//~ @hookimpl Fence hooks
|
||||||
|
|
||||||
Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind)
|
// Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind)
|
||||||
{
|
// {
|
||||||
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
|
// GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
|
||||||
return &queue->sync_fence;
|
// return &queue->sync_fence;
|
||||||
}
|
// }
|
||||||
|
|
||||||
void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value)
|
// void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value)
|
||||||
{
|
// {
|
||||||
GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a);
|
// GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a);
|
||||||
GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b);
|
// GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b);
|
||||||
ID3D12Fence *b_fence = queue_b->commit_fence;
|
// ID3D12Fence *b_fence = queue_b->commit_fence;
|
||||||
ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value);
|
// ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value);
|
||||||
}
|
// }
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ @hookimpl Resource hooks
|
//~ @hookimpl Resource hooks
|
||||||
@ -2304,13 +2304,6 @@ GPU_Stats GPU_QueryStats(void)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
GPU_Stats GPU_QuerySharedMemoryStats(void)
|
|
||||||
{
|
|
||||||
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
|
||||||
GPU_Stats result = ZI;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ @hookimpl Swapchain
|
//~ @hookimpl Swapchain
|
||||||
|
|
||||||
|
|||||||
@ -339,7 +339,7 @@ Struct(GPU_D12_SharedState)
|
|||||||
//~ Helpers
|
//~ Helpers
|
||||||
|
|
||||||
GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle);
|
GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle);
|
||||||
GPU_D12_CmdList *GPU_D12_CommandListFromHandle(GPU_CommandListHandle handle);
|
GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle);
|
||||||
GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle);
|
GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle);
|
||||||
GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle);
|
GPU_D12_Swapchain *GPU_D12_SwapchainFromHandle(GPU_SwapchainHandle handle);
|
||||||
|
|
||||||
|
|||||||
@ -989,6 +989,9 @@ JobImpl(Build, _, __)
|
|||||||
/* Disable warnings */
|
/* Disable warnings */
|
||||||
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-initializer-overrides"));
|
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-initializer-overrides"));
|
||||||
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-microsoft-enum-forward-reference"));
|
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-microsoft-enum-forward-reference"));
|
||||||
|
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-unused-variable"));
|
||||||
|
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-unused-parameter"));
|
||||||
|
PushStringToList(arena, &cp.warnings_clang, Lit("-Wno-incompatible-function-pointer-types"));
|
||||||
}
|
}
|
||||||
|
|
||||||
//- Dxc
|
//- Dxc
|
||||||
|
|||||||
@ -43,7 +43,6 @@ JobImpl(PR_RunForever, _sig, _id)
|
|||||||
|
|
||||||
/* Prep test pass */
|
/* Prep test pass */
|
||||||
{
|
{
|
||||||
final_target_rwhandle.v = 12;
|
|
||||||
GPU_SetConstant(cl, PR_ShaderConst_TestTarget, final_target_rwhandle);
|
GPU_SetConstant(cl, PR_ShaderConst_TestTarget, final_target_rwhandle);
|
||||||
GPU_SetConstant(cl, PR_ShaderConst_TestConst, 3.123);
|
GPU_SetConstant(cl, PR_ShaderConst_TestConst, 3.123);
|
||||||
}
|
}
|
||||||
@ -89,6 +88,7 @@ JobImpl(PR_RunForever, _sig, _id)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void PR_Startup(void);
|
||||||
void PR_Startup(void)
|
void PR_Startup(void)
|
||||||
{
|
{
|
||||||
RunJob(PR_RunForever);
|
RunJob(PR_RunForever);
|
||||||
|
|||||||
@ -1,15 +1,20 @@
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Test shader
|
//~ Test shader
|
||||||
|
|
||||||
|
Struct(TestStruct)
|
||||||
|
{
|
||||||
|
i32 i;
|
||||||
|
};
|
||||||
|
|
||||||
ComputeShader2D(PR_TestCS, 8, 8)
|
ComputeShader2D(PR_TestCS, 8, 8)
|
||||||
{
|
{
|
||||||
RWTexture2D<Vec4> target_tex = RWTexture2DFromUniformHandle<Vec4>(PR_ShaderConst_TestTarget);
|
StructuredBuffer<TestStruct> sb = StructuredBufferFromHandle<TestStruct>(PR_ShaderConst_TestBuff);
|
||||||
|
|
||||||
|
RWTexture2D<Vec4> target_tex = RWTexture2DFromHandle<Vec4>(PR_ShaderConst_TestTarget);
|
||||||
Vec2U32 target_tex_size = Count2D(target_tex);
|
Vec2U32 target_tex_size = Count2D(target_tex);
|
||||||
|
|
||||||
f32 testf = PR_ShaderConst_TestConst;
|
|
||||||
|
|
||||||
Vec2I32 id = SV_DispatchThreadID;
|
Vec2I32 id = SV_DispatchThreadID;
|
||||||
if ((id.x < target_tex_size.x && id.y < target_tex_size.y) || testf < 3)
|
if (id.x < target_tex_size.x && id.y < target_tex_size.y)
|
||||||
{
|
{
|
||||||
target_tex[id] = Vec4(0, 1, 0, 1);
|
target_tex[id] = Vec4(0, 1, 0, 1);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,6 @@
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Constants
|
//~ Constants
|
||||||
|
|
||||||
ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0);
|
ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0);
|
||||||
ShaderConstant(f32, PR_ShaderConst_TestConst, 1);
|
ShaderConstant(StructuredBufferHandle, PR_ShaderConst_TestBuff, 1);
|
||||||
|
ShaderConstant(f32, PR_ShaderConst_TestConst, 2);
|
||||||
|
|||||||
@ -201,7 +201,7 @@ LRESULT CALLBACK WND_W32_WindowProc(HWND hwnd, UINT msg, WPARAM wparam, LPARAM l
|
|||||||
|
|
||||||
//- Keyboard button
|
//- Keyboard button
|
||||||
case WM_SYSKEYUP:
|
case WM_SYSKEYUP:
|
||||||
case WM_SYSKEYDOWN:;
|
case WM_SYSKEYDOWN:
|
||||||
case WM_KEYUP:
|
case WM_KEYUP:
|
||||||
case WM_KEYDOWN:
|
case WM_KEYDOWN:
|
||||||
{
|
{
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user