move UI rect fetch to vertex shader

This commit is contained in:
jacob 2026-03-03 00:33:54 -06:00
parent 91c7ef684e
commit f52d07d3bc
6 changed files with 37 additions and 39 deletions

View File

@ -83,31 +83,33 @@ Enum(G_BasicSamplerKind)
//~ Resource dereference //~ Resource dereference
#if IsGpu #if IsGpu
//- Scalar/Uniform dereference // NOTE: Uniform dereferencing is faster than Non-Uniform on AMD hardware
// Scalar/uniform dereference helpers: the descriptor heap is indexed with the
// ref's value as-is (no NonUniformResourceIndex wrapper).
// G_SDeref   -> view stored at heap slot r.v
// G_SDerefRW -> view stored at heap slot r.v + 1
SamplerState G_SDeref(G_SamplerStateRef r)
{
  return SamplerDescriptorHeap[r.v];
}
template<typename T> StructuredBuffer<T> G_SDeref(G_StructuredBufferRef r)
{
  return ResourceDescriptorHeap[r.v];
}
ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r)
{
  return ResourceDescriptorHeap[r.v];
}
template<typename T> Texture1D<T> G_SDeref(G_Texture1DRef r)
{
  return ResourceDescriptorHeap[r.v];
}
template<typename T> Texture2D<T> G_SDeref(G_Texture2DRef r)
{
  return ResourceDescriptorHeap[r.v];
}
template<typename T> Texture3D<T> G_SDeref(G_Texture3DRef r)
{
  return ResourceDescriptorHeap[r.v];
}
template<typename T> RWStructuredBuffer<T> G_SDerefRW(G_StructuredBufferRef r)
{
  return ResourceDescriptorHeap[r.v + 1];
}
RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r)
{
  return ResourceDescriptorHeap[r.v + 1];
}
template<typename T> RWTexture1D<T> G_SDerefRW(G_Texture1DRef r)
{
  return ResourceDescriptorHeap[r.v + 1];
}
template<typename T> RWTexture2D<T> G_SDerefRW(G_Texture2DRef r)
{
  return ResourceDescriptorHeap[r.v + 1];
}
template<typename T> RWTexture3D<T> G_SDerefRW(G_Texture3DRef r)
{
  return ResourceDescriptorHeap[r.v + 1];
}
//- Vector/Non-Uniform dereference (slower on AMD) //- Scalar/Uniform dereference
// Each line below carries two one-line overloads for the same ref type:
//   first  - a G_VDeref/G_VDerefRW form that wraps the heap index in NonUniformResourceIndex()
//   second - a G_SDeref/G_SDerefRW form that indexes the heap with the value directly
// In both forms the read-only view is taken from slot r.v and the RW view from slot r.v + 1.
// NOTE(review): presumably the RW descriptor is allocated immediately after the read-only one - confirm against the descriptor allocation code.
SamplerState G_VDeref(G_SamplerStateRef r) { u32 idx = r.v; return SamplerDescriptorHeap[NonUniformResourceIndex(idx)]; } SamplerState G_SDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[r.v]; }
template<typename T> StructuredBuffer<T> G_VDeref(G_StructuredBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } template<typename T> StructuredBuffer<T> G_SDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v]; }
ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture1D<T> G_VDeref(G_Texture1DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } template<typename T> Texture1D<T> G_SDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture2D<T> G_VDeref(G_Texture2DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } template<typename T> Texture2D<T> G_SDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture3D<T> G_VDeref(G_Texture3DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } template<typename T> Texture3D<T> G_SDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> RWStructuredBuffer<T> G_VDerefRW(G_StructuredBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } template<typename T> RWStructuredBuffer<T> G_SDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture1D<T> G_VDerefRW(G_Texture1DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } template<typename T> RWTexture1D<T> G_SDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture2D<T> G_VDerefRW(G_Texture2DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } template<typename T> RWTexture2D<T> G_SDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture3D<T> G_VDerefRW(G_Texture3DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; } template<typename T> RWTexture3D<T> G_SDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
//- Vector/Non-Uniform dereference
// Every heap lookup in this group goes through NonUniformResourceIndex().
// G_VDeref   -> view stored at heap slot r.v
// G_VDerefRW -> view stored at heap slot r.v + 1
SamplerState G_VDeref(G_SamplerStateRef r)
{
  u32 slot = r.v;
  return SamplerDescriptorHeap[NonUniformResourceIndex(slot)];
}
template<typename T> StructuredBuffer<T> G_VDeref(G_StructuredBufferRef r)
{
  u32 slot = r.v;
  return ResourceDescriptorHeap[NonUniformResourceIndex(slot)];
}
ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r)
{
  u32 slot = r.v;
  return ResourceDescriptorHeap[NonUniformResourceIndex(slot)];
}
template<typename T> Texture1D<T> G_VDeref(G_Texture1DRef r)
{
  u32 slot = r.v;
  return ResourceDescriptorHeap[NonUniformResourceIndex(slot)];
}
template<typename T> Texture2D<T> G_VDeref(G_Texture2DRef r)
{
  u32 slot = r.v;
  return ResourceDescriptorHeap[NonUniformResourceIndex(slot)];
}
template<typename T> Texture3D<T> G_VDeref(G_Texture3DRef r)
{
  u32 slot = r.v;
  return ResourceDescriptorHeap[NonUniformResourceIndex(slot)];
}
template<typename T> RWStructuredBuffer<T> G_VDerefRW(G_StructuredBufferRef r)
{
  u32 slot = r.v + 1;
  return ResourceDescriptorHeap[NonUniformResourceIndex(slot)];
}
RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r)
{
  u32 slot = r.v + 1;
  return ResourceDescriptorHeap[NonUniformResourceIndex(slot)];
}
template<typename T> RWTexture1D<T> G_VDerefRW(G_Texture1DRef r)
{
  u32 slot = r.v + 1;
  return ResourceDescriptorHeap[NonUniformResourceIndex(slot)];
}
template<typename T> RWTexture2D<T> G_VDerefRW(G_Texture2DRef r)
{
  u32 slot = r.v + 1;
  return ResourceDescriptorHeap[NonUniformResourceIndex(slot)];
}
template<typename T> RWTexture3D<T> G_VDerefRW(G_Texture3DRef r)
{
  u32 slot = r.v + 1;
  return ResourceDescriptorHeap[NonUniformResourceIndex(slot)];
}
#endif #endif
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////

View File

@ -563,6 +563,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-local-typedef")); PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-local-typedef"));
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-conversion")); PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-conversion"));
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-switch")); PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-switch"));
// PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-inline-asm")); // Disables false-positive "Gradient operations are not affected by wave-sensitive data or control flow."
} }
} }

View File

@ -192,7 +192,7 @@ ImplComputeShader2D(V_BackdropDownCS)
{ {
bd_up = G_SDeref<Vec4>(frame.backdrop_mips[mip_idx - 1]); bd_up = G_SDeref<Vec4>(frame.backdrop_mips[mip_idx - 1]);
} }
RWTexture2D<Vec4> bd_down = G_VDerefRW<Vec4>(frame.backdrop_mips[mip_idx]); RWTexture2D<Vec4> bd_down = G_SDerefRW<Vec4>(frame.backdrop_mips[mip_idx]);
Vec2 down_dims = countof(bd_down); Vec2 down_dims = countof(bd_down);
@ -316,9 +316,9 @@ ImplVertexShader(V_QuadVS, V_QuadPSInput)
V_QuadPSInput result; V_QuadPSInput result;
result.sv_position = Vec4(NdcFromPos(screen_pos, frame.screen_dims).xy, 0, 1); result.sv_position = Vec4(NdcFromPos(screen_pos, frame.screen_dims).xy, 0, 1);
result.quad_idx = SV_InstanceID;
result.world_pos = world_pos; result.world_pos = world_pos;
result.samp_uv = samp_uv; result.samp_uv = samp_uv;
result.quad = quad;
return result; return result;
} }
@ -328,11 +328,10 @@ ImplVertexShader(V_QuadVS, V_QuadPSInput)
ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
{ {
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Quad> quads = G_SDeref<V_Quad>(frame.quads);
SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
RWTexture2D<u32> occluders = G_SDerefRW<u32>(frame.occluders); RWTexture2D<u32> occluders = G_SDerefRW<u32>(frame.occluders);
V_Quad quad = quads[input.quad_idx]; V_Quad quad = input.quad;
Texture2D<Vec4> tex = G_VDeref<Vec4>(quad.tex); Texture2D<Vec4> tex = G_VDeref<Vec4>(quad.tex);
Vec2 world_pos = input.world_pos; Vec2 world_pos = input.world_pos;
@ -1173,7 +1172,7 @@ ImplComputeShader2D(V_BloomDownCS)
} }
} }
if (IsInside(bloom_pos, down_dims)) if (all(bloom_pos < down_dims))
{ {
bloom_down[bloom_pos] = result; bloom_down[bloom_pos] = result;
} }

View File

@ -4,9 +4,9 @@
// Values handed from the V_QuadVS vertex shader to the V_QuadPS pixel shader.
//   sv_position - written by V_QuadVS from NdcFromPos(screen_pos, frame.screen_dims)
//   quad_idx    - written from SV_InstanceID; V_QuadPS uses it to index the quads buffer
//   world_pos   - read by V_QuadPS as input.world_pos
//   samp_uv     - copied from the vertex shader's samp_uv
//   quad        - the V_Quad itself; V_QuadPS reads it as input.quad
// quad_idx and quad are declared nointerpolation.
Struct(V_QuadPSInput) Struct(V_QuadPSInput)
{ {
Semantic(Vec4, sv_position); Semantic(Vec4, sv_position);
Semantic(nointerpolation u32, quad_idx);
Semantic(Vec2, world_pos); Semantic(Vec2, world_pos);
Semantic(Vec2, samp_uv); Semantic(Vec2, samp_uv);
Semantic(nointerpolation V_Quad, quad);
}; };
Struct(V_QuadPSOutput) Struct(V_QuadPSOutput)

View File

@ -8,8 +8,8 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
{ {
UI_GpuParams params = G_SDeref<UI_GpuParams>(UI_GpuConst_Params)[0]; UI_GpuParams params = G_SDeref<UI_GpuParams>(UI_GpuConst_Params)[0];
StructuredBuffer<UI_GpuRect> rects = G_SDeref<UI_GpuRect>(params.rects); StructuredBuffer<UI_GpuRect> rects = G_SDeref<UI_GpuRect>(params.rects);
UI_GpuRect rect = rects[SV_InstanceID];
UI_GpuRect rect = rects[SV_InstanceID];
Vec2 rect_uv = RectUvFromIdx(SV_VertexID); Vec2 rect_uv = RectUvFromIdx(SV_VertexID);
Vec2 tex_uv = lerp(rect.tex_slice_uv.p0, rect.tex_slice_uv.p1, rect_uv); Vec2 tex_uv = lerp(rect.tex_slice_uv.p0, rect.tex_slice_uv.p1, rect_uv);
Vec2 target_pos = lerp(rect.bounds.p0, rect.bounds.p1, rect_uv); Vec2 target_pos = lerp(rect.bounds.p0, rect.bounds.p1, rect_uv);
@ -17,15 +17,13 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
UI_DRectPSInput result; UI_DRectPSInput result;
{ {
result.sv_position = Vec4(NdcFromPos(target_pos, Vec2(params.target_size).xy), 0, 1); result.sv_position = Vec4(NdcFromPos(target_pos, Vec2(params.target_size).xy), 0, 1);
result.rect_idx = SV_InstanceID;
result.base_background_premul = Premul(rect.background_lin); result.base_background_premul = Premul(rect.background_lin);
result.base_border_premul = Premul(rect.border_lin); result.base_border_premul = Premul(rect.border_lin);
result.tint_premul = Premul(rect.tint_lin); result.tint_premul = Premul(rect.tint_lin);
result.debug_premul = Premul(rect.debug_lin); result.debug_premul = Premul(rect.debug_lin);
result.rect_uv = rect_uv; result.rect_uv = rect_uv;
result.tex_uv = tex_uv; result.tex_uv = tex_uv;
result.rect = rect;
} }
return result; return result;
} }
@ -36,13 +34,11 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
{ {
UI_GpuParams params = G_SDeref<UI_GpuParams>(UI_GpuConst_Params)[0]; UI_GpuParams params = G_SDeref<UI_GpuParams>(UI_GpuConst_Params)[0];
StructuredBuffer<UI_GpuRect> rects = G_SDeref<UI_GpuRect>(params.rects);
SamplerState sampler = G_SDeref(params.sampler); SamplerState sampler = G_SDeref(params.sampler);
UI_GpuRect rect = rects[input.rect_idx]; UI_GpuRect rect = input.rect;
Vec2 p = input.sv_position.xy;
Vec2 rect_uv = input.rect_uv; Vec2 rect_uv = input.rect_uv;
Vec2 p = input.sv_position.xy;
Vec2 p0 = rect.bounds.p0; Vec2 p0 = rect.bounds.p0;
Vec2 p1 = rect.bounds.p1; Vec2 p1 = rect.bounds.p1;

View File

@ -4,13 +4,13 @@
// Values handed from the UI_DRectVS vertex shader to the UI_DRectPS pixel shader.
//   sv_position   - written by UI_DRectVS from NdcFromPos(target_pos, params.target_size)
//   rect_idx      - written from SV_InstanceID; UI_DRectPS uses it to index the rects buffer
//   *_premul      - Premul() of the rect's background/border/tint/debug colors
//   rect_uv       - RectUvFromIdx(SV_VertexID)
//   tex_uv        - lerp of rect.tex_slice_uv.p0..p1 by rect_uv
//   rect          - the UI_GpuRect itself; UI_DRectPS reads it as input.rect
// The nointerpolation modifier is written inside the Semantic() type argument,
// matching the other nointerpolation members declared with this macro.
Struct(UI_DRectPSInput) Struct(UI_DRectPSInput)
{ {
Semantic(Vec4, sv_position); Semantic(Vec4, sv_position);
Semantic(nointerpolation u32, rect_idx);
Semantic(Vec4, base_background_premul); Semantic(Vec4, base_background_premul);
Semantic(Vec4, base_border_premul); Semantic(Vec4, base_border_premul);
Semantic(Vec4, tint_premul); Semantic(Vec4, tint_premul);
Semantic(Vec4, debug_premul); Semantic(Vec4, debug_premul);
Semantic(Vec2, rect_uv); Semantic(Vec2, rect_uv);
Semantic(Vec2, tex_uv); Semantic(Vec2, tex_uv);
Semantic(nointerpolation UI_GpuRect, rect);
}; };
Struct(UI_DRectPSOutput) Struct(UI_DRectPSOutput)