move UI rect fetch to vertex shader

This commit is contained in:
jacob 2026-03-03 00:33:54 -06:00
parent 91c7ef684e
commit f52d07d3bc
6 changed files with 37 additions and 39 deletions

View File

@ -83,31 +83,33 @@ Enum(G_BasicSamplerKind)
//~ Resource dereference
#if IsGpu
//- Scalar/Uniform dereference
// Each G_SDeref overload returns the descriptor-heap entry at index `r.v`:
// samplers come from SamplerDescriptorHeap, every other resource kind from
// ResourceDescriptorHeap. The index is used as-is (no NonUniformResourceIndex),
// so these are meant for refs whose value is uniform across the executing lanes.
// The G_SDerefRW overloads return the read-write resource type and index at `r.v + 1`.
// NOTE(review): presumably the writable descriptor lives in the slot directly after
// the read-only one - confirm against the descriptor allocation code.
SamplerState G_SDeref(G_SamplerStateRef r) { u32 idx = r.v; return SamplerDescriptorHeap[idx]; }
template<typename T> StructuredBuffer<T> G_SDeref(G_StructuredBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; }
ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; }
template<typename T> Texture1D<T> G_SDeref(G_Texture1DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; }
template<typename T> Texture2D<T> G_SDeref(G_Texture2DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; }
template<typename T> Texture3D<T> G_SDeref(G_Texture3DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; }
template<typename T> RWStructuredBuffer<T> G_SDerefRW(G_StructuredBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; }
RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; }
template<typename T> RWTexture1D<T> G_SDerefRW(G_Texture1DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; }
template<typename T> RWTexture2D<T> G_SDerefRW(G_Texture2DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; }
template<typename T> RWTexture3D<T> G_SDerefRW(G_Texture3DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; }
// NOTE: Uniform dereferencing is faster than Non-Uniform on AMD hardware
//- Vector/Non-Uniform dereference (slower on AMD)
// Each G_VDeref overload returns the descriptor-heap entry at index `r.v`, with the
// index wrapped in NonUniformResourceIndex so the ref may differ between lanes.
// Samplers come from SamplerDescriptorHeap, every other resource kind from
// ResourceDescriptorHeap.
// The G_VDerefRW overloads return the read-write resource type and index at `r.v + 1`.
// NOTE(review): presumably the writable descriptor lives in the slot directly after
// the read-only one - confirm against the descriptor allocation code.
SamplerState G_VDeref(G_SamplerStateRef r) { u32 idx = r.v; return SamplerDescriptorHeap[NonUniformResourceIndex(idx)]; }
template<typename T> StructuredBuffer<T> G_VDeref(G_StructuredBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
template<typename T> Texture1D<T> G_VDeref(G_Texture1DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
template<typename T> Texture2D<T> G_VDeref(G_Texture2DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
template<typename T> Texture3D<T> G_VDeref(G_Texture3DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
template<typename T> RWStructuredBuffer<T> G_VDerefRW(G_StructuredBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
template<typename T> RWTexture1D<T> G_VDerefRW(G_Texture1DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
template<typename T> RWTexture2D<T> G_VDerefRW(G_Texture2DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
template<typename T> RWTexture3D<T> G_VDerefRW(G_Texture3DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
//- Scalar/Uniform dereference
// Each G_SDeref overload returns the descriptor-heap entry at index `r.v`:
// samplers come from SamplerDescriptorHeap, every other resource kind from
// ResourceDescriptorHeap. The index is used as-is (no NonUniformResourceIndex),
// so these are meant for refs whose value is uniform across the executing lanes.
// The G_SDerefRW overloads return the read-write resource type and index at `r.v + 1`.
// NOTE(review): presumably the writable descriptor lives in the slot directly after
// the read-only one - confirm against the descriptor allocation code.
SamplerState G_SDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[r.v]; }
template<typename T> StructuredBuffer<T> G_SDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v]; }
ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture1D<T> G_SDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture2D<T> G_SDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> Texture3D<T> G_SDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v]; }
template<typename T> RWStructuredBuffer<T> G_SDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture1D<T> G_SDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture2D<T> G_SDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
template<typename T> RWTexture3D<T> G_SDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
//- Vector/Non-Uniform dereference
// Each G_VDeref overload returns the descriptor-heap entry at index `r.v`, with the
// index wrapped in NonUniformResourceIndex so the ref may differ between lanes.
// Samplers come from SamplerDescriptorHeap, every other resource kind from
// ResourceDescriptorHeap.
// The G_VDerefRW overloads return the read-write resource type and index at `r.v + 1`.
// NOTE(review): presumably the writable descriptor lives in the slot directly after
// the read-only one - confirm against the descriptor allocation code.
SamplerState G_VDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> StructuredBuffer<T> G_VDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> Texture1D<T> G_VDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> Texture2D<T> G_VDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> Texture3D<T> G_VDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
template<typename T> RWStructuredBuffer<T> G_VDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> RWTexture1D<T> G_VDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> RWTexture2D<T> G_VDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
template<typename T> RWTexture3D<T> G_VDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
#endif
////////////////////////////////////////////////////////////

View File

@ -563,6 +563,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-local-typedef"));
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-conversion"));
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-switch"));
// PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-inline-asm")); // Disables false-positive "Gradient operations are not affected by wave-sensitive data or control flow."
}
}

View File

@ -192,7 +192,7 @@ ImplComputeShader2D(V_BackdropDownCS)
{
bd_up = G_SDeref<Vec4>(frame.backdrop_mips[mip_idx - 1]);
}
RWTexture2D<Vec4> bd_down = G_VDerefRW<Vec4>(frame.backdrop_mips[mip_idx]);
RWTexture2D<Vec4> bd_down = G_SDerefRW<Vec4>(frame.backdrop_mips[mip_idx]);
Vec2 down_dims = countof(bd_down);
@ -316,9 +316,9 @@ ImplVertexShader(V_QuadVS, V_QuadPSInput)
V_QuadPSInput result;
result.sv_position = Vec4(NdcFromPos(screen_pos, frame.screen_dims).xy, 0, 1);
result.quad_idx = SV_InstanceID;
result.world_pos = world_pos;
result.samp_uv = samp_uv;
result.quad = quad;
return result;
}
@ -328,11 +328,10 @@ ImplVertexShader(V_QuadVS, V_QuadPSInput)
ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
{
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Quad> quads = G_SDeref<V_Quad>(frame.quads);
SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
RWTexture2D<u32> occluders = G_SDerefRW<u32>(frame.occluders);
V_Quad quad = quads[input.quad_idx];
V_Quad quad = input.quad;
Texture2D<Vec4> tex = G_VDeref<Vec4>(quad.tex);
Vec2 world_pos = input.world_pos;
@ -1173,7 +1172,7 @@ ImplComputeShader2D(V_BloomDownCS)
}
}
if (IsInside(bloom_pos, down_dims))
if (all(bloom_pos < down_dims))
{
bloom_down[bloom_pos] = result;
}

View File

@ -4,9 +4,9 @@
// Output of the V_QuadVS vertex shader and input of the V_QuadPS pixel shader.
Struct(V_QuadPSInput)
{
// Position built by V_QuadVS from NdcFromPos(screen_pos, frame.screen_dims), with z = 0 and w = 1
Semantic(Vec4, sv_position);
// SV_InstanceID of the quad being drawn (not interpolated)
Semantic(nointerpolation u32, quad_idx);
Semantic(Vec2, world_pos);
Semantic(Vec2, samp_uv);
// Whole V_Quad record forwarded by V_QuadVS (not interpolated); V_QuadPS reads it as `input.quad`
Semantic(nointerpolation V_Quad, quad);
};
Struct(V_QuadPSOutput)

View File

@ -8,8 +8,8 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
{
UI_GpuParams params = G_SDeref<UI_GpuParams>(UI_GpuConst_Params)[0];
StructuredBuffer<UI_GpuRect> rects = G_SDeref<UI_GpuRect>(params.rects);
UI_GpuRect rect = rects[SV_InstanceID];
UI_GpuRect rect = rects[SV_InstanceID];
Vec2 rect_uv = RectUvFromIdx(SV_VertexID);
Vec2 tex_uv = lerp(rect.tex_slice_uv.p0, rect.tex_slice_uv.p1, rect_uv);
Vec2 target_pos = lerp(rect.bounds.p0, rect.bounds.p1, rect_uv);
@ -17,15 +17,13 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
UI_DRectPSInput result;
{
result.sv_position = Vec4(NdcFromPos(target_pos, Vec2(params.target_size).xy), 0, 1);
result.rect_idx = SV_InstanceID;
result.base_background_premul = Premul(rect.background_lin);
result.base_border_premul = Premul(rect.border_lin);
result.tint_premul = Premul(rect.tint_lin);
result.debug_premul = Premul(rect.debug_lin);
result.rect_uv = rect_uv;
result.tex_uv = tex_uv;
result.rect = rect;
}
return result;
}
@ -36,13 +34,11 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
{
UI_GpuParams params = G_SDeref<UI_GpuParams>(UI_GpuConst_Params)[0];
StructuredBuffer<UI_GpuRect> rects = G_SDeref<UI_GpuRect>(params.rects);
SamplerState sampler = G_SDeref(params.sampler);
UI_GpuRect rect = rects[input.rect_idx];
Vec2 p = input.sv_position.xy;
UI_GpuRect rect = input.rect;
Vec2 rect_uv = input.rect_uv;
Vec2 p = input.sv_position.xy;
Vec2 p0 = rect.bounds.p0;
Vec2 p1 = rect.bounds.p1;

View File

@ -4,13 +4,13 @@
// Output of the UI_DRectVS vertex shader and input of the UI_DRectPS pixel shader.
Struct(UI_DRectPSInput)
{
// Position built by UI_DRectVS from NdcFromPos(target_pos, target_size), with z = 0 and w = 1
Semantic(Vec4, sv_position);
// SV_InstanceID of the rect being drawn (not interpolated)
Semantic(nointerpolation u32, rect_idx);
// Premul() of the rect's background_lin / border_lin / tint_lin / debug_lin colors
Semantic(Vec4, base_background_premul);
Semantic(Vec4, base_border_premul);
Semantic(Vec4, tint_premul);
Semantic(Vec4, debug_premul);
// Position within the rect, from RectUvFromIdx(SV_VertexID)
Semantic(Vec2, rect_uv);
// rect_uv mapped into the rect's tex_slice_uv range
Semantic(Vec2, tex_uv);
// Whole UI_GpuRect record forwarded by UI_DRectVS (not interpolated); UI_DRectPS reads it as `input.rect`.
// The qualifier goes inside Semantic(...) to match the other nointerpolation fields.
Semantic(nointerpolation UI_GpuRect, rect);
};
Struct(UI_DRectPSOutput)