move UI rect fetch to vertex shader
This commit is contained in:
parent
91c7ef684e
commit
f52d07d3bc
@ -83,31 +83,33 @@ Enum(G_BasicSamplerKind)
|
||||
//~ Resource dereference
|
||||
|
||||
#if IsGpu
|
||||
//- Scalar/Uniform dereference
|
||||
// Read-only lookups: each overload copies r.v into a local u32 and uses it as a plain
// (non-NonUniformResourceIndex) index into the descriptor heap. Samplers come from
// SamplerDescriptorHeap; every other resource kind comes from ResourceDescriptorHeap.
SamplerState G_SDeref(G_SamplerStateRef r) { u32 idx = r.v; return SamplerDescriptorHeap[idx]; }
|
||||
template<typename T> StructuredBuffer<T> G_SDeref(G_StructuredBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; }
|
||||
ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; }
|
||||
template<typename T> Texture1D<T> G_SDeref(G_Texture1DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; }
|
||||
template<typename T> Texture2D<T> G_SDeref(G_Texture2DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; }
|
||||
template<typename T> Texture3D<T> G_SDeref(G_Texture3DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[idx]; }
|
||||
// Read-write lookups: same as above but the heap index is r.v + 1.
// NOTE(review): presumably the writable descriptor is allocated in the slot right after
// the read-only one - confirm against the descriptor allocation code.
template<typename T> RWStructuredBuffer<T> G_SDerefRW(G_StructuredBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; }
|
||||
RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; }
|
||||
template<typename T> RWTexture1D<T> G_SDerefRW(G_Texture1DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; }
|
||||
template<typename T> RWTexture2D<T> G_SDerefRW(G_Texture2DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; }
|
||||
template<typename T> RWTexture3D<T> G_SDerefRW(G_Texture3DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[idx]; }
|
||||
// NOTE: Uniform dereferencing is faster than Non-Uniform on AMD hardware
|
||||
|
||||
//- Vector/Non-Uniform dereference (slower on AMD)
|
||||
// Read-only lookups: each overload copies r.v into a local u32 and wraps it in
// NonUniformResourceIndex() before indexing the descriptor heap. Samplers come from
// SamplerDescriptorHeap; every other resource kind comes from ResourceDescriptorHeap.
SamplerState G_VDeref(G_SamplerStateRef r) { u32 idx = r.v; return SamplerDescriptorHeap[NonUniformResourceIndex(idx)]; }
|
||||
template<typename T> StructuredBuffer<T> G_VDeref(G_StructuredBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
|
||||
ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
|
||||
template<typename T> Texture1D<T> G_VDeref(G_Texture1DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
|
||||
template<typename T> Texture2D<T> G_VDeref(G_Texture2DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
|
||||
template<typename T> Texture3D<T> G_VDeref(G_Texture3DRef r) { u32 idx = r.v; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
|
||||
// Read-write lookups: same as above but the heap index is r.v + 1.
// NOTE(review): presumably the writable descriptor is allocated in the slot right after
// the read-only one - confirm against the descriptor allocation code.
template<typename T> RWStructuredBuffer<T> G_VDerefRW(G_StructuredBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
|
||||
RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
|
||||
template<typename T> RWTexture1D<T> G_VDerefRW(G_Texture1DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
|
||||
template<typename T> RWTexture2D<T> G_VDerefRW(G_Texture2DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
|
||||
template<typename T> RWTexture3D<T> G_VDerefRW(G_Texture3DRef r) { u32 idx = r.v + 1; return ResourceDescriptorHeap[NonUniformResourceIndex(idx)]; }
|
||||
//- Scalar/Uniform dereference
|
||||
// Read-only lookups: index the descriptor heap directly with r.v, without a
// NonUniformResourceIndex() wrapper. Samplers come from SamplerDescriptorHeap; every
// other resource kind comes from ResourceDescriptorHeap.
SamplerState G_SDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[r.v]; }
|
||||
template<typename T> StructuredBuffer<T> G_SDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v]; }
|
||||
ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v]; }
|
||||
template<typename T> Texture1D<T> G_SDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v]; }
|
||||
template<typename T> Texture2D<T> G_SDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v]; }
|
||||
template<typename T> Texture3D<T> G_SDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v]; }
|
||||
// Read-write lookups: same as above but the heap index is r.v + 1.
// NOTE(review): presumably the writable descriptor is allocated in the slot right after
// the read-only one - confirm against the descriptor allocation code.
template<typename T> RWStructuredBuffer<T> G_SDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
|
||||
RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
|
||||
template<typename T> RWTexture1D<T> G_SDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
|
||||
template<typename T> RWTexture2D<T> G_SDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
|
||||
template<typename T> RWTexture3D<T> G_SDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[r.v + 1]; }
|
||||
|
||||
//- Vector/Non-Uniform dereference
|
||||
// Read-only lookups: wrap r.v in NonUniformResourceIndex() before indexing the
// descriptor heap. Samplers come from SamplerDescriptorHeap; every other resource kind
// comes from ResourceDescriptorHeap.
SamplerState G_VDeref(G_SamplerStateRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
||||
template<typename T> StructuredBuffer<T> G_VDeref(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
||||
ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
||||
template<typename T> Texture1D<T> G_VDeref(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
||||
template<typename T> Texture2D<T> G_VDeref(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
||||
template<typename T> Texture3D<T> G_VDeref(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
||||
// Read-write lookups: same as above but the heap index is r.v + 1.
// NOTE(review): presumably the writable descriptor is allocated in the slot right after
// the read-only one - confirm against the descriptor allocation code.
template<typename T> RWStructuredBuffer<T> G_VDerefRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
|
||||
RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
|
||||
template<typename T> RWTexture1D<T> G_VDerefRW(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
|
||||
template<typename T> RWTexture2D<T> G_VDerefRW(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
|
||||
template<typename T> RWTexture3D<T> G_VDerefRW(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
|
||||
@ -563,6 +563,7 @@ void M_BuildEntryPoint(WaveLaneCtx *lane)
|
||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-unused-local-typedef"));
|
||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-conversion"));
|
||||
PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-switch"));
|
||||
// PushStringToList(perm, &cp.warnings_dxc, Lit("-Wno-inline-asm")); // Disables false-positive "Gradient operations are not affected by wave-sensitive data or control flow."
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -192,7 +192,7 @@ ImplComputeShader2D(V_BackdropDownCS)
|
||||
{
|
||||
bd_up = G_SDeref<Vec4>(frame.backdrop_mips[mip_idx - 1]);
|
||||
}
|
||||
RWTexture2D<Vec4> bd_down = G_VDerefRW<Vec4>(frame.backdrop_mips[mip_idx]);
|
||||
RWTexture2D<Vec4> bd_down = G_SDerefRW<Vec4>(frame.backdrop_mips[mip_idx]);
|
||||
|
||||
Vec2 down_dims = countof(bd_down);
|
||||
|
||||
@ -316,9 +316,9 @@ ImplVertexShader(V_QuadVS, V_QuadPSInput)
|
||||
|
||||
V_QuadPSInput result;
|
||||
result.sv_position = Vec4(NdcFromPos(screen_pos, frame.screen_dims).xy, 0, 1);
|
||||
result.quad_idx = SV_InstanceID;
|
||||
result.world_pos = world_pos;
|
||||
result.samp_uv = samp_uv;
|
||||
result.quad = quad;
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -328,11 +328,10 @@ ImplVertexShader(V_QuadVS, V_QuadPSInput)
|
||||
ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
|
||||
{
|
||||
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
StructuredBuffer<V_Quad> quads = G_SDeref<V_Quad>(frame.quads);
|
||||
SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
||||
RWTexture2D<u32> occluders = G_SDerefRW<u32>(frame.occluders);
|
||||
|
||||
V_Quad quad = quads[input.quad_idx];
|
||||
V_Quad quad = input.quad;
|
||||
Texture2D<Vec4> tex = G_VDeref<Vec4>(quad.tex);
|
||||
|
||||
Vec2 world_pos = input.world_pos;
|
||||
@ -1173,7 +1172,7 @@ ImplComputeShader2D(V_BloomDownCS)
|
||||
}
|
||||
}
|
||||
|
||||
if (IsInside(bloom_pos, down_dims))
|
||||
if (all(bloom_pos < down_dims))
|
||||
{
|
||||
bloom_down[bloom_pos] = result;
|
||||
}
|
||||
|
||||
@ -4,9 +4,9 @@
|
||||
// Data handed from V_QuadVS (which returns this struct) to V_QuadPS (which takes it as `input`).
// NOTE(review): this view is a diff, so both the `quad_idx` member and the `quad` member
// appear here; confirm which of the two exists in the file at a given revision.
Struct(V_QuadPSInput)
|
||||
{
|
||||
// Written by V_QuadVS as Vec4(NdcFromPos(screen_pos, frame.screen_dims).xy, 0, 1).
Semantic(Vec4, sv_position);
|
||||
// Written by V_QuadVS from SV_InstanceID; V_QuadPS used it to index the quads buffer.
Semantic(nointerpolation u32, quad_idx);
|
||||
Semantic(Vec2, world_pos);
|
||||
Semantic(Vec2, samp_uv);
|
||||
// Whole V_Quad record copied in by V_QuadVS and read back by V_QuadPS as input.quad.
Semantic(nointerpolation V_Quad, quad);
|
||||
};
|
||||
|
||||
Struct(V_QuadPSOutput)
|
||||
|
||||
@ -8,8 +8,8 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
|
||||
{
|
||||
UI_GpuParams params = G_SDeref<UI_GpuParams>(UI_GpuConst_Params)[0];
|
||||
StructuredBuffer<UI_GpuRect> rects = G_SDeref<UI_GpuRect>(params.rects);
|
||||
UI_GpuRect rect = rects[SV_InstanceID];
|
||||
|
||||
UI_GpuRect rect = rects[SV_InstanceID];
|
||||
Vec2 rect_uv = RectUvFromIdx(SV_VertexID);
|
||||
Vec2 tex_uv = lerp(rect.tex_slice_uv.p0, rect.tex_slice_uv.p1, rect_uv);
|
||||
Vec2 target_pos = lerp(rect.bounds.p0, rect.bounds.p1, rect_uv);
|
||||
@ -17,15 +17,13 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
|
||||
UI_DRectPSInput result;
|
||||
{
|
||||
result.sv_position = Vec4(NdcFromPos(target_pos, Vec2(params.target_size).xy), 0, 1);
|
||||
result.rect_idx = SV_InstanceID;
|
||||
|
||||
result.base_background_premul = Premul(rect.background_lin);
|
||||
result.base_border_premul = Premul(rect.border_lin);
|
||||
result.tint_premul = Premul(rect.tint_lin);
|
||||
result.debug_premul = Premul(rect.debug_lin);
|
||||
|
||||
result.rect_uv = rect_uv;
|
||||
result.tex_uv = tex_uv;
|
||||
result.rect = rect;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -36,13 +34,11 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
|
||||
ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
|
||||
{
|
||||
UI_GpuParams params = G_SDeref<UI_GpuParams>(UI_GpuConst_Params)[0];
|
||||
StructuredBuffer<UI_GpuRect> rects = G_SDeref<UI_GpuRect>(params.rects);
|
||||
SamplerState sampler = G_SDeref(params.sampler);
|
||||
|
||||
UI_GpuRect rect = rects[input.rect_idx];
|
||||
|
||||
Vec2 p = input.sv_position.xy;
|
||||
UI_GpuRect rect = input.rect;
|
||||
Vec2 rect_uv = input.rect_uv;
|
||||
Vec2 p = input.sv_position.xy;
|
||||
Vec2 p0 = rect.bounds.p0;
|
||||
Vec2 p1 = rect.bounds.p1;
|
||||
|
||||
|
||||
@ -4,13 +4,13 @@
|
||||
// Data handed from UI_DRectVS (which returns this struct) to UI_DRectPS (which takes it as `input`).
// NOTE(review): this view is a diff, so both the `rect_idx` member and the `rect` member
// appear here; confirm which of the two exists in the file at a given revision.
Struct(UI_DRectPSInput)
|
||||
{
|
||||
// Written by UI_DRectVS as Vec4(NdcFromPos(target_pos, Vec2(params.target_size).xy), 0, 1).
Semantic(Vec4, sv_position);
|
||||
// Written by UI_DRectVS from SV_InstanceID; UI_DRectPS used it to index the rects buffer.
Semantic(nointerpolation u32, rect_idx);
|
||||
// The four *_premul members are Premul() of the rect's background_lin, border_lin,
// tint_lin and debug_lin values respectively, computed in UI_DRectVS.
Semantic(Vec4, base_background_premul);
|
||||
Semantic(Vec4, base_border_premul);
|
||||
Semantic(Vec4, tint_premul);
|
||||
Semantic(Vec4, debug_premul);
|
||||
// RectUvFromIdx(SV_VertexID) as computed in UI_DRectVS.
Semantic(Vec2, rect_uv);
|
||||
// lerp(rect.tex_slice_uv.p0, rect.tex_slice_uv.p1, rect_uv) as computed in UI_DRectVS.
Semantic(Vec2, tex_uv);
|
||||
// Whole UI_GpuRect record copied in by UI_DRectVS and read back by UI_DRectPS as input.rect.
// NOTE(review): `nointerpolation` sits outside the Semantic() macro here, whereas rect_idx
// above and V_QuadPSInput place it inside the macro's type argument - confirm both
// spellings expand to the intended declaration.
nointerpolation Semantic(UI_GpuRect, rect);
|
||||
};
|
||||
|
||||
Struct(UI_DRectPSOutput)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user