revert to non-uniform gpu dereference by default for now

This commit is contained in:
jacob 2026-03-13 08:42:03 -05:00
parent 80c43d2ebd
commit 8e0167125f
5 changed files with 109 additions and 121 deletions

View File

@ -95,31 +95,25 @@ Struct(G_IndexBufferDesc)
//~ Resource dereference //~ Resource dereference
#if IsGpu #if IsGpu
//- Scalar/Uniform dereference (faster on AMD hardware) // TODO: Add explicit uniform dereference functions, since on AMD hardware
template<typename T> StructuredBuffer<T> G_UniformDeref(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 0]; } // non-uniform is slower and there are some shader-compilation issues in older
template<typename T> RWStructuredBuffer<T> G_UniformDerefRW(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 1]; } // driver versions
ByteAddressBuffer G_UniformDerefRaw(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 2]; }
RWByteAddressBuffer G_UniformDerefRawRW(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 3]; }
template<typename T> Texture1D<T> G_UniformDeref1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; }
template<typename T> Texture2D<T> G_UniformDeref2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; }
template<typename T> Texture3D<T> G_UniformDeref3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; }
template<typename T> RWTexture1D<T> G_UniformDerefRW1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; }
template<typename T> RWTexture2D<T> G_UniformDerefRW2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; }
template<typename T> RWTexture3D<T> G_UniformDerefRW3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; }
SamplerState G_UniformDeref(G_SamplerRef r) { return SamplerDescriptorHeap[r.v]; }
//- Vector/Non-Uniform dereference template<typename R> struct G_DerefImpl;
template<typename T> StructuredBuffer<T> G_DynamicDeref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 0)]; } template<> struct G_DerefImpl< SamplerState > { static SamplerState Deref(G_SamplerRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; } }; // Sampler refs index the sampler heap (as the previous sampler helpers did), not the resource heap
template<typename T> RWStructuredBuffer<T> G_DynamicDerefRW(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } template<typename T> struct G_DerefImpl< StructuredBuffer<T> > { static StructuredBuffer<T> Deref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 0)]; } };
ByteAddressBuffer G_DynamicDerefRaw(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 2)]; } template<typename T> struct G_DerefImpl< RWStructuredBuffer<T> > { static RWStructuredBuffer<T> Deref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; } };
RWByteAddressBuffer G_DynamicDerefRawRW(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 3)]; } template<> struct G_DerefImpl< ByteAddressBuffer > { static ByteAddressBuffer Deref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 2)]; } }; // Read-only raw view is slot +2 (matches the previous Raw helpers); +3 is the RW raw view
template<typename T> Texture1D<T> G_DynamicDeref1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } template<> struct G_DerefImpl< RWByteAddressBuffer > { static RWByteAddressBuffer Deref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 3)]; } };
template<typename T> Texture2D<T> G_DynamicDeref2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } template<typename T> struct G_DerefImpl< Texture1D<T> > { static Texture1D<T> Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } };
template<typename T> Texture3D<T> G_DynamicDeref3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } template<typename T> struct G_DerefImpl< Texture2D<T> > { static Texture2D<T> Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } };
template<typename T> RWTexture1D<T> G_DynamicDerefRW1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } template<typename T> struct G_DerefImpl< Texture3D<T> > { static Texture3D<T> Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; } };
template<typename T> RWTexture2D<T> G_DynamicDerefRW2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } template<typename T> struct G_DerefImpl< RWTexture1D<T> > { static RWTexture1D<T> Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } };
template<typename T> RWTexture3D<T> G_DynamicDerefRW3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } template<typename T> struct G_DerefImpl< RWTexture2D<T> > { static RWTexture2D<T> Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } };
SamplerState G_DynamicDeref(G_SamplerRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; } template<typename T> struct G_DerefImpl< RWTexture3D<T> > { static RWTexture3D<T> Deref(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; } };
// Wrap since HLSL can't handle template double angle bracket '>>'
#define G_Deref(ref, type, ...) (G_DerefImpl< type >::Deref((ref), ##__VA_ARGS__))
#endif #endif
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -229,7 +223,7 @@ Struct(G_FmtArg)
void G_CommitPrint(G_TempPrintBuffer buff) void G_CommitPrint(G_TempPrintBuffer buff)
{ {
RWByteAddressBuffer rw = G_UniformDerefRawRW(G_ShaderConst_PrintBuffer); RWByteAddressBuffer rw = G_Deref(G_ShaderConst_PrintBuffer, RWByteAddressBuffer);
if (buff.overflowed) if (buff.overflowed)
{ {

View File

@ -534,7 +534,7 @@ void V_TickForever(WaveLaneCtx *lane)
while (!shutdown) while (!shutdown)
{ {
shutdown = Atomic32Fetch(&V.shutdown); shutdown = Atomic32Fetch(&V.shutdown);
P_tl.debug_draw_enabled = TweakBool("Vis debug draw", 1); P_tl.debug_draw_enabled = TweakBool("Vis debug draw", 0);
////////////////////////////// //////////////////////////////
//- Begin frame //- Begin frame
@ -5369,8 +5369,6 @@ void V_TickForever(WaveLaneCtx *lane)
} }
} }
G_SyncReleaseFamilyLayout(cl, frame->bloom_chain);
////////////////////////////// //////////////////////////////
//- Finalization pass //- Finalization pass

View File

@ -3,7 +3,7 @@
f32 V_RandFromPos(Vec3 pos) f32 V_RandFromPos(Vec3 pos)
{ {
Texture3D<u32> noise3d = G_UniformDeref3D<u32>(V_GpuConst_NoiseTex); Texture3D<u32> noise3d = G_Deref(V_GpuConst_NoiseTex, Texture3D<u32>);
u32 noise = noise3d[(Vec3U32)pos % G_BasicNoiseDims]; u32 noise = noise3d[(Vec3U32)pos % G_BasicNoiseDims];
f32 rand = Norm16(noise); f32 rand = Norm16(noise);
return rand; return rand;
@ -57,8 +57,8 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
//- Prepare shade //- Prepare shade
ComputeShader(V_PrepareShadeCS) ComputeShader(V_PrepareShadeCS)
{ {
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
RWTexture2D<Vec4> shade = G_UniformDerefRW2D<Vec4>(frame.shade); RWTexture2D<Vec4> shade = G_Deref(frame.shade, RWTexture2D<Vec4>);
Vec2 shade_pos = SV_DispatchThreadID + 0.5; Vec2 shade_pos = SV_DispatchThreadID + 0.5;
if (all(shade_pos < G_Count2D(shade))) if (all(shade_pos < G_Count2D(shade)))
{ {
@ -70,12 +70,12 @@ ComputeShader(V_PrepareShadeCS)
//- Prepare cells //- Prepare cells
ComputeShader(V_PrepareCellsCS) ComputeShader(V_PrepareCellsCS)
{ {
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
Texture2D<P_TileKind> tiles = G_UniformDeref2D<P_TileKind>(frame.tiles); Texture2D<P_TileKind> tiles = G_Deref(frame.tiles, Texture2D<P_TileKind>);
RWTexture2D<Vec4> stains = G_UniformDerefRW2D<Vec4>(frame.stains); RWTexture2D<Vec4> stains = G_Deref(frame.stains, RWTexture2D<Vec4>);
RWTexture2D<Vec4> dry_stains = G_UniformDerefRW2D<Vec4>(frame.dry_stains); RWTexture2D<Vec4> dry_stains = G_Deref(frame.dry_stains, RWTexture2D<Vec4>);
RWTexture2D<f32> drynesses = G_UniformDerefRW2D<f32>(frame.drynesses); RWTexture2D<f32> drynesses = G_Deref(frame.drynesses, RWTexture2D<f32>);
RWTexture2D<u32> occluders = G_UniformDerefRW2D<u32>(frame.occluders); RWTexture2D<u32> occluders = G_Deref(frame.occluders, RWTexture2D<u32>);
Vec2 cell_pos = SV_DispatchThreadID + 0.5; Vec2 cell_pos = SV_DispatchThreadID + 0.5;
if (all(cell_pos < P_WorldCellsDims)) if (all(cell_pos < P_WorldCellsDims))
@ -101,8 +101,8 @@ ComputeShader(V_PrepareCellsCS)
Vec4 over_dry_stain = 0; Vec4 over_dry_stain = 0;
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1) for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
{ {
RWTexture2D<u32> cells = G_DynamicDerefRW2D<u32>(frame.particle_cells[layer]); RWTexture2D<u32> cells = G_Deref(frame.particle_cells[layer], RWTexture2D<u32>);
RWTexture2D<u32> densities = G_DynamicDerefRW2D<u32>(frame.particle_densities[layer]); RWTexture2D<u32> densities = G_Deref(frame.particle_densities[layer], RWTexture2D<u32>);
u32 packed = cells[cell_pos]; u32 packed = cells[cell_pos];
if (packed & (1 << 31)) if (packed & (1 << 31))
{ {
@ -159,8 +159,8 @@ ComputeShader(V_PrepareCellsCS)
//- Clear particles //- Clear particles
ComputeShader(V_ClearParticlesCS) ComputeShader(V_ClearParticlesCS)
{ {
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
RWStructuredBuffer<V_Particle> particles = G_UniformDerefRW<V_Particle>(frame.particles); RWStructuredBuffer<V_Particle> particles = G_Deref(frame.particles, RWStructuredBuffer<V_Particle>);
u32 particle_idx = SV_DispatchThreadID; u32 particle_idx = SV_DispatchThreadID;
if (particle_idx < V_ParticlesCap) if (particle_idx < V_ParticlesCap)
{ {
@ -179,14 +179,14 @@ ComputeShader(V_BackdropDownCS)
i32 mip_idx = V_GpuConst_MipIdx; i32 mip_idx = V_GpuConst_MipIdx;
b32 is_first_pass = mip_idx == 0; b32 is_first_pass = mip_idx == 0;
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]); SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror], SamplerState);
Texture2D<Vec4> bd_up = ( Texture2D<Vec4> bd_up = (
is_first_pass ? is_first_pass ?
G_UniformDeref2D<Vec4>(frame.backdrop_src) : G_Deref(frame.backdrop_src, Texture2D<Vec4>) :
G_UniformDeref2D<Vec4>(frame.backdrop_chain, mip_idx - 1) G_Deref(frame.backdrop_chain, Texture2D<Vec4>, mip_idx - 1)
); );
RWTexture2D<Vec4> bd_down = G_UniformDerefRW2D<Vec4>(frame.backdrop_chain, mip_idx); RWTexture2D<Vec4> bd_down = G_Deref(frame.backdrop_chain, RWTexture2D<Vec4>, mip_idx);
Vec2 down_dims = G_Count2D(bd_down); Vec2 down_dims = G_Count2D(bd_down);
Vec2 bd_pos = SV_DispatchThreadID + 0.5; Vec2 bd_pos = SV_DispatchThreadID + 0.5;
@ -238,10 +238,10 @@ ComputeShader(V_BackdropUpCS)
{ {
i32 mip_idx = V_GpuConst_MipIdx; i32 mip_idx = V_GpuConst_MipIdx;
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
Texture2D<Vec4> bd_down = G_UniformDeref2D<Vec4>(frame.backdrop_chain, mip_idx + 1); Texture2D<Vec4> bd_down = G_Deref(frame.backdrop_chain, Texture2D<Vec4>, mip_idx + 1);
RWTexture2D<Vec4> bd_up = G_UniformDerefRW2D<Vec4>(frame.backdrop_chain, mip_idx); RWTexture2D<Vec4> bd_up = G_Deref(frame.backdrop_chain, RWTexture2D<Vec4>, mip_idx);
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]); SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror], SamplerState);
Vec2 down_dims = G_Count2D(bd_down); Vec2 down_dims = G_Count2D(bd_down);
Vec2 up_dims = G_Count2D(bd_up); Vec2 up_dims = G_Count2D(bd_up);
@ -296,8 +296,8 @@ ComputeShader(V_BackdropUpCS)
VertexShader(V_QuadVS, V_QuadPSInput) VertexShader(V_QuadVS, V_QuadPSInput)
{ {
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
StructuredBuffer<V_Quad> quads = G_UniformDeref<V_Quad>(frame.quads); StructuredBuffer<V_Quad> quads = G_Deref(frame.quads, StructuredBuffer<V_Quad>);
V_Quad quad = quads[SV_InstanceID]; V_Quad quad = quads[SV_InstanceID];
@ -320,12 +320,12 @@ VertexShader(V_QuadVS, V_QuadPSInput)
PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
{ {
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_PointClamp], SamplerState);
RWTexture2D<u32> occluders = G_UniformDerefRW2D<u32>(frame.occluders); RWTexture2D<u32> occluders = G_Deref(frame.occluders, RWTexture2D<u32>);
V_Quad quad = input.quad; V_Quad quad = input.quad;
Texture2D<Vec4> tex = G_DynamicDeref2D<Vec4>(quad.tex); Texture2D<Vec4> tex = G_Deref(quad.tex, Texture2D<Vec4>);
Vec2 world_pos = input.world_pos; Vec2 world_pos = input.world_pos;
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1)); Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1));
@ -356,9 +356,9 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
ComputeShader(V_EmitParticlesCS) ComputeShader(V_EmitParticlesCS)
{ {
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
StructuredBuffer<V_Emitter> emitters = G_UniformDeref<V_Emitter>(frame.emitters); StructuredBuffer<V_Emitter> emitters = G_Deref(frame.emitters, StructuredBuffer<V_Emitter>);
RWStructuredBuffer<V_Particle> particles = G_UniformDerefRW<V_Particle>(frame.particles); RWStructuredBuffer<V_Particle> particles = G_Deref(frame.particles, RWStructuredBuffer<V_Particle>);
u32 emitter_idx = SV_DispatchThreadID; u32 emitter_idx = SV_DispatchThreadID;
if (emitter_idx < frame.emitters_count) if (emitter_idx < frame.emitters_count)
@ -387,10 +387,10 @@ ComputeShader(V_EmitParticlesCS)
ComputeShader(V_SimParticlesCS) ComputeShader(V_SimParticlesCS)
{ {
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
Texture2D<P_TileKind> tiles = G_UniformDeref2D<P_TileKind>(frame.tiles); Texture2D<P_TileKind> tiles = G_Deref(frame.tiles, Texture2D<P_TileKind>);
RWStructuredBuffer<V_Particle> particles = G_UniformDerefRW<V_Particle>(frame.particles); RWStructuredBuffer<V_Particle> particles = G_Deref(frame.particles, RWStructuredBuffer<V_Particle>);
Texture2D<u32> occluders = G_UniformDeref2D<u32>(frame.occluders); Texture2D<u32> occluders = G_Deref(frame.occluders, Texture2D<u32>);
u32 particle_idx = SV_DispatchThreadID; u32 particle_idx = SV_DispatchThreadID;
if (particle_idx < V_ParticlesCap) if (particle_idx < V_ParticlesCap)
@ -415,7 +415,7 @@ ComputeShader(V_SimParticlesCS)
if (particle.kind < 0) if (particle.kind < 0)
{ {
u32 emitter_idx = -particle.kind - 1; u32 emitter_idx = -particle.kind - 1;
V_Emitter emitter = G_UniformDeref<V_Emitter>(frame.emitters)[emitter_idx]; V_Emitter emitter = G_Deref(frame.emitters, StructuredBuffer<V_Emitter>)[emitter_idx];
f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle); f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle);
f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed); f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed);
@ -430,8 +430,8 @@ ComputeShader(V_SimParticlesCS)
if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune) if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
{ {
V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind); V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
RWTexture2D<u32> cells = G_DynamicDerefRW2D<u32>(frame.particle_cells[desc.layer]); RWTexture2D<u32> cells = G_Deref(frame.particle_cells[desc.layer], RWTexture2D<u32>);
RWTexture2D<u32> densities = G_DynamicDerefRW2D<u32>(frame.particle_densities[desc.layer]); RWTexture2D<u32> densities = G_Deref(frame.particle_densities[desc.layer], RWTexture2D<u32>);
u32 packed = 0; u32 packed = 0;
packed |= (particle_idx & ((1 << 24) - 1)) << 0; packed |= (particle_idx & ((1 << 24) - 1)) << 0; // Mask to the low 24 bits: (1 >> 24) - 1 was an all-ones mask, letting the index spill into the kind bits read via (packed >> 24)
@ -664,12 +664,12 @@ ComputeShader(V_SimParticlesCS)
ComputeShader(V_ShadeCS) ComputeShader(V_ShadeCS)
{ {
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_PointClamp], SamplerState);
Texture2D<P_TileKind> tiles = G_UniformDeref2D<P_TileKind>(frame.tiles); Texture2D<P_TileKind> tiles = G_Deref(frame.tiles, Texture2D<P_TileKind>);
Texture2D<Vec4> albedo_tex = G_UniformDeref2D<Vec4>(frame.albedo); Texture2D<Vec4> albedo_tex = G_Deref(frame.albedo, Texture2D<Vec4>);
RWTexture2D<Vec4> shade_tex = G_UniformDerefRW2D<Vec4>(frame.shade); RWTexture2D<Vec4> shade_tex = G_Deref(frame.shade, RWTexture2D<Vec4>);
Texture2D<f32> drynesses = G_UniformDeref2D<f32>(frame.drynesses); Texture2D<f32> drynesses = G_Deref(frame.drynesses, Texture2D<f32>);
Vec2 shade_pos = SV_DispatchThreadID + 0.5; Vec2 shade_pos = SV_DispatchThreadID + 0.5;
Vec2 world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1)); Vec2 world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1));
@ -700,18 +700,18 @@ ComputeShader(V_ShadeCS)
ComputeShader(V_CompositeCS) ComputeShader(V_CompositeCS)
{ {
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
// Texture2D<Vec4> shade_tex = G_UniformDeref2D<Vec4>(frame.shade); // Texture2D<Vec4> shade_tex = G_Deref(frame.shade, Texture2D<Vec4>);
SamplerState point_sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); SamplerState point_sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_PointClamp], SamplerState);
SamplerState bilinear_sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); SamplerState bilinear_sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState);
Texture2D<Vec4> albedo_tex = G_UniformDeref2D<Vec4>(frame.albedo); Texture2D<Vec4> albedo_tex = G_Deref(frame.albedo, Texture2D<Vec4>);
RWTexture2D<Vec4> screen_tex = G_UniformDerefRW2D<Vec4>(frame.screen); RWTexture2D<Vec4> screen_tex = G_Deref(frame.screen, RWTexture2D<Vec4>);
Texture2D<Vec4> stains = G_UniformDeref2D<Vec4>(frame.stains); Texture2D<Vec4> stains = G_Deref(frame.stains, Texture2D<Vec4>);
Texture2D<Vec4> dry_stains = G_UniformDeref2D<Vec4>(frame.dry_stains); Texture2D<Vec4> dry_stains = G_Deref(frame.dry_stains, Texture2D<Vec4>);
Texture2D<f32> drynesses = G_UniformDeref2D<f32>(frame.drynesses); Texture2D<f32> drynesses = G_Deref(frame.drynesses, Texture2D<f32>);
Texture2D<P_TileKind> tiles = G_UniformDeref2D<P_TileKind>(frame.tiles); Texture2D<P_TileKind> tiles = G_Deref(frame.tiles, Texture2D<P_TileKind>);
Texture2D<Vec4> backdrop = G_UniformDeref2D<Vec4>(frame.backdrop_chain); Texture2D<Vec4> backdrop = G_Deref(frame.backdrop_chain, Texture2D<Vec4>);
StructuredBuffer<V_Particle> particles = G_UniformDeref<V_Particle>(frame.particles); StructuredBuffer<V_Particle> particles = G_Deref(frame.particles, StructuredBuffer<V_Particle>);
Vec2 screen_pos = SV_DispatchThreadID + 0.5; Vec2 screen_pos = SV_DispatchThreadID + 0.5;
Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1)); Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1));
@ -819,7 +819,7 @@ ComputeShader(V_CompositeCS)
else if (tile != P_TileKind_Empty) else if (tile != P_TileKind_Empty)
{ {
V_TileDesc tile_desc = frame.tile_descs[tile]; V_TileDesc tile_desc = frame.tile_descs[tile];
Texture2D<Vec4> tile_tex = G_DynamicDeref2D<Vec4>(tile_desc.tex); Texture2D<Vec4> tile_tex = G_Deref(tile_desc.tex, Texture2D<Vec4>);
Vec2 samp_t = clamp(frac(world_pos), 0.00001, 1.0 - 0.00001); Vec2 samp_t = clamp(frac(world_pos), 0.00001, 1.0 - 0.00001);
Vec2 samp_uv = lerp(tile_desc.tex_slice_uv.p0, tile_desc.tex_slice_uv.p1, samp_t); Vec2 samp_uv = lerp(tile_desc.tex_slice_uv.p0, tile_desc.tex_slice_uv.p1, samp_t);
tile_color = tile_tex.SampleLevel(point_sampler, samp_uv, 0); tile_color = tile_tex.SampleLevel(point_sampler, samp_uv, 0);
@ -848,8 +848,8 @@ ComputeShader(V_CompositeCS)
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1) for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
{ {
Texture2D<u32> cells = G_UniformDeref2D<u32>(frame.particle_cells[layer]); Texture2D<u32> cells = G_Deref(frame.particle_cells[layer], Texture2D<u32>);
Texture2D<u32> densities = G_UniformDeref2D<u32>(frame.particle_densities[layer]); Texture2D<u32> densities = G_Deref(frame.particle_densities[layer], Texture2D<u32>);
u32 packed = cells[cell_pos]; u32 packed = cells[cell_pos];
V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F); V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F);
if (particle_kind != V_ParticleKind_None) if (particle_kind != V_ParticleKind_None)
@ -1104,14 +1104,14 @@ ComputeShader(V_BloomDownCS)
i32 mip_idx = V_GpuConst_MipIdx; i32 mip_idx = V_GpuConst_MipIdx;
b32 is_first_pass = mip_idx == 1; b32 is_first_pass = mip_idx == 1;
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState);
RWTexture2D<Vec4> bloom_down = G_UniformDerefRW2D<Vec4>(frame.bloom_chain, mip_idx - 1); RWTexture2D<Vec4> bloom_down = G_Deref(frame.bloom_chain, RWTexture2D<Vec4>, mip_idx - 1);
Texture2D<Vec4> bloom_up = ( Texture2D<Vec4> bloom_up = (
is_first_pass ? is_first_pass ?
G_UniformDeref2D<Vec4>(frame.screen) : G_Deref(frame.screen, Texture2D<Vec4>) :
G_UniformDeref2D<Vec4>(frame.bloom_chain, mip_idx - 2) G_Deref(frame.bloom_chain, Texture2D<Vec4>, mip_idx - 2) // Source is mip_idx - 2; mip_idx - 1 is the mip bound as the bloom_down UAV
); );
Vec2 down_dims = G_Count2D(bloom_down); Vec2 down_dims = G_Count2D(bloom_down);
@ -1173,20 +1173,16 @@ ComputeShader(V_BloomUpCS)
{ {
i32 mip_idx = V_GpuConst_MipIdx; i32 mip_idx = V_GpuConst_MipIdx;
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState);
Texture2D<Vec4> bloom_down = G_UniformDeref2D<Vec4>(frame.bloom_chain, mip_idx); Texture2D<Vec4> bloom_down = G_Deref(frame.bloom_chain, Texture2D<Vec4>, mip_idx);
b32 is_last_pass = mip_idx == 0; b32 is_last_pass = mip_idx == 0;
RWTexture2D<Vec4> bloom_up; RWTexture2D<Vec4> bloom_up = (
if (is_last_pass) is_last_pass ?
{ G_Deref(frame.screen, RWTexture2D<Vec4>) :
bloom_up = G_UniformDerefRW2D<Vec4>(frame.screen); G_Deref(frame.bloom_chain, RWTexture2D<Vec4>, mip_idx - 1)
} );
else
{
bloom_up = G_UniformDerefRW2D<Vec4>(frame.bloom_chain, mip_idx - 1);
}
Vec2 down_dims = G_Count2D(bloom_down); Vec2 down_dims = G_Count2D(bloom_down);
Vec2 up_dims = G_Count2D(bloom_up); Vec2 up_dims = G_Count2D(bloom_up);
@ -1238,10 +1234,10 @@ ComputeShader(V_BloomUpCS)
ComputeShader(V_FinalizeCS) ComputeShader(V_FinalizeCS)
{ {
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
SamplerState bilinear_sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); SamplerState bilinear_sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState);
Texture2D<Vec4> bloom_tex = G_UniformDeref2D<Vec4>(frame.bloom_chain, 0); Texture2D<Vec4> bloom_tex = G_Deref(frame.bloom_chain, Texture2D<Vec4>);
RWTexture2D<Vec4> screen_tex = G_UniformDerefRW2D<Vec4>(frame.screen); RWTexture2D<Vec4> screen_tex = G_Deref(frame.screen, RWTexture2D<Vec4>);
Vec2 screen_pos = SV_DispatchThreadID + 0.5; Vec2 screen_pos = SV_DispatchThreadID + 0.5;
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims); b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);
@ -1271,8 +1267,8 @@ ComputeShader(V_FinalizeCS)
VertexShader(V_DVertVS, V_DVertPSInput) VertexShader(V_DVertVS, V_DVertPSInput)
{ {
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
StructuredBuffer<V_DVert> verts = G_UniformDeref<V_DVert>(frame.dverts); StructuredBuffer<V_DVert> verts = G_Deref(frame.dverts, StructuredBuffer<V_DVert>);
V_DVert vert = verts[SV_VertexID]; V_DVert vert = verts[SV_VertexID];

View File

@ -3,8 +3,8 @@
ComputeShader(PT_TestCS) ComputeShader(PT_TestCS)
{ {
PT_SharedFrame frame = G_UniformDeref<PT_SharedFrame>(PT_ShaderConst_Frame)[0]; PT_SharedFrame frame = G_Deref(PT_ShaderConst_Frame, StructuredBuffer<PT_SharedFrame>)[0];
RWTexture2D<Vec4> target_tex = G_UniformDerefRW2D<Vec4>(frame.compute_target); RWTexture2D<Vec4> target_tex = G_Deref(frame.compute_target, RWTexture2D<Vec4>);
Vec2U32 target_tex_size = G_Count2D(target_tex); Vec2U32 target_tex_size = G_Count2D(target_tex);
@ -35,10 +35,10 @@ VertexShader(PT_BlitVS, PT_BlitPSInput)
PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input) PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input)
{ {
PT_SharedFrame frame = G_UniformDeref<PT_SharedFrame>(PT_ShaderConst_Frame)[0]; PT_SharedFrame frame = G_Deref(PT_ShaderConst_Frame, StructuredBuffer<PT_SharedFrame>)[0];
SamplerState sampler = G_UniformDeref(frame.sampler); SamplerState sampler = G_Deref(frame.sampler, SamplerState);
Texture2D<Vec4> src = G_UniformDeref2D<Vec4>(frame.compute_target); Texture2D<Vec4> src = G_Deref(frame.compute_target, Texture2D<Vec4>);
Texture3D<u32> noise = G_UniformDeref3D<u32>(frame.noise_tex); Texture3D<u32> noise = G_Deref(frame.noise_tex, Texture3D<u32>);
Vec2 uv = input.src_uv; Vec2 uv = input.src_uv;
Vec4 tex_col = src.Sample(sampler, uv); Vec4 tex_col = src.Sample(sampler, uv);

View File

@ -6,8 +6,8 @@
VertexShader(UI_DRectVS, UI_DRectPSInput) VertexShader(UI_DRectVS, UI_DRectPSInput)
{ {
UI_GpuParams params = G_UniformDeref<UI_GpuParams>(UI_GpuConst_Params)[0]; UI_GpuParams params = G_Deref(UI_GpuConst_Params, StructuredBuffer<UI_GpuParams>)[0];
StructuredBuffer<UI_GpuRect> rects = G_UniformDeref<UI_GpuRect>(params.rects); StructuredBuffer<UI_GpuRect> rects = G_Deref(params.rects, StructuredBuffer<UI_GpuRect>);
UI_GpuRect rect = rects[SV_InstanceID]; UI_GpuRect rect = rects[SV_InstanceID];
Vec2 rect_uv = RectUvFromIdx(SV_VertexID); Vec2 rect_uv = RectUvFromIdx(SV_VertexID);
@ -33,8 +33,8 @@ VertexShader(UI_DRectVS, UI_DRectPSInput)
PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
{ {
UI_GpuParams params = G_UniformDeref<UI_GpuParams>(UI_GpuConst_Params)[0]; UI_GpuParams params = G_Deref(UI_GpuConst_Params, StructuredBuffer<UI_GpuParams>)[0];
SamplerState sampler = G_UniformDeref(params.sampler); SamplerState sampler = G_Deref(params.sampler, SamplerState);
UI_GpuRect rect = input.rect; UI_GpuRect rect = input.rect;
Vec2 rect_uv = input.rect_uv; Vec2 rect_uv = input.rect_uv;
@ -73,7 +73,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
} }
else else
{ {
Texture2D<Vec4> tex = G_DynamicDeref2D<Vec4>(rect.tex); Texture2D<Vec4> tex = G_Deref(rect.tex, Texture2D<Vec4>);
background_premul = tex.SampleLevel(sampler, input.tex_uv, 0); background_premul = tex.SampleLevel(sampler, input.tex_uv, 0);
background_premul.rgb *= background_premul.a; background_premul.rgb *= background_premul.a;
} }