revert to non-uniform gpu dereference by default for now
This commit is contained in:
parent
80c43d2ebd
commit
8e0167125f
@ -95,31 +95,25 @@ Struct(G_IndexBufferDesc)
|
||||
//~ Resource dereference
|
||||
|
||||
#if IsGpu
|
||||
//- Scalar/Uniform dereference (faster on AMD hardware)
|
||||
template<typename T> StructuredBuffer<T> G_UniformDeref(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 0]; }
|
||||
template<typename T> RWStructuredBuffer<T> G_UniformDerefRW(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 1]; }
|
||||
ByteAddressBuffer G_UniformDerefRaw(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 2]; }
|
||||
RWByteAddressBuffer G_UniformDerefRawRW(G_BufferRef r) { return ResourceDescriptorHeap[r.v + 3]; }
|
||||
template<typename T> Texture1D<T> G_UniformDeref1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; }
|
||||
template<typename T> Texture2D<T> G_UniformDeref2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; }
|
||||
template<typename T> Texture3D<T> G_UniformDeref3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 0]; }
|
||||
template<typename T> RWTexture1D<T> G_UniformDerefRW1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; }
|
||||
template<typename T> RWTexture2D<T> G_UniformDerefRW2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; }
|
||||
template<typename T> RWTexture3D<T> G_UniformDerefRW3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[r.v + (mip * 2) + 1]; }
|
||||
SamplerState G_UniformDeref(G_SamplerRef r) { return SamplerDescriptorHeap[r.v]; }
|
||||
// TODO: Add explicit uniform dereference functions, since on AMD hardware
|
||||
// non-uniform is slower and there are some shader-compilation issues in older
|
||||
// driver versions
|
||||
|
||||
//- Vector/Non-Uniform dereference
|
||||
template<typename T> StructuredBuffer<T> G_DynamicDeref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 0)]; }
|
||||
template<typename T> RWStructuredBuffer<T> G_DynamicDerefRW(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
|
||||
ByteAddressBuffer G_DynamicDerefRaw(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 2)]; }
|
||||
RWByteAddressBuffer G_DynamicDerefRawRW(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 3)]; }
|
||||
template<typename T> Texture1D<T> G_DynamicDeref1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; }
|
||||
template<typename T> Texture2D<T> G_DynamicDeref2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; }
|
||||
template<typename T> Texture3D<T> G_DynamicDeref3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)]; }
|
||||
template<typename T> RWTexture1D<T> G_DynamicDerefRW1D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; }
|
||||
template<typename T> RWTexture2D<T> G_DynamicDerefRW2D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; }
|
||||
template<typename T> RWTexture3D<T> G_DynamicDerefRW3D(G_TextureRef r, u32 mip=0) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)]; }
|
||||
SamplerState G_DynamicDeref(G_SamplerRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
||||
// Primary template for resource dereferencing. It is only declared here; each
// supported resource type provides its own specialization with a static
// Deref() that indexes the descriptor heap.
template<typename R> struct G_DerefImpl;
|
||||
// Sampler dereference (non-uniform index). Samplers are fetched from
// SamplerDescriptorHeap; ResourceDescriptorHeap only holds CBV/SRV/UAV
// descriptors, so indexing it with a sampler ref yields the wrong descriptor.
template<> struct G_DerefImpl< SamplerState > { static SamplerState Deref(G_SamplerRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; } };
|
||||
// Read-only structured buffer view: descriptor slot r.v + 0, non-uniform index.
template<typename T> struct G_DerefImpl< StructuredBuffer<T> >
{
  static StructuredBuffer<T> Deref(G_BufferRef r)
  {
    return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 0)];
  }
};
|
||||
// Read-write structured buffer view: descriptor slot r.v + 1, non-uniform index.
template<typename T> struct G_DerefImpl< RWStructuredBuffer<T> >
{
  static RWStructuredBuffer<T> Deref(G_BufferRef r)
  {
    return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)];
  }
};
|
||||
// Read-only raw (byte-address) buffer view: descriptor slot r.v + 2.
// Slot r.v + 3 is the read-write raw view, so it must not be used here.
template<> struct G_DerefImpl< ByteAddressBuffer > { static ByteAddressBuffer Deref(G_BufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 2)]; } };
|
||||
// Read-write raw (byte-address) buffer view: descriptor slot r.v + 3, non-uniform index.
template<> struct G_DerefImpl< RWByteAddressBuffer >
{
  static RWByteAddressBuffer Deref(G_BufferRef r)
  {
    return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 3)];
  }
};
|
||||
// Read-only 1D texture view for the given mip: descriptor slot r.v + mip * 2 + 0.
template<typename T> struct G_DerefImpl< Texture1D<T> >
{
  static Texture1D<T> Deref(G_TextureRef r, u32 mip=0)
  {
    return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)];
  }
};
|
||||
// Read-only 2D texture view for the given mip: descriptor slot r.v + mip * 2 + 0.
template<typename T> struct G_DerefImpl< Texture2D<T> >
{
  static Texture2D<T> Deref(G_TextureRef r, u32 mip=0)
  {
    return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)];
  }
};
|
||||
// Read-only 3D texture view for the given mip: descriptor slot r.v + mip * 2 + 0.
template<typename T> struct G_DerefImpl< Texture3D<T> >
{
  static Texture3D<T> Deref(G_TextureRef r, u32 mip=0)
  {
    return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 0)];
  }
};
|
||||
// Read-write 1D texture view for the given mip: descriptor slot r.v + mip * 2 + 1.
template<typename T> struct G_DerefImpl< RWTexture1D<T> >
{
  static RWTexture1D<T> Deref(G_TextureRef r, u32 mip=0)
  {
    return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)];
  }
};
|
||||
// Read-write 2D texture view for the given mip: descriptor slot r.v + mip * 2 + 1.
template<typename T> struct G_DerefImpl< RWTexture2D<T> >
{
  static RWTexture2D<T> Deref(G_TextureRef r, u32 mip=0)
  {
    return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)];
  }
};
|
||||
// Read-write 3D texture view for the given mip: descriptor slot r.v + mip * 2 + 1.
template<typename T> struct G_DerefImpl< RWTexture3D<T> >
{
  static RWTexture3D<T> Deref(G_TextureRef r, u32 mip=0)
  {
    return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + (mip * 2) + 1)];
  }
};
|
||||
|
||||
// Wrap since HLSL can't handle template double angle bracket '>>'
|
||||
// G_Deref(ref, type[, mip]): dereferences `ref` as resource type `type` by
// forwarding to G_DerefImpl< type >::Deref. Any extra arguments (e.g. a mip
// index for texture refs) are passed through; `##__VA_ARGS__` is the GNU-style
// form that drops the trailing comma when no extra arguments are supplied.
#define G_Deref(ref, type, ...) (G_DerefImpl< type >::Deref((ref), ##__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -229,7 +223,7 @@ Struct(G_FmtArg)
|
||||
|
||||
void G_CommitPrint(G_TempPrintBuffer buff)
|
||||
{
|
||||
RWByteAddressBuffer rw = G_UniformDerefRawRW(G_ShaderConst_PrintBuffer);
|
||||
RWByteAddressBuffer rw = G_Deref(G_ShaderConst_PrintBuffer, RWByteAddressBuffer);
|
||||
|
||||
if (buff.overflowed)
|
||||
{
|
||||
|
||||
@ -534,7 +534,7 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
while (!shutdown)
|
||||
{
|
||||
shutdown = Atomic32Fetch(&V.shutdown);
|
||||
P_tl.debug_draw_enabled = TweakBool("Vis debug draw", 1);
|
||||
P_tl.debug_draw_enabled = TweakBool("Vis debug draw", 0);
|
||||
|
||||
//////////////////////////////
|
||||
//- Begin frame
|
||||
@ -5369,8 +5369,6 @@ void V_TickForever(WaveLaneCtx *lane)
|
||||
}
|
||||
}
|
||||
|
||||
G_SyncReleaseFamilyLayout(cl, frame->bloom_chain);
|
||||
|
||||
//////////////////////////////
|
||||
//- Finalization pass
|
||||
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
|
||||
f32 V_RandFromPos(Vec3 pos)
|
||||
{
|
||||
Texture3D<u32> noise3d = G_UniformDeref3D<u32>(V_GpuConst_NoiseTex);
|
||||
Texture3D<u32> noise3d = G_Deref(V_GpuConst_NoiseTex, Texture3D<u32>);
|
||||
u32 noise = noise3d[(Vec3U32)pos % G_BasicNoiseDims];
|
||||
f32 rand = Norm16(noise);
|
||||
return rand;
|
||||
@ -57,8 +57,8 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
|
||||
//- Prepare shade
|
||||
ComputeShader(V_PrepareShadeCS)
|
||||
{
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
RWTexture2D<Vec4> shade = G_UniformDerefRW2D<Vec4>(frame.shade);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
RWTexture2D<Vec4> shade = G_Deref(frame.shade, RWTexture2D<Vec4>);
|
||||
Vec2 shade_pos = SV_DispatchThreadID + 0.5;
|
||||
if (all(shade_pos < G_Count2D(shade)))
|
||||
{
|
||||
@ -70,12 +70,12 @@ ComputeShader(V_PrepareShadeCS)
|
||||
//- Prepare cells
|
||||
ComputeShader(V_PrepareCellsCS)
|
||||
{
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
Texture2D<P_TileKind> tiles = G_UniformDeref2D<P_TileKind>(frame.tiles);
|
||||
RWTexture2D<Vec4> stains = G_UniformDerefRW2D<Vec4>(frame.stains);
|
||||
RWTexture2D<Vec4> dry_stains = G_UniformDerefRW2D<Vec4>(frame.dry_stains);
|
||||
RWTexture2D<f32> drynesses = G_UniformDerefRW2D<f32>(frame.drynesses);
|
||||
RWTexture2D<u32> occluders = G_UniformDerefRW2D<u32>(frame.occluders);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
Texture2D<P_TileKind> tiles = G_Deref(frame.tiles, Texture2D<P_TileKind>);
|
||||
RWTexture2D<Vec4> stains = G_Deref(frame.stains, RWTexture2D<Vec4>);
|
||||
RWTexture2D<Vec4> dry_stains = G_Deref(frame.dry_stains, RWTexture2D<Vec4>);
|
||||
RWTexture2D<f32> drynesses = G_Deref(frame.drynesses, RWTexture2D<f32>);
|
||||
RWTexture2D<u32> occluders = G_Deref(frame.occluders, RWTexture2D<u32>);
|
||||
|
||||
Vec2 cell_pos = SV_DispatchThreadID + 0.5;
|
||||
if (all(cell_pos < P_WorldCellsDims))
|
||||
@ -101,8 +101,8 @@ ComputeShader(V_PrepareCellsCS)
|
||||
Vec4 over_dry_stain = 0;
|
||||
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
|
||||
{
|
||||
RWTexture2D<u32> cells = G_DynamicDerefRW2D<u32>(frame.particle_cells[layer]);
|
||||
RWTexture2D<u32> densities = G_DynamicDerefRW2D<u32>(frame.particle_densities[layer]);
|
||||
RWTexture2D<u32> cells = G_Deref(frame.particle_cells[layer], RWTexture2D<u32>);
|
||||
RWTexture2D<u32> densities = G_Deref(frame.particle_densities[layer], RWTexture2D<u32>);
|
||||
u32 packed = cells[cell_pos];
|
||||
if (packed & (1 << 31))
|
||||
{
|
||||
@ -159,8 +159,8 @@ ComputeShader(V_PrepareCellsCS)
|
||||
//- Clear particles
|
||||
ComputeShader(V_ClearParticlesCS)
|
||||
{
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
RWStructuredBuffer<V_Particle> particles = G_UniformDerefRW<V_Particle>(frame.particles);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
RWStructuredBuffer<V_Particle> particles = G_Deref(frame.particles, RWStructuredBuffer<V_Particle>);
|
||||
u32 particle_idx = SV_DispatchThreadID;
|
||||
if (particle_idx < V_ParticlesCap)
|
||||
{
|
||||
@ -179,14 +179,14 @@ ComputeShader(V_BackdropDownCS)
|
||||
i32 mip_idx = V_GpuConst_MipIdx;
|
||||
b32 is_first_pass = mip_idx == 0;
|
||||
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror], SamplerState);
|
||||
Texture2D<Vec4> bd_up = (
|
||||
is_first_pass ?
|
||||
G_UniformDeref2D<Vec4>(frame.backdrop_src) :
|
||||
G_UniformDeref2D<Vec4>(frame.backdrop_chain, mip_idx - 1)
|
||||
G_Deref(frame.backdrop_src, Texture2D<Vec4>) :
|
||||
G_Deref(frame.backdrop_chain, Texture2D<Vec4>, mip_idx - 1)
|
||||
);
|
||||
RWTexture2D<Vec4> bd_down = G_UniformDerefRW2D<Vec4>(frame.backdrop_chain, mip_idx);
|
||||
RWTexture2D<Vec4> bd_down = G_Deref(frame.backdrop_chain, RWTexture2D<Vec4>, mip_idx);
|
||||
|
||||
Vec2 down_dims = G_Count2D(bd_down);
|
||||
Vec2 bd_pos = SV_DispatchThreadID + 0.5;
|
||||
@ -238,10 +238,10 @@ ComputeShader(V_BackdropUpCS)
|
||||
{
|
||||
i32 mip_idx = V_GpuConst_MipIdx;
|
||||
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
Texture2D<Vec4> bd_down = G_UniformDeref2D<Vec4>(frame.backdrop_chain, mip_idx + 1);
|
||||
RWTexture2D<Vec4> bd_up = G_UniformDerefRW2D<Vec4>(frame.backdrop_chain, mip_idx);
|
||||
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
Texture2D<Vec4> bd_down = G_Deref(frame.backdrop_chain, Texture2D<Vec4>, mip_idx + 1);
|
||||
RWTexture2D<Vec4> bd_up = G_Deref(frame.backdrop_chain, RWTexture2D<Vec4>, mip_idx);
|
||||
SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror], SamplerState);
|
||||
|
||||
Vec2 down_dims = G_Count2D(bd_down);
|
||||
Vec2 up_dims = G_Count2D(bd_up);
|
||||
@ -296,8 +296,8 @@ ComputeShader(V_BackdropUpCS)
|
||||
|
||||
VertexShader(V_QuadVS, V_QuadPSInput)
|
||||
{
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
StructuredBuffer<V_Quad> quads = G_UniformDeref<V_Quad>(frame.quads);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
StructuredBuffer<V_Quad> quads = G_Deref(frame.quads, StructuredBuffer<V_Quad>);
|
||||
|
||||
V_Quad quad = quads[SV_InstanceID];
|
||||
|
||||
@ -320,12 +320,12 @@ VertexShader(V_QuadVS, V_QuadPSInput)
|
||||
|
||||
PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
|
||||
{
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
||||
RWTexture2D<u32> occluders = G_UniformDerefRW2D<u32>(frame.occluders);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_PointClamp], SamplerState);
|
||||
RWTexture2D<u32> occluders = G_Deref(frame.occluders, RWTexture2D<u32>);
|
||||
|
||||
V_Quad quad = input.quad;
|
||||
Texture2D<Vec4> tex = G_DynamicDeref2D<Vec4>(quad.tex);
|
||||
Texture2D<Vec4> tex = G_Deref(quad.tex, Texture2D<Vec4>);
|
||||
|
||||
Vec2 world_pos = input.world_pos;
|
||||
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1));
|
||||
@ -356,9 +356,9 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
|
||||
|
||||
ComputeShader(V_EmitParticlesCS)
|
||||
{
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
StructuredBuffer<V_Emitter> emitters = G_UniformDeref<V_Emitter>(frame.emitters);
|
||||
RWStructuredBuffer<V_Particle> particles = G_UniformDerefRW<V_Particle>(frame.particles);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
StructuredBuffer<V_Emitter> emitters = G_Deref(frame.emitters, StructuredBuffer<V_Emitter>);
|
||||
RWStructuredBuffer<V_Particle> particles = G_Deref(frame.particles, RWStructuredBuffer<V_Particle>);
|
||||
|
||||
u32 emitter_idx = SV_DispatchThreadID;
|
||||
if (emitter_idx < frame.emitters_count)
|
||||
@ -387,10 +387,10 @@ ComputeShader(V_EmitParticlesCS)
|
||||
|
||||
ComputeShader(V_SimParticlesCS)
|
||||
{
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
Texture2D<P_TileKind> tiles = G_UniformDeref2D<P_TileKind>(frame.tiles);
|
||||
RWStructuredBuffer<V_Particle> particles = G_UniformDerefRW<V_Particle>(frame.particles);
|
||||
Texture2D<u32> occluders = G_UniformDeref2D<u32>(frame.occluders);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
Texture2D<P_TileKind> tiles = G_Deref(frame.tiles, Texture2D<P_TileKind>);
|
||||
RWStructuredBuffer<V_Particle> particles = G_Deref(frame.particles, RWStructuredBuffer<V_Particle>);
|
||||
Texture2D<u32> occluders = G_Deref(frame.occluders, Texture2D<u32>);
|
||||
|
||||
u32 particle_idx = SV_DispatchThreadID;
|
||||
if (particle_idx < V_ParticlesCap)
|
||||
@ -415,7 +415,7 @@ ComputeShader(V_SimParticlesCS)
|
||||
if (particle.kind < 0)
|
||||
{
|
||||
u32 emitter_idx = -particle.kind - 1;
|
||||
V_Emitter emitter = G_UniformDeref<V_Emitter>(frame.emitters)[emitter_idx];
|
||||
V_Emitter emitter = G_Deref(frame.emitters, StructuredBuffer<V_Emitter>)[emitter_idx];
|
||||
|
||||
f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle);
|
||||
f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed);
|
||||
@ -430,8 +430,8 @@ ComputeShader(V_SimParticlesCS)
|
||||
if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
|
||||
{
|
||||
V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
|
||||
RWTexture2D<u32> cells = G_DynamicDerefRW2D<u32>(frame.particle_cells[desc.layer]);
|
||||
RWTexture2D<u32> densities = G_DynamicDerefRW2D<u32>(frame.particle_densities[desc.layer]);
|
||||
RWTexture2D<u32> cells = G_Deref(frame.particle_cells[desc.layer], RWTexture2D<u32>);
|
||||
RWTexture2D<u32> densities = G_Deref(frame.particle_densities[desc.layer], RWTexture2D<u32>);
|
||||
|
||||
u32 packed = 0;
|
||||
packed |= (particle_idx & ((1 >> 24) - 1)) << 0;
|
||||
@ -664,12 +664,12 @@ ComputeShader(V_SimParticlesCS)
|
||||
|
||||
ComputeShader(V_ShadeCS)
|
||||
{
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
||||
Texture2D<P_TileKind> tiles = G_UniformDeref2D<P_TileKind>(frame.tiles);
|
||||
Texture2D<Vec4> albedo_tex = G_UniformDeref2D<Vec4>(frame.albedo);
|
||||
RWTexture2D<Vec4> shade_tex = G_UniformDerefRW2D<Vec4>(frame.shade);
|
||||
Texture2D<f32> drynesses = G_UniformDeref2D<f32>(frame.drynesses);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_PointClamp], SamplerState);
|
||||
Texture2D<P_TileKind> tiles = G_Deref(frame.tiles, Texture2D<P_TileKind>);
|
||||
Texture2D<Vec4> albedo_tex = G_Deref(frame.albedo, Texture2D<Vec4>);
|
||||
RWTexture2D<Vec4> shade_tex = G_Deref(frame.shade, RWTexture2D<Vec4>);
|
||||
Texture2D<f32> drynesses = G_Deref(frame.drynesses, Texture2D<f32>);
|
||||
|
||||
Vec2 shade_pos = SV_DispatchThreadID + 0.5;
|
||||
Vec2 world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1));
|
||||
@ -700,18 +700,18 @@ ComputeShader(V_ShadeCS)
|
||||
|
||||
ComputeShader(V_CompositeCS)
|
||||
{
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
// Texture2D<Vec4> shade_tex = G_UniformDeref2D<Vec4>(frame.shade);
|
||||
SamplerState point_sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
||||
SamplerState bilinear_sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
Texture2D<Vec4> albedo_tex = G_UniformDeref2D<Vec4>(frame.albedo);
|
||||
RWTexture2D<Vec4> screen_tex = G_UniformDerefRW2D<Vec4>(frame.screen);
|
||||
Texture2D<Vec4> stains = G_UniformDeref2D<Vec4>(frame.stains);
|
||||
Texture2D<Vec4> dry_stains = G_UniformDeref2D<Vec4>(frame.dry_stains);
|
||||
Texture2D<f32> drynesses = G_UniformDeref2D<f32>(frame.drynesses);
|
||||
Texture2D<P_TileKind> tiles = G_UniformDeref2D<P_TileKind>(frame.tiles);
|
||||
Texture2D<Vec4> backdrop = G_UniformDeref2D<Vec4>(frame.backdrop_chain);
|
||||
StructuredBuffer<V_Particle> particles = G_UniformDeref<V_Particle>(frame.particles);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
// Texture2D<Vec4> shade_tex = G_Deref(frame.shade, Texture2D<Vec4>);
|
||||
SamplerState point_sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_PointClamp], SamplerState);
|
||||
SamplerState bilinear_sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState);
|
||||
Texture2D<Vec4> albedo_tex = G_Deref(frame.albedo, Texture2D<Vec4>);
|
||||
RWTexture2D<Vec4> screen_tex = G_Deref(frame.screen, RWTexture2D<Vec4>);
|
||||
Texture2D<Vec4> stains = G_Deref(frame.stains, Texture2D<Vec4>);
|
||||
Texture2D<Vec4> dry_stains = G_Deref(frame.dry_stains, Texture2D<Vec4>);
|
||||
Texture2D<f32> drynesses = G_Deref(frame.drynesses, Texture2D<f32>);
|
||||
Texture2D<P_TileKind> tiles = G_Deref(frame.tiles, Texture2D<P_TileKind>);
|
||||
Texture2D<Vec4> backdrop = G_Deref(frame.backdrop_chain, Texture2D<Vec4>);
|
||||
StructuredBuffer<V_Particle> particles = G_Deref(frame.particles, StructuredBuffer<V_Particle>);
|
||||
|
||||
Vec2 screen_pos = SV_DispatchThreadID + 0.5;
|
||||
Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1));
|
||||
@ -819,7 +819,7 @@ ComputeShader(V_CompositeCS)
|
||||
else if (tile != P_TileKind_Empty)
|
||||
{
|
||||
V_TileDesc tile_desc = frame.tile_descs[tile];
|
||||
Texture2D<Vec4> tile_tex = G_DynamicDeref2D<Vec4>(tile_desc.tex);
|
||||
Texture2D<Vec4> tile_tex = G_Deref(tile_desc.tex, Texture2D<Vec4>);
|
||||
Vec2 samp_t = clamp(frac(world_pos), 0.00001, 1.0 - 0.00001);
|
||||
Vec2 samp_uv = lerp(tile_desc.tex_slice_uv.p0, tile_desc.tex_slice_uv.p1, samp_t);
|
||||
tile_color = tile_tex.SampleLevel(point_sampler, samp_uv, 0);
|
||||
@ -848,8 +848,8 @@ ComputeShader(V_CompositeCS)
|
||||
|
||||
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
|
||||
{
|
||||
Texture2D<u32> cells = G_UniformDeref2D<u32>(frame.particle_cells[layer]);
|
||||
Texture2D<u32> densities = G_UniformDeref2D<u32>(frame.particle_densities[layer]);
|
||||
Texture2D<u32> cells = G_Deref(frame.particle_cells[layer], Texture2D<u32>);
|
||||
Texture2D<u32> densities = G_Deref(frame.particle_densities[layer], Texture2D<u32>);
|
||||
u32 packed = cells[cell_pos];
|
||||
V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F);
|
||||
if (particle_kind != V_ParticleKind_None)
|
||||
@ -1104,14 +1104,14 @@ ComputeShader(V_BloomDownCS)
|
||||
i32 mip_idx = V_GpuConst_MipIdx;
|
||||
b32 is_first_pass = mip_idx == 1;
|
||||
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
RWTexture2D<Vec4> bloom_down = G_UniformDerefRW2D<Vec4>(frame.bloom_chain, mip_idx - 1);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState);
|
||||
RWTexture2D<Vec4> bloom_down = G_Deref(frame.bloom_chain, RWTexture2D<Vec4>, mip_idx - 1);
|
||||
|
||||
Texture2D<Vec4> bloom_up = (
|
||||
is_first_pass ?
|
||||
G_UniformDeref2D<Vec4>(frame.screen) :
|
||||
G_UniformDeref2D<Vec4>(frame.bloom_chain, mip_idx - 2)
|
||||
G_Deref(frame.screen, Texture2D<Vec4>) :
|
||||
G_Deref(frame.bloom_chain, Texture2D<Vec4>, mip_idx - 1)
|
||||
);
|
||||
|
||||
Vec2 down_dims = G_Count2D(bloom_down);
|
||||
@ -1173,20 +1173,16 @@ ComputeShader(V_BloomUpCS)
|
||||
{
|
||||
i32 mip_idx = V_GpuConst_MipIdx;
|
||||
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
Texture2D<Vec4> bloom_down = G_UniformDeref2D<Vec4>(frame.bloom_chain, mip_idx);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
SamplerState sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState);
|
||||
Texture2D<Vec4> bloom_down = G_Deref(frame.bloom_chain, Texture2D<Vec4>, mip_idx);
|
||||
|
||||
b32 is_last_pass = mip_idx == 0;
|
||||
RWTexture2D<Vec4> bloom_up;
|
||||
if (is_last_pass)
|
||||
{
|
||||
bloom_up = G_UniformDerefRW2D<Vec4>(frame.screen);
|
||||
}
|
||||
else
|
||||
{
|
||||
bloom_up = G_UniformDerefRW2D<Vec4>(frame.bloom_chain, mip_idx - 1);
|
||||
}
|
||||
RWTexture2D<Vec4> bloom_up = (
|
||||
is_last_pass ?
|
||||
G_Deref(frame.screen, RWTexture2D<Vec4>) :
|
||||
G_Deref(frame.bloom_chain, RWTexture2D<Vec4>, mip_idx - 1)
|
||||
);
|
||||
|
||||
Vec2 down_dims = G_Count2D(bloom_down);
|
||||
Vec2 up_dims = G_Count2D(bloom_up);
|
||||
@ -1238,10 +1234,10 @@ ComputeShader(V_BloomUpCS)
|
||||
|
||||
ComputeShader(V_FinalizeCS)
|
||||
{
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
SamplerState bilinear_sampler = G_UniformDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||
Texture2D<Vec4> bloom_tex = G_UniformDeref2D<Vec4>(frame.bloom_chain, 0);
|
||||
RWTexture2D<Vec4> screen_tex = G_UniformDerefRW2D<Vec4>(frame.screen);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
SamplerState bilinear_sampler = G_Deref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp], SamplerState);
|
||||
Texture2D<Vec4> bloom_tex = G_Deref(frame.bloom_chain, Texture2D<Vec4>);
|
||||
RWTexture2D<Vec4> screen_tex = G_Deref(frame.screen, RWTexture2D<Vec4>);
|
||||
|
||||
Vec2 screen_pos = SV_DispatchThreadID + 0.5;
|
||||
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);
|
||||
@ -1271,8 +1267,8 @@ ComputeShader(V_FinalizeCS)
|
||||
|
||||
VertexShader(V_DVertVS, V_DVertPSInput)
|
||||
{
|
||||
V_SharedFrame frame = G_UniformDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||
StructuredBuffer<V_DVert> verts = G_UniformDeref<V_DVert>(frame.dverts);
|
||||
V_SharedFrame frame = G_Deref(V_GpuConst_Frame, StructuredBuffer<V_SharedFrame>)[0];
|
||||
StructuredBuffer<V_DVert> verts = G_Deref(frame.dverts, StructuredBuffer<V_DVert>);
|
||||
|
||||
V_DVert vert = verts[SV_VertexID];
|
||||
|
||||
|
||||
@ -3,8 +3,8 @@
|
||||
|
||||
ComputeShader(PT_TestCS)
|
||||
{
|
||||
PT_SharedFrame frame = G_UniformDeref<PT_SharedFrame>(PT_ShaderConst_Frame)[0];
|
||||
RWTexture2D<Vec4> target_tex = G_UniformDerefRW2D<Vec4>(frame.compute_target);
|
||||
PT_SharedFrame frame = G_Deref(PT_ShaderConst_Frame, StructuredBuffer<PT_SharedFrame>)[0];
|
||||
RWTexture2D<Vec4> target_tex = G_Deref(frame.compute_target, RWTexture2D<Vec4>);
|
||||
|
||||
Vec2U32 target_tex_size = G_Count2D(target_tex);
|
||||
|
||||
@ -35,10 +35,10 @@ VertexShader(PT_BlitVS, PT_BlitPSInput)
|
||||
|
||||
PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input)
|
||||
{
|
||||
PT_SharedFrame frame = G_UniformDeref<PT_SharedFrame>(PT_ShaderConst_Frame)[0];
|
||||
SamplerState sampler = G_UniformDeref(frame.sampler);
|
||||
Texture2D<Vec4> src = G_UniformDeref2D<Vec4>(frame.compute_target);
|
||||
Texture3D<u32> noise = G_UniformDeref3D<u32>(frame.noise_tex);
|
||||
PT_SharedFrame frame = G_Deref(PT_ShaderConst_Frame, StructuredBuffer<PT_SharedFrame>)[0];
|
||||
SamplerState sampler = G_Deref(frame.sampler, SamplerState);
|
||||
Texture2D<Vec4> src = G_Deref(frame.compute_target, Texture2D<Vec4>);
|
||||
Texture3D<u32> noise = G_Deref(frame.noise_tex, Texture3D<u32>);
|
||||
|
||||
Vec2 uv = input.src_uv;
|
||||
Vec4 tex_col = src.Sample(sampler, uv);
|
||||
|
||||
@ -6,8 +6,8 @@
|
||||
|
||||
VertexShader(UI_DRectVS, UI_DRectPSInput)
|
||||
{
|
||||
UI_GpuParams params = G_UniformDeref<UI_GpuParams>(UI_GpuConst_Params)[0];
|
||||
StructuredBuffer<UI_GpuRect> rects = G_UniformDeref<UI_GpuRect>(params.rects);
|
||||
UI_GpuParams params = G_Deref(UI_GpuConst_Params, StructuredBuffer<UI_GpuParams>)[0];
|
||||
StructuredBuffer<UI_GpuRect> rects = G_Deref(params.rects, StructuredBuffer<UI_GpuRect>);
|
||||
|
||||
UI_GpuRect rect = rects[SV_InstanceID];
|
||||
Vec2 rect_uv = RectUvFromIdx(SV_VertexID);
|
||||
@ -33,8 +33,8 @@ VertexShader(UI_DRectVS, UI_DRectPSInput)
|
||||
|
||||
PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
|
||||
{
|
||||
UI_GpuParams params = G_UniformDeref<UI_GpuParams>(UI_GpuConst_Params)[0];
|
||||
SamplerState sampler = G_UniformDeref(params.sampler);
|
||||
UI_GpuParams params = G_Deref(UI_GpuConst_Params, StructuredBuffer<UI_GpuParams>)[0];
|
||||
SamplerState sampler = G_Deref(params.sampler, SamplerState);
|
||||
|
||||
UI_GpuRect rect = input.rect;
|
||||
Vec2 rect_uv = input.rect_uv;
|
||||
@ -73,7 +73,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
|
||||
}
|
||||
else
|
||||
{
|
||||
Texture2D<Vec4> tex = G_DynamicDeref2D<Vec4>(rect.tex);
|
||||
Texture2D<Vec4> tex = G_Deref(rect.tex, Texture2D<Vec4>);
|
||||
background_premul = tex.SampleLevel(sampler, input.tex_uv, 0);
|
||||
background_premul.rgb *= background_premul.a;
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user