explicit uniform/non-uniform resource dereference
This commit is contained in:
parent
9dbe56d193
commit
8e9f7b4945
@ -149,7 +149,7 @@ Inline f64 Norm53(u64 v)
|
|||||||
//~ Comparison
|
//~ Comparison
|
||||||
|
|
||||||
#define MatchFloor(a, b) all(floor(a) == floor(b))
|
#define MatchFloor(a, b) all(floor(a) == floor(b))
|
||||||
#define IsInside(pos, dims) (all(pos >= 0) && all(pos <= (dims)))
|
// True when every component of pos lies in the half-open range [0, dims).
#define IsInside(pos, dims) all(and((pos) >= 0, (pos) < (dims)))
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Rotation
|
//~ Rotation
|
||||||
|
|||||||
@ -83,24 +83,31 @@ Enum(G_BasicSamplerKind)
|
|||||||
//~ Resource dereference
|
//~ Resource dereference
|
||||||
|
|
||||||
#if IsGpu
|
#if IsGpu
|
||||||
// TODO: Non-uniform resource access currently is assumed as the default
|
//- Scalar/Uniform dereference
|
||||||
// behavior. We may want to add explicit "uniform" variants for
|
// Scalar/uniform sampler lookup: indexes SamplerDescriptorHeap with r.v directly
// (no NonUniformResourceIndex wrapper).
SamplerState G_SDeref(G_SamplerStateRef r)
{
  u32 heap_slot = r.v;
  return SamplerDescriptorHeap[heap_slot];
}
|
||||||
// optimization on AMD hardware in the future.
|
// Scalar/uniform read-only structured buffer lookup: indexes ResourceDescriptorHeap
// with r.v directly (no NonUniformResourceIndex wrapper).
template<typename T>
StructuredBuffer<T> G_SDeref(G_StructuredBufferRef r)
{
  u32 heap_slot = r.v;
  return ResourceDescriptorHeap[heap_slot];
}
|
||||||
|
// Scalar/uniform read-only byte address buffer lookup: indexes ResourceDescriptorHeap
// with r.v directly (no NonUniformResourceIndex wrapper).
ByteAddressBuffer G_SDeref(G_ByteAddressBufferRef r)
{
  u32 heap_slot = r.v;
  return ResourceDescriptorHeap[heap_slot];
}
|
||||||
|
// Scalar/uniform read-only 1D texture lookup: indexes ResourceDescriptorHeap
// with r.v directly (no NonUniformResourceIndex wrapper).
template<typename T>
Texture1D<T> G_SDeref(G_Texture1DRef r)
{
  u32 heap_slot = r.v;
  return ResourceDescriptorHeap[heap_slot];
}
|
||||||
|
// Scalar/uniform read-only 2D texture lookup: indexes ResourceDescriptorHeap
// with r.v directly (no NonUniformResourceIndex wrapper).
template<typename T>
Texture2D<T> G_SDeref(G_Texture2DRef r)
{
  u32 heap_slot = r.v;
  return ResourceDescriptorHeap[heap_slot];
}
|
||||||
|
// Scalar/uniform read-only 3D texture lookup: indexes ResourceDescriptorHeap
// with r.v directly (no NonUniformResourceIndex wrapper).
template<typename T>
Texture3D<T> G_SDeref(G_Texture3DRef r)
{
  u32 heap_slot = r.v;
  return ResourceDescriptorHeap[heap_slot];
}
|
||||||
|
// Scalar/uniform read-write structured buffer lookup: indexes ResourceDescriptorHeap
// at r.v + 1 (no NonUniformResourceIndex wrapper).
// NOTE(review): presumably the RW descriptor sits in the slot after the read-only one — confirm.
template<typename T>
RWStructuredBuffer<T> G_SDerefRW(G_StructuredBufferRef r)
{
  u32 rw_slot = r.v + 1;
  return ResourceDescriptorHeap[rw_slot];
}
|
||||||
|
// Scalar/uniform read-write byte address buffer lookup: indexes ResourceDescriptorHeap
// at r.v + 1 (no NonUniformResourceIndex wrapper).
// NOTE(review): presumably the RW descriptor sits in the slot after the read-only one — confirm.
RWByteAddressBuffer G_SDerefRW(G_ByteAddressBufferRef r)
{
  u32 rw_slot = r.v + 1;
  return ResourceDescriptorHeap[rw_slot];
}
|
||||||
|
// Scalar/uniform read-write 1D texture lookup: indexes ResourceDescriptorHeap
// at r.v + 1 (no NonUniformResourceIndex wrapper).
// NOTE(review): presumably the RW descriptor sits in the slot after the read-only one — confirm.
template<typename T>
RWTexture1D<T> G_SDerefRW(G_Texture1DRef r)
{
  u32 rw_slot = r.v + 1;
  return ResourceDescriptorHeap[rw_slot];
}
|
||||||
|
// Scalar/uniform read-write 2D texture lookup: indexes ResourceDescriptorHeap
// at r.v + 1 (no NonUniformResourceIndex wrapper).
// NOTE(review): presumably the RW descriptor sits in the slot after the read-only one — confirm.
template<typename T>
RWTexture2D<T> G_SDerefRW(G_Texture2DRef r)
{
  u32 rw_slot = r.v + 1;
  return ResourceDescriptorHeap[rw_slot];
}
|
||||||
|
// Scalar/uniform read-write 3D texture lookup: indexes ResourceDescriptorHeap
// at r.v + 1 (no NonUniformResourceIndex wrapper).
// NOTE(review): presumably the RW descriptor sits in the slot after the read-only one — confirm.
template<typename T>
RWTexture3D<T> G_SDerefRW(G_Texture3DRef r)
{
  u32 rw_slot = r.v + 1;
  return ResourceDescriptorHeap[rw_slot];
}
|
||||||
|
|
||||||
template<typename T> StructuredBuffer<T> G_Dereference(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
//- Vector/Non-Uniform dereference (slower on AMD)
|
||||||
template<typename T> RWStructuredBuffer<T> G_DereferenceRW(G_StructuredBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
|
// Vector/non-uniform sampler lookup: wraps the index r.v in NonUniformResourceIndex
// before indexing SamplerDescriptorHeap.
SamplerState G_VDeref(G_SamplerStateRef r)
{
  u32 heap_slot = r.v;
  return SamplerDescriptorHeap[NonUniformResourceIndex(heap_slot)];
}
|
||||||
|
// Vector/non-uniform read-only structured buffer lookup: wraps the index r.v in
// NonUniformResourceIndex before indexing ResourceDescriptorHeap.
template<typename T>
StructuredBuffer<T> G_VDeref(G_StructuredBufferRef r)
{
  u32 heap_slot = r.v;
  return ResourceDescriptorHeap[NonUniformResourceIndex(heap_slot)];
}
|
||||||
ByteAddressBuffer G_Dereference(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
// Vector/non-uniform read-only byte address buffer lookup: wraps the index r.v in
// NonUniformResourceIndex before indexing ResourceDescriptorHeap.
ByteAddressBuffer G_VDeref(G_ByteAddressBufferRef r)
{
  u32 heap_slot = r.v;
  return ResourceDescriptorHeap[NonUniformResourceIndex(heap_slot)];
}
|
||||||
RWByteAddressBuffer G_DereferenceRW(G_ByteAddressBufferRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
|
// Vector/non-uniform read-only 1D texture lookup: wraps the index r.v in
// NonUniformResourceIndex before indexing ResourceDescriptorHeap.
template<typename T>
Texture1D<T> G_VDeref(G_Texture1DRef r)
{
  u32 heap_slot = r.v;
  return ResourceDescriptorHeap[NonUniformResourceIndex(heap_slot)];
}
|
||||||
|
// Vector/non-uniform read-only 2D texture lookup: wraps the index r.v in
// NonUniformResourceIndex before indexing ResourceDescriptorHeap.
template<typename T>
Texture2D<T> G_VDeref(G_Texture2DRef r)
{
  u32 heap_slot = r.v;
  return ResourceDescriptorHeap[NonUniformResourceIndex(heap_slot)];
}
|
||||||
template<typename T> Texture1D<T> G_Dereference(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
// Vector/non-uniform read-only 3D texture lookup: wraps the index r.v in
// NonUniformResourceIndex before indexing ResourceDescriptorHeap.
template<typename T>
Texture3D<T> G_VDeref(G_Texture3DRef r)
{
  u32 heap_slot = r.v;
  return ResourceDescriptorHeap[NonUniformResourceIndex(heap_slot)];
}
|
||||||
template<typename T> Texture2D<T> G_Dereference(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
// Vector/non-uniform read-write structured buffer lookup: wraps the index r.v + 1 in
// NonUniformResourceIndex before indexing ResourceDescriptorHeap.
// NOTE(review): presumably the RW descriptor sits in the slot after the read-only one — confirm.
template<typename T>
RWStructuredBuffer<T> G_VDerefRW(G_StructuredBufferRef r)
{
  u32 rw_slot = r.v + 1;
  return ResourceDescriptorHeap[NonUniformResourceIndex(rw_slot)];
}
|
||||||
template<typename T> Texture3D<T> G_Dereference(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
// Vector/non-uniform read-write byte address buffer lookup: wraps the index r.v + 1 in
// NonUniformResourceIndex before indexing ResourceDescriptorHeap.
// NOTE(review): presumably the RW descriptor sits in the slot after the read-only one — confirm.
RWByteAddressBuffer G_VDerefRW(G_ByteAddressBufferRef r)
{
  u32 rw_slot = r.v + 1;
  return ResourceDescriptorHeap[NonUniformResourceIndex(rw_slot)];
}
|
||||||
template<typename T> RWTexture1D<T> G_DereferenceRW(G_Texture1DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
|
// Vector/non-uniform read-write 1D texture lookup: wraps the index r.v + 1 in
// NonUniformResourceIndex before indexing ResourceDescriptorHeap.
// NOTE(review): presumably the RW descriptor sits in the slot after the read-only one — confirm.
template<typename T>
RWTexture1D<T> G_VDerefRW(G_Texture1DRef r)
{
  u32 rw_slot = r.v + 1;
  return ResourceDescriptorHeap[NonUniformResourceIndex(rw_slot)];
}
|
||||||
template<typename T> RWTexture2D<T> G_DereferenceRW(G_Texture2DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
|
// Vector/non-uniform read-write 2D texture lookup: wraps the index r.v + 1 in
// NonUniformResourceIndex before indexing ResourceDescriptorHeap.
// NOTE(review): presumably the RW descriptor sits in the slot after the read-only one — confirm.
template<typename T>
RWTexture2D<T> G_VDerefRW(G_Texture2DRef r)
{
  u32 rw_slot = r.v + 1;
  return ResourceDescriptorHeap[NonUniformResourceIndex(rw_slot)];
}
|
||||||
template<typename T> RWTexture3D<T> G_DereferenceRW(G_Texture3DRef r) { return ResourceDescriptorHeap[NonUniformResourceIndex(r.v + 1)]; }
|
// Vector/non-uniform read-write 3D texture lookup: wraps the index r.v + 1 in
// NonUniformResourceIndex before indexing ResourceDescriptorHeap.
// NOTE(review): presumably the RW descriptor sits in the slot after the read-only one — confirm.
template<typename T>
RWTexture3D<T> G_VDerefRW(G_Texture3DRef r)
{
  u32 rw_slot = r.v + 1;
  return ResourceDescriptorHeap[NonUniformResourceIndex(rw_slot)];
}
|
||||||
|
|
||||||
SamplerState G_Dereference(G_SamplerStateRef r) { return SamplerDescriptorHeap[NonUniformResourceIndex(r.v)]; }
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
@ -216,7 +223,7 @@ Struct(G_FmtArg)
|
|||||||
|
|
||||||
void G_CommitPrint(G_TempPrintBuffer buff)
|
void G_CommitPrint(G_TempPrintBuffer buff)
|
||||||
{
|
{
|
||||||
RWByteAddressBuffer rw = G_DereferenceRW(G_ShaderConst_PrintBufferRef);
|
RWByteAddressBuffer rw = G_SDerefRW(G_ShaderConst_PrintBufferRef);
|
||||||
|
|
||||||
if (buff.overflowed)
|
if (buff.overflowed)
|
||||||
{
|
{
|
||||||
|
|||||||
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
f32 V_RandFromPos(Vec3 pos)
|
f32 V_RandFromPos(Vec3 pos)
|
||||||
{
|
{
|
||||||
Texture3D<u32> noise3d = G_Dereference<u32>(V_GpuConst_NoiseTex);
|
Texture3D<u32> noise3d = G_SDeref<u32>(V_GpuConst_NoiseTex);
|
||||||
// TODO: Compile-time noise dims
|
// TODO: Compile-time noise dims
|
||||||
u32 noise = noise3d[(Vec3U32)pos % countof(noise3d)];
|
u32 noise = noise3d[(Vec3U32)pos % countof(noise3d)];
|
||||||
f32 rand = Norm16(noise);
|
f32 rand = Norm16(noise);
|
||||||
@ -58,8 +58,8 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
|
|||||||
//- Prepare shade
|
//- Prepare shade
|
||||||
ImplComputeShader2D(V_PrepareShadeCS)
|
ImplComputeShader2D(V_PrepareShadeCS)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
RWTexture2D<Vec4> shade = G_DereferenceRW<Vec4>(frame.shade);
|
RWTexture2D<Vec4> shade = G_SDerefRW<Vec4>(frame.shade);
|
||||||
Vec2 shade_pos = SV_DispatchThreadID + 0.5;
|
Vec2 shade_pos = SV_DispatchThreadID + 0.5;
|
||||||
if (all(shade_pos < countof(shade)))
|
if (all(shade_pos < countof(shade)))
|
||||||
{
|
{
|
||||||
@ -71,12 +71,12 @@ ImplComputeShader2D(V_PrepareShadeCS)
|
|||||||
//- Prepare cells
|
//- Prepare cells
|
||||||
ImplComputeShader2D(V_PrepareCellsCS)
|
ImplComputeShader2D(V_PrepareCellsCS)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
|
Texture2D<P_TileKind> tiles = G_SDeref<P_TileKind>(frame.tiles);
|
||||||
RWTexture2D<Vec4> stains = G_DereferenceRW<Vec4>(frame.stains);
|
RWTexture2D<Vec4> stains = G_SDerefRW<Vec4>(frame.stains);
|
||||||
RWTexture2D<Vec4> dry_stains = G_DereferenceRW<Vec4>(frame.dry_stains);
|
RWTexture2D<Vec4> dry_stains = G_SDerefRW<Vec4>(frame.dry_stains);
|
||||||
RWTexture2D<f32> drynesses = G_DereferenceRW<f32>(frame.drynesses);
|
RWTexture2D<f32> drynesses = G_SDerefRW<f32>(frame.drynesses);
|
||||||
RWTexture2D<u32> occluders = G_DereferenceRW<u32>(frame.occluders);
|
RWTexture2D<u32> occluders = G_SDerefRW<u32>(frame.occluders);
|
||||||
|
|
||||||
Vec2 cell_pos = SV_DispatchThreadID + 0.5;
|
Vec2 cell_pos = SV_DispatchThreadID + 0.5;
|
||||||
if (all(cell_pos < P_WorldCellsDims))
|
if (all(cell_pos < P_WorldCellsDims))
|
||||||
@ -102,8 +102,8 @@ ImplComputeShader2D(V_PrepareCellsCS)
|
|||||||
Vec4 over_dry_stain = 0;
|
Vec4 over_dry_stain = 0;
|
||||||
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
|
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
|
||||||
{
|
{
|
||||||
RWTexture2D<u32> cells = G_DereferenceRW<u32>(frame.particle_cells[layer]);
|
RWTexture2D<u32> cells = G_VDerefRW<u32>(frame.particle_cells[layer]);
|
||||||
RWTexture2D<u32> densities = G_DereferenceRW<u32>(frame.particle_densities[layer]);
|
RWTexture2D<u32> densities = G_VDerefRW<u32>(frame.particle_densities[layer]);
|
||||||
u32 packed = cells[cell_pos];
|
u32 packed = cells[cell_pos];
|
||||||
if (packed & (1 << 31))
|
if (packed & (1 << 31))
|
||||||
{
|
{
|
||||||
@ -160,8 +160,8 @@ ImplComputeShader2D(V_PrepareCellsCS)
|
|||||||
//- Clear particles
|
//- Clear particles
|
||||||
ImplComputeShader(V_ClearParticlesCS)
|
ImplComputeShader(V_ClearParticlesCS)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
RWStructuredBuffer<V_Particle> particles = G_DereferenceRW<V_Particle>(frame.particles);
|
RWStructuredBuffer<V_Particle> particles = G_SDerefRW<V_Particle>(frame.particles);
|
||||||
u32 particle_idx = SV_DispatchThreadID;
|
u32 particle_idx = SV_DispatchThreadID;
|
||||||
if (particle_idx < V_ParticlesCap)
|
if (particle_idx < V_ParticlesCap)
|
||||||
{
|
{
|
||||||
@ -180,20 +180,19 @@ ImplComputeShader2D(V_BackdropDownCS)
|
|||||||
i32 mip_idx = V_GpuConst_MipIdx;
|
i32 mip_idx = V_GpuConst_MipIdx;
|
||||||
b32 is_first_pass = mip_idx == 0;
|
b32 is_first_pass = mip_idx == 0;
|
||||||
|
|
||||||
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]);
|
||||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]);
|
|
||||||
|
|
||||||
Texture2D<Vec4> bd_up;
|
Texture2D<Vec4> bd_up;
|
||||||
if (is_first_pass)
|
if (is_first_pass)
|
||||||
{
|
{
|
||||||
bd_up = G_Dereference<Vec4>(frame.backdrop_src);
|
bd_up = G_SDeref<Vec4>(frame.backdrop_src);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
bd_up = G_Dereference<Vec4>(frame.backdrop_mips[mip_idx - 1]);
|
bd_up = G_SDeref<Vec4>(frame.backdrop_mips[mip_idx - 1]);
|
||||||
}
|
}
|
||||||
RWTexture2D<Vec4> bd_down = G_DereferenceRW<Vec4>(frame.backdrop_mips[mip_idx]);
|
// mip_idx is read from V_GpuConst_MipIdx and is already used with the scalar/uniform
// dereference for bd_up in this shader, so use the scalar/uniform path here as well.
// NOTE(review): assumes V_GpuConst_MipIdx is uniform across the dispatch — confirm.
RWTexture2D<Vec4> bd_down = G_SDerefRW<Vec4>(frame.backdrop_mips[mip_idx]);
|
||||||
|
|
||||||
Vec2 down_dims = countof(bd_down);
|
Vec2 down_dims = countof(bd_down);
|
||||||
|
|
||||||
@ -233,7 +232,7 @@ ImplComputeShader2D(V_BackdropDownCS)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IsInside(bd_pos, down_dims))
|
if (all(bd_pos < down_dims))
|
||||||
{
|
{
|
||||||
bd_down[bd_pos] = result;
|
bd_down[bd_pos] = result;
|
||||||
}
|
}
|
||||||
@ -246,10 +245,10 @@ ImplComputeShader2D(V_BackdropUpCS)
|
|||||||
{
|
{
|
||||||
i32 mip_idx = V_GpuConst_MipIdx;
|
i32 mip_idx = V_GpuConst_MipIdx;
|
||||||
|
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
Texture2D<Vec4> bd_down = G_Dereference<Vec4>(frame.backdrop_mips[mip_idx + 1]);
|
Texture2D<Vec4> bd_down = G_SDeref<Vec4>(frame.backdrop_mips[mip_idx + 1]);
|
||||||
RWTexture2D<Vec4> bd_up = G_DereferenceRW<Vec4>(frame.backdrop_mips[mip_idx]);
|
RWTexture2D<Vec4> bd_up = G_SDerefRW<Vec4>(frame.backdrop_mips[mip_idx]);
|
||||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]);
|
SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearMirror]);
|
||||||
|
|
||||||
Vec2 down_dims = countof(bd_down);
|
Vec2 down_dims = countof(bd_down);
|
||||||
Vec2 up_dims = countof(bd_up);
|
Vec2 up_dims = countof(bd_up);
|
||||||
@ -290,7 +289,7 @@ ImplComputeShader2D(V_BackdropUpCS)
|
|||||||
) * 1.0f / 41.0f;
|
) * 1.0f / 41.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (IsInside(bd_pos, up_dims))
|
if (all(bd_pos < up_dims))
|
||||||
{
|
{
|
||||||
bd_up[bd_pos] = result;
|
bd_up[bd_pos] = result;
|
||||||
}
|
}
|
||||||
@ -304,8 +303,8 @@ ImplComputeShader2D(V_BackdropUpCS)
|
|||||||
|
|
||||||
ImplVertexShader(V_QuadVS, V_QuadPSInput)
|
ImplVertexShader(V_QuadVS, V_QuadPSInput)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
StructuredBuffer<V_Quad> quads = G_Dereference<V_Quad>(frame.quads);
|
StructuredBuffer<V_Quad> quads = G_SDeref<V_Quad>(frame.quads);
|
||||||
|
|
||||||
V_Quad quad = quads[SV_InstanceID];
|
V_Quad quad = quads[SV_InstanceID];
|
||||||
|
|
||||||
@ -328,13 +327,13 @@ ImplVertexShader(V_QuadVS, V_QuadPSInput)
|
|||||||
|
|
||||||
ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
|
ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
StructuredBuffer<V_Quad> quads = G_Dereference<V_Quad>(frame.quads);
|
StructuredBuffer<V_Quad> quads = G_SDeref<V_Quad>(frame.quads);
|
||||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
||||||
RWTexture2D<u32> occluders = G_DereferenceRW<u32>(frame.occluders);
|
RWTexture2D<u32> occluders = G_SDerefRW<u32>(frame.occluders);
|
||||||
|
|
||||||
V_Quad quad = quads[input.quad_idx];
|
V_Quad quad = quads[input.quad_idx];
|
||||||
Texture2D<Vec4> tex = G_Dereference<Vec4>(quad.tex);
|
Texture2D<Vec4> tex = G_VDeref<Vec4>(quad.tex);
|
||||||
|
|
||||||
Vec2 world_pos = input.world_pos;
|
Vec2 world_pos = input.world_pos;
|
||||||
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1));
|
Vec2 cell_pos = mul(frame.af.world_to_cell, Vec3(world_pos, 1));
|
||||||
@ -365,9 +364,9 @@ ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
|
|||||||
|
|
||||||
ImplComputeShader(V_EmitParticlesCS)
|
ImplComputeShader(V_EmitParticlesCS)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
StructuredBuffer<V_Emitter> emitters = G_Dereference<V_Emitter>(frame.emitters);
|
StructuredBuffer<V_Emitter> emitters = G_SDeref<V_Emitter>(frame.emitters);
|
||||||
RWStructuredBuffer<V_Particle> particles = G_DereferenceRW<V_Particle>(frame.particles);
|
RWStructuredBuffer<V_Particle> particles = G_SDerefRW<V_Particle>(frame.particles);
|
||||||
|
|
||||||
u32 emitter_idx = SV_DispatchThreadID;
|
u32 emitter_idx = SV_DispatchThreadID;
|
||||||
if (emitter_idx < frame.emitters_count)
|
if (emitter_idx < frame.emitters_count)
|
||||||
@ -396,10 +395,10 @@ ImplComputeShader(V_EmitParticlesCS)
|
|||||||
|
|
||||||
ImplComputeShader(V_SimParticlesCS)
|
ImplComputeShader(V_SimParticlesCS)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
|
Texture2D<P_TileKind> tiles = G_SDeref<P_TileKind>(frame.tiles);
|
||||||
RWStructuredBuffer<V_Particle> particles = G_DereferenceRW<V_Particle>(frame.particles);
|
RWStructuredBuffer<V_Particle> particles = G_SDerefRW<V_Particle>(frame.particles);
|
||||||
Texture2D<u32> occluders = G_Dereference<u32>(frame.occluders);
|
Texture2D<u32> occluders = G_SDeref<u32>(frame.occluders);
|
||||||
|
|
||||||
u32 particle_idx = SV_DispatchThreadID;
|
u32 particle_idx = SV_DispatchThreadID;
|
||||||
if (particle_idx < V_ParticlesCap)
|
if (particle_idx < V_ParticlesCap)
|
||||||
@ -424,7 +423,7 @@ ImplComputeShader(V_SimParticlesCS)
|
|||||||
if (particle.kind < 0)
|
if (particle.kind < 0)
|
||||||
{
|
{
|
||||||
u32 emitter_idx = -particle.kind - 1;
|
u32 emitter_idx = -particle.kind - 1;
|
||||||
V_Emitter emitter = G_Dereference<V_Emitter>(frame.emitters)[emitter_idx];
|
V_Emitter emitter = G_SDeref<V_Emitter>(frame.emitters)[emitter_idx];
|
||||||
|
|
||||||
f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle);
|
f32 initial_angle = lerp(emitter.angle.min, emitter.angle.max, rand_angle);
|
||||||
f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed);
|
f32 initial_speed = lerp(emitter.speed.min, emitter.speed.max, rand_speed);
|
||||||
@ -439,8 +438,8 @@ ImplComputeShader(V_SimParticlesCS)
|
|||||||
if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
|
if (particle.kind > V_ParticleKind_None && particle.kind < V_ParticleKind_COUNT && !prune)
|
||||||
{
|
{
|
||||||
V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
|
V_ParticleDesc desc = V_DescFromParticleKind((V_ParticleKind)particle.kind);
|
||||||
RWTexture2D<u32> cells = G_DereferenceRW<u32>(frame.particle_cells[desc.layer]);
|
RWTexture2D<u32> cells = G_VDerefRW<u32>(frame.particle_cells[desc.layer]);
|
||||||
RWTexture2D<u32> densities = G_DereferenceRW<u32>(frame.particle_densities[desc.layer]);
|
RWTexture2D<u32> densities = G_VDerefRW<u32>(frame.particle_densities[desc.layer]);
|
||||||
|
|
||||||
u32 packed = 0;
|
u32 packed = 0;
|
||||||
packed |= (particle_idx & ((1 >> 24) - 1)) << 0;
|
// Store only the low 24 bits of the particle index. The previous mask, (1 >> 24) - 1,
// evaluates to 0 - 1 (all bits set) and let the index spill into bits 24+, which are
// decoded elsewhere as the particle kind ((packed >> 24) & 0x7F) and a flag (1 << 31).
packed |= (particle_idx & ((1 << 24) - 1)) << 0;
|
||||||
@ -673,12 +672,12 @@ ImplComputeShader(V_SimParticlesCS)
|
|||||||
|
|
||||||
ImplComputeShader2D(V_ShadeCS)
|
ImplComputeShader2D(V_ShadeCS)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
||||||
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
|
Texture2D<P_TileKind> tiles = G_SDeref<P_TileKind>(frame.tiles);
|
||||||
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo);
|
Texture2D<Vec4> albedo_tex = G_SDeref<Vec4>(frame.albedo);
|
||||||
RWTexture2D<Vec4> shade_tex = G_DereferenceRW<Vec4>(frame.shade);
|
RWTexture2D<Vec4> shade_tex = G_SDerefRW<Vec4>(frame.shade);
|
||||||
Texture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
|
Texture2D<f32> drynesses = G_SDeref<f32>(frame.drynesses);
|
||||||
|
|
||||||
Vec2 shade_pos = SV_DispatchThreadID + 0.5;
|
Vec2 shade_pos = SV_DispatchThreadID + 0.5;
|
||||||
Vec2 world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1));
|
Vec2 world_pos = mul(frame.af.shade_to_world, Vec3(shade_pos, 1));
|
||||||
@ -709,18 +708,18 @@ ImplComputeShader2D(V_ShadeCS)
|
|||||||
|
|
||||||
ImplComputeShader2D(V_CompositeCS)
|
ImplComputeShader2D(V_CompositeCS)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade);
|
// Texture2D<Vec4> shade_tex = G_SDeref<Vec4>(frame.shade);
|
||||||
SamplerState point_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
SamplerState point_sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
|
||||||
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
SamplerState bilinear_sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||||
Texture2D<Vec4> albedo_tex = G_Dereference<Vec4>(frame.albedo);
|
Texture2D<Vec4> albedo_tex = G_SDeref<Vec4>(frame.albedo);
|
||||||
RWTexture2D<Vec4> screen_tex = G_DereferenceRW<Vec4>(frame.screen);
|
RWTexture2D<Vec4> screen_tex = G_SDerefRW<Vec4>(frame.screen);
|
||||||
Texture2D<Vec4> stains = G_Dereference<Vec4>(frame.stains);
|
Texture2D<Vec4> stains = G_SDeref<Vec4>(frame.stains);
|
||||||
Texture2D<Vec4> dry_stains = G_Dereference<Vec4>(frame.dry_stains);
|
Texture2D<Vec4> dry_stains = G_SDeref<Vec4>(frame.dry_stains);
|
||||||
Texture2D<f32> drynesses = G_Dereference<f32>(frame.drynesses);
|
Texture2D<f32> drynesses = G_SDeref<f32>(frame.drynesses);
|
||||||
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
|
Texture2D<P_TileKind> tiles = G_SDeref<P_TileKind>(frame.tiles);
|
||||||
Texture2D<Vec4> backdrop = G_Dereference<Vec4>(frame.backdrop_mips[0]);
|
Texture2D<Vec4> backdrop = G_SDeref<Vec4>(frame.backdrop_mips[0]);
|
||||||
StructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
|
StructuredBuffer<V_Particle> particles = G_SDeref<V_Particle>(frame.particles);
|
||||||
|
|
||||||
Vec2 screen_pos = SV_DispatchThreadID.xy + 0.5;
|
Vec2 screen_pos = SV_DispatchThreadID.xy + 0.5;
|
||||||
Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1));
|
Vec2 world_pos = mul(frame.af.screen_to_world, Vec3(screen_pos, 1));
|
||||||
@ -828,7 +827,7 @@ ImplComputeShader2D(V_CompositeCS)
|
|||||||
else if (tile != P_TileKind_Empty)
|
else if (tile != P_TileKind_Empty)
|
||||||
{
|
{
|
||||||
V_TileDesc tile_desc = frame.tile_descs[tile];
|
V_TileDesc tile_desc = frame.tile_descs[tile];
|
||||||
Texture2D<Vec4> tile_tex = G_Dereference<Vec4>(tile_desc.tex);
|
Texture2D<Vec4> tile_tex = G_VDeref<Vec4>(tile_desc.tex);
|
||||||
Vec2 samp_t = clamp(frac(world_pos), 0.00001, 1.0 - 0.00001);
|
Vec2 samp_t = clamp(frac(world_pos), 0.00001, 1.0 - 0.00001);
|
||||||
Vec2 samp_uv = lerp(tile_desc.tex_slice_uv.p0, tile_desc.tex_slice_uv.p1, samp_t);
|
Vec2 samp_uv = lerp(tile_desc.tex_slice_uv.p0, tile_desc.tex_slice_uv.p1, samp_t);
|
||||||
tile_color = tile_tex.SampleLevel(point_sampler, samp_uv, 0);
|
tile_color = tile_tex.SampleLevel(point_sampler, samp_uv, 0);
|
||||||
@ -857,8 +856,8 @@ ImplComputeShader2D(V_CompositeCS)
|
|||||||
|
|
||||||
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
|
for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1)
|
||||||
{
|
{
|
||||||
Texture2D<u32> cells = G_Dereference<u32>(frame.particle_cells[layer]);
|
Texture2D<u32> cells = G_SDeref<u32>(frame.particle_cells[layer]);
|
||||||
Texture2D<u32> densities = G_Dereference<u32>(frame.particle_densities[layer]);
|
Texture2D<u32> densities = G_SDeref<u32>(frame.particle_densities[layer]);
|
||||||
u32 packed = cells[cell_pos];
|
u32 packed = cells[cell_pos];
|
||||||
V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F);
|
V_ParticleKind particle_kind = (V_ParticleKind)((packed >> 24) & 0x7F);
|
||||||
if (particle_kind != V_ParticleKind_None)
|
if (particle_kind != V_ParticleKind_None)
|
||||||
@ -1112,19 +1111,19 @@ ImplComputeShader2D(V_BloomDownCS)
|
|||||||
{
|
{
|
||||||
i32 mip_idx = V_GpuConst_MipIdx;
|
i32 mip_idx = V_GpuConst_MipIdx;
|
||||||
|
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||||
RWTexture2D<Vec4> bloom_down = G_DereferenceRW<Vec4>(frame.bloom_mips[mip_idx - 1]);
|
RWTexture2D<Vec4> bloom_down = G_SDerefRW<Vec4>(frame.bloom_mips[mip_idx - 1]);
|
||||||
|
|
||||||
Texture2D<Vec4> bloom_up;
|
Texture2D<Vec4> bloom_up;
|
||||||
b32 is_first_pass = mip_idx == 1;
|
b32 is_first_pass = mip_idx == 1;
|
||||||
if (is_first_pass)
|
if (is_first_pass)
|
||||||
{
|
{
|
||||||
bloom_up = G_Dereference<Vec4>(frame.screen);
|
bloom_up = G_SDeref<Vec4>(frame.screen);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
bloom_up = G_Dereference<Vec4>(frame.bloom_mips[mip_idx - 2]);
|
bloom_up = G_SDeref<Vec4>(frame.bloom_mips[mip_idx - 2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
Vec2 down_dims = countof(bloom_down);
|
Vec2 down_dims = countof(bloom_down);
|
||||||
@ -1187,19 +1186,19 @@ ImplComputeShader2D(V_BloomUpCS)
|
|||||||
{
|
{
|
||||||
i32 mip_idx = V_GpuConst_MipIdx;
|
i32 mip_idx = V_GpuConst_MipIdx;
|
||||||
|
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
SamplerState sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||||
Texture2D<Vec4> bloom_down = G_Dereference<Vec4>(frame.bloom_mips[mip_idx]);
|
Texture2D<Vec4> bloom_down = G_SDeref<Vec4>(frame.bloom_mips[mip_idx]);
|
||||||
|
|
||||||
b32 is_last_pass = mip_idx == 0;
|
b32 is_last_pass = mip_idx == 0;
|
||||||
RWTexture2D<Vec4> bloom_up;
|
RWTexture2D<Vec4> bloom_up;
|
||||||
if (is_last_pass)
|
if (is_last_pass)
|
||||||
{
|
{
|
||||||
bloom_up = G_DereferenceRW<Vec4>(frame.screen);
|
bloom_up = G_SDerefRW<Vec4>(frame.screen);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
bloom_up = G_DereferenceRW<Vec4>(frame.bloom_mips[mip_idx - 1]);
|
bloom_up = G_SDerefRW<Vec4>(frame.bloom_mips[mip_idx - 1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
Vec2 down_dims = countof(bloom_down);
|
Vec2 down_dims = countof(bloom_down);
|
||||||
@ -1252,10 +1251,10 @@ ImplComputeShader2D(V_BloomUpCS)
|
|||||||
|
|
||||||
ImplComputeShader2D(V_FinalizeCS)
|
ImplComputeShader2D(V_FinalizeCS)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
SamplerState bilinear_sampler = G_SDeref(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
|
||||||
Texture2D<Vec4> bloom_tex = G_Dereference<Vec4>(frame.bloom_mips[0]);
|
Texture2D<Vec4> bloom_tex = G_SDeref<Vec4>(frame.bloom_mips[0]);
|
||||||
RWTexture2D<Vec4> screen_tex = G_DereferenceRW<Vec4>(frame.screen);
|
RWTexture2D<Vec4> screen_tex = G_SDerefRW<Vec4>(frame.screen);
|
||||||
|
|
||||||
Vec2 screen_pos = SV_DispatchThreadID + 0.5;
|
Vec2 screen_pos = SV_DispatchThreadID + 0.5;
|
||||||
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);
|
b32 is_in_screen = IsInside(screen_pos, frame.screen_dims);
|
||||||
@ -1285,8 +1284,8 @@ ImplComputeShader2D(V_FinalizeCS)
|
|||||||
|
|
||||||
ImplVertexShader(V_DVertVS, V_DVertPSInput)
|
ImplVertexShader(V_DVertVS, V_DVertPSInput)
|
||||||
{
|
{
|
||||||
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
|
V_SharedFrame frame = G_SDeref<V_SharedFrame>(V_GpuConst_Frame)[0];
|
||||||
StructuredBuffer<V_DVert> verts = G_Dereference<V_DVert>(frame.dverts);
|
StructuredBuffer<V_DVert> verts = G_SDeref<V_DVert>(frame.dverts);
|
||||||
|
|
||||||
V_DVert vert = verts[SV_VertexID];
|
V_DVert vert = verts[SV_VertexID];
|
||||||
|
|
||||||
|
|||||||
@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
|
ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
|
||||||
{
|
{
|
||||||
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
|
UI_GpuParams params = G_SDeref<UI_GpuParams>(UI_GpuConst_Params)[0];
|
||||||
StructuredBuffer<UI_GpuRect> rects = G_Dereference<UI_GpuRect>(params.rects);
|
StructuredBuffer<UI_GpuRect> rects = G_SDeref<UI_GpuRect>(params.rects);
|
||||||
UI_GpuRect rect = rects[SV_InstanceID];
|
UI_GpuRect rect = rects[SV_InstanceID];
|
||||||
|
|
||||||
Vec2 rect_uv = RectUvFromIdx(SV_VertexID);
|
Vec2 rect_uv = RectUvFromIdx(SV_VertexID);
|
||||||
@ -35,9 +35,9 @@ ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
|
|||||||
|
|
||||||
ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
|
ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
|
||||||
{
|
{
|
||||||
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
|
UI_GpuParams params = G_SDeref<UI_GpuParams>(UI_GpuConst_Params)[0];
|
||||||
StructuredBuffer<UI_GpuRect> rects = G_Dereference<UI_GpuRect>(params.rects);
|
StructuredBuffer<UI_GpuRect> rects = G_SDeref<UI_GpuRect>(params.rects);
|
||||||
SamplerState sampler = G_Dereference(params.sampler);
|
SamplerState sampler = G_SDeref(params.sampler);
|
||||||
|
|
||||||
UI_GpuRect rect = rects[input.rect_idx];
|
UI_GpuRect rect = rects[input.rect_idx];
|
||||||
|
|
||||||
@ -77,7 +77,7 @@ ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
Texture2D<Vec4> tex = G_Dereference<Vec4>(rect.tex);
|
Texture2D<Vec4> tex = G_VDeref<Vec4>(rect.tex);
|
||||||
background_premul = tex.SampleLevel(sampler, input.tex_uv, 0);
|
background_premul = tex.SampleLevel(sampler, input.tex_uv, 0);
|
||||||
background_premul.rgb *= background_premul.a;
|
background_premul.rgb *= background_premul.a;
|
||||||
}
|
}
|
||||||
@ -135,9 +135,9 @@ ImplVertexShader(UI_BlitVS, UI_BlitPSInput)
|
|||||||
|
|
||||||
ImplPixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
|
ImplPixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
|
||||||
{
|
{
|
||||||
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
|
UI_GpuParams params = G_SDeref<UI_GpuParams>(UI_GpuConst_Params)[0];
|
||||||
Texture2D<Vec4> tex = G_Dereference<Vec4>(params.target_ro);
|
Texture2D<Vec4> tex = G_SDeref<Vec4>(params.target_ro);
|
||||||
SamplerState sampler = G_Dereference(params.sampler);
|
SamplerState sampler = G_SDeref(params.sampler);
|
||||||
|
|
||||||
Vec2 uv = input.src_uv;
|
Vec2 uv = input.src_uv;
|
||||||
Vec4 result = tex.Sample(sampler, uv);
|
Vec4 result = tex.Sample(sampler, uv);
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user