specify compute group size only in declaration

This commit is contained in:
jacob 2026-02-19 17:07:27 -06:00
parent 8a87ec2f6b
commit 65ae383d75
11 changed files with 87 additions and 64 deletions

View File

@ -727,11 +727,16 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
Struct(ComputeShader) { ResourceKey resource; };
#elif IsGpu
#define Semantic(t, n) t n : n
#define ComputeShader(name, x) [numthreads(x, 1, 1)] void name(Semantic(u32, SV_DispatchThreadID))
#define ComputeShader2D(name, x, y) [numthreads(x, y, 1)] void name(Semantic(Vec2U32, SV_DispatchThreadID))
#define ComputeShader3D(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID))
#define VertexShader(name, return_type) return_type name(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
#define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__)
// Shader declaration macros: each expands to an entry-point prototype.
// The compute variants also attach [numthreads(...)], padding unused dimensions with 1,
// so the thread-group size is written at the declaration.
// Parameters are produced by Semantic(t, n), i.e. they are named after their semantic,
// which lets a shader body refer to e.g. SV_DispatchThreadID or SV_VertexID directly.
#define DeclComputeShader(name, x) [numthreads(x, 1, 1)] void name(Semantic(u32, SV_DispatchThreadID))
#define DeclComputeShader2D(name, x, y) [numthreads(x, y, 1)] void name(Semantic(Vec2U32, SV_DispatchThreadID))
#define DeclComputeShader3D(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID))
#define DeclVertexShader(name, return_type) return_type name(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
#define DeclPixelShader(name, return_type, ...) return_type name(__VA_ARGS__)
// Shader implementation macros: expand to the same signatures as the Decl* forms and are
// followed by the function body. The compute variants take no group-size arguments and
// emit no [numthreads]; the vertex/pixel variants expand identically to their Decl* twins.
// NOTE(review): this relies on the shader compiler carrying [numthreads] over from the
// earlier Decl* prototype to the definition - confirm with the compiler in use.
#define ImplComputeShader(name) void name(Semantic(u32, SV_DispatchThreadID))
#define ImplComputeShader2D(name) void name(Semantic(Vec2U32, SV_DispatchThreadID))
#define ImplComputeShader3D(name) void name(Semantic(Vec3U32, SV_DispatchThreadID))
#define ImplVertexShader(name, return_type) return_type name(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
#define ImplPixelShader(name, return_type, ...) return_type name(__VA_ARGS__)
#endif
////////////////////////////////////////////////////////////

View File

@ -44,7 +44,7 @@ Struct(G_SamplerStateRef) { u32 v; };
#if IsCpu
// CPU-side declaration of a shader constant: makes `name` an enumerator whose value is
// `slot`, and declares name##__shaderconstanttype with a single member `type v`.
#define G_ForceDeclConstant(type, name, slot) \
Enum(name##__shaderconstantenum) { name = slot }; \
enum { name = slot }; \
Struct(name##__shaderconstanttype) { type v; }
#define G_DeclConstant(type, name, slot) \
StaticAssert(sizeof(type) <= 4); \

View File

@ -56,7 +56,8 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
////////////////////////////////////////////////////////////
//~ Prepare frame
ComputeShader2D(V_PrepareShadeCS, 16, 16)
//- Prepare shade
ImplComputeShader2D(V_PrepareShadeCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
RWTexture2D<Vec4> shade = G_Dereference<Vec4>(frame.shade_rw);
@ -69,7 +70,7 @@ ComputeShader2D(V_PrepareShadeCS, 16, 16)
}
//- Prepare cells
ComputeShader2D(V_PrepareCellsCS, 16, 16)
ImplComputeShader2D(V_PrepareCellsCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
@ -158,7 +159,7 @@ ComputeShader2D(V_PrepareCellsCS, 16, 16)
}
//- Clear particles
ComputeShader(V_ClearParticlesCS, 256)
ImplComputeShader(V_ClearParticlesCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
@ -175,7 +176,7 @@ ComputeShader(V_ClearParticlesCS, 256)
//////////////////////////////
//- Vertex shader
VertexShader(V_QuadVS, V_QuadPSInput)
ImplVertexShader(V_QuadVS, V_QuadPSInput)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Quad> quads = G_Dereference<V_Quad>(frame.quads);
@ -199,7 +200,7 @@ VertexShader(V_QuadVS, V_QuadPSInput)
//////////////////////////////
//- Pixel shader
PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Quad> quads = G_Dereference<V_Quad>(frame.quads);
@ -236,7 +237,7 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
//////////////////////////////
//- Particle emitter shader
ComputeShader(V_EmitParticlesCS, 256)
ImplComputeShader(V_EmitParticlesCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Emitter> emitters = G_Dereference<V_Emitter>(frame.emitters);
@ -267,7 +268,7 @@ ComputeShader(V_EmitParticlesCS, 256)
//////////////////////////////
//- Particle sim shader
ComputeShader(V_SimParticlesCS, 256)
ImplComputeShader(V_SimParticlesCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
@ -544,7 +545,7 @@ ComputeShader(V_SimParticlesCS, 256)
// TODO: Remove this
ComputeShader2D(V_ShadeCS, 16, 16)
ImplComputeShader2D(V_ShadeCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
@ -580,7 +581,7 @@ ComputeShader2D(V_ShadeCS, 16, 16)
////////////////////////////////////////////////////////////
//~ Composite
ComputeShader2D(V_CompositeCS, 16, 16)
ImplComputeShader2D(V_CompositeCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
@ -959,7 +960,7 @@ ComputeShader2D(V_CompositeCS, 16, 16)
//////////////////////////////
//- Downsample
ComputeShader2D(V_BloomDownCS, 16, 16)
ImplComputeShader2D(V_BloomDownCS)
{
i32 mips_count = V_GpuConst_MipsCount;
i32 mip_idx = V_GpuConst_MipIdx;
@ -1035,7 +1036,7 @@ ComputeShader2D(V_BloomDownCS, 16, 16)
//////////////////////////////
//- Upsample
ComputeShader2D(V_BloomUpCS, 16, 16)
ImplComputeShader2D(V_BloomUpCS)
{
i32 mips_count = V_GpuConst_MipsCount;
i32 mip_idx = V_GpuConst_MipIdx;
@ -1103,7 +1104,7 @@ ComputeShader2D(V_BloomUpCS, 16, 16)
////////////////////////////////////////////////////////////
//~ Finalize
ComputeShader2D(V_FinalizeCS, 16, 16)
ImplComputeShader2D(V_FinalizeCS)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
@ -1136,7 +1137,7 @@ ComputeShader2D(V_FinalizeCS, 16, 16)
//////////////////////////////
//- Vertex shader
VertexShader(V_DVertVS, V_DVertPSInput)
ImplVertexShader(V_DVertVS, V_DVertPSInput)
{
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_DVert> verts = G_Dereference<V_DVert>(frame.dverts);
@ -1154,7 +1155,7 @@ VertexShader(V_DVertVS, V_DVertPSInput)
//////////////////////////////
//- Pixel shader
PixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input)
ImplPixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input)
{
V_DVertPSOutput output;
output.sv_target0 = input.color_lin;

View File

@ -50,31 +50,32 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density);
////////////////////////////////////////////////////////////
//~ Shaders
//- Utility shaders
ComputeShader2D(V_PrepareCellsCS, 16, 16);
ComputeShader(V_ClearParticlesCS, 256);
//- Prepare frame
DeclComputeShader2D(V_PrepareShadeCS, 16, 16);
DeclComputeShader2D(V_PrepareCellsCS, 16, 16);
DeclComputeShader(V_ClearParticlesCS, 256);
//- Quads
VertexShader(V_QuadVS, V_QuadPSInput);
PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input);
DeclVertexShader(V_QuadVS, V_QuadPSInput);
DeclPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input);
//- Particle simulation
ComputeShader(V_EmitParticlesCS, 256);
ComputeShader(V_SimParticlesCS, 256);
DeclComputeShader(V_EmitParticlesCS, 256);
DeclComputeShader(V_SimParticlesCS, 256);
//- Shade
ComputeShader2D(V_ShadeCS, 16, 16);
DeclComputeShader2D(V_ShadeCS, 16, 16);
//- Composite
ComputeShader2D(V_CompositeCS, 16, 16);
DeclComputeShader2D(V_CompositeCS, 16, 16);
//- Bloom
ComputeShader2D(V_BloomDownCS, 16, 16);
ComputeShader2D(V_BloomUpCS, 16, 16);
DeclComputeShader2D(V_BloomDownCS, 16, 16);
DeclComputeShader2D(V_BloomUpCS, 16, 16);
//- Finalize
ComputeShader2D(V_FinalizeCS, 16, 16);
DeclComputeShader2D(V_FinalizeCS, 16, 16);
//- Debug shapes
VertexShader(V_DVertVS, V_DVertPSInput);
PixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input);
DeclVertexShader(V_DVertVS, V_DVertPSInput);
DeclPixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input);

View File

@ -36,7 +36,7 @@ void PT_RunForever(WaveLaneCtx *lane)
{
G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target_rwhandle);
G_SetConstant(cl, PT_ShaderConst_TestConst, 3.123);
G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicPointClampSampler());
G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicSamplerFromKind(G_BasicSamplerKind_PointClamp));
G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target_rhandle);
G_SetConstant(cl, PT_ShaderConst_NoiseTex, G_BasicNoiseTexture());
}

5
src/proto/proto.lay generated
View File

@ -16,8 +16,8 @@
//////////////////////////////
//- Api
@IncludeC proto_shaders.cgh
@IncludeG proto_shaders.cgh
@IncludeC proto_shared.cgh
@IncludeG proto_shared.cgh
@Bootstrap PT_Bootstrap
@ -26,4 +26,5 @@
@IncludeC proto.c
@IncludeG proto_shaders.gh
@IncludeG proto_shaders.g

View File

@ -1,12 +1,7 @@
////////////////////////////////////////////////////////////
//~ Test shader
Struct(TestStruct)
{
i32 i;
};
ComputeShader2D(PT_TestCS, 8, 8)
ImplComputeShader2D(PT_TestCS)
{
StructuredBuffer<TestStruct> sb = G_Dereference<TestStruct>(PT_ShaderConst_TestBuff);
@ -23,21 +18,10 @@ ComputeShader2D(PT_TestCS, 8, 8)
////////////////////////////////////////////////////////////
//~ Blit shader
Struct(PT_BlitPSInput)
{
Semantic(Vec4, sv_position);
Semantic(Vec2, src_uv);
};
Struct(PT_BlitPSOutput)
{
Semantic(Vec4, sv_target0);
};
//////////////////////////////
//- Vertex shader
VertexShader(PT_BlitVS, PT_BlitPSInput)
ImplVertexShader(PT_BlitVS, PT_BlitPSInput)
{
Vec2 uv = RectUvFromIdx(SV_VertexID);
PT_BlitPSInput result;
@ -49,7 +33,7 @@ VertexShader(PT_BlitVS, PT_BlitPSInput)
//////////////////////////////
//- Pixel shader
PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input)
ImplPixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input)
{
SamplerState sampler = G_Dereference(PT_ShaderConst_BlitSampler);
Texture2D<Vec4> tex = G_Dereference<Vec4>(PT_ShaderConst_BlitSrc);

View File

@ -0,0 +1,31 @@
////////////////////////////////////////////////////////////
//~ Test shader types
// Element type of the structured buffer that PT_TestCS obtains by dereferencing
// PT_ShaderConst_TestBuff. Holds a single 32-bit signed integer.
Struct(TestStruct)
{
i32 i;
};
////////////////////////////////////////////////////////////
//~ Blit shader types
// Value returned by PT_BlitVS and received by PT_BlitPS as its `input` parameter.
// Members are declared through Semantic, so on the GPU side each field is bound to a
// semantic with the same name as the field.
// NOTE(review): the CPU-side expansion of Semantic is not visible here - confirm it
// yields a plain member, since this header is included for both C and GPU builds.
Struct(PT_BlitPSInput)
{
Semantic(Vec4, sv_position);
Semantic(Vec2, src_uv);
};
// Return type of PT_BlitPS: a single Vec4 bound to the sv_target0 semantic.
Struct(PT_BlitPSOutput)
{
Semantic(Vec4, sv_target0);
};
////////////////////////////////////////////////////////////
//~ Shaders
//- Test
// Declared with an 8 x 8 x 1 thread group; the matching ImplComputeShader2D definition
// does not repeat the group size.
DeclComputeShader2D(PT_TestCS, 8, 8);
//- Blit
// Vertex/pixel pair: PT_BlitVS returns PT_BlitPSInput, which PT_BlitPS takes as `input`.
DeclVertexShader(PT_BlitVS, PT_BlitPSInput);
DeclPixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input);

View File

@ -4,7 +4,7 @@
//////////////////////////////
//- Vertex shader
VertexShader(UI_DRectVS, UI_DRectPSInput)
ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
{
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
StructuredBuffer<UI_GpuRect> rects = G_Dereference<UI_GpuRect>(params.rects);
@ -33,7 +33,7 @@ VertexShader(UI_DRectVS, UI_DRectPSInput)
//////////////////////////////
//- Pixel shader
PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
{
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
StructuredBuffer<UI_GpuRect> rects = G_Dereference<UI_GpuRect>(params.rects);
@ -120,7 +120,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
//////////////////////////////
//- Vertex shader
VertexShader(UI_BlitVS, UI_BlitPSInput)
ImplVertexShader(UI_BlitVS, UI_BlitPSInput)
{
Vec2 uv = RectUvFromIdx(SV_VertexID);
UI_BlitPSInput result;
@ -133,7 +133,7 @@ VertexShader(UI_BlitVS, UI_BlitPSInput)
//////////////////////////////
//- Pixel shader
PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
ImplPixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
{
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
Texture2D<Vec4> tex = G_Dereference<Vec4>(params.target_ro);

View File

@ -36,9 +36,9 @@ Struct(UI_BlitPSOutput)
//~ Shaders
//- Rects
VertexShader(UI_DRectVS, UI_DRectPSInput);
PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input);
DeclVertexShader(UI_DRectVS, UI_DRectPSInput);
DeclPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input);
//- Blit
VertexShader(UI_BlitVS, UI_BlitPSInput);
PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input);
DeclVertexShader(UI_BlitVS, UI_BlitPSInput);
DeclPixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input);