specify compute group size only in declaration

This commit is contained in:
jacob 2026-02-19 17:07:27 -06:00
parent 8a87ec2f6b
commit 65ae383d75
11 changed files with 87 additions and 64 deletions

View File

@ -727,11 +727,16 @@ Inline b32 MatchU128(u128 a, u128 b) { return a.lo == b.lo && a.hi == b.hi; }
Struct(ComputeShader) { ResourceKey resource; }; Struct(ComputeShader) { ResourceKey resource; };
#elif IsGpu #elif IsGpu
#define Semantic(t, n) t n : n #define Semantic(t, n) t n : n
#define ComputeShader(name, x) [numthreads(x, 1, 1)] void name(Semantic(u32, SV_DispatchThreadID)) #define DeclComputeShader(name, x) [numthreads(x, 1, 1)] void name(Semantic(u32, SV_DispatchThreadID))
#define ComputeShader2D(name, x, y) [numthreads(x, y, 1)] void name(Semantic(Vec2U32, SV_DispatchThreadID)) #define DeclComputeShader2D(name, x, y) [numthreads(x, y, 1)] void name(Semantic(Vec2U32, SV_DispatchThreadID))
#define ComputeShader3D(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID)) #define DeclComputeShader3D(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID))
#define VertexShader(name, return_type) return_type name(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID)) #define DeclVertexShader(name, return_type) return_type name(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
#define PixelShader(name, return_type, ...) return_type name(__VA_ARGS__) #define DeclPixelShader(name, return_type, ...) return_type name(__VA_ARGS__)
// Definition-side shader signature macros.
// The compute variants expand to the same function signature as the matching
// DeclComputeShader* macro but without the [numthreads(...)] attribute, so the
// thread group size is written only where the shader is declared.
#define ImplComputeShader(name) void name(Semantic(u32, SV_DispatchThreadID))
#define ImplComputeShader2D(name) void name(Semantic(Vec2U32, SV_DispatchThreadID))
#define ImplComputeShader3D(name) void name(Semantic(Vec3U32, SV_DispatchThreadID))
// The vertex and pixel variants expand to exactly the same text as DeclVertexShader / DeclPixelShader.
#define ImplVertexShader(name, return_type) return_type name(Semantic(u32, SV_InstanceID), Semantic(u32, SV_VertexID))
#define ImplPixelShader(name, return_type, ...) return_type name(__VA_ARGS__)
#endif #endif
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////

View File

@ -44,7 +44,7 @@ Struct(G_SamplerStateRef) { u32 v; };
#if IsCpu #if IsCpu
#define G_ForceDeclConstant(type, name, slot) \ #define G_ForceDeclConstant(type, name, slot) \
Enum(name##__shaderconstantenum) { name = slot }; \ enum { name = slot }; \
Struct(name##__shaderconstanttype) { type v; } Struct(name##__shaderconstanttype) { type v; }
#define G_DeclConstant(type, name, slot) \ #define G_DeclConstant(type, name, slot) \
StaticAssert(sizeof(type) <= 4); \ StaticAssert(sizeof(type) <= 4); \

View File

@ -56,7 +56,8 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Prepare frame //~ Prepare frame
ComputeShader2D(V_PrepareShadeCS, 16, 16) //- Prepare shade
ImplComputeShader2D(V_PrepareShadeCS)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
RWTexture2D<Vec4> shade = G_Dereference<Vec4>(frame.shade_rw); RWTexture2D<Vec4> shade = G_Dereference<Vec4>(frame.shade_rw);
@ -69,7 +70,7 @@ ComputeShader2D(V_PrepareShadeCS, 16, 16)
} }
//- Prepare cells //- Prepare cells
ComputeShader2D(V_PrepareCellsCS, 16, 16) ImplComputeShader2D(V_PrepareCellsCS)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles); Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
@ -158,7 +159,7 @@ ComputeShader2D(V_PrepareCellsCS, 16, 16)
} }
//- Clear particles //- Clear particles
ComputeShader(V_ClearParticlesCS, 256) ImplComputeShader(V_ClearParticlesCS)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles); RWStructuredBuffer<V_Particle> particles = G_Dereference<V_Particle>(frame.particles);
@ -175,7 +176,7 @@ ComputeShader(V_ClearParticlesCS, 256)
////////////////////////////// //////////////////////////////
//- Vertex shader //- Vertex shader
VertexShader(V_QuadVS, V_QuadPSInput) ImplVertexShader(V_QuadVS, V_QuadPSInput)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Quad> quads = G_Dereference<V_Quad>(frame.quads); StructuredBuffer<V_Quad> quads = G_Dereference<V_Quad>(frame.quads);
@ -199,7 +200,7 @@ VertexShader(V_QuadVS, V_QuadPSInput)
////////////////////////////// //////////////////////////////
//- Pixel shader //- Pixel shader
PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input) ImplPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Quad> quads = G_Dereference<V_Quad>(frame.quads); StructuredBuffer<V_Quad> quads = G_Dereference<V_Quad>(frame.quads);
@ -236,7 +237,7 @@ PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input)
////////////////////////////// //////////////////////////////
//- Particle emitter shader //- Particle emitter shader
ComputeShader(V_EmitParticlesCS, 256) ImplComputeShader(V_EmitParticlesCS)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_Emitter> emitters = G_Dereference<V_Emitter>(frame.emitters); StructuredBuffer<V_Emitter> emitters = G_Dereference<V_Emitter>(frame.emitters);
@ -267,7 +268,7 @@ ComputeShader(V_EmitParticlesCS, 256)
////////////////////////////// //////////////////////////////
//- Particle sim shader //- Particle sim shader
ComputeShader(V_SimParticlesCS, 256) ImplComputeShader(V_SimParticlesCS)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles); Texture2D<P_TileKind> tiles = G_Dereference<P_TileKind>(frame.tiles);
@ -544,7 +545,7 @@ ComputeShader(V_SimParticlesCS, 256)
// TODO: Remove this // TODO: Remove this
ComputeShader2D(V_ShadeCS, 16, 16) ImplComputeShader2D(V_ShadeCS)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]); SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_PointClamp]);
@ -580,7 +581,7 @@ ComputeShader2D(V_ShadeCS, 16, 16)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Composite //~ Composite
ComputeShader2D(V_CompositeCS, 16, 16) ImplComputeShader2D(V_CompositeCS)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
// Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro); // Texture2D<Vec4> shade_tex = G_Dereference<Vec4>(frame.shade_ro);
@ -959,7 +960,7 @@ ComputeShader2D(V_CompositeCS, 16, 16)
////////////////////////////// //////////////////////////////
//- Downsample //- Downsample
ComputeShader2D(V_BloomDownCS, 16, 16) ImplComputeShader2D(V_BloomDownCS)
{ {
i32 mips_count = V_GpuConst_MipsCount; i32 mips_count = V_GpuConst_MipsCount;
i32 mip_idx = V_GpuConst_MipIdx; i32 mip_idx = V_GpuConst_MipIdx;
@ -1035,7 +1036,7 @@ ComputeShader2D(V_BloomDownCS, 16, 16)
////////////////////////////// //////////////////////////////
//- Upsample //- Upsample
ComputeShader2D(V_BloomUpCS, 16, 16) ImplComputeShader2D(V_BloomUpCS)
{ {
i32 mips_count = V_GpuConst_MipsCount; i32 mips_count = V_GpuConst_MipsCount;
i32 mip_idx = V_GpuConst_MipIdx; i32 mip_idx = V_GpuConst_MipIdx;
@ -1103,7 +1104,7 @@ ComputeShader2D(V_BloomUpCS, 16, 16)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Finalize //~ Finalize
ComputeShader2D(V_FinalizeCS, 16, 16) ImplComputeShader2D(V_FinalizeCS)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]);
@ -1136,7 +1137,7 @@ ComputeShader2D(V_FinalizeCS, 16, 16)
////////////////////////////// //////////////////////////////
//- Vertex shader //- Vertex shader
VertexShader(V_DVertVS, V_DVertPSInput) ImplVertexShader(V_DVertVS, V_DVertPSInput)
{ {
V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0]; V_SharedFrame frame = G_Dereference<V_SharedFrame>(V_GpuConst_Frame)[0];
StructuredBuffer<V_DVert> verts = G_Dereference<V_DVert>(frame.dverts); StructuredBuffer<V_DVert> verts = G_Dereference<V_DVert>(frame.dverts);
@ -1154,7 +1155,7 @@ VertexShader(V_DVertVS, V_DVertPSInput)
////////////////////////////// //////////////////////////////
//- Pixel shader //- Pixel shader
PixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input) ImplPixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input)
{ {
V_DVertPSOutput output; V_DVertPSOutput output;
output.sv_target0 = input.color_lin; output.sv_target0 = input.color_lin;

View File

@ -50,31 +50,32 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Shaders //~ Shaders
//- Utility shaders //- Prepare frame
ComputeShader2D(V_PrepareCellsCS, 16, 16); DeclComputeShader2D(V_PrepareShadeCS, 16, 16);
ComputeShader(V_ClearParticlesCS, 256); DeclComputeShader2D(V_PrepareCellsCS, 16, 16);
DeclComputeShader(V_ClearParticlesCS, 256);
//- Quads //- Quads
VertexShader(V_QuadVS, V_QuadPSInput); DeclVertexShader(V_QuadVS, V_QuadPSInput);
PixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input); DeclPixelShader(V_QuadPS, V_QuadPSOutput, V_QuadPSInput input);
//- Particle simulation //- Particle simulation
ComputeShader(V_EmitParticlesCS, 256); DeclComputeShader(V_EmitParticlesCS, 256);
ComputeShader(V_SimParticlesCS, 256); DeclComputeShader(V_SimParticlesCS, 256);
//- Shade //- Shade
ComputeShader2D(V_ShadeCS, 16, 16); DeclComputeShader2D(V_ShadeCS, 16, 16);
//- Composite //- Composite
ComputeShader2D(V_CompositeCS, 16, 16); DeclComputeShader2D(V_CompositeCS, 16, 16);
//- Bloom //- Bloom
ComputeShader2D(V_BloomDownCS, 16, 16); DeclComputeShader2D(V_BloomDownCS, 16, 16);
ComputeShader2D(V_BloomUpCS, 16, 16); DeclComputeShader2D(V_BloomUpCS, 16, 16);
//- Finalize //- Finalize
ComputeShader2D(V_FinalizeCS, 16, 16); DeclComputeShader2D(V_FinalizeCS, 16, 16);
//- Debug shapes //- Debug shapes
VertexShader(V_DVertVS, V_DVertPSInput); DeclVertexShader(V_DVertVS, V_DVertPSInput);
PixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input); DeclPixelShader(V_DVertPS, V_DVertPSOutput, V_DVertPSInput input);

View File

@ -36,7 +36,7 @@ void PT_RunForever(WaveLaneCtx *lane)
{ {
G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target_rwhandle); G_SetConstant(cl, PT_ShaderConst_TestTarget, final_target_rwhandle);
G_SetConstant(cl, PT_ShaderConst_TestConst, 3.123); G_SetConstant(cl, PT_ShaderConst_TestConst, 3.123);
G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicPointClampSampler()); G_SetConstant(cl, PT_ShaderConst_BlitSampler, G_BasicSamplerFromKind(G_BasicSamplerKind_PointClamp));
G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target_rhandle); G_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target_rhandle);
G_SetConstant(cl, PT_ShaderConst_NoiseTex, G_BasicNoiseTexture()); G_SetConstant(cl, PT_ShaderConst_NoiseTex, G_BasicNoiseTexture());
} }

5
src/proto/proto.lay generated
View File

@ -16,8 +16,8 @@
////////////////////////////// //////////////////////////////
//- Api //- Api
@IncludeC proto_shaders.cgh @IncludeC proto_shared.cgh
@IncludeG proto_shaders.cgh @IncludeG proto_shared.cgh
@Bootstrap PT_Bootstrap @Bootstrap PT_Bootstrap
@ -26,4 +26,5 @@
@IncludeC proto.c @IncludeC proto.c
@IncludeG proto_shaders.gh
@IncludeG proto_shaders.g @IncludeG proto_shaders.g

View File

@ -1,12 +1,7 @@
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Test shader //~ Test shader
Struct(TestStruct) ImplComputeShader2D(PT_TestCS)
{
i32 i;
};
ComputeShader2D(PT_TestCS, 8, 8)
{ {
StructuredBuffer<TestStruct> sb = G_Dereference<TestStruct>(PT_ShaderConst_TestBuff); StructuredBuffer<TestStruct> sb = G_Dereference<TestStruct>(PT_ShaderConst_TestBuff);
@ -23,21 +18,10 @@ ComputeShader2D(PT_TestCS, 8, 8)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Blit shader //~ Blit shader
Struct(PT_BlitPSInput)
{
Semantic(Vec4, sv_position);
Semantic(Vec2, src_uv);
};
Struct(PT_BlitPSOutput)
{
Semantic(Vec4, sv_target0);
};
////////////////////////////// //////////////////////////////
//- Vertex shader //- Vertex shader
VertexShader(PT_BlitVS, PT_BlitPSInput) ImplVertexShader(PT_BlitVS, PT_BlitPSInput)
{ {
Vec2 uv = RectUvFromIdx(SV_VertexID); Vec2 uv = RectUvFromIdx(SV_VertexID);
PT_BlitPSInput result; PT_BlitPSInput result;
@ -49,7 +33,7 @@ VertexShader(PT_BlitVS, PT_BlitPSInput)
////////////////////////////// //////////////////////////////
//- Pixel shader //- Pixel shader
PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input) ImplPixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input)
{ {
SamplerState sampler = G_Dereference(PT_ShaderConst_BlitSampler); SamplerState sampler = G_Dereference(PT_ShaderConst_BlitSampler);
Texture2D<Vec4> tex = G_Dereference<Vec4>(PT_ShaderConst_BlitSrc); Texture2D<Vec4> tex = G_Dereference<Vec4>(PT_ShaderConst_BlitSrc);

View File

@ -0,0 +1,31 @@
////////////////////////////////////////////////////////////
//~ Test shader types
// Element type of the structured buffer that PT_TestCS dereferences from PT_ShaderConst_TestBuff.
Struct(TestStruct)
{
i32 i;
};
////////////////////////////////////////////////////////////
//~ Blit shader types
// Value returned by PT_BlitVS and received as the `input` parameter of PT_BlitPS.
// Each field is declared through Semantic(type, name), which on the GPU side
// expands to `type name : name` (the field name doubles as its semantic).
Struct(PT_BlitPSInput)
{
Semantic(Vec4, sv_position);
Semantic(Vec2, src_uv);
};
// Value returned by PT_BlitPS; its single field carries the sv_target0 semantic.
Struct(PT_BlitPSOutput)
{
Semantic(Vec4, sv_target0);
};
////////////////////////////////////////////////////////////
//~ Shaders
//- Test
// The (8, 8) group size is specified here only; the matching
// ImplComputeShader2D(PT_TestCS) definition does not repeat it.
DeclComputeShader2D(PT_TestCS, 8, 8);
//- Blit
// Forward declarations for the blit vertex/pixel pair; the bodies are written
// with ImplVertexShader / ImplPixelShader using the same names and types.
DeclVertexShader(PT_BlitVS, PT_BlitPSInput);
DeclPixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input);

View File

@ -4,7 +4,7 @@
////////////////////////////// //////////////////////////////
//- Vertex shader //- Vertex shader
VertexShader(UI_DRectVS, UI_DRectPSInput) ImplVertexShader(UI_DRectVS, UI_DRectPSInput)
{ {
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0]; UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
StructuredBuffer<UI_GpuRect> rects = G_Dereference<UI_GpuRect>(params.rects); StructuredBuffer<UI_GpuRect> rects = G_Dereference<UI_GpuRect>(params.rects);
@ -33,7 +33,7 @@ VertexShader(UI_DRectVS, UI_DRectPSInput)
////////////////////////////// //////////////////////////////
//- Pixel shader //- Pixel shader
PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) ImplPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
{ {
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0]; UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
StructuredBuffer<UI_GpuRect> rects = G_Dereference<UI_GpuRect>(params.rects); StructuredBuffer<UI_GpuRect> rects = G_Dereference<UI_GpuRect>(params.rects);
@ -120,7 +120,7 @@ PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input)
////////////////////////////// //////////////////////////////
//- Vertex shader //- Vertex shader
VertexShader(UI_BlitVS, UI_BlitPSInput) ImplVertexShader(UI_BlitVS, UI_BlitPSInput)
{ {
Vec2 uv = RectUvFromIdx(SV_VertexID); Vec2 uv = RectUvFromIdx(SV_VertexID);
UI_BlitPSInput result; UI_BlitPSInput result;
@ -133,7 +133,7 @@ VertexShader(UI_BlitVS, UI_BlitPSInput)
////////////////////////////// //////////////////////////////
//- Pixel shader //- Pixel shader
PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input) ImplPixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
{ {
UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0]; UI_GpuParams params = G_Dereference<UI_GpuParams>(UI_GpuConst_Params)[0];
Texture2D<Vec4> tex = G_Dereference<Vec4>(params.target_ro); Texture2D<Vec4> tex = G_Dereference<Vec4>(params.target_ro);

View File

@ -36,9 +36,9 @@ Struct(UI_BlitPSOutput)
//~ Shaders //~ Shaders
//- Rects //- Rects
VertexShader(UI_DRectVS, UI_DRectPSInput); DeclVertexShader(UI_DRectVS, UI_DRectPSInput);
PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input); DeclPixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input);
//- Blit //- Blit
VertexShader(UI_BlitVS, UI_BlitPSInput); DeclVertexShader(UI_BlitVS, UI_BlitPSInput);
PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input); DeclPixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input);