diff --git a/src/base/base.h b/src/base/base.h index 12d55e66..5759d7d5 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -435,8 +435,6 @@ //- countof #if IsLanguageC #define countof(a) (sizeof(a) / sizeof((a)[0])) -#elif IsLanguageHlsl - template uint countof(T ints[N]) { return N; } #endif //- IsArray diff --git a/src/base/base_gpu.hlsl b/src/base/base_gpu.hlsl index 9a40ce20..79d39485 100644 --- a/src/base/base_gpu.hlsl +++ b/src/base/base_gpu.hlsl @@ -24,8 +24,9 @@ typedef float4x4 Mat4x4; //////////////////////////////////////////////////////////// //~ Handle dereference wrappers -/* NOTE: Non-uniform resource access is assumed as the default behavior */ -/* TODO: Add explicit "uniform" variants of handle deref operations for optimization on AMD devices */ +/* TODO: Non-uniform resource access is assumed as the default behavior. We + * may want to add explicit "uniform" variants for optimization on AMD devices + * in the future. */ template StructuredBuffer StructuredBufferFromHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } template RWStructuredBuffer RWStructuredBufferFromHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } @@ -43,52 +44,19 @@ template RWTexture3D RWTexture3DFromHandle(RWTexture3DHandle h) { SamplerState SamplerStateFromHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[NonUniformResourceIndex(h.v)]; } //////////////////////////////////////////////////////////// -//~ Dimension helpers +//~ Countof -u32 Count1D(Texture1D tex) -{ - u32 result; - tex.GetDimensions(result.x); - return result; -} - -template -u32 Count1D(RWTexture1D tex) -{ - u32 result; - tex.GetDimensions(result.x); - return result; -} - -Vec2U32 Count2D(Texture2D tex) -{ - Vec2U32 result; - tex.GetDimensions(result.x, result.y); - return result; -} - -template -Vec2U32 Count2D(RWTexture2D tex) -{ - Vec2U32 result; - tex.GetDimensions(result.x, result.y); - return result; -} - -Vec3U32 Count3D(Texture3D tex) -{ - Vec3U32 result; - tex.GetDimensions(result.x, result.y, result.z); - return result; -} - -template -Vec3U32 Count3D(RWTexture3D tex) -{ - Vec3U32 result; - tex.GetDimensions(result.x, result.y, result.z); - return result; -} +template u32 countof(T arr[N]) { return N; } +template u32 countof(StructuredBuffer buff) { u32 result; buff.GetDimensions(result); return result; } +template u32 countof(RWStructuredBuffer buff) { u32 result; buff.GetDimensions(result); return result; } + u32 countof(ByteAddressBuffer buff) { u32 result; buff.GetDimensions(result); return result; } + u32 countof(RWByteAddressBuffer buff) { u32 result; buff.GetDimensions(result); return result; } + u32 countof(Texture1D tex) { u32 result; tex.GetDimensions(result); return result; } +template u32 countof(RWTexture1D tex) { u32 result; tex.GetDimensions(result); return result; } + Vec2U32 countof(Texture2D tex) { Vec2U32 result; tex.GetDimensions(result.x, result.y); return result; } +template Vec2U32 countof(RWTexture2D tex) { Vec2U32 result; tex.GetDimensions(result.x, result.y); return result; } + Vec3U32 countof(Texture3D tex) { Vec3U32 result; tex.GetDimensions(result.x, result.y, result.z); return result; } +template Vec3U32 countof(RWTexture3D tex) { Vec3U32 result; tex.GetDimensions(result.x, result.y, result.z); return result; } //////////////////////////////////////////////////////////// //~ Color helpers diff --git a/src/base/base_string.c b/src/base/base_string.c index d628b608..8789982f 100644 --- a/src/base/base_string.c +++ b/src/base/base_string.c @@ -565,52 +565,52 @@ String FormatStringV(Arena *arena, String fmt, va_list args) no_more_args = 1; } break; - case FmtKind_Char: + case FmtArgKind_Char: { parsed_str = StringFromChar(arena, arg.value.c); } break; - case FmtKind_String: + case FmtArgKind_String: { parsed_str = PushString(arena, arg.value.string); } break; - case FmtKind_Uint: + case FmtArgKind_Uint: { parsed_str = StringFromU64(arena, arg.value.uint, 10, arg.z); } break; - case FmtKind_Sint: + case FmtArgKind_Sint: { parsed_str = StringFromI64(arena, arg.value.sint, 10, arg.z); } break; - case FmtKind_Hex: + case FmtArgKind_Hex: { parsed_str = StringFromU64(arena, arg.value.sint, 16, arg.z); } break; - case FmtKind_Ptr: + case FmtArgKind_Ptr: { parsed_str = StringFromPtr(arena, arg.value.ptr); } break; - case FmtKind_Float: + case FmtArgKind_Float: { parsed_str = StringFromF64(arena, arg.value.f, arg.p); } break; - case FmtKind_Handle: + case FmtArgKind_Handle: { parsed_str = StringFromhandle(arena, arg.value.handle.h64[0], arg.value.handle.h64[1]); } break; - case FmtKind_Uid: + case FmtArgKind_Uid: { parsed_str = StringFromUid(arena, arg.value.uid); } break; - case FmtKind_End: + case FmtArgKind_End: { /* Unexpected end. Not enough FMT args passed to function. */ Assert(0); @@ -636,7 +636,7 @@ String FormatStringV(Arena *arena, String fmt, va_list args) { FmtArg last_arg = va_arg(args, FmtArg); /* End arg not reached. Too many FMT values passed to function. */ - Assert(last_arg.kind == FmtKind_End); + Assert(last_arg.kind == FmtArgKind_End); } #endif diff --git a/src/base/base_string.h b/src/base/base_string.h index 84a35338..1a6ddb34 100644 --- a/src/base/base_string.h +++ b/src/base/base_string.h @@ -4,27 +4,27 @@ #define DefaultFmtPrecision 3 #define IntChars ("0123456789abcdef") -Enum(FmtKind) +Enum(FmtArgKind) { - FmtKind_None, + FmtArgKind_None, /* Arbitrary magic numbers for argument validation */ - FmtKind_Char = 0x0f5281df, - FmtKind_String = 0x0a5ffa9a, - FmtKind_Uint = 0x0746f19b, - FmtKind_Sint = 0x08603694, - FmtKind_Hex = 0x0a3d0792, - FmtKind_Ptr = 0x0c4519e4, - FmtKind_Float = 0x04814143, - FmtKind_Uid = 0x3d1cd407, - FmtKind_Handle = 0x6ead3bec, + FmtArgKind_Char = 0x0f5281df, + FmtArgKind_String = 0x0a5ffa9a, + FmtArgKind_Uint = 0x0746f19b, + FmtArgKind_Sint = 0x08603694, + FmtArgKind_Hex = 0x0a3d0792, + FmtArgKind_Ptr = 0x0c4519e4, + FmtArgKind_Float = 0x04814143, + FmtArgKind_Uid = 0x3d1cd407, + FmtArgKind_Handle = 0x6ead3bec, - FmtKind_End = 0x0ecbc5ae + FmtArgKind_End = 0x0ecbc5ae }; Struct(FmtArg) { - FmtKind kind; + FmtArgKind kind; u32 p; /* Precision */ u32 z; /* Z-fill */ union @@ -100,16 +100,16 @@ String StringFromList(Arena *arena, StringList l, String separator); #define FMTARG(_kind, ...) ((FmtArg) { .kind = (_kind), .p = DefaultFmtPrecision, __VA_ARGS__ }) -#define FmtChar(v, ...) FMTARG(FmtKind_Char, .value.c = (v), __VA_ARGS__) -#define FmtString(v, ...) FMTARG(FmtKind_String, .value.string = (v), __VA_ARGS__) -#define FmtUint(v, ...) FMTARG(FmtKind_Uint, .value.uint = (v), __VA_ARGS__) -#define FmtSint(v, ...) FMTARG(FmtKind_Sint, .value.sint = (v), __VA_ARGS__) -#define FmtHex(v, ...) FMTARG(FmtKind_Hex, .value.uint = (v), __VA_ARGS__) -#define FmtPtr(v, ...) FMTARG(FmtKind_Ptr, .value.ptr = (v), __VA_ARGS__) -#define FmtFloat(v, ...) FMTARG(FmtKind_Float, .value.f = (v), __VA_ARGS__) -#define FmtHandle(v, ...) FMTARG(FmtKind_Handle, .value.handle.h64[0] = (v).idx, .value.handle.h64[1] = (v).gen, __VA_ARGS__) -#define FmtUid(v, ...) FMTARG(FmtKind_Uid, .value.uid = (v), __VA_ARGS__) -#define FmtEnd FMTARG(FmtKind_End) /* Denotes end of VA list */ +#define FmtChar(v, ...) FMTARG(FmtArgKind_Char, .value.c = (v), __VA_ARGS__) +#define FmtString(v, ...) FMTARG(FmtArgKind_String, .value.string = (v), __VA_ARGS__) +#define FmtUint(v, ...) FMTARG(FmtArgKind_Uint, .value.uint = (v), __VA_ARGS__) +#define FmtSint(v, ...) FMTARG(FmtArgKind_Sint, .value.sint = (v), __VA_ARGS__) +#define FmtHex(v, ...) FMTARG(FmtArgKind_Hex, .value.uint = (v), __VA_ARGS__) +#define FmtPtr(v, ...) FMTARG(FmtArgKind_Ptr, .value.ptr = (v), __VA_ARGS__) +#define FmtFloat(v, ...) FMTARG(FmtArgKind_Float, .value.f = (v), __VA_ARGS__) +#define FmtHandle(v, ...) FMTARG(FmtArgKind_Handle, .value.handle.h64[0] = (v).idx, .value.handle.h64[1] = (v).gen, __VA_ARGS__) +#define FmtUid(v, ...) FMTARG(FmtArgKind_Uid, .value.uid = (v), __VA_ARGS__) +#define FmtEnd FMTARG(FmtArgKind_End) /* Denotes end of VA list */ #define StringF(arena, lit, ...) FormatString_((arena), Lit(lit), __VA_ARGS__, FmtEnd) #define FormatString(arena, fmt, ...) FormatString_((arena), (fmt), __VA_ARGS__, FmtEnd) diff --git a/src/gpu/gpu.lay b/src/gpu/gpu.lay index c0a447ef..61f3b12b 100644 --- a/src/gpu/gpu.lay +++ b/src/gpu/gpu.lay @@ -5,7 +5,9 @@ //- Api @IncludeC gpu_core.h +@IncludeC gpu_shader_extras.h @IncludeC gpu_extras.h +@IncludeGpu gpu_shader_extras.h //- Impl @IncludeC gpu_extras.c diff --git a/src/gpu/gpu_shader_extras.h b/src/gpu/gpu_shader_extras.h new file mode 100644 index 00000000..f381dd6c --- /dev/null +++ b/src/gpu/gpu_shader_extras.h @@ -0,0 +1,12 @@ +//////////////////////////////////////////////////////////// +//~ Static handles (common resources available to all shaders) + +#if IsLanguageC + #define GPU_StaticHandle(type, v) ((type) { (v) }) +#elif IsLanguageHlsl + #define GPU_StaticHandle(type, v) (type(v)) +#endif + +#define GPU_ShaderPrintBufferHandle RWByteAddressBufferHandle (1) +#define GPU_BasicPointSamplerHandle SamplerStateHandle (2) +#define GPU_BasicNoiseTextureHandle Texture2DHandle (3) diff --git a/src/gpu/gpu_shader_extras.hlsl b/src/gpu/gpu_shader_extras.hlsl index eb5d7441..cb86ee60 100644 --- a/src/gpu/gpu_shader_extras.hlsl +++ b/src/gpu/gpu_shader_extras.hlsl @@ -1,14 +1,220 @@ -//////////////////////////////////////////////////////////// -//~ Shader printf types - - - //////////////////////////////////////////////////////////// //~ Shader printf -// #define Test(fmt_cstr, ...) do { \ -// } while (0) +/* This technique comes from MJP's article: + * https://therealmjp.github.io/posts/hlsl-printf/ + */ -void Test(u32 count) +template u32 U32FromChar(in T c) { + if(c == ' ') + return 32; + if(c == '!') + return 33; + if(c == '\"' || c == '\"') + return 34; + if(c == '#') + return 35; + if(c == '$') + return 36; + if(c == '%') + return 37; + if(c == '&') + return 38; + if(c == '\'') + return 39; + if(c == '(') + return 40; + if(c == ')') + return 41; + if(c == '*') + return 42; + if(c == '+') + return 43; + if(c == ',') + return 44; + if(c == '-') + return 45; + if(c == '.') + return 46; + if(c == '/') + return 47; + if(c == '0') + return 48; + if(c == '1') + return 49; + if(c == '2') + return 50; + if(c == '3') + return 51; + if(c == '4') + return 52; + if(c == '5') + return 53; + if(c == '6') + return 54; + if(c == '7') + return 55; + if(c == '8') + return 56; + if(c == '9') + return 57; + if(c == ':') + return 58; + if(c == ';') + return 59; + if(c == '<') + return 60; + if(c == '=') + return 61; + if(c == '>') + return 62; + if(c == '?') + return 63; + if(c == '@') + return 64; + if(c == 'A') + return 65; + if(c == 'B') + return 66; + if(c == 'C') + return 67; + if(c == 'D') + return 68; + if(c == 'E') + return 69; + if(c == 'F') + return 70; + if(c == 'G') + return 71; + if(c == 'H') + return 72; + if(c == 'I') + return 73; + if(c == 'J') + return 74; + if(c == 'K') + return 75; + if(c == 'L') + return 76; + if(c == 'M') + return 77; + if(c == 'N') + return 78; + if(c == 'O') + return 79; + if(c == 'P') + return 80; + if(c == 'Q') + return 81; + if(c == 'R') + return 82; + if(c == 'S') + return 83; + if(c == 'T') + return 84; + if(c == 'U') + return 85; + if(c == 'V') + return 86; + if(c == 'W') + return 87; + if(c == 'X') + return 88; + if(c == 'Y') + return 89; + if(c == 'Z') + return 90; + if(c == '[') + return 91; + if(c == '\\') + return 92; + if(c == ']') + return 93; + if(c == '^') + return 94; + if(c == '_') + return 95; + if(c == '`') + return 96; + if(c == 'a') + return 97; + if(c == 'b') + return 98; + if(c == 'c') + return 99; + if(c == 'd') + return 100; + if(c == 'e') + return 101; + if(c == 'f') + return 102; + if(c == 'g') + return 103; + if(c == 'h') + return 104; + if(c == 'i') + return 105; + if(c == 'j') + return 106; + if(c == 'k') + return 107; + if(c == 'l') + return 108; + if(c == 'm') + return 109; + if(c == 'n') + return 110; + if(c == 'o') + return 111; + if(c == 'p') + return 112; + if(c == 'q') + return 113; + if(c == 'r') + return 114; + if(c == 's') + return 115; + if(c == 't') + return 116; + if(c == 'u') + return 117; + if(c == 'v') + return 118; + if(c == 'w') + return 119; + if(c == 'x') + return 120; + if(c == 'y') + return 121; + if(c == 'z') + return 122; + if(c == '{') + return 123; + if(c == '|') + return 124; + if(c == '}') + return 125; + if(c == '~') + return 126; + return 0; } + +#if GPU_DEBUG + #define DebugPrint(fmt_cstr) do { \ + u32 __strlen = 0; \ + for (;;) { if (U32FromChar(fmt_cstr[__strlen]) == 0) { break; } ++__strlen; } \ + RWByteAddressBuffer __print_buff = RWByteAddressBufferFromHandle(GPU_ShaderPrintBufferHandle); \ + u32 __pos; \ + __print_buff.InterlockedAdd(0, __strlen, __pos); \ + if (__pos < countof(__print_buff)) \ + { \ + for (u32 char_idx = 0; char_idx < __strlen; ++char_idx) \ + { \ + __print_buff.Store(__pos + char_idx, U32FromChar(fmt_cstr[char_idx])); \ + } \ + } \ + } while (0) +#else + #define DebugPrint(...) +#endif diff --git a/src/gpu/gpu_static_extras.h b/src/gpu/gpu_static_extras.h new file mode 100644 index 00000000..13103b6e --- /dev/null +++ b/src/gpu/gpu_static_extras.h @@ -0,0 +1 @@ +/* This file contains shader resource handles diff --git a/src/meta/meta.c b/src/meta/meta.c index edda2312..58b3d60c 100644 --- a/src/meta/meta.c +++ b/src/meta/meta.c @@ -408,7 +408,8 @@ void BuildEntryPoint(WaveLaneCtx *lane) //- Dxc { - PushStringToList(perm, &cp.flags_dxc, Lit("-Od")); + // PushStringToList(perm, &cp.flags_dxc, Lit("-Od")); + PushStringToList(perm, &cp.flags_dxc, Lit("-O3")); PushStringToList(perm, &cp.flags_dxc, Lit("-Zi -Qembed_debug")); } } @@ -955,7 +956,7 @@ void BuildEntryPoint(WaveLaneCtx *lane) for (u32 gpu_obj_idx = 0; gpu_obj_idx < build.gpu_objs.count; ++gpu_obj_idx) { GpuObj *gpu_obj = &build.gpu_objs.array[gpu_obj_idx]; - if (!disp_obj || TrimWhitespace(disp_obj->output).len == 0 || disp_obj->return_code != 0) + if (!disp_obj || TrimWhitespace(disp_obj->output).len == 0 || gpu_obj->return_code != 0) { disp_obj = gpu_obj; } diff --git a/src/pp/pp_vis/pp_vis_shaders.hlsl b/src/pp/pp_vis/pp_vis_shaders.hlsl index 841836f5..8c1f83b0 100644 --- a/src/pp/pp_vis/pp_vis_shaders.hlsl +++ b/src/pp/pp_vis/pp_vis_shaders.hlsl @@ -46,7 +46,7 @@ VertexShader(V_DQuadVS, V_DQuadPSInput) Vec2 target_pos = 0; V_DQuadPSInput result; - result.SV_Position = Vec4(NdcFromPos(target_pos, Count2D(target)).xy, 0, 1); + result.SV_Position = Vec4(NdcFromPos(target_pos, countof(target)).xy, 0, 1); result.quad_idx = SV_InstanceID; return result; } @@ -96,7 +96,7 @@ VertexShader(V_DVertVS, V_DVertPSInput) Vec2 target_pos = vert.pos; V_DVertPSInput result; - result.SV_Position = Vec4(NdcFromPos(target_pos, Count2D(target)).xy, 0, 1); + result.SV_Position = Vec4(NdcFromPos(target_pos, countof(target)).xy, 0, 1); result.color_lin = vert.color_lin; return result; } diff --git a/src/ui/ui_shaders.hlsl b/src/ui/ui_shaders.hlsl index 003c65bf..7108f7cf 100644 --- a/src/ui/ui_shaders.hlsl +++ b/src/ui/ui_shaders.hlsl @@ -166,7 +166,7 @@ VertexShader(UI_BlitVS, UI_BlitPSInput) PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input) { UI_DParams params = StructuredBufferFromHandle(UI_ShaderConst_Params)[0]; - SamplerState sampler = SamplerStateFromHandle(params.sampler); + SamplerState sampler = SamplerStateFromHandle(params.sampler); Texture2D tex = Texture2DFromHandle(params.target_ro); Vec2 uv = input.src_uv; @@ -192,15 +192,7 @@ PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input) - // LogDebugF( - // "uv: (%F, %F), result: (%F, %F, %F, %F)", - // FmtFloat(uv.x), - // FmtFloat(uv.y), - // FmtFloat(result.x), - // FmtFloat(result.y), - // FmtFloat(result.z), - // FmtFloat(result.w), - // ); + DebugPrint("Hello there");