check for overflow in shader printf

This commit is contained in:
jacob 2025-12-11 10:24:30 -06:00
parent 84fbaaf7cd
commit 4453d18d51
4 changed files with 95 additions and 70 deletions

View File

@ -70,7 +70,7 @@
#define FLOOD_DEBUG 0 #define FLOOD_DEBUG 0
#define GPU_DEBUG 1 #define GPU_DEBUG 1
#define GPU_DEBUG_VALIDATION 1 #define GPU_DEBUG_VALIDATION 0
#define GPU_SHADER_PRINT 1 #define GPU_SHADER_PRINT 1
#define GPU_SHADER_PRINT_BUFFER_SIZE Kibi(1); #define GPU_SHADER_PRINT_BUFFER_SIZE Kibi(1);

View File

@ -2872,14 +2872,24 @@ void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
G_SyncCpu(G_MaskFromQueue(queue_kind)); G_SyncCpu(G_MaskFromQueue(queue_kind));
u32 attempted_print_bytes_count = *(G_StructFromResource(readback_buff, u32) + 0); u32 attempted_print_bytes_count = *(G_StructFromResource(readback_buff, u32) + 0); /* The number of bytes shaders attempted to write */
u32 prints_count = *(G_StructFromResource(readback_buff, u32) + 1); u32 prints_count = *(G_StructFromResource(readback_buff, u32) + 1); /* The number of shader prints that are in the buffer */
u32 overflows_count = *(G_StructFromResource(readback_buff, u32) + 2); u32 overflows_count = *(G_StructFromResource(readback_buff, u32) + 2); /* The number of shader prints that could not fit in the buffer */
u8 *start = G_StructFromResource(readback_buff, u8) + 12; u8 *start = G_StructFromResource(readback_buff, u8) + 12;
/* Deserialize */ /* Deserialize */
if (GPU_SHADER_PRINT_LOG) if (GPU_SHADER_PRINT_LOG)
{ {
if (prints_count > 0)
{
LogDebugF(
"Forwarding logs collected from GPU - Resident prints: %F, Total attempted prints: %F, Total attempted bytes: %F",
FmtUint(prints_count),
FmtUint(prints_count + overflows_count),
FmtUint(attempted_print_bytes_count)
);
}
/* FIXME: Remove this */ /* FIXME: Remove this */
TempArena scratch = BeginScratchNoConflict(); TempArena scratch = BeginScratchNoConflict();
u8 *at = start; u8 *at = start;
@ -2888,10 +2898,12 @@ void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
{ {
u32 chars_count = 0; u32 chars_count = 0;
u32 args_count = 0; u32 args_count = 0;
b32 internal_overflow = 0;
{ {
u32 header = *(u32 *)at; u32 header = *(u32 *)at;
chars_count = (header & 0x0000FFFF) >> 0; chars_count = (header & 0x0000FFFF) >> 0;
args_count = (header & 0xFFFF0000) >> 16; args_count = (header & 0x7FFF0000) >> 16;
internal_overflow = (header & 0xF0000000) >> 31;
at += 4; at += 4;
} }
@ -2940,11 +2952,16 @@ void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
} }
} }
String final_str = FormatString(scratch.arena, fmt, args); String final_str = ZI;
if (GPU_SHADER_PRINT_LOG) if (internal_overflow)
{ {
LogDebug(final_str); final_str = Lit("[Shader PrintF is too large]");
} }
else
{
final_str = FormatString(scratch.arena, fmt, args);
}
LogDebug(final_str);
at = (u8 *)AlignU64((u64)at, 4); at = (u8 *)AlignU64((u64)at, 4);
} }

View File

@ -107,7 +107,7 @@ G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 8)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Debug printf //~ Debug printf
/* This technique is based on MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */ /* This technique is based on MJP's article - https://therealmjp.github.io/posts/hlsl-printf/ */
Enum(G_FmtArgKind) Enum(G_FmtArgKind)
{ {
@ -115,6 +115,8 @@ Enum(G_FmtArgKind)
G_FmtArgKind_U32, G_FmtArgKind_U32,
G_FmtArgKind_I32, G_FmtArgKind_I32,
G_FmtArgKind_F32, G_FmtArgKind_F32,
G_FmtArgKind_End,
}; };
Struct(G_FmtArg) Struct(G_FmtArg)
@ -124,41 +126,56 @@ Struct(G_FmtArg)
}; };
#if IsLanguageG && GPU_SHADER_PRINT #if IsLanguageG && GPU_SHADER_PRINT
G_FmtArg G_Fmt(u32 v) { G_FmtArg result; result.kind = G_FmtArgKind_U32; result.v = v; return result; } G_FmtArg G_Fmt(u32 v) { G_FmtArg result; result.kind = G_FmtArgKind_U32; result.v = v; return result; }
G_FmtArg G_Fmt(i32 v) { G_FmtArg result; result.kind = G_FmtArgKind_I32; result.v = v; return result; } G_FmtArg G_Fmt(i32 v) { G_FmtArg result; result.kind = G_FmtArgKind_I32; result.v = v; return result; }
G_FmtArg G_Fmt(f32 v) { G_FmtArg result; result.kind = G_FmtArgKind_F32; result.v = asuint(v); return result; } G_FmtArg G_Fmt(f32 v) { G_FmtArg result; result.kind = G_FmtArgKind_F32; result.v = asuint(v); return result; }
G_FmtArg G_FmtEnd(void) { G_FmtArg result; result.kind = G_FmtArgKind_End; result.v = 0; return result; }
Struct(G_TempPrintBuffer) Struct(G_TempPrintBuffer)
{ {
u32 char_chunks[256]; /* NOTE: The larger the array size, the longer the compilation time */
u32 char_pos; u32 byte_chunks[64];
u32 fmt_size; u32 bytes_count;
u32 chars_count;
u32 args_count; u32 args_count;
b32 overflowed;
}; };
void G_PushPrintChar(inout G_TempPrintBuffer buff, u32 v) void G_PushPrintByte(inout G_TempPrintBuffer buff, u32 v)
{ {
/* TODO: Overflow check */ u32 chunk_idx = buff.bytes_count / 4;
u32 u32_arr_pos = buff.char_pos / 4; if (chunk_idx < countof(buff.byte_chunks))
u32 idx_in_u32 = buff.char_pos & 0x03;
if (idx_in_u32 == 0)
{ {
/* Since buff is not zero initialized, we set the chunk on first write here */ u32 byte_idx_in_chunk = buff.bytes_count & 0x03;
buff.char_chunks[u32_arr_pos] = v & 0xFF; if (byte_idx_in_chunk == 0)
{
/* Since buff is not zero initialized, we set the chunk on first write here */
buff.byte_chunks[chunk_idx] = v & 0xFF;
}
else
{
buff.byte_chunks[chunk_idx] |= (v & 0xFF) << (byte_idx_in_chunk * 8);
}
buff.bytes_count += 1;
} }
else else
{ {
buff.char_chunks[u32_arr_pos] |= (v & 0xFF) << (idx_in_u32 * 8); buff.overflowed = 1;
} }
buff.char_pos += 1;
} }
void G_CommitPrint(G_TempPrintBuffer buff) void G_CommitPrint(G_TempPrintBuffer buff)
{ {
RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_PrintBufferRef); RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_PrintBufferRef);
u32 chunks_count = (buff.char_pos + 3) / 4; if (buff.overflowed)
{
buff.bytes_count = 0;
buff.chars_count = 0;
buff.args_count = 0;
}
u32 chunks_count = (buff.bytes_count + 3) / 4;
u32 alloc_size = 0; u32 alloc_size = 0;
alloc_size += 4; /* Header */ alloc_size += 4; /* Header */
alloc_size += chunks_count * 4; /* Chunks */ alloc_size += chunks_count * 4; /* Chunks */
@ -166,23 +183,22 @@ Struct(G_FmtArg)
/* Atomic fetch + add to base counter */ /* Atomic fetch + add to base counter */
u32 base; u32 base;
rw.InterlockedAdd(0, alloc_size, base); rw.InterlockedAdd(0, alloc_size, base);
base += 4; /* Offset for allocation counter */ base += 4; /* Offset for allocation counter */
base += 4; /* Offset for success counter */ base += 4; /* Offset for success counter */
base += 4; /* Offset for overflow counter */ base += 4; /* Offset for overflow counter */
if ((base + alloc_size) < countof(rw)) if ((base + alloc_size) < countof(rw))
{ {
u32 pos = 0;
/* Increment success counter */ /* Increment success counter */
rw.InterlockedAdd(4, 1); rw.InterlockedAdd(4, 1);
u32 pos = 0;
/* Write header */ /* Write header */
{ {
u32 header = 0; u32 header = 0;
header |= (buff.fmt_size << 0) & 0x0000FFFF; header |= (buff.chars_count << 0) & 0x0000FFFF;
header |= (buff.args_count << 16) & 0xFFFF0000; header |= (buff.args_count << 16) & 0x7FFF0000;
header |= (buff.overflowed << 31) & 0xF0000000;
rw.Store(base + pos, header); rw.Store(base + pos, header);
pos += 4; pos += 4;
} }
@ -190,7 +206,7 @@ Struct(G_FmtArg)
/* Write chunks */ /* Write chunks */
for (u32 chunk_idx = 0; chunk_idx < chunks_count; ++chunk_idx) for (u32 chunk_idx = 0; chunk_idx < chunks_count; ++chunk_idx)
{ {
u32 chunk = buff.char_chunks[chunk_idx]; u32 chunk = buff.byte_chunks[chunk_idx];
rw.Store(base + pos, chunk); rw.Store(base + pos, chunk);
pos += 4; pos += 4;
} }
@ -202,43 +218,32 @@ Struct(G_FmtArg)
} }
} }
#define G_Print(fmt) do { \ #define G_PrintF_(fmt, ...) do { \
G_TempPrintBuffer __tmp; \ G_TempPrintBuffer __tmp; \
__tmp.char_pos = 0; \ __tmp.bytes_count = 0; \
u32 __pos = 0; \ __tmp.overflowed = 0; \
while (U32FromChar(fmt[__pos]) != 0) \ u32 __char_idx = 0; \
{ \ while (U32FromChar(fmt[__char_idx]) != 0) \
G_PushPrintChar(__tmp, U32FromChar(fmt[__pos])); \ { \
++__pos; \ G_PushPrintByte(__tmp, U32FromChar(fmt[__char_idx])); \
} \ ++__char_idx; \
__tmp.fmt_size = __tmp.char_pos; \ } \
G_CommitPrint(__tmp); \ G_FmtArg __args[] = { __VA_ARGS__ }; \
__tmp.chars_count = __tmp.bytes_count; \
__tmp.args_count = (countof(__args) - 1); \
for (u32 __arg_idx = 0; __arg_idx < __tmp.args_count; ++__arg_idx) \
{ \
G_PushPrintByte(__tmp, __args[__arg_idx].kind); \
G_PushPrintByte(__tmp, __args[__arg_idx].v >> 0); \
G_PushPrintByte(__tmp, __args[__arg_idx].v >> 8); \
G_PushPrintByte(__tmp, __args[__arg_idx].v >> 16); \
G_PushPrintByte(__tmp, __args[__arg_idx].v >> 24); \
} \
G_CommitPrint(__tmp); \
} while (0) } while (0)
#define G_PrintF(fmt, ...) do { \ #define G_PrintF(fmt, ...) G_PrintF_(fmt, ##__VA_ARGS__, G_FmtEnd())
G_TempPrintBuffer __tmp; \
__tmp.char_pos = 0; \
u32 __pos = 0; \
while (U32FromChar(fmt[__pos]) != 0) \
{ \
G_PushPrintChar(__tmp, U32FromChar(fmt[__pos])); \
++__pos; \
} \
G_FmtArg __args[] = { __VA_ARGS__ }; \
__tmp.fmt_size = __tmp.char_pos; \
__tmp.args_count = countof(__args); \
for (u32 __arg_idx = 0; __arg_idx < countof(__args); ++__arg_idx) \
{ \
G_PushPrintChar(__tmp, __args[__arg_idx].kind); \
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 0); \
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 8); \
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 16); \
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 24); \
} \
G_CommitPrint(__tmp); \
} while (0)
#else #else
#define G_Print(fmt)
#define G_PrintF(fmt) #define G_PrintF(fmt)
#endif #endif

View File

@ -145,10 +145,13 @@ PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
Vec2 uv = input.src_uv; Vec2 uv = input.src_uv;
Vec4 result = tex.Sample(sampler, uv); Vec4 result = tex.Sample(sampler, uv);
// G_Print("Hello there!"); G_PrintF(
G_PrintF("Bla: (%F, %F)", G_Fmt(uv.x), G_Fmt(uv.y)); "Hello there! (%F, %F), (%F, %F)",
G_Fmt(input.SV_Position.x),
G_Fmt(input.SV_Position.y),
G_Fmt(uv.x),
G_Fmt(uv.y)
);
UI_BlitPSOutput output; UI_BlitPSOutput output;
output.SV_Target0 = result; output.SV_Target0 = result;