check for overflow in shader printf
This commit is contained in:
parent
84fbaaf7cd
commit
4453d18d51
@ -70,7 +70,7 @@
|
|||||||
#define FLOOD_DEBUG 0
|
#define FLOOD_DEBUG 0
|
||||||
|
|
||||||
#define GPU_DEBUG 1
|
#define GPU_DEBUG 1
|
||||||
#define GPU_DEBUG_VALIDATION 1
|
#define GPU_DEBUG_VALIDATION 0
|
||||||
|
|
||||||
#define GPU_SHADER_PRINT 1
|
#define GPU_SHADER_PRINT 1
|
||||||
/* NOTE: No trailing semicolon, so the macro stays usable inside expressions and argument lists */
#define GPU_SHADER_PRINT_BUFFER_SIZE Kibi(1)
|
/* NOTE: No trailing semicolon, so the macro stays usable inside expressions and argument lists */
#define GPU_SHADER_PRINT_BUFFER_SIZE Kibi(1)
|
||||||
|
|||||||
@ -2872,14 +2872,24 @@ void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
|
|||||||
|
|
||||||
G_SyncCpu(G_MaskFromQueue(queue_kind));
|
G_SyncCpu(G_MaskFromQueue(queue_kind));
|
||||||
|
|
||||||
u32 attempted_print_bytes_count = *(G_StructFromResource(readback_buff, u32) + 0);
|
u32 attempted_print_bytes_count = *(G_StructFromResource(readback_buff, u32) + 0); /* The number of bytes shaders attempted to write */
|
||||||
u32 prints_count = *(G_StructFromResource(readback_buff, u32) + 1);
|
u32 prints_count = *(G_StructFromResource(readback_buff, u32) + 1); /* The number of shader prints that are in the buffer */
|
||||||
u32 overflows_count = *(G_StructFromResource(readback_buff, u32) + 2);
|
u32 overflows_count = *(G_StructFromResource(readback_buff, u32) + 2); /* The number of shader prints that could not fit in the buffer */
|
||||||
u8 *start = G_StructFromResource(readback_buff, u8) + 12;
|
u8 *start = G_StructFromResource(readback_buff, u8) + 12;
|
||||||
|
|
||||||
/* Deserialize */
|
/* Deserialize */
|
||||||
if (GPU_SHADER_PRINT_LOG)
|
if (GPU_SHADER_PRINT_LOG)
|
||||||
{
|
{
|
||||||
|
if (prints_count > 0)
|
||||||
|
{
|
||||||
|
LogDebugF(
|
||||||
|
"Forwarding logs collected from GPU - Resident prints: %F, Total attempted prints: %F, Total attempted bytes: %F",
|
||||||
|
FmtUint(prints_count),
|
||||||
|
FmtUint(prints_count + overflows_count),
|
||||||
|
FmtUint(attempted_print_bytes_count)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/* FIXME: Remove this */
|
/* FIXME: Remove this */
|
||||||
TempArena scratch = BeginScratchNoConflict();
|
TempArena scratch = BeginScratchNoConflict();
|
||||||
u8 *at = start;
|
u8 *at = start;
|
||||||
@ -2888,10 +2898,12 @@ void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
|
|||||||
{
|
{
|
||||||
u32 chars_count = 0;
|
u32 chars_count = 0;
|
||||||
u32 args_count = 0;
|
u32 args_count = 0;
|
||||||
|
b32 internal_overflow = 0;
|
||||||
{
|
{
|
||||||
u32 header = *(u32 *)at;
|
u32 header = *(u32 *)at;
|
||||||
chars_count = (header & 0x0000FFFF) >> 0;
|
chars_count = (header & 0x0000FFFF) >> 0;
|
||||||
args_count = (header & 0xFFFF0000) >> 16;
|
args_count = (header & 0x7FFF0000) >> 16;
|
||||||
|
internal_overflow = (header & 0xF0000000) >> 31;
|
||||||
at += 4;
|
at += 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2940,11 +2952,16 @@ void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
String final_str = FormatString(scratch.arena, fmt, args);
|
String final_str = ZI;
|
||||||
if (GPU_SHADER_PRINT_LOG)
|
if (internal_overflow)
|
||||||
{
|
{
|
||||||
LogDebug(final_str);
|
final_str = Lit("[Shader PrintF is too large]");
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
final_str = FormatString(scratch.arena, fmt, args);
|
||||||
|
}
|
||||||
|
LogDebug(final_str);
|
||||||
|
|
||||||
at = (u8 *)AlignU64((u64)at, 4);
|
at = (u8 *)AlignU64((u64)at, 4);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -107,7 +107,7 @@ G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 8)
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Debug printf
|
//~ Debug printf
|
||||||
|
|
||||||
/* This technique is based on MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */
|
/* This technique is based on MJP's article - https://therealmjp.github.io/posts/hlsl-printf/ */
|
||||||
|
|
||||||
Enum(G_FmtArgKind)
|
Enum(G_FmtArgKind)
|
||||||
{
|
{
|
||||||
@ -115,6 +115,8 @@ Enum(G_FmtArgKind)
|
|||||||
G_FmtArgKind_U32,
|
G_FmtArgKind_U32,
|
||||||
G_FmtArgKind_I32,
|
G_FmtArgKind_I32,
|
||||||
G_FmtArgKind_F32,
|
G_FmtArgKind_F32,
|
||||||
|
|
||||||
|
G_FmtArgKind_End,
|
||||||
};
|
};
|
||||||
|
|
||||||
Struct(G_FmtArg)
|
Struct(G_FmtArg)
|
||||||
@ -124,41 +126,56 @@ Struct(G_FmtArg)
|
|||||||
};
|
};
|
||||||
|
|
||||||
#if IsLanguageG && GPU_SHADER_PRINT
|
#if IsLanguageG && GPU_SHADER_PRINT
|
||||||
G_FmtArg G_Fmt(u32 v) { G_FmtArg result; result.kind = G_FmtArgKind_U32; result.v = v; return result; }
|
G_FmtArg G_Fmt(u32 v) { G_FmtArg result; result.kind = G_FmtArgKind_U32; result.v = v; return result; }
|
||||||
G_FmtArg G_Fmt(i32 v) { G_FmtArg result; result.kind = G_FmtArgKind_I32; result.v = v; return result; }
|
G_FmtArg G_Fmt(i32 v) { G_FmtArg result; result.kind = G_FmtArgKind_I32; result.v = v; return result; }
|
||||||
G_FmtArg G_Fmt(f32 v) { G_FmtArg result; result.kind = G_FmtArgKind_F32; result.v = asuint(v); return result; }
|
G_FmtArg G_Fmt(f32 v) { G_FmtArg result; result.kind = G_FmtArgKind_F32; result.v = asuint(v); return result; }
|
||||||
|
G_FmtArg G_FmtEnd(void) { G_FmtArg result; result.kind = G_FmtArgKind_End; result.v = 0; return result; }
|
||||||
|
|
||||||
Struct(G_TempPrintBuffer)
|
Struct(G_TempPrintBuffer)
|
||||||
{
|
{
|
||||||
u32 char_chunks[256];
|
/* NOTE: The larger the array size, the longer the compilation time */
|
||||||
u32 char_pos;
|
u32 byte_chunks[64];
|
||||||
u32 fmt_size;
|
u32 bytes_count;
|
||||||
|
u32 chars_count;
|
||||||
u32 args_count;
|
u32 args_count;
|
||||||
|
b32 overflowed;
|
||||||
};
|
};
|
||||||
|
|
||||||
void G_PushPrintChar(inout G_TempPrintBuffer buff, u32 v)
|
void G_PushPrintByte(inout G_TempPrintBuffer buff, u32 v)
|
||||||
{
|
{
|
||||||
/* TODO: Overflow check */
|
u32 chunk_idx = buff.bytes_count / 4;
|
||||||
u32 u32_arr_pos = buff.char_pos / 4;
|
if (chunk_idx < countof(buff.byte_chunks))
|
||||||
u32 idx_in_u32 = buff.char_pos & 0x03;
|
|
||||||
if (idx_in_u32 == 0)
|
|
||||||
{
|
{
|
||||||
/* Since buff is not zero initialized, we set the chunk on first write here */
|
u32 byte_idx_in_chunk = buff.bytes_count & 0x03;
|
||||||
buff.char_chunks[u32_arr_pos] = v & 0xFF;
|
if (byte_idx_in_chunk == 0)
|
||||||
|
{
|
||||||
|
/* Since buff is not zero initialized, we set the chunk on first write here */
|
||||||
|
buff.byte_chunks[chunk_idx] = v & 0xFF;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
buff.byte_chunks[chunk_idx] |= (v & 0xFF) << (byte_idx_in_chunk * 8);
|
||||||
|
}
|
||||||
|
buff.bytes_count += 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
buff.char_chunks[u32_arr_pos] |= (v & 0xFF) << (idx_in_u32 * 8);
|
buff.overflowed = 1;
|
||||||
}
|
}
|
||||||
buff.char_pos += 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void G_CommitPrint(G_TempPrintBuffer buff)
|
void G_CommitPrint(G_TempPrintBuffer buff)
|
||||||
{
|
{
|
||||||
RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_PrintBufferRef);
|
RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_PrintBufferRef);
|
||||||
|
|
||||||
u32 chunks_count = (buff.char_pos + 3) / 4;
|
if (buff.overflowed)
|
||||||
|
{
|
||||||
|
buff.bytes_count = 0;
|
||||||
|
buff.chars_count = 0;
|
||||||
|
buff.args_count = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 chunks_count = (buff.bytes_count + 3) / 4;
|
||||||
u32 alloc_size = 0;
|
u32 alloc_size = 0;
|
||||||
alloc_size += 4; /* Header */
|
alloc_size += 4; /* Header */
|
||||||
alloc_size += chunks_count * 4; /* Chunks */
|
alloc_size += chunks_count * 4; /* Chunks */
|
||||||
@ -166,23 +183,22 @@ Struct(G_FmtArg)
|
|||||||
/* Atomic fetch + add to base counter */
|
/* Atomic fetch + add to base counter */
|
||||||
u32 base;
|
u32 base;
|
||||||
rw.InterlockedAdd(0, alloc_size, base);
|
rw.InterlockedAdd(0, alloc_size, base);
|
||||||
|
|
||||||
base += 4; /* Offset for allocation counter */
|
base += 4; /* Offset for allocation counter */
|
||||||
base += 4; /* Offset for success counter */
|
base += 4; /* Offset for success counter */
|
||||||
base += 4; /* Offset for overflow counter */
|
base += 4; /* Offset for overflow counter */
|
||||||
|
|
||||||
if ((base + alloc_size) < countof(rw))
|
if ((base + alloc_size) < countof(rw))
|
||||||
{
|
{
|
||||||
u32 pos = 0;
|
|
||||||
|
|
||||||
/* Increment success counter */
|
/* Increment success counter */
|
||||||
rw.InterlockedAdd(4, 1);
|
rw.InterlockedAdd(4, 1);
|
||||||
|
u32 pos = 0;
|
||||||
|
|
||||||
/* Write header */
|
/* Write header */
|
||||||
{
|
{
|
||||||
u32 header = 0;
|
u32 header = 0;
|
||||||
header |= (buff.fmt_size << 0) & 0x0000FFFF;
|
header |= (buff.chars_count << 0) & 0x0000FFFF;
|
||||||
header |= (buff.args_count << 16) & 0xFFFF0000;
|
header |= (buff.args_count << 16) & 0x7FFF0000;
|
||||||
|
header |= (buff.overflowed << 31) & 0xF0000000;
|
||||||
rw.Store(base + pos, header);
|
rw.Store(base + pos, header);
|
||||||
pos += 4;
|
pos += 4;
|
||||||
}
|
}
|
||||||
@ -190,7 +206,7 @@ Struct(G_FmtArg)
|
|||||||
/* Write chunks */
|
/* Write chunks */
|
||||||
for (u32 chunk_idx = 0; chunk_idx < chunks_count; ++chunk_idx)
|
for (u32 chunk_idx = 0; chunk_idx < chunks_count; ++chunk_idx)
|
||||||
{
|
{
|
||||||
u32 chunk = buff.char_chunks[chunk_idx];
|
u32 chunk = buff.byte_chunks[chunk_idx];
|
||||||
rw.Store(base + pos, chunk);
|
rw.Store(base + pos, chunk);
|
||||||
pos += 4;
|
pos += 4;
|
||||||
}
|
}
|
||||||
@ -202,43 +218,32 @@ Struct(G_FmtArg)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define G_Print(fmt) do { \
|
#define G_PrintF_(fmt, ...) do { \
|
||||||
G_TempPrintBuffer __tmp; \
|
G_TempPrintBuffer __tmp; \
|
||||||
__tmp.char_pos = 0; \
|
__tmp.bytes_count = 0; \
|
||||||
u32 __pos = 0; \
|
__tmp.overflowed = 0; \
|
||||||
while (U32FromChar(fmt[__pos]) != 0) \
|
u32 __char_idx = 0; \
|
||||||
{ \
|
while (U32FromChar(fmt[__char_idx]) != 0) \
|
||||||
G_PushPrintChar(__tmp, U32FromChar(fmt[__pos])); \
|
{ \
|
||||||
++__pos; \
|
G_PushPrintByte(__tmp, U32FromChar(fmt[__char_idx])); \
|
||||||
} \
|
++__char_idx; \
|
||||||
__tmp.fmt_size = __tmp.char_pos; \
|
} \
|
||||||
G_CommitPrint(__tmp); \
|
G_FmtArg __args[] = { __VA_ARGS__ }; \
|
||||||
|
__tmp.chars_count = __tmp.bytes_count; \
|
||||||
|
__tmp.args_count = (countof(__args) - 1); \
|
||||||
|
for (u32 __arg_idx = 0; __arg_idx < __tmp.args_count; ++__arg_idx) \
|
||||||
|
{ \
|
||||||
|
G_PushPrintByte(__tmp, __args[__arg_idx].kind); \
|
||||||
|
G_PushPrintByte(__tmp, __args[__arg_idx].v >> 0); \
|
||||||
|
G_PushPrintByte(__tmp, __args[__arg_idx].v >> 8); \
|
||||||
|
G_PushPrintByte(__tmp, __args[__arg_idx].v >> 16); \
|
||||||
|
G_PushPrintByte(__tmp, __args[__arg_idx].v >> 24); \
|
||||||
|
} \
|
||||||
|
G_CommitPrint(__tmp); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define G_PrintF(fmt, ...) do { \
|
#define G_PrintF(fmt, ...) G_PrintF_(fmt, ##__VA_ARGS__, G_FmtEnd())
|
||||||
G_TempPrintBuffer __tmp; \
|
|
||||||
__tmp.char_pos = 0; \
|
|
||||||
u32 __pos = 0; \
|
|
||||||
while (U32FromChar(fmt[__pos]) != 0) \
|
|
||||||
{ \
|
|
||||||
G_PushPrintChar(__tmp, U32FromChar(fmt[__pos])); \
|
|
||||||
++__pos; \
|
|
||||||
} \
|
|
||||||
G_FmtArg __args[] = { __VA_ARGS__ }; \
|
|
||||||
__tmp.fmt_size = __tmp.char_pos; \
|
|
||||||
__tmp.args_count = countof(__args); \
|
|
||||||
for (u32 __arg_idx = 0; __arg_idx < countof(__args); ++__arg_idx) \
|
|
||||||
{ \
|
|
||||||
G_PushPrintChar(__tmp, __args[__arg_idx].kind); \
|
|
||||||
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 0); \
|
|
||||||
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 8); \
|
|
||||||
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 16); \
|
|
||||||
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 24); \
|
|
||||||
} \
|
|
||||||
G_CommitPrint(__tmp); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define G_Print(fmt)
|
|
||||||
/* No-op fallback: swallows the format string and any format arguments so call sites still compile */
#define G_PrintF(fmt, ...)
|
/* No-op fallback: swallows the format string and any format arguments so call sites still compile */
#define G_PrintF(fmt, ...)
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -145,10 +145,13 @@ PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
|
|||||||
Vec2 uv = input.src_uv;
|
Vec2 uv = input.src_uv;
|
||||||
Vec4 result = tex.Sample(sampler, uv);
|
Vec4 result = tex.Sample(sampler, uv);
|
||||||
|
|
||||||
// G_Print("Hello there!");
|
G_PrintF(
|
||||||
G_PrintF("Bla: (%F, %F)", G_Fmt(uv.x), G_Fmt(uv.y));
|
"Hello there! (%F, %F), (%F, %F)",
|
||||||
|
G_Fmt(input.SV_Position.x),
|
||||||
|
G_Fmt(input.SV_Position.y),
|
||||||
|
G_Fmt(uv.x),
|
||||||
|
G_Fmt(uv.y)
|
||||||
|
);
|
||||||
|
|
||||||
UI_BlitPSOutput output;
|
UI_BlitPSOutput output;
|
||||||
output.SV_Target0 = result;
|
output.SV_Target0 = result;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user