shader printf arg parsing on cpu
This commit is contained in:
parent
f911e98c98
commit
bc76a511e6
@ -24,7 +24,11 @@ typedef float4x4 Mat4x4;
|
|||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Countof
|
//~ Countof
|
||||||
|
|
||||||
template<typename T, u32 N> u32 countof(T arr[N]) { return N; }
|
template<typename T, u32 N>
|
||||||
|
u32 countof(T arr[N])
|
||||||
|
{
|
||||||
|
return N;
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
//~ Color helpers
|
//~ Color helpers
|
||||||
|
|||||||
@ -73,6 +73,8 @@
|
|||||||
#define GPU_DEBUG_VALIDATION 1
|
#define GPU_DEBUG_VALIDATION 1
|
||||||
|
|
||||||
#define GPU_SHADER_PRINT 1
|
#define GPU_SHADER_PRINT 1
|
||||||
|
/* Value assigned to each queue's print_buffer_size. Deliberately has no trailing
 * semicolon so the macro can also be used inside larger expressions. */
#define GPU_SHADER_PRINT_BUFFER_SIZE Kibi(1)
|
||||||
|
#define GPU_SHADER_PRINT_LOG 1
|
||||||
|
|
||||||
/* If enabled, bitbuffs will insert/verify magic numbers & length for each read & write */
|
/* If enabled, bitbuffs will insert/verify magic numbers & length for each read & write */
|
||||||
#define BITBUFF_DEBUG 0
|
#define BITBUFF_DEBUG 0
|
||||||
|
|||||||
@ -300,7 +300,7 @@ void G_Bootstrap(void)
|
|||||||
if (kind != G_QueueKind_AsyncCopy)
|
if (kind != G_QueueKind_AsyncCopy)
|
||||||
{
|
{
|
||||||
G_ArenaHandle gpu_perm = G_PermArena();
|
G_ArenaHandle gpu_perm = G_PermArena();
|
||||||
queue->print_buffer_size = Mebi(64);
|
queue->print_buffer_size = GPU_SHADER_PRINT_BUFFER_SIZE;
|
||||||
queue->print_buffer = G_PushBuffer(
|
queue->print_buffer = G_PushBuffer(
|
||||||
gpu_perm,
|
gpu_perm,
|
||||||
u8,
|
u8,
|
||||||
@ -2840,7 +2840,6 @@ void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
|
|||||||
.flags = G_ResourceFlag_HostMemory
|
.flags = G_ResourceFlag_HostMemory
|
||||||
);
|
);
|
||||||
|
|
||||||
u32 zero = 0;
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
/* FIXME: Remove this */
|
/* FIXME: Remove this */
|
||||||
@ -2851,20 +2850,96 @@ void G_D12_CollectionWorkerEntry(WaveLaneCtx *lane)
|
|||||||
{
|
{
|
||||||
/* Copy print buffer to readback buffer */
|
/* Copy print buffer to readback buffer */
|
||||||
G_CopyBufferToBuffer(cl, readback_buff, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size));
|
G_CopyBufferToBuffer(cl, readback_buff, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size));
|
||||||
/* Reset size to 0 */
|
/* Reset counters to 0 */
|
||||||
G_MemorySync(cl, queue->print_buffer,
|
G_MemorySync(cl, queue->print_buffer,
|
||||||
G_Stage_Copy, G_Access_CopyRead,
|
G_Stage_Copy, G_Access_CopyRead,
|
||||||
G_Stage_Copy, G_Access_CopyWrite
|
G_Stage_Copy, G_Access_CopyWrite
|
||||||
);
|
);
|
||||||
G_CopyCpuToBuffer(cl, queue->print_buffer, 0, &zero, RNGU64(0, 4));
|
u8 zero[12] = ZI;
|
||||||
|
G_CopyCpuToBuffer(cl, queue->print_buffer, 0, zero, RNGU64(0, sizeof(zero)));
|
||||||
}
|
}
|
||||||
i64 completion = G_CommitCommandList(cl);
|
G_CommitCommandList(cl);
|
||||||
|
|
||||||
G_SyncCpu(G_MaskFromQueue(queue_kind));
|
G_SyncCpu(G_MaskFromQueue(queue_kind));
|
||||||
u32 size = *G_StructFromResource(readback_buff, u32);
|
u32 attempted_print_bytes_count = *(G_StructFromResource(readback_buff, u32) + 0);
|
||||||
u8 *text = G_StructFromResource(readback_buff, u8) + 4;
|
u32 prints_count = *(G_StructFromResource(readback_buff, u32) + 1);
|
||||||
|
u32 overflows_count = *(G_StructFromResource(readback_buff, u32) + 2);
|
||||||
|
u8 *start = G_StructFromResource(readback_buff, u8) + 12;
|
||||||
|
|
||||||
String s = STRING(size, text);
|
/* Deserialize */
|
||||||
|
/* Walk every successfully written print in the readback buffer and rebuild its
 * format string and argument list.
 *
 * Per-print layout, beginning at `start`:
 *   u32 header  - low 16 bits: format char count, high 16 bits: arg count
 *   u8  chars[chars_count]
 *   args_count x { u8 kind; 4 value bytes, least significant byte first }
 *   padding up to the next 4-byte boundary
 */
if (GPU_SHADER_PRINT_LOG)
{
  /* FIXME: Remove this */
  TempArena scratch = BeginScratchNoConflict();
  u8 *at = start;
  {
    for (u32 print_num = 1; print_num <= prints_count; ++print_num)
    {
      u32 chars_count = 0;
      u32 args_count = 0;
      {
        /* `at` is 4-byte aligned relative to `start` here (see the AlignU64 at the end of each iteration) */
        u32 header = *(u32 *)at;
        chars_count = (header & 0x0000FFFF) >> 0;
        args_count = (header & 0xFFFF0000) >> 16;
        at += 4;
      }

      String fmt = ZI;
      {
        fmt.len = chars_count;
        fmt.text = at;
        at += chars_count;
      }

      FmtArg *args = 0;
      {
        if (args_count > 0)
        {
          args = PushStructs(scratch.arena, FmtArg, args_count);
          /* Strictly less-than: `args` holds exactly args_count entries */
          for (u32 arg_idx = 0; arg_idx < args_count; ++arg_idx)
          {
            G_FmtArgKind gpu_kind = (G_FmtArgKind)(*at);
            at += 1;

            /* The value follows the 1-byte kind, so it is generally not 4-byte
             * aligned. Assemble it from individual bytes instead of
             * dereferencing a misaligned u32 pointer. */
            u32 gpu_data = ((u32)at[0] << 0) |
                           ((u32)at[1] << 8) |
                           ((u32)at[2] << 16) |
                           ((u32)at[3] << 24);
            at += 4;

            FmtArg *dst = &args[arg_idx];
            switch (gpu_kind)
            {
              case G_FmtArgKind_U32:
              {
                dst->kind = FmtArgKind_Uint;
                dst->value.uint = gpu_data;
              } break;
              case G_FmtArgKind_I32:
              {
                dst->kind = FmtArgKind_Sint;
                dst->value.sint = (i32)gpu_data;
              } break;
              case G_FmtArgKind_F32:
              {
                /* Reinterpret the raw bits as a float */
                dst->kind = FmtArgKind_Float;
                dst->value.f = *(f32 *)&gpu_data;
              } break;
            }
          }
        }
      }

      // String final_str = ZI;
      // if (args_count > 0)
      // {
      // }
      // else
      // {
      //   final_str = PushString(scratch.arena, fmt);
      // }

      /* Skip padding so the next header read starts on a 4-byte boundary */
      at = (u8 *)AlignU64((u64)at, 4);
    }
  }
  EndScratch(scratch);
}
|
||||||
|
|
||||||
DEBUGBREAKABLE;
|
DEBUGBREAKABLE;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -109,62 +109,133 @@ G_ForceDeclConstant(G_RWByteAddressBufferRef, G_ShaderConst_PrintBufferRef, 8)
|
|||||||
|
|
||||||
/* This technique is based on MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */
|
/* This technique is based on MJP's article: https://therealmjp.github.io/posts/hlsl-printf/ */
|
||||||
|
|
||||||
|
Enum(G_FmtArgKind)
|
||||||
|
{
|
||||||
|
G_FmtArgKind_None,
|
||||||
|
G_FmtArgKind_U32,
|
||||||
|
G_FmtArgKind_I32,
|
||||||
|
G_FmtArgKind_F32,
|
||||||
|
};
|
||||||
|
|
||||||
|
Struct(G_FmtArg)
|
||||||
|
{
|
||||||
|
G_FmtArgKind kind;
|
||||||
|
u32 v;
|
||||||
|
};
|
||||||
|
|
||||||
#if IsLanguageG && GPU_SHADER_PRINT
|
#if IsLanguageG && GPU_SHADER_PRINT
|
||||||
|
G_FmtArg G_Fmt(u32 v) { G_FmtArg result; result.kind = G_FmtArgKind_U32; result.v = v; return result; }
|
||||||
|
G_FmtArg G_Fmt(i32 v) { G_FmtArg result; result.kind = G_FmtArgKind_I32; result.v = v; return result; }
|
||||||
|
G_FmtArg G_Fmt(f32 v) { G_FmtArg result; result.kind = G_FmtArgKind_F32; result.v = asuint(v); return result; }
|
||||||
|
|
||||||
Struct(G_TempPrintBuffer)
|
Struct(G_TempPrintBuffer)
|
||||||
{
|
{
|
||||||
u32 data_u32[256];
|
u32 char_chunks[256];
|
||||||
u32 byte_pos;
|
u32 char_pos;
|
||||||
|
u32 fmt_size;
|
||||||
|
u32 args_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Appends the low 8 bits of `v` to the temp print buffer. Four chars are packed
 * into each u32 chunk, with the first char of a chunk in its lowest byte. */
void G_PushPrintChar(inout G_TempPrintBuffer buff, u32 v)
{
  /* TODO: Overflow check */
  u32 chunk_idx = buff.char_pos / 4;
  u32 slot = buff.char_pos & 0x03;
  u32 bits = (v & 0xFF) << (slot * 8);
  if (slot != 0)
  {
    buff.char_chunks[chunk_idx] |= bits;
  }
  else
  {
    /* buff is not zero initialized, so the first write into a chunk assigns instead of OR-ing */
    buff.char_chunks[chunk_idx] = bits;
  }
  buff.char_pos += 1;
}
|
||||||
|
|
||||||
/* Appends one serialized print (u32 header followed by the packed char chunks)
 * to the GPU print buffer.
 *
 * Print buffer layout, in byte offsets:
 *    0: running total of bytes requested by all prints (bumped atomically here)
 *    4: number of prints that fit and were written
 *    8: number of prints dropped because they did not fit
 *   12: print payloads, written back to back
 */
void G_CommitPrint(G_TempPrintBuffer buff)
{
  RWByteAddressBuffer rw = G_Dereference(G_ShaderConst_PrintBufferRef);

  u32 chunks_count = (buff.char_pos + 3) / 4;

  u32 alloc_size = 0;
  alloc_size += 4; /* Header */
  alloc_size += chunks_count * 4; /* Chunks */

  /* Reserve space: base receives the counter value from before the add */
  u32 base;
  rw.InterlockedAdd(0, alloc_size, base);

  /* pos is the absolute byte offset of this print inside the buffer. It already
   * includes base, so stores below must use pos directly; adding base again
   * would misplace every print after the first. */
  u32 pos = base;
  pos += 4; /* Offset for base counter */
  pos += 4; /* Offset for success counter */
  pos += 4; /* Offset for overflow counter */

  if ((pos + alloc_size) < countof(rw))
  {
    /* Increment success counter */
    rw.InterlockedAdd(4, 1);

    /* Store header: low 16 bits = format size, high 16 bits = arg count */
    {
      u32 header = 0;
      header |= (buff.fmt_size << 0) & 0x0000FFFF;
      header |= (buff.args_count << 16) & 0xFFFF0000;
      rw.Store(pos, header);
      pos += 4;
    }

    /* Store chunks */
    for (u32 chunk_idx = 0; chunk_idx < chunks_count; ++chunk_idx)
    {
      u32 chunk = buff.char_chunks[chunk_idx];
      rw.Store(pos, chunk);
      pos += 4;
    }
  }
  else
  {
    /* Increment overflow counter */
    rw.InterlockedAdd(8, 1);
  }
}
|
||||||
|
|
||||||
/* Prints a plain string (no format arguments) from shader code.
 * Pushes each char of `fmt` up to, but not including, its terminating zero and
 * commits the result to the print buffer. */
#define G_Print(fmt) do { \
  G_TempPrintBuffer __tmp; \
  __tmp.char_pos = 0; \
  __tmp.args_count = 0; /* No args; G_CommitPrint packs this field into the header */ \
  u32 __pos = 0; \
  while (U32FromChar(fmt[__pos]) != 0) \
  { \
    G_PushPrintChar(__tmp, U32FromChar(fmt[__pos])); \
    ++__pos; \
  } \
  __tmp.fmt_size = __tmp.char_pos; \
  G_CommitPrint(__tmp); \
} while (0)
|
||||||
|
|
||||||
|
#define G_PrintF(fmt, ...) do { \
|
||||||
|
G_TempPrintBuffer __tmp; \
|
||||||
|
__tmp.char_pos = 0; \
|
||||||
|
u32 __pos = 0; \
|
||||||
|
while (U32FromChar(fmt[__pos]) != 0) \
|
||||||
|
{ \
|
||||||
|
G_PushPrintChar(__tmp, U32FromChar(fmt[__pos])); \
|
||||||
|
++__pos; \
|
||||||
|
} \
|
||||||
|
G_FmtArg __args[] = { __VA_ARGS__ }; \
|
||||||
|
__tmp.fmt_size = __tmp.char_pos; \
|
||||||
|
__tmp.args_count = countof(__args); \
|
||||||
|
for (u32 __arg_idx = 0; __arg_idx < countof(__args); ++__arg_idx) \
|
||||||
|
{ \
|
||||||
|
G_PushPrintChar(__tmp, __args[__arg_idx].kind); \
|
||||||
|
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 0); \
|
||||||
|
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 8); \
|
||||||
|
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 16); \
|
||||||
|
G_PushPrintChar(__tmp, __args[__arg_idx].v >> 24); \
|
||||||
|
} \
|
||||||
|
G_CommitPrint(__tmp); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
#else
|
#else
|
||||||
#define G_Print(fmt)
|
#define G_Print(fmt)
|
||||||
|
/* Disabled stub: variadic like the enabled G_PrintF, so call sites that pass
 * format arguments still compile when shader printing is off. */
#define G_PrintF(fmt, ...)
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -145,7 +145,10 @@ PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input)
|
|||||||
Vec2 uv = input.src_uv;
|
Vec2 uv = input.src_uv;
|
||||||
Vec4 result = tex.Sample(sampler, uv);
|
Vec4 result = tex.Sample(sampler, uv);
|
||||||
|
|
||||||
G_Print("Hello there!");
|
// G_Print("Hello there!");
|
||||||
|
G_PrintF("Hello there: \"%F\"", G_Fmt(3.123));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
UI_BlitPSOutput output;
|
UI_BlitPSOutput output;
|
||||||
output.SV_Target0 = result;
|
output.SV_Target0 = result;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user