From 2f915de96c3d675f52a493f8a4b4ad483f97fce3 Mon Sep 17 00:00:00 2001 From: jacob Date: Mon, 8 Dec 2025 14:12:51 -0600 Subject: [PATCH] add ByteAddressBuffer support to gpu layer --- src/base/base.h | 8 +- src/base/base_gpu.hlsl | 23 +-- src/base/base_string.c | 17 +-- src/base/base_string.h | 30 ++-- src/base/base_win32/base_win32_log.c | 10 +- src/config.h | 2 +- src/gpu/gpu.lay | 7 +- src/gpu/gpu_core.h | 13 +- src/gpu/gpu_dx12/gpu_dx12.c | 185 +++++++++++++------------ src/gpu/gpu_dx12/gpu_dx12.h | 5 +- src/gpu/{gpu_common.c => gpu_extras.c} | 2 +- src/gpu/{gpu_common.h => gpu_extras.h} | 2 +- src/gpu/gpu_shader_extras.hlsl | 14 ++ src/pp/pp_vis/pp_vis_widgets.c | 8 +- src/ui/ui.lay | 4 +- src/ui/ui_core.c | 2 +- src/ui/{ui_common.c => ui_extras.c} | 0 src/ui/{ui_common.h => ui_extras.h} | 0 src/ui/ui_shaders.hlsl | 42 +++++- 19 files changed, 215 insertions(+), 159 deletions(-) rename src/gpu/{gpu_common.c => gpu_extras.c} (99%) rename src/gpu/{gpu_common.h => gpu_extras.h} (97%) create mode 100644 src/gpu/gpu_shader_extras.hlsl rename src/ui/{ui_common.c => ui_extras.c} (100%) rename src/ui/{ui_common.h => ui_extras.h} (100%) diff --git a/src/base/base.h b/src/base/base.h index 6570e9bd..12d55e66 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -433,7 +433,11 @@ #define sizeof_field(type, field) sizeof(((type *)0)->field) //- countof -#define countof(a) (sizeof(a) / sizeof((a)[0])) +#if IsLanguageC + #define countof(a) (sizeof(a) / sizeof((a)[0])) +#elif IsLanguageHlsl + template uint countof(T ints[N]) { return N; } +#endif //- IsArray #define IsIndexable(a) (sizeof(a[0]) != 0) @@ -728,6 +732,8 @@ Struct(StructuredBufferHandle) { u32 v; }; Struct(RWStructuredBufferHandle) { u32 v; }; +Struct(ByteAddressBufferHandle) { u32 v; }; +Struct(RWByteAddressBufferHandle) { u32 v; }; Struct(Texture1DHandle) { u32 v; }; Struct(RWTexture1DHandle) { u32 v; }; Struct(Texture2DHandle) { u32 v; }; diff --git a/src/base/base_gpu.hlsl b/src/base/base_gpu.hlsl index 268ef3c3..9a40ce20 100644 --- a/src/base/base_gpu.hlsl +++ b/src/base/base_gpu.hlsl @@ -27,15 +27,20 @@ typedef float4x4 Mat4x4; /* NOTE: Non-uniform resource access is assumed as the default behavior */ /* TODO: Add explicit "uniform" variants of handle deref operations for optimization on AMD devices */ -template StructuredBuffer StructuredBufferFromHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template RWStructuredBuffer RWStructuredBufferFromHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template Texture1D Texture1DFromHandle(Texture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template RWTexture1D RWTexture1DFromHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template Texture2D Texture2DFromHandle(Texture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template RWTexture2D RWTexture2DFromHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template Texture3D Texture3DFromHandle(Texture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -template RWTexture3D RWTexture3DFromHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } -SamplerState SamplerStateFromHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template StructuredBuffer StructuredBufferFromHandle(StructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template RWStructuredBuffer RWStructuredBufferFromHandle(RWStructuredBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } + +ByteAddressBuffer ByteAddressBufferFromHandle(ByteAddressBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +RWByteAddressBuffer RWByteAddressBufferFromHandle(RWByteAddressBufferHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } + +template Texture1D Texture1DFromHandle(Texture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template Texture3D Texture3DFromHandle(Texture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template RWTexture1D RWTexture1DFromHandle(RWTexture1DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template Texture2D Texture2DFromHandle(Texture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template RWTexture2D RWTexture2DFromHandle(RWTexture2DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } +template RWTexture3D RWTexture3DFromHandle(RWTexture3DHandle h) { return ResourceDescriptorHeap[NonUniformResourceIndex(h.v)]; } + +SamplerState SamplerStateFromHandle(SamplerStateHandle h) { return SamplerDescriptorHeap[NonUniformResourceIndex(h.v)]; } //////////////////////////////////////////////////////////// //~ Dimension helpers diff --git a/src/base/base_string.c b/src/base/base_string.c index 5ccc2b13..d628b608 100644 --- a/src/base/base_string.c +++ b/src/base/base_string.c @@ -518,8 +518,7 @@ String TrimWhitespace(String s) * Example: * FormatString(arena, Lit("Hello there %F"), FmtString(Lit("George"))) * - * NOTE: FmtEnd must be passed as the last arg in the va_list (this is - * done automatically by the `FormatString` macro). + * NOTE: FmtEnd must be passed as the last arg in the va_list * * Format arguments: * FmtChar: Format a single u8 character @@ -527,16 +526,10 @@ String TrimWhitespace(String s) * FmtUint: Format a u64 * FmtSint: Format an i64 * FmtFloat: Format an f64 with DefaultFmtPrecision - * FmtFloatP: Format an f64 with specified precision * FmtHex: Format a u64 in hexadecimal notation * FmtPtr: Format a pointer in hexadecimal notation prefixed by "0x" * * FmtEnd (internal): Denote the end of the va_list - * - * TODO: - * %n equivalent? (nothing) - * %e/%E equivalent? (scientific notation of floats) - * %o equivalent? (octal representation) */ String FormatStringV(Arena *arena, String fmt, va_list args) { @@ -584,17 +577,17 @@ String FormatStringV(Arena *arena, String fmt, va_list args) case FmtKind_Uint: { - parsed_str = StringFromU64(arena, arg.value.uint, 10, arg.zfill); + parsed_str = StringFromU64(arena, arg.value.uint, 10, arg.z); } break; case FmtKind_Sint: { - parsed_str = StringFromI64(arena, arg.value.sint, 10, arg.zfill); + parsed_str = StringFromI64(arena, arg.value.sint, 10, arg.z); } break; case FmtKind_Hex: { - parsed_str = StringFromU64(arena, arg.value.sint, 16, arg.zfill); + parsed_str = StringFromU64(arena, arg.value.sint, 16, arg.z); } break; case FmtKind_Ptr: @@ -604,7 +597,7 @@ String FormatStringV(Arena *arena, String fmt, va_list args) case FmtKind_Float: { - parsed_str = StringFromF64(arena, arg.value.f, arg.precision); + parsed_str = StringFromF64(arena, arg.value.f, arg.p); } break; case FmtKind_Handle: diff --git a/src/base/base_string.h b/src/base/base_string.h index 155ab47f..84a35338 100644 --- a/src/base/base_string.h +++ b/src/base/base_string.h @@ -25,8 +25,8 @@ Enum(FmtKind) Struct(FmtArg) { FmtKind kind; - u32 precision; - u32 zfill; + u32 p; /* Precision */ + u32 z; /* Z-fill */ union { u8 c; @@ -98,21 +98,19 @@ String StringFromList(Arena *arena, StringList l, String separator); //////////////////////////////////////////////////////////// //~ Formatting -//- Format arg helpers -#define FmtChar(v) (FmtArg) {.kind = FmtKind_Char, .value.c = (v)} -#define FmtString(v) (FmtArg) {.kind = FmtKind_String, .value.string = (v)} -#define FmtUint(v) (FmtArg) {.kind = FmtKind_Uint, .value.uint = (v)} -#define FmtUintZ(v, z) (FmtArg) {.kind = FmtKind_Uint, .value.uint = (v), .zfill = (z)} -#define FmtSint(v) (FmtArg) {.kind = FmtKind_Sint, .value.sint = (v)} -#define FmtHex(v) (FmtArg) {.kind = FmtKind_Hex, .value.uint = (v)} -#define FmtPtr(v) (FmtArg) {.kind = FmtKind_Ptr, .value.ptr = (v)} -#define FmtFloat(v) FmtFloatP(v, DefaultFmtPrecision) -#define FmtFloatP(v, p) (FmtArg) {.kind = FmtKind_Float, .value.f = (v), .precision = (p)} -#define FmtHandle(v) (FmtArg) {.kind = FmtKind_Handle, .value.handle.h64[0] = (v).idx, .value.handle.h64[1] = (v).gen} -#define FmtUid(v) (FmtArg) {.kind = FmtKind_Uid, .value.uid = (v) } -#define FmtEnd (FmtArg) {.kind = FmtKind_End} +#define FMTARG(_kind, ...) ((FmtArg) { .kind = (_kind), .p = DefaultFmtPrecision, __VA_ARGS__ }) + +#define FmtChar(v, ...) FMTARG(FmtKind_Char, .value.c = (v), __VA_ARGS__) +#define FmtString(v, ...) FMTARG(FmtKind_String, .value.string = (v), __VA_ARGS__) +#define FmtUint(v, ...) FMTARG(FmtKind_Uint, .value.uint = (v), __VA_ARGS__) +#define FmtSint(v, ...) FMTARG(FmtKind_Sint, .value.sint = (v), __VA_ARGS__) +#define FmtHex(v, ...) FMTARG(FmtKind_Hex, .value.uint = (v), __VA_ARGS__) +#define FmtPtr(v, ...) FMTARG(FmtKind_Ptr, .value.ptr = (v), __VA_ARGS__) +#define FmtFloat(v, ...) FMTARG(FmtKind_Float, .value.f = (v), __VA_ARGS__) +#define FmtHandle(v, ...) FMTARG(FmtKind_Handle, .value.handle.h64[0] = (v).idx, .value.handle.h64[1] = (v).gen, __VA_ARGS__) +#define FmtUid(v, ...) FMTARG(FmtKind_Uid, .value.uid = (v), __VA_ARGS__) +#define FmtEnd FMTARG(FmtKind_End) /* Denotes end of VA list */ -//- Format functions #define StringF(arena, lit, ...) FormatString_((arena), Lit(lit), __VA_ARGS__, FmtEnd) #define FormatString(arena, fmt, ...) FormatString_((arena), (fmt), __VA_ARGS__, FmtEnd) String FormatString_(Arena *arena, String fmt, ...); diff --git a/src/base/base_win32/base_win32_log.c b/src/base/base_win32/base_win32_log.c index 0769b2e2..38833eee 100644 --- a/src/base/base_win32/base_win32_log.c +++ b/src/base/base_win32/base_win32_log.c @@ -55,13 +55,13 @@ void W32_Log(i32 level, String msg) "[%F:%F:%F.%F] <%F> [%F] %F\n", /* Time */ - FmtUintZ(datetime.hour, 2), - FmtUintZ(datetime.minute, 2), - FmtUintZ(datetime.second, 2), - FmtUintZ(datetime.milliseconds, 3), + FmtUint(datetime.hour, .z = 2), + FmtUint(datetime.minute, .z = 2), + FmtUint(datetime.second, .z = 2), + FmtUint(datetime.milliseconds, .z = 3), /* Thread id */ - FmtUintZ(thread_id, 5), + FmtUint(thread_id, .z = 5), /* Level */ FmtString(shorthand), diff --git a/src/config.h b/src/config.h index 36009283..905e5e28 100644 --- a/src/config.h +++ b/src/config.h @@ -70,7 +70,7 @@ #define FLOOD_DEBUG 0 #define GPU_DEBUG 1 -#define GPU_DEBUG_VALIDATION 1 +#define GPU_DEBUG_VALIDATION 0 /* If enabled, bitbuffs will insert/verify magic numbers & length for each read & write */ #define BITBUFF_DEBUG 0 diff --git a/src/gpu/gpu.lay b/src/gpu/gpu.lay index fc636257..c0a447ef 100644 --- a/src/gpu/gpu.lay +++ b/src/gpu/gpu.lay @@ -5,10 +5,11 @@ //- Api @IncludeC gpu_core.h -@IncludeC gpu_common.h +@IncludeC gpu_extras.h //- Impl -@IncludeC gpu_common.c +@IncludeC gpu_extras.c +@IncludeGpu gpu_shader_extras.hlsl //- Dx12 impl @DefaultWindowsImpl gpu_dx12 @@ -18,4 +19,4 @@ //- Startup @Startup GPU_Startup -@Startup GPU_StartupCommon +@Startup GPU_StartupExtra diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index e0ce4753..19cd7f16 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -543,8 +543,10 @@ GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc); //- Shader handle creation -StructuredBufferHandle GPU_PushStructuredBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range); -RWStructuredBufferHandle GPU_PushRWStructuredBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range); +StructuredBufferHandle GPU_PushStructuredBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u64 element_size, u64 element_offset); +RWStructuredBufferHandle GPU_PushRWStructuredBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u64 element_size, u64 element_offset); +ByteAddressBufferHandle GPU_PushByteAddressBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u64 u32_offset); +RWByteAddressBufferHandle GPU_PushRWByteAddressBufferHandleEx (GPU_ArenaHandle arena, GPU_ResourceHandle resource, u64 u32_offset); Texture1DHandle GPU_PushTexture1DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); RWTexture1DHandle GPU_PushRWTexture1DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); Texture2DHandle GPU_PushTexture2DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); @@ -553,8 +555,11 @@ Texture3DHandle GPU_PushTexture3DHandle (GPU_ArenaHandle RWTexture3DHandle GPU_PushRWTexture3DHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); SamplerStateHandle GPU_PushSamplerStateHandle (GPU_ArenaHandle arena, GPU_ResourceHandle resource); -#define GPU_PushStructuredBufferHandle(arena, resource, type) GPU_PushStructuredBufferHandleEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) -#define GPU_PushRWStructuredBufferHandle(arena, resource, type) GPU_PushRWStructuredBufferHandleEx((arena), (resource), sizeof(type), RNGU32(0, GPU_CountBuffer((resource), type))) +#define GPU_PushStructuredBufferHandle(arena, resource, type) GPU_PushStructuredBufferHandleEx((arena), (resource), sizeof(type), 0) +#define GPU_PushRWStructuredBufferHandle(arena, resource, type) GPU_PushRWStructuredBufferHandleEx((arena), (resource), sizeof(type), 0) + +#define GPU_PushByteAddressBufferHandle(arena, resource) GPU_PushByteAddressBufferHandleEx((arena), (resource), 0) +#define GPU_PushRWByteAddressBufferHandle(arena, resource) GPU_PushRWByteAddressBufferHandleEx((arena), (resource), 0) //- Index buffer creation diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index 2de16cf3..6df21710 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -691,54 +691,6 @@ void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl) } } -//////////////////////////////////////////////////////////// -//~ Queue sync job - -// JobImpl(GPU_D12_StartQueueSync, _, __) -// { -// GPU_D12_SharedState *g = &GPU_D12_shared_state; -// HANDLE queue_fences_events[GPU_NumQueues] = ZI; -// i64 queue_fences_seen[GPU_NumQueues] = ZI; -// for (i32 i = 0; i < countof(queue_fences_events); ++i) -// { -// queue_fences_events[i] = CreateEvent(0, 0, 1, 0); -// queue_fences_seen[i] = -1; -// } -// for (;;) -// { -// WaitForMultipleObjects(countof(queue_fences_events), queue_fences_events, 0, INFINITE); -// for (GPU_QueueKind queue_kind = 0; queue_kind < GPU_NumQueues; ++queue_kind) -// { -// GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); -// i64 last_seen = queue_fences_seen[queue_kind]; -// i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence); -// if (completed > last_seen) -// { -// SetFence(&queue->sync_fence, completed); -// queue_fences_seen[queue_kind] = completed; -// ID3D12Fence_SetEventOnCompletion(queue->commit_fence, completed + 1, queue_fences_events[queue_kind]); -// } -// } -// } -// } - -//////////////////////////////////////////////////////////// -//~ @hookimpl Fence hooks - -// Fence *GPU_FenceFromQueue(GPU_QueueKind queue_kind) -// { -// GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); -// return &queue->sync_fence; -// } - -// void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value) -// { -// GPU_D12_Queue *queue_a = GPU_D12_QueueFromKind(a); -// GPU_D12_Queue *queue_b = GPU_D12_QueueFromKind(b); -// ID3D12Fence *b_fence = queue_b->commit_fence; -// ID3D12CommandQueue_Wait(queue_a->d3d_queue, b_fence, b_target_fence_value); -// } - //////////////////////////////////////////////////////////// //~ @hookimpl Arena @@ -764,6 +716,14 @@ void GPU_ReleaseArena(GPU_ArenaHandle arena) //////////////////////////////////////////////////////////// //~ Resource helpers +GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_kind, u32 index) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[heap_kind]; + GPU_D12_Descriptor *descriptors = ArenaFirst(heap->descriptors_arena, GPU_D12_Descriptor); + return &descriptors[index]; +} + GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_DescriptorHeapKind heap_kind) { GPU_D12_SharedState *g = &GPU_D12_shared_state; @@ -812,7 +772,7 @@ GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_Des else { descriptor = PushStructNoZero(heap->descriptors_arena, GPU_D12_Descriptor); - index = heap->allocated_count++; + index = ArenaCount(heap->descriptors_arena, GPU_D12_Descriptor); if (index >= heap->max_count) { Panic(Lit("Max descriptors reached in heap")); @@ -831,12 +791,52 @@ GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_Des return descriptor; } -GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_kind, u32 index) +void GPU_D12_InitBufferDescriptor(GPU_D12_Descriptor *descriptor, GPU_D12_Resource *resource, u64 element_size, u64 element_offset, b32 is_raw, b32 is_uav) { GPU_D12_SharedState *g = &GPU_D12_shared_state; - GPU_D12_DescriptorHeap *heap = &g->descriptor_heaps[heap_kind]; - GPU_D12_Descriptor *descriptors = ArenaFirst(heap->descriptors_arena, GPU_D12_Descriptor); - return &descriptors[index]; + u64 buffer_size_aligned = resource->buffer_size_aligned; + u64 num_elements_in_buffer = buffer_size_aligned / element_size; + u64 num_elements_after_offset = num_elements_in_buffer > element_offset ? num_elements_in_buffer - element_offset : 0; + if (num_elements_after_offset > 0) + { + if (is_uav) + { + D3D12_UNORDERED_ACCESS_VIEW_DESC desc = ZI; + { + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER ; + desc.Buffer.FirstElement = element_offset; + desc.Buffer.NumElements = num_elements_after_offset; + desc.Buffer.StructureByteStride = element_size; + desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + } + if (is_raw) + { + desc.Format = DXGI_FORMAT_R32_TYPELESS; + desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW; + } + ID3D12Device_CreateUnorderedAccessView(g->device, resource->d3d_resource, 0, &desc, descriptor->handle); + } + else + { + D3D12_SHADER_RESOURCE_VIEW_DESC desc = ZI; + { + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + desc.Buffer.FirstElement = element_offset; + desc.Buffer.NumElements = num_elements_after_offset; + desc.Buffer.StructureByteStride = element_size; + desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + } + if (is_raw) + { + desc.Format = DXGI_FORMAT_R32_TYPELESS; + desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; + } + ID3D12Device_CreateShaderResourceView(g->device, resource->d3d_resource, &desc, descriptor->handle); + } + } } //////////////////////////////////////////////////////////// @@ -873,12 +873,13 @@ GPU_ResourceHandle GPU_PushBufferEx(GPU_ArenaHandle arena_handle, GPU_BufferDesc /* Create d3d resource */ ID3D12Resource *d3d_resource = 0; + u64 aligned_size = AlignU64(MaxU64(desc.size, 1), 4); { D3D12_RESOURCE_DESC1 d3d_desc = ZI; d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; d3d_desc.Format = DXGI_FORMAT_UNKNOWN; - d3d_desc.Width = MaxI32(desc.size, 1); + d3d_desc.Width = aligned_size; d3d_desc.Height = 1; d3d_desc.DepthOrArraySize = 1; d3d_desc.MipLevels = 1; @@ -917,11 +918,12 @@ GPU_ResourceHandle GPU_PushBufferEx(GPU_ArenaHandle arena_handle, GPU_BufferDesc } GPU_D12_Resource *resource = PushStruct(gpu_arena->arena, GPU_D12_Resource); - resource->d3d_resource = d3d_resource; - resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; - resource->flags = desc.flags; + resource->d3d_resource = d3d_resource; + resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; + resource->flags = desc.flags; - resource->buffer_size = desc.size; + resource->buffer_size = desc.size; + resource->buffer_size_aligned = aligned_size; resource->buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource); return GPU_D12_MakeHandle(GPU_ResourceHandle, resource); @@ -1039,46 +1041,44 @@ GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena_handle, GPU_SamplerDesc //- Shader handle creation -StructuredBufferHandle GPU_PushStructuredBufferHandleEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u32 element_size, RngU32 element_range) +StructuredBufferHandle GPU_PushStructuredBufferHandleEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u64 element_size, u64 element_offset) { GPU_D12_SharedState *g = &GPU_D12_shared_state; - u32 num_elements = element_range.max - element_range.min; - StructuredBufferHandle result = ZI; - if (num_elements > 0) - { - GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); - GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle); - GPU_D12_Descriptor *descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav); - { - D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc = ZI; - srv_desc.Format = DXGI_FORMAT_UNKNOWN; - srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; - srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srv_desc.Buffer.FirstElement = element_range.min; - srv_desc.Buffer.NumElements = MaxU32(num_elements, 1); - srv_desc.Buffer.StructureByteStride = element_size; - srv_desc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; - ID3D12Device_CreateShaderResourceView(g->device, resource->d3d_resource, &srv_desc, descriptor->handle); - } - result = GPU_D12_MakeHandle(StructuredBufferHandle, descriptor->index); - } - return result; + GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle); + GPU_D12_Descriptor *descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav); + GPU_D12_InitBufferDescriptor(descriptor, resource, element_size, element_offset, 0, 0); + return GPU_D12_MakeHandle(StructuredBufferHandle, descriptor->index); } -RWStructuredBufferHandle GPU_PushRWStructuredBufferHandleEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u32 element_size, RngU32 element_range) +RWStructuredBufferHandle GPU_PushRWStructuredBufferHandleEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u64 element_size, u64 element_offset) { GPU_D12_SharedState *g = &GPU_D12_shared_state; - u32 num_elements = element_range.max - element_range.min; - RWStructuredBufferHandle result = ZI; - if (num_elements > 0) - { - GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); - GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle); - GPU_D12_Descriptor *descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav); - ID3D12Device_CreateUnorderedAccessView(g->device, resource->d3d_resource, 0, 0, descriptor->handle); - result = GPU_D12_MakeHandle(RWStructuredBufferHandle, descriptor->index); - } - return result; + GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle); + GPU_D12_Descriptor *descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav); + GPU_D12_InitBufferDescriptor(descriptor, resource, element_size, element_offset, 0, 1); + return GPU_D12_MakeHandle(RWStructuredBufferHandle, descriptor->index); +} + +ByteAddressBufferHandle GPU_PushByteAddressBufferHandleEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u64 u32_offset) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle); + GPU_D12_Descriptor *descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav); + GPU_D12_InitBufferDescriptor(descriptor, resource, 4, u32_offset, 1, 0); + return GPU_D12_MakeHandle(ByteAddressBufferHandle, descriptor->index); +} + +RWByteAddressBufferHandle GPU_PushRWByteAddressBufferHandleEx(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle, u64 u32_offset) +{ + GPU_D12_SharedState *g = &GPU_D12_shared_state; + GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle); + GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle); + GPU_D12_Descriptor *descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav); + GPU_D12_InitBufferDescriptor(descriptor, resource, 4, u32_offset, 1, 1); + return GPU_D12_MakeHandle(RWByteAddressBufferHandle, descriptor->index); } Texture1DHandle GPU_PushTexture1DHandle(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle) @@ -1416,6 +1416,7 @@ GPU_D12_StagingRegionNode *GPU_D12_PushStagingRegion(GPU_D12_CmdList *cl, u64 si heap->resource.d3d_resource = d3d_resource; heap->resource.uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1; heap->resource.buffer_size = new_heap_size; + heap->resource.buffer_size_aligned = new_heap_size; heap->resource.buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource); /* Map */ diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index 1b25bc85..4bdbafcc 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -78,7 +78,6 @@ Struct(GPU_D12_DescriptorHeap) Mutex mutex; struct GPU_D12_Descriptor *first_free; - u32 allocated_count; u32 max_count; }; @@ -130,6 +129,7 @@ Struct(GPU_D12_Resource) /* Buffer info */ u64 buffer_size; + u64 buffer_size_aligned; D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address; /* Texture info */ @@ -411,8 +411,9 @@ GPU_D12_Queue *GPU_D12_QueueFromKind(GPU_QueueKind kind); //////////////////////////////////////////////////////////// //~ Resource helpers -GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_DescriptorHeapKind heap_kind); GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_kind, u32 index); +GPU_D12_Descriptor *GPU_D12_PushDescriptor(GPU_D12_Arena *gpu_arena, GPU_D12_DescriptorHeapKind heap_kind); +void GPU_D12_InitBufferDescriptor(GPU_D12_Descriptor *descriptor, GPU_D12_Resource *resource, u64 element_size, u64 element_offset, b32 is_raw, b32 is_uav); //////////////////////////////////////////////////////////// //~ Raw command list diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_extras.c similarity index 99% rename from src/gpu/gpu_common.c rename to src/gpu/gpu_extras.c index bf35b054..605dcfd2 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_extras.c @@ -4,7 +4,7 @@ ThreadLocal GPU_ArenaHandle GPU_t_perm_arena = ZI; //////////////////////////////////////////////////////////// //~ Startup -void GPU_StartupCommon(void) +void GPU_StartupExtra(void) { GPU_SharedUtilState *g = &GPU_shared_util_state; diff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_extras.h similarity index 97% rename from src/gpu/gpu_common.h rename to src/gpu/gpu_extras.h index 752be9ad..958a66d5 100644 --- a/src/gpu/gpu_common.h +++ b/src/gpu/gpu_extras.h @@ -14,7 +14,7 @@ extern ThreadLocal GPU_ArenaHandle GPU_t_perm_arena; //////////////////////////////////////////////////////////// //~ Startup -void GPU_StartupCommon(void); +void GPU_StartupExtra(void); //////////////////////////////////////////////////////////// //~ Helpers diff --git a/src/gpu/gpu_shader_extras.hlsl b/src/gpu/gpu_shader_extras.hlsl new file mode 100644 index 00000000..eb5d7441 --- /dev/null +++ b/src/gpu/gpu_shader_extras.hlsl @@ -0,0 +1,14 @@ +//////////////////////////////////////////////////////////// +//~ Shader printf types + + + +//////////////////////////////////////////////////////////// +//~ Shader printf + +// #define Test(fmt_cstr, ...) do { \ +// } while (0) + +void Test(u32 count) +{ +} diff --git a/src/pp/pp_vis/pp_vis_widgets.c b/src/pp/pp_vis/pp_vis_widgets.c index ec0d3355..c67160a1 100644 --- a/src/pp/pp_vis/pp_vis_widgets.c +++ b/src/pp/pp_vis/pp_vis_widgets.c @@ -374,10 +374,10 @@ UI_Key V_BuildConsoleWidget(b32 minimized) text = StringF( scratch.arena, "[%F:%F:%F.%F] %F", - FmtUintZ(datetime.hour, 2), - FmtUintZ(datetime.minute, 2), - FmtUintZ(datetime.second, 2), - FmtUintZ(datetime.milliseconds, 3), + FmtUint(datetime.hour, .z = 2), + FmtUint(datetime.minute, .z = 2), + FmtUint(datetime.second, .z = 2), + FmtUint(datetime.milliseconds, .z = 3), FmtString(text)); } UI_PushCP(UI_NilKey); diff --git a/src/ui/ui.lay b/src/ui/ui.lay index 616b0194..06c4b56a 100644 --- a/src/ui/ui.lay +++ b/src/ui/ui.lay @@ -7,13 +7,13 @@ //- Api @IncludeC ui_core.h -@IncludeC ui_common.h +@IncludeC ui_extras.h @IncludeC ui_shaders.h @IncludeGpu ui_shaders.h //- Impl @IncludeC ui_core.c -@IncludeC ui_common.c +@IncludeC ui_extras.c @IncludeGpu ui_shaders.hlsl //- Shaders diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index 1ee193f6..c7f5ad54 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -1405,7 +1405,7 @@ void UI_EndFrame(UI_Frame *frame) //- Clear pass { - GPU_ClearRenderTarget(frame->cl, draw_target, VEC4(1, 0, 0, 0)); + GPU_ClearRenderTarget(frame->cl, draw_target, VEC4(1, 0, 0, 1)); } //- Rect pass diff --git a/src/ui/ui_common.c b/src/ui/ui_extras.c similarity index 100% rename from src/ui/ui_common.c rename to src/ui/ui_extras.c diff --git a/src/ui/ui_common.h b/src/ui/ui_extras.h similarity index 100% rename from src/ui/ui_common.h rename to src/ui/ui_extras.h diff --git a/src/ui/ui_shaders.hlsl b/src/ui/ui_shaders.hlsl index 9180b42c..003c65bf 100644 --- a/src/ui/ui_shaders.hlsl +++ b/src/ui/ui_shaders.hlsl @@ -22,9 +22,9 @@ Struct(UI_DRectPSOutput) VertexShader(UI_DRectVS, UI_DRectPSInput) { - UI_DParams params = StructuredBufferFromHandle(UI_ShaderConst_Params)[0]; + UI_DParams params = StructuredBufferFromHandle(UI_ShaderConst_Params)[0]; StructuredBuffer rects = StructuredBufferFromHandle(params.rects); - UI_DRect rect = rects[SV_InstanceID]; + UI_DRect rect = rects[SV_InstanceID]; Vec2 rect_uv = RectUvFromVertexId(SV_VertexID); Vec2 tex_uv = lerp(rect.tex_uv0, rect.tex_uv1, rect_uv); @@ -47,9 +47,9 @@ VertexShader(UI_DRectVS, UI_DRectPSInput) PixelShader(UI_DRectPS, UI_DRectPSOutput, UI_DRectPSInput input) { - UI_DParams params = StructuredBufferFromHandle(UI_ShaderConst_Params)[0]; + UI_DParams params = StructuredBufferFromHandle(UI_ShaderConst_Params)[0]; StructuredBuffer rects = StructuredBufferFromHandle(params.rects); - SamplerState sampler = SamplerStateFromHandle(params.sampler); + SamplerState sampler = SamplerStateFromHandle(params.sampler); UI_DRect rect = rects[input.rect_idx]; @@ -172,7 +172,39 @@ PixelShader(UI_BlitPS, UI_BlitPSOutput, UI_BlitPSInput input) Vec2 uv = input.src_uv; Vec4 result = tex.Sample(sampler, uv); - result = Vec4(1, 1, 0, 1); + + u32 ints[] = { + 1, + 2, + 3, + 4 + }; + u32 count = countof(ints); + if (count == 4) + { + result.g = 1; + } + else + { + // result.b = 1; + } + + + + + // LogDebugF( + // "uv: (%F, %F), result: (%F, %F, %F, %F)", + // FmtFloat(uv.x), + // FmtFloat(uv.y), + // FmtFloat(result.x), + // FmtFloat(result.y), + // FmtFloat(result.z), + // FmtFloat(result.w), + // ); + + + + UI_BlitPSOutput output; output.SV_Target0 = result;