diff --git a/src/base/base.h b/src/base/base.h index 80503db7..76295b8c 100644 --- a/src/base/base.h +++ b/src/base/base.h @@ -762,6 +762,10 @@ Struct(ResourceKey) #if LanguageIsC +Struct(GpuPointer) { u32 v; }; +Struct(GpuBufferPos) { GpuPointer p; u64 byte_offset; }; + +Inline b32 IsGpuPointerNil(GpuPointer p) { return p.v == 0; } Struct(VertexShader) { ResourceKey resource; }; Struct(PixelShader) { ResourceKey resource; }; @@ -780,16 +784,9 @@ Struct(SamplerStateRid) { u32 v; }; #elif LanguageIsGpu -//- Resource descriptor index types -typedef uint StructuredBufferRid; -typedef uint RWStructuredBufferRid; -typedef uint Texture1DRid; -typedef uint Texture2DRid; -typedef uint Texture3DRid; -typedef uint RWTexture1DRid; -typedef uint RWTexture2DRid; -typedef uint RWTexture3DRid; -typedef uint SamplerStateRid; +typedef u32 GpuBuffer; +typedef u32 GpuTexture; +typedef u32 GpuSampler; //- Shader declaration # define ComputeShader(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID)) diff --git a/src/base/base_math.c b/src/base/base_math.c index a170d0b4..998fde78 100644 --- a/src/base/base_math.c +++ b/src/base/base_math.c @@ -1142,12 +1142,6 @@ u32 U32FromVec4(Vec4 v) return result; } -PackedVec4 PackVec4(Vec4 v) -{ - PackedVec4 result = ZI; - return result; -} - //////////////////////////////////////////////////////////// //~ Xform operations diff --git a/src/base/base_math.h b/src/base/base_math.h index 0cf4694f..db7e106f 100644 --- a/src/base/base_math.h +++ b/src/base/base_math.h @@ -20,60 +20,104 @@ Enum(Axis) //////////////////////////////////////////////////////////// //~ Vector types -#define VEC2(x, y) (Vec2) { (x), (y) } -#define VEC3(x, y, z) (Vec3) { (x), (y), (z) } -#define VEC4(x, y, z, w) (Vec4) { (x), (y), (z), (w) } - -#define VEC2I32(x, y) (Vec2I32) { (x), (y) } -#define VEC3I32(x, y, z) (Vec3I32) { (x), (y), (z) } -#define VEC4I32(x, y, z, w) (Vec4I32) { (x), (y), (z), (w) } - -#define VEC2U32(x, y) (Vec2U32) { (x), 
(y) } -#define VEC3U32(x, y, z) (Vec3U32) { (x), (y), (z) } -#define VEC4U32(x, y, z, w) (Vec4U32) { (x), (y), (z), (w) } - -Struct(Vec2) { f32 x, y; }; -Struct(Vec3) { f32 x, y, z; }; -Struct(Vec4) { f32 x, y, z, w; }; +//- Vec2 +Struct(Vec2) { f32 x, y; }; Struct(Vec2I32) { i32 x, y; }; +Struct(Vec2I64) { i64 x, y; }; +Struct(Vec2U32) { u32 x, y; }; +Struct(Vec2U64) { u64 x, y; }; + +Struct(Vec2Array) { Vec2 *points; u64 count; }; + +#define VEC2(x, y) (Vec2) { (x), (y) } +#define VEC2I32(x, y) (Vec2I32) { (x), (y) } +#define VEC2I64(x, y) (Vec2I64) { (x), (y) } +#define VEC2U32(x, y) (Vec2U32) { (x), (y) } +#define VEC2U64(x, y) (Vec2U64) { (x), (y) } + +#define Vec2FromVec(v) VEC2((v).x, (v).y) + +//- Vec3 + +Struct(Vec3) { f32 x, y, z; }; Struct(Vec3I32) { i32 x, y, z; }; +Struct(Vec3I64) { i64 x, y, z; }; +Struct(Vec3U32) { u32 x, y, z; }; +Struct(Vec3U64) { u64 x, y, z; }; + +Struct(Vec3Array) { Vec3 *points; u64 count; }; + +#define VEC3(x, y, z) (Vec3) { (x), (y), (z) } +#define VEC3I32(x, y, z) (Vec3I32) { (x), (y), (z) } +#define VEC3I64(x, y, z) (Vec3I64) { (x), (y), (z) } +#define VEC3U32(x, y, z) (Vec3U32) { (x), (y), (z) } +#define VEC3U64(x, y, z) (Vec3U64) { (x), (y), (z) } + +#define Vec3FromVec(v) VEC3((v).x, (v).y, (v).z) + +//- Vec4 + +Struct(Vec4) { f32 x, y, z, w; }; Struct(Vec4I32) { i32 x, y, z, w; }; +Struct(Vec4I64) { i64 x, y, z, w; }; +Struct(Vec4U32) { u32 x, y, z, w; }; +Struct(Vec4U64) { u64 x, y, z, w; }; -Struct(Vec2U32) { u32 x, y; }; -Struct(Vec3U32) { u32 x, y, z; }; -Struct(Vec4U32) { u32 x, y, z, w; }; +Struct(Vec4Array) { Vec4 *points; u64 count; }; -Struct(PackedVec4) { u32 hi; u32 lo; }; +#define VEC4(x, y, z, w) (Vec4) { (x), (y), (z), (w) } +#define VEC4I32(x, y, z, w) (Vec4I32) { (x), (y), (z), (w) } +#define VEC4I64(x, y, z, w) (Vec4I64) { (x), (y), (z), (w) } +#define VEC4U32(x, y, z, w) (Vec4U32) { (x), (y), (z), (w) } +#define VEC4U64(x, y, z, w) (Vec4U64) { (x), (y), (z), (w) } -Struct(Vec2Array) -{ - Vec2 *points; 
- u64 count; -}; - -Struct(Vec3Array) -{ - Vec3 *points; - u64 count; -}; - -Struct(Vec4Array) -{ - Vec4 *points; - u64 count; -}; +#define Vec4FromVec(v) VEC4((v).x, (v).y, (v).z, (v).w) //////////////////////////////////////////////////////////// //~ Range types -#define RNG2(p0, p1) (Rng2) { (p0), (p1) } -#define RNG2I32(p0, p1) (Rng2I32) { (p0), (p1) } -#define RNG2U32(p0, p1) (Rng2U32) { (p0), (p1) } +//- Rng1 -Struct(Rng2) { Vec2 p0; Vec2 p1; }; +Struct(Rng) { f32 min; f32 max; }; +Struct(RngI32) { i32 min; i32 max; }; +Struct(RngI64) { i64 min; i64 max; }; +Struct(RngU32) { u32 min; u32 max; }; +Struct(RngU64) { u64 min; u64 max; }; + +#define RNG(min, max) (Rng) { (min), (max) } +#define RNGI32(min, max) (RngI32) { (min), (max) } +#define RNGI64(min, max) (RngI64) { (min), (max) } +#define RNGU32(min, max) (RngU32) { (min), (max) } +#define RNGU64(min, max) (RngU64) { (min), (max) } + +//- Rng2 + +Struct(Rng2) { Vec2 p0; Vec2 p1; }; Struct(Rng2I32) { Vec2I32 p0; Vec2I32 p1; }; +Struct(Rng2I64) { Vec2I64 p0; Vec2I64 p1; }; Struct(Rng2U32) { Vec2U32 p0; Vec2U32 p1; }; +Struct(Rng2U64) { Vec2U64 p0; Vec2U64 p1; }; + +#define RNG2(p0, p1) (Rng2) { (p0), (p1) } +#define RNG2I32(p0, p1) (Rng2I32) { (p0), (p1) } +#define RNG2I64(p0, p1) (Rng2I64) { (p0), (p1) } +#define RNG2U32(p0, p1) (Rng2U32) { (p0), (p1) } +#define RNG2U64(p0, p1) (Rng2U64) { (p0), (p1) } + +//- Rng3 + +Struct(Rng3) { Vec3 p0; Vec3 p1; }; +Struct(Rng3I32) { Vec3I32 p0; Vec3I32 p1; }; +Struct(Rng3I64) { Vec3I64 p0; Vec3I64 p1; }; +Struct(Rng3U32) { Vec3U32 p0; Vec3U32 p1; }; +Struct(Rng3U64) { Vec3U64 p0; Vec3U64 p1; }; + +#define RNG3(p0, p1) (Rng3) { (p0), (p1) } +#define RNG3I32(p0, p1) (Rng3I32) { (p0), (p1) } +#define RNG3I64(p0, p1) (Rng3I64) { (p0), (p1) } +#define RNG3U32(p0, p1) (Rng3U32) { (p0), (p1) } +#define RNG3U64(p0, p1) (Rng3U64) { (p0), (p1) } //////////////////////////////////////////////////////////// //~ Xform types @@ -245,8 +289,6 @@ Vec4 BlendSrgb(Vec4 v0, Vec4 v1, f32 
t); //////////////////////////////////////////////////////////// //~ Vec2 operations -#define Vec2FromFields(v) VEC2((v).x, (v).y) - b32 IsVec2Zero(Vec2 a); b32 MatchVec2(Vec2 a, Vec2 b); @@ -315,7 +357,6 @@ Vec2I32 SubVec2I32(Vec2I32 a, Vec2I32 b); Vec4 Vec4FromU32(u32 v); u32 U32FromVec4(Vec4 v); -PackedVec4 PackVec4(Vec4 v); //////////////////////////////////////////////////////////// //~ Xform operations diff --git a/src/base/base_math_gpu.h b/src/base/base_math_gpu.h index 4b9be0a1..8399bc57 100644 --- a/src/base/base_math_gpu.h +++ b/src/base/base_math_gpu.h @@ -17,7 +17,6 @@ typedef int4 Vec4I32; typedef uint2 Vec2U32; typedef uint3 Vec3U32; typedef uint4 Vec4U32; -typedef uint2 PackedVec4; typedef float2x3 Xform; typedef float4 Rect; typedef float4 ClipRect; @@ -25,19 +24,6 @@ typedef float4 Aabb; typedef float4 Quad; typedef float4x4 Mat4x4; -//////////////////////////////////////////////////////////// -//~ Color helpers - -Vec4 Vec4FromU32(u32 v) -{ - Vec4 result; - result.r = ((v >> 0) & 0xFF) / 255.0; - result.g = ((v >> 8) & 0xFF) / 255.0; - result.b = ((v >> 16) & 0xFF) / 255.0; - result.a = ((v >> 24) & 0xFF) / 255.0; - return result; -} - //////////////////////////////////////////////////////////// //~ Vertex ID helpers diff --git a/src/font/font.c b/src/font/font.c index 25023f5f..129ab370 100644 --- a/src/font/font.c +++ b/src/font/font.c @@ -48,71 +48,37 @@ JobDef(F_Load, sig, _) } TTF_Decoded decoded = TTF_Decode(scratch.arena, resource_data, em_size, font_codes, countof(font_codes)); - /* Send texture to GPU */ - GPU_Resource *texture = 0; + /* Upload texture to GPU */ + Fence completion_fence = ZI; { - GPU_ResourceDesc desc = ZI; - desc.kind = GPU_ResourceKind_Texture2D; - desc.flags = GPU_ResourceFlag_None; - desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb; - desc.texture.size = VEC3I32(decoded.image_width, decoded.image_height, 1); - texture = GPU_AcquireResource(desc); - - /* Fill upload buffer */ - GPU_ResourceDesc upload_desc = ZI; - 
upload_desc.kind = GPU_ResourceKind_Buffer; - upload_desc.buffer.heap_kind = GPU_HeapKind_Upload; - upload_desc.buffer.count = GPU_GetFootprintSize(texture); - GPU_Resource *upload = GPU_AcquireResource(upload_desc); + GPU_CommandList *cl = GPU_BeginCommandList(GPU_QueueKind_BackgroundCopy); + GPU_Arena *gpu_temp = GPU_AcquireArena(); { - GPU_Mapped mapped = GPU_Map(upload); - GPU_CopyBytesToFootprint(mapped.mem, (u8 *)decoded.image_pixels, texture); - GPU_Unmap(mapped); - } - - GPU_QueueKind copy_queue = GPU_QueueKind_BackgroundCopy; - GPU_QueueKind direct_queue = GPU_QueueKind_Direct; - Fence *direct_queue_fence = GPU_FenceFromQueue(direct_queue); - i64 direct_queue_fence_target = 0; - if (copy_queue == direct_queue) - { - /* Copy & transition GPU resource on direct queue*/ + GpuTexture gpu_texture = ZI; { - GPU_CommandList *cl = GPU_BeginCommandList(direct_queue); - { - GPU_TransitionToCopyDst(cl, texture); - GPU_CopyResource(cl, texture, upload); - GPU_TransitionToReadable(cl, texture); - } - direct_queue_fence_target = GPU_EndCommandList(cl); + GPU_Arena *gpu_perm = GPU_Perm(); + GPU_ResourceDesc desc = ZI; + desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb; + desc.texture.size = VEC3I32(decoded.image_width, decoded.image_height, 1); + gpu_texture = GPU_PushTexture(gpu_perm, GPU_TextureKind_2D, desc); } - } - else - { - /* Copy to GPU resource on background copy queue*/ - i64 copy_queue_fence_target = 0; + texture->gpu_texture = gpu_texture; + texture->width = decoded.width; + texture->height = decoded.height; + GpuBuffer src_buff = GPU_PushBuffer(gpu_temp, GPU_GetFootprintSize(gpu_texture), GPU_BufferFlag_CpuWritable); + GpuAddress src_addr = ZI; { - GPU_CommandList *cl = GPU_BeginCommandList(copy_queue); - { - GPU_TransitionToCopyDst(cl, texture); - GPU_CopyResource(cl, texture, upload); - } - copy_queue_fence_target = GPU_EndCommandList(cl); - } - /* Once copy finishes, transition resource to readable on direct queue */ - { - GPU_QueueWait(direct_queue, 
copy_queue, copy_queue_fence_target); - GPU_CommandList *cl = GPU_BeginCommandList(direct_queue); - { - GPU_TransitionToReadable(cl, texture); - } - direct_queue_fence_target = GPU_EndCommandList(cl); + u32 *p = GPU_PushStructsNoZero(src_buff, u32, decoded.width * decoded.height); + CopyStructs(p, decoded.pixels, decoded.width * decoded.height); + GPU_TransitionBufferToCopySrc(src_buff); + GPU_TransitionTextureToCopyDst(gpu_texture); + GPU_CopyBytesToFootprint(gpu_texture, src_buff, src_addr, decoded.width * decoded.height * 4); + GPU_TransitionTextureToReadonly(gpu_texture); } + GPU_SetFence(&completion_fence, 1); } - - /* Release upload buffer once transition finishes */ - YieldOnFence(direct_queue_fence, direct_queue_fence_target); - GPU_ReleaseResource(upload, GPU_ReleaseFlag_None); + GPU_ReleaseArena(gpu_temp); + GPU_EndCommandList(cl); } /* Acquire store memory */ @@ -155,6 +121,8 @@ JobDef(F_Load, sig, _) font->lookup[codepoint] = decoded.cache_indices[i]; } + YieldOnFence(&completion_fence, 1); + LogSuccessF("Loaded font \"%F\" (font size: %F, em size: %F) in %F seconds", FmtString(name), FmtFloat((f64)font_size), FmtFloat((f64)em_size), FmtFloat(SecondsFromNs(TimeNs() - start_ns))); AC_MarkReady(asset, font); diff --git a/src/gpu/gpu.lay b/src/gpu/gpu.lay index d6e91344..73592d0c 100644 --- a/src/gpu/gpu.lay +++ b/src/gpu/gpu.lay @@ -19,4 +19,4 @@ //- Startup @Startup GPU_Startup -@Startup GPU_StartupUtils +@Startup GPU_StartupCommon diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index 0b672de9..8dd3644c 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -3,227 +3,76 @@ GPU_SharedUtilState GPU_shared_util_state = ZI; //////////////////////////////////////////////////////////// //~ Startup -void GPU_StartupUtils(void) +void GPU_StartupCommon(void) { GPU_SharedUtilState *g = &GPU_shared_util_state; - GPU_QueueKind queue_kind = GPU_QueueKind_Direct; - Fence *queue_fence = GPU_FenceFromQueue(queue_kind); - i64 queue_fence_target = 
FetchFence(queue_fence); + GPU_Arena *gpu_perm = GPU_Perm(); - GPU_Resource *noise_upload = 0; - GPU_Resource *quad_upload = 0; - GPU_Resource *noise = 0; - GPU_Resource *quad = 0; - - GPU_CommandList *cl = GPU_BeginCommandList(queue_kind); + /* Upload data to gpu */ + GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_Direct); { - /* Upload noise */ + /* Init noise texture */ + String noise_data = DataFromResource(ResourceKeyFromStore(&GPU_Resources, Lit("noise_128x128x64_16.dat"))); + Vec3I32 noise_dims = VEC3I32(128, 128, 64); + GpuPointer noise_tex = ZI; { - Vec3I32 noise_size = VEC3I32(128, 128, 64); - ResourceKey noise_resource = ResourceKeyFromStore(&GPU_Resources, Lit("noise_128x128x64_16.dat")); - String noise_res_data = DataFromResource(noise_resource); - if (noise_res_data.len != noise_size.x * noise_size.y * noise_size.z * 2) + GPU_TextureDesc noise_desc = ZI; + noise_desc.format = GPU_Format_R16_Uint; + noise_desc.size = noise_dims; + if (noise_data.len != noise_dims.x * noise_dims.y * noise_dims.z * 2) { Panic(Lit("Unexpected noise texture size")); } - GPU_ResourceDesc desc = ZI; - desc.kind = GPU_ResourceKind_Texture3D; - desc.texture.format = GPU_Format_R16_Uint; - desc.texture.size = noise_size; - noise = GPU_AcquireResource(desc); - { - u64 footprint_size = GPU_GetFootprintSize(noise); - GPU_ResourceDesc upload_desc = ZI; - upload_desc.kind = GPU_ResourceKind_Buffer; - upload_desc.buffer.heap_kind = GPU_HeapKind_Upload; - upload_desc.buffer.count = footprint_size; - noise_upload = GPU_AcquireResource(upload_desc); - GPU_Mapped mapped = GPU_Map(noise_upload); - GPU_CopyBytesToFootprint(mapped.mem, noise_res_data.text, noise); - GPU_Unmap(mapped); - } - GPU_TransitionToCopyDst(cl, noise); - GPU_CopyResource(cl, noise, noise_upload); - GPU_TransitionToReadable(cl, noise); + noise_tex = GPU_PushTexture(gpu_perm, GPU_TextureKind_3D, GPU_Format_R16_Uint, noise_dims, GPU_TextureFlag_None); + GPU_CopyFromCpu(cl, noise_tex, noise_data); } + 
g->noise_tex = noise_tex; - /* Upload quad indices */ + /* Init quad index buffer */ + GpuPointer quad_indices = ZI; { - u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 }; - GPU_ResourceDesc desc = ZI; - desc.kind = GPU_ResourceKind_Buffer; - desc.buffer.count = countof(quad_indices); - desc.buffer.stride = sizeof(quad_indices[0]); - quad = GPU_AcquireResource(desc); - { - GPU_ResourceDesc upload_desc = ZI; - upload_desc.kind = GPU_ResourceKind_Buffer; - upload_desc.buffer.heap_kind = GPU_HeapKind_Upload; - upload_desc.buffer.count = desc.buffer.count * desc.buffer.stride; - quad_upload = GPU_AcquireResource(upload_desc); - GPU_Mapped mapped = GPU_Map(quad_upload); - CopyBytes(mapped.mem, quad_indices, sizeof(quad_indices)); - GPU_Unmap(mapped); - } - GPU_TransitionToCopyDst(cl, quad); - GPU_CopyResource(cl, quad, quad_upload); - GPU_TransitionToReadable(cl, quad); + u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 }; + quad_indices = GPU_PushBuffer(gpu_perm, u16, countof(quad_data), GPU_BufferFlag_None); + GPU_CopyFromCpu(cl, quad_indices, StringFromArray(quad_data)); } + g->quad_indices = quad_indices; } - queue_fence_target = GPU_EndCommandList(cl); + GPU_CloseCommandList(cl); /* Init point sampler */ - g->pt_sampler = GPU_AcquireResource((GPU_ResourceDesc) { .kind = GPU_ResourceKind_Sampler, .sampler.filter = GPU_Filter_MinMagMipPoint }); + g->pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint }); +} - /* Wait & cleanup */ - YieldOnFence(queue_fence, queue_fence_target); - GPU_ReleaseResource(noise_upload, GPU_ReleaseFlag_None); - GPU_ReleaseResource(quad_upload, GPU_ReleaseFlag_None); +//////////////////////////////////////////////////////////// +//~ Arena helpers - g->noise = noise; - g->quad_indices = quad; +GPU_Arena *GPU_Perm(void) +{ + i16 fiber_id = FiberId(); + GPU_Arena *perm = GPU_shared_util_state.perm_arenas[fiber_id]; + if (!perm) + { + GPU_shared_util_state.perm_arenas[fiber_id] = GPU_AcquireArena(); + perm = 
GPU_shared_util_state.perm_arenas[fiber_id]; + } + return perm; } //////////////////////////////////////////////////////////// //~ Common resource helpers -GPU_Resource *GPU_GetCommonPointSampler(void) +GpuPointer GPU_GetCommonPointSampler(void) { return GPU_shared_util_state.pt_sampler; } -GPU_Resource *GPU_GetCommonQuadIndices(void) +GpuPointer GPU_GetCommonQuadIndices(void) { return GPU_shared_util_state.quad_indices; } -GPU_Resource *GPU_GetCommonNoise(void) +GpuPointer GPU_GetCommonNoise(void) { - return GPU_shared_util_state.noise; -} - -//////////////////////////////////////////////////////////// -//~ Transient buffer operations - -GPU_TransientBuffer GPU_AcquireTransientBuffer(GPU_QueueKind queue_kind, u32 element_size) -{ - GPU_TransientBuffer tbuff = ZI; - tbuff.element_size = MaxU32(element_size, 1); - tbuff.queue_kind = queue_kind; - return tbuff; -} - -void GPU_ReleaseTransientBuffer(GPU_TransientBuffer *tbuff) -{ - GPU_SharedUtilState *g = &GPU_shared_util_state; - - Fence *queue_fence = GPU_FenceFromQueue(tbuff->queue_kind); - i64 queue_fence_value = FetchFence(queue_fence); - YieldOnFence(queue_fence, queue_fence_value); - - if (tbuff->first_submitted) - { - for (GPU_SubmittedResourceNode *submitted = tbuff->first_submitted; - submitted; - submitted = submitted->next) - { - GPU_ReleaseResource(submitted->resource, GPU_ReleaseFlag_None); - } - - Lock lock = LockE(&g->submitted_transient_buffers_mutex); - { - tbuff->last_submitted->next = g->first_free_submitted_transient_buffer; - g->first_free_submitted_transient_buffer = tbuff->first_submitted; - } - Unlock(&lock); - } -} - -GPU_Resource *GPU_UploadTransientBuffer(GPU_TransientBuffer *tbuff, void *src, u64 src_size) -{ - GPU_SharedUtilState *g = &GPU_shared_util_state; - GPU_Resource *resource = 0; - u64 element_count = src_size / tbuff->element_size; - - Fence *queue_fence = GPU_FenceFromQueue(tbuff->queue_kind); - i64 queue_fence_value = FetchFence(queue_fence); - - if (tbuff->uploaded != 0) - { 
- Panic(Lit("GPU transient buffer uploaded without a reset")); - } - - /* Grab resource node */ - GPU_SubmittedResourceNode *upload = 0; - { - if (tbuff->first_submitted && tbuff->first_submitted->fence_target <= queue_fence_value) - { - upload = tbuff->first_submitted; - SllQueuePop(tbuff->first_submitted, tbuff->last_submitted); - } - if (!upload) - { - Lock lock = LockE(&g->submitted_transient_buffers_mutex); - { - upload = g->first_free_submitted_transient_buffer; - if (upload) - { - g->first_free_submitted_transient_buffer = upload->next; - SllStackPop(g->first_free_submitted_transient_buffer); - } - } - Unlock(&lock); - } - if (!upload) - { - Arena *perm = PermArena(); - upload = PushStruct(perm, GPU_SubmittedResourceNode); - } - } - - /* Create gpu resource */ - { - if (upload->resource) - { - GPU_ReleaseResource(upload->resource, GPU_ReleaseFlag_Reuse); - upload->resource = 0; - } - GPU_ResourceDesc desc = ZI; - desc.kind = GPU_ResourceKind_Buffer; - desc.flags = GPU_ResourceFlag_None; - desc.buffer.heap_kind = GPU_HeapKind_Upload; - desc.buffer.count = element_count; - desc.buffer.stride = tbuff->element_size; - upload->resource = GPU_AcquireResource(desc); - } - - /* Fill gpu resource */ - { - __profn("Copy to transfer buffer"); - GPU_Mapped m = GPU_Map(upload->resource); - CopyBytes(m.mem, src, src_size); - GPU_Unmap(m); - } - - tbuff->uploaded = upload; - return upload->resource; -} - -GPU_Resource *GPU_UploadTransientBufferFromArena(GPU_TransientBuffer *tbuff, Arena *arena) -{ - u32 element_count = arena->pos / tbuff->element_size; - GPU_Resource *result = GPU_UploadTransientBuffer(tbuff, ArenaFirst(arena, u8), tbuff->element_size * element_count); - return result; -} - -void GPU_ResetTransientBuffer(GPU_TransientBuffer *tbuff, i64 queue_fence_target) -{ - GPU_SubmittedResourceNode *uploaded = tbuff->uploaded; - if (uploaded) - { - uploaded->fence_target = queue_fence_target; - SllQueuePush(tbuff->first_submitted, tbuff->last_submitted, uploaded); - 
tbuff->uploaded = 0; - } + return GPU_shared_util_state.noise_tex; } diff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h index 65870ab1..5ec9da41 100644 --- a/src/gpu/gpu_common.h +++ b/src/gpu/gpu_common.h @@ -1,62 +1,29 @@ -//////////////////////////////////////////////////////////// -//~ Transient buffer types - -Struct(GPU_SubmittedResourceNode) -{ - GPU_SubmittedResourceNode *next; - - /* Set during transient upload */ - GPU_Resource *resource; - - /* Set during transient reset */ - i64 fence_target; /* Once the buffer's queue reaches the target, the resource can be freed or reused */ -}; - -Struct(GPU_TransientBuffer) -{ - GPU_QueueKind queue_kind; - u32 element_size; - - GPU_SubmittedResourceNode *uploaded; - GPU_SubmittedResourceNode *first_submitted; - GPU_SubmittedResourceNode *last_submitted; - u32 max_in_flight; -}; - - //////////////////////////////////////////////////////////// //~ State types Struct(GPU_SharedUtilState) { /* Common shared resources */ - GPU_Resource *pt_sampler; - GPU_Resource *quad_indices; - GPU_Resource *noise; + GpuPointer pt_sampler; + GpuPointer quad_indices; + GpuPointer noise_tex; - /* Transient buffer pool */ - Mutex submitted_transient_buffers_mutex; - GPU_SubmittedResourceNode *first_free_submitted_transient_buffer; + GPU_Arena *perm_arenas[MaxFibers]; } extern GPU_shared_util_state; //////////////////////////////////////////////////////////// //~ Startup -void GPU_StartupUtils(void); +void GPU_StartupCommon(void); + +//////////////////////////////////////////////////////////// +//~ Arena helpers + +GPU_Arena *GPU_Perm(void); //////////////////////////////////////////////////////////// //~ Common resource helpers -GPU_Resource *GPU_GetCommonPointSampler(void); -GPU_Resource *GPU_GetCommonQuadIndices(void); -GPU_Resource *GPU_GetCommonNoise(void); - -//////////////////////////////////////////////////////////// -//~ Transient buffer operations - -GPU_TransientBuffer GPU_AcquireTransientBuffer(GPU_QueueKind queue_kind, 
u32 element_size); -void GPU_ReleaseTransientBuffer(GPU_TransientBuffer *tbuff); - -GPU_Resource *GPU_UploadTransientBuffer(GPU_TransientBuffer *tbuff, void *src, u64 src_size); -GPU_Resource *GPU_UploadTransientBufferFromArena(GPU_TransientBuffer *tbuff, Arena *arena); -void GPU_ResetTransientBuffer(GPU_TransientBuffer *tbuff, i64 queue_fence_target); +GpuPointer GPU_GetCommonPointSampler(void); +GpuPointer GPU_GetCommonQuadIndices(void); +GpuPointer GPU_GetCommonNoise(void); diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index e988a69d..ad23f2a6 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////// //~ Opaque types -Struct(GPU_Resource); +Struct(GPU_Arena); Struct(GPU_CommandList); Struct(GPU_Swapchain); @@ -158,7 +158,63 @@ Enum(GPU_Format) }; //////////////////////////////////////////////////////////// -//~ Filter types +//~ Shader access types + +Enum(GPU_ShaderAccessKind) +{ + GPU_ShaderAccessKind_Readonly, /* Default state for all resources */ + GPU_ShaderAccessKind_ReadWrite, + GPU_ShaderAccessKind_RasterTarget, +}; + +//////////////////////////////////////////////////////////// +//~ Arena types + +Struct(GPU_TempArena) +{ + GPU_Arena *arena; + u64 start_pos; +}; + +//////////////////////////////////////////////////////////// +//~ Buffer types + +Enum(GPU_BufferFlag) +{ + GPU_BufferFlag_None = 0, + GPU_BufferFlag_Writable = (1 << 0), +}; + +//////////////////////////////////////////////////////////// +//~ Texture types + +#define GPU_MaxRasterTargets 8 + +Enum(GPU_TextureFlag) +{ + GPU_TextureFlag_None = 0, + GPU_TextureFlag_Writable = (1 << 0), + GPU_TextureFlag_Rasterizable = (1 << 1), +}; + +Enum(GPU_TextureKind) +{ + GPU_TextureKind_1D, + GPU_TextureKind_2D, + GPU_TextureKind_3D, +}; + +Struct(GPU_TextureDesc) +{ + GPU_TextureFlag flags; + GPU_Format format; + Vec3I32 size; + Vec4 clear_color; + i32 mip_levels; +}; + 
+//////////////////////////////////////////////////////////// +//~ Sampler types /* NOTE: Matches DirectX D3D12_FILTER */ Enum(GPU_Filter) @@ -236,119 +292,33 @@ Enum(GPU_ComparisonFunc) GPU_ComparisonFunc_Always = 8 }; -//////////////////////////////////////////////////////////// -//~ Resource types - -#define GPU_MaxRenderTargets 8 - -Enum(GPU_ResourceKind) +Struct(GPU_SamplerDesc) { - GPU_ResourceKind_Unknown, - GPU_ResourceKind_Buffer, - GPU_ResourceKind_Texture1D, - GPU_ResourceKind_Texture2D, - GPU_ResourceKind_Texture3D, - GPU_ResourceKind_Sampler -}; - -Enum(GPU_ResourceFlag) -{ - GPU_ResourceFlag_None = 0, - GPU_ResourceFlag_Writable = (1 << 0), - GPU_ResourceFlag_Renderable = (1 << 1), - GPU_ResourceFlag_MaxMipLevels = (1 << 2), - GPU_ResourceFlag_Zeroed = (1 << 3), -}; - -Enum(GPU_HeapKind) -{ - GPU_HeapKind_Default, - GPU_HeapKind_Upload, - GPU_HeapKind_Download -}; - -Enum(GPU_ReleaseFlag) -{ - GPU_ReleaseFlag_None = 0, - - /* Hints to the GPU layer that more resources using a similar desc will - * be allocated soon, so the resource's memory should be kept around for - * re-use. 
*/ - GPU_ReleaseFlag_Reuse = (1 << 0) -}; - -Struct(GPU_ResourceDesc) -{ - GPU_ResourceKind kind; - GPU_ResourceFlag flags; - Vec4 clear_color; - union - { - struct - { - GPU_Format format; - Vec3I32 size; - i32 mip_levels; /* Defaults to 1 (unless GPU_ResourceFlag_MaxMipLevels is set) */ - } texture; - struct - { - GPU_HeapKind heap_kind; - u32 count; - u32 stride; /* Defaults to 1 */ - } buffer; - struct - { - GPU_Filter filter; - GPU_AddressMode x; - GPU_AddressMode y; - GPU_AddressMode z; - f32 mip_lod_bias; - u32 max_anisotropy; - GPU_ComparisonFunc comparison; - Vec4 border_color; - f32 min_lod; - f32 max_lod; - } sampler; - }; -}; - -Struct(GPU_Mapped) -{ - GPU_Resource *resource; - void *mem; + GPU_Filter filter; + GPU_AddressMode x; + GPU_AddressMode y; + GPU_AddressMode z; + f32 mip_lod_bias; + u32 max_anisotropy; + GPU_ComparisonFunc comparison; + Vec4 border_color; + f32 min_lod; + f32 max_lod; }; //////////////////////////////////////////////////////////// -//~ Rasterizer types +//~ Rasterization types -Enum(GPU_RasterizeMode) +Enum(GPU_RasterMode) { - GPU_RasterizeMode_None, - GPU_RasterizeMode_PointList, - GPU_RasterizeMode_LineList, - GPU_RasterizeMode_LineStrip, - GPU_RasterizeMode_TriangleList, - GPU_RasterizeMode_WireTriangleList, - GPU_RasterizeMode_TriangleStrip, - GPU_RasterizeMode_WireTriangleStrip, -}; - -Struct(GPU_Viewport) -{ - f32 top_left_x; - f32 top_left_y; - f32 width; - f32 height; - f32 min_depth; - f32 max_depth; -}; - -Struct(GPU_Scissor) -{ - f32 left; - f32 top; - f32 right; - f32 bottom; + GPU_RasterMode_None, + GPU_RasterMode_PointList, + GPU_RasterMode_LineList, + GPU_RasterMode_LineStrip, + GPU_RasterMode_TriangleList, + GPU_RasterMode_WireTriangleList, + GPU_RasterMode_TriangleStrip, + GPU_RasterMode_WireTriangleStrip, }; //////////////////////////////////////////////////////////// @@ -373,113 +343,74 @@ Struct(GPU_Stats) void GPU_Startup(void); //////////////////////////////////////////////////////////// -//~ @hookdecl 
Fence operations +//~ @hookdecl Arenas -Fence *GPU_FenceFromQueue(GPU_QueueKind queue); -void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value); /* Tells queue A Forces `waiting_queue` to wait until `target_queue`'s fence reaches the specified value */ +GPU_Arena *GPU_AcquireArena(void); +void GPU_ReleaseArena(GPU_Arena *arena); //////////////////////////////////////////////////////////// -//~ @hookdecl Rasterizer helpers +//~ @hookdecl Resource creation -GPU_Viewport GPU_ViewportFromRect(Rng2 rect); -GPU_Scissor GPU_ScissorFromRect(Rng2 rect); +GpuPointer GPU_PushBufferEx(GPU_Arena *arena, i32 element_size, i32 element_align, i32 element_count, GPU_BufferFlag flags); +#define GPU_PushBuffer(arena, type, count, flags) GPU_PushBufferEx((arena), sizeof(type), alignof(type), (count), (flags)) + +GpuPointer GPU_PushTextureEx(GPU_Arena *arena, GPU_TextureDesc desc); +GpuPointer GPU_PushTexture(GPU_Arena *arena, GPU_TextureKind kind, GPU_Format format, Vec3I32 size, GPU_TextureFlag flags); + +GpuPointer GPU_PushSampler(GPU_Arena *arena, GPU_SamplerDesc desc); //////////////////////////////////////////////////////////// -//~ @hookdecl Resource operations +//~ @hookdecl Commands -GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc); -void GPU_ReleaseResource(GPU_Resource *resource, GPU_ReleaseFlag flags); +//- Command list creation +GPU_CommandList *GPU_OpenCommandList(GPU_QueueKind queue); +void GPU_CloseCommandList(GPU_CommandList *cl); -Vec2I32 GPU_GetTextureSize2D(GPU_Resource *resource); -Vec3I32 GPU_GetTextureSize3D(GPU_Resource *resource); -u64 GPU_GetFootprintSize(GPU_Resource *resource); +//- Cpu -> Gpu +void GPU_CopyBytesFromCpu(GPU_CommandList *cl, GpuPointer dst, RngU64 dst_range, void *src); +void GPU_CopyTexelsFromCpu(GPU_CommandList *cl, GpuPointer dst, Rng3U64 dst_range, void *src); +void GPU_CopyFromCpu(GPU_CommandList *cl, GpuPointer dst, String src); -u64 GPU_GetBufferCount(GPU_Resource *gpu_resource); +//- Gpu -> Cpu +void 
GPU_AddCpuFence(GPU_CommandList *cl, Fence *fence, i64 v); +void GPU_SetCpuFence(GPU_CommandList *cl, Fence *fence, i64 v); -//////////////////////////////////////////////////////////// -//~ @hookdecl Resource index operations +//- Implicit state +void GPU_SetShaderAccess(GPU_CommandList *cl, GpuPointer ptr, GPU_ShaderAccessKind access_kind); +void GPU_SetRasterizeMode(GPU_CommandList *cl, GPU_RasterMode mode); +void GPU_SetConstantU32(GPU_CommandList *cl, i32 slot, u32 v); +void GPU_SetConstantF32(GPU_CommandList *cl, i32 slot, f32 v); +void GPU_SetConstantPtr(GPU_CommandList *cl, i32 slot, GpuPointer v); -StructuredBufferRid GPU_StructuredBufferRidFromResource(GPU_Resource *resource); -RWStructuredBufferRid GPU_RWStructuredBufferRidFromResource(GPU_Resource *resource); -Texture1DRid GPU_Texture1DRidFromResource(GPU_Resource *resource); -Texture2DRid GPU_Texture2DRidFromResource(GPU_Resource *resource); -Texture3DRid GPU_Texture3DRidFromResource(GPU_Resource *resource); -RWTexture1DRid GPU_RWTexture1DRidFromResource(GPU_Resource *resource); -RWTexture2DRid GPU_RWTexture2DRidFromResource(GPU_Resource *resource); -RWTexture3DRid GPU_RWTexture3DRidFromResource(GPU_Resource *resource); -SamplerStateRid GPU_SamplerStateRidFromResource(GPU_Resource *resource); +//- Clear +void GPU_ClearRasterTarget(GPU_CommandList *cl, GpuPointer target); -//////////////////////////////////////////////////////////// -//~ @hookdecl Command list operations +//- Compute +void GPU_Compute(GPU_CommandList *cl, ComputeShader cs, Vec3U32 threads); -GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue); -i64 GPU_EndCommandList(GPU_CommandList *cl); /* Returns the value that the queue's fence will be set to once the command is completed */ +//- Rasterize +void GPU_RasterizeEx(GPU_CommandList *cl, + VertexShader vs, PixelShader ps, + u32 instances_count, + GpuPointer idx_buff, RngU64 idx_buff_range, + u32 raster_targets_count, GpuPointer *raster_targets, + Rng3 viewport, Rng2 scissor); 
-//////////////////////////////////////////////////////////// -//~ @hookdecl Profiling helpers +void GPU_Rasterize(GPU_CommandList *cl, + VertexShader vs, PixelShader ps, + u32 instances_count, GpuPointer idx_buff, + u32 raster_targets_count, GpuPointer *raster_targets); +//- Profiling void GPU_ProfN(GPU_CommandList *cl, String name); -//////////////////////////////////////////////////////////// -//~ @hookdecl Barrier operations - -void GPU_TransitionToReadable(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be read via read-only types in shaders */ -void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be read/written to via read-write types in shader */ -void GPU_TransitionToRenderable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot); /* Allows the resource to be used as a render target bound at slot */ - -void GPU_TransitionToCopySrc(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be used as a source in copy operations */ -void GPU_TransitionToCopyDst(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be used as a destination in copy operations */ - -void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource); /* Waits until writes to a shader writable resource have completed */ - -//////////////////////////////////////////////////////////// -//~ @hookdecl Dispatch operations - -void GPU_ClearRenderable(GPU_CommandList *cl, GPU_Resource *resource); - -#define GPU_Rasterize(cl, sig_ptr, vs, ps, rts_count, viewport, scissor, instances_count, index_buffer, mode) \ - GPU_Rasterize_((cl), sizeof(*(sig_ptr)), (sig_ptr), (vs), (ps), (rts_count), (viewport), (scissor), (instances_count), (index_buffer), (mode)) - -#define GPU_Compute(cl, sig_ptr, cs, threads) GPU_Compute_((cl), sizeof(*(sig_ptr)), (sig_ptr), (cs), (threads)) - -void GPU_Rasterize_(GPU_CommandList *cl, - u32 sig_size, - void *sig, - VertexShader vs, - PixelShader ps, - u32 
rts_count, - GPU_Viewport viewport, - GPU_Scissor scissor, - u32 instances_count, - GPU_Resource *index_buffer, - GPU_RasterizeMode mode); - -void GPU_Compute_(GPU_CommandList *cl, - u32 sig_size, - void *sig, - ComputeShader cs, - Vec3U32 threads); - -//////////////////////////////////////////////////////////// -//~ @hookdecl Resource copy operations - -void GPU_CopyResource(GPU_CommandList *cl, GPU_Resource *dst, GPU_Resource *src); - -//////////////////////////////////////////////////////////// -//~ @hookdecl Map operations - -GPU_Mapped GPU_Map(GPU_Resource *r); -void GPU_Unmap(GPU_Mapped mapped); - -void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference); - //////////////////////////////////////////////////////////// //~ @hookdecl Statistics GPU_Stats GPU_QueryStats(void); //////////////////////////////////////////////////////////// -//~ @hookdecl Swapchain available_to_reserve +//~ @hookdecl Swapchain GPU_Swapchain *GPU_AcquireSwapchain(WND_Handle window, GPU_Format format, Vec2I32 size); void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain); @@ -492,4 +423,4 @@ void GPU_YieldOnSwapchain(GPU_Swapchain *swapchain); * 2. Blits `texture` into position `dst` in the backbuffer * 3. Presents the backbuffer * 4. 
Returns the value that the Direct queue fence will reach once GPU completes blitting (`texture` shouldn't be released while blit is in flight) */ -i64 GPU_PresentSwapchain(GPU_Swapchain *swapchain, GPU_Resource *texture, i32 vsync, Vec2I32 backbuffer_size, Vec2I32 dst_p0, Vec2I32 dst_p1, Vec2I32 src_p0, Vec2I32 src_p1, Vec4 clear_color); +i64 GPU_PresentSwapchain(GPU_Swapchain *swapchain, GpuPointer texture, i32 vsync, Vec2I32 backbuffer_size, Vec2I32 dst_p0, Vec2I32 dst_p1, Vec2I32 src_p0, Vec2I32 src_p1, Vec4 clear_color); diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index f169059f..795a09e1 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -195,6 +195,11 @@ void GPU_D12_InitDevice(void) first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description); } { + /* TODO: Verify feature support: + * - HighestShaderModel >= D3D_SHADER_MODEL_6_6 + * - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3 + * - EnhancedBarriersSupported == 1 + */ hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); } if (SUCCEEDED(hr) && !skip) @@ -890,7 +895,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) : desc.buffer.heap_kind == GPU_HeapKind_Download ? 
D3D12_HEAP_TYPE_READBACK : D3D12_HEAP_TYPE_DEFAULT }; - Assert(!(desc.flags & GPU_ResourceFlag_Renderable)); + Assert(!(desc.flags & GPU_ResourceFlag_Rasterizable)); D3D12_RESOURCE_DESC d3d_desc = ZI; d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; @@ -940,7 +945,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Quality = 0; d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_Writable); - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_Renderable); + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_Rasterizable); r->state = D3D12_RESOURCE_STATE_COMMON; D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } }; clear_value.Color[0] = desc.clear_color.x; @@ -1007,7 +1012,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc) } /* Create rtv descriptor */ - if (desc.flags & GPU_ResourceFlag_Renderable) + if (desc.flags & GPU_ResourceFlag_Rasterizable) { if (!r->rtv_descriptor->valid) { @@ -1183,7 +1188,7 @@ SamplerStateRid GPU_SamplerStateRidFromResource(GPU_Resource *resource) //////////////////////////////////////////////////////////// //~ @hookdef Command list hooks -GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue_kind) +GPU_CommandList *GPU_OpenCommnadList(GPU_QueueKind queue_kind) { GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId()); Arena *perm = PermArena(); @@ -1201,7 +1206,7 @@ GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue_kind) return (GPU_CommandList *)cl; } -i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) +i64 GPU_CloseCommandList(GPU_CommandList *gpu_cl) { GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId()); @@ -1210,8 +1215,8 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) 
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); TempArena scratch = BeginScratchNoConflict(); - GPU_D12_Resource *slotted_render_targets[GPU_MaxRenderTargets] = ZI; - GPU_D12_Resource *bound_render_targets[GPU_MaxRenderTargets] = ZI; + GPU_D12_Resource *slotted_render_targets[GPU_MaxRasterTargets] = ZI; + GPU_D12_Resource *bound_render_targets[GPU_MaxRasterTargets] = ZI; /* Begin dx12 command list */ GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(queue_kind); @@ -1699,7 +1704,7 @@ void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource) cmd->barrier.resource = (GPU_D12_Resource *)resource; } -void GPU_TransitionToRenderable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot) +void GPU_TransitionToRasterizable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot) { GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); cmd->kind = GPU_D12_CommandKind_TransitionToRtv; @@ -1731,7 +1736,7 @@ void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource) //////////////////////////////////////////////////////////// //~ @hookdef Dispatch hooks -void GPU_ClearRenderable(GPU_CommandList *gpu_cl, GPU_Resource *resource) +void GPU_ClearRasterizable(GPU_CommandList *gpu_cl, GPU_Resource *resource) { GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); @@ -1739,14 +1744,12 @@ void GPU_ClearRenderable(GPU_CommandList *gpu_cl, GPU_Resource *resource) cmd->clear.resource = (GPU_D12_Resource *)resource; } -void GPU_Rasterize_(GPU_CommandList *gpu_cl, +void GPU_RasterizeEx(GPU_CommandList *gpu_cl, u32 sig_size, void *sig, VertexShader vs, PixelShader ps, u32 rts_count, - GPU_Viewport viewport, - GPU_Scissor scissor, u32 instances_count, GPU_Resource *index_buffer, GPU_RasterizeMode mode) @@ -1760,7 +1763,7 @@ void GPU_Rasterize_(GPU_CommandList *gpu_cl, cmd->rasterize.vs = vs; cmd->rasterize.ps = ps; cmd->rasterize.rts_count = rts_count; - Assert(rts_count < 
GPU_MaxRenderTargets); + Assert(rts_count < GPU_MaxRasterTargets); cmd->rasterize.viewport = viewport; cmd->rasterize.scissor = scissor; cmd->rasterize.instances_count = instances_count; @@ -1768,7 +1771,7 @@ void GPU_Rasterize_(GPU_CommandList *gpu_cl, cmd->rasterize.mode = mode; } -void GPU_Compute_(GPU_CommandList *gpu_cl, +void GPU_ComputeEx(GPU_CommandList *gpu_cl, u32 sig_size, void *sig, ComputeShader cs, diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index 8f50cb97..d2f857c9 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -34,7 +34,7 @@ Struct(GPU_D12_PipelineDesc) ComputeShader cs; b32 is_wireframe; D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type; - GPU_Format render_target_formats[GPU_MaxRenderTargets]; + GPU_Format render_target_formats[GPU_MaxRasterTargets]; }; Struct(GPU_D12_Pipeline) diff --git a/src/proto/pp_vis/pp_vis_core.c b/src/proto/pp_vis/pp_vis_core.c index ae08faf8..e4c186dd 100644 --- a/src/proto/pp_vis/pp_vis_core.c +++ b/src/proto/pp_vis/pp_vis_core.c @@ -29,21 +29,19 @@ JobDef(V_VisWorker, _, __) { V_SharedState *vis_shared = &V_shared_state; S_SharedState *sim_shared = &S_shared_state; + Arena *frame_arena = AcquireArena(Gibi(64)); Arena *perm = PermArena(); + GPU_Arena *frame_gpu_arena = GPU_AcquireArena(Mibi(8), GPU_CpuAccessFlag_Writable); + GPU_Arena *dverts_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable); + GPU_Arena *dvert_idxs_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable); + ////////////////////////////// //- State - Fence *gpu_fence = GPU_FenceFromQueue(GPU_QueueKind_Direct); - i64 gpu_fence_target = 0; i64 frame_gen = 0; - GPU_Resource *draw_target = 0; - - Arena *dverts_arena = AcquireArena(Gibi(64)); - Arena *dvert_idx_arena = AcquireArena(Gibi(64)); - GPU_TransientBuffer dverts_tbuff = GPU_AcquireTransientBuffer(GPU_QueueKind_Direct, sizeof(V_DVert)); - GPU_TransientBuffer dvert_idx_tbuff = 
GPU_AcquireTransientBuffer(GPU_QueueKind_Direct, sizeof(i32)); + GPU_Texture *draw_target = 0; Struct(Persist) { @@ -510,114 +508,104 @@ JobDef(V_VisWorker, _, __) } UnlockTicketMutex(&sim_shared->input_back_tm); - ////////////////////////////// - //- Build render data - - for (S_Ent *ent = S_FirstEnt(frame_arena, &iter, &lookup); ent->active; ent = S_NextEnt(frame_arena, &iter)) - { - Xform ent_to_world_xf = ent->world_xf; - Xform ent_to_draw_xf = MulXform(world_to_draw_xf, ent_to_world_xf); - - /* Draw shape */ - b32 is_visible = ent->tint.w != 0; - if (is_visible) - { - Vec4 color = ent->tint; - i32 detail = 32; - S_Shape shape = S_MulXformShape(ent_to_draw_xf, ent->local_shape); - V_DrawShape(dverts_arena, dvert_idx_arena, shape, LinearFromSrgb(color), detail, V_DrawFlag_Line); - } - } - ////////////////////////////// //- Render - /* Acquire draw target */ + GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_Direct); { - if (draw_target && !MatchVec2I32(draw_size, GPU_GetTextureSize2D(draw_target))) - { - YieldOnFence(gpu_fence, gpu_fence_target); - GPU_ReleaseResource(draw_target, GPU_ReleaseFlag_None); - draw_target = 0; - } - if (!draw_target) - { - GPU_ResourceDesc desc = ZI; - desc.kind = GPU_ResourceKind_Texture2D; - desc.flags = GPU_ResourceFlag_Writable | GPU_ResourceFlag_Renderable | GPU_ResourceFlag_Zeroed; - desc.texture.format = GPU_Format_R16G16B16A16_Float; - desc.texture.size = VEC3I32(draw_size.x, draw_size.y, 1); - desc.clear_color = LinearFromSrgb(swapchain_color); - draw_target = GPU_AcquireResource(desc); - } - } + ////////////////////////////// + //- Build render data - /* Upload transient buffers */ - GPU_Resource *dverts_buffer = GPU_UploadTransientBufferFromArena(&dverts_tbuff, dverts_arena); - GPU_Resource *dvert_idx_buffer = GPU_UploadTransientBufferFromArena(&dvert_idx_tbuff, dvert_idx_arena); - u64 dverts_count = GPU_GetBufferCount(dverts_buffer); - u64 dvert_idx_count = GPU_GetBufferCount(dvert_idx_buffer); - - GPU_Viewport 
viewport = GPU_ViewportFromRect(RNG2(VEC2(0, 0), Vec2FromFields(draw_size))); - GPU_Scissor scissor = GPU_ScissorFromRect(RNG2(VEC2(0, 0), Vec2FromFields(draw_size))); - - GPU_CommandList *cl = GPU_BeginCommandList(GPU_QueueKind_Direct); - { - /* Prep background pass */ - { - GPU_TransitionToWritable(cl, draw_target); - } - - /* Backdrop pass */ - { - V_BackdropSig sig = ZI; - sig.target_tex = GPU_RWTexture2DRidFromResource(draw_target); - sig.target_size = draw_size; - GPU_Compute(cl, &sig, V_BackdropCS, V_BackdropCSThreadSizeFromTexSize(draw_size)); - } - - /* Prep shapes pass */ - { - GPU_TransitionToRenderable(cl, draw_target, 0); - } - - /* Shapes pass */ - { - V_DVertSig sig = ZI; - sig.target_size = draw_size; - sig.sampler = GPU_SamplerStateRidFromResource(GPU_GetCommonPointSampler()); - sig.verts = GPU_StructuredBufferRidFromResource(dverts_buffer); - GPU_Rasterize(cl, - &sig, - V_DVertVS, V_DVertPS, - 1, - viewport, - scissor, - 1, - dvert_idx_buffer, - GPU_RasterizeMode_TriangleList); - } - - /* Transition draw target for UI composition */ - { - GPU_TransitionToReadable(cl, draw_target); - } - } - gpu_fence_target = GPU_EndCommandList(cl); - - /* Reset transient buffers */ - { - GPU_ResetTransientBuffer(&dverts_tbuff, gpu_fence_target); - GPU_ResetTransientBuffer(&dvert_idx_tbuff, gpu_fence_target); + GPU_ResetArena(cl, gpu_frame_arena); ResetArena(dverts_arena); - ResetArena(dvert_idx_arena); + ResetArena(dvert_idxs_arena); + + /* Build shape buffers */ + GpuPointer dverts = ZI; + GpuPointer dvert_idxs = ZI; + { + for (S_Ent *ent = S_FirstEnt(frame_arena, &iter, &lookup); ent->active; ent = S_NextEnt(frame_arena, &iter)) + { + Xform ent_to_world_xf = ent->world_xf; + Xform ent_to_draw_xf = MulXform(world_to_draw_xf, ent_to_world_xf); + + /* Draw shape */ + b32 is_visible = ent->tint.w != 0; + if (is_visible) + { + Vec4 color = ent->tint; + i32 detail = 32; + S_Shape shape = S_MulXformShape(ent_to_draw_xf, ent->local_shape); + V_DrawShape(dverts_arena, 
dvert_idxs_arena, shape, LinearFromSrgb(color), detail, V_DrawFlag_Line); + } + } + dverts = GPU_PushCpuStructsToArena(gpu_frame_arena, V_DVert, dverts_arena); + dvert_idxs = GPU_PushCpuStructsToArena(gpu_frame_arena, i32, dvert_idxs_arena); + } + + /* Create draw state */ + if (!draw_target || !MatchVec2I32(draw_size, GPU_Count2D(draw_target))) + { + GPU_ResetArena(cl, gpu_arena); + /* Draw target */ + { + GPU_TextureDesc desc = ZI; + desc.kind = GPU_TextureKind_Texture2D; + desc.flags = GPU_TextureFlag_Writable | GPU_TextureFlag_Rasterizable; + desc.format = GPU_Format_R16G16B16A16_Float; + desc.size = VEC3I32(draw_size.x, draw_size.y, 1); + desc.clear_color = LinearFromSrgb(swapchain_color); + draw_target = GPU_PushTexture(gpu_arena, desc); + } + /* Draw params */ + draw_params = GPU_PushStructNoZero(gpu_arena, V_DParams); + } + + /* Build draw params */ + GpuPointer draw_params = ZI; + { + V_DParams params = ZI; + params.world_to_draw_xf = world_to_draw_xf; + GPU_CopyCpuStructToBuffer(draw_params, 0, &params); + } + + ////////////////////////////// + //- Dispatch shaders + + Rng2 viewport = RNG2(VEC2(0, 0), Vec2FromFields(draw_size)); + { + GPU_SetConstantPtr(cl, V_DrawConst_Params, draw_params); + GPU_SetConstantPtr(cl, V_DrawConst_FinalTarget, draw_target); + GPU_SetConstantPtr(cl, V_DrawConst_Sampler, GPU_GetCommonPointSampler()); + GPU_SetConstantPtr(cl, V_DrawConst_DVerts, dverts); + + /* Backdrop pass */ + { + GPU_SetShaderAccess(cl, draw_target, GPU_ShaderAccessKind_ReadWrite); + GPU_Compute(cl, V_BackdropCS, V_BackdropCSThreadSizeFromTexSize(draw_size)); + } + + /* Shapes pass */ + { + GPU_SetShaderAccess(cl, draw_target, GPU_ShaderAccessKind_RasterTarget); + GPU_Rasterize(cl, + V_DVertVS, V_DVertPS, + 1, dvert_idxs_buffer, + 1, draw_target, + viewport, viewport, + GPU_RasterizeMode_TriangleList); + } + + GPU_SetShaderAccess(cl, draw_target, GPU_ShaderAccessKind_Readonly); + } } + GPU_CloseCommandLiist(cl); ////////////////////////////// //- End vis frame
UI_SetRawTexture(vis_box, draw_target, VEC2(0, 0), VEC2(1, 1)); - gpu_fence_target = UI_EndFrame(ui_frame); + UI_EndFrame(ui_frame); ++frame_gen; shutdown = Atomic32Fetch(&vis_shared->shutdown); diff --git a/src/proto/pp_vis/pp_vis_draw.c b/src/proto/pp_vis/pp_vis_draw.c index ae7e9321..c0d20ffd 100644 --- a/src/proto/pp_vis/pp_vis_draw.c +++ b/src/proto/pp_vis/pp_vis_draw.c @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////// //~ Shape helpers -void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 color_lin, V_DrawFlag flags) +void V_DrawPoly(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, Vec2Array points, Vec4 color_lin, V_DrawFlag flags) { if (flags & V_DrawFlag_Line) { @@ -14,10 +14,10 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col i32 lines_count = verts_count == 2 ? 1 : verts_count; i32 line_verts_count = lines_count * 4; i32 idx_count = lines_count * 6; - i32 idx_offset = ArenaCount(verts_arena, V_DVert); + i32 idx_offset = GPU_ArenaCount(verts_gpu_arena, V_DVert); /* Push dverts */ - V_DVert *dverts = PushStructsNoZero(verts_arena, V_DVert, line_verts_count); + V_DVert *dverts = GPU_PushStructsNoZero(verts_gpu_arena, V_DVert, line_verts_count); for (i32 line_idx = 0; line_idx < lines_count; ++line_idx) { i32 a_idx = line_idx; @@ -46,7 +46,7 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col } /* Generate indices */ - i32 *indices = PushStructsNoZero(idx_arena, i32, idx_count); + i32 *indices = PushStructsNoZero(idxs_gpu_arena, i32, idx_count); for (i32 line_idx = 0; line_idx < lines_count; ++line_idx) { i32 indices_offset = line_idx * 6; @@ -67,12 +67,12 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col i32 verts_count = points.count; if (verts_count >= 3) { - i32 idx_offset = ArenaCount(verts_arena, V_DVert); + i32 idx_offset = GPU_ArenaCount(verts_gpu_arena, V_DVert); i32 tris_count = verts_count - 
2; i32 idx_count = tris_count * 3; /* Push dverts */ - V_DVert *dverts = PushStructsNoZero(verts_arena, V_DVert, verts_count); + V_DVert *dverts = GPU_PushStructsNoZero(verts_gpu_arena, V_DVert, verts_count); for (i32 point_idx = 0; point_idx < (i32)points.count; ++point_idx) { V_DVert *dvert = &dverts[point_idx]; @@ -81,7 +81,7 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col } /* Generate indices in a fan pattern */ - i32 *indices = PushStructsNoZero(idx_arena, i32, idx_count); + i32 *indices = PushStructsNoZero(idxs_gpu_arena, i32, idx_count); for (i32 i = 0; i < tris_count; ++i) { i32 tri_offset = i * 3; @@ -93,14 +93,14 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col } } -void V_DrawShape(Arena *verts_arena, Arena *idx_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags) +void V_DrawShape(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags) { if (shape.radius == 0) { Vec2Array draw_points = ZI; draw_points.points = shape.points; draw_points.count = shape.points_count; - V_DrawPoly(verts_arena, idx_arena, draw_points, color_lin, flags); + V_DrawPoly(verts_gpu_arena, idxs_gpu_arena, draw_points, color_lin, flags); } else { @@ -116,7 +116,7 @@ void V_DrawShape(Arena *verts_arena, Arena *idx_arena, S_Shape shape, Vec4 color Vec2 sp = S_SupportPointFromShape(shape, dir); draw_points.points[i] = sp; } - V_DrawPoly(verts_arena, idx_arena, draw_points, color_lin, flags); + V_DrawPoly(verts_gpu_arena, idxs_gpu_arena, draw_points, color_lin, flags); } EndScratch(scratch); } diff --git a/src/proto/pp_vis/pp_vis_draw.h b/src/proto/pp_vis/pp_vis_draw.h index 3597d97a..fe3a2a8b 100644 --- a/src/proto/pp_vis/pp_vis_draw.h +++ b/src/proto/pp_vis/pp_vis_draw.h @@ -10,5 +10,5 @@ Enum(V_DrawFlag) //////////////////////////////////////////////////////////// //~ Shape helpers -void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, 
Vec2Array points, Vec4 color_lin, V_DrawFlag flags); -void V_DrawShape(Arena *verts_arena, Arena *idx_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags); +void V_DrawPoly(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, Vec2Array points, Vec4 color_lin, V_DrawFlag flags); +void V_DrawShape(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags); diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index b90fea9b..fa4b2ce1 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -20,76 +20,27 @@ JobDef(SPR_LoadTexture, sig, _) ASE_DecodedImage decoded = ASE_DecodeImage(scratch.arena, data); ok = decoded.ok; + /* Upload texture to gpu */ if (ok) { - GPU_ResourceDesc desc = ZI; - desc.kind = GPU_ResourceKind_Texture2D; - desc.flags = GPU_ResourceFlag_None; - desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb; - desc.texture.size = VEC3I32(decoded.width, decoded.height, 1); - texture->gpu_texture = GPU_AcquireResource(desc); + GPU_Arena *gpu_perm = GPU_Perm(); + GpuPointer gpu_tex = GPU_PushTexture(gpu_perm, + GPU_TextureKind_2D, + GPU_Format_R8G8B8A8_Unorm_Srgb, + VEC3I32(decoded.width, decoded.height, 1), + GPU_TextureFlag_Allow); + texture->gpu_texture = gpu_tex; texture->width = decoded.width; texture->height = decoded.height; - /* Fill upload buffer */ - GPU_ResourceDesc upload_desc = ZI; - upload_desc.kind = GPU_ResourceKind_Buffer; - upload_desc.buffer.heap_kind = GPU_HeapKind_Upload; - upload_desc.buffer.count = GPU_GetFootprintSize(texture->gpu_texture); - GPU_Resource *upload = GPU_AcquireResource(upload_desc); + GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_BackgroundCopy); { - GPU_Mapped mapped = GPU_Map(upload); - GPU_CopyBytesToFootprint(mapped.mem, (u8 *)decoded.pixels, texture->gpu_texture); - GPU_Unmap(mapped); + GPU_ReadCpu(cl, gpu_tex, decoded.data); + GPU_SetCpuFence(cl, &entry->texture_ready_fence, 1); } - - GPU_QueueKind copy_queue = 
GPU_QueueKind_BackgroundCopy; - GPU_QueueKind direct_queue = GPU_QueueKind_Direct; - Fence *direct_queue_fence = GPU_FenceFromQueue(direct_queue); - i64 direct_queue_fence_target = 0; - if (copy_queue == direct_queue) - { - /* Copy & transition GPU resource on direct queue*/ - { - GPU_CommandList *cl = GPU_BeginCommandList(direct_queue); - { - GPU_TransitionToCopyDst(cl, texture->gpu_texture); - GPU_CopyResource(cl, texture->gpu_texture, upload); - GPU_TransitionToReadable(cl, texture->gpu_texture); - } - direct_queue_fence_target = GPU_EndCommandList(cl); - } - } - else - { - /* Copy to GPU resource on background copy queue*/ - i64 copy_queue_fence_target = 0; - { - GPU_CommandList *cl = GPU_BeginCommandList(copy_queue); - { - GPU_TransitionToCopyDst(cl, texture->gpu_texture); - GPU_CopyResource(cl, texture->gpu_texture, upload); - } - copy_queue_fence_target = GPU_EndCommandList(cl); - } - /* Once copy finishes, transition resource to readable on direct queue */ - { - GPU_QueueWait(direct_queue, copy_queue, copy_queue_fence_target); - GPU_CommandList *cl = GPU_BeginCommandList(direct_queue); - { - GPU_TransitionToReadable(cl, texture->gpu_texture); - } - direct_queue_fence_target = GPU_EndCommandList(cl); - } - } - - /* Release upload buffer once transition finishes */ - YieldOnFence(direct_queue_fence, direct_queue_fence_target); - GPU_ReleaseResource(upload, GPU_ReleaseFlag_None); + GPU_EndCommandList(cl); } - texture->loaded = 1; - SetFence(&entry->texture_ready_fence, 1); EndScratch(scratch); } @@ -274,7 +225,6 @@ JobDef(SPR_LoadSheet, sig, _) } } - sheet->loaded = 1; SetFence(&entry->sheet_ready_fence, 1); EndScratch(scratch); } diff --git a/src/sprite/sprite.h b/src/sprite/sprite.h index d2b1b8d8..6c6e6d77 100644 --- a/src/sprite/sprite.h +++ b/src/sprite/sprite.h @@ -17,8 +17,7 @@ Struct(SPR_SliceKey) Struct(SPR_Texture) { b32 valid; - b32 loaded; - GPU_Resource *gpu_texture; + GpuTexture gpu_texture; u32 width; u32 height; }; @@ -86,7 +85,6 @@ 
Struct(SPR_SliceGroupBin) Struct(SPR_Sheet) { b32 valid; - b32 loaded; Vec2 image_size; Vec2 frame_size; diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index f2fa442c..0cde39e2 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -676,59 +676,32 @@ i64 UI_EndFrame(UI_Frame frame) UI_State *g = &UI_state; UI_EFrameState old_eframe = g->eframe; + Vec2I32 monitor_size = frame.window_frame.monitor_size; + + Vec2I32 draw_size = frame.window_frame.draw_size; + Rng2 draw_viewport = ZI; + draw_viewport.p1 = Vec2FromFields(draw_size); + ////////////////////////////// //- Reset state { ZeroStruct(&g->eframe); g->eframe.layout_arena = old_eframe.layout_arena; - g->eframe.rects_arena = old_eframe.rects_arena; + g->eframe.drects_gpu_arena = old_eframe.drects_gpu_arena; g->eframe.draw_target = old_eframe.draw_target; g->eframe.swapchain = old_eframe.swapchain; g->eframe.gpu_submit_fence_target = old_eframe.gpu_submit_fence_target; - g->eframe.draw_rects_tbuff = old_eframe.draw_rects_tbuff; g->eframe.tick = old_eframe.tick; } if (!g->eframe.layout_arena) { g->eframe.layout_arena = AcquireArena(Gibi(64)); - g->eframe.rects_arena = AcquireArena(Gibi(64)); - g->eframe.draw_rects_tbuff = GPU_AcquireTransientBuffer(GPU_QueueKind_Direct, sizeof(UI_DRect)); + g->eframe.tex_gpu_arena = GPU_AcquireTextureArena(); + g->eframe.frame_gpu_arena = GPU_AcquireArena(Mibi(16)); + g->eframe.drects_gpu_arena = GPU_AcquireArena(Mibi(16)); } ResetArena(g->eframe.layout_arena); - ResetArena(g->eframe.rects_arena); - - ////////////////////////////// - //- Init render state - - Vec2I32 monitor_size = frame.window_frame.monitor_size; - - GPU_QueueKind gpu_render_queue = GPU_QueueKind_Direct; - Fence *submit_fence = GPU_FenceFromQueue(gpu_render_queue); - - /* Acquire render target */ - if (g->eframe.draw_target && !MatchVec2I32(monitor_size, GPU_GetTextureSize2D(g->eframe.draw_target))) - { - __profn("Release ui render target"); - YieldOnFence(submit_fence, g->eframe.gpu_submit_fence_target); - 
GPU_ReleaseResource(g->eframe.draw_target, GPU_ReleaseFlag_None); - g->eframe.draw_target = 0; - } - if (!g->eframe.draw_target) - { - __profn("Acquire ui render target"); - GPU_ResourceDesc desc = ZI; - desc.kind = GPU_ResourceKind_Texture2D; - desc.flags = GPU_ResourceFlag_Renderable | GPU_ResourceFlag_Writable; - // desc.texture.format = GPU_Format_R8G8B8A8_Unorm; - desc.texture.format = GPU_Format_R16G16B16A16_Float; - desc.texture.size = VEC3I32(monitor_size.x, monitor_size.y, 1); - g->eframe.draw_target = GPU_AcquireResource(desc); - } - - Vec2I32 draw_size = frame.window_frame.draw_size; - Rng2 draw_viewport = ZI; - draw_viewport.p1 = Vec2FromFields(draw_size); ////////////////////////////// //- Process commands @@ -1255,245 +1228,266 @@ i64 UI_EndFrame(UI_Frame frame) } } - ////////////////////////////// - //- Build render data - - GPU_QueueKind render_queue = GPU_QueueKind_Direct; - Fence *render_fence = GPU_FenceFromQueue(render_queue); - - /* Build rect instance data */ - for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index) - { - UI_Box *box = boxes_pre[pre_index]; - b32 is_visible = 1; - is_visible = is_visible && (box->desc.tint.w != 0); - is_visible = is_visible && (box->p1.x > box->p0.x); - is_visible = is_visible && (box->p1.y > box->p0.y); - if (is_visible || AnyBit(g->bframe.frame_flags, UI_FrameFlag_Debug)) - { - /* Box rect */ - { - UI_DRect *rect = PushStruct(g->eframe.rects_arena, UI_DRect); - rect->flags |= UI_DRectFlag_DrawTexture * !!(box->raw_texture != 0); - rect->p0 = box->p0; - rect->p1 = box->p1; - rect->tex_uv0 = VEC2(0, 0); - rect->tex_uv1 = VEC2(1, 1); - rect->background_lin = LinearFromSrgb(box->desc.background_color); - rect->border_lin = LinearFromSrgb(box->desc.border_color); - rect->debug_lin = LinearFromSrgb(box->desc.debug_color); - rect->tint_lin = LinearFromSrgb(box->desc.tint); - rect->border = box->desc.border; - rect->tl_rounding = box->rounding_tl; - rect->tr_rounding = box->rounding_tr; - rect->br_rounding = 
box->rounding_br; - rect->bl_rounding = box->rounding_bl; - - /* Texture */ - if (box->raw_texture != 0) - { - rect->tex = GPU_Texture2DRidFromResource(box->raw_texture); - rect->tex_uv0 = box->raw_texture_uv0; - rect->tex_uv1 = box->raw_texture_uv1; - } - } - - /* Text rects */ - if (AnyBit(box->desc.flags, UI_BoxFlag_DrawText) && box->glyph_run.count > 0 && box->font) - { - Texture2DRid tex_rid = GPU_Texture2DRidFromResource(box->font->texture); - Vec2 inv_font_image_size = VEC2(1.0f / (f32)box->font->image_width, 1.0f / (f32)box->font->image_height); - - F_Run run = box->glyph_run; - f32 max_baseline = box->p1.x - box->p0.x; - b32 should_truncate = run.count > 0 && (run.rects[run.count - 1].pos + run.rects[run.count - 1].advance) > max_baseline; - - /* Truncate run */ - if (should_truncate && !AnyBit(box->desc.flags, UI_BoxFlag_NoTextTruncation)) - { - /* Get elipses run */ - F_Run trunc_run = F_RunFromString(scratch.arena, box->font, Lit("...")); - if (trunc_run.count > 0) - { - max_baseline -= trunc_run.rects[trunc_run.count - 1].pos + trunc_run.rects[trunc_run.count - 1].advance; - } - - /* Subtract glyphs */ - while (run.count > 0) - { - F_RunRect rr = run.rects[run.count - 1]; - if (rr.pos + rr.advance <= max_baseline) - { - break; - } - --run.count; - } - - /* Merge trunc rects */ - F_RunRect *new_rects = 0; - { - new_rects = PushStructsNoZero(scratch.arena, F_RunRect, run.count + trunc_run.count); - CopyStructs(new_rects, run.rects, run.count); - f32 trunc_offset = run.count > 0 ? 
(run.rects[run.count - 1].pos + run.rects[run.count - 1].advance) : 0; - for (u32 i = 0; i < trunc_run.count; ++i) - { - F_RunRect *rr = &new_rects[i + run.count]; - *rr = trunc_run.rects[i]; - rr->pos += trunc_offset; - } - } - run.count += trunc_run.count; - run.rects = new_rects; - } - - UI_AxisAlignment x_alignment = box->desc.child_alignment[Axis_X]; - UI_AxisAlignment y_alignment = box->desc.child_alignment[Axis_Y]; - if (should_truncate) - { - x_alignment = UI_AxisAlignment_Start; - } - - /* Calculate baseline */ - f32 ascent = box->font->ascent; - f32 descent = box->font->descent; - f32 cap = box->font->cap; - f32 baseline_width = run.count > 0 ? (run.rects[run.count - 1].pos + run.rects[run.count - 1].advance) : 0; - f32 baseline_height = ascent + descent; - f32 box_width = box->p1.x - box->p0.x; - f32 box_height = box->p1.y - box->p0.y; - Vec2 baseline = ZI; - switch (x_alignment) - { - case UI_AxisAlignment_Start: - { - baseline.x = box->p0.x; - } break; - case UI_AxisAlignment_End: - { - baseline.x = box->p1.x; - baseline.x -= baseline_width; - } break; - case UI_AxisAlignment_Center: - { - baseline.x = box->p0.x; - baseline.x += (box_width - baseline_width) / 2; - } break; - } - switch (y_alignment) - { - case UI_AxisAlignment_Start: - { - baseline.y = box->p0.y; - baseline.y += ascent; - } break; - case UI_AxisAlignment_End: - { - baseline.y = box->p1.y; - baseline.y -= descent; - } break; - case UI_AxisAlignment_Center: - { - baseline.y = box->p0.y; - baseline.y += box_height / 2; - baseline.y += cap / 2; - } break; - } - baseline = CeilVec2(baseline); - - /* Push text rects */ - for (u64 i = 0; i < run.count; ++i) - { - F_RunRect rr = run.rects[i]; - Vec2 atlas_p0 = Vec2FromFields(rr.atlas_p0); - Vec2 atlas_p1 = Vec2FromFields(rr.atlas_p1); - Vec2 glyph_size = SubVec2(atlas_p1, atlas_p0); - if (glyph_size.x != 0 || glyph_size.y != 0) - { - UI_DRect *rect = PushStruct(g->eframe.rects_arena, UI_DRect); - rect->flags |= UI_DRectFlag_DrawTexture; - 
rect->p0 = AddVec2(baseline, VEC2(rr.pos, 0)); - rect->p0 = AddVec2(rect->p0, rr.offset); - rect->p1 = AddVec2(rect->p0, glyph_size); - rect->debug_lin = LinearFromSrgb(box->desc.debug_color); - rect->tint_lin = LinearFromSrgb(box->desc.tint); - rect->tex_uv0 = MulVec2Vec2(atlas_p0, inv_font_image_size); - rect->tex_uv1 = MulVec2Vec2(atlas_p1, inv_font_image_size); - rect->tex = tex_rid; - } - } - } - } - } - ////////////////////////////// //- Render - /* Upload transient buffers */ - GPU_Resource *draw_rects_buffer = GPU_UploadTransientBufferFromArena(&g->eframe.draw_rects_tbuff, g->eframe.rects_arena); - u32 draw_rects_count = GPU_GetBufferCount(draw_rects_buffer); - - /* Build command list */ - GPU_CommandList *cl = GPU_BeginCommandList(render_queue); + GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_Direct); { - //- Prep rect pass + ////////////////////////////// + //- Build render data + + /* Acquire render target */ + if (!g->eframe.draw_target || !MatchVec2I32(monitor_size, GPU_Count2D(g->eframe.draw_target))) { - __profn("Clear target"); - GPU_ProfN(cl, Lit("Clear target")); - GPU_TransitionToRenderable(cl, g->eframe.draw_target, 0); - GPU_ClearRenderable(cl, g->eframe.draw_target); + YieldOnFence(submit_fence, g->eframe.gpu_submit_fence_target); + GPU_ReleaseResource(g->eframe.draw_target, GPU_ReleaseFlag_None); + g->eframe.draw_target = 0; + } + if (!g->eframe.draw_target) + { + __profn("Acquire ui render target"); + GPU_ResourceDesc desc = ZI; + desc.kind = GPU_ResourceKind_Texture2D; + desc.flags = GPU_ResourceFlag_Renderable | GPU_ResourceFlag_Writable; + // desc.texture.format = GPU_Format_R8G8B8A8_Unorm; + desc.texture.format = GPU_Format_R16G16B16A16_Float; + desc.texture.size = VEC3I32(monitor_size.x, monitor_size.y, 1); + g->eframe.draw_target = GPU_AcquireResource(desc); } - //- Rect pass - if (draw_rects_count > 0) + /* Build rect instance data */ + for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index) { - __profn("UI rect 
pass"); - GPU_ProfN(cl, Lit("UI rect pass")); - - GPU_Viewport viewport = GPU_ViewportFromRect(draw_viewport); - GPU_Scissor scissor = GPU_ScissorFromRect(draw_viewport); - - /* Render rects */ + UI_Box *box = boxes_pre[pre_index]; + b32 is_visible = 1; + is_visible = is_visible && (box->desc.tint.w != 0); + is_visible = is_visible && (box->p1.x > box->p0.x); + is_visible = is_visible && (box->p1.y > box->p0.y); + if (is_visible || AnyBit(g->bframe.frame_flags, UI_FrameFlag_Debug)) { - UI_DRectSig sig = ZI; - sig.target_size = draw_size; - sig.sampler = GPU_SamplerStateRidFromResource(GPU_GetCommonPointSampler()); - sig.rects = GPU_StructuredBufferRidFromResource(draw_rects_buffer); - GPU_Rasterize(cl, - &sig, - UI_DRectVS, UI_DRectPS, - 1, - viewport, - scissor, - draw_rects_count, - GPU_GetCommonQuadIndices(), - GPU_RasterizeMode_TriangleList); - } + /* Box rect */ + { + UI_DRect *rect = PushStruct(g->eframe.rects_arena, UI_DRect); + rect->flags |= UI_DRectFlag_DrawTexture * !(IsGpuPointerNil(box->raw_texture)); + rect->p0 = box->p0; + rect->p1 = box->p1; + rect->tex_uv0 = VEC2(0, 0); + rect->tex_uv1 = VEC2(1, 1); + rect->background_lin = LinearFromSrgb(box->desc.background_color); + rect->border_lin = LinearFromSrgb(box->desc.border_color); + rect->debug_lin = LinearFromSrgb(box->desc.debug_color); + rect->tint_lin = LinearFromSrgb(box->desc.tint); + rect->border = box->desc.border; + rect->tl_rounding = box->rounding_tl; + rect->tr_rounding = box->rounding_tr; + rect->br_rounding = box->rounding_br; + rect->bl_rounding = box->rounding_bl; - /* Render rect wireframes */ - if (AnyBit(g->bframe.frame_flags, UI_FrameFlag_Debug)) - { - UI_DRectSig sig = ZI; - sig.target_size = draw_size; - sig.sampler = GPU_SamplerStateRidFromResource(GPU_GetCommonPointSampler()); - sig.rects = GPU_StructuredBufferRidFromResource(draw_rects_buffer); - sig.debug_enabled = 1; - GPU_Rasterize(cl, - &sig, - UI_DRectVS, UI_DRectPS, - 1, - viewport, - scissor, - draw_rects_count, - 
GPU_GetCommonQuadIndices(), - GPU_RasterizeMode_WireTriangleList); + /* Texture */ + if (!IsGpuPointerNil(box->raw_texture)) + { + rect->tex = box->raw_texture; + rect->tex_uv0 = box->raw_texture_uv0; + rect->tex_uv1 = box->raw_texture_uv1; + } + } + + /* Text rects */ + if (AnyBit(box->desc.flags, UI_BoxFlag_DrawText) && box->glyph_run.count > 0 && box->font) + { + Texture2DRid tex_rid = GPU_Texture2DRidFromResource(box->font->texture); + Vec2 inv_font_image_size = VEC2(1.0f / (f32)box->font->image_width, 1.0f / (f32)box->font->image_height); + + F_Run run = box->glyph_run; + f32 max_baseline = box->p1.x - box->p0.x; + b32 should_truncate = run.count > 0 && (run.rects[run.count - 1].pos + run.rects[run.count - 1].advance) > max_baseline; + + /* Truncate run */ + if (should_truncate && !AnyBit(box->desc.flags, UI_BoxFlag_NoTextTruncation)) + { + /* Get elipses run */ + F_Run trunc_run = F_RunFromString(scratch.arena, box->font, Lit("...")); + if (trunc_run.count > 0) + { + max_baseline -= trunc_run.rects[trunc_run.count - 1].pos + trunc_run.rects[trunc_run.count - 1].advance; + } + + /* Subtract glyphs */ + while (run.count > 0) + { + F_RunRect rr = run.rects[run.count - 1]; + if (rr.pos + rr.advance <= max_baseline) + { + break; + } + --run.count; + } + + /* Merge trunc rects */ + F_RunRect *new_rects = 0; + { + new_rects = PushStructsNoZero(scratch.arena, F_RunRect, run.count + trunc_run.count); + CopyStructs(new_rects, run.rects, run.count); + f32 trunc_offset = run.count > 0 ? 
(run.rects[run.count - 1].pos + run.rects[run.count - 1].advance) : 0; + for (u32 i = 0; i < trunc_run.count; ++i) + { + F_RunRect *rr = &new_rects[i + run.count]; + *rr = trunc_run.rects[i]; + rr->pos += trunc_offset; + } + } + run.count += trunc_run.count; + run.rects = new_rects; + } + + UI_AxisAlignment x_alignment = box->desc.child_alignment[Axis_X]; + UI_AxisAlignment y_alignment = box->desc.child_alignment[Axis_Y]; + if (should_truncate) + { + x_alignment = UI_AxisAlignment_Start; + } + + /* Calculate baseline */ + f32 ascent = box->font->ascent; + f32 descent = box->font->descent; + f32 cap = box->font->cap; + f32 baseline_width = run.count > 0 ? (run.rects[run.count - 1].pos + run.rects[run.count - 1].advance) : 0; + f32 baseline_height = ascent + descent; + f32 box_width = box->p1.x - box->p0.x; + f32 box_height = box->p1.y - box->p0.y; + Vec2 baseline = ZI; + switch (x_alignment) + { + case UI_AxisAlignment_Start: + { + baseline.x = box->p0.x; + } break; + case UI_AxisAlignment_End: + { + baseline.x = box->p1.x; + baseline.x -= baseline_width; + } break; + case UI_AxisAlignment_Center: + { + baseline.x = box->p0.x; + baseline.x += (box_width - baseline_width) / 2; + } break; + } + switch (y_alignment) + { + case UI_AxisAlignment_Start: + { + baseline.y = box->p0.y; + baseline.y += ascent; + } break; + case UI_AxisAlignment_End: + { + baseline.y = box->p1.y; + baseline.y -= descent; + } break; + case UI_AxisAlignment_Center: + { + baseline.y = box->p0.y; + baseline.y += box_height / 2; + baseline.y += cap / 2; + } break; + } + baseline = CeilVec2(baseline); + + /* Push text rects */ + for (u64 i = 0; i < run.count; ++i) + { + F_RunRect rr = run.rects[i]; + Vec2 atlas_p0 = Vec2FromFields(rr.atlas_p0); + Vec2 atlas_p1 = Vec2FromFields(rr.atlas_p1); + Vec2 glyph_size = SubVec2(atlas_p1, atlas_p0); + if (glyph_size.x != 0 || glyph_size.y != 0) + { + UI_DRect *rect = PushStruct(g->eframe.rects_arena, UI_DRect); + rect->flags |= UI_DRectFlag_DrawTexture; + 
rect->p0 = AddVec2(baseline, VEC2(rr.pos, 0)); + rect->p0 = AddVec2(rect->p0, rr.offset); + rect->p1 = AddVec2(rect->p0, glyph_size); + rect->debug_lin = LinearFromSrgb(box->desc.debug_color); + rect->tint_lin = LinearFromSrgb(box->desc.tint); + rect->tex_uv0 = MulVec2Vec2(atlas_p0, inv_font_image_size); + rect->tex_uv1 = MulVec2Vec2(atlas_p1, inv_font_image_size); + rect->tex = tex_rid; + } + } + } } } + + ////////////////////////////// + //- Dispatch shaders + + /* Upload transient buffers */ + GPU_Resource *draw_rects_buffer = GPU_UploadTransientBufferFromArena(&g->eframe.draw_rects_tbuff, g->eframe.rects_arena); + u32 draw_rects_count = GPU_GetBufferCount(draw_rects_buffer); + + { + //- Prep rect pass + { + __profn("Clear target"); + GPU_ProfN(cl, Lit("Clear target")); + GPU_TransitionToRenderable(cl, g->eframe.draw_target, 0); + GPU_ClearRenderable(cl, g->eframe.draw_target); + } + + //- Rect pass + if (draw_rects_count > 0) + { + __profn("UI rect pass"); + GPU_ProfN(cl, Lit("UI rect pass")); + + GPU_Viewport viewport = GPU_ViewportFromRect(draw_viewport); + GPU_Scissor scissor = GPU_ScissorFromRect(draw_viewport); + + /* Render rects */ + { + UI_DRectSig sig = ZI; + sig.target_size = draw_size; + sig.sampler = GPU_SamplerStateRidFromResource(GPU_GetCommonPointSampler()); + sig.rects = GPU_StructuredBufferRidFromResource(draw_rects_buffer); + GPU_Rasterize(cl, + &sig, + UI_DRectVS, UI_DRectPS, + 1, + viewport, + scissor, + draw_rects_count, + GPU_GetCommonQuadIndices(), + GPU_RasterizeMode_TriangleList); + } + + /* Render rect wireframes */ + if (AnyBit(g->bframe.frame_flags, UI_FrameFlag_Debug)) + { + UI_DRectSig sig = ZI; + sig.target_size = draw_size; + sig.sampler = GPU_SamplerStateRidFromResource(GPU_GetCommonPointSampler()); + sig.rects = GPU_StructuredBufferRidFromResource(draw_rects_buffer); + sig.debug_enabled = 1; + GPU_Rasterize(cl, + &sig, + UI_DRectVS, UI_DRectPS, + 1, + viewport, + scissor, + draw_rects_count, + GPU_GetCommonQuadIndices(), + 
GPU_RasterizeMode_WireTriangleList); + } + } + } + + /* Reset */ + GPU_ResetArena(cl, g->eframe.frame_gpu_arena); + GPU_ResetArena(cl, g->eframe.drects_gpu_arena); } - g->eframe.gpu_submit_fence_target = GPU_EndCommandList(cl); + GPU_CloseCommandList(cl); - /* Reset render data */ - GPU_ResetTransientBuffer(&g->eframe.draw_rects_tbuff, g->eframe.gpu_submit_fence_target); - ResetArena(g->eframe.rects_arena); ////////////////////////////// //- Present & end frame @@ -1508,12 +1502,11 @@ i64 UI_EndFrame(UI_Frame frame) Vec2I32 dst_p1 = VEC2I32(0, 0); Vec2I32 src_p0 = VEC2I32(0, 0); Vec2I32 src_p1 = draw_size; - g->eframe.gpu_submit_fence_target = GPU_PresentSwapchain(g->eframe.swapchain, g->eframe.draw_target, AnyBit(g->bframe.frame_flags, UI_FrameFlag_Vsync), backbuffer_size, dst_p0, dst_p1, src_p0, src_p1, LinearFromSrgb(g->bframe.swapchain_color)); + GPU_PresentSwapchain(g->eframe.swapchain, g->eframe.draw_target, AnyBit(g->bframe.frame_flags, UI_FrameFlag_Vsync), backbuffer_size, dst_p0, dst_p1, src_p0, src_p1, LinearFromSrgb(g->bframe.swapchain_color)); } WND_EndFrame(frame.window_frame); ++g->eframe.tick; EndScratch(scratch); - return g->eframe.gpu_submit_fence_target; } diff --git a/src/ui/ui_core.h b/src/ui/ui_core.h index 00e935ed..e5054232 100644 --- a/src/ui/ui_core.h +++ b/src/ui/ui_core.h @@ -109,7 +109,7 @@ Enum(UI_BoxFlag) X(Font, ResourceKey) \ X(FontSize, u32) \ X(Text, String) \ - X(BackgroundTexture, GPU_Resource *) \ + X(BackgroundTexture, GpuPointer) \ X(BackgroundTextureUv0, Vec2) \ X(BackgroundTextureUv1, Vec2) \ /* --------------------------------------- */ \ @@ -268,7 +268,7 @@ Struct(UI_Box) //- Cmd data UI_BoxDesc desc; - GPU_Resource *raw_texture; + GpuPointer raw_texture; Vec2 raw_texture_uv0; Vec2 raw_texture_uv1; @@ -372,14 +372,14 @@ Struct(UI_State) struct UI_EFrameState { Arena *layout_arena; - Arena *rects_arena; + GPU_Arena *frame_gpu_arena; + GPU_Arena *drects_gpu_arena; u64 tick; /* Render */ - GPU_Resource *draw_target; + 
GpuPointer draw_target; GPU_Swapchain *swapchain; i64 gpu_submit_fence_target; - GPU_TransientBuffer draw_rects_tbuff; /* Layout */ UI_Box *root_box; @@ -493,4 +493,4 @@ Vec2 UI_CursorPos(void); //////////////////////////////////////////////////////////// //~ End frame -i64 UI_EndFrame(UI_Frame frame); +void UI_EndFrame(UI_Frame frame);