gpu layer refactor progress

This commit is contained in:
jacob 2025-11-16 06:01:18 -06:00
parent 4b0a12bc20
commit 4a8eacbcda
18 changed files with 694 additions and 1029 deletions

View File

@ -762,6 +762,10 @@ Struct(ResourceKey)
#if LanguageIsC #if LanguageIsC
/* Handle to a GPU resource, stored as a single u32 value. */
Struct(GpuPointer) { u32 v; };
/* A GpuPointer paired with a byte offset. NOTE(review): presumably a position inside the buffer that `p` refers to - confirm. */
Struct(GpuBufferPos) { GpuPointer p; u64 byte_offset; };
/* Returns true when `p` is nil, i.e. when its value is 0. */
Inline b32 IsGpuPointerNil(GpuPointer p) { return p.v == 0; }
Struct(VertexShader) { ResourceKey resource; }; Struct(VertexShader) { ResourceKey resource; };
Struct(PixelShader) { ResourceKey resource; }; Struct(PixelShader) { ResourceKey resource; };
@ -780,16 +784,9 @@ Struct(SamplerStateRid) { u32 v; };
#elif LanguageIsGpu #elif LanguageIsGpu
//- Resource descriptor index types typedef u32 GpuBuffer;
typedef uint StructuredBufferRid; typedef u32 GpuTexture;
typedef uint RWStructuredBufferRid; typedef u32 GpuSampler;
typedef uint Texture1DRid;
typedef uint Texture2DRid;
typedef uint Texture3DRid;
typedef uint RWTexture1DRid;
typedef uint RWTexture2DRid;
typedef uint RWTexture3DRid;
typedef uint SamplerStateRid;
//- Shader declaration //- Shader declaration
# define ComputeShader(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID)) # define ComputeShader(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID))

View File

@ -1142,12 +1142,6 @@ u32 U32FromVec4(Vec4 v)
return result; return result;
} }
/* Returns a PackedVec4 for `v`.
 * NOTE(review): this is a stub - `v` is never read and the result is returned
 * exactly as initialized by ZI. The intended packing is not visible here;
 * confirm before relying on the output. */
PackedVec4 PackVec4(Vec4 v)
{
PackedVec4 result = ZI;
return result;
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Xform operations //~ Xform operations

View File

@ -20,60 +20,104 @@ Enum(Axis)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Vector types //~ Vector types
#define VEC2(x, y) (Vec2) { (x), (y) } //- Vec2
#define VEC3(x, y, z) (Vec3) { (x), (y), (z) }
#define VEC4(x, y, z, w) (Vec4) { (x), (y), (z), (w) }
#define VEC2I32(x, y) (Vec2I32) { (x), (y) }
#define VEC3I32(x, y, z) (Vec3I32) { (x), (y), (z) }
#define VEC4I32(x, y, z, w) (Vec4I32) { (x), (y), (z), (w) }
#define VEC2U32(x, y) (Vec2U32) { (x), (y) }
#define VEC3U32(x, y, z) (Vec3U32) { (x), (y), (z) }
#define VEC4U32(x, y, z, w) (Vec4U32) { (x), (y), (z), (w) }
Struct(Vec2) { f32 x, y; }; Struct(Vec2) { f32 x, y; };
Struct(Vec3) { f32 x, y, z; };
Struct(Vec4) { f32 x, y, z, w; };
Struct(Vec2I32) { i32 x, y; }; Struct(Vec2I32) { i32 x, y; };
Struct(Vec2I64) { i64 x, y; };
Struct(Vec2U32) { i32 x, y; };
Struct(Vec2U64) { u64 x, y; };
Struct(Vec2Array) { Vec2 *points; u64 count; };
#define VEC2(x, y) (Vec2) { (x), (y) }
#define VEC2I32(x, y) (Vec2I32) { (x), (y) }
#define VEC2I64(x, y) (Vec2I64) { (x), (y) }
#define VEC2U32(x, y) (Vec2U32) { (x), (y) }
#define VEC2U64(x, y) (Vec2U64) { (x), (y) }
#define Vec2FromVec(v) VEC2((v).x, (v).y)
//- Vec3
Struct(Vec3) { f32 x, y, z; };
Struct(Vec3I32) { i32 x, y, z; }; Struct(Vec3I32) { i32 x, y, z; };
Struct(Vec3I64) { i64 x, y, z; };
Struct(Vec3U32) { i32 x, y, z; };
Struct(Vec3U64) { u64 x, y, z; };
Struct(Vec3Array) { Vec3 *points; u64 count; };
#define VEC3(x, y, z) (Vec3) { (x), (y), (z) }
#define VEC3I32(x, y, z) (Vec3I32) { (x), (y), (z) }
#define VEC3I64(x, y, z) (Vec3I64) { (x), (y), (z) }
#define VEC3U32(x, y, z) (Vec3U32) { (x), (y), (z) }
#define VEC3U64(x, y, z) (Vec3U64) { (x), (y), (z) }
#define Vec3FromVec(v) VEC3((v).x, (v).y, (v).z)
//- Vec4
Struct(Vec4) { f32 x, y, z, w; };
Struct(Vec4I32) { i32 x, y, z, w; }; Struct(Vec4I32) { i32 x, y, z, w; };
Struct(Vec4I64) { i64 x, y, z, w; };
Struct(Vec4U32) { i32 x, y, z, w; };
Struct(Vec4U64) { u64 x, y, z, w; };
Struct(Vec2U32) { u32 x, y; }; Struct(Vec4Array) { Vec4 *points; u64 count; };
Struct(Vec3U32) { u32 x, y, z; };
Struct(Vec4U32) { u32 x, y, z, w; };
Struct(PackedVec4) { u32 hi; u32 lo; }; #define VEC4(x, y, z, w) (Vec4) { (x), (y), (z), (w) }
#define VEC4I32(x, y, z, w) (Vec4I32) { (x), (y), (z), (w) }
#define VEC4I64(x, y, z, w) (Vec4I64) { (x), (y), (z), (w) }
#define VEC4U32(x, y, z, w) (Vec4U32) { (x), (y), (z), (w) }
#define VEC4U64(x, y, z, w) (Vec4U64) { (x), (y), (z), (w) }
Struct(Vec2Array) #define Vec4FromVec(v) VEC4((v).x, (v).y, (v).z, (v).w)
{
Vec2 *points;
u64 count;
};
Struct(Vec3Array)
{
Vec3 *points;
u64 count;
};
Struct(Vec4Array)
{
Vec4 *points;
u64 count;
};
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Range types //~ Range types
#define RNG2(p0, p1) (Rng2) { (p0), (p1) } //- Rng1
#define RNG2I32(p0, p1) (Rng2I32) { (p0), (p1) }
#define RNG2U32(p0, p1) (Rng2U32) { (p0), (p1) } Struct(Rng) { f32 min; f32 max; };
Struct(RngI32) { i32 min; i32 max; };
Struct(RngI64) { i64 min; i64 max; };
Struct(RngU32) { u32 min; u32 max; };
Struct(RngU64) { u64 min; u64 max; };
#define RNG(min, max) (Rng) { (min), (max) }
#define RNGI32(min, max) (RngI32) { (min), (max) }
#define RNGI64(min, max) (RngI64) { (min), (max) }
#define RNGU32(min, max) (RngU32) { (min), (max) }
#define RNGU64(min, max) (RngU64) { (min), (max) }
//- Rng2
Struct(Rng2) { Vec2 p0; Vec2 p1; }; Struct(Rng2) { Vec2 p0; Vec2 p1; };
Struct(Rng2I32) { Vec2I32 p0; Vec2I32 p1; }; Struct(Rng2I32) { Vec2I32 p0; Vec2I32 p1; };
Struct(Rng2I64) { Vec2I64 p0; Vec2I64 p1; };
Struct(Rng2U32) { Vec2U32 p0; Vec2U32 p1; }; Struct(Rng2U32) { Vec2U32 p0; Vec2U32 p1; };
Struct(Rng2U64) { Vec2U64 p0; Vec2U64 p1; };
#define RNG2(p0, p1) (Rng2) { (p0), (p1) }
#define RNG2I32(p0, p1) (Rng2I32) { (p0), (p1) }
#define RNG2I64(p0, p1) (Rng2I64) { (p0), (p1) }
#define RNG2U32(p0, p1) (Rng2U32) { (p0), (p1) }
#define RNG2U64(p0, p1) (Rng2U64) { (p0), (p1) }
//- Rng3
Struct(Rng3) { Vec3 p0; Vec3 p1; };
Struct(Rng3I32) { Vec3I32 p0; Vec3I32 p1; };
Struct(Rng3I64) { Vec3I64 p0; Vec3I64 p1; };
Struct(Rng3U32) { Vec3U32 p0; Vec3U32 p1; };
Struct(Rng3U64) { Vec3U64 p0; Vec3U64 p1; };
#define RNG3(p0, p1) (Rng3) { (p0), (p1) }
#define RNG3I32(p0, p1) (Rng3I32) { (p0), (p1) }
#define RNG3I64(p0, p1) (Rng3I64) { (p0), (p1) }
#define RNG3U32(p0, p1) (Rng3U32) { (p0), (p1) }
#define RNG3U64(p0, p1) (Rng3U64) { (p0), (p1) }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Xform types //~ Xform types
@ -245,8 +289,6 @@ Vec4 BlendSrgb(Vec4 v0, Vec4 v1, f32 t);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Vec2 operations //~ Vec2 operations
#define Vec2FromFields(v) VEC2((v).x, (v).y)
b32 IsVec2Zero(Vec2 a); b32 IsVec2Zero(Vec2 a);
b32 MatchVec2(Vec2 a, Vec2 b); b32 MatchVec2(Vec2 a, Vec2 b);
@ -315,7 +357,6 @@ Vec2I32 SubVec2I32(Vec2I32 a, Vec2I32 b);
Vec4 Vec4FromU32(u32 v); Vec4 Vec4FromU32(u32 v);
u32 U32FromVec4(Vec4 v); u32 U32FromVec4(Vec4 v);
PackedVec4 PackVec4(Vec4 v);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Xform operations //~ Xform operations

View File

@ -17,7 +17,6 @@ typedef int4 Vec4I32;
typedef uint2 Vec2U32; typedef uint2 Vec2U32;
typedef uint3 Vec3U32; typedef uint3 Vec3U32;
typedef uint4 Vec4U32; typedef uint4 Vec4U32;
typedef uint2 PackedVec4;
typedef float2x3 Xform; typedef float2x3 Xform;
typedef float4 Rect; typedef float4 Rect;
typedef float4 ClipRect; typedef float4 ClipRect;
@ -25,19 +24,6 @@ typedef float4 Aabb;
typedef float4 Quad; typedef float4 Quad;
typedef float4x4 Mat4x4; typedef float4x4 Mat4x4;
////////////////////////////////////////////////////////////
//~ Color helpers
/* Expands a packed 32-bit value into a Vec4.
 * Bits 0-7 become r, 8-15 g, 16-23 b and 24-31 a; each 8-bit channel is
 * divided by 255.0. */
Vec4 Vec4FromU32(u32 v)
{
    /* Isolate the four 8-bit channels first */
    u32 r8 = (v >> 0) & 0xFF;
    u32 g8 = (v >> 8) & 0xFF;
    u32 b8 = (v >> 16) & 0xFF;
    u32 a8 = (v >> 24) & 0xFF;

    Vec4 color;
    color.r = r8 / 255.0;
    color.g = g8 / 255.0;
    color.b = b8 / 255.0;
    color.a = a8 / 255.0;
    return color;
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Vertex ID helpers //~ Vertex ID helpers

View File

@ -48,71 +48,37 @@ JobDef(F_Load, sig, _)
} }
TTF_Decoded decoded = TTF_Decode(scratch.arena, resource_data, em_size, font_codes, countof(font_codes)); TTF_Decoded decoded = TTF_Decode(scratch.arena, resource_data, em_size, font_codes, countof(font_codes));
/* Send texture to GPU */ /* Upload texture to GPU */
GPU_Resource *texture = 0; Fence completion_fence = ZI;
{ {
GPU_CommandList *cl = GPU_BeginCommandList(GPU_QueueKind_BackgroundCopy);
GPU_Arena *gpu_temp = GPU_AcquireArena();
{
GpuTexture gpu_texture = ZI;
{
GPU_Arena *gpu_perm = GPU_Perm();
GPU_ResourceDesc desc = ZI; GPU_ResourceDesc desc = ZI;
desc.kind = GPU_ResourceKind_Texture2D;
desc.flags = GPU_ResourceFlag_None;
desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb; desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb;
desc.texture.size = VEC3I32(decoded.image_width, decoded.image_height, 1); desc.texture.size = VEC3I32(decoded.image_width, decoded.image_height, 1);
texture = GPU_AcquireResource(desc); gpu_texture = GPU_PushTexture(gpu_perm, GPU_TextureKind_2D, desc);
/* Fill upload buffer */
GPU_ResourceDesc upload_desc = ZI;
upload_desc.kind = GPU_ResourceKind_Buffer;
upload_desc.buffer.heap_kind = GPU_HeapKind_Upload;
upload_desc.buffer.count = GPU_GetFootprintSize(texture);
GPU_Resource *upload = GPU_AcquireResource(upload_desc);
{
GPU_Mapped mapped = GPU_Map(upload);
GPU_CopyBytesToFootprint(mapped.mem, (u8 *)decoded.image_pixels, texture);
GPU_Unmap(mapped);
} }
texture->gpu_texture = gpu_texture;
GPU_QueueKind copy_queue = GPU_QueueKind_BackgroundCopy; texture->width = decoded.width;
GPU_QueueKind direct_queue = GPU_QueueKind_Direct; texture->height = decoded.height;
Fence *direct_queue_fence = GPU_FenceFromQueue(direct_queue); GpuBuffer src_buff = GPU_PushBuffer(gpu_temp, GPU_GetFootprintSize(gpu_texture), GPU_BufferFlag_CpuWritable);
i64 direct_queue_fence_target = 0; GpuAddress src_addr = ZI;
if (copy_queue == direct_queue)
{ {
/* Copy & transition GPU resource on direct queue*/ u32 *p = GPU_PushStructsNoZero(src_buff, u32, decoded.width * decoded.height);
{ CopyStructs(p, decoded.pixels, decoded.width * decoded.heigth);
GPU_CommandList *cl = GPU_BeginCommandList(direct_queue); GPU_TransitionBufferToCopySrc(src_buff);
{ GPU_TransitionTextureToCopyDst(gpu_texture);
GPU_TransitionToCopyDst(cl, texture); GPU_CopyBytesToFootprint(gpu_texture, src_buff, src_addr, decoded.width * decoded.height * 4);
GPU_CopyResource(cl, texture, upload); GPU_TransitionTextureToReadonly(gpu_texture);
GPU_TransitionToReadable(cl, texture);
} }
direct_queue_fence_target = GPU_EndCommandList(cl); GPU_SetFence(&completion_fence, 1);
} }
} GPU_ReleaseArena(gpu_temp);
else GPU_EndCommandList(cl);
{
/* Copy to GPU resource on background copy queue*/
i64 copy_queue_fence_target = 0;
{
GPU_CommandList *cl = GPU_BeginCommandList(copy_queue);
{
GPU_TransitionToCopyDst(cl, texture);
GPU_CopyResource(cl, texture, upload);
}
copy_queue_fence_target = GPU_EndCommandList(cl);
}
/* Once copy finishes, transition resource to readable on direct queue */
{
GPU_QueueWait(direct_queue, copy_queue, copy_queue_fence_target);
GPU_CommandList *cl = GPU_BeginCommandList(direct_queue);
{
GPU_TransitionToReadable(cl, texture);
}
direct_queue_fence_target = GPU_EndCommandList(cl);
}
}
/* Release upload buffer once transition finishes */
YieldOnFence(direct_queue_fence, direct_queue_fence_target);
GPU_ReleaseResource(upload, GPU_ReleaseFlag_None);
} }
/* Acquire store memory */ /* Acquire store memory */
@ -155,6 +121,8 @@ JobDef(F_Load, sig, _)
font->lookup[codepoint] = decoded.cache_indices[i]; font->lookup[codepoint] = decoded.cache_indices[i];
} }
YieldOnFence(&completion_fence, 1);
LogSuccessF("Loaded font \"%F\" (font size: %F, em size: %F) in %F seconds", FmtString(name), FmtFloat((f64)font_size), FmtFloat((f64)em_size), FmtFloat(SecondsFromNs(TimeNs() - start_ns))); LogSuccessF("Loaded font \"%F\" (font size: %F, em size: %F) in %F seconds", FmtString(name), FmtFloat((f64)font_size), FmtFloat((f64)em_size), FmtFloat(SecondsFromNs(TimeNs() - start_ns)));
AC_MarkReady(asset, font); AC_MarkReady(asset, font);

View File

@ -19,4 +19,4 @@
//- Startup //- Startup
@Startup GPU_Startup @Startup GPU_Startup
@Startup GPU_StartupUtils @Startup GPU_StartupCommon

View File

@ -3,227 +3,76 @@ GPU_SharedUtilState GPU_shared_util_state = ZI;
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Startup //~ Startup
void GPU_StartupUtils(void) void GPU_StartupCommon(void)
{ {
GPU_SharedUtilState *g = &GPU_shared_util_state; GPU_SharedUtilState *g = &GPU_shared_util_state;
GPU_QueueKind queue_kind = GPU_QueueKind_Direct; GPU_Arena *gpu_perm = GPU_Perm();
Fence *queue_fence = GPU_FenceFromQueue(queue_kind);
i64 queue_fence_target = FetchFence(queue_fence);
GPU_Resource *noise_upload = 0; /* Upload data to gpu */
GPU_Resource *quad_upload = 0; GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_Direct);
GPU_Resource *noise = 0;
GPU_Resource *quad = 0;
GPU_CommandList *cl = GPU_BeginCommandList(queue_kind);
{ {
/* Upload noise */ /* Init noise texture */
String noise_data = DataFromResource(ResourceKeyFromStore(&GPU_Resources, Lit("noise_128x128x64_16.dat")));
Vec3I32 noise_dims = VEC3I32(128, 128, 64);
GpuPointer noise_tex = ZI;
{ {
Vec3I32 noise_size = VEC3I32(128, 128, 64); GPU_TextureDesc noise_desc = ZI;
ResourceKey noise_resource = ResourceKeyFromStore(&GPU_Resources, Lit("noise_128x128x64_16.dat")); noise_desc.format = GPU_Format_R16_Uint;
String noise_res_data = DataFromResource(noise_resource); noise_desc.size = noise_dims;
if (noise_res_data.len != noise_size.x * noise_size.y * noise_size.z * 2) if (noise_data.len != noise_dims.x * noise_dims.y * noise_dims.z * 2)
{ {
Panic(Lit("Unexpected noise texture size")); Panic(Lit("Unexpected noise texture size"));
} }
GPU_ResourceDesc desc = ZI; noise_tex = GPU_PushTexture(gpu_perm, GPU_TextureKind_2D, GPU_Format_R16_Uint, noise_dims, GPU_TextureFlag_None);
desc.kind = GPU_ResourceKind_Texture3D; GPU_CopyFromCpu(cl, noise_tex, noise_data);
desc.texture.format = GPU_Format_R16_Uint;
desc.texture.size = noise_size;
noise = GPU_AcquireResource(desc);
{
u64 footprint_size = GPU_GetFootprintSize(noise);
GPU_ResourceDesc upload_desc = ZI;
upload_desc.kind = GPU_ResourceKind_Buffer;
upload_desc.buffer.heap_kind = GPU_HeapKind_Upload;
upload_desc.buffer.count = footprint_size;
noise_upload = GPU_AcquireResource(upload_desc);
GPU_Mapped mapped = GPU_Map(noise_upload);
GPU_CopyBytesToFootprint(mapped.mem, noise_res_data.text, noise);
GPU_Unmap(mapped);
}
GPU_TransitionToCopyDst(cl, noise);
GPU_CopyResource(cl, noise, noise_upload);
GPU_TransitionToReadable(cl, noise);
} }
g->noise_tex = noise_tex;
/* Upload quad indices */ /* Init quad index buffer */
GpuPointer quad_indices = ZI;
{ {
u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 }; u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 };
GPU_ResourceDesc desc = ZI; quad_indices = GPU_PushBuffer(gpu_perm, u16, countof(quad_data), GPU_BufferFlag_None);
desc.kind = GPU_ResourceKind_Buffer; GPU_CopyFromCpu(cl, quad_indices, StringFromArray(quad_data));
desc.buffer.count = countof(quad_indices);
desc.buffer.stride = sizeof(quad_indices[0]);
quad = GPU_AcquireResource(desc);
{
GPU_ResourceDesc upload_desc = ZI;
upload_desc.kind = GPU_ResourceKind_Buffer;
upload_desc.buffer.heap_kind = GPU_HeapKind_Upload;
upload_desc.buffer.count = desc.buffer.count * desc.buffer.stride;
quad_upload = GPU_AcquireResource(upload_desc);
GPU_Mapped mapped = GPU_Map(quad_upload);
CopyBytes(mapped.mem, quad_indices, sizeof(quad_indices));
GPU_Unmap(mapped);
} }
GPU_TransitionToCopyDst(cl, quad); g->quad_indices = quad_indices;
GPU_CopyResource(cl, quad, quad_upload);
GPU_TransitionToReadable(cl, quad);
} }
} GPU_CloseCommandList(cl);
queue_fence_target = GPU_EndCommandList(cl);
/* Init point sampler */ /* Init point sampler */
g->pt_sampler = GPU_AcquireResource((GPU_ResourceDesc) { .kind = GPU_ResourceKind_Sampler, .sampler.filter = GPU_Filter_MinMagMipPoint }); g->pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint });
}
/* Wait & cleanup */ ////////////////////////////////////////////////////////////
YieldOnFence(queue_fence, queue_fence_target); //~ Arena helpers
GPU_ReleaseResource(noise_upload, GPU_ReleaseFlag_None);
GPU_ReleaseResource(quad_upload, GPU_ReleaseFlag_None);
g->noise = noise; GPU_Arena *GPU_Perm(void)
g->quad_indices = quad; {
i16 fiber_id = FiberId();
GPU_Arena *perm = GPU_shared_util_state.perm_arenas[fiber_id];
if (!perm)
{
GPU_shared_util_state.perm_arenas[fiber_id] = GPU_AcquireArena();
perm = GPU_shared_util_state.perm_arenas[fiber_id];
}
return perm;
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Common resource helpers //~ Common resource helpers
GPU_Resource *GPU_GetCommonPointSampler(void) GpuPointer GPU_GetCommonPointSampler(void)
{ {
return GPU_shared_util_state.pt_sampler; return GPU_shared_util_state.pt_sampler;
} }
GPU_Resource *GPU_GetCommonQuadIndices(void) GpuPointer GPU_GetCommonQuadIndices(void)
{ {
return GPU_shared_util_state.quad_indices; return GPU_shared_util_state.quad_indices;
} }
GPU_Resource *GPU_GetCommonNoise(void) GpuPointer GPU_GetCommonNoise(void)
{ {
return GPU_shared_util_state.noise; return GPU_shared_util_state.noise_tex;
}
////////////////////////////////////////////////////////////
//~ Transient buffer operations
/* Builds a transient buffer descriptor bound to `queue_kind`.
 * `element_size` is clamped to at least 1; every other field starts out as
 * initialized by ZI. */
GPU_TransientBuffer GPU_AcquireTransientBuffer(GPU_QueueKind queue_kind, u32 element_size)
{
    GPU_TransientBuffer result = ZI;
    result.queue_kind = queue_kind;
    result.element_size = MaxU32(element_size, 1);
    return result;
}
/* Releases every GPU resource on `tbuff`'s submitted list and hands the
 * bookkeeping nodes back to the shared free list.
 *
 * Yields on the fence of the buffer's queue (using the value returned by
 * FetchFence at call time) before anything is released. */
void GPU_ReleaseTransientBuffer(GPU_TransientBuffer *tbuff)
{
    GPU_SharedUtilState *g = &GPU_shared_util_state;

    Fence *queue_fence = GPU_FenceFromQueue(tbuff->queue_kind);
    i64 queue_fence_value = FetchFence(queue_fence);
    YieldOnFence(queue_fence, queue_fence_value);

    if (tbuff->first_submitted)
    {
        for (GPU_SubmittedResourceNode *submitted = tbuff->first_submitted;
             submitted;
             submitted = submitted->next)
        {
            GPU_ReleaseResource(submitted->resource, GPU_ReleaseFlag_None);
            /* Clear the pointer: recycled nodes are handed out again by
             * GPU_UploadTransientBuffer, which releases any non-null resource
             * it finds on a node. A stale pointer would be released twice. */
            submitted->resource = 0;
        }

        /* Splice the whole submitted list onto the front of the shared free list */
        Lock lock = LockE(&g->submitted_transient_buffers_mutex);
        {
            tbuff->last_submitted->next = g->first_free_submitted_transient_buffer;
            g->first_free_submitted_transient_buffer = tbuff->first_submitted;
        }
        Unlock(&lock);

        /* The nodes now belong to the shared pool; drop our references to them */
        tbuff->first_submitted = 0;
        tbuff->last_submitted = 0;
    }
}
/* Copies `src_size` bytes from `src` into a newly acquired upload-heap buffer
 * and returns that buffer.
 *
 * The buffer is described as `src_size / tbuff->element_size` elements of
 * `tbuff->element_size` bytes. The node tracking it is stored in
 * `tbuff->uploaded`; the function panics if a previous upload is still stored
 * there (i.e. GPU_ResetTransientBuffer has not been called since).
 *
 * The tracking node is taken from, in order:
 *   1. the head of `tbuff`'s submitted list, if its fence target has been reached,
 *   2. the shared free list,
 *   3. a fresh allocation from the permanent arena. */
GPU_Resource *GPU_UploadTransientBuffer(GPU_TransientBuffer *tbuff, void *src, u64 src_size)
{
    GPU_SharedUtilState *g = &GPU_shared_util_state;
    u64 element_count = src_size / tbuff->element_size;
    Fence *queue_fence = GPU_FenceFromQueue(tbuff->queue_kind);
    i64 queue_fence_value = FetchFence(queue_fence);

    if (tbuff->uploaded != 0)
    {
        Panic(Lit("GPU transient buffer uploaded without a reset"));
    }

    /* Grab resource node */
    GPU_SubmittedResourceNode *upload = 0;
    {
        if (tbuff->first_submitted && tbuff->first_submitted->fence_target <= queue_fence_value)
        {
            upload = tbuff->first_submitted;
            SllQueuePop(tbuff->first_submitted, tbuff->last_submitted);
        }
        if (!upload)
        {
            Lock lock = LockE(&g->submitted_transient_buffers_mutex);
            {
                upload = g->first_free_submitted_transient_buffer;
                if (upload)
                {
                    /* Pop exactly one node. The head was previously advanced
                     * here AND passed to SllStackPop; NOTE(review): assuming
                     * SllStackPop also advances the head, that removed two
                     * nodes per call and would dereference a null head when
                     * the list held a single node - confirm against the macro. */
                    g->first_free_submitted_transient_buffer = upload->next;
                }
            }
            Unlock(&lock);
        }
        if (!upload)
        {
            Arena *perm = PermArena();
            upload = PushStruct(perm, GPU_SubmittedResourceNode);
        }
    }

    /* Create gpu resource */
    {
        /* A recycled node may still own its previous buffer; release it first */
        if (upload->resource)
        {
            GPU_ReleaseResource(upload->resource, GPU_ReleaseFlag_Reuse);
            upload->resource = 0;
        }

        GPU_ResourceDesc desc = ZI;
        desc.kind = GPU_ResourceKind_Buffer;
        desc.flags = GPU_ResourceFlag_None;
        desc.buffer.heap_kind = GPU_HeapKind_Upload;
        desc.buffer.count = element_count;
        desc.buffer.stride = tbuff->element_size;
        upload->resource = GPU_AcquireResource(desc);
    }

    /* Fill gpu resource */
    {
        __profn("Copy to transfer buffer");
        GPU_Mapped m = GPU_Map(upload->resource);
        CopyBytes(m.mem, src, src_size);
        GPU_Unmap(m);
    }

    tbuff->uploaded = upload;
    return upload->resource;
}
/* Uploads the contents of `arena` through GPU_UploadTransientBuffer.
 * Only whole elements are uploaded: `arena->pos` is divided by
 * `tbuff->element_size` and any remainder bytes are left out. */
GPU_Resource *GPU_UploadTransientBufferFromArena(GPU_TransientBuffer *tbuff, Arena *arena)
{
    /* Do the arithmetic in 64 bits: the callee takes a u64 size, and a 32-bit
     * count * element_size product could wrap before being widened. */
    u64 element_count = arena->pos / tbuff->element_size;
    u64 upload_size = element_count * (u64)tbuff->element_size;
    return GPU_UploadTransientBuffer(tbuff, ArenaFirst(arena, u8), upload_size);
}
void GPU_ResetTransientBuffer(GPU_TransientBuffer *tbuff, i64 queue_fence_target)
{
GPU_SubmittedResourceNode *uploaded = tbuff->uploaded;
if (uploaded)
{
uploaded->fence_target = queue_fence_target;
SllQueuePush(tbuff->first_submitted, tbuff->last_submitted, uploaded);
tbuff->uploaded = 0;
}
} }

View File

@ -1,62 +1,29 @@
////////////////////////////////////////////////////////////
//~ Transient buffer types
/* Singly-linked node tracking one upload resource owned by a transient buffer.
 * Nodes live either on a GPU_TransientBuffer's submitted list or on the shared
 * free list in GPU_SharedUtilState. */
Struct(GPU_SubmittedResourceNode)
{
/* Next node in whichever list currently holds this node */
GPU_SubmittedResourceNode *next;
/* Set during transient upload */
GPU_Resource *resource;
/* Set during transient reset */
i64 fence_target; /* Once the buffer's queue reaches the target, the resource can be freed or reused */
};
/* Per-user state for uploading short-lived buffers on one GPU queue. */
Struct(GPU_TransientBuffer)
{
/* Queue whose fence gates reuse and release of the uploaded resources */
GPU_QueueKind queue_kind;
/* Size in bytes of one element; GPU_AcquireTransientBuffer clamps it to at least 1 */
u32 element_size;
/* Node of the most recent upload; set by GPU_UploadTransientBuffer, cleared by GPU_ResetTransientBuffer */
GPU_SubmittedResourceNode *uploaded;
/* Queue of nodes pushed by GPU_ResetTransientBuffer (first = oldest) */
GPU_SubmittedResourceNode *first_submitted;
GPU_SubmittedResourceNode *last_submitted;
/* NOTE(review): not referenced by any of the transient buffer functions visible here - confirm whether it is still used */
u32 max_in_flight;
};
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ State types //~ State types
Struct(GPU_SharedUtilState) Struct(GPU_SharedUtilState)
{ {
/* Common shared resources */ /* Common shared resources */
GPU_Resource *pt_sampler; GpuPointer pt_sampler;
GPU_Resource *quad_indices; GpuPointer quad_indices;
GPU_Resource *noise; GpuPointer noise_tex;
/* Transient buffer pool */ GPU_Arena *perm_arenas[MaxFibers];
Mutex submitted_transient_buffers_mutex;
GPU_SubmittedResourceNode *first_free_submitted_transient_buffer;
} extern GPU_shared_util_state; } extern GPU_shared_util_state;
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Startup //~ Startup
void GPU_StartupUtils(void); void GPU_StartupCommon(void);
////////////////////////////////////////////////////////////
//~ Arena helpers
GPU_Arena *GPU_Perm(void);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Common resource helpers //~ Common resource helpers
GPU_Resource *GPU_GetCommonPointSampler(void); GpuPointer GPU_GetCommonPointSampler(void);
GPU_Resource *GPU_GetCommonQuadIndices(void); GpuPointer GPU_GetCommonQuadIndices(void);
GPU_Resource *GPU_GetCommonNoise(void); GpuPointer GPU_GetCommonNoise(void);
////////////////////////////////////////////////////////////
//~ Transient buffer operations
GPU_TransientBuffer GPU_AcquireTransientBuffer(GPU_QueueKind queue_kind, u32 element_size);
void GPU_ReleaseTransientBuffer(GPU_TransientBuffer *tbuff);
GPU_Resource *GPU_UploadTransientBuffer(GPU_TransientBuffer *tbuff, void *src, u64 src_size);
GPU_Resource *GPU_UploadTransientBufferFromArena(GPU_TransientBuffer *tbuff, Arena *arena);
void GPU_ResetTransientBuffer(GPU_TransientBuffer *tbuff, i64 queue_fence_target);

View File

@ -1,7 +1,7 @@
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Opaque types //~ Opaque types
Struct(GPU_Resource); Struct(GPU_Arena);
Struct(GPU_CommandList); Struct(GPU_CommandList);
Struct(GPU_Swapchain); Struct(GPU_Swapchain);
@ -158,7 +158,63 @@ Enum(GPU_Format)
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Filter types //~ Shader access types
/* Access kind selected for a resource via GPU_SetShaderAccess. */
Enum(GPU_ShaderAccessKind)
{
GPU_ShaderAccessKind_Readonly, /* Default state for all resources */
GPU_ShaderAccessKind_ReadWrite,
GPU_ShaderAccessKind_RasterTarget,
};
////////////////////////////////////////////////////////////
//~ Arena types
/* A GPU arena together with a saved position.
 * NOTE(review): presumably used to roll the arena back to `start_pos` when a
 * temporary scope ends - no begin/end functions are visible here; confirm. */
Struct(GPU_TempArena)
{
GPU_Arena *arena;
u64 start_pos;
};
////////////////////////////////////////////////////////////
//~ Buffer types
/* Bit flags accepted by GPU_PushBufferEx / GPU_PushBuffer. */
Enum(GPU_BufferFlag)
{
GPU_BufferFlag_None = 0,
GPU_BufferFlag_Writable = (1 << 0),
};
////////////////////////////////////////////////////////////
//~ Texture types
#define GPU_MaxRasterTargets 8
/* Bit flags for texture creation (GPU_TextureDesc.flags and the `flags`
 * argument of GPU_PushTexture). */
Enum(GPU_TextureFlag)
{
GPU_TextureFlag_None = 0,
GPU_TextureFlag_Writable = (1 << 0),
GPU_TextureFlag_Rasterizable = (1 << 1),
};
/* Texture dimensionality passed as the `kind` argument of GPU_PushTexture. */
Enum(GPU_TextureKind)
{
GPU_TextureKind_1D,
GPU_TextureKind_2D,
GPU_TextureKind_3D,
};
/* Texture description consumed by GPU_PushTextureEx. */
Struct(GPU_TextureDesc)
{
GPU_TextureFlag flags;
GPU_Format format;
/* Dimensions as x, y, z */
Vec3I32 size;
Vec4 clear_color;
/* NOTE(review): the default when left zero is not visible here - confirm */
i32 mip_levels;
};
////////////////////////////////////////////////////////////
//~ Sampler types
/* NOTE: Matches DirectX D3D12_FILTER */ /* NOTE: Matches DirectX D3D12_FILTER */
Enum(GPU_Filter) Enum(GPU_Filter)
@ -236,68 +292,8 @@ Enum(GPU_ComparisonFunc)
GPU_ComparisonFunc_Always = 8 GPU_ComparisonFunc_Always = 8
}; };
//////////////////////////////////////////////////////////// Struct(GPU_SamplerDesc)
//~ Resource types
#define GPU_MaxRenderTargets 8
Enum(GPU_ResourceKind)
{ {
GPU_ResourceKind_Unknown,
GPU_ResourceKind_Buffer,
GPU_ResourceKind_Texture1D,
GPU_ResourceKind_Texture2D,
GPU_ResourceKind_Texture3D,
GPU_ResourceKind_Sampler
};
Enum(GPU_ResourceFlag)
{
GPU_ResourceFlag_None = 0,
GPU_ResourceFlag_Writable = (1 << 0),
GPU_ResourceFlag_Renderable = (1 << 1),
GPU_ResourceFlag_MaxMipLevels = (1 << 2),
GPU_ResourceFlag_Zeroed = (1 << 3),
};
Enum(GPU_HeapKind)
{
GPU_HeapKind_Default,
GPU_HeapKind_Upload,
GPU_HeapKind_Download
};
Enum(GPU_ReleaseFlag)
{
GPU_ReleaseFlag_None = 0,
/* Hints to the GPU layer that more resources using a similar desc will
* be allocated soon, so the resource's memory should be kept around for
* re-use. */
GPU_ReleaseFlag_Reuse = (1 << 0)
};
Struct(GPU_ResourceDesc)
{
GPU_ResourceKind kind;
GPU_ResourceFlag flags;
Vec4 clear_color;
union
{
struct
{
GPU_Format format;
Vec3I32 size;
i32 mip_levels; /* Defaults to 1 (unless GPU_ResourceFlag_MaxMipLevels is set) */
} texture;
struct
{
GPU_HeapKind heap_kind;
u32 count;
u32 stride; /* Defaults to 1 */
} buffer;
struct
{
GPU_Filter filter; GPU_Filter filter;
GPU_AddressMode x; GPU_AddressMode x;
GPU_AddressMode y; GPU_AddressMode y;
@ -308,47 +304,21 @@ Struct(GPU_ResourceDesc)
Vec4 border_color; Vec4 border_color;
f32 min_lod; f32 min_lod;
f32 max_lod; f32 max_lod;
} sampler;
};
};
Struct(GPU_Mapped)
{
GPU_Resource *resource;
void *mem;
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Rasterizer types //~ Rasterization types
Enum(GPU_RasterizeMode) Enum(GPU_RasterMode)
{ {
GPU_RasterizeMode_None, GPU_RasterMode_None,
GPU_RasterizeMode_PointList, GPU_RasterMode_PointList,
GPU_RasterizeMode_LineList, GPU_RasterMode_LineList,
GPU_RasterizeMode_LineStrip, GPU_RasterMode_LineStrip,
GPU_RasterizeMode_TriangleList, GPU_RasterMode_TriangleList,
GPU_RasterizeMode_WireTriangleList, GPU_RasterMode_WireTriangleList,
GPU_RasterizeMode_TriangleStrip, GPU_RasterMode_TriangleStrip,
GPU_RasterizeMode_WireTriangleStrip, GPU_RasterMode_WireTriangleStrip,
};
Struct(GPU_Viewport)
{
f32 top_left_x;
f32 top_left_y;
f32 width;
f32 height;
f32 min_depth;
f32 max_depth;
};
Struct(GPU_Scissor)
{
f32 left;
f32 top;
f32 right;
f32 bottom;
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -373,105 +343,66 @@ Struct(GPU_Stats)
void GPU_Startup(void); void GPU_Startup(void);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Fence operations //~ @hookdecl Arenas
Fence *GPU_FenceFromQueue(GPU_QueueKind queue); GPU_Arena *GPU_AcquireArena(void);
void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value); /* Forces queue `a` to wait until queue `b`'s fence reaches `b_target_fence_value` */ void GPU_ReleaseArena(GPU_Arena *arena);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Rasterizer helpers //~ @hookdecl Resource creation
GPU_Viewport GPU_ViewportFromRect(Rng2 rect); GpuPointer GPU_PushBufferEx(GPU_Arena *arena, i32 element_size, i32 element_align, i32 element_count, GPU_BufferFlag flags);
GPU_Scissor GPU_ScissorFromRect(Rng2 rect); #define GPU_PushBuffer(arena, type, count, flags) GPU_PushBufferEx((arena), sizeof(type), alignof(type), (count), (flags))
GpuPointer GPU_PushTextureEx(GPU_Arena *arena, GPU_TextureDesc desc);
GpuPointer GPU_PushTexture(GPU_Arena *arena, GPU_TextureKind kind, GPU_Format format, Vec3I32 size, GPU_TextureFlag flags);
GpuPointer GPU_PushSampler(GPU_Arena *arena, GPU_SamplerDesc desc);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Resource operations //~ @hookdecl Commands
GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc); //- Command list creation
void GPU_ReleaseResource(GPU_Resource *resource, GPU_ReleaseFlag flags); GPU_CommandList *GPU_OpenCommandList(GPU_QueueKind queue);
void GPU_CloseCommandList(GPU_CommandList *cl);
Vec2I32 GPU_GetTextureSize2D(GPU_Resource *resource); //- Cpu -> Gpu
Vec3I32 GPU_GetTextureSize3D(GPU_Resource *resource); void GPU_CopyBytesFromCpu(GPU_CommandList *cl, GpuPointer dst, RngU64 dst_range, void *src);
u64 GPU_GetFootprintSize(GPU_Resource *resource); void GPU_CopyTexelsFromCpu(GPU_CommandList *cl, GpuPointer dst, Rng3U64 dst_range, void *src);
void GPU_CopyFromCpu(GPU_CommandList *cl, GpuPointer dst, String src);
u64 GPU_GetBufferCount(GPU_Resource *gpu_resource); //- Gpu -> Cpu
void GPU_AddCpuFence(GPU_CommandList *cl, Fence *fence, i64 v);
void GPU_SetCpuFence(GPU_CommandList *cl, Fence *fence, i64 v);
//////////////////////////////////////////////////////////// //- Implicit state
//~ @hookdecl Resource index operations void GPU_SetShaderAccess(GPU_CommandList *cl, GpuPointer ptr, GPU_ShaderAccessKind access_kind);
void GPU_SetRasterizeMode(GPU_CommandList *cl, GPU_RasterMode mode);
void GPU_SetConstantU32(GPU_CommandList *cl, i32 slot, u32 v);
void GPU_SetConstantF32(GPU_CommandList *cl, i32 slot, f32 v);
void GPU_SetConstantPtr(GPU_CommandList *cl, i32 slot, GpuPointer v);
StructuredBufferRid GPU_StructuredBufferRidFromResource(GPU_Resource *resource); //- Clear
RWStructuredBufferRid GPU_RWStructuredBufferRidFromResource(GPU_Resource *resource); void GPU_ClearRasterTarget(GPU_CommandList *cl, GpuPointer target);
Texture1DRid GPU_Texture1DRidFromResource(GPU_Resource *resource);
Texture2DRid GPU_Texture2DRidFromResource(GPU_Resource *resource);
Texture3DRid GPU_Texture3DRidFromResource(GPU_Resource *resource);
RWTexture1DRid GPU_RWTexture1DRidFromResource(GPU_Resource *resource);
RWTexture2DRid GPU_RWTexture2DRidFromResource(GPU_Resource *resource);
RWTexture3DRid GPU_RWTexture3DRidFromResource(GPU_Resource *resource);
SamplerStateRid GPU_SamplerStateRidFromResource(GPU_Resource *resource);
//////////////////////////////////////////////////////////// //- Compute
//~ @hookdecl Command list operations void GPU_Compute(GPU_CommandList *cl, ComputeShader cs, Vec3U32 threads);
GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue); //- Rasterize
i64 GPU_EndCommandList(GPU_CommandList *cl); /* Returns the value that the queue's fence will be set to once the command is completed */ void GPU_RasterizeEx(GPU_CommandList *cl,
VertexShader vs, PixelShader ps,
////////////////////////////////////////////////////////////
//~ @hookdecl Profiling helpers
void GPU_ProfN(GPU_CommandList *cl, String name);
////////////////////////////////////////////////////////////
//~ @hookdecl Barrier operations
void GPU_TransitionToReadable(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be read via read-only types in shaders */
void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be read/written to via read-write types in shader */
void GPU_TransitionToRenderable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot); /* Allows the resource to be used as a render target bound at slot */
void GPU_TransitionToCopySrc(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be used as a source in copy operations */
void GPU_TransitionToCopyDst(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be used as a destination in copy operations */
void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource); /* Waits until writes to a shader writable resource have completed */
////////////////////////////////////////////////////////////
//~ @hookdecl Dispatch operations
void GPU_ClearRenderable(GPU_CommandList *cl, GPU_Resource *resource);
#define GPU_Rasterize(cl, sig_ptr, vs, ps, rts_count, viewport, scissor, instances_count, index_buffer, mode) \
GPU_Rasterize_((cl), sizeof(*(sig_ptr)), (sig_ptr), (vs), (ps), (rts_count), (viewport), (scissor), (instances_count), (index_buffer), (mode))
#define GPU_Compute(cl, sig_ptr, cs, threads) GPU_Compute_((cl), sizeof(*(sig_ptr)), (sig_ptr), (cs), (threads))
void GPU_Rasterize_(GPU_CommandList *cl,
u32 sig_size,
void *sig,
VertexShader vs,
PixelShader ps,
u32 rts_count,
GPU_Viewport viewport,
GPU_Scissor scissor,
u32 instances_count, u32 instances_count,
GPU_Resource *index_buffer, GpuPointer idx_buff, RngU64 idx_buff_range,
GPU_RasterizeMode mode); u32 raster_targets_count, GpuPointer *raster_targets,
Rng3 viewport, Rng2 scissor);
void GPU_Compute_(GPU_CommandList *cl, void GPU_Rasterize(GPU_CommandList *cl,
u32 sig_size, VertexShader vs, PixelShader ps,
void *sig, u32 instances_count, GpuPointer idx_buff,
ComputeShader cs, u32 raster_targets_count, GpuPointer *raster_targets);
Vec3U32 threads);
//////////////////////////////////////////////////////////// //- Profiling
//~ @hookdecl Resource copy operations void GPU_ProfN(GPU_CommandList *cl, String name);
void GPU_CopyResource(GPU_CommandList *cl, GPU_Resource *dst, GPU_Resource *src);
////////////////////////////////////////////////////////////
//~ @hookdecl Map operations
GPU_Mapped GPU_Map(GPU_Resource *r);
void GPU_Unmap(GPU_Mapped mapped);
void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Statistics //~ @hookdecl Statistics
@ -479,7 +410,7 @@ void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_refe
GPU_Stats GPU_QueryStats(void); GPU_Stats GPU_QueryStats(void);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Swapchain available_to_reserve //~ @hookdecl Swapchain
GPU_Swapchain *GPU_AcquireSwapchain(WND_Handle window, GPU_Format format, Vec2I32 size); GPU_Swapchain *GPU_AcquireSwapchain(WND_Handle window, GPU_Format format, Vec2I32 size);
void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain); void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain);
@ -492,4 +423,4 @@ void GPU_YieldOnSwapchain(GPU_Swapchain *swapchain);
* 2. Blits `texture` into position `dst` in the backbuffer * 2. Blits `texture` into position `dst` in the backbuffer
* 3. Presents the backbuffer * 3. Presents the backbuffer
* 4. Returns the value that the Direct queue fence will reach once GPU completes blitting (`texture` shouldn't be released while blit is in flight) */ * 4. Returns the value that the Direct queue fence will reach once GPU completes blitting (`texture` shouldn't be released while blit is in flight) */
i64 GPU_PresentSwapchain(GPU_Swapchain *swapchain, GPU_Resource *texture, i32 vsync, Vec2I32 backbuffer_size, Vec2I32 dst_p0, Vec2I32 dst_p1, Vec2I32 src_p0, Vec2I32 src_p1, Vec4 clear_color); i64 GPU_PresentSwapchain(GPU_Swapchain *swapchain, GpuPointer texture, i32 vsync, Vec2I32 backbuffer_size, Vec2I32 dst_p0, Vec2I32 dst_p1, Vec2I32 src_p0, Vec2I32 src_p1, Vec4 clear_color);

View File

@ -195,6 +195,11 @@ void GPU_D12_InitDevice(void)
first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description); first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description);
} }
{ {
/* TODO: Verify feature support:
* - HighestShaderModel >= D3D_SHADER_MODEL_6_6
* - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3
* - EnhancedBarriersSupported == 1
*/
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device);
} }
if (SUCCEEDED(hr) && !skip) if (SUCCEEDED(hr) && !skip)
@ -890,7 +895,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
: desc.buffer.heap_kind == GPU_HeapKind_Download ? D3D12_HEAP_TYPE_READBACK : desc.buffer.heap_kind == GPU_HeapKind_Download ? D3D12_HEAP_TYPE_READBACK
: D3D12_HEAP_TYPE_DEFAULT : D3D12_HEAP_TYPE_DEFAULT
}; };
Assert(!(desc.flags & GPU_ResourceFlag_Renderable)); Assert(!(desc.flags & GPU_ResourceFlag_Rasterizable));
D3D12_RESOURCE_DESC d3d_desc = ZI; D3D12_RESOURCE_DESC d3d_desc = ZI;
d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
@ -940,7 +945,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
d3d_desc.SampleDesc.Count = 1; d3d_desc.SampleDesc.Count = 1;
d3d_desc.SampleDesc.Quality = 0; d3d_desc.SampleDesc.Quality = 0;
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_Writable); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_Writable);
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_Renderable); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_Rasterizable);
r->state = D3D12_RESOURCE_STATE_COMMON; r->state = D3D12_RESOURCE_STATE_COMMON;
D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } }; D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } };
clear_value.Color[0] = desc.clear_color.x; clear_value.Color[0] = desc.clear_color.x;
@ -1007,7 +1012,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
} }
/* Create rtv descriptor */ /* Create rtv descriptor */
if (desc.flags & GPU_ResourceFlag_Renderable) if (desc.flags & GPU_ResourceFlag_Rasterizable)
{ {
if (!r->rtv_descriptor->valid) if (!r->rtv_descriptor->valid)
{ {
@ -1183,7 +1188,7 @@ SamplerStateRid GPU_SamplerStateRidFromResource(GPU_Resource *resource)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdef Command list hooks //~ @hookdef Command list hooks
GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue_kind) GPU_CommandList *GPU_OpenCommnadList(GPU_QueueKind queue_kind)
{ {
GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId()); GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId());
Arena *perm = PermArena(); Arena *perm = PermArena();
@ -1201,7 +1206,7 @@ GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue_kind)
return (GPU_CommandList *)cl; return (GPU_CommandList *)cl;
} }
i64 GPU_EndCommandList(GPU_CommandList *gpu_cl) i64 GPU_CloseCommandList(GPU_CommandList *gpu_cl)
{ {
GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_SharedState *g = &GPU_D12_shared_state;
GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId()); GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId());
@ -1210,8 +1215,8 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
TempArena scratch = BeginScratchNoConflict(); TempArena scratch = BeginScratchNoConflict();
GPU_D12_Resource *slotted_render_targets[GPU_MaxRenderTargets] = ZI; GPU_D12_Resource *slotted_render_targets[GPU_MaxRasterTargets] = ZI;
GPU_D12_Resource *bound_render_targets[GPU_MaxRenderTargets] = ZI; GPU_D12_Resource *bound_render_targets[GPU_MaxRasterTargets] = ZI;
/* Begin dx12 command list */ /* Begin dx12 command list */
GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(queue_kind); GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(queue_kind);
@ -1699,7 +1704,7 @@ void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource)
cmd->barrier.resource = (GPU_D12_Resource *)resource; cmd->barrier.resource = (GPU_D12_Resource *)resource;
} }
void GPU_TransitionToRenderable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot) void GPU_TransitionToRasterizable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot)
{ {
GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl); GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl);
cmd->kind = GPU_D12_CommandKind_TransitionToRtv; cmd->kind = GPU_D12_CommandKind_TransitionToRtv;
@ -1731,7 +1736,7 @@ void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdef Dispatch hooks //~ @hookdef Dispatch hooks
void GPU_ClearRenderable(GPU_CommandList *gpu_cl, GPU_Resource *resource) void GPU_ClearRasterizable(GPU_CommandList *gpu_cl, GPU_Resource *resource)
{ {
GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl; GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl;
GPU_D12_Command *cmd = GPU_D12_PushCmd(cl); GPU_D12_Command *cmd = GPU_D12_PushCmd(cl);
@ -1739,14 +1744,12 @@ void GPU_ClearRenderable(GPU_CommandList *gpu_cl, GPU_Resource *resource)
cmd->clear.resource = (GPU_D12_Resource *)resource; cmd->clear.resource = (GPU_D12_Resource *)resource;
} }
void GPU_Rasterize_(GPU_CommandList *gpu_cl, void GPU_RasterizeEx(GPU_CommandList *gpu_cl,
u32 sig_size, u32 sig_size,
void *sig, void *sig,
VertexShader vs, VertexShader vs,
PixelShader ps, PixelShader ps,
u32 rts_count, u32 rts_count,
GPU_Viewport viewport,
GPU_Scissor scissor,
u32 instances_count, u32 instances_count,
GPU_Resource *index_buffer, GPU_Resource *index_buffer,
GPU_RasterizeMode mode) GPU_RasterizeMode mode)
@ -1760,7 +1763,7 @@ void GPU_Rasterize_(GPU_CommandList *gpu_cl,
cmd->rasterize.vs = vs; cmd->rasterize.vs = vs;
cmd->rasterize.ps = ps; cmd->rasterize.ps = ps;
cmd->rasterize.rts_count = rts_count; cmd->rasterize.rts_count = rts_count;
Assert(rts_count < GPU_MaxRenderTargets); Assert(rts_count < GPU_MaxRasterTargets);
cmd->rasterize.viewport = viewport; cmd->rasterize.viewport = viewport;
cmd->rasterize.scissor = scissor; cmd->rasterize.scissor = scissor;
cmd->rasterize.instances_count = instances_count; cmd->rasterize.instances_count = instances_count;
@ -1768,7 +1771,7 @@ void GPU_Rasterize_(GPU_CommandList *gpu_cl,
cmd->rasterize.mode = mode; cmd->rasterize.mode = mode;
} }
void GPU_Compute_(GPU_CommandList *gpu_cl, void GPU_ComputeEx(GPU_CommandList *gpu_cl,
u32 sig_size, u32 sig_size,
void *sig, void *sig,
ComputeShader cs, ComputeShader cs,

View File

@ -34,7 +34,7 @@ Struct(GPU_D12_PipelineDesc)
ComputeShader cs; ComputeShader cs;
b32 is_wireframe; b32 is_wireframe;
D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type; D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type;
GPU_Format render_target_formats[GPU_MaxRenderTargets]; GPU_Format render_target_formats[GPU_MaxRasterTargets];
}; };
Struct(GPU_D12_Pipeline) Struct(GPU_D12_Pipeline)

View File

@ -29,21 +29,19 @@ JobDef(V_VisWorker, _, __)
{ {
V_SharedState *vis_shared = &V_shared_state; V_SharedState *vis_shared = &V_shared_state;
S_SharedState *sim_shared = &S_shared_state; S_SharedState *sim_shared = &S_shared_state;
Arena *frame_arena = AcquireArena(Gibi(64)); Arena *frame_arena = AcquireArena(Gibi(64));
Arena *perm = PermArena(); Arena *perm = PermArena();
GPU_Arena *frame_gpu_arena = GPU_AcquireArena(Mibi(8), GPU_CpuAccessFlag_Writable);
GPU_Arena *dverts_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable);
GPU_Arena *dvert_idxs_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable);
////////////////////////////// //////////////////////////////
//- State //- State
Fence *gpu_fence = GPU_FenceFromQueue(GPU_QueueKind_Direct);
i64 gpu_fence_target = 0;
i64 frame_gen = 0; i64 frame_gen = 0;
GPU_Resource *draw_target = 0; GPU_Texture *draw_target = 0;
Arena *dverts_arena = AcquireArena(Gibi(64));
Arena *dvert_idx_arena = AcquireArena(Gibi(64));
GPU_TransientBuffer dverts_tbuff = GPU_AcquireTransientBuffer(GPU_QueueKind_Direct, sizeof(V_DVert));
GPU_TransientBuffer dvert_idx_tbuff = GPU_AcquireTransientBuffer(GPU_QueueKind_Direct, sizeof(i32));
Struct(Persist) Struct(Persist)
{ {
@ -510,9 +508,22 @@ JobDef(V_VisWorker, _, __)
} }
UnlockTicketMutex(&sim_shared->input_back_tm); UnlockTicketMutex(&sim_shared->input_back_tm);
//////////////////////////////
//- Render
GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_Direct);
{
////////////////////////////// //////////////////////////////
//- Build render data //- Build render data
GPU_ResetArena(cl, gpu_frame_arena);
ResetArena(dverts_arena);
ResetArena(dvert_idxs_arena);
/* Build shape buffers */
GpuPointer dverts = ZI;
GpuPointer dvert_idxs = ZI;
{
for (S_Ent *ent = S_FirstEnt(frame_arena, &iter, &lookup); ent->active; ent = S_NextEnt(frame_arena, &iter)) for (S_Ent *ent = S_FirstEnt(frame_arena, &iter, &lookup); ent->active; ent = S_NextEnt(frame_arena, &iter))
{ {
Xform ent_to_world_xf = ent->world_xf; Xform ent_to_world_xf = ent->world_xf;
@ -525,99 +536,76 @@ JobDef(V_VisWorker, _, __)
Vec4 color = ent->tint; Vec4 color = ent->tint;
i32 detail = 32; i32 detail = 32;
S_Shape shape = S_MulXformShape(ent_to_draw_xf, ent->local_shape); S_Shape shape = S_MulXformShape(ent_to_draw_xf, ent->local_shape);
V_DrawShape(dverts_arena, dvert_idx_arena, shape, LinearFromSrgb(color), detail, V_DrawFlag_Line); V_DrawShape(dverts_arena, dvert_idxs_arena, shape, LinearFromSrgb(color), detail, V_DrawFlag_Line);
} }
} }
dverts = GPU_PushCpuStructsToArena(gpu_frame_arena, V_DVert, dverts_arena);
dvert_idxs = GPU_PushCpuStructsToArena(gpu_frame_arena, i32, dvert_idxs_arena);
}
/* Create draw state */
if (!draw_target || !MatchVec2I32(draw_size, GPU_Count2D(draw_target)))
{
GPU_ResetArena(cl, gpu_arena);
/* Draw target */
{
GPU_TextureDesc desc = ZI;
desc.kind = GPU_TextureKind_Texture2D;
desc.flags = GPU_TextureFlag_Writable | GPU_TextureFlag_Rasterizable;
desc.format = GPU_Format_R16G16B16A16_Float;
desc.size = VEC3I32(draw_size.x, draw_size.y, 1);
desc.clear_color = LinearFromSrgb(swapchain_color);
draw_target = GPU_PushTexture(gpu_arena, desc);
}
/* Draw params */
draw_params = GPU_PushStructNoZero(gpu_arena, V_DParams);
}
/* Build draw params */
GpuPointer draw_params = ZI;
{
V_DParams params = ZI;
params.world_to_draw_xf = world_to_draw_xf;
GPU_CopyCpuStructToBuffer(draw_params, 0, &params);
}
////////////////////////////// //////////////////////////////
//- Render //- Dispatch shaders
/* Acquire draw target */ Rng2 viewport = RNG2(VEC2(0, 0), Vec2FromFields(draw_size));
{ {
if (draw_target && !MatchVec2I32(draw_size, GPU_GetTextureSize2D(draw_target))) GPU_SetConstantPtr(cl, V_DrawConst_Params, draw_params);
{ GPU_SetConstantPtr(cl, V_DrawConst_FinalTarget, draw_target);
YieldOnFence(gpu_fence, gpu_fence_target); GPU_SetConstantPtr(cl, V_DrawConst_Sampler, GPU_GetCommonPointSampler());
GPU_ReleaseResource(draw_target, GPU_ReleaseFlag_None); GPU_SetConstantPtr(cl, V_DrawConst_DVerts, dverts);
draw_target = 0;
}
if (!draw_target)
{
GPU_ResourceDesc desc = ZI;
desc.kind = GPU_ResourceKind_Texture2D;
desc.flags = GPU_ResourceFlag_Writable | GPU_ResourceFlag_Renderable | GPU_ResourceFlag_Zeroed;
desc.texture.format = GPU_Format_R16G16B16A16_Float;
desc.texture.size = VEC3I32(draw_size.x, draw_size.y, 1);
desc.clear_color = LinearFromSrgb(swapchain_color);
draw_target = GPU_AcquireResource(desc);
}
}
/* Upload transient buffers */
GPU_Resource *dverts_buffer = GPU_UploadTransientBufferFromArena(&dverts_tbuff, dverts_arena);
GPU_Resource *dvert_idx_buffer = GPU_UploadTransientBufferFromArena(&dvert_idx_tbuff, dvert_idx_arena);
u64 dverts_count = GPU_GetBufferCount(dverts_buffer);
u64 dvert_idx_count = GPU_GetBufferCount(dvert_idx_buffer);
GPU_Viewport viewport = GPU_ViewportFromRect(RNG2(VEC2(0, 0), Vec2FromFields(draw_size)));
GPU_Scissor scissor = GPU_ScissorFromRect(RNG2(VEC2(0, 0), Vec2FromFields(draw_size)));
GPU_CommandList *cl = GPU_BeginCommandList(GPU_QueueKind_Direct);
{
/* Prep background pass */
{
GPU_TransitionToWritable(cl, draw_target);
}
/* Backdrop pass */ /* Backdrop pass */
{ {
V_BackdropSig sig = ZI; GPU_SetShaderAccess(cl, draw_target, GPU_ShaderAccessKind_ReadWrite);
sig.target_tex = GPU_RWTexture2DRidFromResource(draw_target); GPU_Compute(cl, V_BackdropCS, V_BackdropCSThreadSizeFromTexSize(draw_size));
sig.target_size = draw_size;
GPU_Compute(cl, &sig, V_BackdropCS, V_BackdropCSThreadSizeFromTexSize(draw_size));
}
/* Prep shapes pass */
{
GPU_TransitionToRenderable(cl, draw_target, 0);
} }
/* Shapes pass */ /* Shapes pass */
{ {
V_DVertSig sig = ZI; GPU_SetShaderAccess(cl, draw_target, GPU_ShaderAccessKind_RasterTarget);
sig.target_size = draw_size;
sig.sampler = GPU_SamplerStateRidFromResource(GPU_GetCommonPointSampler());
sig.verts = GPU_StructuredBufferRidFromResource(dverts_buffer);
GPU_Rasterize(cl, GPU_Rasterize(cl,
&sig,
V_DVertVS, V_DVertPS, V_DVertVS, V_DVertPS,
1, 1, dvert_idxs_buffer,
viewport, 1, draw_target,
scissor, viewport, viewport,
1,
dvert_idx_buffer,
GPU_RasterizeMode_TriangleList); GPU_RasterizeMode_TriangleList);
} }
/* Transition draw target for UI composition */ GPU_SetShaderAccess(cl, draw_target, GPU_ShaderAccessKind_Readonly);
{
GPU_TransitionToReadable(cl, draw_target);
} }
} }
gpu_fence_target = GPU_EndCommandList(cl); GPU_CloseCommandLiist(cl);
/* Reset transient buffers */
{
GPU_ResetTransientBuffer(&dverts_tbuff, gpu_fence_target);
GPU_ResetTransientBuffer(&dvert_idx_tbuff, gpu_fence_target);
ResetArena(dverts_arena);
ResetArena(dvert_idx_arena);
}
////////////////////////////// //////////////////////////////
//- End vis frame //- End vis frame
UI_SetRawTexture(vis_box, draw_target, VEC2(0, 0), VEC2(1, 1)); UI_SetRawTexture(vis_box, draw_target, VEC2(0, 0), VEC2(1, 1));
gpu_fence_target = UI_EndFrame(ui_frame); UI_EndFrame(ui_frame);
++frame_gen; ++frame_gen;
shutdown = Atomic32Fetch(&vis_shared->shutdown); shutdown = Atomic32Fetch(&vis_shared->shutdown);

View File

@ -1,7 +1,7 @@
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Shape helpers //~ Shape helpers
void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 color_lin, V_DrawFlag flags) void V_DrawPoly(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, Vec2Array points, Vec4 color_lin, V_DrawFlag flags)
{ {
if (flags & V_DrawFlag_Line) if (flags & V_DrawFlag_Line)
{ {
@ -14,10 +14,10 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col
i32 lines_count = verts_count == 2 ? 1 : verts_count; i32 lines_count = verts_count == 2 ? 1 : verts_count;
i32 line_verts_count = lines_count * 4; i32 line_verts_count = lines_count * 4;
i32 idx_count = lines_count * 6; i32 idx_count = lines_count * 6;
i32 idx_offset = ArenaCount(verts_arena, V_DVert); i32 idx_offset = GPU_ArenaCount(verts_gpu_arena, V_DVert);
/* Push dverts */ /* Push dverts */
V_DVert *dverts = PushStructsNoZero(verts_arena, V_DVert, line_verts_count); V_DVert *dverts = GPU_PushStructsNoZero(verts_gpu_arena, V_DVert, line_verts_count);
for (i32 line_idx = 0; line_idx < lines_count; ++line_idx) for (i32 line_idx = 0; line_idx < lines_count; ++line_idx)
{ {
i32 a_idx = line_idx; i32 a_idx = line_idx;
@ -46,7 +46,7 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col
} }
/* Generate indices */ /* Generate indices */
i32 *indices = PushStructsNoZero(idx_arena, i32, idx_count); i32 *indices = PushStructsNoZero(idxs_gpu_arena, i32, idx_count);
for (i32 line_idx = 0; line_idx < lines_count; ++line_idx) for (i32 line_idx = 0; line_idx < lines_count; ++line_idx)
{ {
i32 indices_offset = line_idx * 6; i32 indices_offset = line_idx * 6;
@ -67,12 +67,12 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col
i32 verts_count = points.count; i32 verts_count = points.count;
if (verts_count >= 3) if (verts_count >= 3)
{ {
i32 idx_offset = ArenaCount(verts_arena, V_DVert); i32 idx_offset = GPU_ArenaCount(verts_gpu_arena, V_DVert);
i32 tris_count = verts_count - 2; i32 tris_count = verts_count - 2;
i32 idx_count = tris_count * 3; i32 idx_count = tris_count * 3;
/* Push dverts */ /* Push dverts */
V_DVert *dverts = PushStructsNoZero(verts_arena, V_DVert, verts_count); V_DVert *dverts = GPU_PushStructsNoZero(verts_gpu_arena, V_DVert, verts_count);
for (i32 point_idx = 0; point_idx < (i32)points.count; ++point_idx) for (i32 point_idx = 0; point_idx < (i32)points.count; ++point_idx)
{ {
V_DVert *dvert = &dverts[point_idx]; V_DVert *dvert = &dverts[point_idx];
@ -81,7 +81,7 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col
} }
/* Generate indices in a fan pattern */ /* Generate indices in a fan pattern */
i32 *indices = PushStructsNoZero(idx_arena, i32, idx_count); i32 *indices = PushStructsNoZero(idxs_gpu_arena, i32, idx_count);
for (i32 i = 0; i < tris_count; ++i) for (i32 i = 0; i < tris_count; ++i)
{ {
i32 tri_offset = i * 3; i32 tri_offset = i * 3;
@ -93,14 +93,14 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col
} }
} }
void V_DrawShape(Arena *verts_arena, Arena *idx_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags) void V_DrawShape(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags)
{ {
if (shape.radius == 0) if (shape.radius == 0)
{ {
Vec2Array draw_points = ZI; Vec2Array draw_points = ZI;
draw_points.points = shape.points; draw_points.points = shape.points;
draw_points.count = shape.points_count; draw_points.count = shape.points_count;
V_DrawPoly(verts_arena, idx_arena, draw_points, color_lin, flags); V_DrawPoly(verts_gpu_arena, idxs_gpu_arena, draw_points, color_lin, flags);
} }
else else
{ {
@ -116,7 +116,7 @@ void V_DrawShape(Arena *verts_arena, Arena *idx_arena, S_Shape shape, Vec4 color
Vec2 sp = S_SupportPointFromShape(shape, dir); Vec2 sp = S_SupportPointFromShape(shape, dir);
draw_points.points[i] = sp; draw_points.points[i] = sp;
} }
V_DrawPoly(verts_arena, idx_arena, draw_points, color_lin, flags); V_DrawPoly(verts_gpu_arena, idxs_gpu_arena, draw_points, color_lin, flags);
} }
EndScratch(scratch); EndScratch(scratch);
} }

View File

@ -10,5 +10,5 @@ Enum(V_DrawFlag)
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Shape helpers //~ Shape helpers
void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 color_lin, V_DrawFlag flags); void V_DrawPoly(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, Vec2Array points, Vec4 color_lin, V_DrawFlag flags);
void V_DrawShape(Arena *verts_arena, Arena *idx_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags); void V_DrawShape(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags);

View File

@ -20,76 +20,27 @@ JobDef(SPR_LoadTexture, sig, _)
ASE_DecodedImage decoded = ASE_DecodeImage(scratch.arena, data); ASE_DecodedImage decoded = ASE_DecodeImage(scratch.arena, data);
ok = decoded.ok; ok = decoded.ok;
/* Upload texture to gpu */
if (ok) if (ok)
{ {
GPU_ResourceDesc desc = ZI; GPU_Arena *gpu_perm = GPU_Perm();
desc.kind = GPU_ResourceKind_Texture2D; GpuPointer gpu_tex = GPU_PushTexture(gpu_perm,
desc.flags = GPU_ResourceFlag_None; GPU_TextureKind_2D,
desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb; GPU_Format_R8G8B8A8_Unorm_Srgb,
desc.texture.size = VEC3I32(decoded.width, decoded.height, 1); VEC3I32(decoded.width, decoded.height, 1),
texture->gpu_texture = GPU_AcquireResource(desc); GPU_TextureFlag_Allow);
texture->gpu_texture = gpu_tex;
texture->width = decoded.width; texture->width = decoded.width;
texture->height = decoded.height; texture->height = decoded.height;
/* Fill upload buffer */ GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_BackgroundCopy);
GPU_ResourceDesc upload_desc = ZI;
upload_desc.kind = GPU_ResourceKind_Buffer;
upload_desc.buffer.heap_kind = GPU_HeapKind_Upload;
upload_desc.buffer.count = GPU_GetFootprintSize(texture->gpu_texture);
GPU_Resource *upload = GPU_AcquireResource(upload_desc);
{ {
GPU_Mapped mapped = GPU_Map(upload); GPU_ReadCpu(cl, gpu_tex, decoded.data);
GPU_CopyBytesToFootprint(mapped.mem, (u8 *)decoded.pixels, texture->gpu_texture); GPU_SetCpuFence(cl, &entry->texture_ready_fence, 1);
GPU_Unmap(mapped); }
GPU_EndCommandList(cl);
} }
GPU_QueueKind copy_queue = GPU_QueueKind_BackgroundCopy;
GPU_QueueKind direct_queue = GPU_QueueKind_Direct;
Fence *direct_queue_fence = GPU_FenceFromQueue(direct_queue);
i64 direct_queue_fence_target = 0;
if (copy_queue == direct_queue)
{
/* Copy & transition GPU resource on direct queue*/
{
GPU_CommandList *cl = GPU_BeginCommandList(direct_queue);
{
GPU_TransitionToCopyDst(cl, texture->gpu_texture);
GPU_CopyResource(cl, texture->gpu_texture, upload);
GPU_TransitionToReadable(cl, texture->gpu_texture);
}
direct_queue_fence_target = GPU_EndCommandList(cl);
}
}
else
{
/* Copy to GPU resource on background copy queue*/
i64 copy_queue_fence_target = 0;
{
GPU_CommandList *cl = GPU_BeginCommandList(copy_queue);
{
GPU_TransitionToCopyDst(cl, texture->gpu_texture);
GPU_CopyResource(cl, texture->gpu_texture, upload);
}
copy_queue_fence_target = GPU_EndCommandList(cl);
}
/* Once copy finishes, transition resource to readable on direct queue */
{
GPU_QueueWait(direct_queue, copy_queue, copy_queue_fence_target);
GPU_CommandList *cl = GPU_BeginCommandList(direct_queue);
{
GPU_TransitionToReadable(cl, texture->gpu_texture);
}
direct_queue_fence_target = GPU_EndCommandList(cl);
}
}
/* Release upload buffer once transition finishes */
YieldOnFence(direct_queue_fence, direct_queue_fence_target);
GPU_ReleaseResource(upload, GPU_ReleaseFlag_None);
}
texture->loaded = 1;
SetFence(&entry->texture_ready_fence, 1);
EndScratch(scratch); EndScratch(scratch);
} }
@ -274,7 +225,6 @@ JobDef(SPR_LoadSheet, sig, _)
} }
} }
sheet->loaded = 1;
SetFence(&entry->sheet_ready_fence, 1); SetFence(&entry->sheet_ready_fence, 1);
EndScratch(scratch); EndScratch(scratch);
} }

View File

@ -17,8 +17,7 @@ Struct(SPR_SliceKey)
Struct(SPR_Texture) Struct(SPR_Texture)
{ {
b32 valid; b32 valid;
b32 loaded; GpuTexture gpu_texture;
GPU_Resource *gpu_texture;
u32 width; u32 width;
u32 height; u32 height;
}; };
@ -86,7 +85,6 @@ Struct(SPR_SliceGroupBin)
Struct(SPR_Sheet) Struct(SPR_Sheet)
{ {
b32 valid; b32 valid;
b32 loaded;
Vec2 image_size; Vec2 image_size;
Vec2 frame_size; Vec2 frame_size;

View File

@ -676,59 +676,32 @@ i64 UI_EndFrame(UI_Frame frame)
UI_State *g = &UI_state; UI_State *g = &UI_state;
UI_EFrameState old_eframe = g->eframe; UI_EFrameState old_eframe = g->eframe;
Vec2I32 monitor_size = frame.window_frame.monitor_size;
Vec2I32 draw_size = frame.window_frame.draw_size;
Rng2 draw_viewport = ZI;
draw_viewport.p1 = Vec2FromFields(draw_size);
////////////////////////////// //////////////////////////////
//- Reset state //- Reset state
{ {
ZeroStruct(&g->eframe); ZeroStruct(&g->eframe);
g->eframe.layout_arena = old_eframe.layout_arena; g->eframe.layout_arena = old_eframe.layout_arena;
g->eframe.rects_arena = old_eframe.rects_arena; g->eframe.drects_gpu_arena = old_eframe.drects_gpu_arena;
g->eframe.draw_target = old_eframe.draw_target; g->eframe.draw_target = old_eframe.draw_target;
g->eframe.swapchain = old_eframe.swapchain; g->eframe.swapchain = old_eframe.swapchain;
g->eframe.gpu_submit_fence_target = old_eframe.gpu_submit_fence_target; g->eframe.gpu_submit_fence_target = old_eframe.gpu_submit_fence_target;
g->eframe.draw_rects_tbuff = old_eframe.draw_rects_tbuff;
g->eframe.tick = old_eframe.tick; g->eframe.tick = old_eframe.tick;
} }
if (!g->eframe.layout_arena) if (!g->eframe.layout_arena)
{ {
g->eframe.layout_arena = AcquireArena(Gibi(64)); g->eframe.layout_arena = AcquireArena(Gibi(64));
g->eframe.rects_arena = AcquireArena(Gibi(64)); g->eframe.tex_gpu_arena = GPU_AcquireTextureArena();
g->eframe.draw_rects_tbuff = GPU_AcquireTransientBuffer(GPU_QueueKind_Direct, sizeof(UI_DRect)); g->eframe.frame_gpu_arena = GPU_AcquireArena(Mibi(16));
g->eframe.drects_gpu_arena = GPU_AcquireArena(Mibi(16));
} }
ResetArena(g->eframe.layout_arena); ResetArena(g->eframe.layout_arena);
ResetArena(g->eframe.rects_arena);
//////////////////////////////
//- Init render state
Vec2I32 monitor_size = frame.window_frame.monitor_size;
GPU_QueueKind gpu_render_queue = GPU_QueueKind_Direct;
Fence *submit_fence = GPU_FenceFromQueue(gpu_render_queue);
/* Acquire render target */
if (g->eframe.draw_target && !MatchVec2I32(monitor_size, GPU_GetTextureSize2D(g->eframe.draw_target)))
{
__profn("Release ui render target");
YieldOnFence(submit_fence, g->eframe.gpu_submit_fence_target);
GPU_ReleaseResource(g->eframe.draw_target, GPU_ReleaseFlag_None);
g->eframe.draw_target = 0;
}
if (!g->eframe.draw_target)
{
__profn("Acquire ui render target");
GPU_ResourceDesc desc = ZI;
desc.kind = GPU_ResourceKind_Texture2D;
desc.flags = GPU_ResourceFlag_Renderable | GPU_ResourceFlag_Writable;
// desc.texture.format = GPU_Format_R8G8B8A8_Unorm;
desc.texture.format = GPU_Format_R16G16B16A16_Float;
desc.texture.size = VEC3I32(monitor_size.x, monitor_size.y, 1);
g->eframe.draw_target = GPU_AcquireResource(desc);
}
Vec2I32 draw_size = frame.window_frame.draw_size;
Rng2 draw_viewport = ZI;
draw_viewport.p1 = Vec2FromFields(draw_size);
////////////////////////////// //////////////////////////////
//- Process commands //- Process commands
@ -1255,11 +1228,32 @@ i64 UI_EndFrame(UI_Frame frame)
} }
} }
//////////////////////////////
//- Render
GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_Direct);
{
////////////////////////////// //////////////////////////////
//- Build render data //- Build render data
GPU_QueueKind render_queue = GPU_QueueKind_Direct; /* Acquire render target */
Fence *render_fence = GPU_FenceFromQueue(render_queue); if (!g->eframe.draw_target || !MatchVec2I32(monitor_size, GPU_Count2D(g->eframe.draw_target)))
{
YieldOnFence(submit_fence, g->eframe.gpu_submit_fence_target);
GPU_ReleaseResource(g->eframe.draw_target, GPU_ReleaseFlag_None);
g->eframe.draw_target = 0;
}
if (!g->eframe.draw_target)
{
__profn("Acquire ui render target");
GPU_ResourceDesc desc = ZI;
desc.kind = GPU_ResourceKind_Texture2D;
desc.flags = GPU_ResourceFlag_Renderable | GPU_ResourceFlag_Writable;
// desc.texture.format = GPU_Format_R8G8B8A8_Unorm;
desc.texture.format = GPU_Format_R16G16B16A16_Float;
desc.texture.size = VEC3I32(monitor_size.x, monitor_size.y, 1);
g->eframe.draw_target = GPU_AcquireResource(desc);
}
/* Build rect instance data */ /* Build rect instance data */
for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index) for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index)
@ -1274,7 +1268,7 @@ i64 UI_EndFrame(UI_Frame frame)
/* Box rect */ /* Box rect */
{ {
UI_DRect *rect = PushStruct(g->eframe.rects_arena, UI_DRect); UI_DRect *rect = PushStruct(g->eframe.rects_arena, UI_DRect);
rect->flags |= UI_DRectFlag_DrawTexture * !!(box->raw_texture != 0); rect->flags |= UI_DRectFlag_DrawTexture * !(IsGpuPointerNil(box->raw_texture));
rect->p0 = box->p0; rect->p0 = box->p0;
rect->p1 = box->p1; rect->p1 = box->p1;
rect->tex_uv0 = VEC2(0, 0); rect->tex_uv0 = VEC2(0, 0);
@ -1290,9 +1284,9 @@ i64 UI_EndFrame(UI_Frame frame)
rect->bl_rounding = box->rounding_bl; rect->bl_rounding = box->rounding_bl;
/* Texture */ /* Texture */
if (box->raw_texture != 0) if (!IsGpuPointerNil(box->raw_texture))
{ {
rect->tex = GPU_Texture2DRidFromResource(box->raw_texture); rect->tex = box->raw_texture;
rect->tex_uv0 = box->raw_texture_uv0; rect->tex_uv0 = box->raw_texture_uv0;
rect->tex_uv1 = box->raw_texture_uv1; rect->tex_uv1 = box->raw_texture_uv1;
} }
@ -1426,14 +1420,12 @@ i64 UI_EndFrame(UI_Frame frame)
} }
////////////////////////////// //////////////////////////////
//- Render //- Dispatch shaders
/* Upload transient buffers */ /* Upload transient buffers */
GPU_Resource *draw_rects_buffer = GPU_UploadTransientBufferFromArena(&g->eframe.draw_rects_tbuff, g->eframe.rects_arena); GPU_Resource *draw_rects_buffer = GPU_UploadTransientBufferFromArena(&g->eframe.draw_rects_tbuff, g->eframe.rects_arena);
u32 draw_rects_count = GPU_GetBufferCount(draw_rects_buffer); u32 draw_rects_count = GPU_GetBufferCount(draw_rects_buffer);
/* Build command list */
GPU_CommandList *cl = GPU_BeginCommandList(render_queue);
{ {
//- Prep rect pass //- Prep rect pass
{ {
@ -1489,11 +1481,13 @@ i64 UI_EndFrame(UI_Frame frame)
} }
} }
} }
g->eframe.gpu_submit_fence_target = GPU_EndCommandList(cl);
/* Reset render data */ /* Reset */
GPU_ResetTransientBuffer(&g->eframe.draw_rects_tbuff, g->eframe.gpu_submit_fence_target); GPU_ResetArena(cl, g->eframe.frame_gpu_arena);
ResetArena(g->eframe.rects_arena); GPU_ResetArena(cl, g->eframe.drects_gpu_arena);
}
GPU_CloseCommandList(cl);
////////////////////////////// //////////////////////////////
//- Present & end frame //- Present & end frame
@ -1508,12 +1502,11 @@ i64 UI_EndFrame(UI_Frame frame)
Vec2I32 dst_p1 = VEC2I32(0, 0); Vec2I32 dst_p1 = VEC2I32(0, 0);
Vec2I32 src_p0 = VEC2I32(0, 0); Vec2I32 src_p0 = VEC2I32(0, 0);
Vec2I32 src_p1 = draw_size; Vec2I32 src_p1 = draw_size;
g->eframe.gpu_submit_fence_target = GPU_PresentSwapchain(g->eframe.swapchain, g->eframe.draw_target, AnyBit(g->bframe.frame_flags, UI_FrameFlag_Vsync), backbuffer_size, dst_p0, dst_p1, src_p0, src_p1, LinearFromSrgb(g->bframe.swapchain_color)); GPU_PresentSwapchain(g->eframe.swapchain, g->eframe.draw_target, AnyBit(g->bframe.frame_flags, UI_FrameFlag_Vsync), backbuffer_size, dst_p0, dst_p1, src_p0, src_p1, LinearFromSrgb(g->bframe.swapchain_color));
} }
WND_EndFrame(frame.window_frame); WND_EndFrame(frame.window_frame);
++g->eframe.tick; ++g->eframe.tick;
EndScratch(scratch); EndScratch(scratch);
return g->eframe.gpu_submit_fence_target;
} }

View File

@ -109,7 +109,7 @@ Enum(UI_BoxFlag)
X(Font, ResourceKey) \ X(Font, ResourceKey) \
X(FontSize, u32) \ X(FontSize, u32) \
X(Text, String) \ X(Text, String) \
X(BackgroundTexture, GPU_Resource *) \ X(BackgroundTexture, GpuPointer) \
X(BackgroundTextureUv0, Vec2) \ X(BackgroundTextureUv0, Vec2) \
X(BackgroundTextureUv1, Vec2) \ X(BackgroundTextureUv1, Vec2) \
/* --------------------------------------- */ \ /* --------------------------------------- */ \
@ -268,7 +268,7 @@ Struct(UI_Box)
//- Cmd data //- Cmd data
UI_BoxDesc desc; UI_BoxDesc desc;
GPU_Resource *raw_texture; GpuPointer raw_texture;
Vec2 raw_texture_uv0; Vec2 raw_texture_uv0;
Vec2 raw_texture_uv1; Vec2 raw_texture_uv1;
@ -372,14 +372,14 @@ Struct(UI_State)
struct UI_EFrameState struct UI_EFrameState
{ {
Arena *layout_arena; Arena *layout_arena;
Arena *rects_arena; GPU_Arena *frame_gpu_arena;
GPU_Arena *drects_gpu_arena;
u64 tick; u64 tick;
/* Render */ /* Render */
GPU_Resource *draw_target; GpuPointer draw_target;
GPU_Swapchain *swapchain; GPU_Swapchain *swapchain;
i64 gpu_submit_fence_target; i64 gpu_submit_fence_target;
GPU_TransientBuffer draw_rects_tbuff;
/* Layout */ /* Layout */
UI_Box *root_box; UI_Box *root_box;
@ -493,4 +493,4 @@ Vec2 UI_CursorPos(void);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ End frame //~ End frame
i64 UI_EndFrame(UI_Frame frame); void UI_EndFrame(UI_Frame frame);