gpu layer refactor progress
This commit is contained in:
parent
4b0a12bc20
commit
4a8eacbcda
@ -762,6 +762,10 @@ Struct(ResourceKey)
|
||||
|
||||
#if LanguageIsC
|
||||
|
||||
Struct(GpuPointer) { u32 v; };
|
||||
Struct(GpuBufferPos) { GpuPointer p; u64 byte_offset; };
|
||||
|
||||
Inline b32 IsGpuPointerNil(GpuPointer p) { return p.v == 0; }
|
||||
|
||||
Struct(VertexShader) { ResourceKey resource; };
|
||||
Struct(PixelShader) { ResourceKey resource; };
|
||||
@ -780,16 +784,9 @@ Struct(SamplerStateRid) { u32 v; };
|
||||
|
||||
#elif LanguageIsGpu
|
||||
|
||||
//- Resource descriptor index types
|
||||
typedef uint StructuredBufferRid;
|
||||
typedef uint RWStructuredBufferRid;
|
||||
typedef uint Texture1DRid;
|
||||
typedef uint Texture2DRid;
|
||||
typedef uint Texture3DRid;
|
||||
typedef uint RWTexture1DRid;
|
||||
typedef uint RWTexture2DRid;
|
||||
typedef uint RWTexture3DRid;
|
||||
typedef uint SamplerStateRid;
|
||||
typedef u32 GpuBuffer;
|
||||
typedef u32 GpuTexture;
|
||||
typedef u32 GpuSampler;
|
||||
|
||||
//- Shader declaration
|
||||
# define ComputeShader(name, x, y, z) [numthreads(x, y, z)] void name(Semantic(Vec3U32, SV_DispatchThreadID))
|
||||
|
||||
@ -1142,12 +1142,6 @@ u32 U32FromVec4(Vec4 v)
|
||||
return result;
|
||||
}
|
||||
|
||||
PackedVec4 PackVec4(Vec4 v)
|
||||
{
|
||||
PackedVec4 result = ZI;
|
||||
return result;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Xform operations
|
||||
|
||||
|
||||
@ -20,60 +20,104 @@ Enum(Axis)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Vector types
|
||||
|
||||
#define VEC2(x, y) (Vec2) { (x), (y) }
|
||||
#define VEC3(x, y, z) (Vec3) { (x), (y), (z) }
|
||||
#define VEC4(x, y, z, w) (Vec4) { (x), (y), (z), (w) }
|
||||
|
||||
#define VEC2I32(x, y) (Vec2I32) { (x), (y) }
|
||||
#define VEC3I32(x, y, z) (Vec3I32) { (x), (y), (z) }
|
||||
#define VEC4I32(x, y, z, w) (Vec4I32) { (x), (y), (z), (w) }
|
||||
|
||||
#define VEC2U32(x, y) (Vec2U32) { (x), (y) }
|
||||
#define VEC3U32(x, y, z) (Vec3U32) { (x), (y), (z) }
|
||||
#define VEC4U32(x, y, z, w) (Vec4U32) { (x), (y), (z), (w) }
|
||||
//- Vec2
|
||||
|
||||
Struct(Vec2) { f32 x, y; };
|
||||
Struct(Vec3) { f32 x, y, z; };
|
||||
Struct(Vec4) { f32 x, y, z, w; };
|
||||
|
||||
Struct(Vec2I32) { i32 x, y; };
|
||||
Struct(Vec2I64) { i64 x, y; };
|
||||
Struct(Vec2U32) { i32 x, y; };
|
||||
Struct(Vec2U64) { u64 x, y; };
|
||||
|
||||
Struct(Vec2Array) { Vec2 *points; u64 count; };
|
||||
|
||||
#define VEC2(x, y) (Vec2) { (x), (y) }
|
||||
#define VEC2I32(x, y) (Vec2I32) { (x), (y) }
|
||||
#define VEC2I64(x, y) (Vec2I64) { (x), (y) }
|
||||
#define VEC2U32(x, y) (Vec2U32) { (x), (y) }
|
||||
#define VEC2U64(x, y) (Vec2U64) { (x), (y) }
|
||||
|
||||
#define Vec2FromVec(v) VEC2((v).x, (v).y)
|
||||
|
||||
//- Vec3
|
||||
|
||||
Struct(Vec3) { f32 x, y, z; };
|
||||
Struct(Vec3I32) { i32 x, y, z; };
|
||||
Struct(Vec3I64) { i64 x, y, z; };
|
||||
Struct(Vec3U32) { i32 x, y, z; };
|
||||
Struct(Vec3U64) { u64 x, y, z; };
|
||||
|
||||
Struct(Vec3Array) { Vec3 *points; u64 count; };
|
||||
|
||||
#define VEC3(x, y, z) (Vec3) { (x), (y), (z) }
|
||||
#define VEC3I32(x, y, z) (Vec3I32) { (x), (y), (z) }
|
||||
#define VEC3I64(x, y, z) (Vec3I64) { (x), (y), (z) }
|
||||
#define VEC3U32(x, y, z) (Vec3U32) { (x), (y), (z) }
|
||||
#define VEC3U64(x, y, z) (Vec3U64) { (x), (y), (z) }
|
||||
|
||||
#define Vec3FromVec(v) VEC3((v).x, (v).y, (v).z)
|
||||
|
||||
//- Vec4
|
||||
|
||||
Struct(Vec4) { f32 x, y, z, w; };
|
||||
Struct(Vec4I32) { i32 x, y, z, w; };
|
||||
Struct(Vec4I64) { i64 x, y, z, w; };
|
||||
Struct(Vec4U32) { i32 x, y, z, w; };
|
||||
Struct(Vec4U64) { u64 x, y, z, w; };
|
||||
|
||||
Struct(Vec2U32) { u32 x, y; };
|
||||
Struct(Vec3U32) { u32 x, y, z; };
|
||||
Struct(Vec4U32) { u32 x, y, z, w; };
|
||||
Struct(Vec4Array) { Vec4 *points; u64 count; };
|
||||
|
||||
Struct(PackedVec4) { u32 hi; u32 lo; };
|
||||
#define VEC4(x, y, z, w) (Vec4) { (x), (y), (z), (w) }
|
||||
#define VEC4I32(x, y, z, w) (Vec4I32) { (x), (y), (z), (w) }
|
||||
#define VEC4I64(x, y, z, w) (Vec4I64) { (x), (y), (z), (w) }
|
||||
#define VEC4U32(x, y, z, w) (Vec4U32) { (x), (y), (z), (w) }
|
||||
#define VEC4U64(x, y, z, w) (Vec4U64) { (x), (y), (z), (w) }
|
||||
|
||||
Struct(Vec2Array)
|
||||
{
|
||||
Vec2 *points;
|
||||
u64 count;
|
||||
};
|
||||
|
||||
Struct(Vec3Array)
|
||||
{
|
||||
Vec3 *points;
|
||||
u64 count;
|
||||
};
|
||||
|
||||
Struct(Vec4Array)
|
||||
{
|
||||
Vec4 *points;
|
||||
u64 count;
|
||||
};
|
||||
#define Vec4FromVec(v) VEC4((v).x, (v).y, (v).z, (v).w)
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Range types
|
||||
|
||||
#define RNG2(p0, p1) (Rng2) { (p0), (p1) }
|
||||
#define RNG2I32(p0, p1) (Rng2I32) { (p0), (p1) }
|
||||
#define RNG2U32(p0, p1) (Rng2U32) { (p0), (p1) }
|
||||
//- Rng1
|
||||
|
||||
Struct(Rng) { f32 min; f32 max; };
|
||||
Struct(RngI32) { i32 min; i32 max; };
|
||||
Struct(RngI64) { i64 min; i64 max; };
|
||||
Struct(RngU32) { u32 min; u32 max; };
|
||||
Struct(RngU64) { u64 min; u64 max; };
|
||||
|
||||
#define RNG(min, max) (Rng) { (min), (max) }
|
||||
#define RNGI32(min, max) (RngI32) { (min), (max) }
|
||||
#define RNGI64(min, max) (RngI64) { (min), (max) }
|
||||
#define RNGU32(min, max) (RngU32) { (min), (max) }
|
||||
#define RNGU64(min, max) (RngU64) { (min), (max) }
|
||||
|
||||
//- Rng2
|
||||
|
||||
Struct(Rng2) { Vec2 p0; Vec2 p1; };
|
||||
Struct(Rng2I32) { Vec2I32 p0; Vec2I32 p1; };
|
||||
Struct(Rng2I64) { Vec2I64 p0; Vec2I64 p1; };
|
||||
Struct(Rng2U32) { Vec2U32 p0; Vec2U32 p1; };
|
||||
Struct(Rng2U64) { Vec2U64 p0; Vec2U64 p1; };
|
||||
|
||||
#define RNG2(p0, p1) (Rng2) { (p0), (p1) }
|
||||
#define RNG2I32(p0, p1) (Rng2I32) { (p0), (p1) }
|
||||
#define RNG2I64(p0, p1) (Rng2I64) { (p0), (p1) }
|
||||
#define RNG2U32(p0, p1) (Rng2U32) { (p0), (p1) }
|
||||
#define RNG2U64(p0, p1) (Rng2U64) { (p0), (p1) }
|
||||
|
||||
//- Rng3
|
||||
|
||||
Struct(Rng3) { Vec3 p0; Vec3 p1; };
|
||||
Struct(Rng3I32) { Vec3I32 p0; Vec3I32 p1; };
|
||||
Struct(Rng3I64) { Vec3I64 p0; Vec3I64 p1; };
|
||||
Struct(Rng3U32) { Vec3U32 p0; Vec3U32 p1; };
|
||||
Struct(Rng3U64) { Vec3U64 p0; Vec3U64 p1; };
|
||||
|
||||
#define RNG3(p0, p1) (Rng3) { (p0), (p1) }
|
||||
#define RNG3I32(p0, p1) (Rng3I32) { (p0), (p1) }
|
||||
#define RNG3I64(p0, p1) (Rng3I64) { (p0), (p1) }
|
||||
#define RNG3U32(p0, p1) (Rng3U32) { (p0), (p1) }
|
||||
#define RNG3U64(p0, p1) (Rng3U64) { (p0), (p1) }
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Xform types
|
||||
@ -245,8 +289,6 @@ Vec4 BlendSrgb(Vec4 v0, Vec4 v1, f32 t);
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Vec2 operations
|
||||
|
||||
#define Vec2FromFields(v) VEC2((v).x, (v).y)
|
||||
|
||||
b32 IsVec2Zero(Vec2 a);
|
||||
b32 MatchVec2(Vec2 a, Vec2 b);
|
||||
|
||||
@ -315,7 +357,6 @@ Vec2I32 SubVec2I32(Vec2I32 a, Vec2I32 b);
|
||||
|
||||
Vec4 Vec4FromU32(u32 v);
|
||||
u32 U32FromVec4(Vec4 v);
|
||||
PackedVec4 PackVec4(Vec4 v);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Xform operations
|
||||
|
||||
@ -17,7 +17,6 @@ typedef int4 Vec4I32;
|
||||
typedef uint2 Vec2U32;
|
||||
typedef uint3 Vec3U32;
|
||||
typedef uint4 Vec4U32;
|
||||
typedef uint2 PackedVec4;
|
||||
typedef float2x3 Xform;
|
||||
typedef float4 Rect;
|
||||
typedef float4 ClipRect;
|
||||
@ -25,19 +24,6 @@ typedef float4 Aabb;
|
||||
typedef float4 Quad;
|
||||
typedef float4x4 Mat4x4;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Color helpers
|
||||
|
||||
/* Unpacks a 32-bit color into a Vec4.
 * Bits 0-7 -> r, 8-15 -> g, 16-23 -> b, 24-31 -> a; each byte is divided by 255. */
Vec4 Vec4FromU32(u32 v)
{
  u32 r8 = (v >> 0) & 0xFF;
  u32 g8 = (v >> 8) & 0xFF;
  u32 b8 = (v >> 16) & 0xFF;
  u32 a8 = (v >> 24) & 0xFF;

  Vec4 color;
  color.r = r8 / 255.0;
  color.g = g8 / 255.0;
  color.b = b8 / 255.0;
  color.a = a8 / 255.0;
  return color;
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Vertex ID helpers
|
||||
|
||||
|
||||
@ -48,71 +48,37 @@ JobDef(F_Load, sig, _)
|
||||
}
|
||||
TTF_Decoded decoded = TTF_Decode(scratch.arena, resource_data, em_size, font_codes, countof(font_codes));
|
||||
|
||||
/* Send texture to GPU */
|
||||
GPU_Resource *texture = 0;
|
||||
/* Upload texture to GPU */
|
||||
Fence completion_fence = ZI;
|
||||
{
|
||||
GPU_CommandList *cl = GPU_BeginCommandList(GPU_QueueKind_BackgroundCopy);
|
||||
GPU_Arena *gpu_temp = GPU_AcquireArena();
|
||||
{
|
||||
GpuTexture gpu_texture = ZI;
|
||||
{
|
||||
GPU_Arena *gpu_perm = GPU_Perm();
|
||||
GPU_ResourceDesc desc = ZI;
|
||||
desc.kind = GPU_ResourceKind_Texture2D;
|
||||
desc.flags = GPU_ResourceFlag_None;
|
||||
desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb;
|
||||
desc.texture.size = VEC3I32(decoded.image_width, decoded.image_height, 1);
|
||||
texture = GPU_AcquireResource(desc);
|
||||
|
||||
/* Fill upload buffer */
|
||||
GPU_ResourceDesc upload_desc = ZI;
|
||||
upload_desc.kind = GPU_ResourceKind_Buffer;
|
||||
upload_desc.buffer.heap_kind = GPU_HeapKind_Upload;
|
||||
upload_desc.buffer.count = GPU_GetFootprintSize(texture);
|
||||
GPU_Resource *upload = GPU_AcquireResource(upload_desc);
|
||||
{
|
||||
GPU_Mapped mapped = GPU_Map(upload);
|
||||
GPU_CopyBytesToFootprint(mapped.mem, (u8 *)decoded.image_pixels, texture);
|
||||
GPU_Unmap(mapped);
|
||||
gpu_texture = GPU_PushTexture(gpu_perm, GPU_TextureKind_2D, desc);
|
||||
}
|
||||
|
||||
GPU_QueueKind copy_queue = GPU_QueueKind_BackgroundCopy;
|
||||
GPU_QueueKind direct_queue = GPU_QueueKind_Direct;
|
||||
Fence *direct_queue_fence = GPU_FenceFromQueue(direct_queue);
|
||||
i64 direct_queue_fence_target = 0;
|
||||
if (copy_queue == direct_queue)
|
||||
texture->gpu_texture = gpu_texture;
|
||||
texture->width = decoded.width;
|
||||
texture->height = decoded.height;
|
||||
GpuBuffer src_buff = GPU_PushBuffer(gpu_temp, GPU_GetFootprintSize(gpu_texture), GPU_BufferFlag_CpuWritable);
|
||||
GpuAddress src_addr = ZI;
|
||||
{
|
||||
/* Copy & transition GPU resource on direct queue*/
|
||||
{
|
||||
GPU_CommandList *cl = GPU_BeginCommandList(direct_queue);
|
||||
{
|
||||
GPU_TransitionToCopyDst(cl, texture);
|
||||
GPU_CopyResource(cl, texture, upload);
|
||||
GPU_TransitionToReadable(cl, texture);
|
||||
u32 *p = GPU_PushStructsNoZero(src_buff, u32, decoded.width * decoded.height);
|
||||
CopyStructs(p, decoded.pixels, decoded.width * decoded.heigth);
|
||||
GPU_TransitionBufferToCopySrc(src_buff);
|
||||
GPU_TransitionTextureToCopyDst(gpu_texture);
|
||||
GPU_CopyBytesToFootprint(gpu_texture, src_buff, src_addr, decoded.width * decoded.height * 4);
|
||||
GPU_TransitionTextureToReadonly(gpu_texture);
|
||||
}
|
||||
direct_queue_fence_target = GPU_EndCommandList(cl);
|
||||
GPU_SetFence(&completion_fence, 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Copy to GPU resource on background copy queue*/
|
||||
i64 copy_queue_fence_target = 0;
|
||||
{
|
||||
GPU_CommandList *cl = GPU_BeginCommandList(copy_queue);
|
||||
{
|
||||
GPU_TransitionToCopyDst(cl, texture);
|
||||
GPU_CopyResource(cl, texture, upload);
|
||||
}
|
||||
copy_queue_fence_target = GPU_EndCommandList(cl);
|
||||
}
|
||||
/* Once copy finishes, transition resource to readable on direct queue */
|
||||
{
|
||||
GPU_QueueWait(direct_queue, copy_queue, copy_queue_fence_target);
|
||||
GPU_CommandList *cl = GPU_BeginCommandList(direct_queue);
|
||||
{
|
||||
GPU_TransitionToReadable(cl, texture);
|
||||
}
|
||||
direct_queue_fence_target = GPU_EndCommandList(cl);
|
||||
}
|
||||
}
|
||||
|
||||
/* Release upload buffer once transition finishes */
|
||||
YieldOnFence(direct_queue_fence, direct_queue_fence_target);
|
||||
GPU_ReleaseResource(upload, GPU_ReleaseFlag_None);
|
||||
GPU_ReleaseArena(gpu_temp);
|
||||
GPU_EndCommandList(cl);
|
||||
}
|
||||
|
||||
/* Acquire store memory */
|
||||
@ -155,6 +121,8 @@ JobDef(F_Load, sig, _)
|
||||
font->lookup[codepoint] = decoded.cache_indices[i];
|
||||
}
|
||||
|
||||
YieldOnFence(&completion_fence, 1);
|
||||
|
||||
LogSuccessF("Loaded font \"%F\" (font size: %F, em size: %F) in %F seconds", FmtString(name), FmtFloat((f64)font_size), FmtFloat((f64)em_size), FmtFloat(SecondsFromNs(TimeNs() - start_ns)));
|
||||
AC_MarkReady(asset, font);
|
||||
|
||||
|
||||
@ -19,4 +19,4 @@
|
||||
|
||||
//- Startup
|
||||
@Startup GPU_Startup
|
||||
@Startup GPU_StartupUtils
|
||||
@Startup GPU_StartupCommon
|
||||
|
||||
@ -3,227 +3,76 @@ GPU_SharedUtilState GPU_shared_util_state = ZI;
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Startup
|
||||
|
||||
void GPU_StartupUtils(void)
|
||||
void GPU_StartupCommon(void)
|
||||
{
|
||||
GPU_SharedUtilState *g = &GPU_shared_util_state;
|
||||
|
||||
GPU_QueueKind queue_kind = GPU_QueueKind_Direct;
|
||||
Fence *queue_fence = GPU_FenceFromQueue(queue_kind);
|
||||
i64 queue_fence_target = FetchFence(queue_fence);
|
||||
GPU_Arena *gpu_perm = GPU_Perm();
|
||||
|
||||
GPU_Resource *noise_upload = 0;
|
||||
GPU_Resource *quad_upload = 0;
|
||||
GPU_Resource *noise = 0;
|
||||
GPU_Resource *quad = 0;
|
||||
|
||||
GPU_CommandList *cl = GPU_BeginCommandList(queue_kind);
|
||||
/* Upload data to gpu */
|
||||
GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_Direct);
|
||||
{
|
||||
/* Upload noise */
|
||||
/* Init noise texture */
|
||||
String noise_data = DataFromResource(ResourceKeyFromStore(&GPU_Resources, Lit("noise_128x128x64_16.dat")));
|
||||
Vec3I32 noise_dims = VEC3I32(128, 128, 64);
|
||||
GpuPointer noise_tex = ZI;
|
||||
{
|
||||
Vec3I32 noise_size = VEC3I32(128, 128, 64);
|
||||
ResourceKey noise_resource = ResourceKeyFromStore(&GPU_Resources, Lit("noise_128x128x64_16.dat"));
|
||||
String noise_res_data = DataFromResource(noise_resource);
|
||||
if (noise_res_data.len != noise_size.x * noise_size.y * noise_size.z * 2)
|
||||
GPU_TextureDesc noise_desc = ZI;
|
||||
noise_desc.format = GPU_Format_R16_Uint;
|
||||
noise_desc.size = noise_dims;
|
||||
if (noise_data.len != noise_dims.x * noise_dims.y * noise_dims.z * 2)
|
||||
{
|
||||
Panic(Lit("Unexpected noise texture size"));
|
||||
}
|
||||
GPU_ResourceDesc desc = ZI;
|
||||
desc.kind = GPU_ResourceKind_Texture3D;
|
||||
desc.texture.format = GPU_Format_R16_Uint;
|
||||
desc.texture.size = noise_size;
|
||||
noise = GPU_AcquireResource(desc);
|
||||
{
|
||||
u64 footprint_size = GPU_GetFootprintSize(noise);
|
||||
GPU_ResourceDesc upload_desc = ZI;
|
||||
upload_desc.kind = GPU_ResourceKind_Buffer;
|
||||
upload_desc.buffer.heap_kind = GPU_HeapKind_Upload;
|
||||
upload_desc.buffer.count = footprint_size;
|
||||
noise_upload = GPU_AcquireResource(upload_desc);
|
||||
GPU_Mapped mapped = GPU_Map(noise_upload);
|
||||
GPU_CopyBytesToFootprint(mapped.mem, noise_res_data.text, noise);
|
||||
GPU_Unmap(mapped);
|
||||
}
|
||||
GPU_TransitionToCopyDst(cl, noise);
|
||||
GPU_CopyResource(cl, noise, noise_upload);
|
||||
GPU_TransitionToReadable(cl, noise);
|
||||
noise_tex = GPU_PushTexture(gpu_perm, GPU_TextureKind_2D, GPU_Format_R16_Uint, noise_dims, GPU_TextureFlag_None);
|
||||
GPU_CopyFromCpu(cl, noise_tex, noise_data);
|
||||
}
|
||||
g->noise_tex = noise_tex;
|
||||
|
||||
/* Upload quad indices */
|
||||
/* Init quad index buffer */
|
||||
GpuPointer quad_indices = ZI;
|
||||
{
|
||||
u16 quad_indices[6] = { 0, 1, 2, 0, 2, 3 };
|
||||
GPU_ResourceDesc desc = ZI;
|
||||
desc.kind = GPU_ResourceKind_Buffer;
|
||||
desc.buffer.count = countof(quad_indices);
|
||||
desc.buffer.stride = sizeof(quad_indices[0]);
|
||||
quad = GPU_AcquireResource(desc);
|
||||
{
|
||||
GPU_ResourceDesc upload_desc = ZI;
|
||||
upload_desc.kind = GPU_ResourceKind_Buffer;
|
||||
upload_desc.buffer.heap_kind = GPU_HeapKind_Upload;
|
||||
upload_desc.buffer.count = desc.buffer.count * desc.buffer.stride;
|
||||
quad_upload = GPU_AcquireResource(upload_desc);
|
||||
GPU_Mapped mapped = GPU_Map(quad_upload);
|
||||
CopyBytes(mapped.mem, quad_indices, sizeof(quad_indices));
|
||||
GPU_Unmap(mapped);
|
||||
u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 };
|
||||
quad_indices = GPU_PushBuffer(gpu_perm, u16, countof(quad_data), GPU_BufferFlag_None);
|
||||
GPU_CopyFromCpu(cl, quad_indices, StringFromArray(quad_data));
|
||||
}
|
||||
GPU_TransitionToCopyDst(cl, quad);
|
||||
GPU_CopyResource(cl, quad, quad_upload);
|
||||
GPU_TransitionToReadable(cl, quad);
|
||||
g->quad_indices = quad_indices;
|
||||
}
|
||||
}
|
||||
queue_fence_target = GPU_EndCommandList(cl);
|
||||
GPU_CloseCommandList(cl);
|
||||
|
||||
/* Init point sampler */
|
||||
g->pt_sampler = GPU_AcquireResource((GPU_ResourceDesc) { .kind = GPU_ResourceKind_Sampler, .sampler.filter = GPU_Filter_MinMagMipPoint });
|
||||
g->pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint });
|
||||
}
|
||||
|
||||
/* Wait & cleanup */
|
||||
YieldOnFence(queue_fence, queue_fence_target);
|
||||
GPU_ReleaseResource(noise_upload, GPU_ReleaseFlag_None);
|
||||
GPU_ReleaseResource(quad_upload, GPU_ReleaseFlag_None);
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Arena helpers
|
||||
|
||||
g->noise = noise;
|
||||
g->quad_indices = quad;
|
||||
GPU_Arena *GPU_Perm(void)
|
||||
{
|
||||
i16 fiber_id = FiberId();
|
||||
GPU_Arena *perm = GPU_shared_util_state.perm_arenas[fiber_id];
|
||||
if (!perm)
|
||||
{
|
||||
GPU_shared_util_state.perm_arenas[fiber_id] = GPU_AcquireArena();
|
||||
perm = GPU_shared_util_state.perm_arenas[fiber_id];
|
||||
}
|
||||
return perm;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Common resource helpers
|
||||
|
||||
GPU_Resource *GPU_GetCommonPointSampler(void)
|
||||
GpuPointer GPU_GetCommonPointSampler(void)
|
||||
{
|
||||
return GPU_shared_util_state.pt_sampler;
|
||||
}
|
||||
|
||||
GPU_Resource *GPU_GetCommonQuadIndices(void)
|
||||
GpuPointer GPU_GetCommonQuadIndices(void)
|
||||
{
|
||||
return GPU_shared_util_state.quad_indices;
|
||||
}
|
||||
|
||||
GPU_Resource *GPU_GetCommonNoise(void)
|
||||
GpuPointer GPU_GetCommonNoise(void)
|
||||
{
|
||||
return GPU_shared_util_state.noise;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Transient buffer operations
|
||||
|
||||
/* Builds a zero-initialized transient buffer bound to `queue_kind`.
 * `element_size` is clamped to at least 1. */
GPU_TransientBuffer GPU_AcquireTransientBuffer(GPU_QueueKind queue_kind, u32 element_size)
{
  GPU_TransientBuffer result = ZI;
  result.queue_kind = queue_kind;
  result.element_size = MaxU32(element_size, 1);
  return result;
}
|
||||
|
||||
/* Releases every submitted resource owned by `tbuff` and hands the now-empty
 * nodes back to the shared free list. Yields on the buffer's queue fence
 * before releasing anything. */
void GPU_ReleaseTransientBuffer(GPU_TransientBuffer *tbuff)
{
  GPU_SharedUtilState *g = &GPU_shared_util_state;

  /* Fetch the queue's current fence value and yield on it */
  Fence *queue_fence = GPU_FenceFromQueue(tbuff->queue_kind);
  i64 queue_fence_value = FetchFence(queue_fence);
  YieldOnFence(queue_fence, queue_fence_value);

  if (tbuff->first_submitted)
  {
    for (GPU_SubmittedResourceNode *node = tbuff->first_submitted; node; node = node->next)
    {
      GPU_ReleaseResource(node->resource, GPU_ReleaseFlag_None);
      /* Clear the pointer: GPU_UploadTransientBuffer releases any non-null
       * resource on a recycled node, which would be a second release */
      node->resource = 0;
    }

    /* Splice the whole submitted list onto the front of the shared free list */
    Lock lock = LockE(&g->submitted_transient_buffers_mutex);
    {
      tbuff->last_submitted->next = g->first_free_submitted_transient_buffer;
      g->first_free_submitted_transient_buffer = tbuff->first_submitted;
    }
    Unlock(&lock);

    /* The nodes now belong to the free list; drop this buffer's references */
    tbuff->first_submitted = 0;
    tbuff->last_submitted = 0;
  }
}
|
||||
|
||||
/* Copies `src_size` bytes from `src` into a freshly acquired upload-heap
 * buffer and records it as the transient buffer's current upload.
 *
 * tbuff:    Transient buffer to upload through. Panics if it already has an
 *           upload that has not been reset.
 * src:      Source bytes.
 * src_size: Byte count; the resource is sized as src_size / element_size elements.
 *
 * Returns the upload resource. */
GPU_Resource *GPU_UploadTransientBuffer(GPU_TransientBuffer *tbuff, void *src, u64 src_size)
{
  GPU_SharedUtilState *g = &GPU_shared_util_state;
  u64 element_count = src_size / tbuff->element_size;

  Fence *queue_fence = GPU_FenceFromQueue(tbuff->queue_kind);
  i64 queue_fence_value = FetchFence(queue_fence);

  if (tbuff->uploaded != 0)
  {
    Panic(Lit("GPU transient buffer uploaded without a reset"));
  }

  /* Grab resource node */
  GPU_SubmittedResourceNode *upload = 0;
  {
    /* Prefer the oldest submitted node if the queue has already passed its fence target */
    if (tbuff->first_submitted && tbuff->first_submitted->fence_target <= queue_fence_value)
    {
      upload = tbuff->first_submitted;
      SllQueuePop(tbuff->first_submitted, tbuff->last_submitted);
    }
    /* Otherwise take a node from the shared free list */
    if (!upload)
    {
      Lock lock = LockE(&g->submitted_transient_buffers_mutex);
      {
        upload = g->first_free_submitted_transient_buffer;
        if (upload)
        {
          /* Advance the head exactly once. Popping a second time would skip a
           * node and dereference null when this was the last free node */
          g->first_free_submitted_transient_buffer = upload->next;
        }
      }
      Unlock(&lock);
    }
    /* Otherwise allocate a new node from the perm arena */
    if (!upload)
    {
      Arena *perm = PermArena();
      upload = PushStruct(perm, GPU_SubmittedResourceNode);
    }
  }

  /* Create gpu resource */
  {
    if (upload->resource)
    {
      GPU_ReleaseResource(upload->resource, GPU_ReleaseFlag_Reuse);
      upload->resource = 0;
    }
    GPU_ResourceDesc desc = ZI;
    desc.kind = GPU_ResourceKind_Buffer;
    desc.flags = GPU_ResourceFlag_None;
    desc.buffer.heap_kind = GPU_HeapKind_Upload;
    desc.buffer.count = element_count;
    desc.buffer.stride = tbuff->element_size;
    upload->resource = GPU_AcquireResource(desc);
  }

  /* Fill gpu resource */
  {
    __profn("Copy to transfer buffer");
    GPU_Mapped m = GPU_Map(upload->resource);
    CopyBytes(m.mem, src, src_size);
    GPU_Unmap(m);
  }

  tbuff->uploaded = upload;
  return upload->resource;
}
|
||||
|
||||
/* Uploads the contents of `arena` through `tbuff`. Only whole elements are
 * uploaded: the byte count is rounded down to a multiple of the buffer's
 * element size. Returns the upload resource. */
GPU_Resource *GPU_UploadTransientBufferFromArena(GPU_TransientBuffer *tbuff, Arena *arena)
{
  /* 64-bit math so the count and byte size cannot wrap before reaching the u64 size parameter */
  u64 element_count = arena->pos / tbuff->element_size;
  u64 upload_size = element_count * tbuff->element_size;
  return GPU_UploadTransientBuffer(tbuff, ArenaFirst(arena, u8), upload_size);
}
|
||||
|
||||
/* Moves the buffer's current upload (if any) onto its submitted queue, tagged
 * with `queue_fence_target`, and clears the current-upload slot so another
 * upload may follow. */
void GPU_ResetTransientBuffer(GPU_TransientBuffer *tbuff, i64 queue_fence_target)
{
  GPU_SubmittedResourceNode *uploaded = tbuff->uploaded;
  if (uploaded)
  {
    uploaded->fence_target = queue_fence_target;
    SllQueuePush(tbuff->first_submitted, tbuff->last_submitted, uploaded);
    tbuff->uploaded = 0;
  }
}
|
||||
|
||||
@ -1,62 +1,29 @@
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Transient buffer types
|
||||
|
||||
Struct(GPU_SubmittedResourceNode)
|
||||
{
|
||||
GPU_SubmittedResourceNode *next;
|
||||
|
||||
/* Set during transient upload */
|
||||
GPU_Resource *resource;
|
||||
|
||||
/* Set during transient reset */
|
||||
i64 fence_target; /* Once the buffer's queue reaches the target, the resource can be freed or reused */
|
||||
};
|
||||
|
||||
Struct(GPU_TransientBuffer)
|
||||
{
|
||||
GPU_QueueKind queue_kind;
|
||||
u32 element_size;
|
||||
|
||||
GPU_SubmittedResourceNode *uploaded;
|
||||
GPU_SubmittedResourceNode *first_submitted;
|
||||
GPU_SubmittedResourceNode *last_submitted;
|
||||
u32 max_in_flight;
|
||||
};
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ State types
|
||||
|
||||
Struct(GPU_SharedUtilState)
|
||||
{
|
||||
/* Common shared resources */
|
||||
GPU_Resource *pt_sampler;
|
||||
GPU_Resource *quad_indices;
|
||||
GPU_Resource *noise;
|
||||
GpuPointer pt_sampler;
|
||||
GpuPointer quad_indices;
|
||||
GpuPointer noise_tex;
|
||||
|
||||
/* Transient buffer pool */
|
||||
Mutex submitted_transient_buffers_mutex;
|
||||
GPU_SubmittedResourceNode *first_free_submitted_transient_buffer;
|
||||
GPU_Arena *perm_arenas[MaxFibers];
|
||||
} extern GPU_shared_util_state;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Startup
|
||||
|
||||
void GPU_StartupUtils(void);
|
||||
void GPU_StartupCommon(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Arena helpers
|
||||
|
||||
GPU_Arena *GPU_Perm(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Common resource helpers
|
||||
|
||||
GPU_Resource *GPU_GetCommonPointSampler(void);
|
||||
GPU_Resource *GPU_GetCommonQuadIndices(void);
|
||||
GPU_Resource *GPU_GetCommonNoise(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Transient buffer operations
|
||||
|
||||
GPU_TransientBuffer GPU_AcquireTransientBuffer(GPU_QueueKind queue_kind, u32 element_size);
|
||||
void GPU_ReleaseTransientBuffer(GPU_TransientBuffer *tbuff);
|
||||
|
||||
GPU_Resource *GPU_UploadTransientBuffer(GPU_TransientBuffer *tbuff, void *src, u64 src_size);
|
||||
GPU_Resource *GPU_UploadTransientBufferFromArena(GPU_TransientBuffer *tbuff, Arena *arena);
|
||||
void GPU_ResetTransientBuffer(GPU_TransientBuffer *tbuff, i64 queue_fence_target);
|
||||
GpuPointer GPU_GetCommonPointSampler(void);
|
||||
GpuPointer GPU_GetCommonQuadIndices(void);
|
||||
GpuPointer GPU_GetCommonNoise(void);
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Opaque types
|
||||
|
||||
Struct(GPU_Resource);
|
||||
Struct(GPU_Arena);
|
||||
Struct(GPU_CommandList);
|
||||
Struct(GPU_Swapchain);
|
||||
|
||||
@ -158,7 +158,63 @@ Enum(GPU_Format)
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Filter types
|
||||
//~ Shader access types
|
||||
|
||||
Enum(GPU_ShaderAccessKind)
|
||||
{
|
||||
GPU_ShaderAccessKind_Readonly, /* Default state for all resources */
|
||||
GPU_ShaderAccessKind_ReadWrite,
|
||||
GPU_ShaderAccessKind_RasterTarget,
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Arena types
|
||||
|
||||
Struct(GPU_TempArena)
|
||||
{
|
||||
GPU_Arena *arena;
|
||||
u64 start_pos;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Buffer types
|
||||
|
||||
Enum(GPU_BufferFlag)
|
||||
{
|
||||
GPU_BufferFlag_None = 0,
|
||||
GPU_BufferFlag_Writable = (1 << 0),
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Texture types
|
||||
|
||||
#define GPU_MaxRasterTargets 8
|
||||
|
||||
Enum(GPU_TextureFlag)
|
||||
{
|
||||
GPU_TextureFlag_None = 0,
|
||||
GPU_TextureFlag_Writable = (1 << 0),
|
||||
GPU_TextureFlag_Rasterizable = (1 << 1),
|
||||
};
|
||||
|
||||
Enum(GPU_TextureKind)
|
||||
{
|
||||
GPU_TextureKind_1D,
|
||||
GPU_TextureKind_2D,
|
||||
GPU_TextureKind_3D,
|
||||
};
|
||||
|
||||
Struct(GPU_TextureDesc)
|
||||
{
|
||||
GPU_TextureFlag flags;
|
||||
GPU_Format format;
|
||||
Vec3I32 size;
|
||||
Vec4 clear_color;
|
||||
i32 mip_levels;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Sampler types
|
||||
|
||||
/* NOTE: Matches DirectX D3D12_FILTER */
|
||||
Enum(GPU_Filter)
|
||||
@ -236,67 +292,7 @@ Enum(GPU_ComparisonFunc)
|
||||
GPU_ComparisonFunc_Always = 8
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Resource types
|
||||
|
||||
#define GPU_MaxRenderTargets 8
|
||||
|
||||
Enum(GPU_ResourceKind)
|
||||
{
|
||||
GPU_ResourceKind_Unknown,
|
||||
GPU_ResourceKind_Buffer,
|
||||
GPU_ResourceKind_Texture1D,
|
||||
GPU_ResourceKind_Texture2D,
|
||||
GPU_ResourceKind_Texture3D,
|
||||
GPU_ResourceKind_Sampler
|
||||
};
|
||||
|
||||
Enum(GPU_ResourceFlag)
|
||||
{
|
||||
GPU_ResourceFlag_None = 0,
|
||||
GPU_ResourceFlag_Writable = (1 << 0),
|
||||
GPU_ResourceFlag_Renderable = (1 << 1),
|
||||
GPU_ResourceFlag_MaxMipLevels = (1 << 2),
|
||||
GPU_ResourceFlag_Zeroed = (1 << 3),
|
||||
};
|
||||
|
||||
Enum(GPU_HeapKind)
|
||||
{
|
||||
GPU_HeapKind_Default,
|
||||
GPU_HeapKind_Upload,
|
||||
GPU_HeapKind_Download
|
||||
};
|
||||
|
||||
Enum(GPU_ReleaseFlag)
|
||||
{
|
||||
GPU_ReleaseFlag_None = 0,
|
||||
|
||||
/* Hints to the GPU layer that more resources using a similar desc will
|
||||
* be allocated soon, so the resource's memory should be kept around for
|
||||
* re-use. */
|
||||
GPU_ReleaseFlag_Reuse = (1 << 0)
|
||||
};
|
||||
|
||||
Struct(GPU_ResourceDesc)
|
||||
{
|
||||
GPU_ResourceKind kind;
|
||||
GPU_ResourceFlag flags;
|
||||
Vec4 clear_color;
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
GPU_Format format;
|
||||
Vec3I32 size;
|
||||
i32 mip_levels; /* Defaults to 1 (unless GPU_ResourceFlag_MaxMipLevels is set) */
|
||||
} texture;
|
||||
struct
|
||||
{
|
||||
GPU_HeapKind heap_kind;
|
||||
u32 count;
|
||||
u32 stride; /* Defaults to 1 */
|
||||
} buffer;
|
||||
struct
|
||||
Struct(GPU_SamplerDesc)
|
||||
{
|
||||
GPU_Filter filter;
|
||||
GPU_AddressMode x;
|
||||
@ -308,47 +304,21 @@ Struct(GPU_ResourceDesc)
|
||||
Vec4 border_color;
|
||||
f32 min_lod;
|
||||
f32 max_lod;
|
||||
} sampler;
|
||||
};
|
||||
};
|
||||
|
||||
Struct(GPU_Mapped)
|
||||
{
|
||||
GPU_Resource *resource;
|
||||
void *mem;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Rasterizer types
|
||||
//~ Rasterization types
|
||||
|
||||
Enum(GPU_RasterizeMode)
|
||||
Enum(GPU_RasterMode)
|
||||
{
|
||||
GPU_RasterizeMode_None,
|
||||
GPU_RasterizeMode_PointList,
|
||||
GPU_RasterizeMode_LineList,
|
||||
GPU_RasterizeMode_LineStrip,
|
||||
GPU_RasterizeMode_TriangleList,
|
||||
GPU_RasterizeMode_WireTriangleList,
|
||||
GPU_RasterizeMode_TriangleStrip,
|
||||
GPU_RasterizeMode_WireTriangleStrip,
|
||||
};
|
||||
|
||||
Struct(GPU_Viewport)
|
||||
{
|
||||
f32 top_left_x;
|
||||
f32 top_left_y;
|
||||
f32 width;
|
||||
f32 height;
|
||||
f32 min_depth;
|
||||
f32 max_depth;
|
||||
};
|
||||
|
||||
Struct(GPU_Scissor)
|
||||
{
|
||||
f32 left;
|
||||
f32 top;
|
||||
f32 right;
|
||||
f32 bottom;
|
||||
GPU_RasterMode_None,
|
||||
GPU_RasterMode_PointList,
|
||||
GPU_RasterMode_LineList,
|
||||
GPU_RasterMode_LineStrip,
|
||||
GPU_RasterMode_TriangleList,
|
||||
GPU_RasterMode_WireTriangleList,
|
||||
GPU_RasterMode_TriangleStrip,
|
||||
GPU_RasterMode_WireTriangleStrip,
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -373,105 +343,66 @@ Struct(GPU_Stats)
|
||||
void GPU_Startup(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Fence operations
|
||||
//~ @hookdecl Arenas
|
||||
|
||||
Fence *GPU_FenceFromQueue(GPU_QueueKind queue);
|
||||
void GPU_QueueWait(GPU_QueueKind a, GPU_QueueKind b, i64 b_target_fence_value); /* Forces queue `a` to wait until queue `b`'s fence reaches `b_target_fence_value` */
|
||||
GPU_Arena *GPU_AcquireArena(void);
|
||||
void GPU_ReleaseArena(GPU_Arena *arena);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Rasterizer helpers
|
||||
//~ @hookdecl Resource creation
|
||||
|
||||
GPU_Viewport GPU_ViewportFromRect(Rng2 rect);
|
||||
GPU_Scissor GPU_ScissorFromRect(Rng2 rect);
|
||||
GpuPointer GPU_PushBufferEx(GPU_Arena *arena, i32 element_size, i32 element_align, i32 element_count, GPU_BufferFlag flags);
|
||||
#define GPU_PushBuffer(arena, type, count, flags) GPU_PushBufferEx((arena), sizeof(type), alignof(type), (count), (flags))
|
||||
|
||||
GpuPointer GPU_PushTextureEx(GPU_Arena *arena, GPU_TextureDesc desc);
|
||||
GpuPointer GPU_PushTexture(GPU_Arena *arena, GPU_TextureKind kind, GPU_Format format, Vec3I32 size, GPU_TextureFlag flags);
|
||||
|
||||
GpuPointer GPU_PushSampler(GPU_Arena *arena, GPU_SamplerDesc desc);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Resource operations
|
||||
//~ @hookdecl Commands
|
||||
|
||||
GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc);
|
||||
void GPU_ReleaseResource(GPU_Resource *resource, GPU_ReleaseFlag flags);
|
||||
//- Command list creation
|
||||
GPU_CommandList *GPU_OpenCommandList(GPU_QueueKind queue);
|
||||
void GPU_CloseCommandList(GPU_CommandList *cl);
|
||||
|
||||
Vec2I32 GPU_GetTextureSize2D(GPU_Resource *resource);
|
||||
Vec3I32 GPU_GetTextureSize3D(GPU_Resource *resource);
|
||||
u64 GPU_GetFootprintSize(GPU_Resource *resource);
|
||||
//- Cpu -> Gpu
|
||||
void GPU_CopyBytesFromCpu(GPU_CommandList *cl, GpuPointer dst, RngU64 dst_range, void *src);
|
||||
void GPU_CopyTexelsFromCpu(GPU_CommandList *cl, GpuPointer dst, Rng3U64 dst_range, void *src);
|
||||
void GPU_CopyFromCpu(GPU_CommandList *cl, GpuPointer dst, String src);
|
||||
|
||||
u64 GPU_GetBufferCount(GPU_Resource *gpu_resource);
|
||||
//- Gpu -> Cpu
|
||||
void GPU_AddCpuFence(GPU_CommandList *cl, Fence *fence, i64 v);
|
||||
void GPU_SetCpuFence(GPU_CommandList *cl, Fence *fence, i64 v);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Resource index operations
|
||||
//- Implicit state
|
||||
void GPU_SetShaderAccess(GPU_CommandList *cl, GpuPointer ptr, GPU_ShaderAccessKind access_kind);
|
||||
void GPU_SetRasterizeMode(GPU_CommandList *cl, GPU_RasterMode mode);
|
||||
void GPU_SetConstantU32(GPU_CommandList *cl, i32 slot, u32 v);
|
||||
void GPU_SetConstantF32(GPU_CommandList *cl, i32 slot, f32 v);
|
||||
void GPU_SetConstantPtr(GPU_CommandList *cl, i32 slot, GpuPointer v);
|
||||
|
||||
StructuredBufferRid GPU_StructuredBufferRidFromResource(GPU_Resource *resource);
|
||||
RWStructuredBufferRid GPU_RWStructuredBufferRidFromResource(GPU_Resource *resource);
|
||||
Texture1DRid GPU_Texture1DRidFromResource(GPU_Resource *resource);
|
||||
Texture2DRid GPU_Texture2DRidFromResource(GPU_Resource *resource);
|
||||
Texture3DRid GPU_Texture3DRidFromResource(GPU_Resource *resource);
|
||||
RWTexture1DRid GPU_RWTexture1DRidFromResource(GPU_Resource *resource);
|
||||
RWTexture2DRid GPU_RWTexture2DRidFromResource(GPU_Resource *resource);
|
||||
RWTexture3DRid GPU_RWTexture3DRidFromResource(GPU_Resource *resource);
|
||||
SamplerStateRid GPU_SamplerStateRidFromResource(GPU_Resource *resource);
|
||||
//- Clear
|
||||
void GPU_ClearRasterTarget(GPU_CommandList *cl, GpuPointer target);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Command list operations
|
||||
//- Compute
|
||||
void GPU_Compute(GPU_CommandList *cl, ComputeShader cs, Vec3U32 threads);
|
||||
|
||||
GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue);
|
||||
i64 GPU_EndCommandList(GPU_CommandList *cl); /* Returns the value that the queue's fence will be set to once the command is completed */
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Profiling helpers
|
||||
|
||||
void GPU_ProfN(GPU_CommandList *cl, String name);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Barrier operations
|
||||
|
||||
void GPU_TransitionToReadable(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be read via read-only types in shaders */
|
||||
void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be read from and written to via read-write types in shaders */
|
||||
void GPU_TransitionToRenderable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot); /* Allows the resource to be used as a render target bound at slot */
|
||||
|
||||
void GPU_TransitionToCopySrc(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be used as a source in copy operations */
|
||||
void GPU_TransitionToCopyDst(GPU_CommandList *cl, GPU_Resource *resource); /* Allows the resource to be used as a destination in copy operations */
|
||||
|
||||
void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource); /* Waits until writes to a shader writable resource have completed */
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Dispatch operations
|
||||
|
||||
void GPU_ClearRenderable(GPU_CommandList *cl, GPU_Resource *resource);
|
||||
|
||||
#define GPU_Rasterize(cl, sig_ptr, vs, ps, rts_count, viewport, scissor, instances_count, index_buffer, mode) \
|
||||
GPU_Rasterize_((cl), sizeof(*(sig_ptr)), (sig_ptr), (vs), (ps), (rts_count), (viewport), (scissor), (instances_count), (index_buffer), (mode))
|
||||
|
||||
#define GPU_Compute(cl, sig_ptr, cs, threads) GPU_Compute_((cl), sizeof(*(sig_ptr)), (sig_ptr), (cs), (threads))
|
||||
|
||||
void GPU_Rasterize_(GPU_CommandList *cl,
|
||||
u32 sig_size,
|
||||
void *sig,
|
||||
VertexShader vs,
|
||||
PixelShader ps,
|
||||
u32 rts_count,
|
||||
GPU_Viewport viewport,
|
||||
GPU_Scissor scissor,
|
||||
//- Rasterize
|
||||
void GPU_RasterizeEx(GPU_CommandList *cl,
|
||||
VertexShader vs, PixelShader ps,
|
||||
u32 instances_count,
|
||||
GPU_Resource *index_buffer,
|
||||
GPU_RasterizeMode mode);
|
||||
GpuPointer idx_buff, RngU64 idx_buff_range,
|
||||
u32 raster_targets_count, GpuPointer *raster_targets,
|
||||
Rng3 viewport, Rng2 scissor);
|
||||
|
||||
void GPU_Compute_(GPU_CommandList *cl,
|
||||
u32 sig_size,
|
||||
void *sig,
|
||||
ComputeShader cs,
|
||||
Vec3U32 threads);
|
||||
void GPU_Rasterize(GPU_CommandList *cl,
|
||||
VertexShader vs, PixelShader ps,
|
||||
u32 instances_count, GpuPointer idx_buff,
|
||||
u32 raster_targets_count, GpuPointer *raster_targets);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Resource copy operations
|
||||
|
||||
void GPU_CopyResource(GPU_CommandList *cl, GPU_Resource *dst, GPU_Resource *src);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Map operations
|
||||
|
||||
GPU_Mapped GPU_Map(GPU_Resource *r);
|
||||
void GPU_Unmap(GPU_Mapped mapped);
|
||||
|
||||
void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_reference);
|
||||
//- Profiling
|
||||
void GPU_ProfN(GPU_CommandList *cl, String name);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Statistics
|
||||
@ -479,7 +410,7 @@ void GPU_CopyBytesToFootprint(void *dst, void *src, GPU_Resource *footprint_refe
|
||||
GPU_Stats GPU_QueryStats(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Swapchain available_to_reserve
|
||||
//~ @hookdecl Swapchain
|
||||
|
||||
GPU_Swapchain *GPU_AcquireSwapchain(WND_Handle window, GPU_Format format, Vec2I32 size);
|
||||
void GPU_ReleaseSwapchain(GPU_Swapchain *swapchain);
|
||||
@ -492,4 +423,4 @@ void GPU_YieldOnSwapchain(GPU_Swapchain *swapchain);
|
||||
* 2. Blits `texture` into position `dst` in the backbuffer
|
||||
* 3. Presents the backbuffer
|
||||
* 4. Returns the value that the Direct queue fence will reach once GPU completes blitting (`texture` shouldn't be released while blit is in flight) */
|
||||
i64 GPU_PresentSwapchain(GPU_Swapchain *swapchain, GPU_Resource *texture, i32 vsync, Vec2I32 backbuffer_size, Vec2I32 dst_p0, Vec2I32 dst_p1, Vec2I32 src_p0, Vec2I32 src_p1, Vec4 clear_color);
|
||||
i64 GPU_PresentSwapchain(GPU_Swapchain *swapchain, GpuPointer texture, i32 vsync, Vec2I32 backbuffer_size, Vec2I32 dst_p0, Vec2I32 dst_p1, Vec2I32 src_p0, Vec2I32 src_p1, Vec4 clear_color);
|
||||
|
||||
@ -195,6 +195,11 @@ void GPU_D12_InitDevice(void)
|
||||
first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description);
|
||||
}
|
||||
{
|
||||
/* TODO: Verify feature support:
|
||||
* - HighestShaderModel >= D3D_SHADER_MODEL_6_6
|
||||
* - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3
|
||||
* - EnhancedBarriersSupported == 1
|
||||
*/
|
||||
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device);
|
||||
}
|
||||
if (SUCCEEDED(hr) && !skip)
|
||||
@ -890,7 +895,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
|
||||
: desc.buffer.heap_kind == GPU_HeapKind_Download ? D3D12_HEAP_TYPE_READBACK
|
||||
: D3D12_HEAP_TYPE_DEFAULT
|
||||
};
|
||||
Assert(!(desc.flags & GPU_ResourceFlag_Renderable));
|
||||
Assert(!(desc.flags & GPU_ResourceFlag_Rasterizable));
|
||||
D3D12_RESOURCE_DESC d3d_desc = ZI;
|
||||
d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
||||
@ -940,7 +945,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
|
||||
d3d_desc.SampleDesc.Count = 1;
|
||||
d3d_desc.SampleDesc.Quality = 0;
|
||||
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_Writable);
|
||||
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_Renderable);
|
||||
d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_Rasterizable);
|
||||
r->state = D3D12_RESOURCE_STATE_COMMON;
|
||||
D3D12_CLEAR_VALUE clear_value = { .Format = d3d_desc.Format, .Color = { 0 } };
|
||||
clear_value.Color[0] = desc.clear_color.x;
|
||||
@ -1007,7 +1012,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
|
||||
}
|
||||
|
||||
/* Create rtv descriptor */
|
||||
if (desc.flags & GPU_ResourceFlag_Renderable)
|
||||
if (desc.flags & GPU_ResourceFlag_Rasterizable)
|
||||
{
|
||||
if (!r->rtv_descriptor->valid)
|
||||
{
|
||||
@ -1183,7 +1188,7 @@ SamplerStateRid GPU_SamplerStateRidFromResource(GPU_Resource *resource)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdef Command list hooks
|
||||
|
||||
GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue_kind)
|
||||
/* Definition of the GPU_OpenCommandList hook for the D3D12 backend; returns `cl` cast to the opaque GPU_CommandList handle. The name must match the hook declaration and its callers. */
GPU_CommandList *GPU_OpenCommandList(GPU_QueueKind queue_kind)
|
||||
{
|
||||
GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId());
|
||||
Arena *perm = PermArena();
|
||||
@ -1201,7 +1206,7 @@ GPU_CommandList *GPU_BeginCommandList(GPU_QueueKind queue_kind)
|
||||
return (GPU_CommandList *)cl;
|
||||
}
|
||||
|
||||
i64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
|
||||
i64 GPU_CloseCommandList(GPU_CommandList *gpu_cl)
|
||||
{
|
||||
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
||||
GPU_D12_FiberState *f = GPU_D12_FiberStateFromId(FiberId());
|
||||
@ -1210,8 +1215,8 @@ i64 GPU_EndCommandList(GPU_CommandList *gpu_cl)
|
||||
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
|
||||
TempArena scratch = BeginScratchNoConflict();
|
||||
|
||||
GPU_D12_Resource *slotted_render_targets[GPU_MaxRenderTargets] = ZI;
|
||||
GPU_D12_Resource *bound_render_targets[GPU_MaxRenderTargets] = ZI;
|
||||
GPU_D12_Resource *slotted_render_targets[GPU_MaxRasterTargets] = ZI;
|
||||
GPU_D12_Resource *bound_render_targets[GPU_MaxRasterTargets] = ZI;
|
||||
|
||||
/* Begin dx12 command list */
|
||||
GPU_D12_RawCommandList *dx12_cl = GPU_D12_BeginRawCommandList(queue_kind);
|
||||
@ -1699,7 +1704,7 @@ void GPU_TransitionToWritable(GPU_CommandList *cl, GPU_Resource *resource)
|
||||
cmd->barrier.resource = (GPU_D12_Resource *)resource;
|
||||
}
|
||||
|
||||
void GPU_TransitionToRenderable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot)
|
||||
void GPU_TransitionToRasterizable(GPU_CommandList *cl, GPU_Resource *resource, i32 slot)
|
||||
{
|
||||
GPU_D12_Command *cmd = GPU_D12_PushCmd((GPU_D12_CommandList *)cl);
|
||||
cmd->kind = GPU_D12_CommandKind_TransitionToRtv;
|
||||
@ -1731,7 +1736,7 @@ void GPU_FlushWritable(GPU_CommandList *cl, GPU_Resource *resource)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdef Dispatch hooks
|
||||
|
||||
void GPU_ClearRenderable(GPU_CommandList *gpu_cl, GPU_Resource *resource)
|
||||
void GPU_ClearRasterizable(GPU_CommandList *gpu_cl, GPU_Resource *resource)
|
||||
{
|
||||
GPU_D12_CommandList *cl = (GPU_D12_CommandList *)gpu_cl;
|
||||
GPU_D12_Command *cmd = GPU_D12_PushCmd(cl);
|
||||
@ -1739,14 +1744,12 @@ void GPU_ClearRenderable(GPU_CommandList *gpu_cl, GPU_Resource *resource)
|
||||
cmd->clear.resource = (GPU_D12_Resource *)resource;
|
||||
}
|
||||
|
||||
void GPU_Rasterize_(GPU_CommandList *gpu_cl,
|
||||
void GPU_RasterizeEx(GPU_CommandList *gpu_cl,
|
||||
u32 sig_size,
|
||||
void *sig,
|
||||
VertexShader vs,
|
||||
PixelShader ps,
|
||||
u32 rts_count,
|
||||
GPU_Viewport viewport,
|
||||
GPU_Scissor scissor,
|
||||
u32 instances_count,
|
||||
GPU_Resource *index_buffer,
|
||||
GPU_RasterizeMode mode)
|
||||
@ -1760,7 +1763,7 @@ void GPU_Rasterize_(GPU_CommandList *gpu_cl,
|
||||
cmd->rasterize.vs = vs;
|
||||
cmd->rasterize.ps = ps;
|
||||
cmd->rasterize.rts_count = rts_count;
|
||||
Assert(rts_count < GPU_MaxRenderTargets);
|
||||
Assert(rts_count < GPU_MaxRasterTargets);
|
||||
cmd->rasterize.viewport = viewport;
|
||||
cmd->rasterize.scissor = scissor;
|
||||
cmd->rasterize.instances_count = instances_count;
|
||||
@ -1768,7 +1771,7 @@ void GPU_Rasterize_(GPU_CommandList *gpu_cl,
|
||||
cmd->rasterize.mode = mode;
|
||||
}
|
||||
|
||||
void GPU_Compute_(GPU_CommandList *gpu_cl,
|
||||
void GPU_ComputeEx(GPU_CommandList *gpu_cl,
|
||||
u32 sig_size,
|
||||
void *sig,
|
||||
ComputeShader cs,
|
||||
|
||||
@ -34,7 +34,7 @@ Struct(GPU_D12_PipelineDesc)
|
||||
ComputeShader cs;
|
||||
b32 is_wireframe;
|
||||
D3D12_PRIMITIVE_TOPOLOGY_TYPE topology_type;
|
||||
GPU_Format render_target_formats[GPU_MaxRenderTargets];
|
||||
GPU_Format render_target_formats[GPU_MaxRasterTargets];
|
||||
};
|
||||
|
||||
Struct(GPU_D12_Pipeline)
|
||||
|
||||
@ -29,21 +29,19 @@ JobDef(V_VisWorker, _, __)
|
||||
{
|
||||
V_SharedState *vis_shared = &V_shared_state;
|
||||
S_SharedState *sim_shared = &S_shared_state;
|
||||
|
||||
Arena *frame_arena = AcquireArena(Gibi(64));
|
||||
Arena *perm = PermArena();
|
||||
|
||||
GPU_Arena *frame_gpu_arena = GPU_AcquireArena(Mibi(8), GPU_CpuAccessFlag_Writable);
|
||||
GPU_Arena *dverts_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable);
|
||||
GPU_Arena *dvert_idxs_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable);
|
||||
|
||||
//////////////////////////////
|
||||
//- State
|
||||
|
||||
Fence *gpu_fence = GPU_FenceFromQueue(GPU_QueueKind_Direct);
|
||||
i64 gpu_fence_target = 0;
|
||||
i64 frame_gen = 0;
|
||||
GPU_Resource *draw_target = 0;
|
||||
|
||||
Arena *dverts_arena = AcquireArena(Gibi(64));
|
||||
Arena *dvert_idx_arena = AcquireArena(Gibi(64));
|
||||
GPU_TransientBuffer dverts_tbuff = GPU_AcquireTransientBuffer(GPU_QueueKind_Direct, sizeof(V_DVert));
|
||||
GPU_TransientBuffer dvert_idx_tbuff = GPU_AcquireTransientBuffer(GPU_QueueKind_Direct, sizeof(i32));
|
||||
GPU_Texture *draw_target = 0;
|
||||
|
||||
Struct(Persist)
|
||||
{
|
||||
@ -510,9 +508,22 @@ JobDef(V_VisWorker, _, __)
|
||||
}
|
||||
UnlockTicketMutex(&sim_shared->input_back_tm);
|
||||
|
||||
//////////////////////////////
|
||||
//- Render
|
||||
|
||||
GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_Direct);
|
||||
{
|
||||
//////////////////////////////
|
||||
//- Build render data
|
||||
|
||||
GPU_ResetArena(cl, gpu_frame_arena);
|
||||
ResetArena(dverts_arena);
|
||||
ResetArena(dvert_idxs_arena);
|
||||
|
||||
/* Build shape buffers */
|
||||
GpuPointer dverts = ZI;
|
||||
GpuPointer dvert_idxs = ZI;
|
||||
{
|
||||
for (S_Ent *ent = S_FirstEnt(frame_arena, &iter, &lookup); ent->active; ent = S_NextEnt(frame_arena, &iter))
|
||||
{
|
||||
Xform ent_to_world_xf = ent->world_xf;
|
||||
@ -525,99 +536,76 @@ JobDef(V_VisWorker, _, __)
|
||||
Vec4 color = ent->tint;
|
||||
i32 detail = 32;
|
||||
S_Shape shape = S_MulXformShape(ent_to_draw_xf, ent->local_shape);
|
||||
V_DrawShape(dverts_arena, dvert_idx_arena, shape, LinearFromSrgb(color), detail, V_DrawFlag_Line);
|
||||
V_DrawShape(dverts_arena, dvert_idxs_arena, shape, LinearFromSrgb(color), detail, V_DrawFlag_Line);
|
||||
}
|
||||
}
|
||||
dverts = GPU_PushCpuStructsToArena(gpu_frame_arena, V_DVert, dverts_arena);
|
||||
dvert_idxs = GPU_PushCpuStructsToArena(gpu_frame_arena, i32, dvert_idxs_arena);
|
||||
}
|
||||
|
||||
/* Create draw state */
|
||||
if (!draw_target || !MatchVec2I32(draw_size, GPU_Count2D(draw_target)))
|
||||
{
|
||||
GPU_ResetArena(cl, gpu_arena);
|
||||
/* Draw target */
|
||||
{
|
||||
GPU_TextureDesc desc = ZI;
|
||||
desc.kind = GPU_TextureKind_Texture2D;
|
||||
desc.flags = GPU_TextureFlag_Writable | GPU_TextureFlag_Rasterizable;
|
||||
desc.format = GPU_Format_R16G16B16A16_Float;
|
||||
desc.size = VEC3I32(draw_size.x, draw_size.y, 1);
|
||||
desc.clear_color = LinearFromSrgb(swapchain_color);
|
||||
draw_target = GPU_PushTexture(gpu_arena, desc);
|
||||
}
|
||||
/* Draw params */
|
||||
draw_params = GPU_PushStructNoZero(gpu_arena, V_DParams);
|
||||
}
|
||||
|
||||
/* Build draw params */
|
||||
GpuPointer draw_params = ZI;
|
||||
{
|
||||
V_DParams params = ZI;
|
||||
params.world_to_draw_xf = world_to_draw_xf;
|
||||
GPU_CopyCpuStructToBuffer(draw_params, 0, ¶ms);
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Render
|
||||
//- Dispatch shaders
|
||||
|
||||
/* Acquire draw target */
|
||||
Rng2 viewport = RNG2(VEC2(0, 0), Vec2FromFields(draw_size));
|
||||
{
|
||||
if (draw_target && !MatchVec2I32(draw_size, GPU_GetTextureSize2D(draw_target)))
|
||||
{
|
||||
YieldOnFence(gpu_fence, gpu_fence_target);
|
||||
GPU_ReleaseResource(draw_target, GPU_ReleaseFlag_None);
|
||||
draw_target = 0;
|
||||
}
|
||||
if (!draw_target)
|
||||
{
|
||||
GPU_ResourceDesc desc = ZI;
|
||||
desc.kind = GPU_ResourceKind_Texture2D;
|
||||
desc.flags = GPU_ResourceFlag_Writable | GPU_ResourceFlag_Renderable | GPU_ResourceFlag_Zeroed;
|
||||
desc.texture.format = GPU_Format_R16G16B16A16_Float;
|
||||
desc.texture.size = VEC3I32(draw_size.x, draw_size.y, 1);
|
||||
desc.clear_color = LinearFromSrgb(swapchain_color);
|
||||
draw_target = GPU_AcquireResource(desc);
|
||||
}
|
||||
}
|
||||
|
||||
/* Upload transient buffers */
|
||||
GPU_Resource *dverts_buffer = GPU_UploadTransientBufferFromArena(&dverts_tbuff, dverts_arena);
|
||||
GPU_Resource *dvert_idx_buffer = GPU_UploadTransientBufferFromArena(&dvert_idx_tbuff, dvert_idx_arena);
|
||||
u64 dverts_count = GPU_GetBufferCount(dverts_buffer);
|
||||
u64 dvert_idx_count = GPU_GetBufferCount(dvert_idx_buffer);
|
||||
|
||||
GPU_Viewport viewport = GPU_ViewportFromRect(RNG2(VEC2(0, 0), Vec2FromFields(draw_size)));
|
||||
GPU_Scissor scissor = GPU_ScissorFromRect(RNG2(VEC2(0, 0), Vec2FromFields(draw_size)));
|
||||
|
||||
GPU_CommandList *cl = GPU_BeginCommandList(GPU_QueueKind_Direct);
|
||||
{
|
||||
/* Prep background pass */
|
||||
{
|
||||
GPU_TransitionToWritable(cl, draw_target);
|
||||
}
|
||||
GPU_SetConstantPtr(cl, V_DrawConst_Params, draw_params);
|
||||
GPU_SetConstantPtr(cl, V_DrawConst_FinalTarget, draw_target);
|
||||
GPU_SetConstantPtr(cl, V_DrawConst_Sampler, GPU_GetCommonPointSampler());
|
||||
GPU_SetConstantPtr(cl, V_DrawConst_DVerts, dverts);
|
||||
|
||||
/* Backdrop pass */
|
||||
{
|
||||
V_BackdropSig sig = ZI;
|
||||
sig.target_tex = GPU_RWTexture2DRidFromResource(draw_target);
|
||||
sig.target_size = draw_size;
|
||||
GPU_Compute(cl, &sig, V_BackdropCS, V_BackdropCSThreadSizeFromTexSize(draw_size));
|
||||
}
|
||||
|
||||
/* Prep shapes pass */
|
||||
{
|
||||
GPU_TransitionToRenderable(cl, draw_target, 0);
|
||||
GPU_SetShaderAccess(cl, draw_target, GPU_ShaderAccessKind_ReadWrite);
|
||||
GPU_Compute(cl, V_BackdropCS, V_BackdropCSThreadSizeFromTexSize(draw_size));
|
||||
}
|
||||
|
||||
/* Shapes pass */
|
||||
{
|
||||
V_DVertSig sig = ZI;
|
||||
sig.target_size = draw_size;
|
||||
sig.sampler = GPU_SamplerStateRidFromResource(GPU_GetCommonPointSampler());
|
||||
sig.verts = GPU_StructuredBufferRidFromResource(dverts_buffer);
|
||||
GPU_SetShaderAccess(cl, draw_target, GPU_ShaderAccessKind_RasterTarget);
|
||||
GPU_Rasterize(cl,
|
||||
&sig,
|
||||
V_DVertVS, V_DVertPS,
|
||||
1,
|
||||
viewport,
|
||||
scissor,
|
||||
1,
|
||||
dvert_idx_buffer,
|
||||
1, dvert_idxs_buffer,
|
||||
1, draw_target,
|
||||
viewport, viewport,
|
||||
GPU_RasterizeMode_TriangleList);
|
||||
}
|
||||
|
||||
/* Transition draw target for UI composition */
|
||||
{
|
||||
GPU_TransitionToReadable(cl, draw_target);
|
||||
GPU_SetShaderAccess(cl, draw_target, GPU_ShaderAccessKind_Readonly);
|
||||
}
|
||||
}
|
||||
gpu_fence_target = GPU_EndCommandList(cl);
|
||||
|
||||
/* Reset transient buffers */
|
||||
{
|
||||
GPU_ResetTransientBuffer(&dverts_tbuff, gpu_fence_target);
|
||||
GPU_ResetTransientBuffer(&dvert_idx_tbuff, gpu_fence_target);
|
||||
ResetArena(dverts_arena);
|
||||
ResetArena(dvert_idx_arena);
|
||||
}
|
||||
GPU_CloseCommandLiist(cl);
|
||||
|
||||
//////////////////////////////
|
||||
//- End vis frame
|
||||
|
||||
UI_SetRawTexture(vis_box, draw_target, VEC2(0, 0), VEC2(1, 1));
|
||||
gpu_fence_target = UI_EndFrame(ui_frame);
|
||||
UI_EndFrame(ui_frame);
|
||||
|
||||
++frame_gen;
|
||||
shutdown = Atomic32Fetch(&vis_shared->shutdown);
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Shape helpers
|
||||
|
||||
void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 color_lin, V_DrawFlag flags)
|
||||
void V_DrawPoly(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, Vec2Array points, Vec4 color_lin, V_DrawFlag flags)
|
||||
{
|
||||
if (flags & V_DrawFlag_Line)
|
||||
{
|
||||
@ -14,10 +14,10 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col
|
||||
i32 lines_count = verts_count == 2 ? 1 : verts_count;
|
||||
i32 line_verts_count = lines_count * 4;
|
||||
i32 idx_count = lines_count * 6;
|
||||
i32 idx_offset = ArenaCount(verts_arena, V_DVert);
|
||||
i32 idx_offset = GPU_ArenaCount(verts_gpu_arena, V_DVert);
|
||||
|
||||
/* Push dverts */
|
||||
V_DVert *dverts = PushStructsNoZero(verts_arena, V_DVert, line_verts_count);
|
||||
V_DVert *dverts = GPU_PushStructsNoZero(verts_gpu_arena, V_DVert, line_verts_count);
|
||||
for (i32 line_idx = 0; line_idx < lines_count; ++line_idx)
|
||||
{
|
||||
i32 a_idx = line_idx;
|
||||
@ -46,7 +46,7 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col
|
||||
}
|
||||
|
||||
/* Generate indices */
|
||||
i32 *indices = PushStructsNoZero(idx_arena, i32, idx_count);
|
||||
i32 *indices = PushStructsNoZero(idxs_gpu_arena, i32, idx_count);
|
||||
for (i32 line_idx = 0; line_idx < lines_count; ++line_idx)
|
||||
{
|
||||
i32 indices_offset = line_idx * 6;
|
||||
@ -67,12 +67,12 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col
|
||||
i32 verts_count = points.count;
|
||||
if (verts_count >= 3)
|
||||
{
|
||||
i32 idx_offset = ArenaCount(verts_arena, V_DVert);
|
||||
i32 idx_offset = GPU_ArenaCount(verts_gpu_arena, V_DVert);
|
||||
i32 tris_count = verts_count - 2;
|
||||
i32 idx_count = tris_count * 3;
|
||||
|
||||
/* Push dverts */
|
||||
V_DVert *dverts = PushStructsNoZero(verts_arena, V_DVert, verts_count);
|
||||
V_DVert *dverts = GPU_PushStructsNoZero(verts_gpu_arena, V_DVert, verts_count);
|
||||
for (i32 point_idx = 0; point_idx < (i32)points.count; ++point_idx)
|
||||
{
|
||||
V_DVert *dvert = &dverts[point_idx];
|
||||
@ -81,7 +81,7 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col
|
||||
}
|
||||
|
||||
/* Generate indices in a fan pattern */
|
||||
i32 *indices = PushStructsNoZero(idx_arena, i32, idx_count);
|
||||
i32 *indices = PushStructsNoZero(idxs_gpu_arena, i32, idx_count);
|
||||
for (i32 i = 0; i < tris_count; ++i)
|
||||
{
|
||||
i32 tri_offset = i * 3;
|
||||
@ -93,14 +93,14 @@ void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 col
|
||||
}
|
||||
}
|
||||
|
||||
void V_DrawShape(Arena *verts_arena, Arena *idx_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags)
|
||||
void V_DrawShape(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags)
|
||||
{
|
||||
if (shape.radius == 0)
|
||||
{
|
||||
Vec2Array draw_points = ZI;
|
||||
draw_points.points = shape.points;
|
||||
draw_points.count = shape.points_count;
|
||||
V_DrawPoly(verts_arena, idx_arena, draw_points, color_lin, flags);
|
||||
V_DrawPoly(verts_gpu_arena, idxs_gpu_arena, draw_points, color_lin, flags);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -116,7 +116,7 @@ void V_DrawShape(Arena *verts_arena, Arena *idx_arena, S_Shape shape, Vec4 color
|
||||
Vec2 sp = S_SupportPointFromShape(shape, dir);
|
||||
draw_points.points[i] = sp;
|
||||
}
|
||||
V_DrawPoly(verts_arena, idx_arena, draw_points, color_lin, flags);
|
||||
V_DrawPoly(verts_gpu_arena, idxs_gpu_arena, draw_points, color_lin, flags);
|
||||
}
|
||||
EndScratch(scratch);
|
||||
}
|
||||
|
||||
@ -10,5 +10,5 @@ Enum(V_DrawFlag)
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Shape helpers
|
||||
|
||||
void V_DrawPoly(Arena *verts_arena, Arena *idx_arena, Vec2Array points, Vec4 color_lin, V_DrawFlag flags);
|
||||
void V_DrawShape(Arena *verts_arena, Arena *idx_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags);
|
||||
void V_DrawPoly(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, Vec2Array points, Vec4 color_lin, V_DrawFlag flags);
|
||||
void V_DrawShape(GPU_Arena *verts_gpu_arena, GPU_Arena *idxs_gpu_arena, S_Shape shape, Vec4 color_lin, i32 detail, V_DrawFlag flags);
|
||||
|
||||
@ -20,76 +20,27 @@ JobDef(SPR_LoadTexture, sig, _)
|
||||
ASE_DecodedImage decoded = ASE_DecodeImage(scratch.arena, data);
|
||||
ok = decoded.ok;
|
||||
|
||||
/* Upload texture to gpu */
|
||||
if (ok)
|
||||
{
|
||||
GPU_ResourceDesc desc = ZI;
|
||||
desc.kind = GPU_ResourceKind_Texture2D;
|
||||
desc.flags = GPU_ResourceFlag_None;
|
||||
desc.texture.format = GPU_Format_R8G8B8A8_Unorm_Srgb;
|
||||
desc.texture.size = VEC3I32(decoded.width, decoded.height, 1);
|
||||
texture->gpu_texture = GPU_AcquireResource(desc);
|
||||
GPU_Arena *gpu_perm = GPU_Perm();
|
||||
GpuPointer gpu_tex = GPU_PushTexture(gpu_perm,
|
||||
GPU_TextureKind_2D,
|
||||
GPU_Format_R8G8B8A8_Unorm_Srgb,
|
||||
VEC3I32(decoded.width, decoded.height, 1),
|
||||
GPU_TextureFlag_Allow);
|
||||
texture->gpu_texture = gpu_tex;
|
||||
texture->width = decoded.width;
|
||||
texture->height = decoded.height;
|
||||
|
||||
/* Fill upload buffer */
|
||||
GPU_ResourceDesc upload_desc = ZI;
|
||||
upload_desc.kind = GPU_ResourceKind_Buffer;
|
||||
upload_desc.buffer.heap_kind = GPU_HeapKind_Upload;
|
||||
upload_desc.buffer.count = GPU_GetFootprintSize(texture->gpu_texture);
|
||||
GPU_Resource *upload = GPU_AcquireResource(upload_desc);
|
||||
GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_BackgroundCopy);
|
||||
{
|
||||
GPU_Mapped mapped = GPU_Map(upload);
|
||||
GPU_CopyBytesToFootprint(mapped.mem, (u8 *)decoded.pixels, texture->gpu_texture);
|
||||
GPU_Unmap(mapped);
|
||||
GPU_ReadCpu(cl, gpu_tex, decoded.data);
|
||||
GPU_SetCpuFence(cl, &entry->texture_ready_fence, 1);
|
||||
}
|
||||
GPU_EndCommandList(cl);
|
||||
}
|
||||
|
||||
GPU_QueueKind copy_queue = GPU_QueueKind_BackgroundCopy;
|
||||
GPU_QueueKind direct_queue = GPU_QueueKind_Direct;
|
||||
Fence *direct_queue_fence = GPU_FenceFromQueue(direct_queue);
|
||||
i64 direct_queue_fence_target = 0;
|
||||
if (copy_queue == direct_queue)
|
||||
{
|
||||
/* Copy & transition GPU resource on direct queue*/
|
||||
{
|
||||
GPU_CommandList *cl = GPU_BeginCommandList(direct_queue);
|
||||
{
|
||||
GPU_TransitionToCopyDst(cl, texture->gpu_texture);
|
||||
GPU_CopyResource(cl, texture->gpu_texture, upload);
|
||||
GPU_TransitionToReadable(cl, texture->gpu_texture);
|
||||
}
|
||||
direct_queue_fence_target = GPU_EndCommandList(cl);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Copy to GPU resource on background copy queue*/
|
||||
i64 copy_queue_fence_target = 0;
|
||||
{
|
||||
GPU_CommandList *cl = GPU_BeginCommandList(copy_queue);
|
||||
{
|
||||
GPU_TransitionToCopyDst(cl, texture->gpu_texture);
|
||||
GPU_CopyResource(cl, texture->gpu_texture, upload);
|
||||
}
|
||||
copy_queue_fence_target = GPU_EndCommandList(cl);
|
||||
}
|
||||
/* Once copy finishes, transition resource to readable on direct queue */
|
||||
{
|
||||
GPU_QueueWait(direct_queue, copy_queue, copy_queue_fence_target);
|
||||
GPU_CommandList *cl = GPU_BeginCommandList(direct_queue);
|
||||
{
|
||||
GPU_TransitionToReadable(cl, texture->gpu_texture);
|
||||
}
|
||||
direct_queue_fence_target = GPU_EndCommandList(cl);
|
||||
}
|
||||
}
|
||||
|
||||
/* Release upload buffer once transition finishes */
|
||||
YieldOnFence(direct_queue_fence, direct_queue_fence_target);
|
||||
GPU_ReleaseResource(upload, GPU_ReleaseFlag_None);
|
||||
}
|
||||
|
||||
texture->loaded = 1;
|
||||
SetFence(&entry->texture_ready_fence, 1);
|
||||
EndScratch(scratch);
|
||||
}
|
||||
|
||||
@ -274,7 +225,6 @@ JobDef(SPR_LoadSheet, sig, _)
|
||||
}
|
||||
}
|
||||
|
||||
sheet->loaded = 1;
|
||||
SetFence(&entry->sheet_ready_fence, 1);
|
||||
EndScratch(scratch);
|
||||
}
|
||||
|
||||
@ -17,8 +17,7 @@ Struct(SPR_SliceKey)
|
||||
Struct(SPR_Texture)
|
||||
{
|
||||
b32 valid;
|
||||
b32 loaded;
|
||||
GPU_Resource *gpu_texture;
|
||||
GpuTexture gpu_texture;
|
||||
u32 width;
|
||||
u32 height;
|
||||
};
|
||||
@ -86,7 +85,6 @@ Struct(SPR_SliceGroupBin)
|
||||
Struct(SPR_Sheet)
|
||||
{
|
||||
b32 valid;
|
||||
b32 loaded;
|
||||
Vec2 image_size;
|
||||
Vec2 frame_size;
|
||||
|
||||
|
||||
@ -676,59 +676,32 @@ i64 UI_EndFrame(UI_Frame frame)
|
||||
UI_State *g = &UI_state;
|
||||
UI_EFrameState old_eframe = g->eframe;
|
||||
|
||||
Vec2I32 monitor_size = frame.window_frame.monitor_size;
|
||||
|
||||
Vec2I32 draw_size = frame.window_frame.draw_size;
|
||||
Rng2 draw_viewport = ZI;
|
||||
draw_viewport.p1 = Vec2FromFields(draw_size);
|
||||
|
||||
//////////////////////////////
|
||||
//- Reset state
|
||||
|
||||
{
|
||||
ZeroStruct(&g->eframe);
|
||||
g->eframe.layout_arena = old_eframe.layout_arena;
|
||||
g->eframe.rects_arena = old_eframe.rects_arena;
|
||||
g->eframe.drects_gpu_arena = old_eframe.drects_gpu_arena;
|
||||
g->eframe.draw_target = old_eframe.draw_target;
|
||||
g->eframe.swapchain = old_eframe.swapchain;
|
||||
g->eframe.gpu_submit_fence_target = old_eframe.gpu_submit_fence_target;
|
||||
g->eframe.draw_rects_tbuff = old_eframe.draw_rects_tbuff;
|
||||
g->eframe.tick = old_eframe.tick;
|
||||
}
|
||||
if (!g->eframe.layout_arena)
|
||||
{
|
||||
g->eframe.layout_arena = AcquireArena(Gibi(64));
|
||||
g->eframe.rects_arena = AcquireArena(Gibi(64));
|
||||
g->eframe.draw_rects_tbuff = GPU_AcquireTransientBuffer(GPU_QueueKind_Direct, sizeof(UI_DRect));
|
||||
g->eframe.tex_gpu_arena = GPU_AcquireTextureArena();
|
||||
g->eframe.frame_gpu_arena = GPU_AcquireArena(Mibi(16));
|
||||
g->eframe.drects_gpu_arena = GPU_AcquireArena(Mibi(16));
|
||||
}
|
||||
ResetArena(g->eframe.layout_arena);
|
||||
ResetArena(g->eframe.rects_arena);
|
||||
|
||||
//////////////////////////////
|
||||
//- Init render state
|
||||
|
||||
Vec2I32 monitor_size = frame.window_frame.monitor_size;
|
||||
|
||||
GPU_QueueKind gpu_render_queue = GPU_QueueKind_Direct;
|
||||
Fence *submit_fence = GPU_FenceFromQueue(gpu_render_queue);
|
||||
|
||||
/* Acquire render target */
|
||||
if (g->eframe.draw_target && !MatchVec2I32(monitor_size, GPU_GetTextureSize2D(g->eframe.draw_target)))
|
||||
{
|
||||
__profn("Release ui render target");
|
||||
YieldOnFence(submit_fence, g->eframe.gpu_submit_fence_target);
|
||||
GPU_ReleaseResource(g->eframe.draw_target, GPU_ReleaseFlag_None);
|
||||
g->eframe.draw_target = 0;
|
||||
}
|
||||
if (!g->eframe.draw_target)
|
||||
{
|
||||
__profn("Acquire ui render target");
|
||||
GPU_ResourceDesc desc = ZI;
|
||||
desc.kind = GPU_ResourceKind_Texture2D;
|
||||
desc.flags = GPU_ResourceFlag_Renderable | GPU_ResourceFlag_Writable;
|
||||
// desc.texture.format = GPU_Format_R8G8B8A8_Unorm;
|
||||
desc.texture.format = GPU_Format_R16G16B16A16_Float;
|
||||
desc.texture.size = VEC3I32(monitor_size.x, monitor_size.y, 1);
|
||||
g->eframe.draw_target = GPU_AcquireResource(desc);
|
||||
}
|
||||
|
||||
Vec2I32 draw_size = frame.window_frame.draw_size;
|
||||
Rng2 draw_viewport = ZI;
|
||||
draw_viewport.p1 = Vec2FromFields(draw_size);
|
||||
|
||||
//////////////////////////////
|
||||
//- Process commands
|
||||
@ -1255,11 +1228,32 @@ i64 UI_EndFrame(UI_Frame frame)
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Render
|
||||
|
||||
GPU_CommandList *cl = GPU_OpenCommandList(GPU_QueueKind_Direct);
|
||||
{
|
||||
//////////////////////////////
|
||||
//- Build render data
|
||||
|
||||
GPU_QueueKind render_queue = GPU_QueueKind_Direct;
|
||||
Fence *render_fence = GPU_FenceFromQueue(render_queue);
|
||||
/* Acquire render target */
|
||||
if (!g->eframe.draw_target || !MatchVec2I32(monitor_size, GPU_Count2D(g->eframe.draw_target)))
|
||||
{
|
||||
YieldOnFence(submit_fence, g->eframe.gpu_submit_fence_target);
|
||||
GPU_ReleaseResource(g->eframe.draw_target, GPU_ReleaseFlag_None);
|
||||
g->eframe.draw_target = 0;
|
||||
}
|
||||
if (!g->eframe.draw_target)
|
||||
{
|
||||
__profn("Acquire ui render target");
|
||||
GPU_ResourceDesc desc = ZI;
|
||||
desc.kind = GPU_ResourceKind_Texture2D;
|
||||
desc.flags = GPU_ResourceFlag_Renderable | GPU_ResourceFlag_Writable;
|
||||
// desc.texture.format = GPU_Format_R8G8B8A8_Unorm;
|
||||
desc.texture.format = GPU_Format_R16G16B16A16_Float;
|
||||
desc.texture.size = VEC3I32(monitor_size.x, monitor_size.y, 1);
|
||||
g->eframe.draw_target = GPU_AcquireResource(desc);
|
||||
}
|
||||
|
||||
/* Build rect instance data */
|
||||
for (u64 pre_index = 0; pre_index < boxes_count; ++pre_index)
|
||||
@ -1274,7 +1268,7 @@ i64 UI_EndFrame(UI_Frame frame)
|
||||
/* Box rect */
|
||||
{
|
||||
UI_DRect *rect = PushStruct(g->eframe.rects_arena, UI_DRect);
|
||||
rect->flags |= UI_DRectFlag_DrawTexture * !!(box->raw_texture != 0);
|
||||
rect->flags |= UI_DRectFlag_DrawTexture * !(IsGpuPointerNil(box->raw_texture));
|
||||
rect->p0 = box->p0;
|
||||
rect->p1 = box->p1;
|
||||
rect->tex_uv0 = VEC2(0, 0);
|
||||
@ -1290,9 +1284,9 @@ i64 UI_EndFrame(UI_Frame frame)
|
||||
rect->bl_rounding = box->rounding_bl;
|
||||
|
||||
/* Texture */
|
||||
if (box->raw_texture != 0)
|
||||
if (!IsGpuPointerNil(box->raw_texture))
|
||||
{
|
||||
rect->tex = GPU_Texture2DRidFromResource(box->raw_texture);
|
||||
rect->tex = box->raw_texture;
|
||||
rect->tex_uv0 = box->raw_texture_uv0;
|
||||
rect->tex_uv1 = box->raw_texture_uv1;
|
||||
}
|
||||
@ -1426,14 +1420,12 @@ i64 UI_EndFrame(UI_Frame frame)
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Render
|
||||
//- Dispatch shaders
|
||||
|
||||
/* Upload transient buffers */
|
||||
GPU_Resource *draw_rects_buffer = GPU_UploadTransientBufferFromArena(&g->eframe.draw_rects_tbuff, g->eframe.rects_arena);
|
||||
u32 draw_rects_count = GPU_GetBufferCount(draw_rects_buffer);
|
||||
|
||||
/* Build command list */
|
||||
GPU_CommandList *cl = GPU_BeginCommandList(render_queue);
|
||||
{
|
||||
//- Prep rect pass
|
||||
{
|
||||
@ -1489,11 +1481,13 @@ i64 UI_EndFrame(UI_Frame frame)
|
||||
}
|
||||
}
|
||||
}
|
||||
g->eframe.gpu_submit_fence_target = GPU_EndCommandList(cl);
|
||||
|
||||
/* Reset render data */
|
||||
GPU_ResetTransientBuffer(&g->eframe.draw_rects_tbuff, g->eframe.gpu_submit_fence_target);
|
||||
ResetArena(g->eframe.rects_arena);
|
||||
/* Reset */
|
||||
GPU_ResetArena(cl, g->eframe.frame_gpu_arena);
|
||||
GPU_ResetArena(cl, g->eframe.drects_gpu_arena);
|
||||
}
|
||||
GPU_CloseCommandList(cl);
|
||||
|
||||
|
||||
//////////////////////////////
|
||||
//- Present & end frame
|
||||
@ -1508,12 +1502,11 @@ i64 UI_EndFrame(UI_Frame frame)
|
||||
Vec2I32 dst_p1 = VEC2I32(0, 0);
|
||||
Vec2I32 src_p0 = VEC2I32(0, 0);
|
||||
Vec2I32 src_p1 = draw_size;
|
||||
g->eframe.gpu_submit_fence_target = GPU_PresentSwapchain(g->eframe.swapchain, g->eframe.draw_target, AnyBit(g->bframe.frame_flags, UI_FrameFlag_Vsync), backbuffer_size, dst_p0, dst_p1, src_p0, src_p1, LinearFromSrgb(g->bframe.swapchain_color));
|
||||
GPU_PresentSwapchain(g->eframe.swapchain, g->eframe.draw_target, AnyBit(g->bframe.frame_flags, UI_FrameFlag_Vsync), backbuffer_size, dst_p0, dst_p1, src_p0, src_p1, LinearFromSrgb(g->bframe.swapchain_color));
|
||||
}
|
||||
WND_EndFrame(frame.window_frame);
|
||||
|
||||
++g->eframe.tick;
|
||||
|
||||
EndScratch(scratch);
|
||||
return g->eframe.gpu_submit_fence_target;
|
||||
}
|
||||
|
||||
@ -109,7 +109,7 @@ Enum(UI_BoxFlag)
|
||||
X(Font, ResourceKey) \
|
||||
X(FontSize, u32) \
|
||||
X(Text, String) \
|
||||
X(BackgroundTexture, GPU_Resource *) \
|
||||
X(BackgroundTexture, GpuPointer) \
|
||||
X(BackgroundTextureUv0, Vec2) \
|
||||
X(BackgroundTextureUv1, Vec2) \
|
||||
/* --------------------------------------- */ \
|
||||
@ -268,7 +268,7 @@ Struct(UI_Box)
|
||||
|
||||
//- Cmd data
|
||||
UI_BoxDesc desc;
|
||||
GPU_Resource *raw_texture;
|
||||
GpuPointer raw_texture;
|
||||
Vec2 raw_texture_uv0;
|
||||
Vec2 raw_texture_uv1;
|
||||
|
||||
@ -372,14 +372,14 @@ Struct(UI_State)
|
||||
struct UI_EFrameState
|
||||
{
|
||||
Arena *layout_arena;
|
||||
Arena *rects_arena;
|
||||
GPU_Arena *frame_gpu_arena;
|
||||
GPU_Arena *drects_gpu_arena;
|
||||
u64 tick;
|
||||
|
||||
/* Render */
|
||||
GPU_Resource *draw_target;
|
||||
GpuPointer draw_target;
|
||||
GPU_Swapchain *swapchain;
|
||||
i64 gpu_submit_fence_target;
|
||||
GPU_TransientBuffer draw_rects_tbuff;
|
||||
|
||||
/* Layout */
|
||||
UI_Box *root_box;
|
||||
@ -493,4 +493,4 @@ Vec2 UI_CursorPos(void);
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ End frame
|
||||
|
||||
i64 UI_EndFrame(UI_Frame frame);
|
||||
void UI_EndFrame(UI_Frame frame);
|
||||
|
||||
Loading…
Reference in New Issue
Block a user