gpu staging heap progress

This commit is contained in:
jacob 2025-12-02 21:40:28 -06:00
parent 0c796768e5
commit f0f3da0bcd
16 changed files with 684 additions and 163 deletions

View File

@ -704,9 +704,10 @@
#endif #endif
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Shader types //~ C <-> Shader interop types
//- Shader linkage //- Shader linkage
#if IsLanguageC #if IsLanguageC
Struct(VertexShader) { ResourceKey resource; }; Struct(VertexShader) { ResourceKey resource; };
Struct(PixelShader) { ResourceKey resource; }; Struct(PixelShader) { ResourceKey resource; };
@ -721,6 +722,7 @@
#endif #endif
//- Shader resource handles //- Shader resource handles
Struct(StructuredBufferHandle) { u32 v; }; Struct(StructuredBufferHandle) { u32 v; };
Struct(RWStructuredBufferHandle) { u32 v; }; Struct(RWStructuredBufferHandle) { u32 v; };
Struct(Texture1DHandle) { u32 v; }; Struct(Texture1DHandle) { u32 v; };

View File

@ -110,3 +110,12 @@ Vec2 NdcFromPos(Vec2 pos, Vec2 size)
result += Vec2(-1, 1); result += Vec2(-1, 1);
return result; return result;
} }
Vec2 NdcFromUv(Vec2 uv)
{
    /* Maps UV space ([0,1]^2, y-down) to NDC ([-1,1]^2, y-up):
     * scale by (2,-2) to flip the y axis, then offset by (-1,1). */
    Vec2 result;
    result = uv;
    result *= Vec2(2, -2);
    result += Vec2(-1, 1);
    return result;
}

View File

@ -122,7 +122,7 @@ f64 ModF64(f64 x, f64 m)
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Floating point sign //~ Abs
f32 AbsF32(f32 f) f32 AbsF32(f32 f)
{ {
@ -159,7 +159,9 @@ i64 SignF64(f64 f)
} }
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ U64 pow //~ Exponential ops
//- Pow u64
/* Taken from https://gist.github.com/orlp/3551590 */ /* Taken from https://gist.github.com/orlp/3551590 */
u64 PowU64(u64 base, u8 exp) u64 PowU64(u64 base, u8 exp)
@ -254,28 +256,7 @@ u64 PowU64(u64 base, u8 exp)
} }
} }
//////////////////////////////////////////////////////////// //- Logn
//~ Align up
u64 AlignU64Pow2(u64 x)
{
u64 result = 0;
if (x > 0)
{
result = x - 1;
result |= result >> 1;
result |= result >> 2;
result |= result >> 4;
result |= result >> 8;
result |= result >> 16;
result |= result >> 32;
++result;
}
return result;
}
////////////////////////////////////////////////////////////
//~ Logn
/* Based on FreeBSD's implementation /* Based on FreeBSD's implementation
* https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_logf.c */ * https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_logf.c */
@ -369,8 +350,7 @@ f32 LnF32(f32 x)
} }
} }
//////////////////////////////////////////////////////////// //- Exp
//~ Exp
/* Based on FreeBSD's implementation /* Based on FreeBSD's implementation
* https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_expf.c */ * https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_expf.c */
@ -481,8 +461,7 @@ f32 ExpF32(f32 x)
} }
} }
//////////////////////////////////////////////////////////// //- Pow
//~ Pow
f32 PowF32(f32 a, f32 b) f32 PowF32(f32 a, f32 b)
{ {
@ -499,8 +478,7 @@ f32 PowF32(f32 a, f32 b)
} }
} }
//////////////////////////////////////////////////////////// //- Sqrt
//~ Sqrt
f32 SqrtF32(f32 x) f32 SqrtF32(f32 x)
{ {
@ -517,6 +495,34 @@ f32 RSqrtF32(f32 x)
return IxRsqrtF32(x); return IxRsqrtF32(x);
} }
////////////////////////////////////////////////////////////
//~ Align
u64 AlignU64(u64 x, u64 align)
{
    /* Rounds x up to the next multiple of `align`. Works for any alignment,
     * not just powers of two; align == 0 is treated as 1 (x unchanged).
     * NOTE(review): x + (align - 1) can wrap for x near U64 max — assumed
     * never hit by callers; confirm if huge sizes become possible. */
    align = MaxU64(align, 1);
    u64 result = (x + (align - 1));
    result -= result % align;
    return result;
}
u64 AlignU64ToNextPow2(u64 x)
{
    /* Rounds x up to the nearest power of two (0 maps to 0; values that are
     * already powers of two are returned unchanged). Classic bit-smear:
     * propagate the highest set bit of (x - 1) into every lower bit, then
     * increment. */
    u64 result = 0;
    if (x > 0)
    {
        result = x - 1;
        for (u64 shift = 1; shift <= 32; shift <<= 1)
        {
            result |= result >> shift;
        }
        ++result;
    }
    return result;
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Trig //~ Trig

View File

@ -235,7 +235,6 @@ i64 SignF64(f64 f);
//~ Exponential ops //~ Exponential ops
u64 PowU64(u64 base, u8 exp); u64 PowU64(u64 base, u8 exp);
u64 AlignU64Pow2(u64 x);
f32 LnF32(f32 x); f32 LnF32(f32 x);
f32 ExpF32(f32 x); f32 ExpF32(f32 x);
f32 PowF32(f32 a, f32 b); f32 PowF32(f32 a, f32 b);
@ -243,6 +242,12 @@ f32 SqrtF32(f32 x);
f64 SqrtF64(f64 x); f64 SqrtF64(f64 x);
f32 RSqrtF32(f32 x); f32 RSqrtF32(f32 x);
////////////////////////////////////////////////////////////
//~ Align
u64 AlignU64(u64 x, u64 align);
u64 AlignU64ToNextPow2(u64 x);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Trig //~ Trig

View File

@ -70,7 +70,7 @@
#define FLOOD_DEBUG 0 #define FLOOD_DEBUG 0
#define GPU_DEBUG 1 #define GPU_DEBUG 1
#define GPU_DEBUG_VALIDATION 0 #define GPU_DEBUG_VALIDATION 1
/* If virtual fibers are enabled, each fiber will get its own OS thread, /* If virtual fibers are enabled, each fiber will get its own OS thread,
* and fiber suspend/resume will be emulated using OS thread primitives. * and fiber suspend/resume will be emulated using OS thread primitives.

View File

@ -7,11 +7,38 @@ void GPU_StartupCommon(void)
{ {
GPU_SharedUtilState *g = &GPU_shared_util_state; GPU_SharedUtilState *g = &GPU_shared_util_state;
// GPU_ArenaHandle gpu_perm = GPU_PermArena(); GPU_ArenaHandle gpu_perm = GPU_PermArena();
/* Init point sampler */
{
GPU_ResourceHandle pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint });
g->pt_sampler = GPU_PushSamplerStateHandle(gpu_perm, pt_sampler);
}
GPU_CommandListHandle cl = GPU_PrepareCommandList(GPU_QueueKind_Direct);
{
/* Init quad index buffer */
{
u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 };
GPU_ResourceHandle quad_indices = GPU_PushBuffer(gpu_perm, u16, countof(quad_data));
GPU_CopyCpuBytes(cl, quad_indices, 0, quad_data, RNGU64(0, sizeof(quad_data)));
g->quad_indices.resource = quad_indices;
g->quad_indices.index_size = sizeof(quad_data[0]);
g->quad_indices.index_count = countof(quad_data);
}
/* TODO: Init noise texture */
{
}
}
GPU_CommitCommandList(cl);
GPU_SyncAllQueues(GPU_QueueKind_Direct);
// /* Init point sampler */
// GPU_ResourceHandle pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint });
// g->pt_sampler = GPU_PushSamplerPtr(gpu_perm, pt_sampler);
// GPU_CommandListHandle cl = GPU_PrepareCommandList(); // GPU_CommandListHandle cl = GPU_PrepareCommandList();
// { // {

View File

@ -326,7 +326,7 @@ Struct(GPU_TextureDesc)
GPU_Format format; GPU_Format format;
Vec3I32 dims; Vec3I32 dims;
GPU_Layout initial_layout; GPU_Layout initial_layout;
i32 mip_levels; /* Will be clamped to range [1, max] */ i32 mip_levels; /* Will be clamped to range [1, inf) */
Vec4 clear_color; Vec4 clear_color;
}; };
@ -560,10 +560,13 @@ SamplerStateHandle GPU_PushSamplerStateHandle (GPU_ArenaHandle
//- Count //- Count
u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer); u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer);
u64 GPU_Count1D(GPU_ResourceHandle texture1d); i32 GPU_Count1D(GPU_ResourceHandle texture);
u64 GPU_Count2D(GPU_ResourceHandle texture2d); Vec2I32 GPU_Count2D(GPU_ResourceHandle texture);
u64 GPU_Count3D(GPU_ResourceHandle texture3d); Vec3I32 GPU_Count3D(GPU_ResourceHandle texture);
i32 GPU_CountWidth(GPU_ResourceHandle texture);
i32 GPU_CountHeight(GPU_ResourceHandle texture);
i32 GPU_CountDepth(GPU_ResourceHandle texture);
#define GPU_CountBuffer(buffer, type) GPU_CountBufferSize(buffer) / sizeof(type) #define GPU_CountBuffer(buffer, type) GPU_CountBufferSize(buffer) / sizeof(type)
@ -572,10 +575,10 @@ u64 GPU_Count3D(GPU_ResourceHandle texture3d);
//- Command list //- Command list
GPU_CommandListHandle GPU_PrepareCommandList(void); GPU_CommandListHandle GPU_PrepareCommandList(GPU_QueueKind queue);
void GPU_CommitCommandListEx(GPU_CommandListHandle cl, GPU_QueueKind queue, u64 fence_ops_count, GPU_FenceOp *fence_ops); void GPU_CommitCommandListEx(GPU_CommandListHandle cl, u64 fence_ops_count, GPU_FenceOp *fence_ops);
#define GPU_CommitCommandList(cl, queue) GPU_CommitCommandListEx((cl), (queue), 0, 0) #define GPU_CommitCommandList(cl) GPU_CommitCommandListEx((cl), 0, 0)
//- Arena //- Arena
@ -605,42 +608,42 @@ void GPU_SetConstant_(GPU_CommandListHandle cl, i32 slot, void *src_32bit, u32 s
void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc); void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc);
#define GPU_MemoryBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \ #define GPU_MemoryBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
.resource = (_resource), \ .resource = (_resource), \
.sync_prev = _sync_prev, \ .sync_prev = _sync_prev, \
.sync_next = _sync_next, \ .sync_next = _sync_next, \
.access_prev = _access_prev, \ .access_prev = _access_prev, \
.access_next = _access_next, \ .access_next = _access_next, \
}) })
#define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \ #define GPU_MemoryLayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
.is_global = 1, \ .resource = (_resource), \
.sync_prev = _sync_prev, \ .sync_prev = _sync_prev, \
.sync_next = _sync_next, \ .sync_next = _sync_next, \
.access_prev = _access_prev, \ .access_prev = _access_prev, \
.access_next = _access_next, \ .access_next = _access_next, \
.layout = _layout, \
}) })
#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \ #define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \ GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
.resource = (_resource), \ .is_global = 1, \
.sync_prev = _sync_prev, \ .sync_prev = _sync_prev, \
.sync_next = _sync_next, \ .sync_next = _sync_next, \
.access_prev = _access_prev, \ .access_prev = _access_prev, \
.access_next = _access_next, \ .access_next = _access_next, \
.layout = _layout, \
}) })
#define GPU_DumbMemoryBarrier(_cl, _resource) \ #define GPU_DumbMemoryBarrier(cl, resource) \
GPU_MemoryBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) GPU_MemoryBarrier((cl), (resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
#define GPU_DumbGlobalMemoryBarrier(_cl) \ #define GPU_DumbMemoryLayoutBarrier(cl, resource, layout) \
GPU_GlobalMemoryBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All) GPU_MemoryLayoutBarrier((cl), (resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (layout))
#define GPU_DumbLayoutBarrier(_cl, _resource, _layout) \ #define GPU_DumbGlobalMemoryBarrier(cl) \
GPU_LayoutBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (_layout)) GPU_GlobalMemoryBarrier((cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
//- Compute //- Compute
@ -648,12 +651,29 @@ void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups);
//- Rasterize //- Rasterize
void GPU_Rasterize(GPU_CommandListHandle cl, void GPU_RasterizeEx(GPU_CommandListHandle cl,
VertexShader vs, PixelShader ps, VertexShader vs, PixelShader ps,
u32 instances_count, GPU_IndexBufferDesc index_buffer, u32 instances_count, GPU_IndexBufferDesc index_buffer,
u32 render_targets_count, GPU_ResourceHandle *render_targets, u32 render_targets_count, GPU_ResourceHandle *render_targets,
Rng3 viewport, Rng2 scissor, Rng3 viewport, Rng2 scissor,
GPU_RasterMode mode); GPU_RasterMode mode);
#define GPU_Rasterize(cl, vs, ps, instances_count, index_buffer, render_target, mode) \
GPU_RasterizeEx( \
(cl), \
(vs), (ps), \
(instances_count), (index_buffer), \
1, &(render_target), \
RNG3( \
VEC3(0, 0, 0), \
VEC3(GPU_CountWidth(render_target), GPU_CountHeight(render_target), 1) \
), \
RNG2( \
VEC2(0, 0), \
Vec2FromVec(GPU_Count2D(render_target)) \
), \
(mode) \
)
//- Clear //- Clear
@ -663,6 +683,15 @@ void GPU_ClearRenderTarget(GPU_CommandListHandle cl, GPU_ResourceHandle render_t
void GPU_ProfN(GPU_CommandListHandle cl, String name); void GPU_ProfN(GPU_CommandListHandle cl, String name);
////////////////////////////////////////////////////////////
//~ @hookdecl Synchronization
/* `waiter_queue` will block until `completion_queue` completes all submitted commands */
void GPU_SyncQueue(GPU_QueueKind completion_queue, GPU_QueueKind waiter_queue);
/* All queues will block until `completion_queue` completes all submitted commands */
void GPU_SyncAllQueues(GPU_QueueKind completion_queue);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookdecl Statistics //~ @hookdecl Statistics

View File

@ -62,7 +62,7 @@ void GPU_Startup(void)
{ {
__profn("Create device"); __profn("Create device");
IDXGIAdapter3 *adapter = 0; IDXGIAdapter3 *adapter = 0;
ID3D12Device *device = 0; ID3D12Device10 *device = 0;
String error = Lit("Could not initialize GPU device."); String error = Lit("Could not initialize GPU device.");
String first_gpu_name = ZI; String first_gpu_name = ZI;
u32 adapter_index = 0; u32 adapter_index = 0;
@ -86,7 +86,7 @@ void GPU_Startup(void)
* - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3 * - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3
* - EnhancedBarriersSupported == 1 * - EnhancedBarriersSupported == 1
*/ */
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device); hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device);
} }
if (SUCCEEDED(hr) && !skip) if (SUCCEEDED(hr) && !skip)
{ {
@ -769,7 +769,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
if (desc.kind == GPU_ResourceKind_Buffer) if (desc.kind == GPU_ResourceKind_Buffer)
{ {
desc.buffer.stride = MaxU32(desc.buffer.stride, 1); desc.buffer.stride = MaxU32(desc.buffer.stride, 1);
buffer_size = MaxU64(AlignU64Pow2(desc.buffer.count * desc.buffer.stride), Kibi(64)); buffer_size = MaxU64(AlignU64ToNextPow2(desc.buffer.count * desc.buffer.stride), Kibi(64));
} }
u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size); u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size);
@ -1137,9 +1137,13 @@ GPU_ArenaHandle GPU_AcquireArena(void)
GPU_D12_Arena *gpu_arena = 0; GPU_D12_Arena *gpu_arena = 0;
{ {
Arena *perm = PermArena(); Arena *perm = PermArena();
PushAlign(perm, CachelineSize);
gpu_arena = PushStruct(perm, GPU_D12_Arena); gpu_arena = PushStruct(perm, GPU_D12_Arena);
PushAlign(perm, CachelineSize);
} }
return (GPU_ArenaHandle) { .v = (u64)gpu_arena }; gpu_arena->arena = AcquireArena(Gibi(1));
return GPU_D12_HandleFromPointer(GPU_ArenaHandle, gpu_arena);
} }
void GPU_ReleaseArena(GPU_ArenaHandle arena) void GPU_ReleaseArena(GPU_ArenaHandle arena)
@ -1238,19 +1242,109 @@ GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_
GPU_ResourceHandle GPU_PushBufferEx(GPU_ArenaHandle arena, GPU_ResourceHandle desc)
{
    /* TODO: not implemented yet — returns the nil resource handle. */
    return GPU_D12_HandleFromPointer(GPU_ResourceHandle, 0);
}
GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena_handle, GPU_TextureDesc desc)
{
    /* Creates a texture as a placed resource inside the arena's shared D3D12
     * resource heap (created lazily on first use), bump-allocating the placed
     * offset, and records bookkeeping in a GPU_D12_Resource on the arena. */
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle);
    D3D12_BARRIER_LAYOUT initial_layout = GPU_D12_BarrierLayoutFromLayout(desc.initial_layout);
    /* Create resource heap */
    if (!gpu_arena->d3d_resource_heap)
    {
        /* FIXME: Dynamic size */
        D3D12_HEAP_DESC d3d_desc = ZI;
        d3d_desc.SizeInBytes = Mebi(64);
        d3d_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; /* TODO: Remove this and support tier 1 resource heaps */
        d3d_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
        ID3D12Heap *heap = 0;
        HRESULT hr = ID3D12Device_CreateHeap(g->device, &d3d_desc, &IID_ID3D12Heap, (void **)&heap);
        if (!SUCCEEDED(hr))
        {
            /* TODO: Don't panic */
            Panic(Lit("Failed to create D3D12 resource heap"));
        }
        gpu_arena->d3d_resource_heap = heap;
        gpu_arena->heap_size = d3d_desc.SizeInBytes;
    }
    ID3D12Resource *d3d_resource = 0;
    {
        D3D12_RESOURCE_DESC1 d3d_desc = ZI;
        /* BUGFIX: the 2D arm previously tested the bare enum constant
         * `GPU_TextureKind_2D` (always truthy), so 3D textures were created as
         * TEXTURE2D; compare against desc.kind in every arm. */
        d3d_desc.Dimension = desc.kind == GPU_TextureKind_1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :
                             desc.kind == GPU_TextureKind_2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D :
                                                               D3D12_RESOURCE_DIMENSION_TEXTURE3D;
        d3d_desc.Width = MaxI32(desc.dims.x, 1);
        d3d_desc.Height = MaxI32(desc.dims.y, 1);
        d3d_desc.DepthOrArraySize = MaxI32(desc.dims.z, 1);
        d3d_desc.MipLevels = MaxI32(desc.mip_levels, 1); /* contract: clamped to [1, inf) */
        d3d_desc.Format = GPU_D12_DxgiFormatFromGpuFormat(desc.format);
        d3d_desc.SampleDesc.Count = 1;
        d3d_desc.SampleDesc.Quality = 0;
        /* AnyBit yields 0/1, so each flag is added iff the caller requested it */
        d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_AllowShaderReadWrite);
        d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_AllowRenderTarget);
        d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(desc.flags, GPU_ResourceFlag_AllowDepthStencil);
        /* Ask the device for the placed allocation's size & alignment */
        u64 alloc_size = 0;
        u64 alloc_align = 0;
        {
            D3D12_RESOURCE_ALLOCATION_INFO alloc_info = ZI;
            ID3D12Device_GetResourceAllocationInfo(g->device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc);
            alloc_size = alloc_info.SizeInBytes;
            alloc_align = alloc_info.Alignment;
        }
        /* Bump-allocate from the arena heap */
        u64 alloc_pos = gpu_arena->heap_pos;
        alloc_pos = AlignU64(alloc_pos, alloc_align);
        if (alloc_pos + alloc_size > gpu_arena->heap_size)
        {
            /* BUGFIX: check BEFORE committing the bump so heap_pos is not
             * corrupted when we overflow. TODO: Don't panic */
            Panic(Lit("Gpu arena overflow"));
        }
        gpu_arena->heap_pos = alloc_pos + alloc_size;
        D3D12_CLEAR_VALUE clear_value = {
            .Color[0] = desc.clear_color.x,
            .Color[1] = desc.clear_color.y,
            .Color[2] = desc.clear_color.z,
            .Color[3] = desc.clear_color.w,
            .Format = d3d_desc.Format
        };
        /* Optimized clear value is only legal for render targets */
        HRESULT hr = ID3D12Device10_CreatePlacedResource2(g->device,
                                                          gpu_arena->d3d_resource_heap,
                                                          alloc_pos,
                                                          &d3d_desc,
                                                          initial_layout,
                                                          (d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? &clear_value : 0,
                                                          0,
                                                          0,
                                                          &IID_ID3D12Resource,
                                                          (void **)&d3d_resource);
        if (!SUCCEEDED(hr))
        {
            /* BUGFIX: hr was previously ignored; fail loudly like the heap
             * path does. TODO: Don't panic */
            Panic(Lit("Failed to create placed texture resource"));
        }
    }
    GPU_D12_Resource *resource = PushStruct(gpu_arena->arena, GPU_D12_Resource);
    resource->d3d_resource = d3d_resource;
    resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;
    resource->flags = desc.flags;
    resource->is_texture = 1;
    resource->texture_format = desc.format;
    resource->texture_dims = desc.dims;
    /* BUGFIX: store the clamped mip count so bookkeeping matches the D3D
     * resource and the documented [1, inf) contract */
    resource->texture_mip_levels = MaxI32(desc.mip_levels, 1);
    resource->texture_layout = initial_layout;
    return GPU_D12_HandleFromPointer(GPU_ResourceHandle, resource);
}
GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc)
{
    /* TODO: not implemented yet — returns the nil resource handle. */
    return GPU_D12_HandleFromPointer(GPU_ResourceHandle, 0);
}
b32 GPU_IsResourceNil(GPU_ResourceHandle handle) b32 GPU_IsResourceNil(GPU_ResourceHandle handle)
@ -1263,81 +1357,103 @@ b32 GPU_IsResourceNil(GPU_ResourceHandle handle)
StructuredBufferHandle GPU_PushStructuredBufferHandleEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range)
{
    /* TODO: not implemented yet — returns the nil handle. */
    return GPU_D12_HandleFromPointer(StructuredBufferHandle, 0);
}
RWStructuredBufferHandle GPU_PushRWStructuredBufferHandleEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range)
{
    /* TODO: not implemented yet — returns the nil handle. */
    return GPU_D12_HandleFromPointer(RWStructuredBufferHandle, 0);
}
Texture1DHandle GPU_PushTexture1DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
{
    /* TODO: not implemented yet — returns the nil handle. */
    return GPU_D12_HandleFromPointer(Texture1DHandle, 0);
}
RWTexture1DHandle GPU_PushRWTexture1DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
{
    /* TODO: not implemented yet — returns the nil handle. */
    return GPU_D12_HandleFromPointer(RWTexture1DHandle, 0);
}
Texture2DHandle GPU_PushTexture2DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
{
    /* TODO: not implemented yet — returns the nil handle. */
    return GPU_D12_HandleFromPointer(Texture2DHandle, 0);
}
RWTexture2DHandle GPU_PushRWTexture2DHandle(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle)
{
    /* Allocates a CBV/SRV/UAV descriptor on the arena, fills it with an
     * unordered-access view of the texture, and returns a handle wrapping the
     * bindless descriptor index.
     * NOTE(review): pDesc == NULL asks D3D12 to infer the UAV description
     * from the resource's format/dimension — fine for plain 2D textures;
     * confirm once typed/array views are needed. */
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle);
    GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle);
    GPU_D12_Descriptor *descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav);
    ID3D12Device_CreateUnorderedAccessView(g->device, resource->d3d_resource, 0, 0, descriptor->handle);
    return GPU_D12_HandleFromPointer(RWTexture2DHandle, descriptor->index);
}
Texture3DHandle GPU_PushTexture3DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
{
    /* TODO: not implemented yet — returns the nil handle. */
    return GPU_D12_HandleFromPointer(Texture3DHandle, 0);
}
RWTexture3DHandle GPU_PushRWTexture3DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
{
    /* TODO: not implemented yet — returns the nil handle. */
    return GPU_D12_HandleFromPointer(RWTexture3DHandle, 0);
}
SamplerStateHandle GPU_PushSamplerStateHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
{
    /* TODO: not implemented yet — returns the nil handle. */
    return GPU_D12_HandleFromPointer(SamplerStateHandle, 0);
}
//- Count //- Count
u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer)
{
    /* Size in bytes of the buffer resource's backing allocation. */
    return GPU_D12_ResourceFromHandle(buffer)->buffer_size;
}
u64 GPU_Count1D(GPU_ResourceHandle texture1d) i32 GPU_Count1D(GPU_ResourceHandle texture)
{ {
/* TODO */ GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
return 0; return resource->texture_dims.x;
} }
u64 GPU_Count2D(GPU_ResourceHandle texture2d) Vec2I32 GPU_Count2D(GPU_ResourceHandle texture)
{ {
/* TODO */ GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
return 0; return VEC2I32(resource->texture_dims.x, resource->texture_dims.y);
} }
u64 GPU_Count3D(GPU_ResourceHandle texture3d) Vec3I32 GPU_Count3D(GPU_ResourceHandle texture)
{ {
/* TODO */ GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
return 0; return resource->texture_dims;
}
/* Width in texels of the texture resource. */
i32 GPU_CountWidth(GPU_ResourceHandle texture)
{
GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
return resource->texture_dims.x;
}
/* Height in texels of the texture resource. */
i32 GPU_CountHeight(GPU_ResourceHandle texture)
{
GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
return resource->texture_dims.y;
}
i32 GPU_CountDepth(GPU_ResourceHandle texture)
{
    /* Depth in texels of the texture resource (1 for 1D/2D textures). */
    return GPU_D12_ResourceFromHandle(texture)->texture_dims.z;
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -1399,12 +1515,210 @@ GPU_D12_Cmd *GPU_D12_PushConstCmd(GPU_D12_CmdList *cl, i32 slot, void *v)
return cmd; return cmd;
} }
GPU_D12_StagingRegionNode *GPU_D12_PushStagingRegion(GPU_D12_CmdList *cl, u64 size)
{
    /* Allocates a `size`-byte region of the per-queue CPU->GPU staging heap.
     * Regions form a circular doubly-linked list ordered by `pos`; a region is
     * reusable once the queue's commit fence has passed its completion target.
     * The returned region is parked at I64Max (in use); command-list
     * submission is expected to lower it to the real fence value. */
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_QueueKind queue_kind = cl->queue_kind;
    GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
    GPU_D12_StagingRegionNode *result = 0;
    Lock lock = LockE(&queue->staging_mutex);
    {
        GPU_D12_StagingHeap *heap = queue->staging_heap;
        i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence);
        /* Find first completed region with matching size.
         * For each region in heap:
         * - If region size > size, split off a smaller region & use it
         * - If region size < size, try to merge with next completed region
         * - If no available completed region with eligible size, queue the
         *   current heap for deletion & create a new heap with larger size
         */
        /* FIXME: Region completion target should be atomic, and initialized to
         * u64/i64 max until cl submission actually sets value */
        /* Find region with large enough size */
        GPU_D12_StagingRegionNode *match = 0;
        if (heap && heap->size >= size)
        {
            GPU_D12_StagingRegionNode *r = heap->head_region_node;
            for (;;)
            {
                b32 is_completed = completed >= Atomic64Fetch(&r->completion_target);
                if (is_completed)
                {
                    GPU_D12_StagingRegionNode *next = r->next;
                    /* A region spans from its own pos to the next region's
                     * pos, or to the end of the heap when it is the last one */
                    u64 region_size = 0;
                    if (next->pos > r->pos)
                    {
                        region_size = next->pos - r->pos;
                    }
                    else
                    {
                        region_size = heap->size - r->pos;
                    }
                    if (region_size < size)
                    {
                        GPU_D12_StagingRegionNode *prev = r->prev;
                        b32 prev_is_completed = completed >= Atomic64Fetch(&prev->completion_target);
                        /* `prev->pos < r->pos` also protects the pos-0 head
                         * node from being merged away, keeping
                         * head_region_node valid */
                        if (prev_is_completed && prev->pos < r->pos)
                        {
                            /* Merge with previous region & retry */
                            prev->next = next;
                            /* BUGFIX: unlink `r` in both directions —
                             * next->prev previously kept dangling to the
                             * freed node */
                            next->prev = prev;
                            SllStackPush(heap->first_free_region_node, r);
                            r = prev;
                        }
                        else
                        {
                            /* Continue to next region */
                            r = next;
                        }
                    }
                    else
                    {
                        /* Found matching region */
                        match = r;
                        break;
                    }
                }
                else
                {
                    /* No large-enough completed region found */
                    break;
                }
            }
        }
        /* Create new heap if no match found */
        if (!match)
        {
            /* Queue old heap for deletion */
            u64 new_heap_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64));
            if (heap)
            {
                /* FIXME: Queue for deletion here */
                new_heap_size = MaxU64(new_heap_size, heap->size * 2);
                heap = 0;
            }
            /* Create new heap */
            {
                Arena *arena = AcquireArena(Gibi(1));
                heap = PushStruct(arena, GPU_D12_StagingHeap);
                heap->arena = arena;
                heap->size = new_heap_size;
                /* Create backing upload heap resource */
                ID3D12Resource *d3d_resource = 0;
                {
                    D3D12_RESOURCE_DESC d3d_desc = ZI;
                    d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
                    d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
                    d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
                    d3d_desc.Alignment = 0;
                    d3d_desc.Width = new_heap_size;
                    d3d_desc.Height = 1;
                    d3d_desc.DepthOrArraySize = 1;
                    d3d_desc.MipLevels = 1;
                    d3d_desc.SampleDesc.Count = 1;
                    d3d_desc.SampleDesc.Quality = 0;
                    D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
                    HRESULT hr = ID3D12Device_CreateCommittedResource(g->device,
                                                                      &heap_props,
                                                                      D3D12_HEAP_FLAG_CREATE_NOT_ZEROED,
                                                                      &d3d_desc,
                                                                      D3D12_RESOURCE_STATE_COMMON,
                                                                      0,
                                                                      &IID_ID3D12Resource,
                                                                      (void **)&d3d_resource);
                    if (!SUCCEEDED(hr))
                    {
                        /* TODO: Don't panic */
                        Panic(Lit("Failed to create upload heap"));
                    }
                }
                heap->resource.d3d_resource = d3d_resource;
                heap->resource.uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;
                heap->resource.buffer_size = new_heap_size;
                heap->resource.buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource);
                /* Map persistently (upload heaps stay CPU-visible) */
                {
                    D3D12_RANGE read_range = ZI;
                    HRESULT hr = ID3D12Resource_Map(d3d_resource, 0, &read_range, &heap->mapped);
                    if (!SUCCEEDED(hr))
                    {
                        /* TODO: Don't panic */
                        Panic(Lit("Failed to map upload heap"));
                    }
                }
            }
            /* BUGFIX: publish the new heap on the queue; previously it lived
             * only in the local, so every call rebuilt and leaked a heap */
            queue->staging_heap = heap;
            /* Create initial region covering the whole heap (pos == 0) */
            match = PushStruct(heap->arena, GPU_D12_StagingRegionNode);
            match->heap = heap;
            match->next = match;
            match->prev = match;
            heap->head_region_node = match;
        }
        /* Split extra region space */
        {
            GPU_D12_StagingRegionNode *next = match->next;
            u64 region_size = 0;
            if (next->pos > match->pos)
            {
                region_size = next->pos - match->pos;
            }
            else
            {
                region_size = heap->size - match->pos;
            }
            if (region_size > size)
            {
                /* Recycle a free node if available, else push a fresh one */
                GPU_D12_StagingRegionNode *new_next = heap->first_free_region_node;
                if (new_next)
                {
                    SllStackPop(heap->first_free_region_node);
                }
                else
                {
                    new_next = PushStruct(heap->arena, GPU_D12_StagingRegionNode);
                }
                new_next->next = next;
                new_next->prev = match;
                next->prev = new_next;
                match->next = new_next;
                new_next->heap = heap;
                new_next->pos = match->pos + size;
            }
        }
        /* Park the region as in-use until submission sets a real fence value */
        Atomic64Set(&match->completion_target, I64Max);
        result = match;
    }
    Unlock(&lock);
    return result;
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookimpl Command //~ @hookimpl Command
//- Command list //- Command list
GPU_CommandListHandle GPU_PrepareCommandList(void) GPU_CommandListHandle GPU_PrepareCommandList(GPU_QueueKind queue)
{ {
GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_SharedState *g = &GPU_D12_shared_state;
GPU_D12_CmdList *cl = 0; GPU_D12_CmdList *cl = 0;
@ -1423,13 +1737,16 @@ GPU_CommandListHandle GPU_PrepareCommandList(void)
} }
} }
Unlock(&lock); Unlock(&lock);
return (GPU_CommandListHandle) { .v = (u64)cl }; cl->queue_kind = queue;
return GPU_D12_HandleFromPointer(GPU_CommandListHandle, cl);
} }
void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_kind, u64 fence_ops_count, GPU_FenceOp *fence_ops) void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, u64 fence_ops_count, GPU_FenceOp *fence_ops)
{ {
GPU_D12_SharedState *g = &GPU_D12_shared_state; GPU_D12_SharedState *g = &GPU_D12_shared_state;
GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
GPU_QueueKind queue_kind = cl->queue_kind;
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind); GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
TempArena scratch = BeginScratchNoConflict(); TempArena scratch = BeginScratchNoConflict();
@ -1849,8 +2166,9 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
u32 indices_count = 0; u32 indices_count = 0;
D3D12_INDEX_BUFFER_VIEW ibv = ZI; D3D12_INDEX_BUFFER_VIEW ibv = ZI;
{ {
GPU_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc;
if (desc.index_count > 0)
{ {
GPU_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc;
GPU_D12_Resource *index_buffer_resource = GPU_D12_ResourceFromHandle(desc.resource); GPU_D12_Resource *index_buffer_resource = GPU_D12_ResourceFromHandle(desc.resource);
ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; ibv.BufferLocation = index_buffer_resource->buffer_gpu_address;
ibv.SizeInBytes = desc.index_size * desc.index_count; ibv.SizeInBytes = desc.index_size * desc.index_count;
@ -2057,16 +2375,27 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
//- Arena //- Arena
void GPU_ResetArena(GPU_CommandListHandle cl_handle, GPU_ArenaHandle arena_handle)
{
    /* Rewinds the arena's placed-resource heap cursor so its space can be
     * reused by future pushes. TODO */
    /* FIXME: Move descriptors into committed lists */
    /* FIXME: Release id3d12 resource com object references */
    GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle);
    gpu_arena->heap_pos = 0;
}
//- Cpu -> Gpu copy //- Cpu -> Gpu copy
void GPU_CopyCpuBytes(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range)
{
    /* Uploads src[src_copy_range.min, src_copy_range.max) to `dst_handle` at
     * `dst_offset`: memcpy into a staging region of the queue's mapped upload
     * heap, then record a GPU-side buffer copy out of that region. */
    GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
    u64 copy_size = src_copy_range.max - src_copy_range.min;
    GPU_D12_StagingRegionNode *staging = GPU_D12_PushStagingRegion(cl, copy_size);
    u8 *staging_dst = (u8 *)staging->heap->mapped + staging->pos;
    u8 *cpu_src = (u8 *)src + src_copy_range.min;
    CopyBytes(staging_dst, cpu_src, copy_size);
    GPU_ResourceHandle staging_handle = GPU_D12_HandleFromPointer(GPU_ResourceHandle, &staging->heap->resource);
    GPU_CopyBytes(cl_handle, dst_handle, dst_offset, staging_handle, RNGU64(staging->pos, staging->pos + copy_size));
}
void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range) void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range)
@ -2180,12 +2509,12 @@ void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 grou
//- Rasterize //- Rasterize
void GPU_Rasterize(GPU_CommandListHandle cl_handle, void GPU_RasterizeEx(GPU_CommandListHandle cl_handle,
VertexShader vs, PixelShader ps, VertexShader vs, PixelShader ps,
u32 instances_count, GPU_IndexBufferDesc index_buffer, u32 instances_count, GPU_IndexBufferDesc index_buffer,
u32 render_targets_count, GPU_ResourceHandle *render_targets, u32 render_targets_count, GPU_ResourceHandle *render_targets,
Rng3 viewport, Rng2 scissor, Rng3 viewport, Rng2 scissor,
GPU_RasterMode mode) GPU_RasterMode mode)
{ {
GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
@ -2221,6 +2550,19 @@ void GPU_ProfN(GPU_CommandListHandle cl, String name)
/* TODO */ /* TODO */
} }
////////////////////////////////////////////////////////////
//~ @hookimpl Synchronization
void GPU_SyncQueue(GPU_QueueKind completion_queue, GPU_QueueKind waiter_queue)
{
/* TODO */
}
void GPU_SyncAllQueues(GPU_QueueKind completion_queue)
{
/* TODO */
}
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ @hookimpl Map hooks //~ @hookimpl Map hooks
@ -2315,7 +2657,7 @@ GPU_SwapchainHandle GPU_AcquireSwapchain(WND_Handle window)
swapchain = PushStruct(perm, GPU_D12_Swapchain); swapchain = PushStruct(perm, GPU_D12_Swapchain);
} }
swapchain->window_hwnd = (HWND)WND_OsHandleFromWindow(window); swapchain->window_hwnd = (HWND)WND_OsHandleFromWindow(window);
return (GPU_SwapchainHandle) { .v = (u64)swapchain }; return GPU_D12_HandleFromPointer(GPU_SwapchainHandle, swapchain);
} }
void GPU_ReleaseSwapchain(GPU_SwapchainHandle swapchain_handle) void GPU_ReleaseSwapchain(GPU_SwapchainHandle swapchain_handle)
@ -2483,7 +2825,7 @@ GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, G
cur_backbuffer = &swapchain->backbuffers[backbuffer_idx]; cur_backbuffer = &swapchain->backbuffers[backbuffer_idx];
} }
return (GPU_ResourceHandle) { .v = (u64)cur_backbuffer }; return GPU_D12_HandleFromPointer(GPU_ResourceHandle, cur_backbuffer);
} }
void GPU_CommitBackbuffer(GPU_ResourceHandle backbuffer_handle, i32 vsync) void GPU_CommitBackbuffer(GPU_ResourceHandle backbuffer_handle, i32 vsync)

View File

@ -104,7 +104,18 @@ Struct(GPU_D12_DescriptorList)
Struct(GPU_D12_Arena) Struct(GPU_D12_Arena)
{ {
Arena *arena;
GPU_D12_DescriptorList committed_descriptors_by_heap_and_queue[GPU_D12_DescriptorHeapKind_Count][GPU_NumQueues]; GPU_D12_DescriptorList committed_descriptors_by_heap_and_queue[GPU_D12_DescriptorHeapKind_Count][GPU_NumQueues];
/* TODO:
* To support D3D12_RESOURCE_HEAP_TIER_1 devices, create separate heaps for:
* - Buffers
* - Non-render target & non-depth stencil textures
* - Render target or depth stencil textures
*/
ID3D12Heap *d3d_resource_heap;
u64 heap_pos;
u64 heap_size;
}; };
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
@ -132,6 +143,35 @@ Struct(GPU_D12_Resource)
struct GPU_D12_Swapchain *swapchain; struct GPU_D12_Swapchain *swapchain;
}; };
////////////////////////////////////////////////////////////
//~ Staging types
Struct(GPU_D12_StagingHeap)
{
Arena *arena;
GPU_D12_Resource resource;
void *mapped;
u64 size;
struct GPU_D12_StagingRegionNode *head_region_node;
struct GPU_D12_StagingRegionNode *first_free_region_node;
};
Struct(GPU_D12_StagingRegionNode)
{
GPU_D12_StagingHeap *heap;
/* Heap links (requires heap lock to read) */
GPU_D12_StagingRegionNode *prev;
GPU_D12_StagingRegionNode *next;
/* Region info */
Atomic64 completion_target;
u64 pos;
};
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Command queue types //~ Command queue types
@ -149,9 +189,15 @@ Struct(GPU_D12_Queue)
Mutex commit_mutex; Mutex commit_mutex;
ID3D12Fence *commit_fence; ID3D12Fence *commit_fence;
u64 commit_fence_target; u64 commit_fence_target;
/* Raw command lists */
struct GPU_D12_RawCommandList *first_committed_cl; struct GPU_D12_RawCommandList *first_committed_cl;
struct GPU_D12_RawCommandList *last_committed_cl; struct GPU_D12_RawCommandList *last_committed_cl;
/* Staging heap */
Mutex staging_mutex;
GPU_D12_StagingHeap *staging_heap;
Fence sync_fence; Fence sync_fence;
}; };
@ -264,6 +310,7 @@ Struct(GPU_D12_CmdChunk)
Struct(GPU_D12_CmdList) Struct(GPU_D12_CmdList)
{ {
GPU_QueueKind queue_kind;
GPU_D12_CmdList *next; GPU_D12_CmdList *next;
GPU_D12_CmdChunk *first_cmd_chunk; GPU_D12_CmdChunk *first_cmd_chunk;
@ -332,12 +379,14 @@ Struct(GPU_D12_SharedState)
/* Device */ /* Device */
IDXGIFactory6 *factory; IDXGIFactory6 *factory;
IDXGIAdapter3 *adapter; IDXGIAdapter3 *adapter;
ID3D12Device *device; ID3D12Device10 *device;
} extern GPU_D12_shared_state; } extern GPU_D12_shared_state;
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Helpers //~ Helpers
#define GPU_D12_HandleFromPointer(type, ptr) (type) { .v = (u64)(ptr) }
GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle); GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle);
GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle); GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle);
GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle); GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle);
@ -376,6 +425,7 @@ void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl);
GPU_D12_Cmd *GPU_D12_PushCmd(GPU_D12_CmdList *cl); GPU_D12_Cmd *GPU_D12_PushCmd(GPU_D12_CmdList *cl);
GPU_D12_Cmd *GPU_D12_PushConstCmd(GPU_D12_CmdList *cl, i32 slot, void *v); GPU_D12_Cmd *GPU_D12_PushConstCmd(GPU_D12_CmdList *cl, i32 slot, void *v);
GPU_D12_StagingRegionNode *GPU_D12_PushStagingRegion(GPU_D12_CmdList *cl, u64 size);
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Sync job //~ Sync job

View File

@ -33,9 +33,9 @@ JobImpl(V_VisWorker, _, __)
Arena *frame_arena = AcquireArena(Gibi(64)); Arena *frame_arena = AcquireArena(Gibi(64));
Arena *perm = PermArena(); Arena *perm = PermArena();
GPU_Arena *frame_gpu_arena = GPU_AcquireArena(Mibi(8), GPU_CpuAccessFlag_Writable); GPU_Arena *frame_gpu_arena = GPU_AcquireArena(Mebi(8), GPU_CpuAccessFlag_Writable);
GPU_Arena *dverts_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable); GPU_Arena *dverts_gpu_arena = GPU_AcquireArena(Mebi(32), GPU_CpuAccessFlag_Writable);
GPU_Arena *dvert_idxs_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable); GPU_Arena *dvert_idxs_gpu_arena = GPU_AcquireArena(Mebi(32), GPU_CpuAccessFlag_Writable);
////////////////////////////// //////////////////////////////
//- State //- State

View File

@ -1,5 +1,5 @@
JobDecl(PR_RunForever, EmptySig); JobDecl(PT_RunForever, EmptySig);
JobImpl(PR_RunForever, _sig, _id) JobImpl(PT_RunForever, _sig, _id)
{ {
GPU_ArenaHandle gpu_frame_arena = GPU_AcquireArena(); GPU_ArenaHandle gpu_frame_arena = GPU_AcquireArena();
@ -28,7 +28,7 @@ JobImpl(PR_RunForever, _sig, _id)
GPU_ResourceHandle backbuffer = GPU_PrepareBackbuffer(swapchain, GPU_Format_R16G16B16A16_Float, window_frame.draw_size); GPU_ResourceHandle backbuffer = GPU_PrepareBackbuffer(swapchain, GPU_Format_R16G16B16A16_Float, window_frame.draw_size);
{ {
GPU_CommandListHandle cl = GPU_PrepareCommandList(); GPU_CommandListHandle cl = GPU_PrepareCommandList(GPU_QueueKind_Direct);
{ {
/* Push resources */ /* Push resources */
Vec2I32 final_target_size = window_frame.draw_size; Vec2I32 final_target_size = window_frame.draw_size;
@ -36,43 +36,44 @@ JobImpl(PR_RunForever, _sig, _id)
GPU_Format_R16G16B16A16_Float, GPU_Format_R16G16B16A16_Float,
final_target_size, final_target_size,
GPU_Layout_DirectQueue_ShaderReadWrite, GPU_Layout_DirectQueue_ShaderReadWrite,
.flags = GPU_ResourceFlag_AllowShaderReadWrite | GPU_ResourceFlag_AllowRenderTarget); .flags = GPU_ResourceFlag_AllowShaderReadWrite);
/* Push resource handles */ /* Push resource handles */
Texture2DHandle final_target_rhandle = GPU_PushTexture2DHandle(gpu_frame_arena, final_target);
RWTexture2DHandle final_target_rwhandle = GPU_PushRWTexture2DHandle(gpu_frame_arena, final_target); RWTexture2DHandle final_target_rwhandle = GPU_PushRWTexture2DHandle(gpu_frame_arena, final_target);
/* Prep test pass */ /* Prep test pass */
{ {
GPU_SetConstant(cl, PR_ShaderConst_TestTarget, final_target_rwhandle); GPU_SetConstant(cl, PT_ShaderConst_TestTarget, final_target_rwhandle);
GPU_SetConstant(cl, PR_ShaderConst_TestConst, 3.123); GPU_SetConstant(cl, PT_ShaderConst_TestConst, 3.123);
GPU_SetConstant(cl, PT_ShaderConst_BlitSampler, GPU_GetCommonPointSampler());
GPU_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target_rhandle);
} }
/* Test pass */ /* Test pass */
{ {
GPU_Compute(cl, PR_TestCS, VEC3I32((final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1)); GPU_Compute(cl, PT_TestCS, VEC3I32((final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1));
} }
GPU_DumbMemoryBarrier(cl, final_target); GPU_DumbMemoryBarrier(cl, final_target);
/* Prep clear pass */ /* Prep blit pass */
{ {
GPU_LayoutBarrier(cl, backbuffer, GPU_DumbMemoryLayoutBarrier(cl, final_target, GPU_Layout_DirectQueue_ShaderRead);
GPU_Stage_None, GPU_Access_None, GPU_DumbMemoryLayoutBarrier(cl, backbuffer, GPU_Layout_DirectQueue_RenderTargetWrite);
GPU_Stage_RenderTarget, GPU_Access_RenderTargetWrite,
GPU_Layout_DirectQueue_RenderTargetWrite);
} }
/* Clear pass */ /* Blit pass */
{ {
GPU_Rasterize(cl,
PT_BlitVS, PT_BlitPS,
1, GPU_GetCommonQuadIndices(),
backbuffer, GPU_RasterMode_TriangleList);
GPU_ClearRenderTarget(cl, backbuffer, VEC4(1, 0, 0, 1)); GPU_ClearRenderTarget(cl, backbuffer, VEC4(1, 0, 0, 1));
} }
/* Finalize backbuffer layout */ /* Finalize backbuffer layout */
{ {
GPU_LayoutBarrier(cl, backbuffer, GPU_DumbMemoryLayoutBarrier(cl, backbuffer, GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present);
GPU_Stage_RenderTarget, GPU_Access_RenderTargetWrite,
GPU_Stage_None, GPU_Access_None,
GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present);
} }
/* Reset */ /* Reset */
@ -80,7 +81,7 @@ JobImpl(PR_RunForever, _sig, _id)
GPU_ResetArena(cl, gpu_frame_arena); GPU_ResetArena(cl, gpu_frame_arena);
} }
} }
GPU_CommitCommandList(cl, GPU_QueueKind_Direct); GPU_CommitCommandList(cl);
} }
GPU_CommitBackbuffer(backbuffer, VSYNC); GPU_CommitBackbuffer(backbuffer, VSYNC);
} }
@ -88,8 +89,8 @@ JobImpl(PR_RunForever, _sig, _id)
} }
} }
void PR_Startup(void); void PT_Startup(void);
void PR_Startup(void) void PT_Startup(void)
{ {
RunJob(PR_RunForever); RunJob(PT_RunForever);
} }

View File

@ -13,7 +13,9 @@
@IncludeGpu proto_shaders.gpu @IncludeGpu proto_shaders.gpu
//- Shaders //- Shaders
@ComputeShader PR_TestCS @ComputeShader PT_TestCS
@VertexShader PT_BlitVS
@PixelShader PT_BlitPS
//- Startup //- Startup
@Startup PR_Startup @Startup PT_Startup

View File

@ -6,11 +6,11 @@ Struct(TestStruct)
i32 i; i32 i;
}; };
ComputeShader2D(PR_TestCS, 8, 8) ComputeShader2D(PT_TestCS, 8, 8)
{ {
StructuredBuffer<TestStruct> sb = StructuredBufferFromHandle<TestStruct>(PR_ShaderConst_TestBuff); StructuredBuffer<TestStruct> sb = StructuredBufferFromHandle<TestStruct>(PT_ShaderConst_TestBuff);
RWTexture2D<Vec4> target_tex = RWTexture2DFromHandle<Vec4>(PR_ShaderConst_TestTarget); RWTexture2D<Vec4> target_tex = RWTexture2DFromHandle<Vec4>(PT_ShaderConst_TestTarget);
Vec2U32 target_tex_size = Count2D(target_tex); Vec2U32 target_tex_size = Count2D(target_tex);
Vec2I32 id = SV_DispatchThreadID; Vec2I32 id = SV_DispatchThreadID;
@ -19,3 +19,46 @@ ComputeShader2D(PR_TestCS, 8, 8)
target_tex[id] = Vec4(0, 1, 0, 1); target_tex[id] = Vec4(0, 1, 0, 1);
} }
} }
////////////////////////////////////////////////////////////
//~ Blit shader
Struct(PT_BlitPSInput)
{
Semantic(Vec4, SV_Position);
Semantic(Vec2, src_uv);
};
Struct(PT_BlitPSOutput)
{
Semantic(Vec4, SV_Target0);
};
//////////////////////////////
//- Vertex shader
VertexShader(PT_BlitVS, PT_BlitPSInput)
{
Vec2 uv = RectUvFromVertexId(SV_VertexID);
PT_BlitPSInput result;
result.SV_Position = Vec4(NdcFromUv(uv).xy, 0, 1);
result.src_uv = uv;
return result;
}
//////////////////////////////
//- Pixel shader
PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input)
{
SamplerState sampler = SamplerStateFromHandle(PT_ShaderConst_BlitSampler);
Texture2D<Vec4> tex = Texture2DFromHandle<Vec4>(PT_ShaderConst_BlitSrc);
Vec2 uv = input.src_uv;
Vec4 result = tex.Sample(sampler, uv);
PT_BlitPSOutput output;
output.SV_Target0 = result;
return output;
}

View File

@ -1,6 +1,11 @@
//////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////
//~ Constants //~ Constants
ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0); /* Test shader */
ShaderConstant(StructuredBufferHandle, PR_ShaderConst_TestBuff, 1); ShaderConstant(RWTexture2DHandle, PT_ShaderConst_TestTarget, 0);
ShaderConstant(f32, PR_ShaderConst_TestConst, 2); ShaderConstant(StructuredBufferHandle, PT_ShaderConst_TestBuff, 1);
ShaderConstant(f32, PT_ShaderConst_TestConst, 2);
/* Blit shader */
ShaderConstant(SamplerStateHandle, PT_ShaderConst_BlitSampler, 3);
ShaderConstant(Texture2DHandle, PT_ShaderConst_BlitSrc, 4);

View File

@ -84,7 +84,7 @@ JobImpl(SPR_LoadSheet, sig, _)
/* Init spans */ /* Init spans */
sheet->spans_count = decoded.num_spans; sheet->spans_count = decoded.num_spans;
sheet->span_bins_count = MaxU32(AlignU64Pow2(sheet->spans_count * 2), 1); sheet->span_bins_count = MaxU32(AlignU64ToNextPow2(sheet->spans_count * 2), 1);
sheet->spans = PushStructs(perm, SPR_Span, sheet->spans_count); sheet->spans = PushStructs(perm, SPR_Span, sheet->spans_count);
sheet->span_bins = PushStructs(perm, SPR_SpanBin, sheet->span_bins_count); sheet->span_bins = PushStructs(perm, SPR_SpanBin, sheet->span_bins_count);
{ {
@ -107,7 +107,7 @@ JobImpl(SPR_LoadSheet, sig, _)
/* Init slice groups */ /* Init slice groups */
sheet->slice_groups_count = decoded.num_slice_keys; sheet->slice_groups_count = decoded.num_slice_keys;
sheet->slice_group_bins_count = MaxU32(AlignU64Pow2(sheet->slice_groups_count * 2), 1); sheet->slice_group_bins_count = MaxU32(AlignU64ToNextPow2(sheet->slice_groups_count * 2), 1);
sheet->slice_groups = PushStructs(perm, SPR_SliceGroup, sheet->slice_groups_count); sheet->slice_groups = PushStructs(perm, SPR_SliceGroup, sheet->slice_groups_count);
sheet->slice_group_bins = PushStructs(perm, SPR_SliceGroupBin, sheet->slice_group_bins_count); sheet->slice_group_bins = PushStructs(perm, SPR_SliceGroupBin, sheet->slice_group_bins_count);
{ {

View File

@ -698,8 +698,8 @@ i64 UI_EndFrame(UI_Frame frame)
{ {
g->eframe.layout_arena = AcquireArena(Gibi(64)); g->eframe.layout_arena = AcquireArena(Gibi(64));
g->eframe.tex_gpu_arena = GPU_AcquireTextureArena(); g->eframe.tex_gpu_arena = GPU_AcquireTextureArena();
g->eframe.frame_gpu_arena = GPU_AcquireArena(Mibi(16)); g->eframe.frame_gpu_arena = GPU_AcquireArena(Mebi(16));
g->eframe.drects_gpu_arena = GPU_AcquireArena(Mibi(16)); g->eframe.drects_gpu_arena = GPU_AcquireArena(Mebi(16));
} }
ResetArena(g->eframe.layout_arena); ResetArena(g->eframe.layout_arena);