gpu staging heap progress
This commit is contained in:
parent
0c796768e5
commit
f0f3da0bcd
@ -704,9 +704,10 @@
|
||||
#endif
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Shader types
|
||||
//~ C <-> Shader interop types
|
||||
|
||||
//- Shader linkage
|
||||
|
||||
#if IsLanguageC
|
||||
Struct(VertexShader) { ResourceKey resource; };
|
||||
Struct(PixelShader) { ResourceKey resource; };
|
||||
@ -721,6 +722,7 @@
|
||||
#endif
|
||||
|
||||
//- Shader resource handles
|
||||
|
||||
Struct(StructuredBufferHandle) { u32 v; };
|
||||
Struct(RWStructuredBufferHandle) { u32 v; };
|
||||
Struct(Texture1DHandle) { u32 v; };
|
||||
|
||||
@ -110,3 +110,12 @@ Vec2 NdcFromPos(Vec2 pos, Vec2 size)
|
||||
result += Vec2(-1, 1);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Converts a UV coordinate to normalized device coordinates by scaling
 * with (2, -2) and then offsetting by (-1, 1).
 * E.g. uv (0, 0) -> (-1, 1) and uv (1, 1) -> (1, -1). */
Vec2 NdcFromUv(Vec2 uv)
{
    Vec2 scaled = uv * Vec2(2, -2);
    return scaled + Vec2(-1, 1);
}
|
||||
|
||||
@ -122,7 +122,7 @@ f64 ModF64(f64 x, f64 m)
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Floating point sign
|
||||
//~ Abs
|
||||
|
||||
f32 AbsF32(f32 f)
|
||||
{
|
||||
@ -159,7 +159,9 @@ i64 SignF64(f64 f)
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ U64 pow
|
||||
//~ Exponential ops
|
||||
|
||||
//- Pow u64
|
||||
|
||||
/* Taken from https://gist.github.com/orlp/3551590 */
|
||||
u64 PowU64(u64 base, u8 exp)
|
||||
@ -254,28 +256,7 @@ u64 PowU64(u64 base, u8 exp)
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Align up
|
||||
|
||||
u64 AlignU64Pow2(u64 x)
|
||||
{
|
||||
u64 result = 0;
|
||||
if (x > 0)
|
||||
{
|
||||
result = x - 1;
|
||||
result |= result >> 1;
|
||||
result |= result >> 2;
|
||||
result |= result >> 4;
|
||||
result |= result >> 8;
|
||||
result |= result >> 16;
|
||||
result |= result >> 32;
|
||||
++result;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Logn
|
||||
//- Logn
|
||||
|
||||
/* Based on FreeBSD's implementation
|
||||
* https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_logf.c */
|
||||
@ -369,8 +350,7 @@ f32 LnF32(f32 x)
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Exp
|
||||
//- Exp
|
||||
|
||||
/* Based on FreeBSD's implementation
|
||||
* https://github.com/freebsd/freebsd-src/blob/main/lib/msun/src/e_expf.c */
|
||||
@ -481,8 +461,7 @@ f32 ExpF32(f32 x)
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Pow
|
||||
//- Pow
|
||||
|
||||
f32 PowF32(f32 a, f32 b)
|
||||
{
|
||||
@ -499,8 +478,7 @@ f32 PowF32(f32 a, f32 b)
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Sqrt
|
||||
//- Sqrt
|
||||
|
||||
f32 SqrtF32(f32 x)
|
||||
{
|
||||
@ -517,6 +495,34 @@ f32 RSqrtF32(f32 x)
|
||||
return IxRsqrtF32(x);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Align
|
||||
|
||||
/* Rounds `x` up to the next multiple of `align`.
 *
 * - `align` of 0 is treated as 1 (result is `x`).
 * - `align` does not need to be a power of two.
 * - A value that is already a multiple of `align` is returned unchanged,
 *   including values close to the u64 maximum (the rounding is computed from
 *   the remainder rather than from `x + (align - 1)`, which could wrap).
 */
u64 AlignU64(u64 x, u64 align)
{
    align = MaxU64(align, 1);
    u64 remainder = x % align;
    u64 result = x;
    if (remainder != 0)
    {
        /* Step up to the next multiple */
        result += align - remainder;
    }
    return result;
}
|
||||
|
||||
/* Rounds `x` up to the nearest power of two.
 * Returns 0 when `x` is 0. A value that is already a power of two is
 * returned unchanged. */
u64 AlignU64ToNextPow2(u64 x)
{
    if (x == 0)
    {
        return 0;
    }

    /* Smear the highest set bit of (x - 1) into every lower bit, then add 1 */
    u64 bits = x - 1;
    for (u64 shift = 1; shift <= 32; shift <<= 1)
    {
        bits |= bits >> shift;
    }
    return bits + 1;
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Trig
|
||||
|
||||
|
||||
@ -235,7 +235,6 @@ i64 SignF64(f64 f);
|
||||
//~ Exponential ops
|
||||
|
||||
u64 PowU64(u64 base, u8 exp);
|
||||
u64 AlignU64Pow2(u64 x);
|
||||
f32 LnF32(f32 x);
|
||||
f32 ExpF32(f32 x);
|
||||
f32 PowF32(f32 a, f32 b);
|
||||
@ -243,6 +242,12 @@ f32 SqrtF32(f32 x);
|
||||
f64 SqrtF64(f64 x);
|
||||
f32 RSqrtF32(f32 x);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Align
|
||||
|
||||
u64 AlignU64(u64 x, u64 align);
|
||||
u64 AlignU64ToNextPow2(u64 x);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Trig
|
||||
|
||||
|
||||
@ -70,7 +70,7 @@
|
||||
#define FLOOD_DEBUG 0
|
||||
|
||||
#define GPU_DEBUG 1
|
||||
#define GPU_DEBUG_VALIDATION 0
|
||||
#define GPU_DEBUG_VALIDATION 1
|
||||
|
||||
/* If virtual fibers are enabled, each fiber will get its own OS thread,
|
||||
* and fiber suspend/resume will be emulated using OS thread primitives.
|
||||
|
||||
@ -7,11 +7,38 @@ void GPU_StartupCommon(void)
|
||||
{
|
||||
GPU_SharedUtilState *g = &GPU_shared_util_state;
|
||||
|
||||
// GPU_ArenaHandle gpu_perm = GPU_PermArena();
|
||||
GPU_ArenaHandle gpu_perm = GPU_PermArena();
|
||||
|
||||
/* Init point sampler */
|
||||
{
|
||||
GPU_ResourceHandle pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint });
|
||||
g->pt_sampler = GPU_PushSamplerStateHandle(gpu_perm, pt_sampler);
|
||||
}
|
||||
|
||||
GPU_CommandListHandle cl = GPU_PrepareCommandList(GPU_QueueKind_Direct);
|
||||
{
|
||||
/* Init quad index buffer */
|
||||
{
|
||||
u16 quad_data[6] = { 0, 1, 2, 0, 2, 3 };
|
||||
GPU_ResourceHandle quad_indices = GPU_PushBuffer(gpu_perm, u16, countof(quad_data));
|
||||
GPU_CopyCpuBytes(cl, quad_indices, 0, quad_data, RNGU64(0, sizeof(quad_data)));
|
||||
g->quad_indices.resource = quad_indices;
|
||||
g->quad_indices.index_size = sizeof(quad_data[0]);
|
||||
g->quad_indices.index_count = countof(quad_data);
|
||||
}
|
||||
|
||||
/* TODO: Init noise texture */
|
||||
{
|
||||
}
|
||||
}
|
||||
GPU_CommitCommandList(cl);
|
||||
|
||||
GPU_SyncAllQueues(GPU_QueueKind_Direct);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// /* Init point sampler */
|
||||
// GPU_ResourceHandle pt_sampler = GPU_PushSampler(gpu_perm, (GPU_SamplerDesc) { .filter = GPU_Filter_MinMagMipPoint });
|
||||
// g->pt_sampler = GPU_PushSamplerPtr(gpu_perm, pt_sampler);
|
||||
|
||||
// GPU_CommandListHandle cl = GPU_PrepareCommandList();
|
||||
// {
|
||||
|
||||
@ -326,7 +326,7 @@ Struct(GPU_TextureDesc)
|
||||
GPU_Format format;
|
||||
Vec3I32 dims;
|
||||
GPU_Layout initial_layout;
|
||||
i32 mip_levels; /* Will be clamped to range [1, max] */
|
||||
i32 mip_levels; /* Will be clamped to range [1, inf) */
|
||||
Vec4 clear_color;
|
||||
};
|
||||
|
||||
@ -560,10 +560,13 @@ SamplerStateHandle GPU_PushSamplerStateHandle (GPU_ArenaHandle
|
||||
|
||||
//- Count
|
||||
|
||||
u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer);
|
||||
u64 GPU_Count1D(GPU_ResourceHandle texture1d);
|
||||
u64 GPU_Count2D(GPU_ResourceHandle texture2d);
|
||||
u64 GPU_Count3D(GPU_ResourceHandle texture3d);
|
||||
u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer);
|
||||
i32 GPU_Count1D(GPU_ResourceHandle texture);
|
||||
Vec2I32 GPU_Count2D(GPU_ResourceHandle texture);
|
||||
Vec3I32 GPU_Count3D(GPU_ResourceHandle texture);
|
||||
i32 GPU_CountWidth(GPU_ResourceHandle texture);
|
||||
i32 GPU_CountHeight(GPU_ResourceHandle texture);
|
||||
i32 GPU_CountDepth(GPU_ResourceHandle texture);
|
||||
|
||||
/* Number of whole `type`-sized elements in `buffer`, computed from its byte
 * size. The expansion is parenthesized so it composes safely inside larger
 * expressions. */
#define GPU_CountBuffer(buffer, type) (GPU_CountBufferBytes(buffer) / sizeof(type))
|
||||
|
||||
@ -572,10 +575,10 @@ u64 GPU_Count3D(GPU_ResourceHandle texture3d);
|
||||
|
||||
//- Command list
|
||||
|
||||
GPU_CommandListHandle GPU_PrepareCommandList(void);
|
||||
void GPU_CommitCommandListEx(GPU_CommandListHandle cl, GPU_QueueKind queue, u64 fence_ops_count, GPU_FenceOp *fence_ops);
|
||||
GPU_CommandListHandle GPU_PrepareCommandList(GPU_QueueKind queue);
|
||||
void GPU_CommitCommandListEx(GPU_CommandListHandle cl, u64 fence_ops_count, GPU_FenceOp *fence_ops);
|
||||
|
||||
#define GPU_CommitCommandList(cl, queue) GPU_CommitCommandListEx((cl), (queue), 0, 0)
|
||||
#define GPU_CommitCommandList(cl) GPU_CommitCommandListEx((cl), 0, 0)
|
||||
|
||||
//- Arena
|
||||
|
||||
@ -605,42 +608,42 @@ void GPU_SetConstant_(GPU_CommandListHandle cl, i32 slot, void *src_32bit, u32 s
|
||||
|
||||
void GPU_BarrierEx(GPU_CommandListHandle cl, GPU_BarrierDesc desc);
|
||||
|
||||
#define GPU_MemoryBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \
|
||||
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
|
||||
.resource = (_resource), \
|
||||
.sync_prev = _sync_prev, \
|
||||
.sync_next = _sync_next, \
|
||||
.access_prev = _access_prev, \
|
||||
.access_next = _access_next, \
|
||||
#define GPU_MemoryBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next) \
|
||||
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
|
||||
.resource = (_resource), \
|
||||
.sync_prev = _sync_prev, \
|
||||
.sync_next = _sync_next, \
|
||||
.access_prev = _access_prev, \
|
||||
.access_next = _access_next, \
|
||||
})
|
||||
|
||||
#define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
|
||||
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
|
||||
.is_global = 1, \
|
||||
.sync_prev = _sync_prev, \
|
||||
.sync_next = _sync_next, \
|
||||
.access_prev = _access_prev, \
|
||||
.access_next = _access_next, \
|
||||
#define GPU_MemoryLayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
|
||||
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
|
||||
.resource = (_resource), \
|
||||
.sync_prev = _sync_prev, \
|
||||
.sync_next = _sync_next, \
|
||||
.access_prev = _access_prev, \
|
||||
.access_next = _access_next, \
|
||||
.layout = _layout, \
|
||||
})
|
||||
|
||||
#define GPU_LayoutBarrier(_cl, _resource, _sync_prev, _access_prev, _sync_next, _access_next, _layout) \
|
||||
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
|
||||
.resource = (_resource), \
|
||||
.sync_prev = _sync_prev, \
|
||||
.sync_next = _sync_next, \
|
||||
.access_prev = _access_prev, \
|
||||
.access_next = _access_next, \
|
||||
.layout = _layout, \
|
||||
#define GPU_GlobalMemoryBarrier(_cl, _sync_prev, _access_prev, _sync_next, _access_next) \
|
||||
GPU_BarrierEx((_cl), (GPU_BarrierDesc) { \
|
||||
.is_global = 1, \
|
||||
.sync_prev = _sync_prev, \
|
||||
.sync_next = _sync_next, \
|
||||
.access_prev = _access_prev, \
|
||||
.access_next = _access_next, \
|
||||
})
|
||||
|
||||
#define GPU_DumbMemoryBarrier(_cl, _resource) \
|
||||
GPU_MemoryBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
|
||||
#define GPU_DumbMemoryBarrier(cl, resource) \
|
||||
GPU_MemoryBarrier((cl), (resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
|
||||
|
||||
#define GPU_DumbGlobalMemoryBarrier(_cl) \
|
||||
GPU_GlobalMemoryBarrier((_cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
|
||||
#define GPU_DumbMemoryLayoutBarrier(cl, resource, layout) \
|
||||
GPU_MemoryLayoutBarrier((cl), (resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (layout))
|
||||
|
||||
#define GPU_DumbLayoutBarrier(_cl, _resource, _layout) \
|
||||
GPU_LayoutBarrier((_cl), (_resource), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All, (_layout))
|
||||
#define GPU_DumbGlobalMemoryBarrier(cl) \
|
||||
GPU_GlobalMemoryBarrier((cl), GPU_Stage_All, GPU_Access_All, GPU_Stage_All, GPU_Access_All)
|
||||
|
||||
//- Compute
|
||||
|
||||
@ -648,12 +651,29 @@ void GPU_Compute(GPU_CommandListHandle cl, ComputeShader cs, Vec3I32 groups);
|
||||
|
||||
//- Rasterize
|
||||
|
||||
void GPU_Rasterize(GPU_CommandListHandle cl,
|
||||
VertexShader vs, PixelShader ps,
|
||||
u32 instances_count, GPU_IndexBufferDesc index_buffer,
|
||||
u32 render_targets_count, GPU_ResourceHandle *render_targets,
|
||||
Rng3 viewport, Rng2 scissor,
|
||||
GPU_RasterMode mode);
|
||||
void GPU_RasterizeEx(GPU_CommandListHandle cl,
|
||||
VertexShader vs, PixelShader ps,
|
||||
u32 instances_count, GPU_IndexBufferDesc index_buffer,
|
||||
u32 render_targets_count, GPU_ResourceHandle *render_targets,
|
||||
Rng3 viewport, Rng2 scissor,
|
||||
GPU_RasterMode mode);
|
||||
|
||||
#define GPU_Rasterize(cl, vs, ps, instances_count, index_buffer, render_target, mode) \
|
||||
GPU_RasterizeEx( \
|
||||
(cl), \
|
||||
(vs), (ps), \
|
||||
(instances_count), (index_buffer), \
|
||||
1, &(render_target), \
|
||||
RNG3( \
|
||||
VEC3(0, 0, 0), \
|
||||
VEC3(GPU_CountWidth(render_target), GPU_CountHeight(render_target), 1) \
|
||||
), \
|
||||
RNG2( \
|
||||
VEC2(0, 0), \
|
||||
Vec2FromVec(GPU_Count2D(render_target)) \
|
||||
), \
|
||||
(mode) \
|
||||
)
|
||||
|
||||
//- Clear
|
||||
|
||||
@ -663,6 +683,15 @@ void GPU_ClearRenderTarget(GPU_CommandListHandle cl, GPU_ResourceHandle render_t
|
||||
|
||||
void GPU_ProfN(GPU_CommandListHandle cl, String name);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Synchronization
|
||||
|
||||
/* `waiter_queue` will block until `completion_queue` completes all submitted commands */
|
||||
void GPU_SyncQueue(GPU_QueueKind completion_queue, GPU_QueueKind waiter_queue);
|
||||
|
||||
/* All queues will block until `completion_queue` completes all submitted commands */
|
||||
void GPU_SyncAllQueues(GPU_QueueKind completion_queue);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookdecl Statistics
|
||||
|
||||
|
||||
@ -62,7 +62,7 @@ void GPU_Startup(void)
|
||||
{
|
||||
__profn("Create device");
|
||||
IDXGIAdapter3 *adapter = 0;
|
||||
ID3D12Device *device = 0;
|
||||
ID3D12Device10 *device = 0;
|
||||
String error = Lit("Could not initialize GPU device.");
|
||||
String first_gpu_name = ZI;
|
||||
u32 adapter_index = 0;
|
||||
@ -86,7 +86,7 @@ void GPU_Startup(void)
|
||||
* - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3
|
||||
* - EnhancedBarriersSupported == 1
|
||||
*/
|
||||
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device, (void **)&device);
|
||||
hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device);
|
||||
}
|
||||
if (SUCCEEDED(hr) && !skip)
|
||||
{
|
||||
@ -769,7 +769,7 @@ GPU_Resource *GPU_AcquireResource(GPU_ResourceDesc desc)
|
||||
if (desc.kind == GPU_ResourceKind_Buffer)
|
||||
{
|
||||
desc.buffer.stride = MaxU32(desc.buffer.stride, 1);
|
||||
buffer_size = MaxU64(AlignU64Pow2(desc.buffer.count * desc.buffer.stride), Kibi(64));
|
||||
buffer_size = MaxU64(AlignU64ToNextPow2(desc.buffer.count * desc.buffer.stride), Kibi(64));
|
||||
}
|
||||
|
||||
u64 reuse_hash = GPU_D12_ReuseHashFromResourceDesc(desc, buffer_size);
|
||||
@ -1137,9 +1137,13 @@ GPU_ArenaHandle GPU_AcquireArena(void)
|
||||
GPU_D12_Arena *gpu_arena = 0;
|
||||
{
|
||||
Arena *perm = PermArena();
|
||||
PushAlign(perm, CachelineSize);
|
||||
gpu_arena = PushStruct(perm, GPU_D12_Arena);
|
||||
PushAlign(perm, CachelineSize);
|
||||
}
|
||||
return (GPU_ArenaHandle) { .v = (u64)gpu_arena };
|
||||
gpu_arena->arena = AcquireArena(Gibi(1));
|
||||
|
||||
return GPU_D12_HandleFromPointer(GPU_ArenaHandle, gpu_arena);
|
||||
}
|
||||
|
||||
void GPU_ReleaseArena(GPU_ArenaHandle arena)
|
||||
@ -1238,19 +1242,109 @@ GPU_D12_Descriptor *GPU_D12_DescriptorFromIndex(GPU_D12_DescriptorHeapKind heap_
|
||||
GPU_ResourceHandle GPU_PushBufferEx(GPU_ArenaHandle arena, GPU_BufferDesc desc)
|
||||
{
|
||||
/* TODO */
|
||||
return (GPU_ResourceHandle) { 0 };
|
||||
return GPU_D12_HandleFromPointer(GPU_ResourceHandle, 0);
|
||||
}
|
||||
|
||||
GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena, GPU_TextureDesc desc)
|
||||
/* Creates a texture placed inside the GPU arena's D3D12 resource heap.
 *
 * The arena's backing heap is created lazily on first use (currently a fixed
 * 64 MiB). The texture is placed at the arena's current heap position, rounded
 * up to the alignment the device reports for the resource, and the heap
 * position is then advanced past it.
 *
 * Dimensions and mip count below 1 are clamped to 1 for the D3D12 description.
 * The optimized clear value is only supplied for render-target textures.
 *
 * Panics if the heap cannot be created, if the texture does not fit in the
 * remaining heap space, or if the placed resource cannot be created.
 *
 * Returns a handle to the GPU_D12_Resource pushed onto the arena. */
GPU_ResourceHandle GPU_PushTextureEx(GPU_ArenaHandle arena_handle, GPU_TextureDesc desc)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle);
    D3D12_BARRIER_LAYOUT initial_layout = GPU_D12_BarrierLayoutFromLayout(desc.initial_layout);

    /* Create resource heap */
    if (!gpu_arena->d3d_resource_heap)
    {
        /* FIXME: Dynamic size */
        D3D12_HEAP_DESC d3d_desc = ZI;
        d3d_desc.SizeInBytes = Mebi(64);
        d3d_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ALL_BUFFERS_AND_TEXTURES; /* TODO: Remove this and support tier 1 resource heaps */
        d3d_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;

        ID3D12Heap *heap = 0;
        HRESULT hr = ID3D12Device_CreateHeap(g->device, &d3d_desc, &IID_ID3D12Heap, (void **)&heap);
        if (!SUCCEEDED(hr))
        {
            /* TODO: Don't panic */
            Panic(Lit("Failed to create D3D12 resource heap"));
        }

        gpu_arena->d3d_resource_heap = heap;
        gpu_arena->heap_size = d3d_desc.SizeInBytes;
    }

    ID3D12Resource *d3d_resource = 0;
    {
        D3D12_RESOURCE_DESC1 d3d_desc = ZI;
        /* Both comparisons must test desc.kind; a bare enum constant in the
         * second condition would make the 2D/3D choice independent of the
         * requested kind. */
        d3d_desc.Dimension = desc.kind == GPU_TextureKind_1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D :
                             desc.kind == GPU_TextureKind_2D ? D3D12_RESOURCE_DIMENSION_TEXTURE2D :
                                                               D3D12_RESOURCE_DIMENSION_TEXTURE3D;
        d3d_desc.Width = MaxI32(desc.dims.x, 1);
        d3d_desc.Height = MaxI32(desc.dims.y, 1);
        d3d_desc.DepthOrArraySize = MaxI32(desc.dims.z, 1);
        d3d_desc.MipLevels = MaxI32(desc.mip_levels, 1);
        d3d_desc.Format = GPU_D12_DxgiFormatFromGpuFormat(desc.format);
        d3d_desc.SampleDesc.Count = 1;
        d3d_desc.SampleDesc.Quality = 0;
        d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(desc.flags, GPU_ResourceFlag_AllowShaderReadWrite);
        d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(desc.flags, GPU_ResourceFlag_AllowRenderTarget);
        d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(desc.flags, GPU_ResourceFlag_AllowDepthStencil);

        /* Query the size & alignment the device requires for this resource */
        u64 alloc_size = 0;
        u64 alloc_align = 0;
        {
            D3D12_RESOURCE_ALLOCATION_INFO alloc_info = ZI;
            ID3D12Device_GetResourceAllocationInfo(g->device, &alloc_info, 0, 1, (D3D12_RESOURCE_DESC *)&d3d_desc);
            alloc_size = alloc_info.SizeInBytes;
            alloc_align = alloc_info.Alignment;
        }

        /* Bump-allocate from the arena's heap; validate before committing the new position */
        u64 alloc_pos = AlignU64(gpu_arena->heap_pos, alloc_align);
        if (alloc_pos + alloc_size > gpu_arena->heap_size)
        {
            Panic(Lit("Gpu arena overflow"));
        }
        gpu_arena->heap_pos = alloc_pos + alloc_size;

        D3D12_CLEAR_VALUE clear_value = {
            .Color[0] = desc.clear_color.x,
            .Color[1] = desc.clear_color.y,
            .Color[2] = desc.clear_color.z,
            .Color[3] = desc.clear_color.w,
            .Format = d3d_desc.Format
        };

        HRESULT hr = ID3D12Device10_CreatePlacedResource2(g->device,
                                                          gpu_arena->d3d_resource_heap,
                                                          alloc_pos,
                                                          &d3d_desc,
                                                          initial_layout,
                                                          (d3d_desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET) ? &clear_value : 0,
                                                          0,
                                                          0,
                                                          &IID_ID3D12Resource,
                                                          (void **)&d3d_resource);
        if (!SUCCEEDED(hr))
        {
            /* TODO: Don't panic */
            Panic(Lit("Failed to create placed texture resource"));
        }
    }

    GPU_D12_Resource *resource = PushStruct(gpu_arena->arena, GPU_D12_Resource);
    resource->d3d_resource = d3d_resource;
    resource->uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;
    resource->flags = desc.flags;

    resource->is_texture = 1;
    resource->texture_format = desc.format;
    resource->texture_dims = desc.dims;
    resource->texture_mip_levels = desc.mip_levels;
    resource->texture_layout = initial_layout;

    return GPU_D12_HandleFromPointer(GPU_ResourceHandle, resource);
}
|
||||
|
||||
GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc)
|
||||
{
|
||||
/* TODO */
|
||||
return (GPU_ResourceHandle) { 0 };
|
||||
return GPU_D12_HandleFromPointer(GPU_ResourceHandle, 0);
|
||||
}
|
||||
|
||||
b32 GPU_IsResourceNil(GPU_ResourceHandle handle)
|
||||
@ -1263,81 +1357,103 @@ b32 GPU_IsResourceNil(GPU_ResourceHandle handle)
|
||||
StructuredBufferHandle GPU_PushStructuredBufferHandleEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range)
|
||||
{
|
||||
/* TODO */
|
||||
return (StructuredBufferHandle) { 0 };
|
||||
return GPU_D12_HandleFromPointer(StructuredBufferHandle, 0);
|
||||
}
|
||||
|
||||
RWStructuredBufferHandle GPU_PushRWStructuredBufferHandleEx(GPU_ArenaHandle arena, GPU_ResourceHandle resource, u32 element_size, RngU32 element_range)
|
||||
{
|
||||
/* TODO */
|
||||
return (RWStructuredBufferHandle) { 0 };
|
||||
return GPU_D12_HandleFromPointer(RWStructuredBufferHandle, 0);
|
||||
}
|
||||
|
||||
Texture1DHandle GPU_PushTexture1DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
|
||||
{
|
||||
/* TODO */
|
||||
return (Texture1DHandle) { 0 };
|
||||
return GPU_D12_HandleFromPointer(Texture1DHandle, 0);
|
||||
}
|
||||
|
||||
RWTexture1DHandle GPU_PushRWTexture1DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
|
||||
{
|
||||
/* TODO */
|
||||
return (RWTexture1DHandle) { 0 };
|
||||
return GPU_D12_HandleFromPointer(RWTexture1DHandle, 0);
|
||||
}
|
||||
|
||||
Texture2DHandle GPU_PushTexture2DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
|
||||
{
|
||||
/* TODO */
|
||||
return (Texture2DHandle) { 0 };
|
||||
return GPU_D12_HandleFromPointer(Texture2DHandle, 0);
|
||||
}
|
||||
|
||||
RWTexture2DHandle GPU_PushRWTexture2DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
|
||||
RWTexture2DHandle GPU_PushRWTexture2DHandle(GPU_ArenaHandle arena_handle, GPU_ResourceHandle resource_handle)
|
||||
{
|
||||
/* TODO */
|
||||
return (RWTexture2DHandle) { 0 };
|
||||
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
||||
GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle);
|
||||
GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(resource_handle);
|
||||
GPU_D12_Descriptor *descriptor = GPU_D12_PushDescriptor(gpu_arena, GPU_D12_DescriptorHeapKind_CbvSrvUav);
|
||||
ID3D12Device_CreateUnorderedAccessView(g->device, resource->d3d_resource, 0, 0, descriptor->handle);
|
||||
return GPU_D12_HandleFromPointer(RWTexture2DHandle, descriptor->index);
|
||||
}
|
||||
|
||||
Texture3DHandle GPU_PushTexture3DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
|
||||
{
|
||||
/* TODO */
|
||||
return (Texture3DHandle) { 0 };
|
||||
return GPU_D12_HandleFromPointer(Texture3DHandle, 0);
|
||||
}
|
||||
|
||||
RWTexture3DHandle GPU_PushRWTexture3DHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
|
||||
{
|
||||
/* TODO */
|
||||
return (RWTexture3DHandle) { 0 };
|
||||
return GPU_D12_HandleFromPointer(RWTexture3DHandle, 0);
|
||||
}
|
||||
|
||||
SamplerStateHandle GPU_PushSamplerStateHandle(GPU_ArenaHandle arena, GPU_ResourceHandle resource)
|
||||
{
|
||||
/* TODO */
|
||||
return (SamplerStateHandle) { 0 };
|
||||
return GPU_D12_HandleFromPointer(SamplerStateHandle, 0);
|
||||
}
|
||||
|
||||
//- Count
|
||||
|
||||
u64 GPU_CountBufferBytes(GPU_ResourceHandle buffer)
|
||||
{
|
||||
/* TODO */
|
||||
return 0;
|
||||
GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(buffer);
|
||||
return resource->buffer_size;
|
||||
}
|
||||
|
||||
u64 GPU_Count1D(GPU_ResourceHandle texture1d)
|
||||
i32 GPU_Count1D(GPU_ResourceHandle texture)
|
||||
{
|
||||
/* TODO */
|
||||
return 0;
|
||||
GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
|
||||
return resource->texture_dims.x;
|
||||
}
|
||||
|
||||
u64 GPU_Count2D(GPU_ResourceHandle texture2d)
|
||||
Vec2I32 GPU_Count2D(GPU_ResourceHandle texture)
|
||||
{
|
||||
/* TODO */
|
||||
return 0;
|
||||
GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
|
||||
return VEC2I32(resource->texture_dims.x, resource->texture_dims.y);
|
||||
}
|
||||
|
||||
u64 GPU_Count3D(GPU_ResourceHandle texture3d)
|
||||
Vec3I32 GPU_Count3D(GPU_ResourceHandle texture)
|
||||
{
|
||||
/* TODO */
|
||||
return 0;
|
||||
GPU_D12_Resource *resource = GPU_D12_ResourceFromHandle(texture);
|
||||
return resource->texture_dims;
|
||||
}
|
||||
|
||||
/* Returns the x component of the texture's stored dimensions */
i32 GPU_CountWidth(GPU_ResourceHandle texture)
{
    GPU_D12_Resource *tex = GPU_D12_ResourceFromHandle(texture);
    i32 width = tex->texture_dims.x;
    return width;
}
|
||||
|
||||
/* Returns the y component of the texture's stored dimensions */
i32 GPU_CountHeight(GPU_ResourceHandle texture)
{
    GPU_D12_Resource *tex = GPU_D12_ResourceFromHandle(texture);
    i32 height = tex->texture_dims.y;
    return height;
}
|
||||
|
||||
/* Returns the z component of the texture's stored dimensions */
i32 GPU_CountDepth(GPU_ResourceHandle texture)
{
    GPU_D12_Resource *tex = GPU_D12_ResourceFromHandle(texture);
    i32 depth = tex->texture_dims.z;
    return depth;
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -1399,12 +1515,210 @@ GPU_D12_Cmd *GPU_D12_PushConstCmd(GPU_D12_CmdList *cl, i32 slot, void *v)
|
||||
return cmd;
|
||||
}
|
||||
|
||||
/* Reserves `size` bytes of CPU-mapped upload memory on the staging heap of
 * the queue that `cl` targets, and returns the region node describing it
 * (`node->heap->mapped + node->pos` is the start of the reserved bytes).
 *
 * Regions form a circular doubly-linked list ordered by position within the
 * heap. Under the queue's staging mutex this function:
 *   1. Walks the list from the head, merging adjacent completed regions until
 *      one is large enough, stopping at the first region that is still in
 *      flight according to the queue's commit fence.
 *   2. If nothing fits, creates a new (at least doubled) mapped upload heap
 *      and installs it as the queue's current staging heap.
 *   3. Splits any space left over in the chosen region into a new node.
 *
 * The returned region's completion target is set to I64Max so it is treated
 * as in flight; presumably command list submission overwrites it with the
 * real fence value (see FIXME below).
 *
 * Panics if the upload heap cannot be created or mapped. */
GPU_D12_StagingRegionNode *GPU_D12_PushStagingRegion(GPU_D12_CmdList *cl, u64 size)
{
    GPU_D12_SharedState *g = &GPU_D12_shared_state;
    GPU_QueueKind queue_kind = cl->queue_kind;
    GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
    GPU_D12_StagingRegionNode *result = 0;

    Lock lock = LockE(&queue->staging_mutex);
    {
        GPU_D12_StagingHeap *heap = queue->staging_heap;
        i64 completed = ID3D12Fence_GetCompletedValue(queue->commit_fence);

        /* Find first completed region with matching size.
         * For each region in heap:
         * - If region size > size, split off a smaller region & use it
         *
         * - If region size < size, try to merge with next completed region
         *
         * - If no available completed region with eligible size, queue the
         *   current heap for deletion & create a new heap
         *   with larger size
         */

        /* FIXME: Region completion target should be atomic, and initialized to
         * u64/i64 max until cl submission actually sets value */

        /* Find region with large enough size */
        GPU_D12_StagingRegionNode *match = 0;
        if (heap && heap->size >= size)
        {
            GPU_D12_StagingRegionNode *r = heap->head_region_node;
            for (;;)
            {
                b32 is_completed = completed >= Atomic64Fetch(&r->completion_target);
                if (is_completed)
                {
                    /* A region extends to the next node's position, or to the
                     * end of the heap when it is the last node in the ring */
                    GPU_D12_StagingRegionNode *next = r->next;
                    u64 region_size = 0;
                    if (next->pos > r->pos)
                    {
                        region_size = next->pos - r->pos;
                    }
                    else
                    {
                        region_size = heap->size - r->pos;
                    }

                    if (region_size < size)
                    {
                        GPU_D12_StagingRegionNode *prev = r->prev;
                        b32 prev_is_completed = completed >= Atomic64Fetch(&prev->completion_target);
                        if (prev_is_completed && prev->pos < r->pos)
                        {
                            /* Merge with previous region & retry.
                             * Both links must be patched so that `next` no
                             * longer points back at the freed node. */
                            prev->next = next;
                            next->prev = prev;
                            SllStackPush(heap->first_free_region_node, r);
                            r = prev;
                        }
                        else
                        {
                            /* Continue to next region */
                            r = next;
                        }
                    }
                    else
                    {
                        /* Found matching region */
                        match = r;
                        break;
                    }
                }
                else
                {
                    /* No large-enough completed region found */
                    break;
                }
            }
        }

        /* Create new heap if no match found */
        if (!match)
        {
            /* Queue old heap for deletion */
            u64 new_heap_size = MaxU64(AlignU64ToNextPow2(size), Kibi(64));
            if (heap)
            {
                /* FIXME: Queue for deletion here */
                new_heap_size = MaxU64(new_heap_size, heap->size * 2);
                heap = 0;
            }

            /* Create new heap */
            {
                Arena *arena = AcquireArena(Gibi(1));
                heap = PushStruct(arena, GPU_D12_StagingHeap);
                heap->arena = arena;
                heap->size = new_heap_size;

                /* Create backing upload heap resource */
                ID3D12Resource *d3d_resource = 0;
                {
                    D3D12_RESOURCE_DESC d3d_desc = ZI;
                    d3d_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
                    d3d_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
                    d3d_desc.Format = DXGI_FORMAT_UNKNOWN;
                    d3d_desc.Alignment = 0;
                    d3d_desc.Width = new_heap_size;
                    d3d_desc.Height = 1;
                    d3d_desc.DepthOrArraySize = 1;
                    d3d_desc.MipLevels = 1;
                    d3d_desc.SampleDesc.Count = 1;
                    d3d_desc.SampleDesc.Quality = 0;

                    D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD };
                    HRESULT hr = ID3D12Device_CreateCommittedResource(g->device,
                                                                      &heap_props,
                                                                      D3D12_HEAP_FLAG_CREATE_NOT_ZEROED,
                                                                      &d3d_desc,
                                                                      D3D12_RESOURCE_STATE_COMMON,
                                                                      0,
                                                                      &IID_ID3D12Resource,
                                                                      (void **)&d3d_resource);
                    if (!SUCCEEDED(hr))
                    {
                        /* TODO: Don't panic */
                        Panic(Lit("Failed to create upload heap"));
                    }
                }
                heap->resource.d3d_resource = d3d_resource;
                heap->resource.uid = Atomic64FetchAdd(&g->resource_creation_gen.v, 1) + 1;
                heap->resource.buffer_size = new_heap_size;
                heap->resource.buffer_gpu_address = ID3D12Resource_GetGPUVirtualAddress(d3d_resource);

                /* Map */
                {
                    D3D12_RANGE read_range = ZI;
                    HRESULT hr = ID3D12Resource_Map(d3d_resource, 0, &read_range, &heap->mapped);
                    if (!SUCCEEDED(hr))
                    {
                        /* TODO: Don't panic */
                        Panic(Lit("Failed to map upload heap"));
                    }
                }
            }

            /* Create initial region */
            match = PushStruct(heap->arena, GPU_D12_StagingRegionNode);
            match->heap = heap;
            match->next = match;
            match->prev = match;
            heap->head_region_node = match;

            /* Install the new heap as the queue's current staging heap so
             * later pushes reuse it instead of creating a heap per call */
            queue->staging_heap = heap;
        }

        /* Split extra region space */
        {
            GPU_D12_StagingRegionNode *next = match->next;
            u64 region_size = 0;
            if (next->pos > match->pos)
            {
                region_size = next->pos - match->pos;
            }
            else
            {
                region_size = heap->size - match->pos;
            }

            if (region_size > size)
            {
                /* Reuse a recycled node if one is available */
                GPU_D12_StagingRegionNode *new_next = heap->first_free_region_node;
                if (new_next)
                {
                    SllStackPop(heap->first_free_region_node);
                }
                else
                {
                    new_next = PushStruct(heap->arena, GPU_D12_StagingRegionNode);
                }
                new_next->next = next;
                new_next->prev = match;
                next->prev = new_next;
                match->next = new_next;

                new_next->heap = heap;
                new_next->pos = match->pos + size;
            }
        }

        /* Mark the region as in flight until a real completion value is set */
        Atomic64Set(&match->completion_target, I64Max);
        result = match;
    }
    Unlock(&lock);

    return result;
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookimpl Command
|
||||
|
||||
//- Command list
|
||||
|
||||
GPU_CommandListHandle GPU_PrepareCommandList(void)
|
||||
GPU_CommandListHandle GPU_PrepareCommandList(GPU_QueueKind queue)
|
||||
{
|
||||
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
||||
GPU_D12_CmdList *cl = 0;
|
||||
@ -1423,13 +1737,16 @@ GPU_CommandListHandle GPU_PrepareCommandList(void)
|
||||
}
|
||||
}
|
||||
Unlock(&lock);
|
||||
return (GPU_CommandListHandle) { .v = (u64)cl };
|
||||
cl->queue_kind = queue;
|
||||
|
||||
return GPU_D12_HandleFromPointer(GPU_CommandListHandle, cl);
|
||||
}
|
||||
|
||||
void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_kind, u64 fence_ops_count, GPU_FenceOp *fence_ops)
|
||||
void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, u64 fence_ops_count, GPU_FenceOp *fence_ops)
|
||||
{
|
||||
GPU_D12_SharedState *g = &GPU_D12_shared_state;
|
||||
GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
|
||||
GPU_QueueKind queue_kind = cl->queue_kind;
|
||||
GPU_D12_Queue *queue = GPU_D12_QueueFromKind(queue_kind);
|
||||
TempArena scratch = BeginScratchNoConflict();
|
||||
|
||||
@ -1849,8 +2166,9 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
|
||||
u32 indices_count = 0;
|
||||
D3D12_INDEX_BUFFER_VIEW ibv = ZI;
|
||||
{
|
||||
GPU_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc;
|
||||
if (desc.index_count > 0)
|
||||
{
|
||||
GPU_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc;
|
||||
GPU_D12_Resource *index_buffer_resource = GPU_D12_ResourceFromHandle(desc.resource);
|
||||
ibv.BufferLocation = index_buffer_resource->buffer_gpu_address;
|
||||
ibv.SizeInBytes = desc.index_size * desc.index_count;
|
||||
@ -2057,16 +2375,27 @@ void GPU_CommitCommandListEx(GPU_CommandListHandle cl_handle, GPU_QueueKind queu
|
||||
|
||||
//- Arena
|
||||
|
||||
void GPU_ResetArena(GPU_CommandListHandle cl_handle, GPU_ArenaHandle arena)
|
||||
void GPU_ResetArena(GPU_CommandListHandle cl_handle, GPU_ArenaHandle arena_handle)
|
||||
{
|
||||
GPU_D12_Arena *gpu_arena = GPU_D12_ArenaFromHandle(arena_handle);
|
||||
|
||||
/* TODO */
|
||||
|
||||
/* FIXME: Move descriptors into committed lists */
|
||||
|
||||
/* FIXME: Release id3d12 resource com object references */
|
||||
gpu_arena->heap_pos = 0;
|
||||
}
|
||||
|
||||
//- Cpu -> Gpu copy
|
||||
|
||||
void GPU_CopyCpuBytes(GPU_CommandListHandle cl, GPU_ResourceHandle dst, u64 dst_offset, void *src, RngU64 src_copy_range)
|
||||
void GPU_CopyCpuBytes(GPU_CommandListHandle cl_handle, GPU_ResourceHandle dst_handle, u64 dst_offset, void *src, RngU64 src_copy_range)
|
||||
{
|
||||
/* TODO */
|
||||
GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
|
||||
u64 size = src_copy_range.max - src_copy_range.min;
|
||||
GPU_D12_StagingRegionNode *region = GPU_D12_PushStagingRegion(cl, size);
|
||||
CopyBytes((u8 *)region->heap->mapped + region->pos, (u8 *)src + src_copy_range.min, size);
|
||||
GPU_CopyBytes(cl_handle, dst_handle, dst_offset, GPU_D12_HandleFromPointer(GPU_ResourceHandle, &region->heap->resource), RNGU64(region->pos, region->pos + size));
|
||||
}
|
||||
|
||||
void GPU_CopyCpuTexels(GPU_CommandListHandle cl, GPU_ResourceHandle dst_handle, Vec3I32 dst_offset, void *cpu_src, Vec3I32 cpu_src_dims, Rng3I32 cpu_src_copy_range)
|
||||
@ -2180,12 +2509,12 @@ void GPU_Compute(GPU_CommandListHandle cl_handle, ComputeShader cs, Vec3I32 grou
|
||||
|
||||
//- Rasterize
|
||||
|
||||
void GPU_Rasterize(GPU_CommandListHandle cl_handle,
|
||||
VertexShader vs, PixelShader ps,
|
||||
u32 instances_count, GPU_IndexBufferDesc index_buffer,
|
||||
u32 render_targets_count, GPU_ResourceHandle *render_targets,
|
||||
Rng3 viewport, Rng2 scissor,
|
||||
GPU_RasterMode mode)
|
||||
void GPU_RasterizeEx(GPU_CommandListHandle cl_handle,
|
||||
VertexShader vs, PixelShader ps,
|
||||
u32 instances_count, GPU_IndexBufferDesc index_buffer,
|
||||
u32 render_targets_count, GPU_ResourceHandle *render_targets,
|
||||
Rng3 viewport, Rng2 scissor,
|
||||
GPU_RasterMode mode)
|
||||
{
|
||||
GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle);
|
||||
GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl);
|
||||
@ -2221,6 +2550,19 @@ void GPU_ProfN(GPU_CommandListHandle cl, String name)
|
||||
/* TODO */
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookimpl Synchronization
|
||||
|
||||
void GPU_SyncQueue(GPU_QueueKind completion_queue, GPU_QueueKind waiter_queue)
|
||||
{
|
||||
/* TODO */
|
||||
}
|
||||
|
||||
void GPU_SyncAllQueues(GPU_QueueKind completion_queue)
|
||||
{
|
||||
/* TODO */
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ @hookimpl Map hooks
|
||||
|
||||
@ -2315,7 +2657,7 @@ GPU_SwapchainHandle GPU_AcquireSwapchain(WND_Handle window)
|
||||
swapchain = PushStruct(perm, GPU_D12_Swapchain);
|
||||
}
|
||||
swapchain->window_hwnd = (HWND)WND_OsHandleFromWindow(window);
|
||||
return (GPU_SwapchainHandle) { .v = (u64)swapchain };
|
||||
return GPU_D12_HandleFromPointer(GPU_SwapchainHandle, swapchain);
|
||||
}
|
||||
|
||||
void GPU_ReleaseSwapchain(GPU_SwapchainHandle swapchain_handle)
|
||||
@ -2483,7 +2825,7 @@ GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, G
|
||||
cur_backbuffer = &swapchain->backbuffers[backbuffer_idx];
|
||||
}
|
||||
|
||||
return (GPU_ResourceHandle) { .v = (u64)cur_backbuffer };
|
||||
return GPU_D12_HandleFromPointer(GPU_ResourceHandle, cur_backbuffer);
|
||||
}
|
||||
|
||||
void GPU_CommitBackbuffer(GPU_ResourceHandle backbuffer_handle, i32 vsync)
|
||||
|
||||
@ -104,7 +104,18 @@ Struct(GPU_D12_DescriptorList)
|
||||
|
||||
Struct(GPU_D12_Arena)
|
||||
{
|
||||
Arena *arena;
|
||||
GPU_D12_DescriptorList committed_descriptors_by_heap_and_queue[GPU_D12_DescriptorHeapKind_Count][GPU_NumQueues];
|
||||
|
||||
/* TODO:
|
||||
* To support D3D12_RESOURCE_HEAP_TIER_1 devices, create separate heaps for:
|
||||
* - Buffers
|
||||
* - Non-render target & non-depth stencil textures
|
||||
* - Render target or depth stencil textures
|
||||
*/
|
||||
ID3D12Heap *d3d_resource_heap;
|
||||
u64 heap_pos;
|
||||
u64 heap_size;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -132,6 +143,35 @@ Struct(GPU_D12_Resource)
|
||||
struct GPU_D12_Swapchain *swapchain;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Staging types
|
||||
|
||||
Struct(GPU_D12_StagingHeap) /* Staging heap: GPU_CopyCpuBytes copies CPU bytes to `mapped + region->pos`, then issues a GPU copy that reads from `resource` over the same byte range */
|
||||
{
|
||||
Arena *arena; /* Region nodes are pushed from this arena when the free-node list is empty */
|
||||
|
||||
GPU_D12_Resource resource; /* Its address is wrapped in a GPU_ResourceHandle and used as the copy source in GPU_CopyCpuBytes */
|
||||
void *mapped; /* Base pointer that CPU bytes are copied to, offset by a region's `pos` */
|
||||
u64 size; /* Used as `heap->size - pos` when sizing a region; presumably the heap's total byte size - TODO confirm */
|
||||
|
||||
struct GPU_D12_StagingRegionNode *head_region_node; /* NOTE(review): usage not visible in this chunk; presumably the entry into the prev/next region list - confirm */
|
||||
struct GPU_D12_StagingRegionNode *first_free_region_node; /* Recycled nodes; popped with SllStackPop before falling back to `arena` */
|
||||
|
||||
};
|
||||
|
||||
Struct(GPU_D12_StagingRegionNode) /* One region of a staging heap, doubly linked to its neighbouring regions */
|
||||
{
|
||||
GPU_D12_StagingHeap *heap; /* Owning heap; assigned when a node is created while splitting a region */
|
||||
|
||||
/* Heap links (requires heap lock to read) */
|
||||
GPU_D12_StagingRegionNode *prev;
|
||||
GPU_D12_StagingRegionNode *next;
|
||||
|
||||
/* Region info */
|
||||
Atomic64 completion_target; /* Set to I64Max when the region is handed out; NOTE(review): presumably lowered to a real fence value once the owning command list is committed - confirm */
|
||||
u64 pos; /* Byte offset of the region from `heap->mapped`; a split-off node starts at `match->pos + size` */
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Command queue types
|
||||
|
||||
@ -149,9 +189,15 @@ Struct(GPU_D12_Queue)
|
||||
Mutex commit_mutex;
|
||||
ID3D12Fence *commit_fence;
|
||||
u64 commit_fence_target;
|
||||
|
||||
/* Raw command lists */
|
||||
struct GPU_D12_RawCommandList *first_committed_cl;
|
||||
struct GPU_D12_RawCommandList *last_committed_cl;
|
||||
|
||||
/* Staging heap */
|
||||
Mutex staging_mutex;
|
||||
GPU_D12_StagingHeap *staging_heap;
|
||||
|
||||
Fence sync_fence;
|
||||
};
|
||||
|
||||
@ -264,6 +310,7 @@ Struct(GPU_D12_CmdChunk)
|
||||
|
||||
Struct(GPU_D12_CmdList)
|
||||
{
|
||||
GPU_QueueKind queue_kind;
|
||||
GPU_D12_CmdList *next;
|
||||
|
||||
GPU_D12_CmdChunk *first_cmd_chunk;
|
||||
@ -332,12 +379,14 @@ Struct(GPU_D12_SharedState)
|
||||
/* Device */
|
||||
IDXGIFactory6 *factory;
|
||||
IDXGIAdapter3 *adapter;
|
||||
ID3D12Device *device;
|
||||
ID3D12Device10 *device;
|
||||
} extern GPU_D12_shared_state;
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Helpers
|
||||
|
||||
#define GPU_D12_HandleFromPointer(type, ptr) (type) { .v = (u64)(ptr) }
|
||||
|
||||
GPU_D12_Arena *GPU_D12_ArenaFromHandle(GPU_ArenaHandle handle);
|
||||
GPU_D12_CmdList *GPU_D12_CmdListFromHandle(GPU_CommandListHandle handle);
|
||||
GPU_D12_Resource *GPU_D12_ResourceFromHandle(GPU_ResourceHandle handle);
|
||||
@ -376,6 +425,7 @@ void GPU_D12_CommitRawCommandList(GPU_D12_RawCommandList *cl);
|
||||
|
||||
GPU_D12_Cmd *GPU_D12_PushCmd(GPU_D12_CmdList *cl);
|
||||
GPU_D12_Cmd *GPU_D12_PushConstCmd(GPU_D12_CmdList *cl, i32 slot, void *v);
|
||||
GPU_D12_StagingRegionNode *GPU_D12_PushStagingRegion(GPU_D12_CmdList *cl, u64 size);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Sync job
|
||||
|
||||
@ -33,9 +33,9 @@ JobImpl(V_VisWorker, _, __)
|
||||
Arena *frame_arena = AcquireArena(Gibi(64));
|
||||
Arena *perm = PermArena();
|
||||
|
||||
GPU_Arena *frame_gpu_arena = GPU_AcquireArena(Mibi(8), GPU_CpuAccessFlag_Writable);
|
||||
GPU_Arena *dverts_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable);
|
||||
GPU_Arena *dvert_idxs_gpu_arena = GPU_AcquireArena(Mibi(32), GPU_CpuAccessFlag_Writable);
|
||||
GPU_Arena *frame_gpu_arena = GPU_AcquireArena(Mebi(8), GPU_CpuAccessFlag_Writable);
|
||||
GPU_Arena *dverts_gpu_arena = GPU_AcquireArena(Mebi(32), GPU_CpuAccessFlag_Writable);
|
||||
GPU_Arena *dvert_idxs_gpu_arena = GPU_AcquireArena(Mebi(32), GPU_CpuAccessFlag_Writable);
|
||||
|
||||
//////////////////////////////
|
||||
//- State
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
JobDecl(PR_RunForever, EmptySig);
|
||||
JobImpl(PR_RunForever, _sig, _id)
|
||||
JobDecl(PT_RunForever, EmptySig);
|
||||
JobImpl(PT_RunForever, _sig, _id)
|
||||
{
|
||||
GPU_ArenaHandle gpu_frame_arena = GPU_AcquireArena();
|
||||
|
||||
@ -28,7 +28,7 @@ JobImpl(PR_RunForever, _sig, _id)
|
||||
|
||||
GPU_ResourceHandle backbuffer = GPU_PrepareBackbuffer(swapchain, GPU_Format_R16G16B16A16_Float, window_frame.draw_size);
|
||||
{
|
||||
GPU_CommandListHandle cl = GPU_PrepareCommandList();
|
||||
GPU_CommandListHandle cl = GPU_PrepareCommandList(GPU_QueueKind_Direct);
|
||||
{
|
||||
/* Push resources */
|
||||
Vec2I32 final_target_size = window_frame.draw_size;
|
||||
@ -36,43 +36,44 @@ JobImpl(PR_RunForever, _sig, _id)
|
||||
GPU_Format_R16G16B16A16_Float,
|
||||
final_target_size,
|
||||
GPU_Layout_DirectQueue_ShaderReadWrite,
|
||||
.flags = GPU_ResourceFlag_AllowShaderReadWrite | GPU_ResourceFlag_AllowRenderTarget);
|
||||
.flags = GPU_ResourceFlag_AllowShaderReadWrite);
|
||||
|
||||
/* Push resource handles */
|
||||
Texture2DHandle final_target_rhandle = GPU_PushTexture2DHandle(gpu_frame_arena, final_target);
|
||||
RWTexture2DHandle final_target_rwhandle = GPU_PushRWTexture2DHandle(gpu_frame_arena, final_target);
|
||||
|
||||
/* Prep test pass */
|
||||
{
|
||||
GPU_SetConstant(cl, PR_ShaderConst_TestTarget, final_target_rwhandle);
|
||||
GPU_SetConstant(cl, PR_ShaderConst_TestConst, 3.123);
|
||||
GPU_SetConstant(cl, PT_ShaderConst_TestTarget, final_target_rwhandle);
|
||||
GPU_SetConstant(cl, PT_ShaderConst_TestConst, 3.123);
|
||||
GPU_SetConstant(cl, PT_ShaderConst_BlitSampler, GPU_GetCommonPointSampler());
|
||||
GPU_SetConstant(cl, PT_ShaderConst_BlitSrc, final_target_rhandle);
|
||||
}
|
||||
|
||||
/* Test pass */
|
||||
{
|
||||
GPU_Compute(cl, PR_TestCS, VEC3I32((final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1));
|
||||
GPU_Compute(cl, PT_TestCS, VEC3I32((final_target_size.x + 7) / 8, (final_target_size.y + 7) / 8, 1));
|
||||
}
|
||||
|
||||
GPU_DumbMemoryBarrier(cl, final_target);
|
||||
|
||||
/* Prep clear pass */
|
||||
/* Prep blit pass */
|
||||
{
|
||||
GPU_LayoutBarrier(cl, backbuffer,
|
||||
GPU_Stage_None, GPU_Access_None,
|
||||
GPU_Stage_RenderTarget, GPU_Access_RenderTargetWrite,
|
||||
GPU_Layout_DirectQueue_RenderTargetWrite);
|
||||
GPU_DumbMemoryLayoutBarrier(cl, final_target, GPU_Layout_DirectQueue_ShaderRead);
|
||||
GPU_DumbMemoryLayoutBarrier(cl, backbuffer, GPU_Layout_DirectQueue_RenderTargetWrite);
|
||||
}
|
||||
|
||||
/* Clear pass */
|
||||
/* Blit pass */
|
||||
{
|
||||
GPU_Rasterize(cl,
|
||||
PT_BlitVS, PT_BlitPS,
|
||||
1, GPU_GetCommonQuadIndices(),
|
||||
backbuffer, GPU_RasterMode_TriangleList);
|
||||
GPU_ClearRenderTarget(cl, backbuffer, VEC4(1, 0, 0, 1));
|
||||
}
|
||||
|
||||
/* Finalize backbuffer layout */
|
||||
{
|
||||
GPU_LayoutBarrier(cl, backbuffer,
|
||||
GPU_Stage_RenderTarget, GPU_Access_RenderTargetWrite,
|
||||
GPU_Stage_None, GPU_Access_None,
|
||||
GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present);
|
||||
GPU_DumbMemoryLayoutBarrier(cl, backbuffer, GPU_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present);
|
||||
}
|
||||
|
||||
/* Reset */
|
||||
@ -80,7 +81,7 @@ JobImpl(PR_RunForever, _sig, _id)
|
||||
GPU_ResetArena(cl, gpu_frame_arena);
|
||||
}
|
||||
}
|
||||
GPU_CommitCommandList(cl, GPU_QueueKind_Direct);
|
||||
GPU_CommitCommandList(cl);
|
||||
}
|
||||
GPU_CommitBackbuffer(backbuffer, VSYNC);
|
||||
}
|
||||
@ -88,8 +89,8 @@ JobImpl(PR_RunForever, _sig, _id)
|
||||
}
|
||||
}
|
||||
|
||||
void PR_Startup(void);
|
||||
void PR_Startup(void)
|
||||
void PT_Startup(void);
|
||||
void PT_Startup(void)
|
||||
{
|
||||
RunJob(PR_RunForever);
|
||||
RunJob(PT_RunForever);
|
||||
}
|
||||
|
||||
@ -13,7 +13,9 @@
|
||||
@IncludeGpu proto_shaders.gpu
|
||||
|
||||
//- Shaders
|
||||
@ComputeShader PR_TestCS
|
||||
@ComputeShader PT_TestCS
|
||||
@VertexShader PT_BlitVS
|
||||
@PixelShader PT_BlitPS
|
||||
|
||||
//- Startup
|
||||
@Startup PR_Startup
|
||||
@Startup PT_Startup
|
||||
|
||||
@ -6,11 +6,11 @@ Struct(TestStruct)
|
||||
i32 i;
|
||||
};
|
||||
|
||||
ComputeShader2D(PR_TestCS, 8, 8)
|
||||
ComputeShader2D(PT_TestCS, 8, 8)
|
||||
{
|
||||
StructuredBuffer<TestStruct> sb = StructuredBufferFromHandle<TestStruct>(PR_ShaderConst_TestBuff);
|
||||
StructuredBuffer<TestStruct> sb = StructuredBufferFromHandle<TestStruct>(PT_ShaderConst_TestBuff);
|
||||
|
||||
RWTexture2D<Vec4> target_tex = RWTexture2DFromHandle<Vec4>(PR_ShaderConst_TestTarget);
|
||||
RWTexture2D<Vec4> target_tex = RWTexture2DFromHandle<Vec4>(PT_ShaderConst_TestTarget);
|
||||
Vec2U32 target_tex_size = Count2D(target_tex);
|
||||
|
||||
Vec2I32 id = SV_DispatchThreadID;
|
||||
@ -19,3 +19,46 @@ ComputeShader2D(PR_TestCS, 8, 8)
|
||||
target_tex[id] = Vec4(0, 1, 0, 1);
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Blit shader
|
||||
|
||||
Struct(PT_BlitPSInput)
|
||||
{
|
||||
Semantic(Vec4, SV_Position);
|
||||
Semantic(Vec2, src_uv);
|
||||
};
|
||||
|
||||
Struct(PT_BlitPSOutput)
|
||||
{
|
||||
Semantic(Vec4, SV_Target0);
|
||||
};
|
||||
|
||||
//////////////////////////////
|
||||
//- Vertex shader
|
||||
|
||||
VertexShader(PT_BlitVS, PT_BlitPSInput)
|
||||
{
|
||||
Vec2 uv = RectUvFromVertexId(SV_VertexID);
|
||||
|
||||
PT_BlitPSInput result;
|
||||
result.SV_Position = Vec4(NdcFromUv(uv).xy, 0, 1);
|
||||
result.src_uv = uv;
|
||||
return result;
|
||||
}
|
||||
|
||||
//////////////////////////////
|
||||
//- Pixel shader
|
||||
|
||||
PixelShader(PT_BlitPS, PT_BlitPSOutput, PT_BlitPSInput input)
|
||||
{
|
||||
SamplerState sampler = SamplerStateFromHandle(PT_ShaderConst_BlitSampler);
|
||||
Texture2D<Vec4> tex = Texture2DFromHandle<Vec4>(PT_ShaderConst_BlitSrc);
|
||||
|
||||
Vec2 uv = input.src_uv;
|
||||
Vec4 result = tex.Sample(sampler, uv);
|
||||
|
||||
PT_BlitPSOutput output;
|
||||
output.SV_Target0 = result;
|
||||
return output;
|
||||
}
|
||||
|
||||
@ -1,6 +1,11 @@
|
||||
////////////////////////////////////////////////////////////
|
||||
//~ Constants
|
||||
|
||||
ShaderConstant(RWTexture2DHandle, PR_ShaderConst_TestTarget, 0);
|
||||
ShaderConstant(StructuredBufferHandle, PR_ShaderConst_TestBuff, 1);
|
||||
ShaderConstant(f32, PR_ShaderConst_TestConst, 2);
|
||||
/* Test shader */
|
||||
ShaderConstant(RWTexture2DHandle, PT_ShaderConst_TestTarget, 0);
|
||||
ShaderConstant(StructuredBufferHandle, PT_ShaderConst_TestBuff, 1);
|
||||
ShaderConstant(f32, PT_ShaderConst_TestConst, 2);
|
||||
|
||||
/* Blit shader */
|
||||
ShaderConstant(SamplerStateHandle, PT_ShaderConst_BlitSampler, 3);
|
||||
ShaderConstant(Texture2DHandle, PT_ShaderConst_BlitSrc, 4);
|
||||
|
||||
@ -84,7 +84,7 @@ JobImpl(SPR_LoadSheet, sig, _)
|
||||
|
||||
/* Init spans */
|
||||
sheet->spans_count = decoded.num_spans;
|
||||
sheet->span_bins_count = MaxU32(AlignU64Pow2(sheet->spans_count * 2), 1);
|
||||
sheet->span_bins_count = MaxU32(AlignU64ToNextPow2(sheet->spans_count * 2), 1);
|
||||
sheet->spans = PushStructs(perm, SPR_Span, sheet->spans_count);
|
||||
sheet->span_bins = PushStructs(perm, SPR_SpanBin, sheet->span_bins_count);
|
||||
{
|
||||
@ -107,7 +107,7 @@ JobImpl(SPR_LoadSheet, sig, _)
|
||||
|
||||
/* Init slice groups */
|
||||
sheet->slice_groups_count = decoded.num_slice_keys;
|
||||
sheet->slice_group_bins_count = MaxU32(AlignU64Pow2(sheet->slice_groups_count * 2), 1);
|
||||
sheet->slice_group_bins_count = MaxU32(AlignU64ToNextPow2(sheet->slice_groups_count * 2), 1);
|
||||
sheet->slice_groups = PushStructs(perm, SPR_SliceGroup, sheet->slice_groups_count);
|
||||
sheet->slice_group_bins = PushStructs(perm, SPR_SliceGroupBin, sheet->slice_group_bins_count);
|
||||
{
|
||||
|
||||
@ -698,8 +698,8 @@ i64 UI_EndFrame(UI_Frame frame)
|
||||
{
|
||||
g->eframe.layout_arena = AcquireArena(Gibi(64));
|
||||
g->eframe.tex_gpu_arena = GPU_AcquireTextureArena();
|
||||
g->eframe.frame_gpu_arena = GPU_AcquireArena(Mibi(16));
|
||||
g->eframe.drects_gpu_arena = GPU_AcquireArena(Mibi(16));
|
||||
g->eframe.frame_gpu_arena = GPU_AcquireArena(Mebi(16));
|
||||
g->eframe.drects_gpu_arena = GPU_AcquireArena(Mebi(16));
|
||||
}
|
||||
ResetArena(g->eframe.layout_arena);
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user