diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index da901ec1..5f80e04b 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -165,9 +165,10 @@ Enum(GPU_Format) Enum(GPU_AccessKind) { + GPU_AccessKind_None, + GPU_AccessKind_AnyRead, - GPU_AccessKind_AnyReadWrite, GPU_AccessKind_CopyRead, GPU_AccessKind_CopyWrite, @@ -205,7 +206,6 @@ Struct(GPU_BufferDesc) { u64 size; GPU_BufferFlag flags; - GPU_AccessKind initial_access; }; //////////////////////////////////////////////////////////// @@ -386,7 +386,6 @@ GPU_ResourceHandle GPU_PushSampler(GPU_ArenaHandle arena, GPU_SamplerDesc desc); #define GPU_PushBuffer(arena, type, count, ...) GPU_PushBufferEx((arena), \ (GPU_BufferDesc) { \ - .initial_access = GPU_AccessKind_AnyReadWrite, \ .size = sizeof(type) * (count), \ __VA_ARGS__ \ } \ diff --git a/src/gpu/gpu_dx12/gpu_dx12.c b/src/gpu/gpu_dx12/gpu_dx12.c index 0651ef27..0a89b397 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.c +++ b/src/gpu/gpu_dx12/gpu_dx12.c @@ -1413,18 +1413,17 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_ D3D12_INDEX_BUFFER_VIEW bound_ibv = ZI; D3D12_CPU_DESCRIPTOR_HANDLE bound_raster_targets[GPU_MaxRasterTargets] = ZI; - u64 cmds_count = cl->cmds_count; - GPU_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, GPU_D12_Cmd, cmds_count); + /* Flatten command chunks */ + u64 cmds_count = 0; + GPU_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, GPU_D12_Cmd, cl->cmds_count); { /* Flatten command chunks */ { - u64 flattened_idx = 0; for (GPU_D12_CmdChunk *chunk = cl->first_cmd_chunk; chunk; chunk = chunk->next) { for (u64 cmd_chunk_idx = 0; cmd_chunk_idx < chunk->cmds_count; ++cmd_chunk_idx) { - cmds[flattened_idx] = chunk->cmds[cmd_chunk_idx]; - ++flattened_idx; + cmds[cmds_count++] = chunk->cmds[cmd_chunk_idx]; } } } @@ -1442,510 +1441,571 @@ void GPU_CommitCommandList(GPU_CommandListHandle cl_handle, GPU_QueueKind queue_ } } - /* Process gpu commands into dx12 commands */ + /* Determine skippable access cmds & access cmd before kinds based on resource */ { + Struct(ResourceLookupNode) + { + ResourceLookupNode *next_in_list; + ResourceLookupNode *next_in_bin; + + GPU_D12_Resource *resource; + GPU_D12_Cmd *last_access_cmd; + }; + + Struct(ResourceLookupBin) + { + ResourceLookupNode *first; + ResourceLookupNode *last; + }; + + u64 num_lookup_bins = MaxU64(64, AlignU64Pow2(cmds_count * 4)); + ResourceLookupNode *first_resource_node = 0; + ResourceLookupNode *last_resource_node = 0; + ResourceLookupBin *lookup_bins = PushStructs(scratch.arena, ResourceLookupBin, num_lookup_bins); + + GPU_D12_Cmd *final_access_cmd = 0; + u64 cmd_idx = 0; + u64 batch_gen = 0; while (cmd_idx < cmds_count) { GPU_D12_Cmd *cmd = &cmds[cmd_idx]; switch (cmd->kind) { - default: + /* Non-batch-interrupting cmds */ + case GPU_D12_CmdKind_Constant: { cmd_idx += 1; } break; - //- Resource barrier - // case GPU_D12_CmdKind_TransitionToSrv: - // case GPU_D12_CmdKind_TransitionToUav: - // case GPU_D12_CmdKind_TransitionToRtv: - // case GPU_D12_CmdKind_TransitionToCopySrc: - // case GPU_D12_CmdKind_TransitionToCopyDst: - // case GPU_D12_CmdKind_FlushUav: - // { - // u64 barrier_gen = 1 + Atomic64FetchAdd(&g->resource_barrier_gen.v, 1); - - // /* Build barriers batch list */ - // Struct(TmpBarrier) { TmpBarrier *next; GPU_D12_Resource *r; }; - // u32 max_barriers_count = 0; - // TmpBarrier *first_barrier = 0; - // TmpBarrier *last_barrier = 0; - // while (cmd && (cmd->kind == GPU_D12_CmdKind_TransitionToSrv - // || cmd->kind == GPU_D12_CmdKind_TransitionToUav - // || cmd->kind == GPU_D12_CmdKind_TransitionToRtv - // || cmd->kind == GPU_D12_CmdKind_TransitionToCopySrc - // || cmd->kind == GPU_D12_CmdKind_TransitionToCopyDst - // || cmd->kind == GPU_D12_CmdKind_FlushUav)) - // { - // D3D12_RESOURCE_BARRIER_TYPE type = ZI; - // D3D12_RESOURCE_STATES state_after = ZI; - // GPU_D12_Resource *resource = cmd->barrier.resource; - - // switch (cmd->kind) - // { - // default: break; - // case GPU_D12_CmdKind_TransitionToSrv: - // { - - // type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // state_after = D3D12_RESOURCE_STATE_ALL_SHADER_RESOURCE; - // } break; - // case GPU_D12_CmdKind_TransitionToUav: - // { - // type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // state_after = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; - // } break; - // case GPU_D12_CmdKind_TransitionToRtv: - // { - // type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // state_after = D3D12_RESOURCE_STATE_RENDER_TARGET; - // i32 slot = cmd->barrier.rt_slot; - // if (slot >= 0 && slot < countof(slotted_raster_targets)) - // { - // slotted_raster_targets[slot] = resource; - // } - // } break; - // case GPU_D12_CmdKind_TransitionToCopySrc: - // { - // type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // state_after = D3D12_RESOURCE_STATE_COPY_SOURCE; - // } break; - // case GPU_D12_CmdKind_TransitionToCopyDst: - // { - // type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // state_after = D3D12_RESOURCE_STATE_COPY_DEST; - // } break; - // case GPU_D12_CmdKind_FlushUav: - // { - // type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - // } break; - // } - - // b32 skip = 0; - // /* Skip UAV transitions on resources that already have transition in the batch */ - // if (type == D3D12_RESOURCE_BARRIER_TYPE_UAV && resource->barrier_gen == barrier_gen) - // { - // skip = 1; - // } - // /* Skip redundant transitions */ - // if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && ((resource->barrier_state_after & state_after) == state_after)) - // { - // skip = 1; - // } - // /* Skip transitions that will occur via implicit promotion */ - // if (type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && resource->state == D3D12_RESOURCE_STATE_COMMON && - // (state_after != D3D12_RESOURCE_STATE_RENDER_TARGET && - // state_after != D3D12_RESOURCE_STATE_DEPTH_WRITE && - // state_after != D3D12_RESOURCE_STATE_UNORDERED_ACCESS && - // state_after != D3D12_RESOURCE_STATE_RESOLVE_DEST && - // state_after != D3D12_RESOURCE_STATE_PRESENT)) - // { - // /* Skip transitions into existing state */ - // skip = 1; - // } - - // if (!skip) - // { - // resource->barrier_type = type; - // resource->barrier_state_after = state_after; - // if (resource->barrier_gen != barrier_gen) - // { - // TmpBarrier *b = PushStruct(scratch.arena, TmpBarrier); - // resource->barrier_gen = barrier_gen; - // b->r = resource; - // SllQueuePush(first_barrier, last_barrier, b); - // ++max_barriers_count; - // } - // } - - // cmd = cmd->next; - // } - - // /* Commit batched barriers */ - // /* FIXME: Transitions from UAV -> UAV should insert UAV barrier */ - // u32 barriers_count = 0; - // D3D12_RESOURCE_BARRIER *rbs = PushStructs(scratch.arena, D3D12_RESOURCE_BARRIER, max_barriers_count); - // for (TmpBarrier *b = first_barrier; b; b = b->next) - // { - // GPU_D12_Resource *resource = b->r; - // D3D12_RESOURCE_BARRIER_TYPE type = resource->barrier_type; - // D3D12_RESOURCE_STATES state_before = resource->state; - // D3D12_RESOURCE_STATES state_after = resource->barrier_state_after; - // if (!(type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION && state_before == state_after)) - // { - // D3D12_RESOURCE_BARRIER *rb = &rbs[barriers_count++]; - // rb->Type = resource->barrier_type; - // if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_TRANSITION) - // { - // rb->Transition.pResource = resource->d3d_resource; - // rb->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - // rb->Transition.StateBefore = state_before; - // rb->Transition.StateAfter = state_after; - // resource->state = state_after; - // } - // else if (rb->Type == D3D12_RESOURCE_BARRIER_TYPE_UAV) - // { - // rb->UAV.pResource = resource->d3d_resource; - // } - // } - // } - // if (barriers_count > 0) - // { - // ID3D12GraphicsCommandList_ResourceBarrier(rcl, barriers_count, rbs); - // } - // } break; - - //- Copy resource - // case GPU_D12_CmdKind_Copy: - // { - // GPU_D12_Resource *dst = cmd->copy.dst; - // GPU_D12_Resource *src = cmd->copy.src; - - // D3D12_RESOURCE_DESC dst_desc = ZI; - // D3D12_RESOURCE_DESC src_desc = ZI; - // ID3D12Resource_GetDesc(dst->d3d_resource, &dst_desc); - // ID3D12Resource_GetDesc(src->d3d_resource, &src_desc); - - // if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) - // { /* Copy buffer -> buffer */ - // u64 dst_len = dst->desc.buffer.count * dst->desc.buffer.stride; - // u64 src_len = src->desc.buffer.count * src->desc.buffer.stride; - // u64 cpy_len = MinU64(dst_len, src_len); - // if (cpy_len > 0) - // { - // ID3D12GraphicsCommandList_CopyBufferRegion(rcl, dst->d3d_resource, 0, src->d3d_resource, 0, cpy_len); - // /* Implicit promotion */ - // if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; - // if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; - // } - // } - // else if (src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) - // { /* Copy buffer -> texture */ - // D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_placed_footprint = ZI; - // ID3D12Device_GetCopyableFootprints(g->device, &dst_desc, 0, 1, 0, &dst_placed_footprint, 0, 0, 0); - - // D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; - // dst_loc.pResource = dst->d3d_resource; - // dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; - // dst_loc.SubresourceIndex = 0; - - // D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; - // src_loc.pResource = src->d3d_resource; - // src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; - // src_loc.PlacedFootprint = dst_placed_footprint; - - // ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, 0, 0, 0, &src_loc, 0); - // /* Implicit promotion */ - // if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; - // if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; - // } - // else if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) - // { /* Copy texture -> buffer */ - // /* TODO */ - // Assert(0); - // } - // else if (dst_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER) - // { /* Copy texture -> texture */ - // /* TODO */ - // Assert(0); - // } - - // cmd_idx += 1; - // } break; - - //- Access + /* Batch-interrupting cmds */ + case GPU_D12_CmdKind_Copy: + case GPU_D12_CmdKind_Compute: + case GPU_D12_CmdKind_Rasterize: + case GPU_D12_CmdKind_ClearRtv: + { + /* TODO: + * - Only interrupt batch if cmd actually runs + * - e.g. Rasterize with empty idx buffer will not actually run + * - For non-shader interruptions, only interrupt batches for explicitly bound resources + * - e.g. Copy should only interrupt batches for supplied resources + */ + cmd_idx += 1; + batch_gen += 1; + } break; case GPU_D12_CmdKind_Access: { - /* FIXME: Batch */ - - /* FIXME: Remove hardcode test */ - GPU_D12_Resource *resource = cmd->access.resource; - D3D12_TEXTURE_BARRIER d3d_barrier = ZI; - d3d_barrier.pResource = resource->d3d_resource; - d3d_barrier.Subresources.IndexOrFirstMipLevel = 0xffffffff; - - - switch (cmd->access.kind) + /* Lookup last resource command resource in current command list */ + ResourceLookupNode *lookup = 0; { - case GPU_AccessKind_RasterTarget: + u64 hash = RandU64FromSeed(resource->uid); + ResourceLookupBin *bin = &lookup_bins[hash % num_lookup_bins]; + lookup = bin->first; + for (; lookup && lookup->resource->uid != resource->uid;) { - d3d_barrier.SyncBefore = D3D12_BARRIER_SYNC_NONE; - d3d_barrier.SyncAfter = D3D12_BARRIER_SYNC_RENDER_TARGET; - d3d_barrier.AccessBefore = D3D12_BARRIER_ACCESS_NO_ACCESS; - d3d_barrier.AccessAfter = D3D12_BARRIER_ACCESS_RENDER_TARGET; - d3d_barrier.LayoutBefore = resource->texture_layout; - d3d_barrier.LayoutAfter = D3D12_BARRIER_LAYOUT_RENDER_TARGET; - } break; - - case GPU_AccessKind_Present: + lookup = lookup->next_in_bin; + } + if (!lookup) { - d3d_barrier.SyncBefore = D3D12_BARRIER_SYNC_RENDER_TARGET; - d3d_barrier.SyncAfter = D3D12_BARRIER_SYNC_NONE; - d3d_barrier.AccessBefore = D3D12_BARRIER_ACCESS_RENDER_TARGET; - d3d_barrier.AccessAfter = D3D12_BARRIER_ACCESS_NO_ACCESS; - d3d_barrier.LayoutBefore = resource->texture_layout; - d3d_barrier.LayoutAfter = D3D12_BARRIER_LAYOUT_PRESENT; - } break; + lookup = PushStruct(scratch.arena, ResourceLookupNode); + lookup->resource = resource; + SllQueuePushN(bin->first, bin->last, lookup, next_in_bin); + SllQueuePushN(first_resource_node, last_resource_node, lookup, next_in_list); + } } - D3D12_BARRIER_GROUP d3d_barrier_group = ZI; - d3d_barrier_group.Type = D3D12_BARRIER_TYPE_TEXTURE; - d3d_barrier_group.NumBarriers = 1; - d3d_barrier_group.pTextureBarriers = &d3d_barrier; - - ID3D12GraphicsCommandList7_Barrier(rcl, 1, &d3d_barrier_group); - - resource->texture_layout = d3d_barrier.LayoutAfter; + /* Determine 'before' state from lookup */ + if (lookup->last_access_cmd) + { + GPU_D12_Cmd *last_cmd = lookup->last_access_cmd; + if (last_cmd->access.batch_gen != batch_gen) + { + /* Access is part of new batch */ + last_cmd->access.is_end_of_batch = 1; + cmd->access.before = last_cmd->access.after; + } + else + { + /* Last access cmd for this resource is in the same batch, merge them */ + cmd->access.before = last_cmd->access.before; + last_cmd->skip = 1; + } + } + lookup->last_access_cmd = cmd; + cmd->access.batch_gen = batch_gen; + final_access_cmd = cmd; cmd_idx += 1; } break; + } + } - //- Dispatch compute shader - case GPU_D12_CmdKind_Compute: + if (final_access_cmd) + { + final_access_cmd->access.is_end_of_batch = 1; + } + } + + /* Process gpu commands into dx12 commands */ + { + u64 batch_access_idx_start = 0; + u64 batch_access_idx_opl = 0; /* One past last */ + + u64 cmd_idx = 0; + while (cmd_idx < cmds_count) + { + GPU_D12_Cmd *cmd = &cmds[cmd_idx]; + if (cmd->skip) + { + cmd_idx += 1; + } + else + { + switch (cmd->kind) { - GPU_D12_Pipeline *pipeline = 0; + default: { - GPU_D12_PipelineDesc pipeline_desc = ZI; - pipeline_desc.cs = cmd->compute.cs; - pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); - } + cmd_idx += 1; + } break; - if (pipeline) + case GPU_D12_CmdKind_Access: { - /* Set descriptor heaps */ - if (!descriptor_heaps_set) + batch_access_idx_opl = cmd_idx + 1; + + /* Submit batched barriers */ + if (cmd->access.is_end_of_batch) { - ID3D12DescriptorHeap *heaps[] = { - g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, - g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap, - }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); - descriptor_heaps_set = 1; + /* Build barriers */ + u64 buffer_barriers_count = 0; + u64 texture_barriers_count = 0; + u64 global_barriers_count = 0; + D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, (batch_access_idx_opl - batch_access_idx_start)); + D3D12_TEXTURE_BARRIER *texture_barriers = PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, (batch_access_idx_opl - batch_access_idx_start)); + D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, (batch_access_idx_opl - batch_access_idx_start)); + for (u64 access_cmd_idx = batch_access_idx_start; access_cmd_idx < batch_access_idx_opl; ++access_cmd_idx) + { + GPU_D12_Cmd *access_cmd = &cmds[access_cmd_idx]; + if (access_cmd->kind == GPU_D12_CmdKind_Access && !access_cmd->skip) + { + GPU_D12_Resource *resource = access_cmd->access.resource; + D3D12_BARRIER_TYPE barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; + + /* Translate gpu access kind -> d3d barrier fields */ + D3D12_BARRIER_SYNC d3d_syncs[2] = ZI; + D3D12_BARRIER_ACCESS d3d_accesses[2] = ZI; + D3D12_BARRIER_LAYOUT d3d_layouts[2] = ZI; + for (u32 i = 0; i < 2; ++i) + { + GPU_AccessKind access_kind = i == 0 ? access_cmd->access.before : access_cmd->access.after; + + switch (access_kind) + { + case GPU_AccessKind_None: + { + d3d_syncs[i] = D3D12_BARRIER_SYNC_NONE; + d3d_accesses[i] = D3D12_BARRIER_ACCESS_NO_ACCESS; + d3d_layouts[i] = resource->texture_layout; + } break; + + case GPU_AccessKind_RasterTarget: + { + d3d_syncs[i] = D3D12_BARRIER_SYNC_RENDER_TARGET; + d3d_accesses[i] = D3D12_BARRIER_ACCESS_RENDER_TARGET; + d3d_layouts[i] = D3D12_BARRIER_LAYOUT_RENDER_TARGET; + } break; + + case GPU_AccessKind_Present: + { + d3d_syncs[i] = D3D12_BARRIER_SYNC_NONE; + d3d_accesses[i] = D3D12_BARRIER_ACCESS_NO_ACCESS; + d3d_layouts[i] = D3D12_BARRIER_LAYOUT_PRESENT; + } break; + } + } + + /* Build barrier */ + switch (barrier_type) + { + case D3D12_BARRIER_TYPE_BUFFER: + { + D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++]; + barrier->SyncBefore = d3d_syncs[0]; + barrier->SyncAfter = d3d_syncs[1]; + barrier->AccessBefore = d3d_accesses[0]; + barrier->AccessAfter = d3d_accesses[1]; + barrier->pResource = resource->d3d_resource; + } break; + + case D3D12_BARRIER_TYPE_TEXTURE: + { + D3D12_TEXTURE_BARRIER *barrier = &texture_barriers[texture_barriers_count++]; + barrier->SyncBefore = d3d_syncs[0]; + barrier->SyncAfter = d3d_syncs[1]; + barrier->AccessBefore = d3d_accesses[0]; + barrier->AccessAfter = d3d_accesses[1]; + barrier->LayoutBefore = d3d_layouts[0]; + barrier->LayoutAfter = d3d_layouts[1]; + barrier->pResource = resource->d3d_resource; + barrier->Subresources.IndexOrFirstMipLevel = 0xffffffff; + resource->texture_layout = d3d_layouts[1]; + } break; + + case D3D12_BARRIER_TYPE_GLOBAL: + { + D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++]; + barrier->SyncBefore = d3d_syncs[0]; + barrier->SyncAfter = d3d_syncs[1]; + barrier->AccessBefore = d3d_accesses[0]; + barrier->AccessAfter = d3d_accesses[1]; + } break; + } + } + } + + /* Dispatch barriers */ + { + u32 barrier_groups_count = 0; + D3D12_BARRIER_GROUP barrier_groups[3] = ZI; + if (buffer_barriers_count > 0) + { + D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; + group->Type = D3D12_BARRIER_TYPE_BUFFER; + group->NumBarriers = buffer_barriers_count; + group->pBufferBarriers = buffer_barriers; + } + if (texture_barriers_count > 0) + { + D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; + group->Type = D3D12_BARRIER_TYPE_TEXTURE; + group->NumBarriers = texture_barriers_count; + group->pTextureBarriers = texture_barriers; + } + if (global_barriers_count > 0) + { + D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; + group->Type = D3D12_BARRIER_TYPE_GLOBAL; + group->NumBarriers = global_barriers_count; + group->pGlobalBarriers = global_barriers; + } + if (barrier_groups_count > 0) + { + ID3D12GraphicsCommandList7_Barrier(rcl, barrier_groups_count, barrier_groups); + } + } + + batch_access_idx_start = cmd_idx + 1; } - /* Bind rootsig */ - if (!compute_rootsig_set) - { - ID3D12GraphicsCommandList_SetComputeRootSignature(rcl, g->bindless_rootsig); - compute_rootsig_set = 1; - } + cmd_idx += 1; + } break; - /* Bind pipeline */ - if (pipeline != bound_pipeline) - { - ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); - bound_pipeline = pipeline; - } + //- Copy resource - /* Dispatch */ - ID3D12GraphicsCommandList_Dispatch(rcl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); - } + // case GPU_D12_CmdKind_Copy: + // { + // GPU_D12_Resource *dst = cmd->copy.dst; + // GPU_D12_Resource *src = cmd->copy.src; - cmd_idx += 1; - } break; + // D3D12_RESOURCE_DESC dst_desc = ZI; + // D3D12_RESOURCE_DESC src_desc = ZI; + // ID3D12Resource_GetDesc(dst->d3d_resource, &dst_desc); + // ID3D12Resource_GetDesc(src->d3d_resource, &src_desc); - //- Dispatch Vs/Ps shader - case GPU_D12_CmdKind_Rasterize: - { - GPU_D12_Pipeline *pipeline = 0; + // if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + // { /* Copy buffer -> buffer */ + // u64 dst_len = dst->desc.buffer.count * dst->desc.buffer.stride; + // u64 src_len = src->desc.buffer.count * src->desc.buffer.stride; + // u64 cpy_len = MinU64(dst_len, src_len); + // if (cpy_len > 0) + // { + // ID3D12GraphicsCommandList_CopyBufferRegion(rcl, dst->d3d_resource, 0, src->d3d_resource, 0, cpy_len); + // /* Implicit promotion */ + // if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; + // if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; + // } + // } + // else if (src_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + // { /* Copy buffer -> texture */ + // D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_placed_footprint = ZI; + // ID3D12Device_GetCopyableFootprints(g->device, &dst_desc, 0, 1, 0, &dst_placed_footprint, 0, 0, 0); + + // D3D12_TEXTURE_COPY_LOCATION dst_loc = ZI; + // dst_loc.pResource = dst->d3d_resource; + // dst_loc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + // dst_loc.SubresourceIndex = 0; + + // D3D12_TEXTURE_COPY_LOCATION src_loc = ZI; + // src_loc.pResource = src->d3d_resource; + // src_loc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + // src_loc.PlacedFootprint = dst_placed_footprint; + + // ID3D12GraphicsCommandList_CopyTextureRegion(rcl, &dst_loc, 0, 0, 0, &src_loc, 0); + // /* Implicit promotion */ + // if (dst->state == D3D12_RESOURCE_STATE_COMMON) dst->state = D3D12_RESOURCE_STATE_COPY_DEST; + // if (src->state == D3D12_RESOURCE_STATE_COMMON) src->state = D3D12_RESOURCE_STATE_COPY_SOURCE; + // } + // else if (dst_desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER) + // { /* Copy texture -> buffer */ + // /* TODO */ + // Assert(0); + // } + // else if (dst_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER && src_desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER) + // { /* Copy texture -> texture */ + // /* TODO */ + // Assert(0); + // } + + // cmd_idx += 1; + // } break; + + //- Dispatch compute shader + case GPU_D12_CmdKind_Compute: { - GPU_D12_PipelineDesc pipeline_desc = ZI; - pipeline_desc.vs = cmd->rasterize.vs; - pipeline_desc.ps = cmd->rasterize.ps; + GPU_D12_Pipeline *pipeline = 0; { - pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; - switch (cmd->rasterize.mode) - { - default: Assert(0); break; - case GPU_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; - case GPU_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - case GPU_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - case GPU_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case GPU_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case GPU_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case GPU_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - } + GPU_D12_PipelineDesc pipeline_desc = ZI; + pipeline_desc.cs = cmd->compute.cs; + pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); } - if (cmd->rasterize.mode == GPU_RasterMode_WireTriangleList || cmd->rasterize.mode == GPU_RasterMode_WireTriangleStrip) - { - pipeline_desc.is_wireframe = 1; - } - for (u32 i = 0; i < countof(cmd->rasterize.rtv_descriptors); ++i) - { - GPU_D12_Descriptor *rtv_descriptor = cmd->rasterize.rtv_descriptors[i]; - if (rtv_descriptor != 0) - { - pipeline_desc.render_target_formats[i] = rtv_descriptor->resource->texture_desc.format; - } - else - { - pipeline_desc.render_target_formats[i] = GPU_Format_Unknown; - } - } - pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); - } - /* Calculate IBV count */ - u32 indices_count = 0; - D3D12_INDEX_BUFFER_VIEW ibv = cmd->rasterize.ibv; - if (ibv.Format == DXGI_FORMAT_R16_UINT) + if (pipeline) + { + /* Set descriptor heaps */ + if (!descriptor_heaps_set) + { + ID3D12DescriptorHeap *heaps[] = { + g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, + g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap, + }; + ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); + descriptor_heaps_set = 1; + } + + /* Bind rootsig */ + if (!compute_rootsig_set) + { + ID3D12GraphicsCommandList_SetComputeRootSignature(rcl, g->bindless_rootsig); + compute_rootsig_set = 1; + } + + /* Bind pipeline */ + if (pipeline != bound_pipeline) + { + ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); + bound_pipeline = pipeline; + } + + /* Dispatch */ + ID3D12GraphicsCommandList_Dispatch(rcl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); + } + + cmd_idx += 1; + } break; + + //- Dispatch Vs/Ps shader + + case GPU_D12_CmdKind_Rasterize: { - indices_count = ibv.SizeInBytes / 2; - } - else if (ibv.Format == DXGI_FORMAT_R32_UINT) - { - indices_count = ibv.SizeInBytes / 4; - } - - /* Prepare & dispatch */ - if (pipeline && indices_count > 0) - { - /* Set descriptor heaps */ - if (!descriptor_heaps_set) + GPU_D12_Pipeline *pipeline = 0; { - ID3D12DescriptorHeap *heaps[] = { - g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, - g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap, - }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); - descriptor_heaps_set = 1; - } - - /* Bind rootsig */ - if (!graphics_rootsig_set) - { - ID3D12GraphicsCommandList_SetGraphicsRootSignature(rcl, g->bindless_rootsig); - graphics_rootsig_set = 1; - } - - /* Bind pipeline */ - if (pipeline != bound_pipeline) - { - ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); - bound_pipeline = pipeline; - } - - // /* Fill signature */ - // /* TODO: Only upload dirty */ - // { - // u32 sig_size = cmd->rasterize.sig_size; - // void *sig = cmd->rasterize.sig; - // u32 num32bit = sig_size / 4; - // ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(rcl, 0, num32bit, sig, 0); - // } - - /* Set viewport */ - { - D3D12_VIEWPORT viewport = ZI; + GPU_D12_PipelineDesc pipeline_desc = ZI; + pipeline_desc.vs = cmd->rasterize.vs; + pipeline_desc.ps = cmd->rasterize.ps; { - Rng3 range = cmd->rasterize.viewport; - viewport.TopLeftX = range.p0.x; - viewport.TopLeftY = range.p0.y; - viewport.Width = range.p1.x - range.p0.x; - viewport.Height = range.p1.y - range.p0.y; - viewport.MinDepth = range.p0.z; - viewport.MaxDepth = range.p1.z; + pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; + switch (cmd->rasterize.mode) + { + default: Assert(0); break; + case GPU_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; + case GPU_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case GPU_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case GPU_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case GPU_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case GPU_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case GPU_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + } } - if (!MatchStruct(&viewport, &bound_viewport)) + if (cmd->rasterize.mode == GPU_RasterMode_WireTriangleList || cmd->rasterize.mode == GPU_RasterMode_WireTriangleStrip) { - bound_viewport = viewport; - ID3D12GraphicsCommandList_RSSetViewports(rcl, 1, &viewport); + pipeline_desc.is_wireframe = 1; } - } - - /* Set scissor */ - { - D3D12_RECT scissor = ZI; - { - Rng2 range = cmd->rasterize.scissor; - scissor.left = range.p0.x; - scissor.top = range.p0.y; - scissor.right = range.p1.x; - scissor.bottom = range.p1.y; - } - if (!MatchStruct(&scissor, &bound_scissor)) - { - bound_scissor = scissor; - ID3D12GraphicsCommandList_RSSetScissorRects(rcl, 1, &scissor); - } - } - - /* Set topology */ - { - D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - switch (cmd->rasterize.mode) - { - default: Assert(0); break; - case GPU_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; - case GPU_RasterMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; - case GPU_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; - case GPU_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - case GPU_RasterMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - case GPU_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - case GPU_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - } - if (topology != bound_primitive_topology) - { - ID3D12GraphicsCommandList_IASetPrimitiveTopology(rcl, topology); - } - } - - /* Set index buffer */ - if (!MatchStruct(&ibv, &bound_ibv)) - { - ID3D12GraphicsCommandList_IASetIndexBuffer(rcl, &ibv); - bound_ibv = ibv; - } - - /* Bind render targets */ - { - b32 om_dirty = 0; - u32 rtvs_count = 0; - D3D12_CPU_DESCRIPTOR_HANDLE rtvs[countof(bound_raster_targets)] = ZI; for (u32 i = 0; i < countof(cmd->rasterize.rtv_descriptors); ++i) { - GPU_D12_Descriptor *rtv_desc = cmd->rasterize.rtv_descriptors[i]; - if (rtv_desc != 0) + GPU_D12_Descriptor *rtv_descriptor = cmd->rasterize.rtv_descriptors[i]; + if (rtv_descriptor != 0) { - om_dirty = om_dirty || (bound_raster_targets[i].ptr != rtv_desc->handle.ptr); - rtvs[rtvs_count++] = rtv_desc->handle; + pipeline_desc.render_target_formats[i] = rtv_descriptor->resource->texture_desc.format; } else { - break; + pipeline_desc.render_target_formats[i] = GPU_Format_Unknown; } } - if (om_dirty) - { - CopyStructs(bound_raster_targets, rtvs, rtvs_count); - ID3D12GraphicsCommandList_OMSetRenderTargets(rcl, rtvs_count, rtvs, 0, 0); - } + pipeline = GPU_D12_PipelineFromDesc(pipeline_desc); } - /* Dispatch */ - ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); - } + /* Calculate IBV count */ + u32 indices_count = 0; + D3D12_INDEX_BUFFER_VIEW ibv = cmd->rasterize.ibv; + if (ibv.Format == DXGI_FORMAT_R16_UINT) + { + indices_count = ibv.SizeInBytes / 2; + } + else if (ibv.Format == DXGI_FORMAT_R32_UINT) + { + indices_count = ibv.SizeInBytes / 4; + } - cmd_idx += 1; - } break; + /* Prepare & dispatch */ + if (pipeline && indices_count > 0) + { + /* Set descriptor heaps */ + if (!descriptor_heaps_set) + { + ID3D12DescriptorHeap *heaps[] = { + g->descriptor_heaps[GPU_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, + g->descriptor_heaps[GPU_D12_DescriptorHeapKind_Sampler].d3d_heap, + }; + ID3D12GraphicsCommandList_SetDescriptorHeaps(rcl, countof(heaps), heaps); + descriptor_heaps_set = 1; + } - //- Clear rtv - case GPU_D12_CmdKind_ClearRtv: - { - GPU_D12_Descriptor *descriptor = cmd->clear_rtv.rtv_descriptor; - GPU_D12_Resource *resource = descriptor->resource; - Assert(resource->texture_layout == D3D12_BARRIER_LAYOUT_RENDER_TARGET); - f32 clear_color[4] = ZI; - clear_color[0] = cmd->clear_rtv.color.x; - clear_color[1] = cmd->clear_rtv.color.y; - clear_color[2] = cmd->clear_rtv.color.z; - clear_color[3] = cmd->clear_rtv.color.w; - ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, descriptor->handle, clear_color, 0, 0); - cmd_idx += 1; - } break; + /* Bind rootsig */ + if (!graphics_rootsig_set) + { + ID3D12GraphicsCommandList_SetGraphicsRootSignature(rcl, g->bindless_rootsig); + graphics_rootsig_set = 1; + } + + /* Bind pipeline */ + if (pipeline != bound_pipeline) + { + ID3D12GraphicsCommandList_SetPipelineState(rcl, pipeline->pso); + bound_pipeline = pipeline; + } + + // /* Fill signature */ + // /* TODO: Only upload dirty */ + // { + // u32 sig_size = cmd->rasterize.sig_size; + // void *sig = cmd->rasterize.sig; + // u32 num32bit = sig_size / 4; + // ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(rcl, 0, num32bit, sig, 0); + // } + + /* Set viewport */ + { + D3D12_VIEWPORT viewport = ZI; + { + Rng3 range = cmd->rasterize.viewport; + viewport.TopLeftX = range.p0.x; + viewport.TopLeftY = range.p0.y; + viewport.Width = range.p1.x - range.p0.x; + viewport.Height = range.p1.y - range.p0.y; + viewport.MinDepth = range.p0.z; + viewport.MaxDepth = range.p1.z; + } + if (!MatchStruct(&viewport, &bound_viewport)) + { + bound_viewport = viewport; + ID3D12GraphicsCommandList_RSSetViewports(rcl, 1, &viewport); + } + } + + /* Set scissor */ + { + D3D12_RECT scissor = ZI; + { + Rng2 range = cmd->rasterize.scissor; + scissor.left = range.p0.x; + scissor.top = range.p0.y; + scissor.right = range.p1.x; + scissor.bottom = range.p1.y; + } + if (!MatchStruct(&scissor, &bound_scissor)) + { + bound_scissor = scissor; + ID3D12GraphicsCommandList_RSSetScissorRects(rcl, 1, &scissor); + } + } + + /* Set topology */ + { + D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + switch (cmd->rasterize.mode) + { + default: Assert(0); break; + case GPU_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; + case GPU_RasterMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; + case GPU_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; + case GPU_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; + case GPU_RasterMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; + case GPU_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; + case GPU_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; + } + if (topology != bound_primitive_topology) + { + ID3D12GraphicsCommandList_IASetPrimitiveTopology(rcl, topology); + } + } + + /* Set index buffer */ + if (!MatchStruct(&ibv, &bound_ibv)) + { + ID3D12GraphicsCommandList_IASetIndexBuffer(rcl, &ibv); + bound_ibv = ibv; + } + + /* Bind render targets */ + { + b32 om_dirty = 0; + u32 rtvs_count = 0; + D3D12_CPU_DESCRIPTOR_HANDLE rtvs[countof(bound_raster_targets)] = ZI; + for (u32 i = 0; i < countof(cmd->rasterize.rtv_descriptors); ++i) + { + GPU_D12_Descriptor *rtv_desc = cmd->rasterize.rtv_descriptors[i]; + if (rtv_desc != 0) + { + om_dirty = om_dirty || (bound_raster_targets[i].ptr != rtv_desc->handle.ptr); + rtvs[rtvs_count++] = rtv_desc->handle; + } + else + { + break; + } + } + if (om_dirty) + { + CopyStructs(bound_raster_targets, rtvs, rtvs_count); + ID3D12GraphicsCommandList_OMSetRenderTargets(rcl, rtvs_count, rtvs, 0, 0); + } + } + + /* Dispatch */ + ID3D12GraphicsCommandList_DrawIndexedInstanced(rcl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); + } + + cmd_idx += 1; + } break; + + //- Clear rtv + + case GPU_D12_CmdKind_ClearRtv: + { + GPU_D12_Descriptor *descriptor = cmd->clear_rtv.rtv_descriptor; + GPU_D12_Resource *resource = descriptor->resource; + f32 clear_color[4] = ZI; + clear_color[0] = cmd->clear_rtv.color.x; + clear_color[1] = cmd->clear_rtv.color.y; + clear_color[2] = cmd->clear_rtv.color.z; + clear_color[3] = cmd->clear_rtv.color.w; + ID3D12GraphicsCommandList_ClearRenderTargetView(rcl, descriptor->handle, clear_color, 0, 0); + cmd_idx += 1; + } break; + } } } } @@ -2049,7 +2109,7 @@ void GPU_SyncAccess(GPU_CommandListHandle cl_handle, GPU_ResourceHandle handle, GPU_D12_CmdList *cl = GPU_D12_CmdListFromHandle(cl_handle); GPU_D12_Cmd *cmd = GPU_D12_PushCmd(cl); cmd->kind = GPU_D12_CmdKind_Access; - cmd->access.kind = kind; + cmd->access.after = kind; cmd->access.resource = GPU_D12_ResourceFromHandle(handle); } @@ -2368,6 +2428,7 @@ GPU_ResourceHandle GPU_PrepareBackbuffer(GPU_SwapchainHandle swapchain_handle, G } ZeroStruct(backbuffer); backbuffer->d3d_resource = d3d_resource; + backbuffer->uid = Atomic64FetchAdd(&g->next_resource_uid.v, 1); backbuffer->is_texture = 1; backbuffer->texture_layout = D3D12_BARRIER_LAYOUT_PRESENT; backbuffer->swapchain = swapchain; diff --git a/src/gpu/gpu_dx12/gpu_dx12.h b/src/gpu/gpu_dx12/gpu_dx12.h index 1dab2579..40f814da 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.h +++ b/src/gpu/gpu_dx12/gpu_dx12.h @@ -116,6 +116,7 @@ Struct(GPU_D12_Resource) { GPU_D12_Resource *next_free; ID3D12Resource *d3d_resource; + u64 uid; /* Buffer info */ GPU_BufferDesc buffer_desc; @@ -198,12 +199,18 @@ Enum(GPU_D12_CmdKind) Struct(GPU_D12_Cmd) { GPU_D12_CmdKind kind; + b32 skip; union { struct { - GPU_AccessKind kind; + GPU_AccessKind after; GPU_D12_Resource *resource; + + /* Post-batch data */ + b32 is_end_of_batch; + u64 batch_gen; + GPU_AccessKind before; } access; struct @@ -298,7 +305,7 @@ Struct(GPU_D12_Swapchain) Struct(GPU_D12_SharedState) { - Atomic64Padded resource_barrier_gen; + Atomic64Padded next_resource_uid; /* Stats */ Atomic64 driver_resources_allocated;