diff --git a/.gitattributes b/.gitattributes index 49e060ad..615ae978 100644 --- a/.gitattributes +++ b/.gitattributes @@ -17,6 +17,7 @@ *.exe filter=lfs diff=lfs merge=lfs -text *.dll filter=lfs diff=lfs merge=lfs -text *.lib filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text *.tga filter=lfs diff=lfs merge=lfs -text *.ase filter=lfs diff=lfs merge=lfs -text *.ttf filter=lfs diff=lfs merge=lfs -text diff --git a/src/base/base_win32/base_win32.c b/src/base/base_win32/base_win32.c index 5d200387..8518b436 100644 --- a/src/base/base_win32/base_win32.c +++ b/src/base/base_win32/base_win32.c @@ -501,7 +501,7 @@ i32 W32_Main(void) // Create app dir { String path = PathFromString(perm, appdir_path, '\\'); - wchar_t *path_wstr = WstrFromString(perm, appdir_path); + wchar_t *path_wstr = WstrFromString(perm, path); i32 err_code = SHCreateDirectoryExW(0, path_wstr, 0); String err = StringF(perm, "Error code %F", FmtSint(err_code)); switch (err_code) @@ -519,6 +519,10 @@ i32 W32_Main(void) { err = Lit("User canceled the operation"); } break; + case ERROR_PATH_NOT_FOUND: + { + err = Lit("The system cannot find the path specified."); + } break; } if (err_code != ERROR_SUCCESS && err_code != ERROR_ALREADY_EXISTS && err_code != ERROR_FILE_EXISTS) { diff --git a/src/base/base_win32/base_win32.h b/src/base/base_win32/base_win32.h index ed967b77..86946f86 100644 --- a/src/base/base_win32/base_win32.h +++ b/src/base/base_win32/base_win32.h @@ -25,6 +25,7 @@ #include #include #include + #include // #pragma warning(pop) #ifndef BCRYPT_RNG_ALG_HANDLE @@ -37,6 +38,8 @@ #pragma comment(lib, "kernel32") #pragma comment(lib, "user32") #pragma comment(lib, "bcrypt") +#pragma comment(lib, "gdi32") +#pragma comment(lib, "cabinet") #pragma comment(lib, "shell32") #pragma comment(lib, "ole32") #pragma comment(lib, "winmm") diff --git a/src/glyph_cache/glyph_cache.c b/src/glyph_cache/glyph_cache.c index adbad80e..978dcd2e 100644 --- a/src/glyph_cache/glyph_cache.c +++ 
b/src/glyph_cache/glyph_cache.c @@ -316,6 +316,7 @@ void GC_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame) gpu_perm, cl, G_Format_R8G8B8A8_Unorm_Srgb, atlas->dims, + G_Layout_Simultaneous, .name = Lit("Glyph atlas") ); atlas->tex = G_PushTexture2DRef(gpu_perm, atlas->tex_res); diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c index a7ba8e3f..3fef7737 100644 --- a/src/gpu/gpu_common.c +++ b/src/gpu/gpu_common.c @@ -25,6 +25,7 @@ void G_BootstrapCommon(void) gpu_perm, cl, G_Format_R8G8B8A8_Uint, VEC2I32(8, 8), + G_Layout_Common, .flags = G_ResourceFlag_ZeroMemory ); G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex); @@ -42,7 +43,8 @@ void G_BootstrapCommon(void) noise_tex = G_PushTexture3D( gpu_perm, cl, G_Format_R16_Uint, - noise_dims + noise_dims, + G_Layout_Common ); G_CopyCpuToTexture( cl, diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h index 34ff9449..5f7489c6 100644 --- a/src/gpu/gpu_core.h +++ b/src/gpu/gpu_core.h @@ -188,6 +188,117 @@ Enum(G_Format) G_Format_COUNT = 192 }; +//////////////////////////////////////////////////////////// +//~ Memory sync types + +Enum(G_Stage) +{ + G_Stage_None = 0, + + // Compute stages + G_Stage_ComputeShading = (1 << 1), + + // Draw stages + G_Stage_IndexAssembly = (1 << 2), + G_Stage_VertexShading = (1 << 3), + G_Stage_PixelShading = (1 << 4), + G_Stage_DepthStencil = (1 << 5), + G_Stage_RenderTarget = (1 << 6), + + // Copy stages + G_Stage_Copy = (1 << 7), + + // Indirect stages + G_Stage_Indirect = (1 << 8), + + // Aggregate stages + G_Stage_Drawing = G_Stage_IndexAssembly | + G_Stage_VertexShading | + G_Stage_PixelShading | + G_Stage_DepthStencil | + G_Stage_RenderTarget, + + G_Stage_Shading = G_Stage_ComputeShading | + G_Stage_VertexShading | + G_Stage_PixelShading, + + G_Stage_All = 0xFFFFFFFF +}; + +Enum(G_Access) +{ + G_Access_None = 0, + + G_Access_ShaderReadWrite = (1 << 1), + G_Access_ShaderRead = (1 << 2), + + G_Access_CopyWrite = (1 << 3), + G_Access_CopyRead = (1 << 4), + + 
G_Access_DepthStencilRead = (1 << 5), + G_Access_DepthStencilWrite = (1 << 6), + G_Access_RenderTargetWrite = (1 << 7), + + G_Access_IndexBuffer = (1 << 8), + G_Access_IndirectArgument = (1 << 9), + + G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the stage specified in the barrier +}; + +Enum(G_Layout) +{ + G_Layout_NoChange, + + G_Layout_Undefined, // No access <-- D3D12_BARRIER_LAYOUT_UNDEFINED + + // Simultaneous layout allows a resource to be used on any queue with any + // access type (except depth-stencil). Resources cannot transition to/from + // this layout, they must be created with it. Allows concurrent reads + // while up to 1 write is occurring to non-overlapping regions. + + G_Layout_Simultaneous, // Any access except depth-stencil <-- D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS + + + G_Layout_Common, // ShaderRead/CopyRead/CopyWrite/Present <-- D3D12_BARRIER_LAYOUT_COMMON + + ////////////////////////////// + //- Direct queue + + G_Layout_DirectQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON + G_Layout_DirectQueue_Read, // ShaderRead/CopyRead/DepthStencilRead <-- D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ + + G_Layout_DirectQueue_DepthStencil, // DepthStencilRead/DepthStencilWrite <-- D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE + G_Layout_DirectQueue_RenderTarget, // RenderTargetWrite <-- D3D12_BARRIER_LAYOUT_RENDER_TARGET + + ////////////////////////////// + //- Compute queue + + G_Layout_ComputeQueue_General, // ShaderRead/ShaderReadWrite/CopyRead/CopyWrite <-- D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON + + ////////////////////////////// + //- Direct & Compute queue + + G_Layout_DirectComputeQueue_Read, // ShaderRead/CopyRead <-- D3D12_BARRIER_LAYOUT_GENERIC_READ + G_Layout_DirectComputeQueue_ShaderReadWrite, // ShaderReadWrite <-- D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS + G_Layout_DirectComputeQueue_CopyWrite, // CopyWrite <-- 
D3D12_BARRIER_LAYOUT_COPY_DEST +}; + +// Barrier will execute after previous stages specified by `stage_prev`, and before next stages specified by `stage_next`. +// When barrier executes: +// - Necessary resource flushes will occur based on `access_prev` & `access_next` +// - Texture layout will transition based on `layout` (if specified) +Struct(G_MemoryBarrierDesc) +{ + G_ResourceHandle resource; + b32 is_global; + G_Stage stage_prev; + G_Stage stage_next; + G_Access access_prev; + G_Access access_next; + G_Layout layout; + RngI32 mips; // Inclusive range of texture mip indices to sync +}; + //////////////////////////////////////////////////////////// //~ Filter types @@ -303,6 +414,7 @@ Struct(G_TextureDesc) G_ResourceFlag flags; G_Format format; Vec3I32 dims; + G_Layout initial_layout; Vec4 clear_color; i32 max_mips; // Will be clamped to range [1, max mips] String name; @@ -424,34 +536,37 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena, G_CommandListHandle cl, G_R } \ ) -#define G_PushTexture1D(arena, cl, _format, _size, ...) G_PushResource((arena), (cl), \ +#define G_PushTexture1D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \ (G_ResourceDesc) { \ .kind = G_ResourceKind_Texture1D, \ .texture = { \ .format = (_format), \ .dims = VEC3I32((_size), 1, 1), \ + .initial_layout = (_initial_layout), \ __VA_ARGS__ \ } \ } \ ) -#define G_PushTexture2D(arena, cl, _format, _size, ...) G_PushResource((arena), (cl), \ +#define G_PushTexture2D(arena, cl, _format, _size, _initial_layout, ...) G_PushResource((arena), (cl), \ (G_ResourceDesc) { \ .kind = G_ResourceKind_Texture2D, \ .texture = { \ .format = (_format), \ .dims = VEC3I32((_size).x, (_size).y, 1), \ + .initial_layout = (_initial_layout), \ __VA_ARGS__ \ } \ } \ ) -#define G_PushTexture3D(arena, cl, _format, _size, ...) G_PushResource((arena), (cl), \ +#define G_PushTexture3D(arena, cl, _format, _size, _initial_layout, ...) 
G_PushResource((arena), (cl), \ (G_ResourceDesc) { \ .kind = G_ResourceKind_Texture3D, \ .texture = { \ .format = (_format), \ .dims = (_size), \ + .initial_layout = (_initial_layout), \ __VA_ARGS__ \ } \ } \ @@ -608,7 +723,50 @@ void G_SetConstantEx(G_CommandListHandle cl, i32 slot, void *src_32bit, u32 size //- Memory sync -void G_Barrier(G_CommandListHandle cl); +void G_MemorySyncEx(G_CommandListHandle cl, G_MemoryBarrierDesc desc); + +#define G_MemorySync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, ...) \ + G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ + .resource = (_resource), \ + .stage_prev = _stage_prev, \ + .access_prev = _access_prev, \ + .stage_next = _stage_next, \ + .access_next = _access_next, \ + .mips.max = 64, \ + __VA_ARGS__ \ + }) + +#define G_MemoryLayoutSync(_cl, _resource, _stage_prev, _access_prev, _stage_next, _access_next, _layout, ...) \ + G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ + .resource = (_resource), \ + .stage_prev = _stage_prev, \ + .access_prev = _access_prev, \ + .stage_next = _stage_next, \ + .access_next = _access_next, \ + .layout = _layout, \ + .mips.max = 64, \ + __VA_ARGS__ \ + }) + +#define G_GlobalMemorySync(_cl, _stage_prev, _access_prev, _stage_next, _access_next, ...) \ + G_MemorySyncEx((_cl), (G_MemoryBarrierDesc) { \ + .is_global = 1, \ + .stage_prev = _stage_prev, \ + .access_prev = _access_prev, \ + .stage_next = _stage_next, \ + .access_next = _access_next, \ + .mips.max = 64, \ + __VA_ARGS__ \ + }) + +#define G_DumbMemorySync(cl, resource, ...) \ + G_MemorySync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__) + +#define G_DumbMemoryLayoutSync(cl, resource, layout, ...) \ + G_MemoryLayoutSync((cl), (resource), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, (layout), __VA_ARGS__) + +#define G_DumbGlobalMemorySync(cl, ...) 
\ + G_GlobalMemorySync((cl), G_Stage_All, G_Access_All, G_Stage_All, G_Access_All, __VA_ARGS__) //- Compute diff --git a/src/gpu/gpu_dx12/gpu_dx12.lay b/src/gpu/gpu_dx12/gpu_dx12.lay index 46821b69..86f8f543 100644 --- a/src/gpu/gpu_dx12/gpu_dx12.lay +++ b/src/gpu/gpu_dx12/gpu_dx12.lay @@ -1,5 +1,10 @@ @Layer gpu_dx12 +////////////////////////////// +//- Resources + +@EmbedDir G_D12_Resources gpu_dx12_res + ////////////////////////////// //- Api diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.c b/src/gpu/gpu_dx12/gpu_dx12_core.c index 63deaeff..e9966051 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.c +++ b/src/gpu/gpu_dx12/gpu_dx12_core.c @@ -9,44 +9,99 @@ void G_Bootstrap(void) TempArena scratch = BeginScratchNoConflict(); Arena *perm = PermArena(); + ////////////////////////////// + //- Extract agility SDK + + String appdir = GetAppDirectory(); + + // FIXME: Include actual sdk header for this value + u32 sdk_ver_num = 618; + String sdk_ver_str = Lit("1.618.5"); + String sdk_dir_path = StringF(scratch.arena, "%FD3D12/%F/", FmtString(appdir), FmtString(sdk_ver_str)); + String core_path = StringF(scratch.arena, "%FD3D12Core.dll", FmtString(sdk_dir_path)); + String layers_path = StringF(scratch.arena, "%Fd3d12SDKLayers.dll", FmtString(sdk_dir_path)); + { + if (!PLT_IsFile(core_path) || !PLT_IsFile(layers_path)) + { + LogInfoF("Unpacking D3D12 Agility SDK to %F", FmtString(sdk_dir_path)); + ResourceKey core_key = ResourceKeyFromStore(&G_D12_Resources, Lit("AgilitySDK/1.618.5/D3D12Core.dat")); + ResourceKey layers_key = ResourceKeyFromStore(&G_D12_Resources, Lit("AgilitySDK/1.618.5/d3d12SDKLayers.dat")); + String core_data = PLT_Decompress(scratch.arena, DataFromResource(core_key), PLT_CompressionLevel_3); + String layers_data = PLT_Decompress(scratch.arena, DataFromResource(layers_key), PLT_CompressionLevel_3); + PLT_MkDir(StringF(scratch.arena, "%Fd3d12/", FmtString(appdir))); + PLT_MkDir(StringF(scratch.arena, "%Fd3d12/", FmtString(appdir))); + 
PLT_MkDir(StringF(scratch.arena, "%Fd3d12/%F/", FmtString(appdir), FmtString(sdk_ver_str))); + { + PLT_File file = PLT_OpenFileWrite(core_path); + PLT_WriteFile(file, core_data); + PLT_CloseFile(file); + } + { + PLT_File file = PLT_OpenFileWrite(layers_path); + PLT_WriteFile(file, layers_data); + PLT_CloseFile(file); + } + if (!PLT_IsFile(core_path) || !PLT_IsFile(layers_path)) + { + Panic(StringF( + scratch.arena, + "Failed to extract D3D12 Agility SDK to \"%F\"", + FmtString(core_path) + )); + } + } + } + + ////////////////////////////// + //- Create device factory + + ID3D12DeviceFactory *device_factory = 0; + { + ID3D12SDKConfiguration1 *sdk_config = 0; + D3D12GetInterface(&CLSID_D3D12SDKConfiguration, &IID_ID3D12SDKConfiguration1, (void **)&sdk_config); + + // Create device factory + char *sdk_path_cstr = CstrFromString(scratch.arena, sdk_dir_path); + HRESULT hr = ID3D12SDKConfiguration1_CreateDeviceFactory( + sdk_config, + sdk_ver_num, + sdk_path_cstr, + &IID_ID3D12DeviceFactory, + (void **)&device_factory + ); + + if (FAILED(hr)) + { + Panic(StringF(scratch.arena, "Failed to create ID3D12DeviceFactory: Error code 0x%F", FmtHex(hr))); + } + + // Enable debug layer + if (GPU_DEBUG) + { + ID3D12Debug1 *debug = 0; + ID3D12DeviceFactory_GetConfigurationInterface(device_factory, &CLSID_D3D12Debug, &IID_ID3D12Debug1, (void **)&debug); + ID3D12Debug1_EnableDebugLayer(debug); + + if (GPU_DEBUG_VALIDATION) + { + ID3D12Debug1_SetEnableGPUBasedValidation(debug, 1); + } + } + } + ////////////////////////////// //- Initialize device { HRESULT hr = 0; - // Enable debug layer - u32 dxgi_factory_flags = 0; - if (GPU_DEBUG) + // Create dxgi factory { - ID3D12Debug *debug_controller0 = 0; + u32 dxgi_factory_flags = 0; + if (GPU_DEBUG) { - hr = D3D12GetDebugInterface(&IID_ID3D12Debug, (void **)&debug_controller0); - if (FAILED(hr)) - { - Panic(Lit("Failed to create ID3D12Debug0")); - } - ID3D12Debug_EnableDebugLayer(debug_controller0); - if (GPU_DEBUG_VALIDATION) - { - 
ID3D12Debug1 *debug_controller1 = 0; - { - hr = ID3D12Debug_QueryInterface(debug_controller0, &IID_ID3D12Debug1, (void **)&debug_controller1); - if (FAILED(hr)) - { - Panic(Lit("Failed to create ID3D12Debug1")); - } - ID3D12Debug1_SetEnableGPUBasedValidation(debug_controller1, 1); - } - ID3D12Debug_Release(debug_controller1); - } + dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG; } - ID3D12Debug_Release(debug_controller0); - dxgi_factory_flags |= DXGI_CREATE_FACTORY_DEBUG; - } - - // Create factory - { hr = CreateDXGIFactory2(dxgi_factory_flags, &IID_IDXGIFactory6, (void **)&G_D12.factory); if (FAILED(hr)) { @@ -57,9 +112,9 @@ void G_Bootstrap(void) // Create device { IDXGIAdapter3 *adapter = 0; - ID3D12Device1 *device = 0; + ID3D12Device10 *device = 0; + String adapter_name = Zi; String error = Lit("Failed to initialize D3D12 device"); - String first_gpu_name = Zi; u32 adapter_index = 0; b32 done = 0; i32 skips = 0; // For iGPU testing @@ -68,30 +123,28 @@ void G_Bootstrap(void) hr = IDXGIFactory6_EnumAdapterByGpuPreference(G_D12.factory, adapter_index, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, &IID_IDXGIAdapter3, (void **)&adapter); if (SUCCEEDED(hr)) { - DXGI_ADAPTER_DESC1 desc; - IDXGIAdapter3_GetDesc1(adapter, &desc); - if (first_gpu_name.len == 0) { - first_gpu_name = StringFromWstrNoLimit(scratch.arena, desc.Description); + DXGI_ADAPTER_DESC1 desc; + IDXGIAdapter3_GetDesc1(adapter, &desc); + adapter_name = StringFromWstrNoLimit(scratch.arena, desc.Description); } + if (skips <= 0) { - // TODO: Verify feature support: - // - HighestShaderModel >= D3D_SHADER_MODEL_6_6 - // - ResourceBindingTier >= D3D12_RESOURCE_BINDING_TIER_3 - // - EnhancedBarriersSupported == 1 - // - AtomicInt64OnDescriptorHeapResourceSupported == 1 - hr = D3D12CreateDevice((IUnknown *)adapter, D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device1, (void **)&device); - } - if (SUCCEEDED(hr) && skips <= 0) - { + hr = ID3D12DeviceFactory_CreateDevice(device_factory, (IUnknown *)adapter, 
D3D_FEATURE_LEVEL_12_0, &IID_ID3D12Device10, (void **)&device); done = 1; } else { skips -= 1; adapter_index += 1; - ID3D12Device_Release(device); - IDXGIAdapter3_Release(adapter); + if (device) + { + ID3D12Device_Release(device); + } + if (adapter) + { + IDXGIAdapter3_Release(adapter); + } adapter = 0; device = 0; } @@ -104,16 +157,74 @@ void G_Bootstrap(void) if (!device) { - if (first_gpu_name.len > 0) + if (adapter_name.len > 0) { error = StringF( scratch.arena, "Could not initialize device '%F' with D3D_FEATURE_LEVEL_12_0. Ensure that the device is capable and drivers are up to date.", - FmtString(first_gpu_name) + FmtString(adapter_name) ); } Panic(error); } + + if (device) + { + StringList missing = Zi; + { + D3D12_FEATURE_DATA_SHADER_MODEL shader_model = { D3D_SHADER_MODEL_6_6 }; + D3D12_FEATURE_DATA_D3D12_OPTIONS options = Zi; + D3D12_FEATURE_DATA_D3D12_OPTIONS9 options9 = Zi; + D3D12_FEATURE_DATA_D3D12_OPTIONS11 options11 = Zi; + D3D12_FEATURE_DATA_D3D12_OPTIONS12 options12 = Zi; + { + ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_SHADER_MODEL, &shader_model, sizeof(shader_model)); + ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); + ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS9, &options9, sizeof(options9)); + ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS11, &options11, sizeof(options11)); + ID3D12Device_CheckFeatureSupport(device, D3D12_FEATURE_D3D12_OPTIONS12, &options12, sizeof(options12)); + } + + if (shader_model.HighestShaderModel < D3D_SHADER_MODEL_6_6) + { + PushStringToList(scratch.arena, &missing, Lit(" - Shader model 6.6")); + } + if (options.ResourceBindingTier < D3D12_RESOURCE_BINDING_TIER_3) + { + PushStringToList(scratch.arena, &missing, Lit(" - Resource binding tier 3")); + } + // if (!options.DoublePrecisionFloatShaderOps) + // { + // PushStringToList(scratch.arena, &missing, Lit(" - Double precision shader ops")); + // } + // if 
(!options9.AtomicInt64OnTypedResourceSupported) + // { + // PushStringToList(scratch.arena, &missing, Lit(" - 64-bit atomics on typed resources")); + // } + // if (!options11.AtomicInt64OnDescriptorHeapResourceSupported) + // { + // PushStringToList(scratch.arena, &missing, Lit(" - 64-bit atomics on descriptor heap resources")); + // } + if (!options12.EnhancedBarriersSupported) + { + PushStringToList(scratch.arena, &missing, Lit(" - Enhanced barriers")); + } + } + if (missing.count > 0) + { + String msg = StringF( + scratch.arena, + "Could not intiialize D3D12\n\n" + "The driver for device '%F' does not support the following feature(s):\n\n" + "%F\n\n" + "Ensure drivers are up to date and the device is capable.", + FmtString(adapter_name), + FmtString(StringFromList(scratch.arena, missing, Lit("\n"))) + ); + Panic(msg); + } + } + G_D12.adapter = adapter; G_D12.device = device; } @@ -123,8 +234,8 @@ void G_Bootstrap(void) { // Enable D3D12 Debug break { - ID3D12InfoQueue *info = 0; - hr = ID3D12Device_QueryInterface(G_D12.device, &IID_ID3D12InfoQueue, (void **)&info); + ID3D12InfoQueue1 *info = 0; + hr = ID3D12Device_QueryInterface(G_D12.device, &IID_ID3D12InfoQueue1, (void **)&info); if (FAILED(hr)) { Panic(Lit("Failed to query ID3D12Device interface")); @@ -378,9 +489,122 @@ DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format) return (DXGI_FORMAT)format; } -b32 G_D12_IsSimultaneous(G_D12_Resource *resource) +D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages) { - return AnyBit(resource->d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS); + D3D12_BARRIER_SYNC result = 0; + if (stages == G_Stage_All) + { + result = D3D12_BARRIER_SYNC_ALL; + } + else + { + result |= D3D12_BARRIER_SYNC_COMPUTE_SHADING * AnyBit(stages, G_Stage_ComputeShading); + result |= D3D12_BARRIER_SYNC_INDEX_INPUT * AnyBit(stages, G_Stage_IndexAssembly); + result |= D3D12_BARRIER_SYNC_VERTEX_SHADING * AnyBit(stages, G_Stage_VertexShading); + result |= 
D3D12_BARRIER_SYNC_PIXEL_SHADING * AnyBit(stages, G_Stage_PixelShading); + result |= D3D12_BARRIER_SYNC_DEPTH_STENCIL * AnyBit(stages, G_Stage_DepthStencil); + result |= D3D12_BARRIER_SYNC_RENDER_TARGET * AnyBit(stages, G_Stage_RenderTarget); + result |= D3D12_BARRIER_SYNC_COPY * AnyBit(stages, G_Stage_Copy); + result |= D3D12_BARRIER_SYNC_EXECUTE_INDIRECT * AnyBit(stages, G_Stage_Indirect); + } + return result; +} + +D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses) +{ + D3D12_BARRIER_ACCESS result = 0; + if (accesses == 0) + { + result = D3D12_BARRIER_ACCESS_NO_ACCESS; + } + else if (accesses == G_Access_All) + { + result = D3D12_BARRIER_ACCESS_COMMON; + } + else + { + result |= D3D12_BARRIER_ACCESS_UNORDERED_ACCESS * AnyBit(accesses, G_Access_ShaderReadWrite); + result |= D3D12_BARRIER_ACCESS_SHADER_RESOURCE * AnyBit(accesses, G_Access_ShaderRead); + result |= D3D12_BARRIER_ACCESS_COPY_DEST * AnyBit(accesses, G_Access_CopyWrite); + result |= D3D12_BARRIER_ACCESS_COPY_SOURCE * AnyBit(accesses, G_Access_CopyRead); + result |= D3D12_BARRIER_ACCESS_INDEX_BUFFER * AnyBit(accesses, G_Access_IndexBuffer); + result |= D3D12_BARRIER_ACCESS_INDIRECT_ARGUMENT * AnyBit(accesses, G_Access_IndirectArgument); + result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_READ * AnyBit(accesses, G_Access_DepthStencilRead); + result |= D3D12_BARRIER_ACCESS_DEPTH_STENCIL_WRITE * AnyBit(accesses, G_Access_DepthStencilWrite); + result |= D3D12_BARRIER_ACCESS_RENDER_TARGET * AnyBit(accesses, G_Access_RenderTargetWrite); + } + return result; +} + +D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout) +{ + PERSIST Readonly D3D12_BARRIER_LAYOUT translate[] = { + [G_Layout_Undefined] = D3D12_BARRIER_LAYOUT_UNDEFINED, + [G_Layout_Simultaneous] = D3D12_BARRIER_LAYOUT_COMMON, + [G_Layout_Common] = D3D12_BARRIER_LAYOUT_COMMON, + [G_Layout_DirectQueue_General] = D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON, + [G_Layout_DirectQueue_Read] = 
D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ, + [G_Layout_DirectQueue_DepthStencil] = D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE, + [G_Layout_DirectQueue_RenderTarget] = D3D12_BARRIER_LAYOUT_RENDER_TARGET, + [G_Layout_ComputeQueue_General] = D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON, + [G_Layout_DirectComputeQueue_ShaderReadWrite] = D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS, + [G_Layout_DirectComputeQueue_Read] = D3D12_BARRIER_LAYOUT_GENERIC_READ, + [G_Layout_DirectComputeQueue_CopyWrite] = D3D12_BARRIER_LAYOUT_COPY_DEST, + }; + D3D12_BARRIER_LAYOUT result = D3D12_BARRIER_LAYOUT_UNDEFINED; + if (layout >= 0 && layout < countof(translate)) + { + result = translate[layout]; + } + return result; +}; + +String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout) +{ + PERSIST Readonly String names[] = { + [D3D12_BARRIER_LAYOUT_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_COMMON"), + [D3D12_BARRIER_LAYOUT_PRESENT] = CompLit("D3D12_BARRIER_LAYOUT_PRESENT"), + [D3D12_BARRIER_LAYOUT_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_GENERIC_READ"), + [D3D12_BARRIER_LAYOUT_RENDER_TARGET] = CompLit("D3D12_BARRIER_LAYOUT_RENDER_TARGET"), + [D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_UNORDERED_ACCESS"), + [D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_WRITE"), + [D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ] = CompLit("D3D12_BARRIER_LAYOUT_DEPTH_STENCIL_READ"), + [D3D12_BARRIER_LAYOUT_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_SHADER_RESOURCE"), + [D3D12_BARRIER_LAYOUT_COPY_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_COPY_SOURCE"), + [D3D12_BARRIER_LAYOUT_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_COPY_DEST"), + [D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_RESOLVE_SOURCE"), + [D3D12_BARRIER_LAYOUT_RESOLVE_DEST] = CompLit("D3D12_BARRIER_LAYOUT_RESOLVE_DEST"), + [D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_SHADING_RATE_SOURCE"), + 
[D3D12_BARRIER_LAYOUT_VIDEO_DECODE_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_DECODE_READ"), + [D3D12_BARRIER_LAYOUT_VIDEO_DECODE_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_DECODE_WRITE"), + [D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_READ"), + [D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_PROCESS_WRITE"), + [D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_READ] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_READ"), + [D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_WRITE] = CompLit("D3D12_BARRIER_LAYOUT_VIDEO_ENCODE_WRITE"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COMMON"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_GENERIC_READ"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_UNORDERED_ACCESS"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_SHADER_RESOURCE"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_SOURCE"), + [D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_DIRECT_QUEUE_COPY_DEST"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COMMON"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_GENERIC_READ"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_UNORDERED_ACCESS"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_SHADER_RESOURCE"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_SOURCE"), + [D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST] = CompLit("D3D12_BARRIER_LAYOUT_COMPUTE_QUEUE_COPY_DEST"), + [D3D12_BARRIER_LAYOUT_VIDEO_QUEUE_COMMON] = 
CompLit("D3D12_BARRIER_LAYOUT_VIDEO_QUEUE_COMMON") + }; + String result = Zi; + if (layout >= 0 && layout < countof(names)) + { + result = names[layout]; + } + else if (layout == D3D12_BARRIER_LAYOUT_UNDEFINED) + { + result = Lit("D3D12_BARRIER_LAYOUT_UNDEFINED"); + } + return result; } void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip) @@ -746,7 +970,7 @@ G_D12_RawCommandList *G_D12_PrepareRawCommandList(G_QueueKind queue_kind) if (SUCCEEDED(hr)) { - hr = ID3D12Device_CreateCommandList(G_D12.device, 0, queue->desc.type, cl->d3d_ca, 0, &IID_ID3D12GraphicsCommandList, (void **)&cl->d3d_cl); + hr = ID3D12Device_CreateCommandList(G_D12.device, 0, queue->desc.type, cl->d3d_ca, 0, &IID_ID3D12GraphicsCommandList7, (void **)&cl->d3d_cl); } if (SUCCEEDED(hr)) @@ -990,9 +1214,9 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle ////////////////////////////// //- Initialize d3d resource desc - D3D12_RESOURCE_STATES d3d_initial_state = D3D12_RESOURCE_STATE_COMMON; + D3D12_BARRIER_LAYOUT d3d_initial_layout = D3D12_BARRIER_LAYOUT_UNDEFINED; D3D12_CLEAR_VALUE clear_value = Zi; - D3D12_RESOURCE_DESC d3d_desc = Zi; + D3D12_RESOURCE_DESC1 d3d_desc = Zi; if (is_buffer) { u64 min_buffer_size = 1024; @@ -1011,6 +1235,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle { i32 largest_dim = MaxI32(MaxI32(desc.texture.dims.x, desc.texture.dims.y), desc.texture.dims.z); i32 max_mips = MinI32(FloorF32(Log2F32(largest_dim)) + 1, G_MaxMips); + d3d_initial_layout = G_D12_BarrierLayoutFromLayout(desc.texture.initial_layout); d3d_desc.Dimension = ( desc.kind == G_ResourceKind_Texture1D ? D3D12_RESOURCE_DIMENSION_TEXTURE1D : desc.kind == G_ResourceKind_Texture2D ? 
D3D12_RESOURCE_DIMENSION_TEXTURE2D : @@ -1027,10 +1252,7 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS * AnyBit(flags, G_ResourceFlag_AllowShaderReadWrite); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET * AnyBit(flags, G_ResourceFlag_AllowRenderTarget); d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL * AnyBit(flags, G_ResourceFlag_AllowDepthStencil); - if (!AnyBit(d3d_desc.Flags, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) - { - d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS; - } + d3d_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS * (desc.texture.initial_layout == G_Layout_Simultaneous); clear_value.Color[0] = desc.texture.clear_color.x, clear_value.Color[1] = desc.texture.clear_color.y, clear_value.Color[2] = desc.texture.clear_color.z, @@ -1048,8 +1270,8 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle DllQueueRemove(gpu_arena->reset_resources.first, gpu_arena->reset_resources.last, resource); --gpu_arena->reset_resources.count; - D3D12_RESOURCE_DESC reset_d3d_desc = Zi; - D3D12_RESOURCE_DESC compare_d3d_desc = Zi; + D3D12_RESOURCE_DESC1 reset_d3d_desc = Zi; + D3D12_RESOURCE_DESC1 compare_d3d_desc = Zi; CopyStruct(&reset_d3d_desc, &resource->d3d_desc); CopyStruct(&compare_d3d_desc, &reset_d3d_desc); @@ -1146,18 +1368,26 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle { clear_value_arg = &clear_value; } - HRESULT hr = ID3D12Device_CreateCommittedResource( + HRESULT hr = ID3D12Device10_CreateCommittedResource3( G_D12.device, &heap_props, heap_flags, &resource->d3d_desc, - d3d_initial_state, + d3d_initial_layout, clear_value_arg, + 0, // pProtectedSession + 0, // NumCastableFormats + 0, // pCastableFormats &IID_ID3D12Resource, (void **)&resource->d3d_resource ); Atomic64FetchAdd(&G_D12.cumulative_nonreuse_count, 1); + for (i32 mip_idx = 0; mip_idx < 
resource->texture_mips; ++mip_idx) + { + resource->cmdlist_texture_layouts[mip_idx] = d3d_initial_layout; + } + if (!SUCCEEDED(hr)) { // TODO: Don't panic @@ -1208,8 +1438,14 @@ G_ResourceHandle G_PushResource(G_ArenaHandle arena_handle, G_CommandListHandle if (can_reuse) { - // FIXME: Remove this - G_Barrier(cl_handle); + if (is_buffer) + { + G_DumbMemorySync(cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource)); + } + else if (is_texture) + { + G_DumbMemoryLayoutSync(cl_handle, G_D12_MakeHandle(G_ResourceHandle, resource), desc.texture.initial_layout); + } } return G_D12_MakeHandle(G_ResourceHandle, resource); @@ -1821,9 +2057,46 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); TempArena scratch = BeginScratchNoConflict(); + // Begin dx12 command list + G_D12_RawCommandList *rcl = G_D12_PrepareRawCommandList(queue_kind); + ID3D12GraphicsCommandList7 *d3d_cl = rcl->d3d_cl; + + // Pipeline state + b32 graphics_rootsig_set = 0; + b32 compute_rootsig_set = 0; + b32 descriptor_heaps_set = 0; + G_D12_Pipeline *bound_pipeline = 0; + + // Constants state + u64 slotted_constants[G_NumConstants]; + u64 bound_compute_constants[G_NumConstants]; + u64 bound_graphics_constants[G_NumConstants]; + for (i32 i = 0; i < countof(slotted_constants); ++i) { slotted_constants[i] = 0; } // Zero-initialize all slots + for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; } + for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } + + // Fill built-in constants + if (!G_IsRefNil(queue->print_buffer_ref)) + { + slotted_constants[G_ShaderConst_PrintBufferRef] = queue->print_buffer_ref.v; + } + { + b32 tweak_b32 = TweakBool("Shader tweak-bool", 1); + f32 tweak_f32 = TweakFloat("Shader tweak-float", 1, 0, 1); + slotted_constants[G_ShaderConst_TweakB32] = tweak_b32; + slotted_constants[G_ShaderConst_TweakF32] = *(u32 *)&tweak_f32; + } + + 
// Rasterizer state + D3D12_VIEWPORT bound_viewport = Zi; + D3D12_RECT bound_scissor = Zi; + D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; + D3D12_INDEX_BUFFER_VIEW bound_ibv = Zi; + u64 bound_render_target_uids[G_MaxRenderTargets] = Zi; + u64 bound_render_clear_target_uid = 0; // Flatten command chunks - i64 cmds_count = 0; + u64 cmds_count = 0; G_D12_Cmd *cmds = PushStructsNoZero(scratch.arena, G_D12_Cmd, cl->cmds_count); { // Flatten command chunks @@ -1852,1244 +2125,636 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) } } - Struct(Batch) - { - Batch *next; - i64 first_cmd_idx; - i64 last_cmd_idx; - i64 actionable_cmds_count; - }; - Batch *first_batch = 0; - Batch *last_batch = 0; - // Batch barrier cmds + i64 max_buffer_barriers = 0; + i64 max_texture_barriers = 0; + i64 max_global_barriers = 0; { - Batch *batch = PushStruct(scratch.arena, Batch); - + u64 cmd_idx = 0; u64 batch_gen = 0; G_D12_Cmd *prev_barrier_cmd = 0; - for (i64 cmd_idx = 0; cmd_idx < cmds_count; ++cmd_idx) + while (cmd_idx < cmds_count) { - b32 should_submit_batch = 0; G_D12_Cmd *cmd = &cmds[cmd_idx]; - batch->last_cmd_idx = cmd_idx; switch (cmd->kind) { // Batch-interrupting cmds default: { - batch->actionable_cmds_count += 1; + cmd_idx += 1; batch_gen += 1; } break; // Non-batch-interrupting cmds case G_D12_CmdKind_Constant: { + cmd_idx += 1; } break; case G_D12_CmdKind_Barrier: { // Determine 'before' state from lookup - if (!prev_barrier_cmd || prev_barrier_cmd->barrier.batch_gen != batch_gen) + if (prev_barrier_cmd && prev_barrier_cmd->barrier.batch_gen != batch_gen) { - // This barrier signals end of batch - should_submit_batch = 1; + // This barrier is part of new batch + prev_barrier_cmd->barrier.is_end_of_batch = 1; } - cmd->barrier.batch_gen = batch_gen; prev_barrier_cmd = cmd; - } break; - } - if (should_submit_batch && batch->actionable_cmds_count > 0) - { - SllQueuePush(first_batch, last_batch, batch); - batch = PushStruct(scratch.arena, Batch); - 
batch->first_cmd_idx = cmd_idx + 1; - batch->last_cmd_idx = cmd_idx + 1; - } - } - if (batch->actionable_cmds_count > 0) - { - SllQueuePush(first_batch, last_batch, batch); - } - } - - - - - - // FIXME: We need to global lock so barriers don't allow other work to slip in - - - - - - - - - - - - - - u64 slotted_constants[G_NumConstants]; - for (i32 i = 0; i < countof(slotted_constants); ++i) { slotted_constants[i] = 0; } // Zero-initialize all slots - - i64 completion_target = 0; - for (Batch *batch = first_batch; batch; batch = batch->next) - { - // Begin dx12 command list - G_D12_RawCommandList *rcl = G_D12_PrepareRawCommandList(queue_kind); - ID3D12GraphicsCommandList *d3d_cl = rcl->d3d_cl; - - // Pipeline state - b32 graphics_rootsig_set = 0; - b32 compute_rootsig_set = 0; - b32 descriptor_heaps_set = 0; - G_D12_Pipeline *bound_pipeline = 0; - - // Constants state - u64 bound_compute_constants[G_NumConstants]; - u64 bound_graphics_constants[G_NumConstants]; - for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; } - for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } - - // Fill built-in constants - if (!G_IsRefNil(queue->print_buffer_ref)) - { - slotted_constants[G_ShaderConst_PrintBufferRef] = queue->print_buffer_ref.v; - } - { - b32 tweak_b32 = TweakBool("Shader tweak-bool", 1); - f32 tweak_f32 = TweakFloat("Shader tweak-float", 1, 0, 1); - slotted_constants[G_ShaderConst_TweakB32] = tweak_b32; - slotted_constants[G_ShaderConst_TweakF32] = *(u32 *)&tweak_f32; - } - - // Rasterizer state - D3D12_VIEWPORT bound_viewport = Zi; - D3D12_RECT bound_scissor = Zi; - D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; - D3D12_INDEX_BUFFER_VIEW bound_ibv = Zi; - u64 bound_render_target_uids[G_MaxRenderTargets] = Zi; - u64 bound_render_clear_target_uid = 0; - - // Build d3d commands - { - for (i64 cmd_idx = batch->first_cmd_idx; cmd_idx <= batch->last_cmd_idx; ++cmd_idx) - { 
- G_D12_Cmd *cmd = &cmds[cmd_idx]; - switch (cmd->kind) - { - default: + if (cmd->barrier.desc.is_global) { - } break; - - //- Constant - - case G_D12_CmdKind_Constant: + max_global_barriers += 1; + } + else { - i32 slot = cmd->constant.slot; - u32 value = cmd->constant.value; - if (slot >= 0 && slot < countof(slotted_constants)) + G_D12_Resource *resource = G_D12_ResourceFromHandle(cmd->barrier.desc.resource); + if (resource->is_texture) { - slotted_constants[slot] = value; - } - } break; - - //- Copy bytes - - case G_D12_CmdKind_CopyBytes: - { - u64 src_offset = cmd->copy_bytes.src_range.min; - u64 copy_size = cmd->copy_bytes.src_range.max - cmd->copy_bytes.src_range.min; - ID3D12GraphicsCommandList_CopyBufferRegion( - d3d_cl, - cmd->copy_bytes.dst->d3d_resource, - cmd->copy_bytes.dst_offset, - cmd->copy_bytes.src->d3d_resource, - src_offset, - copy_size - ); - - - - // FIXME: Remove this (only apply during Barrier command on any implicit resource transitions that occured) - i64 barriers_count = 0; - D3D12_RESOURCE_BARRIER barriers[2] = Zi; - if (G_D12_IsSimultaneous(cmd->copy_bytes.dst)) - { - D3D12_RESOURCE_BARRIER *barrier = &barriers[barriers_count]; - barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier->Transition.pResource = cmd->copy_bytes.dst->d3d_resource; - barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - barrier->Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - ++barriers_count; - } - if (G_D12_IsSimultaneous(cmd->copy_bytes.src)) - { - D3D12_RESOURCE_BARRIER *barrier = &barriers[barriers_count]; - barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier->Transition.pResource = cmd->copy_bytes.src->d3d_resource; - barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier->Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - 
++barriers_count; - } - if (barriers_count > 0) - { - ID3D12GraphicsCommandList_ResourceBarrier(d3d_cl, barriers_count, barriers); - } - - - - - } break; - - //- Copy texels - - case G_D12_CmdKind_CopyTexels: - { - G_D12_Resource *dst = cmd->copy_texels.dst; - G_D12_Resource *src = cmd->copy_texels.src; - D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc; - D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc; - Vec3I32 dst_offset = cmd->copy_texels.dst_texture_offset; - Rng3I32 src_range = cmd->copy_texels.src_texture_range; - - D3D12_BOX src_box = Zi; - D3D12_BOX *src_box_ptr = 0; - { - src_box.left = src_range.p0.x; - src_box.top = src_range.p0.y; - src_box.front = src_range.p0.z; - src_box.right = src_range.p1.x; - src_box.bottom = src_range.p1.y; - src_box.back = src_range.p1.z; - if (src->is_texture) - { - src_box_ptr = &src_box; - } - } - - if (dst->flags & G_ResourceFlag_AllowDepthStencil) - { - // Depth-stencil textures must have src box & dst offset set to 0 - // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12graphicscommandlist-copytextureregion - ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, 0, 0, 0, &src_loc, 0); + RngI32 mips = cmd->barrier.desc.mips; + mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); + mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); + max_texture_barriers += mips.max - mips.min + 1; } else { - ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, src_box_ptr); + max_buffer_barriers += 1; } + } + cmd_idx += 1; + } break; + } + } + if (prev_barrier_cmd) + { + prev_barrier_cmd->barrier.is_end_of_batch = 1; + } + } - // FIXME: Remove this (only apply during Barrier command on any implicit resource transitions that occured) - i64 barriers_count = 0; - D3D12_RESOURCE_BARRIER barriers[2] = Zi; - if (G_D12_IsSimultaneous(dst)) - { - D3D12_RESOURCE_BARRIER *barrier = 
&barriers[barriers_count]; - barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier->Transition.pResource = dst->d3d_resource; - barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - barrier->Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - ++barriers_count; - } - if (G_D12_IsSimultaneous(src)) - { - D3D12_RESOURCE_BARRIER *barrier = &barriers[barriers_count]; - barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier->Transition.pResource = src->d3d_resource; - barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - barrier->Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - ++barriers_count; - } - if (barriers_count > 0) - { - ID3D12GraphicsCommandList_ResourceBarrier(d3d_cl, barriers_count, barriers); - } + // Build d3d commands + { + u64 batch_barrier_idx_start = 0; + u64 batch_barrier_idx_opl = 0; // One past last - } break; + u64 cmd_idx = 0; + while (cmd_idx < cmds_count) + { + G_D12_Cmd *cmd = &cmds[cmd_idx]; + switch (cmd->kind) + { + default: + { + cmd_idx += 1; + } break; - //- Compute + //- Constant - case G_D12_CmdKind_Compute: + case G_D12_CmdKind_Constant: + { + i32 slot = cmd->constant.slot; + u32 value = cmd->constant.value; + if (slot >= 0 && slot < countof(slotted_constants)) { - // Fetch pipeline - G_D12_Pipeline *pipeline = 0; + slotted_constants[slot] = value; + } + cmd_idx += 1; + } break; + + //- Barrier + + case G_D12_CmdKind_Barrier: + { + batch_barrier_idx_opl = cmd_idx + 1; + + // Submit batched barriers + if (cmd->barrier.is_end_of_batch) + { + // Build barriers + u64 buffer_barriers_count = 0; + u64 texture_barriers_count = 0; + u64 global_barriers_count = 0; + D3D12_BUFFER_BARRIER *buffer_barriers = PushStructs(scratch.arena, D3D12_BUFFER_BARRIER, max_buffer_barriers); + D3D12_TEXTURE_BARRIER *texture_barriers = 
PushStructs(scratch.arena, D3D12_TEXTURE_BARRIER, max_texture_barriers); + D3D12_GLOBAL_BARRIER *global_barriers = PushStructs(scratch.arena, D3D12_GLOBAL_BARRIER, max_global_barriers); + for (u64 barrier_cmd_idx = batch_barrier_idx_start; barrier_cmd_idx < batch_barrier_idx_opl; ++barrier_cmd_idx) { - G_D12_PipelineDesc pipeline_desc = Zi; - pipeline_desc.cs = cmd->compute.cs; - pipeline = G_D12_PipelineFromDesc(pipeline_desc); - } - - if (pipeline) - { - // Set descriptor heaps - if (!descriptor_heaps_set) + G_D12_Cmd *barrier_cmd = &cmds[barrier_cmd_idx]; + if (barrier_cmd->kind == G_D12_CmdKind_Barrier) { - ID3D12DescriptorHeap *heaps[] = { - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, - }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); - descriptor_heaps_set = 1; - } - - // Bind rootsig - if (!compute_rootsig_set) - { - ID3D12GraphicsCommandList_SetComputeRootSignature(d3d_cl, G_D12.bindless_rootsig); - compute_rootsig_set = 1; - } - - // Bind pipeline - if (pipeline != bound_pipeline) - { - ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); - bound_pipeline = pipeline; - } - - // Update root constants - for (i32 slot = 0; slot < countof(slotted_constants); ++slot) - { - if (bound_compute_constants[slot] != slotted_constants[slot]) + G_MemoryBarrierDesc desc = barrier_cmd->barrier.desc; + // Translate gpu barrier kind -> d3d barrier fields + D3D12_BARRIER_SYNC sync_before = G_D12_BarrierSyncFromStages(desc.stage_prev); + D3D12_BARRIER_SYNC sync_after = G_D12_BarrierSyncFromStages(desc.stage_next); + D3D12_BARRIER_ACCESS access_before = G_D12_BarrierAccessFromAccesses(desc.access_prev); + D3D12_BARRIER_ACCESS access_after = G_D12_BarrierAccessFromAccesses(desc.access_next); + D3D12_BARRIER_TYPE barrier_type = D3D12_BARRIER_TYPE_GLOBAL; + if (!desc.is_global) { - 
ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); - bound_compute_constants[slot] = slotted_constants[slot]; + G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); + barrier_type = resource->is_texture ? D3D12_BARRIER_TYPE_TEXTURE : D3D12_BARRIER_TYPE_BUFFER; + } + + // Build barrier + switch (barrier_type) + { + case D3D12_BARRIER_TYPE_BUFFER: + { + G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); + D3D12_BUFFER_BARRIER *barrier = &buffer_barriers[buffer_barriers_count++]; + barrier->SyncBefore = sync_before; + barrier->SyncAfter = sync_after; + barrier->AccessBefore = access_before; + barrier->AccessAfter = access_after; + barrier->pResource = resource->d3d_resource; + barrier->Offset = 0; + barrier->Size = U64Max; + } break; + + case D3D12_BARRIER_TYPE_TEXTURE: + { + G_D12_Resource *resource = G_D12_ResourceFromHandle(desc.resource); + RngI32 mips = barrier_cmd->barrier.desc.mips; + { + mips.min = ClampI32(mips.min, 0, resource->texture_mips - 1); + mips.max = ClampI32(mips.max, mips.min, resource->texture_mips - 1); + } + // Create a barrier for each contiguous span of mips with matching layout + D3D12_TEXTURE_BARRIER *barrier = 0; + for (i32 mip_idx = mips.min; mip_idx <= mips.max; ++mip_idx) + { + D3D12_BARRIER_LAYOUT layout_before = resource->cmdlist_texture_layouts[mip_idx]; + D3D12_BARRIER_LAYOUT layout_after = layout_before; + if (desc.layout != G_Layout_NoChange) + { + layout_after = G_D12_BarrierLayoutFromLayout(desc.layout); + } + if (barrier == 0 || barrier->LayoutBefore != layout_before) + { + barrier = &texture_barriers[texture_barriers_count++]; + barrier->SyncBefore = sync_before; + barrier->SyncAfter = sync_after; + barrier->AccessBefore = access_before; + barrier->AccessAfter = access_after; + barrier->LayoutBefore = layout_before; + barrier->LayoutAfter = layout_after; + barrier->pResource = resource->d3d_resource; + barrier->Subresources.IndexOrFirstMipLevel = 
mip_idx; + barrier->Subresources.NumArraySlices = 1; + barrier->Subresources.NumPlanes = 1; + } + barrier->Subresources.NumMipLevels += 1; + resource->cmdlist_texture_layouts[mip_idx] = layout_after; + } + } break; + + case D3D12_BARRIER_TYPE_GLOBAL: + { + D3D12_GLOBAL_BARRIER *barrier = &global_barriers[global_barriers_count++]; + barrier->SyncBefore = sync_before; + barrier->SyncAfter = sync_after; + barrier->AccessBefore = access_before; + barrier->AccessAfter = access_after; + } break; } } - - // Dispatch - ID3D12GraphicsCommandList_Dispatch(d3d_cl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); } - } break; - - //- Rasterize - - case G_D12_CmdKind_Rasterize: - { - // Fetch pipeline - G_D12_Pipeline *pipeline = 0; + // Dispatch barriers { - G_D12_PipelineDesc pipeline_desc = Zi; - pipeline_desc.vs = cmd->rasterize.vs; - pipeline_desc.ps = cmd->rasterize.ps; + u32 barrier_groups_count = 0; + D3D12_BARRIER_GROUP barrier_groups[3] = Zi; + if (buffer_barriers_count > 0) { - pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; - switch (cmd->rasterize.raster_mode) - { - default: Assert(0); break; - case G_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; - case G_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - case G_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - case G_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case G_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case G_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - case G_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - } + D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; + 
group->Type = D3D12_BARRIER_TYPE_BUFFER; + group->NumBarriers = buffer_barriers_count; + group->pBufferBarriers = buffer_barriers; } - if (cmd->rasterize.raster_mode == G_RasterMode_WireTriangleList || cmd->rasterize.raster_mode == G_RasterMode_WireTriangleStrip) + if (texture_barriers_count > 0) { - pipeline_desc.is_wireframe = 1; + D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; + group->Type = D3D12_BARRIER_TYPE_TEXTURE; + group->NumBarriers = texture_barriers_count; + group->pTextureBarriers = texture_barriers; } + if (global_barriers_count > 0) + { + D3D12_BARRIER_GROUP *group = &barrier_groups[barrier_groups_count++]; + group->Type = D3D12_BARRIER_TYPE_GLOBAL; + group->NumBarriers = global_barriers_count; + group->pGlobalBarriers = global_barriers; + } + if (barrier_groups_count > 0) + { + ID3D12GraphicsCommandList7_Barrier(d3d_cl, barrier_groups_count, barrier_groups); + } + } + + batch_barrier_idx_start = cmd_idx + 1; + } + + cmd_idx += 1; + } break; + + //- Copy bytes + + case G_D12_CmdKind_CopyBytes: + { + u64 src_offset = cmd->copy_bytes.src_range.min; + u64 copy_size = cmd->copy_bytes.src_range.max - cmd->copy_bytes.src_range.min; + ID3D12GraphicsCommandList_CopyBufferRegion( + d3d_cl, + cmd->copy_bytes.dst->d3d_resource, + cmd->copy_bytes.dst_offset, + cmd->copy_bytes.src->d3d_resource, + src_offset, + copy_size + ); + cmd_idx += 1; + } break; + + //- Copy texels + + case G_D12_CmdKind_CopyTexels: + { + G_D12_Resource *dst = cmd->copy_texels.dst; + G_D12_Resource *src = cmd->copy_texels.src; + D3D12_TEXTURE_COPY_LOCATION dst_loc = cmd->copy_texels.dst_loc; + D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc; + Vec3I32 dst_offset = cmd->copy_texels.dst_texture_offset; + Rng3I32 src_range = cmd->copy_texels.src_texture_range; + + D3D12_BOX src_box = Zi; + D3D12_BOX *src_box_ptr = 0; + { + src_box.left = src_range.p0.x; + src_box.top = src_range.p0.y; + src_box.front = src_range.p0.z; + src_box.right = src_range.p1.x; + 
src_box.bottom = src_range.p1.y; + src_box.back = src_range.p1.z; + if (src->is_texture) + { + src_box_ptr = &src_box; + } + } + + if (dst->flags & G_ResourceFlag_AllowDepthStencil) + { + // Depth-stencil textures must have src box & dst offset set to 0 + // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12graphicscommandlist-copytextureregion + ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, 0, 0, 0, &src_loc, 0); + } + else + { + ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, src_box_ptr); + } + + cmd_idx += 1; + } break; + + //- Compute + + case G_D12_CmdKind_Compute: + { + // Fetch pipeline + G_D12_Pipeline *pipeline = 0; + { + G_D12_PipelineDesc pipeline_desc = Zi; + pipeline_desc.cs = cmd->compute.cs; + pipeline = G_D12_PipelineFromDesc(pipeline_desc); + } + + if (pipeline) + { + // Set descriptor heaps + if (!descriptor_heaps_set) + { + ID3D12DescriptorHeap *heaps[] = { + G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, + G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, + }; + ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); + descriptor_heaps_set = 1; + } + + // Bind rootsig + if (!compute_rootsig_set) + { + ID3D12GraphicsCommandList_SetComputeRootSignature(d3d_cl, G_D12.bindless_rootsig); + compute_rootsig_set = 1; + } + + // Bind pipeline + if (pipeline != bound_pipeline) + { + ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); + bound_pipeline = pipeline; + } + + // Update root constants + for (i32 slot = 0; slot < countof(slotted_constants); ++slot) + { + if (bound_compute_constants[slot] != slotted_constants[slot]) + { + ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); + bound_compute_constants[slot] = slotted_constants[slot]; + } + } + + // Dispatch + ID3D12GraphicsCommandList_Dispatch(d3d_cl, cmd->compute.groups.x, 
cmd->compute.groups.y, cmd->compute.groups.z); + } + + cmd_idx += 1; + } break; + + //- Rasterize + + case G_D12_CmdKind_Rasterize: + { + // Fetch pipeline + G_D12_Pipeline *pipeline = 0; + { + G_D12_PipelineDesc pipeline_desc = Zi; + pipeline_desc.vs = cmd->rasterize.vs; + pipeline_desc.ps = cmd->rasterize.ps; + { + pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; + switch (cmd->rasterize.raster_mode) + { + default: Assert(0); break; + case G_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; + case G_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case G_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; + case G_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case G_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case G_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + case G_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; + } + } + if (cmd->rasterize.raster_mode == G_RasterMode_WireTriangleList || cmd->rasterize.raster_mode == G_RasterMode_WireTriangleStrip) + { + pipeline_desc.is_wireframe = 1; + } + for (u32 i = 0; i < countof(cmd->rasterize.render_target_descs); ++i) + { + G_RenderTargetDesc desc = cmd->rasterize.render_target_descs[i]; + G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); + if (rt) + { + pipeline_desc.render_target_formats[i] = rt->texture_format; + pipeline_desc.render_target_blend_modes[i] = desc.blend; + } + else + { + pipeline_desc.render_target_formats[i] = G_Format_Unknown; + } + } + pipeline = G_D12_PipelineFromDesc(pipeline_desc); + } + + // Create ibv + u32 indices_count = 0; + D3D12_INDEX_BUFFER_VIEW ibv = Zi; + { + G_IndexBufferDesc desc 
= cmd->rasterize.index_buffer_desc; + if (desc.index_count > 0) + { + G_D12_Resource *index_buffer_resource = G_D12_ResourceFromHandle(desc.resource); + ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; + ibv.SizeInBytes = desc.index_size * desc.index_count; + if (desc.index_size == 2) + { + ibv.Format = DXGI_FORMAT_R16_UINT; + indices_count = ibv.SizeInBytes / 2; + } + else if (desc.index_size == 4) + { + ibv.Format = DXGI_FORMAT_R32_UINT; + indices_count = ibv.SizeInBytes / 4; + } + else + { + Assert(0); // Invalid index size + } + } + } + + // Prepare & dispatch + if (pipeline && indices_count > 0) + { + // Set descriptor heaps + if (!descriptor_heaps_set) + { + ID3D12DescriptorHeap *heaps[] = { + G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, + G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, + }; + ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); + descriptor_heaps_set = 1; + } + + // Bind rootsig + if (!graphics_rootsig_set) + { + ID3D12GraphicsCommandList_SetGraphicsRootSignature(d3d_cl, G_D12.bindless_rootsig); + graphics_rootsig_set = 1; + } + + // Bind pipeline + if (pipeline != bound_pipeline) + { + ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); + bound_pipeline = pipeline; + } + + // Update root constants + for (i32 slot = 0; slot < countof(slotted_constants); ++slot) + { + if (bound_graphics_constants[slot] != slotted_constants[slot]) + { + ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); + bound_graphics_constants[slot] = slotted_constants[slot]; + } + } + + // Set viewport + { + D3D12_VIEWPORT viewport = Zi; + { + Rng3 range = cmd->rasterize.viewport; + viewport.TopLeftX = range.p0.x; + viewport.TopLeftY = range.p0.y; + viewport.Width = range.p1.x - range.p0.x; + viewport.Height = range.p1.y - range.p0.y; + viewport.MinDepth = range.p0.z; + viewport.MaxDepth = range.p1.z; + } + if 
(!MatchStruct(&viewport, &bound_viewport)) + { + bound_viewport = viewport; + ID3D12GraphicsCommandList_RSSetViewports(d3d_cl, 1, &viewport); + } + } + + // Set scissor + { + D3D12_RECT scissor = Zi; + { + Rng2 range = cmd->rasterize.scissor; + scissor.left = range.p0.x; + scissor.top = range.p0.y; + scissor.right = range.p1.x; + scissor.bottom = range.p1.y; + } + if (!MatchStruct(&scissor, &bound_scissor)) + { + bound_scissor = scissor; + ID3D12GraphicsCommandList_RSSetScissorRects(d3d_cl, 1, &scissor); + } + } + + // Set topology + { + D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + switch (cmd->rasterize.raster_mode) + { + default: Assert(0); break; + case G_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; + case G_RasterMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; + case G_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; + case G_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; + case G_RasterMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; + case G_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; + case G_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; + } + if (topology != bound_primitive_topology) + { + ID3D12GraphicsCommandList_IASetPrimitiveTopology(d3d_cl, topology); + } + } + + // Set index buffer + if (!MatchStruct(&ibv, &bound_ibv)) + { + ID3D12GraphicsCommandList_IASetIndexBuffer(d3d_cl, &ibv); + bound_ibv = ibv; + } + + // Bind render targets + { + b32 om_dirty = 0; + u32 rtvs_count = 0; for (u32 i = 0; i < countof(cmd->rasterize.render_target_descs); ++i) { G_RenderTargetDesc desc = cmd->rasterize.render_target_descs[i]; G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); if (rt) { - pipeline_desc.render_target_formats[i] = rt->texture_format; - pipeline_desc.render_target_blend_modes[i] = desc.blend; + if 
(bound_render_target_uids[i] != rt->uid + desc.mip) + { + G_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[i]; + G_D12_InitRtv(rt, rtv_descriptor->handle, desc.mip); + bound_render_target_uids[i] = rt->uid + desc.mip; + om_dirty = 1; + } + ++rtvs_count; } else { - pipeline_desc.render_target_formats[i] = G_Format_Unknown; + break; } } - pipeline = G_D12_PipelineFromDesc(pipeline_desc); - } - - // Create ibv - u32 indices_count = 0; - D3D12_INDEX_BUFFER_VIEW ibv = Zi; - { - G_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc; - if (desc.index_count > 0) + if (om_dirty) { - G_D12_Resource *index_buffer_resource = G_D12_ResourceFromHandle(desc.resource); - ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; - ibv.SizeInBytes = desc.index_size * desc.index_count; - if (desc.index_size == 2) + D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[G_MaxRenderTargets] = Zi; + for (u32 i = 0; i < rtvs_count; ++i) { - ibv.Format = DXGI_FORMAT_R16_UINT; - indices_count = ibv.SizeInBytes / 2; - } - else if (desc.index_size == 4) - { - ibv.Format = DXGI_FORMAT_R32_UINT; - indices_count = ibv.SizeInBytes / 4; - } - else - { - Assert(0); // Invalid index size + rtv_handles[i] = rcl->rtv_descriptors[i]->handle; } + ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0); } } - // Prepare & dispatch - if (pipeline && indices_count > 0) - { - // Set descriptor heaps - if (!descriptor_heaps_set) - { - ID3D12DescriptorHeap *heaps[] = { - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, - G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, - }; - ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); - descriptor_heaps_set = 1; - } + // Dispatch + ID3D12GraphicsCommandList_DrawIndexedInstanced(d3d_cl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); + } - // Bind rootsig - if (!graphics_rootsig_set) - { - ID3D12GraphicsCommandList_SetGraphicsRootSignature(d3d_cl, 
G_D12.bindless_rootsig); - graphics_rootsig_set = 1; - } + cmd_idx += 1; + } break; - // Bind pipeline - if (pipeline != bound_pipeline) - { - ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); - bound_pipeline = pipeline; - } + //- Clear rtv - // Update root constants - for (i32 slot = 0; slot < countof(slotted_constants); ++slot) - { - if (bound_graphics_constants[slot] != slotted_constants[slot]) - { - ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); - bound_graphics_constants[slot] = slotted_constants[slot]; - } - } - - // Set viewport - { - D3D12_VIEWPORT viewport = Zi; - { - Rng3 range = cmd->rasterize.viewport; - viewport.TopLeftX = range.p0.x; - viewport.TopLeftY = range.p0.y; - viewport.Width = range.p1.x - range.p0.x; - viewport.Height = range.p1.y - range.p0.y; - viewport.MinDepth = range.p0.z; - viewport.MaxDepth = range.p1.z; - } - if (!MatchStruct(&viewport, &bound_viewport)) - { - bound_viewport = viewport; - ID3D12GraphicsCommandList_RSSetViewports(d3d_cl, 1, &viewport); - } - } - - // Set scissor - { - D3D12_RECT scissor = Zi; - { - Rng2 range = cmd->rasterize.scissor; - scissor.left = range.p0.x; - scissor.top = range.p0.y; - scissor.right = range.p1.x; - scissor.bottom = range.p1.y; - } - if (!MatchStruct(&scissor, &bound_scissor)) - { - bound_scissor = scissor; - ID3D12GraphicsCommandList_RSSetScissorRects(d3d_cl, 1, &scissor); - } - } - - // Set topology - { - D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - switch (cmd->rasterize.raster_mode) - { - default: Assert(0); break; - case G_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; - case G_RasterMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; - case G_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; - case G_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - case G_RasterMode_TriangleStrip: topology = 
D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - case G_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - case G_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - } - if (topology != bound_primitive_topology) - { - ID3D12GraphicsCommandList_IASetPrimitiveTopology(d3d_cl, topology); - } - } - - // Set index buffer - if (!MatchStruct(&ibv, &bound_ibv)) - { - ID3D12GraphicsCommandList_IASetIndexBuffer(d3d_cl, &ibv); - bound_ibv = ibv; - } - - // Bind render targets - u32 barriers_count = 0; - D3D12_RESOURCE_BARRIER barriers[G_MaxRenderTargets] = Zi; - { - u32 rtvs_count = 0; - b32 om_dirty = 0; - for (u32 rtv_idx = 0; rtv_idx < countof(cmd->rasterize.render_target_descs); ++rtv_idx) - { - G_RenderTargetDesc desc = cmd->rasterize.render_target_descs[rtv_idx]; - G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); - if (rt) - { - if (G_D12_IsSimultaneous(rt)) - { - D3D12_RESOURCE_BARRIER *barrier = &barriers[barriers_count]; - barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier->Transition.pResource = rt->d3d_resource; - barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier->Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - barrier->Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - ++barriers_count; - } - if (bound_render_target_uids[rtv_idx] != rt->uid + desc.mip) - { - G_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[rtv_idx]; - G_D12_InitRtv(rt, rtv_descriptor->handle, desc.mip); - bound_render_target_uids[rtv_idx] = rt->uid + desc.mip; - om_dirty = 1; - } - ++rtvs_count; - } - else - { - break; - } - } - if (om_dirty) - { - D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[G_MaxRenderTargets] = Zi; - for (u32 rtv_idx = 0; rtv_idx < rtvs_count; ++rtv_idx) - { - rtv_handles[rtv_idx] = rcl->rtv_descriptors[rtv_idx]->handle; - } - ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0); - } - } - - // 
Dispatch - ID3D12GraphicsCommandList_DrawIndexedInstanced(d3d_cl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); - - // FIXME: Remove this (only apply during Barrier command on any implicit resource transitions that occured) - // FIXME: Does the implicit promotion happen during Draw? Or during OMSetRenderTargets? - if (barriers_count > 0) - { - ID3D12GraphicsCommandList_ResourceBarrier(d3d_cl, barriers_count, barriers); - } - } - - } break; - - //- Clear rtv - - case G_D12_CmdKind_ClearRtv: + case G_D12_CmdKind_ClearRtv: + { + G_D12_Resource *rt = cmd->clear_rtv.render_target; + f32 clear_color[4] = Zi; { - G_D12_Resource *rt = cmd->clear_rtv.render_target; - f32 clear_color[4] = Zi; - { - clear_color[0] = cmd->clear_rtv.color.x; - clear_color[1] = cmd->clear_rtv.color.y; - clear_color[2] = cmd->clear_rtv.color.z; - clear_color[3] = cmd->clear_rtv.color.w; - } - D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->handle; - if (bound_render_clear_target_uid != rt->uid + cmd->clear_rtv.mip) - { - G_D12_InitRtv(rt, rtv_handle, cmd->clear_rtv.mip); - bound_render_clear_target_uid = rt->uid + cmd->clear_rtv.mip; - } - ID3D12GraphicsCommandList_ClearRenderTargetView(d3d_cl, rtv_handle, clear_color, 0, 0); - - // FIXME: Remove this (only apply during Barrier command on any implicit resource transitions that occured) - i64 barriers_count = 0; - D3D12_RESOURCE_BARRIER barrier = Zi; - if (G_D12_IsSimultaneous(rt)) - { - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = rt->d3d_resource; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - ++barriers_count; - } - if (barriers_count > 0) - { - ID3D12GraphicsCommandList_ResourceBarrier(d3d_cl, barriers_count, &barrier); - } - } break; - - //- Discard rtv - - case G_D12_CmdKind_DiscardRtv: + clear_color[0] = 
cmd->clear_rtv.color.x; + clear_color[1] = cmd->clear_rtv.color.y; + clear_color[2] = cmd->clear_rtv.color.z; + clear_color[3] = cmd->clear_rtv.color.w; + } + D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->handle; + if (bound_render_clear_target_uid != rt->uid + cmd->clear_rtv.mip) { - D3D12_DISCARD_REGION region = Zi; - region.FirstSubresource = cmd->discard_rtv.mip; - region.NumSubresources = 1; - G_D12_Resource *resource = cmd->discard_rtv.render_target; - ID3D12GraphicsCommandList_DiscardResource(d3d_cl, resource->d3d_resource, 0); + G_D12_InitRtv(rt, rtv_handle, cmd->clear_rtv.mip); + bound_render_clear_target_uid = rt->uid + cmd->clear_rtv.mip; + } + ID3D12GraphicsCommandList_ClearRenderTargetView(d3d_cl, rtv_handle, clear_color, 0, 0); + cmd_idx += 1; + } break; - // FIXME: Remove this (only apply during Barrier command on any implicit resource transitions that occured) - i64 barriers_count = 0; - D3D12_RESOURCE_BARRIER barrier = Zi; - if (G_D12_IsSimultaneous(resource)) - { - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = resource->d3d_resource; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - ++barriers_count; - } - if (barriers_count > 0) - { - ID3D12GraphicsCommandList_ResourceBarrier(d3d_cl, barriers_count, &barrier); - } - } break; + //- Discard rtv - //- Log + case G_D12_CmdKind_DiscardRtv: + { + D3D12_DISCARD_REGION region = Zi; + region.FirstSubresource = cmd->discard_rtv.mip; + region.NumSubresources = 1; + G_D12_Resource *resource = cmd->discard_rtv.render_target; + ID3D12GraphicsCommandList_DiscardResource(d3d_cl, resource->d3d_resource, 0); + cmd_idx += 1; + } break; - case G_D12_CmdKind_Log: + //- Log + + case G_D12_CmdKind_Log: + { + G_D12_Resource *resource = cmd->log.resource; + String resource_name = 
STRING(resource->name_len, resource->name_text); + + String layouts_str = Zi; { - // FIXME + StringList layout_names = Zi; + for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) + { + String layout_name = G_D12_NameFromBarrierLayout(resource->cmdlist_texture_layouts[mip_idx]); + String layout_str = StringF(scratch.arena, "[%F] %F", FmtSint(mip_idx), FmtString(layout_name)); + PushStringToList(scratch.arena, &layout_names, layout_str); + } + layouts_str = StringFromList(scratch.arena, layout_names, Lit(", ")); + } - // G_D12_Resource *resource = cmd->log.resource; - // String resource_name = STRING(resource->name_len, resource->name_text); - - // String layouts_str = Zi; - // { - // StringList layout_names = Zi; - // for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) - // { - // String layout_name = G_D12_NameFromBarrierLayout(resource->cmdlist_texture_layouts[mip_idx]); - // String layout_str = StringF(scratch.arena, "[%F] %F", FmtSint(mip_idx), FmtString(layout_name)); - // PushStringToList(scratch.arena, &layout_names, layout_str); - // } - // layouts_str = StringFromList(scratch.arena, layout_names, Lit(", ")); - // } - - // String msg = StringF( - // scratch.arena, - // "[Gpu command list resource log] uid: %F, name: \"%F\", layouts: { %F }", - // FmtUint(resource->uid), - // FmtString(resource_name), - // FmtString(layouts_str) - // ); - // LogDebug(msg); - - } break; - } + String msg = StringF( + scratch.arena, + "[Gpu command list resource log] uid: %F, name: \"%F\", layouts: { %F }", + FmtUint(resource->uid), + FmtString(resource_name), + FmtString(layouts_str) + ); + LogDebug(msg); + cmd_idx += 1; + } break; } } - - // End dx12 command list - // FIXME: Only lock & signal fence on final batch - completion_target = G_D12_CommitRawCommandList(rcl); } - - - - - - - - - - // // Begin dx12 command list - // G_D12_RawCommandList *rcl = G_D12_PrepareRawCommandList(queue_kind); - // ID3D12GraphicsCommandList *d3d_cl = rcl->d3d_cl; - - 
// // Pipeline state - // b32 graphics_rootsig_set = 0; - // b32 compute_rootsig_set = 0; - // b32 descriptor_heaps_set = 0; - // G_D12_Pipeline *bound_pipeline = 0; - - // // Constants state - // u64 slotted_constants[G_NumConstants]; - // u64 bound_compute_constants[G_NumConstants]; - // u64 bound_graphics_constants[G_NumConstants]; - // for (i32 i = 0; i < countof(slotted_constants); ++i) { slotted_constants[i] = 0; } // Zero-initialize all slots - // for (i32 i = 0; i < countof(bound_compute_constants); ++i) { bound_compute_constants[i] = U64Max; } - // for (i32 i = 0; i < countof(bound_graphics_constants); ++i) { bound_graphics_constants[i] = U64Max; } - - // // Fill built-in constants - // if (!G_IsRefNil(queue->print_buffer_ref)) - // { - // slotted_constants[G_ShaderConst_PrintBufferRef] = queue->print_buffer_ref.v; - // } - // { - // b32 tweak_b32 = TweakBool("Shader tweak-bool", 1); - // f32 tweak_f32 = TweakFloat("Shader tweak-float", 1, 0, 1); - // slotted_constants[G_ShaderConst_TweakB32] = tweak_b32; - // slotted_constants[G_ShaderConst_TweakF32] = *(u32 *)&tweak_f32; - // } - - // // Rasterizer state - // D3D12_VIEWPORT bound_viewport = Zi; - // D3D12_RECT bound_scissor = Zi; - // D3D_PRIMITIVE_TOPOLOGY bound_primitive_topology = -1; - // D3D12_INDEX_BUFFER_VIEW bound_ibv = Zi; - // u64 bound_render_target_uids[G_MaxRenderTargets] = Zi; - // u64 bound_render_clear_target_uid = 0; - - // // Build d3d commands - // { - // u64 cmd_idx = 0; - // while (cmd_idx < cmds_count) - // { - // G_D12_Cmd *cmd = &cmds[cmd_idx]; - // switch (cmd->kind) - // { - // default: - // { - // cmd_idx += 1; - // } break; - - // //- Constant - - // case G_D12_CmdKind_Constant: - // { - // i32 slot = cmd->constant.slot; - // u32 value = cmd->constant.value; - // if (slot >= 0 && slot < countof(slotted_constants)) - // { - // slotted_constants[slot] = value; - // } - // cmd_idx += 1; - // } break; - - // //- Barrier - - // case G_D12_CmdKind_Barrier: - // { - // if 
(cmd->barrier.should_commit) - // { - // D3D12_RESOURCE_BARRIER barrier = Zi; - // barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; - // ID3D12GraphicsCommandList_ResourceBarrier(d3d_cl, 1, &barrier); - // } - - // cmd_idx += 1; - // } break; - - // //- Copy bytes - - // case G_D12_CmdKind_CopyBytes: - // { - // u64 src_offset = cmd->copy_bytes.src_range.min; - // u64 copy_size = cmd->copy_bytes.src_range.max - cmd->copy_bytes.src_range.min; - // ID3D12GraphicsCommandList_CopyBufferRegion( - // d3d_cl, - // cmd->copy_bytes.dst->d3d_resource, - // cmd->copy_bytes.dst_offset, - // cmd->copy_bytes.src->d3d_resource, - // src_offset, - // copy_size - // ); - // cmd_idx += 1; - - - - // // FIXME: Remove this (only apply during Barrier command on any implicit resource transitions that occured) - // D3D12_RESOURCE_BARRIER barriers[2] = Zi; - // { - // D3D12_RESOURCE_BARRIER *barrier = &barriers[0]; - // barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // barrier->Transition.pResource = cmd->copy_bytes.dst->d3d_resource; - // barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - // barrier->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - // barrier->Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - // } - // { - // D3D12_RESOURCE_BARRIER *barrier = &barriers[1]; - // barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // barrier->Transition.pResource = cmd->copy_bytes.src->d3d_resource; - // barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - // barrier->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - // barrier->Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - // } - // ID3D12GraphicsCommandList_ResourceBarrier(d3d_cl, 2, barriers); - - - - - // } break; - - // //- Copy texels - - // case G_D12_CmdKind_CopyTexels: - // { - // G_D12_Resource *dst = cmd->copy_texels.dst; - // G_D12_Resource *src = cmd->copy_texels.src; - // D3D12_TEXTURE_COPY_LOCATION dst_loc = 
cmd->copy_texels.dst_loc; - // D3D12_TEXTURE_COPY_LOCATION src_loc = cmd->copy_texels.src_loc; - // Vec3I32 dst_offset = cmd->copy_texels.dst_texture_offset; - // Rng3I32 src_range = cmd->copy_texels.src_texture_range; - - // D3D12_BOX src_box = Zi; - // D3D12_BOX *src_box_ptr = 0; - // { - // src_box.left = src_range.p0.x; - // src_box.top = src_range.p0.y; - // src_box.front = src_range.p0.z; - // src_box.right = src_range.p1.x; - // src_box.bottom = src_range.p1.y; - // src_box.back = src_range.p1.z; - // if (src->is_texture) - // { - // src_box_ptr = &src_box; - // } - // } - - // if (dst->flags & G_ResourceFlag_AllowDepthStencil) - // { - // // Depth-stencil textures must have src box & dst offset set to 0 - // // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12graphicscommandlist-copytextureregion - // ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, 0, 0, 0, &src_loc, 0); - // } - // else - // { - // ID3D12GraphicsCommandList_CopyTextureRegion(d3d_cl, &dst_loc, dst_offset.x, dst_offset.y, dst_offset.z, &src_loc, src_box_ptr); - // } - - - - // // FIXME: Remove this (only apply during Barrier command on any implicit resource transitions that occured) - // D3D12_RESOURCE_BARRIER barriers[2] = Zi; - // { - // D3D12_RESOURCE_BARRIER *barrier = &barriers[0]; - // barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // barrier->Transition.pResource = dst->d3d_resource; - // barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - // barrier->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; - // barrier->Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - // } - // { - // D3D12_RESOURCE_BARRIER *barrier = &barriers[1]; - // barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // barrier->Transition.pResource = src->d3d_resource; - // barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - // barrier->Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; - // 
barrier->Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - // } - // ID3D12GraphicsCommandList_ResourceBarrier(d3d_cl, 2, barriers); - - // cmd_idx += 1; - // } break; - - // //- Compute - - // case G_D12_CmdKind_Compute: - // { - // // Fetch pipeline - // G_D12_Pipeline *pipeline = 0; - // { - // G_D12_PipelineDesc pipeline_desc = Zi; - // pipeline_desc.cs = cmd->compute.cs; - // pipeline = G_D12_PipelineFromDesc(pipeline_desc); - // } - - // if (pipeline) - // { - // // Set descriptor heaps - // if (!descriptor_heaps_set) - // { - // ID3D12DescriptorHeap *heaps[] = { - // G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, - // G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, - // }; - // ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); - // descriptor_heaps_set = 1; - // } - - // // Bind rootsig - // if (!compute_rootsig_set) - // { - // ID3D12GraphicsCommandList_SetComputeRootSignature(d3d_cl, G_D12.bindless_rootsig); - // compute_rootsig_set = 1; - // } - - // // Bind pipeline - // if (pipeline != bound_pipeline) - // { - // ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); - // bound_pipeline = pipeline; - // } - - // // Update root constants - // for (i32 slot = 0; slot < countof(slotted_constants); ++slot) - // { - // if (bound_compute_constants[slot] != slotted_constants[slot]) - // { - // ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); - // bound_compute_constants[slot] = slotted_constants[slot]; - // } - // } - - // // Dispatch - // ID3D12GraphicsCommandList_Dispatch(d3d_cl, cmd->compute.groups.x, cmd->compute.groups.y, cmd->compute.groups.z); - // } - - // cmd_idx += 1; - // } break; - - // //- Rasterize - - // case G_D12_CmdKind_Rasterize: - // { - // // Fetch pipeline - // G_D12_Pipeline *pipeline = 0; - // { - // G_D12_PipelineDesc pipeline_desc = Zi; - // pipeline_desc.vs = cmd->rasterize.vs; - // pipeline_desc.ps = 
cmd->rasterize.ps; - // { - // pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED; - // switch (cmd->rasterize.raster_mode) - // { - // default: Assert(0); break; - // case G_RasterMode_PointList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; break; - // case G_RasterMode_LineList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - // case G_RasterMode_LineStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; break; - // case G_RasterMode_TriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - // case G_RasterMode_TriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - // case G_RasterMode_WireTriangleList: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - // case G_RasterMode_WireTriangleStrip: pipeline_desc.topology_type = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; break; - // } - // } - // if (cmd->rasterize.raster_mode == G_RasterMode_WireTriangleList || cmd->rasterize.raster_mode == G_RasterMode_WireTriangleStrip) - // { - // pipeline_desc.is_wireframe = 1; - // } - // for (u32 i = 0; i < countof(cmd->rasterize.render_target_descs); ++i) - // { - // G_RenderTargetDesc desc = cmd->rasterize.render_target_descs[i]; - // G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); - // if (rt) - // { - // pipeline_desc.render_target_formats[i] = rt->texture_format; - // pipeline_desc.render_target_blend_modes[i] = desc.blend; - // } - // else - // { - // pipeline_desc.render_target_formats[i] = G_Format_Unknown; - // } - // } - // pipeline = G_D12_PipelineFromDesc(pipeline_desc); - // } - - // // Create ibv - // u32 indices_count = 0; - // D3D12_INDEX_BUFFER_VIEW ibv = Zi; - // { - // G_IndexBufferDesc desc = cmd->rasterize.index_buffer_desc; - // if (desc.index_count > 0) - // { - // G_D12_Resource *index_buffer_resource = G_D12_ResourceFromHandle(desc.resource); - // 
ibv.BufferLocation = index_buffer_resource->buffer_gpu_address; - // ibv.SizeInBytes = desc.index_size * desc.index_count; - // if (desc.index_size == 2) - // { - // ibv.Format = DXGI_FORMAT_R16_UINT; - // indices_count = ibv.SizeInBytes / 2; - // } - // else if (desc.index_size == 4) - // { - // ibv.Format = DXGI_FORMAT_R32_UINT; - // indices_count = ibv.SizeInBytes / 4; - // } - // else - // { - // Assert(0); // Invalid index size - // } - // } - // } - - // // Prepare & dispatch - // if (pipeline && indices_count > 0) - // { - // // Set descriptor heaps - // if (!descriptor_heaps_set) - // { - // ID3D12DescriptorHeap *heaps[] = { - // G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_CbvSrvUav].d3d_heap, - // G_D12.descriptor_heaps[G_D12_DescriptorHeapKind_Sampler].d3d_heap, - // }; - // ID3D12GraphicsCommandList_SetDescriptorHeaps(d3d_cl, countof(heaps), heaps); - // descriptor_heaps_set = 1; - // } - - // // Bind rootsig - // if (!graphics_rootsig_set) - // { - // ID3D12GraphicsCommandList_SetGraphicsRootSignature(d3d_cl, G_D12.bindless_rootsig); - // graphics_rootsig_set = 1; - // } - - // // Bind pipeline - // if (pipeline != bound_pipeline) - // { - // ID3D12GraphicsCommandList_SetPipelineState(d3d_cl, pipeline->pso); - // bound_pipeline = pipeline; - // } - - // // Update root constants - // for (i32 slot = 0; slot < countof(slotted_constants); ++slot) - // { - // if (bound_graphics_constants[slot] != slotted_constants[slot]) - // { - // ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstant(d3d_cl, slot, slotted_constants[slot], 0); - // bound_graphics_constants[slot] = slotted_constants[slot]; - // } - // } - - // // Set viewport - // { - // D3D12_VIEWPORT viewport = Zi; - // { - // Rng3 range = cmd->rasterize.viewport; - // viewport.TopLeftX = range.p0.x; - // viewport.TopLeftY = range.p0.y; - // viewport.Width = range.p1.x - range.p0.x; - // viewport.Height = range.p1.y - range.p0.y; - // viewport.MinDepth = range.p0.z; - // viewport.MaxDepth = 
range.p1.z; - // } - // if (!MatchStruct(&viewport, &bound_viewport)) - // { - // bound_viewport = viewport; - // ID3D12GraphicsCommandList_RSSetViewports(d3d_cl, 1, &viewport); - // } - // } - - // // Set scissor - // { - // D3D12_RECT scissor = Zi; - // { - // Rng2 range = cmd->rasterize.scissor; - // scissor.left = range.p0.x; - // scissor.top = range.p0.y; - // scissor.right = range.p1.x; - // scissor.bottom = range.p1.y; - // } - // if (!MatchStruct(&scissor, &bound_scissor)) - // { - // bound_scissor = scissor; - // ID3D12GraphicsCommandList_RSSetScissorRects(d3d_cl, 1, &scissor); - // } - // } - - // // Set topology - // { - // D3D_PRIMITIVE_TOPOLOGY topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; - // switch (cmd->rasterize.raster_mode) - // { - // default: Assert(0); break; - // case G_RasterMode_PointList: topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; break; - // case G_RasterMode_LineList: topology = D3D_PRIMITIVE_TOPOLOGY_LINELIST; break; - // case G_RasterMode_LineStrip: topology = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; break; - // case G_RasterMode_TriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - // case G_RasterMode_TriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - // case G_RasterMode_WireTriangleList: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break; - // case G_RasterMode_WireTriangleStrip: topology = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; break; - // } - // if (topology != bound_primitive_topology) - // { - // ID3D12GraphicsCommandList_IASetPrimitiveTopology(d3d_cl, topology); - // } - // } - - // // Set index buffer - // if (!MatchStruct(&ibv, &bound_ibv)) - // { - // ID3D12GraphicsCommandList_IASetIndexBuffer(d3d_cl, &ibv); - // bound_ibv = ibv; - // } - - // // Bind render targets - // u32 barriers_count = 0; - // D3D12_RESOURCE_BARRIER barriers[G_MaxRenderTargets] = Zi; - // { - // u32 rtvs_count = 0; - // b32 om_dirty = 0; - // for (u32 rtv_idx = 0; rtv_idx < 
countof(cmd->rasterize.render_target_descs); ++rtv_idx) - // { - // G_RenderTargetDesc desc = cmd->rasterize.render_target_descs[rtv_idx]; - // G_D12_Resource *rt = G_D12_ResourceFromHandle(desc.resource); - // if (rt) - // { - // { - // D3D12_RESOURCE_BARRIER *barrier = &barriers[barriers_count]; - // barrier->Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // barrier->Transition.pResource = rt->d3d_resource; - // barrier->Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - // barrier->Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - // barrier->Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - // ++barriers_count; - // } - // if (bound_render_target_uids[rtv_idx] != rt->uid + desc.mip) - // { - // G_D12_Descriptor *rtv_descriptor = rcl->rtv_descriptors[rtv_idx]; - // G_D12_InitRtv(rt, rtv_descriptor->handle, desc.mip); - // bound_render_target_uids[rtv_idx] = rt->uid + desc.mip; - // om_dirty = 1; - // } - // ++rtvs_count; - // } - // else - // { - // break; - // } - // } - // if (om_dirty) - // { - // D3D12_CPU_DESCRIPTOR_HANDLE rtv_handles[G_MaxRenderTargets] = Zi; - // for (u32 rtv_idx = 0; rtv_idx < rtvs_count; ++rtv_idx) - // { - // rtv_handles[rtv_idx] = rcl->rtv_descriptors[rtv_idx]->handle; - // } - // ID3D12GraphicsCommandList_OMSetRenderTargets(d3d_cl, rtvs_count, rtv_handles, 0, 0); - // } - // } - - // // Dispatch - // ID3D12GraphicsCommandList_DrawIndexedInstanced(d3d_cl, indices_count, cmd->rasterize.instances_count, 0, 0, 0); - - // // FIXME: Remove this (only apply during Barrier command on any implicit resource transitions that occured) - // // FIXME: Does the implicit promotion happen during Draw? Or during OMSetRenderTargets? 
- // ID3D12GraphicsCommandList_ResourceBarrier(d3d_cl, barriers_count, barriers); - // } - - // cmd_idx += 1; - // } break; - - // //- Clear rtv - - // case G_D12_CmdKind_ClearRtv: - // { - // G_D12_Resource *rt = cmd->clear_rtv.render_target; - // f32 clear_color[4] = Zi; - // { - // clear_color[0] = cmd->clear_rtv.color.x; - // clear_color[1] = cmd->clear_rtv.color.y; - // clear_color[2] = cmd->clear_rtv.color.z; - // clear_color[3] = cmd->clear_rtv.color.w; - // } - // D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle = rcl->rtv_clear_descriptor->handle; - // if (bound_render_clear_target_uid != rt->uid + cmd->clear_rtv.mip) - // { - // G_D12_InitRtv(rt, rtv_handle, cmd->clear_rtv.mip); - // bound_render_clear_target_uid = rt->uid + cmd->clear_rtv.mip; - // } - // ID3D12GraphicsCommandList_ClearRenderTargetView(d3d_cl, rtv_handle, clear_color, 0, 0); - // cmd_idx += 1; - - // // FIXME: Remove this (only apply during Barrier command on any implicit resource transitions that occured) - // D3D12_RESOURCE_BARRIER barrier = Zi; - // { - // barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // barrier.Transition.pResource = rt->d3d_resource; - // barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - // barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - // barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - // } - // ID3D12GraphicsCommandList_ResourceBarrier(d3d_cl, 1, &barrier); - // } break; - - // //- Discard rtv - - // case G_D12_CmdKind_DiscardRtv: - // { - // D3D12_DISCARD_REGION region = Zi; - // region.FirstSubresource = cmd->discard_rtv.mip; - // region.NumSubresources = 1; - // G_D12_Resource *resource = cmd->discard_rtv.render_target; - // ID3D12GraphicsCommandList_DiscardResource(d3d_cl, resource->d3d_resource, 0); - // cmd_idx += 1; - - // // FIXME: Remove this (only apply during Barrier command on any implicit resource transitions that occured) - // D3D12_RESOURCE_BARRIER barrier = Zi; - // { - // 
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - // barrier.Transition.pResource = resource->d3d_resource; - // barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - // barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - // barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; - // } - // ID3D12GraphicsCommandList_ResourceBarrier(d3d_cl, 1, &barrier); - // } break; - - // //- Log - - // case G_D12_CmdKind_Log: - // { - // // FIXME - - // // G_D12_Resource *resource = cmd->log.resource; - // // String resource_name = STRING(resource->name_len, resource->name_text); - - // // String layouts_str = Zi; - // // { - // // StringList layout_names = Zi; - // // for (i32 mip_idx = 0; mip_idx < resource->texture_mips; ++mip_idx) - // // { - // // String layout_name = G_D12_NameFromBarrierLayout(resource->cmdlist_texture_layouts[mip_idx]); - // // String layout_str = StringF(scratch.arena, "[%F] %F", FmtSint(mip_idx), FmtString(layout_name)); - // // PushStringToList(scratch.arena, &layout_names, layout_str); - // // } - // // layouts_str = StringFromList(scratch.arena, layout_names, Lit(", ")); - // // } - - // // String msg = StringF( - // // scratch.arena, - // // "[Gpu command list resource log] uid: %F, name: \"%F\", layouts: { %F }", - // // FmtUint(resource->uid), - // // FmtString(resource_name), - // // FmtString(layouts_str) - // // ); - // // LogDebug(msg); - - // cmd_idx += 1; - // } break; - // } - // } - // } - - // // End dx12 command list - // i64 completion_target = G_D12_CommitRawCommandList(rcl); - - - - - - - + // End dx12 command list + i64 completion_target = G_D12_CommitRawCommandList(rcl); // Attach completion info to staging regions for (G_D12_StagingRegionNode *n = cl->first_staging_region; n;) @@ -3142,6 +2807,10 @@ i64 G_CommitCommandList(G_CommandListHandle cl_handle) Unlock(&lock); } + + + + // // Attach completion info to resources // for (G_D12_Resource *r = cl->reset_resources.first; r;) 
// { @@ -3390,11 +3059,12 @@ void G_SetConstantEx(G_CommandListHandle cl_handle, i32 slot, void *src_32bit, u //- Memory sync -void G_Barrier(G_CommandListHandle cl_handle) +void G_MemorySyncEx(G_CommandListHandle cl_handle, G_MemoryBarrierDesc desc) { G_D12_CmdList *cl = G_D12_CmdListFromHandle(cl_handle); G_D12_Cmd *cmd = G_D12_PushCmd(cl); cmd->kind = G_D12_CmdKind_Barrier; + cmd->barrier.desc = desc; } //- Compute @@ -3751,13 +3421,14 @@ G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Forma backbuffer->flags = G_ResourceFlag_AllowRenderTarget; backbuffer->uid = Atomic64FetchAdd(&G_D12.resource_creation_gen.v, 1) + 1; - ID3D12Resource_GetDesc(d3d_resource, &backbuffer->d3d_desc); + ID3D12Resource_GetDesc(d3d_resource, (D3D12_RESOURCE_DESC *)&backbuffer->d3d_desc); backbuffer->d3d_resource = d3d_resource; backbuffer->is_texture = 1; backbuffer->texture_format = format; backbuffer->texture_dims = VEC3I32(size.x, size.y, 1); backbuffer->texture_mips = 1; + backbuffer->cmdlist_texture_layouts[0] = D3D12_BARRIER_LAYOUT_PRESENT; backbuffer->swapchain = swapchain; } } @@ -3783,21 +3454,6 @@ G_ResourceHandle G_PrepareBackbuffer(G_SwapchainHandle swapchain_handle, G_Forma cur_backbuffer = &swapchain->backbuffers[backbuffer_idx]; } - // Transition backbuffer to render target - { - G_D12_RawCommandList *rcl = G_D12_PrepareRawCommandList(G_QueueKind_Direct); - { - D3D12_RESOURCE_BARRIER barrier = Zi; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = cur_backbuffer->d3d_resource; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; - ID3D12GraphicsCommandList_ResourceBarrier(rcl->d3d_cl, 1, &barrier); - } - G_D12_CommitRawCommandList(rcl); - } - return G_D12_MakeHandle(G_ResourceHandle, cur_backbuffer); } @@ -3807,21 +3463,6 @@ void 
G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync) G_D12_Swapchain *swapchain = backbuffer->swapchain; G_D12_Queue *direct_queue = G_D12_QueueFromKind(G_QueueKind_Direct); - // Transition backbuffer to presentable - { - G_D12_RawCommandList *rcl = G_D12_PrepareRawCommandList(G_QueueKind_Direct); - { - D3D12_RESOURCE_BARRIER barrier = Zi; - barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; - barrier.Transition.pResource = backbuffer->d3d_resource; - barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; - barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; - barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; - ID3D12GraphicsCommandList_ResourceBarrier(rcl->d3d_cl, 1, &barrier); - } - G_D12_CommitRawCommandList(rcl); - } - u32 present_flags = 0; if (G_D12_TearingIsAllowed && vsync == 0) { @@ -3857,196 +3498,200 @@ void G_CommitBackbuffer(G_ResourceHandle backbuffer_handle, i32 vsync) void G_D12_CollectionWorkerEntryPoint(WaveLaneCtx *lane) { - // for (;;) - // { - // // FIXME: Remove this - // SleepSeconds(0.100); + for (;;) + { + // FIXME: Remove this + SleepSeconds(0.100); - // // Copy print-buffers to readback - // for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) - // { - // G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); - // if (!G_IsResourceNil(queue->print_buffer)) - // { - // G_CommandListHandle cl = G_PrepareCommandList(queue_kind); - // { - // // Copy print buffer to readback buffer - // G_CopyBufferToBuffer(cl, queue->print_readback_buffer, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size)); - // G_Barrier(cl); - // // Reset counters to 0 - // u8 zero[12] = Zi; - // G_CopyCpuToBuffer(cl, queue->print_buffer, 0, zero, RNGU64(0, sizeof(zero))); - // } - // G_CommitCommandList(cl); - // } - // } + // Copy print-buffers to readback + for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) + { + G_D12_Queue *queue = 
G_D12_QueueFromKind(queue_kind); + if (!G_IsResourceNil(queue->print_buffer)) + { + G_CommandListHandle cl = G_PrepareCommandList(queue_kind); + { + // Copy print buffer to readback buffer + G_CopyBufferToBuffer(cl, queue->print_readback_buffer, 0, queue->print_buffer, RNGU64(0, queue->print_buffer_size)); + // Reset counters to 0 + G_MemorySync( + cl, queue->print_buffer, + G_Stage_Copy, G_Access_CopyRead, + G_Stage_Copy, G_Access_CopyWrite + ); + u8 zero[12] = Zi; + G_CopyCpuToBuffer(cl, queue->print_buffer, 0, zero, RNGU64(0, sizeof(zero))); + } + G_CommitCommandList(cl); + } + } - // // TODO: Collect asynchronously - // G_SyncCpu(G_QueueMask_Direct | G_QueueMask_AsyncCompute); + // TODO: Collect asynchronously + G_SyncCpu(G_QueueMask_Direct | G_QueueMask_AsyncCompute); - // for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) - // { - // G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); - // if (!G_IsResourceNil(queue->print_buffer)) - // { - // u32 attempted_print_bytes_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 0); // The number of bytes shaders attempted to write - // u32 prints_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 1); // The number of shader prints that are in the buffer - // u32 overflows_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 2); // The number of shader prints that could not fit in the buffer - // u8 *start = G_StructFromResource(queue->print_readback_buffer, u8) + 12; + for (G_QueueKind queue_kind = 0; queue_kind < G_QueueKind_COUNT; ++queue_kind) + { + G_D12_Queue *queue = G_D12_QueueFromKind(queue_kind); + if (!G_IsResourceNil(queue->print_buffer)) + { + u32 attempted_print_bytes_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 0); // The number of bytes shaders attempted to write + u32 prints_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 1); // The number of shader prints that are in the buffer + 
u32 overflows_count = *(G_StructFromResource(queue->print_readback_buffer, u32) + 2); // The number of shader prints that could not fit in the buffer + u8 *start = G_StructFromResource(queue->print_readback_buffer, u8) + 12; - // // Deserialize - // if (GPU_SHADER_PRINT_LOG) - // { - // if (prints_count > 0) - // { - // LogDebugF( - // "Forwarding logs collected from GPU - Resident prints: %F, Total attempted prints: %F, Total attempted bytes: %F", - // FmtUint(prints_count), - // FmtUint(prints_count + overflows_count), - // FmtUint(attempted_print_bytes_count) - // ); - // } + // Deserialize + if (GPU_SHADER_PRINT_LOG) + { + if (prints_count > 0) + { + LogDebugF( + "Forwarding logs collected from GPU - Resident prints: %F, Total attempted prints: %F, Total attempted bytes: %F", + FmtUint(prints_count), + FmtUint(prints_count + overflows_count), + FmtUint(attempted_print_bytes_count) + ); + } - // // FIXME: Remove this - // TempArena scratch = BeginScratchNoConflict(); - // u8 *at = start; - // { - // for (u32 print_idx = 0; print_idx < prints_count; ++print_idx) - // { - // u32 chars_count = 0; - // u32 args_count = 0; - // b32 internal_overflow = 0; - // { - // u32 header = *(u32 *)at; - // chars_count = (header & 0x0000FFFF) >> 0; - // args_count = (header & 0x7FFF0000) >> 16; - // internal_overflow = (header & 0xF0000000) >> 31; - // at += 4; - // } + // FIXME: Remove this + TempArena scratch = BeginScratchNoConflict(); + u8 *at = start; + { + for (u32 print_idx = 0; print_idx < prints_count; ++print_idx) + { + u32 chars_count = 0; + u32 args_count = 0; + b32 internal_overflow = 0; + { + u32 header = *(u32 *)at; + chars_count = (header & 0x0000FFFF) >> 0; + args_count = (header & 0x7FFF0000) >> 16; + internal_overflow = (header & 0xF0000000) >> 31; + at += 4; + } - // String fmt = Zi; - // { - // fmt.len = chars_count; - // fmt.text = at; - // at += chars_count; - // } + String fmt = Zi; + { + fmt.len = chars_count; + fmt.text = at; + at += chars_count; + } - 
// FmtArgArray args = Zi; - // args.count = args_count; - // { - // if (args_count > 0) - // { - // args.args = PushStructs(scratch.arena, FmtArg, args_count); - // for (u32 arg_idx = 0; arg_idx < args_count; ++arg_idx) - // { - // G_FmtArgKind gpu_kind = (G_FmtArgKind)(*at); - // at += 1; + FmtArgArray args = Zi; + args.count = args_count; + { + if (args_count > 0) + { + args.args = PushStructs(scratch.arena, FmtArg, args_count); + for (u32 arg_idx = 0; arg_idx < args_count; ++arg_idx) + { + G_FmtArgKind gpu_kind = (G_FmtArgKind)(*at); + at += 1; - // FmtArg *dst = &args.args[arg_idx]; - // switch (gpu_kind) - // { - // // Translate unsigned integer args - // case G_FmtArgKind_Uint: - // { - // u32 gpu_value = *(u32 *)at; - // *dst = FmtUint(gpu_value); - // at += 4; - // } break; - // case G_FmtArgKind_Uint2: - // { - // Vec2U32 gpu_value = *(Vec2U32 *)at; - // *dst = FmtUint2(gpu_value); - // at += 8; - // } break; - // case G_FmtArgKind_Uint3: - // { - // Vec3U32 gpu_value = *(Vec3U32 *)at; - // *dst = FmtUint3(gpu_value); - // at += 12; - // } break; - // case G_FmtArgKind_Uint4: - // { - // Vec4U32 gpu_value = *(Vec4U32 *)at; - // *dst = FmtUint4(gpu_value); - // at += 16; - // } break; + FmtArg *dst = &args.args[arg_idx]; + switch (gpu_kind) + { + // Translate unsigned integer args + case G_FmtArgKind_Uint: + { + u32 gpu_value = *(u32 *)at; + *dst = FmtUint(gpu_value); + at += 4; + } break; + case G_FmtArgKind_Uint2: + { + Vec2U32 gpu_value = *(Vec2U32 *)at; + *dst = FmtUint2(gpu_value); + at += 8; + } break; + case G_FmtArgKind_Uint3: + { + Vec3U32 gpu_value = *(Vec3U32 *)at; + *dst = FmtUint3(gpu_value); + at += 12; + } break; + case G_FmtArgKind_Uint4: + { + Vec4U32 gpu_value = *(Vec4U32 *)at; + *dst = FmtUint4(gpu_value); + at += 16; + } break; - // // Translate signed integer args - // case G_FmtArgKind_Sint: - // { - // i32 gpu_value = *(i32 *)at; - // *dst = FmtSint(gpu_value); - // at += 4; - // } break; - // case G_FmtArgKind_Sint2: - // { - // 
Vec2I32 gpu_value = *(Vec2I32 *)at; - // *dst = FmtSint2(gpu_value); - // at += 8; - // } break; - // case G_FmtArgKind_Sint3: - // { - // Vec3I32 gpu_value = *(Vec3I32 *)at; - // *dst = FmtSint3(gpu_value); - // at += 12; - // } break; - // case G_FmtArgKind_Sint4: - // { - // Vec4I32 gpu_value = *(Vec4I32 *)at; - // *dst = FmtSint4(gpu_value); - // at += 16; - // } break; + // Translate signed integer args + case G_FmtArgKind_Sint: + { + i32 gpu_value = *(i32 *)at; + *dst = FmtSint(gpu_value); + at += 4; + } break; + case G_FmtArgKind_Sint2: + { + Vec2I32 gpu_value = *(Vec2I32 *)at; + *dst = FmtSint2(gpu_value); + at += 8; + } break; + case G_FmtArgKind_Sint3: + { + Vec3I32 gpu_value = *(Vec3I32 *)at; + *dst = FmtSint3(gpu_value); + at += 12; + } break; + case G_FmtArgKind_Sint4: + { + Vec4I32 gpu_value = *(Vec4I32 *)at; + *dst = FmtSint4(gpu_value); + at += 16; + } break; - // // Translate float args - // case G_FmtArgKind_Float: - // { - // f32 gpu_value = *(f32 *)at; - // *dst = FmtFloat(gpu_value); - // at += 4; - // } break; - // case G_FmtArgKind_Float2: - // { - // Vec2 gpu_value = *(Vec2 *)at; - // *dst = FmtFloat2(gpu_value); - // at += 8; - // } break; - // case G_FmtArgKind_Float3: - // { - // Vec3 gpu_value = *(Vec3 *)at; - // *dst = FmtFloat3(gpu_value); - // at += 12; - // } break; - // case G_FmtArgKind_Float4: - // { - // Vec4 gpu_value = *(Vec4 *)at; - // *dst = FmtFloat4(gpu_value); - // at += 16; - // } break; - // } - // dst->p = 16; - // } - // } - // } + // Translate float args + case G_FmtArgKind_Float: + { + f32 gpu_value = *(f32 *)at; + *dst = FmtFloat(gpu_value); + at += 4; + } break; + case G_FmtArgKind_Float2: + { + Vec2 gpu_value = *(Vec2 *)at; + *dst = FmtFloat2(gpu_value); + at += 8; + } break; + case G_FmtArgKind_Float3: + { + Vec3 gpu_value = *(Vec3 *)at; + *dst = FmtFloat3(gpu_value); + at += 12; + } break; + case G_FmtArgKind_Float4: + { + Vec4 gpu_value = *(Vec4 *)at; + *dst = FmtFloat4(gpu_value); + at += 16; + } break; + } + 
dst->p = 16; + } + } + } - // String final_str = Zi; - // if (internal_overflow) - // { - // final_str = Lit("[Shader PrintF is too large]"); - // } - // else - // { - // final_str = FormatString(scratch.arena, fmt, args); - // } - // LogDebug(final_str); + String final_str = Zi; + if (internal_overflow) + { + final_str = Lit("[Shader PrintF is too large]"); + } + else + { + final_str = FormatString(scratch.arena, fmt, args); + } + LogDebug(final_str); - // at = (u8 *)AlignU64((u64)at, 4); - // } - // } - // EndScratch(scratch); - // } - // } - // } - // } + at = (u8 *)AlignU64((u64)at, 4); + } + } + EndScratch(scratch); + } + } + } + } } //////////////////////////////////////////////////////////// diff --git a/src/gpu/gpu_dx12/gpu_dx12_core.h b/src/gpu/gpu_dx12/gpu_dx12_core.h index 4c4a2871..72d7d6fe 100644 --- a/src/gpu/gpu_dx12/gpu_dx12_core.h +++ b/src/gpu/gpu_dx12/gpu_dx12_core.h @@ -72,7 +72,7 @@ Struct(G_D12_Resource) u64 uid; // D3D12 resource - D3D12_RESOURCE_DESC d3d_desc; + D3D12_RESOURCE_DESC1 d3d_desc; ID3D12Resource *d3d_resource; D3D12_GPU_VIRTUAL_ADDRESS buffer_gpu_address; void *mapped; @@ -86,6 +86,7 @@ Struct(G_D12_Resource) G_Format texture_format; Vec3I32 texture_dims; i32 texture_mips; + D3D12_BARRIER_LAYOUT cmdlist_texture_layouts[G_D12_MaxMips]; // Sampler info G_SamplerDesc sampler_desc; @@ -261,7 +262,7 @@ Struct(G_D12_RawCommandList) u64 commit_fence_target; ID3D12CommandAllocator *d3d_ca; - ID3D12GraphicsCommandList *d3d_cl; + ID3D12GraphicsCommandList7 *d3d_cl; // Direct queue command lists keep a constant list of CPU-only descriptors G_D12_Descriptor *rtv_descriptors[G_MaxRenderTargets]; @@ -323,7 +324,10 @@ Struct(G_D12_Cmd) struct { + G_MemoryBarrierDesc desc; + // Post-batch data + b32 is_end_of_batch; u64 batch_gen; } barrier; @@ -474,7 +478,7 @@ Struct(G_D12_Ctx) // Device IDXGIFactory6 *factory; IDXGIAdapter3 *adapter; - ID3D12Device1 *device; + ID3D12Device10 *device; // Release-queue Mutex pending_releases_mutex; @@ -505,7 
+509,10 @@ G_D12_Resource *G_D12_ResourceFromHandle(G_ResourceHandle handle); G_D12_Swapchain *G_D12_SwapchainFromHandle(G_SwapchainHandle handle); DXGI_FORMAT G_D12_DxgiFormatFromGpuFormat(G_Format format); -b32 G_D12_IsSimultaneous(G_D12_Resource *resource); +D3D12_BARRIER_SYNC G_D12_BarrierSyncFromStages(G_Stage stages); +D3D12_BARRIER_ACCESS G_D12_BarrierAccessFromAccesses(G_Access accesses); +D3D12_BARRIER_LAYOUT G_D12_BarrierLayoutFromLayout(G_Layout layout); +String G_D12_NameFromBarrierLayout(D3D12_BARRIER_LAYOUT layout); void G_D12_InitRtv(G_D12_Resource *resource, D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle, i32 mip); diff --git a/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat b/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat new file mode 100644 index 00000000..9cfa41c0 --- /dev/null +++ b/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/D3D12Core.dat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55430c370d4f012ef7b2e7854fd194ed8abb2c94a537835be12bd38f9ff80e67 +size 1662796 diff --git a/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat b/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat new file mode 100644 index 00000000..222b2b8b --- /dev/null +++ b/src/gpu/gpu_dx12/gpu_dx12_res/AgilitySDK/1.618.5/d3d12SDKLayers.dat @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd64750d758873691414f705c2fdff08ffd25437f77198d3ee00c9040f48856a +size 1775662 diff --git a/src/platform/platform.h b/src/platform/platform.h index 6c341e3e..4b8da801 100644 --- a/src/platform/platform.h +++ b/src/platform/platform.h @@ -26,6 +26,19 @@ Struct(PLT_FileMap) b32 valid; }; +//////////////////////////////////////////////////////////// +//~ Compression types + +Enum(PLT_CompressionLevel) +{ + PLT_CompressionLevel_0, // Fastest + PLT_CompressionLevel_1, + PLT_CompressionLevel_2, + PLT_CompressionLevel_3, + + PLT_CompressionLevel_COUNT +}; + 
//////////////////////////////////////////////////////////// //~ Message box types @@ -74,6 +87,12 @@ PLT_FileMap PLT_OpenFileMap(PLT_File file); void PLT_CloseFileMap(PLT_FileMap map); String PLT_GetFileMapData(PLT_FileMap map); +//////////////////////////////////////////////////////////// +//~ @hookdecl Compression + +String PLT_Compress(Arena *arena, String data, PLT_CompressionLevel level); +String PLT_Decompress(Arena *arena, String data, PLT_CompressionLevel level); + //////////////////////////////////////////////////////////// //~ @hookdecl Utils diff --git a/src/platform/platform_win32/platform_win32.c b/src/platform/platform_win32/platform_win32.c index ada3d4ea..2a499bfa 100644 --- a/src/platform/platform_win32/platform_win32.c +++ b/src/platform/platform_win32/platform_win32.c @@ -12,6 +12,19 @@ void PLT_Bootstrap(void) DispatchWave(Lit("Win32 timer sync"), 1, PLT_W32_SyncTimerForever, 0); } +DWORD PLT_W32_CompressionAlgorithmFromLevel(PLT_CompressionLevel level) +{ + // Win32 compression algorithms from fastest -> slowest + PERSIST Readonly DWORD algos[] = { + COMPRESS_ALGORITHM_XPRESS, + COMPRESS_ALGORITHM_XPRESS_HUFF, + COMPRESS_ALGORITHM_MSZIP, + COMPRESS_ALGORITHM_LZMS, + }; + i32 algo_idx = ClampI32(level, 0, countof(algos)); + return algos[algo_idx]; +} + //////////////////////////////////////////////////////////// //~ Time @@ -258,50 +271,41 @@ void PLT_CloseFile(PLT_File file) String PLT_ReadFile(Arena *arena, PLT_File file) { - i64 size = 0; - GetFileSizeEx((HANDLE)file.handle, (PLARGE_INTEGER)&size); - - String result; - result.len = size; - if (size > 0) + String result = Zi; + HANDLE handle = (HANDLE)file.handle; + u32 chunk_size = Kibi(64); + result.text = ArenaNext(arena, u8); + for (;;) { - // ReadFile returns non-zero on success - // TODO: error checking - result.text = PushStructsNoZero(arena, u8, size); - ReadFile( - (HANDLE)file.handle, - result.text, - (DWORD)result.len, - 0, - 0 - ); + u8 *chunk = PushStructsNoZero(arena, u8, 
chunk_size); + DWORD chunk_bytes_read = 0; + ReadFile(handle, chunk, chunk_size, &chunk_bytes_read, 0); + result.len += chunk_bytes_read; + if (chunk_bytes_read < chunk_size) + { + PopStructsNoCopy(arena, u8, chunk_size - chunk_bytes_read); + break; + } } return result; } void PLT_WriteFile(PLT_File file, String data) { - // TODO: Check what the real data limit is and chunk sequentially based on - // that (rather than failing) - if (data.len >= 0x7FFF) + u32 chunk_size = Kibi(64); + u32 pos = 0; + while (pos < data.len) { - TempArena scratch = BeginScratchNoConflict(); - Panic(StringF( - scratch.arena, - "Tried to write too many bytes to disk (%F)", - FmtUint(data.len) - )); - EndScratch(scratch); + u32 part_size = MinU32(chunk_size, data.len - pos); + WriteFile( + (HANDLE)file.handle, + data.text + pos, + part_size, + 0, + 0 + ); + pos += part_size; } - - // WriteFile returns TRUE on success - WriteFile( - (HANDLE)file.handle, - data.text, - (DWORD)data.len, - 0, - 0 - ); } u64 PLT_GetFileSize(PLT_File file) @@ -412,6 +416,97 @@ String PLT_GetFileMapData(PLT_FileMap map) return map.mapped_memory; } +//////////////////////////////////////////////////////////// +//~ @hookimpl compression + +String PLT_Compress(Arena *arena, String data, PLT_CompressionLevel level) +{ + String result = Zi; + b32 ok = 1; + DWORD algo = PLT_W32_CompressionAlgorithmFromLevel(level); + + COMPRESSOR_HANDLE compressor = 0; + if (ok) + { + ok = CreateCompressor(algo, 0, &compressor); + } + + SIZE_T compressed_cap = data.len; + if (ok) + { + Compress(compressor, data.text, data.len, 0, 0, &compressed_cap); + } + + if (ok) + { + SIZE_T written_count = 0; + u8 *compressed = PushStructsNoZero(arena, u8, compressed_cap); + ok = Compress(compressor, data.text, data.len, compressed, compressed_cap, &written_count); + if (ok) + { + result.text = compressed; + result.len = written_count; + PopBytesNoCopy(arena, compressed_cap - written_count); + } + } + + if (compressor) + { + 
CloseCompressor(compressor); + } + + return result; +} + +String PLT_Decompress(Arena *arena, String data, PLT_CompressionLevel level) +{ + String result = Zi; + b32 ok = data.len > 0; + DWORD algo = PLT_W32_CompressionAlgorithmFromLevel(level); + + DECOMPRESSOR_HANDLE decompressor = 0; + if (ok) + { + ok = CreateDecompressor(algo, 0, &decompressor); + } + + SIZE_T out_cap = MaxI64(NextPow2U64(data.len * 8), Kibi(4)); + + while (ok) + { + u8 *out = PushStructsNoZero(arena, u8, out_cap); + + SIZE_T written_count = 0; + b32 decompress_ok = Decompress(decompressor, data.text, data.len, out, out_cap, &written_count); + if (decompress_ok) + { + result.text = out; + result.len = written_count; + PopBytesNoCopy(arena, out_cap - written_count); + break; + } + else + { + DWORD err = GetLastError(); + if (err == ERROR_INSUFFICIENT_BUFFER) + { + out_cap *= 2; + } + else + { + ok = 0; + } + } + } + + if (decompressor) + { + CloseDecompressor(decompressor); + } + + return result; +} + //////////////////////////////////////////////////////////// //~ @hookimpl Utils diff --git a/src/platform/platform_win32/platform_win32.h b/src/platform/platform_win32/platform_win32.h index f746f16d..33994a5d 100644 --- a/src/platform/platform_win32/platform_win32.h +++ b/src/platform/platform_win32/platform_win32.h @@ -29,6 +29,11 @@ Struct(PLT_W32_Ctx) extern PLT_W32_Ctx PLT_W32; +//////////////////////////////////////////////////////////// +//~ Helpers + +DWORD PLT_W32_CompressionAlgorithmFromLevel(PLT_CompressionLevel level); + //////////////////////////////////////////////////////////// //~ Time diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c index 85f1d950..d041882a 100644 --- a/src/pp/pp_vis/pp_vis_core.c +++ b/src/pp/pp_vis/pp_vis_core.c @@ -416,6 +416,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R8_Uint, tiles_dims, + G_Layout_DirectQueue_General, .flags = G_ResourceFlag_ZeroMemory, .name = Lit("Tiles") ); @@ -440,6 +441,7 @@ void 
V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R32_Uint, cells_dims, + G_Layout_DirectQueue_General, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = StringF(perm, "Particle cells - layer %F", FmtSint(layer)) ); @@ -452,6 +454,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R32_Uint, cells_dims, + G_Layout_DirectQueue_General, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = StringF(perm, "Particle densities - layer %F", FmtSint(layer)) ); @@ -466,6 +469,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R16G16B16A16_Float, cells_dims, + G_Layout_DirectQueue_General, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Stains") ); @@ -477,6 +481,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R16G16B16A16_Float, cells_dims, + G_Layout_DirectQueue_General, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Dry stains") ); @@ -488,6 +493,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R32_Float, cells_dims, + G_Layout_DirectQueue_General, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Drynesses") ); @@ -499,6 +505,7 @@ void V_TickForever(WaveLaneCtx *lane) gpu_perm, cl, G_Format_R32_Uint, cells_dims, + G_Layout_DirectQueue_General, .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, .name = Lit("Occluders cells") ); @@ -2494,9 +2501,9 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Push test emitter - if (frame->held_buttons[Button_F]) + // if (frame->held_buttons[Button_F]) // if (frame->held_buttons[Button_F] && !prev_frame->held_buttons[Button_F]) - // if (0) + if (0) { { V_Emitter emitter = Zi; @@ -2556,9 +2563,9 @@ void V_TickForever(WaveLaneCtx *lane) ////////////////////////////// //- Push test explosion - if (frame->held_buttons[Button_G]) + // if 
(frame->held_buttons[Button_G]) // if (frame->held_buttons[Button_G] && !prev_frame->held_buttons[Button_G]) - // if (0) + if (0) { // Fire { @@ -4784,6 +4791,7 @@ void V_TickForever(WaveLaneCtx *lane) frame->gpu_arena, frame->cl, G_Format_R16G16B16A16_Float, frame->screen_dims, + G_Layout_DirectQueue_RenderTarget, .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)) ); @@ -4797,6 +4805,7 @@ void V_TickForever(WaveLaneCtx *lane) frame->gpu_arena, frame->cl, G_Format_R16G16B16A16_Float, G_DimsFromMip2D(G_Count2D(screen_target), 1), + G_Layout_DirectQueue_General, .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, .name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)), .max_mips = 64 @@ -4812,6 +4821,7 @@ void V_TickForever(WaveLaneCtx *lane) frame->gpu_arena, frame->cl, G_Format_R16G16B16A16_Float, frame->screen_dims, + G_Layout_DirectQueue_RenderTarget, .flags = G_ResourceFlag_AllowRenderTarget, .name = StringF(frame->arena, "Albedo target [%F]", FmtSint(frame->tick)) ); @@ -4822,6 +4832,7 @@ void V_TickForever(WaveLaneCtx *lane) frame->gpu_arena, frame->cl, G_Format_R16G16B16A16_Float, frame->shade_dims, + G_Layout_DirectQueue_General, .flags = G_ResourceFlag_AllowShaderReadWrite, .name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick)) ); @@ -4888,7 +4899,7 @@ void V_TickForever(WaveLaneCtx *lane) G_SetConstant(frame->cl, V_GpuConst_NoiseTex, G_BasicNoiseTexture()); // Sync - G_Barrier(frame->cl); + G_DumbGlobalMemorySync(frame->cl); ////////////////////////////// //- Initialization pass @@ -4907,12 +4918,14 @@ void V_TickForever(WaveLaneCtx *lane) V.particle_seq = 0; } - // Prepare albedo RT + // Prepare RTs + G_DiscardRenderTarget(frame->cl, screen_target, 0); G_ClearRenderTarget(frame->cl, albedo_target, VEC4(0, 0, 0, 0), 0); - } - // Sync - G_Barrier(frame->cl); + // Sync + 
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_General); + G_DumbGlobalMemorySync(frame->cl); + } ////////////////////////////// //- Quads & emitters pass @@ -4932,7 +4945,10 @@ void V_TickForever(WaveLaneCtx *lane) G_Compute(frame->cl, V_EmitParticlesCS, V_ThreadGroupSizeFromBufferSize(frame->emitters_count)); // Sync particles & occluders - G_Barrier(frame->cl); + G_DumbGlobalMemorySync(frame->cl); + + // Transition albedo + G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_General); } ////////////////////////////// @@ -4943,7 +4959,7 @@ void V_TickForever(WaveLaneCtx *lane) G_Compute(frame->cl, V_SimParticlesCS, V_ThreadGroupSizeFromBufferSize(V_ParticlesCap)); // Sync cells - G_Barrier(frame->cl); + G_DumbGlobalMemorySync(frame->cl); } ////////////////////////////// @@ -4962,7 +4978,7 @@ void V_TickForever(WaveLaneCtx *lane) G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); // Sync screen tex - G_Barrier(frame->cl); + G_DumbGlobalMemorySync(frame->cl); } ////////////////////////////// @@ -4984,7 +5000,7 @@ void V_TickForever(WaveLaneCtx *lane) G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(down_dims)); - G_Barrier(frame->cl); + G_DumbGlobalMemorySync(frame->cl); } //- Upsample passes @@ -4995,7 +5011,7 @@ void V_TickForever(WaveLaneCtx *lane) G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(up_dims)); - G_Barrier(frame->cl); + G_DumbGlobalMemorySync(frame->cl); } } @@ -5005,13 +5021,15 @@ void V_TickForever(WaveLaneCtx *lane) { G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); - G_Barrier(frame->cl); + G_DumbGlobalMemorySync(frame->cl); } ////////////////////////////// //- Debug shapes pass { + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTarget); + G_Rasterize( frame->cl, 
V_DVertVS, V_DVertPS, @@ -5021,7 +5039,7 @@ void V_TickForever(WaveLaneCtx *lane) G_RasterMode_TriangleList ); - G_Barrier(frame->cl); + G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_General); } ////////////////////////////// diff --git a/src/sprite/sprite.c b/src/sprite/sprite.c index 29d67463..88e940bd 100644 --- a/src/sprite/sprite.c +++ b/src/sprite/sprite.c @@ -439,6 +439,7 @@ void SPR_TickAsync(WaveLaneCtx *lane, AsyncFrameLaneCtx *base_async_lane_frame) gpu_perm, cl, G_Format_R8G8B8A8_Unorm_Srgb, atlas->dims, + G_Layout_Simultaneous, .name = Lit("Sprite atlas") ); atlas->tex = G_PushTexture2DRef(gpu_perm, atlas->tex_res); diff --git a/src/ttf/ttf_dwrite/ttf_dwrite.h b/src/ttf/ttf_dwrite/ttf_dwrite.h index 08220b3a..3cb094d8 100644 --- a/src/ttf/ttf_dwrite/ttf_dwrite.h +++ b/src/ttf/ttf_dwrite/ttf_dwrite.h @@ -2,7 +2,6 @@ //~ Win32 libs #pragma comment(lib, "dwrite") -#pragma comment(lib, "gdi32") //////////////////////////////////////////////////////////// //~ DirectWrite types diff --git a/src/ui/ui_core.c b/src/ui/ui_core.c index c1b8678c..fe54311e 100644 --- a/src/ui/ui_core.c +++ b/src/ui/ui_core.c @@ -1703,6 +1703,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) frame->gpu_arena, frame->cl, G_Format_R16G16B16A16_Float, monitor_size, + G_Layout_DirectQueue_RenderTarget, .flags = G_ResourceFlag_AllowRenderTarget, .name = Lit("UI draw target") ); @@ -1738,7 +1739,7 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) G_SetConstant(frame->cl, UI_GpuConst_Params, params_ro); // Sync - G_Barrier(frame->cl); + G_DumbGlobalMemorySync(frame->cl); ////////////////////////////// //- Dispatch shaders @@ -1780,7 +1781,8 @@ void UI_EndFrame(UI_Frame *frame, i32 vsync) //- Backbuffer blit pass - G_Barrier(frame->cl); + G_DumbMemoryLayoutSync(frame->cl, draw_target, G_Layout_DirectQueue_General); + G_DumbMemoryLayoutSync(frame->cl, backbuffer, G_Layout_DirectQueue_RenderTarget); { G_Rasterize( @@ -1792,6 +1794,8 @@ void UI_EndFrame(UI_Frame *frame, 
i32 vsync) G_RasterMode_TriangleList ); } + + G_DumbMemoryLayoutSync(frame->cl, backbuffer, G_Layout_Common); } ////////////////////////////// diff --git a/src/window/window_win32/window_win32.c b/src/window/window_win32/window_win32.c index 7d36fbb9..b25c2537 100644 --- a/src/window/window_win32/window_win32.c +++ b/src/window/window_win32/window_win32.c @@ -1,10 +1,5 @@ WND_W32_Ctx WND_W32 = Zi; -//////////////////////////////////////////////////////////// -//~ Win32 libs - -#pragma comment(lib, "gdi32") - //////////////////////////////////////////////////////////// //~ @hookimpl Bootstrap diff --git a/tatus b/tatus deleted file mode 100644 index 1be2e11b..00000000 --- a/tatus +++ /dev/null @@ -1,926 +0,0 @@ -diff --git a/src/gpu/gpu_common.c b/src/gpu/gpu_common.c -index a9686d87..43835793 100644 ---- a/src/gpu/gpu_common.c -+++ b/src/gpu/gpu_common.c -@@ -25,7 +25,7 @@ void G_BootstrapCommon(void) - gpu_perm, cl, - G_Format_R8G8B8A8_Uint, - VEC2I32(8, 8), -- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present, -+ G_Layout_Simultaneous, - .flags = G_ResourceFlag_ZeroMemory - ); - G.blank_tex = G_PushTexture2DRef(gpu_perm, blank_tex); -@@ -44,7 +44,7 @@ void G_BootstrapCommon(void) - gpu_perm, cl, - G_Format_R16_Uint, - noise_dims, -- G_Layout_AnyQueue_ShaderRead_CopyRead_CopyWrite_Present -+ G_Layout_Simultaneous - ); - G_CopyCpuToTexture( - cl, -@@ -143,30 +143,54 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList -  - //- Mip -  --i32 G_DimsFromMip1D(i32 texture_dims, i32 mip) -+i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip) - { -- mip = ClampI32(mip, 0, 31); -+ mip = ClampI32(mip, -31, 31); - i32 result = 0; -- result = MaxI32(result >> mip, 1); -+ if (mip >= 0) -+ { -+ result = MaxI32(result >> mip, 1); -+ } -+ else -+ { -+ result = MaxI32(result << -mip, 1); -+ } - return result; - } -  --Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip) -+Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip) - { -- mip = ClampI32(mip, 
0, 31); -+ mip = ClampI32(mip, -31, 31); - Vec2I32 result = Zi; -- result.x = MaxI32(texture_dims.x >> mip, 1); -- result.y = MaxI32(texture_dims.y >> mip, 1); -+ if (mip >= 0) -+ { -+ result.x = MaxI32(mip0_dims.x >> mip, 1); -+ result.y = MaxI32(mip0_dims.y >> mip, 1); -+ } -+ else -+ { -+ result.x = MaxI32(mip0_dims.x << -mip, 1); -+ result.y = MaxI32(mip0_dims.y << -mip, 1); -+ } - return result; - } -  --Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip) -+Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip) - { -- mip = ClampI32(mip, 0, 31); -+ mip = ClampI32(mip, -31, 31); - Vec3I32 result = Zi; -- result.x = MaxI32(texture_dims.x >> mip, 1); -- result.y = MaxI32(texture_dims.y >> mip, 1); -- result.z = MaxI32(texture_dims.z >> mip, 1); -+ if (mip >= 0) -+ { -+ result.x = MaxI32(mip0_dims.x >> mip, 1); -+ result.y = MaxI32(mip0_dims.y >> mip, 1); -+ result.z = MaxI32(mip0_dims.z >> mip, 1); -+ } -+ else -+ { -+ result.x = MaxI32(mip0_dims.x << -mip, 1); -+ result.y = MaxI32(mip0_dims.y << -mip, 1); -+ result.z = MaxI32(mip0_dims.z << -mip, 1); -+ } - return result; - } -  -diff --git a/src/gpu/gpu_common.h b/src/gpu/gpu_common.h -index eb3ee6d2..03927040 100644 ---- a/src/gpu/gpu_common.h -+++ b/src/gpu/gpu_common.h -@@ -35,9 +35,9 @@ G_ResourceHandle G_PushBufferFromCpuCopy_(G_ArenaHandle gpu_arena, G_CommandList - G_PushBufferFromCpuCopy_((_arena), (_cl), (_src), (G_BufferDesc) { .size = (_src).len, __VA_ARGS__ }) -  - //- Mip --i32 G_DimsFromMip1D(i32 texture_dims, i32 mip); --Vec2I32 G_DimsFromMip2D(Vec2I32 texture_dims, i32 mip); --Vec3I32 G_DimsFromMip3D(Vec3I32 texture_dims, i32 mip); -+i32 G_DimsFromMip1D(i32 mip0_dims, i32 mip); -+Vec2I32 G_DimsFromMip2D(Vec2I32 mip0_dims, i32 mip); -+Vec3I32 G_DimsFromMip3D(Vec3I32 mip0_dims, i32 mip); -  - //- Viewport / scissor - Rng3 G_ViewportFromTexture(G_ResourceHandle texture); -diff --git a/src/gpu/gpu_core.h b/src/gpu/gpu_core.h -index 7e1b329a..bed18c93 100644 ---- a/src/gpu/gpu_core.h -+++ 
b/src/gpu/gpu_core.h -@@ -242,18 +242,16 @@ Enum(G_Access) - G_Access_IndexBuffer = (1 << 8), - G_Access_IndirectArgument = (1 << 9), -  -- G_Access_All = 0xFFFFFFFF -+ G_Access_All = 0xFFFFFFFF // Represents all accesses relevant to the specified sync stage - }; -  - Enum(G_Layout) - { - G_Layout_NoChange, -  -- // "Simultaneous" allows a resource to be used on any queue with any access -- // type, as long as there is only one writer at a time, and the writer is not -- // writing to any texels currently being read. -- // Resources cannot transition to/from this layout. They must be created -- // with it and are locked to it. -+ // Simultaneous layout allows a resource to be used on any queue with any -+ // access type (except depth-stencil). Resources cannot transition to/from -+ // this layout, they must be created with it. - G_Layout_Simultaneous, // D3D12_BARRIER_LAYOUT_COMMON + D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS -  - G_Layout_Undefined, // D3D12_BARRIER_LAYOUT_UNDEFINED -diff --git a/src/pp/pp_vis/pp_vis.lay b/src/pp/pp_vis/pp_vis.lay -index f72dc528..2d916376 100644 ---- a/src/pp/pp_vis/pp_vis.lay -+++ b/src/pp/pp_vis/pp_vis.lay -@@ -26,7 +26,7 @@ - @ComputeShader V_CompositeCS - @ComputeShader V_BloomDownCS - @ComputeShader V_BloomUpCS --@ComputeShader V_PostProcessCS -+@ComputeShader V_FinalizeCS - @VertexShader V_DVertVS - @PixelShader V_DVertPS -  -diff --git a/src/pp/pp_vis/pp_vis_core.c b/src/pp/pp_vis/pp_vis_core.c -index f2f5e6b5..338036ba 100644 ---- a/src/pp/pp_vis/pp_vis_core.c -+++ b/src/pp/pp_vis/pp_vis_core.c -@@ -416,7 +416,7 @@ void V_TickForever(WaveLaneCtx *lane) - gpu_perm, cl, - G_Format_R8_Uint, - tiles_dims, -- G_Layout_DirectQueue_ShaderRead, -+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, - .flags = G_ResourceFlag_ZeroMemory, - .name = Lit("Tiles") - ); -@@ -441,7 +441,7 @@ void V_TickForever(WaveLaneCtx *lane) - gpu_perm, cl, - G_Format_R32_Uint, - cells_dims, -- G_Layout_DirectQueue_ShaderReadWrite, 
-+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, - .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, - .name = StringF(perm, "Particle cells - layer %F", FmtSint(layer)) - ); -@@ -454,7 +454,7 @@ void V_TickForever(WaveLaneCtx *lane) - gpu_perm, cl, - G_Format_R32_Uint, - cells_dims, -- G_Layout_DirectQueue_ShaderReadWrite, -+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, - .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, - .name = StringF(perm, "Particle densities - layer %F", FmtSint(layer)) - ); -@@ -469,7 +469,7 @@ void V_TickForever(WaveLaneCtx *lane) - gpu_perm, cl, - G_Format_R16G16B16A16_Float, - cells_dims, -- G_Layout_DirectQueue_ShaderReadWrite, -+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, - .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, - .name = Lit("Stains") - ); -@@ -481,7 +481,7 @@ void V_TickForever(WaveLaneCtx *lane) - gpu_perm, cl, - G_Format_R16G16B16A16_Float, - cells_dims, -- G_Layout_DirectQueue_ShaderReadWrite, -+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, - .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, - .name = Lit("Dry stains") - ); -@@ -493,7 +493,7 @@ void V_TickForever(WaveLaneCtx *lane) - gpu_perm, cl, - G_Format_R32_Float, - cells_dims, -- G_Layout_DirectQueue_ShaderReadWrite, -+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, - .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, - .name = Lit("Drynesses") - ); -@@ -505,7 +505,7 @@ void V_TickForever(WaveLaneCtx *lane) - gpu_perm, cl, - G_Format_R32_Uint, - cells_dims, -- G_Layout_DirectQueue_ShaderReadWrite, -+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, - .flags = G_ResourceFlag_ZeroMemory | G_ResourceFlag_AllowShaderReadWrite, - .name = Lit("Occluders cells") - ); -@@ -614,6 +614,8 @@ void V_TickForever(WaveLaneCtx 
*lane) - frame->dt = SecondsFromNs(frame->dt_ns); - frame->rand = prev_frame->rand; -  -+ frame->should_tone_map = TweakBool("Tone mapping enabled", 1); -+ - if (P_IsEntKeyNil(V.player_key)) - { - TrueRand(StringFromStruct(&V.player_key)); -@@ -4918,18 +4920,17 @@ void V_TickForever(WaveLaneCtx *lane) - frame->tile_descs[tile_kind] = tile_desc; - } - } -+ - // Upload tiles - if (frame->tiles_dirty) - { - // LogDebugF("Uploading tiles to gpu"); -- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_CopyWrite); - G_CopyCpuToTexture( - frame->cl, - gpu_tiles_res, VEC3I32(0, 0, 0), - local_world->tiles, VEC3I32(tiles_dims.x, tiles_dims.y, 1), - RNG3I32(VEC3I32(0, 0, 0), VEC3I32(tiles_dims.x, tiles_dims.y, 1)) - ); -- G_DumbMemoryLayoutSync(frame->cl, gpu_tiles_res, G_Layout_DirectQueue_ShaderRead); - } -  - // Screen texture -@@ -4937,7 +4938,7 @@ void V_TickForever(WaveLaneCtx *lane) - frame->gpu_arena, frame->cl, - G_Format_R16G16B16A16_Float, - frame->screen_dims, -- G_Layout_DirectQueue_ShaderReadWrite, -+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, - .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, - .name = StringF(frame->arena, "Screen target [%F]", FmtSint(frame->tick)) - ); -@@ -4951,11 +4952,10 @@ void V_TickForever(WaveLaneCtx *lane) - frame->gpu_arena, frame->cl, - G_Format_R16G16B16A16_Float, - G_DimsFromMip2D(G_Count2D(screen_target), 1), -- G_Layout_DirectQueue_ShaderReadWrite, -+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, - .flags = G_ResourceFlag_AllowShaderReadWrite | G_ResourceFlag_AllowRenderTarget, - .name = StringF(frame->arena, "Bloom target [%F]", FmtSint(frame->tick)), -- // .max_mips = 4 -- .max_mips = 8 -+ .max_mips = 64 - ); - for (i32 mip_idx = 0; mip_idx < G_CountMips(bloom_target); ++mip_idx) - { -@@ -4979,7 +4979,7 @@ void V_TickForever(WaveLaneCtx *lane) - frame->gpu_arena, frame->cl, - G_Format_R16G16B16A16_Float, - frame->shade_dims, -- 
G_Layout_DirectQueue_ShaderReadWrite, -+ G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite, - .flags = G_ResourceFlag_AllowShaderReadWrite, - .name = StringF(frame->arena, "Shade target [%F]", FmtSint(frame->tick)) - ); -@@ -5091,6 +5091,9 @@ void V_TickForever(WaveLaneCtx *lane) -  - // Sync particles & occluders - G_DumbGlobalMemorySync(frame->cl); -+ -+ // Transition albedo -+ G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite); - } -  - ////////////////////////////// -@@ -5113,83 +5116,63 @@ void V_TickForever(WaveLaneCtx *lane) - G_Compute(frame->cl, V_ShadeCS, V_ThreadGroupSizeFromTexSize(frame->shade_dims)); - } -  -- ////////////////////////////// -- //- Transition G-buffers to readonly -- -- { -- G_DumbMemoryLayoutSync(frame->cl, albedo_target, G_Layout_DirectQueue_ShaderRead); -- G_DumbMemoryLayoutSync(frame->cl, shade_target, G_Layout_DirectQueue_ShaderRead); -- } -- - ////////////////////////////// - //- Composite pass -  - { - G_Compute(frame->cl, V_CompositeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); -  -- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead); -+ // Sync screen tex -+ G_DumbGlobalMemorySync(frame->cl); - } -  - ////////////////////////////// - //- Bloom passes -  - { -- i32 mips_count = G_CountMips(bloom_target); -+ i32 mips_count = G_CountMips(bloom_target) + 1; -+ G_SetConstant(frame->cl, V_GpuConst_MipsCount, mips_count); -+ -+ // NOTE: Because bloom mip chain starts at half screen size, mip_idx 0 -+ // actually represents the screen texture, while mip_idx - 1 represents -+ // the first mip index in the bloom mip chain -  - //- Downsample + blur passes -- for (i32 mip_idx = 0; mip_idx < mips_count; ++mip_idx) -+ for (i32 mip_idx = 1; mip_idx < mips_count; ++mip_idx) - { -- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx); -- if (mip_idx == 0) -- { -- // Init bloom pyramid from screen target on 
first pass (prefilter) -- gpu_flags |= V_GpuFlag_InitBloom; -- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags); -- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->screen_ro); -- } -- else -- { -- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx - 1, mip_idx - 1)); -- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx - 1]); -- } -- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]); -- { -- G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(dims)); -- } -- gpu_flags &= ~V_GpuFlag_InitBloom; -- G_SetConstant(frame->cl, V_GpuConst_Flags, gpu_flags); -+ Vec2I32 down_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); -+ -+ G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); -+ G_Compute(frame->cl, V_BloomDownCS, V_ThreadGroupSizeFromTexSize(down_dims)); -+ -+ G_DumbGlobalMemorySync(frame->cl); - } -  - //- Upsample passes - for (i32 mip_idx = mips_count - 2; mip_idx >= 0; --mip_idx) - { -- Vec2I32 dims = G_DimsFromMip2D(G_Count2D(bloom_target), mip_idx); -- -- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderReadWrite, .mips = RNGI32(mip_idx, mip_idx)); -- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(mip_idx + 1, mip_idx + 1)); -+ Vec2I32 up_dims = G_DimsFromMip2D(G_Count2D(screen_target), mip_idx); -  -- G_SetConstant(frame->cl, V_GpuConst_BloomRead, frame->bloom_mips_ro[mip_idx + 1]); -- G_SetConstant(frame->cl, V_GpuConst_BloomWrite, frame->bloom_mips_rw[mip_idx]); -+ G_SetConstant(frame->cl, V_GpuConst_MipIdx, mip_idx); -+ G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(up_dims)); -  -- G_Compute(frame->cl, V_BloomUpCS, V_ThreadGroupSizeFromTexSize(dims)); -- } -+ G_DumbGlobalMemorySync(frame->cl); -+ } - } -  - ////////////////////////////// -- //- Post process pass -+ //- Finalization pass -  - { -- 
G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderReadWrite); -- G_DumbMemoryLayoutSync(frame->cl, bloom_target, G_Layout_DirectQueue_ShaderRead, .mips = RNGI32(0, 0)); -- G_Compute(frame->cl, V_PostProcessCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); -+ G_Compute(frame->cl, V_FinalizeCS, V_ThreadGroupSizeFromTexSize(frame->screen_dims)); - } -  - ////////////////////////////// - //- Debug shapes pass -  -- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite); -- - { -+ G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_RenderTargetWrite); -+ - G_Rasterize( - frame->cl, - V_DVertVS, V_DVertPS, -@@ -5198,12 +5181,13 @@ void V_TickForever(WaveLaneCtx *lane) - screen_viewport, screen_scissor, - G_RasterMode_TriangleList - ); -+ -+ G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead_ShaderReadWrite_CopyRead_CopyWrite); - } -  - ////////////////////////////// - //- Finalize screen target -  -- G_DumbMemoryLayoutSync(frame->cl, screen_target, G_Layout_DirectQueue_ShaderRead); - { - Rng2 uv = Zi; - uv.p0 = Vec2FromVec(screen_viewport.p0); -diff --git a/src/pp/pp_vis/pp_vis_gpu.g b/src/pp/pp_vis/pp_vis_gpu.g -index f8a254de..c0a9e47d 100644 ---- a/src/pp/pp_vis/pp_vis_gpu.g -+++ b/src/pp/pp_vis/pp_vis_gpu.g -@@ -53,13 +53,6 @@ Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density) - return result; - } -  --// ACES approximation by Krzysztof Narkowicz --// https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/ --Vec3 V_ToneMap(Vec3 v) --{ -- return saturate((v * (2.51f * v + 0.03f)) / (v * (2.43f * v + 0.59f) + 0.14f)); --} -- - //////////////////////////////////////////////////////////// - //~ Prepare frame -  -@@ -142,11 +135,11 @@ ComputeShader2D(V_PrepareCellsCS, 8, 8) - } - else if (over_stain.a > 0) - { -- Vec4 stain = dry_stains[cell_pos]; - Vec4 dry_stain = max(dry_stains[cell_pos], 0); -+ Vec4 stain = 
dry_stain; -  -- stain = BlendPremul(over_stain, stain); - dry_stain = BlendPremul(over_dry_stain, dry_stain); -+ stain = BlendPremul(over_stain, stain); -  - stains[cell_pos] = stain; - dry_stains[cell_pos] = dry_stain; -@@ -483,7 +476,7 @@ ComputeShader(V_SimParticlesCS, 64) - particle.prev_occluder = occluder; - } -  -- if (!AnyBit(desc.flags, V_ParticleFlag_NoPruneWhenStill) && dot(particle.velocity, particle.velocity) < 0.0001) -+ if (dot(particle.velocity, particle.velocity) < (desc.prune_speed_threshold * desc.prune_speed_threshold)) - { - prune = 1; - } -@@ -723,7 +716,6 @@ ComputeShader2D(V_CompositeCS, 8, 8) - Vec4 ground_particle_color = 0; - Vec4 air_particle_color = 0; -  -- - for (V_ParticleLayer layer = (V_ParticleLayer)0; layer < V_ParticleLayer_COUNT; layer += (V_ParticleLayer)1) - { - RWTexture2D cells = G_Dereference(frame.particle_cells[layer]); -@@ -752,9 +744,9 @@ ComputeShader2D(V_CompositeCS, 8, 8) - // Darken wall particles / stains - if (tile == P_TileKind_Wall) - { -- ground_particle_color *= 0.25; -- air_particle_color *= 0.25; -- stain_color *= 0.25; -+ ground_particle_color *= 0.5; -+ air_particle_color *= 0.5; -+ stain_color *= 0.5; - } -  - ////////////////////////////// -@@ -972,57 +964,73 @@ ComputeShader2D(V_CompositeCS, 8, 8) - //////////////////////////////////////////////////////////// - //~ Bloom -  -+////////////////////////////// -+//- Downsample -+ - ComputeShader2D(V_BloomDownCS, 8, 8) - { -+ i32 mips_count = V_GpuConst_MipsCount; -+ i32 mip_idx = V_GpuConst_MipIdx; -+ - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; -- Texture2D bloom_up = G_Dereference(V_GpuConst_BloomRead); -- RWTexture2D bloom_down = G_Dereference(V_GpuConst_BloomWrite); - SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); -+ RWTexture2D bloom_down = G_Dereference(frame.bloom_mips_rw[mip_idx - 1]); -+ -+ Texture2D bloom_up; -+ b32 is_first_pass = mip_idx == 1; -+ if (is_first_pass) -+ { -+ bloom_up 
= G_Dereference(frame.screen_ro); -+ } -+ else -+ { -+ bloom_up = G_Dereference(frame.bloom_mips_ro[mip_idx - 2]); -+ } -  -- Vec2 up_dims = countof(bloom_up); - Vec2 down_dims = countof(bloom_down); -  - Vec2 bloom_pos = SV_DispatchThreadID + 0.5; - Vec2 bloom_uv = bloom_pos / down_dims; - Vec2 off_uv = 0.5 / down_dims; -- b32 is_first_pass = !!(V_GpuConst_Flags & V_GpuFlag_InitBloom); -  -- Struct(SampleDesc) { Vec2 uv; f32 weight; }; -- SampleDesc samples[] = { -- { bloom_uv + Vec2(0, 0), 0.5 }, -- { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 }, -- { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 }, -- { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 }, -- { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 }, -- }; -+ f32 threshold = 0.25; -+ f32 knee = 0.75; -  - Vec4 result = 0; -- for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx) - { -- SampleDesc desc = samples[sample_idx]; -- Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0); -- -- f32 knee_weight = 1; -- if (is_first_pass) -+ Struct(SampleDesc) { Vec2 uv; f32 weight; }; -+ SampleDesc samples[] = { -+ { bloom_uv + Vec2(0, 0), 0.5 }, -+ { bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0.125 }, -+ { bloom_uv + Vec2(off_uv.x, -off_uv.y), 0.125 }, -+ { bloom_uv + Vec2(off_uv.x, off_uv.y), 0.125 }, -+ { bloom_uv + Vec2(-off_uv.x, off_uv.y), 0.125 }, -+ }; -+ for (u32 sample_idx = 0; sample_idx < countof(samples); ++sample_idx) - { -- f32 luminance = LuminanceFromColor(src); -- f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance -- f32 bright = max(luminance, (max_rgb - 1.0) * 0.5); -- if (bright > 0) -- { -- f32 threshold = 1.0; -- f32 knee = 0.5; -- f32 over_threshold = max(bright - threshold, 0.0); -- f32 ramp = saturate(over_threshold / knee); -- knee_weight = (over_threshold * ramp * ramp) / bright; -- } -- else -+ SampleDesc desc = samples[sample_idx]; -+ Vec4 src = bloom_up.SampleLevel(sampler, desc.uv, 0); -+ -+ f32 knee_weight = 
1; -+ if (is_first_pass) - { -- knee_weight = 0; -+ f32 luminance = LuminanceFromColor(src); -+ f32 max_rgb = max(max(src.r, src.g), src.b); // So that we can get bloom on colors with high rgb, not just high luminance -+ f32 bright = max(luminance, (max_rgb - 1.0) * 0.5); -+ if (bright > 0) -+ { -+ f32 over_threshold = max(bright - threshold, 0.0); -+ f32 ramp = saturate(over_threshold / knee); -+ knee_weight = (over_threshold * ramp * ramp) / bright; -+ } -+ else -+ { -+ knee_weight = 0; -+ } - } -- } -  -- result += src * desc.weight * knee_weight; -+ result += src * desc.weight * knee_weight; -+ } - } -  - if (IsInside(bloom_pos, down_dims)) -@@ -1031,52 +1039,77 @@ ComputeShader2D(V_BloomDownCS, 8, 8) - } - } -  -+////////////////////////////// -+//- Upsample -+ - ComputeShader2D(V_BloomUpCS, 8, 8) - { -+ i32 mips_count = V_GpuConst_MipsCount; -+ i32 mip_idx = V_GpuConst_MipIdx; -+ - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; -- Texture2D bloom_down = G_Dereference(V_GpuConst_BloomRead); -- RWTexture2D bloom_up = G_Dereference(V_GpuConst_BloomWrite); - SamplerState sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); -+ Texture2D bloom_down = G_Dereference(frame.bloom_mips_ro[mip_idx]); -+ -+ b32 is_last_pass = mip_idx == 0; -+ RWTexture2D bloom_up; -+ if (is_last_pass) -+ { -+ bloom_up = G_Dereference(frame.screen_rw); -+ } -+ else -+ { -+ bloom_up = G_Dereference(frame.bloom_mips_rw[mip_idx - 1]); -+ } -  -- Vec2 up_dims = countof(bloom_up); - Vec2 down_dims = countof(bloom_down); -+ Vec2 up_dims = countof(bloom_up); -  - Vec2 bloom_pos = SV_DispatchThreadID + 0.5; - Vec2 bloom_uv = bloom_pos / up_dims; -- Vec2 off_uv = 1 / up_dims; -+ Vec2 off_uv0 = 1 / down_dims; -+ Vec2 off_uv1 = off_uv0 * 2; -  - Vec4 result = 0; - { - // Center -- result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 4; -- // Edges -+ result += bloom_down.SampleLevel(sampler, bloom_uv, 0) * 9.0f / 41.0f; -+ -+ // Outer Edges - result += ( -- 
bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv.y), 0) + -- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, 0), 0) + -- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv.y), 0) + -- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, 0), 0) -- ) * 2; -- // Corners -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, -off_uv1.y), 0) + -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, 0), 0) + -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(0, off_uv1.y), 0) + -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, 0), 0) -+ ) * 3.0f / 41.0f; -+ -+ // Inner corners -+ result += ( -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, -off_uv0.y), 0) + -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, -off_uv0.y), 0) + -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv0.x, off_uv0.y), 0) + -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv0.x, off_uv0.y), 0) -+ ) * 4.0f / 41.0f; -+ -+ // Outer corners - result += ( -- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, -off_uv.y), 0) + -- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, -off_uv.y), 0) + -- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv.x, off_uv.y), 0) + -- bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv.x, off_uv.y), 0) -- ); -- // Normalize -- result /= 16; -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, -off_uv1.y), 0) + -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, -off_uv1.y), 0) + -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(off_uv1.x, off_uv1.y), 0) + -+ bloom_down.SampleLevel(sampler, bloom_uv + Vec2(-off_uv1.x, off_uv1.y), 0) -+ ) * 1.0f / 41.0f; - } -  - if (IsInside(bloom_pos, up_dims)) - { -- bloom_up[bloom_pos] += result; -+ bloom_up[bloom_pos] += result * 0.75; - } - } -  - //////////////////////////////////////////////////////////// --//~ Post process -+//~ Finalize -  --ComputeShader2D(V_PostProcessCS, 8, 8) 
-+ComputeShader2D(V_FinalizeCS, 8, 8) - { - V_SharedFrame frame = G_Dereference(V_GpuConst_Frame)[0]; - SamplerState bilinear_sampler = G_Dereference(frame.basic_samplers[G_BasicSamplerKind_BilinearClamp]); -@@ -1084,42 +1117,21 @@ ComputeShader2D(V_PostProcessCS, 8, 8) - RWTexture2D screen_tex = G_Dereference(frame.screen_rw); -  - Vec2 screen_pos = SV_DispatchThreadID + 0.5; -- Vec2 screen_uv = screen_pos / frame.screen_dims; - b32 is_in_screen = IsInside(screen_pos, frame.screen_dims); -- -- ////////////////////////////// -- //- Original -- -- Vec4 original = 0; - if (is_in_screen) - { -- original = screen_tex[screen_pos]; -- original.rgb *= original.a; -- } -+ Vec4 result = screen_tex[screen_pos]; -  -+ //- Tone map -+ if (frame.should_tone_map) -+ { -+ // ACES approximation by Krzysztof Narkowicz -+ // https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/ -+ result.rgb = saturate((result.rgb * (2.51f * result.rgb + 0.03f)) / (result.rgb * (2.43f * result.rgb + 0.59f) + 0.14f)); -+ } -  -- ////////////////////////////// -- //- Bloom -- -- Vec4 bloom = 0; -- if (is_in_screen) -- { -- bloom = bloom_tex.SampleLevel(bilinear_sampler, screen_uv, 0); -- // bloom.rgb *= bloom.a; -- } -- -- ////////////////////////////// -- //- Compose -- -- Vec4 result = Vec4(0, 0, 0, 1); -- result = BlendPremul(original, result); -- result += bloom; -- // result.rgb = V_ToneMap(result); -+ result = Unpremul(result); -  -- result = Unpremul(result); -- -- if (is_in_screen) -- { - screen_tex[screen_pos] = result; - } - } -diff --git a/src/pp/pp_vis/pp_vis_gpu.gh b/src/pp/pp_vis/pp_vis_gpu.gh -index a47a2335..f176f2f8 100644 ---- a/src/pp/pp_vis/pp_vis_gpu.gh -+++ b/src/pp/pp_vis/pp_vis_gpu.gh -@@ -46,7 +46,6 @@ Struct(V_DVertPSOutput) -  - f32 V_RandFromPos(Vec3 pos); - Vec4 V_ColorFromParticle(V_ParticleDesc desc, u32 particle_idx, u32 density); --Vec3 V_ToneMap(Vec3 v); -  - //////////////////////////////////////////////////////////// - //~ Shaders -@@ -73,8 
+72,8 @@ ComputeShader2D(V_CompositeCS, 8, 8); - ComputeShader2D(V_BloomDownCS, 8, 8); - ComputeShader2D(V_BloomUpCS, 8, 8); -  --//- Post process --ComputeShader2D(V_PostProcessCS, 8, 8); -+//- Finalize -+ComputeShader2D(V_FinalizeCS, 8, 8); -  - //- Debug shapes - VertexShader(V_DVertVS, V_DVertPSInput); -diff --git a/src/pp/pp_vis/pp_vis_shared.cg b/src/pp/pp_vis/pp_vis_shared.cg -index 2419a6f2..72f6ae8d 100644 ---- a/src/pp/pp_vis/pp_vis_shared.cg -+++ b/src/pp/pp_vis/pp_vis_shared.cg -@@ -11,37 +11,42 @@ V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind) - V_ParticleDesc result; - { - PERSIST Readonly V_ParticleFlag flags[V_ParticleKind_COUNT] = { -- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) flags, -+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) flags, - V_ParticlesXList(X) - #undef X - }; - PERSIST Readonly V_ParticleLayer layers[V_ParticleKind_COUNT] = { -- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) layer, -+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) layer, - V_ParticlesXList(X) - #undef X - }; - PERSIST Readonly f32 stain_rates[V_ParticleKind_COUNT] = { -- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) stain_rate, -+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) stain_rate, - V_ParticlesXList(X) - #undef X - }; - PERSIST Readonly f32 pen_rates[V_ParticleKind_COUNT] = { -- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) pen_rate, -+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) pen_rate, - V_ParticlesXList(X) - #undef X - }; - PERSIST Readonly f32 lifetimes[V_ParticleKind_COUNT] = { -- #define X(name, flags, layer, stain_rate, pen_rate, 
lifetime, base_color, dry_factor) lifetime, -+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) lifetime, -+ V_ParticlesXList(X) -+ #undef X -+ }; -+ PERSIST Readonly f32 prune_speed_thresholds[V_ParticleKind_COUNT] = { -+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) prune_speed_threshold, - V_ParticlesXList(X) - #undef X - }; - PERSIST Readonly Vec4 base_colors[V_ParticleKind_COUNT] = { -- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) base_color, -+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) base_color, - V_ParticlesXList(X) - #undef X - }; - PERSIST Readonly Vec4 dry_factor[V_ParticleKind_COUNT] = { -- #define X(name, flags, layer, stain_rate, pen_rate, lifetime, base_color, dry_factor) dry_factor, -+ #define X(name, flags, layer, stain_rate, pen_rate, lifetime, prune_speed_threshold, base_color, dry_factor) dry_factor, - V_ParticlesXList(X) - #undef X - }; -@@ -51,6 +56,7 @@ V_ParticleDesc V_DescFromParticleKind(V_ParticleKind kind) - result.stain_rate = stain_rates[kind]; - result.pen_rate = pen_rates[kind]; - result.lifetime = lifetimes[kind]; -+ result.prune_speed_threshold = prune_speed_thresholds[kind]; - result.base_color = LinearFromSrgb(base_colors[kind]); - result.dry_factor = LinearFromSrgb(dry_factor[kind]); - } -diff --git a/src/pp/pp_vis/pp_vis_shared.cgh b/src/pp/pp_vis/pp_vis_shared.cgh -index 16ca6419..71d88ea5 100644 ---- a/src/pp/pp_vis/pp_vis_shared.cgh -+++ b/src/pp/pp_vis/pp_vis_shared.cgh -@@ -9,14 +9,13 @@ - Enum(V_GpuFlag) - { - V_GpuFlag_None = 0, -- V_GpuFlag_InitBloom = (1 << 0), - }; -  - G_DeclConstant(V_GpuFlag, V_GpuConst_Flags, 0); - G_DeclConstant(G_StructuredBufferRef, V_GpuConst_Frame, 1); - G_DeclConstant(G_Texture3DRef, V_GpuConst_NoiseTex, 2); --G_DeclConstant(G_Texture2DRef, V_GpuConst_BloomRead, 
3); --G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4); -+G_DeclConstant(i32, V_GpuConst_MipsCount, 3); -+G_DeclConstant(i32, V_GpuConst_MipIdx, 4); -  - //////////////////////////////////////////////////////////// - //~ Particle types -@@ -29,7 +28,6 @@ G_DeclConstant(G_RWTexture2DRef, V_GpuConst_BloomWrite, 4); - Enum(V_ParticleFlag) - { - V_ParticleFlag_None = 0, -- V_ParticleFlag_NoPruneWhenStill = (1 << 0), - V_ParticleFlag_StainWhenPruned = (1 << 1), - V_ParticleFlag_NoReflect = (1 << 2), - V_ParticleFlag_OnlyCollideWithWalls = (1 << 3), -@@ -53,6 +51,7 @@ Enum(V_ParticleLayer) - /* Layer */ V_ParticleLayer_Ground, \ - /* Stain rate, pen chance */ 30, 0, \ - /* Lifetime */ Inf, \ -+ /* Prune speed threshold */ 0.01, \ - /* Base color */ CompVec4(0, 0, 0, 0), \ - /* Dry color factor */ CompVec4(1, 1, 1, 1) \ - ) \ -@@ -64,8 +63,9 @@ Enum(V_ParticleLayer) - /* Layer */ V_ParticleLayer_Ground, \ - /* Stain rate, pen chance */ 100, 0.25, \ - /* Lifetime */ Inf, \ -- /* Base color */ CompVec4(0.5, 0.1, 0.1, 0.05), \ -- /* Dry color factor */ CompVec4(0.5, 0.5, 0.5, 1) \ -+ /* Prune speed threshold */ 0.5, \ -+ /* Base color */ CompVec4(0.6, 0.1, 0.1, 0.05), \ -+ /* Dry color factor */ CompVec4(0.4, 0.4, 0.4, 1) \ - ) \ - X( \ - /* Name */ BloodDebris, \ -@@ -73,6 +73,7 @@ Enum(V_ParticleLayer) - /* Layer */ V_ParticleLayer_Mid, \ - /* Stain rate, pen chance */ 30, 0, \ - /* Lifetime */ Inf, \ -+ /* Prune speed threshold */ 0.01, \ - /* Base color */ CompVec4(0.5, 0.1, 0.1, 0.8), \ - /* Dry color factor */ CompVec4(1, 1, 1, 1) \ - ) \ -@@ -82,6 +83,7 @@ Enum(V_ParticleLayer) - /* Layer */ V_ParticleLayer_Mid, \ - /* Stain rate, pen chance */ 0, 0, \ - /* Lifetime */ Inf, \ -+ /* Prune speed threshold */ 0.01, \ - /* Base color */ CompVec4(0.4, 0.3, 0.2, 1), \ - /* Dry color factor */ CompVec4(1, 1, 1, 1) \ - ) \ -@@ -91,6 +93,7 @@ Enum(V_ParticleLayer) - /* Layer */ V_ParticleLayer_Mid, \ - /* Stain rate, pen chance */ 0, 0, \ - /* Lifetime */ Inf, \ -+ 
/* Prune speed threshold */ 0.1, \ - /* Base color */ CompVec4(2, 0.5, 0, 1), \ - /* Dry color factor */ CompVec4(0.2, 0.1, 0.0, 1) \ - ) \ -@@ -102,6 +105,7 @@ Enum(V_ParticleLayer) - /* Layer */ V_ParticleLayer_Mid, \ - /* Stain rate, pen chance */ 0, 0, \ - /* Lifetime */ 0.075, \ -+ /* Prune speed threshold */ 0.01, \ - /* Base color */ CompVec4(0.8, 0.6, 0.2, 0.25), \ - /* Dry color factor */ CompVec4(1, 1, 1, 1) \ - ) \ -@@ -111,6 +115,7 @@ Enum(V_ParticleLayer) - /* Layer */ V_ParticleLayer_Air, \ - /* Stain rate, pen chance */ 0, 0, \ - /* Lifetime */ Inf, \ -+ /* Prune speed threshold */ 0.01, \ - /* Base color */ CompVec4(0.25, 0.25, 0.25, 0.75), \ - /* Dry color factor */ CompVec4(1, 1, 1, 1) \ - ) \ -@@ -122,6 +127,7 @@ Enum(V_ParticleLayer) - /* Layer */ V_ParticleLayer_Mid, \ - /* Stain rate, pen chance */ 0, 0, \ - /* Lifetime */ Inf, \ -+ /* Prune speed threshold */ 0.01, \ - /* Base color */ CompVec4(1, 1, 0, 1), \ - /* Dry color factor */ CompVec4(1, 1, 1, 1) \ - ) \ -@@ -168,6 +174,7 @@ Struct(V_ParticleDesc) - f32 stain_rate; - f32 pen_rate; - f32 lifetime; -+ f32 prune_speed_threshold; - Vec4 base_color; - Vec4 dry_factor; - }; -@@ -264,6 +271,7 @@ Struct(V_SharedFrame) -  - b32 tiles_dirty; - b32 should_clear_particles; -+ b32 should_tone_map; -  - b32 is_looking; - b32 is_moving;