diff --git a/src/base/base_arena.h b/src/base/base_arena.h index 6a0a501f..d7a97a9f 100644 --- a/src/base/base_arena.h +++ b/src/base/base_arena.h @@ -1,7 +1,7 @@ //////////////////////////////////////////////////////////// //~ Arena types -#define ArenaHeaderSize CachelineSize +#define ArenaHeaderSize 256 #define ArenaBlockSize 16384 Struct(Arena) diff --git a/src/base/base_wave.c b/src/base/base_wave.c index 343239c1..7093f230 100644 --- a/src/base/base_wave.c +++ b/src/base/base_wave.c @@ -106,3 +106,12 @@ void SetWaveLaneDefaultSpin(WaveLaneCtx *lane, u64 n) { lane->default_spin_count = n; } + +//////////////////////////////////////////////////////////// +//~ Wave task helpers + +i32 WaveLaneIdxFromTaskIdx(WaveLaneCtx *lane, u64 task_idx) +{ + WaveCtx *wave = lane->wave; + return task_idx % wave->lanes_count; /* Round-robin: tasks are assigned to lanes by task_idx modulo the wave's lane count. NOTE(review): the result is implicitly narrowed to the i32 return type, and a lanes_count of 0 would divide by zero - confirm a wave always has at least one lane */ +} diff --git a/src/base/base_wave.h b/src/base/base_wave.h index 78ce96f9..5957fadb 100644 --- a/src/base/base_wave.h +++ b/src/base/base_wave.h @@ -43,6 +43,11 @@ void WaveSyncBroadcastEx_(WaveLaneCtx *lane, u32 broadcast_lane_idx, void *broad void SetWaveLaneDefaultSpin(WaveLaneCtx *lane, u64 n); +//////////////////////////////////////////////////////////// +//~ Wave task helpers + +i32 WaveLaneIdxFromTaskIdx(WaveLaneCtx *lane, u64 task_idx); /* Returns task_idx % lane->wave->lanes_count, i.e. the lane index that should run the given task */ + //////////////////////////////////////////////////////////// //~ @hookdecl Dispatch diff --git a/src/meta/meta.c b/src/meta/meta.c index 43d1bbe6..edd043de 100644 --- a/src/meta/meta.c +++ b/src/meta/meta.c @@ -398,6 +398,8 @@ void BuildEntryPoint(WaveLaneCtx *lane, void *udata) String c_out_file = F_GetFull(perm, StringF(perm, "%F_gen_c.c", FmtString(cmdline.leaf_layer_name))); String gpu_out_file = F_GetFull(perm, StringF(perm, "%F_gen_gpu.hlsl", FmtString(cmdline.leaf_layer_name))); + /* TODO: Dispatch OS Cmds asynchronously rather than synchronously waiting on each lane */ + ////////////////////////////// //- Generate final C file @@ -704,7 +706,7 @@ void BuildEntryPoint(WaveLaneCtx *lane, void *udata) if 
(!ret) ret = gpugen.errors.count > 0; ////////////////////////////// - //- Compile C + //- Compile C & Shaders Struct(CComp) { @@ -714,28 +716,6 @@ void BuildEntryPoint(WaveLaneCtx *lane, void *udata) }; CComp ccomp = ZI; - if (lane->idx == 0 && !ret) - { - ccomp.obj_file = StringF(perm, "%F_gen_c.obj", FmtString(cmdline.leaf_layer_name)); - String cmd = StringF(perm, - "cmd /c cl.exe /c %F -Fo:%F %F %F %F %F", - FmtString(c_out_file), - FmtString(ccomp.obj_file), - FmtString(StringFromList(perm, cp.flags_msvc, Lit(" "))), - FmtString(StringFromList(perm, cp.compiler_only_flags_msvc, Lit(" "))), - FmtString(StringFromList(perm, cp.warnings_msvc, Lit(" "))), - FmtString(StringFromList(perm, cp.defs, Lit(" ")))); - OS_CommandResult cmd_result = OS_RunCommand(perm, cmd); - String cmd_output = TrimWhitespace(cmd_result.output); - ccomp.output = cmd_output; - ccomp.return_code = cmd_result.code; - } - WaveSyncBroadcast(lane, 0, &ccomp); - if (!ret) ret = ccomp.return_code; - - ////////////////////////////// - //- Compile shaders - Struct(GpuComp) { String output; @@ -744,40 +724,64 @@ void BuildEntryPoint(WaveLaneCtx *lane, void *udata) u32 gpucomps_count = gpugen.shader_entries_count; GpuComp *gpucomps = PushStructs(perm, GpuComp, gpucomps_count); - if (lane->idx == 0 && !ret) + if (!ret) { + /* Compile C */ + u64 ccomp_task_idx = 0; + if (lane->idx == WaveLaneIdxFromTaskIdx(lane, ccomp_task_idx)) + { + ccomp.obj_file = StringF(perm, "%F_gen_c.obj", FmtString(cmdline.leaf_layer_name)); + String cmd = StringF(perm, + "cl.exe /c %F -Fo:%F %F %F %F %F", + FmtString(c_out_file), + FmtString(ccomp.obj_file), + FmtString(StringFromList(perm, cp.flags_msvc, Lit(" "))), + FmtString(StringFromList(perm, cp.compiler_only_flags_msvc, Lit(" "))), + FmtString(StringFromList(perm, cp.warnings_msvc, Lit(" "))), + FmtString(StringFromList(perm, cp.defs, Lit(" ")))); + OS_CommandResult cmd_result = OS_RunCommand(perm, cmd); + String cmd_output = TrimWhitespace(cmd_result.output); + 
ccomp.output = cmd_output; + ccomp.return_code = cmd_result.code; + } + + /* Compile shaders */ u32 gpucomp_idx = 0; for (ShaderEntry *e = gpugen.first_shader_entry; e; e = e->next) { - GpuComp *gpucomp = &gpucomps[gpucomp_idx]; - String out_file = StringF(perm, "%F/%F", FmtString(shader_store_name), FmtString(e->name)); - String target = e->kind == ShaderEntryKind_VS ? Lit("vs_6_6") - : e->kind == ShaderEntryKind_PS ? Lit("ps_6_6") - : e->kind == ShaderEntryKind_CS ? Lit("cs_6_6") - : Lit("vs_6_6"); - String compile_cmd = StringF(perm, - "cmd /c dxc.exe -T %F -E %F -Fo %F %F %F %F", - FmtString(target), - FmtString(e->name), - FmtString(out_file), - FmtString(gpu_out_file), - FmtString(StringFromList(perm, cp.defs, Lit(" "))), - FmtString(StringFromList(perm, cp.flags_dxc, Lit(" ")))); - - OS_CommandResult cmd_result = OS_RunCommand(perm, compile_cmd); - - if (cmd_result.code == 0) + /* NOTE: Using gpucomp_idx + 1 as task index for parallelism w/ C compilation */ + u64 gpucomp_task_idx = gpucomp_idx + 1; + if (lane->idx == WaveLaneIdxFromTaskIdx(lane, gpucomp_task_idx)) { - // f64 elapsed = SecondsFromNs(cmd_result.elapsed_ns); - f64 elapsed = 0; - // PushStringToList(perm, &gpucomp.output, StringF(perm, "%F:%F %Fs", FmtString(F_GetFileName(gpu_out_file)), FmtString(e->name), FmtFloat(elapsed))); - gpucomp->output = cmd_result.output; - gpucomp->return_code = cmd_result.code; + GpuComp *gpucomp = &gpucomps[gpucomp_idx]; + String out_file = StringF(perm, "%F/%F", FmtString(shader_store_name), FmtString(e->name)); + String target = e->kind == ShaderEntryKind_VS ? Lit("vs_6_6") + : e->kind == ShaderEntryKind_PS ? Lit("ps_6_6") + : e->kind == ShaderEntryKind_CS ? 
Lit("cs_6_6") + : Lit("vs_6_6"); + String compile_cmd = StringF(perm, + "dxc.exe -T %F -E %F -Fo %F %F %F %F", + FmtString(target), + FmtString(e->name), + FmtString(out_file), + FmtString(gpu_out_file), + FmtString(StringFromList(perm, cp.defs, Lit(" "))), + FmtString(StringFromList(perm, cp.flags_dxc, Lit(" ")))); + + OS_CommandResult cmd_result = OS_RunCommand(perm, compile_cmd); + + if (cmd_result.code == 0) /* NOTE(review): output and return_code are stored only when the command succeeds; there is no else branch, so a non-zero dxc exit code is never written to gpucomp->return_code - confirm this is intended */ + { + gpucomp->output = cmd_result.output; + gpucomp->return_code = cmd_result.code; + } } ++gpucomp_idx; } } + WaveSyncBroadcast(lane, 0, &ccomp); /* NOTE(review): shader tasks are now spread over all lanes, yet both broadcasts pass lane index 0 (presumably the source lane) - confirm that gpucomps entries written by other lanes are visible through lane 0's array; whether perm/PushStructs storage is shared between lanes is not visible here */ WaveSyncBroadcast(lane, 0, &gpucomps); + if (!ret) ret = ccomp.return_code; for (u32 i = 0; i < gpucomps_count; ++i) { if (!ret) ret = gpucomps[i].return_code; @@ -974,38 +978,41 @@ void BuildEntryPoint(WaveLaneCtx *lane, void *udata) u32 rescomps_count = arcinfogen.arc_entries_count; ResComp *rescomps = PushStructs(perm, ResComp, rescomps_count); - if (lane->idx == 0 && !ret) + if (!ret) { if (IsPlatformWindows) { i32 rescomp_idx = 0; for (ArcInfoEntry *entry = arcinfogen.first_arc_entry; entry; entry = entry->next) { - ResComp *rescomp = &rescomps[rescomp_idx]; - - String arc_path = entry->out_path; - - /* Generate RC file */ - String rc_out_file = StringF(perm, "%F.rc", FmtString(entry->store_name)); + if (lane->idx == WaveLaneIdxFromTaskIdx(lane, rescomp_idx)) { - RandState rs = ZI; - StringList rc_out_lines = ZI; - String arc_file_cp = F_GetFullCrossPlatform(perm, arc_path); - String line = StringF(perm, "%F_%F RCDATA \"%F\"", FmtString(Lit(Stringize(W32_EmbeddedDataPrefix))), FmtHex(RandU64FromState(&rs)), FmtString(arc_file_cp)); - PushStringToList(perm, &rc_out_lines, line); - /* Write to file */ - String rc_out = StringFromList(perm, rc_out_lines, Lit("\n")); - F_ClearWrite(rc_out_file, rc_out); - } + ResComp *rescomp = &rescomps[rescomp_idx]; - /* Compile RC file */ - rescomp->obj_file = StringF(perm, "%F.res", FmtString(entry->store_name)); - { - String cmd = StringF(perm, "cmd /c rc.exe -nologo -fo 
%F %F", FmtString(rescomp->obj_file), FmtString(F_GetFull(perm, rc_out_file))); - OS_CommandResult cmd_result = OS_RunCommand(perm, cmd); - String cmd_output = TrimWhitespace(cmd_result.output); - rescomp->output = cmd_output; - rescomp->return_code = cmd_result.code; + String arc_path = entry->out_path; + + /* Generate RC file */ + String rc_out_file = StringF(perm, "%F.rc", FmtString(entry->store_name)); + { + RandState rs = ZI; + StringList rc_out_lines = ZI; + String arc_file_cp = F_GetFullCrossPlatform(perm, arc_path); + String line = StringF(perm, "%F_%F RCDATA \"%F\"", FmtString(Lit(Stringize(W32_EmbeddedDataPrefix))), FmtHex(RandU64FromState(&rs)), FmtString(arc_file_cp)); + PushStringToList(perm, &rc_out_lines, line); + /* Write to file */ + String rc_out = StringFromList(perm, rc_out_lines, Lit("\n")); + F_ClearWrite(rc_out_file, rc_out); + } + + /* Compile RC file */ + rescomp->obj_file = StringF(perm, "%F.res", FmtString(entry->store_name)); + { + String cmd = StringF(perm, "rc.exe -nologo -fo %F %F", FmtString(rescomp->obj_file), FmtString(F_GetFull(perm, rc_out_file))); + OS_CommandResult cmd_result = OS_RunCommand(perm, cmd); + String cmd_output = TrimWhitespace(cmd_result.output); + rescomp->output = cmd_output; + rescomp->return_code = cmd_result.code; + } } ++rescomp_idx; @@ -1043,7 +1050,6 @@ void BuildEntryPoint(WaveLaneCtx *lane, void *udata) OS_CloseFile(file); } - i64 link_elapsed_ns = 0; i64 start_ns = TimeNs(); String obj_files_str = ZI; @@ -1059,7 +1065,7 @@ void BuildEntryPoint(WaveLaneCtx *lane, void *udata) } String cmd = StringF(perm, - "cmd /c link.exe %F /OUT:%F %F %F", + "link.exe %F /OUT:%F %F %F", FmtString(obj_files_str), FmtString(exe_file), FmtString(StringFromList(perm, cp.flags_msvc, Lit(" "))), @@ -1067,7 +1073,7 @@ void BuildEntryPoint(WaveLaneCtx *lane, void *udata) OS_CommandResult result = OS_RunCommand(perm, cmd); link.output = TrimWhitespace(result.output); link.return_code = result.code; - link_elapsed_ns = TimeNs() - 
start_ns; + i64 link_elapsed_ns = TimeNs() - start_ns; /* NOTE(review): only referenced by the commented-out EchoLine below, so it is currently unused */ // EchoLine(StringF(perm, ">>>>> Linked in %Fs", FmtFloat(SecondsFromNs(link_elapsed_ns)))); } WaveSyncBroadcast(lane, 0, &link);