/* ==========================
 *
 * WASAPI backend for audio playback
 *
 * Based on mmozeiko's WASAPI example
 * https://gist.github.com/mmozeiko/5a5b168e61aff4c1eaec0381da62808f#file-win32_wasapi-h
 *
 * ========================== */

#include "playback.h"
#include "arena.h"
#include "sys.h"
#include "mixer.h"
#include "atomic.h"
#include "app.h"

#define COBJMACROS
#define WIN32_LEAN_AND_MEAN
#define UNICODE
/* NOTE(review): the original include payloads were lost in extraction; this
 * set is reconstructed from the APIs used below (CoCreateInstance,
 * IMMDeviceEnumerator, IAudioClient, WAVEFORMATEXTENSIBLE,
 * MEDIASUBTYPE_IEEE_FLOAT, AvSetMmThreadCharacteristicsW) — verify against
 * the build. <initguid.h> must precede the DEFINE_GUID uses. */
#include <windows.h>
#include <initguid.h>
#include <mmdeviceapi.h>
#include <audioclient.h>
#include <mmreg.h>
#include <uuids.h>
#include <avrt.h>

#pragma comment(lib, "avrt")

/* COM GUIDs spelled out locally so we don't depend on linking the uuid lib. */
DEFINE_GUID(CLSID_MMDeviceEnumerator, 0xbcde0395, 0xe52f, 0x467c, 0x8e, 0x3d, 0xc4, 0x57, 0x92, 0x91, 0x69, 0x2e);
DEFINE_GUID(IID_IMMDeviceEnumerator, 0xa95664d2, 0x9614, 0x4f35, 0xa7, 0x46, 0xde, 0x8d, 0xb6, 0x36, 0x17, 0xe6);
DEFINE_GUID(IID_IAudioClient, 0x1cb9ad4c, 0xdbfa, 0x4c32, 0xb1, 0x78, 0xc2, 0xf5, 0x68, 0xa7, 0x03, 0xb2);
DEFINE_GUID(IID_IAudioClient3, 0x7ed4ee07, 0x8e67, 0x4cd4, 0x8c, 0x1a, 0x2b, 0x7a, 0x59, 0x87, 0xad, 0x42);
DEFINE_GUID(IID_IAudioRenderClient, 0xf294acfc, 0x3146, 0x4483, 0xa7, 0xbf, 0xad, 0xdc, 0xa7, 0xc2, 0x60, 0xe2);

/* One WASAPI render buffer acquired via IAudioRenderClient::GetBuffer.
 * `frames` points at `frames_count` frames owned by WASAPI until released
 * in wasapi_update_end(). */
struct wasapi_buffer
{
	u32 frames_count;
	u8 *frames;
};

/* Backend state shared between the startup path and the playback thread.
 * Only `shutdown` is touched cross-thread (via the atomic helpers); the
 * COM pointers are written once in wasapi_initialize() before the playback
 * job starts and are read-only afterwards. */
GLOBAL struct
{
	struct atomic_i32 shutdown;     /* set by playback_shutdown() to stop the job loop */
	IAudioClient *client;
	HANDLE event;                   /* auto-reset event WASAPI signals when buffer space frees up */
	IAudioRenderClient *playback;
	WAVEFORMATEX *buffer_format;    /* shared-mode mix format, from GetMixFormat */
	u32 buffer_frames;              /* total endpoint buffer size in frames */
} G = ZI, DEBUG_ALIAS(G, G_playback_wasapi);

/* ==========================
 *
 * Startup
 *
 * ========================== */

INTERNAL void wasapi_initialize(void);
INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(playback_shutdown);
INTERNAL SYS_JOB_DEF(playback_job, _);

/* Bring up the WASAPI client and kick off the dedicated playback job.
 * `mixer_sr` is currently unused; it exists to enforce startup ordering
 * (mixer before playback) at the call site. */
struct playback_startup_receipt playback_startup(struct mixer_startup_receipt *mixer_sr)
{
	(UNUSED)mixer_sr;

	wasapi_initialize();
	sys_run(1, playback_job, NULL, SYS_PRIORITY_HIGH, NULL);
	app_register_exit_callback(&playback_shutdown);

	return (struct playback_startup_receipt) { 0 };
}

/* Exit callback: flag the playback job to leave its loop. The job observes
 * the flag at the top of each buffer iteration, so shutdown latency is at
 * most one buffer period. */
INTERNAL APP_EXIT_CALLBACK_FUNC_DEF(playback_shutdown)
{
	__prof;
	atomic_i32_fetch_set(&G.shutdown, true);
}

/* ==========================
 *
 * Wasapi initialization
 *
 * ========================== */

/* Open the default render endpoint in shared mode, event-driven, requesting
 * stereo 32-bit float at PLAYBACK_SAMPLE_RATE, and start the stream.
 *
 * NOTE(review): all COM HRESULTs here are ignored; if any call fails the
 * later calls dereference garbage. See also the FIXME in playback_job about
 * halting the mixer on playback failure. */
INTERNAL void wasapi_initialize(void)
{
	u64 sample_rate = PLAYBACK_SAMPLE_RATE;
	u64 channel_count = 2;
	u32 channel_mask = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT;

	/* Create enumerator to get audio device */
	IMMDeviceEnumerator *enumerator;
	CoCreateInstance(&CLSID_MMDeviceEnumerator, NULL, CLSCTX_ALL, &IID_IMMDeviceEnumerator, (LPVOID *)&enumerator);

	/* Get default playback device */
	IMMDevice *device;
	IMMDeviceEnumerator_GetDefaultAudioEndpoint(enumerator, eRender, eConsole, &device);
	IMMDeviceEnumerator_Release(enumerator);

	/* Create audio client for device */
	IMMDevice_Activate(device, &IID_IAudioClient, CLSCTX_ALL, NULL, (LPVOID *)&G.client);
	IMMDevice_Release(device);

	/* Interleaved stereo f32.
	 * BUGFIX: nSamplesPerSec is a DWORD; the original (WORD) cast would
	 * truncate any sample rate >= 65536 (e.g. 96 kHz). */
	WAVEFORMATEXTENSIBLE format_ex =
	{
		.Format =
		{
			.wFormatTag = WAVE_FORMAT_EXTENSIBLE,
			.nChannels = (WORD)channel_count,
			.nSamplesPerSec = (DWORD)sample_rate,
			.nAvgBytesPerSec = (DWORD)(sample_rate * channel_count * sizeof(f32)),
			.nBlockAlign = (WORD)(channel_count * sizeof(f32)),
			.wBitsPerSample = (WORD)(8 * sizeof(f32)),
			.cbSize = sizeof(format_ex) - sizeof(format_ex.Format),
		},
		.Samples.wValidBitsPerSample = 8 * sizeof(f32),
		.dwChannelMask = channel_mask,
		.SubFormat = MEDIASUBTYPE_IEEE_FLOAT,
	};
	WAVEFORMATEX *wfx = &format_ex.Format;

#if 0
	b32 client_initialized = FALSE;
	IAudioClient3 *client3;
	if (SUCCEEDED(IAudioClient_QueryInterface(G.client, &IID_IAudioClient3, (LPVOID *)&client3)))
	{
		/* From Martins: Minimum buffer size will typically be 480 samples (10msec @ 48khz)
		 * but it can be 128 samples (2.66 msec @ 48khz) if driver is properly installed
		 * see bullet-point instructions here: https://learn.microsoft.com/en-us/windows-hardware/drivers/audio/low-latency-audio#measurement-tools */
		UINT32 default_period_samples, fundamental_period_samples, min_period_samples, max_period_samples;
		IAudioClient3_GetSharedModeEnginePeriod(client3, wfx, &default_period_samples, &fundamental_period_samples, &min_period_samples, &max_period_samples);

		const DWORD flags = AUDCLNT_STREAMFLAGS_EVENTCALLBACK;
		if (SUCCEEDED(IAudioClient3_InitializeSharedAudioStream(client3, flags, min_period_samples, wfx, NULL)))
		{
			client_initialized = TRUE;
		}
		IAudioClient3_Release(client3);
	}
#else
	b32 client_initialized = false;
#endif

	if (!client_initialized)
	{
		/* Get duration for shared-mode streams, this will typically be 480 samples (10msec @ 48khz) */
		REFERENCE_TIME duration;
		IAudioClient_GetDevicePeriod(G.client, &duration, NULL);

		/* Initialize audio playback
		 *
		 * NOTE:
		 * Passing AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM will tell WASAPI to
		 * always convert to native mixing format. This may introduce latency
		 * but allows for any input format. */
		const DWORD flags = AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM | AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY;
		IAudioClient_Initialize(G.client, AUDCLNT_SHAREMODE_SHARED, flags, duration, 0, wfx, NULL);
	}

	IAudioClient_GetMixFormat(G.client, &G.buffer_format);

	/* Set up event handler to wait on */
	G.event = CreateEventW(NULL, FALSE, FALSE, NULL);
	IAudioClient_SetEventHandle(G.client, G.event);

	/* Get playback client */
	IAudioClient_GetService(G.client, &IID_IAudioRenderClient, (LPVOID *)&G.playback);

	/* Start the playback */
	IAudioClient_Start(G.client);

	/* Get audio buffer size in samples */
	IAudioClient_GetBufferSize(G.client, &G.buffer_frames);
}

/* ==========================
 *
 * Playback thread update
 *
 * ========================== */

/* Block until WASAPI signals free buffer space, then acquire as many frames
 * as the endpoint can currently accept. Must be paired with
 * wasapi_update_end() on the same thread. May return frames_count == 0. */
INTERNAL struct wasapi_buffer wasapi_update_begin(void)
{
	__prof;

	struct wasapi_buffer wspbuf = ZI;

	/* Wait */
	{
		__profscope(wasapi_wait_on_event);
		WaitForSingleObject(G.event, INFINITE);
	}

	/* Get padding frames */
	u32 padding_frames;
	IAudioClient_GetCurrentPadding(G.client, &padding_frames);

	/* Get output buffer from WASAPI: free space = total - still-queued.
	 * Guard against padding > total so the subtraction can't wrap. */
	wspbuf.frames_count = 0;
	if (padding_frames <= G.buffer_frames)
	{
		wspbuf.frames_count = G.buffer_frames - padding_frames;
	}
	IAudioRenderClient_GetBuffer(G.playback, wspbuf.frames_count, &wspbuf.frames);

	return wspbuf;
}

/* Copy the mixed PCM into the acquired WASAPI buffer and release it.
 * `src` is interleaved stereo f32, so src.count / 2 is its frame count.
 *
 * NOTE(review): the copy assumes 2 channels and derives bytes-per-sample
 * from the shared-mode mix format (normally f32); if the mix format differs
 * from the stereo f32 stream format requested at Initialize, write_size may
 * be wrong — confirm against AUTOCONVERTPCM semantics. */
INTERNAL void wasapi_update_end(struct wasapi_buffer *wspbuf, struct mixed_pcm_f32 src)
{
	__prof;

	u32 frames_in_source = src.count / 2;
	u32 frames_in_output = wspbuf->frames_count;
	u32 flags = 0;
	if (frames_in_source == frames_in_output)
	{
		/* Copy bytes to output */
		u32 bytes_per_sample = G.buffer_format->nBlockAlign / G.buffer_format->nChannels;
		u32 write_size = frames_in_source * 2 * bytes_per_sample;
		MEMCPY(wspbuf->frames, src.samples, write_size);
	}
	else
	{
		/* Submit silence if not enough samples */
		flags = AUDCLNT_BUFFERFLAGS_SILENT;

		/* This shouldn't occur, mixer should be generating samples equivalent
		 * to value returned from `playback_update_begin`. */
		ASSERT(false);
	}

#if !AUDIO_ENABLED
	flags = AUDCLNT_BUFFERFLAGS_SILENT;
#endif

	/* Submit output buffer to WASAPI.
	 * BUGFIX: ReleaseBuffer must be given the frame count acquired by
	 * GetBuffer (frames_in_output), not the source frame count — they differ
	 * exactly on the silence path above, where the old code would release
	 * the wrong number of frames. */
	IAudioRenderClient_ReleaseBuffer(G.playback, frames_in_output, flags);

	__profframe("Audio");
}

/* ==========================
 *
 * Playback thread entry
 *
 * ========================== */

/* Dedicated high-priority playback loop: wait for buffer space, ask the
 * mixer for exactly that many frames, submit, repeat until shutdown. */
INTERNAL SYS_JOB_DEF(playback_job, _)
{
	(UNUSED)_;

	struct arena_temp scratch = scratch_begin_no_conflict();

	/* Register with MMCSS so the scheduler treats this as a pro-audio thread.
	 * https://learn.microsoft.com/en-us/windows/win32/procthread/multimedia-class-scheduler-service#registry-settings */
	DWORD task = 0;
	HANDLE mmc_handle = AvSetMmThreadCharacteristicsW(L"Pro Audio", &task);
	ASSERT(mmc_handle);
	(UNUSED)mmc_handle;

	/* FIXME: If playback fails at any point and mixer stops advancing, we
	 * need to halt mixer to prevent memory leak when sounds are played. */
	while (!atomic_i32_fetch(&G.shutdown))
	{
		struct arena_temp temp = arena_temp_begin(scratch.arena);

		struct wasapi_buffer wspbuf = wasapi_update_begin();
		struct mixed_pcm_f32 pcm = mixer_update(temp.arena, wspbuf.frames_count);
		wasapi_update_end(&wspbuf, pcm);

		arena_temp_end(temp);
	}

	scratch_end(scratch);
}