// speech_utils.js
/**
 * @fileoverview Functions for accessing speech interface
 */

import { WebServices } from 'BMapsSrc/WebServices';

/**
 * Start a microphone capture session whose utterances are transcribed and handed
 * to `handleTranscription`.
 *
 * Two recorders are involved: a "transcriber" (built by `initTranscriber`) whose
 * recorded audio is sent off for transcription, and a "listener" that is created
 * here on its own microphone stream and returned to the caller. The sound-detection
 * loop runs for as long as the listener is recording, so calling `stop()` on the
 * returned recorder ends the session.
 *
 * This is very specific to the needs of SpeechToText.jsx and ChatBot.jsx.
 * @param {{
 *  handleTranscription: (words: string|null) => any,
 *  timeout: number,
 *  autoTranscribe: boolean,
 * }} options
 *  - `handleTranscription`: called with the transcribed text, or `null` when
 *    nothing usable was transcribed.
 *  - `timeout`: milliseconds of silence that mark the end of an utterance (default 1000).
 *  - `autoTranscribe`: when true, keep listening for further utterances after
 *    each transcription (default false).
 * @returns {Promise<MediaRecorder>} the listener recorder
*/
export async function captureAndTranscribe({
    handleTranscription,
    timeout=1000,
    autoTranscribe=false,
}) {
    const transcriber = await initTranscriber(handleTranscription);
    const listenerStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const listener = new MediaRecorder(listenerStream);
    startAudioCapture(transcriber, listener, autoTranscribe, timeout);
    return listener;
}

/**
 * Ask the command service to turn free text into a BMaps command object.
 *
 * The request carries `{ prompt: words, context }`. The service reply is a JSON
 * string whose `response` field is itself a JSON string; both layers are parsed
 * and the inner value is returned.
 * @param {string} words - text to convert; empty/missing text short-circuits to `null`
 * @param {*} [context] - forwarded unchanged to the command service
 * @returns {Promise<object|null>} the parsed command, or `null` when `words` is empty
 */
export async function wordsToCommand(words, context) {
    const hasWords = Boolean(words) && words.length !== 0;
    if (!hasWords) {
        return null;
    }

    const rawReply = await WebServices.startWsRequestJson(
        '/services/agents/command/',
        { prompt: words, context },
    );
    const envelope = JSON.parse(rawReply);
    return JSON.parse(envelope.response);
}

/**
 * Build the MediaRecorder whose recorded audio gets transcribed.
 *
 * Requests an audio-only microphone stream, wraps it in a MediaRecorder, and wires
 * two handlers: `onerror` (the supplied callback, or a console logger when none is
 * given) and `ondataavailable`, which passes the recorded data through
 * `sendWhisper` and forwards the result to `handleTranscription`.
 * @param {(words: string|null) => any} handleTranscription - receives the `sendWhisper` result
 * @param {Function} [onerror] - optional replacement for the default error logger
 * @returns {Promise<MediaRecorder>}
 */
async function initTranscriber(handleTranscription, onerror) {
    const logRecorderError = (error) => {
        console.error('MediaRecorder error: ', error);
    };

    const micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const transcriber = new MediaRecorder(micStream);

    transcriber.onerror = onerror ? onerror : logRecorderError;
    transcriber.ondataavailable = async (evt) => {
        const words = await sendWhisper(evt.data);
        return handleTranscription(words);
    };

    return transcriber;
}

/**
 * Upload recorded audio to the speech service and return the transcribed text.
 *
 * The audio is POSTed as multipart form data under the field name `audio`. The
 * reply is expected to be JSON with a `response` string and a `segments` array;
 * the first segment's `no_speech_prob` is used to reject recordings that are
 * probably not speech (threshold 0.4; an empty `segments` array counts as 1).
 *
 * Sentence-ending periods are stripped from the transcription. Periods inside a
 * token (e.g. the decimal point in "1.5") are kept.
 * @param {Blob} audioBlob - recorded audio data
 * @returns {Promise<string|null>} the transcription, or `null` when the reply is
 *  malformed, no speech was recognized, or the request failed
 */
async function sendWhisper(audioBlob) {
    // NOTE(review): 'wav' is not a full MIME type and the recorder output is not
    // necessarily WAV; left unchanged in case the service relies on it - confirm.
    const file = new File([audioBlob], 'recording.wav', { type: 'wav' });
    const formData = new FormData();
    formData.append('audio', file);
    try {
        const url = '/services/agents/speech/';
        const response = await fetch(url, {
            method: 'POST',
            body: formData,
        });
        const responseData = await response.json();
        if (!responseData.segments || !responseData.response) {
            console.warn('Speech to text recognition error.');
            console.log(responseData);
            return null;
        }
        const noSpeechProb = responseData.segments.length > 0 ? responseData.segments[0]['no_speech_prob'] : 1;
        if (noSpeechProb > 0.4) {
            console.log(`No speech recognized, please try again.\n${responseData.response}\n${noSpeechProb.toFixed(3)}`);
            return null;
        }
        console.log(`Transcription (not speech prob ${noSpeechProb.toFixed(3)}): ${responseData.response}`);
        // A string pattern in replace() only removes the first match, which left
        // later sentence periods in place and could eat a decimal point instead.
        // Remove every run of periods that ends a sentence (followed by
        // whitespace or end of text) and nothing else.
        const words = responseData.response.replace(/\.+(?=\s|$)/g, '');
        return words;
    } catch (sendError) {
        // Network failures, non-JSON replies, and malformed segment data all land here.
        console.error('Error:', sendError);
        return null;
    }
}

/**
 * Run the record / detect-silence / transcribe cycle for a pair of recorders.
 *
 * `listener` is started immediately and its stream is fed to an audio analyser.
 * A `requestAnimationFrame` loop then polls the analyser for as long as
 * `listener.state` is 'recording':
 *  - while sound is detected, `transcriber` is (re)started if needed and a
 *    silence timer of `timeout` ms is re-armed;
 *  - when the timer fires (no sound for `timeout` ms), `transcriber` is stopped,
 *    which makes it emit its recorded data; unless `autoTranscribe` is set,
 *    `listener` is stopped too, which ends the loop.
 *
 * To end the loop from outside, call `listener.stop()`. When the loop ends, the
 * analyser's AudioContext is closed so repeated sessions do not accumulate
 * open contexts.
 * @param {MediaRecorder} transcriber - recorder whose output is transcribed
 * @param {MediaRecorder} listener - recorder whose state keeps the loop alive
 * @param {boolean} [autoTranscribe=false] - keep listening after each utterance
 * @param {number} [timeout=1000] - milliseconds of silence that end an utterance
 */
function startAudioCapture(transcriber, listener, autoTranscribe=false, timeout=1000) {
    listener.start();

    const analyzer = createAudioAnalyzer(listener.stream);
    const levels = new Uint8Array(analyzer.frequencyBinCount);

    let silenceTimer = null;

    // Runs once `timeout` ms have passed without sound.
    const finishUtterance = () => {
        if (transcriber.state !== 'recording') {
            return;
        }
        // Stopping the transcriber emits its data, which is sent for transcription.
        transcriber.stop();
        // The caller may already have stopped the listener; stop() on an inactive
        // recorder throws InvalidStateError in some implementations.
        if (!autoTranscribe && listener.state !== 'inactive') {
            listener.stop();
        }
    };

    // The analyser is only used by the loop below, so its context can be
    // released as soon as the loop stops rescheduling itself.
    const releaseAnalyzer = () => {
        const audioContext = analyzer.context;
        if (audioContext && audioContext.state !== 'closed') {
            audioContext.close().catch((closeError) => {
                console.warn('Failed to close AudioContext: ', closeError);
            });
        }
    };

    const detectSoundStep = () => {
        // Any non-zero bin counts as sound.
        analyzer.getByteFrequencyData(levels);
        const soundDetected = levels.some((level) => level > 0);

        if (soundDetected) {
            if (transcriber.state !== 'recording') {
                transcriber.start();
            }
            // The user is still speaking: push the end-of-utterance deadline back.
            clearTimeout(silenceTimer);
            silenceTimer = setTimeout(finishUtterance, timeout);
        }

        if (listener.state === 'recording') {
            window.requestAnimationFrame(detectSoundStep);
        } else {
            releaseAnalyzer();
        }
    };

    // Kick off the loop; it keeps itself alive until the listener stops recording.
    window.requestAnimationFrame(detectSoundStep);
}

/**
 * Create an analyser node fed by the given media stream.
 *
 * A new AudioContext is created for the stream. The analyser's `minDecibels`
 * is set to -45 before it is returned.
 * @param {MediaStream} stream - the stream to analyse
 * @returns {AnalyserNode}
 */
function createAudioAnalyzer(stream) {
    const context = new AudioContext();
    const streamSource = context.createMediaStreamSource(stream);

    const analyserNode = context.createAnalyser();
    analyserNode.minDecibels = -45;

    streamSource.connect(analyserNode);
    return analyserNode;
}
