// Browser frontend for the Moshi voice chat app. Assumes React, ReactDOM,
// opus-recorder (the `Recorder` global), and `ogg-opus-decoder` are loaded
// before this file runs (e.g. via <script> tags), and that a JSX transform
// such as Babel standalone is applied, since this file uses JSX.
const { useRef, useEffect, useState } = React;

const baseURL = ""; // points to whatever is serving this app (e.g. your -dev.modal.run for `modal serve`, or .modal.run for `modal deploy`)

const getBaseURL = () => {
  // use the current web app server domain to construct the URL for the Moshi app
  const currentURL = new URL(window.location.href);
  let hostname = currentURL.hostname;
  hostname = hostname.replace('-web', '-moshi-web');
  const wsProtocol = currentURL.protocol === 'https:' ? 'wss:' : 'ws:';
  return `${wsProtocol}//${hostname}/ws`;
};

const App = () => {
  // Mic input
  const [recorder, setRecorder] = useState(null); // Opus recorder
  const [amplitude, setAmplitude] = useState(0); // amplitude, captured from the PCM analyser

  // Audio playback
  const [audioContext] = useState(() => new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 48000 }));
  const sourceNodeRef = useRef(null); // audio source node
  const scheduledEndTimeRef = useRef(0); // scheduled end time for audio playback
  const decoderRef = useRef(null); // decoder for converting Opus to PCM

  // WebSocket
  const socketRef = useRef(null); // ongoing websocket connection

  // UI state
  const [warmupComplete, setWarmupComplete] = useState(false);
  const [completedSentences, setCompletedSentences] = useState([]);
  const [pendingSentence, setPendingSentence] = useState('');

  // Mic input: start the Opus recorder
  const startRecording = async () => {
    // prompts the user for permission to use the microphone
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });

    const recorder = new Recorder({
      encoderPath: "https://cdn.jsdelivr.net/npm/opus-recorder@latest/dist/encoderWorker.min.js",
      streamPages: true,
      encoderApplication: 2049,
      encoderFrameSize: 80, // milliseconds, equal to 1920 samples at 24000 Hz
      encoderSampleRate: 24000, // 24000 to match the model's sample rate
      maxFramesPerPage: 1,
      numberOfChannels: 1,
    });

    recorder.ondataavailable = async (arrayBuffer) => {
      if (socketRef.current) {
        if (socketRef.current.readyState !== WebSocket.OPEN) {
          console.log("Socket not open, dropping audio");
          return;
        }
        await socketRef.current.send(arrayBuffer);
      }
    };

    recorder.start().then(() => {
      console.log("Recording started");
      setRecorder(recorder);
    });

    // create an analyser on the raw PCM stream for calculating amplitude
    const analyzerContext = new (window.AudioContext || window.webkitAudioContext)();
    const analyzer = analyzerContext.createAnalyser();
    analyzer.fftSize = 256;
    const sourceNode = analyzerContext.createMediaStreamSource(stream);
    sourceNode.connect(analyzer);

    // poll the analyser on each animation frame instead of using a MediaRecorder
    const processAudio = () => {
      const dataArray = new Uint8Array(analyzer.frequencyBinCount);
      analyzer.getByteFrequencyData(dataArray);
      const average = dataArray.reduce((sum, value) => sum + value, 0) / dataArray.length;
      setAmplitude(average);
      requestAnimationFrame(processAudio);
    };
    processAudio();
  };

  // Audio playback: prep the decoder for converting Opus to PCM
  useEffect(() => {
    const initializeDecoder = async () => {
      const decoder = new window["ogg-opus-decoder"].OggOpusDecoder();
      await decoder.ready;
      decoderRef.current = decoder;
      console.log("Ogg Opus decoder initialized");
    };

    initializeDecoder();

    return () => {
      if (decoderRef.current) {
        decoderRef.current.free();
      }
    };
  }, []);

  // Audio playback: schedule PCM audio chunks for seamless playback
  const scheduleAudioPlayback = (newAudioData) => {
    const sampleRate = audioContext.sampleRate;
    const numberOfChannels = 1;
    const nowTime = audioContext.currentTime;
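    // Gapless streaming technique: each decoded chunk gets its own AudioBuffer,
    // scheduled to start exactly where the previous chunk ends (or immediately,
    // if playback has fallen behind real time). scheduledEndTimeRef carries the
    // running end-of-queue timestamp between calls.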
    // Create a new buffer and source node for the incoming audio data
    const newBuffer = audioContext.createBuffer(numberOfChannels, newAudioData.length, sampleRate);
    newBuffer.copyToChannel(newAudioData, 0);
    const sourceNode = audioContext.createBufferSource();
    sourceNode.buffer = newBuffer;
    sourceNode.connect(audioContext.destination);

    // Schedule the new audio to play immediately after any currently playing audio
    const startTime = Math.max(scheduledEndTimeRef.current, nowTime);
    sourceNode.start(startTime);

    // Update the scheduled end time so we know when to schedule the next piece of audio
    scheduledEndTimeRef.current = startTime + newBuffer.duration;

    // AudioBufferSourceNode has no built-in startTime property, so stash the
    // scheduled start on the node; the check below uses it to disconnect the
    // previous node once it has finished playing
    sourceNode.startTime = startTime;
    if (sourceNodeRef.current && sourceNodeRef.current.buffer) {
      const currentEndTime = sourceNodeRef.current.startTime + sourceNodeRef.current.buffer.duration;
      if (currentEndTime <= nowTime) {
        sourceNodeRef.current.disconnect();
      }
    }
    sourceNodeRef.current = sourceNode;
  };

  // WebSocket: open the websocket connection and start recording
  useEffect(() => {
    const endpoint = getBaseURL();
    console.log("Connecting to", endpoint);
    const socket = new WebSocket(endpoint);
    socketRef.current = socket;

    socket.onopen = () => {
      console.log("WebSocket connection opened");
      startRecording();
      setWarmupComplete(true);
    };

    socket.onmessage = async (event) => {
      // data is a blob; convert it to an array buffer
      const arrayBuffer = await event.data.arrayBuffer();
      const view = new Uint8Array(arrayBuffer);
      const tag = view[0];
      const payload = arrayBuffer.slice(1);
      if (tag === 1) {
        // audio data
        const { channelData, samplesDecoded, sampleRate } = await decoderRef.current.decode(new Uint8Array(payload));
        if (samplesDecoded > 0) {
          scheduleAudioPlayback(channelData[0]);
        }
      }
      if (tag === 2) {
        // text data
        const decoder = new TextDecoder();
        const text = decoder.decode(payload);
        setPendingSentence(prevPending => {
          const updatedPending = prevPending + text;
          if (updatedPending.endsWith('.') || updatedPending.endsWith('!') || updatedPending.endsWith('?')) {
            setCompletedSentences(prevCompleted => [...prevCompleted, updatedPending]);
            return '';
          }
          return updatedPending;
        });
      }
    };

    socket.onclose = () => {
      console.log("WebSocket connection closed");
    };

    return () => {
      socket.close();
    };
  }, []);

  // The original markup was stripped in transit; this is a minimal
  // reconstruction that wires the two child components to the state above.
  // The class name and layout are assumptions, not the original styling.
  return (
    <div className="app">
      <AudioControl recorder={recorder} amplitude={amplitude} />
      <TextOutput
        warmupComplete={warmupComplete}
        completedSentences={completedSentences}
        pendingSentence={pendingSentence}
      />
    </div>
  );
};
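// Wire protocol, as inferred from the handlers above: the server prefixes every
// binary frame with a one-byte tag, followed by the payload:
//   tag 1 -> Ogg Opus audio, decoded and scheduled for playback
//   tag 2 -> UTF-8 text tokens, appended to the pending sentence
// Client-to-server frames are raw Ogg Opus pages from the recorder, untagged.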
const AudioControl = ({ recorder, amplitude }) => {
  const [muted, setMuted] = useState(true);

  const toggleMute = () => {
    if (!recorder) {
      return;
    }
    setMuted(!muted);
    recorder.setRecordingGain(muted ? 1 : 0);
  };

  // unmute automatically once the recorder is ready
  useEffect(() => {
    if (recorder) {
      setMuted(false);
      recorder.setRecordingGain(1);
    }
  }, [recorder]);

  const amplitudePercent = amplitude / 255;
  const maxAmplitude = 0.3; // for scaling
  const minDiameter = 30; // minimum diameter of the circle in pixels
  const maxDiameter = 200; // increased maximum diameter to ensure overflow
  let diameter = minDiameter + (maxDiameter - minDiameter) * (amplitudePercent / maxAmplitude);
  if (muted) {
    diameter = 20;
  }

  // Minimal reconstruction of the stripped markup: a clickable circle whose
  // diameter tracks mic amplitude. Class names and styling are assumptions.
  return (
    <div className="audio-control" onClick={toggleMute}>
      <div
        className="amplitude-circle"
        style={{ width: `${diameter}px`, height: `${diameter}px`, borderRadius: '50%' }}
      />
    </div>
  );
};
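// Caveat (a note on browser behavior, not from the original source): browsers
// may create an AudioContext in the "suspended" state until a user gesture.
// If playback stays silent, calling audioContext.resume() from a click
// handler such as toggleMute is the standard fix.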
const TextOutput = ({ warmupComplete, completedSentences, pendingSentence }) => {
  const containerRef = useRef(null);

  const allSentences = [...completedSentences, pendingSentence];
  // drop the empty pending sentence so we don't render a blank entry
  if (pendingSentence.length === 0 && allSentences.length > 1) {
    allSentences.pop();
  }

  // keep the container scrolled to the newest text
  useEffect(() => {
    if (containerRef.current) {
      containerRef.current.scrollTop = containerRef.current.scrollHeight;
    }
  }, [completedSentences, pendingSentence]);

  // The wrapping markup below is a minimal reconstruction of elements stripped
  // in transit; the ref must land on the scrollable container, but the element
  // and class names are assumptions.
  return (
    <div className="text-output" ref={containerRef}>
      {warmupComplete ? (
        allSentences.map((sentence, index) => (
          <div key={index} className="sentence">
            {sentence}
          </div>
        )).reverse()
      ) : (
        <div className="warmup-notice">
          Warming up model...
        </div>
      )}
    </div>
  );
};

const container = document.getElementById("react");
ReactDOM.createRoot(container).render(<App />);