changed for prod

main
alex 2024-11-19 19:58:28 +01:00
parent 7c7feff90e
commit 4f4f90d7ad
3 changed files with 622 additions and 581 deletions

View File

@ -1,146 +1,174 @@
import express from "express";
import path from "path";
import { Request, Response } from "express";
import http from "http";
import { Server } from "socket.io";
import dotenv from "dotenv";
import { RealtimeClient, RealtimeUtils } from "@openai/realtime-api-beta";
import { voiceEngineSocketUser } from "./voiceEngine";

dotenv.config();

const app = express();

if (!process.env.PORT) {
  console.error("Please set the PORT environment variable");
  process.exit(1);
}

const PORT = parseInt(process.env.PORT, 10);
const publicFolder = path.join(__dirname, "public");

app.use(express.static(publicFolder));

app.get("/", (req: Request, res: Response) => {
  res.sendFile(path.join(publicFolder, "index.html"));
});

const server = http.createServer(app);
const io = new Server(server);

interface ConversationItem {
  [key: string]: any;
}

io.on("connection", (socket) => {
  console.log("A user connected");

  voiceEngineSocketUser(socket);

  let gptClient: null | RealtimeClient = null;

  socket.on("start", async (data) => {
    gptClient = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY });

    (async () => {
      try {
        await gptClient.connect();
        console.log("Connected to OpenAI Realtime API");

        socket.on("voice-data", async (audioBuffer) => {
          try {
            console.log("Voice data received");

            // Send user audio, must be Int16Array or ArrayBuffer
            // Default audio format is pcm16 with sample rate of 24,000 Hz
            if (audioBuffer instanceof Float32Array) {
              console.log("Received audio data from the input worklet:");
              if (gptClient)
                gptClient.appendInputAudio(
                  RealtimeUtils.floatTo16BitPCM(audioBuffer)
                );
            } else if (audioBuffer instanceof Buffer) {
              console.log("Received audio data as Buffer:");
              // Convert Buffer to ArrayBuffer
              const arrayBuffer = audioBuffer.buffer.slice(
                audioBuffer.byteOffset,
                audioBuffer.byteOffset + audioBuffer.byteLength
              );
              // Reinterpret the ArrayBuffer as a Float32Array
              const float32Array = new Float32Array(arrayBuffer);
              if (gptClient)
                gptClient.appendInputAudio(
                  RealtimeUtils.floatTo16BitPCM(float32Array)
                );
            } else {
              console.error("Invalid data type received in worklet");
              // log the data type and return
              console.log("Data type:", typeof audioBuffer, audioBuffer);
              return;
            }
          } catch (error) {
            console.error("Error with OpenAI Realtime API:", error);
          }
        });

        gptClient.on("conversation.updated", (event: ConversationItem) => {
          const { item, delta } = event;
          if (item.content) {
            socket.emit("openai-response", item.content);
          }
          console.log("Playing audio response...", delta);
          if (delta && delta.audio) {
            socket.emit("openai-audio", delta.audio);
          }
          console.log("Conversation updated:", event);
        });

        gptClient.on(
          "conversation.item.completed",
          (event: ConversationItem) => {
            const { item } = event;
            console.log("Conversation item completed:", item);
            if (
              item.type === "message" &&
              item.role === "assistant" &&
              item.formatted &&
              item.formatted.audio
            ) {
              console.log("Playing audio response...");
              //socket.emit('openai-audio', item.formatted.audio);
            } else {
              console.log("No audio content in this item.");
            }
          }
        );
      } catch (error) {
        console.error("Error connecting to OpenAI Realtime API:", error);
      }
    })();

    socket.on("disconnect", () => {
      console.log("A user disconnected");
      if (gptClient) gptClient.disconnect();
    });

    socket.on("end", () => {
      console.log("A user ended the conversation");
      if (gptClient) gptClient.disconnect();
    });

    gptClient.updateSession({
      instructions: `Du bist beim Kundensupport von Jannex und möchtest eine Erinnerung für einen Termin nachfragen.
Bitte sprich mit einer ruhigen Stimme.`,
    });
    gptClient.updateSession({ voice: "ballad" });
    gptClient.updateSession({
      turn_detection: {
        type: "server_vad",
        threshold: 0.6,
        prefix_padding_ms: 300,
        silence_duration_ms: 500,
      },
      input_audio_transcription: { model: "whisper-1" },
      input_audio_format: "pcm16",
      output_audio_format: "pcm16",
      max_response_output_tokens: 1500,
      modalities: ["audio", "text"],
    });

    gptClient.on("conversation.updated", (event: ConversationItem) => {
      const { item, delta } = event;
      if (gptClient) {
        const items = gptClient.conversation.getItems();
      }
      // Handle the updated conversation items
    });

    //gptClient.sendUserMessageContent([{ type: 'input_text', text: `Wie geht es dir?` }]);
    gptClient.createResponse();
  });
});

server.listen(PORT, "127.0.0.1", () => {
  console.log(`Server läuft unter http://localhost:${PORT}`);
});
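Both the Float32Array and Buffer branches above funnel audio through RealtimeUtils.floatTo16BitPCM before it is appended to the Realtime client. As a rough mental model, a float-to-PCM16 conversion of this kind looks like the sketch below; this is an assumption about what the library does internally, not its actual source.

// Hedged sketch of a Float32 -> 16-bit PCM conversion; the real
// RealtimeUtils.floatTo16BitPCM may differ in details such as rounding.
function floatTo16BitPCMSketch(float32Array: Float32Array): ArrayBuffer {
  const buffer = new ArrayBuffer(float32Array.length * 2);
  const view = new DataView(buffer);
  for (let i = 0; i < float32Array.length; i++) {
    // Clamp to [-1, 1], then scale to the signed 16-bit range
    const s = Math.max(-1, Math.min(1, float32Array[i]));
    view.setInt16(i * 2, s < 0 ? s * 0x8000 : s * 0x7fff, true); // little-endian
  }
  return buffer;
}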

View File

@ -1,276 +1,272 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta
      name="viewport"
      content="width=device-width, initial-scale=1.0, user-scalable=no"
    />
    <!-- Theme color for Chrome, Firefox OS and Opera -->
    <meta name="theme-color" content="#fd9644" />
    <!-- Theme color for Windows Phone -->
    <meta name="msapplication-navbutton-color" content="#fd9644" />
    <!-- Theme color for iOS Safari -->
    <meta
      name="apple-mobile-web-app-status-bar-style"
      content="black-translucent"
    />
    <meta name="apple-mobile-web-app-capable" content="yes" />

    <title>Voice Call with a voice bot</title>

    <style>
      body {
        display: flex;
        justify-content: center;
        align-items: center;
        height: 100vh;
        margin: 0;
        font-family: "Roboto", "Helvetica", "Arial", sans-serif;
      }
      .center-container {
        text-align: center;
      }
      .material-button {
        font-family: "Roboto", "Helvetica", "Arial", sans-serif;
        display: inline-block;
        padding: 0 16px;
        height: 36px;
        font-size: 14px;
        font-weight: 500;
        line-height: 36px;
        border-radius: 4px;
        text-transform: uppercase;
        text-align: center;
        letter-spacing: 0.5px;
        overflow: hidden;
        will-change: box-shadow;
        transition: box-shadow 0.2s cubic-bezier(0.4, 0, 0.2, 1);
        border: none;
        cursor: pointer;
        color: #fff;
        background-color: #fd9644;
        box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14),
          0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12);
      }
      .material-button:hover {
        box-shadow: 0 4px 5px 0 rgba(0, 0, 0, 0.14),
          0 1px 10px 0 rgba(0, 0, 0, 0.12), 0 2px 4px -1px rgba(0, 0, 0, 0.2);
      }
      .material-button:active {
        box-shadow: 0 8px 10px 1px rgba(0, 0, 0, 0.14),
          0 3px 14px 2px rgba(0, 0, 0, 0.12), 0 5px 5px -3px rgba(0, 0, 0, 0.2);
      }
    </style>
  </head>
  <body>
    <div class="center-container">
      <h1>Voice Call with a Voice Bot</h1>
      <button id="startCallButton" class="material-button">Start Call</button>
      <p id="status">Status: Idle</p>
      <h3>Input:</h3>
      <p id="text-input">---</p>
      <h3>Output:</h3>
      <p id="text-output">---</p>
    </div>

    <script src="/socket.io/socket.io.js"></script>
    <script>
      const startCallButton = document.getElementById("startCallButton");
      const status = document.getElementById("status");
      const debugTextInput = document.getElementById("text-input");
      const debugTextOutput = document.getElementById("text-output");

      const socket = io(
        `${window.location.protocol}//${window.location.hostname}`
      ); // Connect to your server

      let localStream;
      let audioContext;
      let processor;
      let started = false;
      let audioQueue = [];

      startCallButton.addEventListener("click", async () => {
        if (started) {
          socket.emit("bot-end");
          processor.disconnect(audioContext.destination);
          localStream.getTracks().forEach((track) => track.stop());
          localStream = null;
          audioContext.close();
          audioContext = null;
          processor = null;
          startCallButton.textContent = "Start Call";
          status.textContent = "Status: Call ended";
          started = false;
          return;
        }
        started = true;
        startCallButton.textContent = "End Call";
        status.textContent = "Status: Starting call...";
        console.log("Starting call...");

        // Get local audio stream
        localStream = await navigator.mediaDevices.getUserMedia({
          audio: true,
        });
        console.log("Local audio stream obtained:", localStream);

        socket.emit("bot-start");

        // Create AudioContext with sample rate of 24000 Hz
        audioContext = new (window.AudioContext || window.webkitAudioContext)({
          sampleRate: 24000,
        });
        const source = audioContext.createMediaStreamSource(localStream);
        processor = audioContext.createScriptProcessor(4096, 1, 1);

        // Process audio data
        processor.onaudioprocess = (event) => {
          const inputData = event.inputBuffer.getChannelData(0);
          socket.emit("bot-voice-data", inputData); // Send as ArrayBuffer

          // echo audio locally
          /*const outputData = event.outputBuffer.getChannelData(0);
          for (let sample = 0; sample < inputData.length; sample++) {
            outputData[sample] = inputData[sample];
          }*/

          let outputData = event.outputBuffer.getChannelData(0);
          if (audioQueue.length > 0) {
            const chunk = audioQueue.splice(0, outputData.length);
            for (let i = 0; i < chunk.length; i++) {
              outputData[i] = chunk[i];
            }
          } else {
            for (let i = 0; i < outputData.length; i++) {
              outputData[i] = 0;
            }
          }
        };

        source.connect(processor);
        processor.connect(audioContext.destination);

        status.textContent = "Status: Call started";
      });

      socket.on("openai-response", (data) => {
        console.log("OpenAI response received:", data);
        try {
          // Show the transcript in the output field
          debugTextOutput.textContent = "Response: " + data[0].transcript;
        } catch (error) {}
      });

      socket.on("debug-text-input", (data) => {
        debugTextInput.textContent = data;
      });

      socket.on("debug-text-output", (data) => {
        debugTextOutput.textContent = data;
      });

      let dataSum = 0;
      let lastByte = undefined;
      let currentAudioID = "";

      socket.on("openai-audio-start", (randomUUID) => {
        console.log("OpenAI audio start:", randomUUID);
        currentAudioID = randomUUID;
        dataSum = 0;
      });

      socket.on("openai-audio", async (data, randomUUID) => {
        console.log("OpenAI audio received:", data, randomUUID);
        if (currentAudioID !== randomUUID) {
          return;
        }
        // Log the received data
        console.log("Received data type:", data.constructor.name);
        console.log("Received data:", data);

        // Ensure data is an ArrayBuffer
        if (!(data instanceof ArrayBuffer)) {
          console.error("Received data is not an ArrayBuffer");
          return;
        }

        dataSum += data.byteLength;
        console.log("Received data sum:", dataSum);

        try {
          // Check if there was an odd byte from the previous chunk
          if (lastByte !== undefined) {
            // Combine the last byte with the current data
            const lastData = new Uint8Array(
              lastByte.byteLength + data.byteLength
            );
            lastData.set(new Uint8Array(lastByte), 0);
            lastData.set(new Uint8Array(data), lastByte.byteLength);
            data = lastData.buffer; // keep data an ArrayBuffer for the Int16Array view below
            lastByte = undefined; // Reset lastByte since it has been processed
          }

          // Check if the combined data has an odd number of bytes
          if (data.byteLength % 2 !== 0) {
            // Save the last byte for the next chunk
            lastByte = data.slice(data.byteLength - 1);
            // Remove the last byte from the current data
            data = data.slice(0, data.byteLength - 1);
          }

          // Convert ArrayBuffer to Int16Array
          const int16Array = new Int16Array(data);

          // Normalize PCM16 data to the range [-1, 1]
          const float32Array = new Float32Array(int16Array.length);
          for (let i = 0; i < int16Array.length; i++) {
            float32Array[i] = int16Array[i] / 0x7fff;
          }
          console.log("Normalized data:", float32Array.length);

          // Add the normalized data to the audio queue
          for (let i = 0; i < float32Array.length; i++) {
            audioQueue.push(float32Array[i]);
          }
        } catch (error) {
          console.error("Error processing audio data:", error);
        }
      });

      socket.on("bot-stopSpeaking", () => {
        console.log("Bot stopped speaking");
        audioQueue = [];
        // Drain any audio that is still queued so playback fades out
        const fadeOut = setInterval(() => {
          if (audioQueue.length > 0) {
            audioQueue = audioQueue.slice(0, audioQueue.length - 1);
          } else {
            clearInterval(fadeOut);
          }
        }, 100);
      });
    </script>
  </body>
</html>
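The openai-audio handler above carries a dangling byte between chunks because an Int16Array view requires an even byte length. The same idea as a standalone, testable helper (a sketch for illustration, not code from this page):

// Sketch: re-align streamed PCM16 chunks on 2-byte boundaries.
let carry: Uint8Array | undefined;

function alignChunk(chunk: Uint8Array): Int16Array {
  let data = chunk;
  if (carry) {
    // Prepend the byte left over from the previous chunk
    const joined = new Uint8Array(carry.length + chunk.length);
    joined.set(carry, 0);
    joined.set(chunk, carry.length);
    data = joined;
    carry = undefined;
  }
  if (data.length % 2 !== 0) {
    carry = data.slice(-1); // save the odd trailing byte for the next chunk
    data = data.slice(0, -1);
  }
  // Copy into a fresh buffer so the Int16Array view is correctly aligned
  return new Int16Array(data.slice().buffer);
}

// Example: alignChunk(new Uint8Array([0, 1, 2])) yields one 16-bit sample
// and carries byte 2 into the next call.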

View File

@ -1,59 +1,61 @@
import { Socket } from "socket.io";
import { DefaultEventsMap } from "socket.io/dist/typed-events";
import { createClient, LiveTranscriptionEvents } from "@deepgram/sdk";
import dotenv from "dotenv";
import { RealtimeUtils } from "@openai/realtime-api-beta";
import { ElevenLabsClient } from "elevenlabs";
import { OptimizeStreamingLatency } from "elevenlabs/api";
import { Writable } from "stream";
import Cartesia from "@cartesia/cartesia-js";
import WS from "ws";
import OpenAI from "openai";
import { ChatCompletionMessageParam } from "openai/resources";
import elevenlabs_wss from "./elevenlabs_wss";
import fs from "fs";

dotenv.config();

import { AssemblyAI, RealtimeTranscript } from "assemblyai";

const assemblyAI = new AssemblyAI({
  apiKey: process.env.ASSEMBLYAI_API_KEY || "",
});

const cartesia = new Cartesia({
  apiKey: process.env.CARTESIA_API_KEY,
});

const openai = new OpenAI({
  apiKey: process.env.OpenAI_API_KEY,
});

// @ts-ignore
global.WebSocket = WS;

export function voiceEngineSocketUser(
  socket: Socket<DefaultEventsMap, DefaultEventsMap, DefaultEventsMap, any>
) {
  let transcript = "";
  let currentSpeachTime = 0;
  let currentSpeach: {
    charStartTimesMs: number[];
    chars: string[];
  }[] = [];

  function logTranscript(prefix: string, text: string) {
    transcript += prefix + ":\t" + text + "\n";
  }

  console.log("A user connected");

  socket.on("bot-start", async () => {
    let userMessages: ChatCompletionMessageParam[] = [
      {
        role: "system",
        content: `Telefonvertriebler bei Sentrovo
Rolle Du bist Kevin von Sentrovo, einer KI-Agentur, die spezialisierte KI-Dienstleistungen anbietet. Deine Hauptaufgabe besteht darin, potenzielle Kunden durch Outbound-Anrufe zu erreichen und ein Kennenlerngespräch für ein KI-Audit zu vereinbaren. Du kennst die typischen Vorteile von KI-Lösungen für Unternehmen und kannst anschaulich erklären, wie ein Audit dabei hilft, Potenziale zu identifizieren.
Heutiges Datum: {{now}}

@ -122,187 +124,199 @@ Bestätigung und Abschluss

Super, ich habe den Termin am [Datum] um [Uhrzeit] für uns eingetragen. Ich freue mich auf das Gespräch und bedanke mich für Ihre Zeit. Bis dann!
Taktik: Positiv und locker abschließen, um den Termin als einfach und unverbindlich zu präsentieren.
Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und die Hemmschwelle für ein Kennenlerngespräch zu senken.`,
      },
    ];

    const vars: { [key: string]: string } = {
      Vorname: "Max",
      Nachname: "Mustermann",
      Unternehmen: "Musterfirma",
      Position: "Geschäftsführer",
      now: new Date().toLocaleDateString(),
    };

    for (const message of userMessages) {
      if (message.content) {
        for (const key in vars) {
          if (message.content && message.role === "system") {
            if (typeof message.content === "string") {
              message.content = message.content.replace(
                new RegExp(`{{${key}}}`, "g"),
                vars[key as keyof typeof vars]
              );
            }
          }
        }
      }
    }

    console.log("Bot started");

    async function speakText(text: string) {
      console.log("Generated message:", text);
      const time = new Date().getTime();
      let lastTime = 0;
      let firstMessage = true;
      const labs11 = await elevenlabs_wss.connect(
        {
          voice_id: "N2lVS1w4EtoT3dr4eOWO",
          model_id: "eleven_turbo_v2_5",
          optimize_streaming_latency: 4,
          output_format: "pcm_24000",
          language_code: "de",
          //sync_alignment: true,
          inactivity_timeout: 20,
        },
        (data, randomUUID) => {
          if (!data.audio) return;
          const audioBuffer = Buffer.from(data.audio, "base64");
          const audioBufferArray = new Uint8Array(audioBuffer);
          socket.emit("openai-audio", audioBufferArray, randomUUID);
          console.log("Received audio data from Eleven Labs");
          if (data.normalizedAlignment) {
            if (firstMessage) {
              firstMessage = false;
              currentSpeachTime = new Date().getTime();
              currentSpeach = [];
            }
            currentSpeach.push(data.normalizedAlignment);
          }
        }
      );
      const ws11 = labs11.socket;

      socket.emit("openai-audio-start", labs11.randomUUID);
      console.log(
        "Connected to Eleven Labs. Took",
        new Date().getTime() - time,
        "ms"
      );

      elevenlabs_wss.generate(ws11, {
        flush: true,
        text: text + " ",
        voice_settings: {
          style: 0.5,
          use_speaker_boost: true,
          stability: 0.5,
          similarity_boost: 0.5,
        },
      });
      elevenlabs_wss.generate(ws11, {
        //flush: true,
        text: "",
      });
    }

    async function generateVoiceMessage() {
      const output = await openai.beta.chat.completions.parse({
        model: "gpt-4o",
        temperature: 0.8,
        //max_completion_tokens: 100,
        messages: [...userMessages],
      });
      const text = output.choices[0].message.content;
      if (!text) return;
      speakText(text);
    }

    function addMessageToUser(message: ChatCompletionMessageParam) {
      userMessages.push(message);
    }

    function addLastMessageToChat() {
      let oldText = "";
      let lastMs = 0;
      const speakOffset = new Date().getTime() - currentSpeachTime;
      let inrerrupt = false;
      for (const alignment of currentSpeach) {
        let index = 0;
        for (const char of alignment.chars) {
          const ms = alignment.charStartTimesMs[index];
          if (lastMs + ms < speakOffset) {
            oldText += char;
          } else {
            inrerrupt = true;
            break;
          }
          index++;
        }
        lastMs =
          alignment.charStartTimesMs[alignment.charStartTimesMs.length - 1];
      }
      if (inrerrupt) {
        oldText += " ... **ABGEBROCHEN**";
      }
      if (oldText) {
        addMessageToUser({
          role: "assistant",
          content: oldText,
        });
      }
      logTranscript("AI", oldText);
    }

    speakText(
      "Hallo hier ist Kevin von Sentrovo, spreche ich hier mit Herr Mustermann?"
    );

    const transcriber = assemblyAI.realtime.transcriber({
      sampleRate: 16_000,
      encoding: "pcm_s16le",
    });

    transcriber.on("open", ({ sessionId }) => {
      console.log(`Session opened with ID: ${sessionId}`);
    });

    transcriber.on("error", (error: Error) => {
      console.error("Error:", error);
    });

    transcriber.on("close", (code: number, reason: string) =>
      console.log("Session closed:", code, reason)
    );

    transcriber.on("transcript", (transcript: RealtimeTranscript) => {
      if (!transcript.text) {
        return;
      }
      if (transcript.message_type === "PartialTranscript") {
        console.log("Partial:", transcript.text);
      } else {
        console.log("Final:", transcript.text);
      }
    });

    console.log("Connecting to real-time transcript service");
    await transcriber.connect();

    console.log("Starting recording");

    /*
    {
      //data.speech_final
      if (data.is_final) {

@ -336,47 +350,50 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di

      //console.log('ts\t', data);
    } */

    // STEP 4: Fetch the audio stream and send it to the live transcription connection
    socket.on("bot-voice-data", (audioBuffer: any) => {
      // Convert Buffer to ArrayBuffer
      const arrayBuffer = audioBuffer.buffer.slice(
        audioBuffer.byteOffset,
        audioBuffer.byteOffset + audioBuffer.byteLength
      );
      // Reinterpret the ArrayBuffer as a Float32Array
      const float32Array = new Float32Array(arrayBuffer);
      //console.log('Received audio data from User:', data);
      //dgConnection.send(RealtimeUtils.floatTo16BitPCM(float32Array));
      //transcriber.stream()
      const audioBufferArray = RealtimeUtils.floatTo16BitPCM(float32Array);
      transcriber.sendAudio(audioBufferArray);
    });

    function stopConversation() {
      console.log("Ending conversation");
      addLastMessageToChat();
      //dgConnection.disconnect();
      socket.emit("bot-stopSpeaking");
      // write the transcript to a file
      const uuid = new Date().getTime();
      // create the folder first
      fs.mkdirSync(`transcripts/${uuid}`, { recursive: true });
      fs.writeFileSync(`transcripts/${uuid}/transcript.txt`, transcript);
    }

    socket.on("bot-end", () => {
      stopConversation();
    });

    socket.on("disconnect", () => {
      stopConversation();
      console.log("A user disconnected");
    });
  });
}
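addLastMessageToChat decides how much of the assistant's turn was actually heard by comparing elapsed playback time against the per-character timestamps collected from the normalizedAlignment chunks. The same logic as an isolated sketch (a hypothetical standalone form that mirrors the loop above):

// Sketch: reconstruct the spoken prefix from ElevenLabs alignment data.
interface AlignmentChunk {
  chars: string[];
  charStartTimesMs: number[];
}

function spokenPrefix(chunks: AlignmentChunk[], elapsedMs: number): string {
  let text = "";
  let baseMs = 0; // offset carried over from the previous chunk
  for (const chunk of chunks) {
    for (let i = 0; i < chunk.chars.length; i++) {
      if (baseMs + chunk.charStartTimesMs[i] >= elapsedMs) {
        // The bot was interrupted before this character played
        return text + " ... **ABGEBROCHEN**";
      }
      text += chunk.chars[i];
    }
    baseMs = chunk.charStartTimesMs[chunk.charStartTimesMs.length - 1];
  }
  return text; // playback finished; nothing was cut off
}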