changed for prod

2024-11-19 19:58:28 +01:00 · 2024-11-19 19:58:28 +01:00 · 4f4f90d7ad
parent 7c7feff90e
commit 4f4f90d7ad
3 changed files with 622 additions and 581 deletions
--- a/src/index.ts
+++ b/src/index.ts
@ -1,146 +1,174 @@
-import express from 'express';
-import path from 'path';
-import { Request, Response } from 'express';
-import http from 'http';
-import { Server } from 'socket.io';
-import dotenv from 'dotenv';
-import { RealtimeClient, RealtimeUtils } from '@openai/realtime-api-beta';
-import { voiceEngineSocketUser } from './voiceEngine';
+import express from "express";
+import path from "path";
+import { Request, Response } from "express";
+import http from "http";
+import { Server } from "socket.io";
+import dotenv from "dotenv";
+import { RealtimeClient, RealtimeUtils } from "@openai/realtime-api-beta";
+import { voiceEngineSocketUser } from "./voiceEngine";

 dotenv.config();

 const app = express();
-const PORT = 50269;

-const publicFolder = path.join(__dirname, 'public');
+if (!process.env.PORT) {
+  console.error("Please set the PORT environment variable");
+  process.exit(1);
+}
+
+const PORT = parseInt(process.env.PORT, 10);
+
+const publicFolder = path.join(__dirname, "public");
 app.use(express.static(publicFolder));

-app.get('/', (req: Request, res: Response) => {
-    res.sendFile(path.join(publicFolder, 'index.html'));
+app.get("/", (req: Request, res: Response) => {
+  res.sendFile(path.join(publicFolder, "index.html"));
 });

 const server = http.createServer(app);
 const io = new Server(server);

 interface ConversationItem {
-    [key: string]: any;
+  [key: string]: any;
 }

-io.on('connection', (socket) => {
-    console.log('A user connected');
+io.on("connection", (socket) => {
+  console.log("A user connected");

-    voiceEngineSocketUser(socket);
+  voiceEngineSocketUser(socket);

-    let gptClient: null | RealtimeClient = null;
+  let gptClient: null | RealtimeClient = null;

-    socket.on('start', async (data) => {
-        gptClient = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY });
+  socket.on("start", async (data) => {
+    gptClient = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY });

-        (async () => {
-            try {
-                await gptClient.connect();
+    (async () => {
+      try {
+        await gptClient.connect();

-                console.log('Connected to OpenAI Realtime API');
+        console.log("Connected to OpenAI Realtime API");

-                socket.on('voice-data', async (audioBuffer) => {
-                    try {
-                        console.log('Voice data received');
+        socket.on("voice-data", async (audioBuffer) => {
+          try {
+            console.log("Voice data received");

-                        // Send user audio, must be Int16Array or ArrayBuffer
-                        // Default audio format is pcm16 with sample rate of 24,000 Hz
+            // Send user audio, must be Int16Array or ArrayBuffer
+            // Default audio format is pcm16 with sample rate of 24,000 Hz

-                        if (audioBuffer instanceof Float32Array) {
-                            console.log('Received audio data from the input worklet:');
+            if (audioBuffer instanceof Float32Array) {
+              console.log("Received audio data from the input worklet:");

-                            if (gptClient) gptClient.appendInputAudio(RealtimeUtils.floatTo16BitPCM(audioBuffer));
-                        } else if (audioBuffer instanceof Buffer) {
-                            console.log('Received audio data as Buffer:');
+              if (gptClient)
+                gptClient.appendInputAudio(
+                  RealtimeUtils.floatTo16BitPCM(audioBuffer)
+                );
+            } else if (audioBuffer instanceof Buffer) {
+              console.log("Received audio data as Buffer:");

-                            // Convert Buffer to ArrayBuffer
-                            const arrayBuffer = audioBuffer.buffer.slice(audioBuffer.byteOffset, audioBuffer.byteOffset + audioBuffer.byteLength);
+              // Convert Buffer to ArrayBuffer
+              const arrayBuffer = audioBuffer.buffer.slice(
+                audioBuffer.byteOffset,
+                audioBuffer.byteOffset + audioBuffer.byteLength
+              );

-                            // Convert ArrayBuffer to Int16Array
-                            const float32Array = new Float32Array(arrayBuffer);
+              // Convert ArrayBuffer to Int16Array
+              const float32Array = new Float32Array(arrayBuffer);

-                            if (gptClient) gptClient.appendInputAudio(RealtimeUtils.floatTo16BitPCM(float32Array));
-                        } else {
-                            console.error('Invalid data type received in worklet');
+              if (gptClient)
+                gptClient.appendInputAudio(
+                  RealtimeUtils.floatTo16BitPCM(float32Array)
+                );
+            } else {
+              console.error("Invalid data type received in worklet");

-                            // log the data type and return
-                            console.log('Data type:', typeof audioBuffer, audioBuffer);
-                            return;
-                        }
-                    } catch (error) {
-                        console.error('Error with OpenAI Realtime API:', error);
-                    }
-                });
-
-                gptClient.on('conversation.updated', (event: ConversationItem) => {
-                    const { item, delta } = event;
-                    if (item.content) {
-                        socket.emit('openai-response', item.content);
-                    }
-                    console.log('Playing audio response...', delta);
-                    if (delta && delta.audio) {
-                        socket.emit('openai-audio', delta.audio);
-                    }
-                    console.log('Conversation updated:', event);
-                });
-
-                gptClient.on('conversation.item.completed', (event: ConversationItem) => {
-                    const { item } = event;
-                    console.log('Conversation item completed:', item);
-
-                    if (item.type === 'message' && item.role === 'assistant' && item.formatted && item.formatted.audio) {
-                        console.log('Playing audio response...');
-                        //socket.emit('openai-audio', item.formatted.audio);
-                    } else {
-                        console.log('No audio content in this item.');
-                    }
-                });
-            } catch (error) {
-                console.error('Error connecting to OpenAI Realtime API:', error);
+              // log the data type and return
+              console.log("Data type:", typeof audioBuffer, audioBuffer);
+              return;
            }
-        })();
-
-        socket.on('disconnect', () => {
-            console.log('A user disconnected');
-
-            if (gptClient) gptClient.disconnect();
+          } catch (error) {
+            console.error("Error with OpenAI Realtime API:", error);
+          }
        });

-        socket.on('end', () => {
-            console.log('A user ended the conversation');
-            if (gptClient) gptClient.disconnect();
+        gptClient.on("conversation.updated", (event: ConversationItem) => {
+          const { item, delta } = event;
+          if (item.content) {
+            socket.emit("openai-response", item.content);
+          }
+          console.log("Playing audio response...", delta);
+          if (delta && delta.audio) {
+            socket.emit("openai-audio", delta.audio);
+          }
+          console.log("Conversation updated:", event);
        });

-        gptClient.updateSession({
-            instructions: `Du bist beim Kundensupport von Jannex und möchtest eine Erinnerung für ein Termin nachfragen.
-Bitte spreche mit einer ruhigen Stimme.`,
-        });
-        gptClient.updateSession({ voice: 'ballad' });
-        gptClient.updateSession({
-            turn_detection: { type: 'server_vad', threshold: 0.6, prefix_padding_ms: 300, silence_duration_ms: 500 },
-            input_audio_transcription: { model: 'whisper-1' },
-            input_audio_format: 'pcm16',
-            output_audio_format: 'pcm16',
-            max_response_output_tokens: 1500,
-            modalities: ['audio', 'text'],
-        });
+        gptClient.on(
+          "conversation.item.completed",
+          (event: ConversationItem) => {
+            const { item } = event;
+            console.log("Conversation item completed:", item);

-        gptClient.on('conversation.updated', (event: ConversationItem) => {
-            const { item, delta } = event;
-            if (gptClient) {
-                const items = gptClient.conversation.getItems();
+            if (
+              item.type === "message" &&
+              item.role === "assistant" &&
+              item.formatted &&
+              item.formatted.audio
+            ) {
+              console.log("Playing audio response...");
+              //socket.emit('openai-audio', item.formatted.audio);
+            } else {
+              console.log("No audio content in this item.");
            }
-            // Handle the updated conversation items
-        });
+          }
+        );
+      } catch (error) {
+        console.error("Error connecting to OpenAI Realtime API:", error);
+      }
+    })();

-        //gptClient.sendUserMessageContent([{ type: 'input_text', text: `Wie geht es dir?` }]);
-        gptClient.createResponse();
+    socket.on("disconnect", () => {
+      console.log("A user disconnected");
+
+      if (gptClient) gptClient.disconnect();
    });
+
+    socket.on("end", () => {
+      console.log("A user ended the conversation");
+      if (gptClient) gptClient.disconnect();
+    });
+
+    gptClient.updateSession({
+      instructions: `Du bist beim Kundensupport von Jannex und möchtest eine Erinnerung für ein Termin nachfragen.
+Bitte spreche mit einer ruhigen Stimme.`,
+    });
+    gptClient.updateSession({ voice: "ballad" });
+    gptClient.updateSession({
+      turn_detection: {
+        type: "server_vad",
+        threshold: 0.6,
+        prefix_padding_ms: 300,
+        silence_duration_ms: 500,
+      },
+      input_audio_transcription: { model: "whisper-1" },
+      input_audio_format: "pcm16",
+      output_audio_format: "pcm16",
+      max_response_output_tokens: 1500,
+      modalities: ["audio", "text"],
+    });
+
+    gptClient.on("conversation.updated", (event: ConversationItem) => {
+      const { item, delta } = event;
+      if (gptClient) {
+        const items = gptClient.conversation.getItems();
+      }
+      // Handle the updated conversation items
+    });
+
+    //gptClient.sendUserMessageContent([{ type: 'input_text', text: `Wie geht es dir?` }]);
+    gptClient.createResponse();
+  });
 });

-server.listen(PORT, '127.0.0.1', () => {
-    console.log(`Server läuft unter http://localhost:${PORT}`);
+server.listen(PORT, "127.0.0.1", () => {
+  console.log(`Server läuft unter http://localhost:${PORT}`);
 });
--- a/src/public/index.html
+++ b/src/public/index.html
@ -1,276 +1,272 @@
 <!DOCTYPE html>
 <html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta
+      name="viewport"
+      content="width=device-width, initial-scale=1.0, user-scalable=no"
+    />
+    <!-- Theme color for Chrome, Firefox OS and Opera -->
+    <meta name="theme-color" content="#fd9644" />
+    <!-- Theme color for Windows Phone -->
+    <meta name="msapplication-navbutton-color" content="#fd9644" />
+    <!-- Theme color for iOS Safari -->
+    <meta
+      name="apple-mobile-web-app-status-bar-style"
+      content="black-translucent"
+    />
+    <meta name="apple-mobile-web-app-capable" content="yes" />

-<head>
-  <meta charset="UTF-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no">
-  <!-- Theme color for Chrome, Firefox OS and Opera -->
-  <meta name="theme-color" content="#fd9644">
-  <!-- Theme color for Windows Phone -->
-  <meta name="msapplication-navbutton-color" content="#fd9644">
-  <!-- Theme color for iOS Safari -->
-  <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
-  <meta name="apple-mobile-web-app-capable" content="yes">
-
-
-
-  <title>Voice Call with a voice bot</title>
-  <style>
-    body {
-      display: flex;
-      justify-content: center;
-      align-items: center;
-      height: 100vh;
-      margin: 0;
-      font-family: 'Roboto', 'Helvetica', 'Arial', sans-serif;
-    }
-
-    .center-container {
-      text-align: center;
-    }
-
-    .material-button {
-      font-family: 'Roboto', 'Helvetica', 'Arial', sans-serif;
-
-      display: inline-block;
-      padding: 0 16px;
-      height: 36px;
-      font-size: 14px;
-      font-weight: 500;
-      line-height: 36px;
-      border-radius: 4px;
-      text-transform: uppercase;
-      text-align: center;
-      letter-spacing: 0.5px;
-      overflow: hidden;
-      will-change: box-shadow;
-      transition: box-shadow 0.2s cubic-bezier(0.4, 0, 0.2, 1);
-      border: none;
-      cursor: pointer;
-      color: #fff;
-      background-color: #fd9644;
-      box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14), 0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12);
-    }
-
-    .material-button:hover {
-      box-shadow: 0 4px 5px 0 rgba(0, 0, 0, 0.14), 0 1px 10px 0 rgba(0, 0, 0, 0.12), 0 2px 4px -1px rgba(0, 0, 0, 0.2);
-    }
-
-    .material-button:active {
-      box-shadow: 0 8px 10px 1px rgba(0, 0, 0, 0.14), 0 3px 14px 2px rgba(0, 0, 0, 0.12), 0 5px 5px -3px rgba(0, 0, 0, 0.2);
-    }
-  </style>
-</head>
-
-<body>
-  <div class="center-container">
-    <h1>Voice Call with a Voice Bot</h1>
-    <button id="startCallButton" class="material-button">Start Call</button>
-    <p id="status">Status: Idle</p>
-    <h3>Input: </h3>
-    <p id="text-input">---</p>
-    <h3>Output: </h3>
-    <p id="text-output">---</p>
-  </div>
-
-  <script src="/socket.io/socket.io.js"></script>
-  <script>
-    const startCallButton = document.getElementById('startCallButton');
-    const status = document.getElementById('status');
-    const debugTextInput = document.getElementById('text-input');
-    const debugTextOutput = document.getElementById('text-output');
-
-    const socket = io('https://voice-test.ex.umbach.dev/'); // Connect to your server
-    let localStream;
-    let audioContext;
-    let processor;
-    let started = false;
-
-    let audioQueue = [];
-
-    startCallButton.addEventListener('click', async () => {
-      if (started) {
-        socket.emit('bot-end');
-        processor.disconnect(audioContext.destination);
-
-        localStream.getTracks().forEach(track => track.stop());
-        localStream = null;
-
-        audioContext.close();
-        audioContext = null;
-
-        processor = null;
-
-        startCallButton.textContent = 'Start Call';
-        status.textContent = 'Status: Call ended';
-        started = false;
-        return;
+    <title>Voice Call with a voice bot</title>
+    <style>
+      body {
+        display: flex;
+        justify-content: center;
+        align-items: center;
+        height: 100vh;
+        margin: 0;
+        font-family: "Roboto", "Helvetica", "Arial", sans-serif;
      }

-      started = true;
+      .center-container {
+        text-align: center;
+      }

-      startCallButton.textContent = 'End Call';
+      .material-button {
+        font-family: "Roboto", "Helvetica", "Arial", sans-serif;

-      status.textContent = 'Status: Starting call...';
-      console.log('Starting call...');
+        display: inline-block;
+        padding: 0 16px;
+        height: 36px;
+        font-size: 14px;
+        font-weight: 500;
+        line-height: 36px;
+        border-radius: 4px;
+        text-transform: uppercase;
+        text-align: center;
+        letter-spacing: 0.5px;
+        overflow: hidden;
+        will-change: box-shadow;
+        transition: box-shadow 0.2s cubic-bezier(0.4, 0, 0.2, 1);
+        border: none;
+        cursor: pointer;
+        color: #fff;
+        background-color: #fd9644;
+        box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14),
+          0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12);
+      }

+      .material-button:hover {
+        box-shadow: 0 4px 5px 0 rgba(0, 0, 0, 0.14),
+          0 1px 10px 0 rgba(0, 0, 0, 0.12), 0 2px 4px -1px rgba(0, 0, 0, 0.2);
+      }

+      .material-button:active {
+        box-shadow: 0 8px 10px 1px rgba(0, 0, 0, 0.14),
+          0 3px 14px 2px rgba(0, 0, 0, 0.12), 0 5px 5px -3px rgba(0, 0, 0, 0.2);
+      }
+    </style>
+  </head>

-      // Get local audio stream
-      localStream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      console.log('Local audio stream obtained:', localStream);
+  <body>
+    <div class="center-container">
+      <h1>Voice Call with a Voice Bot</h1>
+      <button id="startCallButton" class="material-button">Start Call</button>
+      <p id="status">Status: Idle</p>
+      <h3>Input:</h3>
+      <p id="text-input">---</p>
+      <h3>Output:</h3>
+      <p id="text-output">---</p>
+    </div>

-      socket.emit('bot-start');
+    <script src="/socket.io/socket.io.js"></script>
+    <script>
+      const startCallButton = document.getElementById("startCallButton");
+      const status = document.getElementById("status");
+      const debugTextInput = document.getElementById("text-input");
+      const debugTextOutput = document.getElementById("text-output");

-      // Create AudioContext with sample rate of 24000 Hz
-      audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 24000 });
-      const source = audioContext.createMediaStreamSource(localStream);
-      processor = audioContext.createScriptProcessor(4096, 1, 1);
+      const socket = io(
+        `${window.location.protocol}//${window.location.hostname}`
+      ); // Connect to your server
+      let localStream;
+      let audioContext;
+      let processor;
+      let started = false;

-      // Process audio data
-      processor.onaudioprocess = (event) => {
-        const inputData = event.inputBuffer.getChannelData(0);
+      let audioQueue = [];

-        socket.emit('bot-voice-data', inputData); // Send as ArrayBuffer
+      startCallButton.addEventListener("click", async () => {
+        if (started) {
+          socket.emit("bot-end");
+          processor.disconnect(audioContext.destination);

-        // echo audio locally
-        /*const outputData = event.outputBuffer.getChannelData(0);
+          localStream.getTracks().forEach((track) => track.stop());
+          localStream = null;
+
+          audioContext.close();
+          audioContext = null;
+
+          processor = null;
+
+          startCallButton.textContent = "Start Call";
+          status.textContent = "Status: Call ended";
+          started = false;
+          return;
+        }
+
+        started = true;
+
+        startCallButton.textContent = "End Call";
+
+        status.textContent = "Status: Starting call...";
+        console.log("Starting call...");
+
+        // Get local audio stream
+        localStream = await navigator.mediaDevices.getUserMedia({
+          audio: true,
+        });
+        console.log("Local audio stream obtained:", localStream);
+
+        socket.emit("bot-start");
+
+        // Create AudioContext with sample rate of 24000 Hz
+        audioContext = new (window.AudioContext || window.webkitAudioContext)({
+          sampleRate: 24000,
+        });
+        const source = audioContext.createMediaStreamSource(localStream);
+        processor = audioContext.createScriptProcessor(4096, 1, 1);
+
+        // Process audio data
+        processor.onaudioprocess = (event) => {
+          const inputData = event.inputBuffer.getChannelData(0);
+
+          socket.emit("bot-voice-data", inputData); // Send as ArrayBuffer
+
+          // echo audio locally
+          /*const outputData = event.outputBuffer.getChannelData(0);
        for (let sample = 0; sample < inputData.length; sample++) {
          outputData[sample] = inputData[sample];
        }*/

+          let outputData = event.outputBuffer.getChannelData(0);

-        let outputData = event.outputBuffer.getChannelData(0);
-
-        if (audioQueue.length > 0) {
-          const chunk = audioQueue.splice(0, outputData.length);
-          for (let i = 0; i < chunk.length; i++) {
-            outputData[i] = chunk[i];
+          if (audioQueue.length > 0) {
+            const chunk = audioQueue.splice(0, outputData.length);
+            for (let i = 0; i < chunk.length; i++) {
+              outputData[i] = chunk[i];
+            }
+          } else {
+            for (let i = 0; i < outputData.length; i++) {
+              outputData[i] = 0;
+            }
          }
-        } else {
-          for (let i = 0; i < outputData.length; i++) {
-            outputData[i] = 0;
+        };
+
+        source.connect(processor);
+        processor.connect(audioContext.destination);
+
+        status.textContent = "Status: Call started";
+      });
+
+      socket.on("openai-response", (data) => {
+        console.log("OpenAI response received:", data);
+        try {
+          response.textContent = "Response: " + data[0].transcript;
+        } catch (error) {}
+      });
+
+      socket.on("debug-text-input", (data) => {
+        debugTextInput.textContent = data;
+      });
+
+      socket.on("debug-text-output", (data) => {
+        debugTextOutput.textContent = data;
+      });
+
+      let dataSum = 0;
+      let lastByte = undefined;
+      let currentAudioID = "";
+
+      socket.on("openai-audio-start", (randomUUID) => {
+        console.log("OpenAI audio start:", randomUUID);
+        currentAudioID = randomUUID;
+        dataSum = 0;
+      });
+
+      socket.on("openai-audio", async (data, randomUUID) => {
+        console.log("OpenAI audio received:", data, randomUUID);
+        if (currentAudioID !== randomUUID) {
+          return;
+        }
+
+        // Log the received data
+        console.log("Received data type:", data.constructor.name);
+        console.log("Received data:", data);
+
+        // Ensure data is an ArrayBuffer
+        if (!(data instanceof ArrayBuffer)) {
+          console.error("Received data is not an ArrayBuffer");
+          return;
+        }
+
+        dataSum += data.byteLength;
+        console.log("Received data sum:", dataSum);
+
+        try {
+          // Check if there was an odd byte from the previous chunk
+          if (lastByte !== undefined) {
+            // Combine the last byte with the current data
+            const lastData = new Uint8Array(
+              lastByte.byteLength + data.byteLength
+            );
+            lastData.set(new Uint8Array(lastByte), 0);
+            lastData.set(new Uint8Array(data), lastByte.byteLength);
+            data = lastData;
+            lastByte = undefined; // Reset lastByte since it has been processed
          }
+
+          // Check if the combined data has an odd number of bytes
+          if (data.byteLength % 2 !== 0) {
+            // Save the last byte for the next chunk
+            lastByte = data.slice(data.byteLength - 1);
+            // Remove the last byte from the current data
+            data = data.slice(0, data.byteLength - 1);
+          }
+
+          // Convert ArrayBuffer to Int16Array
+          const int16Array = new Int16Array(data);
+
+          // Normalize PCM16 data to the range [-1, 1]
+          const float32Array = new Float32Array(int16Array.length);
+          for (let i = 0; i < int16Array.length; i++) {
+            float32Array[i] = int16Array[i] / 0x7fff;
+          }
+
+          console.log("Normalized data:", float32Array.length);
+
+          // Add the normalized data to the audio queue
+          for (let i = 0; i < float32Array.length; i++) {
+            audioQueue.push(float32Array[i]);
+          }
+        } catch (error) {
+          console.error("Error processing audio data:", error);
        }
+      });

+      socket.on("bot-stopSpeaking", () => {
+        console.log("Bot stopped speaking");
+        audioQueue = [];

-      };
-
-      source.connect(processor);
-      processor.connect(audioContext.destination);
-
-      status.textContent = 'Status: Call started';
-    });
-
-    socket.on('openai-response', (data) => {
-      console.log('OpenAI response received:', data);
-      try {
-        response.textContent = 'Response: ' + data[0].transcript;
-      } catch (error) {
-
-      }
-    });
-
-    socket.on('debug-text-input', (data) => {
-      debugTextInput.textContent = data;
-    });
-
-    socket.on('debug-text-output', (data) => {
-      debugTextOutput.textContent = data;
-    });
-
-
-    let dataSum = 0;
-    let lastByte = undefined;
-    let currentAudioID = "";
-
-    socket.on('openai-audio-start', (randomUUID) => {
-      console.log('OpenAI audio start:', randomUUID);
-      currentAudioID = randomUUID;
-      dataSum = 0;
-    });
-
-    socket.on('openai-audio', async (data, randomUUID) => {
-      console.log('OpenAI audio received:', data, randomUUID);
-      if (currentAudioID !== randomUUID) {
-        return;
-      }
-
-      // Log the received data
-      console.log('Received data type:', data.constructor.name);
-      console.log('Received data:', data);
-
-      // Ensure data is an ArrayBuffer
-      if (!(data instanceof ArrayBuffer)) {
-        console.error('Received data is not an ArrayBuffer');
-        return;
-      }
-
-      dataSum += data.byteLength;
-      console.log('Received data sum:', dataSum);
-
-
-
-      try {
-        // Check if there was an odd byte from the previous chunk
-        if (lastByte !== undefined) {
-          // Combine the last byte with the current data
-          const lastData = new Uint8Array(lastByte.byteLength + data.byteLength);
-          lastData.set(new Uint8Array(lastByte), 0);
-          lastData.set(new Uint8Array(data), lastByte.byteLength);
-          data = lastData;
-          lastByte = undefined; // Reset lastByte since it has been processed
-        }
-
-        // Check if the combined data has an odd number of bytes
-        if (data.byteLength % 2 !== 0) {
-          // Save the last byte for the next chunk
-          lastByte = data.slice(data.byteLength - 1);
-          // Remove the last byte from the current data
-          data = data.slice(0, data.byteLength - 1);
-        }
-
-
-
-
-        // Convert ArrayBuffer to Int16Array
-        const int16Array = new Int16Array(data);
-
-
-
-        // Normalize PCM16 data to the range [-1, 1]
-        const float32Array = new Float32Array(int16Array.length);
-        for (let i = 0; i < int16Array.length; i++) {
-          float32Array[i] = int16Array[i] / 0x7FFF;
-        }
-
-        console.log('Normalized data:', float32Array.length);
-
-        // Add the normalized data to the audio queue
-        for (let i = 0; i < float32Array.length; i++) {
-          audioQueue.push(float32Array[i]);
-        }
-
-      } catch (error) {
-        console.error('Error processing audio data:', error);
-      }
-    });
-
-    socket.on('bot-stopSpeaking', () => {
-      console.log('Bot stopped speaking');
-      audioQueue = [];
-
-      // Clear the audio queue fade out
-      const fadeOut = setInterval(() => {
-        if (audioQueue.length > 0) {
-          audioQueue = audioQueue.slice(0, audioQueue.length - 1);
-        } else {
-          clearInterval(fadeOut);
-        }
-      }, 100);
-
-    });
-  </script>
-</body>
-
+        // Clear the audio queue fade out
+        const fadeOut = setInterval(() => {
+          if (audioQueue.length > 0) {
+            audioQueue = audioQueue.slice(0, audioQueue.length - 1);
+          } else {
+            clearInterval(fadeOut);
+          }
+        }, 100);
+      });
+    </script>
+  </body>
 </html>
--- a/src/voiceEngine
+++ b/src/voiceEngine
@ -1,59 +1,61 @@
-import { Socket } from 'socket.io';
-import { DefaultEventsMap } from 'socket.io/dist/typed-events';
+import { Socket } from "socket.io";
+import { DefaultEventsMap } from "socket.io/dist/typed-events";

-import { createClient, LiveTranscriptionEvents } from '@deepgram/sdk';
+import { createClient, LiveTranscriptionEvents } from "@deepgram/sdk";

-import dotenv from 'dotenv';
-import { RealtimeUtils } from '@openai/realtime-api-beta';
-import { ElevenLabsClient } from 'elevenlabs';
-import { OptimizeStreamingLatency } from 'elevenlabs/api';
-import { Writable } from 'stream';
-import Cartesia from '@cartesia/cartesia-js';
-import WS from 'ws';
-import OpenAI from 'openai';
-import { ChatCompletionMessageParam } from 'openai/resources';
+import dotenv from "dotenv";
+import { RealtimeUtils } from "@openai/realtime-api-beta";
+import { ElevenLabsClient } from "elevenlabs";
+import { OptimizeStreamingLatency } from "elevenlabs/api";
+import { Writable } from "stream";
+import Cartesia from "@cartesia/cartesia-js";
+import WS from "ws";
+import OpenAI from "openai";
+import { ChatCompletionMessageParam } from "openai/resources";

-import elevenlabs_wss from './elevenlabs_wss';
-import fs from 'fs';
+import elevenlabs_wss from "./elevenlabs_wss";
+import fs from "fs";

 dotenv.config();

-import { AssemblyAI, RealtimeTranscript } from 'assemblyai';
+import { AssemblyAI, RealtimeTranscript } from "assemblyai";

 const assemblyAI = new AssemblyAI({
-    apiKey: process.env.ASSEMBLYAI_API_KEY || '',
+  apiKey: process.env.ASSEMBLYAI_API_KEY || "",
 });

 const cartesia = new Cartesia({
-    apiKey: process.env.CARTESIA_API_KEY,
+  apiKey: process.env.CARTESIA_API_KEY,
 });

 const openai = new OpenAI({
-    apiKey: process.env.OpenAI_API_KEY,
+  apiKey: process.env.OpenAI_API_KEY,
 });

 // @ts-ignore
 global.WebSocket = WS;

-export function voiceEngineSocketUser(socket: Socket<DefaultEventsMap, DefaultEventsMap, DefaultEventsMap, any>) {
-    let transcript = '';
-    let currentSpeachTime = 0;
-    let currentSpeach: {
-        charStartTimesMs: number[];
-        chars: string[];
-    }[] = [];
+export function voiceEngineSocketUser(
+  socket: Socket<DefaultEventsMap, DefaultEventsMap, DefaultEventsMap, any>
+) {
+  let transcript = "";
+  let currentSpeachTime = 0;
+  let currentSpeach: {
+    charStartTimesMs: number[];
+    chars: string[];
+  }[] = [];

-    function logTranscript(prefix: string, text: string) {
-        transcript += prefix + ':\t' + text + '\n';
-    }
+  function logTranscript(prefix: string, text: string) {
+    transcript += prefix + ":\t" + text + "\n";
+  }

-    console.log('A user connected');
+  console.log("A user connected");

-    socket.on('bot-start', async () => {
-        let userMessages: ChatCompletionMessageParam[] = [
-            {
-                role: 'system',
-                content: `Telefonvertriebler bei Sentrovo
+  socket.on("bot-start", async () => {
+    let userMessages: ChatCompletionMessageParam[] = [
+      {
+        role: "system",
+        content: `Telefonvertriebler bei Sentrovo
 Rolle Du bist Kevin von Sentrovo, einer KI-Agentur, die spezialisierte KI-Dienstleistungen anbietet. Deine Hauptaufgabe besteht darin, potenzielle Kunden durch Outbound-Anrufe zu erreichen und ein Kennenlerngespräch für ein KI-Audit zu vereinbaren. Du kennst die typischen Vorteile von KI-Lösungen für Unternehmen und kannst anschaulich erklären, wie ein Audit dabei hilft, Potenziale zu identifizieren.

 Heutiges Datum: {{now}}
@ -122,187 +124,199 @@ Bestätigung und Abschluss
 „Super, ich habe den Termin am [Datum] um [Uhrzeit] für uns eingetragen. Ich freue mich auf das Gespräch und bedanke mich für Ihre Zeit. Bis dann!“
 Taktik: Positiv und locker abschließen, um den Termin als einfach und unverbindlich zu präsentieren.
 Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und die Hemmschwelle für ein Kennenlerngespräch zu senken.`,
-            },
-        ];
+      },
+    ];

-        const vars: { [key: string]: string } = {
-            Vorname: 'Max',
-            Nachname: 'Mustermann',
-            Unternehmen: 'Musterfirma',
-            Position: 'Geschäftsführer',
-            now: new Date().toLocaleDateString(),
-        };
+    const vars: { [key: string]: string } = {
+      Vorname: "Max",
+      Nachname: "Mustermann",
+      Unternehmen: "Musterfirma",
+      Position: "Geschäftsführer",
+      now: new Date().toLocaleDateString(),
+    };

-        for (const message of userMessages) {
-            if (message.content) {
-                for (const key in vars) {
-                    if (message.content && message.role === 'system') {
-                        if (typeof message.content === 'string') {
-                            message.content = message.content.replace(new RegExp(`{{${key}}}`, 'g'), vars[key as keyof typeof vars]);
-                        }
-                    }
-                }
+    for (const message of userMessages) {
+      if (message.content) {
+        for (const key in vars) {
+          if (message.content && message.role === "system") {
+            if (typeof message.content === "string") {
+              message.content = message.content.replace(
+                new RegExp(`{{${key}}}`, "g"),
+                vars[key as keyof typeof vars]
+              );
            }
+          }
        }
+      }
+    }

-        console.log('Bot started');
+    console.log("Bot started");

-        async function speakText(text: string) {
-            console.log('Generated message:', text);
+    async function speakText(text: string) {
+      console.log("Generated message:", text);

-            const time = new Date().getTime();
-            let lastTime = 0;
-            let firstMessage = true;
-            const labs11 = await elevenlabs_wss.connect(
-                {
-                    voice_id: 'N2lVS1w4EtoT3dr4eOWO',
-                    model_id: 'eleven_turbo_v2_5',
+      const time = new Date().getTime();
+      let lastTime = 0;
+      let firstMessage = true;
+      const labs11 = await elevenlabs_wss.connect(
+        {
+          voice_id: "N2lVS1w4EtoT3dr4eOWO",
+          model_id: "eleven_turbo_v2_5",

-                    optimize_streaming_latency: 4,
-                    output_format: 'pcm_24000',
-                    language_code: 'de',
-                    //sync_alignment: true,
-                    inactivity_timeout: 20,
-                },
-                (data, randomUUID) => {
-                    if (!data.audio) return;
+          optimize_streaming_latency: 4,
+          output_format: "pcm_24000",
+          language_code: "de",
+          //sync_alignment: true,
+          inactivity_timeout: 20,
+        },
+        (data, randomUUID) => {
+          if (!data.audio) return;

-                    const audioBuffer = Buffer.from(data.audio, 'base64');
+          const audioBuffer = Buffer.from(data.audio, "base64");

-                    const audioBufferArray = new Uint8Array(audioBuffer);
+          const audioBufferArray = new Uint8Array(audioBuffer);

-                    socket.emit('openai-audio', audioBufferArray, randomUUID);
-                    console.log('Received audio data from Eleven Labs');
+          socket.emit("openai-audio", audioBufferArray, randomUUID);
+          console.log("Received audio data from Eleven Labs");

-                    if (data.normalizedAlignment) {
-                        if (firstMessage) {
-                            firstMessage = false;
-                            currentSpeachTime = new Date().getTime();
-                            currentSpeach = [];
-                        }
-
-                        currentSpeach.push(data.normalizedAlignment);
-                    }
-                }
-            );
-
-            const ws11 = labs11.socket;
-
-            socket.emit('openai-audio-start', labs11.randomUUID);
-
-            console.log('Connected to Eleven Labs. Took', new Date().getTime() - time, 'ms');
-
-            elevenlabs_wss.generate(ws11, {
-                flush: true,
-                text: text + ' ',
-
-                voice_settings: {
-                    style: 0.5,
-                    use_speaker_boost: true,
-                    stability: 0.5,
-                    similarity_boost: 0.5,
-                },
-            });
-            elevenlabs_wss.generate(ws11, {
-                //flush: true,
-                text: '',
-            });
-        }
-
-        async function generateVoiceMessage() {
-            const output = await openai.beta.chat.completions.parse({
-                model: 'gpt-4o',
-                temperature: 0.8,
-                //max_completion_tokens: 100,
-                messages: [...userMessages],
-            });
-
-            const text = output.choices[0].message.content;
-
-            if (!text) return;
-
-            speakText(text);
-        }
-
-        function addMessageToUser(message: ChatCompletionMessageParam) {
-            userMessages.push(message);
-        }
-
-        function addLastMessageToChat() {
-            let oldText = '';
-            let lastMs = 0;
-
-            const speakOffset = new Date().getTime() - currentSpeachTime;
-
-            let inrerrupt = false;
-
-            for (const alignment of currentSpeach) {
-                let index = 0;
-                for (const char of alignment.chars) {
-                    const ms = alignment.charStartTimesMs[index];
-
-                    if (lastMs + ms < speakOffset) {
-                        oldText += char;
-                    } else {
-                        inrerrupt = true;
-                        break;
-                    }
-
-                    index++;
-                }
-
-                lastMs = alignment.charStartTimesMs[alignment.charStartTimesMs.length - 1];
+          if (data.normalizedAlignment) {
+            if (firstMessage) {
+              firstMessage = false;
+              currentSpeachTime = new Date().getTime();
+              currentSpeach = [];
            }

-            if (inrerrupt) {
-                oldText += ' ... **ABGEBROCHEN**';
-            }
+            currentSpeach.push(data.normalizedAlignment);
+          }
+        }
+      );

-            if (oldText) {
-                addMessageToUser({
-                    role: 'assistant',
-                    content: oldText,
-                });
-            }
+      const ws11 = labs11.socket;

-            logTranscript('AI', oldText);
+      socket.emit("openai-audio-start", labs11.randomUUID);
+
+      console.log(
+        "Connected to Eleven Labs. Took",
+        new Date().getTime() - time,
+        "ms"
+      );
+
+      elevenlabs_wss.generate(ws11, {
+        flush: true,
+        text: text + " ",
+
+        voice_settings: {
+          style: 0.5,
+          use_speaker_boost: true,
+          stability: 0.5,
+          similarity_boost: 0.5,
+        },
+      });
+      elevenlabs_wss.generate(ws11, {
+        //flush: true,
+        text: "",
+      });
+    }
+
+    async function generateVoiceMessage() {
+      const output = await openai.beta.chat.completions.parse({
+        model: "gpt-4o",
+        temperature: 0.8,
+        //max_completion_tokens: 100,
+        messages: [...userMessages],
+      });
+
+      const text = output.choices[0].message.content;
+
+      if (!text) return;
+
+      speakText(text);
+    }
+
+    function addMessageToUser(message: ChatCompletionMessageParam) {
+      userMessages.push(message);
+    }
+
+    function addLastMessageToChat() {
+      let oldText = "";
+      let lastMs = 0;
+
+      const speakOffset = new Date().getTime() - currentSpeachTime;
+
+      let inrerrupt = false;
+
+      for (const alignment of currentSpeach) {
+        let index = 0;
+        for (const char of alignment.chars) {
+          const ms = alignment.charStartTimesMs[index];
+
+          if (lastMs + ms < speakOffset) {
+            oldText += char;
+          } else {
+            inrerrupt = true;
+            break;
+          }
+
+          index++;
        }

-        speakText('Hallo hier ist Kevin von Sentrovo, spreche ich hier mit Herr Mustermann?');
+        lastMs =
+          alignment.charStartTimesMs[alignment.charStartTimesMs.length - 1];
+      }

-        const transcriber = assemblyAI.realtime.transcriber({
-            sampleRate: 16_000,
-            encoding: 'pcm_s16le',
+      if (inrerrupt) {
+        oldText += " ... **ABGEBROCHEN**";
+      }
+
+      if (oldText) {
+        addMessageToUser({
+          role: "assistant",
+          content: oldText,
        });
+      }

-        transcriber.on('open', ({ sessionId }) => {
-            console.log(`Session opened with ID: ${sessionId}`);
-        });
+      logTranscript("AI", oldText);
+    }

-        transcriber.on('error', (error: Error) => {
-            console.error('Error:', error);
-        });
+    speakText(
+      "Hallo hier ist Kevin von Sentrovo, spreche ich hier mit Herr Mustermann?"
+    );

-        transcriber.on('close', (code: number, reason: string) => console.log('Session closed:', code, reason));
+    const transcriber = assemblyAI.realtime.transcriber({
+      sampleRate: 16_000,
+      encoding: "pcm_s16le",
+    });

-        transcriber.on('transcript', (transcript: RealtimeTranscript) => {
-            if (!transcript.text) {
-                return;
-            }
+    transcriber.on("open", ({ sessionId }) => {
+      console.log(`Session opened with ID: ${sessionId}`);
+    });

-            if (transcript.message_type === 'PartialTranscript') {
-                console.log('Partial:', transcript.text);
-            } else {
-                console.log('Final:', transcript.text);
-            }
-        });
+    transcriber.on("error", (error: Error) => {
+      console.error("Error:", error);
+    });

-        console.log('Connecting to real-time transcript service');
-        await transcriber.connect();
+    transcriber.on("close", (code: number, reason: string) =>
+      console.log("Session closed:", code, reason)
+    );

-        console.log('Starting recording');
+    transcriber.on("transcript", (transcript: RealtimeTranscript) => {
+      if (!transcript.text) {
+        return;
+      }

-        /*
+      if (transcript.message_type === "PartialTranscript") {
+        console.log("Partial:", transcript.text);
+      } else {
+        console.log("Final:", transcript.text);
+      }
+    });
+
+    console.log("Connecting to real-time transcript service");
+    await transcriber.connect();
+
+    console.log("Starting recording");
+
+    /*
         {
                //data.speech_final
                if (data.is_final) {
@ -336,47 +350,50 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
                //console.log('ts\t', data);
            } */

-        // STEP 4: Fetch the audio stream and send it to the live transcription connection
+    // STEP 4: Fetch the audio stream and send it to the live transcription connection

-        socket.on('bot-voice-data', (audioBuffer: any) => {
-            // Convert Buffer to ArrayBuffer
-            const arrayBuffer = audioBuffer.buffer.slice(audioBuffer.byteOffset, audioBuffer.byteOffset + audioBuffer.byteLength);
+    socket.on("bot-voice-data", (audioBuffer: any) => {
+      // Convert Buffer to ArrayBuffer
+      const arrayBuffer = audioBuffer.buffer.slice(
+        audioBuffer.byteOffset,
+        audioBuffer.byteOffset + audioBuffer.byteLength
+      );

-            // Convert ArrayBuffer to Int16Array
-            const float32Array = new Float32Array(arrayBuffer);
+      // Convert ArrayBuffer to Int16Array
+      const float32Array = new Float32Array(arrayBuffer);

-            //console.log('Received audio data from User:', data);
-            //dgConnection.send(RealtimeUtils.floatTo16BitPCM(float32Array));
-            //transcriber.stream()
+      //console.log('Received audio data from User:', data);
+      //dgConnection.send(RealtimeUtils.floatTo16BitPCM(float32Array));
+      //transcriber.stream()

-            const audioBufferArray = RealtimeUtils.floatTo16BitPCM(float32Array);
+      const audioBufferArray = RealtimeUtils.floatTo16BitPCM(float32Array);

-            transcriber.sendAudio(audioBufferArray);
-        });
-
-        function stopConversation() {
-            console.log('Ending conversation');
-            addLastMessageToChat();
-
-            //dgConnection.disconnect();
-            socket.emit('bot-stopSpeaking');
-
-            // write the transcript to the file
-            const uuid = new Date().getTime();
-
-            // create folder
-            fs.mkdirSync(`transcripts/${uuid}`, { recursive: true });
-
-            fs.writeFileSync(`transcripts/${uuid}/transcript.txt`, transcript);
-        }
-
-        socket.on('bot-end', () => {
-            stopConversation();
-        });
-
-        socket.on('disconnect', () => {
-            stopConversation();
-            console.log('A user disconnected');
-        });
+      transcriber.sendAudio(audioBufferArray);
    });
+
+    function stopConversation() {
+      console.log("Ending conversation");
+      addLastMessageToChat();
+
+      //dgConnection.disconnect();
+      socket.emit("bot-stopSpeaking");
+
+      // write the transcript to the file
+      const uuid = new Date().getTime();
+
+      // create folder
+      fs.mkdirSync(`transcripts/${uuid}`, { recursive: true });
+
+      fs.writeFileSync(`transcripts/${uuid}/transcript.txt`, transcript);
+    }
+
+    socket.on("bot-end", () => {
+      stopConversation();
+    });
+
+    socket.on("disconnect", () => {
+      stopConversation();
+      console.log("A user disconnected");
+    });
+  });
 }