fixed double voices

2024-11-19 21:59:23 +01:00 · 2024-11-19 21:59:23 +01:00 · 1764633554
parent 4f4f90d7ad
commit 1764633554
2 changed files with 260 additions and 250 deletions
--- a/src/public/index.html
+++ b/src/public/index.html
@ -1,272 +1,273 @@
 <!DOCTYPE html>
 <html lang="en">
-  <head>
-    <meta charset="UTF-8" />
-    <meta
-      name="viewport"
-      content="width=device-width, initial-scale=1.0, user-scalable=no"
-    />
-    <!-- Theme color for Chrome, Firefox OS and Opera -->
-    <meta name="theme-color" content="#fd9644" />
-    <!-- Theme color for Windows Phone -->
-    <meta name="msapplication-navbutton-color" content="#fd9644" />
-    <!-- Theme color for iOS Safari -->
-    <meta
-      name="apple-mobile-web-app-status-bar-style"
-      content="black-translucent"
-    />
-    <meta name="apple-mobile-web-app-capable" content="yes" />

-    <title>Voice Call with a voice bot</title>
-    <style>
-      body {
-        display: flex;
-        justify-content: center;
-        align-items: center;
-        height: 100vh;
-        margin: 0;
-        font-family: "Roboto", "Helvetica", "Arial", sans-serif;
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no" />
+  <!-- Theme color for Chrome, Firefox OS and Opera -->
+  <meta name="theme-color" content="#fd9644" />
+  <!-- Theme color for Windows Phone -->
+  <meta name="msapplication-navbutton-color" content="#fd9644" />
+  <!-- Theme color for iOS Safari -->
+  <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
+  <meta name="apple-mobile-web-app-capable" content="yes" />
+
+  <title>Voice Call with a voice bot</title>
+  <style>
+    body {
+      display: flex;
+      justify-content: center;
+      align-items: center;
+      height: 100vh;
+      margin: 0;
+      font-family: "Roboto", "Helvetica", "Arial", sans-serif;
+    }
+
+    .center-container {
+      text-align: center;
+    }
+
+    .material-button {
+      font-family: "Roboto", "Helvetica", "Arial", sans-serif;
+
+      display: inline-block;
+      padding: 0 16px;
+      height: 36px;
+      font-size: 14px;
+      font-weight: 500;
+      line-height: 36px;
+      border-radius: 4px;
+      text-transform: uppercase;
+      text-align: center;
+      letter-spacing: 0.5px;
+      overflow: hidden;
+      will-change: box-shadow;
+      transition: box-shadow 0.2s cubic-bezier(0.4, 0, 0.2, 1);
+      border: none;
+      cursor: pointer;
+      color: #fff;
+      background-color: #fd9644;
+      box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14),
+        0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12);
+    }
+
+    .material-button:hover {
+      box-shadow: 0 4px 5px 0 rgba(0, 0, 0, 0.14),
+        0 1px 10px 0 rgba(0, 0, 0, 0.12), 0 2px 4px -1px rgba(0, 0, 0, 0.2);
+    }
+
+    .material-button:active {
+      box-shadow: 0 8px 10px 1px rgba(0, 0, 0, 0.14),
+        0 3px 14px 2px rgba(0, 0, 0, 0.12), 0 5px 5px -3px rgba(0, 0, 0, 0.2);
+    }
+  </style>
+</head>
+
+<body>
+  <div class="center-container">
+    <h1>Voice Call with a Voice Bot</h1>
+    <button id="startCallButton" class="material-button">Start Call</button>
+    <p id="status">Status: Idle</p>
+    <h3>Input:</h3>
+    <p id="text-input">---</p>
+    <h3>Output:</h3>
+    <p id="text-output">---</p>
+  </div>
+
+  <script src="/socket.io/socket.io.js"></script>
+  <script>
+    const startCallButton = document.getElementById("startCallButton");
+    const status = document.getElementById("status");
+    const debugTextInput = document.getElementById("text-input");
+    const debugTextOutput = document.getElementById("text-output");
+
+    // Connect to the server that served this page. `origin` keeps a
+    // non-default port, which protocol + hostname alone would drop.
+    const socket = io(window.location.origin);
+    let localStream;
+    let audioContext;
+    let processor;
+    let started = false;
+
+    let audioQueue = [];
+
+    startCallButton.addEventListener("click", async () => {
+      if (started) {
+        socket.emit("bot-end");
+        processor.disconnect(audioContext.destination);
+
+        localStream.getTracks().forEach((track) => track.stop());
+        localStream = null;
+
+        audioContext.close();
+        audioContext = null;
+
+        processor = null;
+
+        startCallButton.textContent = "Start Call";
+        status.textContent = "Status: Call ended";
+        started = false;
+        return;
      }

-      .center-container {
-        text-align: center;
+      started = true;
+
+      startCallButton.textContent = "End Call";
+
+      status.textContent = "Status: Starting call...";
+      console.log("Starting call...");
+
+      // Get local audio stream
+      localStream = await navigator.mediaDevices.getUserMedia({
+        audio: true,
+      });
+      console.log("Local audio stream obtained:", localStream);
+
+      socket.emit("bot-start");
+
+      // Create AudioContext with sample rate of 24000 Hz
+      audioContext = new (window.AudioContext || window.webkitAudioContext)({
+        sampleRate: 24000,
+      });
+      const source = audioContext.createMediaStreamSource(localStream);
+      processor = audioContext.createScriptProcessor(4096, 1, 1);
+
+      // Process audio data
+      processor.onaudioprocess = (event) => {
+        const inputData = event.inputBuffer.getChannelData(0);
+
+        socket.emit("bot-voice-data", inputData); // Send as ArrayBuffer
+
+        // echo audio locally
+        /*const outputData = event.outputBuffer.getChannelData(0);
+      for (let sample = 0; sample < inputData.length; sample++) {
+        outputData[sample] = inputData[sample];
+      }*/
+
+        let outputData = event.outputBuffer.getChannelData(0);
+
+        if (audioQueue.length > 0) {
+          const chunk = audioQueue.splice(0, outputData.length);
+          for (let i = 0; i < chunk.length; i++) {
+            outputData[i] = chunk[i];
+          }
+        } else {
+          for (let i = 0; i < outputData.length; i++) {
+            outputData[i] = 0;
+          }
+        }
+      };
+
+      source.connect(processor);
+      processor.connect(audioContext.destination);
+
+      status.textContent = "Status: Call started";
+    });
+
+    // Log only: this page has no element for showing the raw response, so
+    // nothing is written to the DOM here. The "Output" paragraph is filled by
+    // the "debug-text-output" handler.
+    socket.on("openai-response", (data) => {
+      console.log("OpenAI response received:", data);
+    });
+
+    socket.on("debug-text-input", (data) => {
+      debugTextInput.textContent = data;
+    });
+
+    socket.on("debug-text-output", (data) => {
+      debugTextOutput.textContent = data;
+    });
+
+    let dataSum = 0;
+    let lastByte = undefined;
+    let currentAudioID = "";
+
+    // A new bot reply begins: adopt its id and reset all per-stream state so
+    // nothing from the previous reply leaks into this one.
+    socket.on("openai-audio-start", (randomUUID) => {
+      console.log("OpenAI audio start:", randomUUID);
+      currentAudioID = randomUUID;
+      dataSum = 0;
+      // A trailing odd byte held back from the old stream would be prepended
+      // to the first chunk of this one and shift every 16-bit sample by a byte.
+      lastByte = undefined;
+      stopAudio();
+    });
+
+    socket.on("openai-audio", async (data, randomUUID) => {
+      console.log("OpenAI audio received:", data, randomUUID);
+      if (currentAudioID !== randomUUID) {
+        return;
      }

-      .material-button {
-        font-family: "Roboto", "Helvetica", "Arial", sans-serif;
+      // Log the received data
+      console.log("Received data type:", data.constructor.name);
+      console.log("Received data:", data);

-        display: inline-block;
-        padding: 0 16px;
-        height: 36px;
-        font-size: 14px;
-        font-weight: 500;
-        line-height: 36px;
-        border-radius: 4px;
-        text-transform: uppercase;
-        text-align: center;
-        letter-spacing: 0.5px;
-        overflow: hidden;
-        will-change: box-shadow;
-        transition: box-shadow 0.2s cubic-bezier(0.4, 0, 0.2, 1);
-        border: none;
-        cursor: pointer;
-        color: #fff;
-        background-color: #fd9644;
-        box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14),
-          0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12);
+      // Ensure data is an ArrayBuffer
+      if (!(data instanceof ArrayBuffer)) {
+        console.error("Received data is not an ArrayBuffer");
+        return;
      }

-      .material-button:hover {
-        box-shadow: 0 4px 5px 0 rgba(0, 0, 0, 0.14),
-          0 1px 10px 0 rgba(0, 0, 0, 0.12), 0 2px 4px -1px rgba(0, 0, 0, 0.2);
-      }
+      dataSum += data.byteLength;
+      console.log("Received data sum:", dataSum);

-      .material-button:active {
-        box-shadow: 0 8px 10px 1px rgba(0, 0, 0, 0.14),
-          0 3px 14px 2px rgba(0, 0, 0, 0.12), 0 5px 5px -3px rgba(0, 0, 0, 0.2);
-      }
-    </style>
-  </head>
-
-  <body>
-    <div class="center-container">
-      <h1>Voice Call with a Voice Bot</h1>
-      <button id="startCallButton" class="material-button">Start Call</button>
-      <p id="status">Status: Idle</p>
-      <h3>Input:</h3>
-      <p id="text-input">---</p>
-      <h3>Output:</h3>
-      <p id="text-output">---</p>
-    </div>
-
-    <script src="/socket.io/socket.io.js"></script>
-    <script>
-      const startCallButton = document.getElementById("startCallButton");
-      const status = document.getElementById("status");
-      const debugTextInput = document.getElementById("text-input");
-      const debugTextOutput = document.getElementById("text-output");
-
-      const socket = io(
-        `${window.location.protocol}//${window.location.hostname}`
-      ); // Connect to your server
-      let localStream;
-      let audioContext;
-      let processor;
-      let started = false;
-
-      let audioQueue = [];
-
-      startCallButton.addEventListener("click", async () => {
-        if (started) {
-          socket.emit("bot-end");
-          processor.disconnect(audioContext.destination);
-
-          localStream.getTracks().forEach((track) => track.stop());
-          localStream = null;
-
-          audioContext.close();
-          audioContext = null;
-
-          processor = null;
-
-          startCallButton.textContent = "Start Call";
-          status.textContent = "Status: Call ended";
-          started = false;
-          return;
+      try {
+        // Check if there was an odd byte from the previous chunk
+        if (lastByte !== undefined) {
+          // Combine the last byte with the current data
+          const lastData = new Uint8Array(
+            lastByte.byteLength + data.byteLength
+          );
+          lastData.set(new Uint8Array(lastByte), 0);
+          lastData.set(new Uint8Array(data), lastByte.byteLength);
+          data = lastData;
+          lastByte = undefined; // Reset lastByte since it has been processed
        }

-        started = true;
-
-        startCallButton.textContent = "End Call";
-
-        status.textContent = "Status: Starting call...";
-        console.log("Starting call...");
-
-        // Get local audio stream
-        localStream = await navigator.mediaDevices.getUserMedia({
-          audio: true,
-        });
-        console.log("Local audio stream obtained:", localStream);
-
-        socket.emit("bot-start");
-
-        // Create AudioContext with sample rate of 24000 Hz
-        audioContext = new (window.AudioContext || window.webkitAudioContext)({
-          sampleRate: 24000,
-        });
-        const source = audioContext.createMediaStreamSource(localStream);
-        processor = audioContext.createScriptProcessor(4096, 1, 1);
-
-        // Process audio data
-        processor.onaudioprocess = (event) => {
-          const inputData = event.inputBuffer.getChannelData(0);
-
-          socket.emit("bot-voice-data", inputData); // Send as ArrayBuffer
-
-          // echo audio locally
-          /*const outputData = event.outputBuffer.getChannelData(0);
-        for (let sample = 0; sample < inputData.length; sample++) {
-          outputData[sample] = inputData[sample];
-        }*/
-
-          let outputData = event.outputBuffer.getChannelData(0);
-
-          if (audioQueue.length > 0) {
-            const chunk = audioQueue.splice(0, outputData.length);
-            for (let i = 0; i < chunk.length; i++) {
-              outputData[i] = chunk[i];
-            }
-          } else {
-            for (let i = 0; i < outputData.length; i++) {
-              outputData[i] = 0;
-            }
-          }
-        };
-
-        source.connect(processor);
-        processor.connect(audioContext.destination);
-
-        status.textContent = "Status: Call started";
-      });
-
-      socket.on("openai-response", (data) => {
-        console.log("OpenAI response received:", data);
-        try {
-          response.textContent = "Response: " + data[0].transcript;
-        } catch (error) {}
-      });
-
-      socket.on("debug-text-input", (data) => {
-        debugTextInput.textContent = data;
-      });
-
-      socket.on("debug-text-output", (data) => {
-        debugTextOutput.textContent = data;
-      });
-
-      let dataSum = 0;
-      let lastByte = undefined;
-      let currentAudioID = "";
-
-      socket.on("openai-audio-start", (randomUUID) => {
-        console.log("OpenAI audio start:", randomUUID);
-        currentAudioID = randomUUID;
-        dataSum = 0;
-      });
-
-      socket.on("openai-audio", async (data, randomUUID) => {
-        console.log("OpenAI audio received:", data, randomUUID);
-        if (currentAudioID !== randomUUID) {
-          return;
+        // Check if the combined data has an odd number of bytes
+        if (data.byteLength % 2 !== 0) {
+          // Save the last byte for the next chunk
+          lastByte = data.slice(data.byteLength - 1);
+          // Remove the last byte from the current data
+          data = data.slice(0, data.byteLength - 1);
        }

-        // Log the received data
-        console.log("Received data type:", data.constructor.name);
-        console.log("Received data:", data);
+        // Convert ArrayBuffer to Int16Array
+        const int16Array = new Int16Array(data);

-        // Ensure data is an ArrayBuffer
-        if (!(data instanceof ArrayBuffer)) {
-          console.error("Received data is not an ArrayBuffer");
-          return;
+        // Normalize PCM16 data to the range [-1, 1]
+        const float32Array = new Float32Array(int16Array.length);
+        for (let i = 0; i < int16Array.length; i++) {
+          float32Array[i] = int16Array[i] / 0x7fff;
        }

-        dataSum += data.byteLength;
-        console.log("Received data sum:", dataSum);
+        console.log("Normalized data:", float32Array.length);

-        try {
-          // Check if there was an odd byte from the previous chunk
-          if (lastByte !== undefined) {
-            // Combine the last byte with the current data
-            const lastData = new Uint8Array(
-              lastByte.byteLength + data.byteLength
-            );
-            lastData.set(new Uint8Array(lastByte), 0);
-            lastData.set(new Uint8Array(data), lastByte.byteLength);
-            data = lastData;
-            lastByte = undefined; // Reset lastByte since it has been processed
-          }
-
-          // Check if the combined data has an odd number of bytes
-          if (data.byteLength % 2 !== 0) {
-            // Save the last byte for the next chunk
-            lastByte = data.slice(data.byteLength - 1);
-            // Remove the last byte from the current data
-            data = data.slice(0, data.byteLength - 1);
-          }
-
-          // Convert ArrayBuffer to Int16Array
-          const int16Array = new Int16Array(data);
-
-          // Normalize PCM16 data to the range [-1, 1]
-          const float32Array = new Float32Array(int16Array.length);
-          for (let i = 0; i < int16Array.length; i++) {
-            float32Array[i] = int16Array[i] / 0x7fff;
-          }
-
-          console.log("Normalized data:", float32Array.length);
-
-          // Add the normalized data to the audio queue
-          for (let i = 0; i < float32Array.length; i++) {
-            audioQueue.push(float32Array[i]);
-          }
-        } catch (error) {
-          console.error("Error processing audio data:", error);
+        // Add the normalized data to the audio queue
+        for (let i = 0; i < float32Array.length; i++) {
+          audioQueue.push(float32Array[i]);
        }
-      });
+      } catch (error) {
+        console.error("Error processing audio data:", error);
+      }
+    });

-      socket.on("bot-stopSpeaking", () => {
-        console.log("Bot stopped speaking");
-        audioQueue = [];
+    socket.on("bot-stopSpeaking", () => {
+      console.log("Bot stopped speaking");
+      stopAudio();
+    });
+
+    /**
+     * Immediately silences bot playback by discarding all queued samples.
+     *
+     * No timer is started here on purpose: the queue is already empty after
+     * the reset, so a delayed "fade out" could only act on audio queued
+     * afterwards, i.e. it would shave samples off the next reply, and every
+     * call would start one more such timer.
+     */
+    function stopAudio() {
+      // onaudioprocess reads this binding on each callback and outputs silence.
+      audioQueue = [];
+    }
+  </script>
+</body>

-        // Clear the audio queue fade out
-        const fadeOut = setInterval(() => {
-          if (audioQueue.length > 0) {
-            audioQueue = audioQueue.slice(0, audioQueue.length - 1);
-          } else {
-            clearInterval(fadeOut);
-          }
-        }, 100);
-      });
-    </script>
-  </body>
 </html>
--- a/transcripts/1732049017145/transcript.txt
+++ b/transcripts/1732049017145/transcript.txt
@ -0,0 +1,9 @@
+AI:	 Hallo hier ist Jan von Jannex, spreche ich hier mit Herr Mustermann? 
+User:	 ja babahaard
+AI:	 Super, dass ich Sie erreiche! Ich rufe an, um kurz über KI-Möglichkeiten für Ihr Unternehmen zu sprechen. Hätten Sie kurz Zeit? 
+User:	 ja was gibt's denn
+AI:	 Gibt es Bereiche bei Ihnen, wo Sie sich mehr Effizienz oder weniger Aufwand wünschen würden? KI kann da oft schon sehr hilfreich sein. 
+User:	 ja ich hab da zwar schon mal eine website gemacht aber
+AI:	 Ah,  ... **ABGEBROCHEN**
+User:	 ich glaub jetzt haben wir jetzt nicht so wirklich mit dafür
+AI:	 Total verständlich. Viele unserer Kunden waren anfangs auch skeptisch und haben trotzdem Potenziale gefunden. Ein kurzes Gespräch schadet nicht und könnte schon interessante Ansätze zeigen. Wie wäre es, wenn wir einfach mal ein KI-Audit machen, um mögliche Chancen auszuloten?