fixed double voices

main
alex 2024-11-19 21:59:23 +01:00
parent 4f4f90d7ad
commit 1764633554
2 changed files with 260 additions and 250 deletions

View File

@ -1,272 +1,273 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta
name="viewport"
content="width=device-width, initial-scale=1.0, user-scalable=no"
/>
<!-- Theme color for Chrome, Firefox OS and Opera -->
<meta name="theme-color" content="#fd9644" />
<!-- Theme color for Windows Phone -->
<meta name="msapplication-navbutton-color" content="#fd9644" />
<!-- Theme color for iOS Safari -->
<meta
name="apple-mobile-web-app-status-bar-style"
content="black-translucent"
/>
<meta name="apple-mobile-web-app-capable" content="yes" />
<title>Voice Call with a voice bot</title>
<style>
body {
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
font-family: "Roboto", "Helvetica", "Arial", sans-serif;
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no" />
<!-- Theme color for Chrome, Firefox OS and Opera -->
<meta name="theme-color" content="#fd9644" />
<!-- Theme color for Windows Phone -->
<meta name="msapplication-navbutton-color" content="#fd9644" />
<!-- Theme color for iOS Safari -->
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent" />
<meta name="apple-mobile-web-app-capable" content="yes" />
<title>Voice Call with a voice bot</title>
<style>
body {
display: flex;
justify-content: center;
align-items: center;
height: 100vh;
margin: 0;
font-family: "Roboto", "Helvetica", "Arial", sans-serif;
}
.center-container {
text-align: center;
}
.material-button {
font-family: "Roboto", "Helvetica", "Arial", sans-serif;
display: inline-block;
padding: 0 16px;
height: 36px;
font-size: 14px;
font-weight: 500;
line-height: 36px;
border-radius: 4px;
text-transform: uppercase;
text-align: center;
letter-spacing: 0.5px;
overflow: hidden;
will-change: box-shadow;
transition: box-shadow 0.2s cubic-bezier(0.4, 0, 0.2, 1);
border: none;
cursor: pointer;
color: #fff;
background-color: #fd9644;
box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14),
0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12);
}
.material-button:hover {
box-shadow: 0 4px 5px 0 rgba(0, 0, 0, 0.14),
0 1px 10px 0 rgba(0, 0, 0, 0.12), 0 2px 4px -1px rgba(0, 0, 0, 0.2);
}
.material-button:active {
box-shadow: 0 8px 10px 1px rgba(0, 0, 0, 0.14),
0 3px 14px 2px rgba(0, 0, 0, 0.12), 0 5px 5px -3px rgba(0, 0, 0, 0.2);
}
</style>
</head>
<body>
<div class="center-container">
<h1>Voice Call with a Voice Bot</h1>
<button id="startCallButton" class="material-button">Start Call</button>
<p id="status">Status: Idle</p>
<h3>Input:</h3>
<p id="text-input">---</p>
<h3>Output:</h3>
<p id="text-output">---</p>
</div>
<script src="/socket.io/socket.io.js"></script>
<script>
const startCallButton = document.getElementById("startCallButton");
const status = document.getElementById("status");
const debugTextInput = document.getElementById("text-input");
const debugTextOutput = document.getElementById("text-output");
const socket = io(
`${window.location.protocol}//${window.location.hostname}`
); // Connect to your server
let localStream;
let audioContext;
let processor;
let started = false;
let audioQueue = [];
startCallButton.addEventListener("click", async () => {
if (started) {
socket.emit("bot-end");
processor.disconnect(audioContext.destination);
localStream.getTracks().forEach((track) => track.stop());
localStream = null;
audioContext.close();
audioContext = null;
processor = null;
startCallButton.textContent = "Start Call";
status.textContent = "Status: Call ended";
started = false;
return;
}
.center-container {
text-align: center;
started = true;
startCallButton.textContent = "End Call";
status.textContent = "Status: Starting call...";
console.log("Starting call...");
// Get local audio stream
localStream = await navigator.mediaDevices.getUserMedia({
audio: true,
});
console.log("Local audio stream obtained:", localStream);
socket.emit("bot-start");
// Create AudioContext with sample rate of 24000 Hz
audioContext = new (window.AudioContext || window.webkitAudioContext)({
sampleRate: 24000,
});
const source = audioContext.createMediaStreamSource(localStream);
processor = audioContext.createScriptProcessor(4096, 1, 1);
// Process audio data
processor.onaudioprocess = (event) => {
const inputData = event.inputBuffer.getChannelData(0);
socket.emit("bot-voice-data", inputData); // Send as ArrayBuffer
// echo audio locally
/*const outputData = event.outputBuffer.getChannelData(0);
for (let sample = 0; sample < inputData.length; sample++) {
outputData[sample] = inputData[sample];
}*/
let outputData = event.outputBuffer.getChannelData(0);
if (audioQueue.length > 0) {
const chunk = audioQueue.splice(0, outputData.length);
for (let i = 0; i < chunk.length; i++) {
outputData[i] = chunk[i];
}
} else {
for (let i = 0; i < outputData.length; i++) {
outputData[i] = 0;
}
}
};
source.connect(processor);
processor.connect(audioContext.destination);
status.textContent = "Status: Call started";
});
socket.on("openai-response", (data) => {
console.log("OpenAI response received:", data);
try {
response.textContent = "Response: " + data[0].transcript;
} catch (error) { }
});
socket.on("debug-text-input", (data) => {
debugTextInput.textContent = data;
});
socket.on("debug-text-output", (data) => {
debugTextOutput.textContent = data;
});
let dataSum = 0;
let lastByte = undefined;
let currentAudioID = "";
// A new bot response is starting: adopt its UUID so that late-arriving chunks
// from a previous response are dropped by the openai-audio handler, and reset
// the debug byte counter.
// NOTE(review): the added stopAudio() call cuts off audio still draining from
// the prior response — presumably this is the "double voices" fix named in the
// commit message; confirm against the server-side event ordering.
socket.on("openai-audio-start", (randomUUID) => {
console.log("OpenAI audio start:", randomUUID);
currentAudioID = randomUUID;
dataSum = 0;
stopAudio();
});
socket.on("openai-audio", async (data, randomUUID) => {
console.log("OpenAI audio received:", data, randomUUID);
if (currentAudioID !== randomUUID) {
return;
}
.material-button {
font-family: "Roboto", "Helvetica", "Arial", sans-serif;
// Log the received data
console.log("Received data type:", data.constructor.name);
console.log("Received data:", data);
display: inline-block;
padding: 0 16px;
height: 36px;
font-size: 14px;
font-weight: 500;
line-height: 36px;
border-radius: 4px;
text-transform: uppercase;
text-align: center;
letter-spacing: 0.5px;
overflow: hidden;
will-change: box-shadow;
transition: box-shadow 0.2s cubic-bezier(0.4, 0, 0.2, 1);
border: none;
cursor: pointer;
color: #fff;
background-color: #fd9644;
box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14),
0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12);
// Ensure data is an ArrayBuffer
if (!(data instanceof ArrayBuffer)) {
console.error("Received data is not an ArrayBuffer");
return;
}
.material-button:hover {
box-shadow: 0 4px 5px 0 rgba(0, 0, 0, 0.14),
0 1px 10px 0 rgba(0, 0, 0, 0.12), 0 2px 4px -1px rgba(0, 0, 0, 0.2);
}
dataSum += data.byteLength;
console.log("Received data sum:", dataSum);
.material-button:active {
box-shadow: 0 8px 10px 1px rgba(0, 0, 0, 0.14),
0 3px 14px 2px rgba(0, 0, 0, 0.12), 0 5px 5px -3px rgba(0, 0, 0, 0.2);
}
</style>
</head>
<body>
<div class="center-container">
<h1>Voice Call with a Voice Bot</h1>
<button id="startCallButton" class="material-button">Start Call</button>
<p id="status">Status: Idle</p>
<h3>Input:</h3>
<p id="text-input">---</p>
<h3>Output:</h3>
<p id="text-output">---</p>
</div>
<script src="/socket.io/socket.io.js"></script>
<script>
const startCallButton = document.getElementById("startCallButton");
const status = document.getElementById("status");
const debugTextInput = document.getElementById("text-input");
const debugTextOutput = document.getElementById("text-output");
const socket = io(
`${window.location.protocol}//${window.location.hostname}`
); // Connect to your server
let localStream;
let audioContext;
let processor;
let started = false;
let audioQueue = [];
startCallButton.addEventListener("click", async () => {
if (started) {
socket.emit("bot-end");
processor.disconnect(audioContext.destination);
localStream.getTracks().forEach((track) => track.stop());
localStream = null;
audioContext.close();
audioContext = null;
processor = null;
startCallButton.textContent = "Start Call";
status.textContent = "Status: Call ended";
started = false;
return;
try {
// Check if there was an odd byte from the previous chunk
if (lastByte !== undefined) {
// Combine the last byte with the current data
const lastData = new Uint8Array(
lastByte.byteLength + data.byteLength
);
lastData.set(new Uint8Array(lastByte), 0);
lastData.set(new Uint8Array(data), lastByte.byteLength);
data = lastData;
lastByte = undefined; // Reset lastByte since it has been processed
}
started = true;
startCallButton.textContent = "End Call";
status.textContent = "Status: Starting call...";
console.log("Starting call...");
// Get local audio stream
localStream = await navigator.mediaDevices.getUserMedia({
audio: true,
});
console.log("Local audio stream obtained:", localStream);
socket.emit("bot-start");
// Create AudioContext with sample rate of 24000 Hz
audioContext = new (window.AudioContext || window.webkitAudioContext)({
sampleRate: 24000,
});
const source = audioContext.createMediaStreamSource(localStream);
processor = audioContext.createScriptProcessor(4096, 1, 1);
// Process audio data
processor.onaudioprocess = (event) => {
const inputData = event.inputBuffer.getChannelData(0);
socket.emit("bot-voice-data", inputData); // Send as ArrayBuffer
// echo audio locally
/*const outputData = event.outputBuffer.getChannelData(0);
for (let sample = 0; sample < inputData.length; sample++) {
outputData[sample] = inputData[sample];
}*/
let outputData = event.outputBuffer.getChannelData(0);
if (audioQueue.length > 0) {
const chunk = audioQueue.splice(0, outputData.length);
for (let i = 0; i < chunk.length; i++) {
outputData[i] = chunk[i];
}
} else {
for (let i = 0; i < outputData.length; i++) {
outputData[i] = 0;
}
}
};
source.connect(processor);
processor.connect(audioContext.destination);
status.textContent = "Status: Call started";
});
// Raw response payload forwarded by the server from the OpenAI session.
socket.on("openai-response", (data) => {
console.log("OpenAI response received:", data);
try {
// NOTE(review): `response` is not declared anywhere in this file — the visible
// elements are `status`, `debugTextInput` and `debugTextOutput` — so this line
// almost certainly throws a ReferenceError that the empty catch silently
// swallows, and the transcript is never shown. Confirm which element the
// transcript should target and whether the data[0].transcript shape is right.
response.textContent = "Response: " + data[0].transcript;
} catch (error) {}
});
// Mirror the latest transcription of the caller's speech into the debug panel.
socket.on("debug-text-input", (transcript) => {
  debugTextInput.textContent = transcript;
});
// Mirror the bot's latest reply text into the debug panel.
socket.on("debug-text-output", (reply) => {
  debugTextOutput.textContent = reply;
});
// Running total of audio bytes received for the current response (debug/logging only).
let dataSum = 0;
// Carry-over byte kept when a PCM16 chunk arrives with an odd byte count,
// so Int16Array decoding stays 2-byte aligned across chunks.
let lastByte = undefined;
// UUID of the response currently being played; chunks tagged with any other
// UUID are discarded as stale.
let currentAudioID = "";
// A new bot response is starting: adopt its UUID so late chunks from a
// previous response are dropped by the openai-audio handler, and reset the
// debug byte counter.
socket.on("openai-audio-start", (randomUUID) => {
console.log("OpenAI audio start:", randomUUID);
currentAudioID = randomUUID;
dataSum = 0;
});
socket.on("openai-audio", async (data, randomUUID) => {
console.log("OpenAI audio received:", data, randomUUID);
if (currentAudioID !== randomUUID) {
return;
// Check if the combined data has an odd number of bytes
if (data.byteLength % 2 !== 0) {
// Save the last byte for the next chunk
lastByte = data.slice(data.byteLength - 1);
// Remove the last byte from the current data
data = data.slice(0, data.byteLength - 1);
}
// Log the received data
console.log("Received data type:", data.constructor.name);
console.log("Received data:", data);
// Convert ArrayBuffer to Int16Array
const int16Array = new Int16Array(data);
// Ensure data is an ArrayBuffer
if (!(data instanceof ArrayBuffer)) {
console.error("Received data is not an ArrayBuffer");
return;
// Normalize PCM16 data to the range [-1, 1]
const float32Array = new Float32Array(int16Array.length);
for (let i = 0; i < int16Array.length; i++) {
float32Array[i] = int16Array[i] / 0x7fff;
}
dataSum += data.byteLength;
console.log("Received data sum:", dataSum);
console.log("Normalized data:", float32Array.length);
try {
// Check if there was an odd byte from the previous chunk
if (lastByte !== undefined) {
// Combine the last byte with the current data
const lastData = new Uint8Array(
lastByte.byteLength + data.byteLength
);
lastData.set(new Uint8Array(lastByte), 0);
lastData.set(new Uint8Array(data), lastByte.byteLength);
data = lastData;
lastByte = undefined; // Reset lastByte since it has been processed
}
// Check if the combined data has an odd number of bytes
if (data.byteLength % 2 !== 0) {
// Save the last byte for the next chunk
lastByte = data.slice(data.byteLength - 1);
// Remove the last byte from the current data
data = data.slice(0, data.byteLength - 1);
}
// Convert ArrayBuffer to Int16Array
const int16Array = new Int16Array(data);
// Normalize PCM16 data to the range [-1, 1]
const float32Array = new Float32Array(int16Array.length);
for (let i = 0; i < int16Array.length; i++) {
float32Array[i] = int16Array[i] / 0x7fff;
}
console.log("Normalized data:", float32Array.length);
// Add the normalized data to the audio queue
for (let i = 0; i < float32Array.length; i++) {
audioQueue.push(float32Array[i]);
}
} catch (error) {
console.error("Error processing audio data:", error);
// Add the normalized data to the audio queue
for (let i = 0; i < float32Array.length; i++) {
audioQueue.push(float32Array[i]);
}
});
} catch (error) {
console.error("Error processing audio data:", error);
}
});
socket.on("bot-stopSpeaking", () => {
console.log("Bot stopped speaking");
audioQueue = [];
// The server reports that the bot stopped speaking; flush any queued
// playback samples so the client does not keep talking over the user.
socket.on("bot-stopSpeaking", () => {
console.log("Bot stopped speaking");
stopAudio();
});
// Clear the audio queue fade out
const fadeOut = setInterval(() => {
if (audioQueue.length > 0) {
audioQueue = audioQueue.slice(0, audioQueue.length - 1);
} else {
clearInterval(fadeOut);
}
}, 100);
});
</script>
</body>
</html>
// Immediately flush all queued playback samples, then keep a short-lived
// timer around that trims one trailing sample every 100 ms until the queue
// is empty again — a guard against samples enqueued right after the flush.
function stopAudio() {
  audioQueue = [];
  const drainTimer = setInterval(() => {
    if (audioQueue.length === 0) {
      clearInterval(drainTimer);
      return;
    }
    // Drop the last queued sample (non-mutating: replace with a shorter copy).
    audioQueue = audioQueue.slice(0, -1);
  }, 100);
}
</script>
</body>
</html>

View File

@ -0,0 +1,9 @@
AI: Hallo hier ist Jan von Jannex, spreche ich hier mit Herr Mustermann?
User: ja babahaard
AI: Super, dass ich Sie erreiche! Ich rufe an, um kurz über KI-Möglichkeiten für Ihr Unternehmen zu sprechen. Hätten Sie kurz Zeit?
User: ja was gibt's denn
AI: Gibt es Bereiche bei Ihnen, wo Sie sich mehr Effizienz oder weniger Aufwand wünschen würden? KI kann da oft schon sehr hilfreich sein.
User: ja ich hab da zwar schon mal eine website gemacht aber
AI: Ah, ... **ABGEBROCHEN**
User: ich glaub jetzt haben wir jetzt nicht so wirklich mit dafür
AI: Total verständlich. Viele unserer Kunden waren anfangs auch skeptisch und haben trotzdem Potenziale gefunden. Ein kurzes Gespräch schadet nicht und könnte schon interessante Ansätze zeigen. Wie wäre es, wenn wir einfach mal ein KI-Audit machen, um mögliche Chancen auszuloten?