changed for prod

main
alex 2024-11-19 19:58:28 +01:00
parent 7c7feff90e
commit 4f4f90d7ad
3 changed files with 622 additions and 581 deletions

View File

@ -1,22 +1,28 @@
import express from 'express'; import express from "express";
import path from 'path'; import path from "path";
import { Request, Response } from 'express'; import { Request, Response } from "express";
import http from 'http'; import http from "http";
import { Server } from 'socket.io'; import { Server } from "socket.io";
import dotenv from 'dotenv'; import dotenv from "dotenv";
import { RealtimeClient, RealtimeUtils } from '@openai/realtime-api-beta'; import { RealtimeClient, RealtimeUtils } from "@openai/realtime-api-beta";
import { voiceEngineSocketUser } from './voiceEngine'; import { voiceEngineSocketUser } from "./voiceEngine";
dotenv.config(); dotenv.config();
const app = express(); const app = express();
const PORT = 50269;
const publicFolder = path.join(__dirname, 'public'); if (!process.env.PORT) {
console.error("Please set the PORT environment variable");
process.exit(1);
}
const PORT = parseInt(process.env.PORT, 10);
const publicFolder = path.join(__dirname, "public");
app.use(express.static(publicFolder)); app.use(express.static(publicFolder));
app.get('/', (req: Request, res: Response) => { app.get("/", (req: Request, res: Response) => {
res.sendFile(path.join(publicFolder, 'index.html')); res.sendFile(path.join(publicFolder, "index.html"));
}); });
const server = http.createServer(app); const server = http.createServer(app);
@ -26,91 +32,108 @@ interface ConversationItem {
[key: string]: any; [key: string]: any;
} }
io.on('connection', (socket) => { io.on("connection", (socket) => {
console.log('A user connected'); console.log("A user connected");
voiceEngineSocketUser(socket); voiceEngineSocketUser(socket);
let gptClient: null | RealtimeClient = null; let gptClient: null | RealtimeClient = null;
socket.on('start', async (data) => { socket.on("start", async (data) => {
gptClient = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY }); gptClient = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY });
(async () => { (async () => {
try { try {
await gptClient.connect(); await gptClient.connect();
console.log('Connected to OpenAI Realtime API'); console.log("Connected to OpenAI Realtime API");
socket.on('voice-data', async (audioBuffer) => { socket.on("voice-data", async (audioBuffer) => {
try { try {
console.log('Voice data received'); console.log("Voice data received");
// Send user audio, must be Int16Array or ArrayBuffer // Send user audio, must be Int16Array or ArrayBuffer
// Default audio format is pcm16 with sample rate of 24,000 Hz // Default audio format is pcm16 with sample rate of 24,000 Hz
if (audioBuffer instanceof Float32Array) { if (audioBuffer instanceof Float32Array) {
console.log('Received audio data from the input worklet:'); console.log("Received audio data from the input worklet:");
if (gptClient) gptClient.appendInputAudio(RealtimeUtils.floatTo16BitPCM(audioBuffer)); if (gptClient)
gptClient.appendInputAudio(
RealtimeUtils.floatTo16BitPCM(audioBuffer)
);
} else if (audioBuffer instanceof Buffer) { } else if (audioBuffer instanceof Buffer) {
console.log('Received audio data as Buffer:'); console.log("Received audio data as Buffer:");
// Convert Buffer to ArrayBuffer // Convert Buffer to ArrayBuffer
const arrayBuffer = audioBuffer.buffer.slice(audioBuffer.byteOffset, audioBuffer.byteOffset + audioBuffer.byteLength); const arrayBuffer = audioBuffer.buffer.slice(
audioBuffer.byteOffset,
audioBuffer.byteOffset + audioBuffer.byteLength
);
// Convert ArrayBuffer to Int16Array // Convert ArrayBuffer to Int16Array
const float32Array = new Float32Array(arrayBuffer); const float32Array = new Float32Array(arrayBuffer);
if (gptClient) gptClient.appendInputAudio(RealtimeUtils.floatTo16BitPCM(float32Array)); if (gptClient)
gptClient.appendInputAudio(
RealtimeUtils.floatTo16BitPCM(float32Array)
);
} else { } else {
console.error('Invalid data type received in worklet'); console.error("Invalid data type received in worklet");
// log the data type and return // log the data type and return
console.log('Data type:', typeof audioBuffer, audioBuffer); console.log("Data type:", typeof audioBuffer, audioBuffer);
return; return;
} }
} catch (error) { } catch (error) {
console.error('Error with OpenAI Realtime API:', error); console.error("Error with OpenAI Realtime API:", error);
} }
}); });
gptClient.on('conversation.updated', (event: ConversationItem) => { gptClient.on("conversation.updated", (event: ConversationItem) => {
const { item, delta } = event; const { item, delta } = event;
if (item.content) { if (item.content) {
socket.emit('openai-response', item.content); socket.emit("openai-response", item.content);
} }
console.log('Playing audio response...', delta); console.log("Playing audio response...", delta);
if (delta && delta.audio) { if (delta && delta.audio) {
socket.emit('openai-audio', delta.audio); socket.emit("openai-audio", delta.audio);
} }
console.log('Conversation updated:', event); console.log("Conversation updated:", event);
}); });
gptClient.on('conversation.item.completed', (event: ConversationItem) => { gptClient.on(
"conversation.item.completed",
(event: ConversationItem) => {
const { item } = event; const { item } = event;
console.log('Conversation item completed:', item); console.log("Conversation item completed:", item);
if (item.type === 'message' && item.role === 'assistant' && item.formatted && item.formatted.audio) { if (
console.log('Playing audio response...'); item.type === "message" &&
item.role === "assistant" &&
item.formatted &&
item.formatted.audio
) {
console.log("Playing audio response...");
//socket.emit('openai-audio', item.formatted.audio); //socket.emit('openai-audio', item.formatted.audio);
} else { } else {
console.log('No audio content in this item.'); console.log("No audio content in this item.");
} }
}); }
);
} catch (error) { } catch (error) {
console.error('Error connecting to OpenAI Realtime API:', error); console.error("Error connecting to OpenAI Realtime API:", error);
} }
})(); })();
socket.on('disconnect', () => { socket.on("disconnect", () => {
console.log('A user disconnected'); console.log("A user disconnected");
if (gptClient) gptClient.disconnect(); if (gptClient) gptClient.disconnect();
}); });
socket.on('end', () => { socket.on("end", () => {
console.log('A user ended the conversation'); console.log("A user ended the conversation");
if (gptClient) gptClient.disconnect(); if (gptClient) gptClient.disconnect();
}); });
@ -118,17 +141,22 @@ io.on('connection', (socket) => {
instructions: `Du bist beim Kundensupport von Jannex und möchtest eine Erinnerung für ein Termin nachfragen. instructions: `Du bist beim Kundensupport von Jannex und möchtest eine Erinnerung für ein Termin nachfragen.
Bitte spreche mit einer ruhigen Stimme.`, Bitte spreche mit einer ruhigen Stimme.`,
}); });
gptClient.updateSession({ voice: 'ballad' }); gptClient.updateSession({ voice: "ballad" });
gptClient.updateSession({ gptClient.updateSession({
turn_detection: { type: 'server_vad', threshold: 0.6, prefix_padding_ms: 300, silence_duration_ms: 500 }, turn_detection: {
input_audio_transcription: { model: 'whisper-1' }, type: "server_vad",
input_audio_format: 'pcm16', threshold: 0.6,
output_audio_format: 'pcm16', prefix_padding_ms: 300,
silence_duration_ms: 500,
},
input_audio_transcription: { model: "whisper-1" },
input_audio_format: "pcm16",
output_audio_format: "pcm16",
max_response_output_tokens: 1500, max_response_output_tokens: 1500,
modalities: ['audio', 'text'], modalities: ["audio", "text"],
}); });
gptClient.on('conversation.updated', (event: ConversationItem) => { gptClient.on("conversation.updated", (event: ConversationItem) => {
const { item, delta } = event; const { item, delta } = event;
if (gptClient) { if (gptClient) {
const items = gptClient.conversation.getItems(); const items = gptClient.conversation.getItems();
@ -141,6 +169,6 @@ Bitte spreche mit einer ruhigen Stimme.`,
}); });
}); });
server.listen(PORT, '127.0.0.1', () => { server.listen(PORT, "127.0.0.1", () => {
console.log(`Server läuft unter http://localhost:${PORT}`); console.log(`Server läuft unter http://localhost:${PORT}`);
}); });

View File

@ -1,18 +1,21 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="en">
<head>
<head> <meta charset="UTF-8" />
<meta charset="UTF-8"> <meta
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no"> name="viewport"
content="width=device-width, initial-scale=1.0, user-scalable=no"
/>
<!-- Theme color for Chrome, Firefox OS and Opera --> <!-- Theme color for Chrome, Firefox OS and Opera -->
<meta name="theme-color" content="#fd9644"> <meta name="theme-color" content="#fd9644" />
<!-- Theme color for Windows Phone --> <!-- Theme color for Windows Phone -->
<meta name="msapplication-navbutton-color" content="#fd9644"> <meta name="msapplication-navbutton-color" content="#fd9644" />
<!-- Theme color for iOS Safari --> <!-- Theme color for iOS Safari -->
<meta name="apple-mobile-web-app-status-bar-style" content="black-translucent"> <meta
<meta name="apple-mobile-web-app-capable" content="yes"> name="apple-mobile-web-app-status-bar-style"
content="black-translucent"
/>
<meta name="apple-mobile-web-app-capable" content="yes" />
<title>Voice Call with a voice bot</title> <title>Voice Call with a voice bot</title>
<style> <style>
@ -22,7 +25,7 @@
align-items: center; align-items: center;
height: 100vh; height: 100vh;
margin: 0; margin: 0;
font-family: 'Roboto', 'Helvetica', 'Arial', sans-serif; font-family: "Roboto", "Helvetica", "Arial", sans-serif;
} }
.center-container { .center-container {
@ -30,7 +33,7 @@
} }
.material-button { .material-button {
font-family: 'Roboto', 'Helvetica', 'Arial', sans-serif; font-family: "Roboto", "Helvetica", "Arial", sans-serif;
display: inline-block; display: inline-block;
padding: 0 16px; padding: 0 16px;
@ -49,38 +52,43 @@
cursor: pointer; cursor: pointer;
color: #fff; color: #fff;
background-color: #fd9644; background-color: #fd9644;
box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14), 0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12); box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14),
0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12);
} }
.material-button:hover { .material-button:hover {
box-shadow: 0 4px 5px 0 rgba(0, 0, 0, 0.14), 0 1px 10px 0 rgba(0, 0, 0, 0.12), 0 2px 4px -1px rgba(0, 0, 0, 0.2); box-shadow: 0 4px 5px 0 rgba(0, 0, 0, 0.14),
0 1px 10px 0 rgba(0, 0, 0, 0.12), 0 2px 4px -1px rgba(0, 0, 0, 0.2);
} }
.material-button:active { .material-button:active {
box-shadow: 0 8px 10px 1px rgba(0, 0, 0, 0.14), 0 3px 14px 2px rgba(0, 0, 0, 0.12), 0 5px 5px -3px rgba(0, 0, 0, 0.2); box-shadow: 0 8px 10px 1px rgba(0, 0, 0, 0.14),
0 3px 14px 2px rgba(0, 0, 0, 0.12), 0 5px 5px -3px rgba(0, 0, 0, 0.2);
} }
</style> </style>
</head> </head>
<body> <body>
<div class="center-container"> <div class="center-container">
<h1>Voice Call with a Voice Bot</h1> <h1>Voice Call with a Voice Bot</h1>
<button id="startCallButton" class="material-button">Start Call</button> <button id="startCallButton" class="material-button">Start Call</button>
<p id="status">Status: Idle</p> <p id="status">Status: Idle</p>
<h3>Input: </h3> <h3>Input:</h3>
<p id="text-input">---</p> <p id="text-input">---</p>
<h3>Output: </h3> <h3>Output:</h3>
<p id="text-output">---</p> <p id="text-output">---</p>
</div> </div>
<script src="/socket.io/socket.io.js"></script> <script src="/socket.io/socket.io.js"></script>
<script> <script>
const startCallButton = document.getElementById('startCallButton'); const startCallButton = document.getElementById("startCallButton");
const status = document.getElementById('status'); const status = document.getElementById("status");
const debugTextInput = document.getElementById('text-input'); const debugTextInput = document.getElementById("text-input");
const debugTextOutput = document.getElementById('text-output'); const debugTextOutput = document.getElementById("text-output");
const socket = io('https://voice-test.ex.umbach.dev/'); // Connect to your server const socket = io(
`${window.location.protocol}//${window.location.hostname}`
); // Connect to your server
let localStream; let localStream;
let audioContext; let audioContext;
let processor; let processor;
@ -88,12 +96,12 @@
let audioQueue = []; let audioQueue = [];
startCallButton.addEventListener('click', async () => { startCallButton.addEventListener("click", async () => {
if (started) { if (started) {
socket.emit('bot-end'); socket.emit("bot-end");
processor.disconnect(audioContext.destination); processor.disconnect(audioContext.destination);
localStream.getTracks().forEach(track => track.stop()); localStream.getTracks().forEach((track) => track.stop());
localStream = null; localStream = null;
audioContext.close(); audioContext.close();
@ -101,29 +109,31 @@
processor = null; processor = null;
startCallButton.textContent = 'Start Call'; startCallButton.textContent = "Start Call";
status.textContent = 'Status: Call ended'; status.textContent = "Status: Call ended";
started = false; started = false;
return; return;
} }
started = true; started = true;
startCallButton.textContent = 'End Call'; startCallButton.textContent = "End Call";
status.textContent = 'Status: Starting call...';
console.log('Starting call...');
status.textContent = "Status: Starting call...";
console.log("Starting call...");
// Get local audio stream // Get local audio stream
localStream = await navigator.mediaDevices.getUserMedia({ audio: true }); localStream = await navigator.mediaDevices.getUserMedia({
console.log('Local audio stream obtained:', localStream); audio: true,
});
console.log("Local audio stream obtained:", localStream);
socket.emit('bot-start'); socket.emit("bot-start");
// Create AudioContext with sample rate of 24000 Hz // Create AudioContext with sample rate of 24000 Hz
audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 24000 }); audioContext = new (window.AudioContext || window.webkitAudioContext)({
sampleRate: 24000,
});
const source = audioContext.createMediaStreamSource(localStream); const source = audioContext.createMediaStreamSource(localStream);
processor = audioContext.createScriptProcessor(4096, 1, 1); processor = audioContext.createScriptProcessor(4096, 1, 1);
@ -131,7 +141,7 @@
processor.onaudioprocess = (event) => { processor.onaudioprocess = (event) => {
const inputData = event.inputBuffer.getChannelData(0); const inputData = event.inputBuffer.getChannelData(0);
socket.emit('bot-voice-data', inputData); // Send as ArrayBuffer socket.emit("bot-voice-data", inputData); // Send as ArrayBuffer
// echo audio locally // echo audio locally
/*const outputData = event.outputBuffer.getChannelData(0); /*const outputData = event.outputBuffer.getChannelData(0);
@ -139,7 +149,6 @@
outputData[sample] = inputData[sample]; outputData[sample] = inputData[sample];
}*/ }*/
let outputData = event.outputBuffer.getChannelData(0); let outputData = event.outputBuffer.getChannelData(0);
if (audioQueue.length > 0) { if (audioQueue.length > 0) {
@ -152,70 +161,65 @@
outputData[i] = 0; outputData[i] = 0;
} }
} }
}; };
source.connect(processor); source.connect(processor);
processor.connect(audioContext.destination); processor.connect(audioContext.destination);
status.textContent = 'Status: Call started'; status.textContent = "Status: Call started";
}); });
socket.on('openai-response', (data) => { socket.on("openai-response", (data) => {
console.log('OpenAI response received:', data); console.log("OpenAI response received:", data);
try { try {
response.textContent = 'Response: ' + data[0].transcript; response.textContent = "Response: " + data[0].transcript;
} catch (error) { } catch (error) {}
}
}); });
socket.on('debug-text-input', (data) => { socket.on("debug-text-input", (data) => {
debugTextInput.textContent = data; debugTextInput.textContent = data;
}); });
socket.on('debug-text-output', (data) => { socket.on("debug-text-output", (data) => {
debugTextOutput.textContent = data; debugTextOutput.textContent = data;
}); });
let dataSum = 0; let dataSum = 0;
let lastByte = undefined; let lastByte = undefined;
let currentAudioID = ""; let currentAudioID = "";
socket.on('openai-audio-start', (randomUUID) => { socket.on("openai-audio-start", (randomUUID) => {
console.log('OpenAI audio start:', randomUUID); console.log("OpenAI audio start:", randomUUID);
currentAudioID = randomUUID; currentAudioID = randomUUID;
dataSum = 0; dataSum = 0;
}); });
socket.on('openai-audio', async (data, randomUUID) => { socket.on("openai-audio", async (data, randomUUID) => {
console.log('OpenAI audio received:', data, randomUUID); console.log("OpenAI audio received:", data, randomUUID);
if (currentAudioID !== randomUUID) { if (currentAudioID !== randomUUID) {
return; return;
} }
// Log the received data // Log the received data
console.log('Received data type:', data.constructor.name); console.log("Received data type:", data.constructor.name);
console.log('Received data:', data); console.log("Received data:", data);
// Ensure data is an ArrayBuffer // Ensure data is an ArrayBuffer
if (!(data instanceof ArrayBuffer)) { if (!(data instanceof ArrayBuffer)) {
console.error('Received data is not an ArrayBuffer'); console.error("Received data is not an ArrayBuffer");
return; return;
} }
dataSum += data.byteLength; dataSum += data.byteLength;
console.log('Received data sum:', dataSum); console.log("Received data sum:", dataSum);
try { try {
// Check if there was an odd byte from the previous chunk // Check if there was an odd byte from the previous chunk
if (lastByte !== undefined) { if (lastByte !== undefined) {
// Combine the last byte with the current data // Combine the last byte with the current data
const lastData = new Uint8Array(lastByte.byteLength + data.byteLength); const lastData = new Uint8Array(
lastByte.byteLength + data.byteLength
);
lastData.set(new Uint8Array(lastByte), 0); lastData.set(new Uint8Array(lastByte), 0);
lastData.set(new Uint8Array(data), lastByte.byteLength); lastData.set(new Uint8Array(data), lastByte.byteLength);
data = lastData; data = lastData;
@ -230,34 +234,28 @@
data = data.slice(0, data.byteLength - 1); data = data.slice(0, data.byteLength - 1);
} }
// Convert ArrayBuffer to Int16Array // Convert ArrayBuffer to Int16Array
const int16Array = new Int16Array(data); const int16Array = new Int16Array(data);
// Normalize PCM16 data to the range [-1, 1] // Normalize PCM16 data to the range [-1, 1]
const float32Array = new Float32Array(int16Array.length); const float32Array = new Float32Array(int16Array.length);
for (let i = 0; i < int16Array.length; i++) { for (let i = 0; i < int16Array.length; i++) {
float32Array[i] = int16Array[i] / 0x7FFF; float32Array[i] = int16Array[i] / 0x7fff;
} }
console.log('Normalized data:', float32Array.length); console.log("Normalized data:", float32Array.length);
// Add the normalized data to the audio queue // Add the normalized data to the audio queue
for (let i = 0; i < float32Array.length; i++) { for (let i = 0; i < float32Array.length; i++) {
audioQueue.push(float32Array[i]); audioQueue.push(float32Array[i]);
} }
} catch (error) { } catch (error) {
console.error('Error processing audio data:', error); console.error("Error processing audio data:", error);
} }
}); });
socket.on('bot-stopSpeaking', () => { socket.on("bot-stopSpeaking", () => {
console.log('Bot stopped speaking'); console.log("Bot stopped speaking");
audioQueue = []; audioQueue = [];
// Clear the audio queue fade out // Clear the audio queue fade out
@ -268,9 +266,7 @@
clearInterval(fadeOut); clearInterval(fadeOut);
} }
}, 100); }, 100);
}); });
</script> </script>
</body> </body>
</html> </html>

View File

@ -1,27 +1,27 @@
import { Socket } from 'socket.io'; import { Socket } from "socket.io";
import { DefaultEventsMap } from 'socket.io/dist/typed-events'; import { DefaultEventsMap } from "socket.io/dist/typed-events";
import { createClient, LiveTranscriptionEvents } from '@deepgram/sdk'; import { createClient, LiveTranscriptionEvents } from "@deepgram/sdk";
import dotenv from 'dotenv'; import dotenv from "dotenv";
import { RealtimeUtils } from '@openai/realtime-api-beta'; import { RealtimeUtils } from "@openai/realtime-api-beta";
import { ElevenLabsClient } from 'elevenlabs'; import { ElevenLabsClient } from "elevenlabs";
import { OptimizeStreamingLatency } from 'elevenlabs/api'; import { OptimizeStreamingLatency } from "elevenlabs/api";
import { Writable } from 'stream'; import { Writable } from "stream";
import Cartesia from '@cartesia/cartesia-js'; import Cartesia from "@cartesia/cartesia-js";
import WS from 'ws'; import WS from "ws";
import OpenAI from 'openai'; import OpenAI from "openai";
import { ChatCompletionMessageParam } from 'openai/resources'; import { ChatCompletionMessageParam } from "openai/resources";
import elevenlabs_wss from './elevenlabs_wss'; import elevenlabs_wss from "./elevenlabs_wss";
import fs from 'fs'; import fs from "fs";
dotenv.config(); dotenv.config();
import { AssemblyAI, RealtimeTranscript } from 'assemblyai'; import { AssemblyAI, RealtimeTranscript } from "assemblyai";
const assemblyAI = new AssemblyAI({ const assemblyAI = new AssemblyAI({
apiKey: process.env.ASSEMBLYAI_API_KEY || '', apiKey: process.env.ASSEMBLYAI_API_KEY || "",
}); });
const cartesia = new Cartesia({ const cartesia = new Cartesia({
@ -35,8 +35,10 @@ const openai = new OpenAI({
// @ts-ignore // @ts-ignore
global.WebSocket = WS; global.WebSocket = WS;
export function voiceEngineSocketUser(socket: Socket<DefaultEventsMap, DefaultEventsMap, DefaultEventsMap, any>) { export function voiceEngineSocketUser(
let transcript = ''; socket: Socket<DefaultEventsMap, DefaultEventsMap, DefaultEventsMap, any>
) {
let transcript = "";
let currentSpeachTime = 0; let currentSpeachTime = 0;
let currentSpeach: { let currentSpeach: {
charStartTimesMs: number[]; charStartTimesMs: number[];
@ -44,15 +46,15 @@ export function voiceEngineSocketUser(socket: Socket<DefaultEventsMap, DefaultEv
}[] = []; }[] = [];
function logTranscript(prefix: string, text: string) { function logTranscript(prefix: string, text: string) {
transcript += prefix + ':\t' + text + '\n'; transcript += prefix + ":\t" + text + "\n";
} }
console.log('A user connected'); console.log("A user connected");
socket.on('bot-start', async () => { socket.on("bot-start", async () => {
let userMessages: ChatCompletionMessageParam[] = [ let userMessages: ChatCompletionMessageParam[] = [
{ {
role: 'system', role: "system",
content: `Telefonvertriebler bei Sentrovo content: `Telefonvertriebler bei Sentrovo
Rolle Du bist Kevin von Sentrovo, einer KI-Agentur, die spezialisierte KI-Dienstleistungen anbietet. Deine Hauptaufgabe besteht darin, potenzielle Kunden durch Outbound-Anrufe zu erreichen und ein Kennenlerngespräch für ein KI-Audit zu vereinbaren. Du kennst die typischen Vorteile von KI-Lösungen für Unternehmen und kannst anschaulich erklären, wie ein Audit dabei hilft, Potenziale zu identifizieren. Rolle Du bist Kevin von Sentrovo, einer KI-Agentur, die spezialisierte KI-Dienstleistungen anbietet. Deine Hauptaufgabe besteht darin, potenzielle Kunden durch Outbound-Anrufe zu erreichen und ein Kennenlerngespräch für ein KI-Audit zu vereinbaren. Du kennst die typischen Vorteile von KI-Lösungen für Unternehmen und kannst anschaulich erklären, wie ein Audit dabei hilft, Potenziale zu identifizieren.
@ -126,53 +128,56 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
]; ];
const vars: { [key: string]: string } = { const vars: { [key: string]: string } = {
Vorname: 'Max', Vorname: "Max",
Nachname: 'Mustermann', Nachname: "Mustermann",
Unternehmen: 'Musterfirma', Unternehmen: "Musterfirma",
Position: 'Geschäftsführer', Position: "Geschäftsführer",
now: new Date().toLocaleDateString(), now: new Date().toLocaleDateString(),
}; };
for (const message of userMessages) { for (const message of userMessages) {
if (message.content) { if (message.content) {
for (const key in vars) { for (const key in vars) {
if (message.content && message.role === 'system') { if (message.content && message.role === "system") {
if (typeof message.content === 'string') { if (typeof message.content === "string") {
message.content = message.content.replace(new RegExp(`{{${key}}}`, 'g'), vars[key as keyof typeof vars]); message.content = message.content.replace(
new RegExp(`{{${key}}}`, "g"),
vars[key as keyof typeof vars]
);
} }
} }
} }
} }
} }
console.log('Bot started'); console.log("Bot started");
async function speakText(text: string) { async function speakText(text: string) {
console.log('Generated message:', text); console.log("Generated message:", text);
const time = new Date().getTime(); const time = new Date().getTime();
let lastTime = 0; let lastTime = 0;
let firstMessage = true; let firstMessage = true;
const labs11 = await elevenlabs_wss.connect( const labs11 = await elevenlabs_wss.connect(
{ {
voice_id: 'N2lVS1w4EtoT3dr4eOWO', voice_id: "N2lVS1w4EtoT3dr4eOWO",
model_id: 'eleven_turbo_v2_5', model_id: "eleven_turbo_v2_5",
optimize_streaming_latency: 4, optimize_streaming_latency: 4,
output_format: 'pcm_24000', output_format: "pcm_24000",
language_code: 'de', language_code: "de",
//sync_alignment: true, //sync_alignment: true,
inactivity_timeout: 20, inactivity_timeout: 20,
}, },
(data, randomUUID) => { (data, randomUUID) => {
if (!data.audio) return; if (!data.audio) return;
const audioBuffer = Buffer.from(data.audio, 'base64'); const audioBuffer = Buffer.from(data.audio, "base64");
const audioBufferArray = new Uint8Array(audioBuffer); const audioBufferArray = new Uint8Array(audioBuffer);
socket.emit('openai-audio', audioBufferArray, randomUUID); socket.emit("openai-audio", audioBufferArray, randomUUID);
console.log('Received audio data from Eleven Labs'); console.log("Received audio data from Eleven Labs");
if (data.normalizedAlignment) { if (data.normalizedAlignment) {
if (firstMessage) { if (firstMessage) {
@ -188,13 +193,17 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
const ws11 = labs11.socket; const ws11 = labs11.socket;
socket.emit('openai-audio-start', labs11.randomUUID); socket.emit("openai-audio-start", labs11.randomUUID);
console.log('Connected to Eleven Labs. Took', new Date().getTime() - time, 'ms'); console.log(
"Connected to Eleven Labs. Took",
new Date().getTime() - time,
"ms"
);
elevenlabs_wss.generate(ws11, { elevenlabs_wss.generate(ws11, {
flush: true, flush: true,
text: text + ' ', text: text + " ",
voice_settings: { voice_settings: {
style: 0.5, style: 0.5,
@ -205,13 +214,13 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
}); });
elevenlabs_wss.generate(ws11, { elevenlabs_wss.generate(ws11, {
//flush: true, //flush: true,
text: '', text: "",
}); });
} }
async function generateVoiceMessage() { async function generateVoiceMessage() {
const output = await openai.beta.chat.completions.parse({ const output = await openai.beta.chat.completions.parse({
model: 'gpt-4o', model: "gpt-4o",
temperature: 0.8, temperature: 0.8,
//max_completion_tokens: 100, //max_completion_tokens: 100,
messages: [...userMessages], messages: [...userMessages],
@ -229,7 +238,7 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
} }
function addLastMessageToChat() { function addLastMessageToChat() {
let oldText = ''; let oldText = "";
let lastMs = 0; let lastMs = 0;
const speakOffset = new Date().getTime() - currentSpeachTime; const speakOffset = new Date().getTime() - currentSpeachTime;
@ -251,56 +260,61 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
index++; index++;
} }
lastMs = alignment.charStartTimesMs[alignment.charStartTimesMs.length - 1]; lastMs =
alignment.charStartTimesMs[alignment.charStartTimesMs.length - 1];
} }
if (inrerrupt) { if (inrerrupt) {
oldText += ' ... **ABGEBROCHEN**'; oldText += " ... **ABGEBROCHEN**";
} }
if (oldText) { if (oldText) {
addMessageToUser({ addMessageToUser({
role: 'assistant', role: "assistant",
content: oldText, content: oldText,
}); });
} }
logTranscript('AI', oldText); logTranscript("AI", oldText);
} }
speakText('Hallo hier ist Kevin von Sentrovo, spreche ich hier mit Herr Mustermann?'); speakText(
"Hallo hier ist Kevin von Sentrovo, spreche ich hier mit Herr Mustermann?"
);
const transcriber = assemblyAI.realtime.transcriber({ const transcriber = assemblyAI.realtime.transcriber({
sampleRate: 16_000, sampleRate: 16_000,
encoding: 'pcm_s16le', encoding: "pcm_s16le",
}); });
transcriber.on('open', ({ sessionId }) => { transcriber.on("open", ({ sessionId }) => {
console.log(`Session opened with ID: ${sessionId}`); console.log(`Session opened with ID: ${sessionId}`);
}); });
transcriber.on('error', (error: Error) => { transcriber.on("error", (error: Error) => {
console.error('Error:', error); console.error("Error:", error);
}); });
transcriber.on('close', (code: number, reason: string) => console.log('Session closed:', code, reason)); transcriber.on("close", (code: number, reason: string) =>
console.log("Session closed:", code, reason)
);
transcriber.on('transcript', (transcript: RealtimeTranscript) => { transcriber.on("transcript", (transcript: RealtimeTranscript) => {
if (!transcript.text) { if (!transcript.text) {
return; return;
} }
if (transcript.message_type === 'PartialTranscript') { if (transcript.message_type === "PartialTranscript") {
console.log('Partial:', transcript.text); console.log("Partial:", transcript.text);
} else { } else {
console.log('Final:', transcript.text); console.log("Final:", transcript.text);
} }
}); });
console.log('Connecting to real-time transcript service'); console.log("Connecting to real-time transcript service");
await transcriber.connect(); await transcriber.connect();
console.log('Starting recording'); console.log("Starting recording");
/* /*
{ {
@ -338,9 +352,12 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
// STEP 4: Fetch the audio stream and send it to the live transcription connection // STEP 4: Fetch the audio stream and send it to the live transcription connection
socket.on('bot-voice-data', (audioBuffer: any) => { socket.on("bot-voice-data", (audioBuffer: any) => {
// Convert Buffer to ArrayBuffer // Convert Buffer to ArrayBuffer
const arrayBuffer = audioBuffer.buffer.slice(audioBuffer.byteOffset, audioBuffer.byteOffset + audioBuffer.byteLength); const arrayBuffer = audioBuffer.buffer.slice(
audioBuffer.byteOffset,
audioBuffer.byteOffset + audioBuffer.byteLength
);
// Convert ArrayBuffer to Int16Array // Convert ArrayBuffer to Int16Array
const float32Array = new Float32Array(arrayBuffer); const float32Array = new Float32Array(arrayBuffer);
@ -355,11 +372,11 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
}); });
function stopConversation() { function stopConversation() {
console.log('Ending conversation'); console.log("Ending conversation");
addLastMessageToChat(); addLastMessageToChat();
//dgConnection.disconnect(); //dgConnection.disconnect();
socket.emit('bot-stopSpeaking'); socket.emit("bot-stopSpeaking");
// write the transcript to the file // write the transcript to the file
const uuid = new Date().getTime(); const uuid = new Date().getTime();
@ -370,13 +387,13 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
fs.writeFileSync(`transcripts/${uuid}/transcript.txt`, transcript); fs.writeFileSync(`transcripts/${uuid}/transcript.txt`, transcript);
} }
socket.on('bot-end', () => { socket.on("bot-end", () => {
stopConversation(); stopConversation();
}); });
socket.on('disconnect', () => { socket.on("disconnect", () => {
stopConversation(); stopConversation();
console.log('A user disconnected'); console.log("A user disconnected");
}); });
}); });
} }