changed for prod

parent 7c7feff90e
commit 4f4f90d7ad

src/index.ts (128 lines changed)
@@ -1,22 +1,28 @@
-import express from 'express';
-import path from 'path';
-import { Request, Response } from 'express';
-import http from 'http';
-import { Server } from 'socket.io';
-import dotenv from 'dotenv';
-import { RealtimeClient, RealtimeUtils } from '@openai/realtime-api-beta';
-import { voiceEngineSocketUser } from './voiceEngine';
+import express from "express";
+import path from "path";
+import { Request, Response } from "express";
+import http from "http";
+import { Server } from "socket.io";
+import dotenv from "dotenv";
+import { RealtimeClient, RealtimeUtils } from "@openai/realtime-api-beta";
+import { voiceEngineSocketUser } from "./voiceEngine";
 
 dotenv.config();
 
 const app = express();
-const PORT = 50269;
+
+if (!process.env.PORT) {
+  console.error("Please set the PORT environment variable");
+  process.exit(1);
+}
+
+const PORT = parseInt(process.env.PORT, 10);
 
-const publicFolder = path.join(__dirname, 'public');
+const publicFolder = path.join(__dirname, "public");
 app.use(express.static(publicFolder));
 
-app.get('/', (req: Request, res: Response) => {
-  res.sendFile(path.join(publicFolder, 'index.html'));
+app.get("/", (req: Request, res: Response) => {
+  res.sendFile(path.join(publicFolder, "index.html"));
 });
 
 const server = http.createServer(app);
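Note on the hunk above: the only functional change is the port configuration. The hardcoded `const PORT = 50269;` is replaced by a fail-fast check, so the process exits at startup when `PORT` is unset instead of silently binding a baked-in dev port. As a sketch (a hypothetical refactoring, not part of this commit), the same guard factors into a small helper:

    // Hypothetical helper illustrating the fail-fast pattern added above.
    function requireEnv(name: string): string {
      const value = process.env[name];
      if (!value) {
        console.error(`Please set the ${name} environment variable`);
        process.exit(1); // never returns, so `value` narrows to string below
      }
      return value;
    }

    const PORT = parseInt(requireEnv("PORT"), 10); // run with e.g. PORT=50269

Everything else in the hunk is quote normalization from single to double quotes (Prettier style).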
@@ -26,91 +32,108 @@ interface ConversationItem {
   [key: string]: any;
 }
 
-io.on('connection', (socket) => {
-  console.log('A user connected');
+io.on("connection", (socket) => {
+  console.log("A user connected");
 
   voiceEngineSocketUser(socket);
 
   let gptClient: null | RealtimeClient = null;
 
-  socket.on('start', async (data) => {
+  socket.on("start", async (data) => {
     gptClient = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY });
 
     (async () => {
       try {
         await gptClient.connect();
 
-        console.log('Connected to OpenAI Realtime API');
+        console.log("Connected to OpenAI Realtime API");
 
-        socket.on('voice-data', async (audioBuffer) => {
+        socket.on("voice-data", async (audioBuffer) => {
           try {
-            console.log('Voice data received');
+            console.log("Voice data received");
 
             // Send user audio, must be Int16Array or ArrayBuffer
             // Default audio format is pcm16 with sample rate of 24,000 Hz
 
             if (audioBuffer instanceof Float32Array) {
-              console.log('Received audio data from the input worklet:');
+              console.log("Received audio data from the input worklet:");
 
-              if (gptClient) gptClient.appendInputAudio(RealtimeUtils.floatTo16BitPCM(audioBuffer));
+              if (gptClient)
+                gptClient.appendInputAudio(
+                  RealtimeUtils.floatTo16BitPCM(audioBuffer)
+                );
             } else if (audioBuffer instanceof Buffer) {
-              console.log('Received audio data as Buffer:');
+              console.log("Received audio data as Buffer:");
 
               // Convert Buffer to ArrayBuffer
-              const arrayBuffer = audioBuffer.buffer.slice(audioBuffer.byteOffset, audioBuffer.byteOffset + audioBuffer.byteLength);
+              const arrayBuffer = audioBuffer.buffer.slice(
+                audioBuffer.byteOffset,
+                audioBuffer.byteOffset + audioBuffer.byteLength
+              );
 
               // Convert ArrayBuffer to Int16Array
               const float32Array = new Float32Array(arrayBuffer);
 
-              if (gptClient) gptClient.appendInputAudio(RealtimeUtils.floatTo16BitPCM(float32Array));
+              if (gptClient)
+                gptClient.appendInputAudio(
+                  RealtimeUtils.floatTo16BitPCM(float32Array)
+                );
             } else {
-              console.error('Invalid data type received in worklet');
+              console.error("Invalid data type received in worklet");
 
               // log the data type and return
-              console.log('Data type:', typeof audioBuffer, audioBuffer);
+              console.log("Data type:", typeof audioBuffer, audioBuffer);
               return;
             }
           } catch (error) {
-            console.error('Error with OpenAI Realtime API:', error);
+            console.error("Error with OpenAI Realtime API:", error);
           }
         });
 
-        gptClient.on('conversation.updated', (event: ConversationItem) => {
+        gptClient.on("conversation.updated", (event: ConversationItem) => {
           const { item, delta } = event;
           if (item.content) {
-            socket.emit('openai-response', item.content);
+            socket.emit("openai-response", item.content);
           }
-          console.log('Playing audio response...', delta);
+          console.log("Playing audio response...", delta);
           if (delta && delta.audio) {
-            socket.emit('openai-audio', delta.audio);
+            socket.emit("openai-audio", delta.audio);
           }
-          console.log('Conversation updated:', event);
+          console.log("Conversation updated:", event);
         });
 
-        gptClient.on('conversation.item.completed', (event: ConversationItem) => {
-          const { item } = event;
-          console.log('Conversation item completed:', item);
-
-          if (item.type === 'message' && item.role === 'assistant' && item.formatted && item.formatted.audio) {
-            console.log('Playing audio response...');
-            //socket.emit('openai-audio', item.formatted.audio);
-          } else {
-            console.log('No audio content in this item.');
-          }
-        });
+        gptClient.on(
+          "conversation.item.completed",
+          (event: ConversationItem) => {
+            const { item } = event;
+            console.log("Conversation item completed:", item);
+
+            if (
+              item.type === "message" &&
+              item.role === "assistant" &&
+              item.formatted &&
+              item.formatted.audio
+            ) {
+              console.log("Playing audio response...");
+              //socket.emit('openai-audio', item.formatted.audio);
+            } else {
+              console.log("No audio content in this item.");
+            }
+          }
+        );
       } catch (error) {
-        console.error('Error connecting to OpenAI Realtime API:', error);
+        console.error("Error connecting to OpenAI Realtime API:", error);
       }
     })();
 
-    socket.on('disconnect', () => {
-      console.log('A user disconnected');
+    socket.on("disconnect", () => {
+      console.log("A user disconnected");
 
       if (gptClient) gptClient.disconnect();
     });
 
-    socket.on('end', () => {
-      console.log('A user ended the conversation');
+    socket.on("end", () => {
+      console.log("A user ended the conversation");
       if (gptClient) gptClient.disconnect();
     });
 
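Note on the hunk above: apart from quote and line-wrapping changes, the logic is untouched. Both input branches feed `gptClient.appendInputAudio()` with 16-bit PCM produced by `RealtimeUtils.floatTo16BitPCM` from `@openai/realtime-api-beta`. Conceptually (a sketch of the conversion, not the library's exact implementation) that step clamps each float sample to [-1, 1] and scales it into the signed 16-bit range:

    // Sketch of Float32 -> PCM16; assumes input samples are roughly in [-1, 1].
    function floatTo16BitPCMSketch(float32: Float32Array): Int16Array {
      const int16 = new Int16Array(float32.length);
      for (let i = 0; i < float32.length; i++) {
        const s = Math.max(-1, Math.min(1, float32[i])); // clamp
        int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff; // scale to [-32768, 32767]
      }
      return int16;
    }

The `Buffer` branch views the raw bytes as a `Float32Array` first, so the client is expected to send 32-bit float samples, which matches the `getChannelData` output the page below emits.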
@@ -118,17 +141,22 @@ io.on('connection', (socket) => {
       instructions: `Du bist beim Kundensupport von Jannex und möchtest eine Erinnerung für ein Termin nachfragen.
 Bitte spreche mit einer ruhigen Stimme.`,
     });
-    gptClient.updateSession({ voice: 'ballad' });
+    gptClient.updateSession({ voice: "ballad" });
     gptClient.updateSession({
-      turn_detection: { type: 'server_vad', threshold: 0.6, prefix_padding_ms: 300, silence_duration_ms: 500 },
-      input_audio_transcription: { model: 'whisper-1' },
-      input_audio_format: 'pcm16',
-      output_audio_format: 'pcm16',
+      turn_detection: {
+        type: "server_vad",
+        threshold: 0.6,
+        prefix_padding_ms: 300,
+        silence_duration_ms: 500,
+      },
+      input_audio_transcription: { model: "whisper-1" },
+      input_audio_format: "pcm16",
+      output_audio_format: "pcm16",
       max_response_output_tokens: 1500,
-      modalities: ['audio', 'text'],
+      modalities: ["audio", "text"],
     });
 
-    gptClient.on('conversation.updated', (event: ConversationItem) => {
+    gptClient.on("conversation.updated", (event: ConversationItem) => {
       const { item, delta } = event;
       if (gptClient) {
         const items = gptClient.conversation.getItems();
@@ -141,6 +169,6 @@ Bitte spreche mit einer ruhigen Stimme.`,
     });
   });
 
-server.listen(PORT, '127.0.0.1', () => {
+server.listen(PORT, "127.0.0.1", () => {
   console.log(`Server läuft unter http://localhost:${PORT}`);
 });
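Note: `server.listen(PORT, "127.0.0.1", ...)` (unchanged here apart from quoting) binds the loopback interface only, so in production external traffic presumably reaches the app through a reverse proxy on the same host.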
public/index.html

@@ -1,18 +1,21 @@
 <!DOCTYPE html>
 <html lang="en">
-
 <head>
-  <meta charset="UTF-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=no">
+  <meta charset="UTF-8" />
+  <meta
+    name="viewport"
+    content="width=device-width, initial-scale=1.0, user-scalable=no"
+  />
   <!-- Theme color for Chrome, Firefox OS and Opera -->
-  <meta name="theme-color" content="#fd9644">
+  <meta name="theme-color" content="#fd9644" />
   <!-- Theme color for Windows Phone -->
-  <meta name="msapplication-navbutton-color" content="#fd9644">
+  <meta name="msapplication-navbutton-color" content="#fd9644" />
   <!-- Theme color for iOS Safari -->
-  <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
-  <meta name="apple-mobile-web-app-capable" content="yes">
+  <meta
+    name="apple-mobile-web-app-status-bar-style"
+    content="black-translucent"
+  />
+  <meta name="apple-mobile-web-app-capable" content="yes" />
 
   <title>Voice Call with a voice bot</title>
   <style>
@@ -22,7 +25,7 @@
       align-items: center;
       height: 100vh;
       margin: 0;
-      font-family: 'Roboto', 'Helvetica', 'Arial', sans-serif;
+      font-family: "Roboto", "Helvetica", "Arial", sans-serif;
     }
 
     .center-container {
@@ -30,7 +33,7 @@
     }
 
     .material-button {
-      font-family: 'Roboto', 'Helvetica', 'Arial', sans-serif;
+      font-family: "Roboto", "Helvetica", "Arial", sans-serif;
 
       display: inline-block;
       padding: 0 16px;
@@ -49,38 +52,43 @@
       cursor: pointer;
       color: #fff;
       background-color: #fd9644;
-      box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14), 0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12);
+      box-shadow: 0 2px 2px 0 rgba(0, 0, 0, 0.14),
+        0 3px 1px -2px rgba(0, 0, 0, 0.2), 0 1px 5px 0 rgba(0, 0, 0, 0.12);
     }
 
     .material-button:hover {
-      box-shadow: 0 4px 5px 0 rgba(0, 0, 0, 0.14), 0 1px 10px 0 rgba(0, 0, 0, 0.12), 0 2px 4px -1px rgba(0, 0, 0, 0.2);
+      box-shadow: 0 4px 5px 0 rgba(0, 0, 0, 0.14),
+        0 1px 10px 0 rgba(0, 0, 0, 0.12), 0 2px 4px -1px rgba(0, 0, 0, 0.2);
     }
 
     .material-button:active {
-      box-shadow: 0 8px 10px 1px rgba(0, 0, 0, 0.14), 0 3px 14px 2px rgba(0, 0, 0, 0.12), 0 5px 5px -3px rgba(0, 0, 0, 0.2);
+      box-shadow: 0 8px 10px 1px rgba(0, 0, 0, 0.14),
+        0 3px 14px 2px rgba(0, 0, 0, 0.12), 0 5px 5px -3px rgba(0, 0, 0, 0.2);
     }
   </style>
 </head>
 
 <body>
   <div class="center-container">
     <h1>Voice Call with a Voice Bot</h1>
     <button id="startCallButton" class="material-button">Start Call</button>
     <p id="status">Status: Idle</p>
-    <h3>Input: </h3>
+    <h3>Input:</h3>
     <p id="text-input">---</p>
-    <h3>Output: </h3>
+    <h3>Output:</h3>
     <p id="text-output">---</p>
   </div>
 
   <script src="/socket.io/socket.io.js"></script>
   <script>
-    const startCallButton = document.getElementById('startCallButton');
-    const status = document.getElementById('status');
-    const debugTextInput = document.getElementById('text-input');
-    const debugTextOutput = document.getElementById('text-output');
+    const startCallButton = document.getElementById("startCallButton");
+    const status = document.getElementById("status");
+    const debugTextInput = document.getElementById("text-input");
+    const debugTextOutput = document.getElementById("text-output");
 
-    const socket = io('https://voice-test.ex.umbach.dev/'); // Connect to your server
+    const socket = io(
+      `${window.location.protocol}//${window.location.hostname}`
+    ); // Connect to your server
     let localStream;
     let audioContext;
     let processor;
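Note on the hunk above: the functional change is the Socket.IO endpoint. Instead of the hardcoded test host `https://voice-test.ex.umbach.dev/`, the client now derives the URL from the page's own origin, so the same HTML works wherever it is deployed. Because only `protocol` and `hostname` are used, an explicit port is dropped; this assumes the production site is served on the default HTTP(S) port. A variant (not in this commit) that preserves a non-default port would use `window.location.host` instead:

    // Variant: window.location.host includes the port, e.g. "example.com:8080".
    const socket = io(`${window.location.protocol}//${window.location.host}`);

The remaining changes are Prettier reflow of the box-shadow declarations and quote/whitespace cleanup.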
@@ -88,12 +96,12 @@
 
     let audioQueue = [];
 
-    startCallButton.addEventListener('click', async () => {
+    startCallButton.addEventListener("click", async () => {
       if (started) {
-        socket.emit('bot-end');
+        socket.emit("bot-end");
         processor.disconnect(audioContext.destination);
 
-        localStream.getTracks().forEach(track => track.stop());
+        localStream.getTracks().forEach((track) => track.stop());
         localStream = null;
 
         audioContext.close();
@@ -101,29 +109,31 @@
 
         processor = null;
 
-        startCallButton.textContent = 'Start Call';
-        status.textContent = 'Status: Call ended';
+        startCallButton.textContent = "Start Call";
+        status.textContent = "Status: Call ended";
         started = false;
         return;
       }
 
       started = true;
 
-      startCallButton.textContent = 'End Call';
+      startCallButton.textContent = "End Call";
 
-      status.textContent = 'Status: Starting call...';
-      console.log('Starting call...');
+      status.textContent = "Status: Starting call...";
+      console.log("Starting call...");
 
       // Get local audio stream
-      localStream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      console.log('Local audio stream obtained:', localStream);
+      localStream = await navigator.mediaDevices.getUserMedia({
+        audio: true,
+      });
+      console.log("Local audio stream obtained:", localStream);
 
-      socket.emit('bot-start');
+      socket.emit("bot-start");
 
       // Create AudioContext with sample rate of 24000 Hz
-      audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 24000 });
+      audioContext = new (window.AudioContext || window.webkitAudioContext)({
+        sampleRate: 24000,
+      });
       const source = audioContext.createMediaStreamSource(localStream);
       processor = audioContext.createScriptProcessor(4096, 1, 1);
@@ -131,7 +141,7 @@
       processor.onaudioprocess = (event) => {
         const inputData = event.inputBuffer.getChannelData(0);
 
-        socket.emit('bot-voice-data', inputData); // Send as ArrayBuffer
+        socket.emit("bot-voice-data", inputData); // Send as ArrayBuffer
 
         // echo audio locally
         /*const outputData = event.outputBuffer.getChannelData(0);
@@ -139,7 +149,6 @@
           outputData[sample] = inputData[sample];
         }*/
 
-
         let outputData = event.outputBuffer.getChannelData(0);
 
         if (audioQueue.length > 0) {
@@ -152,70 +161,65 @@
             outputData[i] = 0;
           }
         }
 
       };
 
       source.connect(processor);
       processor.connect(audioContext.destination);
 
-      status.textContent = 'Status: Call started';
+      status.textContent = "Status: Call started";
     });
 
-    socket.on('openai-response', (data) => {
-      console.log('OpenAI response received:', data);
+    socket.on("openai-response", (data) => {
+      console.log("OpenAI response received:", data);
       try {
-        response.textContent = 'Response: ' + data[0].transcript;
-      } catch (error) {
-
-      }
+        response.textContent = "Response: " + data[0].transcript;
+      } catch (error) {}
     });
 
-    socket.on('debug-text-input', (data) => {
+    socket.on("debug-text-input", (data) => {
       debugTextInput.textContent = data;
     });
 
-    socket.on('debug-text-output', (data) => {
+    socket.on("debug-text-output", (data) => {
       debugTextOutput.textContent = data;
     });
 
     let dataSum = 0;
     let lastByte = undefined;
     let currentAudioID = "";
 
-    socket.on('openai-audio-start', (randomUUID) => {
-      console.log('OpenAI audio start:', randomUUID);
+    socket.on("openai-audio-start", (randomUUID) => {
+      console.log("OpenAI audio start:", randomUUID);
       currentAudioID = randomUUID;
       dataSum = 0;
     });
 
-    socket.on('openai-audio', async (data, randomUUID) => {
-      console.log('OpenAI audio received:', data, randomUUID);
+    socket.on("openai-audio", async (data, randomUUID) => {
+      console.log("OpenAI audio received:", data, randomUUID);
       if (currentAudioID !== randomUUID) {
         return;
       }
 
       // Log the received data
-      console.log('Received data type:', data.constructor.name);
-      console.log('Received data:', data);
+      console.log("Received data type:", data.constructor.name);
+      console.log("Received data:", data);
 
       // Ensure data is an ArrayBuffer
       if (!(data instanceof ArrayBuffer)) {
-        console.error('Received data is not an ArrayBuffer');
+        console.error("Received data is not an ArrayBuffer");
         return;
       }
 
       dataSum += data.byteLength;
-      console.log('Received data sum:', dataSum);
-
-
+      console.log("Received data sum:", dataSum);
 
       try {
         // Check if there was an odd byte from the previous chunk
         if (lastByte !== undefined) {
           // Combine the last byte with the current data
-          const lastData = new Uint8Array(lastByte.byteLength + data.byteLength);
+          const lastData = new Uint8Array(
+            lastByte.byteLength + data.byteLength
+          );
           lastData.set(new Uint8Array(lastByte), 0);
           lastData.set(new Uint8Array(data), lastByte.byteLength);
           data = lastData;
@@ -230,34 +234,28 @@
           data = data.slice(0, data.byteLength - 1);
         }
 
-
-
-
         // Convert ArrayBuffer to Int16Array
         const int16Array = new Int16Array(data);
 
-
-
         // Normalize PCM16 data to the range [-1, 1]
         const float32Array = new Float32Array(int16Array.length);
         for (let i = 0; i < int16Array.length; i++) {
-          float32Array[i] = int16Array[i] / 0x7FFF;
+          float32Array[i] = int16Array[i] / 0x7fff;
         }
 
-        console.log('Normalized data:', float32Array.length);
+        console.log("Normalized data:", float32Array.length);
 
         // Add the normalized data to the audio queue
         for (let i = 0; i < float32Array.length; i++) {
           audioQueue.push(float32Array[i]);
         }
-
       } catch (error) {
-        console.error('Error processing audio data:', error);
+        console.error("Error processing audio data:", error);
       }
     });
 
-    socket.on('bot-stopSpeaking', () => {
-      console.log('Bot stopped speaking');
+    socket.on("bot-stopSpeaking", () => {
+      console.log("Bot stopped speaking");
       audioQueue = [];
 
       // Clear the audio queue fade out
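Note on the receive path above: two details carry the logic. First, PCM16 needs an even byte count before it can be viewed as an `Int16Array`, so a trailing odd byte is held back (`lastByte`) and prepended to the next chunk. Second, each 16-bit sample is divided by 0x7fff (32767) to map it into the [-1, 1] float range the Web Audio API plays back; for example, sample value 16384 becomes roughly 0.5. A minimal sketch of the even-length invariant (illustrative, not the page's exact code):

    // Split a chunk into an even-length body plus an odd-byte carry.
    function splitEven(chunk: Uint8Array): { body: Uint8Array; carry: Uint8Array | null } {
      if (chunk.byteLength % 2 === 0) return { body: chunk, carry: null };
      return {
        body: chunk.slice(0, chunk.byteLength - 1),
        carry: chunk.slice(chunk.byteLength - 1),
      };
    }

The `0x7FFF` to `0x7fff` change itself is just Prettier's lowercase-hex rule.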
@@ -268,9 +266,7 @@
           clearInterval(fadeOut);
         }
       }, 100);
-
     });
   </script>
 </body>
-
 </html>
src/voiceEngine.ts

@@ -1,27 +1,27 @@
-import { Socket } from 'socket.io';
-import { DefaultEventsMap } from 'socket.io/dist/typed-events';
+import { Socket } from "socket.io";
+import { DefaultEventsMap } from "socket.io/dist/typed-events";
 
-import { createClient, LiveTranscriptionEvents } from '@deepgram/sdk';
+import { createClient, LiveTranscriptionEvents } from "@deepgram/sdk";
 
-import dotenv from 'dotenv';
-import { RealtimeUtils } from '@openai/realtime-api-beta';
-import { ElevenLabsClient } from 'elevenlabs';
-import { OptimizeStreamingLatency } from 'elevenlabs/api';
-import { Writable } from 'stream';
-import Cartesia from '@cartesia/cartesia-js';
-import WS from 'ws';
-import OpenAI from 'openai';
-import { ChatCompletionMessageParam } from 'openai/resources';
+import dotenv from "dotenv";
+import { RealtimeUtils } from "@openai/realtime-api-beta";
+import { ElevenLabsClient } from "elevenlabs";
+import { OptimizeStreamingLatency } from "elevenlabs/api";
+import { Writable } from "stream";
+import Cartesia from "@cartesia/cartesia-js";
+import WS from "ws";
+import OpenAI from "openai";
+import { ChatCompletionMessageParam } from "openai/resources";
 
-import elevenlabs_wss from './elevenlabs_wss';
-import fs from 'fs';
+import elevenlabs_wss from "./elevenlabs_wss";
+import fs from "fs";
 
 dotenv.config();
 
-import { AssemblyAI, RealtimeTranscript } from 'assemblyai';
+import { AssemblyAI, RealtimeTranscript } from "assemblyai";
 
 const assemblyAI = new AssemblyAI({
-  apiKey: process.env.ASSEMBLYAI_API_KEY || '',
+  apiKey: process.env.ASSEMBLYAI_API_KEY || "",
 });
 
 const cartesia = new Cartesia({
@@ -35,8 +35,10 @@ const openai = new OpenAI({
 // @ts-ignore
 global.WebSocket = WS;
 
-export function voiceEngineSocketUser(socket: Socket<DefaultEventsMap, DefaultEventsMap, DefaultEventsMap, any>) {
-  let transcript = '';
+export function voiceEngineSocketUser(
+  socket: Socket<DefaultEventsMap, DefaultEventsMap, DefaultEventsMap, any>
+) {
+  let transcript = "";
   let currentSpeachTime = 0;
   let currentSpeach: {
     charStartTimesMs: number[];
@@ -44,15 +46,15 @@ export function voiceEngineSocketUser(socket: Socket<DefaultEventsMap, DefaultEv
   }[] = [];
 
   function logTranscript(prefix: string, text: string) {
-    transcript += prefix + ':\t' + text + '\n';
+    transcript += prefix + ":\t" + text + "\n";
   }
 
-  console.log('A user connected');
+  console.log("A user connected");
 
-  socket.on('bot-start', async () => {
+  socket.on("bot-start", async () => {
     let userMessages: ChatCompletionMessageParam[] = [
       {
-        role: 'system',
+        role: "system",
         content: `Telefonvertriebler bei Sentrovo
 Rolle Du bist Kevin von Sentrovo, einer KI-Agentur, die spezialisierte KI-Dienstleistungen anbietet. Deine Hauptaufgabe besteht darin, potenzielle Kunden durch Outbound-Anrufe zu erreichen und ein Kennenlerngespräch für ein KI-Audit zu vereinbaren. Du kennst die typischen Vorteile von KI-Lösungen für Unternehmen und kannst anschaulich erklären, wie ein Audit dabei hilft, Potenziale zu identifizieren.
 
@@ -126,53 +128,56 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
     ];
 
     const vars: { [key: string]: string } = {
-      Vorname: 'Max',
-      Nachname: 'Mustermann',
-      Unternehmen: 'Musterfirma',
-      Position: 'Geschäftsführer',
+      Vorname: "Max",
+      Nachname: "Mustermann",
+      Unternehmen: "Musterfirma",
+      Position: "Geschäftsführer",
       now: new Date().toLocaleDateString(),
     };
 
     for (const message of userMessages) {
       if (message.content) {
         for (const key in vars) {
-          if (message.content && message.role === 'system') {
-            if (typeof message.content === 'string') {
-              message.content = message.content.replace(new RegExp(`{{${key}}}`, 'g'), vars[key as keyof typeof vars]);
+          if (message.content && message.role === "system") {
+            if (typeof message.content === "string") {
+              message.content = message.content.replace(
+                new RegExp(`{{${key}}}`, "g"),
+                vars[key as keyof typeof vars]
+              );
             }
           }
         }
       }
     }
 
-    console.log('Bot started');
+    console.log("Bot started");
 
     async function speakText(text: string) {
-      console.log('Generated message:', text);
+      console.log("Generated message:", text);
 
       const time = new Date().getTime();
       let lastTime = 0;
       let firstMessage = true;
       const labs11 = await elevenlabs_wss.connect(
         {
-          voice_id: 'N2lVS1w4EtoT3dr4eOWO',
-          model_id: 'eleven_turbo_v2_5',
+          voice_id: "N2lVS1w4EtoT3dr4eOWO",
+          model_id: "eleven_turbo_v2_5",
 
           optimize_streaming_latency: 4,
-          output_format: 'pcm_24000',
-          language_code: 'de',
+          output_format: "pcm_24000",
+          language_code: "de",
           //sync_alignment: true,
           inactivity_timeout: 20,
         },
         (data, randomUUID) => {
           if (!data.audio) return;
 
-          const audioBuffer = Buffer.from(data.audio, 'base64');
+          const audioBuffer = Buffer.from(data.audio, "base64");
 
           const audioBufferArray = new Uint8Array(audioBuffer);
 
-          socket.emit('openai-audio', audioBufferArray, randomUUID);
-          console.log('Received audio data from Eleven Labs');
+          socket.emit("openai-audio", audioBufferArray, randomUUID);
+          console.log("Received audio data from Eleven Labs");
 
           if (data.normalizedAlignment) {
             if (firstMessage) {
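Note on the substitution loop above: it replaces `{{Name}}`-style placeholders in the system prompt with the values from `vars`. With the values in this commit, an illustrative template line behaves like this (the real placeholders sit inside the long German system prompt):

    const sample = "Hallo {{Vorname}} {{Nachname}} von {{Unternehmen}}";
    // After the replace loop: "Hallo Max Mustermann von Musterfirma"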
@@ -188,13 +193,17 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
 
       const ws11 = labs11.socket;
 
-      socket.emit('openai-audio-start', labs11.randomUUID);
+      socket.emit("openai-audio-start", labs11.randomUUID);
 
-      console.log('Connected to Eleven Labs. Took', new Date().getTime() - time, 'ms');
+      console.log(
+        "Connected to Eleven Labs. Took",
+        new Date().getTime() - time,
+        "ms"
+      );
 
       elevenlabs_wss.generate(ws11, {
         flush: true,
-        text: text + ' ',
+        text: text + " ",
 
         voice_settings: {
           style: 0.5,
@@ -205,13 +214,13 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
       });
       elevenlabs_wss.generate(ws11, {
         //flush: true,
-        text: '',
+        text: "",
       });
     }
 
     async function generateVoiceMessage() {
       const output = await openai.beta.chat.completions.parse({
-        model: 'gpt-4o',
+        model: "gpt-4o",
         temperature: 0.8,
         //max_completion_tokens: 100,
         messages: [...userMessages],
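Note: the second `elevenlabs_wss.generate` call sends `text: ""`; on the ElevenLabs streaming websocket an empty-text message is the end-of-input signal that flushes buffered audio. `elevenlabs_wss` is the project's own wrapper (imported from `./elevenlabs_wss`), so the exact message shape is defined there.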
@@ -229,7 +238,7 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
     }
 
     function addLastMessageToChat() {
-      let oldText = '';
+      let oldText = "";
       let lastMs = 0;
 
       const speakOffset = new Date().getTime() - currentSpeachTime;
@@ -251,56 +260,61 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
           index++;
         }
 
-        lastMs = alignment.charStartTimesMs[alignment.charStartTimesMs.length - 1];
+        lastMs =
+          alignment.charStartTimesMs[alignment.charStartTimesMs.length - 1];
       }
 
       if (inrerrupt) {
-        oldText += ' ... **ABGEBROCHEN**';
+        oldText += " ... **ABGEBROCHEN**";
       }
 
       if (oldText) {
        addMessageToUser({
-          role: 'assistant',
+          role: "assistant",
          content: oldText,
        });
      }
 
-      logTranscript('AI', oldText);
+      logTranscript("AI", oldText);
    }
 
-    speakText('Hallo hier ist Kevin von Sentrovo, spreche ich hier mit Herr Mustermann?');
+    speakText(
+      "Hallo hier ist Kevin von Sentrovo, spreche ich hier mit Herr Mustermann?"
+    );
 
     const transcriber = assemblyAI.realtime.transcriber({
       sampleRate: 16_000,
-      encoding: 'pcm_s16le',
+      encoding: "pcm_s16le",
     });
 
-    transcriber.on('open', ({ sessionId }) => {
+    transcriber.on("open", ({ sessionId }) => {
       console.log(`Session opened with ID: ${sessionId}`);
     });
 
-    transcriber.on('error', (error: Error) => {
-      console.error('Error:', error);
+    transcriber.on("error", (error: Error) => {
+      console.error("Error:", error);
     });
 
-    transcriber.on('close', (code: number, reason: string) => console.log('Session closed:', code, reason));
+    transcriber.on("close", (code: number, reason: string) =>
+      console.log("Session closed:", code, reason)
+    );
 
-    transcriber.on('transcript', (transcript: RealtimeTranscript) => {
+    transcriber.on("transcript", (transcript: RealtimeTranscript) => {
       if (!transcript.text) {
         return;
       }
 
-      if (transcript.message_type === 'PartialTranscript') {
-        console.log('Partial:', transcript.text);
+      if (transcript.message_type === "PartialTranscript") {
+        console.log("Partial:", transcript.text);
       } else {
-        console.log('Final:', transcript.text);
+        console.log("Final:", transcript.text);
       }
     });
 
-    console.log('Connecting to real-time transcript service');
+    console.log("Connecting to real-time transcript service");
     await transcriber.connect();
 
-    console.log('Starting recording');
+    console.log("Starting recording");
 
     /*
     {
@@ -338,9 +352,12 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
 
     // STEP 4: Fetch the audio stream and send it to the live transcription connection
 
-    socket.on('bot-voice-data', (audioBuffer: any) => {
+    socket.on("bot-voice-data", (audioBuffer: any) => {
       // Convert Buffer to ArrayBuffer
-      const arrayBuffer = audioBuffer.buffer.slice(audioBuffer.byteOffset, audioBuffer.byteOffset + audioBuffer.byteLength);
+      const arrayBuffer = audioBuffer.buffer.slice(
+        audioBuffer.byteOffset,
+        audioBuffer.byteOffset + audioBuffer.byteLength
+      );
 
       // Convert ArrayBuffer to Int16Array
       const float32Array = new Float32Array(arrayBuffer);
@@ -355,11 +372,11 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
     });
 
     function stopConversation() {
-      console.log('Ending conversation');
+      console.log("Ending conversation");
       addLastMessageToChat();
 
       //dgConnection.disconnect();
-      socket.emit('bot-stopSpeaking');
+      socket.emit("bot-stopSpeaking");
 
       // write the transcript to the file
       const uuid = new Date().getTime();
@@ -370,13 +387,13 @@ Hinweis: Der Ton bleibt freundlich und entspannt, um Vertrauen aufzubauen und di
       fs.writeFileSync(`transcripts/${uuid}/transcript.txt`, transcript);
     }
 
-    socket.on('bot-end', () => {
+    socket.on("bot-end", () => {
       stopConversation();
     });
 
-    socket.on('disconnect', () => {
+    socket.on("disconnect", () => {
       stopConversation();
-      console.log('A user disconnected');
+      console.log("A user disconnected");
     });
   });
 }