diff --git a/src/index.ts b/src/index.ts index e868926..5943a87 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,146 +1,174 @@ -import express from 'express'; -import path from 'path'; -import { Request, Response } from 'express'; -import http from 'http'; -import { Server } from 'socket.io'; -import dotenv from 'dotenv'; -import { RealtimeClient, RealtimeUtils } from '@openai/realtime-api-beta'; -import { voiceEngineSocketUser } from './voiceEngine'; +import express from "express"; +import path from "path"; +import { Request, Response } from "express"; +import http from "http"; +import { Server } from "socket.io"; +import dotenv from "dotenv"; +import { RealtimeClient, RealtimeUtils } from "@openai/realtime-api-beta"; +import { voiceEngineSocketUser } from "./voiceEngine"; dotenv.config(); const app = express(); -const PORT = 50269; -const publicFolder = path.join(__dirname, 'public'); +if (!process.env.PORT) { + console.error("Please set the PORT environment variable"); + process.exit(1); +} + +const PORT = parseInt(process.env.PORT, 10); + +const publicFolder = path.join(__dirname, "public"); app.use(express.static(publicFolder)); -app.get('/', (req: Request, res: Response) => { - res.sendFile(path.join(publicFolder, 'index.html')); +app.get("/", (req: Request, res: Response) => { + res.sendFile(path.join(publicFolder, "index.html")); }); const server = http.createServer(app); const io = new Server(server); interface ConversationItem { - [key: string]: any; + [key: string]: any; } -io.on('connection', (socket) => { - console.log('A user connected'); +io.on("connection", (socket) => { + console.log("A user connected"); - voiceEngineSocketUser(socket); + voiceEngineSocketUser(socket); - let gptClient: null | RealtimeClient = null; + let gptClient: null | RealtimeClient = null; - socket.on('start', async (data) => { - gptClient = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY }); + socket.on("start", async (data) => { + gptClient = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY }); - (async () => { - try { - await gptClient.connect(); + (async () => { + try { + await gptClient.connect(); - console.log('Connected to OpenAI Realtime API'); + console.log("Connected to OpenAI Realtime API"); - socket.on('voice-data', async (audioBuffer) => { - try { - console.log('Voice data received'); + socket.on("voice-data", async (audioBuffer) => { + try { + console.log("Voice data received"); - // Send user audio, must be Int16Array or ArrayBuffer - // Default audio format is pcm16 with sample rate of 24,000 Hz + // Send user audio, must be Int16Array or ArrayBuffer + // Default audio format is pcm16 with sample rate of 24,000 Hz - if (audioBuffer instanceof Float32Array) { - console.log('Received audio data from the input worklet:'); + if (audioBuffer instanceof Float32Array) { + console.log("Received audio data from the input worklet:"); - if (gptClient) gptClient.appendInputAudio(RealtimeUtils.floatTo16BitPCM(audioBuffer)); - } else if (audioBuffer instanceof Buffer) { - console.log('Received audio data as Buffer:'); + if (gptClient) + gptClient.appendInputAudio( + RealtimeUtils.floatTo16BitPCM(audioBuffer) + ); + } else if (audioBuffer instanceof Buffer) { + console.log("Received audio data as Buffer:"); - // Convert Buffer to ArrayBuffer - const arrayBuffer = audioBuffer.buffer.slice(audioBuffer.byteOffset, audioBuffer.byteOffset + audioBuffer.byteLength); + // Convert Buffer to ArrayBuffer + const arrayBuffer = audioBuffer.buffer.slice( + audioBuffer.byteOffset, + audioBuffer.byteOffset + audioBuffer.byteLength + ); - // Convert ArrayBuffer to Int16Array - const float32Array = new Float32Array(arrayBuffer); + // Convert ArrayBuffer to Int16Array + const float32Array = new Float32Array(arrayBuffer); - if (gptClient) gptClient.appendInputAudio(RealtimeUtils.floatTo16BitPCM(float32Array)); - } else { - console.error('Invalid data type received in worklet'); + if (gptClient) + gptClient.appendInputAudio( + RealtimeUtils.floatTo16BitPCM(float32Array) + ); + } else { + console.error("Invalid data type received in worklet"); - // log the data type and return - console.log('Data type:', typeof audioBuffer, audioBuffer); - return; - } - } catch (error) { - console.error('Error with OpenAI Realtime API:', error); - } - }); - - gptClient.on('conversation.updated', (event: ConversationItem) => { - const { item, delta } = event; - if (item.content) { - socket.emit('openai-response', item.content); - } - console.log('Playing audio response...', delta); - if (delta && delta.audio) { - socket.emit('openai-audio', delta.audio); - } - console.log('Conversation updated:', event); - }); - - gptClient.on('conversation.item.completed', (event: ConversationItem) => { - const { item } = event; - console.log('Conversation item completed:', item); - - if (item.type === 'message' && item.role === 'assistant' && item.formatted && item.formatted.audio) { - console.log('Playing audio response...'); - //socket.emit('openai-audio', item.formatted.audio); - } else { - console.log('No audio content in this item.'); - } - }); - } catch (error) { - console.error('Error connecting to OpenAI Realtime API:', error); + // log the data type and return + console.log("Data type:", typeof audioBuffer, audioBuffer); + return; } - })(); - - socket.on('disconnect', () => { - console.log('A user disconnected'); - - if (gptClient) gptClient.disconnect(); + } catch (error) { + console.error("Error with OpenAI Realtime API:", error); + } }); - socket.on('end', () => { - console.log('A user ended the conversation'); - if (gptClient) gptClient.disconnect(); + gptClient.on("conversation.updated", (event: ConversationItem) => { + const { item, delta } = event; + if (item.content) { + socket.emit("openai-response", item.content); + } + console.log("Playing audio response...", delta); + if (delta && delta.audio) { + socket.emit("openai-audio", delta.audio); + } + console.log("Conversation updated:", event); }); - gptClient.updateSession({ - instructions: `Du bist beim Kundensupport von Jannex und möchtest eine Erinnerung für ein Termin nachfragen. -Bitte spreche mit einer ruhigen Stimme.`, - }); - gptClient.updateSession({ voice: 'ballad' }); - gptClient.updateSession({ - turn_detection: { type: 'server_vad', threshold: 0.6, prefix_padding_ms: 300, silence_duration_ms: 500 }, - input_audio_transcription: { model: 'whisper-1' }, - input_audio_format: 'pcm16', - output_audio_format: 'pcm16', - max_response_output_tokens: 1500, - modalities: ['audio', 'text'], - }); + gptClient.on( + "conversation.item.completed", + (event: ConversationItem) => { + const { item } = event; + console.log("Conversation item completed:", item); - gptClient.on('conversation.updated', (event: ConversationItem) => { - const { item, delta } = event; - if (gptClient) { - const items = gptClient.conversation.getItems(); + if ( + item.type === "message" && + item.role === "assistant" && + item.formatted && + item.formatted.audio + ) { + console.log("Playing audio response..."); + //socket.emit('openai-audio', item.formatted.audio); + } else { + console.log("No audio content in this item."); } - // Handle the updated conversation items - }); + } + ); + } catch (error) { + console.error("Error connecting to OpenAI Realtime API:", error); + } + })(); - //gptClient.sendUserMessageContent([{ type: 'input_text', text: `Wie geht es dir?` }]); - gptClient.createResponse(); + socket.on("disconnect", () => { + console.log("A user disconnected"); + + if (gptClient) gptClient.disconnect(); }); + + socket.on("end", () => { + console.log("A user ended the conversation"); + if (gptClient) gptClient.disconnect(); + }); + + gptClient.updateSession({ + instructions: `Du bist beim Kundensupport von Jannex und möchtest eine Erinnerung für ein Termin nachfragen. +Bitte spreche mit einer ruhigen Stimme.`, + }); + gptClient.updateSession({ voice: "ballad" }); + gptClient.updateSession({ + turn_detection: { + type: "server_vad", + threshold: 0.6, + prefix_padding_ms: 300, + silence_duration_ms: 500, + }, + input_audio_transcription: { model: "whisper-1" }, + input_audio_format: "pcm16", + output_audio_format: "pcm16", + max_response_output_tokens: 1500, + modalities: ["audio", "text"], + }); + + gptClient.on("conversation.updated", (event: ConversationItem) => { + const { item, delta } = event; + if (gptClient) { + const items = gptClient.conversation.getItems(); + } + // Handle the updated conversation items + }); + + //gptClient.sendUserMessageContent([{ type: 'input_text', text: `Wie geht es dir?` }]); + gptClient.createResponse(); + }); }); -server.listen(PORT, '127.0.0.1', () => { - console.log(`Server läuft unter http://localhost:${PORT}`); +server.listen(PORT, "127.0.0.1", () => { + console.log(`Server läuft unter http://localhost:${PORT}`); }); diff --git a/src/public/index.html b/src/public/index.html index f6ec391..8e48a79 100644 --- a/src/public/index.html +++ b/src/public/index.html @@ -1,276 +1,272 @@ +
+ + + + + + + + + - - - - - - - - - - - - - -Status: Idle
----
----
-