Adam Patterson
05/14/2025, 1:09 AM

Vapi Support Bot
05/14/2025, 1:11 AM
```json
{
  "content": "Your generated message here"
}
```
or

```json
{
  "text": "Your generated message here"
}
```
If one doesn't work, try the other, as the documentation does not specify a single required field name.
Source:
- [Vapi Custom LLM server response format](https://docs.vapi.ai/customization/custom-llm/using-your-server)
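For illustration, a minimal Node handler following the bot's advice might return both candidate fields at once, so whichever one Vapi actually reads is present. This is only a sketch under that assumption; the cited docs don't confirm either field name, and the port is arbitrary:

```typescript
import { createServer } from "node:http";

// Sketch: respond with both candidate field names ("content" and "text"),
// since the docs don't confirm which one Vapi expects.
const server = createServer((_req, res) => {
    const body = JSON.stringify({
        content: "Your generated message here",
        text: "Your generated message here",
    });
    res.writeHead(200, {
        "Content-Type": "application/json",
        "Content-Length": Buffer.byteLength(body),
    });
    res.end(body);
});

server.listen(3000); // Port is arbitrary for this sketch.
```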
Adam Patterson
05/14/2025, 1:28 AM

Adam Patterson
05/14/2025, 1:38 AM
```typescript
server.on("request", (req: IncomingMessage, res: ServerResponse & { req: IncomingMessage; }) => {
    mutex = (async () => {
        await mutex;
        const url = new URL(`https://farar.net${req.url ?? ""}`);
        if (!(url.pathname === "/chat/completions" && req.method === "POST")) {
            res.writeHead(404);
            res.end();
            return;
        }
        // Buffer the whole request body before parsing it.
        const streamBuffer = new StreamBuffer();
        req.pipe(streamBuffer);
        await once(req, "end");
        const requestBody = JSON.parse(streamBuffer.buffer.toString("utf-8"));
        console.log("requestBody", requestBody);
        const { model, messages, temperature, max_tokens } = requestBody;
        const data = {
            model: model,
            messages: messages,
            temperature: temperature,
            max_completion_tokens: max_tokens
        } as OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming;
        console.log("data", data);
        // Forward the request to OpenAI and relay the non-streaming response.
        const responseBody = JSON.stringify(await openAI.chat.completions.create(data));
        res.setHeader("Content-Length", Buffer.byteLength(responseBody));
        res.setHeader("Content-Type", "application/json");
        res.end(responseBody);
        console.log("responseBody", responseBody);
    })();
});
server.listen(8443, "0.0.0.0");
```
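The `StreamBuffer` class imported above isn't shown in the thread; a plausible implementation is a `Writable` that accumulates every chunk so the full body can be read once the request ends, along these lines:

```typescript
import { Writable } from "node:stream";

// Plausible sketch of the unshown ./stream_buffer.js module: a Writable
// that concatenates every chunk it receives into a single Buffer.
export class StreamBuffer extends Writable {
    public buffer: Buffer = Buffer.alloc(0);

    override _write(
        chunk: Buffer,
        _encoding: BufferEncoding,
        callback: (error?: Error | null) => void,
    ): void {
        this.buffer = Buffer.concat([this.buffer, chunk]);
        callback();
    }
}
```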
Pigeon
05/14/2025, 6:27 AM

Adam Patterson
05/14/2025, 11:57 AM

Adam Patterson
05/14/2025, 12:00 PM

Pigeon
05/14/2025, 12:09 PM
```python
chunk = ChatCompletionChunk(
    id=f"chatcmpl-{call_id}",
    created=int(time.time()),
    model=groq_settings.GROQ_MODEL_LLAMA_70_V,
    choices=[ChunkChoice(index=0, delta=delta, finish_reason=finish_reason)],
)
```
Pigeon
05/14/2025, 12:12 PM
```python
return f"data: {json.dumps(chunk.model_dump())}\n\n"
```
You can read about streaming responses here: https://platform.openai.com/docs/guides/streaming-responses?api-mode=chat

Pigeon
05/14/2025, 12:13 PM

Pigeon
05/14/2025, 12:18 PM
`chunk.choices[0].delta.content`
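On the consuming side, that field is what gets accumulated to rebuild the full reply; a sketch using the openai Node SDK's streaming iterator (the model name and prompt are placeholders):

```typescript
import { OpenAI } from "openai";

// Sketch: accumulate each chunk's delta.content into the full reply.
const openAI = new OpenAI();
const stream = await openAI.chat.completions.create({
    model: "gpt-4o-mini", // placeholder model name
    messages: [{ role: "user", content: "Say hello." }],
    stream: true,
});

let reply = "";
for await (const chunk of stream) {
    reply += chunk.choices[0]?.delta?.content ?? "";
}
console.log(reply);
```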
Pigeon
05/14/2025, 12:18 PM
```json
{
  "id": "chatcmpl-x",
  "object": "chat.completion.chunk",
  "created": 1747224956,
  "model": "llama-3.3-70b-versatile",
  "choices": [
    {
      "index": 0,
      "delta": {
        "role": "assistant",
        "content": "",
        "refusal": null,
        "tool_calls": null,
        "function_call": null
      },
      "logprobs": null,
      "finish_reason": null
    }
  ]
}
```
Pigeon
05/14/2025, 12:23 PM
```json
{
  "id": "chatcmpl-x",
  "object": "chat.completion.chunk",
  "created": 1747224966,
  "model": "llama-3.3-70b-versatile",
  "choices": [
    {
      "index": 0,
      "delta": {
        "role": null,
        "content": "LLM RESPONSE TEXT",
        "refusal": null,
        "tool_calls": null,
        "function_call": null
      },
      "logprobs": null,
      "finish_reason": null
    }
  ]
}
```
Pigeon
05/14/2025, 12:24 PM
```json
{
  "id": "chatcmpl-x",
  "object": "chat.completion.chunk",
  "created": 1747224966,
  "model": "llama-3.3-70b-versatile",
  "choices": [
    {
      "index": 0,
      "delta": {
        "role": null,
        "content": null,
        "refusal": null,
        "tool_calls": null,
        "function_call": null
      },
      "logprobs": null,
      "finish_reason": "stop"
    }
  ]
}
```
And this works
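Taken together, the three chunks above correspond to a simple write sequence on a Node server like Adam's. Here is a sketch that hard-codes Pigeon's shapes; the helper names are illustrative:

```typescript
import type { ServerResponse } from "node:http";

// Illustrative helper: frame one chunk as a server-sent event.
const sse = (chunk: unknown): string => `data: ${JSON.stringify(chunk)}\n\n`;

// Emit the three stages Pigeon showed: a role chunk, a content chunk,
// and a finish_reason "stop" chunk, followed by the [DONE] sentinel.
function writeStubStream(res: ServerResponse, text: string): void {
    const base = {
        id: "chatcmpl-x",
        object: "chat.completion.chunk",
        created: Math.floor(Date.now() / 1000),
        model: "llama-3.3-70b-versatile",
    };
    res.setHeader("Content-Type", "text/event-stream");
    res.write(sse({ ...base, choices: [{ index: 0, delta: { role: "assistant", content: "" }, finish_reason: null }] }));
    res.write(sse({ ...base, choices: [{ index: 0, delta: { content: text }, finish_reason: null }] }));
    res.write(sse({ ...base, choices: [{ index: 0, delta: {}, finish_reason: "stop" }] }));
    res.end("data: [DONE]\n\n");
}
```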
Pigeon
05/14/2025, 12:25 PM

Adam Patterson
05/14/2025, 2:46 PM

Pigeon
05/14/2025, 2:47 PM

Adam Patterson
05/14/2025, 2:47 PM
```typescript
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
import { once } from "node:events";
import * as https from "node:https";
import { readFileSync } from "node:fs";
import { IncomingMessage, ServerResponse } from "node:http";
import { OpenAI } from "openai";
import { StreamBuffer } from "./stream_buffer.js";

const {
    OPENAI_API_KEY = "",
} = process.env;

const openAI = new OpenAI({ apiKey: OPENAI_API_KEY });

const options = {
    key: readFileSync("/home/farar/.certs/farar_net.key"),
    cert: readFileSync("/home/farar/.certs/farar_net.pub"),
};

const server = https.createServer(options);
server.on("error", console.error);

// Serialize request handling: each handler awaits the previous one's promise.
let mutex = Promise.resolve();

server.on("request", (req: IncomingMessage, res: ServerResponse & { req: IncomingMessage; }) => {
    mutex = (async () => {
        await mutex;
        const url = new URL(`https://farar.net${req.url ?? ""}`);
        if (!(url.pathname === "/chat/completions" && req.method === "POST")) {
            res.writeHead(404);
            res.end();
            return;
        }
        // Buffer and parse the request body Vapi sends.
        const streamBuffer = new StreamBuffer();
        req.pipe(streamBuffer);
        await once(req, "end");
        const requestBody = JSON.parse(streamBuffer.buffer.toString("utf-8"));
        const { model, messages } = requestBody;
        const data = {
            model: model,
            messages: messages,
            temperature: 1,
            stream: true
        } as unknown as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming;
        // Proxy to OpenAI and relay each chunk as a server-sent event.
        const stream = await openAI.chat.completions.create(data);
        res.setHeader("Content-Type", "text/event-stream");
        res.statusCode = 200;
        for await (const chunk of stream) {
            const responseBody = `data: ${JSON.stringify(chunk)}\n\n`;
            console.log(responseBody);
            res.write(responseBody);
        }
        res.end("data: [DONE]\n\n");
    })();
});

server.listen(8443, "0.0.0.0");
```
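To sanity-check the endpoint outside of Vapi, a small client can POST a chat request and print the raw SSE frames as it receives them; this is a sketch, and the model name and prompt are placeholders:

```typescript
// Sketch of a smoke test against the server above (Node 18+ fetch).
const res = await fetch("https://farar.net:8443/chat/completions", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
        model: "gpt-4o-mini", // placeholder model name
        messages: [{ role: "user", content: "Say hello." }],
    }),
});

// Print each SSE frame as it arrives.
const decoder = new TextDecoder();
for await (const chunk of res.body as unknown as AsyncIterable<Uint8Array>) {
    process.stdout.write(decoder.decode(chunk));
}
```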
Vapi
05/15/2025, 3:06 AM

Adam Patterson
05/15/2025, 11:50 AM

Adam Patterson
05/17/2025, 4:29 PM

Adam Patterson
05/17/2025, 4:33 PM

Adam Patterson
05/17/2025, 5:34 PM

Vapi
05/19/2025, 6:10 PM