Skip to content

Python

Setup

pip install openai
export OPENAI_API_KEY="sk-..."
export OPENAI_BASE_URL="https://api.chris.hellotopia.io/v1"

The openai SDK reads both env vars automatically — no code change needed to redirect from OpenAI to this gateway.

Chat (simple)

from openai import OpenAI

# The client picks up OPENAI_API_KEY / OPENAI_BASE_URL from the environment,
# so no constructor arguments are needed to target the gateway.
client = OpenAI()

# One-shot, non-streaming chat completion.
completion = client.chat.completions.create(
    model="llama3.1:8b",
    messages=[{"role": "user", "content": "Summarize the plot of Hamlet in 2 sentences."}],
)

# The reply text lives on the first (and only) choice.
print(completion.choices[0].message.content)

Chat (streaming)

from openai import OpenAI

# Reads OPENAI_API_KEY / OPENAI_BASE_URL from the environment.
client = OpenAI()

# stream=True returns an iterator of incremental chunks instead of one response.
stream = client.chat.completions.create(
    model="llama3.3:70b",
    messages=[{"role": "user", "content": "Explain gradient descent step by step."}],
    stream=True,
)
for chunk in stream:
    # Robustness fix: some streamed chunks arrive with an empty `choices`
    # list (e.g. a trailing usage chunk, or certain gateways/proxies);
    # indexing [0] unconditionally would raise IndexError mid-stream.
    if not chunk.choices:
        continue
    # delta.content is None for role-only/final deltas — coalesce to "".
    print(chunk.choices[0].delta.content or "", end="", flush=True)

Chat (async, concurrent)

import asyncio
from openai import AsyncOpenAI

# Async client; credentials/base URL come from the environment.
client = AsyncOpenAI()

async def ask(prompt: str) -> str:
    """Send one chat prompt and return the model's reply text."""
    response = await client.chat.completions.create(
        model="llama3.1:8b",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

async def main():
    """Fan three questions out concurrently, then print each Q/A pair."""
    prompts = ["What is recursion?", "What is polymorphism?", "What is a monad?"]
    tasks = [ask(prompt) for prompt in prompts]
    answers = await asyncio.gather(*tasks)
    for prompt, answer in zip(prompts, answers):
        print(f"Q: {prompt}\nA: {answer}\n---")

asyncio.run(main())

Auto-routing will spread these three calls across the 5080 and Spark backends automatically.

Embeddings

from openai import OpenAI
import numpy as np

client = OpenAI()

docs = ["The cat sat on the mat.", "A feline rested on a rug.", "The stock market closed up 2%."]
response = client.embeddings.create(model="embed/nomic-embed-text", input=docs)

# Stack the per-document embeddings into an (n_docs, dim) matrix.
matrix = np.array([item.embedding for item in response.data])

# Pairwise cosine similarity: L2-normalize each row, then take the Gram matrix.
norms = np.linalg.norm(matrix, axis=1, keepdims=True)
unit = matrix / norms
print(unit @ unit.T)

Transcription

from openai import OpenAI

client = OpenAI()

# Open in binary mode; verbose_json adds timestamped segments to the result.
with open("meeting.m4a", "rb") as f:
    r = client.audio.transcriptions.create(
        file=f,
        model="whisper-large-v3",
        language="en",
        response_format="verbose_json",
    )

# Full transcript text.
print(r.text)

# Fix: the original f-string printed the two timestamps back-to-back
# ("[{start:6.2f}{end:6.2f}]"), which becomes unreadable/ambiguous once a
# timestamp exceeds 99.99 s and the width-6 padding disappears. Separate
# start and end explicitly.
for seg in r.segments:
    print(f"[{seg.start:6.2f} -> {seg.end:6.2f}] {seg.text}")

Using LangChain

from langchain_openai import ChatOpenAI

# Point LangChain's OpenAI-compatible chat model at the gateway by passing
# the base URL and key explicitly (instead of relying on env vars).
llm = ChatOpenAI(
    model="llama3.3:70b",
    base_url="https://api.chris.hellotopia.io/v1",
    api_key="sk-...",
)

# invoke() returns a message object; the reply text is on .content.
answer = llm.invoke("Explain transformers.")
print(answer.content)

Using LlamaIndex

from llama_index.llms.openai import OpenAI

# LlamaIndex's OpenAI wrapper takes the gateway endpoint via api_base.
llm = OpenAI(
    model="llama3.1:8b",
    api_base="https://api.chris.hellotopia.io/v1",
    api_key="sk-...",
)

# complete() returns a completion object whose str() is the generated text.
result = llm.complete("Hello")
print(result)