Chat Completions

POST /chat/completions

Request

POST https://modelslab.com/api/v7/llm/chat/completions
Pass your API key as a Bearer token in the Authorization header.
curl -X POST https://modelslab.com/api/v7/llm/chat/completions \
  -H "Authorization: Bearer $MODELSLAB_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Qwen/Qwen2.5-VL-72B-Instruct-together",
    "messages": [
      {"role": "system", "content": "You are a helpful assistant."},
      {"role": "user", "content": "What is the capital of France?"}
    ],
    "max_tokens": 1000,
    "temperature": 0.7
  }'
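
The same request from Python, as a sketch using only the standard library (the URL and headers are the ones documented above; the helper name and everything else is illustrative):

```python
import json
import os
import urllib.request

URL = "https://modelslab.com/api/v7/llm/chat/completions"

body = {
    "model": "Qwen/Qwen2.5-VL-72B-Instruct-together",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the capital of France?"},
    ],
    "max_tokens": 1000,
    "temperature": 0.7,
}

def chat(body: dict) -> dict:
    """POST the body with Bearer auth and return the parsed JSON response."""
    req = urllib.request.Request(
        URL,
        data=json.dumps(body).encode(),
        headers={
            "Authorization": f"Bearer {os.environ['MODELSLAB_API_KEY']}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)

# chat(body) performs the request; it is not executed here.
```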

Body

{
  "model": "Qwen/Qwen2.5-VL-72B-Instruct-together",
  "messages": [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"}
  ],
  "max_tokens": 1000,
  "temperature": 0.7,
  "top_p": 1,
  "stream": false,
  "presence_penalty": 0,
  "frequency_penalty": 0
}

Response

{
  "id": "chat-abc123",
  "object": "chat.completion",
  "created": 1712345678,
  "model": "Qwen/Qwen2.5-VL-72B-Instruct-together",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "The capital of France is Paris."
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 25,
    "completion_tokens": 8,
    "total_tokens": 33
  }
}
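
The fields above can be read straight off the parsed JSON. A minimal sketch, using the example response as a literal dict:

```python
# Parsed JSON of the example response above
response = {
    "id": "chat-abc123",
    "object": "chat.completion",
    "created": 1712345678,
    "model": "Qwen/Qwen2.5-VL-72B-Instruct-together",
    "choices": [
        {
            "index": 0,
            "message": {"role": "assistant",
                        "content": "The capital of France is Paris."},
            "finish_reason": "stop",
        }
    ],
    "usage": {"prompt_tokens": 25, "completion_tokens": 8, "total_tokens": 33},
}

# The generated text lives under choices[0].message.content
reply = response["choices"][0]["message"]["content"]
# Token accounting lives under usage
total = response["usage"]["total_tokens"]
```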

Streaming

Set "stream": true to receive Server-Sent Events (SSE) as tokens are generated:
curl -X POST https://modelslab.com/api/v7/llm/chat/completions \
  -H "Authorization: Bearer $MODELSLAB_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "model": "Qwen/Qwen2.5-VL-72B-Instruct-together",
    "messages": [{"role": "user", "content": "Write a haiku"}],
    "stream": true
  }'
Each SSE event contains a chat.completion.chunk object:
data: {"id":"chat-abc123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Silent"},"finish_reason":null}]}
data: {"id":"chat-abc123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" snow"},"finish_reason":null}]}
data: [DONE]
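
The event stream can be consumed with any SSE-capable client. A minimal line parser, as a sketch independent of any HTTP library, might look like this (fed with the example events above):

```python
import json
from typing import Optional

def parse_sse_line(line: str) -> Optional[str]:
    """Return the delta text carried by one SSE data line, or None.

    None is returned for blank or non-data lines, for the [DONE]
    sentinel, and for chunks whose delta carries no content field.
    """
    line = line.strip()
    if not line.startswith("data:"):
        return None
    payload = line[len("data:"):].strip()
    if payload == "[DONE]":
        return None
    chunk = json.loads(payload)
    return chunk["choices"][0]["delta"].get("content")

# Reassembling the two example chunks above
events = [
    'data: {"id":"chat-abc123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Silent"},"finish_reason":null}]}',
    'data: {"id":"chat-abc123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":" snow"},"finish_reason":null}]}',
    'data: [DONE]',
]
text = "".join(t for t in (parse_sse_line(e) for e in events) if t)
# text == "Silent snow"
```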

OpenAI SDK

This endpoint is fully compatible with the OpenAI SDK. Just change the base_url and api_key:
from openai import OpenAI

client = OpenAI(
    api_key="YOUR_MODELSLAB_API_KEY",
    base_url="https://modelslab.com/api/v7/llm",
)

# Non-streaming
response = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-72B-Instruct-together",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Explain quantum computing in simple terms"},
    ],
    max_tokens=1000,
)
print(response.choices[0].message.content)

# Streaming
stream = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-72B-Instruct-together",
    messages=[{"role": "user", "content": "Write a story"}],
    stream=True,
)
for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")

Authorizations

Authorization (string, header, required)
Bearer token authentication using a ModelsLab API key.

Body

application/json

messages (object[], required)
Array of chat messages.

model (string)
Model ID to use for the completion (e.g. 'Qwen/Qwen2.5-VL-72B-Instruct-together').

max_tokens (integer, default: 1000)
Maximum number of tokens to generate. Required range: x >= 1.

temperature (number, default: 1)
Sampling temperature (0-2). Higher values make output more random. Required range: 0 <= x <= 2.

top_p (number, default: 1)
Nucleus sampling parameter. Required range: 0 <= x <= 1.

stream (boolean, default: false)
Whether to stream partial results as Server-Sent Events.

presence_penalty (number, default: 0)
Penalize new tokens based on whether they appear in the text so far. Required range: -2 <= x <= 2.

frequency_penalty (number, default: 0)
Penalize new tokens based on their frequency in the text so far. Required range: -2 <= x <= 2.
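
The documented ranges can be checked client-side before sending. A hypothetical helper (the field names, defaults, and ranges come from this page; the helper itself is an assumption, not part of any SDK):

```python
def build_body(model: str, messages: list, *, max_tokens: int = 1000,
               temperature: float = 1.0, top_p: float = 1.0,
               stream: bool = False, presence_penalty: float = 0.0,
               frequency_penalty: float = 0.0) -> dict:
    """Validate parameters against the documented ranges and build the body."""
    if max_tokens < 1:
        raise ValueError("max_tokens must be >= 1")
    if not 0 <= temperature <= 2:
        raise ValueError("temperature must be in [0, 2]")
    if not 0 <= top_p <= 1:
        raise ValueError("top_p must be in [0, 1]")
    for name, value in (("presence_penalty", presence_penalty),
                        ("frequency_penalty", frequency_penalty)):
        if not -2 <= value <= 2:
            raise ValueError(f"{name} must be in [-2, 2]")
    return {"model": model, "messages": messages, "max_tokens": max_tokens,
            "temperature": temperature, "top_p": top_p, "stream": stream,
            "presence_penalty": presence_penalty,
            "frequency_penalty": frequency_penalty}
```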

Response

Chat completion response

id (string)
Unique completion ID.

object (enum<string>)
Available options: chat.completion

created (integer)
Unix timestamp of when the completion was created.

model (string)
Model used for the completion.

choices (object[])
Generated completions. Each entry contains an index, a message (with role and content), and a finish_reason.

usage (object)
Token usage counts: prompt_tokens, completion_tokens, and total_tokens.