POST /openai/chat/completions

OpenAI Compatible Chat Completions (alternative path)
Example request:

curl --request POST \
  --url http://localhost:8080/openai/chat/completions \
  --header 'Content-Type: application/json' \
  --data '{
  "model": "openai/gpt-4o-mini",
  "messages": [
    {
      "role": "user",
      "content": "Hello, how are you?",
      "tool_call_id": "<string>",
      "tool_calls": [
        {
          "id": "tool_123",
          "type": "function",
          "function": {
            "name": "get_weather",
            "arguments": "{\"location\": \"San Francisco, CA\"}"
          }
        }
      ],
      "refusal": "<string>",
      "annotations": [
        {
          "type": "<string>",
          "url_citation": {
            "start_index": 123,
            "end_index": 123,
            "title": "<string>",
            "url": "<string>",
            "sources": "<any>",
            "type": "<string>"
          }
        }
      ],
      "thought": "<string>"
    }
  ],
  "max_tokens": 1000,
  "temperature": 1,
  "top_p": 0.5,
  "n": 1,
  "stream": false,
  "stream_options": {
    "include_usage": true
  },
  "stop": "<string>",
  "presence_penalty": 0,
  "frequency_penalty": 0,
  "logit_bias": {},
  "logprobs": false,
  "top_logprobs": 10,
  "max_completion_tokens": 123,
  "metadata": {},
  "modalities": [
    "<string>"
  ],
  "parallel_tool_calls": true,
  "prompt_cache_key": "<string>",
  "reasoning_effort": "<string>",
  "response_format": {
    "type": "text"
  },
  "safety_identifier": "<string>",
  "seed": 123,
  "service_tier": "auto",
  "store": true,
  "tool_choice": "none",
  "tools": [
    {
      "type": "function",
      "function": {
        "description": "<string>",
        "name": "<string>",
        "parameters": {}
      }
    }
  ],
  "user": "<string>",
  "verbosity": "<string>",
  "fallbacks": [
    "anthropic/claude-3-sonnet-20240229",
    "openai/gpt-4o"
  ]
}'

Example response:

{
  "id": "chatcmpl-123",
  "object": "chat.completion",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "user",
        "content": "Hello, how are you?",
        "tool_call_id": "<string>",
        "tool_calls": [
          {
            "id": "tool_123",
            "type": "function",
            "function": {
              "name": "get_weather",
              "arguments": "{\"location\": \"San Francisco, CA\"}"
            }
          }
        ],
        "refusal": "<string>",
        "annotations": [
          {
            "type": "<string>",
            "url_citation": {
              "start_index": 123,
              "end_index": 123,
              "title": "<string>",
              "url": "<string>",
              "sources": "<any>",
              "type": "<string>"
            }
          }
        ],
        "thought": "<string>"
      },
      "finish_reason": "stop",
      "stop": "<string>",
      "log_probs": {
        "content": [
          {
            "bytes": [
              123
            ],
            "logprob": -0.123,
            "token": "hello",
            "top_logprobs": [
              {
                "bytes": [
                  123
                ],
                "logprob": -0.456,
                "token": "world"
              }
            ]
          }
        ],
        "refusal": [
          {
            "bytes": [
              123
            ],
            "logprob": -0.456,
            "token": "world"
          }
        ]
      }
    }
  ],
  "data": [
    {
      "index": 123,
      "object": "<string>",
      "embedding": [
        123
      ]
    }
  ],
  "speech": {
    "usage": {
      "characters": 123
    },
    "audio": "aSDinaTvuI8gbWludGxpZnk="
  },
  "transcribe": {
    "text": "<string>",
    "logprobs": [
      {
        "token": "<string>",
        "log_prob": 123
      }
    ],
    "usage": {
      "prompt_tokens": 123,
      "completion_tokens": 123,
      "total_tokens": 123
    }
  },
  "messages": [
    {
      "role": "user",
      "content": "<string>"
    }
  ],
  "conversation_id": "<string>",
  "finish_reason": "<string>",
  "stop_reason": "<string>",
  "stop_sequence": "<string>",
  "prompt_cache": {
    "status": "<string>"
  },
  "model": "gpt-4o",
  "created": 1677652288,
  "service_tier": "<string>",
  "system_fingerprint": "<string>",
  "usage": {
    "prompt_tokens": 56,
    "completion_tokens": 31,
    "total_tokens": 87,
    "completion_tokens_details": {
      "reasoning_tokens": 123,
      "audio_tokens": 123,
      "accepted_prediction_tokens": 123,
      "rejected_prediction_tokens": 123
    }
  },
  "extra_fields": {
    "provider": "openai",
    "request_type": "list_models",
    "model_requested": "<string>",
    "model_params": {
      "temperature": 0.7,
      "top_p": 0.9,
      "top_k": 40,
      "max_tokens": 1000,
      "stop_sequences": [
        "\n\n",
        "END"
      ],
      "presence_penalty": 0,
      "frequency_penalty": 0,
      "tools": [
        {
          "id": "<string>",
          "type": "function",
          "function": {
            "name": "get_weather",
            "description": "Get current weather for a location",
            "parameters": {
              "type": "object",
              "description": "<string>",
              "properties": {},
              "required": [
                "<string>"
              ],
              "enum": [
                "<string>"
              ]
            }
          }
        }
      ],
      "tool_choice": {
        "type": "auto",
        "function": {
          "name": "get_weather"
        }
      },
      "parallel_tool_calls": true
    },
    "latency": 1234,
    "billed_usage": {
      "prompt_tokens": 123,
      "completion_tokens": 123,
      "search_units": 123,
      "classifications": 123
    },
    "raw_response": {}
  }
}
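
The generated example above exercises every field in the schema. Only model and messages are required, so a minimal request looks like this:

curl --request POST \
  --url http://localhost:8080/openai/chat/completions \
  --header 'Content-Type: application/json' \
  --data '{
  "model": "openai/gpt-4o-mini",
  "messages": [
    {"role": "user", "content": "Hello, how are you?"}
  ]
}'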

Body

application/json
model
string
required

Model identifier in 'provider/model' format (e.g., 'openai/gpt-4o-mini', 'anthropic/claude-3-sonnet-20240229')

Example:

"openai/gpt-4o-mini"

messages
object[]
required

Array of chat messages

Minimum length: 1
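
The message object in the example above folds together fields used by different roles. Assuming the usual OpenAI-compatible conventions, assistant messages carry tool_calls and tool messages answer them via tool_call_id, so a tool-use exchange might look like this (the tool result payload is hypothetical):

"messages": [
  {"role": "system", "content": "You are a helpful assistant."},
  {"role": "user", "content": "What is the weather in San Francisco?"},
  {"role": "assistant", "tool_calls": [{
    "id": "tool_123",
    "type": "function",
    "function": {"name": "get_weather", "arguments": "{\"location\": \"San Francisco, CA\"}"}
  }]},
  {"role": "tool", "tool_call_id": "tool_123", "content": "{\"temp_f\": 61, \"conditions\": \"Foggy\"}"}
]
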
max_tokens
integer

Maximum number of tokens to generate. Note: this is an alias for max_completion_tokens and will be overridden by it if both are present.

Required range: x >= 1
Example:

1000

temperature
number

Controls randomness in the output. Higher values make the output more random, while lower values make it more deterministic.

Required range: 0 <= x <= 2
top_p
number

Controls diversity via nucleus sampling: top_p = 0.5 means only the tokens comprising the top 50% of probability mass are considered.

Required range: 0 <= x <= 1
n
integer
default:1

Number of chat completion choices to generate for each input message.

stream
boolean
default:false

If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available.
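
For example, set "stream": true and use curl -N to disable buffering. Assuming OpenAI-style events, each chunk arrives on a data: line shaped like a chat.completion.chunk and the stream ends with data: [DONE] (the chunk below illustrates the shape; it is not captured output):

curl -N --request POST \
  --url http://localhost:8080/openai/chat/completions \
  --header 'Content-Type: application/json' \
  --data '{
  "model": "openai/gpt-4o-mini",
  "messages": [{"role": "user", "content": "Hello"}],
  "stream": true
}'

data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}

data: [DONE]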

stream_options
object

Options for streaming responses. Only set this when stream is true. Setting include_usage to true adds token usage to the final streamed chunk.

stop
string | string[]

Up to 4 sequences where the API will stop generating further tokens.

presence_penalty
number

Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.

Required range: -2 <= x <= 2
frequency_penalty
number

Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.

Required range: -2 <= x <= 2
logit_bias
object

Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens to an associated bias value from -100 to 100.
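
For example, to effectively ban a single token (token IDs depend on the model's tokenizer; 50256 here is illustrative, the end-of-text token in older GPT tokenizers):

"logit_bias": {
  "50256": -100
}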

logprobs
boolean
default:false

Whether to return log probabilities of the output tokens. If true, the log probability of each output token is returned in the message content.

top_logprobs
integer

An integer between 0 and 20. The number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.

Required range: 0 <= x <= 20
max_completion_tokens
integer

The maximum number of tokens that can be generated in the chat completion.

metadata
object

A set of key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format.

modalities
string[]

A list of modalities to use for the response.

parallel_tool_calls
boolean

Whether to enable parallel tool calls. If set to true, the model will be able to call multiple tools in a single response.

prompt_cache_key
string

A key to use for caching the prompt.

reasoning_effort
string

The reasoning effort to use for the response.

response_format
object

An object specifying the format that the model must output.
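
The example request uses the default {"type": "text"}. Many OpenAI-compatible backends also accept a JSON mode, though support depends on the upstream provider; that request fragment would look like this:

"response_format": {
  "type": "json_object"
}
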
safety_identifier
string

A stable identifier for your end users, used to help detect and mitigate policy-violating usage.

seed
integer

This feature is in Beta. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.

service_tier
enum<string>

The service tier to use for the response. Can be auto or default.

Available options:
auto,
default
store
boolean

Whether to store the request and response in the log store.

tool_choice
enum<string> | object

Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. required means the model must call a function. Specifying a particular function via {"type": "function", "function": {"name": "my_function"}} forces the model to call that function.

Available options:
none,
auto,
required
tools
object[]

A list of tools the model may call. Each entry declares a function with a name, a description, and a JSON Schema parameters object.
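
Putting tools and tool_choice together: this fragment declares the get_weather function from the example above and forces the model to call it (the parameter schema is illustrative):

"tools": [
  {
    "type": "function",
    "function": {
      "name": "get_weather",
      "description": "Get current weather for a location",
      "parameters": {
        "type": "object",
        "properties": {
          "location": {"type": "string", "description": "City and state, e.g. San Francisco, CA"}
        },
        "required": ["location"]
      }
    }
  }
],
"tool_choice": {"type": "function", "function": {"name": "get_weather"}}
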
user
string

A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.

verbosity
string

The verbosity level of the response.

fallbacks
string[]

Fallback model names in 'provider/model' format, tried in order if the primary model's request fails

Example:
[
"anthropic/claude-3-sonnet-20240229",
"openai/gpt-4o"
]

Response

OpenAI-compatible chat completion response

id
string

Unique response identifier

Example:

"chatcmpl-123"

object
enum<string>

Response type

Available options:
text.completion,
chat.completion,
embedding,
speech,
transcribe,
responses.completion
Example:

"chat.completion"

choices
object[]

Array of completion choices for chat and text completions. Not present for responses type.

data
object[]

Array of embedding objects

speech
object

Speech synthesis output (audio payload and character usage), present for speech responses.

transcribe
object

Transcription output (text, token log probabilities, and usage), present for transcribe responses.

messages
object[]

Array of messages for responses type.

conversation_id
string

The conversation ID.

finish_reason
string

The reason the model stopped generating tokens.

stop_reason
string

The provider-native reason the model stopped generating tokens, where the upstream provider reports one.

stop_sequence
string

The stop sequence that was generated.

prompt_cache
object

Prompt cache status for the request.

model
string

Model used for generation

Example:

"gpt-4o"

created
integer

Unix timestamp of creation

Example:

1677652288

service_tier
string

Service tier used

system_fingerprint
string

System fingerprint

usage
object

Token usage for the request: prompt, completion, and total tokens, plus completion token details.

extra_fields
object

Additional gateway metadata: the upstream provider, request type, effective model parameters, latency, billed usage, and the raw provider response.