curl --request POST \
--url http://localhost:8080/openai/deployments/{deployment-id}/responses \
--header 'Content-Type: application/json' \
--data '
{
"model": "<string>",
"input": "<string>",
"fallbacks": [
"<string>"
],
"stream": true,
"background": true,
"conversation": "<string>",
"include": [
"<string>"
],
"instructions": "<string>",
"max_output_tokens": 123,
"max_tool_calls": 123,
"metadata": {},
"parallel_tool_calls": true,
"previous_response_id": "<string>",
"prompt_cache_key": "<string>",
"reasoning": {
"effort": "<string>"
},
"safety_identifier": "<string>",
"service_tier": "<string>",
"stream_options": {
"include_usage": true
},
"store": true,
"temperature": 123,
"text": {
"max_tokens": 123
},
"top_logprobs": 123,
"top_p": 123,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "<string>",
"arguments": {}
}
}
],
"truncation": "<string>"
}
'
{
"id": "chatcmpl-123",
"object": "chat.completion",
"choices": [
{
"index": 0,
"message": {
"role": "user",
"content": "Hello, how are you?",
"tool_call_id": "<string>",
"tool_calls": [
{
"function": {
"name": "get_weather",
"arguments": "{\"location\": \"San Francisco, CA\"}"
},
"id": "tool_123",
"type": "function"
}
],
"refusal": "<string>",
"annotations": [
{
"type": "<string>",
"url_citation": {
"start_index": 123,
"end_index": 123,
"title": "<string>",
"url": "<string>",
"sources": "<unknown>",
"type": "<string>"
}
}
],
"thought": "<string>"
},
"finish_reason": "stop",
"stop": "<string>",
"log_probs": {
"content": [
{
"logprob": -0.123,
"token": "hello",
"bytes": [
123
],
"top_logprobs": [
{
"logprob": -0.456,
"token": "world",
"bytes": [
123
]
}
]
}
],
"refusal": [
{
"logprob": -0.456,
"token": "world",
"bytes": [
123
]
}
]
}
}
],
"data": [
{
"index": 123,
"object": "<string>",
"embedding": [
123
]
}
],
"speech": {
"usage": {
"characters": 123
},
"audio": "aSDinaTvuI8gbWludGxpZnk="
},
"transcribe": {
"text": "<string>",
"logprobs": [
{
"token": "<string>",
"log_prob": 123
}
],
"usage": {
"prompt_tokens": 123,
"completion_tokens": 123,
"total_tokens": 123
}
},
"messages": [
{
"role": "user",
"content": "<string>"
}
],
"conversation_id": "<string>",
"finish_reason": "<string>",
"stop_reason": "<string>",
"stop_sequence": "<string>",
"prompt_cache": {
"status": "<string>"
},
"model": "gpt-4o",
"created": 1677652288,
"service_tier": "<string>",
"system_fingerprint": "<string>",
"usage": {
"prompt_tokens": 56,
"completion_tokens": 31,
"total_tokens": 87,
"completion_tokens_details": {
"reasoning_tokens": 123,
"audio_tokens": 123,
"accepted_prediction_tokens": 123,
"rejected_prediction_tokens": 123
}
},
"extra_fields": {
"provider": "openai",
"request_type": "list_models",
"model_requested": "<string>",
"model_params": {
"temperature": 0.7,
"top_p": 0.9,
"top_k": 40,
"max_tokens": 1000,
"stop_sequences": [
"\n\n",
"END"
],
"presence_penalty": 0,
"frequency_penalty": 0,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather for a location",
"parameters": {
"type": "object",
"description": "<string>",
"properties": {},
"required": [
"<string>"
],
"enum": [
"<string>"
]
}
},
"id": "<string>"
}
],
"tool_choice": {
"type": "auto",
"function": {
"name": "get_weather"
}
},
"parallel_tool_calls": true
},
"latency": 1234,
"billed_usage": {
"prompt_tokens": 123,
"completion_tokens": 123,
"search_units": 123,
"classifications": 123
},
"raw_response": {}
}
}
OpenAI-compatible responses endpoint for specific deployments.
curl --request POST \
--url http://localhost:8080/openai/deployments/{deployment-id}/responses \
--header 'Content-Type: application/json' \
--data '
{
"model": "<string>",
"input": "<string>",
"fallbacks": [
"<string>"
],
"stream": true,
"background": true,
"conversation": "<string>",
"include": [
"<string>"
],
"instructions": "<string>",
"max_output_tokens": 123,
"max_tool_calls": 123,
"metadata": {},
"parallel_tool_calls": true,
"previous_response_id": "<string>",
"prompt_cache_key": "<string>",
"reasoning": {
"effort": "<string>"
},
"safety_identifier": "<string>",
"service_tier": "<string>",
"stream_options": {
"include_usage": true
},
"store": true,
"temperature": 123,
"text": {
"max_tokens": 123
},
"top_logprobs": 123,
"top_p": 123,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "<string>",
"arguments": {}
}
}
],
"truncation": "<string>"
}
'
{
"id": "chatcmpl-123",
"object": "chat.completion",
"choices": [
{
"index": 0,
"message": {
"role": "user",
"content": "Hello, how are you?",
"tool_call_id": "<string>",
"tool_calls": [
{
"function": {
"name": "get_weather",
"arguments": "{\"location\": \"San Francisco, CA\"}"
},
"id": "tool_123",
"type": "function"
}
],
"refusal": "<string>",
"annotations": [
{
"type": "<string>",
"url_citation": {
"start_index": 123,
"end_index": 123,
"title": "<string>",
"url": "<string>",
"sources": "<unknown>",
"type": "<string>"
}
}
],
"thought": "<string>"
},
"finish_reason": "stop",
"stop": "<string>",
"log_probs": {
"content": [
{
"logprob": -0.123,
"token": "hello",
"bytes": [
123
],
"top_logprobs": [
{
"logprob": -0.456,
"token": "world",
"bytes": [
123
]
}
]
}
],
"refusal": [
{
"logprob": -0.456,
"token": "world",
"bytes": [
123
]
}
]
}
}
],
"data": [
{
"index": 123,
"object": "<string>",
"embedding": [
123
]
}
],
"speech": {
"usage": {
"characters": 123
},
"audio": "aSDinaTvuI8gbWludGxpZnk="
},
"transcribe": {
"text": "<string>",
"logprobs": [
{
"token": "<string>",
"log_prob": 123
}
],
"usage": {
"prompt_tokens": 123,
"completion_tokens": 123,
"total_tokens": 123
}
},
"messages": [
{
"role": "user",
"content": "<string>"
}
],
"conversation_id": "<string>",
"finish_reason": "<string>",
"stop_reason": "<string>",
"stop_sequence": "<string>",
"prompt_cache": {
"status": "<string>"
},
"model": "gpt-4o",
"created": 1677652288,
"service_tier": "<string>",
"system_fingerprint": "<string>",
"usage": {
"prompt_tokens": 56,
"completion_tokens": 31,
"total_tokens": 87,
"completion_tokens_details": {
"reasoning_tokens": 123,
"audio_tokens": 123,
"accepted_prediction_tokens": 123,
"rejected_prediction_tokens": 123
}
},
"extra_fields": {
"provider": "openai",
"request_type": "list_models",
"model_requested": "<string>",
"model_params": {
"temperature": 0.7,
"top_p": 0.9,
"top_k": 40,
"max_tokens": 1000,
"stop_sequences": [
"\n\n",
"END"
],
"presence_penalty": 0,
"frequency_penalty": 0,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get current weather for a location",
"parameters": {
"type": "object",
"description": "<string>",
"properties": {},
"required": [
"<string>"
],
"enum": [
"<string>"
]
}
},
"id": "<string>"
}
],
"tool_choice": {
"type": "auto",
"function": {
"name": "get_weather"
}
},
"parallel_tool_calls": true
},
"latency": 1234,
"billed_usage": {
"prompt_tokens": 123,
"completion_tokens": 123,
"search_units": 123,
"classifications": 123
},
"raw_response": {}
}
}
Azure deployment ID
Model identifier in 'provider/model' format
Simple text input for the response
auto, any, none, required, tool
OpenAI-compatible responses response
Unique response identifier
"chatcmpl-123"
Response type
text.completion, chat.completion, embedding, speech, transcribe, responses.completion "chat.completion"
Array of completion choices for chat and text completions. Not present for responses type.
Show child attributes
Choice index
0
Show child attributes
Role of the message sender
user, assistant, system, tool "user"
Message content - can be simple text or structured content with text and images
"Hello, how are you?"
ID of the tool call (for tool messages)
Tool calls made by assistant
Show child attributes
Unique tool call identifier
"tool_123"
Tool call type
function "function"
Refusal message from assistant
Message annotations
Show child attributes
Annotation type
Show child attributes
Start index in the text
End index in the text
Citation title
Citation URL
Citation sources
Citation type
Assistant's internal thought process
Reason completion stopped
stop, length, tool_calls, content_filter, function_call "stop"
Stop sequence that ended generation
Show child attributes
Log probabilities for content
Show child attributes
Log probability
-0.123
Token
"hello"
Byte representation
Show child attributes
The conversation ID.
The reason the model stopped generating tokens.
The reason the model stopped generating tokens.
The stop sequence that was generated.
Model used for generation
"gpt-4o"
Unix timestamp of creation
1677652288
Service tier used
System fingerprint
Show child attributes
Tokens in the prompt
56
Tokens in the completion
31
Total tokens used
87
Show child attributes
Tokens used for reasoning
Tokens used for audio
Accepted prediction tokens
Rejected prediction tokens
Show child attributes
AI model provider
openai, anthropic, azure, bedrock, cohere, vertex, mistral, ollama, gemini, groq, openrouter, sgl, parasail, elevenlabs, perplexity, cerebras "openai"
Request type
list_models, text_completion, chat_completion, chat_completion_stream, responses, responses_stream, embedding, speech, speech_stream, transcription, transcription_stream
Model requested
Show child attributes
Controls randomness in the output
0 <= x <= 2
0.7
Nucleus sampling parameter
0 <= x <= 1
0.9
Top-k sampling parameter
x >= 1
40
Maximum number of tokens to generate
x >= 1
1000
Sequences that stop generation
["\n\n", "END"]Penalizes repeated tokens
-2 <= x <= 20
Penalizes frequent tokens
-2 <= x <= 20
Available tools for the model
Show child attributes
Tool type
function "function"
Show child attributes
Function name
"get_weather"
Function description
"Get current weather for a location"
Show child attributes
Parameter type
"object"
Parameter description
Parameter properties (JSON Schema)
Required parameter names
Enum values for parameters
Unique tool identifier
Show child attributes
How tools should be chosen
none, auto, any, function, required "auto"
Enable parallel tool execution
true
Request latency in milliseconds
1234
Raw provider response
Was this page helpful?