{
"service": "InferenceLatency.com",
"endpoint": "throughput",
"description": "Combined latency and throughput benchmarking",
"providers": [
{
"provider": "OpenAI",
"model": "GPT-4o",
"metrics": {
"latency_ms": 1577,
"throughput_tokens_per_sec": 10.78
},
"tokens_generated": 17,
"elapsed_seconds": 1.577,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0025,
"estimated_cost_usd": 4.3e-05
},
"model_metadata": {
"release_date": "2024-05-13",
"context_length": 128000,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://platform.openai.com/docs/api-reference",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.openai.com/docs",
"api_docs": "https://platform.openai.com/docs/api-reference/chat"
},
"test_metadata": {
"request_hash": "592319ed9da9",
"trace_id": "trace-13f80d96"
},
"history": {
"latency_trend_7d": [
752,
750,
748,
749,
751,
750,
1577
],
"throughput_trend_7d": [
31.9,
32.1,
32.0,
32.2,
32.0,
32.0,
10.78
]
}
},
{
"provider": "Groq",
"model": "Llama3-8B",
"metrics": {
"latency_ms": null,
"throughput_tokens_per_sec": null
},
"error": "Client error '400 Bad Request' for url 'https://api.groq.com/openai/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400",
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 0.0,
"error_rate_percent": 100.0,
"status": "unhealthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00027,
"estimated_cost_usd": 0.0
},
"model_metadata": {
"release_date": "2024-04-18",
"context_length": 8192,
"hardware": "LPU",
"api_type": "OpenAI-compatible",
"schema_url": "https://console.groq.com/docs/openai",
"human_readable_url": "https://console.groq.com/docs",
"api_docs": "https://console.groq.com/docs/api-reference"
},
"infra": {
"schema_url": "https://console.groq.com/docs/openai",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://console.groq.com/docs",
"api_docs": "https://console.groq.com/docs/api-reference"
},
"test_metadata": {
"request_hash": "daec401e0765",
"trace_id": "trace-9da6aff0"
},
"history": {
"latency_trend_7d": [
952,
950,
948,
949,
951,
950,
950
],
"throughput_trend_7d": [
121.9,
122.1,
122.0,
122.2,
122.0,
122.0,
122.01
]
}
},
{
"provider": "Claude",
"model": "Claude Sonnet 4",
"metrics": {
"latency_ms": 1994,
"throughput_tokens_per_sec": 12.04
},
"tokens_generated": 24,
"elapsed_seconds": 1.994,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.003,
"estimated_cost_usd": 7.2e-05
},
"model_metadata": {
"release_date": "2024-10-22",
"context_length": 200000,
"hardware": "GPU",
"api_type": "Anthropic-native"
},
"infra": {
"schema_url": "https://docs.anthropic.com/en/api/messages",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.anthropic.com",
"api_docs": "https://docs.anthropic.com/en/api"
},
"test_metadata": {
"request_hash": "9cd52dbbf3e8",
"trace_id": "trace-48f35942"
},
"history": {
"latency_trend_7d": [
1152,
1150,
1148,
1149,
1151,
1150,
1994
],
"throughput_trend_7d": [
21.9,
22.1,
22.0,
22.2,
22.0,
22.0,
12.04
]
}
},
{
"provider": "OpenRouter",
"model": "Mistral",
"metrics": {
"latency_ms": 856,
"throughput_tokens_per_sec": 31.54
},
"tokens_generated": 27,
"elapsed_seconds": 0.856,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00018,
"estimated_cost_usd": 5e-06
},
"model_metadata": {
"release_date": "2024-02-26",
"context_length": 32768,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://openrouter.ai/docs",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://openrouter.ai/docs",
"api_docs": "https://openrouter.ai/docs/api"
},
"test_metadata": {
"request_hash": "9c57407055d2",
"trace_id": "trace-b5a1c109"
},
"history": {
"latency_trend_7d": [
652,
650,
648,
649,
651,
650,
856
],
"throughput_trend_7d": [
41.9,
42.1,
42.0,
42.2,
42.0,
42.0,
31.54
]
}
},
{
"provider": "Google Gemini",
"model": "Gemini-2.0-Flash",
"metrics": {
"latency_ms": null,
"throughput_tokens_per_sec": null
},
"error": "404 NOT_FOUND. {'error': {'code': 404, 'message': 'models/gemini-2.0-flash-exp is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.', 'status': 'NOT_FOUND'}}",
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 0.0,
"error_rate_percent": 100.0,
"status": "unhealthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00075,
"estimated_cost_usd": 0.0
},
"model_metadata": {
"release_date": "2024-12-11",
"context_length": 2000000,
"hardware": "TPU",
"api_type": "Google-native",
"schema_url": "https://ai.google.dev/api",
"human_readable_url": "https://ai.google.dev/docs",
"api_docs": "https://ai.google.dev/api/generate-content"
},
"infra": {
"schema_url": "https://ai.google.dev/api",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://ai.google.dev/docs",
"api_docs": "https://ai.google.dev/api/generate-content"
},
"test_metadata": {
"request_hash": "0eb907c2f20c",
"trace_id": "trace-1098fdd6"
},
"history": {
"latency_trend_7d": [
352,
350,
348,
349,
351,
350,
350
],
"throughput_trend_7d": [
91.9,
92.1,
92.0,
92.2,
92.0,
92.0,
92.01
]
}
},
{
"provider": "Together AI",
"model": "Llama3.1-8B-Turbo",
"metrics": {
"latency_ms": 337,
"throughput_tokens_per_sec": 94.83
},
"tokens_generated": 32,
"elapsed_seconds": 0.337,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0002,
"estimated_cost_usd": 6e-06
},
"model_metadata": {
"release_date": "2024-01-15",
"context_length": 32768,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://docs.together.ai/docs/inference-models",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.together.ai",
"api_docs": "https://docs.together.ai/reference/chat-completions"
},
"test_metadata": {
"request_hash": "a151443cc584",
"trace_id": "trace-20b0e681"
},
"history": {
"latency_trend_7d": [
120,
118,
116,
117,
119,
118,
337
],
"throughput_trend_7d": [
145.9,
146.1,
146.0,
146.2,
146.0,
146.0,
94.83
]
}
},
{
"provider": "Fireworks AI",
"model": "Llama3.1-8B",
"metrics": {
"latency_ms": null,
"throughput_tokens_per_sec": null
},
"error": "Client error '404 Not Found' for url 'https://api.fireworks.ai/inference/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404",
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 0.0,
"error_rate_percent": 100.0,
"status": "unhealthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0002,
"estimated_cost_usd": 0.0
},
"model_metadata": {
"release_date": "2024-04-15",
"context_length": 8192,
"hardware": "GPU",
"api_type": "OpenAI-compatible",
"schema_url": "https://readme.fireworks.ai/reference/createchatcompletion",
"human_readable_url": "https://fireworks.ai/",
"api_docs": "https://readme.fireworks.ai/docs"
},
"infra": {
"schema_url": "https://readme.fireworks.ai/reference/createchatcompletion",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://fireworks.ai/",
"api_docs": "https://readme.fireworks.ai/docs"
},
"test_metadata": {
"request_hash": "6c7bb6ebfdf2",
"trace_id": "trace-a677460c"
},
"history": {
"latency_trend_7d": [
180,
178,
176,
177,
179,
178,
178
],
"throughput_trend_7d": [
135.9,
136.1,
136.0,
136.2,
136.0,
136.0,
136.01
]
}
},
{
"provider": "HF GPT OSS 120B (Cerebras)",
"model": "GPT OSS 120B",
"metrics": {
"latency_ms": null,
"throughput_tokens_per_sec": null
},
"error": "Client error '401 Unauthorized' for url 'https://router.huggingface.co/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/401",
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 0.0,
"error_rate_percent": 100.0,
"status": "unhealthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0008,
"estimated_cost_usd": 0.0
},
"model_metadata": {
"release_date": "2025-01-07",
"context_length": 128000,
"hardware": "Cerebras WSE",
"api_type": "OpenAI-compatible",
"schema_url": "https://huggingface.co/docs/inference-providers",
"human_readable_url": "https://huggingface.co/openai/gpt-oss-120b",
"api_docs": "https://huggingface.co/docs/inference-providers/en/guides/gpt-oss"
}
},
{
"provider": "DeepSeek",
"model": "deepseek-chat",
"metrics": {
"latency_ms": 1502,
"throughput_tokens_per_sec": 11.32
},
"tokens_generated": 17,
"elapsed_seconds": 1.502,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00027,
"estimated_cost_usd": 5e-06
},
"model_metadata": {
"release_date": "2024-12-01",
"context_length": 64000,
"hardware": "GPU",
"api_type": "OpenAI-compatible",
"schema_url": "https://api-docs.deepseek.com/",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"infra": {
"schema_url": "https://api-docs.deepseek.com/",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"test_metadata": {
"request_hash": "fee1bd55edda",
"trace_id": "trace-81b688ca"
},
"history": {
"latency_trend_7d": [
280,
278,
276,
277,
279,
278,
278
],
"throughput_trend_7d": [
85.9,
86.1,
86.0,
86.2,
86.0,
86.0,
86.01
]
}
},
{
"provider": "Cohere",
"model": "command-r",
"metrics": {
"latency_ms": null,
"throughput_tokens_per_sec": null
},
"error": "Client error '404 Not Found' for url 'https://api.cohere.ai/v1/chat'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404",
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 0.0,
"error_rate_percent": 100.0,
"status": "unhealthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00075,
"estimated_cost_usd": 0.0
},
"model_metadata": {
"release_date": "2024-10-01",
"context_length": 128000,
"hardware": "GPU",
"api_type": "Cohere-native",
"schema_url": "https://docs.cohere.com/reference/chat",
"human_readable_url": "https://docs.cohere.com/",
"api_docs": "https://docs.cohere.com/reference/chat"
},
"infra": {
"schema_url": "https://docs.cohere.com/reference/chat",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.cohere.com/",
"api_docs": "https://docs.cohere.com/reference/chat"
},
"test_metadata": {
"request_hash": "cee4b78ea75f",
"trace_id": "trace-fb5fe140"
},
"history": {
"latency_trend_7d": [
320,
318,
316,
317,
319,
318,
318
],
"throughput_trend_7d": [
45.9,
46.1,
46.0,
46.2,
46.0,
46.0,
46.01
]
}
}
],
"rankings": {
"by_latency": [
{
"provider": "Together AI",
"model": "Llama3.1-8B-Turbo",
"metrics": {
"latency_ms": 337,
"throughput_tokens_per_sec": 94.83
},
"tokens_generated": 32,
"elapsed_seconds": 0.337,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0002,
"estimated_cost_usd": 6e-06
},
"model_metadata": {
"release_date": "2024-01-15",
"context_length": 32768,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://docs.together.ai/docs/inference-models",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.together.ai",
"api_docs": "https://docs.together.ai/reference/chat-completions"
},
"test_metadata": {
"request_hash": "a151443cc584",
"trace_id": "trace-20b0e681"
},
"history": {
"latency_trend_7d": [
120,
118,
116,
117,
119,
118,
337
],
"throughput_trend_7d": [
145.9,
146.1,
146.0,
146.2,
146.0,
146.0,
94.83
]
}
},
{
"provider": "OpenRouter",
"model": "Mistral",
"metrics": {
"latency_ms": 856,
"throughput_tokens_per_sec": 31.54
},
"tokens_generated": 27,
"elapsed_seconds": 0.856,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00018,
"estimated_cost_usd": 5e-06
},
"model_metadata": {
"release_date": "2024-02-26",
"context_length": 32768,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://openrouter.ai/docs",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://openrouter.ai/docs",
"api_docs": "https://openrouter.ai/docs/api"
},
"test_metadata": {
"request_hash": "9c57407055d2",
"trace_id": "trace-b5a1c109"
},
"history": {
"latency_trend_7d": [
652,
650,
648,
649,
651,
650,
856
],
"throughput_trend_7d": [
41.9,
42.1,
42.0,
42.2,
42.0,
42.0,
31.54
]
}
},
{
"provider": "DeepSeek",
"model": "deepseek-chat",
"metrics": {
"latency_ms": 1502,
"throughput_tokens_per_sec": 11.32
},
"tokens_generated": 17,
"elapsed_seconds": 1.502,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00027,
"estimated_cost_usd": 5e-06
},
"model_metadata": {
"release_date": "2024-12-01",
"context_length": 64000,
"hardware": "GPU",
"api_type": "OpenAI-compatible",
"schema_url": "https://api-docs.deepseek.com/",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"infra": {
"schema_url": "https://api-docs.deepseek.com/",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"test_metadata": {
"request_hash": "fee1bd55edda",
"trace_id": "trace-81b688ca"
},
"history": {
"latency_trend_7d": [
280,
278,
276,
277,
279,
278,
278
],
"throughput_trend_7d": [
85.9,
86.1,
86.0,
86.2,
86.0,
86.0,
86.01
]
}
},
{
"provider": "OpenAI",
"model": "GPT-4o",
"metrics": {
"latency_ms": 1577,
"throughput_tokens_per_sec": 10.78
},
"tokens_generated": 17,
"elapsed_seconds": 1.577,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0025,
"estimated_cost_usd": 4.3e-05
},
"model_metadata": {
"release_date": "2024-05-13",
"context_length": 128000,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://platform.openai.com/docs/api-reference",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.openai.com/docs",
"api_docs": "https://platform.openai.com/docs/api-reference/chat"
},
"test_metadata": {
"request_hash": "592319ed9da9",
"trace_id": "trace-13f80d96"
},
"history": {
"latency_trend_7d": [
752,
750,
748,
749,
751,
750,
1577
],
"throughput_trend_7d": [
31.9,
32.1,
32.0,
32.2,
32.0,
32.0,
10.78
]
}
},
{
"provider": "Claude",
"model": "Claude Sonnet 4",
"metrics": {
"latency_ms": 1994,
"throughput_tokens_per_sec": 12.04
},
"tokens_generated": 24,
"elapsed_seconds": 1.994,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.003,
"estimated_cost_usd": 7.2e-05
},
"model_metadata": {
"release_date": "2024-10-22",
"context_length": 200000,
"hardware": "GPU",
"api_type": "Anthropic-native"
},
"infra": {
"schema_url": "https://docs.anthropic.com/en/api/messages",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.anthropic.com",
"api_docs": "https://docs.anthropic.com/en/api"
},
"test_metadata": {
"request_hash": "9cd52dbbf3e8",
"trace_id": "trace-48f35942"
},
"history": {
"latency_trend_7d": [
1152,
1150,
1148,
1149,
1151,
1150,
1994
],
"throughput_trend_7d": [
21.9,
22.1,
22.0,
22.2,
22.0,
22.0,
12.04
]
}
}
],
"by_throughput": [
{
"provider": "Together AI",
"model": "Llama3.1-8B-Turbo",
"metrics": {
"latency_ms": 337,
"throughput_tokens_per_sec": 94.83
},
"tokens_generated": 32,
"elapsed_seconds": 0.337,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0002,
"estimated_cost_usd": 6e-06
},
"model_metadata": {
"release_date": "2024-01-15",
"context_length": 32768,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://docs.together.ai/docs/inference-models",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.together.ai",
"api_docs": "https://docs.together.ai/reference/chat-completions"
},
"test_metadata": {
"request_hash": "a151443cc584",
"trace_id": "trace-20b0e681"
},
"history": {
"latency_trend_7d": [
120,
118,
116,
117,
119,
118,
337
],
"throughput_trend_7d": [
145.9,
146.1,
146.0,
146.2,
146.0,
146.0,
94.83
]
}
},
{
"provider": "OpenRouter",
"model": "Mistral",
"metrics": {
"latency_ms": 856,
"throughput_tokens_per_sec": 31.54
},
"tokens_generated": 27,
"elapsed_seconds": 0.856,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00018,
"estimated_cost_usd": 5e-06
},
"model_metadata": {
"release_date": "2024-02-26",
"context_length": 32768,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://openrouter.ai/docs",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://openrouter.ai/docs",
"api_docs": "https://openrouter.ai/docs/api"
},
"test_metadata": {
"request_hash": "9c57407055d2",
"trace_id": "trace-b5a1c109"
},
"history": {
"latency_trend_7d": [
652,
650,
648,
649,
651,
650,
856
],
"throughput_trend_7d": [
41.9,
42.1,
42.0,
42.2,
42.0,
42.0,
31.54
]
}
},
{
"provider": "Claude",
"model": "Claude Sonnet 4",
"metrics": {
"latency_ms": 1994,
"throughput_tokens_per_sec": 12.04
},
"tokens_generated": 24,
"elapsed_seconds": 1.994,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.003,
"estimated_cost_usd": 7.2e-05
},
"model_metadata": {
"release_date": "2024-10-22",
"context_length": 200000,
"hardware": "GPU",
"api_type": "Anthropic-native"
},
"infra": {
"schema_url": "https://docs.anthropic.com/en/api/messages",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.anthropic.com",
"api_docs": "https://docs.anthropic.com/en/api"
},
"test_metadata": {
"request_hash": "9cd52dbbf3e8",
"trace_id": "trace-48f35942"
},
"history": {
"latency_trend_7d": [
1152,
1150,
1148,
1149,
1151,
1150,
1994
],
"throughput_trend_7d": [
21.9,
22.1,
22.0,
22.2,
22.0,
22.0,
12.04
]
}
},
{
"provider": "DeepSeek",
"model": "deepseek-chat",
"metrics": {
"latency_ms": 1502,
"throughput_tokens_per_sec": 11.32
},
"tokens_generated": 17,
"elapsed_seconds": 1.502,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00027,
"estimated_cost_usd": 5e-06
},
"model_metadata": {
"release_date": "2024-12-01",
"context_length": 64000,
"hardware": "GPU",
"api_type": "OpenAI-compatible",
"schema_url": "https://api-docs.deepseek.com/",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"infra": {
"schema_url": "https://api-docs.deepseek.com/",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"test_metadata": {
"request_hash": "fee1bd55edda",
"trace_id": "trace-81b688ca"
},
"history": {
"latency_trend_7d": [
280,
278,
276,
277,
279,
278,
278
],
"throughput_trend_7d": [
85.9,
86.1,
86.0,
86.2,
86.0,
86.0,
86.01
]
}
},
{
"provider": "OpenAI",
"model": "GPT-4o",
"metrics": {
"latency_ms": 1577,
"throughput_tokens_per_sec": 10.78
},
"tokens_generated": 17,
"elapsed_seconds": 1.577,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0025,
"estimated_cost_usd": 4.3e-05
},
"model_metadata": {
"release_date": "2024-05-13",
"context_length": 128000,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://platform.openai.com/docs/api-reference",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.openai.com/docs",
"api_docs": "https://platform.openai.com/docs/api-reference/chat"
},
"test_metadata": {
"request_hash": "592319ed9da9",
"trace_id": "trace-13f80d96"
},
"history": {
"latency_trend_7d": [
752,
750,
748,
749,
751,
750,
1577
],
"throughput_trend_7d": [
31.9,
32.1,
32.0,
32.2,
32.0,
32.0,
10.78
]
}
}
]
},
"fastest_latency": "Together AI",
"highest_throughput": "Together AI",
"total_tested": 10,
"successful_tests": 5,
"failed_tests": 5,
"performance_summary": {
"best_latency_ms": 337,
"best_throughput_tokens_per_sec": 94.83,
"avg_latency_ms": 1253,
"avg_throughput_tokens_per_sec": 32.1
},
"ai_guidance": {
"best_for_speed": "Together AI",
"best_for_throughput": "Together AI",
"recommendation": "Use Together AI for lowest latency, Together AI for highest throughput",
"use_case_guidance": {
"real_time_chat": "Recommended: Together AI (lowest latency)",
"bulk_generation": "Recommended: Together AI (highest throughput)",
"balanced_workload": "Consider both Together AI and Together AI"
}
},
"human_readable_summary": "\u26a1 Fastest: Together AI (337ms - Good) | \ud83d\ude80 Highest throughput: Together AI (94.8 tokens/sec - High) | \u2705 5/10 providers responding",
"timestamp": "2026-03-01T22:45:04.190308Z"
}