{
"service": "InferenceLatency.com",
"endpoint": "throughput",
"description": "Combined latency and throughput benchmarking",
"providers": [
{
"provider": "OpenAI",
"model": "GPT-4o",
"metrics": {
"latency_ms": 4253,
"throughput_tokens_per_sec": 4.47
},
"tokens_generated": 19,
"elapsed_seconds": 4.253,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0025,
"estimated_cost_usd": 4.8e-05
},
"model_metadata": {
"release_date": "2024-05-13",
"context_length": 128000,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://platform.openai.com/docs/api-reference",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.openai.com/docs",
"api_docs": "https://platform.openai.com/docs/api-reference/chat"
},
"test_metadata": {
"request_hash": "4e2461bd4304",
"trace_id": "trace-f564834c"
},
"history": {
"latency_trend_7d": [
752,
750,
748,
749,
751,
750,
4253
],
"throughput_trend_7d": [
31.9,
32.1,
32.0,
32.2,
32.0,
32.0,
4.47
]
}
},
{
"provider": "Groq",
"model": "Llama3-8B",
"metrics": {
"latency_ms": null,
"throughput_tokens_per_sec": null
},
"error": "Client error '400 Bad Request' for url 'https://api.groq.com/openai/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400",
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 0.0,
"error_rate_percent": 100.0,
"status": "unhealthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00027,
"estimated_cost_usd": 0.0
},
"model_metadata": {
"release_date": "2024-04-18",
"context_length": 8192,
"hardware": "LPU",
"api_type": "OpenAI-compatible",
"schema_url": "https://console.groq.com/docs/openai",
"human_readable_url": "https://console.groq.com/docs",
"api_docs": "https://console.groq.com/docs/api-reference"
},
"infra": {
"schema_url": "https://console.groq.com/docs/openai",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://console.groq.com/docs",
"api_docs": "https://console.groq.com/docs/api-reference"
},
"test_metadata": {
"request_hash": "e2485d537c8c",
"trace_id": "trace-b8b865cf"
},
"history": {
"latency_trend_7d": [
952,
950,
948,
949,
951,
950,
950
],
"throughput_trend_7d": [
121.9,
122.1,
122.0,
122.2,
122.0,
122.0,
122.01
]
}
},
{
"provider": "Claude",
"model": "Claude Sonnet 4",
"metrics": {
"latency_ms": 1981,
"throughput_tokens_per_sec": 17.16
},
"tokens_generated": 34,
"elapsed_seconds": 1.981,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.003,
"estimated_cost_usd": 0.000102
},
"model_metadata": {
"release_date": "2024-10-22",
"context_length": 200000,
"hardware": "GPU",
"api_type": "Anthropic-native"
},
"infra": {
"schema_url": "https://docs.anthropic.com/en/api/messages",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.anthropic.com",
"api_docs": "https://docs.anthropic.com/en/api"
},
"test_metadata": {
"request_hash": "0b3afc2053ee",
"trace_id": "trace-c6afa323"
},
"history": {
"latency_trend_7d": [
1152,
1150,
1148,
1149,
1151,
1150,
1981
],
"throughput_trend_7d": [
21.9,
22.1,
22.0,
22.2,
22.0,
22.0,
17.16
]
}
},
{
"provider": "OpenRouter",
"model": "Mistral",
"metrics": {
"latency_ms": 527,
"throughput_tokens_per_sec": 32.25
},
"tokens_generated": 17,
"elapsed_seconds": 0.527,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00018,
"estimated_cost_usd": 3e-06
},
"model_metadata": {
"release_date": "2024-02-26",
"context_length": 32768,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://openrouter.ai/docs",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://openrouter.ai/docs",
"api_docs": "https://openrouter.ai/docs/api"
},
"test_metadata": {
"request_hash": "8057b06fdcb1",
"trace_id": "trace-fbad41a6"
},
"history": {
"latency_trend_7d": [
652,
650,
648,
649,
651,
650,
527
],
"throughput_trend_7d": [
41.9,
42.1,
42.0,
42.2,
42.0,
42.0,
32.25
]
}
},
{
"provider": "Google Gemini",
"model": "Gemini-2.0-Flash",
"metrics": {
"latency_ms": null,
"throughput_tokens_per_sec": null
},
"error": "404 NOT_FOUND. {'error': {'code': 404, 'message': 'models/gemini-2.0-flash-exp is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.', 'status': 'NOT_FOUND'}}",
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 0.0,
"error_rate_percent": 100.0,
"status": "unhealthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00075,
"estimated_cost_usd": 0.0
},
"model_metadata": {
"release_date": "2024-12-11",
"context_length": 2000000,
"hardware": "TPU",
"api_type": "Google-native",
"schema_url": "https://ai.google.dev/api",
"human_readable_url": "https://ai.google.dev/docs",
"api_docs": "https://ai.google.dev/api/generate-content"
},
"infra": {
"schema_url": "https://ai.google.dev/api",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://ai.google.dev/docs",
"api_docs": "https://ai.google.dev/api/generate-content"
},
"test_metadata": {
"request_hash": "d0a8767fd2b3",
"trace_id": "trace-7216ed50"
},
"history": {
"latency_trend_7d": [
352,
350,
348,
349,
351,
350,
350
],
"throughput_trend_7d": [
91.9,
92.1,
92.0,
92.2,
92.0,
92.0,
92.01
]
}
},
{
"provider": "Together AI",
"model": "Llama3.1-8B-Turbo",
"metrics": {
"latency_ms": null,
"throughput_tokens_per_sec": null
},
"error": "Client error '400 Bad Request' for url 'https://api.together.xyz/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400",
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 0.0,
"error_rate_percent": 100.0,
"status": "unhealthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0002,
"estimated_cost_usd": 0.0
},
"model_metadata": {
"release_date": "2024-01-15",
"context_length": 32768,
"hardware": "GPU",
"api_type": "OpenAI-compatible",
"schema_url": "https://docs.together.ai/docs/inference-models",
"human_readable_url": "https://docs.together.ai",
"api_docs": "https://docs.together.ai/reference/chat-completions"
},
"infra": {
"schema_url": "https://docs.together.ai/docs/inference-models",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.together.ai",
"api_docs": "https://docs.together.ai/reference/chat-completions"
},
"test_metadata": {
"request_hash": "64662ceee087",
"trace_id": "trace-7c04df79"
},
"history": {
"latency_trend_7d": [
120,
118,
116,
117,
119,
118,
118
],
"throughput_trend_7d": [
145.9,
146.1,
146.0,
146.2,
146.0,
146.0,
146.01
]
}
},
{
"provider": "Fireworks AI",
"model": "Llama3.1-8B",
"metrics": {
"latency_ms": null,
"throughput_tokens_per_sec": null
},
"error": "Client error '404 Not Found' for url 'https://api.fireworks.ai/inference/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404",
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 0.0,
"error_rate_percent": 100.0,
"status": "unhealthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0002,
"estimated_cost_usd": 0.0
},
"model_metadata": {
"release_date": "2024-04-15",
"context_length": 8192,
"hardware": "GPU",
"api_type": "OpenAI-compatible",
"schema_url": "https://readme.fireworks.ai/reference/createchatcompletion",
"human_readable_url": "https://fireworks.ai/",
"api_docs": "https://readme.fireworks.ai/docs"
},
"infra": {
"schema_url": "https://readme.fireworks.ai/reference/createchatcompletion",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://fireworks.ai/",
"api_docs": "https://readme.fireworks.ai/docs"
},
"test_metadata": {
"request_hash": "12185644a9b1",
"trace_id": "trace-0e1c8c85"
},
"history": {
"latency_trend_7d": [
180,
178,
176,
177,
179,
178,
178
],
"throughput_trend_7d": [
135.9,
136.1,
136.0,
136.2,
136.0,
136.0,
136.01
]
}
},
{
"provider": "HF GPT OSS 120B (Cerebras)",
"model": "GPT OSS 120B",
"metrics": {
"latency_ms": null,
"throughput_tokens_per_sec": null
},
"error": "Client error '401 Unauthorized' for url 'https://router.huggingface.co/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/401",
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 0.0,
"error_rate_percent": 100.0,
"status": "unhealthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0008,
"estimated_cost_usd": 0.0
},
"model_metadata": {
"release_date": "2025-01-07",
"context_length": 128000,
"hardware": "Cerebras WSE",
"api_type": "OpenAI-compatible",
"schema_url": "https://huggingface.co/docs/inference-providers",
"human_readable_url": "https://huggingface.co/openai/gpt-oss-120b",
"api_docs": "https://huggingface.co/docs/inference-providers/en/guides/gpt-oss"
}
},
{
"provider": "DeepSeek",
"model": "deepseek-chat",
"metrics": {
"latency_ms": 1901,
"throughput_tokens_per_sec": 8.94
},
"tokens_generated": 17,
"elapsed_seconds": 1.901,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00027,
"estimated_cost_usd": 5e-06
},
"model_metadata": {
"release_date": "2024-12-01",
"context_length": 64000,
"hardware": "GPU",
"api_type": "OpenAI-compatible",
"schema_url": "https://api-docs.deepseek.com/",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"infra": {
"schema_url": "https://api-docs.deepseek.com/",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"test_metadata": {
"request_hash": "2e36a7849063",
"trace_id": "trace-f3a209b8"
},
"history": {
"latency_trend_7d": [
280,
278,
276,
277,
279,
278,
278
],
"throughput_trend_7d": [
85.9,
86.1,
86.0,
86.2,
86.0,
86.0,
86.01
]
}
},
{
"provider": "Cohere",
"model": "command-r",
"metrics": {
"latency_ms": null,
"throughput_tokens_per_sec": null
},
"error": "Client error '429 Too Many Requests' for url 'https://api.cohere.ai/v1/chat'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429",
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 0.0,
"error_rate_percent": 100.0,
"status": "unhealthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00075,
"estimated_cost_usd": 0.0
},
"model_metadata": {
"release_date": "2024-10-01",
"context_length": 128000,
"hardware": "GPU",
"api_type": "Cohere-native",
"schema_url": "https://docs.cohere.com/reference/chat",
"human_readable_url": "https://docs.cohere.com/",
"api_docs": "https://docs.cohere.com/reference/chat"
},
"infra": {
"schema_url": "https://docs.cohere.com/reference/chat",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.cohere.com/",
"api_docs": "https://docs.cohere.com/reference/chat"
},
"test_metadata": {
"request_hash": "19e28055256a",
"trace_id": "trace-e32b6597"
},
"history": {
"latency_trend_7d": [
320,
318,
316,
317,
319,
318,
318
],
"throughput_trend_7d": [
45.9,
46.1,
46.0,
46.2,
46.0,
46.0,
46.01
]
}
}
],
"rankings": {
"by_latency": [
{
"provider": "OpenRouter",
"model": "Mistral",
"metrics": {
"latency_ms": 527,
"throughput_tokens_per_sec": 32.25
},
"tokens_generated": 17,
"elapsed_seconds": 0.527,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00018,
"estimated_cost_usd": 3e-06
},
"model_metadata": {
"release_date": "2024-02-26",
"context_length": 32768,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://openrouter.ai/docs",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://openrouter.ai/docs",
"api_docs": "https://openrouter.ai/docs/api"
},
"test_metadata": {
"request_hash": "8057b06fdcb1",
"trace_id": "trace-fbad41a6"
},
"history": {
"latency_trend_7d": [
652,
650,
648,
649,
651,
650,
527
],
"throughput_trend_7d": [
41.9,
42.1,
42.0,
42.2,
42.0,
42.0,
32.25
]
}
},
{
"provider": "DeepSeek",
"model": "deepseek-chat",
"metrics": {
"latency_ms": 1901,
"throughput_tokens_per_sec": 8.94
},
"tokens_generated": 17,
"elapsed_seconds": 1.901,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00027,
"estimated_cost_usd": 5e-06
},
"model_metadata": {
"release_date": "2024-12-01",
"context_length": 64000,
"hardware": "GPU",
"api_type": "OpenAI-compatible",
"schema_url": "https://api-docs.deepseek.com/",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"infra": {
"schema_url": "https://api-docs.deepseek.com/",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"test_metadata": {
"request_hash": "2e36a7849063",
"trace_id": "trace-f3a209b8"
},
"history": {
"latency_trend_7d": [
280,
278,
276,
277,
279,
278,
278
],
"throughput_trend_7d": [
85.9,
86.1,
86.0,
86.2,
86.0,
86.0,
86.01
]
}
},
{
"provider": "Claude",
"model": "Claude Sonnet 4",
"metrics": {
"latency_ms": 1981,
"throughput_tokens_per_sec": 17.16
},
"tokens_generated": 34,
"elapsed_seconds": 1.981,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.003,
"estimated_cost_usd": 0.000102
},
"model_metadata": {
"release_date": "2024-10-22",
"context_length": 200000,
"hardware": "GPU",
"api_type": "Anthropic-native"
},
"infra": {
"schema_url": "https://docs.anthropic.com/en/api/messages",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.anthropic.com",
"api_docs": "https://docs.anthropic.com/en/api"
},
"test_metadata": {
"request_hash": "0b3afc2053ee",
"trace_id": "trace-c6afa323"
},
"history": {
"latency_trend_7d": [
1152,
1150,
1148,
1149,
1151,
1150,
1981
],
"throughput_trend_7d": [
21.9,
22.1,
22.0,
22.2,
22.0,
22.0,
17.16
]
}
},
{
"provider": "OpenAI",
"model": "GPT-4o",
"metrics": {
"latency_ms": 4253,
"throughput_tokens_per_sec": 4.47
},
"tokens_generated": 19,
"elapsed_seconds": 4.253,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0025,
"estimated_cost_usd": 4.8e-05
},
"model_metadata": {
"release_date": "2024-05-13",
"context_length": 128000,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://platform.openai.com/docs/api-reference",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.openai.com/docs",
"api_docs": "https://platform.openai.com/docs/api-reference/chat"
},
"test_metadata": {
"request_hash": "4e2461bd4304",
"trace_id": "trace-f564834c"
},
"history": {
"latency_trend_7d": [
752,
750,
748,
749,
751,
750,
4253
],
"throughput_trend_7d": [
31.9,
32.1,
32.0,
32.2,
32.0,
32.0,
4.47
]
}
}
],
"by_throughput": [
{
"provider": "OpenRouter",
"model": "Mistral",
"metrics": {
"latency_ms": 527,
"throughput_tokens_per_sec": 32.25
},
"tokens_generated": 17,
"elapsed_seconds": 0.527,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00018,
"estimated_cost_usd": 3e-06
},
"model_metadata": {
"release_date": "2024-02-26",
"context_length": 32768,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://openrouter.ai/docs",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://openrouter.ai/docs",
"api_docs": "https://openrouter.ai/docs/api"
},
"test_metadata": {
"request_hash": "8057b06fdcb1",
"trace_id": "trace-fbad41a6"
},
"history": {
"latency_trend_7d": [
652,
650,
648,
649,
651,
650,
527
],
"throughput_trend_7d": [
41.9,
42.1,
42.0,
42.2,
42.0,
42.0,
32.25
]
}
},
{
"provider": "Claude",
"model": "Claude Sonnet 4",
"metrics": {
"latency_ms": 1981,
"throughput_tokens_per_sec": 17.16
},
"tokens_generated": 34,
"elapsed_seconds": 1.981,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.003,
"estimated_cost_usd": 0.000102
},
"model_metadata": {
"release_date": "2024-10-22",
"context_length": 200000,
"hardware": "GPU",
"api_type": "Anthropic-native"
},
"infra": {
"schema_url": "https://docs.anthropic.com/en/api/messages",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://docs.anthropic.com",
"api_docs": "https://docs.anthropic.com/en/api"
},
"test_metadata": {
"request_hash": "0b3afc2053ee",
"trace_id": "trace-c6afa323"
},
"history": {
"latency_trend_7d": [
1152,
1150,
1148,
1149,
1151,
1150,
1981
],
"throughput_trend_7d": [
21.9,
22.1,
22.0,
22.2,
22.0,
22.0,
17.16
]
}
},
{
"provider": "DeepSeek",
"model": "deepseek-chat",
"metrics": {
"latency_ms": 1901,
"throughput_tokens_per_sec": 8.94
},
"tokens_generated": 17,
"elapsed_seconds": 1.901,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.00027,
"estimated_cost_usd": 5e-06
},
"model_metadata": {
"release_date": "2024-12-01",
"context_length": 64000,
"hardware": "GPU",
"api_type": "OpenAI-compatible",
"schema_url": "https://api-docs.deepseek.com/",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"infra": {
"schema_url": "https://api-docs.deepseek.com/",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.deepseek.com/",
"api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
},
"test_metadata": {
"request_hash": "2e36a7849063",
"trace_id": "trace-f3a209b8"
},
"history": {
"latency_trend_7d": [
280,
278,
276,
277,
279,
278,
278
],
"throughput_trend_7d": [
85.9,
86.1,
86.0,
86.2,
86.0,
86.0,
86.01
]
}
},
{
"provider": "OpenAI",
"model": "GPT-4o",
"metrics": {
"latency_ms": 4253,
"throughput_tokens_per_sec": 4.47
},
"tokens_generated": 19,
"elapsed_seconds": 4.253,
"health": {
"cold_start_latency_ms": null,
"warm_start_latency_ms": 750,
"availability_percent": 100.0,
"error_rate_percent": 0.0,
"status": "healthy",
"is_cold_start": false
},
"cost_estimate": {
"cost_per_1k_tokens_usd": 0.0025,
"estimated_cost_usd": 4.8e-05
},
"model_metadata": {
"release_date": "2024-05-13",
"context_length": 128000,
"hardware": "GPU",
"api_type": "OpenAI-compatible"
},
"infra": {
"schema_url": "https://platform.openai.com/docs/api-reference",
"plugin_manifest": "/.well-known/ai-plugin.json",
"human_readable_url": "https://platform.openai.com/docs",
"api_docs": "https://platform.openai.com/docs/api-reference/chat"
},
"test_metadata": {
"request_hash": "4e2461bd4304",
"trace_id": "trace-f564834c"
},
"history": {
"latency_trend_7d": [
752,
750,
748,
749,
751,
750,
4253
],
"throughput_trend_7d": [
31.9,
32.1,
32.0,
32.2,
32.0,
32.0,
4.47
]
}
}
]
},
"fastest_latency": "OpenRouter",
"highest_throughput": "OpenRouter",
"total_tested": 10,
"successful_tests": 4,
"failed_tests": 6,
"performance_summary": {
"best_latency_ms": 527,
"best_throughput_tokens_per_sec": 32.25,
"avg_latency_ms": 2166,
"avg_throughput_tokens_per_sec": 15.7
},
"ai_guidance": {
"best_for_speed": "OpenRouter",
"best_for_throughput": "OpenRouter",
"recommendation": "Use OpenRouter for lowest latency, OpenRouter for highest throughput",
"use_case_guidance": {
"real_time_chat": "Recommended: OpenRouter (lowest latency)",
"bulk_generation": "Recommended: OpenRouter (highest throughput)",
"balanced_workload": "Consider both OpenRouter and OpenRouter"
}
},
"human_readable_summary": "\u26a1 Fastest: OpenRouter (527ms - Moderate) | \ud83d\ude80 Highest throughput: OpenRouter (32.2 tokens/sec - Moderate) | \u2705 4/10 providers responding",
"timestamp": "2026-04-16T01:05:14.308783Z"
}