{"service":"InferenceLatency.com","endpoint":"throughput","description":"Combined latency and throughput benchmarking","providers":[{"provider":"OpenAI","model":"GPT-4o","metrics":{"latency_ms":874,"throughput_tokens_per_sec":18.31},"tokens_generated":16,"elapsed_seconds":0.874,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0025,"estimated_cost_usd":4e-05},"model_metadata":{"release_date":"2024-05-13","context_length":128000,"hardware":"GPU","api_type":"OpenAI-compatible"},"infra":{"schema_url":"https://platform.openai.com/docs/api-reference","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.openai.com/docs","api_docs":"https://platform.openai.com/docs/api-reference/chat"},"test_metadata":{"request_hash":"0966d0113739","trace_id":"trace-fdaaef15"},"history":{"latency_trend_7d":[752,750,748,749,751,750,874],"throughput_trend_7d":[31.9,32.1,32.0,32.2,32.0,32.0,18.31]}},{"provider":"Groq","model":"Llama3-8B","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"Client error '400 Bad Request' for url 'https://api.groq.com/openai/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00027,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2024-04-18","context_length":8192,"hardware":"LPU","api_type":"OpenAI-compatible","schema_url":"https://console.groq.com/docs/openai","human_readable_url":"https://console.groq.com/docs","api_docs":"https://console.groq.com/docs/api-reference"},"infra":{"schema_url":"https://console.groq.com/docs/openai","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://console.groq.com/docs","api_docs":"https://console.groq.com/docs/api-reference"},"test_metadata":{"request_hash":"b3f010e934bd","trace_id":"trace-a1c5b241"},"history":{"latency_trend_7d":[952,950,948,949,951,950,950],"throughput_trend_7d":[121.9,122.1,122.0,122.2,122.0,122.0,122.01]}},{"provider":"Claude","model":"Claude Sonnet 4","metrics":{"latency_ms":1786,"throughput_tokens_per_sec":18.48},"tokens_generated":33,"elapsed_seconds":1.786,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.003,"estimated_cost_usd":9.9e-05},"model_metadata":{"release_date":"2024-10-22","context_length":200000,"hardware":"GPU","api_type":"Anthropic-native"},"infra":{"schema_url":"https://docs.anthropic.com/en/api/messages","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://docs.anthropic.com","api_docs":"https://docs.anthropic.com/en/api"},"test_metadata":{"request_hash":"72f0d8806f3a","trace_id":"trace-ae20df9a"},"history":{"latency_trend_7d":[1152,1150,1148,1149,1151,1150,1786],"throughput_trend_7d":[21.9,22.1,22.0,22.2,22.0,22.0,18.48]}},{"provider":"OpenRouter","model":"Mistral","metrics":{"latency_ms":405,"throughput_tokens_per_sec":41.96},"tokens_generated":17,"elapsed_seconds":0.405,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00018,"estimated_cost_usd":3e-06},"model_metadata":{"release_date":"2024-02-26","context_length":32768,"hardware":"GPU","api_type":"OpenAI-compatible"},"infra":{"schema_url":"https://openrouter.ai/docs","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://openrouter.ai/docs","api_docs":"https://openrouter.ai/docs/api"},"test_metadata":{"request_hash":"2c9bea4b99a8","trace_id":"trace-5ed18973"},"history":{"latency_trend_7d":[652,650,648,649,651,650,405],"throughput_trend_7d":[41.9,42.1,42.0,42.2,42.0,42.0,41.96]}},{"provider":"Google Gemini","model":"Gemini-2.0-Flash","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"404 NOT_FOUND. {'error': {'code': 404, 'message': 'models/gemini-2.0-flash-exp is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.', 'status': 'NOT_FOUND'}}","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00075,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2024-12-11","context_length":2000000,"hardware":"TPU","api_type":"Google-native","schema_url":"https://ai.google.dev/api","human_readable_url":"https://ai.google.dev/docs","api_docs":"https://ai.google.dev/api/generate-content"},"infra":{"schema_url":"https://ai.google.dev/api","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://ai.google.dev/docs","api_docs":"https://ai.google.dev/api/generate-content"},"test_metadata":{"request_hash":"050ef5361706","trace_id":"trace-5cdf959d"},"history":{"latency_trend_7d":[352,350,348,349,351,350,350],"throughput_trend_7d":[91.9,92.1,92.0,92.2,92.0,92.0,92.01]}},{"provider":"Together AI","model":"Llama3.1-8B-Turbo","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"Client error '400 Bad Request' for url 'https://api.together.xyz/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0002,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2024-01-15","context_length":32768,"hardware":"GPU","api_type":"OpenAI-compatible","schema_url":"https://docs.together.ai/docs/inference-models","human_readable_url":"https://docs.together.ai","api_docs":"https://docs.together.ai/reference/chat-completions"},"infra":{"schema_url":"https://docs.together.ai/docs/inference-models","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://docs.together.ai","api_docs":"https://docs.together.ai/reference/chat-completions"},"test_metadata":{"request_hash":"782c5d6f555a","trace_id":"trace-2f8eb8c7"},"history":{"latency_trend_7d":[120,118,116,117,119,118,118],"throughput_trend_7d":[145.9,146.1,146.0,146.2,146.0,146.0,146.01]}},{"provider":"Fireworks AI","model":"Llama3.1-8B","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"Client error '404 Not Found' for url 'https://api.fireworks.ai/inference/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0002,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2024-04-15","context_length":8192,"hardware":"GPU","api_type":"OpenAI-compatible","schema_url":"https://readme.fireworks.ai/reference/createchatcompletion","human_readable_url":"https://fireworks.ai/","api_docs":"https://readme.fireworks.ai/docs"},"infra":{"schema_url":"https://readme.fireworks.ai/reference/createchatcompletion","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://fireworks.ai/","api_docs":"https://readme.fireworks.ai/docs"},"test_metadata":{"request_hash":"6a1e3a23a2c3","trace_id":"trace-bed20e2a"},"history":{"latency_trend_7d":[180,178,176,177,179,178,178],"throughput_trend_7d":[135.9,136.1,136.0,136.2,136.0,136.0,136.01]}},{"provider":"HF GPT OSS 120B (Cerebras)","model":"GPT OSS 120B","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"Client error '401 Unauthorized' for url 'https://router.huggingface.co/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/401","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0008,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2025-01-07","context_length":128000,"hardware":"Cerebras WSE","api_type":"OpenAI-compatible","schema_url":"https://huggingface.co/docs/inference-providers","human_readable_url":"https://huggingface.co/openai/gpt-oss-120b","api_docs":"https://huggingface.co/docs/inference-providers/en/guides/gpt-oss"}},{"provider":"DeepSeek","model":"deepseek-chat","metrics":{"latency_ms":3977,"throughput_tokens_per_sec":4.27},"tokens_generated":17,"elapsed_seconds":3.977,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00027,"estimated_cost_usd":5e-06},"model_metadata":{"release_date":"2024-12-01","context_length":64000,"hardware":"GPU","api_type":"OpenAI-compatible","schema_url":"https://api-docs.deepseek.com/","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"infra":{"schema_url":"https://api-docs.deepseek.com/","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"test_metadata":{"request_hash":"de68ac14f80e","trace_id":"trace-b47d96fb"},"history":{"latency_trend_7d":[280,278,276,277,279,278,278],"throughput_trend_7d":[85.9,86.1,86.0,86.2,86.0,86.0,86.01]}},{"provider":"Cohere","model":"command-r","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"Client error '429 Too Many Requests' for url 'https://api.cohere.ai/v1/chat'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00075,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2024-10-01","context_length":128000,"hardware":"GPU","api_type":"Cohere-native","schema_url":"https://docs.cohere.com/reference/chat","human_readable_url":"https://docs.cohere.com/","api_docs":"https://docs.cohere.com/reference/chat"},"infra":{"schema_url":"https://docs.cohere.com/reference/chat","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://docs.cohere.com/","api_docs":"https://docs.cohere.com/reference/chat"},"test_metadata":{"request_hash":"6f8f5a1c29c4","trace_id":"trace-509d25ec"},"history":{"latency_trend_7d":[320,318,316,317,319,318,318],"throughput_trend_7d":[45.9,46.1,46.0,46.2,46.0,46.0,46.01]}}],"rankings":{"by_latency":[{"provider":"OpenRouter","model":"Mistral","metrics":{"latency_ms":405,"throughput_tokens_per_sec":41.96},"tokens_generated":17,"elapsed_seconds":0.405,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00018,"estimated_cost_usd":3e-06},"model_metadata":{"release_date":"2024-02-26","context_length":32768,"hardware":"GPU","api_type":"OpenAI-compatible"},"infra":{"schema_url":"https://openrouter.ai/docs","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://openrouter.ai/docs","api_docs":"https://openrouter.ai/docs/api"},"test_metadata":{"request_hash":"2c9bea4b99a8","trace_id":"trace-5ed18973"},"history":{"latency_trend_7d":[652,650,648,649,651,650,405],"throughput_trend_7d":[41.9,42.1,42.0,42.2,42.0,42.0,41.96]}},{"provider":"OpenAI","model":"GPT-4o","metrics":{"latency_ms":874,"throughput_tokens_per_sec":18.31},"tokens_generated":16,"elapsed_seconds":0.874,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0025,"estimated_cost_usd":4e-05},"model_metadata":{"release_date":"2024-05-13","context_length":128000,"hardware":"GPU","api_type":"OpenAI-compatible"},"infra":{"schema_url":"https://platform.openai.com/docs/api-reference","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.openai.com/docs","api_docs":"https://platform.openai.com/docs/api-reference/chat"},"test_metadata":{"request_hash":"0966d0113739","trace_id":"trace-fdaaef15"},"history":{"latency_trend_7d":[752,750,748,749,751,750,874],"throughput_trend_7d":[31.9,32.1,32.0,32.2,32.0,32.0,18.31]}},{"provider":"Claude","model":"Claude Sonnet 4","metrics":{"latency_ms":1786,"throughput_tokens_per_sec":18.48},"tokens_generated":33,"elapsed_seconds":1.786,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.003,"estimated_cost_usd":9.9e-05},"model_metadata":{"release_date":"2024-10-22","context_length":200000,"hardware":"GPU","api_type":"Anthropic-native"},"infra":{"schema_url":"https://docs.anthropic.com/en/api/messages","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://docs.anthropic.com","api_docs":"https://docs.anthropic.com/en/api"},"test_metadata":{"request_hash":"72f0d8806f3a","trace_id":"trace-ae20df9a"},"history":{"latency_trend_7d":[1152,1150,1148,1149,1151,1150,1786],"throughput_trend_7d":[21.9,22.1,22.0,22.2,22.0,22.0,18.48]}},{"provider":"DeepSeek","model":"deepseek-chat","metrics":{"latency_ms":3977,"throughput_tokens_per_sec":4.27},"tokens_generated":17,"elapsed_seconds":3.977,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00027,"estimated_cost_usd":5e-06},"model_metadata":{"release_date":"2024-12-01","context_length":64000,"hardware":"GPU","api_type":"OpenAI-compatible","schema_url":"https://api-docs.deepseek.com/","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"infra":{"schema_url":"https://api-docs.deepseek.com/","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"test_metadata":{"request_hash":"de68ac14f80e","trace_id":"trace-b47d96fb"},"history":{"latency_trend_7d":[280,278,276,277,279,278,278],"throughput_trend_7d":[85.9,86.1,86.0,86.2,86.0,86.0,86.01]}}],"by_throughput":[{"provider":"OpenRouter","model":"Mistral","metrics":{"latency_ms":405,"throughput_tokens_per_sec":41.96},"tokens_generated":17,"elapsed_seconds":0.405,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00018,"estimated_cost_usd":3e-06},"model_metadata":{"release_date":"2024-02-26","context_length":32768,"hardware":"GPU","api_type":"OpenAI-compatible"},"infra":{"schema_url":"https://openrouter.ai/docs","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://openrouter.ai/docs","api_docs":"https://openrouter.ai/docs/api"},"test_metadata":{"request_hash":"2c9bea4b99a8","trace_id":"trace-5ed18973"},"history":{"latency_trend_7d":[652,650,648,649,651,650,405],"throughput_trend_7d":[41.9,42.1,42.0,42.2,42.0,42.0,41.96]}},{"provider":"Claude","model":"Claude Sonnet 4","metrics":{"latency_ms":1786,"throughput_tokens_per_sec":18.48},"tokens_generated":33,"elapsed_seconds":1.786,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.003,"estimated_cost_usd":9.9e-05},"model_metadata":{"release_date":"2024-10-22","context_length":200000,"hardware":"GPU","api_type":"Anthropic-native"},"infra":{"schema_url":"https://docs.anthropic.com/en/api/messages","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://docs.anthropic.com","api_docs":"https://docs.anthropic.com/en/api"},"test_metadata":{"request_hash":"72f0d8806f3a","trace_id":"trace-ae20df9a"},"history":{"latency_trend_7d":[1152,1150,1148,1149,1151,1150,1786],"throughput_trend_7d":[21.9,22.1,22.0,22.2,22.0,22.0,18.48]}},{"provider":"OpenAI","model":"GPT-4o","metrics":{"latency_ms":874,"throughput_tokens_per_sec":18.31},"tokens_generated":16,"elapsed_seconds":0.874,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0025,"estimated_cost_usd":4e-05},"model_metadata":{"release_date":"2024-05-13","context_length":128000,"hardware":"GPU","api_type":"OpenAI-compatible"},"infra":{"schema_url":"https://platform.openai.com/docs/api-reference","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.openai.com/docs","api_docs":"https://platform.openai.com/docs/api-reference/chat"},"test_metadata":{"request_hash":"0966d0113739","trace_id":"trace-fdaaef15"},"history":{"latency_trend_7d":[752,750,748,749,751,750,874],"throughput_trend_7d":[31.9,32.1,32.0,32.2,32.0,32.0,18.31]}},{"provider":"DeepSeek","model":"deepseek-chat","metrics":{"latency_ms":3977,"throughput_tokens_per_sec":4.27},"tokens_generated":17,"elapsed_seconds":3.977,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00027,"estimated_cost_usd":5e-06},"model_metadata":{"release_date":"2024-12-01","context_length":64000,"hardware":"GPU","api_type":"OpenAI-compatible","schema_url":"https://api-docs.deepseek.com/","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"infra":{"schema_url":"https://api-docs.deepseek.com/","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"test_metadata":{"request_hash":"de68ac14f80e","trace_id":"trace-b47d96fb"},"history":{"latency_trend_7d":[280,278,276,277,279,278,278],"throughput_trend_7d":[85.9,86.1,86.0,86.2,86.0,86.0,86.01]}}]},"fastest_latency":"OpenRouter","highest_throughput":"OpenRouter","total_tested":10,"successful_tests":4,"failed_tests":6,"performance_summary":{"best_latency_ms":405,"best_throughput_tokens_per_sec":41.96,"avg_latency_ms":1760,"avg_throughput_tokens_per_sec":20.75},"ai_guidance":{"best_for_speed":"OpenRouter","best_for_throughput":"OpenRouter","recommendation":"Use OpenRouter for lowest latency, OpenRouter for highest throughput","use_case_guidance":{"real_time_chat":"Recommended: OpenRouter (lowest latency)","bulk_generation":"Recommended: OpenRouter (highest throughput)","balanced_workload":"Consider both OpenRouter and OpenRouter"}},"human_readable_summary":"⚡ Fastest: OpenRouter (405ms - Good) | 🚀 Highest throughput: OpenRouter (42.0 tokens/sec - Moderate) | ✅ 4/10 providers responding","timestamp":"2026-04-16T01:05:21.833691Z"}