{"service":"InferenceLatency.com","endpoint":"throughput","description":"Combined latency and throughput benchmarking","providers":[{"provider":"OpenAI","model":"GPT-4o","metrics":{"latency_ms":1970,"throughput_tokens_per_sec":10.15},"tokens_generated":20,"elapsed_seconds":1.97,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0025,"estimated_cost_usd":5e-05},"model_metadata":{"release_date":"2024-05-13","context_length":128000,"hardware":"GPU","api_type":"OpenAI-compatible"},"infra":{"schema_url":"https://platform.openai.com/docs/api-reference","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.openai.com/docs","api_docs":"https://platform.openai.com/docs/api-reference/chat"},"test_metadata":{"request_hash":"e0cf1e4206a4","trace_id":"trace-0de39823"},"history":{"latency_trend_7d":[752,750,748,749,751,750,1970],"throughput_trend_7d":[31.9,32.1,32.0,32.2,32.0,32.0,10.15]}},{"provider":"Groq","model":"Llama3-8B","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"Client error '400 Bad Request' for url 'https://api.groq.com/openai/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00027,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2024-04-18","context_length":8192,"hardware":"LPU","api_type":"OpenAI-compatible","schema_url":"https://console.groq.com/docs/openai","human_readable_url":"https://console.groq.com/docs","api_docs":"https://console.groq.com/docs/api-reference"},"infra":{"schema_url":"https://console.groq.com/docs/openai","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://console.groq.com/docs","api_docs":"https://console.groq.com/docs/api-reference"},"test_metadata":{"request_hash":"ea5c95539820","trace_id":"trace-f34d409a"},"history":{"latency_trend_7d":[952,950,948,949,951,950,950],"throughput_trend_7d":[121.9,122.1,122.0,122.2,122.0,122.0,122.01]}},{"provider":"Claude","model":"Claude Sonnet 4","metrics":{"latency_ms":1497,"throughput_tokens_per_sec":16.69},"tokens_generated":25,"elapsed_seconds":1.497,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.003,"estimated_cost_usd":7.5e-05},"model_metadata":{"release_date":"2024-10-22","context_length":200000,"hardware":"GPU","api_type":"Anthropic-native"},"infra":{"schema_url":"https://docs.anthropic.com/en/api/messages","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://docs.anthropic.com","api_docs":"https://docs.anthropic.com/en/api"},"test_metadata":{"request_hash":"8e3383e876d0","trace_id":"trace-addaec92"},"history":{"latency_trend_7d":[1152,1150,1148,1149,1151,1150,1497],"throughput_trend_7d":[21.9,22.1,22.0,22.2,22.0,22.0,16.69]}},{"provider":"OpenRouter","model":"Mistral","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"Client error '404 Not Found' for url 'https://openrouter.ai/api/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00018,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2024-02-26","context_length":32768,"hardware":"GPU","api_type":"OpenAI-compatible","schema_url":"https://openrouter.ai/docs","human_readable_url":"https://openrouter.ai/docs","api_docs":"https://openrouter.ai/docs/api"},"infra":{"schema_url":"https://openrouter.ai/docs","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://openrouter.ai/docs","api_docs":"https://openrouter.ai/docs/api"},"test_metadata":{"request_hash":"613d94d0d357","trace_id":"trace-aabe6a9b"},"history":{"latency_trend_7d":[652,650,648,649,651,650,650],"throughput_trend_7d":[41.9,42.1,42.0,42.2,42.0,42.0,42.01]}},{"provider":"Google Gemini","model":"Gemini-2.0-Flash","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"404 NOT_FOUND. {'error': {'code': 404, 'message': 'models/gemini-2.0-flash-exp is not found for API version v1beta, or is not supported for generateContent. Call ModelService.ListModels to see the list of available models and their supported methods.', 'status': 'NOT_FOUND'}}","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00075,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2024-12-11","context_length":2000000,"hardware":"TPU","api_type":"Google-native","schema_url":"https://ai.google.dev/api","human_readable_url":"https://ai.google.dev/docs","api_docs":"https://ai.google.dev/api/generate-content"},"infra":{"schema_url":"https://ai.google.dev/api","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://ai.google.dev/docs","api_docs":"https://ai.google.dev/api/generate-content"},"test_metadata":{"request_hash":"1c90ec2321e5","trace_id":"trace-31763732"},"history":{"latency_trend_7d":[352,350,348,349,351,350,350],"throughput_trend_7d":[91.9,92.1,92.0,92.2,92.0,92.0,92.01]}},{"provider":"Together AI","model":"Llama3.1-8B-Turbo","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"Client error '400 Bad Request' for url 'https://api.together.xyz/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0002,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2024-01-15","context_length":32768,"hardware":"GPU","api_type":"OpenAI-compatible","schema_url":"https://docs.together.ai/docs/inference-models","human_readable_url":"https://docs.together.ai","api_docs":"https://docs.together.ai/reference/chat-completions"},"infra":{"schema_url":"https://docs.together.ai/docs/inference-models","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://docs.together.ai","api_docs":"https://docs.together.ai/reference/chat-completions"},"test_metadata":{"request_hash":"461c977be1e5","trace_id":"trace-ece48de7"},"history":{"latency_trend_7d":[120,118,116,117,119,118,118],"throughput_trend_7d":[145.9,146.1,146.0,146.2,146.0,146.0,146.01]}},{"provider":"Fireworks AI","model":"Llama3.1-8B","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"Client error '404 Not Found' for url 'https://api.fireworks.ai/inference/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0002,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2024-04-15","context_length":8192,"hardware":"GPU","api_type":"OpenAI-compatible","schema_url":"https://readme.fireworks.ai/reference/createchatcompletion","human_readable_url":"https://fireworks.ai/","api_docs":"https://readme.fireworks.ai/docs"},"infra":{"schema_url":"https://readme.fireworks.ai/reference/createchatcompletion","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://fireworks.ai/","api_docs":"https://readme.fireworks.ai/docs"},"test_metadata":{"request_hash":"f60d654659aa","trace_id":"trace-e53a02b4"},"history":{"latency_trend_7d":[180,178,176,177,179,178,178],"throughput_trend_7d":[135.9,136.1,136.0,136.2,136.0,136.0,136.01]}},{"provider":"HF GPT OSS 120B (Cerebras)","model":"GPT OSS 120B","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"Client error '401 Unauthorized' for url 'https://router.huggingface.co/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/401","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0008,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2025-01-07","context_length":128000,"hardware":"Cerebras WSE","api_type":"OpenAI-compatible","schema_url":"https://huggingface.co/docs/inference-providers","human_readable_url":"https://huggingface.co/openai/gpt-oss-120b","api_docs":"https://huggingface.co/docs/inference-providers/en/guides/gpt-oss"}},{"provider":"DeepSeek","model":"deepseek-chat","metrics":{"latency_ms":1654,"throughput_tokens_per_sec":9.68},"tokens_generated":16,"elapsed_seconds":1.654,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00027,"estimated_cost_usd":4e-06},"model_metadata":{"release_date":"2024-12-01","context_length":64000,"hardware":"GPU","api_type":"OpenAI-compatible","schema_url":"https://api-docs.deepseek.com/","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"infra":{"schema_url":"https://api-docs.deepseek.com/","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"test_metadata":{"request_hash":"eae8f2d26f1a","trace_id":"trace-e182f142"},"history":{"latency_trend_7d":[280,278,276,277,279,278,278],"throughput_trend_7d":[85.9,86.1,86.0,86.2,86.0,86.0,86.01]}},{"provider":"Cohere","model":"command-r","metrics":{"latency_ms":null,"throughput_tokens_per_sec":null},"error":"Client error '404 Not Found' for url 'https://api.cohere.ai/v1/chat'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404","health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":0.0,"error_rate_percent":100.0,"status":"unhealthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00075,"estimated_cost_usd":0.0},"model_metadata":{"release_date":"2024-10-01","context_length":128000,"hardware":"GPU","api_type":"Cohere-native","schema_url":"https://docs.cohere.com/reference/chat","human_readable_url":"https://docs.cohere.com/","api_docs":"https://docs.cohere.com/reference/chat"},"infra":{"schema_url":"https://docs.cohere.com/reference/chat","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://docs.cohere.com/","api_docs":"https://docs.cohere.com/reference/chat"},"test_metadata":{"request_hash":"845760df04e3","trace_id":"trace-5077b326"},"history":{"latency_trend_7d":[320,318,316,317,319,318,318],"throughput_trend_7d":[45.9,46.1,46.0,46.2,46.0,46.0,46.01]}}],"rankings":{"by_latency":[{"provider":"Claude","model":"Claude Sonnet 4","metrics":{"latency_ms":1497,"throughput_tokens_per_sec":16.69},"tokens_generated":25,"elapsed_seconds":1.497,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.003,"estimated_cost_usd":7.5e-05},"model_metadata":{"release_date":"2024-10-22","context_length":200000,"hardware":"GPU","api_type":"Anthropic-native"},"infra":{"schema_url":"https://docs.anthropic.com/en/api/messages","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://docs.anthropic.com","api_docs":"https://docs.anthropic.com/en/api"},"test_metadata":{"request_hash":"8e3383e876d0","trace_id":"trace-addaec92"},"history":{"latency_trend_7d":[1152,1150,1148,1149,1151,1150,1497],"throughput_trend_7d":[21.9,22.1,22.0,22.2,22.0,22.0,16.69]}},{"provider":"DeepSeek","model":"deepseek-chat","metrics":{"latency_ms":1654,"throughput_tokens_per_sec":9.68},"tokens_generated":16,"elapsed_seconds":1.654,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00027,"estimated_cost_usd":4e-06},"model_metadata":{"release_date":"2024-12-01","context_length":64000,"hardware":"GPU","api_type":"OpenAI-compatible","schema_url":"https://api-docs.deepseek.com/","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"infra":{"schema_url":"https://api-docs.deepseek.com/","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"test_metadata":{"request_hash":"eae8f2d26f1a","trace_id":"trace-e182f142"},"history":{"latency_trend_7d":[280,278,276,277,279,278,278],"throughput_trend_7d":[85.9,86.1,86.0,86.2,86.0,86.0,86.01]}},{"provider":"OpenAI","model":"GPT-4o","metrics":{"latency_ms":1970,"throughput_tokens_per_sec":10.15},"tokens_generated":20,"elapsed_seconds":1.97,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0025,"estimated_cost_usd":5e-05},"model_metadata":{"release_date":"2024-05-13","context_length":128000,"hardware":"GPU","api_type":"OpenAI-compatible"},"infra":{"schema_url":"https://platform.openai.com/docs/api-reference","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.openai.com/docs","api_docs":"https://platform.openai.com/docs/api-reference/chat"},"test_metadata":{"request_hash":"e0cf1e4206a4","trace_id":"trace-0de39823"},"history":{"latency_trend_7d":[752,750,748,749,751,750,1970],"throughput_trend_7d":[31.9,32.1,32.0,32.2,32.0,32.0,10.15]}}],"by_throughput":[{"provider":"Claude","model":"Claude Sonnet 4","metrics":{"latency_ms":1497,"throughput_tokens_per_sec":16.69},"tokens_generated":25,"elapsed_seconds":1.497,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.003,"estimated_cost_usd":7.5e-05},"model_metadata":{"release_date":"2024-10-22","context_length":200000,"hardware":"GPU","api_type":"Anthropic-native"},"infra":{"schema_url":"https://docs.anthropic.com/en/api/messages","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://docs.anthropic.com","api_docs":"https://docs.anthropic.com/en/api"},"test_metadata":{"request_hash":"8e3383e876d0","trace_id":"trace-addaec92"},"history":{"latency_trend_7d":[1152,1150,1148,1149,1151,1150,1497],"throughput_trend_7d":[21.9,22.1,22.0,22.2,22.0,22.0,16.69]}},{"provider":"OpenAI","model":"GPT-4o","metrics":{"latency_ms":1970,"throughput_tokens_per_sec":10.15},"tokens_generated":20,"elapsed_seconds":1.97,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.0025,"estimated_cost_usd":5e-05},"model_metadata":{"release_date":"2024-05-13","context_length":128000,"hardware":"GPU","api_type":"OpenAI-compatible"},"infra":{"schema_url":"https://platform.openai.com/docs/api-reference","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.openai.com/docs","api_docs":"https://platform.openai.com/docs/api-reference/chat"},"test_metadata":{"request_hash":"e0cf1e4206a4","trace_id":"trace-0de39823"},"history":{"latency_trend_7d":[752,750,748,749,751,750,1970],"throughput_trend_7d":[31.9,32.1,32.0,32.2,32.0,32.0,10.15]}},{"provider":"DeepSeek","model":"deepseek-chat","metrics":{"latency_ms":1654,"throughput_tokens_per_sec":9.68},"tokens_generated":16,"elapsed_seconds":1.654,"health":{"cold_start_latency_ms":null,"warm_start_latency_ms":750,"availability_percent":100.0,"error_rate_percent":0.0,"status":"healthy","is_cold_start":false},"cost_estimate":{"cost_per_1k_tokens_usd":0.00027,"estimated_cost_usd":4e-06},"model_metadata":{"release_date":"2024-12-01","context_length":64000,"hardware":"GPU","api_type":"OpenAI-compatible","schema_url":"https://api-docs.deepseek.com/","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"infra":{"schema_url":"https://api-docs.deepseek.com/","plugin_manifest":"/.well-known/ai-plugin.json","human_readable_url":"https://platform.deepseek.com/","api_docs":"https://api-docs.deepseek.com/api/create-chat-completion"},"test_metadata":{"request_hash":"eae8f2d26f1a","trace_id":"trace-e182f142"},"history":{"latency_trend_7d":[280,278,276,277,279,278,278],"throughput_trend_7d":[85.9,86.1,86.0,86.2,86.0,86.0,86.01]}}]},"fastest_latency":"Claude","highest_throughput":"Claude","total_tested":10,"successful_tests":3,"failed_tests":7,"performance_summary":{"best_latency_ms":1497,"best_throughput_tokens_per_sec":16.69,"avg_latency_ms":1707,"avg_throughput_tokens_per_sec":12.17},"ai_guidance":{"best_for_speed":"Claude","best_for_throughput":"Claude","recommendation":"Use Claude for lowest latency, Claude for highest throughput","use_case_guidance":{"real_time_chat":"Recommended: Claude (lowest latency)","bulk_generation":"Recommended: Claude (highest throughput)","balanced_workload":"Consider both Claude and Claude"}},"human_readable_summary":"⚡ Fastest: Claude (1497ms - Moderate) | 🚀 Highest throughput: Claude (16.7 tokens/sec - Standard) | ✅ 3/10 providers responding","timestamp":"2026-05-31T17:24:07.344653Z"}