⚡ Throughput Analysis Results

← Back to Home

⚡ Fastest: OpenRouter (527ms - Moderate) | 🚀 Highest throughput: OpenRouter (32.2 tokens/sec - Moderate) | ✅ 4/10 providers responding
Full JSON Response (for developers & agents)
{
  "service": "InferenceLatency.com",
  "endpoint": "throughput",
  "description": "Combined latency and throughput benchmarking",
  "providers": [
    {
      "provider": "OpenAI",
      "model": "GPT-4o",
      "metrics": {
        "latency_ms": 4253,
        "throughput_tokens_per_sec": 4.47
      },
      "tokens_generated": 19,
      "elapsed_seconds": 4.253,
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 100.0,
        "error_rate_percent": 0.0,
        "status": "healthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.0025,
        "estimated_cost_usd": 4.8e-05
      },
      "model_metadata": {
        "release_date": "2024-05-13",
        "context_length": 128000,
        "hardware": "GPU",
        "api_type": "OpenAI-compatible"
      },
      "infra": {
        "schema_url": "https://platform.openai.com/docs/api-reference",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://platform.openai.com/docs",
        "api_docs": "https://platform.openai.com/docs/api-reference/chat"
      },
      "test_metadata": {
        "request_hash": "4e2461bd4304",
        "trace_id": "trace-f564834c"
      },
      "history": {
        "latency_trend_7d": [
          752,
          750,
          748,
          749,
          751,
          750,
          4253
        ],
        "throughput_trend_7d": [
          31.9,
          32.1,
          32.0,
          32.2,
          32.0,
          32.0,
          4.47
        ]
      }
    },
    {
      "provider": "Groq",
      "model": "Llama3-8B",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "Client error '400 Bad Request' for url 'https://api.groq.com/openai/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.00027,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2024-04-18",
        "context_length": 8192,
        "hardware": "LPU",
        "api_type": "OpenAI-compatible",
        "schema_url": "https://console.groq.com/docs/openai",
        "human_readable_url": "https://console.groq.com/docs",
        "api_docs": "https://console.groq.com/docs/api-reference"
      },
      "infra": {
        "schema_url": "https://console.groq.com/docs/openai",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://console.groq.com/docs",
        "api_docs": "https://console.groq.com/docs/api-reference"
      },
      "test_metadata": {
        "request_hash": "e2485d537c8c",
        "trace_id": "trace-b8b865cf"
      },
      "history": {
        "latency_trend_7d": [
          952,
          950,
          948,
          949,
          951,
          950,
          950
        ],
        "throughput_trend_7d": [
          121.9,
          122.1,
          122.0,
          122.2,
          122.0,
          122.0,
          122.01
        ]
      }
    },
    {
      "provider": "Claude",
      "model": "Claude Sonnet 4",
      "metrics": {
        "latency_ms": 1981,
        "throughput_tokens_per_sec": 17.16
      },
      "tokens_generated": 34,
      "elapsed_seconds": 1.981,
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 100.0,
        "error_rate_percent": 0.0,
        "status": "healthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.003,
        "estimated_cost_usd": 0.000102
      },
      "model_metadata": {
        "release_date": "2024-10-22",
        "context_length": 200000,
        "hardware": "GPU",
        "api_type": "Anthropic-native"
      },
      "infra": {
        "schema_url": "https://docs.anthropic.com/en/api/messages",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://docs.anthropic.com",
        "api_docs": "https://docs.anthropic.com/en/api"
      },
      "test_metadata": {
        "request_hash": "0b3afc2053ee",
        "trace_id": "trace-c6afa323"
      },
      "history": {
        "latency_trend_7d": [
          1152,
          1150,
          1148,
          1149,
          1151,
          1150,
          1981
        ],
        "throughput_trend_7d": [
          21.9,
          22.1,
          22.0,
          22.2,
          22.0,
          22.0,
          17.16
        ]
      }
    },
    {
      "provider": "OpenRouter",
      "model": "Mistral",
      "metrics": {
        "latency_ms": 527,
        "throughput_tokens_per_sec": 32.25
      },
      "tokens_generated": 17,
      "elapsed_seconds": 0.527,
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 100.0,
        "error_rate_percent": 0.0,
        "status": "healthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.00018,
        "estimated_cost_usd": 3e-06
      },
      "model_metadata": {
        "release_date": "2024-02-26",
        "context_length": 32768,
        "hardware": "GPU",
        "api_type": "OpenAI-compatible"
      },
      "infra": {
        "schema_url": "https://openrouter.ai/docs",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://openrouter.ai/docs",
        "api_docs": "https://openrouter.ai/docs/api"
      },
      "test_metadata": {
        "request_hash": "8057b06fdcb1",
        "trace_id": "trace-fbad41a6"
      },
      "history": {
        "latency_trend_7d": [
          652,
          650,
          648,
          649,
          651,
          650,
          527
        ],
        "throughput_trend_7d": [
          41.9,
          42.1,
          42.0,
          42.2,
          42.0,
          42.0,
          32.25
        ]
      }
    },
    {
      "provider": "Google Gemini",
      "model": "Gemini-2.0-Flash",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "404 NOT_FOUND. {'error': {'code': 404, 'message': 'models/gemini-2.0-flash-exp is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods.', 'status': 'NOT_FOUND'}}",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.00075,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2024-12-11",
        "context_length": 2000000,
        "hardware": "TPU",
        "api_type": "Google-native",
        "schema_url": "https://ai.google.dev/api",
        "human_readable_url": "https://ai.google.dev/docs",
        "api_docs": "https://ai.google.dev/api/generate-content"
      },
      "infra": {
        "schema_url": "https://ai.google.dev/api",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://ai.google.dev/docs",
        "api_docs": "https://ai.google.dev/api/generate-content"
      },
      "test_metadata": {
        "request_hash": "d0a8767fd2b3",
        "trace_id": "trace-7216ed50"
      },
      "history": {
        "latency_trend_7d": [
          352,
          350,
          348,
          349,
          351,
          350,
          350
        ],
        "throughput_trend_7d": [
          91.9,
          92.1,
          92.0,
          92.2,
          92.0,
          92.0,
          92.01
        ]
      }
    },
    {
      "provider": "Together AI",
      "model": "Llama3.1-8B-Turbo",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "Client error '400 Bad Request' for url 'https://api.together.xyz/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.0002,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2024-01-15",
        "context_length": 32768,
        "hardware": "GPU",
        "api_type": "OpenAI-compatible",
        "schema_url": "https://docs.together.ai/docs/inference-models",
        "human_readable_url": "https://docs.together.ai",
        "api_docs": "https://docs.together.ai/reference/chat-completions"
      },
      "infra": {
        "schema_url": "https://docs.together.ai/docs/inference-models",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://docs.together.ai",
        "api_docs": "https://docs.together.ai/reference/chat-completions"
      },
      "test_metadata": {
        "request_hash": "64662ceee087",
        "trace_id": "trace-7c04df79"
      },
      "history": {
        "latency_trend_7d": [
          120,
          118,
          116,
          117,
          119,
          118,
          118
        ],
        "throughput_trend_7d": [
          145.9,
          146.1,
          146.0,
          146.2,
          146.0,
          146.0,
          146.01
        ]
      }
    },
    {
      "provider": "Fireworks AI",
      "model": "Llama3.1-8B",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "Client error '404 Not Found' for url 'https://api.fireworks.ai/inference/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.0002,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2024-04-15",
        "context_length": 8192,
        "hardware": "GPU",
        "api_type": "OpenAI-compatible",
        "schema_url": "https://readme.fireworks.ai/reference/createchatcompletion",
        "human_readable_url": "https://fireworks.ai/",
        "api_docs": "https://readme.fireworks.ai/docs"
      },
      "infra": {
        "schema_url": "https://readme.fireworks.ai/reference/createchatcompletion",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://fireworks.ai/",
        "api_docs": "https://readme.fireworks.ai/docs"
      },
      "test_metadata": {
        "request_hash": "12185644a9b1",
        "trace_id": "trace-0e1c8c85"
      },
      "history": {
        "latency_trend_7d": [
          180,
          178,
          176,
          177,
          179,
          178,
          178
        ],
        "throughput_trend_7d": [
          135.9,
          136.1,
          136.0,
          136.2,
          136.0,
          136.0,
          136.01
        ]
      }
    },
    {
      "provider": "HF GPT OSS 120B (Cerebras)",
      "model": "GPT OSS 120B",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "Client error '401 Unauthorized' for url 'https://router.huggingface.co/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/401",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.0008,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2025-01-07",
        "context_length": 128000,
        "hardware": "Cerebras WSE",
        "api_type": "OpenAI-compatible",
        "schema_url": "https://huggingface.co/docs/inference-providers",
        "human_readable_url": "https://huggingface.co/openai/gpt-oss-120b",
        "api_docs": "https://huggingface.co/docs/inference-providers/en/guides/gpt-oss"
      }
    },
    {
      "provider": "DeepSeek",
      "model": "deepseek-chat",
      "metrics": {
        "latency_ms": 1901,
        "throughput_tokens_per_sec": 8.94
      },
      "tokens_generated": 17,
      "elapsed_seconds": 1.901,
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 100.0,
        "error_rate_percent": 0.0,
        "status": "healthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.00027,
        "estimated_cost_usd": 5e-06
      },
      "model_metadata": {
        "release_date": "2024-12-01",
        "context_length": 64000,
        "hardware": "GPU",
        "api_type": "OpenAI-compatible",
        "schema_url": "https://api-docs.deepseek.com/",
        "human_readable_url": "https://platform.deepseek.com/",
        "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
      },
      "infra": {
        "schema_url": "https://api-docs.deepseek.com/",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://platform.deepseek.com/",
        "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
      },
      "test_metadata": {
        "request_hash": "2e36a7849063",
        "trace_id": "trace-f3a209b8"
      },
      "history": {
        "latency_trend_7d": [
          280,
          278,
          276,
          277,
          279,
          278,
          278
        ],
        "throughput_trend_7d": [
          85.9,
          86.1,
          86.0,
          86.2,
          86.0,
          86.0,
          86.01
        ]
      }
    },
    {
      "provider": "Cohere",
      "model": "command-r",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "Client error '429 Too Many Requests' for url 'https://api.cohere.ai/v1/chat'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/429",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.00075,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2024-10-01",
        "context_length": 128000,
        "hardware": "GPU",
        "api_type": "Cohere-native",
        "schema_url": "https://docs.cohere.com/reference/chat",
        "human_readable_url": "https://docs.cohere.com/",
        "api_docs": "https://docs.cohere.com/reference/chat"
      },
      "infra": {
        "schema_url": "https://docs.cohere.com/reference/chat",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://docs.cohere.com/",
        "api_docs": "https://docs.cohere.com/reference/chat"
      },
      "test_metadata": {
        "request_hash": "19e28055256a",
        "trace_id": "trace-e32b6597"
      },
      "history": {
        "latency_trend_7d": [
          320,
          318,
          316,
          317,
          319,
          318,
          318
        ],
        "throughput_trend_7d": [
          45.9,
          46.1,
          46.0,
          46.2,
          46.0,
          46.0,
          46.01
        ]
      }
    }
  ],
  "rankings": {
    "by_latency": [
      {
        "provider": "OpenRouter",
        "model": "Mistral",
        "metrics": {
          "latency_ms": 527,
          "throughput_tokens_per_sec": 32.25
        },
        "tokens_generated": 17,
        "elapsed_seconds": 0.527,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.00018,
          "estimated_cost_usd": 3e-06
        },
        "model_metadata": {
          "release_date": "2024-02-26",
          "context_length": 32768,
          "hardware": "GPU",
          "api_type": "OpenAI-compatible"
        },
        "infra": {
          "schema_url": "https://openrouter.ai/docs",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://openrouter.ai/docs",
          "api_docs": "https://openrouter.ai/docs/api"
        },
        "test_metadata": {
          "request_hash": "8057b06fdcb1",
          "trace_id": "trace-fbad41a6"
        },
        "history": {
          "latency_trend_7d": [
            652,
            650,
            648,
            649,
            651,
            650,
            527
          ],
          "throughput_trend_7d": [
            41.9,
            42.1,
            42.0,
            42.2,
            42.0,
            42.0,
            32.25
          ]
        }
      },
      {
        "provider": "DeepSeek",
        "model": "deepseek-chat",
        "metrics": {
          "latency_ms": 1901,
          "throughput_tokens_per_sec": 8.94
        },
        "tokens_generated": 17,
        "elapsed_seconds": 1.901,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.00027,
          "estimated_cost_usd": 5e-06
        },
        "model_metadata": {
          "release_date": "2024-12-01",
          "context_length": 64000,
          "hardware": "GPU",
          "api_type": "OpenAI-compatible",
          "schema_url": "https://api-docs.deepseek.com/",
          "human_readable_url": "https://platform.deepseek.com/",
          "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
        },
        "infra": {
          "schema_url": "https://api-docs.deepseek.com/",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://platform.deepseek.com/",
          "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
        },
        "test_metadata": {
          "request_hash": "2e36a7849063",
          "trace_id": "trace-f3a209b8"
        },
        "history": {
          "latency_trend_7d": [
            280,
            278,
            276,
            277,
            279,
            278,
            278
          ],
          "throughput_trend_7d": [
            85.9,
            86.1,
            86.0,
            86.2,
            86.0,
            86.0,
            86.01
          ]
        }
      },
      {
        "provider": "Claude",
        "model": "Claude Sonnet 4",
        "metrics": {
          "latency_ms": 1981,
          "throughput_tokens_per_sec": 17.16
        },
        "tokens_generated": 34,
        "elapsed_seconds": 1.981,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.003,
          "estimated_cost_usd": 0.000102
        },
        "model_metadata": {
          "release_date": "2024-10-22",
          "context_length": 200000,
          "hardware": "GPU",
          "api_type": "Anthropic-native"
        },
        "infra": {
          "schema_url": "https://docs.anthropic.com/en/api/messages",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://docs.anthropic.com",
          "api_docs": "https://docs.anthropic.com/en/api"
        },
        "test_metadata": {
          "request_hash": "0b3afc2053ee",
          "trace_id": "trace-c6afa323"
        },
        "history": {
          "latency_trend_7d": [
            1152,
            1150,
            1148,
            1149,
            1151,
            1150,
            1981
          ],
          "throughput_trend_7d": [
            21.9,
            22.1,
            22.0,
            22.2,
            22.0,
            22.0,
            17.16
          ]
        }
      },
      {
        "provider": "OpenAI",
        "model": "GPT-4o",
        "metrics": {
          "latency_ms": 4253,
          "throughput_tokens_per_sec": 4.47
        },
        "tokens_generated": 19,
        "elapsed_seconds": 4.253,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.0025,
          "estimated_cost_usd": 4.8e-05
        },
        "model_metadata": {
          "release_date": "2024-05-13",
          "context_length": 128000,
          "hardware": "GPU",
          "api_type": "OpenAI-compatible"
        },
        "infra": {
          "schema_url": "https://platform.openai.com/docs/api-reference",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://platform.openai.com/docs",
          "api_docs": "https://platform.openai.com/docs/api-reference/chat"
        },
        "test_metadata": {
          "request_hash": "4e2461bd4304",
          "trace_id": "trace-f564834c"
        },
        "history": {
          "latency_trend_7d": [
            752,
            750,
            748,
            749,
            751,
            750,
            4253
          ],
          "throughput_trend_7d": [
            31.9,
            32.1,
            32.0,
            32.2,
            32.0,
            32.0,
            4.47
          ]
        }
      }
    ],
    "by_throughput": [
      {
        "provider": "OpenRouter",
        "model": "Mistral",
        "metrics": {
          "latency_ms": 527,
          "throughput_tokens_per_sec": 32.25
        },
        "tokens_generated": 17,
        "elapsed_seconds": 0.527,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.00018,
          "estimated_cost_usd": 3e-06
        },
        "model_metadata": {
          "release_date": "2024-02-26",
          "context_length": 32768,
          "hardware": "GPU",
          "api_type": "OpenAI-compatible"
        },
        "infra": {
          "schema_url": "https://openrouter.ai/docs",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://openrouter.ai/docs",
          "api_docs": "https://openrouter.ai/docs/api"
        },
        "test_metadata": {
          "request_hash": "8057b06fdcb1",
          "trace_id": "trace-fbad41a6"
        },
        "history": {
          "latency_trend_7d": [
            652,
            650,
            648,
            649,
            651,
            650,
            527
          ],
          "throughput_trend_7d": [
            41.9,
            42.1,
            42.0,
            42.2,
            42.0,
            42.0,
            32.25
          ]
        }
      },
      {
        "provider": "Claude",
        "model": "Claude Sonnet 4",
        "metrics": {
          "latency_ms": 1981,
          "throughput_tokens_per_sec": 17.16
        },
        "tokens_generated": 34,
        "elapsed_seconds": 1.981,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.003,
          "estimated_cost_usd": 0.000102
        },
        "model_metadata": {
          "release_date": "2024-10-22",
          "context_length": 200000,
          "hardware": "GPU",
          "api_type": "Anthropic-native"
        },
        "infra": {
          "schema_url": "https://docs.anthropic.com/en/api/messages",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://docs.anthropic.com",
          "api_docs": "https://docs.anthropic.com/en/api"
        },
        "test_metadata": {
          "request_hash": "0b3afc2053ee",
          "trace_id": "trace-c6afa323"
        },
        "history": {
          "latency_trend_7d": [
            1152,
            1150,
            1148,
            1149,
            1151,
            1150,
            1981
          ],
          "throughput_trend_7d": [
            21.9,
            22.1,
            22.0,
            22.2,
            22.0,
            22.0,
            17.16
          ]
        }
      },
      {
        "provider": "DeepSeek",
        "model": "deepseek-chat",
        "metrics": {
          "latency_ms": 1901,
          "throughput_tokens_per_sec": 8.94
        },
        "tokens_generated": 17,
        "elapsed_seconds": 1.901,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.00027,
          "estimated_cost_usd": 5e-06
        },
        "model_metadata": {
          "release_date": "2024-12-01",
          "context_length": 64000,
          "hardware": "GPU",
          "api_type": "OpenAI-compatible",
          "schema_url": "https://api-docs.deepseek.com/",
          "human_readable_url": "https://platform.deepseek.com/",
          "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
        },
        "infra": {
          "schema_url": "https://api-docs.deepseek.com/",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://platform.deepseek.com/",
          "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
        },
        "test_metadata": {
          "request_hash": "2e36a7849063",
          "trace_id": "trace-f3a209b8"
        },
        "history": {
          "latency_trend_7d": [
            280,
            278,
            276,
            277,
            279,
            278,
            278
          ],
          "throughput_trend_7d": [
            85.9,
            86.1,
            86.0,
            86.2,
            86.0,
            86.0,
            86.01
          ]
        }
      },
      {
        "provider": "OpenAI",
        "model": "GPT-4o",
        "metrics": {
          "latency_ms": 4253,
          "throughput_tokens_per_sec": 4.47
        },
        "tokens_generated": 19,
        "elapsed_seconds": 4.253,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.0025,
          "estimated_cost_usd": 4.8e-05
        },
        "model_metadata": {
          "release_date": "2024-05-13",
          "context_length": 128000,
          "hardware": "GPU",
          "api_type": "OpenAI-compatible"
        },
        "infra": {
          "schema_url": "https://platform.openai.com/docs/api-reference",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://platform.openai.com/docs",
          "api_docs": "https://platform.openai.com/docs/api-reference/chat"
        },
        "test_metadata": {
          "request_hash": "4e2461bd4304",
          "trace_id": "trace-f564834c"
        },
        "history": {
          "latency_trend_7d": [
            752,
            750,
            748,
            749,
            751,
            750,
            4253
          ],
          "throughput_trend_7d": [
            31.9,
            32.1,
            32.0,
            32.2,
            32.0,
            32.0,
            4.47
          ]
        }
      }
    ]
  },
  "fastest_latency": "OpenRouter",
  "highest_throughput": "OpenRouter",
  "total_tested": 10,
  "successful_tests": 4,
  "failed_tests": 6,
  "performance_summary": {
    "best_latency_ms": 527,
    "best_throughput_tokens_per_sec": 32.25,
    "avg_latency_ms": 2166,
    "avg_throughput_tokens_per_sec": 15.7
  },
  "ai_guidance": {
    "best_for_speed": "OpenRouter",
    "best_for_throughput": "OpenRouter",
    "recommendation": "Use OpenRouter for lowest latency, OpenRouter for highest throughput",
    "use_case_guidance": {
      "real_time_chat": "Recommended: OpenRouter (lowest latency)",
      "bulk_generation": "Recommended: OpenRouter (highest throughput)",
      "balanced_workload": "Consider both OpenRouter and OpenRouter"
    }
  },
  "human_readable_summary": "\u26a1 Fastest: OpenRouter (527ms - Moderate) | \ud83d\ude80 Highest throughput: OpenRouter (32.2 tokens/sec - Moderate) | \u2705 4/10 providers responding",
  "timestamp": "2026-04-16T01:05:14.308783Z"
}