⚡ Throughput Analysis Results

← Back to Home

⚡ Fastest: DeepSeek (1031ms - Moderate) | 🚀 Highest throughput: Claude (20.4 tokens/sec - Moderate) | ✅ 3/10 providers responding
Full JSON Response (for developers & agents)
{
  "service": "InferenceLatency.com",
  "endpoint": "throughput",
  "description": "Combined latency and throughput benchmarking",
  "providers": [
    {
      "provider": "OpenAI",
      "model": "GPT-4o",
      "metrics": {
        "latency_ms": 1414,
        "throughput_tokens_per_sec": 12.02
      },
      "tokens_generated": 17,
      "elapsed_seconds": 1.414,
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 100.0,
        "error_rate_percent": 0.0,
        "status": "healthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.0025,
        "estimated_cost_usd": 4.3e-05
      },
      "model_metadata": {
        "release_date": "2024-05-13",
        "context_length": 128000,
        "hardware": "GPU",
        "api_type": "OpenAI-compatible"
      },
      "infra": {
        "schema_url": "https://platform.openai.com/docs/api-reference",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://platform.openai.com/docs",
        "api_docs": "https://platform.openai.com/docs/api-reference/chat"
      },
      "test_metadata": {
        "request_hash": "28de04c6483e",
        "trace_id": "trace-dd3f15d7"
      },
      "history": {
        "latency_trend_7d": [
          752,
          750,
          748,
          749,
          751,
          750,
          1414
        ],
        "throughput_trend_7d": [
          31.9,
          32.1,
          32.0,
          32.2,
          32.0,
          32.0,
          12.02
        ]
      }
    },
    {
      "provider": "Groq",
      "model": "Llama3-8B",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "Client error '400 Bad Request' for url 'https://api.groq.com/openai/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.00027,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2024-04-18",
        "context_length": 8192,
        "hardware": "LPU",
        "api_type": "OpenAI-compatible",
        "schema_url": "https://console.groq.com/docs/openai",
        "human_readable_url": "https://console.groq.com/docs",
        "api_docs": "https://console.groq.com/docs/api-reference"
      },
      "infra": {
        "schema_url": "https://console.groq.com/docs/openai",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://console.groq.com/docs",
        "api_docs": "https://console.groq.com/docs/api-reference"
      },
      "test_metadata": {
        "request_hash": "41776aa672ce",
        "trace_id": "trace-15eaed66"
      },
      "history": {
        "latency_trend_7d": [
          952,
          950,
          948,
          949,
          951,
          950,
          950
        ],
        "throughput_trend_7d": [
          121.9,
          122.1,
          122.0,
          122.2,
          122.0,
          122.0,
          122.01
        ]
      }
    },
    {
      "provider": "Claude",
      "model": "Claude Sonnet 4",
      "metrics": {
        "latency_ms": 1475,
        "throughput_tokens_per_sec": 20.35
      },
      "tokens_generated": 30,
      "elapsed_seconds": 1.475,
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 100.0,
        "error_rate_percent": 0.0,
        "status": "healthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.003,
        "estimated_cost_usd": 9e-05
      },
      "model_metadata": {
        "release_date": "2024-10-22",
        "context_length": 200000,
        "hardware": "GPU",
        "api_type": "Anthropic-native"
      },
      "infra": {
        "schema_url": "https://docs.anthropic.com/en/api/messages",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://docs.anthropic.com",
        "api_docs": "https://docs.anthropic.com/en/api"
      },
      "test_metadata": {
        "request_hash": "1034e0de174b",
        "trace_id": "trace-47410a03"
      },
      "history": {
        "latency_trend_7d": [
          1152,
          1150,
          1148,
          1149,
          1151,
          1150,
          1475
        ],
        "throughput_trend_7d": [
          21.9,
          22.1,
          22.0,
          22.2,
          22.0,
          22.0,
          20.35
        ]
      }
    },
    {
      "provider": "OpenRouter",
      "model": "Mistral",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "Client error '404 Not Found' for url 'https://openrouter.ai/api/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.00018,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2024-02-26",
        "context_length": 32768,
        "hardware": "GPU",
        "api_type": "OpenAI-compatible",
        "schema_url": "https://openrouter.ai/docs",
        "human_readable_url": "https://openrouter.ai/docs",
        "api_docs": "https://openrouter.ai/docs/api"
      },
      "infra": {
        "schema_url": "https://openrouter.ai/docs",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://openrouter.ai/docs",
        "api_docs": "https://openrouter.ai/docs/api"
      },
      "test_metadata": {
        "request_hash": "44c11bcf4016",
        "trace_id": "trace-25a338d4"
      },
      "history": {
        "latency_trend_7d": [
          652,
          650,
          648,
          649,
          651,
          650,
          650
        ],
        "throughput_trend_7d": [
          41.9,
          42.1,
          42.0,
          42.2,
          42.0,
          42.0,
          42.01
        ]
      }
    },
    {
      "provider": "Google Gemini",
      "model": "Gemini-2.0-Flash",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "404 NOT_FOUND. {'error': {'code': 404, 'message': 'models/gemini-2.0-flash-exp is not found for API version v1beta, or is not supported for generateContent. Call ModelService.ListModels to see the list of available models and their supported methods.', 'status': 'NOT_FOUND'}}",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.00075,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2024-12-11",
        "context_length": 2000000,
        "hardware": "TPU",
        "api_type": "Google-native",
        "schema_url": "https://ai.google.dev/api",
        "human_readable_url": "https://ai.google.dev/docs",
        "api_docs": "https://ai.google.dev/api/generate-content"
      },
      "infra": {
        "schema_url": "https://ai.google.dev/api",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://ai.google.dev/docs",
        "api_docs": "https://ai.google.dev/api/generate-content"
      },
      "test_metadata": {
        "request_hash": "e1d7e905716b",
        "trace_id": "trace-568f501f"
      },
      "history": {
        "latency_trend_7d": [
          352,
          350,
          348,
          349,
          351,
          350,
          350
        ],
        "throughput_trend_7d": [
          91.9,
          92.1,
          92.0,
          92.2,
          92.0,
          92.0,
          92.01
        ]
      }
    },
    {
      "provider": "Together AI",
      "model": "Llama3.1-8B-Turbo",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "Client error '400 Bad Request' for url 'https://api.together.xyz/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.0002,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2024-01-15",
        "context_length": 32768,
        "hardware": "GPU",
        "api_type": "OpenAI-compatible",
        "schema_url": "https://docs.together.ai/docs/inference-models",
        "human_readable_url": "https://docs.together.ai",
        "api_docs": "https://docs.together.ai/reference/chat-completions"
      },
      "infra": {
        "schema_url": "https://docs.together.ai/docs/inference-models",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://docs.together.ai",
        "api_docs": "https://docs.together.ai/reference/chat-completions"
      },
      "test_metadata": {
        "request_hash": "a3cc20a5cc3c",
        "trace_id": "trace-30055fd3"
      },
      "history": {
        "latency_trend_7d": [
          120,
          118,
          116,
          117,
          119,
          118,
          118
        ],
        "throughput_trend_7d": [
          145.9,
          146.1,
          146.0,
          146.2,
          146.0,
          146.0,
          146.01
        ]
      }
    },
    {
      "provider": "Fireworks AI",
      "model": "Llama3.1-8B",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "Client error '404 Not Found' for url 'https://api.fireworks.ai/inference/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.0002,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2024-04-15",
        "context_length": 8192,
        "hardware": "GPU",
        "api_type": "OpenAI-compatible",
        "schema_url": "https://readme.fireworks.ai/reference/createchatcompletion",
        "human_readable_url": "https://fireworks.ai/",
        "api_docs": "https://readme.fireworks.ai/docs"
      },
      "infra": {
        "schema_url": "https://readme.fireworks.ai/reference/createchatcompletion",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://fireworks.ai/",
        "api_docs": "https://readme.fireworks.ai/docs"
      },
      "test_metadata": {
        "request_hash": "1a0f7acd5832",
        "trace_id": "trace-1d358481"
      },
      "history": {
        "latency_trend_7d": [
          180,
          178,
          176,
          177,
          179,
          178,
          178
        ],
        "throughput_trend_7d": [
          135.9,
          136.1,
          136.0,
          136.2,
          136.0,
          136.0,
          136.01
        ]
      }
    },
    {
      "provider": "HF GPT OSS 120B (Cerebras)",
      "model": "GPT OSS 120B",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "Client error '401 Unauthorized' for url 'https://router.huggingface.co/v1/chat/completions'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/401",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.0008,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2025-01-07",
        "context_length": 128000,
        "hardware": "Cerebras WSE",
        "api_type": "OpenAI-compatible",
        "schema_url": "https://huggingface.co/docs/inference-providers",
        "human_readable_url": "https://huggingface.co/openai/gpt-oss-120b",
        "api_docs": "https://huggingface.co/docs/inference-providers/en/guides/gpt-oss"
      }
    },
    {
      "provider": "DeepSeek",
      "model": "deepseek-chat",
      "metrics": {
        "latency_ms": 1031,
        "throughput_tokens_per_sec": 15.52
      },
      "tokens_generated": 16,
      "elapsed_seconds": 1.031,
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 100.0,
        "error_rate_percent": 0.0,
        "status": "healthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.00027,
        "estimated_cost_usd": 4e-06
      },
      "model_metadata": {
        "release_date": "2024-12-01",
        "context_length": 64000,
        "hardware": "GPU",
        "api_type": "OpenAI-compatible",
        "schema_url": "https://api-docs.deepseek.com/",
        "human_readable_url": "https://platform.deepseek.com/",
        "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
      },
      "infra": {
        "schema_url": "https://api-docs.deepseek.com/",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://platform.deepseek.com/",
        "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
      },
      "test_metadata": {
        "request_hash": "4a3e74e7467b",
        "trace_id": "trace-345d340e"
      },
      "history": {
        "latency_trend_7d": [
          280,
          278,
          276,
          277,
          279,
          278,
          278
        ],
        "throughput_trend_7d": [
          85.9,
          86.1,
          86.0,
          86.2,
          86.0,
          86.0,
          86.01
        ]
      }
    },
    {
      "provider": "Cohere",
      "model": "command-r",
      "metrics": {
        "latency_ms": null,
        "throughput_tokens_per_sec": null
      },
      "error": "Client error '404 Not Found' for url 'https://api.cohere.ai/v1/chat'\nFor more information check: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/404",
      "health": {
        "cold_start_latency_ms": null,
        "warm_start_latency_ms": 750,
        "availability_percent": 0.0,
        "error_rate_percent": 100.0,
        "status": "unhealthy",
        "is_cold_start": false
      },
      "cost_estimate": {
        "cost_per_1k_tokens_usd": 0.00075,
        "estimated_cost_usd": 0.0
      },
      "model_metadata": {
        "release_date": "2024-10-01",
        "context_length": 128000,
        "hardware": "GPU",
        "api_type": "Cohere-native",
        "schema_url": "https://docs.cohere.com/reference/chat",
        "human_readable_url": "https://docs.cohere.com/",
        "api_docs": "https://docs.cohere.com/reference/chat"
      },
      "infra": {
        "schema_url": "https://docs.cohere.com/reference/chat",
        "plugin_manifest": "/.well-known/ai-plugin.json",
        "human_readable_url": "https://docs.cohere.com/",
        "api_docs": "https://docs.cohere.com/reference/chat"
      },
      "test_metadata": {
        "request_hash": "2e6a41d8c6a4",
        "trace_id": "trace-367672ab"
      },
      "history": {
        "latency_trend_7d": [
          320,
          318,
          316,
          317,
          319,
          318,
          318
        ],
        "throughput_trend_7d": [
          45.9,
          46.1,
          46.0,
          46.2,
          46.0,
          46.0,
          46.01
        ]
      }
    }
  ],
  "rankings": {
    "by_latency": [
      {
        "provider": "DeepSeek",
        "model": "deepseek-chat",
        "metrics": {
          "latency_ms": 1031,
          "throughput_tokens_per_sec": 15.52
        },
        "tokens_generated": 16,
        "elapsed_seconds": 1.031,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.00027,
          "estimated_cost_usd": 4e-06
        },
        "model_metadata": {
          "release_date": "2024-12-01",
          "context_length": 64000,
          "hardware": "GPU",
          "api_type": "OpenAI-compatible",
          "schema_url": "https://api-docs.deepseek.com/",
          "human_readable_url": "https://platform.deepseek.com/",
          "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
        },
        "infra": {
          "schema_url": "https://api-docs.deepseek.com/",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://platform.deepseek.com/",
          "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
        },
        "test_metadata": {
          "request_hash": "4a3e74e7467b",
          "trace_id": "trace-345d340e"
        },
        "history": {
          "latency_trend_7d": [
            280,
            278,
            276,
            277,
            279,
            278,
            278
          ],
          "throughput_trend_7d": [
            85.9,
            86.1,
            86.0,
            86.2,
            86.0,
            86.0,
            86.01
          ]
        }
      },
      {
        "provider": "OpenAI",
        "model": "GPT-4o",
        "metrics": {
          "latency_ms": 1414,
          "throughput_tokens_per_sec": 12.02
        },
        "tokens_generated": 17,
        "elapsed_seconds": 1.414,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.0025,
          "estimated_cost_usd": 4.3e-05
        },
        "model_metadata": {
          "release_date": "2024-05-13",
          "context_length": 128000,
          "hardware": "GPU",
          "api_type": "OpenAI-compatible"
        },
        "infra": {
          "schema_url": "https://platform.openai.com/docs/api-reference",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://platform.openai.com/docs",
          "api_docs": "https://platform.openai.com/docs/api-reference/chat"
        },
        "test_metadata": {
          "request_hash": "28de04c6483e",
          "trace_id": "trace-dd3f15d7"
        },
        "history": {
          "latency_trend_7d": [
            752,
            750,
            748,
            749,
            751,
            750,
            1414
          ],
          "throughput_trend_7d": [
            31.9,
            32.1,
            32.0,
            32.2,
            32.0,
            32.0,
            12.02
          ]
        }
      },
      {
        "provider": "Claude",
        "model": "Claude Sonnet 4",
        "metrics": {
          "latency_ms": 1475,
          "throughput_tokens_per_sec": 20.35
        },
        "tokens_generated": 30,
        "elapsed_seconds": 1.475,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.003,
          "estimated_cost_usd": 9e-05
        },
        "model_metadata": {
          "release_date": "2024-10-22",
          "context_length": 200000,
          "hardware": "GPU",
          "api_type": "Anthropic-native"
        },
        "infra": {
          "schema_url": "https://docs.anthropic.com/en/api/messages",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://docs.anthropic.com",
          "api_docs": "https://docs.anthropic.com/en/api"
        },
        "test_metadata": {
          "request_hash": "1034e0de174b",
          "trace_id": "trace-47410a03"
        },
        "history": {
          "latency_trend_7d": [
            1152,
            1150,
            1148,
            1149,
            1151,
            1150,
            1475
          ],
          "throughput_trend_7d": [
            21.9,
            22.1,
            22.0,
            22.2,
            22.0,
            22.0,
            20.35
          ]
        }
      }
    ],
    "by_throughput": [
      {
        "provider": "Claude",
        "model": "Claude Sonnet 4",
        "metrics": {
          "latency_ms": 1475,
          "throughput_tokens_per_sec": 20.35
        },
        "tokens_generated": 30,
        "elapsed_seconds": 1.475,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.003,
          "estimated_cost_usd": 9e-05
        },
        "model_metadata": {
          "release_date": "2024-10-22",
          "context_length": 200000,
          "hardware": "GPU",
          "api_type": "Anthropic-native"
        },
        "infra": {
          "schema_url": "https://docs.anthropic.com/en/api/messages",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://docs.anthropic.com",
          "api_docs": "https://docs.anthropic.com/en/api"
        },
        "test_metadata": {
          "request_hash": "1034e0de174b",
          "trace_id": "trace-47410a03"
        },
        "history": {
          "latency_trend_7d": [
            1152,
            1150,
            1148,
            1149,
            1151,
            1150,
            1475
          ],
          "throughput_trend_7d": [
            21.9,
            22.1,
            22.0,
            22.2,
            22.0,
            22.0,
            20.35
          ]
        }
      },
      {
        "provider": "DeepSeek",
        "model": "deepseek-chat",
        "metrics": {
          "latency_ms": 1031,
          "throughput_tokens_per_sec": 15.52
        },
        "tokens_generated": 16,
        "elapsed_seconds": 1.031,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.00027,
          "estimated_cost_usd": 4e-06
        },
        "model_metadata": {
          "release_date": "2024-12-01",
          "context_length": 64000,
          "hardware": "GPU",
          "api_type": "OpenAI-compatible",
          "schema_url": "https://api-docs.deepseek.com/",
          "human_readable_url": "https://platform.deepseek.com/",
          "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
        },
        "infra": {
          "schema_url": "https://api-docs.deepseek.com/",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://platform.deepseek.com/",
          "api_docs": "https://api-docs.deepseek.com/api/create-chat-completion"
        },
        "test_metadata": {
          "request_hash": "4a3e74e7467b",
          "trace_id": "trace-345d340e"
        },
        "history": {
          "latency_trend_7d": [
            280,
            278,
            276,
            277,
            279,
            278,
            278
          ],
          "throughput_trend_7d": [
            85.9,
            86.1,
            86.0,
            86.2,
            86.0,
            86.0,
            86.01
          ]
        }
      },
      {
        "provider": "OpenAI",
        "model": "GPT-4o",
        "metrics": {
          "latency_ms": 1414,
          "throughput_tokens_per_sec": 12.02
        },
        "tokens_generated": 17,
        "elapsed_seconds": 1.414,
        "health": {
          "cold_start_latency_ms": null,
          "warm_start_latency_ms": 750,
          "availability_percent": 100.0,
          "error_rate_percent": 0.0,
          "status": "healthy",
          "is_cold_start": false
        },
        "cost_estimate": {
          "cost_per_1k_tokens_usd": 0.0025,
          "estimated_cost_usd": 4.3e-05
        },
        "model_metadata": {
          "release_date": "2024-05-13",
          "context_length": 128000,
          "hardware": "GPU",
          "api_type": "OpenAI-compatible"
        },
        "infra": {
          "schema_url": "https://platform.openai.com/docs/api-reference",
          "plugin_manifest": "/.well-known/ai-plugin.json",
          "human_readable_url": "https://platform.openai.com/docs",
          "api_docs": "https://platform.openai.com/docs/api-reference/chat"
        },
        "test_metadata": {
          "request_hash": "28de04c6483e",
          "trace_id": "trace-dd3f15d7"
        },
        "history": {
          "latency_trend_7d": [
            752,
            750,
            748,
            749,
            751,
            750,
            1414
          ],
          "throughput_trend_7d": [
            31.9,
            32.1,
            32.0,
            32.2,
            32.0,
            32.0,
            12.02
          ]
        }
      }
    ]
  },
  "fastest_latency": "DeepSeek",
  "highest_throughput": "Claude",
  "total_tested": 10,
  "successful_tests": 3,
  "failed_tests": 7,
  "performance_summary": {
    "best_latency_ms": 1031,
    "best_throughput_tokens_per_sec": 20.35,
    "avg_latency_ms": 1307,
    "avg_throughput_tokens_per_sec": 15.96
  },
  "ai_guidance": {
    "best_for_speed": "DeepSeek",
    "best_for_throughput": "Claude",
    "recommendation": "Use DeepSeek for lowest latency, Claude for highest throughput",
    "use_case_guidance": {
      "real_time_chat": "Recommended: DeepSeek (lowest latency)",
      "bulk_generation": "Recommended: Claude (highest throughput)",
      "balanced_workload": "Consider both DeepSeek and Claude"
    }
  },
  "human_readable_summary": "\u26a1 Fastest: DeepSeek (1031ms - Moderate) | \ud83d\ude80 Highest throughput: Claude (20.4 tokens/sec - Moderate) | \u2705 3/10 providers responding",
  "timestamp": "2026-05-31T02:44:24.719759Z"
}