🚀 AI Latency Test Results

← Back to Home

🏆 Fastest: Cohere (645ms - Moderate) | 📊 Range: 645-3294ms | 📈 Average: 1738ms | ✅ 12/16 providers responding
Full JSON Response (for developers & agents)
{
  "service": "InferenceLatency.com",
  "providers": [
    {
      "name": "Google Gemini",
      "model": "gemini-2.5-flash",
      "latency_ms": 3294,
      "tokens_generated": 1,
      "success_rate": 50.0,
      "measurements_count": 2
    },
    {
      "name": "Fireworks AI",
      "model": "accounts/fireworks/models/deepseek-v3p2",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 4
    },
    {
      "name": "Cerebras",
      "model": "llama3.1-8b",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 4
    },
    {
      "name": "Groq",
      "model": "llama-3.3-70b-versatile",
      "latency_ms": 871,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Cohere",
      "model": "command-a-03-2025",
      "latency_ms": 645,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "SambaNova",
      "model": "Meta-Llama-3.1-8B-Instruct",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 4
    },
    {
      "name": "Perplexity",
      "model": "sonar",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 4
    },
    {
      "name": "Together AI",
      "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
      "latency_ms": 976,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "OpenRouter",
      "model": "mistralai/mistral-small-3.2-24b-instruct",
      "latency_ms": 1910,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Nvidia NIM",
      "model": "meta/llama-3.1-8b-instruct",
      "latency_ms": 1002,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Mistral AI",
      "model": "mistral-small-latest",
      "latency_ms": 1020,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Hyperbolic",
      "model": "meta-llama/Llama-3.3-70B-Instruct",
      "latency_ms": 2092,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "DeepSeek",
      "model": "deepseek-chat",
      "latency_ms": 1453,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "OpenAI",
      "model": "gpt-4o",
      "latency_ms": 1850,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Anthropic",
      "model": "claude-sonnet-4-5-20250929",
      "latency_ms": 3057,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "xAI (Grok)",
      "model": "grok-3-mini-fast",
      "latency_ms": 2689,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    }
  ],
  "ranked": [
    {
      "name": "Cohere",
      "model": "command-a-03-2025",
      "latency_ms": 645,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Groq",
      "model": "llama-3.3-70b-versatile",
      "latency_ms": 871,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Together AI",
      "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
      "latency_ms": 976,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Nvidia NIM",
      "model": "meta/llama-3.1-8b-instruct",
      "latency_ms": 1002,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Mistral AI",
      "model": "mistral-small-latest",
      "latency_ms": 1020,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "DeepSeek",
      "model": "deepseek-chat",
      "latency_ms": 1453,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "OpenAI",
      "model": "gpt-4o",
      "latency_ms": 1850,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "OpenRouter",
      "model": "mistralai/mistral-small-3.2-24b-instruct",
      "latency_ms": 1910,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Hyperbolic",
      "model": "meta-llama/Llama-3.3-70B-Instruct",
      "latency_ms": 2092,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "xAI (Grok)",
      "model": "grok-3-mini-fast",
      "latency_ms": 2689,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Anthropic",
      "model": "claude-sonnet-4-5-20250929",
      "latency_ms": 3057,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 4
    },
    {
      "name": "Google Gemini",
      "model": "gemini-2.5-flash",
      "latency_ms": 3294,
      "tokens_generated": 1,
      "success_rate": 50.0,
      "measurements_count": 2
    }
  ],
  "fastest": "Cohere",
  "total_tested": 16,
  "successful_tests": 12,
  "failed_tests": 4,
  "performance_summary": {
    "fastest_ms": 645,
    "slowest_ms": 3294,
    "average_ms": 1738
  },
  "ai_agent_guidance": {
    "recommended_provider": "Cohere",
    "use_case": "Choose 'fastest' provider for lowest latency API calls",
    "fallback_order": [
      "Cohere",
      "Groq",
      "Together AI",
      "Nvidia NIM",
      "Mistral AI",
      "DeepSeek",
      "OpenAI",
      "OpenRouter",
      "Hyperbolic",
      "xAI (Grok)",
      "Anthropic",
      "Google Gemini"
    ],
    "reliability_score": "12/16 providers responding"
  },
  "human_readable_summary": "\ud83c\udfc6 Fastest: Cohere (645ms - Moderate) | \ud83d\udcca Range: 645-3294ms | \ud83d\udcc8 Average: 1738ms | \u2705 12/16 providers responding",
  "timestamp": "2026-06-06T15:03:29.089221Z"
}