🚀 AI Latency Test Results

← Back to Home

🏆 Fastest: Cohere (726ms - Moderate) | 📊 Range: 726-5583ms | 📈 Average: 2629ms | ✅ 12/16 providers responding
Full JSON Response (for developers & agents)
{
  "service": "InferenceLatency.com",
  "providers": [
    {
      "name": "Google Gemini",
      "model": "gemini-2.5-flash",
      "latency_ms": 3294,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Fireworks AI",
      "model": "accounts/fireworks/models/deepseek-v3p2",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 1
    },
    {
      "name": "Cerebras",
      "model": "llama3.1-8b",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 1
    },
    {
      "name": "Groq",
      "model": "llama-3.3-70b-versatile",
      "latency_ms": 3878,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Cohere",
      "model": "command-a-03-2025",
      "latency_ms": 726,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "SambaNova",
      "model": "Meta-Llama-3.1-8B-Instruct",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 1
    },
    {
      "name": "Perplexity",
      "model": "sonar",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 1
    },
    {
      "name": "Together AI",
      "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
      "latency_ms": 907,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "OpenRouter",
      "model": "mistralai/mistral-small-3.2-24b-instruct",
      "latency_ms": 4362,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Nvidia NIM",
      "model": "meta/llama-3.1-8b-instruct",
      "latency_ms": 897,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Mistral AI",
      "model": "mistral-small-latest",
      "latency_ms": 963,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Hyperbolic",
      "model": "meta-llama/Llama-3.3-70B-Instruct",
      "latency_ms": 1414,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "DeepSeek",
      "model": "deepseek-chat",
      "latency_ms": 1854,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "OpenAI",
      "model": "gpt-4o",
      "latency_ms": 5271,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Anthropic",
      "model": "claude-sonnet-4-5-20250929",
      "latency_ms": 5583,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "xAI (Grok)",
      "model": "grok-3-mini-fast",
      "latency_ms": 2404,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    }
  ],
  "ranked": [
    {
      "name": "Cohere",
      "model": "command-a-03-2025",
      "latency_ms": 726,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Nvidia NIM",
      "model": "meta/llama-3.1-8b-instruct",
      "latency_ms": 897,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Together AI",
      "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
      "latency_ms": 907,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Mistral AI",
      "model": "mistral-small-latest",
      "latency_ms": 963,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Hyperbolic",
      "model": "meta-llama/Llama-3.3-70B-Instruct",
      "latency_ms": 1414,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "DeepSeek",
      "model": "deepseek-chat",
      "latency_ms": 1854,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "xAI (Grok)",
      "model": "grok-3-mini-fast",
      "latency_ms": 2404,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Google Gemini",
      "model": "gemini-2.5-flash",
      "latency_ms": 3294,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Groq",
      "model": "llama-3.3-70b-versatile",
      "latency_ms": 3878,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "OpenRouter",
      "model": "mistralai/mistral-small-3.2-24b-instruct",
      "latency_ms": 4362,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "OpenAI",
      "model": "gpt-4o",
      "latency_ms": 5271,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Anthropic",
      "model": "claude-sonnet-4-5-20250929",
      "latency_ms": 5583,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    }
  ],
  "fastest": "Cohere",
  "total_tested": 16,
  "successful_tests": 12,
  "failed_tests": 4,
  "performance_summary": {
    "fastest_ms": 726,
    "slowest_ms": 5583,
    "average_ms": 2629
  },
  "ai_agent_guidance": {
    "recommended_provider": "Cohere",
    "use_case": "Choose 'fastest' provider for lowest latency API calls",
    "fallback_order": [
      "Cohere",
      "Nvidia NIM",
      "Together AI",
      "Mistral AI",
      "Hyperbolic",
      "DeepSeek",
      "xAI (Grok)",
      "Google Gemini",
      "Groq",
      "OpenRouter",
      "OpenAI",
      "Anthropic"
    ],
    "reliability_score": "12/16 providers responding"
  },
  "human_readable_summary": "\ud83c\udfc6 Fastest: Cohere (726ms - Moderate) | \ud83d\udcca Range: 726-5583ms | \ud83d\udcc8 Average: 2629ms | \u2705 12/16 providers responding",
  "timestamp": "2026-06-06T14:00:18.816494Z"
}