🚀 AI Latency Test Results

← Back to Home

🏆 Fastest: Groq (586ms - Moderate) | 📊 Range: 586-3824ms | 📈 Average: 1542ms | ✅ 12/16 providers responding
Full JSON Response (for developers & agents)
{
  "service": "InferenceLatency.com",
  "providers": [
    {
      "name": "Google Gemini",
      "model": "gemini-2.5-flash",
      "latency_ms": 3294,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Fireworks AI",
      "model": "accounts/fireworks/models/deepseek-v3p2",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 2
    },
    {
      "name": "Cerebras",
      "model": "llama3.1-8b",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 2
    },
    {
      "name": "Groq",
      "model": "llama-3.3-70b-versatile",
      "latency_ms": 586,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "Cohere",
      "model": "command-a-03-2025",
      "latency_ms": 662,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "SambaNova",
      "model": "Meta-Llama-3.1-8B-Instruct",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 2
    },
    {
      "name": "Perplexity",
      "model": "sonar",
      "latency_ms": null,
      "error": "All recent attempts failed",
      "success_rate": 0.0,
      "measurements_count": 2
    },
    {
      "name": "Together AI",
      "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
      "latency_ms": 780,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "OpenRouter",
      "model": "mistralai/mistral-small-3.2-24b-instruct",
      "latency_ms": 819,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "Nvidia NIM",
      "model": "meta/llama-3.1-8b-instruct",
      "latency_ms": 871,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "Mistral AI",
      "model": "mistral-small-latest",
      "latency_ms": 790,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "Hyperbolic",
      "model": "meta-llama/Llama-3.3-70B-Instruct",
      "latency_ms": 2109,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "DeepSeek",
      "model": "deepseek-chat",
      "latency_ms": 1473,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "OpenAI",
      "model": "gpt-4o",
      "latency_ms": 1806,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "Anthropic",
      "model": "claude-sonnet-4-5-20250929",
      "latency_ms": 3824,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "xAI (Grok)",
      "model": "grok-3-mini-fast",
      "latency_ms": 1486,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    }
  ],
  "ranked": [
    {
      "name": "Groq",
      "model": "llama-3.3-70b-versatile",
      "latency_ms": 586,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "Cohere",
      "model": "command-a-03-2025",
      "latency_ms": 662,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "Together AI",
      "model": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
      "latency_ms": 780,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "Mistral AI",
      "model": "mistral-small-latest",
      "latency_ms": 790,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "OpenRouter",
      "model": "mistralai/mistral-small-3.2-24b-instruct",
      "latency_ms": 819,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "Nvidia NIM",
      "model": "meta/llama-3.1-8b-instruct",
      "latency_ms": 871,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "DeepSeek",
      "model": "deepseek-chat",
      "latency_ms": 1473,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "xAI (Grok)",
      "model": "grok-3-mini-fast",
      "latency_ms": 1486,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "OpenAI",
      "model": "gpt-4o",
      "latency_ms": 1806,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "Hyperbolic",
      "model": "meta-llama/Llama-3.3-70B-Instruct",
      "latency_ms": 2109,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    },
    {
      "name": "Google Gemini",
      "model": "gemini-2.5-flash",
      "latency_ms": 3294,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 1
    },
    {
      "name": "Anthropic",
      "model": "claude-sonnet-4-5-20250929",
      "latency_ms": 3824,
      "tokens_generated": 1,
      "success_rate": 100.0,
      "measurements_count": 2
    }
  ],
  "fastest": "Groq",
  "total_tested": 16,
  "successful_tests": 12,
  "failed_tests": 4,
  "performance_summary": {
    "fastest_ms": 586,
    "slowest_ms": 3824,
    "average_ms": 1542
  },
  "ai_agent_guidance": {
    "recommended_provider": "Groq",
    "use_case": "Choose 'fastest' provider for lowest latency API calls",
    "fallback_order": [
      "Groq",
      "Cohere",
      "Together AI",
      "Mistral AI",
      "OpenRouter",
      "Nvidia NIM",
      "DeepSeek",
      "xAI (Grok)",
      "OpenAI",
      "Hyperbolic",
      "Google Gemini",
      "Anthropic"
    ],
    "reliability_score": "12/16 providers responding"
  },
  "human_readable_summary": "\ud83c\udfc6 Fastest: Groq (586ms - Moderate) | \ud83d\udcca Range: 586-3824ms | \ud83d\udcc8 Average: 1542ms | \u2705 12/16 providers responding",
  "timestamp": "2026-06-06T14:13:50.294817Z"
}