{
  "metadata": {
    "generated_at": "2026-05-19T09:25:00+02:00",
    "project": "Can My GPU Run This LLM",
    "internal_project_name": "Can I Run It? - Local AI Edition",
    "model_count": 15,
    "hardware_preset_count": 8,
    "default_system_vram_offset_gb": 1.5,
    "default_runtime_overhead_multiplier": 1.2,
    "data_policy": "Curated V1 dataset. Memory estimates are practical planning estimates, not benchmarks.",
    "source_note": "Ollama library pages and publisher model families were used as primary V1 references where available. Real memory use varies by backend, context length, quantization file, KV cache, drivers, and offloading settings.",
    "last_curated_by": "Codex"
  },
  "quantization_options": [
    {
      "id": "q4",
      "label": "Q4 / 4-bit",
      "bits": 4,
      "use_case": "Default local inference balance"
    },
    {
      "id": "q5",
      "label": "Q5 / 5-bit",
      "bits": 5,
      "use_case": "Better quality, more VRAM"
    },
    {
      "id": "q8",
      "label": "Q8 / 8-bit",
      "bits": 8,
      "use_case": "High quality, much more VRAM"
    },
    {
      "id": "fp16",
      "label": "FP16 / 16-bit",
      "bits": 16,
      "use_case": "Mostly workstation/server use"
    }
  ],
  "hardware_presets": [
    {
      "id": "vram-6gb",
      "label": "6 GB VRAM entry GPU",
      "example_gpus": ["GTX 1660", "RTX 2060 6GB"],
      "gpu_vram_gb": 6,
      "system_ram_gb": 16,
      "positioning": "Small local chat models only"
    },
    {
      "id": "vram-8gb",
      "label": "8 GB VRAM mainstream GPU",
      "example_gpus": ["RTX 3060 Ti", "RTX 4060", "RTX 3070"],
      "gpu_vram_gb": 8,
      "system_ram_gb": 32,
      "positioning": "Good for 7B/8B Q4 models"
    },
    {
      "id": "vram-10gb",
      "label": "10 GB VRAM older high-end GPU",
      "example_gpus": ["RTX 3080 10GB"],
      "gpu_vram_gb": 10,
      "system_ram_gb": 32,
      "positioning": "Strong 7B/8B, tight for 14B"
    },
    {
      "id": "vram-12gb",
      "label": "12 GB VRAM local agent GPU",
      "example_gpus": ["RTX 3060 12GB", "RTX 4070"],
      "gpu_vram_gb": 12,
      "system_ram_gb": 32,
      "positioning": "Wolfgang-style routing/agent/test GPU"
    },
    {
      "id": "vram-16gb",
      "label": "16 GB VRAM creator GPU",
      "example_gpus": ["RTX 4060 Ti 16GB", "RTX 4080"],
      "gpu_vram_gb": 16,
      "system_ram_gb": 64,
      "positioning": "Comfortable 14B Q4, some 20B-class models"
    },
    {
      "id": "vram-24gb",
      "label": "24 GB VRAM homelab workstation",
      "example_gpus": ["RTX 3090", "RTX 4090"],
      "gpu_vram_gb": 24,
      "system_ram_gb": 64,
      "positioning": "Wolfgang-style heavy local model GPU"
    },
    {
      "id": "vram-48gb",
      "label": "48 GB VRAM workstation",
      "example_gpus": ["RTX A6000", "L40S 48GB"],
      "gpu_vram_gb": 48,
      "system_ram_gb": 128,
      "positioning": "Large local models and long context"
    },
    {
      "id": "apple-unified-32gb",
      "label": "Apple Silicon 32 GB unified memory",
      "example_gpus": ["M2 Max 32GB", "M3 Max 36GB"],
      "gpu_vram_gb": null,
      "system_ram_gb": 32,
      "positioning": "Unified memory; not directly comparable to discrete VRAM",
      "special_handling": "apple_unified_memory"
    }
  ],
  "models": [
    {
      "id": "llama-3-1-8b",
      "model_name": "Llama 3.1 8B Instruct",
      "family": "Llama",
      "provider": "Meta",
      "license_type": "open-weight",
      "parameters_billions": 8,
      "architecture": "dense",
      "ollama_tag": "llama3.1:8b",
      "lm_studio_search": "Llama 3.1 8B Instruct GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 6,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 128000,
      "capabilities": {
        "chat": true,
        "coding": false,
        "agents": true,
        "vision": false,
        "reasoning": false,
        "best_for": "Fast local chat, lightweight agents, low-cost local testing",
        "weakness": "Complex coding and deep reasoning",
        "agent_readiness_score": 78
      },
      "hardware": {
        "minimum_vram_gb_q4": 8,
        "comfortable_vram_gb_q4": 12,
        "recommended_gpu": "RTX 4060 8GB or better"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/llama3.1"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "llama-3-1-70b",
      "model_name": "Llama 3.1 70B Instruct",
      "family": "Llama",
      "provider": "Meta",
      "license_type": "open-weight",
      "parameters_billions": 70,
      "architecture": "dense",
      "ollama_tag": "llama3.1:70b",
      "lm_studio_search": "Llama 3.1 70B Instruct GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 44,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 128000,
      "capabilities": {
        "chat": true,
        "coding": true,
        "agents": true,
        "vision": false,
        "reasoning": true,
        "best_for": "High-quality local chat and reasoning on workstation-class hardware",
        "weakness": "Too large for single 24GB consumer GPUs without heavy offload",
        "agent_readiness_score": 88
      },
      "hardware": {
        "minimum_vram_gb_q4": 48,
        "comfortable_vram_gb_q4": 64,
        "recommended_gpu": "48GB+ VRAM workstation or multi-GPU setup"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/llama3.1"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "qwen2-5-coder-7b",
      "model_name": "Qwen2.5 Coder 7B",
      "family": "Qwen",
      "provider": "Alibaba",
      "license_type": "open-weight",
      "parameters_billions": 7,
      "architecture": "dense",
      "ollama_tag": "qwen2.5-coder:7b",
      "lm_studio_search": "Qwen2.5 Coder 7B GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 5.5,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 32768,
      "capabilities": {
        "chat": true,
        "coding": true,
        "agents": true,
        "vision": false,
        "reasoning": false,
        "best_for": "Small local coding assistant and agent tool generation",
        "weakness": "Larger refactors and complex multi-file reasoning",
        "agent_readiness_score": 82
      },
      "hardware": {
        "minimum_vram_gb_q4": 8,
        "comfortable_vram_gb_q4": 12,
        "recommended_gpu": "RTX 4060 8GB or better"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/qwen2.5-coder"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "qwen2-5-coder-14b",
      "model_name": "Qwen2.5 Coder 14B",
      "family": "Qwen",
      "provider": "Alibaba",
      "license_type": "open-weight",
      "parameters_billions": 14,
      "architecture": "dense",
      "ollama_tag": "qwen2.5-coder:14b",
      "lm_studio_search": "Qwen2.5 Coder 14B GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 10.5,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 32768,
      "capabilities": {
        "chat": true,
        "coding": true,
        "agents": true,
        "vision": false,
        "reasoning": true,
        "best_for": "Local coding, scripts, repo assistance, technical agents",
        "weakness": "Can be tight on 12GB GPUs at longer context",
        "agent_readiness_score": 88
      },
      "hardware": {
        "minimum_vram_gb_q4": 12,
        "comfortable_vram_gb_q4": 16,
        "recommended_gpu": "RTX 3060 12GB minimum, 16GB+ preferred"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/qwen2.5-coder"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "qwen2-5-coder-32b",
      "model_name": "Qwen2.5 Coder 32B",
      "family": "Qwen",
      "provider": "Alibaba",
      "license_type": "open-weight",
      "parameters_billions": 32,
      "architecture": "dense",
      "ollama_tag": "qwen2.5-coder:32b",
      "lm_studio_search": "Qwen2.5 Coder 32B GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 21,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 32768,
      "capabilities": {
        "chat": true,
        "coding": true,
        "agents": true,
        "vision": false,
        "reasoning": true,
        "best_for": "Strong local coding and architecture work on 24GB GPUs",
        "weakness": "Little VRAM headroom on single 24GB GPUs with long context",
        "agent_readiness_score": 92
      },
      "hardware": {
        "minimum_vram_gb_q4": 24,
        "comfortable_vram_gb_q4": 32,
        "recommended_gpu": "RTX 3090/4090 24GB minimum, 32GB+ preferred"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/qwen2.5-coder"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "qwen3-8b",
      "model_name": "Qwen3 8B",
      "family": "Qwen",
      "provider": "Alibaba",
      "license_type": "open-weight",
      "parameters_billions": 8,
      "architecture": "dense",
      "ollama_tag": "qwen3:8b",
      "lm_studio_search": "Qwen3 8B GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 6,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 40000,
      "capabilities": {
        "chat": true,
        "coding": true,
        "agents": true,
        "vision": false,
        "reasoning": true,
        "best_for": "Fast general local assistant with reasoning/coding balance",
        "weakness": "Less capable than 14B/32B models for large tasks",
        "agent_readiness_score": 86
      },
      "hardware": {
        "minimum_vram_gb_q4": 8,
        "comfortable_vram_gb_q4": 12,
        "recommended_gpu": "RTX 4060 8GB or better"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/qwen3"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "medium"
    },
    {
      "id": "deepseek-r1-distill-qwen-8b",
      "model_name": "DeepSeek R1 Distill Qwen 8B",
      "family": "DeepSeek R1 Distill",
      "provider": "DeepSeek",
      "license_type": "open-weight",
      "parameters_billions": 8,
      "architecture": "dense",
      "ollama_tag": "deepseek-r1:8b",
      "lm_studio_search": "DeepSeek R1 Distill Qwen 8B GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 6,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens":  128000,
      "capabilities": {
        "chat": true,
        "coding": true,
        "agents": false,
        "vision": false,
        "reasoning": true,
        "best_for": "Local reasoning experiments and step-by-step technical analysis",
        "weakness": "Verbose reasoning can slow simple agent workflows",
        "agent_readiness_score": 72
      },
      "hardware": {
        "minimum_vram_gb_q4": 8,
        "comfortable_vram_gb_q4": 12,
        "recommended_gpu": "RTX 4060 8GB or better"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/deepseek-r1"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "deepseek-r1-distill-qwen-14b",
      "model_name": "DeepSeek R1 Distill Qwen 14B",
      "family": "DeepSeek R1 Distill",
      "provider": "DeepSeek",
      "license_type": "open-weight",
      "parameters_billions": 14,
      "architecture": "dense",
      "ollama_tag": "deepseek-r1:14b",
      "lm_studio_search": "DeepSeek R1 Distill Qwen 14B GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 10.5,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens":  128000,
      "capabilities": {
        "chat": true,
        "coding": true,
        "agents": false,
        "vision": false,
        "reasoning": true,
        "best_for": "Local reasoning and debugging on 12GB/16GB GPUs",
        "weakness": "Less ergonomic for fast Telegram-style assistant responses",
        "agent_readiness_score": 76
      },
      "hardware": {
        "minimum_vram_gb_q4": 12,
        "comfortable_vram_gb_q4": 16,
        "recommended_gpu": "RTX 3060 12GB minimum, 16GB+ preferred"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/deepseek-r1"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "deepseek-r1-distill-qwen-32b",
      "model_name": "DeepSeek R1 Distill Qwen 32B",
      "family": "DeepSeek R1 Distill",
      "provider": "DeepSeek",
      "license_type": "open-weight",
      "parameters_billions": 32,
      "architecture": "dense",
      "ollama_tag": "deepseek-r1:32b",
      "lm_studio_search": "DeepSeek R1 Distill Qwen 32B GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 21,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens":  128000,
      "capabilities": {
        "chat": true,
        "coding": true,
        "agents": false,
        "vision": false,
        "reasoning": true,
        "best_for": "Heavy local reasoning on 24GB GPUs",
        "weakness": "Tight VRAM headroom and slower agent loops",
        "agent_readiness_score": 78
      },
      "hardware": {
        "minimum_vram_gb_q4": 24,
        "comfortable_vram_gb_q4": 32,
        "recommended_gpu": "RTX 3090/4090 24GB minimum, 32GB+ preferred"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/deepseek-r1"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "medium"
    },
    {
      "id": "gemma-3-4b",
      "model_name": "Gemma 3 4B",
      "family": "Gemma",
      "provider": "Google",
      "license_type": "open-weight",
      "parameters_billions": 4,
      "architecture": "dense",
      "ollama_tag": "gemma3:4b",
      "lm_studio_search": "Gemma 3 4B GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 3.5,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 128000,
      "capabilities": {
        "chat": true,
        "coding": false,
        "agents": true,
        "vision": true,
        "reasoning": false,
        "best_for": "Small multimodal local assistant and low-resource setups",
        "weakness": "Limited quality for coding and complex tasks",
        "agent_readiness_score": 70
      },
      "hardware": {
        "minimum_vram_gb_q4": 6,
        "comfortable_vram_gb_q4": 8,
        "recommended_gpu": "6GB+ VRAM GPU"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/gemma3"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "gemma-3-12b",
      "model_name": "Gemma 3 12B",
      "family": "Gemma",
      "provider": "Google",
      "license_type": "open-weight",
      "parameters_billions": 12,
      "architecture": "dense",
      "ollama_tag": "gemma3:12b",
      "lm_studio_search": "Gemma 3 12B GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 9,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 128000,
      "capabilities": {
        "chat": true,
        "coding": false,
        "agents": true,
        "vision": true,
        "reasoning": true,
        "best_for": "Balanced multimodal local chat on 12GB+ GPUs",
        "weakness": "Not primarily a coding model",
        "agent_readiness_score": 78
      },
      "hardware": {
        "minimum_vram_gb_q4": 12,
        "comfortable_vram_gb_q4": 16,
        "recommended_gpu": "RTX 3060 12GB or better"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/gemma3"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "gemma-3-27b",
      "model_name": "Gemma 3 27B",
      "family": "Gemma",
      "provider": "Google",
      "license_type": "open-weight",
      "parameters_billions": 27,
      "architecture": "dense",
      "ollama_tag": "gemma3:27b",
      "lm_studio_search": "Gemma 3 27B GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 18,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 128000,
      "capabilities": {
        "chat": true,
        "coding": false,
        "agents": true,
        "vision": true,
        "reasoning": true,
        "best_for": "High-quality multimodal local assistant on 24GB GPUs",
        "weakness": "Less specialized for code than Qwen Coder",
        "agent_readiness_score": 84
      },
      "hardware": {
        "minimum_vram_gb_q4": 24,
        "comfortable_vram_gb_q4": 32,
        "recommended_gpu": "RTX 3090/4090 24GB or better"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/gemma3"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "mistral-7b",
      "model_name": "Mistral 7B",
      "family": "Mistral",
      "provider": "Mistral AI",
      "license_type": "open-weight",
      "parameters_billions": 7,
      "architecture": "dense",
      "ollama_tag": "mistral:7b",
      "lm_studio_search": "Mistral 7B Instruct GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 5.5,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 32768,
      "capabilities": {
        "chat": true,
        "coding": false,
        "agents": true,
        "vision": false,
        "reasoning": false,
        "best_for": "Fast local chat and simple agent tasks",
        "weakness": "Older/smaller than newer Qwen/Gemma alternatives",
        "agent_readiness_score": 74
      },
      "hardware": {
        "minimum_vram_gb_q4": 8,
        "comfortable_vram_gb_q4": 12,
        "recommended_gpu": "RTX 4060 8GB or better"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/mistral"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "mixtral-8x7b",
      "model_name": "Mixtral 8x7B",
      "family": "Mixtral",
      "provider": "Mistral AI",
      "license_type": "open-weight",
      "parameters_billions": 46.7,
      "architecture": "moe",
      "ollama_tag": "mixtral:8x7b",
      "lm_studio_search": "Mixtral 8x7B Instruct GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 28,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 32768,
      "capabilities": {
        "chat": true,
        "coding": true,
        "agents": true,
        "vision": false,
        "reasoning": true,
        "best_for": "MoE local reasoning/chat when enough VRAM is available",
        "weakness": "Not practical for 24GB single-GPU setups without offload",
        "agent_readiness_score": 82
      },
      "hardware": {
        "minimum_vram_gb_q4": 32,
        "comfortable_vram_gb_q4": 48,
        "recommended_gpu": "32GB+ VRAM or CPU/RAM offload"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/mixtral"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    },
    {
      "id": "phi-4-14b",
      "model_name": "Phi-4 14B",
      "family": "Phi",
      "provider": "Microsoft",
      "license_type": "open-weight",
      "parameters_billions": 14,
      "architecture": "dense",
      "ollama_tag": "phi4:14b",
      "lm_studio_search": "Phi-4 14B GGUF",
      "recommended_quantization": "Q4_K_M",
      "estimated_q4_runtime_gb": 10.5,
      "kv_cache_multiplier": 1.2,
      "minimum_context_tokens": 8192,
      "typical_context_tokens": 16384,
      "capabilities": {
        "chat": true,
        "coding": true,
        "agents": true,
        "vision": false,
        "reasoning": true,
        "best_for": "Compact reasoning and technical assistant on 12GB/16GB GPUs",
        "weakness": "Smaller ecosystem than Llama/Qwen families",
        "agent_readiness_score": 82
      },
      "hardware": {
        "minimum_vram_gb_q4": 12,
        "comfortable_vram_gb_q4": 16,
        "recommended_gpu": "RTX 3060 12GB minimum, 16GB+ preferred"
      },
      "sources": [
        {
          "label": "Ollama library",
          "url": "https://ollama.com/library/phi4"
        }
      ],
      "verified_at": "2026-05-19",
      "confidence": "high"
    }
  ]
}
