[{"data":1,"prerenderedAt":1047},["ShallowReactive",2],{"guide-detail-local-llm-mac":3},{"id":4,"title":5,"body":6,"category":1031,"description":1032,"difficulty":1033,"extension":1034,"meta":1035,"navigation":828,"path":1036,"published":1037,"readTime":1038,"seo":1039,"slug":1040,"stem":1041,"tags":1042,"updated":1037,"__hash__":1046},"guides\u002Fen\u002Fguides\u002Flocal-llm-mac.md","Run Local LLMs on Mac (Apple Silicon)",{"type":7,"value":8,"toc":1011},"minimark",[9,14,18,24,91,97,103,109,112,115,119,122,148,151,175,178,198,201,218,220,224,239,242,245,280,291,293,297,300,406,412,415,429,431,435,440,482,486,540,544,585,587,591,594,597,617,620,623,658,660,664,667,671,674,750,760,764,771,775,796,799,801,805,812,883,885,889,968,975,983,985,989,1007],[10,11,13],"h2",{"id":12},"hardware-requirements","Hardware Requirements",[15,16,17],"p",{},"Apple Silicon Macs use unified memory — the same physical RAM serves both CPU and GPU. This means a 16 GB M2 Mac can run a 7–9B model with acceptable speed, because the GPU does not need a separate VRAM pool.",[15,19,20],{},[21,22,23],"strong",{},"Minimum requirements:",[25,26,27,43],"table",{},[28,29,30],"thead",{},[31,32,33,37,40],"tr",{},[34,35,36],"th",{},"Spec",[34,38,39],{},"Minimum",[34,41,42],{},"Recommended",[44,45,46,58,69,80],"tbody",{},[31,47,48,52,55],{},[49,50,51],"td",{},"Chip",[49,53,54],{},"M1",[49,56,57],{},"M2 \u002F M3",[31,59,60,63,66],{},[49,61,62],{},"RAM",[49,64,65],{},"8 GB",[49,67,68],{},"16 GB",[31,70,71,74,77],{},[49,72,73],{},"Storage",[49,75,76],{},"50 GB free",[49,78,79],{},"100 GB free",[31,81,82,85,88],{},[49,83,84],{},"macOS",[49,86,87],{},"13 Ventura",[49,89,90],{},"14 Sonoma+",[15,92,93,96],{},[21,94,95],{},"8 GB RAM reality check:"," You can run 3B–7B models at Q4_K_M quantization. macOS keeps ~2–3 GB for the OS. That leaves ~5–6 GB usable. Expect slower token generation and potential swapping.",[15,98,99,102],{},[21,100,101],{},"16 GB:"," Run 7B–13B models comfortably. The sweet spot for most users.",[15,104,105,108],{},[21,106,107],{},"32 GB+:"," Run 30B+ models (Llama 3.3 70B at Q2_K fits in 35 GB).",[15,110,111],{},"Intel Macs (pre-2020) are not supported — Ollama requires Apple Silicon for Metal acceleration.",[113,114],"hr",{},[10,116,118],{"id":117},"install-ollama","Install Ollama",[15,120,121],{},"Homebrew is the simplest path:",[123,124,129],"pre",{"className":125,"code":126,"language":127,"meta":128,"style":128},"language-bash shiki shiki-themes github-light github-dark","brew install ollama\n","bash","",[130,131,132],"code",{"__ignoreMap":128},[133,134,137,141,145],"span",{"class":135,"line":136},"line",1,[133,138,140],{"class":139},"sScJk","brew",[133,142,144],{"class":143},"sZZnC"," install",[133,146,147],{"class":143}," ollama\n",[15,149,150],{},"Or use the official installer (no Homebrew required):",[123,152,154],{"className":125,"code":153,"language":127,"meta":128,"style":128},"curl -fsSL https:\u002F\u002Follama.com\u002Finstall.sh | sh\n",[130,155,156],{"__ignoreMap":128},[133,157,158,161,165,168,172],{"class":135,"line":136},[133,159,160],{"class":139},"curl",[133,162,164],{"class":163},"sj4cs"," -fsSL",[133,166,167],{"class":143}," https:\u002F\u002Follama.com\u002Finstall.sh",[133,169,171],{"class":170},"szBVR"," |",[133,173,174],{"class":139}," sh\n",[15,176,177],{},"Verify installation:",[123,179,181],{"className":125,"code":180,"language":127,"meta":128,"style":128},"ollama --version\n# ollama version 0.3.x\n",[130,182,183,191],{"__ignoreMap":128},[133,184,185,188],{"class":135,"line":136},[133,186,187],{"class":139},"ollama",[133,189,190],{"class":163}," --version\n",[133,192,194],{"class":135,"line":193},2,[133,195,197],{"class":196},"sJ8bj","# ollama version 0.3.x\n",[15,199,200],{},"Start the Ollama server (runs automatically as a background service after install, but you can also start it manually):",[123,202,204],{"className":125,"code":203,"language":127,"meta":128,"style":128},"ollama serve\n# Listening on 127.0.0.1:11434\n",[130,205,206,213],{"__ignoreMap":128},[133,207,208,210],{"class":135,"line":136},[133,209,187],{"class":139},[133,211,212],{"class":143}," serve\n",[133,214,215],{"class":135,"line":193},[133,216,217],{"class":196},"# Listening on 127.0.0.1:11434\n",[113,219],{},[10,221,223],{"id":222},"pull-your-first-model","Pull Your First Model",[123,225,227],{"className":125,"code":226,"language":127,"meta":128,"style":128},"ollama pull llama3.2:3b\n",[130,228,229],{"__ignoreMap":128},[133,230,231,233,236],{"class":135,"line":136},[133,232,187],{"class":139},[133,234,235],{"class":143}," pull",[133,237,238],{"class":143}," llama3.2:3b\n",[15,240,241],{},"This downloads a 3B parameter Llama 3.2 model (Q4_K_M, ~2 GB). Fast download, very fast on M2.",[15,243,244],{},"Run it immediately in the terminal:",[123,246,248],{"className":125,"code":247,"language":127,"meta":128,"style":128},"ollama run llama3.2:3b\n>>> Tell me about Apple Silicon.\n",[130,249,250,259],{"__ignoreMap":128},[133,251,252,254,257],{"class":135,"line":136},[133,253,187],{"class":139},[133,255,256],{"class":143}," run",[133,258,238],{"class":143},[133,260,261,265,268,271,274,277],{"class":135,"line":193},[133,262,264],{"class":263},"sVt8B",">>> ",[133,266,267],{"class":139},"Tell",[133,269,270],{"class":143}," me",[133,272,273],{"class":143}," about",[133,275,276],{"class":143}," Apple",[133,278,279],{"class":143}," Silicon.\n",[15,281,282,283,286,287,290],{},"Press ",[130,284,285],{},"Ctrl+D"," or type ",[130,288,289],{},"\u002Fbye"," to exit the interactive session.",[113,292],{},[10,294,296],{"id":295},"quantization-tradeoffs","Quantization Tradeoffs",[15,298,299],{},"GGUF quantization reduces model size at the cost of slight quality degradation. Ollama uses llama.cpp under the hood and pulls pre-quantized models from the Ollama library.",[25,301,302,321],{},[28,303,304],{},[31,305,306,309,312,315,318],{},[34,307,308],{},"Format",[34,310,311],{},"Size (7B model)",[34,313,314],{},"Quality",[34,316,317],{},"RAM needed",[34,319,320],{},"Speed (M2)",[44,322,323,340,357,373,390],{},[31,324,325,328,331,334,337],{},[49,326,327],{},"F16",[49,329,330],{},"~14 GB",[49,332,333],{},"Best",[49,335,336],{},"18+ GB",[49,338,339],{},"Slow",[31,341,342,345,348,351,354],{},[49,343,344],{},"Q8_0",[49,346,347],{},"~7 GB",[49,349,350],{},"Excellent",[49,352,353],{},"10 GB",[49,355,356],{},"Good",[31,358,359,362,365,367,370],{},[49,360,361],{},"Q4_K_M",[49,363,364],{},"~4 GB",[49,366,356],{},[49,368,369],{},"6 GB",[49,371,372],{},"Fast",[31,374,375,378,381,384,387],{},[49,376,377],{},"Q3_K_M",[49,379,380],{},"~3.3 GB",[49,382,383],{},"Fair",[49,385,386],{},"5 GB",[49,388,389],{},"Very fast",[31,391,392,395,398,401,404],{},[49,393,394],{},"Q2_K",[49,396,397],{},"~2.5 GB",[49,399,400],{},"Acceptable",[49,402,403],{},"4 GB",[49,405,389],{},[15,407,408,411],{},[21,409,410],{},"Recommendation:"," Use Q4_K_M by default. It is the best tradeoff for most workloads. Q8_0 if you have 16+ GB and need higher accuracy.",[15,413,414],{},"Ollama automatically selects Q4_K_M when you pull without specifying a tag. Specify explicitly with a colon:",[123,416,418],{"className":125,"code":417,"language":127,"meta":128,"style":128},"ollama pull llama3.2:8b-instruct-q8_0\n",[130,419,420],{"__ignoreMap":128},[133,421,422,424,426],{"class":135,"line":136},[133,423,187],{"class":139},[133,425,235],{"class":143},[133,427,428],{"class":143}," llama3.2:8b-instruct-q8_0\n",[113,430],{},[10,432,434],{"id":433},"model-recommendations-by-ram","Model Recommendations by RAM",[436,437,439],"h3",{"id":438},"_8-gb-ram","8 GB RAM",[123,441,443],{"className":125,"code":442,"language":127,"meta":128,"style":128},"ollama pull llama3.2:3b       # general purpose, fast\nollama pull phi4-mini         # Microsoft Phi-4 Mini, strong at coding\nollama pull gemma3:4b         # Google Gemma 3, good instruction following\n",[130,444,445,457,469],{"__ignoreMap":128},[133,446,447,449,451,454],{"class":135,"line":136},[133,448,187],{"class":139},[133,450,235],{"class":143},[133,452,453],{"class":143}," llama3.2:3b",[133,455,456],{"class":196},"       # general purpose, fast\n",[133,458,459,461,463,466],{"class":135,"line":193},[133,460,187],{"class":139},[133,462,235],{"class":143},[133,464,465],{"class":143}," phi4-mini",[133,467,468],{"class":196},"         # Microsoft Phi-4 Mini, strong at coding\n",[133,470,472,474,476,479],{"class":135,"line":471},3,[133,473,187],{"class":139},[133,475,235],{"class":143},[133,477,478],{"class":143}," gemma3:4b",[133,480,481],{"class":196},"         # Google Gemma 3, good instruction following\n",[436,483,485],{"id":484},"_16-gb-ram","16 GB RAM",[123,487,489],{"className":125,"code":488,"language":127,"meta":128,"style":128},"ollama pull llama3.3:8b       # best general-purpose 8B model (Q4_K_M)\nollama pull qwen2.5:7b        # strong multilingual and coding\nollama pull mistral-nemo:12b  # Mistral NeMo 12B, excellent for instruction\nollama pull deepseek-r1:8b    # reasoning-focused, CoT outputs\n",[130,490,491,503,515,527],{"__ignoreMap":128},[133,492,493,495,497,500],{"class":135,"line":136},[133,494,187],{"class":139},[133,496,235],{"class":143},[133,498,499],{"class":143}," llama3.3:8b",[133,501,502],{"class":196},"       # best general-purpose 8B model (Q4_K_M)\n",[133,504,505,507,509,512],{"class":135,"line":193},[133,506,187],{"class":139},[133,508,235],{"class":143},[133,510,511],{"class":143}," qwen2.5:7b",[133,513,514],{"class":196},"        # strong multilingual and coding\n",[133,516,517,519,521,524],{"class":135,"line":471},[133,518,187],{"class":139},[133,520,235],{"class":143},[133,522,523],{"class":143}," mistral-nemo:12b",[133,525,526],{"class":196},"  # Mistral NeMo 12B, excellent for instruction\n",[133,528,530,532,534,537],{"class":135,"line":529},4,[133,531,187],{"class":139},[133,533,235],{"class":143},[133,535,536],{"class":143}," deepseek-r1:8b",[133,538,539],{"class":196},"    # reasoning-focused, CoT outputs\n",[436,541,543],{"id":542},"_32-gb-ram","32 GB RAM",[123,545,547],{"className":125,"code":546,"language":127,"meta":128,"style":128},"ollama pull llama3.3:70b      # Llama 3.3 70B at Q2_K\nollama pull qwen2.5:32b       # excellent coding model\nollama pull deepseek-r1:32b   # reasoning, competitive with GPT-4o on benchmarks\n",[130,548,549,561,573],{"__ignoreMap":128},[133,550,551,553,555,558],{"class":135,"line":136},[133,552,187],{"class":139},[133,554,235],{"class":143},[133,556,557],{"class":143}," llama3.3:70b",[133,559,560],{"class":196},"      # Llama 3.3 70B at Q2_K\n",[133,562,563,565,567,570],{"class":135,"line":193},[133,564,187],{"class":139},[133,566,235],{"class":143},[133,568,569],{"class":143}," qwen2.5:32b",[133,571,572],{"class":196},"       # excellent coding model\n",[133,574,575,577,579,582],{"class":135,"line":471},[133,576,187],{"class":139},[133,578,235],{"class":143},[133,580,581],{"class":143}," deepseek-r1:32b",[133,583,584],{"class":196},"   # reasoning, competitive with GPT-4o on benchmarks\n",[113,586],{},[10,588,590],{"id":589},"metal-mps-acceleration","Metal \u002F MPS Acceleration",[15,592,593],{},"Ollama uses Metal Performance Shaders (MPS) automatically on Apple Silicon. No configuration needed.",[15,595,596],{},"To confirm Metal is active, check the Ollama server log:",[123,598,600],{"className":125,"code":599,"language":127,"meta":128,"style":128},"# In the Ollama server terminal, look for:\n# llm_load_tensors: offloading 32 repeating layers to GPU\n# llm_load_tensors: offloaded 33\u002F33 layers to GPU\n",[130,601,602,607,612],{"__ignoreMap":128},[133,603,604],{"class":135,"line":136},[133,605,606],{"class":196},"# In the Ollama server terminal, look for:\n",[133,608,609],{"class":135,"line":193},[133,610,611],{"class":196},"# llm_load_tensors: offloading 32 repeating layers to GPU\n",[133,613,614],{"class":135,"line":471},[133,615,616],{"class":196},"# llm_load_tensors: offloaded 33\u002F33 layers to GPU\n",[15,618,619],{},"When all layers are offloaded to GPU, inference is fastest. If only partial offload occurs (due to RAM pressure), performance drops significantly.",[15,621,622],{},"Force full GPU offload by ensuring no other memory-heavy apps are running:",[123,624,626],{"className":125,"code":625,"language":127,"meta":128,"style":128},"# check what's using RAM\ntop -l 1 -s 0 | head -20\n",[130,627,628,633],{"__ignoreMap":128},[133,629,630],{"class":135,"line":136},[133,631,632],{"class":196},"# check what's using RAM\n",[133,634,635,638,641,644,647,650,652,655],{"class":135,"line":193},[133,636,637],{"class":139},"top",[133,639,640],{"class":163}," -l",[133,642,643],{"class":163}," 1",[133,645,646],{"class":163}," -s",[133,648,649],{"class":163}," 0",[133,651,171],{"class":170},[133,653,654],{"class":139}," head",[133,656,657],{"class":163}," -20\n",[113,659],{},[10,661,663],{"id":662},"open-webui","Open WebUI",[15,665,666],{},"Open WebUI provides a ChatGPT-like interface for Ollama running locally.",[436,668,670],{"id":669},"install-with-docker","Install with Docker",[15,672,673],{},"Ensure Docker Desktop for Mac is installed and running, then:",[123,675,677],{"className":125,"code":676,"language":127,"meta":128,"style":128},"docker run -d \\\n  --name open-webui \\\n  -p 3000:8080 \\\n  -v open-webui:\u002Fapp\u002Fbackend\u002Fdata \\\n  -e OLLAMA_BASE_URL=http:\u002F\u002Fhost.docker.internal:11434 \\\n  --restart always \\\n  ghcr.io\u002Fopen-webui\u002Fopen-webui:main\n",[130,678,679,692,702,712,722,733,744],{"__ignoreMap":128},[133,680,681,684,686,689],{"class":135,"line":136},[133,682,683],{"class":139},"docker",[133,685,256],{"class":143},[133,687,688],{"class":163}," -d",[133,690,691],{"class":163}," \\\n",[133,693,694,697,700],{"class":135,"line":193},[133,695,696],{"class":163},"  --name",[133,698,699],{"class":143}," open-webui",[133,701,691],{"class":163},[133,703,704,707,710],{"class":135,"line":471},[133,705,706],{"class":163},"  -p",[133,708,709],{"class":143}," 3000:8080",[133,711,691],{"class":163},[133,713,714,717,720],{"class":135,"line":529},[133,715,716],{"class":163},"  -v",[133,718,719],{"class":143}," open-webui:\u002Fapp\u002Fbackend\u002Fdata",[133,721,691],{"class":163},[133,723,725,728,731],{"class":135,"line":724},5,[133,726,727],{"class":163},"  -e",[133,729,730],{"class":143}," OLLAMA_BASE_URL=http:\u002F\u002Fhost.docker.internal:11434",[133,732,691],{"class":163},[133,734,736,739,742],{"class":135,"line":735},6,[133,737,738],{"class":163},"  --restart",[133,740,741],{"class":143}," always",[133,743,691],{"class":163},[133,745,747],{"class":135,"line":746},7,[133,748,749],{"class":143},"  ghcr.io\u002Fopen-webui\u002Fopen-webui:main\n",[15,751,752,753,759],{},"Open ",[754,755,756],"a",{"href":756,"rel":757},"http:\u002F\u002Flocalhost:3000",[758],"nofollow"," in your browser. First run will prompt you to create an admin account (local only, no external registration).",[436,761,763],{"id":762},"select-a-model","Select a Model",[15,765,766,767,770],{},"In Open WebUI: click the model dropdown at the top → select any model you have pulled in Ollama. If you do not see models, verify Ollama is running (",[130,768,769],{},"ollama list",").",[436,772,774],{"id":773},"install-without-docker","Install Without Docker",[123,776,778],{"className":125,"code":777,"language":127,"meta":128,"style":128},"pip install open-webui\nopen-webui serve\n",[130,779,780,790],{"__ignoreMap":128},[133,781,782,785,787],{"class":135,"line":136},[133,783,784],{"class":139},"pip",[133,786,144],{"class":143},[133,788,789],{"class":143}," open-webui\n",[133,791,792,794],{"class":135,"line":193},[133,793,662],{"class":139},[133,795,212],{"class":143},[15,797,798],{},"This is simpler but requires Python 3.11+.",[113,800],{},[10,802,804],{"id":803},"api-access","API Access",[15,806,807,808,811],{},"Ollama exposes an OpenAI-compatible API at ",[130,809,810],{},"http:\u002F\u002Flocalhost:11434\u002Fv1",". Any OpenAI SDK or tool that supports a custom base URL works:",[123,813,817],{"className":814,"code":815,"language":816,"meta":128,"style":128},"language-python shiki shiki-themes github-light github-dark","from openai import OpenAI\n\nclient = OpenAI(\n    base_url=\"http:\u002F\u002Flocalhost:11434\u002Fv1\",\n    api_key=\"ollama\"   # required by the SDK, value is ignored\n)\n\nresponse = client.chat.completions.create(\n    model=\"llama3.2:3b\",\n    messages=[{\"role\": \"user\", \"content\": \"What is 2 + 2?\"}]\n)\nprint(response.choices[0].message.content)\n","python",[130,818,819,824,830,835,840,845,850,854,860,866,872,877],{"__ignoreMap":128},[133,820,821],{"class":135,"line":136},[133,822,823],{},"from openai import OpenAI\n",[133,825,826],{"class":135,"line":193},[133,827,829],{"emptyLinePlaceholder":828},true,"\n",[133,831,832],{"class":135,"line":471},[133,833,834],{},"client = OpenAI(\n",[133,836,837],{"class":135,"line":529},[133,838,839],{},"    base_url=\"http:\u002F\u002Flocalhost:11434\u002Fv1\",\n",[133,841,842],{"class":135,"line":724},[133,843,844],{},"    api_key=\"ollama\"   # required by the SDK, value is ignored\n",[133,846,847],{"class":135,"line":735},[133,848,849],{},")\n",[133,851,852],{"class":135,"line":746},[133,853,829],{"emptyLinePlaceholder":828},[133,855,857],{"class":135,"line":856},8,[133,858,859],{},"response = client.chat.completions.create(\n",[133,861,863],{"class":135,"line":862},9,[133,864,865],{},"    model=\"llama3.2:3b\",\n",[133,867,869],{"class":135,"line":868},10,[133,870,871],{},"    messages=[{\"role\": \"user\", \"content\": \"What is 2 + 2?\"}]\n",[133,873,875],{"class":135,"line":874},11,[133,876,849],{},[133,878,880],{"class":135,"line":879},12,[133,881,882],{},"print(response.choices[0].message.content)\n",[113,884],{},[10,886,888],{"id":887},"managing-models","Managing Models",[123,890,892],{"className":125,"code":891,"language":127,"meta":128,"style":128},"# list downloaded models\nollama list\n\n# remove a model (frees disk space)\nollama rm llama3.2:3b\n\n# show model details\nollama show llama3.3:8b\n\n# copy\u002Fcreate a custom model variant\nollama create my-model -f Modelfile\n",[130,893,894,899,906,910,915,924,928,933,943,947,952],{"__ignoreMap":128},[133,895,896],{"class":135,"line":136},[133,897,898],{"class":196},"# list downloaded models\n",[133,900,901,903],{"class":135,"line":193},[133,902,187],{"class":139},[133,904,905],{"class":143}," list\n",[133,907,908],{"class":135,"line":471},[133,909,829],{"emptyLinePlaceholder":828},[133,911,912],{"class":135,"line":529},[133,913,914],{"class":196},"# remove a model (frees disk space)\n",[133,916,917,919,922],{"class":135,"line":724},[133,918,187],{"class":139},[133,920,921],{"class":143}," rm",[133,923,238],{"class":143},[133,925,926],{"class":135,"line":735},[133,927,829],{"emptyLinePlaceholder":828},[133,929,930],{"class":135,"line":746},[133,931,932],{"class":196},"# show model details\n",[133,934,935,937,940],{"class":135,"line":856},[133,936,187],{"class":139},[133,938,939],{"class":143}," show",[133,941,942],{"class":143}," llama3.3:8b\n",[133,944,945],{"class":135,"line":862},[133,946,829],{"emptyLinePlaceholder":828},[133,948,949],{"class":135,"line":868},[133,950,951],{"class":196},"# copy\u002Fcreate a custom model variant\n",[133,953,954,956,959,962,965],{"class":135,"line":874},[133,955,187],{"class":139},[133,957,958],{"class":143}," create",[133,960,961],{"class":143}," my-model",[133,963,964],{"class":163}," -f",[133,966,967],{"class":143}," Modelfile\n",[15,969,970,971,974],{},"A minimal ",[130,972,973],{},"Modelfile"," for customizing system prompt:",[123,976,981],{"className":977,"code":979,"language":980},[978],"language-text","FROM llama3.3:8b\nSYSTEM \"You are a concise assistant. Answer in under 3 sentences.\"\nPARAMETER temperature 0.7\n","text",[130,982,979],{"__ignoreMap":128},[113,984],{},[10,986,988],{"id":987},"see-also","See Also",[990,991,992,1000],"ul",{},[993,994,995,999],"li",{},[754,996,998],{"href":997},"\u002Fen\u002Ftier-list\u002Ftext","Text model tier list"," — ranked comparisons of local and cloud models",[993,1001,1002,1006],{},[754,1003,1005],{"href":1004},"\u002Fen\u002Ftools","Tools directory"," — GUI apps and integrations for local LLMs",[1008,1009,1010],"style",{},"html .default .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .shiki span {color: var(--shiki-default);background: var(--shiki-default-bg);font-style: var(--shiki-default-font-style);font-weight: var(--shiki-default-font-weight);text-decoration: var(--shiki-default-text-decoration);}html .dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html.dark .shiki span {color: var(--shiki-dark);background: var(--shiki-dark-bg);font-style: var(--shiki-dark-font-style);font-weight: var(--shiki-dark-font-weight);text-decoration: var(--shiki-dark-text-decoration);}html pre.shiki code .sScJk, html code.shiki .sScJk{--shiki-default:#6F42C1;--shiki-dark:#B392F0}html pre.shiki code .sZZnC, html code.shiki .sZZnC{--shiki-default:#032F62;--shiki-dark:#9ECBFF}html pre.shiki code .sj4cs, html code.shiki .sj4cs{--shiki-default:#005CC5;--shiki-dark:#79B8FF}html pre.shiki code .szBVR, html code.shiki .szBVR{--shiki-default:#D73A49;--shiki-dark:#F97583}html pre.shiki code .sJ8bj, html code.shiki .sJ8bj{--shiki-default:#6A737D;--shiki-dark:#6A737D}html pre.shiki code .sVt8B, html code.shiki .sVt8B{--shiki-default:#24292E;--shiki-dark:#E1E4E8}",{"title":128,"searchDepth":193,"depth":471,"links":1012},[1013,1014,1015,1016,1017,1022,1023,1028,1029,1030],{"id":12,"depth":193,"text":13},{"id":117,"depth":193,"text":118},{"id":222,"depth":193,"text":223},{"id":295,"depth":193,"text":296},{"id":433,"depth":193,"text":434,"children":1018},[1019,1020,1021],{"id":438,"depth":471,"text":439},{"id":484,"depth":471,"text":485},{"id":542,"depth":471,"text":543},{"id":589,"depth":193,"text":590},{"id":662,"depth":193,"text":663,"children":1024},[1025,1026,1027],{"id":669,"depth":471,"text":670},{"id":762,"depth":471,"text":763},{"id":773,"depth":471,"text":774},{"id":803,"depth":193,"text":804},{"id":887,"depth":193,"text":888},{"id":987,"depth":193,"text":988},"local-llm","Use Ollama and Open WebUI to run quantized language models locally on M1\u002FM2\u002FM3 Macs with Metal acceleration.","intermediate","md",{},"\u002Fen\u002Fguides\u002Flocal-llm-mac","2026-05-30",22,{"title":5,"description":1032},"local-llm-mac","en\u002Fguides\u002Flocal-llm-mac",[187,1043,1044,1031,662,1045],"mac","apple-silicon","gguf","9n60mek5RY42Tn79I5lmt6n6azqSIQK432BBY3xh2L8",1781557609815]