ollama-multirun: __socrates_is_a_human___all_humans_are_mo: 20250713-131116

models: codellama:7b cogito:3b cogito:8b deepcoder:1.5b deepseek-r1:1.5b deepseek-r1:14b deepseek-r1:8b dolphin-mistral:7b dolphin3:8b gemma3:1b gemma3:4b gemma3n:e2b gemma3n:e4b gemma:2b granite3.3:2b granite3.3:8b hermes3:8b llama3.1:8b-instruct-q4_1 llama3.2:1b llama3.2:3b llava-llama3:8b llava-phi3:3.8b llava:7b minicpm-v:8b mistral:7b mistral:7b-instruct qwen2.5-coder:7b qwen2.5vl:3b qwen2.5vl:7b qwen3:0.6b qwen3:1.7b qwen3:14b qwen3:4b qwen3:8b smollm2:1.7b smollm2:135m smollm2:360m

Prompt: (raw) (yaml) words:14 bytes:70

Model | Response words | Response bytes | Total duration | Load duration | Prompt eval count | Prompt eval duration | Prompt eval rate | Eval count | Eval duration | Eval rate | Model params | Model size | Model context | Ollama context | Ollama proc
codellama:7b 16 89 1.78416975s 15.12775ms 45 token(s) 515.286333ms 87.33 tokens/s 26 token(s) 1.253001959s 20.75 tokens/s 6.7B 9.4 GB 16384 8192 100% GPU
cogito:3b 94 542 4.044034708s 28.58ms 29 token(s) 182.782584ms 158.66 tokens/s 139 token(s) 3.832057583s 36.27 tokens/s 3.6B 4.0 GB 131072 8192 100% GPU
cogito:8b 58 318 5.051019083s 31.677958ms 29 token(s) 385.885459ms 75.15 tokens/s 83 token(s) 4.632810833s 17.92 tokens/s 8.0B 7.0 GB 131072 8192 100% GPU
deepcoder:1.5b 51 319 4.871419166s 32.723916ms 22 token(s) 105.849625ms 207.84 tokens/s 283 token(s) 4.732268542s 59.80 tokens/s 1.8B 2.1 GB 131072 8192 100% GPU
deepseek-r1:1.5b 40 278 7.419838709s 30.005959ms 22 token(s) 125.249792ms 175.65 tokens/s 437 token(s) 7.263978166s 60.16 tokens/s 1.8B 2.1 GB 131072 8192 100% GPU
deepseek-r1:14b 46 309 53.452344625s 25.8865ms 22 token(s) 3.918333875s 5.61 tokens/s 463 token(s) 49.50697725s 9.35 tokens/s 14.8B 11 GB 131072 8192 6%/94% CPU/GPU
deepseek-r1:8b 81 482 39.413811458s 28.29075ms 21 token(s) 369.613334ms 56.82 tokens/s 639 token(s) 39.015386s 16.38 tokens/s 8.2B 7.6 GB 131072 8192 100% GPU
dolphin-mistral:7b 35 188 3.600346875s 17.164708ms 53 token(s) 504.153708ms 105.13 tokens/s 59 token(s) 3.078299875s 19.17 tokens/s 7.2B 6.4 GB 32768 8192 100% GPU
dolphin3:8b 13 76 1.708469458s 32.200083ms 42 token(s) 636.605042ms 65.97 tokens/s 18 token(s) 1.039076291s 17.32 tokens/s 8.0B 7.0 GB 131072 8192 100% GPU
gemma3:1b 91 606 2.203318667s 55.397334ms 28 token(s) 95.585958ms 292.93 tokens/s 134 token(s) 2.051803708s 65.31 tokens/s 999.89M 2.0 GB 32768 8192 100% GPU
gemma3:4b 46 302 3.083903958s 52.17675ms 28 token(s) 241.3835ms 116.00 tokens/s 80 token(s) 2.789812s 28.68 tokens/s 4.3B 5.9 GB 131072 8192 100% GPU
gemma3n:e2b 64 409 3.373344125s 52.079208ms 28 token(s) 274.59ms 101.97 tokens/s 100 token(s) 3.046099708s 32.83 tokens/s 4.5B 4.8 GB 32768 8192 100% GPU
gemma3n:e4b 49 310 4.836905416s 73.924208ms 28 token(s) 1.116221s 25.08 tokens/s 82 token(s) 3.646292709s 22.49 tokens/s 6.9B 6.2 GB 32768 8192 100% GPU
gemma:2b 26 155 811.885208ms 29.112208ms 41 token(s) 165.682708ms 247.46 tokens/s 31 token(s) 616.543792ms 50.28 tokens/s 2.5B 3.0 GB 8192 8192 100% GPU
granite3.3:2b 33 214 1.665402583s 21.852791ms 68 token(s) 293.494ms 231.69 tokens/s 54 token(s) 1.349327125s 40.02 tokens/s 2.5B 3.3 GB 131072 8192 100% GPU
granite3.3:8b 101 642 11.662759042s 21.648375ms 68 token(s) 851.875125ms 79.82 tokens/s 179 token(s) 10.788482667s 16.59 tokens/s 8.2B 7.9 GB 131072 8192 100% GPU
hermes3:8b 75 459 5.426610667s 30.382917ms 28 token(s) 482.867375ms 57.99 tokens/s 95 token(s) 4.912776167s 19.34 tokens/s 8.0B 6.7 GB 131072 8192 100% GPU
llama3.1:8b-instruct-q4_1 94 518 8.92332575s 30.522542ms 29 token(s) 371.405208ms 78.08 tokens/s 147 token(s) 8.520679542s 17.25 tokens/s 8.0B 7.2 GB 131072 8192 100% GPU
llama3.2:1b 67 375 1.846308458s 29.553208ms 44 token(s) 136.164375ms 323.14 tokens/s 93 token(s) 1.680038458s 55.36 tokens/s 1.2B 2.8 GB 131072 8192 100% GPU
llama3.2:3b 81 509 3.578505042s 32.371625ms 44 token(s) 286.865917ms 153.38 tokens/s 118 token(s) 3.258653292s 36.21 tokens/s 3.2B 4.0 GB 131072 8192 100% GPU
llava-llama3:8b 17 105 1.599675625s 31.933708ms 30 token(s) 371.381125ms 80.78 tokens/s 21 token(s) 1.195826708s 17.56 tokens/s 8.0B 6.8 GB 8192 4096 100% GPU
llava-phi3:3.8b 50 277 2.743260416s 16.55925ms 35 token(s) 365.127958ms 95.86 tokens/s 75 token(s) 2.360913209s 31.77 tokens/s 3.8B 5.4 GB 4096 4096 100% GPU
llava:7b 13 85 1.648378833s 13.744708ms 33 token(s) 476.045125ms 69.32 tokens/s 23 token(s) 1.157706292s 19.87 tokens/s 7.2B 7.0 GB 32768 8192 100% GPU
minicpm-v:8b 185 1157 11.229582334s 28.600209ms 27 token(s) 303.721583ms 88.90 tokens/s 222 token(s) 10.896694625s 20.37 tokens/s 7.6B 6.8 GB 32768 8192 100% GPU
mistral:7b 14 75 1.480011708s 16.975083ms 30 token(s) 405.94625ms 73.90 tokens/s 21 token(s) 1.056302625s 19.88 tokens/s 7.2B 6.4 GB 32768 8192 100% GPU
mistral:7b-instruct 19 105 1.746573167s 17.736834ms 29 token(s) 320.734917ms 90.42 tokens/s 28 token(s) 1.407332792s 19.90 tokens/s 7.2B 6.4 GB 32768 8192 100% GPU
qwen2.5-coder:7b 45 262 3.783768s 28.141459ms 48 token(s) 581.242417ms 82.58 tokens/s 61 token(s) 3.173769916s 19.22 tokens/s 7.6B 6.0 GB 32768 8192 100% GPU
qwen2.5vl:3b 31 192 1.602317292s 30.927625ms 39 token(s) 330.319042ms 118.07 tokens/s 44 token(s) 1.2405255s 35.47 tokens/s 3.8B 6.2 GB 128000 8192 100% GPU
qwen2.5vl:7b 57 358 4.91826025s 30.817875ms 39 token(s) 546.777208ms 71.33 tokens/s 82 token(s) 4.340041417s 18.89 tokens/s 8.3B 9.1 GB 128000 8192 100% GPU
qwen3:0.6b 34 198 3.197030125s 27.662625ms 29 token(s) 75.210875ms 385.58 tokens/s 311 token(s) 3.093634s 100.53 tokens/s 751.63M 2.3 GB 40960 8192 100% GPU
qwen3:1.7b 61 372 10.3783225s 25.109875ms 29 token(s) 127.892917ms 226.75 tokens/s 564 token(s) 10.224783s 55.16 tokens/s 2.0B 3.0 GB 40960 8192 100% GPU
qwen3:14b 83 554 1m23.837005583s 26.075166ms 29 token(s) 3.575726417s 8.11 tokens/s 728 token(s) 1m20.233820083s 9.07 tokens/s 14.8B 12 GB 40960 8192 5%/95% CPU/GPU
qwen3:4b 75 502 20.803685834s 26.377042ms 29 token(s) 222.635375ms 130.26 tokens/s 543 token(s) 20.554090375s 26.42 tokens/s 4.0B 5.3 GB 40960 8192 100% GPU
qwen3:8b 92 585 45.31394025s 29.283375ms 29 token(s) 367.792042ms 78.85 tokens/s 738 token(s) 44.916298667s 16.43 tokens/s 8.2B 7.6 GB 40960 8192 100% GPU
smollm2:1.7b 45 259 1.688999167s 17.721625ms 48 token(s) 221.648916ms 216.56 tokens/s 62 token(s) 1.448981625s 42.79 tokens/s 1.7B 4.7 GB 8192 8192 100% GPU
smollm2:135m 70 418 620.797666ms 18.598208ms 49 token(s) 44.420875ms 1103.08 tokens/s 88 token(s) 557.174667ms 157.94 tokens/s 134.52M 1.2 GB 8192 8192 100% GPU
smollm2:360m 35 208 557.485542ms 20.45275ms 49 token(s) 71.108208ms 689.09 tokens/s 42 token(s) 465.01ms 90.32 tokens/s 361.82M 1.9 GB 8192 8192 100% GPU


System
Ollama proc: 100% GPU
Ollama context: 8192
Ollama version: 0.9.7-rc0
Multirun timeout: 300 seconds
Sys arch: arm64
Sys processor: arm
Sys memory: 12G + 235M
Sys OS: Darwin 24.5.0