anomalyco · andrelandgraf · Jul 3, 2026 · Jul 3, 2026 · Jul 3, 2026
diff --git a/providers/neon/models/claude-opus-4-8.toml b/providers/neon/models/claude-opus-4-8.toml
@@ -0,0 +1,12 @@
+base_model = "anthropic/claude-opus-4-8"  # NOTE(review): presumably supplies the fields not set in this file; confirm
+reasoning_options = [{ type = "toggle" }, { type = "budget_tokens", min = 1_024, max = 127_999 }]  # budget max is 128_000 - 1
+
+[cost]  # NOTE(review): unit is not stated in this file; presumably USD per 1M tokens; confirm
+input = 5
+output = 25  # 5x input
+cache_read = 0.5  # 0.1x input
+cache_write = 6.25  # 1.25x input
+
+[experimental.modes.fast]
+cost = { input = 10, output = 50, cache_read = 1, cache_write = 12.5 }  # every rate is exactly 2x the [cost] table above
+provider = { body = { speed = "fast" }, headers = { anthropic-beta = "fast-mode-2026-02-01" } }  # NOTE(review): presumably merged into the provider request when this mode is selected; confirm
diff --git a/providers/neon/models/gemini-3-5-flash.toml b/providers/neon/models/gemini-3-5-flash.toml
@@ -0,0 +1,8 @@
+base_model = "google/gemini-3.5-flash"  # NOTE(review): presumably supplies the fields not set in this file; confirm
+reasoning_options = [{ type = "effort", values = ["minimal", "low", "medium", "high"] }]  # single "effort" option with four named values
+
+[cost]  # NOTE(review): unit is not stated in this file; presumably USD per 1M tokens; confirm
+input = 1.5
+output = 9  # 6x input
+cache_read = 0.15  # 0.1x input
+input_audio = 1.5  # same rate as `input`
diff --git a/providers/neon/models/gemma-3-12b.toml b/providers/neon/models/gemma-3-12b.toml
@@ -0,0 +1,24 @@
+name = "Gemma 3 12B"  # standalone entry: this file has no base_model key, so all metadata is spelled out here
+description = "Google's open-weight Gemma 3 vision-language model for text and image understanding"
+family = "gemma"
+release_date = "2025-03-13"  # quoted string, not a native TOML date; NOTE(review): presumably what the schema expects; confirm
+last_updated = "2025-03-13"  # same value as release_date
+attachment = true
+reasoning = false  # no reasoning_options key is set in this file
+temperature = true
+tool_call = true
+structured_output = true
+knowledge = "2024-08-31"  # NOTE(review): presumably the training-data cutoff date; confirm
+open_weights = true
+
+[cost]  # NOTE(review): unit is not stated in this file; presumably USD per 1M tokens; confirm
+input = 0.15
+output = 0.5
+
+[limit]  # NOTE(review): presumably token counts; confirm
+context = 131_072  # 128 * 1024
+output = 16_384  # 16 * 1024
+
+[modalities]
+input = ["text", "image"]
+output = ["text"]
diff --git a/providers/neon/models/gpt-5-1-codex-max.toml b/providers/neon/models/gpt-5-1-codex-max.toml
@@ -0,0 +1,7 @@
+base_model = "openai/gpt-5.1-codex-max"  # NOTE(review): presumably supplies the fields not set in this file; confirm
+reasoning_options = [{ type = "effort", values = ["low", "medium", "high", "xhigh"] }]  # single "effort" option; includes "xhigh"
+
+[cost]  # NOTE(review): unit is not stated in this file; presumably USD per 1M tokens; confirm
+input = 1.25
+output = 10  # 8x input
+cache_read = 0.125  # 0.1x input
diff --git a/providers/neon/models/gpt-5-1-codex-mini.toml b/providers/neon/models/gpt-5-1-codex-mini.toml
@@ -0,0 +1,7 @@
+base_model = "openai/gpt-5.1-codex-mini"  # NOTE(review): presumably supplies the fields not set in this file; confirm
+reasoning_options = [{ type = "effort", values = ["low", "medium", "high"] }]  # single "effort" option with three named values
+
+[cost]  # NOTE(review): unit is not stated in this file; presumably USD per 1M tokens; confirm
+input = 0.25
+output = 2  # 8x input
+cache_read = 0.025  # 0.1x input
diff --git a/providers/neon/models/gpt-5-2-codex.toml b/providers/neon/models/gpt-5-2-codex.toml
@@ -0,0 +1,7 @@
+base_model = "openai/gpt-5.2-codex"  # NOTE(review): presumably supplies the fields not set in this file; confirm
+reasoning_options = [{ type = "effort", values = ["low", "medium", "high", "xhigh"] }]  # single "effort" option; includes "xhigh"
+
+[cost]  # NOTE(review): unit is not stated in this file; presumably USD per 1M tokens; confirm
+input = 1.75
+output = 14  # 8x input
+cache_read = 0.175  # 0.1x input
diff --git a/providers/neon/models/gpt-5-3-codex.toml b/providers/neon/models/gpt-5-3-codex.toml
@@ -0,0 +1,7 @@
+base_model = "openai/gpt-5.3-codex"  # NOTE(review): presumably supplies the fields not set in this file; confirm
+reasoning_options = [{ type = "effort", values = ["none", "low", "medium", "high", "xhigh"] }]  # single "effort" option; values include "none" and "xhigh"
+
+[cost]  # NOTE(review): unit is not stated in this file; presumably USD per 1M tokens; confirm
+input = 1.75
+output = 14  # 8x input
+cache_read = 0.175  # 0.1x input
diff --git a/providers/neon/models/gpt-5-5.toml b/providers/neon/models/gpt-5-5.toml
diff --git a/providers/neon/models/llama-4-maverick.toml b/providers/neon/models/llama-4-maverick.toml
@@ -0,0 +1,5 @@
+base_model = "meta/llama-4-maverick-17b-instruct"  # NOTE(review): presumably supplies the fields not set in this file; confirm
+
+[cost]  # only input/output rates are set; NOTE(review): presumably USD per 1M tokens; confirm
+input = 0.5
+output = 1.5  # 3x input
diff --git a/providers/neon/models/meta-llama-3-1-8b-instruct.toml b/providers/neon/models/meta-llama-3-1-8b-instruct.toml
@@ -0,0 +1,24 @@
+name = "Llama 3.1 8B Instruct"  # standalone entry: this file has no base_model key, so all metadata is spelled out here
+description = "Meta's compact open-weight Llama 3.1 model for fast, low-cost text generation"
+family = "llama"
+release_date = "2024-07-23"  # quoted string, not a native TOML date; NOTE(review): presumably what the schema expects; confirm
+last_updated = "2024-07-23"  # same value as release_date
+attachment = false
+reasoning = false  # no reasoning_options key is set in this file
+temperature = true
+tool_call = true
+structured_output = true
+knowledge = "2023-12-31"  # NOTE(review): presumably the training-data cutoff date; confirm
+open_weights = true
+
+[cost]  # NOTE(review): unit is not stated in this file; presumably USD per 1M tokens; confirm
+input = 0.15
+output = 0.45  # 3x input
+
+[limit]  # NOTE(review): presumably token counts; confirm
+context = 131_072  # 128 * 1024
+output = 16_384  # 16 * 1024
+
+[modalities]  # text-only in both directions
+input = ["text"]
+output = ["text"]
diff --git a/providers/neon/models/meta-llama-3-3-70b-instruct.toml b/providers/neon/models/meta-llama-3-3-70b-instruct.toml
@@ -0,0 +1,6 @@
+base_model = "meta/llama-3.3-70b-instruct"  # NOTE(review): presumably supplies the fields not set in this file; confirm
+attachment = false  # NOTE(review): set alongside base_model; presumably overrides the base entry's value; confirm
+
+[cost]  # only input/output rates are set; NOTE(review): presumably USD per 1M tokens; confirm
+input = 0.5
+output = 1.5  # 3x input
diff --git a/providers/neon/models/qwen3-next-80b-a3b-instruct.toml b/providers/neon/models/qwen3-next-80b-a3b-instruct.toml
@@ -0,0 +1,5 @@
+base_model = "alibaba/qwen3-next-80b-a3b-instruct"  # NOTE(review): presumably supplies the fields not set in this file; confirm
+
+[cost]  # only input/output rates are set; NOTE(review): presumably USD per 1M tokens; confirm
+input = 0.15
+output = 1.2  # 8x input
diff --git a/providers/neon/models/qwen35-122b-a10b.toml b/providers/neon/models/qwen35-122b-a10b.toml
@@ -0,0 +1,15 @@
+base_model = "alibaba/qwen3.5-122b-a10b"  # NOTE(review): presumably supplies the fields not set in this file; confirm
+attachment = false  # NOTE(review): set alongside base_model; presumably overrides the base entry's value; confirm
+reasoning_options = [{ type = "toggle" }, { type = "budget_tokens" }]  # NOTE(review): budget_tokens has no min/max here, unlike claude-opus-4-8.toml; confirm that is intended
+
+[cost]  # NOTE(review): unit is not stated in this file; presumably USD per 1M tokens; confirm
+input = 0.22
+output = 2.2  # 10x input
+
+[limit]  # NOTE(review): presumably token counts; confirm
+context = 262_144  # 256 * 1024
+output = 8_000  # NOTE(review): round decimal value, not a multiple of 1024 like the context limit; confirm
+
+[modalities]  # text-only in both directions
+input = ["text"]
+output = ["text"]