diff --git a/providers/neon/models/claude-opus-4-8.toml b/providers/neon/models/claude-opus-4-8.toml new file mode 100644 index 0000000000..980c04951e --- /dev/null +++ b/providers/neon/models/claude-opus-4-8.toml @@ -0,0 +1,12 @@ +base_model = "anthropic/claude-opus-4-8" +reasoning_options = [{ type = "toggle" }, { type = "budget_tokens", min = 1_024, max = 127_999 }] + +[cost] +input = 5 +output = 25 +cache_read = 0.5 +cache_write = 6.25 + +[experimental.modes.fast] +cost = { input = 10, output = 50, cache_read = 1, cache_write = 12.5 } +provider = { body = { speed = "fast" }, headers = { anthropic-beta = "fast-mode-2026-02-01" } } diff --git a/providers/neon/models/gemini-3-5-flash.toml b/providers/neon/models/gemini-3-5-flash.toml new file mode 100644 index 0000000000..0d75d4a0d3 --- /dev/null +++ b/providers/neon/models/gemini-3-5-flash.toml @@ -0,0 +1,8 @@ +base_model = "google/gemini-3.5-flash" +reasoning_options = [{ type = "effort", values = ["minimal", "low", "medium", "high"] }] + +[cost] +input = 1.5 +output = 9 +cache_read = 0.15 +input_audio = 1.5 diff --git a/providers/neon/models/gemma-3-12b.toml b/providers/neon/models/gemma-3-12b.toml new file mode 100644 index 0000000000..963b076df4 --- /dev/null +++ b/providers/neon/models/gemma-3-12b.toml @@ -0,0 +1,24 @@ +name = "Gemma 3 12B" +description = "Google's open-weight Gemma 3 vision-language model for text and image understanding" +family = "gemma" +release_date = "2025-03-13" +last_updated = "2025-03-13" +attachment = true +reasoning = false +temperature = true +tool_call = true +structured_output = true +knowledge = "2024-08-31" +open_weights = true + +[cost] +input = 0.15 +output = 0.5 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text", "image"] +output = ["text"] diff --git a/providers/neon/models/gpt-5-1-codex-max.toml b/providers/neon/models/gpt-5-1-codex-max.toml new file mode 100644 index 0000000000..11bdb4b5bf --- /dev/null +++ b/providers/neon/models/gpt-5-1-codex-max.toml @@ -0,0 +1,16 @@ +base_model = "openai/gpt-5.1-codex-max" +reasoning_options = [{ type = "effort", values = ["low", "medium", "high", "xhigh"] }] + +[cost] +input = 1.25 +output = 10 +cache_read = 0.125 + +[modalities] +input = ["text", "image"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/gpt-5-1-codex-mini.toml b/providers/neon/models/gpt-5-1-codex-mini.toml new file mode 100644 index 0000000000..6134221f6e --- /dev/null +++ b/providers/neon/models/gpt-5-1-codex-mini.toml @@ -0,0 +1,16 @@ +base_model = "openai/gpt-5.1-codex-mini" +reasoning_options = [{ type = "effort", values = ["low", "medium", "high"] }] + +[cost] +input = 0.25 +output = 2 +cache_read = 0.025 + +[modalities] +input = ["text", "image"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/gpt-5-1.toml b/providers/neon/models/gpt-5-1.toml index 489f83fffc..dbdda89b9e 100644 --- a/providers/neon/models/gpt-5-1.toml +++ b/providers/neon/models/gpt-5-1.toml @@ -5,3 +5,12 @@ reasoning_options = [{ type = "effort", values = ["none", "low", "medium", "high input = 1.25 output = 10 cache_read = 0.125 + +[modalities] +input = ["text", "image"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/gpt-5-2-codex.toml b/providers/neon/models/gpt-5-2-codex.toml new file mode 100644 index 0000000000..d325069ce0 --- /dev/null +++ b/providers/neon/models/gpt-5-2-codex.toml @@ -0,0 +1,16 @@ +base_model = "openai/gpt-5.2-codex" +reasoning_options = [{ type = "effort", values = ["low", "medium", "high", "xhigh"] }] + +[cost] +input = 1.75 +output = 14 +cache_read = 0.175 + +[modalities] +input = ["text", "image", "pdf"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/gpt-5-2.toml b/providers/neon/models/gpt-5-2.toml index a2a20d0e32..0a93b33e98 100644 --- a/providers/neon/models/gpt-5-2.toml +++ b/providers/neon/models/gpt-5-2.toml @@ -5,3 +5,12 @@ reasoning_options = [{ type = "effort", values = ["none", "low", "medium", "high input = 1.75 output = 14 cache_read = 0.175 + +[modalities] +input = ["text", "image"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/gpt-5-3-codex.toml b/providers/neon/models/gpt-5-3-codex.toml new file mode 100644 index 0000000000..f9b49c0017 --- /dev/null +++ b/providers/neon/models/gpt-5-3-codex.toml @@ -0,0 +1,16 @@ +base_model = "openai/gpt-5.3-codex" +reasoning_options = [{ type = "effort", values = ["none", "low", "medium", "high", "xhigh"] }] + +[cost] +input = 1.75 +output = 14 +cache_read = 0.175 + +[modalities] +input = ["text", "image", "pdf"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/gpt-5-4-mini.toml b/providers/neon/models/gpt-5-4-mini.toml index 56a278d15d..c1f0daadee 100644 --- a/providers/neon/models/gpt-5-4-mini.toml +++ b/providers/neon/models/gpt-5-4-mini.toml @@ -9,3 +9,12 @@ cache_read = 0.075 [experimental.modes.fast] cost = { input = 1.5, output = 9, cache_read = 0.15 } provider = { body = { service_tier = "priority" } } + +[modalities] +input = ["text", "image"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/gpt-5-4-nano.toml b/providers/neon/models/gpt-5-4-nano.toml index 3872b109a2..bd7a157b88 100644 --- a/providers/neon/models/gpt-5-4-nano.toml +++ b/providers/neon/models/gpt-5-4-nano.toml @@ -5,3 +5,12 @@ reasoning_options = [{ type = "effort", values = ["none", "low", "medium", "high input = 0.2 output = 1.25 cache_read = 0.02 + +[modalities] +input = ["text", "image"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/gpt-5-4.toml b/providers/neon/models/gpt-5-4.toml index 76ccd302ea..21c8f51c18 100644 --- a/providers/neon/models/gpt-5-4.toml +++ b/providers/neon/models/gpt-5-4.toml @@ -15,3 +15,12 @@ cache_read = 0.5 [experimental.modes.fast] cost = { input = 5, output = 30, cache_read = 0.5 } provider = { body = { service_tier = "priority" } } + +[modalities] +input = ["text", "image", "pdf"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/gpt-5-5.toml b/providers/neon/models/gpt-5-5.toml deleted file mode 100644 index 9b565dd3a2..0000000000 --- a/providers/neon/models/gpt-5-5.toml +++ /dev/null @@ -1,17 +0,0 @@ -base_model = "openai/gpt-5.5" -reasoning_options = [{ type = "effort", values = ["none", "low", "medium", "high", "xhigh"] }] - -[cost] -input = 5 -output = 30 -cache_read = 0.5 - -[[cost.tiers]] -tier = { type = "context", size = 272_000 } -input = 10 -output = 45 -cache_read = 1 - -[experimental.modes.fast] -cost = { input = 12.5, output = 75, cache_read = 1.25 } -provider = { body = { service_tier = "priority" } } diff --git a/providers/neon/models/gpt-5-mini.toml b/providers/neon/models/gpt-5-mini.toml index 5754d47df3..82cc74c50d 100644 --- a/providers/neon/models/gpt-5-mini.toml +++ b/providers/neon/models/gpt-5-mini.toml @@ -5,3 +5,12 @@ reasoning_options = [{ type = "effort", values = ["minimal", "low", "medium", "h input = 0.25 output = 2 cache_read = 0.025 + +[modalities] +input = ["text", "image"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/gpt-5-nano.toml b/providers/neon/models/gpt-5-nano.toml index b96eddfd6f..01f2a4111e 100644 --- a/providers/neon/models/gpt-5-nano.toml +++ b/providers/neon/models/gpt-5-nano.toml @@ -5,3 +5,12 @@ reasoning_options = [{ type = "effort", values = ["minimal", "low", "medium", "h input = 0.05 output = 0.4 cache_read = 0.005 + +[modalities] +input = ["text", "image"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/gpt-5.toml b/providers/neon/models/gpt-5.toml index 104575502b..6b3b02a0b2 100644 --- a/providers/neon/models/gpt-5.toml +++ b/providers/neon/models/gpt-5.toml @@ -5,3 +5,12 @@ reasoning_options = [{ type = "effort", values = ["minimal", "low", "medium", "h input = 1.25 output = 10 cache_read = 0.125 + +[modalities] +input = ["text", "image"] +output = ["text", "image"] + +[provider] +npm = "@ai-sdk/openai" +api = "${NEON_AI_GATEWAY_BASE_URL}/ai-gateway/openai/v1" +shape = "responses" diff --git a/providers/neon/models/llama-4-maverick.toml b/providers/neon/models/llama-4-maverick.toml new file mode 100644 index 0000000000..2f7ae0d63c --- /dev/null +++ b/providers/neon/models/llama-4-maverick.toml @@ -0,0 +1,5 @@ +base_model = "meta/llama-4-maverick-17b-instruct" + +[cost] +input = 0.5 +output = 1.5 diff --git a/providers/neon/models/meta-llama-3-1-8b-instruct.toml b/providers/neon/models/meta-llama-3-1-8b-instruct.toml new file mode 100644 index 0000000000..000b16fcdd --- /dev/null +++ b/providers/neon/models/meta-llama-3-1-8b-instruct.toml @@ -0,0 +1,24 @@ +name = "Llama 3.1 8B Instruct" +description = "Meta's compact open-weight Llama 3.1 model for fast, low-cost text generation" +family = "llama" +release_date = "2024-07-23" +last_updated = "2024-07-23" +attachment = false +reasoning = false +temperature = true +tool_call = true +structured_output = true +knowledge = "2023-12-31" +open_weights = true + +[cost] +input = 0.15 +output = 0.45 + +[limit] +context = 131_072 +output = 16_384 + +[modalities] +input = ["text"] +output = ["text"] diff --git a/providers/neon/models/meta-llama-3-3-70b-instruct.toml b/providers/neon/models/meta-llama-3-3-70b-instruct.toml new file mode 100644 index 0000000000..0fdd5af25d --- /dev/null +++ b/providers/neon/models/meta-llama-3-3-70b-instruct.toml @@ -0,0 +1,6 @@ +base_model = "meta/llama-3.3-70b-instruct" +attachment = false + +[cost] +input = 0.5 +output = 1.5 diff --git a/providers/neon/models/qwen3-next-80b-a3b-instruct.toml b/providers/neon/models/qwen3-next-80b-a3b-instruct.toml new file mode 100644 index 0000000000..c21be63d5a --- /dev/null +++ b/providers/neon/models/qwen3-next-80b-a3b-instruct.toml @@ -0,0 +1,5 @@ +base_model = "alibaba/qwen3-next-80b-a3b-instruct" + +[cost] +input = 0.15 +output = 1.2 diff --git a/providers/neon/models/qwen35-122b-a10b.toml b/providers/neon/models/qwen35-122b-a10b.toml new file mode 100644 index 0000000000..d5afed3343 --- /dev/null +++ b/providers/neon/models/qwen35-122b-a10b.toml @@ -0,0 +1,15 @@ +base_model = "alibaba/qwen3.5-122b-a10b" +attachment = false +reasoning_options = [{ type = "toggle" }, { type = "budget_tokens" }] + +[cost] +input = 0.22 +output = 2.2 + +[limit] +context = 262_144 +output = 8_000 + +[modalities] +input = ["text"] +output = ["text"]