From 35ce97dcac3095016cba2e4ba0b102bc4c06d781 Mon Sep 17 00:00:00 2001 From: Evgeny Formanenko Date: Fri, 29 May 2026 14:05:32 +0300 Subject: [PATCH] Replace Swagger UI with Scalar, add a consumer-first API reference Serve the OpenAPI reference with Scalar (utoipa v5 + utoipa-scalar) at /docs, replacing Swagger UI, and ship a consumer-first Introduction. - Renderer: Scalar at /docs via a custom HTML/CSS template (docs/openapi/scalar_template.html); /api-docs/openapi.json served by serve_openapi_spec; utoipa-swagger-ui removed. - Introduction (docs/openapi/, concatenated into info.description): 01-introduction (orientation + finalized vs. real-time data model), 02-blockchain-forks (parentBlockHash / 409 protocol), 03-getting-started (first query + README pointer). Architecture/self-hosting content is kept out of the reference. - Internal-endpoint filtering via the x-internal OpenAPI extension; build_openapi_spec(show_internal) drops internal ops and empty tags while preserving doc-only tags. Toggle with SHOW_INTERNAL_DOCS. Datasets/Streaming shown by default; Monitoring/Debug hidden. - /stream 409 documents a ConflictResponse body (previousBlocks). - Move graceful_shutdown.md under docs/decisions/. Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 123 ++------- Cargo.toml | 5 +- .../graceful_shutdown.md} | 0 docs/openapi/01-introduction.md | 43 ++++ docs/openapi/02-blockchain-forks.md | 105 ++++++++ docs/openapi/03-getting-started.md | 25 ++ docs/openapi/scalar_template.html | 152 ++++++++++++ src/config.rs | 2 +- src/endpoints/stream.rs | 26 +- src/http_server.rs | 176 ++++--------- src/main.rs | 7 +- src/openapi.rs | 233 +++++++++++++++++- 12 files changed, 632 insertions(+), 265 deletions(-) rename docs/{GRACEFUL_SHUTDOWN.md => decisions/graceful_shutdown.md} (100%) create mode 100644 docs/openapi/01-introduction.md create mode 100644 docs/openapi/02-blockchain-forks.md create mode 100644 docs/openapi/03-getting-started.md create mode 100644 docs/openapi/scalar_template.html diff --git a/Cargo.lock b/Cargo.lock index 21d9f62..24d980c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2482,9 +2482,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -4344,9 +4344,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.25" +version = "0.4.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" +checksum = "616ec5685824bcc94416c6d4a7a446eea774a31efd7062c8480ba6fd06d7a6e5" [[package]] name = "loom" @@ -4456,16 +4456,6 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "mime_guess" -version = "2.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c44f8e672c00fe5308fa235f821cb4198414e1c77935c1ab6948d3fd78550e" -dependencies = [ - "mime", - "unicase", -] - [[package]] name = "minimal-lexical" version = "0.2.1" @@ -4474,11 +4464,12 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.3" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -6111,30 +6102,6 @@ dependencies = [ "toml_edit", ] -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - [[package]] name = "proc-macro2" version = "1.0.93" @@ -6887,40 +6854,6 @@ dependencies = [ "tokio 1.48.0", ] -[[package]] -name = "rust-embed" -version = "8.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04113cb9355a377d83f06ef1f0a45b8ab8cd7d8b1288160717d66df5c7988d27" -dependencies = [ - "rust-embed-impl", - "rust-embed-utils", - "walkdir", -] - -[[package]] -name = "rust-embed-impl" -version = "8.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da0902e4c7c8e997159ab384e6d0fc91c221375f6894346ae107f47dd0f3ccaa" -dependencies = [ - "proc-macro2", - "quote", - "rust-embed-utils", - "syn 2.0.96", - "walkdir", -] - -[[package]] -name = "rust-embed-utils" -version = "8.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bcdef0be6fe7f6fa333b1073c949729274b05f123a0ad7efcb8efd878e5c3b1" -dependencies = [ - "sha2", - "walkdir", -] - [[package]] name = "rustc-demangle" version = "0.1.24" @@ -7633,6 +7566,12 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "simd-json" version = "0.13.11" @@ -7926,7 +7865,6 @@ dependencies = [ "url", "utoipa", "utoipa-scalar", - "utoipa-swagger-ui", "uuid 1.19.0", ] @@ -9045,12 +8983,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" -[[package]] -name = "unicase" -version = "2.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142" - [[package]] name = "unicode-ident" version = "1.0.16" @@ -9185,9 +9117,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "utoipa" -version = "4.2.3" +version = "5.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23" +checksum = "8bde15df68e80b16c7d16b9616e80770ad158988daa56a27dccd1e55558b0160" dependencies = [ "indexmap 2.7.1", "serde", @@ -9197,11 +9129,10 @@ dependencies = [ [[package]] name = "utoipa-gen" -version = "4.3.1" +version = "5.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20c24e8ab68ff9ee746aad22d39b5535601e6416d1b0feeabf78be986a5c4392" +checksum = "6ba0b99ee52df3028635d93840c797102da61f8a7bb3cf751032455895b52ef8" dependencies = [ - "proc-macro-error", "proc-macro2", "quote", "syn 2.0.96", @@ -9209,30 +9140,14 @@ dependencies = [ [[package]] name = "utoipa-scalar" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3ab4b7269d14d93626b0bfedf212f1b0995cb7d13d35daba21d579511e7fae8" -dependencies = [ - "axum", - "serde", - "serde_json", - "utoipa", -] - -[[package]] -name = "utoipa-swagger-ui" -version = "6.0.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b39868d43c011961e04b41623e050aedf2cc93652562ff7935ce0f819aaf2da" +checksum = "c1291aa7a2223c2f8399d1c6627ca0ba57ca0d7ecac762a2094a9dfd6376445a" dependencies = [ "axum", - "mime_guess", - "regex", - "rust-embed", "serde", "serde_json", "utoipa", - "zip", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d071e31..fa794bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,9 +15,8 @@ atomic_enum = { version = "0.3.0", features = ["cas"] } axum = { version = "0.7", features = ["http2"] } axum-extra = { version = "0.9", features = ["query"] } base64 = "0.22" -utoipa = { version = "4", features = ["preserve_path_order"] } -utoipa-swagger-ui = { version = "6", features = ["axum"] } -utoipa-scalar = { version = "0.1", features = ["axum"] } +utoipa = { version = "5", features = ["preserve_path_order"] } +utoipa-scalar = { version = "0.2", features = ["axum"] } bimap = "0.6.3" bytes = "1.9" chrono = "0.4" diff --git a/docs/GRACEFUL_SHUTDOWN.md b/docs/decisions/graceful_shutdown.md similarity index 100% rename from docs/GRACEFUL_SHUTDOWN.md rename to docs/decisions/graceful_shutdown.md diff --git a/docs/openapi/01-introduction.md b/docs/openapi/01-introduction.md new file mode 100644 index 0000000..9bcbad1 --- /dev/null +++ b/docs/openapi/01-introduction.md @@ -0,0 +1,43 @@ +API for querying and streaming blockchain data from the SQD Network. + +A portal is the HTTP gateway you query for on-chain data. You point it at a dataset (a chain), ask for +a range of blocks with the fields and filters you care about, and it streams the matching data back as +newline-delimited JSON. + +## How a portal serves data + +A single request can span the whole chain because the portal merges two sources for you: **archival** +history for everything up to the last few hours, and **real-time** blocks for the tip of the chain. You +query one endpoint; the portal stitches the ranges together. + +```text + ┌──────────────────────────┐ + │ real-time tip │ + │ (unfinalized, │ + │ can reorg) │ + └──────────────────────────┘ + ▲ + ┌──────────────┐ ── recent ─────┘ + client ──HTTP──► │ Portal │ + └──────────────┘ ── historical ─┐ + ▼ + ┌──────────────────────────┐ + │ archival │ + │ (finalized, │ + │ lags by hours) │ + └──────────────────────────┘ +``` + +## Finalized vs. real-time data + +The two halves of that stream come with different guarantees, and the difference matters for any +consumer: + +- **Archival (finalized) history** — everything except roughly the last few hours. High throughput and + stable: these blocks are finalized and will not change. +- **Real-time tip** — the most recent blocks, served at low latency. These are **unfinalized**: when the + chain reorganizes ("reorg"), a block you already received at the tip can be replaced. + +So if you stream near the head of the chain, be ready for blocks you've already processed to be rolled +back. The next section is the protocol the portal gives you to detect that and resync safely — **read it +before you build a streaming consumer.** diff --git a/docs/openapi/02-blockchain-forks.md b/docs/openapi/02-blockchain-forks.md new file mode 100644 index 0000000..3976f30 --- /dev/null +++ b/docs/openapi/02-blockchain-forks.md @@ -0,0 +1,105 @@ +## Blockchain forks + +Public blockchains can **reorganize** ("reorg") — a block that was on the canonical chain a moment ago can be replaced +by a block from a competing branch, invalidating its descendants. A streaming consumer that has already processed a +now-orphaned block will be out of sync with the network unless the portal tells it which blocks to discard. + +The portal handles this via the optional `parentBlockHash` field on a stream request and a structured **409 Conflict** +response. + +> [!IMPORTANT] +> **Always pass `parentBlockHash` when resuming a stream.** A reorg can happen on any chain — you can't prevent +> conflicts, only detect them. A client that omits `parentBlockHash` will silently process blocks from a forked chain +> after a reorg, with no signal that anything is wrong. + +### How parentBlockHash works + +When opening a stream, the client passes the **hash of the parent of `fromBlock`** (i.e. the last block the client +already trusts). The portal compares that hash to the parent it sees for `fromBlock` on the current canonical chain: + +```text + portal's view of the chain + ──────────────────────────────────────────────────────────────────► + + ┌─────┐ ┌─────┐ ┌─────┐ ┌─────┐ + ... │ N-2 │ ────────► │ N-1 │ ────────► │ N │ ────────► │ N+1 │ ... + └─────┘ └─────┘ └─────┘ └─────┘ + ▲ ▲ + │ │ + parentBlockHash fromBlock + from the client from the client +``` + +- If `parentBlockHash` matches the parent the portal sees for `fromBlock` → the stream starts normally. +- If it does **not** match → a fork happened between the client's last seen block and `fromBlock`. The portal returns + `409 Conflict`. + +### What a conflict means + +A conflict means the chain the client remembers has diverged from the canonical chain the portal sees, at or before +`fromBlock`: the client can't safely resume from this block and must back up first. The portal surfaces this conflict as +a `409 Conflict` response. + +### Visualising a reorg + +```text + ┌─────┐ + │ N+1'│ ← new canonical tip + ┌────►└─────┘ + │ + ┌─────┐ ┌─────┐ │ ┌─────┐ + ... │ N-1 │──►│ N │─┴►│ N+1 │ ← orphaned branch + └─────┘ └─────┘ └─────┘ (the client may + have processed + this already) +``` + +If the client tries to resume at `fromBlock = N+2` with `parentBlockHash = hash(N+1)` (orphaned), the portal sees that +`N+1` is no longer the parent of `N+2'` on the canonical chain → it reports a conflict (`409`). + +### Conflict response body + +```json +{ + "previousBlocks": [ + { + "number": 21780872, + "hash": "0xf6a96a29..." + }, + { + "number": 21780871, + "hash": "0xab12cd..." + } + ] +} +``` + +- `previousBlocks` is a single array of `{ number, hash }` pairs from the **current canonical chain** at and below the + conflict point. +- The array length is arbitrary, but is **guaranteed to contain at least the parent of the requested `fromBlock`**. +- Order: most recent first (canonical chain descending from the conflict point). + +### Recommended client behaviour + +When the portal reports a conflict (`409`): + +1. **Walk back** through `previousBlocks` and look for a block `{ number, hash }` you already trust — i.e. one whose + `(number, hash)` you've previously processed and stored. +2. If you find a shared ancestor at block `K`, **resume the stream from `K+1`** with `parentBlockHash = hash(K)`. +3. If `previousBlocks` does not contain any block you recognise (the divergence is deeper than the array provided), * + *re-request earlier blocks** — open a new stream that includes a `fromBlock` further in the past — and repeat the + search. **Always pass `parentBlockHash`** on every retry too: that keeps the portal comparing your view against the + canonical chain, so each conflict narrows the search instead of just resending the same one. + +```text + client state portal canonical chain + ──────────── ────────────────────── + + ✓ block K ─── matches ───► block K ← shared ancestor: resume here + ? block K+1 ─► block K+1' + ✗ block K+2 (orphaned) ─► block K+2' + ✗ block K+3 (orphaned) ─► block K+3' (tip) +``` + +The portal never tells you "rewind exactly to block X" — it gives you a slice of the canonical chain and trusts your +client to compare against its own state. diff --git a/docs/openapi/03-getting-started.md b/docs/openapi/03-getting-started.md new file mode 100644 index 0000000..ba9916c --- /dev/null +++ b/docs/openapi/03-getting-started.md @@ -0,0 +1,25 @@ +## Your first query + +Stream a few blocks from a dataset. `fromBlock` is required; list the fields you want and the portal +returns only those: + +```bash +curl "http://localhost:8000/datasets/ethereum-mainnet/stream" --compressed -d '{ + "type": "evm", + "fromBlock": 21000000, + "fields": { "block": { "number": true, "hash": true, "timestamp": true } } +}' +``` + +The response is newline-delimited JSON — one block per line. Whenever you **resume** a stream, pass +`parentBlockHash` (see [How parentBlockHash works](#description/how-parentblockhash-works)) so the portal +can tell you if the chain reorged while you were away. + +For the full query language — filters, the other chain types, and every selectable field — see the +[SQD docs](https://beta.docs.sqd.dev/en/portal/evm/overview). + +## Running your own portal + +This reference is for consuming data from a portal. If you want to operate your own, the +[project README](https://github.com/subsquid/sqd-portal#initial-configuration) covers configuration and +deployment. diff --git a/docs/openapi/scalar_template.html b/docs/openapi/scalar_template.html new file mode 100644 index 0000000..0b69325 --- /dev/null +++ b/docs/openapi/scalar_template.html @@ -0,0 +1,152 @@ + + + + SQD Portal API + + + + + + + + + + diff --git a/src/config.rs b/src/config.rs index f840bb9..04fcaeb 100644 --- a/src/config.rs +++ b/src/config.rs @@ -178,7 +178,7 @@ fn default_transport_timeout() -> Duration { Duration::from_secs(60) } -// Graceful shutdown defaults. See docs/GRACEFUL_SHUTDOWN.md for the full +// Graceful shutdown defaults. See docs/decisions/graceful_shutdown.md for the full // lifecycle and timing rationale. // // pre_drain_grace_period: window during which /ready returns 503 before we diff --git a/src/endpoints/stream.rs b/src/endpoints/stream.rs index e035842..12bf8ae 100644 --- a/src/endpoints/stream.rs +++ b/src/endpoints/stream.rs @@ -16,11 +16,12 @@ use crate::{ hotblocks::{traceless_key, HotblocksHandle, StreamMode}, http_server::{forward_hotblocks_response, forward_response}, network::NetworkClient, + openapi::{ConflictResponse, StreamRequestBody}, types::{Compression, DatasetId, RequestError, StreamRequest}, utils::conversion::{join_gzip_default, recompress_gzip}, }; -/// [INTERNAL] Archival stream +/// Archival stream /// /// Returns only archived blocks (no hotblocks). Server-side limits apply to the query. #[utoipa::path( @@ -44,7 +45,8 @@ use crate::{ (status = 500, description = "Internal server error"), (status = 503, description = "Service temporarily unavailable"), ), - tag = "Archival stream" + tag = "Streaming", + extensions(("x-internal" = json!(true))), )] pub(crate) async fn run_archival_stream_restricted( task_manager: Extension>, @@ -57,7 +59,7 @@ pub(crate) async fn run_archival_stream_restricted( run_archival_stream(task_manager, network, config, dataset_id, request).await } -/// [INTERNAL] Archival stream (debug) +/// Debug Archival stream /// /// Debug variant of /archival-stream without server-side query restrictions. #[utoipa::path( @@ -81,7 +83,8 @@ pub(crate) async fn run_archival_stream_restricted( (status = 500, description = "Internal server error"), (status = 503, description = "Service temporarily unavailable"), ), - tag = "Archival stream" + tag = "Streaming", + extensions(("x-internal" = json!(true))), )] pub(crate) async fn run_archival_stream( Extension(task_manager): Extension>, @@ -142,18 +145,15 @@ pub(crate) async fn run_archival_stream( (status = 404, description = "Dataset not found"), (status = 409, description = "\ Parent block hash mismatch — the `parentHash` of the first requested block does not match \ -`query.parentBlockHash`. This typically indicates a chain reorganization relative to the \ -client's state.\n\n\ -The response body is a JSON object with a single `previousBlocks` array of `{number, hash}` \ -pairs from the current canonical chain. The array may have arbitrary length but is \ -guaranteed to contain at least the parent of the first requested block. Clients should \ -find the last known shared ancestor and re-request from there; if no shared block is \ -found, request earlier blocks until one is."), +`query.parentBlockHash`, typically a chain reorganization relative to the client's state. \ +The body (see schema below) lists recent canonical-chain blocks so the client can find a \ +shared ancestor and resume. See [How parentBlockHash works](#description/how-parentblockhash-works) \ +for the recovery procedure and a worked example.", body = ConflictResponse), (status = 429, description = "Rate limit exceeded"), (status = 500, description = "Internal server error"), (status = 503, description = "Service temporarily unavailable"), ), - tag = "Stream" + tag = "Streaming" )] pub(crate) async fn run_stream( Extension(task_manager): Extension>, @@ -200,7 +200,7 @@ pub(crate) async fn run_stream( (status = 500, description = "Internal server error"), (status = 503, description = "Service temporarily unavailable"), ), - tag = "Stream" + tag = "Streaming" )] pub(crate) async fn run_finalized_stream( Extension(task_manager): Extension>, diff --git a/src/http_server.rs b/src/http_server.rs index b3beb11..25309eb 100644 --- a/src/http_server.rs +++ b/src/http_server.rs @@ -24,9 +24,7 @@ use tower_http::decompression::RequestDecompressionLayer; use tower_http::request_id::{ MakeRequestUuid, PropagateRequestIdLayer, RequestId, SetRequestIdLayer, }; -use utoipa::OpenApi; use utoipa_scalar::{Scalar, Servable as _}; -use utoipa_swagger_ui::SwaggerUi; use crate::datasets::DatasetConfig; use crate::endpoints::{ @@ -36,7 +34,7 @@ use crate::endpoints::{ }, }; use crate::hotblocks::HotblocksErr; -use crate::openapi::ApiDoc; +use crate::openapi::{build_openapi_spec, serve_openapi_spec, BlockHead, StatusResponse}; use crate::types::api_types::AvailableDatasetApiResponse; use crate::types::Compression; use crate::utils::conversion::json_lines_to_json; @@ -55,58 +53,6 @@ use crate::sql; #[cfg(feature = "sql")] use axum::body; -/// Endpoints whose doc-comment summary starts with this marker are considered internal. -/// Mark a handler like: -/// -/// ```ignore -/// /// [INTERNAL] Get debug information for a specific block -/// ``` -/// -/// They are stripped from the served OpenAPI spec unless `show_internal` is true. -/// When shown, the marker is stripped from the summary for a clean UI. -const INTERNAL_MARKER: &str = "[INTERNAL]"; - -fn build_openapi_spec(show_internal: bool) -> utoipa::openapi::OpenApi { - let mut spec = ApiDoc::openapi(); - spec.paths.paths.retain(|_, item| { - item.operations.retain(|_, op| { - let internal = op - .summary - .as_deref() - .is_some_and(|s| s.trim_start().starts_with(INTERNAL_MARKER)); - if internal && !show_internal { - return false; - } - if internal { - if let Some(summary) = op.summary.as_mut() { - *summary = summary - .trim_start() - .strip_prefix(INTERNAL_MARKER) - .unwrap_or(summary) - .trim_start() - .to_string(); - } - } - true - }); - !item.operations.is_empty() - }); - - // Drop tag declarations that no surviving operation references — otherwise - // Scalar/Swagger render empty groups. - let used_tags: std::collections::HashSet = spec - .paths - .paths - .values() - .flat_map(|item| item.operations.values()) - .flat_map(|op| op.tags.iter().flatten().cloned()) - .collect(); - if let Some(tags) = spec.tags.as_mut() { - tags.retain(|t| used_tags.contains(&t.name)); - } - spec -} - #[allow(deprecated)] pub async fn run_server( task_manager: Arc, @@ -211,8 +157,12 @@ pub async fn run_server( ) .route("/metrics", get(get_metrics)) .route("/ready", get(get_readiness)) - .merge(SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", openapi_spec.clone())) - .merge(Scalar::with_url("/docs", openapi_spec)); + .route("/api-docs/openapi.json", get(serve_openapi_spec)) + .merge( + Scalar::with_url("/docs", openapi_spec.clone()) + .custom_html(include_str!("../docs/openapi/scalar_template.html")), + ) + .layer(Extension(Arc::new(openapi_spec))); // SQL Query Engine #[cfg(feature = "sql")] @@ -251,7 +201,7 @@ pub async fn run_server( /// Races axum's graceful drain against a hard `drain_timeout` deadline. /// -/// See [`docs/GRACEFUL_SHUTDOWN.md`](../docs/GRACEFUL_SHUTDOWN.md) for the +/// See [`docs/decisions/graceful_shutdown.md`](../docs/decisions/graceful_shutdown.md) for the /// drain semantics — including what "force-close" actually does (and does /// not) to in-flight per-connection tasks. async fn drive_serve_with_drain( @@ -284,7 +234,7 @@ where Ok(()) } -/// [INTERNAL] Latest archival head +/// Latest archival head /// /// Returns the block number and hash of the highest archived block (no hotblocks). #[utoipa::path( @@ -297,7 +247,8 @@ where (status = 200, description = "Archival head block retrieved", body = Option), (status = 404, description = "Dataset has no archival data source"), ), - tag = "Archival stream" + tag = "Streaming", + extensions(("x-internal" = json!(true))), )] async fn get_archival_head( Extension(network): Extension>, @@ -337,7 +288,7 @@ async fn get_archival_head( (status = 200, description = "Finalized head block retrieved", body = Option), (status = 404, description = "Dataset has no data sources"), ), - tag = "Stream" + tag = "Streaming" )] async fn get_finalized_head( Extension(hotblocks): Extension>, @@ -383,7 +334,7 @@ async fn get_finalized_head( (status = 200, description = "Head block retrieved", body = Option), (status = 404, description = "Dataset has no data sources"), ), - tag = "Stream" + tag = "Streaming" )] async fn get_head( Extension(hotblocks): Extension>, @@ -412,7 +363,7 @@ async fn get_head( .into_response() } -/// [INTERNAL] Portal Status +/// Portal Status /// /// Returns a JSON document with human-readable information about the portal's state. /// The exact format may change without notice. @@ -440,7 +391,8 @@ async fn get_head( "portal_version": "0.5.5" })), ), - tag = "Status" + tag = "Monitoring", + extensions(("x-internal" = json!(true))), )] async fn get_status(Extension(client): Extension>) -> impl IntoResponse { let response = crate::openapi::StatusResponse { @@ -499,7 +451,7 @@ async fn get_datasets( axum::Json(res) } -/// [INTERNAL] Dataset State +/// Dataset State /// /// Returns the current state of the dataset. #[utoipa::path( @@ -512,7 +464,8 @@ async fn get_datasets( (status = 200, description = "Dataset state retrieved successfully", body = serde_json::Value), (status = 404, description = "Dataset not found"), ), - tag = "Datasets" + tag = "Datasets", + extensions(("x-internal" = json!(true))), )] async fn get_dataset_state( dataset_id: DatasetId, @@ -558,7 +511,7 @@ async fn get_dataset_metadata( axum::Json(resp.with_fields(&query.expand)) } -/// [INTERNAL] Block Debug Info +/// Block Debug Info /// /// Returns worker information for the given dataset and block. #[utoipa::path( @@ -572,7 +525,8 @@ async fn get_dataset_metadata( (status = 200, description = "Debug information retrieved", body = serde_json::Value), (status = 404, description = "Dataset or block not found"), ), - tag = "Debug" + tag = "Debug", + extensions(("x-internal" = json!(true))), )] async fn get_debug_block( Path((_dataset, block)): Path<(String, u64)>, @@ -584,7 +538,7 @@ async fn get_debug_block( })) } -/// [INTERNAL] Worker Inventory +/// Worker Inventory /// /// Returns information about all workers currently visible to the portal. #[utoipa::path( @@ -593,7 +547,8 @@ async fn get_debug_block( responses( (status = 200, description = "All worker information retrieved", body = serde_json::Value), ), - tag = "Debug" + tag = "Debug", + extensions(("x-internal" = json!(true))), )] async fn get_all_workers( Extension(client): Extension>, @@ -603,7 +558,7 @@ async fn get_all_workers( })) } -/// [INTERNAL] Prometheus Metrics +/// Prometheus Metrics /// /// Returns portal metrics in OpenMetrics text format. #[utoipa::path( @@ -612,7 +567,8 @@ async fn get_all_workers( responses( (status = 200, description = "Metrics in OpenMetrics format", content_type = "application/openmetrics-text"), ), - tag = "Status" + tag = "Monitoring", + extensions(("x-internal" = json!(true))), )] async fn get_metrics(Extension(registry): Extension>) -> impl IntoResponse { lazy_static::lazy_static! { @@ -634,7 +590,7 @@ async fn get_metrics(Extension(registry): Extension>) -> impl Into (HEADERS.clone(), buffer) } -/// [INTERNAL] Readiness Probe +/// Readiness Probe /// /// Returns 200 once the portal is ready to serve requests; 503 otherwise (e.g. during shutdown). #[utoipa::path( @@ -644,7 +600,8 @@ async fn get_metrics(Extension(registry): Extension>) -> impl Into (status = 200, description = "Portal is ready"), (status = 503, description = "Portal is not ready"), ), - tag = "Status" + tag = "Monitoring", + extensions(("x-internal" = json!(true))), )] async fn get_readiness( Extension(client): Extension>, @@ -659,7 +616,7 @@ async fn get_readiness( (StatusCode::SERVICE_UNAVAILABLE, "Not ready").into_response() } -/// [INTERNAL] Dataset Height (deprecated) +/// Dataset Height /// /// Returns the current height of the dataset. Kept for backward compatibility. #[utoipa::path( @@ -672,7 +629,8 @@ async fn get_readiness( (status = 200, description = "Height retrieved successfully", body = String), (status = 404, description = "Dataset not found"), ), - tag = "Stream", + tag = "Streaming", + extensions(("x-internal" = json!(true))), )] #[deprecated] async fn get_height( @@ -683,7 +641,7 @@ async fn get_height( height_response(&network, &dataset, &dataset_id) } -/// [INTERNAL] Finalized Stream Height (deprecated) +/// Finalized Stream Height /// /// Same as /datasets/{dataset}/height. Kept for backward compatibility. #[utoipa::path( @@ -696,7 +654,8 @@ async fn get_height( (status = 200, description = "Height retrieved successfully", body = String), (status = 404, description = "Dataset not found"), ), - tag = "Stream", + tag = "Streaming", + extensions(("x-internal" = json!(true))), )] #[deprecated] async fn get_finalized_stream_height( @@ -707,7 +666,7 @@ async fn get_finalized_stream_height( height_response(&network, &dataset, &dataset_id) } -/// [INTERNAL] Archival Stream Height (deprecated) +/// Archival Stream Height /// /// Same as /datasets/{dataset}/height. Kept for backward compatibility. #[utoipa::path( @@ -720,7 +679,8 @@ async fn get_finalized_stream_height( (status = 200, description = "Height retrieved successfully", body = String), (status = 404, description = "Dataset not found"), ), - tag = "Archival stream", + tag = "Streaming", + extensions(("x-internal" = json!(true))), )] #[deprecated] async fn get_archival_stream_height( @@ -745,7 +705,7 @@ fn height_response( } } -/// [INTERNAL] Get worker information for a block range (deprecated) +/// Worker Info /// /// Returns worker information for the given dataset and block range. This endpoint is deprecated. #[utoipa::path( @@ -762,6 +722,7 @@ fn height_response( (status = 503, description = "No available workers"), ), tag = "Debug", + extensions(("x-internal" = json!(true))), )] #[deprecated] async fn get_worker( @@ -800,7 +761,7 @@ async fn get_worker( .into_response() } -/// [INTERNAL] Worker Query (deprecated) +/// Worker Query /// /// Sends a data query to a specific worker in the network. Deprecated in favor of /stream. #[utoipa::path( @@ -817,7 +778,8 @@ async fn get_worker( (status = 404, description = "Dataset or worker not found"), (status = 503, description = "Service unavailable"), ), - tag = "Stream", + tag = "Streaming", + extensions(("x-internal" = json!(true))), )] #[deprecated] async fn execute_query( @@ -1142,54 +1104,6 @@ mod tests { use super::*; use std::time::Duration; - #[test] - fn hide_internal_drops_marked_ops_and_empty_tags() { - let hidden = build_openapi_spec(false); - let summaries: Vec<&str> = hidden - .paths - .paths - .values() - .flat_map(|i| i.operations.values()) - .filter_map(|o| o.summary.as_deref()) - .collect(); - assert!( - summaries.iter().all(|s| !s.contains(INTERNAL_MARKER)), - "no surviving op should carry the marker: {summaries:?}" - ); - - let tag_names: Vec<&str> = hidden - .tags - .as_deref() - .unwrap_or(&[]) - .iter() - .map(|t| t.name.as_str()) - .collect(); - for empty_tag in ["Status", "Debug", "Archival stream"] { - assert!( - !tag_names.contains(&empty_tag), - "tag {empty_tag} should be pruned (all its operations are internal); got {tag_names:?}" - ); - } - } - - #[test] - fn show_internal_keeps_all_and_strips_marker() { - let shown = build_openapi_spec(true); - let full = ApiDoc::openapi(); - assert_eq!(shown.paths.paths.len(), full.paths.paths.len()); - let summaries: Vec<&str> = shown - .paths - .paths - .values() - .flat_map(|i| i.operations.values()) - .filter_map(|o| o.summary.as_deref()) - .collect(); - assert!( - summaries.iter().all(|s| !s.contains(INTERNAL_MARKER)), - "marker should be stripped from summaries when shown: {summaries:?}" - ); - } - /// End-to-end: real TCP listener + axum router + reqwest client. Verifies that /// flipping the AtomicBool that `watch_shutdown_signal` flips on SIGTERM actually /// changes the response observed by an HTTP client. The handler mirrors diff --git a/src/main.rs b/src/main.rs index c2305ea..9081132 100644 --- a/src/main.rs +++ b/src/main.rs @@ -36,8 +36,9 @@ pub struct Cli { #[arg(long, env, hide(true))] pub log_span_durations: bool, - /// Show endpoints marked internal (tag prefixed with `_internal_`) in the OpenAPI docs. - /// By default, internal endpoints are stripped from /docs, /swagger-ui and /api-docs/openapi.json. + /// Show endpoints marked internal (operations carrying the `x-internal: true` + /// OpenAPI extension) in the docs. By default such endpoints are stripped from + /// `/docs` and `/api-docs/openapi.json`. #[arg(long, env = "SHOW_INTERNAL_DOCS")] pub show_internal_docs: bool, } @@ -182,7 +183,7 @@ async fn main() -> anyhow::Result<()> { /// Awaits SIGTERM and runs the two-phase shutdown sequence. /// -/// See [`docs/GRACEFUL_SHUTDOWN.md`](../docs/GRACEFUL_SHUTDOWN.md) for the +/// See [`docs/decisions/graceful_shutdown.md`](../docs/decisions/graceful_shutdown.md) for the /// full lifecycle, timing rationale, and non-goals. async fn watch_shutdown_signal( mut sigterm: tokio::signal::unix::Signal, diff --git a/src/openapi.rs b/src/openapi.rs index 1a9408a..988b65b 100644 --- a/src/openapi.rs +++ b/src/openapi.rs @@ -1,5 +1,8 @@ #![allow(dead_code)] +use std::sync::Arc; + +use axum::{Extension, Json}; use serde::{Deserialize, Serialize}; use utoipa::{OpenApi, ToSchema}; @@ -22,6 +25,22 @@ pub struct BlockHead { pub hash: String, } +/// Body of a `409 Conflict` on a stream request — a slice of the **current +/// canonical chain** at and below the conflict point, most recent first. +#[derive(Serialize, Deserialize, Clone, Debug, ToSchema)] +#[serde(rename_all = "camelCase")] +#[schema(example = json!({ + "previousBlocks": [ + { "number": 21780872, "hash": "0xf6a96a29..." }, + { "number": 21780871, "hash": "0xab12cd..." } + ] +}))] +pub struct ConflictResponse { + /// `{ number, hash }` pairs from the canonical chain, descending from the conflict + /// point. Guaranteed to contain at least the parent of the requested `fromBlock`. + pub previous_blocks: Vec, +} + /// Block number response for timestamp query #[derive(Serialize, Deserialize, Clone, Debug, ToSchema)] pub struct BlockNumberResponse { @@ -85,20 +104,19 @@ pub struct DatasetStateResponse { crate::http_server::get_dataset_metadata, crate::http_server::get_dataset_state, crate::endpoints::block_number_by_timestamp::get_blocknumber_by_timestamp, - // Stream — real-time pair, then finalized pair (POST stream before GET head). - // Deprecated endpoints appended at the end of the group. + // Stream — real-time pair, then finalized pair (POST stream before GET head), + // then archival variants (internal). Deprecated endpoints appended at the end. crate::endpoints::stream::run_stream, crate::http_server::get_head, crate::endpoints::stream::run_finalized_stream, crate::http_server::get_finalized_head, - crate::http_server::get_finalized_stream_height, - crate::http_server::execute_query, - // Archival stream (internal) crate::endpoints::stream::run_archival_stream_restricted, crate::endpoints::stream::run_archival_stream, crate::http_server::get_archival_head, + crate::http_server::get_finalized_stream_height, crate::http_server::get_archival_stream_height, - // Status (internal) + crate::http_server::execute_query, + // Monitoring (internal) crate::http_server::get_status, crate::http_server::get_readiness, crate::http_server::get_metrics, @@ -115,6 +133,7 @@ pub struct DatasetStateResponse { Status, AvailableDatasetApiResponse, BlockHead, + ConflictResponse, BlockNumberResponse, ErrorResponse, MetadataQueryParams, @@ -126,18 +145,212 @@ pub struct DatasetStateResponse { ), info( title = "SQD Portal API", - description = "API for querying and streaming blockchain data from the SQD network", + description = concat!( + include_str!("../docs/openapi/01-introduction.md"), + "\n\n", + include_str!("../docs/openapi/02-blockchain-forks.md"), + "\n\n", + include_str!("../docs/openapi/03-getting-started.md"), + ), version = "0.10.0", ), servers( (url = "http://localhost:8000", description = "Local development server"), ), tags( - (name = "Status", description = "Portal and dataset status operations"), (name = "Datasets", description = "Dataset information and metadata"), - (name = "Stream", description = "Stream blocks and query head — real-time and finalized"), - (name = "Archival stream", description = "Archival data only: stable historical blocks"), + (name = "Streaming", description = "Stream blocks and query head — real-time, finalized, and archival"), + (name = "Monitoring", description = "Portal status, readiness and metrics"), (name = "Debug", description = "Debug operations"), ), )] pub struct ApiDoc; + +/// To mark an API operation as internal — so it is pruned from the served +/// OpenAPI spec unless `show_internal` is true — add this extension on its +/// handler: +/// +/// ```ignore +/// #[utoipa::path( +/// ..., +/// extensions(("x-internal" = json!(true))), +/// )] +/// ``` +/// +/// [`build_openapi_spec`] reads this key to decide which operations to drop. +const INTERNAL_EXT_KEY: &str = "x-internal"; + +/// Builds the served OpenAPI spec from [`ApiDoc`], optionally pruning operations +/// (and now-empty tags) marked internal via the `x-internal` extension. +pub fn build_openapi_spec(show_internal: bool) -> utoipa::openapi::OpenApi { + let mut spec = ApiDoc::openapi(); + let is_internal = |op: &utoipa::openapi::path::Operation| { + op.extensions + .as_ref() + .and_then(|e| e.get(INTERNAL_EXT_KEY)) + .and_then(|v| v.as_bool()) + .unwrap_or(false) + }; + + // Tags that no operation references in the *original* spec are "doc-only" + // (e.g. "Forks") — they carry markdown content for the sidebar. We must + // never prune them, regardless of operation visibility. + let original_op_tags: std::collections::HashSet = spec + .paths + .paths + .values() + .flat_map(|item| { + [ + &item.get, + &item.put, + &item.post, + &item.delete, + &item.options, + &item.head, + &item.patch, + &item.trace, + ] + }) + .filter_map(|slot| slot.as_ref()) + .flat_map(|op| op.tags.iter().flatten().cloned()) + .collect(); + + spec.paths.paths.retain(|_, item| { + for slot in [ + &mut item.get, + &mut item.put, + &mut item.post, + &mut item.delete, + &mut item.options, + &mut item.head, + &mut item.patch, + &mut item.trace, + ] { + if let Some(op) = slot.as_mut() { + if is_internal(op) { + if show_internal { + // Scalar (and some other renderers) treat `x-internal: true` + // as a hint to hide the operation client-side, even if it's + // present in the served spec. Strip the marker so the + // operation actually renders in /docs. + if let Some(ext) = op.extensions.as_mut() { + ext.remove(INTERNAL_EXT_KEY); + } + } else { + *slot = None; + } + } + } + } + item.get.is_some() + || item.put.is_some() + || item.post.is_some() + || item.delete.is_some() + || item.options.is_some() + || item.head.is_some() + || item.patch.is_some() + || item.trace.is_some() + }); + + // Drop tag declarations that no surviving operation references — otherwise + // Scalar/Swagger render empty groups. + let used_tags: std::collections::HashSet = spec + .paths + .paths + .values() + .flat_map(|item| { + [ + &item.get, + &item.put, + &item.post, + &item.delete, + &item.options, + &item.head, + &item.patch, + &item.trace, + ] + }) + .filter_map(|slot| slot.as_ref()) + .flat_map(|op| op.tags.iter().flatten().cloned()) + .collect(); + if let Some(tags) = spec.tags.as_mut() { + tags.retain(|t| { + // Keep tags that still have operations OR tags that were never bound + // to operations in the first place (doc-only sections). + used_tags.contains(&t.name) || !original_op_tags.contains(&t.name) + }); + } + spec +} + +/// Axum handler that serves the pre-built OpenAPI spec injected as an extension. +pub async fn serve_openapi_spec( + Extension(spec): Extension>, +) -> Json { + Json((*spec).clone()) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn all_ops(spec: &utoipa::openapi::OpenApi) -> Vec<&utoipa::openapi::path::Operation> { + spec.paths + .paths + .values() + .flat_map(|i| { + [ + &i.get, &i.put, &i.post, &i.delete, &i.options, &i.head, &i.patch, &i.trace, + ] + }) + .filter_map(|slot| slot.as_ref()) + .collect() + } + + fn has_internal_ext(op: &utoipa::openapi::path::Operation) -> bool { + op.extensions + .as_ref() + .and_then(|e| e.get(INTERNAL_EXT_KEY)) + .and_then(|v| v.as_bool()) + .unwrap_or(false) + } + + #[test] + fn hide_internal_drops_marked_ops_and_empty_tags() { + let hidden = build_openapi_spec(false); + let ops = all_ops(&hidden); + assert!( + ops.iter().all(|op| !has_internal_ext(op)), + "no surviving op should carry x-internal" + ); + + let tag_names: Vec<&str> = hidden + .tags + .as_deref() + .unwrap_or(&[]) + .iter() + .map(|t| t.name.as_str()) + .collect(); + for empty_tag in ["Monitoring", "Debug"] { + assert!( + !tag_names.contains(&empty_tag), + "tag {empty_tag} should be pruned (all its operations are internal); got {tag_names:?}" + ); + } + } + + #[test] + fn show_internal_keeps_all_ops() { + let shown = build_openapi_spec(true); + let full = ApiDoc::openapi(); + assert_eq!(shown.paths.paths.len(), full.paths.paths.len()); + let ops = all_ops(&shown); + assert!(!ops.is_empty(), "should have at least some operations"); + // Scalar hides operations carrying `x-internal: true` client-side, so the + // marker must be stripped when we want them rendered. + assert!( + ops.iter().all(|op| !has_internal_ext(op)), + "no surviving op should still carry x-internal when show_internal=true" + ); + } +}