diff --git a/linera-chain/src/data_types/mod.rs b/linera-chain/src/data_types/mod.rs index e104cfb22747..166d44325768 100644 --- a/linera-chain/src/data_types/mod.rs +++ b/linera-chain/src/data_types/mod.rs @@ -27,7 +27,7 @@ use linera_base::{ }; use linera_execution::{committee::Committee, Message, MessageKind, Operation, OutgoingMessage}; use serde::{Deserialize, Serialize}; -use tracing::instrument; +use tracing::{info, instrument}; use crate::{ block::{Block, ValidatedBlock}, @@ -308,6 +308,10 @@ impl IncomingBundle { if self.bundle.is_skippable() { return None; } else if !self.bundle.is_protected() { + info!( + origin = %self.origin, + "Rejecting incoming message bundle due to the message policy" + ); self.action = MessageAction::Reject; } } diff --git a/linera-core/src/chain_worker/state.rs b/linera-core/src/chain_worker/state.rs index 5535f7937ec6..350ff4e71c72 100644 --- a/linera-core/src/chain_worker/state.rs +++ b/linera-core/src/chain_worker/state.rs @@ -63,9 +63,9 @@ mod metrics { use linera_base::prometheus_util::{ exponential_bucket_interval, exponential_bucket_latencies, register_histogram, - register_histogram_vec, + register_histogram_vec, register_int_counter, register_int_counter_vec, }; - use prometheus::{Histogram, HistogramVec}; + use prometheus::{Histogram, HistogramVec, IntCounter, IntCounterVec}; pub static CREATE_NETWORK_ACTIONS_LATENCY: LazyLock = LazyLock::new(|| { register_histogram( @@ -83,6 +83,21 @@ mod metrics { exponential_bucket_interval(1.0, 10_000.0), ) }); + + pub static BLOCK_PROPOSALS_RECEIVED_TOTAL: LazyLock = LazyLock::new(|| { + register_int_counter( + "block_proposals_received_total", + "Total number of block proposals received by the worker", + ) + }); + + pub static BLOCK_PROPOSALS_REJECTED_TOTAL: LazyLock = LazyLock::new(|| { + register_int_counter_vec( + "block_proposals_rejected_total", + "Total number of block proposals rejected by the worker, labelled by error type", + &["error_type"], + ) + }); } /// The state of the chain worker. @@ -2109,10 +2124,20 @@ where &mut self, proposal: BlockProposal, ) -> (Result, NetworkActions) { + #[cfg(with_metrics)] + metrics::BLOCK_PROPOSALS_RECEIVED_TOTAL.inc(); + let chain_id = proposal.content.block.chain_id; + let height = proposal.content.block.height; let old_round = self.chain.manager.current_round(); match self.try_handle_block_proposal(proposal).await { Ok((response, actions)) => (Ok(response), actions), Err(err) => { + let error_type = err.error_type(); + #[cfg(with_metrics)] + metrics::BLOCK_PROPOSALS_REJECTED_TOTAL + .with_label_values(&[error_type.as_str()]) + .inc(); + debug!(%chain_id, %height, %error_type, "Block proposal rejected"); // Even on error, the manager's `current_round` may have advanced // (the `HasIncompatibleConfirmedVote` recovery path calls // `update_signed_proposal`). Surface the resulting `NewRound` @@ -2383,7 +2408,8 @@ where metrics::NUM_INBOXES .with_label_values(&[]) .observe(origins_and_inboxes.len() as f64); - let action = if *self.chain.execution_state.system.closed.get() { + let is_closed = *self.chain.execution_state.system.closed.get(); + let action = if is_closed { MessageAction::Reject } else { MessageAction::Accept @@ -2397,6 +2423,13 @@ where }); } } + if is_closed && !bundles.is_empty() { + info!( + chain_id = %self.chain.chain_id(), + count = bundles.len(), + "Auto-rejecting all incoming message bundles because the chain is closed" + ); + } info.requested_pending_message_bundles = bundles; } let hashes = self diff --git a/linera-core/src/client/chain_client/mod.rs b/linera-core/src/client/chain_client/mod.rs index 7b1f468ed219..9beb07f7282a 100644 --- a/linera-core/src/client/chain_client/mod.rs +++ b/linera-core/src/client/chain_client/mod.rs @@ -247,7 +247,7 @@ impl Clone for ChainClient { } /// Error type for [`ChainClient`]. -#[derive(Debug, Error)] +#[derive(Debug, Error, strum::IntoStaticStr)] #[allow(missing_docs)] pub enum Error { #[error("Local node operation failed: {0}")] @@ -369,6 +369,20 @@ impl Error { pub fn signer_failure(err: impl signer::Error + 'static) -> Self { Self::Signer(Box::new(err)) } + + /// Returns the qualified error variant name for the `error_type` metric label, + /// delegating to the wrapped error's `error_type()` so the underlying worker or + /// chain error name is surfaced rather than just the outer variant. + pub fn error_type(&self) -> String { + match self { + Error::LocalNodeError(local_node_error) => local_node_error.error_type(), + Error::ChainError(chain_error) => chain_error.error_type(), + other => { + let variant: &'static str = other.into(); + format!("ChainClientError::{variant}") + } + } + } } impl ChainClient { @@ -1458,6 +1472,23 @@ impl ChainClient { #[cfg(with_metrics)] let _latency = super::metrics::EXECUTE_BLOCK_LATENCY.measure_latency(); + let result = self.try_execute_block(operations, blobs).await; + if let Err(error) = &result { + let error_type = error.error_type(); + #[cfg(with_metrics)] + super::metrics::BLOCK_STAGING_FAILURES_TOTAL + .with_label_values(&[error_type.as_str()]) + .inc(); + info!(chain_id = %self.chain_id, %error_type, "Block staging failed"); + } + result + } + + async fn try_execute_block( + &self, + operations: Vec, + blobs: Vec, + ) -> Result, Error> { let mutex = self.proposal_mutex(); let lock_start = linera_base::time::Instant::now(); let mut proposal_guard = mutex.lock_owned().await; @@ -3539,3 +3570,24 @@ impl ChainClient { .unwrap(); } } + +#[cfg(test)] +mod tests { + use super::{Error, LocalNodeError}; + + #[test] + fn error_type_delegates_to_local_node_error() { + assert_eq!( + Error::LocalNodeError(LocalNodeError::InvalidChainInfoResponse).error_type(), + "LocalNodeError::InvalidChainInfoResponse" + ); + } + + #[test] + fn error_type_falls_back_to_chain_client_variant() { + assert_eq!( + Error::WalletSynchronizationError.error_type(), + "ChainClientError::WalletSynchronizationError" + ); + } +} diff --git a/linera-core/src/client/mod.rs b/linera-core/src/client/mod.rs index 468964e11ab7..13c28c80289d 100644 --- a/linera-core/src/client/mod.rs +++ b/linera-core/src/client/mod.rs @@ -77,8 +77,10 @@ mod validator_trackers; mod metrics { use std::sync::LazyLock; - use linera_base::prometheus_util::{exponential_bucket_latencies, register_histogram_vec}; - use prometheus::HistogramVec; + use linera_base::prometheus_util::{ + exponential_bucket_latencies, register_histogram_vec, register_int_counter_vec, + }; + use prometheus::{HistogramVec, IntCounterVec}; pub static PROCESS_INBOX_WITHOUT_PREPARE_LATENCY: LazyLock = LazyLock::new(|| { @@ -125,6 +127,14 @@ mod metrics { exponential_bucket_latencies(10_000.0), ) }); + + pub static BLOCK_STAGING_FAILURES_TOTAL: LazyLock = LazyLock::new(|| { + register_int_counter_vec( + "block_staging_failures_total", + "Total number of client block staging (execute_block) failures, labelled by error type", + &["error_type"], + ) + }); } /// Default number of certificates to download in a single batch. diff --git a/linera-core/src/local_node.rs b/linera-core/src/local_node.rs index 9d870bd2d92e..d77412a48bdd 100644 --- a/linera-core/src/local_node.rs +++ b/linera-core/src/local_node.rs @@ -48,7 +48,7 @@ where } /// Error type for the operations on a local node. -#[derive(Debug, Error)] +#[derive(Debug, Error, strum::IntoStaticStr)] #[allow(missing_docs)] pub enum LocalNodeError { #[error(transparent)] @@ -73,6 +73,20 @@ pub enum LocalNodeError { EventsNotFound(Vec), } +impl LocalNodeError { + /// Returns the qualified error variant name for the `error_type` metric label, + /// delegating to [`WorkerError::error_type`] for wrapped worker errors. + pub fn error_type(&self) -> String { + match self { + LocalNodeError::WorkerError(worker_error) => worker_error.error_type(), + other => { + let variant: &'static str = other.into(); + format!("LocalNodeError::{variant}") + } + } + } +} + impl From for LocalNodeError { fn from(error: WorkerError) -> Self { match error { @@ -480,3 +494,24 @@ where Ok(self.node.state.get_manager_seed(chain_id).await?) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn error_type_delegates_to_worker_error() { + assert_eq!( + LocalNodeError::WorkerError(WorkerError::InvalidOwner).error_type(), + "WorkerError::InvalidOwner" + ); + } + + #[test] + fn error_type_falls_back_to_local_node_variant() { + assert_eq!( + LocalNodeError::InvalidChainInfoResponse.error_type(), + "LocalNodeError::InvalidChainInfoResponse" + ); + } +}