diff --git a/linera-execution/src/execution_state_actor.rs b/linera-execution/src/execution_state_actor.rs index 57f1acf5b9bc..f3d9584dd028 100644 --- a/linera-execution/src/execution_state_actor.rs +++ b/linera-execution/src/execution_state_actor.rs @@ -23,7 +23,7 @@ use linera_base::{ OwnerSpender, StreamId, }, ownership::ChainOwnership, - time::Instant, + time::{Duration, Instant}, }; use linera_views::{batch::Batch, context::Context, views::View}; use oneshot::Sender; @@ -728,6 +728,13 @@ where let state = &mut self.state; let local_time = self.txn_tracker.local_time(); let created_blobs = self.txn_tracker.created_blobs().clone(); + // Measured only when the service actually runs (validation mode). When the + // oracle response is replayed from the certificate the closure is skipped, so + // this stays zero. Reporting it back to the runtime — rather than letting the + // runtime time the whole round-trip — keeps the tracked execution time off the + // wall clock during replay, which is what makes block re-execution deterministic + // across validators (see `run_service_oracle_query`). + let mut execution_time = Duration::ZERO; let bytes = self .txn_tracker .oracle(|| async { @@ -736,6 +743,7 @@ where next_block_height, local_time, }; + let execution_start = Instant::now(); let QueryOutcome { response, operations, @@ -747,6 +755,7 @@ where created_blobs, )) .await?; + execution_time = execution_start.elapsed(); ensure!( operations.is_empty(), ExecutionError::ServiceOracleQueryOperations(operations) @@ -755,7 +764,7 @@ where }) .await? .to_service_response()?; - callback.respond(bytes); + callback.respond((bytes, execution_time)); } AddOutgoingMessage { message, callback } => { @@ -1587,7 +1596,7 @@ pub enum ExecutionRequest { next_block_height: BlockHeight, query: Vec, #[debug(skip)] - callback: Sender>, + callback: Sender<(Vec, Duration)>, }, AddOutgoingMessage { diff --git a/linera-execution/src/runtime.rs b/linera-execution/src/runtime.rs index 055092a36804..24a2afa5bea1 100644 --- a/linera-execution/src/runtime.rs +++ b/linera-execution/src/runtime.rs @@ -492,9 +492,13 @@ impl SyncRuntimeInternal { let timeout = self .resource_controller .remaining_service_oracle_execution_time()?; - let execution_start = Instant::now(); - let deadline = Some(execution_start + timeout); - let response = self + let deadline = Some(Instant::now() + timeout); + // `execution_time` is measured by the actor around the service execution itself and is + // zero when the response is replayed from the certificate. Tracking that value — instead + // of timing the whole request round-trip here — means a replaying validator accumulates + // no wall-clock execution time, so it cannot diverge from the proposer by crossing + // `maximum_service_oracle_execution_ms` only on some hosts. + let (response, execution_time) = self .execution_state_sender .send_request(|callback| ExecutionRequest::QueryServiceOracle { deadline, @@ -506,7 +510,7 @@ impl SyncRuntimeInternal { .recv_response()?; self.resource_controller - .track_service_oracle_execution(execution_start.elapsed())?; + .track_service_oracle_execution(execution_time)?; self.resource_controller .track_service_oracle_response(response.len())?; diff --git a/linera-execution/tests/contract_runtime_apis.rs b/linera-execution/tests/contract_runtime_apis.rs index e65adf5d4a7d..b010786e0cfd 100644 --- a/linera-execution/tests/contract_runtime_apis.rs +++ b/linera-execution/tests/contract_runtime_apis.rs @@ -17,6 +17,7 @@ use linera_base::{ http, identifiers::{Account, AccountOwner, ApplicationId, DataBlobHash, ModuleId}, ownership::ChainOwnership, + time::Duration, vm::VmRuntime, }; use linera_execution::{ @@ -843,6 +844,85 @@ async fn test_query_service(authorized_apps: Option>) -> Result<(), Exec Ok(()) } +/// Replaying a service-oracle response must not accumulate any execution time. +/// +/// When the response is replayed from the certificate the service is not actually run, so +/// timing it would record wall-clock time that differs between validators. If that time were +/// tracked it could cross `maximum_service_oracle_execution_ms` on a slow host but not a fast +/// one, making the two validators compute a different block outcome (`CorruptedChainState`). +/// Regression test: after a replayed `query_service`, the tracked service-oracle execution +/// time must be exactly zero. +#[test_log::test(tokio::test)] +async fn test_query_service_does_not_track_execution_time_on_replay() -> Result<(), ExecutionError> +{ + let description = dummy_chain_description(0); + let chain_id = description.id(); + let mut view = SystemExecutionState { + ownership: ChainOwnership::default(), + balance: Amount::ONE, + balances: BTreeMap::new(), + ..SystemExecutionState::new(description) + } + .into_view() + .await; + + let contract_blob = TransferTestEndpoint::sender_application_contract_blob(); + let service_blob = TransferTestEndpoint::sender_application_service_blob(); + let contract_blob_id = contract_blob.id(); + let service_blob_id = service_blob.id(); + + let application_description = TransferTestEndpoint::sender_application_description(); + let application_description_blob = Blob::new_application_description(&application_description); + let app_desc_blob_id = application_description_blob.id(); + + let (application_id, application) = view + .register_mock_application_with(application_description, contract_blob, service_blob) + .await + .expect("should register mock application"); + + view.system + .application_permissions + .set(ApplicationPermissions { + call_service_as_oracle: None, + ..ApplicationPermissions::new_single(application_id) + }); + + application.expect_call(ExpectedCall::execute_operation( + move |runtime, _operation| { + runtime.query_service(application_id, vec![])?; + Ok(vec![]) + }, + )); + application.expect_call(ExpectedCall::default_finalize()); + + let context = create_dummy_operation_context(chain_id); + let mut controller = ResourceController::default(); + let operation = Operation::User { + application_id, + bytes: vec![], + }; + + // Replay mode: the `OracleResponse::Service` is taken from the recorded responses, so the + // service itself is never executed. + let mut txn_tracker = TransactionTracker::new_replaying(vec![ + OracleResponse::Blob(app_desc_blob_id), + OracleResponse::Blob(contract_blob_id), + OracleResponse::Blob(service_blob_id), + OracleResponse::Service(vec![]), + ]); + ExecutionStateActor::new(&mut view, &mut txn_tracker, &mut controller) + .execute_operation(context, operation) + .await?; + + assert_eq!( + controller.tracker.service_oracle_execution, + Duration::ZERO, + "a replayed service-oracle call must not accumulate wall-clock execution time" + ); + + Ok(()) +} + /// Tests the contract system API to make HTTP requests. #[test_case(None => matches Ok(_); "when all authorized")] #[test_case(Some(vec![()]) => matches Ok(_); "when single app authorized")]