From 820f6ee8a27b916d3c2a40b5092b537812a767c5 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 08:14:25 +0000 Subject: [PATCH 01/24] Update Rust crate serde_json to 1.0.111 (#4769) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d9d85bdc79..65e58d39fd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7441,9 +7441,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.108" +version = "1.0.111" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" +checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" dependencies = [ "itoa", "ryu", diff --git a/Cargo.toml b/Cargo.toml index f08a1691ca..ba5bdb010b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -322,7 +322,7 @@ semver = { version = "1.0.21", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive" ] } serde_derive = "1.0" serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } -serde_json = "1.0.108" +serde_json = "1.0.111" serde_path_to_error = "0.1.15" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b16310d478..cff10b60ce 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -88,7 +88,7 @@ ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.21", features = ["serde"] } serde = { version = "1.0.194", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.108", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -191,7 +191,7 @@ ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.21", features = ["serde"] } serde = { version = "1.0.194", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.108", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } From 150954297f839205b2838f169b926ce0893b4583 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 08:44:46 +0000 Subject: [PATCH 02/24] Update Rust crate trybuild to 1.0.88 (#4773) --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65e58d39fd..3db966876d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -417,9 +417,9 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "basic-toml" -version = "0.1.4" +version = "0.1.8" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7bfc506e7a2370ec239e1d072507b2a80c833083699d3c6fa176fbb4de8448c6" +checksum = "2db21524cad41c5591204d22d75e1970a2d1f71060214ca931dc7d5afe2c14e5" dependencies = [ "serde", ] @@ -9087,9 +9087,9 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" [[package]] name = "trybuild" -version = "1.0.85" +version = "1.0.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "196a58260a906cedb9bf6d8034b6379d0c11f552416960452f267402ceeddff1" +checksum = "76de4f783e610194f6c98bfd53f9fc52bb2e0d02c947621e8a0f4ecc799b2880" dependencies = [ "basic-toml", "glob", diff --git a/Cargo.toml b/Cargo.toml index ba5bdb010b..8553a7244b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -384,7 +384,7 @@ trust-dns-client = "0.22" trust-dns-proto = "0.22" trust-dns-resolver = "0.22" trust-dns-server = "0.22" -trybuild = "1.0.85" +trybuild = "1.0.88" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } unicode-width = "0.1.11" From 9fe8a3cf614a3430365e9205bb24ae0940e5d959 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 9 Jan 2024 03:05:59 -0800 Subject: [PATCH 03/24] [tests] time out tests in test_all after a duration (#4780) We're seeing some tests in `test_all` hang forever (#4779). Set a reasonable upper bound on test duration. This will also cause stdout and stderr for failing tests to be printed. Doing so on SIGTERM in general is tracked at https://github.com/nextest-rs/nextest/issues/1208. Also, bump up the required nextest version to 0.9.64 to make use of the `binary_id` predicate. --- .config/nextest.toml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index 4f927d2396..136a21a236 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -3,7 +3,7 @@ # # The required version should be bumped up if we need new features, performance # improvements or bugfixes that are present in newer versions of nextest. -nextest-version = { required = "0.9.59", recommended = "0.9.64" } +nextest-version = { required = "0.9.64", recommended = "0.9.64" } experimental = ["setup-scripts"] @@ -32,3 +32,9 @@ clickhouse-cluster = { max-threads = 1 } [[profile.default.overrides]] filter = 'package(oximeter-db) and test(replicated)' test-group = 'clickhouse-cluster' + +[[profile.ci.overrides]] +filter = 'binary_id(omicron-nexus::test_all)' +# As of 2023-01-08, the slowest test in test_all takes 196s on a Ryzen 7950X. +# 900s is a good upper limit that adds a comfortable buffer. +slow-timeout = { period = '60s', terminate-after = 15 } From a6245c414fd5d49c669379db44baf028c14eaa66 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Jan 2024 11:54:35 +0000 Subject: [PATCH 04/24] Update taiki-e/install-action digest to 2f4c386 (#4784) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`c63cad0` -> `2f4c386`](https://togithub.com/taiki-e/install-action/compare/c63cad0...2f4c386) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. 
♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update again.

---

 - [ ] If you want to rebase/retry this PR, check this box

---

This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate).

Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com>
---
 .github/workflows/hakari.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml
index f30775f819..f758bd79b9 100644
--- a/.github/workflows/hakari.yml
+++ b/.github/workflows/hakari.yml
@@ -24,7 +24,7 @@ jobs:
         with:
           toolchain: stable
       - name: Install cargo-hakari
-        uses: taiki-e/install-action@c63cad0540fb5357c70e2481e3da40d7649add24 # v2
+        uses: taiki-e/install-action@2f4c386a81aeab009d470320dfc6e0930ee4e064 # v2
         with:
           tool: cargo-hakari
      - name: Check workspace-hack Cargo.toml is up-to-date

From e4522e52a720cc4721600a03cf924c3bf043a883 Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Tue, 9 Jan 2024 14:35:59 -0800
Subject: [PATCH 05/24] [schema][test] Add a data migration validation test
 (#4783)

Adds a schema test with "before" / "after" hooks, and adds an example
specifically for the "23.0.0" migration.

My intent is that this can be used for any other schema migrations that
would like to execute arbitrary SQL checks against the new schema too.

Fixes https://github.com/oxidecomputer/omicron/issues/4747
---
 nexus/tests/integration_tests/schema.rs | 280 ++++++++++++++++++++++--
 1 file changed, 265 insertions(+), 15 deletions(-)

diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs
index 6feafe415d..21ed99e010 100644
--- a/nexus/tests/integration_tests/schema.rs
+++ b/nexus/tests/integration_tests/schema.rs
@@ -5,6 +5,7 @@
 use camino::Utf8PathBuf;
 use chrono::{DateTime, Utc};
 use dropshot::test_util::LogContext;
+use futures::future::BoxFuture;
 use nexus_db_model::schema::SCHEMA_VERSION as LATEST_SCHEMA_VERSION;
 use nexus_db_queries::db::datastore::{
     all_sql_for_version_migration, EARLIEST_SUPPORTED_VERSION,
@@ -14,7 +15,7 @@ use omicron_common::api::external::SemverVersion;
 use omicron_common::api::internal::shared::SwitchLocation;
 use omicron_common::nexus_config::Config;
 use omicron_common::nexus_config::SchemaConfig;
-use omicron_test_utils::dev::db::CockroachInstance;
+use omicron_test_utils::dev::db::{Client, CockroachInstance};
 use pretty_assertions::{assert_eq, assert_ne};
 use similar_asserts;
 use slog::Logger;
@@ -163,6 +164,18 @@ async fn query_crdb_schema_version(crdb: &CockroachInstance) -> String {
     version
 }

+#[derive(PartialEq, Clone, Debug)]
+struct SqlEnum {
+    name: String,
+    variant: String,
+}
+
+impl From<(&str, &str)> for SqlEnum {
+    fn from((name, variant): (&str, &str)) -> Self {
+        Self { name: name.to_string(), variant: variant.to_string() }
+    }
+}
+
 // A newtype wrapper around a string, which allows us to more liberally
 // interpret SQL types.
 //
 // the contents of the database, merely the schema and equality of contained data.
 #[derive(PartialEq, Clone, Debug)]
 enum AnySqlType {
-    DateTime,
-    String(String),
     Bool(bool),
-    Uuid(Uuid),
-    Int8(i64),
+    DateTime,
+    Enum(SqlEnum),
     Float4(f32),
+    Int8(i64),
+    Json(serde_json::value::Value),
+    String(String),
     TextArray(Vec<String>),
+    Uuid(Uuid),
     // TODO: This isn't exhaustive, feel free to add more.
     //
     // These should only be necessary for rows where the database schema changes also choose to
     // populate data.
 }

+impl From<bool> for AnySqlType {
+    fn from(b: bool) -> Self {
+        Self::Bool(b)
+    }
+}
+
+impl From<SqlEnum> for AnySqlType {
+    fn from(value: SqlEnum) -> Self {
+        Self::Enum(value)
+    }
+}
+
+impl From<f32> for AnySqlType {
+    fn from(value: f32) -> Self {
+        Self::Float4(value)
+    }
+}
+
+impl From<i64> for AnySqlType {
+    fn from(value: i64) -> Self {
+        Self::Int8(value)
+    }
+}
+
+impl From<String> for AnySqlType {
+    fn from(value: String) -> Self {
+        Self::String(value)
+    }
+}
+
+impl From<Uuid> for AnySqlType {
+    fn from(value: Uuid) -> Self {
+        Self::Uuid(value)
+    }
+}
+
 impl AnySqlType {
     fn as_str(&self) -> &str {
         match self {
@@ -218,15 +269,33 @@ impl<'a> tokio_postgres::types::FromSql<'a> for AnySqlType {
         if f32::accepts(ty) {
             return Ok(AnySqlType::Float4(f32::from_sql(ty, raw)?));
         }
+        if serde_json::value::Value::accepts(ty) {
+            return Ok(AnySqlType::Json(serde_json::value::Value::from_sql(
+                ty, raw,
+            )?));
+        }
         if Vec::<String>::accepts(ty) {
             return Ok(AnySqlType::TextArray(Vec::<String>::from_sql(
                 ty, raw,
             )?));
         }
-        Err(anyhow::anyhow!(
-            "Cannot parse type {ty}. If you're trying to use this type in a table which is populated \
-during a schema migration, consider adding it to `AnySqlType`."
-        ).into())
+
+        use tokio_postgres::types::Kind;
+        match ty.kind() {
+            Kind::Enum(_) => {
+                Ok(AnySqlType::Enum(SqlEnum {
+                    name: ty.name().to_string(),
+                    variant: std::str::from_utf8(raw)?.to_string(),
+                }))
+            },
+            _ => {
+                Err(anyhow::anyhow!(
+                    "Cannot parse type {ty:?}. \
+                    If you're trying to use this type in a table which is populated \
+                    during a schema migration, consider adding it to `AnySqlType`."
+                ).into())
+            }
+        }
     }

     fn accepts(_ty: &tokio_postgres::types::Type) -> bool {
@@ -234,15 +303,19 @@ during a schema migration, consider adding it to `AnySqlType`."
     }
 }

+// It's a little redundant to include the column name alongside each value,
+// but it results in a prettier diff.
 #[derive(PartialEq, Debug)]
-struct NamedSqlValue {
-    // It's a little redunant to include the column name alongside each value,
-    // but it results in a prettier diff.
+struct ColumnValue {
     column: String,
     value: Option<AnySqlType>,
 }

-impl NamedSqlValue {
+impl ColumnValue {
+    fn new<V: Into<AnySqlType>>(column: &str, value: V) -> Self {
+        Self { column: String::from(column), value: Some(value.into()) }
+    }
+
     fn expect(&self, column: &str) -> Option<&AnySqlType> {
         assert_eq!(self.column, column);
         self.value.as_ref()
@@ -252,7 +325,7 @@ impl NamedSqlValue {
 // A generic representation of a row of SQL data
 #[derive(PartialEq, Debug)]
 struct Row {
-    values: Vec<NamedSqlValue>,
+    values: Vec<ColumnValue>,
 }

 impl Row {
@@ -278,7 +351,7 @@ fn process_rows(rows: &Vec<tokio_postgres::Row>) -> Vec<Row> {
         let mut row_result = Row::new();
         for i in 0..row.len() {
             let column_name = row.columns()[i].name();
-            row_result.values.push(NamedSqlValue {
+            row_result.values.push(ColumnValue {
                 column: column_name.to_string(),
                 value: row.get(i),
             });
@@ -849,6 +922,183 @@ async fn dbinit_equals_sum_of_all_up() {
     logctx.cleanup_successful();
 }

+type BeforeFn = for<'a> fn(client: &'a Client) -> BoxFuture<'a, ()>;
+type AfterFn = for<'a> fn(client: &'a Client) -> BoxFuture<'a, ()>;
+
+// Describes the operations which we might take before and after
+// migrations to check that they worked.
+struct DataMigrationFns {
+    before: Option<BeforeFn>,
+    after: AfterFn,
+}
+
+// "51F0" -> "Silo"
+const SILO1: Uuid = Uuid::from_u128(0x111151F0_5c3d_4647_83b0_8f3515da7be1);
+const SILO2: Uuid = Uuid::from_u128(0x222251F0_5c3d_4647_83b0_8f3515da7be1);
+
+// "6001" -> "Pool"
+const POOL1: Uuid = Uuid::from_u128(0x11116001_5c3d_4647_83b0_8f3515da7be1);
+const POOL2: Uuid = Uuid::from_u128(0x22226001_5c3d_4647_83b0_8f3515da7be1);
+const POOL3: Uuid = Uuid::from_u128(0x33336001_5c3d_4647_83b0_8f3515da7be1);
+
+fn before_23_0_0(client: &Client) -> BoxFuture<'_, ()> {
+    Box::pin(async move {
+        // Create two silos
+        client.batch_execute(&format!("INSERT INTO silo
+            (id, name, description, time_created, time_modified, time_deleted, discoverable, authentication_mode, user_provision_type, mapped_fleet_roles, rcgen) VALUES
+          ('{SILO1}', 'silo1', '', now(), now(), NULL, false, 'local', 'jit', '{{}}', 1),
+          ('{SILO2}', 'silo2', '', now(), now(), NULL, false, 'local', 'jit', '{{}}', 1);
+        ")).await.expect("Failed to create silo");
+
+        // Create an IP pool for each silo, and a third "fleet pool" which has
+        // no corresponding silo.
+        client.batch_execute(&format!("INSERT INTO ip_pool
+            (id, name, description, time_created, time_modified, time_deleted, rcgen, silo_id, is_default) VALUES
+          ('{POOL1}', 'pool1', '', now(), now(), NULL, 1, '{SILO1}', true),
+          ('{POOL2}', 'pool2', '', now(), now(), NULL, 1, '{SILO2}', false),
+          ('{POOL3}', 'pool3', '', now(), now(), NULL, 1, null, true);
+        ")).await.expect("Failed to create IP Pool");
+    })
+}
+
+fn after_23_0_0(client: &Client) -> BoxFuture<'_, ()> {
+    Box::pin(async {
+        // Confirm that the ip_pool_resource objects have been created
+        // by the migration.
+        let rows = client
+            .query("SELECT * FROM ip_pool_resource ORDER BY ip_pool_id", &[])
+            .await
+            .expect("Failed to query ip pool resource");
+        let ip_pool_resources = process_rows(&rows);
+
+        assert_eq!(ip_pool_resources.len(), 4);
+
+        let type_silo = SqlEnum::from(("ip_pool_resource_type", "silo"));
+
+        // pool1, which referenced silo1 in the "ip_pool" table, has a newly
+        // created resource.
+        //
+        // The same relationship is true for pool2 / silo2.
+        assert_eq!(
+            ip_pool_resources[0].values,
+            vec![
+                ColumnValue::new("ip_pool_id", POOL1),
+                ColumnValue::new("resource_type", type_silo.clone()),
+                ColumnValue::new("resource_id", SILO1),
+                ColumnValue::new("is_default", true),
+            ],
+        );
+        assert_eq!(
+            ip_pool_resources[1].values,
+            vec![
+                ColumnValue::new("ip_pool_id", POOL2),
+                ColumnValue::new("resource_type", type_silo.clone()),
+                ColumnValue::new("resource_id", SILO2),
+                ColumnValue::new("is_default", false),
+            ],
+        );
+
+        // pool3 did not previously have a corresponding silo, so now it's associated
+        // with both silos as a new resource in each.
+        //
+        // Additionally, silo1 already had a default pool (pool1), but silo2 did
+        // not have one. As a result, pool3 becomes the new default pool for silo2.
+        assert_eq!(
+            ip_pool_resources[2].values,
+            vec![
+                ColumnValue::new("ip_pool_id", POOL3),
+                ColumnValue::new("resource_type", type_silo.clone()),
+                ColumnValue::new("resource_id", SILO1),
+                ColumnValue::new("is_default", false),
+            ],
+        );
+        assert_eq!(
+            ip_pool_resources[3].values,
+            vec![
+                ColumnValue::new("ip_pool_id", POOL3),
+                ColumnValue::new("resource_type", type_silo.clone()),
+                ColumnValue::new("resource_id", SILO2),
+                ColumnValue::new("is_default", true),
+            ],
+        );
+    })
+}
+
+// Lazily initializes all migration checks. The combination of Rust function
+// pointers and async makes defining a static table fairly painful, so we're
+// using lazy initialization instead.
+//
+// Each "check" is implemented as a pair of {before, after} migration function
+// pointers, called precisely around the migration under test.
+fn get_migration_checks() -> BTreeMap<SemverVersion, DataMigrationFns> {
+    let mut map = BTreeMap::new();
+
+    map.insert(
+        SemverVersion(semver::Version::parse("23.0.0").unwrap()),
+        DataMigrationFns { before: Some(before_23_0_0), after: after_23_0_0 },
+    );
+
+    map
+}
+
+// Performs all schema changes and runs version-specific assertions.
+//
+// HOW TO ADD A MIGRATION CHECK:
+// - Add a new "map.insert" line to "get_migration_checks", with the semver of
+// the version you'd like to inspect before / after.
+// - Define your "before" (optional) and "after" (required) functions. These
+// act on a connection to CockroachDB, and can observe and mutate arbitrary
+// state.
+//
+// ADVICE FOR MIGRATION CHECKS:
+// - Your migration check will run in the same test as all other migration
+// checks, because performing schema migrations isn't that fast. If you
+// perform an operation that could be disruptive to subsequent checks, I
+// recommend cleaning up after yourself (e.g., DELETE relevant rows).
+// - I recommend using schema checks that are NOT strongly-typed. When you
+// add a migration check, it'll happen to match the "latest" static schemas
+// defined by Nexus, but that won't always be the case. As the schema
+// continues to change (maybe a table you're trying to check gets a new column
+// in a later version), your code should continue operating on the OLD version,
+// and as such, should avoid needing any updates.
+#[tokio::test]
+async fn validate_data_migration() {
+    let config = load_test_config();
+    let logctx = LogContext::new("validate_data_migration", &config.pkg.log);
+    let log = &logctx.log;
+
+    let populate = false;
+    let mut crdb = test_setup_just_crdb(&logctx.log, populate).await;
+    let client = crdb.connect().await.expect("Failed to access CRDB client");
+
+    let all_versions = read_all_schema_versions().await;
+    let all_checks = get_migration_checks();
+
+    // Go from the first version to the latest version.
+    for version in &all_versions {
+        // If this check has preconditions (or setup), run them.
+        let checks = all_checks.get(version);
+        if let Some(before) = checks.and_then(|check| check.before) {
+            before(&client).await;
+        }
+
+        apply_update(log, &crdb, &version.to_string(), 1).await;
+        assert_eq!(version.to_string(), query_crdb_schema_version(&crdb).await);
+
+        // If this check has postconditions (or cleanup), run them.
+        if let Some(after) = checks.map(|check| check.after) {
+            after(&client).await;
+        }
+    }
+    assert_eq!(
+        LATEST_SCHEMA_VERSION.to_string(),
+        query_crdb_schema_version(&crdb).await
+    );
+
+    crdb.cleanup().await.unwrap();
+    logctx.cleanup_successful();
+}
+
 // Returns the InformationSchema object for a database populated via `sql`.
 async fn get_information_schema(log: &Logger, sql: &str) -> InformationSchema {
     let populate = false;

From 45b665184ad6254f5b2bb969abefbf476e0803c8 Mon Sep 17 00:00:00 2001
From: Rain
Date: Tue, 9 Jan 2024 15:18:45 -0800
Subject: [PATCH 06/24] [ci] update nextest to 0.9.67 (#4788)

This version of nextest has a fix for
https://github.com/nextest-rs/nextest/issues/1208, which we encountered
while attempting to diagnose #4779.
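
For anyone reproducing the CI environment locally, the pin below can be
matched by installing the same version; a minimal sketch, assuming
cargo-nextest is installed from crates.io rather than from the prebuilt
archives CI uses:

```sh
# Install the version recommended by .config/nextest.toml (and pinned as
# NEXTEST_VERSION in .github/buildomat/build-and-test.sh below).
cargo install cargo-nextest --version 0.9.67 --locked

# nextest refuses to run when it is older than the `required` pin, so
# verify what ended up on PATH.
cargo nextest --version
```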
---
 .config/nextest.toml                | 2 +-
 .github/buildomat/build-and-test.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.config/nextest.toml b/.config/nextest.toml
index 136a21a236..ba28fa0625 100644
--- a/.config/nextest.toml
+++ b/.config/nextest.toml
@@ -3,7 +3,7 @@
 #
 # The required version should be bumped up if we need new features, performance
 # improvements or bugfixes that are present in newer versions of nextest.
-nextest-version = { required = "0.9.64", recommended = "0.9.64" }
+nextest-version = { required = "0.9.64", recommended = "0.9.67" }

 experimental = ["setup-scripts"]

diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh
index 34f81bab68..eab64c528c 100755
--- a/.github/buildomat/build-and-test.sh
+++ b/.github/buildomat/build-and-test.sh
@@ -7,7 +7,7 @@ set -o xtrace
 # NOTE: This version should be in sync with the recommended version in
 # .config/nextest.toml. (Maybe build an automated way to pull the recommended
 # version in the future.)
-NEXTEST_VERSION='0.9.64'
+NEXTEST_VERSION='0.9.67'

 cargo --version
 rustc --version

From 9eba46df9980aa76a5043cf0300feddc6e79202c Mon Sep 17 00:00:00 2001
From: Rain
Date: Tue, 9 Jan 2024 18:27:32 -0800
Subject: [PATCH 07/24] [nexus-test-utils] set 60s timeouts for each init step
 (#4789)

In #4779 we're tracking what appears to be a ClickHouse initialization
failure during Nexus startup. Set a timeout of 60s for each step in the
initialization process. These steps should usually not take more than 5
seconds each, so 60s is a really comfortable buffer.
---
 Cargo.lock                      |   2 +
 common/src/api/external/mod.rs  |   2 +-
 nexus/test-interface/src/lib.rs |   4 +-
 nexus/test-utils/Cargo.toml     |   2 +
 nexus/test-utils/src/lib.rs     | 160 +++++++++++++++++++++++++++-----
 5 files changed, 142 insertions(+), 28 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 3db966876d..2974dfe98e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4265,6 +4265,7 @@ dependencies = [
  "dns-server",
  "dns-service-client",
  "dropshot",
+ "futures",
  "gateway-messages",
  "gateway-test-utils",
  "headers",
@@ -4286,6 +4287,7 @@ dependencies = [
  "serde_json",
  "serde_urlencoded",
  "slog",
+ "tokio",
  "trust-dns-resolver",
  "uuid",
 ]
diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs
index 312d400d2f..899f15a04b 100644
--- a/common/src/api/external/mod.rs
+++ b/common/src/api/external/mod.rs
@@ -1919,7 +1919,7 @@ impl MacAddr {
     /// Iterate the MAC addresses in the system address range
     /// (used as an allocator in contexts where collisions are not expected and
     /// determinism is useful, like in the test suite)
-    pub fn iter_system() -> impl Iterator<Item = MacAddr> {
+    pub fn iter_system() -> impl Iterator<Item = MacAddr> + Send {
         ((Self::MAX_SYSTEM_RESV + 1)..=Self::MAX_SYSTEM_ADDR)
             .map(Self::from_i64)
     }
diff --git a/nexus/test-interface/src/lib.rs b/nexus/test-interface/src/lib.rs
index 2456f27684..23326a5ecb 100644
--- a/nexus/test-interface/src/lib.rs
+++ b/nexus/test-interface/src/lib.rs
@@ -38,8 +38,8 @@ use std::net::{SocketAddr, SocketAddrV6};
 use uuid::Uuid;

 #[async_trait]
-pub trait NexusServer {
-    type InternalServer;
+pub trait NexusServer: Send + Sync + 'static {
+    type InternalServer: Send + Sync + 'static;

     async fn start_internal(
         config: &Config,
diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml
index 024cba958b..4a7924770e 100644
--- a/nexus/test-utils/Cargo.toml
+++ b/nexus/test-utils/Cargo.toml
@@ -14,6 +14,7 @@ crucible-agent-client.workspace = true
 dns-server.workspace = true
 dns-service-client.workspace = true
 dropshot.workspace = true
+futures.workspace = true
 gateway-messages.workspace = true
 gateway-test-utils.workspace = true
 headers.workspace = true
@@ -34,6 +35,7 @@ serde.workspace = true
 serde_json.workspace = true
 serde_urlencoded.workspace = true
 slog.workspace = true
+tokio.workspace = true
 trust-dns-resolver.workspace = true
 uuid.workspace = true
 omicron-workspace-hack.workspace = true
diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs
index d2ac0405fc..c6dc9fefe9 100644
--- a/nexus/test-utils/src/lib.rs
+++ b/nexus/test-utils/src/lib.rs
@@ -14,6 +14,8 @@ use dropshot::ConfigDropshot;
 use dropshot::ConfigLogging;
 use dropshot::ConfigLoggingLevel;
 use dropshot::HandlerTaskMode;
+use futures::future::BoxFuture;
+use futures::FutureExt;
 use gateway_test_utils::setup::GatewayTestContext;
 use nexus_test_interface::NexusServer;
 use nexus_types::external_api::params::UserId;
@@ -39,7 +41,7 @@ use omicron_test_utils::dev;
 use oximeter_collector::Oximeter;
 use oximeter_producer::LogConfig;
 use oximeter_producer::Server as ProducerServer;
-use slog::{debug, o, Logger};
+use slog::{debug, error, o, Logger};
 use std::collections::HashMap;
 use std::fmt::Debug;
 use std::net::{IpAddr, Ipv6Addr, SocketAddr, SocketAddrV6};
@@ -158,7 +160,7 @@ struct RackInitRequestBuilder {
     services: Vec,
     datasets: Vec,
     internal_dns_config: internal_dns::DnsConfigBuilder,
-    mac_addrs: Box<dyn Iterator<Item = MacAddr>>,
+    mac_addrs: Box<dyn Iterator<Item = MacAddr> + Send>,
 }

 impl RackInitRequestBuilder {
@@ -254,11 +256,18 @@ pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> {
     pub external_dns_zone_name: Option<String>,
     pub external_dns: Option<dns_server::TransientServer>,
     pub internal_dns: Option<dns_server::TransientServer>,
+    dns_config: Option<DnsConfigParams>,

     pub silo_name: Option<Name>,
     pub user_name: Option<UserId>,
 }

+type StepInitFn<'a, N> = Box<
+    dyn for<'b> FnOnce(
+        &'b mut ControlPlaneTestContextBuilder<'a, N>,
+    ) -> BoxFuture<'b, ()>,
+>;
+
 impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
     pub fn new(
         test_name: &'a str,
@@ -290,11 +299,37 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
             external_dns_zone_name: None,
             external_dns: None,
             internal_dns: None,
+            dns_config: None,

             silo_name: None,
             user_name: None,
         }
     }

+    pub async fn init_with_steps(
+        &mut self,
+        steps: Vec<(&str, StepInitFn<'a, N>)>,
+        timeout: Duration,
+    ) {
+        let log = self.logctx.log.new(o!("component" => "init_with_steps"));
+        for (step_name, step) in steps {
+            debug!(log, "Running step {step_name}");
+            let step_fut = step(self);
+            match tokio::time::timeout(timeout, step_fut).await {
+                Ok(()) => {}
+                Err(_) => {
+                    error!(
+                        log,
+                        "Timed out after {timeout:?} \
+                         while running step {step_name}, failing test"
+                    );
+                    panic!(
+                        "Timed out after {timeout:?} while running step {step_name}",
+                    );
+                }
+            }
+        }
+    }
+
     pub async fn start_crdb(&mut self, populate: bool) {
         let populate = if populate {
             PopulateCrdb::FromEnvironmentSeed
@@ -581,7 +616,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
         self.nexus_internal_addr = Some(nexus_internal_addr);
     }

-    pub async fn populate_internal_dns(&mut self) -> DnsConfigParams {
+    pub async fn populate_internal_dns(&mut self) {
         let log = &self.logctx.log;
         debug!(log, "Populating Internal DNS");
@@ -604,18 +639,21 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> {
         dns_config_client.dns_config_put(&dns_config).await.expect(
             "Failed to send initial DNS records to internal DNS server",
         );
-        dns_config
+        self.dns_config = Some(dns_config);
     }

     // Perform RSS handoff
     pub async fn start_nexus_external(
         &mut self,
-        dns_config: DnsConfigParams,
         tls_certificates: Vec<Certificate>,
     ) {
         let log = &self.logctx.log;
         debug!(log, "Starting Nexus (external API)");

+        let dns_config = self.dns_config.clone().expect(
+            "populate_internal_dns must be called before start_nexus_external",
+        );
+
         // Create a recovery silo
         let external_dns_zone_name =
             internal_dns::names::DNS_ZONE_EXTERNAL_TESTING.to_string();
@@ -956,30 +994,102 @@ async fn setup_with_config_impl(
     sim_mode: sim::SimMode,
     initial_cert: Option<Certificate>,
 ) -> ControlPlaneTestContext<N> {
-    builder.start_crdb_impl(populate).await;
-    builder.start_clickhouse().await;
-    builder.start_gateway().await;
-    builder.start_dendrite(SwitchLocation::Switch0).await;
-    builder.start_dendrite(SwitchLocation::Switch1).await;
-    builder.start_mgd(SwitchLocation::Switch0).await;
-    builder.start_mgd(SwitchLocation::Switch1).await;
-    builder.start_internal_dns().await;
-    builder.start_external_dns().await;
-    builder.start_nexus_internal().await;
-    builder.start_sled(sim_mode).await;
-    builder.start_crucible_pantry().await;
-    builder.scrimlet_dns_setup().await;
-
-    // Give Nexus necessary information to find the Crucible Pantry
-    let dns_config = builder.populate_internal_dns().await;
+    const STEP_TIMEOUT: Duration = Duration::from_secs(60);

     builder
-        .start_nexus_external(dns_config, initial_cert.into_iter().collect())
+        .init_with_steps(
+            vec![
+                (
+                    "start_crdb",
+                    Box::new(|builder| {
+                        builder.start_crdb_impl(populate).boxed()
+                    }),
+                ),
+                (
+                    "start_clickhouse",
+                    Box::new(|builder| builder.start_clickhouse().boxed()),
+                ),
+                (
+                    "start_gateway",
+                    Box::new(|builder| builder.start_gateway().boxed()),
+                ),
+                (
+                    "start_dendrite_switch0",
+                    Box::new(|builder| {
+                        builder.start_dendrite(SwitchLocation::Switch0).boxed()
+                    }),
+                ),
+                (
+                    "start_dendrite_switch1",
+                    Box::new(|builder| {
+                        builder.start_dendrite(SwitchLocation::Switch1).boxed()
+                    }),
+                ),
+                (
+                    "start_mgd_switch0",
+                    Box::new(|builder| {
+                        builder.start_mgd(SwitchLocation::Switch0).boxed()
+                    }),
+                ),
+                (
+                    "start_mgd_switch1",
+                    Box::new(|builder| {
+                        builder.start_mgd(SwitchLocation::Switch1).boxed()
+                    }),
+                ),
+                (
+                    "start_internal_dns",
+                    Box::new(|builder| builder.start_internal_dns().boxed()),
+                ),
+                (
+                    "start_external_dns",
+                    Box::new(|builder| builder.start_external_dns().boxed()),
+                ),
+                (
+                    "start_nexus_internal",
+                    Box::new(|builder| builder.start_nexus_internal().boxed()),
+                ),
+                (
+                    "start_sled",
+                    Box::new(move |builder| {
+                        builder.start_sled(sim_mode).boxed()
+                    }),
+                ),
+                (
+                    "start_crucible_pantry",
+                    Box::new(|builder| builder.start_crucible_pantry().boxed()),
+                ),
+                (
+                    "scrimlet_dns_setup",
+                    Box::new(|builder| builder.scrimlet_dns_setup().boxed()),
+                ),
+                (
+                    "populate_internal_dns",
+                    Box::new(|builder| builder.populate_internal_dns().boxed()),
+                ),
+                (
+                    "start_nexus_external",
+                    Box::new(|builder| {
+                        builder
+                            .start_nexus_external(
+                                initial_cert.into_iter().collect(),
+                            )
+                            .boxed()
+                    }),
+                ),
+                (
+                    "start_oximeter",
+                    Box::new(|builder| builder.start_oximeter().boxed()),
+                ),
+                (
+                    "start_producer_server",
+                    Box::new(|builder| builder.start_producer_server().boxed()),
+                ),
+            ],
+            STEP_TIMEOUT,
+        )
         .await;
-    builder.start_oximeter().await;
-    builder.start_producer_server().await;
-
     builder.build()
 }

From 42c0c8a32e863433370ecce8c1d8c1e86454bdbd Mon Sep 17 00:00:00 2001
From: Sean Klein
Date: Wed, 10 Jan 2024 11:16:11 -0800
Subject: [PATCH 08/24] [sled agent] fail requests for non-NTP zones if time
 is not synchronized (#4778)

Fixes https://github.com/oxidecomputer/omicron/issues/4776
---
 sled-agent/src/bootstrap/pre_server.rs |   9 +-
 sled-agent/src/services.rs             | 215 ++++++++++++++++++++++---
 sled-agent/src/sled_agent.rs           |   4 +-
 3 files changed, 200 insertions(+), 28 deletions(-)

diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs
index 02710ff583..e61e15d370 100644
--- a/sled-agent/src/bootstrap/pre_server.rs
+++ b/sled-agent/src/bootstrap/pre_server.rs
@@ -18,6 +18,7 @@ use crate::long_running_tasks::{
     spawn_all_longrunning_tasks, LongRunningTaskHandles,
 };
 use crate::services::ServiceManager;
+use crate::services::TimeSyncConfig;
 use crate::sled_agent::SledAgent;
 use crate::storage_monitor::UnderlayAccess;
 use camino::Utf8PathBuf;
@@ -127,12 +128,18 @@ impl BootstrapAgentStartup {
         let global_zone_bootstrap_ip =
             startup_networking.global_zone_bootstrap_ip;

+        let time_sync = if let Some(true) = config.skip_timesync {
+            TimeSyncConfig::Skip
+        } else {
+            TimeSyncConfig::Normal
+        };
+
         let service_manager = ServiceManager::new(
             &base_log,
             ddm_admin_localhost_client.clone(),
             startup_networking,
             sled_mode,
-            config.skip_timesync,
+            time_sync,
             config.sidecar_revision.clone(),
             config.switch_zone_maghemite_links.clone(),
             long_running_task_handles.storage_manager.clone(),
diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs
index ddfea5d596..e240fb4d03 100644
--- a/sled-agent/src/services.rs
+++ b/sled-agent/src/services.rs
@@ -202,6 +202,11 @@ pub enum Error {
     #[error("NTP zone not ready")]
     NtpZoneNotReady,

+    // This isn't exactly "NtpZoneNotReady" -- it can happen when the NTP zone
+    // is up, but time is still in the process of synchronizing.
+    #[error("Time not yet synchronized")]
+    TimeNotSynchronized,
+
     #[error("Execution error: {0}")]
     ExecutionError(#[from] illumos_utils::ExecutionError),

@@ -252,6 +257,9 @@ impl From<Error> for omicron_common::api::external::Error {
         err @ Error::RequestedConfigOutdated { .. } => {
             omicron_common::api::external::Error::conflict(&err.to_string())
         }
+        err @ Error::TimeNotSynchronized => {
+            omicron_common::api::external::Error::unavail(&err.to_string())
+        }
         _ => omicron_common::api::external::Error::InternalError {
             internal_message: err.to_string(),
         },
@@ -274,6 +282,27 @@ fn display_zone_init_errors(errors: &[(String, Box<dyn std::error::Error>)]) -> String {
     output
 }

+// Does this zone require time synchronization before it is initialized?
+//
+// This function is somewhat conservative - the set of services
+// that can be launched before timesync has completed is intentionally kept
+// small, since it would be easy to add a service that expects time to be
+// reasonably synchronized.
+fn zone_requires_timesync(zone_type: &OmicronZoneType) -> bool {
+    match zone_type {
+        // These zones can be initialized and started before time has been
+        // synchronized. For the NTP zones, this should be self-evident --
+        // we need the NTP zone to actually perform time synchronization!
+        //
+        // The DNS zone is a bit of an exception here, since the NTP zone
+        // itself may rely on DNS lookups as a dependency.
+        OmicronZoneType::BoundaryNtp { .. }
+        | OmicronZoneType::InternalNtp { .. }
+        | OmicronZoneType::InternalDns { .. } => false,
+        _ => true,
+    }
+}
+
 /// Configuration parameters which modify the [`ServiceManager`]'s behavior.
 pub struct Config {
     /// Identifies the sled being configured
@@ -504,18 +533,20 @@ enum SledLocalZone {
     },
 }

+type ZoneMap = BTreeMap<String, OmicronZone>;
+
 /// Manages miscellaneous Sled-local services.
 pub struct ServiceManagerInner {
     log: Logger,
     global_zone_bootstrap_link_local_address: Ipv6Addr,
     switch_zone: Mutex<SledLocalZone>,
     sled_mode: SledMode,
-    skip_timesync: Option<bool>,
+    time_sync_config: TimeSyncConfig,
     time_synced: AtomicBool,
     switch_zone_maghemite_links: Vec<PhysicalLink>,
     sidecar_revision: SidecarRevision,
     // Zones representing running services
-    zones: Mutex<BTreeMap<String, OmicronZone>>,
+    zones: Mutex<ZoneMap>,
     underlay_vnic_allocator: VnicAllocator<Etherstub>,
     underlay_vnic: EtherstubVnic,
     bootstrap_vnic_allocator: VnicAllocator<Etherstub>,
@@ -540,6 +571,16 @@ struct SledAgentInfo {
     rack_network_config: Option<RackNetworkConfig>,
 }

+pub(crate) enum TimeSyncConfig {
+    // Waits for NTP to confirm that time has been synchronized.
+    Normal,
+    // Skips timesync unconditionally.
+    Skip,
+    // Fails timesync unconditionally.
+    #[cfg(test)]
+    Fail,
+}
+
 #[derive(Clone)]
 pub struct ServiceManager {
     inner: Arc<ServiceManagerInner>,
@@ -554,7 +595,7 @@ impl ServiceManager {
     /// - `bootstrap_networking`: Collection of etherstubs/VNICs set up when
     ///   bootstrap agent begins
     /// - `sled_mode`: The sled's mode of operation (Gimlet vs Scrimlet).
-    /// - `skip_timesync`: If true, the sled always reports synced time.
+    /// - `time_sync_config`: Describes how the sled awaits synced time.
     /// - `sidecar_revision`: Rev of attached sidecar, if present.
     /// - `switch_zone_maghemite_links`: List of physical links on which
     ///   maghemite should listen.
@@ -565,7 +606,7 @@ impl ServiceManager {
         ddmd_client: DdmAdminClient,
         bootstrap_networking: BootstrapNetworking,
         sled_mode: SledMode,
-        skip_timesync: Option<bool>,
+        time_sync_config: TimeSyncConfig,
         sidecar_revision: SidecarRevision,
         switch_zone_maghemite_links: Vec<PhysicalLink>,
         storage: StorageHandle,
@@ -582,7 +623,7 @@ impl ServiceManager {
             // Load the switch zone if it already exists?
             switch_zone: Mutex::new(SledLocalZone::Disabled),
             sled_mode,
-            skip_timesync,
+            time_sync_config,
             time_synced: AtomicBool::new(false),
             sidecar_revision,
             switch_zone_maghemite_links,
@@ -2767,6 +2808,26 @@ impl ServiceManager {
             old_zones_set.difference(&requested_zones_set);
         let zones_to_be_added = requested_zones_set.difference(&old_zones_set);

+        // For each new zone request, ensure that we've sufficiently
+        // synchronized time.
+        //
+        // NOTE: This imposes a constraint, during initial setup, cold boot,
+        // etc, that NTP and the internal DNS system it depends on MUST be
+        // initialized prior to other zones.
+        let time_is_synchronized =
+            match self.timesync_get_locked(&existing_zones).await {
+                // Time is synchronized
+                Ok(TimeSync { sync: true, .. 
}) => true, + // Time is not synchronized, or we can't check + _ => false, + }; + for zone in zones_to_be_added.clone() { + if zone_requires_timesync(&zone.zone_type) && !time_is_synchronized + { + return Err(Error::TimeNotSynchronized); + } + } + // Destroy zones that should not be running for zone in zones_to_be_removed { let expected_zone_name = zone.zone_name(); @@ -2960,8 +3021,24 @@ impl ServiceManager { pub async fn timesync_get(&self) -> Result { let existing_zones = self.inner.zones.lock().await; + self.timesync_get_locked(&existing_zones).await + } + + async fn timesync_get_locked( + &self, + existing_zones: &tokio::sync::MutexGuard<'_, ZoneMap>, + ) -> Result { + let skip_timesync = match &self.inner.time_sync_config { + TimeSyncConfig::Normal => false, + TimeSyncConfig::Skip => true, + #[cfg(test)] + TimeSyncConfig::Fail => { + info!(self.inner.log, "Configured to fail timesync checks"); + return Err(Error::TimeNotSynchronized); + } + }; - if let Some(true) = self.inner.skip_timesync { + if skip_timesync { info!(self.inner.log, "Configured to skip timesync checks"); self.boottime_rewrite(); return Ok(TimeSync { @@ -3545,7 +3622,6 @@ mod test { svc, zone::MockZones, }; - use omicron_common::address::OXIMETER_PORT; use sled_storage::disk::{RawDisk, SyntheticDisk}; use sled_storage::manager::{FakeStorageManager, StorageHandle}; @@ -3558,6 +3634,7 @@ mod test { const SWITCH_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; const EXPECTED_ZONE_NAME_PREFIX: &str = "oxz_oximeter"; + const EXPECTED_PORT: u16 = 12223; fn make_bootstrap_networking_config() -> BootstrapNetworking { BootstrapNetworking { @@ -3575,7 +3652,9 @@ mod test { } // Returns the expectations for a new service to be created. - fn expect_new_service() -> Vec> { + fn expect_new_service( + expected_zone_name_prefix: &str, + ) -> Vec> { illumos_utils::USE_MOCKS.store(true, Ordering::SeqCst); // Create a VNIC let create_vnic_ctx = MockDladm::create_vnic_context(); @@ -3587,15 +3666,19 @@ mod test { ); // Install the Omicron Zone let install_ctx = MockZones::install_omicron_zone_context(); - install_ctx.expect().return_once(|_, _, name, _, _, _, _, _, _| { - assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); - Ok(()) - }); + let prefix = expected_zone_name_prefix.to_string(); + install_ctx.expect().return_once( + move |_, _, name, _, _, _, _, _, _| { + assert!(name.starts_with(&prefix)); + Ok(()) + }, + ); // Boot the zone. let boot_ctx = MockZones::boot_context(); - boot_ctx.expect().return_once(|name| { - assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); + let prefix = expected_zone_name_prefix.to_string(); + boot_ctx.expect().return_once(move |name| { + assert!(name.starts_with(&prefix)); Ok(()) }); @@ -3603,8 +3686,9 @@ mod test { // up the zone ID for the booted zone. This goes through // `MockZone::id` to find the zone and get its ID. 
let id_ctx = MockZones::id_context(); - id_ctx.expect().return_once(|name| { - assert!(name.starts_with(EXPECTED_ZONE_NAME_PREFIX)); + let prefix = expected_zone_name_prefix.to_string(); + id_ctx.expect().return_once(move |name| { + assert!(name.starts_with(&prefix)); Ok(Some(1)) }); @@ -3720,19 +3804,35 @@ mod test { id: Uuid, generation: Generation, ) { - let _expectations = expect_new_service(); let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); + try_new_service_of_type( + mgr, + id, + generation, + OmicronZoneType::Oximeter { address }, + ) + .await + .expect("Could not create service"); + } + + async fn try_new_service_of_type( + mgr: &ServiceManager, + id: Uuid, + generation: Generation, + zone_type: OmicronZoneType, + ) -> Result<(), Error> { + let zone_prefix = format!("oxz_{}", zone_type.zone_type_str()); + let _expectations = expect_new_service(&zone_prefix); mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { generation, zones: vec![OmicronZoneConfig { id, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Oximeter { address }, + zone_type, }], }) .await - .unwrap(); } // Prepare to call "ensure" for a service which already exists. We should @@ -3743,7 +3843,7 @@ mod test { generation: Generation, ) { let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { generation, zones: vec![OmicronZoneConfig { @@ -3802,6 +3902,7 @@ mod test { // files. std::fs::write(dir.join("oximeter.tar.gz"), "Not a real file") .unwrap(); + std::fs::write(dir.join("ntp.tar.gz"), "Not a real file").unwrap(); } } @@ -3857,13 +3958,20 @@ mod test { } fn new_service_manager(self) -> ServiceManager { + self.new_service_manager_with_timesync(TimeSyncConfig::Skip) + } + + fn new_service_manager_with_timesync( + self, + time_sync_config: TimeSyncConfig, + ) -> ServiceManager { let log = &self.log; let mgr = ServiceManager::new( log, self.ddmd_client, make_bootstrap_networking_config(), SledMode::Auto, - Some(true), + time_sync_config, SidecarRevision::Physical("rev-test".to_string()), vec![], self.storage_handle, @@ -3927,6 +4035,63 @@ mod test { logctx.cleanup_successful(); } + #[tokio::test] + async fn test_ensure_service_before_timesync() { + let logctx = omicron_test_utils::dev::test_setup_log( + "test_ensure_service_before_timesync", + ); + let test_config = TestConfig::new().await; + let helper = + LedgerTestHelper::new(logctx.log.clone(), &test_config).await; + + let mgr = + helper.new_service_manager_with_timesync(TimeSyncConfig::Fail); + LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + + let v1 = Generation::new(); + let found = + mgr.omicron_zones_list().await.expect("failed to list zones"); + assert_eq!(found.generation, v1); + assert!(found.zones.is_empty()); + + let v2 = v1.next(); + let id = Uuid::new_v4(); + + // Should fail: time has not yet synchronized. + let address = + SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); + let result = try_new_service_of_type( + &mgr, + id, + v2, + OmicronZoneType::Oximeter { address }, + ) + .await; + assert_matches::assert_matches!( + result, + Err(Error::TimeNotSynchronized) + ); + + // Should succeed: we don't care that time has not yet synchronized (for + // this specific service). 
+ try_new_service_of_type( + &mgr, + id, + v2, + OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, + ) + .await + .unwrap(); + + drop_service_manager(mgr); + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_ensure_service_which_already_exists() { let logctx = omicron_test_utils::dev::test_setup_log( @@ -3975,7 +4140,7 @@ mod test { // Before we re-create the service manager - notably, using the same // config file! - expect that a service gets initialized. - let _expectations = expect_new_service(); + let _expectations = expect_new_service(EXPECTED_ZONE_NAME_PREFIX); let mgr = helper.new_service_manager(); LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); @@ -4049,7 +4214,7 @@ mod test { let _expectations = expect_new_services(); let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); let mut zones = vec![OmicronZoneConfig { id: id1, underlay_address: Ipv6Addr::LOCALHOST, @@ -4241,7 +4406,7 @@ mod test { let _expectations = expect_new_services(); let address = - SocketAddrV6::new(Ipv6Addr::LOCALHOST, OXIMETER_PORT, 0, 0); + SocketAddrV6::new(Ipv6Addr::LOCALHOST, EXPECTED_PORT, 0, 0); let mut zones = migrated_ledger.data().clone().to_omicron_zones_config().zones; zones.push(OmicronZoneConfig { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 5bc0f8d257..d094643cf9 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -159,7 +159,7 @@ impl From for omicron_common::api::external::Error { impl From for dropshot::HttpError { fn from(err: Error) -> Self { match err { - crate::sled_agent::Error::Instance(instance_manager_error) => { + Error::Instance(instance_manager_error) => { match instance_manager_error { crate::instance_manager::Error::Instance( instance_error, @@ -196,7 +196,7 @@ impl From for dropshot::HttpError { e => HttpError::for_internal_error(e.to_string()), } } - crate::sled_agent::Error::ZoneBundle(ref inner) => match inner { + Error::ZoneBundle(ref inner) => match inner { BundleError::NoStorage | BundleError::Unavailable { .. 
} => { HttpError::for_unavail(None, inner.to_string()) } From cae1029a5ba60b71f8b3f012bfd2affa42d9b9e9 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Jan 2024 13:18:42 -0800 Subject: [PATCH 09/24] Update Rust crate base64 to 0.21.6 (#4785) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2974dfe98e..33da6a7305 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,9 +405,9 @@ checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" [[package]] name = "base64" -version = "0.21.5" +version = "0.21.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" +checksum = "c79fed4cdb43e993fcdadc7e58a09fd0e3e649c4436fa11da71c9f1f3ee7feb9" [[package]] name = "base64ct" diff --git a/Cargo.toml b/Cargo.toml index 8553a7244b..dfd57c4db1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -151,7 +151,7 @@ async-trait = "0.1.77" atomicwrites = "0.4.3" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } -base64 = "0.21.5" +base64 = "0.21.6" bb8 = "0.8.1" bcs = "0.1.6" bincode = "1.3.3" From 4aebef093ef44d5d18d34c435d7e74c9ed713b5e Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Jan 2024 13:19:57 -0800 Subject: [PATCH 10/24] Update Rust crate libc to 0.2.152 (#4786) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 33da6a7305..a4157829af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3649,9 +3649,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.151" +version = "0.2.152" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" [[package]] name = "libdlpi-sys" diff --git a/Cargo.toml b/Cargo.toml index dfd57c4db1..b2d0e406da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -230,7 +230,7 @@ ipnetwork = { version = "0.20", features = ["schemars"] } itertools = "0.12.0" key-manager = { path = "key-manager" } kstat-rs = "0.2.3" -libc = "0.2.151" +libc = "0.2.152" linear-map = "1.2.0" macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index cff10b60ce..5688e133c0 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -62,7 +62,7 @@ ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools = { version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.4.0", default-features = false, features = ["spin_no_std"] } -libc = { version = "0.2.151", features = ["extra_traits"] } +libc = { version = "0.2.152", features = ["extra_traits"] } log = { version = "0.4.20", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.6.3" } @@ -165,7 +165,7 @@ ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools = { version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.4.0", default-features = false, features 
= ["spin_no_std"] } -libc = { version = "0.2.151", features = ["extra_traits"] } +libc = { version = "0.2.152", features = ["extra_traits"] } log = { version = "0.4.20", default-features = false, features = ["std"] } managed = { version = "0.8.0", default-features = false, features = ["alloc", "map"] } memchr = { version = "2.6.3" } From 0c7be4c9459b59a8d00b078985f2bc1dd41c2074 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Wed, 10 Jan 2024 16:42:54 -0500 Subject: [PATCH 11/24] Addresses for propolis instances at SLED_PREFIX + 0xFFFF (#4777) Rather than allocating instance IPs starting at `SLED_PREFIX` + `RSS_RESERVED_ADDRESSES` + 1 where the `1` is the sled-agent allocated address of the GZ, we begin allocation from a larger block: `SLED_PREFIX` + `CP_SERVICES_RESERVED_ADDRESSES`. This gives us more room for nexus to allocate control plane services. Implements #4765 --- common/src/address.rs | 3 + nexus/db-model/src/schema.rs | 2 +- nexus/db-model/src/sled.rs | 6 +- nexus/db-queries/src/db/datastore/mod.rs | 38 ++----------- nexus/src/app/sagas/instance_common.rs | 6 +- nexus/src/app/sagas/instance_migrate.rs | 4 +- nexus/src/app/sagas/instance_start.rs | 4 +- nexus/tests/integration_tests/schema.rs | 72 ++++++++++++++++++++++++ schema/crdb/24.0.0/up.sql | 3 + schema/crdb/dbinit.sql | 4 +- 10 files changed, 97 insertions(+), 45 deletions(-) create mode 100644 schema/crdb/24.0.0/up.sql diff --git a/common/src/address.rs b/common/src/address.rs index 94361a2705..78eaee0bb4 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -165,6 +165,9 @@ const GZ_ADDRESS_INDEX: usize = 2; /// The maximum number of addresses per sled reserved for RSS. pub const RSS_RESERVED_ADDRESSES: u16 = 32; +// The maximum number of addresses per sled reserved for control plane services. +pub const CP_SERVICES_RESERVED_ADDRESSES: u16 = 0xFFFF; + /// Wraps an [`Ipv6Network`] with a compile-time prefix length. #[derive(Debug, Clone, Copy, JsonSchema, Serialize, Hash, PartialEq, Eq)] #[schemars(rename = "Ipv6Subnet")] diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 02bdd2c349..ed819cba80 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(23, 0, 1); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(24, 0, 0); table! 
{ disk (id) { diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 85a6b3139c..52968c27d5 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -57,7 +57,7 @@ pub struct Sled { pub ip: ipv6::Ipv6Addr, pub port: SqlU16, - /// The last IP address provided to an Oxide service on this sled + /// The last IP address provided to a propolis instance on this sled pub last_used_address: ipv6::Ipv6Addr, provision_state: SledProvisionState, @@ -183,7 +183,9 @@ impl SledUpdate { pub fn into_insertable(self) -> Sled { let last_used_address = { let mut segments = self.ip().segments(); - segments[7] += omicron_common::address::RSS_RESERVED_ADDRESSES; + // We allocate the entire last segment to control plane services + segments[7] = + omicron_common::address::CP_SERVICES_RESERVED_ADDRESSES; ipv6::Ipv6Addr::from(Ipv6Addr::from(segments)) }; Sled { diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 93486771b5..d61ff15a3d 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -275,8 +275,8 @@ impl DataStore { self.pool_connection_unauthorized().await } - /// Return the next available IPv6 address for an Oxide service running on - /// the provided sled. + /// Return the next available IPv6 address for a propolis instance running + /// on the provided sled. pub async fn next_ipv6_address( &self, opctx: &OpContext, @@ -1286,7 +1286,6 @@ mod test { // Test sled-specific IPv6 address allocation #[tokio::test] async fn test_sled_ipv6_address_allocation() { - use omicron_common::address::RSS_RESERVED_ADDRESSES as STATIC_IPV6_ADDRESS_OFFSET; use std::net::Ipv6Addr; let logctx = dev::test_setup_log("test_sled_ipv6_address_allocation"); @@ -1322,41 +1321,14 @@ mod test { datastore.sled_upsert(sled2).await.unwrap(); let ip = datastore.next_ipv6_address(&opctx, sled1_id).await.unwrap(); - let expected_ip = Ipv6Addr::new( - 0xfd00, - 0x1de, - 0, - 0, - 0, - 0, - 0, - 2 + STATIC_IPV6_ADDRESS_OFFSET, - ); + let expected_ip = Ipv6Addr::new(0xfd00, 0x1de, 0, 0, 0, 0, 1, 0); assert_eq!(ip, expected_ip); let ip = datastore.next_ipv6_address(&opctx, sled1_id).await.unwrap(); - let expected_ip = Ipv6Addr::new( - 0xfd00, - 0x1de, - 0, - 0, - 0, - 0, - 0, - 3 + STATIC_IPV6_ADDRESS_OFFSET, - ); + let expected_ip = Ipv6Addr::new(0xfd00, 0x1de, 0, 0, 0, 0, 1, 1); assert_eq!(ip, expected_ip); let ip = datastore.next_ipv6_address(&opctx, sled2_id).await.unwrap(); - let expected_ip = Ipv6Addr::new( - 0xfd00, - 0x1df, - 0, - 0, - 0, - 0, - 0, - 2 + STATIC_IPV6_ADDRESS_OFFSET, - ); + let expected_ip = Ipv6Addr::new(0xfd00, 0x1df, 0, 0, 0, 0, 1, 0); assert_eq!(ip, expected_ip); let _ = db.cleanup().await; diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index 438b92cb84..8f9197b03b 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ b/nexus/src/app/sagas/instance_common.rs @@ -121,9 +121,9 @@ pub async fn destroy_vmm_record( Ok(()) } -/// Allocates a new IPv6 address for a service that will run on the supplied -/// sled. -pub(super) async fn allocate_sled_ipv6( +/// Allocates a new IPv6 address for a propolis instance that will run on the +/// supplied sled. 
+pub(super) async fn allocate_vmm_ipv6(
     opctx: &OpContext,
     datastore: &DataStore,
     sled_uuid: Uuid,
diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs
index 29c189efb4..1716953f04 100644
--- a/nexus/src/app/sagas/instance_migrate.rs
+++ b/nexus/src/app/sagas/instance_migrate.rs
@@ -7,7 +7,7 @@ use crate::app::instance::{
     InstanceStateChangeError, InstanceStateChangeRequest,
 };
 use crate::app::sagas::{
-    declare_saga_actions, instance_common::allocate_sled_ipv6,
+    declare_saga_actions, instance_common::allocate_vmm_ipv6,
 };
 use crate::external_api::params;
 use nexus_db_queries::db::{identity::Resource, lookup::LookupPath};
@@ -181,7 +181,7 @@ async fn sim_allocate_propolis_ip(
         &sagactx,
         &params.serialized_authn,
     );
-    allocate_sled_ipv6(
+    allocate_vmm_ipv6(
         &opctx,
         sagactx.user_data().datastore(),
         params.migrate_params.dst_sled_id,
diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs
index 8957a838e7..9d12bd8031 100644
--- a/nexus/src/app/sagas/instance_start.rs
+++ b/nexus/src/app/sagas/instance_start.rs
@@ -7,7 +7,7 @@
 use std::net::Ipv6Addr;

 use super::{
-    instance_common::allocate_sled_ipv6, NexusActionContext, NexusSaga,
+    instance_common::allocate_vmm_ipv6, NexusActionContext, NexusSaga,
     SagaInitError, ACTION_GENERATE_ID,
 };
 use crate::app::instance::InstanceStateChangeError;
@@ -159,7 +159,7 @@ async fn sis_alloc_propolis_ip(
         &params.serialized_authn,
     );
     let sled_uuid = sagactx.lookup::<Uuid>("sled_id")?;
-    allocate_sled_ipv6(&opctx, sagactx.user_data().datastore(), sled_uuid).await
+    allocate_vmm_ipv6(&opctx, sagactx.user_data().datastore(), sled_uuid).await
 }

 async fn sis_create_vmm_record(
diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs
index 21ed99e010..f183b53282 100644
--- a/nexus/tests/integration_tests/schema.rs
+++ b/nexus/tests/integration_tests/schema.rs
@@ -20,6 +20,7 @@ use pretty_assertions::{assert_eq, assert_ne};
 use similar_asserts;
 use slog::Logger;
 use std::collections::{BTreeMap, BTreeSet};
+use std::net::IpAddr;
 use std::path::PathBuf;
 use tokio::time::timeout;
 use tokio::time::Duration;
@@ -192,6 +193,7 @@ enum AnySqlType {
     String(String),
     TextArray(Vec<String>),
     Uuid(Uuid),
+    Inet(IpAddr),
     // TODO: This isn't exhaustive, feel free to add more.
// // These should only be necessary for rows where the database schema changes also choose to @@ -234,6 +236,12 @@ impl From for AnySqlType { } } +impl From for AnySqlType { + fn from(value: IpAddr) -> Self { + Self::Inet(value) + } +} + impl AnySqlType { fn as_str(&self) -> &str { match self { @@ -279,6 +287,9 @@ impl<'a> tokio_postgres::types::FromSql<'a> for AnySqlType { ty, raw, )?)); } + if IpAddr::accepts(ty) { + return Ok(AnySqlType::Inet(IpAddr::from_sql(ty, raw)?)); + } use tokio_postgres::types::Kind; match ty.kind() { @@ -941,6 +952,13 @@ const POOL1: Uuid = Uuid::from_u128(0x11116001_5c3d_4647_83b0_8f3515da7be1); const POOL2: Uuid = Uuid::from_u128(0x22226001_5c3d_4647_83b0_8f3515da7be1); const POOL3: Uuid = Uuid::from_u128(0x33336001_5c3d_4647_83b0_8f3515da7be1); +// "513D" -> "Sled" +const SLED1: Uuid = Uuid::from_u128(0x1111513d_5c3d_4647_83b0_8f3515da7be1); +const SLED2: Uuid = Uuid::from_u128(0x2222513d_5c3d_4647_83b0_8f3515da7be1); + +// "7AC4" -> "Rack" +const RACK1: Uuid = Uuid::from_u128(0x11117ac4_5c3d_4647_83b0_8f3515da7be1); + fn before_23_0_0(client: &Client) -> BoxFuture<'_, ()> { Box::pin(async move { // Create two silos @@ -1024,6 +1042,56 @@ fn after_23_0_0(client: &Client) -> BoxFuture<'_, ()> { }) } +fn before_24_0_0(client: &Client) -> BoxFuture<'_, ()> { + // IP addresses were pulled off dogfood sled 16 + Box::pin(async move { + // Create two sleds + client + .batch_execute(&format!( + "INSERT INTO sled + (id, time_created, time_modified, time_deleted, rcgen, rack_id, + is_scrimlet, serial_number, part_number, revision, + usable_hardware_threads, usable_physical_ram, reservoir_size, ip, + port, last_used_address, provision_state) VALUES + + ('{SLED1}', now(), now(), NULL, 1, '{RACK1}', true, 'abcd', 'defg', + '1', 64, 12345678, 77, 'fd00:1122:3344:104::1', 12345, + 'fd00:1122:3344:104::1ac', 'provisionable'), + ('{SLED2}', now(), now(), NULL, 1, '{RACK1}', false, 'zzzz', 'xxxx', + '2', 64, 12345678, 77,'fd00:1122:3344:107::1', 12345, + 'fd00:1122:3344:107::d4', 'provisionable'); + " + )) + .await + .expect("Failed to create sleds"); + }) +} + +fn after_24_0_0(client: &Client) -> BoxFuture<'_, ()> { + Box::pin(async { + // Confirm that the IP Addresses have the last 2 bytes changed to `0xFFFF` + let rows = client + .query("SELECT last_used_address FROM sled ORDER BY id", &[]) + .await + .expect("Failed to sled last_used_address"); + let last_used_addresses = process_rows(&rows); + + let expected_addr_1: IpAddr = + "fd00:1122:3344:104::ffff".parse().unwrap(); + let expected_addr_2: IpAddr = + "fd00:1122:3344:107::ffff".parse().unwrap(); + + assert_eq!( + last_used_addresses[0].values, + vec![ColumnValue::new("last_used_address", expected_addr_1)] + ); + assert_eq!( + last_used_addresses[1].values, + vec![ColumnValue::new("last_used_address", expected_addr_2)] + ); + }) +} + // Lazily initializes all migration checks. The combination of Rust function // pointers and async makes defining a static table fairly painful, so we're // using lazy initialization instead. 
@@ -1037,6 +1105,10 @@ fn get_migration_checks() -> BTreeMap { SemverVersion(semver::Version::parse("23.0.0").unwrap()), DataMigrationFns { before: Some(before_23_0_0), after: after_23_0_0 }, ); + map.insert( + SemverVersion(semver::Version::parse("24.0.0").unwrap()), + DataMigrationFns { before: Some(before_24_0_0), after: after_24_0_0 }, + ); map } diff --git a/schema/crdb/24.0.0/up.sql b/schema/crdb/24.0.0/up.sql new file mode 100644 index 0000000000..91bd10ab9f --- /dev/null +++ b/schema/crdb/24.0.0/up.sql @@ -0,0 +1,3 @@ +UPDATE omicron.public.sled + SET last_used_address = (netmask(set_masklen(ip, 64)) & ip) + 0xFFFF + WHERE time_deleted is null; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index e40c97972f..2105caabef 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -108,7 +108,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled ( ip INET NOT NULL, port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, - /* The last address allocated to an Oxide service on this sled. */ + /* The last address allocated to a propolis instance on this sled. */ last_used_address INET NOT NULL, /* The state of whether resources should be provisioned onto the sled */ @@ -3258,7 +3258,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '23.0.1', NULL) + ( TRUE, NOW(), NOW(), '24.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 05284564f82d835c06ad9ee822a498a99ab08fe7 Mon Sep 17 00:00:00 2001 From: Michael Zeller Date: Wed, 10 Jan 2024 19:08:21 -0500 Subject: [PATCH 12/24] The control plane should use libipcc (#4536) We should use libipcc in omicron rather than make direct calls via `ioctl`. The library will take care of hiding the implementation details from upstream consumers -- this becomes important in the future when communication with the service processor from the host OS physically changes with newer board design. Currently the only consumer in this repo is installinator but the control plane is about to start communicating with the RoT via IPCC as well. This PR introduces new bindings around `libipcc` and removes the old `ioctl` interfaces. 
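As an illustration of the new surface, a minimal sketch (the wrapper function name here is hypothetical, but the calls mirror the `installinator/src/artifact.rs` change in this diff):

    use anyhow::Context;
    use ipcc::{InstallinatorImageId, Ipcc};

    fn image_id_via_libipcc() -> anyhow::Result<InstallinatorImageId> {
        // Ipcc::new() initializes a libipcc handle, replacing the old
        // Ipcc::open(), which issued ioctls against /dev/ipcc directly.
        let ipcc = Ipcc::new().context("error opening IPCC")?;
        // Key lookups now go through libipcc rather than IPCC_KEYLOOKUP.
        ipcc.installinator_image_id()
            .context("error retrieving installinator image ID")
    }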
--------- Co-authored-by: Andy Fiddaman --- .cargo/config | 45 ++++++- .github/buildomat/build-and-test.sh | 9 ++ Cargo.lock | 7 +- Cargo.toml | 6 +- gateway/Cargo.toml | 2 +- gateway/src/http_entrypoints.rs | 13 +- gateway/src/http_entrypoints/conversions.rs | 2 +- installinator/Cargo.toml | 2 +- installinator/src/artifact.rs | 4 +- ipcc-key-value/src/ioctl.rs | 126 ------------------- ipcc-key-value/src/ioctl_common.rs | 57 --------- ipcc-key-value/src/ioctl_stub.rs | 32 ----- {ipcc-key-value => ipcc}/Cargo.toml | 3 +- ipcc/build.rs | 16 +++ ipcc/src/ffi.rs | 83 +++++++++++++ ipcc/src/handle.rs | 129 ++++++++++++++++++++ ipcc/src/handle_stub.rs | 25 ++++ {ipcc-key-value => ipcc}/src/lib.rs | 116 +++++++++++++++--- openapi/gateway.json | 2 +- 19 files changed, 420 insertions(+), 259 deletions(-) delete mode 100644 ipcc-key-value/src/ioctl.rs delete mode 100644 ipcc-key-value/src/ioctl_common.rs delete mode 100644 ipcc-key-value/src/ioctl_stub.rs rename {ipcc-key-value => ipcc}/Cargo.toml (91%) create mode 100644 ipcc/build.rs create mode 100644 ipcc/src/ffi.rs create mode 100644 ipcc/src/handle.rs create mode 100644 ipcc/src/handle_stub.rs rename {ipcc-key-value => ipcc}/src/lib.rs (75%) diff --git a/.cargo/config b/.cargo/config index 6794e988ad..f658f146c9 100644 --- a/.cargo/config +++ b/.cargo/config @@ -7,13 +7,50 @@ [build] rustdocflags = "--document-private-items" -# On illumos, use `-znocompstrtab` to reduce link time. +# On illumos, use `-znocompstrtab` to reduce link time. We also add the Oxide +# specific platform directory to the RPATH where additional libraries can be +# found such as libipcc. # -# Note that these flags are overridden by a user's environment variable, so -# things critical to correctness probably don't belong here. +# Our reasoning for including `-R/usr/platform/oxide/lib/amd64` here: +# - Oxide specific features - This path contains Oxide specific libraries such +# as libipcc and will likely grow over time to include more functionality. +# - Avoid the rpaths crate - The rpaths crate was built to deal with runtime +# paths that are dynamic such as with libraries like libpq which can live in +# different locations based on OS. This path will only ever be found on +# illumos and will be tied directly to the Oxide platform. +# - Less developer burden - Having something like the ipcc crate emit +# `DEP_IPCC_LIBDIRS` means that we end up littering the repo with Cargo.toml +# and build.rs changes whenever ipcc shows up somewhere in the dependency +# tree. While initially exploring that path we ran into a significant number +# of tests failing due to not being able to find libipcc in the runtime path +# which can be confusing or surprising to someone doing work on omicron. +# +# We could also update Helios so that a symlink is created from +# /usr/platform/oxide/lib/amd64 into /usr/lib/64 but we opted to not take +# this route forward as it meant waiting for another round of updates on +# shared build machines and to buildomat itself. +# +# As documented at: +# https://doc.rust-lang.org/cargo/reference/config.html#buildrustflags +# +# There are four mutually exclusive sources of extra flags. They are checked in +# order, with the first one being used: +# 1. `CARGO_ENCODED_RUSTFLAGS` environment variable. +# 2. `RUSTFLAGS` environment variable. +# 3. All matching target..rustflags and target..rustflags config +# entries joined together. +# 4. build.rustflags config value. 
+# +# When overriding the defaults in this config by environment variable the user +# may need to manually pass the additional options found below. +# +# Note that other runtime paths should not be added here, but should instead +# reuse the infrastructure found in the `rpaths` crate which can be found in +# this repo. Those paths are usually platform specific and will vary based on a +# variety of things such as host OS. [target.x86_64-unknown-illumos] rustflags = [ - "-C", "link-arg=-Wl,-znocompstrtab" + "-C", "link-arg=-Wl,-znocompstrtab,-R/usr/platform/oxide/lib/amd64" ] # Set up `cargo xtask`. diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index eab64c528c..5cf086b1a3 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -4,6 +4,8 @@ set -o errexit set -o pipefail set -o xtrace +target_os=$1 + # NOTE: This version should be in sync with the recommended version in # .config/nextest.toml. (Maybe build an automated way to pull the recommended # version in the future.) @@ -48,6 +50,13 @@ ptime -m bash ./tools/install_builder_prerequisites.sh -y # banner build export RUSTFLAGS="-D warnings" +# When running on illumos we need to pass an additional runpath that is +# usually configured via ".cargo/config" but the `RUSTFLAGS` env variable +# takes precedence. This path contains oxide specific libraries such as +# libipcc. +if [[ $target_os == "illumos" ]]; then + RUSTFLAGS="-D warnings -C link-arg=-R/usr/platform/oxide/lib/amd64" +fi export RUSTDOCFLAGS="-D warnings" export TMPDIR=$TEST_TMPDIR export RUST_BACKTRACE=1 diff --git a/Cargo.lock b/Cargo.lock index a4157829af..7491f30dde 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3306,7 +3306,7 @@ dependencies = [ "illumos-utils", "installinator-artifact-client", "installinator-common", - "ipcc-key-value", + "ipcc", "itertools 0.12.0", "libc", "omicron-common", @@ -3459,9 +3459,10 @@ dependencies = [ ] [[package]] -name = "ipcc-key-value" +name = "ipcc" version = "0.1.0" dependencies = [ + "cfg-if", "ciborium", "libc", "omicron-common", @@ -4712,7 +4713,7 @@ dependencies = [ "http", "hyper", "illumos-utils", - "ipcc-key-value", + "ipcc", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", diff --git a/Cargo.toml b/Cargo.toml index b2d0e406da..fbef04d3c0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ members = [ "installinator", "internal-dns-cli", "internal-dns", - "ipcc-key-value", + "ipcc", "key-manager", "nexus", "nexus/authz-macros", @@ -105,7 +105,7 @@ default-members = [ "installinator", "internal-dns-cli", "internal-dns", - "ipcc-key-value", + "ipcc", "key-manager", "nexus", "nexus/authz-macros", @@ -225,7 +225,7 @@ installinator-artifactd = { path = "installinator-artifactd" } installinator-artifact-client = { path = "clients/installinator-artifact-client" } installinator-common = { path = "installinator-common" } internal-dns = { path = "internal-dns" } -ipcc-key-value = { path = "ipcc-key-value" } +ipcc = { path = "ipcc" } ipnetwork = { version = "0.20", features = ["schemars"] } itertools = "0.12.0" key-manager = { path = "key-manager" } diff --git a/gateway/Cargo.toml b/gateway/Cargo.toml index f2e5f83a8a..450c4b445e 100644 --- a/gateway/Cargo.toml +++ b/gateway/Cargo.toml @@ -17,7 +17,7 @@ hex.workspace = true http.workspace = true hyper.workspace = true illumos-utils.workspace = true -ipcc-key-value.workspace = true +ipcc.workspace = true omicron-common.workspace = true once_cell.workspace = true schemars.workspace = true diff 
--git a/gateway/src/http_entrypoints.rs b/gateway/src/http_entrypoints.rs index e33e8dd4a6..b5a765a8a8 100644 --- a/gateway/src/http_entrypoints.rs +++ b/gateway/src/http_entrypoints.rs @@ -443,11 +443,11 @@ pub struct ImageVersion { pub version: u32, } -// This type is a duplicate of the type in `ipcc-key-value`, and we provide a +// This type is a duplicate of the type in `ipcc`, and we provide a // `From<_>` impl to convert to it. We keep these types distinct to allow us to // choose different representations for MGS's HTTP API (this type) and the wire // format passed through the SP to installinator -// (`ipcc_key_value::InstallinatorImageId`), although _currently_ they happen to +// (`ipcc::InstallinatorImageId`), although _currently_ they happen to // be defined identically. #[derive( Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, @@ -1292,7 +1292,7 @@ async fn sp_power_state_set( /// Set the installinator image ID the sled should use for recovery. /// -/// This value can be read by the host via IPCC; see the `ipcc-key-value` crate. +/// This value can be read by the host via IPCC; see the `ipcc` crate. #[endpoint { method = PUT, path = "/sp/{type}/{slot}/ipcc/installinator-image-id", @@ -1302,14 +1302,13 @@ async fn sp_installinator_image_id_set( path: Path, body: TypedBody, ) -> Result { - use ipcc_key_value::Key; + use ipcc::Key; let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); let sp = apictx.mgmt_switch.sp(sp_id)?; - let image_id = - ipcc_key_value::InstallinatorImageId::from(body.into_inner()); + let image_id = ipcc::InstallinatorImageId::from(body.into_inner()); sp.set_ipcc_key_lookup_value( Key::InstallinatorImageId as u8, @@ -1330,7 +1329,7 @@ async fn sp_installinator_image_id_delete( rqctx: RequestContext>, path: Path, ) -> Result { - use ipcc_key_value::Key; + use ipcc::Key; let apictx = rqctx.context(); let sp_id = path.into_inner().sp.into(); diff --git a/gateway/src/http_entrypoints/conversions.rs b/gateway/src/http_entrypoints/conversions.rs index 1182163bcc..a4aef7425e 100644 --- a/gateway/src/http_entrypoints/conversions.rs +++ b/gateway/src/http_entrypoints/conversions.rs @@ -397,7 +397,7 @@ impl From for HostStartupOptions { } } -impl From for ipcc_key_value::InstallinatorImageId { +impl From for ipcc::InstallinatorImageId { fn from(id: InstallinatorImageId) -> Self { Self { update_id: id.update_id, diff --git a/installinator/Cargo.toml b/installinator/Cargo.toml index d489e73ec1..43966d1202 100644 --- a/installinator/Cargo.toml +++ b/installinator/Cargo.toml @@ -20,7 +20,7 @@ http.workspace = true illumos-utils.workspace = true installinator-artifact-client.workspace = true installinator-common.workspace = true -ipcc-key-value.workspace = true +ipcc.workspace = true itertools.workspace = true libc.workspace = true omicron-common.workspace = true diff --git a/installinator/src/artifact.rs b/installinator/src/artifact.rs index f74d7b7f06..734759a2c2 100644 --- a/installinator/src/artifact.rs +++ b/installinator/src/artifact.rs @@ -9,7 +9,7 @@ use clap::Args; use futures::StreamExt; use installinator_artifact_client::ClientError; use installinator_common::EventReport; -use ipcc_key_value::{InstallinatorImageId, Ipcc}; +use ipcc::{InstallinatorImageId, Ipcc}; use omicron_common::update::{ArtifactHash, ArtifactHashId}; use tokio::sync::mpsc; use uuid::Uuid; @@ -47,7 +47,7 @@ pub(crate) struct ArtifactIdOpts { impl ArtifactIdOpts { pub(crate) fn resolve(&self) -> Result { if self.from_ipcc { - let ipcc = 
Ipcc::open().context("error opening IPCC")?; + let ipcc = Ipcc::new().context("error opening IPCC")?; ipcc.installinator_image_id() .context("error retrieving installinator image ID") } else { diff --git a/ipcc-key-value/src/ioctl.rs b/ipcc-key-value/src/ioctl.rs deleted file mode 100644 index b0524e973f..0000000000 --- a/ipcc-key-value/src/ioctl.rs +++ /dev/null @@ -1,126 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2023 Oxide Computer Company - -//! IPCC `ioctl` interface. -//! -//! This module is tightly-coupled to the host OS image, and should only be used -//! by services bundled with it (e.g., sled-agent and installinator). - -use crate::InstallinatorImageId; -use crate::InstallinatorImageIdError; -use crate::IpccKey; -use crate::IpccKeyLookupError; -use crate::PingError; -use crate::Pong; -use libc::c_int; -use std::fs::File; -use std::io; -use std::os::unix::prelude::AsRawFd; - -pub struct Ipcc { - file: File, -} - -impl Ipcc { - pub fn open() -> io::Result { - let file = File::options().read(true).write(true).open(IPCC_DEV)?; - Ok(Self { file }) - } - - pub fn ping(&self) -> Result { - const EXPECTED_REPLY: &[u8] = b"pong"; - - let mut buf = [0; EXPECTED_REPLY.len()]; - let n = self.key_lookup(IpccKey::Ping, &mut buf)?; - let buf = &buf[..n]; - - if buf == EXPECTED_REPLY { - Ok(Pong) - } else { - Err(PingError::UnexpectedReply(buf.to_vec())) - } - } - - pub fn installinator_image_id( - &self, - ) -> Result { - let mut buf = [0; InstallinatorImageId::CBOR_SERIALIZED_SIZE]; - let n = self.key_lookup(IpccKey::InstallinatorImageId, &mut buf)?; - let id = InstallinatorImageId::deserialize(&buf[..n]) - .map_err(InstallinatorImageIdError::DeserializationFailed)?; - Ok(id) - } - - fn key_lookup( - &self, - key: IpccKey, - buf: &mut [u8], - ) -> Result { - let mut kl = IpccKeyLookup { - key: key as u8, - buflen: u16::try_from(buf.len()).unwrap_or(u16::MAX), - result: 0, - datalen: 0, - buf: buf.as_mut_ptr(), - }; - - let result = unsafe { - libc::ioctl( - self.file.as_raw_fd(), - IPCC_KEYLOOKUP, - &mut kl as *mut IpccKeyLookup, - ) - }; - - if result != 0 { - let error = io::Error::last_os_error(); - return Err(IpccKeyLookupError::IoctlFailed { key, error }); - } - - match kl.result { - IPCC_KEYLOOKUP_SUCCESS => Ok(usize::from(kl.datalen)), - IPCC_KEYLOOKUP_UNKNOWN_KEY => { - Err(IpccKeyLookupError::UnknownKey { key }) - } - IPCC_KEYLOOKUP_NO_VALUE => { - Err(IpccKeyLookupError::NoValueForKey { key }) - } - IPCC_KEYLOOKUP_BUFFER_TOO_SMALL => { - Err(IpccKeyLookupError::BufferTooSmallForValue { key }) - } - _ => Err(IpccKeyLookupError::UnknownResultValue { - key, - result: kl.result, - }), - } - } -} - -// -------------------------------------------------------------------- -// Constants and structures from stlouis `usr/src/uts/oxide/sys/ipcc.h` -// -------------------------------------------------------------------- - -const IPCC_DEV: &str = "/dev/ipcc"; - -const IPCC_IOC: c_int = - ((b'i' as c_int) << 24) | ((b'c' as c_int) << 16) | ((b'c' as c_int) << 8); - -const IPCC_KEYLOOKUP: c_int = IPCC_IOC | 4; - -const IPCC_KEYLOOKUP_SUCCESS: u8 = 0; -const IPCC_KEYLOOKUP_UNKNOWN_KEY: u8 = 1; -const IPCC_KEYLOOKUP_NO_VALUE: u8 = 2; -const IPCC_KEYLOOKUP_BUFFER_TOO_SMALL: u8 = 3; - -#[derive(Debug, Clone, Copy)] -#[repr(C)] -struct IpccKeyLookup { - key: u8, - buflen: u16, - result: u8, - datalen: u16, - buf: *mut 
u8, -} diff --git a/ipcc-key-value/src/ioctl_common.rs b/ipcc-key-value/src/ioctl_common.rs deleted file mode 100644 index 670cc7bff2..0000000000 --- a/ipcc-key-value/src/ioctl_common.rs +++ /dev/null @@ -1,57 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2023 Oxide Computer Company - -//! This module contains types shared between the real (illumos-only) -//! `crate::ioctl` module and the generic `crate::ioctl_stub` module. - -use std::io; -use thiserror::Error; - -/// IPCC keys; the source of truth for these is RFD 316 + the -/// `host-sp-messages` crate in hubris. -#[derive(Debug, Clone, Copy)] -#[repr(u8)] -pub enum IpccKey { - Ping = 0, - InstallinatorImageId = 1, -} - -#[derive(Debug, Error)] -pub enum IpccKeyLookupError { - #[error("IPCC key lookup ioctl failed for key {key:?}: {error}")] - IoctlFailed { key: IpccKey, error: io::Error }, - #[error("IPCC key lookup failed for key {key:?}: unknown key")] - UnknownKey { key: IpccKey }, - #[error("IPCC key lookup failed for key {key:?}: no value for key")] - NoValueForKey { key: IpccKey }, - #[error( - "IPCC key lookup failed for key {key:?}: buffer too small for value" - )] - BufferTooSmallForValue { key: IpccKey }, - #[error( - "IPCC key lookup failed for key {key:?}: unknown result value {result}" - )] - UnknownResultValue { key: IpccKey, result: u8 }, -} - -#[derive(Debug, Error)] -pub enum InstallinatorImageIdError { - #[error(transparent)] - IpccKeyLookupError(#[from] IpccKeyLookupError), - #[error("deserializing installinator image ID failed: {0}")] - DeserializationFailed(String), -} - -#[derive(Debug, Error)] -pub enum PingError { - #[error(transparent)] - IpccKeyLookupError(#[from] IpccKeyLookupError), - #[error("unexpected reply from SP (expected `pong`: {0:?})")] - UnexpectedReply(Vec), -} - -#[derive(Debug, Clone, Copy)] -pub struct Pong; diff --git a/ipcc-key-value/src/ioctl_stub.rs b/ipcc-key-value/src/ioctl_stub.rs deleted file mode 100644 index cbf54b3eb4..0000000000 --- a/ipcc-key-value/src/ioctl_stub.rs +++ /dev/null @@ -1,32 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -// Copyright 2023 Oxide Computer Company - -//! Stub definition of the `Ipcc` type for compiling (but not running) on -//! non-Oxide systems. 
- -use crate::InstallinatorImageId; -use crate::InstallinatorImageIdError; -use crate::PingError; -use crate::Pong; -use std::io; - -pub struct Ipcc {} - -impl Ipcc { - pub fn open() -> io::Result { - panic!("ipcc unavailable on this platform") - } - - pub fn ping(&self) -> Result { - panic!("ipcc unavailable on this platform") - } - - pub fn installinator_image_id( - &self, - ) -> Result { - panic!("ipcc unavailable on this platform") - } -} diff --git a/ipcc-key-value/Cargo.toml b/ipcc/Cargo.toml similarity index 91% rename from ipcc-key-value/Cargo.toml rename to ipcc/Cargo.toml index 04aea9f939..98a781ab86 100644 --- a/ipcc-key-value/Cargo.toml +++ b/ipcc/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "ipcc-key-value" +name = "ipcc" version = "0.1.0" edition = "2021" license = "MPL-2.0" @@ -12,6 +12,7 @@ serde.workspace = true thiserror.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true +cfg-if.workspace = true [dev-dependencies] omicron-common = { workspace = true, features = ["testing"] } diff --git a/ipcc/build.rs b/ipcc/build.rs new file mode 100644 index 0000000000..a64133dac2 --- /dev/null +++ b/ipcc/build.rs @@ -0,0 +1,16 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +/// This path is where Oxide specific libraries live on helios systems. +#[cfg(target_os = "illumos")] +static OXIDE_PLATFORM: &str = "/usr/platform/oxide/lib/amd64/"; + +fn main() { + println!("cargo:rerun-if-changed=build.rs"); + #[cfg(target_os = "illumos")] + { + println!("cargo:rustc-link-arg=-Wl,-R{}", OXIDE_PLATFORM); + println!("cargo:rustc-link-search={}", OXIDE_PLATFORM); + } +} diff --git a/ipcc/src/ffi.rs b/ipcc/src/ffi.rs new file mode 100644 index 0000000000..420c1ddcde --- /dev/null +++ b/ipcc/src/ffi.rs @@ -0,0 +1,83 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2023 Oxide Computer Company + +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +use std::ffi::{c_char, c_int, c_uint}; + +/// Opaque libipcc handle +#[repr(C)] +pub(crate) struct libipcc_handle_t { + _data: [u8; 0], + _marker: core::marker::PhantomData<(*mut u8, core::marker::PhantomPinned)>, +} + +/// Indicates that there was no error. Used as the initialized value when +/// calling into libipcc. +pub(crate) const LIBIPCC_ERR_OK: libipcc_err_t = 0; + +/// Indicates that there was a memory allocation error. The system error +/// contains the specific errno. +pub(crate) const LIBIPCC_ERR_NO_MEM: libipcc_err_t = 1; + +/// One of the function parameters does not pass validation. There will be more +/// detail available via libipcc_errmsg(). +pub(crate) const LIBIPCC_ERR_INVALID_PARAM: libipcc_err_t = 2; + +/// An internal error occurred. There will be more detail available via +/// libipcc_errmsg() and libipcc_syserr(). +pub(crate) const LIBIPCC_ERR_INTERNAL: libipcc_err_t = 3; + +/// The requested lookup key was not known to the SP. +pub(crate) const LIBIPCC_ERR_KEY_UNKNOWN: libipcc_err_t = 4; + +/// The value for the requested lookup key was too large for the +/// supplied buffer. +pub(crate) const LIBIPCC_ERR_KEY_BUFTOOSMALL: libipcc_err_t = 5; + +/// An attempt to write to a key failed because the key is read-only. 
+pub(crate) const LIBIPCC_ERR_KEY_READONLY: libipcc_err_t = 6;
+
+/// An attempt to write to a key failed because the passed value is too
+/// long.
+pub(crate) const LIBIPCC_ERR_KEY_VALTOOLONG: libipcc_err_t = 7;
+
+/// Compression or decompression failed. If appropriate, libipcc_syserr() will
+/// return the Z_ error from zlib.
+pub(crate) const LIBIPCC_ERR_KEY_ZERR: libipcc_err_t = 8;
+pub(crate) type libipcc_err_t = c_uint;
+
+/// Maximum length of an error message retrieved by libipcc_errmsg().
+pub(crate) const LIBIPCC_ERR_LEN: usize = 1024;
+
+/// Flags that can be passed to libipcc when looking up a key. Today this is
+/// used for looking up a compressed key, however nothing in the public API of
+/// this crate takes advantage of this.
+pub(crate) type libipcc_key_flag_t = ::std::os::raw::c_uint;
+
+#[link(name = "ipcc")]
+extern "C" {
+    pub(crate) fn libipcc_init(
+        lihp: *mut *mut libipcc_handle_t,
+        libipcc_errp: *mut libipcc_err_t,
+        syserrp: *mut c_int,
+        errmsg: *const c_char,
+        errlen: usize,
+    ) -> bool;
+    pub(crate) fn libipcc_fini(lih: *mut libipcc_handle_t);
+    pub(crate) fn libipcc_err(lih: *mut libipcc_handle_t) -> libipcc_err_t;
+    pub(crate) fn libipcc_syserr(lih: *mut libipcc_handle_t) -> c_int;
+    pub(crate) fn libipcc_errmsg(lih: *mut libipcc_handle_t) -> *const c_char;
+    pub(crate) fn libipcc_keylookup(
+        lih: *mut libipcc_handle_t,
+        key: u8,
+        bufp: *mut *mut u8,
+        lenp: *mut usize,
+        flags: libipcc_key_flag_t,
+    ) -> bool;
+}
diff --git a/ipcc/src/handle.rs b/ipcc/src/handle.rs
new file mode 100644
index 0000000000..91b71a6ce3
--- /dev/null
+++ b/ipcc/src/handle.rs
@@ -0,0 +1,129 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+// Copyright 2023 Oxide Computer Company
+
+use std::{
+    ffi::{c_int, CStr, CString},
+    ptr,
+};
+
+use crate::IpccError;
+use crate::{ffi::*, IpccErrorInner};
+
+pub struct IpccHandle(*mut libipcc_handle_t);
+
+impl Drop for IpccHandle {
+    fn drop(&mut self) {
+        unsafe {
+            libipcc_fini(self.0);
+        }
+    }
+}
+fn ipcc_fatal_error<C: Into<String>>(
+    context: C,
+    lerr: libipcc_err_t,
+    syserr: c_int,
+    errmsg: CString,
+) -> IpccError {
+    let context = context.into();
+    let syserr = if syserr == 0 {
+        "no system errno".to_string()
+    } else {
+        std::io::Error::from_raw_os_error(syserr).to_string()
+    };
+    let inner = IpccErrorInner {
+        context,
+        errmsg: errmsg.to_string_lossy().into_owned(),
+        syserr,
+    };
+    match lerr {
+        LIBIPCC_ERR_OK => panic!("called fatal on LIBIPCC_ERR_OK"),
+        LIBIPCC_ERR_NO_MEM => IpccError::NoMem(inner),
+        LIBIPCC_ERR_INVALID_PARAM => IpccError::InvalidParam(inner),
+        LIBIPCC_ERR_INTERNAL => IpccError::Internal(inner),
+        LIBIPCC_ERR_KEY_UNKNOWN => IpccError::KeyUnknown(inner),
+        LIBIPCC_ERR_KEY_BUFTOOSMALL => IpccError::KeyBufTooSmall(inner),
+        LIBIPCC_ERR_KEY_READONLY => IpccError::KeyReadonly(inner),
+        LIBIPCC_ERR_KEY_VALTOOLONG => IpccError::KeyValTooLong(inner),
+        LIBIPCC_ERR_KEY_ZERR => IpccError::KeyZerr(inner),
+        _ => IpccError::UnknownErr(inner),
+    }
+}
+
+impl IpccHandle {
+    pub fn new() -> Result<Self, IpccError> {
+        let mut ipcc_handle: *mut libipcc_handle_t = ptr::null_mut();
+        // We subtract 1 from the length of the initial vector since CString::new
+        // will append a nul for us.
+        // Safety: Unwrapped because we guarantee that the supplied bytes
+        // contain no 0 bytes up front.
+ let errmsg = CString::new(vec![1; LIBIPCC_ERR_LEN - 1]).unwrap(); + let errmsg_len = errmsg.as_bytes().len(); + let errmsg_ptr = errmsg.into_raw(); + let mut lerr = LIBIPCC_ERR_OK; + let mut syserr = 0; + if !unsafe { + libipcc_init( + &mut ipcc_handle, + &mut lerr, + &mut syserr, + errmsg_ptr, + errmsg_len, + ) + } { + // Safety: CString::from_raw retakes ownership of a CString + // transferred to C via CString::into_raw. We are calling into_raw() + // above so it is safe to turn this back into it's owned variant. + let errmsg = unsafe { CString::from_raw(errmsg_ptr) }; + return Err(ipcc_fatal_error( + "Could not init libipcc handle", + lerr, + syserr, + errmsg, + )); + } + + Ok(IpccHandle(ipcc_handle)) + } + + fn fatal>(&self, context: C) -> IpccError { + let lerr = unsafe { libipcc_err(self.0) }; + let errmsg = unsafe { libipcc_errmsg(self.0) }; + // Safety: CStr::from_ptr is documented as safe if: + // 1. The pointer contains a valid null terminator at the end of + // the string + // 2. The pointer is valid for reads of bytes up to and including + // the null terminator + // 3. The memory referenced by the return CStr is not mutated for + // the duration of lifetime 'a + // + // (1) is true because this crate initializes space for an error message + // via CString::new which adds a terminator on our behalf. + // (2) should be guaranteed by libipcc itself since it is writing error + // messages into the CString backed buffer that we gave it. + // (3) We aren't currently mutating the memory referenced by the + // CStr, and we are creating an owned copy of the data immediately so + // that it can outlive the lifetime of the libipcc handle if needed. + let errmsg = unsafe { CStr::from_ptr(errmsg) }.to_owned(); + let syserr = unsafe { libipcc_syserr(self.0) }; + ipcc_fatal_error(context, lerr, syserr, errmsg) + } + + pub(crate) fn key_lookup( + &self, + key: u8, + buf: &mut [u8], + ) -> Result { + let mut lenp = buf.len(); + + if !unsafe { + libipcc_keylookup(self.0, key, &mut buf.as_mut_ptr(), &mut lenp, 0) + } { + return Err(self.fatal(format!("lookup of key {key} failed"))); + } + + Ok(lenp) + } +} diff --git a/ipcc/src/handle_stub.rs b/ipcc/src/handle_stub.rs new file mode 100644 index 0000000000..bc4b84b7fe --- /dev/null +++ b/ipcc/src/handle_stub.rs @@ -0,0 +1,25 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// Copyright 2023 Oxide Computer Company + +use crate::IpccError; + +/// This stub and it's implementation are used for non-illumos platforms which +/// lack libipcc. +pub struct IpccHandle; + +impl IpccHandle { + pub fn new() -> Result { + panic!("ipcc unavailable on this platform") + } + + pub(crate) fn key_lookup( + &self, + _key: u8, + _buf: &mut [u8], + ) -> Result { + panic!("ipcc unavailable on this platform") + } +} diff --git a/ipcc-key-value/src/lib.rs b/ipcc/src/lib.rs similarity index 75% rename from ipcc-key-value/src/lib.rs rename to ipcc/src/lib.rs index 16e6685018..e997c51230 100644 --- a/ipcc-key-value/src/lib.rs +++ b/ipcc/src/lib.rs @@ -4,26 +4,28 @@ // Copyright 2023 Oxide Computer Company -//! Utilities for key/value pairs passed from the control plane to the SP -//! (through MGS) to the host (through the host/SP uart) via IPCC. +//! An interface to libipcc (inter-processor communications channel) which +//! currently supports looking up values stored in the SP by key. These +//! 
values are variously static, passed from the control plane to the SP +//! (through MGS) or set from userland via libipcc. +use cfg_if::cfg_if; use omicron_common::update::ArtifactHash; use serde::Deserialize; use serde::Serialize; +use thiserror::Error; use uuid::Uuid; -mod ioctl_common; -pub use ioctl_common::*; - -#[cfg(target_os = "illumos")] -mod ioctl; -#[cfg(target_os = "illumos")] -pub use ioctl::Ipcc; - -#[cfg(not(target_os = "illumos"))] -mod ioctl_stub; -#[cfg(not(target_os = "illumos"))] -pub use ioctl_stub::Ipcc; +cfg_if! { + if #[cfg(target_os = "illumos")] { + mod ffi; + mod handle; + use handle::IpccHandle; + } else { + mod handle_stub; + use handle_stub::IpccHandle; + } +} #[cfg(test)] use proptest::arbitrary::any; @@ -38,9 +40,9 @@ use proptest::strategy::Strategy; #[repr(u8)] pub enum Key { /// Always responds `"pong"`. - Ping = 0, + Ping = IpccKey::Ping as u8, /// The value should be an encoded [`InstallinatorImageId`]. - InstallinatorImageId = 1, + InstallinatorImageId = IpccKey::InstallinatorImageId as u8, } /// Description of the images `installinator` needs to fetch from a peer on the @@ -135,10 +137,84 @@ impl InstallinatorImageId { } } -// TODO Add ioctl wrappers? `installinator` is the only client for -// `Key::InstallinatorImageId`, but we might grow other keys for other clients, -// at which point we probably want all the ioctl wrapping to happen in one -// place. +#[derive(Debug, Error)] +pub enum InstallinatorImageIdError { + #[error(transparent)] + Ipcc(#[from] IpccError), + #[error("deserializing installinator image ID failed: {0}")] + DeserializationFailed(String), +} + +#[derive(Error, Debug)] +pub enum IpccError { + #[error("Memory allocation error")] + NoMem(#[source] IpccErrorInner), + #[error("Invalid parameter")] + InvalidParam(#[source] IpccErrorInner), + #[error("Internal error occurred")] + Internal(#[source] IpccErrorInner), + #[error("Requested lookup key was not known to the SP")] + KeyUnknown(#[source] IpccErrorInner), + #[error("Value for the requested lookup key was too large for the supplied buffer")] + KeyBufTooSmall(#[source] IpccErrorInner), + #[error("Attempted to write to read-only key")] + KeyReadonly(#[source] IpccErrorInner), + #[error("Attempted write to key failed because the value is too long")] + KeyValTooLong(#[source] IpccErrorInner), + #[error("Compression or decompression failed")] + KeyZerr(#[source] IpccErrorInner), + #[error("Unknown libipcc error")] + UnknownErr(#[source] IpccErrorInner), +} + +#[derive(Error, Debug)] +#[error("{context}: {errmsg} ({syserr})")] +pub struct IpccErrorInner { + pub context: String, + pub errmsg: String, + pub syserr: String, +} + +/// These are the IPCC keys we can look up. +/// NB: These keys match the definitions found in libipcc (RFD 316) and should +/// match the values in `[ipcc::Key]` one-to-one. +#[derive(Debug, Clone, Copy)] +#[allow(dead_code)] +#[repr(u8)] +enum IpccKey { + Ping = 0, + InstallinatorImageId = 1, + Inventory = 2, + System = 3, + Dtrace = 4, +} + +/// Interface to the inter-processor communications channel. +/// For more information see rfd 316. +pub struct Ipcc { + handle: IpccHandle, +} + +impl Ipcc { + /// Creates a new `Ipcc` instance. + pub fn new() -> Result { + let handle = IpccHandle::new()?; + Ok(Self { handle }) + } + + /// Returns the current `InstallinatorImageId`. 
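+    ///
+    /// The value is read into a buffer sized to the maximum CBOR-encoded
+    /// size of the ID (`InstallinatorImageId::CBOR_SERIALIZED_SIZE`) and
+    /// then deserialized.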
+    pub fn installinator_image_id(
+        &self,
+    ) -> Result<InstallinatorImageId, InstallinatorImageIdError> {
+        let mut buf = [0; InstallinatorImageId::CBOR_SERIALIZED_SIZE];
+        let n = self
+            .handle
+            .key_lookup(IpccKey::InstallinatorImageId as u8, &mut buf)?;
+        let id = InstallinatorImageId::deserialize(&buf[..n])
+            .map_err(InstallinatorImageIdError::DeserializationFailed)?;
+        Ok(id)
+    }
+}

 #[cfg(test)]
 mod tests {
diff --git a/openapi/gateway.json b/openapi/gateway.json
index 9eacbe122d..5961b670ed 100644
--- a/openapi/gateway.json
+++ b/openapi/gateway.json
@@ -1129,7 +1129,7 @@
     "/sp/{type}/{slot}/ipcc/installinator-image-id": {
       "put": {
         "summary": "Set the installinator image ID the sled should use for recovery.",
-        "description": "This value can be read by the host via IPCC; see the `ipcc-key-value` crate.",
+        "description": "This value can be read by the host via IPCC; see the `ipcc` crate.",
         "operationId": "sp_installinator_image_id_set",
         "parameters": [
           {

From 28957f1cab39da836043a891d0b62229560413d1 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Thu, 11 Jan 2024 05:23:44 +0000
Subject: [PATCH 13/24] Update taiki-e/install-action digest to a6173a9 (#4797)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`2f4c386` -> `a6173a9`](https://togithub.com/taiki-e/install-action/compare/2f4c386...a6173a9) |

---

### Configuration

📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles.

🚦 **Automerge**: Enabled.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update again.

---

 - [ ] If you want to rebase/retry this PR, check this box

---

This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate).
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index f758bd79b9..c940f21fb2 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@2f4c386a81aeab009d470320dfc6e0930ee4e064 # v2 + uses: taiki-e/install-action@a6173a9cbc8927eb1def26c72d123d297efb1b10 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From aa3f26dd35b9346e698d58a86170b13cf8046ccb Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 11 Jan 2024 07:03:52 +0000 Subject: [PATCH 14/24] Update Rust crate vsss-rs to 3.3.4 (#4799) --- Cargo.lock | 4 ++-- bootstore/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7491f30dde..3cf8bfd887 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9527,9 +9527,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "vsss-rs" -version = "3.3.2" +version = "3.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a18cf462590a38451396633ef9771f3e9afcf439309137fd6c6eaaec0fb38252" +checksum = "196bbee60607a195bc850e94f0e040bd090e45794ad8df0e9c5a422b9975a00f" dependencies = [ "curve25519-dalek", "elliptic-curve", diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml index 1eade5afe8..37280f6dcb 100644 --- a/bootstore/Cargo.toml +++ b/bootstore/Cargo.toml @@ -27,7 +27,7 @@ slog.workspace = true thiserror.workspace = true tokio.workspace = true uuid.workspace = true -vsss-rs = { version = "3.3.2", features = ["std", "curve25519"] } +vsss-rs = { version = "3.3.4", features = ["std", "curve25519"] } zeroize.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
From 295b202f06dde1f05cc49db9381a596c2c742660 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 11 Jan 2024 08:16:49 +0000 Subject: [PATCH 15/24] Update Swatinem/rust-cache action to v2.7.2 (#4800) --- .github/workflows/rust.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 23ccc7e61f..fff5f3e6c2 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -34,7 +34,7 @@ jobs: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - - uses: Swatinem/rust-cache@3cf7f8cc28d1b4e7d01e3783be10a97d55d483c8 # v2.7.1 + - uses: Swatinem/rust-cache@a22603398250b864f7190077025cf752307154dc # v2.7.2 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version @@ -64,7 +64,7 @@ jobs: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - - uses: Swatinem/rust-cache@3cf7f8cc28d1b4e7d01e3783be10a97d55d483c8 # v2.7.1 + - uses: Swatinem/rust-cache@a22603398250b864f7190077025cf752307154dc # v2.7.2 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version @@ -94,7 +94,7 @@ jobs: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - - uses: Swatinem/rust-cache@3cf7f8cc28d1b4e7d01e3783be10a97d55d483c8 # v2.7.1 + - uses: Swatinem/rust-cache@a22603398250b864f7190077025cf752307154dc # v2.7.2 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version run: cargo --version From 1416ee06f367fc1e8aa6c28d65b3b88a7300c79a Mon Sep 17 00:00:00 2001 From: Greg Colombo Date: Thu, 11 Jan 2024 08:37:51 -0800 Subject: [PATCH 16/24] add --include-deleted switch to omdb db commands (#4648) omdb's database commands are slightly inconsistent about whether they include soft-deleted records in their outputs: most queries don't, but instances do. Add a command-line option for this (defaulting to "don't include") and update subcommands that construct their own queries to honor it. The exceptions are (a) subcommands that call datastore functions directly (these keep their existing behavior) and (b) validation commands that e.g. reason about reference graphs involving deleted and not-deleted objects. Also, clean up a couple of callers of `check_limit` who weren't actually setting a LIMIT clause in their queries. (The check is harmless in this case, but will produce spurious warnings if the queries involved return exactly the right number of rows.) Tested by running `omdb db` commands on the dogfood rack. Fixes #4647. 
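A condensed sketch of the query pattern this change applies (lifted from the `cmd_db_disk_list` hunk below; the other touched commands follow the same shape):

    // Start from a boxed query so the soft-delete filter can be added
    // conditionally, based on the new --include-deleted flag.
    let mut query = dsl::disk.into_boxed();
    if !fetch_opts.include_deleted {
        query = query.filter(dsl::time_deleted.is_null());
    }
    let disks = query
        .limit(i64::from(u32::from(fetch_opts.fetch_limit)))
        .select(Disk::as_select())
        .load_async(&*datastore.pool_connection_for_tests().await?)
        .await
        .context("loading disks")?;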
--- dev-tools/omdb/src/bin/omdb/db.rs | 179 +++++++++++++++----------- dev-tools/omdb/tests/usage_errors.out | 4 + 2 files changed, 111 insertions(+), 72 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index f58fd57b9d..23e9206506 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -133,6 +133,15 @@ pub struct DbArgs { #[clap(long, env("OMDB_DB_URL"))] db_url: Option, + #[clap(flatten)] + fetch_opts: DbFetchOptions, + + #[command(subcommand)] + command: DbCommands, +} + +#[derive(Debug, Args)] +pub struct DbFetchOptions { /// limit to apply to queries that fetch rows #[clap( long = "fetch-limit", @@ -141,8 +150,10 @@ pub struct DbArgs { )] fetch_limit: NonZeroU32, - #[command(subcommand)] - command: DbCommands, + /// whether to include soft-deleted records when enumerating objects that + /// can be soft-deleted + #[clap(long, default_value_t = false)] + include_deleted: bool, } /// Subcommands that query or update the database @@ -398,30 +409,30 @@ impl DbArgs { command: DiskCommands::Info(uuid), }) => cmd_db_disk_info(&opctx, &datastore, uuid).await, DbCommands::Disks(DiskArgs { command: DiskCommands::List }) => { - cmd_db_disk_list(&datastore, self.fetch_limit).await + cmd_db_disk_list(&datastore, &self.fetch_opts).await } DbCommands::Disks(DiskArgs { command: DiskCommands::Physical(uuid), }) => { - cmd_db_disk_physical(&opctx, &datastore, self.fetch_limit, uuid) + cmd_db_disk_physical(&opctx, &datastore, &self.fetch_opts, uuid) .await } DbCommands::Dns(DnsArgs { command: DnsCommands::Show }) => { - cmd_db_dns_show(&opctx, &datastore, self.fetch_limit).await + cmd_db_dns_show(&opctx, &datastore, &self.fetch_opts).await } DbCommands::Dns(DnsArgs { command: DnsCommands::Diff(args) }) => { - cmd_db_dns_diff(&opctx, &datastore, self.fetch_limit, args) + cmd_db_dns_diff(&opctx, &datastore, &self.fetch_opts, args) .await } DbCommands::Dns(DnsArgs { command: DnsCommands::Names(args) }) => { - cmd_db_dns_names(&opctx, &datastore, self.fetch_limit, args) + cmd_db_dns_names(&opctx, &datastore, &self.fetch_opts, args) .await } DbCommands::Inventory(inventory_args) => { cmd_db_inventory( &opctx, &datastore, - self.fetch_limit, + &self.fetch_opts, inventory_args, ) .await @@ -432,7 +443,7 @@ impl DbArgs { cmd_db_services_list_instances( &opctx, &datastore, - self.fetch_limit, + &self.fetch_opts, ) .await } @@ -442,21 +453,21 @@ impl DbArgs { cmd_db_services_list_by_sled( &opctx, &datastore, - self.fetch_limit, + &self.fetch_opts, ) .await } DbCommands::Sleds => { - cmd_db_sleds(&opctx, &datastore, self.fetch_limit).await + cmd_db_sleds(&opctx, &datastore, &self.fetch_opts).await } DbCommands::Instances => { - cmd_db_instances(&opctx, &datastore, self.fetch_limit).await + cmd_db_instances(&opctx, &datastore, &self.fetch_opts).await } DbCommands::Network(NetworkArgs { command: NetworkCommands::ListEips, verbose, }) => { - cmd_db_eips(&opctx, &datastore, self.fetch_limit, *verbose) + cmd_db_eips(&opctx, &datastore, &self.fetch_opts, *verbose) .await } DbCommands::Snapshots(SnapshotArgs { @@ -464,19 +475,13 @@ impl DbArgs { }) => cmd_db_snapshot_info(&opctx, &datastore, uuid).await, DbCommands::Snapshots(SnapshotArgs { command: SnapshotCommands::List, - }) => cmd_db_snapshot_list(&datastore, self.fetch_limit).await, + }) => cmd_db_snapshot_list(&datastore, &self.fetch_opts).await, DbCommands::Validate(ValidateArgs { command: ValidateCommands::ValidateVolumeReferences, - }) => { - 
cmd_db_validate_volume_references(&datastore, self.fetch_limit) - .await - } + }) => cmd_db_validate_volume_references(&datastore).await, DbCommands::Validate(ValidateArgs { command: ValidateCommands::ValidateRegionSnapshots, - }) => { - cmd_db_validate_region_snapshots(&datastore, self.fetch_limit) - .await - } + }) => cmd_db_validate_region_snapshots(&datastore).await, } } } @@ -564,7 +569,7 @@ fn first_page<'a, T>(limit: NonZeroU32) -> DataPageParams<'a, T> { /// Run `omdb db disk list`. async fn cmd_db_disk_list( datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] @@ -579,15 +584,19 @@ async fn cmd_db_disk_list( let ctx = || "listing disks".to_string(); use db::schema::disk::dsl; - let disks = dsl::disk - .filter(dsl::time_deleted.is_null()) - .limit(i64::from(u32::from(limit))) + let mut query = dsl::disk.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dsl::time_deleted.is_null()); + } + + let disks = query + .limit(i64::from(u32::from(fetch_opts.fetch_limit))) .select(Disk::as_select()) .load_async(&*datastore.pool_connection_for_tests().await?) .await .context("loading disks")?; - check_limit(&disks, limit, ctx); + check_limit(&disks, fetch_opts.fetch_limit, ctx); let rows = disks.into_iter().map(|disk| DiskRow { name: disk.name().to_string(), @@ -777,15 +786,19 @@ async fn cmd_db_disk_info( async fn cmd_db_disk_physical( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, args: &DiskPhysicalArgs, ) -> Result<(), anyhow::Error> { let conn = datastore.pool_connection_for_tests().await?; // We start by finding any zpools that are using the physical disk. use db::schema::zpool::dsl as zpool_dsl; - let zpools = zpool_dsl::zpool - .filter(zpool_dsl::time_deleted.is_null()) + let mut query = zpool_dsl::zpool.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(zpool_dsl::time_deleted.is_null()); + } + + let zpools = query .filter(zpool_dsl::physical_disk_id.eq(args.uuid)) .select(Zpool::as_select()) .load_async(&*conn) @@ -799,6 +812,7 @@ async fn cmd_db_disk_physical( println!("Found no zpools on physical disk UUID {}", args.uuid); return Ok(()); } + // The current plan is a single zpool per physical disk, so we expect that // this will have a single item. However, If single zpool per disk ever // changes, this code will still work. @@ -808,8 +822,12 @@ async fn cmd_db_disk_physical( // Next, we find all the datasets that are on our zpool. use db::schema::dataset::dsl as dataset_dsl; - let datasets = dataset_dsl::dataset - .filter(dataset_dsl::time_deleted.is_null()) + let mut query = dataset_dsl::dataset.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dataset_dsl::time_deleted.is_null()); + } + + let datasets = query .filter(dataset_dsl::pool_id.eq(zp.id())) .select(Dataset::as_select()) .load_async(&*conn) @@ -862,16 +880,20 @@ async fn cmd_db_disk_physical( // to find the virtual disks associated with these volume IDs and // display information about those disks. 
use db::schema::disk::dsl; - let disks = dsl::disk - .filter(dsl::time_deleted.is_null()) + let mut query = dsl::disk.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dsl::time_deleted.is_null()); + } + + let disks = query .filter(dsl::volume_id.eq_any(volume_ids)) - .limit(i64::from(u32::from(limit))) + .limit(i64::from(u32::from(fetch_opts.fetch_limit))) .select(Disk::as_select()) .load_async(&*conn) .await .context("loading disks")?; - check_limit(&disks, limit, || "listing disks".to_string()); + check_limit(&disks, fetch_opts.fetch_limit, || "listing disks".to_string()); #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] @@ -924,6 +946,7 @@ async fn cmd_db_disk_physical( println!("{}", table); // Collect the region_snapshots associated with the dataset IDs + let limit = fetch_opts.fetch_limit; use db::schema::region_snapshot::dsl as region_snapshot_dsl; let region_snapshots = region_snapshot_dsl::region_snapshot .filter(region_snapshot_dsl::dataset_id.eq_any(dataset_ids)) @@ -972,8 +995,12 @@ async fn cmd_db_disk_physical( // Get the snapshots from the list of IDs we built above. // Display information about those snapshots. use db::schema::snapshot::dsl as snapshot_dsl; - let snapshots = snapshot_dsl::snapshot - .filter(snapshot_dsl::time_deleted.is_null()) + let mut query = snapshot_dsl::snapshot.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(snapshot_dsl::time_deleted.is_null()); + } + + let snapshots = query .filter(snapshot_dsl::id.eq_any(snapshot_ids)) .limit(i64::from(u32::from(limit))) .select(Snapshot::as_select()) @@ -1046,13 +1073,18 @@ impl From for SnapshotRow { /// Run `omdb db snapshot list`. async fn cmd_db_snapshot_list( datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { let ctx = || "listing snapshots".to_string(); + let limit = fetch_opts.fetch_limit; use db::schema::snapshot::dsl; - let snapshots = dsl::snapshot - .filter(dsl::time_deleted.is_null()) + let mut query = dsl::snapshot.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dsl::time_deleted.is_null()); + } + + let snapshots = query .limit(i64::from(u32::from(limit))) .select(Snapshot::as_select()) .load_async(&*datastore.pool_connection_for_tests().await?) 
@@ -1158,8 +1190,9 @@ async fn cmd_db_snapshot_info( async fn cmd_db_services_list_instances( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let sled_list = datastore .sled_list(&opctx, &first_page(limit)) .await @@ -1223,8 +1256,9 @@ struct ServiceInstanceSledRow { async fn cmd_db_services_list_by_sled( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let sled_list = datastore .sled_list(&opctx, &first_page(limit)) .await @@ -1299,8 +1333,9 @@ impl From for SledRow { async fn cmd_db_sleds( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let sleds = datastore .sled_list(&opctx, &first_page(limit)) .await @@ -1333,11 +1368,18 @@ struct CustomerInstanceRow { async fn cmd_db_instances( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { use db::schema::instance::dsl; use db::schema::vmm::dsl as vmm_dsl; - let instances: Vec = dsl::instance + + let limit = fetch_opts.fetch_limit; + let mut query = dsl::instance.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dsl::time_deleted.is_null()); + } + + let instances: Vec = query .left_join( vmm_dsl::vmm.on(vmm_dsl::id .nullable() @@ -1408,7 +1450,7 @@ async fn cmd_db_instances( async fn cmd_db_dns_show( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, ) -> Result<(), anyhow::Error> { #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] @@ -1421,6 +1463,7 @@ async fn cmd_db_dns_show( reason: String, } + let limit = fetch_opts.fetch_limit; let mut rows = Vec::with_capacity(2); for group in [DnsGroup::Internal, DnsGroup::External] { let ctx = || format!("listing DNS zones for DNS group {:?}", group); @@ -1493,9 +1536,10 @@ async fn load_zones_version( async fn cmd_db_dns_diff( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, args: &DnsVersionArgs, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let (dns_zones, version) = load_zones_version(opctx, datastore, limit, args).await?; @@ -1557,9 +1601,10 @@ async fn cmd_db_dns_diff( async fn cmd_db_dns_names( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, args: &DnsVersionArgs, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let (group_zones, version) = load_zones_version(opctx, datastore, limit, args).await?; @@ -1606,17 +1651,24 @@ async fn cmd_db_dns_names( async fn cmd_db_eips( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, verbose: bool, ) -> Result<(), anyhow::Error> { use db::schema::external_ip::dsl; - let ips: Vec = dsl::external_ip - .filter(dsl::time_deleted.is_null()) + let mut query = dsl::external_ip.into_boxed(); + if !fetch_opts.include_deleted { + query = query.filter(dsl::time_deleted.is_null()); + } + + let ips: Vec = query .select(ExternalIp::as_select()) + .limit(i64::from(u32::from(fetch_opts.fetch_limit))) .get_results_async(&*datastore.pool_connection_for_tests().await?) 
.await?; - check_limit(&ips, limit, || String::from("listing external ips")); + check_limit(&ips, fetch_opts.fetch_limit, || { + String::from("listing external ips") + }); struct PortRange { first: u16, @@ -1756,7 +1808,6 @@ async fn cmd_db_eips( /// Validate the `volume_references` column of the region snapshots table async fn cmd_db_validate_volume_references( datastore: &DataStore, - limit: NonZeroU32, ) -> Result<(), anyhow::Error> { // First, get all region snapshot records let region_snapshots: Vec = { @@ -1775,10 +1826,6 @@ async fn cmd_db_validate_volume_references( }) .await?; - check_limit(®ion_snapshots, limit, || { - String::from("listing region snapshots") - }); - region_snapshots }; @@ -1823,10 +1870,6 @@ async fn cmd_db_validate_volume_references( }) .await?; - check_limit(&matching_volumes, limit, || { - String::from("finding matching volumes") - }); - matching_volumes }; @@ -1890,7 +1933,6 @@ async fn cmd_db_validate_volume_references( async fn cmd_db_validate_region_snapshots( datastore: &DataStore, - limit: NonZeroU32, ) -> Result<(), anyhow::Error> { let mut regions_to_snapshots_map: BTreeMap> = BTreeMap::default(); @@ -1922,10 +1964,6 @@ async fn cmd_db_validate_region_snapshots( }) .await?; - check_limit(&datasets_region_snapshots, limit, || { - String::from("listing datasets and region snapshots") - }); - datasets_region_snapshots }; @@ -2097,10 +2135,6 @@ async fn cmd_db_validate_region_snapshots( }) .await?; - check_limit(&datasets_and_regions, limit, || { - String::from("listing datasets and regions") - }); - datasets_and_regions }; @@ -2226,9 +2260,10 @@ fn format_record(record: &DnsRecord) -> impl Display { async fn cmd_db_inventory( opctx: &OpContext, datastore: &DataStore, - limit: NonZeroU32, + fetch_opts: &DbFetchOptions, inventory_args: &InventoryArgs, ) -> Result<(), anyhow::Error> { + let limit = fetch_opts.fetch_limit; let conn = datastore.pool_connection_for_tests().await?; match inventory_args.command { InventoryCommands::BaseboardIds => { diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index eaabf970a6..3c5f099c61 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -105,6 +105,8 @@ Options: --db-url URL of the database SQL interface [env: OMDB_DB_URL=] --fetch-limit limit to apply to queries that fetch rows [env: OMDB_FETCH_LIMIT=] [default: 500] + --include-deleted whether to include soft-deleted records when enumerating objects + that can be soft-deleted -h, --help Print help ============================================= EXECUTING COMMAND: omdb ["db", "--help"] @@ -131,6 +133,8 @@ Options: --db-url URL of the database SQL interface [env: OMDB_DB_URL=] --fetch-limit limit to apply to queries that fetch rows [env: OMDB_FETCH_LIMIT=] [default: 500] + --include-deleted whether to include soft-deleted records when enumerating objects + that can be soft-deleted -h, --help Print help --------------------------------------------- stderr: From 19059b1bedc8bbc7a9d293486842a9a4fd264ea3 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Thu, 11 Jan 2024 10:03:22 -0800 Subject: [PATCH 17/24] Update Crucible and Propolis versions (#4795) Propolis changes since the last update: Gripe when using non-raw block device Update zerocopy dependency nvme: Wire up GetFeatures command Make Viona more robust in the face of errors bump softnpu (#577) Modernize 16550 UART Crucible changes since the last update: Don't check ROP if the scrub is done (#1093) Allow crutest cli to be quiet on 
generic test (#1070) Offload write encryption (#1066) Simplify handling of BlockReq at program exit (#1085) Update Rust crate byte-unit to v5 (#1054) Remove unused fields in match statements, downstairs edition (#1084) Remove unused fields in match statements and consolidate (#1083) Add logger to Guest (#1082) Drive hash / decrypt tests from Upstairs::apply Wait to reconnect if auto_promote is false Change guest work id from u64 -> GuestWorkId remove BlockOp::Commit (#1072) Various clippy fixes (#1071) Don't panic if tasks are destroyed out of order Update Rust crate reedline to 0.27.1 (#1074) Update Rust crate async-trait to 0.1.75 (#1073) Buffer should destructure to Vec when single-referenced Don't fail to make unencrypted regions (#1067) Fix shadowing in downstairs (#1063) Single-task refactoring (#1058) Update Rust crate tokio to 1.35 (#1052) Update Rust crate openapiv3 to 2.0.0 (#1050) Update Rust crate libc to 0.2.151 (#1049) Update Rust crate rusqlite to 0.30 (#1035) --------- Co-authored-by: Alan Hanson --- Cargo.lock | 16 ++++++++-------- Cargo.toml | 12 ++++++------ package-manifest.toml | 12 ++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3cf8bfd887..5d378531ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -461,7 +461,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" +source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" dependencies = [ "bhyve_api_sys", "libc", @@ -471,7 +471,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" +source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" dependencies = [ "libc", "strum", @@ -1312,7 +1312,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=fab27994d0bd12725c17d6b478a9bfc2673ad6f4#fab27994d0bd12725c17d6b478a9bfc2673ad6f4" +source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" dependencies = [ "anyhow", "chrono", @@ -1328,7 +1328,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=fab27994d0bd12725c17d6b478a9bfc2673ad6f4#fab27994d0bd12725c17d6b478a9bfc2673ad6f4" +source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" dependencies = [ "anyhow", "chrono", @@ -1345,7 +1345,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=fab27994d0bd12725c17d6b478a9bfc2673ad6f4#fab27994d0bd12725c17d6b478a9bfc2673ad6f4" +source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" dependencies = [ "crucible-workspace-hack", "libc", @@ -6283,7 +6283,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" +source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" dependencies = [ "async-trait", "base64", @@ -6304,7 +6304,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" +source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" dependencies = [ "anyhow", "atty", @@ -6334,7 +6334,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=f1571ce141421cff3d3328f43e7722f5df96fdda#f1571ce141421cff3d3328f43e7722f5df96fdda" +source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" dependencies = [ "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index fbef04d3c0..5cd13cc04b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -172,9 +172,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e71b10d2f9f1fb52818b916bae83ba15a339548d" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e71b10d2f9f1fb52818b916bae83ba15a339548d" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "e71b10d2f9f1fb52818b916bae83ba15a339548d" } curve25519-dalek = "4" datatest-stable = "0.2.3" display-error-chain = "0.2.0" @@ -295,9 +295,9 @@ prettyplease = "0.2.16" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "f1571ce141421cff3d3328f43e7722f5df96fdda" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "f1571ce141421cff3d3328f43e7722f5df96fdda" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "f1571ce141421cff3d3328f43e7722f5df96fdda" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } proptest = "1.4.0" quote = "1.0" rand = "0.8.5" diff --git a/package-manifest.toml b/package-manifest.toml index 6bd40c320d..406b53c97e 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -396,10 +396,10 @@ only_for_targets.image = "standard" # 3. 
Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" +source.commit = "e71b10d2f9f1fb52818b916bae83ba15a339548d" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image/<commit>/crucible.sha256.txt -source.sha256 = "850b468c308cf63ef9e10addee36a923a91b7ab64af0fa0836130c830fb42863" +source.sha256 = "030a02551e487f561bcfad47426b953d15c4430d77770765c7fc03afd8d61bd9" output.type = "zone" [package.crucible-pantry] service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "fab27994d0bd12725c17d6b478a9bfc2673ad6f4" +source.commit = "e71b10d2f9f1fb52818b916bae83ba15a339548d" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image/<commit>/crucible-pantry.sha256.txt -source.sha256 = "893f845caa5d9b146137b503e80d5615cbd6e9d393745e81e772b10a9072b58b" +source.sha256 = "c74e23e7f7995ba3a69a9ec3a31f1db517ec15cd3a9942c2c07621b219b743b2" output.type = "zone" # Refer to @@ -421,10 +421,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "f1571ce141421cff3d3328f43e7722f5df96fdda" +source.commit = "1e25649e8c2ac274bd04adfe0513dd14a482058c" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image/<commit>/propolis-server.sha256.txt -source.sha256 = "6e2607f103419a6338936434f3e67afb7cbe14d6397f2d01982ba94b8d0182a9" +source.sha256 = "09c124315da3e434c85fe1ddb16459c36d8302e15705ff18fe6bbc7b4876f5f9" output.type = "zone" [package.mg-ddm-gz] From 3e2db6915df2c03284d15cc6da6790b813d6ed19 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Thu, 11 Jan 2024 13:36:44 -0600 Subject: [PATCH 18/24] Bump web console (page size 10 -> 25) (#4803) https://github.com/oxidecomputer/console/compare/bcc80258...367142c5 * [367142c5](https://github.com/oxidecomputer/console/commit/367142c5) oxidecomputer/console#1878 * [c588a63e](https://github.com/oxidecomputer/console/commit/c588a63e) oxidecomputer/console#1874 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index b12bcdbc9f..f9347b6dbf 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="bcc80258f7ddd99f6cc4c94f8cc62c012d08acad" -SHA2="5d219bd7b2e5bd6a23985988be4f557bc79880fb307b1a55c1eed4b2927a8fd5" +COMMIT="367142c5ed711e6dcfc59586277775020625bd6a" +SHA2="7e061165950fc064811cc5f26d7b7bd102c0df63797ef05cf73d737c2fdceb87" From 86c64a2fe8167de354a827425b2d48cfe0f33f33 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 11 Jan 2024 13:24:55 -0800 Subject: [PATCH 19/24] Update Rust crate cargo_toml to 0.18 (#4801) --- Cargo.lock | 4 ++-- dev-tools/xtask/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5d378531ba..ef7aefb9f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -794,9 +794,9 @@ dependencies = [ [[package]] name = "cargo_toml" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ca592ad99e6a0fd4b95153406138b997cc26ccd3cd0aecdfd4fbdbf1519bd77" +checksum =
"802b755090e39835a4b0440fb0bbee0df7495a8b337f63db21e616f7821c7e8c" dependencies = [ "serde", "toml 0.8.8", diff --git a/dev-tools/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml index d054d85646..bccb69a1f7 100644 --- a/dev-tools/xtask/Cargo.toml +++ b/dev-tools/xtask/Cargo.toml @@ -7,6 +7,6 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true camino.workspace = true -cargo_toml = "0.17" +cargo_toml = "0.18" cargo_metadata = "0.18" clap.workspace = true From 5f62bb8fb8dd29d25ee59118cf6980e3ff32d1e5 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 12 Jan 2024 12:39:41 -0800 Subject: [PATCH 20/24] Update Rust crate assert_cmd to 2.0.13 (#4807) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ef7aefb9f2..d4616fd801 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -236,9 +236,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.0.12" +version = "2.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88903cb14723e4d4003335bb7f8a14f27691649105346a0f0957466c096adfe6" +checksum = "00ad3f3a942eee60335ab4342358c161ee296829e0d16ff42fc1d6cb07815467" dependencies = [ "anstyle", "bstr 1.6.0", diff --git a/Cargo.toml b/Cargo.toml index 5cd13cc04b..ca22b9d938 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -145,7 +145,7 @@ anyhow = "1.0" api_identity = { path = "api_identity" } approx = "0.5.1" assert_matches = "1.5.0" -assert_cmd = "2.0.12" +assert_cmd = "2.0.13" async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "ed7ab5ef0513ba303d33efd41d3e9e381169d59b" } async-trait = "0.1.77" atomicwrites = "0.4.3" From 2ebbb7a1a9b0804e22de1fc949cf37f8e73a8580 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 12 Jan 2024 13:37:10 -0800 Subject: [PATCH 21/24] [clickhouse] upload logs on test failure (#4796) Part of trying to figure out #4779 -- we now create ClickHouse logs in a well-named directory which gets not-cleaned up (and therefore uploaded) at the end. 
Tested this by introducing a failure in a test, and then seeing the following files in `/tmp`: ``` % find test_all-c4d6fc05c1fc48de-test_local_users.3329283.1-clickhouse-I4Af08 test_all-c4d6fc05c1fc48de-test_local_users.3329283.1-clickhouse-I4Af08 test_all-c4d6fc05c1fc48de-test_local_users.3329283.1-clickhouse-I4Af08/log test_all-c4d6fc05c1fc48de-test_local_users.3329283.1-clickhouse-I4Af08/clickhouse-server.log test_all-c4d6fc05c1fc48de-test_local_users.3329283.1-clickhouse-I4Af08/clickhouse-server.errlog ``` --- .../buildomat/jobs/build-and-test-helios.sh | 2 +- .../buildomat/jobs/build-and-test-linux.sh | 2 +- Cargo.lock | 110 ++++++++++------ dev-tools/omicron-dev/src/bin/omicron-dev.rs | 32 +++-- nexus/benches/setup_benchmark.rs | 6 +- nexus/test-utils/src/lib.rs | 10 +- oximeter/db/src/client.rs | 30 +++-- test-utils/Cargo.toml | 1 + test-utils/src/dev/clickhouse.rs | 119 +++++++++++++++--- workspace-hack/Cargo.toml | 4 +- 10 files changed, 232 insertions(+), 84 deletions(-) diff --git a/.github/buildomat/jobs/build-and-test-helios.sh b/.github/buildomat/jobs/build-and-test-helios.sh index f9722a2b92..2c7a1f884d 100755 --- a/.github/buildomat/jobs/build-and-test-helios.sh +++ b/.github/buildomat/jobs/build-and-test-helios.sh @@ -5,7 +5,7 @@ #: target = "helios-2.0" #: rust_toolchain = "1.72.1" #: output_rules = [ -#: "/var/tmp/omicron_tmp/*", +#: "%/var/tmp/omicron_tmp/*", #: "!/var/tmp/omicron_tmp/crdb-base*", #: "!/var/tmp/omicron_tmp/rustc*", #: ] diff --git a/.github/buildomat/jobs/build-and-test-linux.sh b/.github/buildomat/jobs/build-and-test-linux.sh index 715effd080..4f4ebc1d8a 100755 --- a/.github/buildomat/jobs/build-and-test-linux.sh +++ b/.github/buildomat/jobs/build-and-test-linux.sh @@ -5,7 +5,7 @@ #: target = "ubuntu-22.04" #: rust_toolchain = "1.72.1" #: output_rules = [ -#: "/var/tmp/omicron_tmp/*", +#: "%/var/tmp/omicron_tmp/*", #: "!/var/tmp/omicron_tmp/crdb-base*", #: "!/var/tmp/omicron_tmp/rustc*", #: ] diff --git a/Cargo.lock b/Cargo.lock index d4616fd801..63b36090e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1836,7 +1836,7 @@ dependencies = [ "dns-service-client", "dropshot", "expectorate", - "http", + "http 0.2.11", "omicron-test-utils", "omicron-workspace-hack", "openapi-lint", @@ -1868,7 +1868,7 @@ name = "dns-service-client" version = "0.1.0" dependencies = [ "chrono", - "http", + "http 0.2.11", "omicron-workspace-hack", "progenitor", "reqwest", @@ -1906,7 +1906,7 @@ dependencies = [ "anyhow", "chrono", "futures", - "http", + "http 0.2.11", "ipnetwork", "omicron-workspace-hack", "omicron-zone-package", @@ -1928,7 +1928,7 @@ dependencies = [ [[package]] name = "dropshot" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#ff87a0175a6c8ce4462cfe7486edd7000f01be6e" +source = "git+https://github.com/oxidecomputer/dropshot?branch=main#b19a9a5d049f4433547f9f3b11d10a9483fc6acf" dependencies = [ "async-stream", "async-trait", @@ -1941,7 +1941,7 @@ dependencies = [ "form_urlencoded", "futures", "hostname", - "http", + "http 0.2.11", "hyper", "indexmap 2.1.0", "multer", @@ -1950,7 +1950,7 @@ dependencies = [ "percent-encoding", "proc-macro2", "rustls", - "rustls-pemfile", + "rustls-pemfile 2.0.0", "schemars", "serde", "serde_json", @@ -1974,7 +1974,7 @@ dependencies = [ [[package]] name = "dropshot_endpoint" version = "0.9.1-dev" -source = "git+https://github.com/oxidecomputer/dropshot?branch=main#ff87a0175a6c8ce4462cfe7486edd7000f01be6e" +source = 
"git+https://github.com/oxidecomputer/dropshot?branch=main#b19a9a5d049f4433547f9f3b11d10a9483fc6acf" dependencies = [ "proc-macro2", "quote", @@ -2112,7 +2112,7 @@ dependencies = [ "async-trait", "base64", "chrono", - "http", + "http 0.2.11", "hyper", "omicron-sled-agent", "omicron-test-utils", @@ -2749,7 +2749,7 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.11", "indexmap 1.9.3", "slab", "tokio", @@ -2802,7 +2802,7 @@ dependencies = [ "base64", "bytes", "headers-core", - "http", + "http 0.2.11", "httpdate", "mime", "sha1", @@ -2814,7 +2814,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" dependencies = [ - "http", + "http 0.2.11", ] [[package]] @@ -2930,6 +2930,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b32afd38673a8016f7c9ae69e5af41a58f81b1d31689040f2f1959594ce194ea" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http-body" version = "0.4.5" @@ -2937,7 +2948,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ "bytes", - "http", + "http 0.2.11", "pin-project-lite", ] @@ -2970,7 +2981,7 @@ dependencies = [ "crossbeam-channel", "form_urlencoded", "futures", - "http", + "http 0.2.11", "hyper", "log", "once_cell", @@ -3057,7 +3068,7 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", + "http 0.2.11", "http-body", "httparse", "httpdate", @@ -3077,7 +3088,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", - "http", + "http 0.2.11", "hyper", "log", "rustls", @@ -3093,7 +3104,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "318ca89e4827e7fe4ddd2824f52337239796ae8ecc761a663324407dc3d8d7e7" dependencies = [ "futures-util", - "http", + "http 0.2.11", "http-range", "httpdate", "hyper", @@ -3302,7 +3313,7 @@ dependencies = [ "futures", "hex", "hex-literal", - "http", + "http 0.2.11", "illumos-utils", "installinator-artifact-client", "installinator-common", @@ -4000,14 +4011,14 @@ dependencies = [ [[package]] name = "multer" -version = "2.1.0" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01acbdc23469fd8fe07ab135923371d5f5a422fbf9c522158677c8eb15bc51c2" +checksum = "a15d522be0a9c3e46fd2632e272d178f56387bdb5c9fbb3a36c649062e9b5219" dependencies = [ "bytes", "encoding_rs", "futures-util", - "http", + "http 1.0.0", "httparse", "log", "memchr", @@ -4151,7 +4162,7 @@ dependencies = [ "futures", "gateway-client", "headers", - "http", + "http 0.2.11", "hyper", "hyper-rustls", "internal-dns", @@ -4270,7 +4281,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "headers", - "http", + "http 0.2.11", "hyper", "internal-dns", "nexus-db-queries", @@ -4619,7 +4630,7 @@ dependencies = [ "expectorate", "futures", "hex", - "http", + "http 0.2.11", "ipnetwork", "libc", "macaddr", @@ -4710,7 +4721,7 @@ dependencies = [ "gateway-sp-comms", "gateway-test-utils", "hex", - "http", + "http 0.2.11", "hyper", "illumos-utils", "ipcc", @@ -4768,7 +4779,7 @@ dependencies = [ "gateway-test-utils", "headers", "hex", - "http", + "http 0.2.11", "httptest", "hubtools", "hyper", @@ -4980,7 
+4991,7 @@ dependencies = [ "glob", "guppy", "hex", - "http", + "http 0.2.11", "hyper", "hyper-staticfile", "illumos-utils", @@ -5052,7 +5063,7 @@ dependencies = [ "filetime", "headers", "hex", - "http", + "http 0.2.11", "libc", "omicron-common", "omicron-workspace-hack", @@ -5070,6 +5081,7 @@ dependencies = [ "tokio", "tokio-postgres", "usdt", + "walkdir", ] [[package]] @@ -5385,7 +5397,7 @@ dependencies = [ "base64", "chrono", "futures", - "http", + "http 0.2.11", "hyper", "omicron-workspace-hack", "progenitor", @@ -5540,7 +5552,7 @@ dependencies = [ "chrono", "dropshot", "futures", - "http", + "http 0.2.11", "kstat-rs", "omicron-workspace-hack", "oximeter", @@ -6248,7 +6260,7 @@ source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628 dependencies = [ "getopts", "heck 0.4.1", - "http", + "http 0.2.11", "indexmap 2.1.0", "openapiv3", "proc-macro2", @@ -6746,7 +6758,7 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", + "http 0.2.11", "http-body", "hyper", "hyper-rustls", @@ -6760,7 +6772,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "rustls", - "rustls-pemfile", + "rustls-pemfile 1.0.3", "serde", "serde_json", "serde_urlencoded", @@ -7098,7 +7110,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" dependencies = [ "openssl-probe", - "rustls-pemfile", + "rustls-pemfile 1.0.3", "schannel", "security-framework", ] @@ -7112,6 +7124,22 @@ dependencies = [ "base64", ] +[[package]] +name = "rustls-pemfile" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35e4980fa29e4c4b212ffb3db068a564cbf560e51d3944b7c88bd8bf5bec64f4" +dependencies = [ + "base64", + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e9d979b3ce68192e42760c7810125eb6cf2ea10efae545a156063e61f314e2a" + [[package]] name = "rustls-webpki" version = "0.101.7" @@ -7375,9 +7403,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.194" +version = "1.0.195" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b114498256798c94a0689e1a15fec6005dee8ac1f41de56404b67afc2a4b773" +checksum = "63261df402c67811e9ac6def069e4786148c4563f4b50fd4bf30aa370d626b02" dependencies = [ "serde_derive", ] @@ -7413,9 +7441,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.194" +version = "1.0.195" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3385e45322e8f9931410f01b3031ec534c3947d0e94c18049af4d9f9907d4e0" +checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" dependencies = [ "proc-macro2", "quote", @@ -7813,9 +7841,9 @@ dependencies = [ [[package]] name = "slog-bunyan" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "440fd32d0423c31e4f98d76c0b62ebdb847f905aa07357197e9b41ac620af97d" +checksum = "dcaaf6e68789d3f0411f1e72bc443214ef252a1038b6e344836e50442541f190" dependencies = [ "hostname", "slog", @@ -9185,7 +9213,7 @@ dependencies = [ "byteorder", "bytes", "data-encoding", - "http", + "http 0.2.11", "httparse", "log", "rand 0.8.5", @@ -9840,7 +9868,7 @@ dependencies = [ "gateway-messages", "gateway-test-utils", "hex", - "http", + "http 0.2.11", "hubtools", "hyper", "illumos-utils", diff --git a/dev-tools/omicron-dev/src/bin/omicron-dev.rs 
b/dev-tools/omicron-dev/src/bin/omicron-dev.rs index bbff4f6fe5..0eb421478c 100644 --- a/dev-tools/omicron-dev/src/bin/omicron-dev.rs +++ b/dev-tools/omicron-dev/src/bin/omicron-dev.rs @@ -9,6 +9,7 @@ use camino::Utf8Path; use camino::Utf8PathBuf; use clap::Args; use clap::Parser; +use dropshot::test_util::LogContext; use futures::stream::StreamExt; use nexus_test_interface::NexusServer; use omicron_common::cmd::fatal; @@ -270,22 +271,32 @@ struct ChRunArgs { } async fn cmd_clickhouse_run(args: &ChRunArgs) -> Result<(), anyhow::Error> { + let logctx = LogContext::new( + "omicron-dev", + &dropshot::ConfigLogging::StderrTerminal { + level: dropshot::ConfigLoggingLevel::Info, + }, + ); if args.replicated { - start_replicated_cluster().await?; + start_replicated_cluster(&logctx).await?; } else { - start_single_node(args.port).await?; + start_single_node(&logctx, args.port).await?; } Ok(()) } -async fn start_single_node(port: u16) -> Result<(), anyhow::Error> { +async fn start_single_node( + logctx: &LogContext, + port: u16, +) -> Result<(), anyhow::Error> { // Start a stream listening for SIGINT let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); let mut signal_stream = signals.fuse(); // Start the database server process, possibly on a specific port let mut db_instance = - dev::clickhouse::ClickHouseInstance::new_single_node(port).await?; + dev::clickhouse::ClickHouseInstance::new_single_node(logctx, port) + .await?; println!( "omicron-dev: running ClickHouse with full command:\n\"clickhouse {}\"", db_instance.cmdline().join(" ") @@ -331,7 +342,9 @@ async fn start_single_node(port: u16) -> Result<(), anyhow::Error> { Ok(()) } -async fn start_replicated_cluster() -> Result<(), anyhow::Error> { +async fn start_replicated_cluster( + logctx: &LogContext, +) -> Result<(), anyhow::Error> { // Start a stream listening for SIGINT let signals = Signals::new(&[SIGINT]).expect("failed to wait for SIGINT"); let mut signal_stream = signals.fuse(); @@ -345,9 +358,12 @@ async fn start_replicated_cluster() -> Result<(), anyhow::Error> { .as_path() .join("../../oximeter/db/src/configs/keeper_config.xml"); - let mut cluster = - dev::clickhouse::ClickHouseCluster::new(replica_config, keeper_config) - .await?; + let mut cluster = dev::clickhouse::ClickHouseCluster::new( + logctx, + replica_config, + keeper_config, + ) + .await?; println!( "omicron-dev: running ClickHouse cluster with configuration files:\n \ replicas: {}\n keepers: {}", diff --git a/nexus/benches/setup_benchmark.rs b/nexus/benches/setup_benchmark.rs index 304ccc8325..5e87151512 100644 --- a/nexus/benches/setup_benchmark.rs +++ b/nexus/benches/setup_benchmark.rs @@ -28,8 +28,12 @@ async fn do_crdb_setup() { // Wraps exclusively the ClickhouseDB portion of setup/teardown. 
async fn do_clickhouse_setup() { + let cfg = nexus_test_utils::load_test_config(); + let logctx = LogContext::new("clickhouse_setup", &cfg.pkg.log); let mut clickhouse = - dev::clickhouse::ClickHouseInstance::new_single_node(0).await.unwrap(); + dev::clickhouse::ClickHouseInstance::new_single_node(&logctx, 0) + .await + .unwrap(); clickhouse.cleanup().await.unwrap(); } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index c6dc9fefe9..19d5f747d8 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -387,10 +387,12 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { pub async fn start_clickhouse(&mut self) { let log = &self.logctx.log; debug!(log, "Starting Clickhouse"); - let clickhouse = - dev::clickhouse::ClickHouseInstance::new_single_node(0) - .await - .unwrap(); + let clickhouse = dev::clickhouse::ClickHouseInstance::new_single_node( + &self.logctx, + 0, + ) + .await + .unwrap(); let port = clickhouse.port(); let zpool_id = Uuid::new_v4(); diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index d6ec01d9fc..fc46a2c498 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -1399,6 +1399,7 @@ mod tests { use crate::query::field_table_name; use bytes::Bytes; use chrono::Utc; + use dropshot::test_util::LogContext; use omicron_test_utils::dev::clickhouse::{ ClickHouseCluster, ClickHouseInstance, }; @@ -1463,8 +1464,9 @@ mod tests { #[tokio::test] async fn test_single_node() { + let logctx = test_setup_log("test_single_node"); // Let the OS assign a port and discover it after ClickHouse starts - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); @@ -1635,22 +1637,24 @@ mod tests { .unwrap(); db.cleanup().await.expect("Failed to cleanup ClickHouse server"); + logctx.cleanup_successful(); } - async fn create_cluster() -> ClickHouseCluster { + async fn create_cluster(logctx: &LogContext) -> ClickHouseCluster { let cur_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let replica_config = cur_dir.as_path().join("src/configs/replica_config.xml"); let keeper_config = cur_dir.as_path().join("src/configs/keeper_config.xml"); - ClickHouseCluster::new(replica_config, keeper_config) + ClickHouseCluster::new(logctx, replica_config, keeper_config) .await .expect("Failed to initialise ClickHouse Cluster") } #[tokio::test] async fn test_replicated() { - let mut cluster = create_cluster().await; + let logctx = test_setup_log("test_replicated"); + let mut cluster = create_cluster(&logctx).await; // Tests that the expected error is returned on a wrong address bad_db_connection_test().await.unwrap(); @@ -1884,6 +1888,8 @@ mod tests { .cleanup() .await .expect("Failed to cleanup ClickHouse server 2"); + + logctx.cleanup_successful(); } async fn bad_db_connection_test() -> Result<(), Error> { @@ -4099,7 +4105,7 @@ mod tests { const TEST_NAME: &str = "test_apply_one_schema_upgrade_replicated"; let logctx = test_setup_log(TEST_NAME); let log = &logctx.log; - let mut cluster = create_cluster().await; + let mut cluster = create_cluster(&logctx).await; let address = cluster.replica_1.address; test_apply_one_schema_upgrade_impl(log, address, true).await; @@ -4138,7 +4144,7 @@ mod tests { const TEST_NAME: &str = "test_apply_one_schema_upgrade_single_node"; let logctx = test_setup_log(TEST_NAME); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = 
ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); @@ -4152,7 +4158,7 @@ mod tests { let logctx = test_setup_log("test_ensure_schema_with_version_gaps_fails"); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); @@ -4195,7 +4201,7 @@ mod tests { "test_ensure_schema_with_missing_desired_schema_version_fails", ); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); @@ -4327,7 +4333,7 @@ mod tests { "test_ensure_schema_walks_through_multiple_steps_single_node"; let logctx = test_setup_log(TEST_NAME); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); @@ -4345,7 +4351,7 @@ mod tests { "test_ensure_schema_walks_through_multiple_steps_replicated"; let logctx = test_setup_log(TEST_NAME); let log = &logctx.log; - let mut cluster = create_cluster().await; + let mut cluster = create_cluster(&logctx).await; let address = cluster.replica_1.address; test_ensure_schema_walks_through_multiple_steps_impl( log, address, true, @@ -4448,7 +4454,7 @@ mod tests { let logctx = test_setup_log("test_select_all_field_types"); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); @@ -4479,7 +4485,7 @@ mod tests { async fn test_sql_query_output() { let logctx = test_setup_log("test_sql_query_output"); let log = &logctx.log; - let mut db = ClickHouseInstance::new_single_node(0) + let mut db = ClickHouseInstance::new_single_node(&logctx, 0) .await .expect("Failed to start ClickHouse"); let address = SocketAddr::new(Ipv6Addr::LOCALHOST.into(), db.port()); diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 7f210134a2..48223e0291 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -30,6 +30,7 @@ usdt.workspace = true rcgen.workspace = true regex.workspace = true reqwest.workspace = true +walkdir.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] diff --git a/test-utils/src/dev/clickhouse.rs b/test-utils/src/dev/clickhouse.rs index c73161eec7..220662d9bb 100644 --- a/test-utils/src/dev/clickhouse.rs +++ b/test-utils/src/dev/clickhouse.rs @@ -4,13 +4,15 @@ //! Tools for managing ClickHouse during development +use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use std::process::Stdio; use std::time::Duration; use anyhow::{anyhow, Context}; use camino::{Utf8Path, Utf8PathBuf}; -use camino_tempfile::Utf8TempDir; +use camino_tempfile::{Builder, Utf8TempDir}; +use dropshot::test_util::{log_prefix_for_test, LogContext}; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use thiserror::Error; use tokio::{ @@ -63,8 +65,11 @@ pub enum ClickHouseError { impl ClickHouseInstance { /// Start a new single node ClickHouse server on the given IPv6 port. 
- pub async fn new_single_node(port: u16) -> Result<Self, anyhow::Error> { - let data_dir = ClickHouseDataDir::new()?; + pub async fn new_single_node( + logctx: &LogContext, + port: u16, + ) -> Result<Self, anyhow::Error> { + let data_dir = ClickHouseDataDir::new(logctx)?; let args = vec![ "server".to_string(), "--log-file".to_string(), @@ -115,6 +120,7 @@ impl ClickHouseInstance { /// Start a new replicated ClickHouse server on the given IPv6 port. pub async fn new_replicated( + logctx: &LogContext, port: u16, tcp_port: u16, interserver_port: u16, @@ -122,7 +128,7 @@ impl ClickHouseInstance { r_number: String, config_path: PathBuf, ) -> Result<Self, anyhow::Error> { - let data_dir = ClickHouseDataDir::new()?; + let data_dir = ClickHouseDataDir::new(logctx)?; let args = vec![ "server".to_string(), "--config-file".to_string(), @@ -181,6 +187,7 @@ impl ClickHouseInstance { /// Start a new ClickHouse keeper on the given IPv6 port. pub async fn new_keeper( + logctx: &LogContext, port: u16, k_id: u16, config_path: PathBuf, @@ -191,7 +198,7 @@ impl ClickHouseInstance { if ![1, 2, 3].contains(&k_id) { return Err(ClickHouseError::InvalidKeeperId.into()); } - let data_dir = ClickHouseDataDir::new()?; + let data_dir = ClickHouseDataDir::new(logctx)?; let args = vec![ "keeper".to_string(), @@ -262,7 +269,7 @@ impl ClickHouseInstance { child.wait().await.context("waiting for child")?; } if let Some(dir) = self.data_dir.take() { - dir.close()?; + dir.close_clean()?; } Ok(()) } @@ -294,10 +301,12 @@ struct ClickHouseDataDir { } impl ClickHouseDataDir { - fn new() -> Result<Self, anyhow::Error> { - // Keepers do not allow a dot in the beginning of the directory, so we must - // use a prefix. - let dir = Utf8TempDir::with_prefix("clickhouse-") + fn new(logctx: &LogContext) -> Result<Self, anyhow::Error> { + let (parent_dir, prefix) = log_prefix_for_test(logctx.test_name()); + + let dir = Builder::new() + .prefix(&format!("{prefix}-clickhouse-")) + .tempdir_in(parent_dir) .context("failed to create tempdir for ClickHouse data")?; let ret = Self { dir }; @@ -375,9 +384,83 @@ impl ClickHouseDataDir { self.dir.path().join("snapshots/") } - fn close(self) -> Result<(), anyhow::Error> { + fn close_clean(self) -> Result<(), anyhow::Error> { self.dir.close().context("failed to delete ClickHouse data dir") } + + /// Closes this data directory during a test failure, or other unclean + /// shutdown. + /// + /// Removes all files except those in any of the log directories. + fn close_unclean(self) -> Result<(), anyhow::Error> { + let keep_prefixes = vec![ + self.log_path(), + self.err_log_path(), + self.keeper_log_path(), + self.keeper_err_log_path(), + self.keeper_log_storage_path(), + ]; + // Persist this temporary directory since we're going to be doing the + // cleanup ourselves. + let dir = self.dir.into_path(); + + let mut error_paths = BTreeMap::new(); + // contents_first = true ensures that we delete inner files before + // outer directories. + for entry in walkdir::WalkDir::new(&dir).contents_first(true) { + match entry { + Ok(entry) => { + // If it matches any of the prefixes, skip it. + if keep_prefixes + .iter() + .any(|prefix| entry.path().starts_with(prefix)) + { + continue; + } + if entry.file_type().is_dir() { + if let Err(error) = std::fs::remove_dir(entry.path()) { + // Ignore ENOTEMPTY errors because they're likely + // generated from parents of files we've kept, or + // were unable to delete for other reasons.
+ if error.raw_os_error() != Some(libc::ENOTEMPTY) { + error_paths.insert( + entry.path().to_owned(), + anyhow!(error), + ); + } + } + } else { + if let Err(error) = std::fs::remove_file(entry.path()) { + error_paths.insert( + entry.path().to_owned(), + anyhow!(error), + ); + } + } + } + Err(error) => { + if let Some(path) = error.path() { + error_paths.insert(path.to_owned(), anyhow!(error)); + } + } + } + } + + // Are there any error paths? + if !error_paths.is_empty() { + let error_paths = error_paths + .into_iter() + .map(|(path, error)| format!("- {}: {}", path.display(), error)) + .collect::<Vec<_>>(); + let error_paths = error_paths.join("\n"); + return Err(anyhow!( + "failed to clean up ClickHouse data dir:\n{}", + error_paths + )); + } + + Ok(()) + } } impl Drop for ClickHouseInstance { @@ -392,7 +475,9 @@ impl Drop for ClickHouseInstance { let _ = child.start_kill(); } if let Some(dir) = self.data_dir.take() { - let _ = dir.close(); + if let Err(e) = dir.close_unclean() { + eprintln!("{}", e); + } } } } @@ -412,18 +497,20 @@ pub struct ClickHouseCluster { impl ClickHouseCluster { pub async fn new( + logctx: &LogContext, replica_config: PathBuf, keeper_config: PathBuf, ) -> Result<Self, anyhow::Error> { // Start all Keeper coordinator nodes let keeper_amount = 3; let mut keepers = - Self::new_keeper_set(keeper_amount, &keeper_config).await?; + Self::new_keeper_set(logctx, keeper_amount, &keeper_config).await?; // Start all replica nodes let replica_amount = 2; let mut replicas = - Self::new_replica_set(replica_amount, &replica_config).await?; + Self::new_replica_set(logctx, replica_amount, &replica_config) + .await?; let r1 = replicas.swap_remove(0); let r2 = replicas.swap_remove(0); @@ -443,6 +530,7 @@ impl ClickHouseCluster { } pub async fn new_keeper_set( + logctx: &LogContext, keeper_amount: u16, config_path: &PathBuf, ) -> Result<Vec<ClickHouseInstance>, anyhow::Error> { @@ -453,6 +541,7 @@ impl ClickHouseCluster { let k_id = i; let k = ClickHouseInstance::new_keeper( + logctx, k_port, k_id, config_path.clone(), @@ -468,6 +557,7 @@ impl ClickHouseCluster { } pub async fn new_replica_set( + logctx: &LogContext, replica_amount: u16, config_path: &PathBuf, ) -> Result<Vec<ClickHouseInstance>, anyhow::Error> { @@ -480,6 +570,7 @@ impl ClickHouseCluster { let r_name = format!("oximeter_cluster node {}", i); let r_number = format!("0{}", i); let r = ClickHouseInstance::new_replicated( + logctx, r_port, r_tcp_port, r_interserver_port, diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 5688e133c0..8646e08c27 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -87,7 +87,7 @@ reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", " ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.21", features = ["serde"] } -serde = { version = "1.0.194", features = ["alloc", "derive", "rc"] } +serde =
{ version = "1.0.195", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } From 095dbbde156931822a1394fa8248d76825bffaf4 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 13 Jan 2024 00:22:54 -0800 Subject: [PATCH 22/24] Update Rust crate base64 to 0.21.7 (#4812) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63b36090e0..75c77f869f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -405,9 +405,9 @@ checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" [[package]] name = "base64" -version = "0.21.6" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c79fed4cdb43e993fcdadc7e58a09fd0e3e649c4436fa11da71c9f1f3ee7feb9" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = "base64ct" diff --git a/Cargo.toml b/Cargo.toml index ca22b9d938..238b9e36bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -151,7 +151,7 @@ async-trait = "0.1.77" atomicwrites = "0.4.3" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } -base64 = "0.21.6" +base64 = "0.21.7" bb8 = "0.8.1" bcs = "0.1.6" bincode = "1.3.3" From 35838d1df8b626135857815db621cb89acb22bd2 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 13 Jan 2024 00:28:51 -0800 Subject: [PATCH 23/24] Update Rust crate console to 0.15.8 (#4798) --- Cargo.lock | 72 ++------------------------------------- tufaceous/Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 +-- 3 files changed, 6 insertions(+), 72 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 75c77f869f..4b7360ae8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1037,15 +1037,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.7" +version = "0.15.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" dependencies = [ "encode_unicode", "lazy_static", "libc", "unicode-width", - "windows-sys 0.45.0", + "windows-sys 0.52.0", ] [[package]] @@ -9982,15 +9982,6 @@ dependencies = [ "windows-targets 0.48.5", ] -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - [[package]] name = "windows-sys" version = "0.48.0" @@ -10009,21 +10000,6 @@ dependencies = [ "windows-targets 0.52.0", ] -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-targets" version = "0.48.5" @@ -10054,12 +10030,6 @@ dependencies = [ "windows_x86_64_msvc 0.52.0", ] -[[package]] -name = 
"windows_aarch64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" - [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -10072,12 +10042,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" -[[package]] -name = "windows_aarch64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" - [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -10090,12 +10054,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" -[[package]] -name = "windows_i686_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" - [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -10108,12 +10066,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" -[[package]] -name = "windows_i686_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" - [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -10126,12 +10078,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" -[[package]] -name = "windows_x86_64_gnu" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" - [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -10144,12 +10090,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" - [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -10162,12 +10102,6 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" -[[package]] -name = "windows_x86_64_msvc" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" - [[package]] name = "windows_x86_64_msvc" version = "0.48.5" diff --git a/tufaceous/Cargo.toml b/tufaceous/Cargo.toml index 81248af57d..b911c85b81 100644 --- a/tufaceous/Cargo.toml +++ b/tufaceous/Cargo.toml @@ -10,7 +10,7 @@ anyhow = { workspace = true, features = ["backtrace"] } camino.workspace = true clap = { workspace = true, features = ["derive", "env"] } chrono.workspace = true -console = { version = "0.15.7", default-features = false } +console = { version = "0.15.8", default-features = false } humantime.workspace = true omicron-common.workspace = true slog.workspace = true diff --git 
a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 8646e08c27..4eda5c1af4 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -29,7 +29,7 @@ chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.4.3", features = ["derive", "env", "wrap_help"] } clap_builder = { version = "4.4.2", default-features = false, features = ["color", "env", "std", "suggestions", "usage", "wrap_help"] } -console = { version = "0.15.7" } +console = { version = "0.15.8" } const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.15" } crossbeam-utils = { version = "0.8.16" } @@ -132,7 +132,7 @@ chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.4.3", features = ["derive", "env", "wrap_help"] } clap_builder = { version = "4.4.2", default-features = false, features = ["color", "env", "std", "suggestions", "usage", "wrap_help"] } -console = { version = "0.15.7" } +console = { version = "0.15.8" } const-oid = { version = "0.9.5", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.15" } crossbeam-utils = { version = "0.8.16" } From a00fe948a75395eaf3cc40ed8d8aed713bbd5a0a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 13 Jan 2024 09:34:41 +0000 Subject: [PATCH 24/24] Update taiki-e/install-action digest to 681c09d (#4813) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`a6173a9` -> `681c09d`](https://togithub.com/taiki-e/install-action/compare/a6173a9...681c09d) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index c940f21fb2..67fefd1430 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@a6173a9cbc8927eb1def26c72d123d297efb1b10 # v2 + uses: taiki-e/install-action@681c09da0e1063a389bc0f4cfa913bfdfdaf0a4d # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date