From 8af3d844deb340bd0018ad6df60210ea957113f2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jan 2024 13:44:30 -0800 Subject: [PATCH 01/15] Bump shlex from 1.1.0 to 1.3.0 (#4865) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a2d9601a38..1b04752e34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7691,9 +7691,9 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" [[package]] name = "shlex" -version = "1.1.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook" From beb1d1149a0876a786e94bf24a60568a29911056 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 23 Jan 2024 13:46:07 -0800 Subject: [PATCH 02/15] Update Rust crate hyper-rustls to 0.26.0 (#4814) --- Cargo.lock | 127 ++++++++++++++++++++++++++++++-------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 2 + 3 files changed, 103 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b04752e34..d66fc3dadc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1920,7 +1920,7 @@ dependencies = [ "futures", "hostname", "http 0.2.11", - "hyper", + "hyper 0.14.27", "indexmap 2.1.0", "multer", "openapiv3", @@ -2091,7 +2091,7 @@ dependencies = [ "base64", "chrono", "http 0.2.11", - "hyper", + "hyper 0.14.27", "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", @@ -2938,6 +2938,16 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-body" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +dependencies = [ + "bytes", + "http 1.0.0", +] + [[package]] name = "http-range" version = "0.1.5" @@ -2968,7 +2978,7 @@ dependencies = [ "form_urlencoded", "futures", "http 0.2.11", - "hyper", + "hyper 0.14.27", "log", "once_cell", "regex", @@ -3055,7 +3065,7 @@ dependencies = [ "futures-util", "h2", "http 0.2.11", - "http-body", + "http-body 0.4.5", "httparse", "httpdate", "itoa", @@ -3067,6 +3077,24 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5aa53871fc917b1a9ed87b683a5d86db645e23acb32c2e0785a353e522fb75" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.0.0", + "http-body 1.0.0", + "httparse", + "itoa", + "pin-project-lite", + "tokio", + "want", +] + [[package]] name = "hyper-rustls" version = "0.24.2" @@ -3075,7 +3103,7 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http 0.2.11", - "hyper", + "hyper 0.14.27", "rustls 0.21.9", "tokio", "tokio-rustls 0.24.1", @@ -3083,19 +3111,21 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.25.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "399c78f9338483cb7e630c8474b07268983c6bd5acee012e4211f9f7bb21b070" +checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" dependencies = [ "futures-util", - "http 0.2.11", - "hyper", + "http 1.0.0", + "hyper 1.1.0", + "hyper-util", "log", "rustls 0.22.2", "rustls-native-certs", 
"rustls-pki-types", "tokio", "tokio-rustls 0.25.0", + "tower-service", ] [[package]] @@ -3108,7 +3138,7 @@ dependencies = [ "http 0.2.11", "http-range", "httpdate", - "hyper", + "hyper 0.14.27", "mime_guess", "percent-encoding", "rand 0.8.5", @@ -3124,12 +3154,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper", + "hyper 0.14.27", "native-tls", "tokio", "tokio-native-tls", ] +[[package]] +name = "hyper-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdea9aac0dbe5a9240d68cfd9501e2db94222c6dc06843e06640b9e07f0fdc67" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.0.0", + "http-body 1.0.0", + "hyper 1.1.0", + "pin-project-lite", + "socket2 0.5.5", + "tokio", + "tower", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.57" @@ -3371,7 +3421,7 @@ dependencies = [ "clap 4.4.3", "dropshot", "expectorate", - "hyper", + "hyper 0.14.27", "installinator-common", "omicron-common", "omicron-test-utils", @@ -3427,7 +3477,7 @@ dependencies = [ "dropshot", "expectorate", "futures", - "hyper", + "hyper 0.14.27", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", @@ -4165,8 +4215,8 @@ dependencies = [ "gateway-client", "headers", "http 0.2.11", - "hyper", - "hyper-rustls 0.25.0", + "hyper 0.14.27", + "hyper-rustls 0.26.0", "internal-dns", "ipnetwork", "itertools 0.12.0", @@ -4304,7 +4354,7 @@ dependencies = [ "gateway-test-utils", "headers", "http 0.2.11", - "hyper", + "hyper 0.14.27", "internal-dns", "nexus-db-queries", "nexus-test-interface", @@ -4745,7 +4795,7 @@ dependencies = [ "gateway-test-utils", "hex", "http 0.2.11", - "hyper", + "hyper 0.14.27", "illumos-utils", "ipcc", "omicron-common", @@ -4806,8 +4856,8 @@ dependencies = [ "http 0.2.11", "httptest", "hubtools", - "hyper", - "hyper-rustls 0.25.0", + "hyper 0.14.27", + "hyper-rustls 0.26.0", "illumos-utils", "internal-dns", "ipnetwork", @@ -5020,7 +5070,7 @@ dependencies = [ "guppy", "hex", "http 0.2.11", - "hyper", + "hyper 0.14.27", "hyper-staticfile", "illumos-utils", "installinator-common", @@ -5159,7 +5209,7 @@ dependencies = [ "hashbrown 0.13.2", "hex", "hmac", - "hyper", + "hyper 0.14.27", "indexmap 2.1.0", "inout", "ipnetwork", @@ -5200,6 +5250,7 @@ dependencies = [ "similar", "slog", "snafu", + "socket2 0.5.5", "spin 0.9.8", "string_cache", "subtle", @@ -5427,7 +5478,7 @@ dependencies = [ "chrono", "futures", "http 0.2.11", - "hyper", + "hyper 0.14.27", "omicron-workspace-hack", "progenitor", "rand 0.8.5", @@ -5501,7 +5552,7 @@ dependencies = [ "dropshot", "expectorate", "futures", - "hyper", + "hyper 0.14.27", "internal-dns", "nexus-client", "nexus-types", @@ -6352,7 +6403,7 @@ dependencies = [ "clap 4.4.3", "dropshot", "futures", - "hyper", + "hyper 0.14.27", "progenitor", "propolis_types", "rand 0.8.5", @@ -6787,8 +6838,8 @@ dependencies = [ "futures-util", "h2", "http 0.2.11", - "http-body", - "hyper", + "http-body 0.4.5", + "hyper 0.14.27", "hyper-rustls 0.24.2", "hyper-tls", "ipnet", @@ -9048,6 +9099,28 @@ dependencies = [ "walkdir", ] +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tokio", + "tower-layer", + "tower-service", + 
"tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" + [[package]] name = "tower-service" version = "0.3.2" @@ -9933,7 +10006,7 @@ dependencies = [ "hex", "http 0.2.11", "hubtools", - "hyper", + "hyper 0.14.27", "illumos-utils", "installinator", "installinator-artifact-client", diff --git a/Cargo.toml b/Cargo.toml index d97236b632..43faed9360 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -219,7 +219,7 @@ httptest = "0.15.5" hubtools = { git = "https://github.com/oxidecomputer/hubtools.git", branch = "main" } humantime = "2.1.0" hyper = "0.14" -hyper-rustls = "0.25.0" +hyper-rustls = "0.26.0" hyper-staticfile = "0.9.5" illumos-utils = { path = "illumos-utils" } indexmap = "2.1.0" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b574a292d1..e4733992bc 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -94,6 +94,7 @@ sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } +socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } @@ -198,6 +199,7 @@ sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } +socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } From 66afddbd174c02e3da3f47e2519fb82558fec01c Mon Sep 17 00:00:00 2001 From: "Andrew J. 
Stone" Date: Tue, 23 Jan 2024 17:10:55 -0500 Subject: [PATCH 03/15] Add oxlog tool and library (#4810) --- .github/buildomat/jobs/package.sh | 2 +- Cargo.lock | 14 + Cargo.toml | 3 + dev-tools/oxlog/Cargo.toml | 16 + dev-tools/oxlog/src/bin/oxlog.rs | 121 ++++++++ dev-tools/oxlog/src/lib.rs | 457 ++++++++++++++++++++++++++++ illumos-utils/Cargo.toml | 1 + illumos-utils/src/running_zone.rs | 28 +- package-manifest.toml | 8 + tools/build-global-zone-packages.sh | 7 + workspace-hack/Cargo.toml | 2 + 11 files changed, 631 insertions(+), 28 deletions(-) create mode 100644 dev-tools/oxlog/Cargo.toml create mode 100644 dev-tools/oxlog/src/bin/oxlog.rs create mode 100644 dev-tools/oxlog/src/lib.rs diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index f0bd764feb..b4d10891b9 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -91,7 +91,7 @@ ptime -m cargo run --locked --release --bin omicron-package -- \ -t host target create -i standard -m gimlet -s asic -r multi-sled ptime -m cargo run --locked --release --bin omicron-package -- \ -t host package -stamp_packages omicron-sled-agent mg-ddm-gz propolis-server overlay +stamp_packages omicron-sled-agent mg-ddm-gz propolis-server overlay oxlog # Create global zone package @ /work/global-zone-packages.tar.gz ptime -m ./tools/build-global-zone-packages.sh "$tarball_src_dir" /work diff --git a/Cargo.lock b/Cargo.lock index d66fc3dadc..ec594abb8f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3266,6 +3266,7 @@ dependencies = [ "omicron-workspace-hack", "opte-ioctl", "oxide-vpc", + "oxlog", "regress", "schemars", "serde", @@ -5177,6 +5178,7 @@ dependencies = [ "bstr 1.6.0", "byteorder", "bytes", + "camino", "chrono", "cipher", "clap 4.4.3", @@ -5676,6 +5678,18 @@ dependencies = [ "uuid", ] +[[package]] +name = "oxlog" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "chrono", + "clap 4.4.3", + "omicron-workspace-hack", + "uuid", +] + [[package]] name = "p256" version = "0.13.2" diff --git a/Cargo.toml b/Cargo.toml index 43faed9360..3d0be64380 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ members = [ "dev-tools/crdb-seed", "dev-tools/omdb", "dev-tools/omicron-dev", + "dev-tools/oxlog", "dev-tools/thing-flinger", "dev-tools/xtask", "dns-server", @@ -93,6 +94,7 @@ default-members = [ "dev-tools/crdb-seed", "dev-tools/omdb", "dev-tools/omicron-dev", + "dev-tools/oxlog", "dev-tools/thing-flinger", # Do not include xtask in the list of default members, because this causes # hakari to not work as well and build times to be longer. 
@@ -252,6 +254,7 @@ nexus-inventory = { path = "nexus/inventory" } omicron-certificates = { path = "certificates" } omicron-passwords = { path = "passwords" } omicron-workspace-hack = "0.1.0" +oxlog = { path = "dev-tools/oxlog" } nexus-test-interface = { path = "nexus/test-interface" } nexus-test-utils-macros = { path = "nexus/test-utils-macros" } nexus-test-utils = { path = "nexus/test-utils" } diff --git a/dev-tools/oxlog/Cargo.toml b/dev-tools/oxlog/Cargo.toml new file mode 100644 index 0000000000..5d7cfaf5c1 --- /dev/null +++ b/dev-tools/oxlog/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "oxlog" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +camino.workspace = true +chrono.workspace = true +clap.workspace = true +uuid.workspace = true +omicron-workspace-hack.workspace = true + +[[bin]] +name = "oxlog" diff --git a/dev-tools/oxlog/src/bin/oxlog.rs b/dev-tools/oxlog/src/bin/oxlog.rs new file mode 100644 index 0000000000..ef79605dda --- /dev/null +++ b/dev-tools/oxlog/src/bin/oxlog.rs @@ -0,0 +1,121 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tool for discovering oxide related logfiles on sleds + +use clap::{Args, Parser, Subcommand}; +use oxlog::{Filter, LogFile, Zones}; + +#[derive(Debug, Parser)] +#[command(version)] +struct Cli { + #[command(subcommand)] + command: Commands, +} + +#[derive(Debug, Subcommand)] +enum Commands { + /// List all zones found on the filesystem + Zones, + + /// List logs for a given service + Logs { + // The name of the zone + zone: String, + + /// The name of the service to list logs for + service: Option<String>, + + /// Print available metadata + #[arg(short, long)] + metadata: bool, + + #[command(flatten)] + filter: FilterArgs, + }, +} + +#[derive(Args, Debug)] +#[group(required = true, multiple = true)] +struct FilterArgs { + /// Print only the current log file + #[arg(short, long)] + current: bool, + + /// Print only the archived log files + #[arg(short, long)] + archived: bool, + + // Print only the extra log files + #[arg(short, long)] + extra: bool, +} + +fn main() -> Result<(), anyhow::Error> { + let cli = Cli::parse(); + + match cli.command { + Commands::Zones => { + for zone in Zones::load()?.zones.keys() { + println!("{zone}"); + } + Ok(()) + } + Commands::Logs { zone, service, metadata, filter } => { + let zones = Zones::load()?; + let filter = Filter { + current: filter.current, + archived: filter.archived, + extra: filter.extra, + }; + let print_metadata = |f: &LogFile| { + println!( + "{}\t{}\t{}", + f.path, + f.size.map_or_else(|| "-".to_string(), |s| s.to_string()), + f.modified + .map_or_else(|| "-".to_string(), |s| s.to_rfc3339()) + ); + }; + + let logs = zones.zone_logs(&zone, filter); + for (svc_name, mut svc_logs) in logs { + if let Some(service) = &service { + if svc_name != service.as_str() { + continue; + } + } + svc_logs.archived.sort(); + if filter.current { + if let Some(current) = &svc_logs.current { + if metadata { + print_metadata(current); + } else { + println!("{}", current.path); + } + } + } + if filter.archived { + for f in &svc_logs.archived { + if metadata { + print_metadata(f); + } else { + println!("{}", f.path); + } + } + } + if filter.extra { + for f in &svc_logs.extra { + if metadata { + print_metadata(f); + } else { + println!("{}", f.path); + } + } + } + } + Ok(()) + } + } +} diff --git
a/dev-tools/oxlog/src/lib.rs b/dev-tools/oxlog/src/lib.rs new file mode 100644 index 0000000000..589b113928 --- /dev/null +++ b/dev-tools/oxlog/src/lib.rs @@ -0,0 +1,457 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! A tool to show oxide related log file paths +//! +//! All data is based off of reading the filesystem + +use anyhow::Context; +use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf}; +use chrono::{DateTime, Utc}; +use std::collections::BTreeMap; +use std::io; +use uuid::Uuid; + +/// Return a UUID if the `DirEntry` contains a directory that parses into a UUID. +fn get_uuid_dir(result: io::Result<Utf8DirEntry>) -> Option<Uuid> { + let Ok(entry) = result else { + return None; + }; + let Ok(file_type) = entry.file_type() else { + return None; + }; + if !file_type.is_dir() { + return None; + } + let file_name = entry.file_name(); + if let Ok(uuid) = file_name.parse() { + Some(uuid) + } else { + None + } +} + +#[derive(Debug)] +pub struct Pools { + pub internal: Vec<Uuid>, + pub external: Vec<Uuid>, +} + +impl Pools { + pub fn read() -> anyhow::Result<Pools> { + let internal = Utf8Path::new("/pool/int/") + .read_dir_utf8() + .context("Failed to read /pool/int")? + .filter_map(get_uuid_dir) + .collect(); + let external = Utf8Path::new("/pool/ext/") + .read_dir_utf8() + .context("Failed to read /pool/ext")? + .filter_map(get_uuid_dir) + .collect(); + Ok(Pools { internal, external }) + } +} + +/// Filter which logs to search for in a given zone +/// +/// Each field in the filter is additive. +/// +/// The filter was added to the library and not just the CLI because in some +/// cases searching for archived logs is pretty expensive. +#[derive(Clone, Copy, Debug)] +pub struct Filter { + /// The current logfile for a service. + /// e.g. `/var/svc/log/oxide-sled-agent:default.log` + pub current: bool, + + /// Any rotated log files in the default service directory or archived to + /// a debug directory. e.g. `/var/svc/log/oxide-sled-agent:default.log.0` + /// or `/pool/ext/021afd19-2f87-4def-9284-ab7add1dd6ae/crypt/debug/global/oxide-sled-agent:default.log.1697509861` + pub archived: bool, + + /// Any files of special interest for a given service that don't reside in + /// standard paths or don't follow the naming conventions of SMF service + /// files. e.g.
`/pool/ext/e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone/oxz_cockroachdb_8bbea076-ff60-4330-8302-383e18140ef3/root/data/logs/cockroach.log` + pub extra: bool, +} + +/// Path and metadata about a logfile +/// We use options for metadata as retrieval is fallible +#[derive(Debug, Clone, Eq)] +pub struct LogFile { + pub path: Utf8PathBuf, + pub size: Option<u64>, + pub modified: Option<DateTime<Utc>>, +} + +impl LogFile { + pub fn read_metadata(&mut self, entry: &Utf8DirEntry) { + if let Ok(metadata) = entry.metadata() { + self.size = Some(metadata.len()); + if let Ok(modified) = metadata.modified() { + self.modified = Some(modified.into()); + } + } + } +} + +impl PartialEq for LogFile { + fn eq(&self, other: &Self) -> bool { + self.path == other.path + } +} + +impl PartialOrd for LogFile { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for LogFile { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.path.cmp(&other.path) + } +} + +impl LogFile { + fn new(path: Utf8PathBuf) -> LogFile { + LogFile { path, size: None, modified: None } + } +} + +/// All oxide logs for a given service in a given zone +#[derive(Debug, Clone, Default)] +pub struct SvcLogs { + /// The current logfile for a service. + /// e.g. `/var/svc/log/oxide-sled-agent:default.log` + pub current: Option<LogFile>, + + /// Any rotated log files in the default service directory or archived to + /// a debug directory. e.g. `/var/svc/log/oxide-sled-agent:default.log.0` + /// or `/pool/ext/021afd19-2f87-4def-9284-ab7add1dd6ae/crypt/debug/global/oxide-sled-agent:default.log.1697509861` + pub archived: Vec<LogFile>, + + /// Any files of special interest for a given service that don't reside in + /// standard paths or don't follow the naming conventions of SMF service + /// files. e.g. `/pool/ext/e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone/oxz_cockroachdb_8bbea076-ff60-4330-8302-383e18140ef3/root/data/logs/cockroach.log` + pub extra: Vec<LogFile>, +} + +// These probably don't warrant newtypes. They are just to make the +// keys in maps a bit easier to read. +type ZoneName = String; +type ServiceName = String; + +pub struct Paths { + /// Links to the location of current and rotated log files for a given service + pub primary: Utf8PathBuf, + + /// Links to debug directories containing archived log files + pub debug: Vec<Utf8PathBuf>, + + /// Links to directories containing extra files such as cockroachdb logs + /// that reside outside our SMF log and debug service log paths. + pub extra: Vec<(&'static str, Utf8PathBuf)>, +} + +pub struct Zones { + pub zones: BTreeMap<ZoneName, Paths>, +} + +impl Zones { + pub fn load() -> Result<Zones, anyhow::Error> { + let mut zones = BTreeMap::new(); + + // Describe where to find logs for the global zone + zones.insert( + "global".to_string(), + Paths { + primary: Utf8PathBuf::from("/var/svc/log"), + debug: vec![], + extra: vec![], + }, + ); + + // Describe where to find logs for the switch zone + zones.insert( + "oxz_switch".to_string(), + Paths { + primary: Utf8PathBuf::from("/zone/oxz_switch/root/var/svc/log"), + debug: vec![], + extra: vec![( + "dendrite", + "/zone/oxz_switch/root/var/dendrite".into(), + )], + }, + ); + + // Find the directories containing the primary and extra log files + // for all zones on external storage pools.
+ let pools = Pools::read()?; + for uuid in &pools.external { + let zones_path: Utf8PathBuf = + ["/pool/ext", &uuid.to_string(), "crypt/zone"].iter().collect(); + // Find the zones on the given pool + let Ok(entries) = zones_path.read_dir_utf8() else { + continue; + }; + for entry in entries { + let Ok(zone_entry) = entry else { + continue; + }; + let zone = zone_entry.file_name(); + + // Add the path to the current logs for the zone + let mut dir = zones_path.clone(); + dir.push(zone); + dir.push("root/var/svc/log"); + let mut paths = + Paths { primary: dir, debug: vec![], extra: vec![] }; + + // Add the path to the extra logs for the zone + if zone.starts_with("oxz_cockroachdb") { + let mut dir = zones_path.clone(); + dir.push(zone); + dir.push("root/data/logs"); + paths.extra.push(("cockroachdb", dir)); + } + + zones.insert(zone.to_string(), paths); + } + } + + // Find the directories containing the debug log files + for uuid in &pools.external { + let zones_path: Utf8PathBuf = + ["/pool/ext", &uuid.to_string(), "crypt/debug"] + .iter() + .collect(); + // Find the zones on the given pool + let Ok(entries) = zones_path.read_dir_utf8() else { + continue; + }; + for entry in entries { + let Ok(zone_entry) = entry else { + continue; + }; + let zone = zone_entry.file_name(); + let mut dir = zones_path.clone(); + dir.push(zone); + + // We only add debug paths if the zones have primary paths + if let Some(paths) = zones.get_mut(zone) { + paths.debug.push(dir); + } + } + } + + Ok(Zones { zones }) + } + + /// Return log files organized by service name + pub fn zone_logs( + &self, + zone: &str, + filter: Filter, + ) -> BTreeMap<ServiceName, SvcLogs> { + let mut output = BTreeMap::new(); + let Some(paths) = self.zones.get(zone) else { + return BTreeMap::new(); + }; + // Some rotated files exist in `paths.primary` that we track as + // 'archived'. These files have not yet been migrated into the debug + // directory. + if filter.current || filter.archived { + load_svc_logs(paths.primary.clone(), &mut output); + } + + if filter.archived { + for dir in paths.debug.clone() { + load_svc_logs(dir, &mut output); + } + } + if filter.extra { + for (svc_name, dir) in paths.extra.clone() { + load_extra_logs(dir, svc_name, &mut output); + } + } + output + } +} + +const OX_SMF_PREFIXES: [&str; 2] = ["oxide-", "system-illumos-"]; + +/// Return true if the provided file name appears to be a valid log file for an +/// Oxide-managed SMF service. +/// +/// Note that this operates on the _file name_. Any leading path components will +/// cause this check to return `false`. +pub fn is_oxide_smf_log_file(filename: impl AsRef<str>) -> bool { + // Log files are named by the SMF services, with the `/` in the FMRI + // translated to a `-`. + let filename = filename.as_ref(); + OX_SMF_PREFIXES + .iter() + .any(|prefix| filename.starts_with(prefix) && filename.contains(".log")) +} + +// Parse an oxide smf log file name and return the name of the underlying +// service. +// +// If parsing fails for some reason, return `None`. +pub fn oxide_smf_service_name_from_log_file_name( + filename: &str, +) -> Option<&str> { + let Some((prefix, _suffix)) = filename.split_once(':') else { + // No ':' found + return None; + }; + + for ox_prefix in OX_SMF_PREFIXES { + if let Some(svc_name) = prefix.strip_prefix(ox_prefix) { + return Some(svc_name); + } + } + + None +} + +// Given a directory, find all oxide specific SMF service logs and return them +// mapped to their inferred service name.
+fn load_svc_logs(dir: Utf8PathBuf, logs: &mut BTreeMap<ServiceName, SvcLogs>) { + let Ok(entries) = dir.read_dir_utf8() else { + return; + }; + for entry in entries { + let Ok(entry) = entry else { + continue; + }; + let filename = entry.file_name(); + + // Is this a log file we care about? + if is_oxide_smf_log_file(filename) { + let mut path = dir.clone(); + path.push(filename); + let mut logfile = LogFile::new(path); + + let Some(svc_name) = + oxide_smf_service_name_from_log_file_name(filename) + else { + // parsing failed + continue; + }; + + logfile.read_metadata(&entry); + if logfile.size == Some(0) { + // skip 0 size files + continue; + } + + let is_current = filename.ends_with(".log"); + + let svc_logs = + logs.entry(svc_name.to_string()).or_insert(SvcLogs::default()); + + if is_current { + svc_logs.current = Some(logfile.clone()); + } else { + svc_logs.archived.push(logfile.clone()); + } + } + } +} + +// Load any logs in non-standard paths. We grab all logs in `dir` and +// don't filter based on filename prefix as in `load_svc_logs`. +fn load_extra_logs( + dir: Utf8PathBuf, + svc_name: &str, + logs: &mut BTreeMap<ServiceName, SvcLogs>, +) { + let Ok(entries) = dir.read_dir_utf8() else { + return; + }; + + let svc_logs = + logs.entry(svc_name.to_string()).or_insert(SvcLogs::default()); + + for entry in entries { + let Ok(entry) = entry else { + continue; + }; + let filename = entry.file_name(); + let mut path = dir.clone(); + path.push(filename); + let mut logfile = LogFile::new(path); + logfile.read_metadata(&entry); + if logfile.size == Some(0) { + // skip 0 size files + continue; + } + svc_logs.extra.push(logfile); + } +} + +#[cfg(test)] +mod tests { + pub use super::is_oxide_smf_log_file; + pub use super::oxide_smf_service_name_from_log_file_name; + + #[test] + fn test_is_oxide_smf_log_file() { + assert!(is_oxide_smf_log_file("oxide-blah:default.log")); + assert!(is_oxide_smf_log_file("oxide-blah:default.log.0")); + assert!(is_oxide_smf_log_file("oxide-blah:default.log.1111")); + assert!(is_oxide_smf_log_file("system-illumos-blah:default.log")); + assert!(is_oxide_smf_log_file("system-illumos-blah:default.log.0")); + assert!(!is_oxide_smf_log_file("not-oxide-blah:default.log")); + assert!(!is_oxide_smf_log_file("not-system-illumos-blah:default.log")); + assert!(!is_oxide_smf_log_file("system-blah:default.log")); + } + + #[test] + fn test_oxide_smf_service_name_from_log_file_name() { + assert_eq!( + Some("blah"), + oxide_smf_service_name_from_log_file_name("oxide-blah:default.log") + ); + assert_eq!( + Some("blah"), + oxide_smf_service_name_from_log_file_name( + "oxide-blah:default.log.0" + ) + ); + assert_eq!( + Some("blah"), + oxide_smf_service_name_from_log_file_name( + "oxide-blah:default.log.1111" + ) + ); + assert_eq!( + Some("blah"), + oxide_smf_service_name_from_log_file_name( + "system-illumos-blah:default.log" + ) + ); + assert_eq!( + Some("blah"), + oxide_smf_service_name_from_log_file_name( + "system-illumos-blah:default.log.0" + ) + ); + assert!(oxide_smf_service_name_from_log_file_name( + "not-oxide-blah:default.log" + ) + .is_none()); + assert!(oxide_smf_service_name_from_log_file_name( + "not-system-illumos-blah:default.log" + ) + .is_none()); + assert!(oxide_smf_service_name_from_log_file_name( + "system-blah:default.log" + ) + .is_none()); + } +} diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index 8296eace5c..e4a99095fd 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -20,6 +20,7 @@ libc.workspace = true macaddr.workspace = true omicron-common.workspace = true
oxide-vpc.workspace = true +oxlog.workspace = true schemars.workspace = true serde.workspace = true slog.workspace = true diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index ea80a6d34b..4b4107f529 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -14,6 +14,7 @@ use camino::{Utf8Path, Utf8PathBuf}; use camino_tempfile::Utf8TempDir; use ipnetwork::IpNetwork; use omicron_common::backoff; +pub use oxlog::is_oxide_smf_log_file; use slog::{error, info, o, warn, Logger}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; use std::sync::Arc; @@ -1411,24 +1412,8 @@ pub fn is_oxide_smf_service(fmri: impl AsRef<str>) -> bool { SMF_SERVICE_PREFIXES.iter().any(|prefix| fmri.starts_with(prefix)) } -/// Return true if the provided file name appears to be a valid log file for an -/// Oxide-managed SMF service. -/// -/// Note that this operates on the _file name_. Any leading path components will -/// cause this check to return `false`. -pub fn is_oxide_smf_log_file(filename: impl AsRef<str>) -> bool { - // Log files are named by the SMF services, with the `/` in the FMRI - // translated to a `-`. - const PREFIXES: [&str; 2] = ["oxide-", "system-illumos-"]; - let filename = filename.as_ref(); - PREFIXES - .iter() - .any(|prefix| filename.starts_with(prefix) && filename.contains(".log")) -} - #[cfg(test)] mod tests { - use super::is_oxide_smf_log_file; use super::is_oxide_smf_service; #[test] @@ -1438,15 +1423,4 @@ mod tests { assert!(!is_oxide_smf_service("svc:/system/blah:default")); assert!(!is_oxide_smf_service("svc:/not/oxide/blah:default")); } - - #[test] - fn test_is_oxide_smf_log_file() { - assert!(is_oxide_smf_log_file("oxide-blah:default.log")); - assert!(is_oxide_smf_log_file("oxide-blah:default.log.0")); - assert!(is_oxide_smf_log_file("oxide-blah:default.log.1111")); - assert!(is_oxide_smf_log_file("system-illumos-blah:default.log")); - assert!(is_oxide_smf_log_file("system-illumos-blah:default.log.0")); - assert!(!is_oxide_smf_log_file("not-oxide-blah:default.log")); - assert!(!is_oxide_smf_log_file("not-system-illumos-blah:default.log")); - } } diff --git a/package-manifest.toml b/package-manifest.toml index 7b12583437..fa6bba7a96 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -638,3 +638,11 @@ source.rust.binary_names = ["omdb"] source.rust.release = true output.type = "zone" output.intermediate_only = true + +[package.oxlog] +service_name = "oxlog" +only_for_targets.image = "standard" +source.type = "local" +source.rust.binary_names = ["oxlog"] +source.rust.release = true +output.type = "tarball" diff --git a/tools/build-global-zone-packages.sh b/tools/build-global-zone-packages.sh index fc1ab42ade..b989e6a543 100755 --- a/tools/build-global-zone-packages.sh +++ b/tools/build-global-zone-packages.sh @@ -15,6 +15,7 @@ deps=( "$tarball_src_dir/mg-ddm-gz.tar" "$tarball_src_dir/propolis-server.tar.gz" "$tarball_src_dir/overlay.tar.gz" + "$tarball_src_dir/oxlog.tar" ) for dep in "${deps[@]}"; do if [[ ! -e $dep ]]; then @@ -48,6 +49,12 @@ mkdir -p "$pkg_dir" cd "$pkg_dir" tar -xvfz "$tarball_src_dir/mg-ddm-gz.tar" cd - +# Extract the oxlog tarball for re-packaging into the layered GZ archive. +pkg_dir="$tmp_gz/root/opt/oxide/oxlog" +mkdir -p "$pkg_dir" +cd "$pkg_dir" +tar -xvfz "$tarball_src_dir/oxlog.tar" +cd - # propolis should be bundled with this OS: Put the propolis-server zone image # under /opt/oxide in the gz.
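The `oxlog` binary above is a thin wrapper over the library half of the crate. As a minimal sketch of driving that library directly — using only the `Zones`, `Filter`, and `SvcLogs` API introduced in this patch; the zone name and filter settings are illustrative choices, not defaults:

```rust
// Sketch only: enumerate the current (live) SMF log file for every service
// in one zone, skipping the more expensive archived/extra searches.
use oxlog::{Filter, Zones};

fn print_current_logs(zone: &str) -> Result<(), anyhow::Error> {
    // Walks /pool/int, /pool/ext, and the well-known zone log paths.
    let zones = Zones::load()?;
    let filter = Filter { current: true, archived: false, extra: false };
    for (svc_name, svc_logs) in zones.zone_logs(zone, filter) {
        if let Some(current) = svc_logs.current {
            // `LogFile::path` is a camino `Utf8PathBuf`, so it prints directly.
            println!("{svc_name}\t{}", current.path);
        }
    }
    Ok(())
}
```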
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index e4733992bc..b6d61d9ea5 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -25,6 +25,7 @@ bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.6.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.5.0", features = ["serde"] } +camino = { version = "1.1.6", default-features = false, features = ["serde1"] } chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.4.3", features = ["cargo", "derive", "env", "wrap_help"] } @@ -130,6 +131,7 @@ bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.6.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.5.0", features = ["serde"] } +camino = { version = "1.1.6", default-features = false, features = ["serde1"] } chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.4.3", features = ["cargo", "derive", "env", "wrap_help"] } From 7bb6cd3d9b5b597424e3929f5f5dd8485fe8d454 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Tue, 23 Jan 2024 16:15:19 -0600 Subject: [PATCH 04/15] Bump web console (#4873) https://github.com/oxidecomputer/console/compare/644a45b8...b9013a33 * [b9013a33](https://github.com/oxidecomputer/console/commit/b9013a33) oxidecomputer/console#1898 * [fb9e9ca5](https://github.com/oxidecomputer/console/commit/fb9e9ca5) oxidecomputer/console#1904 * [9ae29498](https://github.com/oxidecomputer/console/commit/9ae29498) oxidecomputer/console#1897 * [6b894ceb](https://github.com/oxidecomputer/console/commit/6b894ceb) oxidecomputer/console#1901 * [d80d2e7c](https://github.com/oxidecomputer/console/commit/d80d2e7c) oxidecomputer/console#1886 * [2a7da0fa](https://github.com/oxidecomputer/console/commit/2a7da0fa) bump vite for security fix * [700e2700](https://github.com/oxidecomputer/console/commit/700e2700) oxidecomputer/console#1893 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index e76c29c9a0..0dc0024f2c 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="644a45b8e4ab673ad51754e372277abc3ddfd036" -SHA2="a059917d826081df04efd44186f6dfeef0099fc53f1e8618796ea990a510f4b0" +COMMIT="b9013a33eaa3f5efdcd5c7d244e36a54e7222295" +SHA2="bebb9800ff94c42897d54faac8c2a3f89b2b0e927ebf75ec74223b6163e4209d" From 0de612278713efe3e48c990dd516125402eedb31 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 05:59:51 +0000 Subject: [PATCH 05/15] Update taiki-e/install-action digest to cf2d7f1 (#4879) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`bd4f144` -> `cf2d7f1`](https://togithub.com/taiki-e/install-action/compare/bd4f144...cf2d7f1) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 
🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index a4ebc74843..06da0395a1 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@bd4f14420660e33ca2929e5c0306a8367173c1ee # v2 + uses: taiki-e/install-action@cf2d7f1118304815479579570ad3ec572fe94523 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From a6a74d3efcbb9db3b00d2f1ae03dd1b8f86cbfba Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 24 Jan 2024 00:42:54 -0800 Subject: [PATCH 06/15] [nexus] simplify BackgroundTask trait (#4877) Just using `'a` everywhere is semantically identical to the current pattern because all the lifetime parameters are covariant. --- nexus/src/app/background/common.rs | 29 ++++++------------- nexus/src/app/background/dns_config.rs | 10 ++----- nexus/src/app/background/dns_propagation.rs | 10 ++----- nexus/src/app/background/dns_servers.rs | 10 ++----- .../src/app/background/external_endpoints.rs | 10 ++----- .../app/background/inventory_collection.rs | 10 ++----- nexus/src/app/background/nat_cleanup.rs | 10 ++----- nexus/src/app/background/phantom_disks.rs | 10 ++----- 8 files changed, 30 insertions(+), 69 deletions(-) diff --git a/nexus/src/app/background/common.rs b/nexus/src/app/background/common.rs index 7b05eab61b..4fcce74714 100644 --- a/nexus/src/app/background/common.rs +++ b/nexus/src/app/background/common.rs @@ -153,13 +153,10 @@ use tokio::time::MissedTickBehavior; /// /// See module-level documentation for details.
pub trait BackgroundTask: Send + Sync { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c; + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value>; } /// Drives the execution of background tasks @@ -499,14 +496,10 @@ mod test { } impl BackgroundTask for ReportingTask { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - _: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + _: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let count = self.counter; self.counter += 1; @@ -684,14 +677,10 @@ mod test { } impl BackgroundTask for PausingTask { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - _: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + _: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let count = self.counter; self.counter += 1; diff --git a/nexus/src/app/background/dns_config.rs b/nexus/src/app/background/dns_config.rs index 805ae813fe..959cf1843e 100644 --- a/nexus/src/app/background/dns_config.rs +++ b/nexus/src/app/background/dns_config.rs @@ -43,14 +43,10 @@ impl DnsConfigWatcher { } impl BackgroundTask for DnsConfigWatcher { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { // Set up a logger for this activation that includes metadata about // the current generation. diff --git a/nexus/src/app/background/dns_propagation.rs b/nexus/src/app/background/dns_propagation.rs index 45776df21b..cf7a399999 100644 --- a/nexus/src/app/background/dns_propagation.rs +++ b/nexus/src/app/background/dns_propagation.rs @@ -36,14 +36,10 @@ impl DnsPropagator { } impl BackgroundTask for DnsPropagator { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { // Read the DNS configuration and server list from the other // background tasks that assemble these. Clone them because diff --git a/nexus/src/app/background/dns_servers.rs b/nexus/src/app/background/dns_servers.rs index 3a75c09302..97fb3510b7 100644 --- a/nexus/src/app/background/dns_servers.rs +++ b/nexus/src/app/background/dns_servers.rs @@ -57,14 +57,10 @@ impl DnsServersWatcher { } impl BackgroundTask for DnsServersWatcher { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { // Set up a logger for this activation that includes metadata about // the current generation. 
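The covariance claim in this patch's description can be seen in a toy model (the types below are stand-ins, not the Nexus ones): because `&OpContext` is covariant in its lifetime, a caller's longer-lived reference silently shrinks to the single `'a` tied to the `&mut self` borrow, so every call the old `<'a, 'b, 'c>` signature accepted still compiles.

```rust
// Toy sketch, assuming the `futures` crate; `OpContext` stands in for the
// real Nexus type and `u64` for `serde_json::Value`.
use futures::future::BoxFuture;
use futures::FutureExt;

struct OpContext;
struct Tick(u64);

impl Tick {
    // The simplified shape: one lifetime ties self, opctx, and the future.
    fn activate<'a>(&'a mut self, _opctx: &'a OpContext) -> BoxFuture<'a, u64> {
        async move {
            self.0 += 1;
            self.0
        }
        .boxed()
    }
}

async fn run_once() -> u64 {
    let opctx = OpContext; // outlives each activation borrow
    let mut task = Tick(0);
    // `&opctx` could serve a longer lifetime, but it coerces down to the
    // `'a` of `&mut task`, exactly as the two-lifetime version allowed.
    task.activate(&opctx).await
}
```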
diff --git a/nexus/src/app/background/external_endpoints.rs b/nexus/src/app/background/external_endpoints.rs index 53401c16de..ed530e0775 100644 --- a/nexus/src/app/background/external_endpoints.rs +++ b/nexus/src/app/background/external_endpoints.rs @@ -42,14 +42,10 @@ impl ExternalEndpointsWatcher { } impl BackgroundTask for ExternalEndpointsWatcher { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; diff --git a/nexus/src/app/background/inventory_collection.rs b/nexus/src/app/background/inventory_collection.rs index 5c52fa519b..044e5a2234 100644 --- a/nexus/src/app/background/inventory_collection.rs +++ b/nexus/src/app/background/inventory_collection.rs @@ -51,14 +51,10 @@ impl InventoryCollector { } impl BackgroundTask for InventoryCollector { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { match inventory_activate( opctx, diff --git a/nexus/src/app/background/nat_cleanup.rs b/nexus/src/app/background/nat_cleanup.rs index 1691d96a4b..5014dc0553 100644 --- a/nexus/src/app/background/nat_cleanup.rs +++ b/nexus/src/app/background/nat_cleanup.rs @@ -32,14 +32,10 @@ impl Ipv4NatGarbageCollector { } impl BackgroundTask for Ipv4NatGarbageCollector { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; diff --git a/nexus/src/app/background/phantom_disks.rs b/nexus/src/app/background/phantom_disks.rs index b038d70ac6..48688838e5 100644 --- a/nexus/src/app/background/phantom_disks.rs +++ b/nexus/src/app/background/phantom_disks.rs @@ -37,14 +37,10 @@ impl PhantomDiskDetector { } impl BackgroundTask for PhantomDiskDetector { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; warn!(&log, "phantom disk task started"); From 95778ebb270929469e208ad4d7052101e372aad2 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:50:52 +0000 Subject: [PATCH 07/15] Update Rust crate argon2 to 0.5.3 (#4881) --- Cargo.lock | 8 ++++---- passwords/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec594abb8f..d58b5f6cd9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,9 +188,9 @@ checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" [[package]] name = "argon2" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ba4cac0a46bc1d2912652a751c47f2a9f3a7fe89bcae2275d418f5270402f9" +checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072" dependencies = [ "base64ct", "blake2", @@ -1100,9 +1100,9 @@ checksum = "f9236877021b66ad90f833d8a73a7acb702b985b64c5986682d9f1f1a184f0fb" [[package]] name = "cpufeatures" -version = "0.2.9" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ "libc", ] diff --git a/passwords/Cargo.toml b/passwords/Cargo.toml index 8adcf75a2e..4f3922a7a5 100644 --- a/passwords/Cargo.toml +++ b/passwords/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" license = "MPL-2.0" [dependencies] -argon2 = { version = "0.5.2", features = ["alloc", "password-hash", "rand", "std"] } +argon2 = { version = "0.5.3", features = ["alloc", "password-hash", "rand", "std"] } rand.workspace = true thiserror.workspace = true schemars.workspace = true From d978cd6cedc1653643c7ab2ab529634a10b46c0d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 09:39:53 +0000 Subject: [PATCH 08/15] Update Rust crate tabled to 0.15 (#4847) --- Cargo.lock | 12 ++++++------ Cargo.toml | 2 +- oximeter/db/src/bin/oxdb.rs | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d58b5f6cd9..d179ad4b0f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5726,9 +5726,9 @@ dependencies = [ [[package]] name = "papergrid" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2ccbe15f2b6db62f9a9871642746427e297b0ceb85f9a7f1ee5ff47d184d0c8" +checksum = "9ad43c07024ef767f9160710b3a6773976194758c7919b17e63b863db0bdf7fb" dependencies = [ "bytecount", "fnv", @@ -8528,9 +8528,9 @@ dependencies = [ [[package]] name = "tabled" -version = "0.14.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfe9c3632da101aba5131ed63f9eed38665f8b3c68703a6bb18124835c1a5d22" +checksum = "4c998b0c8b921495196a48aabaf1901ff28be0760136e31604f7967b0792050e" dependencies = [ "papergrid", "tabled_derive", @@ -8539,9 +8539,9 @@ dependencies = [ [[package]] name = "tabled_derive" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f688a08b54f4f02f0a3c382aefdb7884d3d69609f785bd253dc033243e3fe4" +checksum = "4c138f99377e5d653a371cdad263615634cfc8467685dfe8e73e2b8e98f44b17" dependencies = [ "heck 0.4.1", "proc-macro-error", diff --git a/Cargo.toml b/Cargo.toml index 3d0be64380..2e538656ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -371,7 +371,7 @@ supports-color = "2.1.0" swrite = "0.1.0" libsw = { version = "3.3.1", features = ["tokio"] } syn = { version = "2.0" } -tabled = "0.14" +tabled = "0.15.0" tar = "0.4" tempdir = "0.3" tempfile = "3.9" diff --git a/oximeter/db/src/bin/oxdb.rs b/oximeter/db/src/bin/oxdb.rs index 17f05c24e2..02a8054da0 100644 --- a/oximeter/db/src/bin/oxdb.rs +++ b/oximeter/db/src/bin/oxdb.rs @@ -391,7 +391,7 @@ async fn describe_virtual_table( } let mut builder = tabled::builder::Builder::default(); - builder.set_header(cols); + builder.push_record(cols); // first record is the header builder.push_record(types); println!( "{}", @@ -553,7 +553,7 @@ async fn sql_shell( println!(); let mut builder = tabled::builder::Builder::default(); - builder.set_header(&table.column_names); + builder.push_record(&table.column_names); // first record is the header for row in table.rows.iter() { builder.push_record( row.iter().map(ToString::to_string), From 255cb83822098f80c2f60811152922259ca8e82c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 09:57:33 +0000 Subject: [PATCH 
09/15] Update Rust crate ciborium to 0.2.2 (#4882) --- Cargo.lock | 20 ++++++++++++-------- Cargo.toml | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d179ad4b0f..4d531366f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -892,9 +892,9 @@ dependencies = [ [[package]] name = "ciborium" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", @@ -903,15 +903,15 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", @@ -2726,9 +2726,13 @@ dependencies = [ [[package]] name = "half" -version = "1.8.2" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if", + "crunchy", +] [[package]] name = "hash32" diff --git a/Cargo.toml b/Cargo.toml index 2e538656ac..5364f4b4e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -170,7 +170,7 @@ camino = "1.1" camino-tempfile = "1.1.1" cancel-safe-futures = "0.1.5" chacha20poly1305 = "0.10.1" -ciborium = "0.2.1" +ciborium = "0.2.2" cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } clap = { version = "4.4", features = ["cargo", "derive", "env", "wrap_help"] } From 8f1134a911532792cac29fdea7ad42b9403ac233 Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 24 Jan 2024 08:36:41 -0800 Subject: [PATCH 10/15] [wicket] update to ratatui 0.25.0 (#4876) This was mostly a smooth upgrade, with just a small change required to our use of tui-tree-widget. Tested the TUI locally -- works great. 
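The "small change" in question: tui-tree-widget 0.16 makes every tree item carry a unique identifier. A minimal sketch of the new shape, assuming tui-tree-widget 0.16 with `usize` identifiers as the wicket diff below uses (the labels are placeholders):

```rust
// Sketch of the 0.13 -> 0.16 API shift: TreeItem/TreeState gain an
// identifier type parameter, construction is fallible because sibling
// identifiers must be unique, and select_first now takes the items.
use tui_tree_widget::{Tree, TreeItem, TreeState};

fn demo() -> std::io::Result<()> {
    let items: Vec<TreeItem<'static, usize>> = vec![
        TreeItem::new(0, "SLED 0", vec![])?,
        TreeItem::new(1, "SWITCH", vec![])?,
    ];
    let mut state: TreeState<usize> = TreeState::default();
    state.select_first(&items);
    let _tree = Tree::new(items)?; // fails on duplicate identifiers
    Ok(())
}
```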
--- Cargo.lock | 51 +++++++++++++++------ Cargo.toml | 3 +- wicket/Cargo.toml | 2 +- wicket/src/runner.rs | 1 - wicket/src/state/update.rs | 1 + wicket/src/ui/controls/mod.rs | 4 +- wicket/src/ui/main.rs | 3 +- wicket/src/ui/panes/overview.rs | 3 +- wicket/src/ui/panes/rack_setup.rs | 2 +- wicket/src/ui/panes/update.rs | 68 ++++++++++++++++------------ wicket/src/ui/splash.rs | 3 +- wicket/src/ui/widgets/fade.rs | 11 +---- wicket/src/ui/widgets/status_view.rs | 3 +- workspace-hack/Cargo.toml | 6 ++- 14 files changed, 96 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4d531366f6..f5b80f9262 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -74,6 +74,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -2772,6 +2778,10 @@ name = "hashbrown" version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] name = "headers" @@ -3595,15 +3605,6 @@ dependencies = [ "either", ] -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.12.0" @@ -3885,6 +3886,15 @@ dependencies = [ "zerocopy 0.6.4", ] +[[package]] +name = "lru" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2994eeba8ed550fd9b47a0b38f0242bc3344e496483c6180b69139cc2fa5d1d7" +dependencies = [ + "hashbrown 0.14.2", +] + [[package]] name = "lru-cache" version = "0.1.2" @@ -5213,6 +5223,7 @@ dependencies = [ "getrandom 0.2.10", "group", "hashbrown 0.13.2", + "hashbrown 0.14.2", "hex", "hmac", "hyper 0.14.27", @@ -6631,16 +6642,18 @@ dependencies = [ [[package]] name = "ratatui" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e2e4cd95294a85c3b4446e63ef054eea43e0205b1fd60120c16b74ff7ff96ad" +checksum = "a5659e52e4ba6e07b2dad9f1158f578ef84a73762625ddb51536019f34d180eb" dependencies = [ "bitflags 2.4.0", "cassowary", "crossterm", "indoc 2.0.3", - "itertools 0.11.0", + "itertools 0.12.0", + "lru", "paste", + "stability", "strum", "unicode-segmentation", "unicode-width", @@ -8276,6 +8289,16 @@ dependencies = [ "syn 2.0.46", ] +[[package]] +name = "stability" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebd1b177894da2a2d9120208c3386066af06a488255caabc5de8ddca22dbc3ce" +dependencies = [ + "quote", + "syn 1.0.109", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -9351,9 +9374,9 @@ dependencies = [ [[package]] name = "tui-tree-widget" -version = "0.13.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f01f9172fb8f4f925fb1e259c2f411be14af031ab8b35d517fd05cb78c0784d5" +checksum = "136011b328c4f392499a02c4b5b78d509fb297bf9c10f2bda5d11d65cb946e4c" dependencies = [ "ratatui", "unicode-width", diff --git a/Cargo.toml b/Cargo.toml index 5364f4b4e1..d47b5727ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -310,7 +310,7 @@ propolis-mock-server = { git = 
"https://github.com/oxidecomputer/propolis", rev proptest = "1.4.0" quote = "1.0" rand = "0.8.5" -ratatui = "0.23.0" +ratatui = "0.25.0" rayon = "1.8" rcgen = "0.12.0" reedline = "0.28.0" @@ -396,6 +396,7 @@ trust-dns-server = "0.22" trybuild = "1.0.89" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } +tui-tree-widget = "0.16.0" unicode-width = "0.1.11" update-common = { path = "update-common" } update-engine = { path = "update-engine" } diff --git a/wicket/Cargo.toml b/wicket/Cargo.toml index efb8e51dff..140c011511 100644 --- a/wicket/Cargo.toml +++ b/wicket/Cargo.toml @@ -37,7 +37,7 @@ tokio = { workspace = true, features = ["full"] } tokio-util.workspace = true toml.workspace = true toml_edit.workspace = true -tui-tree-widget = "0.13.0" +tui-tree-widget.workspace = true unicode-width.workspace = true zeroize.workspace = true diff --git a/wicket/src/runner.rs b/wicket/src/runner.rs index 32fabde53e..e83d321459 100644 --- a/wicket/src/runner.rs +++ b/wicket/src/runner.rs @@ -34,7 +34,6 @@ use crate::{Action, Cmd, Event, KeyHandler, Recorder, State, TICK_INTERVAL}; // We can avoid a bunch of unnecessary type parameters by picking them ahead of time. pub type Term = Terminal>; -pub type Frame<'a> = ratatui::Frame<'a, CrosstermBackend>; const MAX_RECORDED_EVENTS: usize = 10000; diff --git a/wicket/src/state/update.rs b/wicket/src/state/update.rs index 6d8a168614..77bbdd83d2 100644 --- a/wicket/src/state/update.rs +++ b/wicket/src/state/update.rs @@ -333,6 +333,7 @@ impl UpdateItem { } } +#[derive(Debug, Copy, Clone)] pub enum UpdateState { NotStarted, Starting, diff --git a/wicket/src/ui/controls/mod.rs b/wicket/src/ui/controls/mod.rs index 4305fb5809..a2682b8052 100644 --- a/wicket/src/ui/controls/mod.rs +++ b/wicket/src/ui/controls/mod.rs @@ -2,8 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::{Action, Cmd, Frame, State}; -use ratatui::layout::Rect; +use crate::{Action, Cmd, State}; +use ratatui::{layout::Rect, Frame}; /// A [`Control`] is the an item on a screen that can be selected and interacted with. /// Control's render [`ratatui::widgets::Widget`]s when drawn. 
diff --git a/wicket/src/ui/main.rs b/wicket/src/ui/main.rs index 58ea6c1771..379cbd03af 100644 --- a/wicket/src/ui/main.rs +++ b/wicket/src/ui/main.rs @@ -8,11 +8,12 @@ use super::{Control, OverviewPane, RackSetupPane, StatefulList, UpdatePane}; use crate::ui::defaults::colors::*; use crate::ui::defaults::style; use crate::ui::widgets::Fade; -use crate::{Action, Cmd, Frame, State, Term}; +use crate::{Action, Cmd, State, Term}; use ratatui::layout::{Alignment, Constraint, Direction, Layout, Rect}; use ratatui::style::{Modifier, Style}; use ratatui::text::{Line, Span}; use ratatui::widgets::{Block, BorderType, Borders, List, ListItem, Paragraph}; +use ratatui::Frame; use slog::{o, Logger}; use wicketd_client::types::GetLocationResponse; diff --git a/wicket/src/ui/panes/overview.rs b/wicket/src/ui/panes/overview.rs index e8cf50bb32..f2d4d4a7ab 100644 --- a/wicket/src/ui/panes/overview.rs +++ b/wicket/src/ui/panes/overview.rs @@ -16,11 +16,12 @@ use crate::ui::defaults::style; use crate::ui::widgets::IgnitionPopup; use crate::ui::widgets::{BoxConnector, BoxConnectorKind, Rack}; use crate::ui::wrap::wrap_text; -use crate::{Action, Cmd, Frame, State}; +use crate::{Action, Cmd, State}; use ratatui::layout::{Constraint, Direction, Layout, Rect}; use ratatui::style::Style; use ratatui::text::{Line, Span, Text}; use ratatui::widgets::{Block, BorderType, Borders, Paragraph}; +use ratatui::Frame; use wicketd_client::types::RotState; use wicketd_client::types::SpComponentCaboose; use wicketd_client::types::SpComponentInfo; diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs index 086d01ce9d..ab85c63819 100644 --- a/wicket/src/ui/panes/rack_setup.rs +++ b/wicket/src/ui/panes/rack_setup.rs @@ -16,7 +16,6 @@ use crate::ui::widgets::PopupScrollOffset; use crate::Action; use crate::Cmd; use crate::Control; -use crate::Frame; use crate::State; use ratatui::layout::Constraint; use ratatui::layout::Direction; @@ -29,6 +28,7 @@ use ratatui::widgets::Block; use ratatui::widgets::BorderType; use ratatui::widgets::Borders; use ratatui::widgets::Paragraph; +use ratatui::Frame; use std::borrow::Cow; use wicketd_client::types::Baseboard; use wicketd_client::types::CurrentRssUserConfig; diff --git a/wicket/src/ui/panes/update.rs b/wicket/src/ui/panes/update.rs index d14b90dfab..be21984997 100644 --- a/wicket/src/ui/panes/update.rs +++ b/wicket/src/ui/panes/update.rs @@ -17,7 +17,7 @@ use crate::ui::widgets::{ PopupScrollOffset, StatusView, }; use crate::ui::wrap::wrap_text; -use crate::{Action, Cmd, Frame, State}; +use crate::{Action, Cmd, State}; use indexmap::IndexMap; use omicron_common::api::internal::nexus::KnownArtifactKind; use ratatui::layout::{Alignment, Constraint, Direction, Layout, Rect}; @@ -26,6 +26,7 @@ use ratatui::widgets::{ Block, BorderType, Borders, Cell, List, ListItem, ListState, Paragraph, Row, Table, }; +use ratatui::Frame; use slog::{info, o, Logger}; use tui_tree_widget::{Tree, TreeItem, TreeState}; use update_engine::{ @@ -148,8 +149,11 @@ pub struct UpdatePane { /// TODO: Move following state into global `State` so that recorder snapshots /// capture all state. - tree_state: TreeState, - items: Vec<TreeItem<'static>>, + /// + /// TODO: The generic parameter is carried over from earlier versions + /// of tui-tree-widget, but there's likely a better index type. + tree_state: TreeState<usize>, + items: Vec<TreeItem<'static, usize>>, // Per-component update state that isn't serializable.
component_state: BTreeMap, @@ -175,14 +179,20 @@ impl UpdatePane { pub fn new(log: &Logger) -> UpdatePane { let log = log.new(o!("component" => "UpdatePane")); let mut tree_state = TreeState::default(); - tree_state.select_first(); + let items = ALL_COMPONENT_IDS + .iter() + .enumerate() + .map(|(index, id)| { + TreeItem::new(index, id.to_string_uppercase(), vec![]) + .expect("no children so no duplicate identifiers") + }) + .collect::>(); + tree_state.select_first(&items); + UpdatePane { log, tree_state, - items: ALL_COMPONENT_IDS - .iter() - .map(|id| TreeItem::new(id.to_string_uppercase(), vec![])) - .collect(), + items, help: vec![ ("Expand", ""), ("Collapse", ""), @@ -826,7 +836,8 @@ impl UpdatePane { .update_state .items .iter() - .map(|(id, states)| { + .enumerate() + .map(|(index, (id, states))| { let children: Vec<_> = states .iter() .flat_map(|(component, s)| { @@ -834,9 +845,8 @@ impl UpdatePane { artifact_version(id, component, &versions); let installed_versions = all_installed_versions(id, component, inventory); - let contents_rect = self.contents_rect; installed_versions.into_iter().map(move |v| { - let spans = vec![ + vec![ Span::styled(v.title, style::selected()), Span::styled(v.version, style::selected_line()), Span::styled( @@ -844,17 +854,20 @@ impl UpdatePane { style::selected(), ), Span::styled(s.to_string(), s.style()), - ]; - TreeItem::new_leaf(align_by( - 0, - MAX_COLUMN_WIDTH, - contents_rect, - spans, - )) + ] }) }) + .enumerate() + .map(|(leaf_index, spans)| { + let contents_rect = self.contents_rect; + TreeItem::new_leaf( + leaf_index, + align_by(0, MAX_COLUMN_WIDTH, contents_rect, spans), + ) + }) .collect(); - TreeItem::new(id.to_string_uppercase(), children) + TreeItem::new(index, id.to_string_uppercase(), children) + .expect("tree does not contain duplicate identifiers") }) .collect(); } @@ -1365,6 +1378,7 @@ impl UpdatePane { // Draw the contents let tree = Tree::new(self.items.clone()) + .expect("tree does not have duplicate identifiers") .block(block.clone().borders(Borders::LEFT | Borders::RIGHT)) .style(style::plain_text()) .highlight_style(style::highlighted()); @@ -1421,12 +1435,11 @@ impl UpdatePane { Constraint::Length(cell_width), Constraint::Length(cell_width), ]; - let header_table = Table::new(std::iter::empty()) + let header_table = Table::new(std::iter::empty(), &width_constraints) .header( Row::new(vec!["COMPONENT", "VERSION", "TARGET", "STATUS"]) .style(header_style), ) - .widths(&width_constraints) .block(block.clone().title("OVERVIEW (* = active)")); frame.render_widget(header_table, self.table_headers_rect); @@ -1458,12 +1471,11 @@ impl UpdatePane { ]) }) }); - let version_table = - Table::new(version_rows).widths(&width_constraints).block( - block - .clone() - .borders(Borders::LEFT | Borders::RIGHT | Borders::BOTTOM), - ); + let version_table = Table::new(version_rows, &width_constraints).block( + block + .clone() + .borders(Borders::LEFT | Borders::RIGHT | Borders::BOTTOM), + ); frame.render_widget(version_table, self.status_view_version_rect); // Ensure the version table is connected to the table headers @@ -2413,7 +2425,7 @@ impl Control for UpdatePane { Some(Action::Redraw) } Cmd::GotoTop => { - self.tree_state.select_first(); + self.tree_state.select_first(&self.items); state.rack_state.selected = ALL_COMPONENT_IDS[0]; Some(Action::Redraw) } diff --git a/wicket/src/ui/splash.rs b/wicket/src/ui/splash.rs index cc8ab0bff8..9da9fa8648 100644 --- a/wicket/src/ui/splash.rs +++ b/wicket/src/ui/splash.rs @@ -10,9 +10,10 @@ use 
super::defaults::colors::*; use super::defaults::dimensions::RectExt; use super::defaults::style; use super::widgets::{Logo, LogoState, LOGO_HEIGHT, LOGO_WIDTH}; -use crate::{Cmd, Frame, Term}; +use crate::{Cmd, Term}; use ratatui::style::Style; use ratatui::widgets::Block; +use ratatui::Frame; const TOTAL_FRAMES: usize = 100; diff --git a/wicket/src/ui/widgets/fade.rs b/wicket/src/ui/widgets/fade.rs index d1669cd5b7..5462a4ecf2 100644 --- a/wicket/src/ui/widgets/fade.rs +++ b/wicket/src/ui/widgets/fade.rs @@ -9,15 +9,6 @@ pub struct Fade {} impl Widget for Fade { fn render(self, area: Rect, buf: &mut Buffer) { - for x in area.left()..area.right() { - for y in area.top()..area.bottom() { - buf.set_string( - x, - y, - buf.get(x, y).symbol.clone(), - style::faded_background(), - ); - } - } + buf.set_style(area, style::faded_background()); } } diff --git a/wicket/src/ui/widgets/status_view.rs b/wicket/src/ui/widgets/status_view.rs index 7418fed512..b9e981c9bc 100644 --- a/wicket/src/ui/widgets/status_view.rs +++ b/wicket/src/ui/widgets/status_view.rs @@ -6,10 +6,9 @@ use ratatui::{ layout::{Alignment, Rect}, text::Text, widgets::{Block, Borders, List, Paragraph, StatefulWidget, Widget}, + Frame, }; -use crate::Frame; - use super::{BoxConnector, BoxConnectorKind}; /// A displayer for the status view. diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b6d61d9ea5..a15b972554 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -54,7 +54,8 @@ gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway- generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } -hashbrown = { version = "0.13.2" } +hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14.2", features = ["raw"] } +hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" } hex = { version = "0.4.3", features = ["serde"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.27", features = ["full"] } @@ -160,7 +161,8 @@ gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway- generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } -hashbrown = { version = "0.13.2" } +hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14.2", features = ["raw"] } +hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" } hex = { version = "0.4.3", features = ["serde"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.27", features = ["full"] } From e85af5b1e51bba3f8d0c8eea633307b90a2cb2a8 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:37:19 -0800 Subject: [PATCH 11/15] Update Rust crate uuid to 1.7.0 (#4885) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f5b80f9262..2c58c7c33c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9705,9 +9705,9 @@ checksum = 
"711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom 0.2.10", "serde", diff --git a/Cargo.toml b/Cargo.toml index d47b5727ad..f5e5a13e0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -401,7 +401,7 @@ unicode-width = "0.1.11" update-common = { path = "update-common" } update-engine = { path = "update-engine" } usdt = "0.3" -uuid = { version = "1.6.1", features = ["serde", "v4"] } +uuid = { version = "1.7.0", features = ["serde", "v4"] } walkdir = "2.4" wicket = { path = "wicket" } wicket-common = { path = "wicket-common" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index a15b972554..33434766d3 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -114,7 +114,7 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } -uuid = { version = "1.6.1", features = ["serde", "v4"] } +uuid = { version = "1.7.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.31", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } @@ -222,7 +222,7 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } -uuid = { version = "1.6.1", features = ["serde", "v4"] } +uuid = { version = "1.7.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.31", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } From 7bd6ea60e504c288a63db79a0deb44944897e84c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:37:41 -0800 Subject: [PATCH 12/15] Update Rust crate serde_with to 3.5.1 (#4884) --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2c58c7c33c..bdb288be18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7693,9 +7693,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.4.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64cd236ccc1b7a29e7e2739f27c0b2dd199804abc4290e32f59f3b68d6405c23" +checksum = "f5c9fdb6b00a489875b22efd4b78fe2b363b72265cc5f6eb2e2b9ee270e6140c" dependencies = [ "base64", "chrono", @@ -7710,9 +7710,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.4.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93634eb5f75a2323b16de4748022ac4297f9e76b6dced2be287a099f41b5e788" +checksum = "dbff351eb4b33600a2e138dfa0b10b65a238ea8ff8fb2387c422c5022a3e8298" dependencies = [ "darling 0.20.3", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index f5e5a13e0c..fc8afecc9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -336,7 +336,7 @@ serde_json = "1.0.111" serde_path_to_error = "0.1.15" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" -serde_with = "3.4.0" +serde_with = "3.5.1" sha2 = 
"0.10.8" sha3 = "0.10.8" shell-words = "1.1.0" From c392c76f0cc8a0bdaca5cf7690c6d95dd071e7db Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:38:34 -0800 Subject: [PATCH 13/15] Update Rust crate regex to 1.10.3 (#4883) --- Cargo.lock | 13 +++++++------ Cargo.toml | 2 +- workspace-hack/Cargo.toml | 10 ++++++---- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bdb288be18..2451404a1e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5182,6 +5182,7 @@ name = "omicron-workspace-hack" version = "0.1.0" dependencies = [ "ahash", + "aho-corasick", "anyhow", "base16ct", "bit-set", @@ -5254,7 +5255,7 @@ dependencies = [ "rand 0.8.5", "rand_chacha 0.3.1", "regex", - "regex-automata 0.4.3", + "regex-automata 0.4.4", "regex-syntax 0.8.2", "reqwest", "ring 0.17.7", @@ -6780,13 +6781,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.3", + "regex-automata 0.4.4", "regex-syntax 0.8.2", ] @@ -6804,9 +6805,9 @@ checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a" dependencies = [ "aho-corasick", "memchr", diff --git a/Cargo.toml b/Cargo.toml index fc8afecc9c..fb4327f575 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -315,7 +315,7 @@ rayon = "1.8" rcgen = "0.12.0" reedline = "0.28.0" ref-cast = "1.0" -regex = "1.10.2" +regex = "1.10.3" regress = "0.7.1" reqwest = { version = "0.11", default-features = false } ring = "0.17.7" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 33434766d3..cda4426c9b 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -15,6 +15,7 @@ publish = false ### BEGIN HAKARI SECTION [dependencies] ahash = { version = "0.8.6" } +aho-corasick = { version = "1.0.4" } anyhow = { version = "1.0.75", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } @@ -83,8 +84,8 @@ predicates = { version = "3.1.0" } proc-macro2 = { version = "1.0.74" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } -regex = { version = "1.10.2" } -regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } +regex = { version = "1.10.3" } +regex-automata = { version = "0.4.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } @@ -122,6 +123,7 @@ zip = { version = "0.6.6", default-features = false, features = ["bzip2", "defla [build-dependencies] ahash = { version = "0.8.6" } +aho-corasick = { version = "1.0.4" 
} anyhow = { version = "1.0.75", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } @@ -190,8 +192,8 @@ predicates = { version = "3.1.0" } proc-macro2 = { version = "1.0.74" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } -regex = { version = "1.10.2" } -regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } +regex = { version = "1.10.3" } +regex-automata = { version = "0.4.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } From 9ac047e54985d581a4e6b3f77077bd69d5ff20f5 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Wed, 24 Jan 2024 12:50:26 -0800 Subject: [PATCH 14/15] updates for tunnel routing (#3859) --- .github/buildomat/jobs/deploy.sh | 21 +++- Cargo.lock | 19 ++- Cargo.toml | 4 +- clients/ddm-admin-client/src/lib.rs | 18 ++- common/src/api/external/mod.rs | 2 +- docs/boundary-services-a-to-z.adoc | 117 ++---------------- docs/networking.adoc | 10 +- illumos-utils/src/opte/mod.rs | 20 --- illumos-utils/src/opte/port_manager.rs | 3 - nexus/src/app/rack.rs | 76 +----------- .../app/sagas/switch_port_settings_apply.rs | 84 +++++++++++++ .../app/sagas/switch_port_settings_clear.rs | 112 ++++++++++++++++- .../app/sagas/switch_port_settings_common.rs | 41 +----- package-manifest.toml | 12 +- sled-agent/src/bootstrap/early_networking.rs | 94 ++++++-------- smf/sled-agent/non-gimlet/config-rss.toml | 2 +- tools/ci_check_opte_ver.sh | 5 + tools/install_opte.sh | 10 ++ tools/maghemite_ddm_openapi_version | 4 +- tools/maghemite_mg_openapi_version | 4 +- tools/maghemite_mgd_checksums | 4 +- tools/opte_version | 2 +- tools/opte_version_override | 5 + 23 files changed, 342 insertions(+), 327 deletions(-) create mode 100644 tools/opte_version_override diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index f4f1e0a999..e69cfb0078 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -2,7 +2,7 @@ #: #: name = "helios / deploy" #: variety = "basic" -#: target = "lab-2.0-opte-0.27" +#: target = "lab-2.0-opte-0.28" #: output_rules = [ #: "%/var/svc/log/oxide-sled-agent:default.log*", #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/oxide-*.log*", @@ -33,6 +33,9 @@ _exit_trap() { local status=$? [[ $status -eq 0 ]] && exit 0 + # XXX paranoia + pfexec cp /tmp/opteadm /opt/oxide/opte/bin/opteadm + set +o errexit set -o xtrace banner evidence @@ -50,6 +53,7 @@ _exit_trap() { standalone \ dump-state pfexec /opt/oxide/opte/bin/opteadm list-ports + pfexec /opt/oxide/opte/bin/opteadm dump-v2b z_swadm link ls z_swadm addr list z_swadm route list @@ -97,6 +101,19 @@ z_swadm () { pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm $@ } +# XXX remove. This is just to test against a development branch of OPTE in CI. 
+set +x +OPTE_COMMIT="73d4669ea213d0b7aca35c4babb6fd09ed51d29e" +curl -sSfOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde +pfexec rem_drv xde || true +pfexec mv xde /kernel/drv/amd64/xde +pfexec add_drv xde || true +curl -sSfOL https://buildomat.eng.oxide.computer/wg/0/artefact/01HM09S4M15WNXB2B2MX8R1GBT/yLalJU5vT4S4IEpwSeY4hPuspxw3JcINokZmlfNU14npHkzG/01HM09SJ2RQSFGW7MVKC9JKZ8D/01HM0A58D888AJ7YP6N1Q6T6ZD/opteadm +chmod +x opteadm +cp opteadm /tmp/opteadm +pfexec mv opteadm /opt/oxide/opte/bin/opteadm +set -x + # # XXX work around 14537 (UFS should not allow directories to be unlinked) which # is probably not yet fixed in xde branch? Once the xde branch merges from @@ -236,7 +253,7 @@ infra_ip_last = \"$UPLINK_IP\" /^routes/c\\ routes = \\[{nexthop = \"$GATEWAY_IP\", destination = \"0.0.0.0/0\"}\\] /^addresses/c\\ -addresses = \\[\"$UPLINK_IP/32\"\\] +addresses = \\[\"$UPLINK_IP/24\"\\] } " pkg/config-rss.toml diff -u pkg/config-rss.toml{~,} || true diff --git a/Cargo.lock b/Cargo.lock index 2451404a1e..0c3eb15179 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3257,7 +3257,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" [[package]] name = "illumos-utils" @@ -3657,7 +3657,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "quote", "syn 2.0.46", @@ -5411,7 +5411,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "cfg-if", "dyn-clone", @@ -5427,7 +5427,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -5439,7 +5439,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "libc", "libnet", @@ -5513,10 +5513,12 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ + 
"cfg-if", "illumos-sys-hdrs", "opte", + "poptrie", "serde", "smoltcp 0.11.0", "zerocopy 0.7.31", @@ -6153,6 +6155,11 @@ dependencies = [ "universal-hash", ] +[[package]] +name = "poptrie" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/poptrie?branch=multipath#ca52bef3f87ff1a67d81b3c6e601dcb5fdbcc165" + [[package]] name = "portable-atomic" version = "1.4.3" diff --git a/Cargo.toml b/Cargo.toml index fb4327f575..093e972b42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -270,7 +270,7 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.10.1" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "1d29ef60a18179babfb44f0f7a3c2fe71034a2c1", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" @@ -278,7 +278,7 @@ openapiv3 = "2.0.0" openssl = "0.10" openssl-sys = "0.9" openssl-probe = "0.1.5" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" } oso = "0.27" owo-colors = "3.5.0" oximeter = { path = "oximeter/oximeter" } diff --git a/clients/ddm-admin-client/src/lib.rs b/clients/ddm-admin-client/src/lib.rs index 93248c73a1..c32345d1dc 100644 --- a/clients/ddm-admin-client/src/lib.rs +++ b/clients/ddm-admin-client/src/lib.rs @@ -20,7 +20,7 @@ pub use inner::types; pub use inner::Error; use either::Either; -use inner::types::Ipv6Prefix; +use inner::types::{Ipv6Prefix, TunnelOrigin}; use inner::Client as InnerClient; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; @@ -108,6 +108,22 @@ impl Client { }); } + pub fn advertise_tunnel_endpoint(&self, endpoint: TunnelOrigin) { + let me = self.clone(); + tokio::spawn(async move { + retry_notify(retry_policy_internal_service_aggressive(), || async { + me.inner.advertise_tunnel_endpoints(&vec![endpoint.clone()]).await?; + Ok(()) + }, |err, duration| { + info!( + me.log, + "Failed to notify ddmd of tunnel endpoint (retry in {duration:?}"; + "err" => %err, + ); + }).await.unwrap(); + }); + } + /// Returns the addresses of connected sleds. /// /// Note: These sleds have not yet been verified. diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 68fcb0f9fa..a8aff00afa 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -3253,7 +3253,7 @@ mod test { let net_des = serde_json::from_str::(&ser).unwrap(); assert_eq!(net, net_des); - let net_str = "fd00:99::1/64"; + let net_str = "fd00:47::1/64"; let net = IpNet::from_str(net_str).unwrap(); let ser = serde_json::to_string(&net).unwrap(); diff --git a/docs/boundary-services-a-to-z.adoc b/docs/boundary-services-a-to-z.adoc index 6f4f2fcea6..e4c47ac7f9 100644 --- a/docs/boundary-services-a-to-z.adoc +++ b/docs/boundary-services-a-to-z.adoc @@ -1,115 +1,18 @@ = Boundary Services A-Z -NOTE: The instructions for _deploying_ SoftNPU with Omicron have been folded into xref:how-to-run.adoc[the main how-to-run docs]. +NOTE: The instructions for _deploying_ SoftNPU with Omicron have been folded +into xref:how-to-run.adoc[the main how-to-run docs]. 
-The virtual hardware making up SoftNPU is a bit different than what was previously used. What we now have looks like this. +The virtual hardware making up SoftNPU is depicted in the diagram below. image::plumbing.png[] -The `softnpu` zone will be configured and launched during the `create_virtual_hardware.sh` script. +The `softnpu` zone will be configured and launched during the +`create_virtual_hardware.sh` script. Once the control plane is running, `softnpu` can be configured via `dendrite` -using the `swadm` binary located in the `oxz_switch` zone. -An example script is provided in `tools/scrimlet/softnpu-init.sh`. -This script should work without modification for basic development setups, -but feel free to tweak it as needed. - ----- -$ ./tools/scrimlet/softnpu-init.sh -++ netstat -rn -f inet -++ grep default -++ awk -F ' ' '{print $2}' -+ GATEWAY_IP=10.85.0.1 -+ echo 'Using 10.85.0.1 as gateway ip' -Using 10.85.0.1 as gateway ip -++ arp 10.85.0.1 -++ awk -F ' ' '{print $4}' -+ GATEWAY_MAC=68:d7:9a:1f:77:a1 -+ echo 'Using 68:d7:9a:1f:77:a1 as gateway mac' -Using 68:d7:9a:1f:77:a1 as gateway mac -+ z_swadm link create rear0 --speed 100G --fec RS -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link create rear0 --speed 100G --fec RS -+ z_swadm link create qsfp0 --speed 100G --fec RS -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link create qsfp0 --speed 100G --fec RS -+ z_swadm addr add rear0/0 fe80::aae1:deff:fe01:701c -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add rear0/0 fe80::aae1:deff:fe01:701c -+ z_swadm addr add qsfp0/0 fe80::aae1:deff:fe01:701d -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add qsfp0/0 fe80::aae1:deff:fe01:701d -+ z_swadm addr add rear0/0 fd00:99::1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add rear0/0 fd00:99::1 -+ z_swadm route add fd00:1122:3344:0101::/64 rear0/0 fe80::aae1:deff:fe00:1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route add fd00:1122:3344:0101::/64 rear0/0 fe80::aae1:deff:fe00:1 -+ z_swadm arp add fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp add fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01 -+ z_swadm arp add 10.85.0.1 68:d7:9a:1f:77:a1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp add 10.85.0.1 68:d7:9a:1f:77:a1 -+ z_swadm route add 0.0.0.0/0 qsfp0/0 10.85.0.1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route add 0.0.0.0/0 qsfp0/0 10.85.0.1 -+ z_swadm link ls -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link ls -Port/Link Media Speed FEC Enabled Link MAC -rear0/0 Copper 100G RS true Up a8:40:25:46:55:e3 -qsfp0/0 Copper 100G RS true Up a8:40:25:46:55:e4 -+ z_swadm addr list -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr list -Link IPv4 IPv6 -rear0/0 fe80::aae1:deff:fe01:701c - fd00:99::1 -qsfp0/0 fe80::aae1:deff:fe01:701d -+ z_swadm route list -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route list -Subnet Port Link Gateway -0.0.0.0/0 qsfp0 0 10.85.0.1 -fd00:1122:3344:101::/64 rear0 0 fe80::aae1:deff:fe00:1 -+ z_swadm arp list -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp list -host mac age -10.85.0.1 68:d7:9a:1f:77:a1 0s -fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01 0s ----- - -While following -https://github.com/oxidecomputer/omicron/blob/main/docs/how-to-run.adoc[how-to-run.adoc] -to set up IPs, images, disks, instances etc, pay particular attention to the -following. 
- -- The address range in the IP pool should be on a subnet in your local network that - can NAT out to the Internet. -- Be sure to set up an external IP for the instance you create. - -You will need to set up `proxy-arp` if your VM external IP addresses are on the -same L2 network as the router or other non-oxide hosts: ----- -pfexec /opt/oxide/softnpu/stuff/scadm \ - --server /opt/oxide/softnpu/stuff/server \ - --client /opt/oxide/softnpu/stuff/client \ - standalone \ - add-proxy-arp \ - $ip_pool_start \ - $ip_pool_end \ - $softnpu_mac ----- - -By the end, we have an instance up and running with external connectivity -configured via boundary services: ----- -ry@korgano:~/omicron$ ~/propolis/target/release/propolis-cli --server fd00:1122:3344:101::c serial - -debian login: root -Linux debian 5.10.0-9-amd64 #1 SMP Debian 5.10.70-1 (2021-09-30) x86_64 - -The programs included with the Debian GNU/Linux system are free software; -the exact distribution terms for each program are described in the -individual files in /usr/share/doc/*/copyright. - -Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent -permitted by applicable law. -root@debian:~# host oxide.computer -oxide.computer has address 76.76.21.61 -oxide.computer has address 76.76.21.22 -oxide.computer mail is handled by 5 alt2.aspmx.l.google.com. -oxide.computer mail is handled by 1 aspmx.l.google.com. -oxide.computer mail is handled by 10 aspmx3.googlemail.com. -oxide.computer mail is handled by 5 alt1.aspmx.l.google.com. -oxide.computer mail is handled by 10 aspmx2.googlemail.com. ----- +using the `swadm` binary located in the `oxz_switch` zone. This is not necessary +under normal operation, as the switch state will be managed automatically by the +control plane and networking daemons. An example script is provided in +`tools/scrimlet/softnpu-init.sh`. This script should work without modification +for basic development setups, but feel free to tweak it as needed. diff --git a/docs/networking.adoc b/docs/networking.adoc index 2ebad97842..84c95832c0 100644 --- a/docs/networking.adoc +++ b/docs/networking.adoc @@ -669,13 +669,13 @@ fdb0:a840:2504:352::/64 fe80::aa40:25ff:fe05:c UG 2 640 cxgbe0 fd00:1122:3344:1::/64 fe80::aa40:25ff:fe05:c UG 2 2401 cxgbe0 fd00:1122:3344:1::/64 fe80::aa40:25ff:fe05:40c UG 2 51 cxgbe1 fdb0:a840:2504:352::/64 fe80::aa40:25ff:fe05:40c UG 2 11090 cxgbe1 -fd00:99::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 +fdb2:ceeb:3ab7:8c9d::1/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:1d1::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:393::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:191::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:353::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fd00:1122:3344:101::/64 fe80::aa40:25ff:fe05:c UG 2 634578 cxgbe0 -fd00:99::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 +fd96:354:c1dc:606d::1/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 fd00:1122:3344:101::/64 fe80::aa40:25ff:fe05:40c UG 2 14094545 cxgbe1 fdb0:a840:2504:1d1::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 fdb0:a840:2504:353::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 @@ -733,7 +733,11 @@ fd00:1122:3344:3::/64 fe80::aa40:25ff:fe05:c UG 2 2437 cxgbe0 Recall that cxgbe0 and cxgbe1 are connected to separate switches in the rack. So we're seeing the prefixes for the other sleds in this deployment. We have two routes to reach each sled: one through each switch. The gateway is the link-local address _of each switch_ on the corresponding link. 
One notable exception: the route for this same sled (`fd00:1122:3344:104::/64`) points to `underlay0`, the GZ's VNIC on the sled's underlay network. In this way, traffic leaving the GZ (whether it originated in this GZ or arrived from one of the switches) is directed to the sled's underlay network etherstub and from there to the right zone VNIC. -(Questions: Why does 107 only have one route? What are the `fd00:99::` routes?) +(Questions: Why does 107 only have one route?) + +The `fdb2:ceeb:3ab7:8c9d::1/64` and `fd96:354:c1dc:606d::1/64` routes are +randomly generated boundary services tunnel endpoint addresses. See RFD 404 for +more details. There are similar routes for other sleds' prefixes on the bootstrap network. diff --git a/illumos-utils/src/opte/mod.rs b/illumos-utils/src/opte/mod.rs index 710e783181..d06b6b26e5 100644 --- a/illumos-utils/src/opte/mod.rs +++ b/illumos-utils/src/opte/mod.rs @@ -29,26 +29,6 @@ pub use oxide_vpc::api::DhcpCfg; pub use oxide_vpc::api::Vni; use std::net::IpAddr; -fn default_boundary_services() -> BoundaryServices { - use oxide_vpc::api::Ipv6Addr; - use oxide_vpc::api::MacAddr; - // TODO-completeness: Don't hardcode any of these values. - // - // Boundary Services will be started on several Sidecars during rack - // setup, and those addresses and VNIs will need to be propagated here. - // See https://github.com/oxidecomputer/omicron/issues/1382 - let ip = Ipv6Addr::from([0xfd00, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01]); - - // This MAC address is entirely irrelevant to the functionality of OPTE and - // the Oxide VPC. It's never used to actually forward packets. It only - // represents the "logical" destination of Boundary Services as a - // destination that OPTE as a virtual gateway forwards packets to as its - // next hop. - let mac = MacAddr::from_const([0xa8, 0x40, 0x25, 0xf9, 0x99, 0x99]); - let vni = Vni::new(99_u32).unwrap(); - BoundaryServices { ip, mac, vni } -} - /// Information about the gateway for an OPTE port #[derive(Debug, Clone, Copy)] #[allow(dead_code)] diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 3558ef1c78..c472996598 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -4,7 +4,6 @@ //! Manager for all OPTE ports on a Helios system -use crate::opte::default_boundary_services; use crate::opte::opte_firewall_rules; use crate::opte::params::DeleteVirtualNetworkInterfaceHost; use crate::opte::params::SetVirtualNetworkInterfaceHost; @@ -110,7 +109,6 @@ impl PortManager { let subnet = IpNetwork::from(nic.subnet); let vpc_subnet = IpCidr::from(subnet); let gateway = Gateway::from_subnet(&subnet); - let boundary_services = default_boundary_services(); // Describe the external IP addresses for this port. macro_rules! ip_cfg { @@ -219,7 +217,6 @@ impl PortManager { gateway_mac: MacAddr::from(gateway.mac.into_array()), vni, phys_ip: self.inner.underlay_ip.into(), - boundary_services, }; // Create the xde device. 
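A note on the route programming in the rest of this patch: the apply saga, the clear saga, and sled-agent's early network setup below each convert IPv4 route configurations into mgd static-route requests with the same match-and-skip loop. A shared helper could look roughly like this (a hypothetical refactor sketch built from the request types this patch introduces; the `(destination, nexthop)` tuple stands in for the slightly different route structs involved):

```rust
use ipnetwork::IpNetwork;
use mg_admin_client::types::{Prefix4, StaticRoute4, StaticRoute4List};
use std::net::IpAddr;

/// Convert (destination, nexthop) pairs into an mgd IPv4 static-route list,
/// silently skipping anything that is not IPv4-over-IPv4, mirroring the
/// behavior of the saga code below.
fn v4_static_routes(
    routes: impl IntoIterator<Item = (IpNetwork, IpAddr)>,
) -> StaticRoute4List {
    let mut list = Vec::new();
    for (dst, gw) in routes {
        if let (IpNetwork::V4(dst), IpAddr::V4(nexthop)) = (dst, gw) {
            list.push(StaticRoute4 {
                nexthop,
                // The route's prefix is its destination network; the
                // gateway supplies only the nexthop.
                prefix: Prefix4 { value: dst.ip(), length: dst.prefix() },
            });
        }
    }
    StaticRoute4List { list }
}
```

Such a helper would also keep the add and remove paths from drifting apart, since both directions build the same list.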
diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 23ee39415f..17e7a17444 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -33,7 +33,7 @@ use nexus_types::external_api::params::RouteConfig; use nexus_types::external_api::params::SwitchPortConfigCreate; use nexus_types::external_api::params::UninitializedSledId; use nexus_types::external_api::params::{ - AddressLotCreate, BgpPeerConfig, LoopbackAddressCreate, Route, SiloCreate, + AddressLotCreate, BgpPeerConfig, Route, SiloCreate, SwitchPortSettingsCreate, }; use nexus_types::external_api::shared::Baseboard; @@ -375,24 +375,7 @@ impl super::Nexus { let ipv4_block = AddressLotBlockCreate { first_address, last_address }; - let first_address = - IpAddr::from_str("fd00:99::1").map_err(|e| { - Error::internal_error(&format!( - "failed to parse `fd00:99::1` as `IpAddr`: {e}" - )) - })?; - - let last_address = - IpAddr::from_str("fd00:99::ffff").map_err(|e| { - Error::internal_error(&format!( - "failed to parse `fd00:99::ffff` as `IpAddr`: {e}" - )) - })?; - - let ipv6_block = - AddressLotBlockCreate { first_address, last_address }; - - let blocks = vec![ipv4_block, ipv6_block]; + let blocks = vec![ipv4_block]; let address_lot_params = AddressLotCreate { identity, kind, blocks }; @@ -412,24 +395,6 @@ impl super::Nexus { }, }?; - let address_lot_lookup = self - .address_lot_lookup( - &opctx, - NameOrId::Name(address_lot_name.clone()), - ) - .map_err(|e| { - Error::internal_error(&format!( - "unable to lookup infra address_lot: {e}" - )) - })?; - - let (.., authz_address_lot) = address_lot_lookup - .lookup_for(authz::Action::Modify) - .await - .map_err(|e| { - Error::internal_error(&format!("unable to retrieve authz_address_lot for infra address_lot: {e}")) - })?; - let mut bgp_configs = HashMap::new(); for bgp_config in &rack_network_config.bgp { @@ -542,43 +507,6 @@ impl super::Nexus { )) })?; - // TODO: #3603 Use separate address lots for loopback addresses and infra ips - let loopback_address_params = LoopbackAddressCreate { - address_lot: NameOrId::Name(address_lot_name.clone()), - rack_id, - switch_location: switch_location.clone(), - address: first_address, - mask: 64, - anycast: true, - }; - - if self - .loopback_address_lookup( - &opctx, - rack_id, - switch_location.clone().into(), - ipnetwork::IpNetwork::new( - loopback_address_params.address, - loopback_address_params.mask, - ) - .map_err(|_| { - Error::invalid_request("invalid loopback address") - })? - .into(), - )? - .lookup_for(authz::Action::Read) - .await - .is_err() - { - self.db_datastore - .loopback_address_create( - opctx, - &loopback_address_params, - None, - &authz_address_lot, - ) - .await?; - } let uplink_name = format!("default-uplink{idx}"); let name = Name::from_str(&uplink_name).unwrap(); diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 0d6bb52421..979ec54afd 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -15,6 +15,10 @@ use crate::app::sagas::{ use anyhow::Error; use db::datastore::SwitchPortSettingsCombinedResult; use dpd_client::types::PortId; +use mg_admin_client::types::{ + AddStaticRoute4Request, DeleteStaticRoute4Request, Prefix4, StaticRoute4, + StaticRoute4List, +}; use nexus_db_model::NETWORK_KEY; use nexus_db_queries::db::datastore::UpdatePrecondition; use nexus_db_queries::{authn, db}; @@ -52,6 +56,10 @@ declare_saga_actions! 
{
         + spa_ensure_switch_port_settings
         - spa_undo_ensure_switch_port_settings
     }
+    ENSURE_SWITCH_ROUTES -> "ensure_switch_routes" {
+        + spa_ensure_switch_routes
+        - spa_undo_ensure_switch_routes
+    }
     ENSURE_SWITCH_PORT_UPLINK -> "ensure_switch_port_uplink" {
         + spa_ensure_switch_port_uplink
         - spa_undo_ensure_switch_port_uplink
     }
@@ -210,6 +218,82 @@ async fn spa_ensure_switch_port_settings(
     Ok(())
 }

+async fn spa_ensure_switch_routes(
+    sagactx: NexusActionContext,
+) -> Result<(), ActionError> {
+    let params = sagactx.saga_params::<Params>()?;
+    let opctx = crate::context::op_context_for_saga_action(
+        &sagactx,
+        &params.serialized_authn,
+    );
+
+    let settings = sagactx
+        .lookup::<SwitchPortSettingsCombinedResult>("switch_port_settings")?;
+
+    let mut rq = AddStaticRoute4Request {
+        routes: StaticRoute4List { list: Vec::new() },
+    };
+    for r in settings.routes {
+        let nexthop = match r.gw.ip() {
+            IpAddr::V4(v4) => v4,
+            IpAddr::V6(_) => continue,
+        };
+        let prefix = match r.dst.ip() {
+            IpAddr::V4(v4) => Prefix4 { value: v4, length: r.dst.prefix() },
+            IpAddr::V6(_) => continue,
+        };
+        let sr = StaticRoute4 { nexthop, prefix };
+        rq.routes.list.push(sr);
+    }
+
+    let mg_client: Arc<mg_admin_client::Client> =
+        select_mg_client(&sagactx, &opctx, params.switch_port_id).await?;
+
+    mg_client.inner.static_add_v4_route(&rq).await.map_err(|e| {
+        ActionError::action_failed(format!("mgd static route add {e}"))
+    })?;
+
+    Ok(())
+}
+
+async fn spa_undo_ensure_switch_routes(
+    sagactx: NexusActionContext,
+) -> Result<(), Error> {
+    let params = sagactx.saga_params::<Params>()?;
+    let opctx = crate::context::op_context_for_saga_action(
+        &sagactx,
+        &params.serialized_authn,
+    );
+    let settings = sagactx
+        .lookup::<SwitchPortSettingsCombinedResult>("switch_port_settings")?;
+
+    let mut rq = DeleteStaticRoute4Request {
+        routes: StaticRoute4List { list: Vec::new() },
+    };
+
+    for r in settings.routes {
+        let nexthop = match r.gw.ip() {
+            IpAddr::V4(v4) => v4,
+            IpAddr::V6(_) => continue,
+        };
+        let prefix = match r.dst.ip() {
+            IpAddr::V4(v4) => Prefix4 { value: v4, length: r.dst.prefix() },
+            IpAddr::V6(_) => continue,
+        };
+        let sr = StaticRoute4 { nexthop, prefix };
+        rq.routes.list.push(sr);
+    }
+
+    let mg_client: Arc<mg_admin_client::Client> =
+        select_mg_client(&sagactx, &opctx, params.switch_port_id).await?;
+
+    mg_client.inner.static_remove_v4_route(&rq).await.map_err(|e| {
+        ActionError::action_failed(format!("mgd static route remove {e}"))
+    })?;
+
+    Ok(())
+}
+
 async fn spa_undo_ensure_switch_port_settings(
     sagactx: NexusActionContext,
 ) -> Result<(), Error> {
diff --git a/nexus/src/app/sagas/switch_port_settings_clear.rs b/nexus/src/app/sagas/switch_port_settings_clear.rs
index 0d876f8159..ff79de8e8e 100644
--- a/nexus/src/app/sagas/switch_port_settings_clear.rs
+++ b/nexus/src/app/sagas/switch_port_settings_clear.rs
@@ -15,12 +15,16 @@ use crate::app::sagas::{
 };
 use anyhow::Error;
 use dpd_client::types::PortId;
-use mg_admin_client::types::DeleteNeighborRequest;
+use mg_admin_client::types::{
+    AddStaticRoute4Request, DeleteNeighborRequest, DeleteStaticRoute4Request,
+    Prefix4, StaticRoute4, StaticRoute4List,
+};
 use nexus_db_model::NETWORK_KEY;
 use nexus_db_queries::authn;
 use nexus_db_queries::db::datastore::UpdatePrecondition;
 use omicron_common::api::external::{self, NameOrId, SwitchLocation};
 use serde::{Deserialize, Serialize};
+use std::net::IpAddr;
 use std::str::FromStr;
 use std::sync::Arc;
 use steno::ActionError;
@@ -43,6 +47,10 @@ declare_saga_actions!
{
         + spa_clear_switch_port_settings
         - spa_undo_clear_switch_port_settings
     }
+    CLEAR_SWITCH_PORT_ROUTES -> "clear_switch_port_routes" {
+        + spa_clear_switch_port_routes
+        - spa_undo_clear_switch_port_routes
+    }
     CLEAR_SWITCH_PORT_UPLINK -> "clear_switch_port_uplink" {
         + spa_clear_switch_port_uplink
         - spa_undo_clear_switch_port_uplink
     }
@@ -351,6 +359,108 @@ async fn spa_undo_clear_switch_port_bgp_settings(
         .await?)
 }

+async fn spa_clear_switch_port_routes(
+    sagactx: NexusActionContext,
+) -> Result<(), ActionError> {
+    let osagactx = sagactx.user_data();
+    let params = sagactx.saga_params::<Params>()?;
+    let nexus = osagactx.nexus();
+    let opctx = crate::context::op_context_for_saga_action(
+        &sagactx,
+        &params.serialized_authn,
+    );
+
+    let orig_port_settings_id = sagactx
+        .lookup::<Option<Uuid>>("original_switch_port_settings_id")?;
+
+    let id = match orig_port_settings_id {
+        Some(id) => id,
+        None => return Ok(()),
+    };
+
+    let settings = nexus
+        .switch_port_settings_get(&opctx, &NameOrId::Id(id))
+        .await
+        .map_err(ActionError::action_failed)?;
+
+    let mut rq = DeleteStaticRoute4Request {
+        routes: StaticRoute4List { list: Vec::new() },
+    };
+
+    for r in settings.routes {
+        let nexthop = match r.gw.ip() {
+            IpAddr::V4(v4) => v4,
+            IpAddr::V6(_) => continue,
+        };
+        let prefix = match r.dst.ip() {
+            IpAddr::V4(v4) => Prefix4 { value: v4, length: r.dst.prefix() },
+            IpAddr::V6(_) => continue,
+        };
+        let sr = StaticRoute4 { nexthop, prefix };
+        rq.routes.list.push(sr);
+    }
+
+    let mg_client: Arc<mg_admin_client::Client> =
+        select_mg_client(&sagactx, &opctx, params.switch_port_id).await?;
+
+    mg_client.inner.static_remove_v4_route(&rq).await.map_err(|e| {
+        ActionError::action_failed(format!("mgd static route remove {e}"))
+    })?;
+
+    Ok(())
+}
+
+async fn spa_undo_clear_switch_port_routes(
+    sagactx: NexusActionContext,
+) -> Result<(), Error> {
+    let osagactx = sagactx.user_data();
+    let params = sagactx.saga_params::<Params>()?;
+    let nexus = osagactx.nexus();
+    let opctx = crate::context::op_context_for_saga_action(
+        &sagactx,
+        &params.serialized_authn,
+    );
+
+    let orig_port_settings_id = sagactx
+        .lookup::<Option<Uuid>>("original_switch_port_settings_id")?;
+
+    let id = match orig_port_settings_id {
+        Some(id) => id,
+        None => return Ok(()),
+    };
+
+    let settings = nexus
+        .switch_port_settings_get(&opctx, &NameOrId::Id(id))
+        .await
+        .map_err(ActionError::action_failed)?;
+
+    let mut rq = AddStaticRoute4Request {
+        routes: StaticRoute4List { list: Vec::new() },
+    };
+
+    for r in settings.routes {
+        let nexthop = match r.gw.ip() {
+            IpAddr::V4(v4) => v4,
+            IpAddr::V6(_) => continue,
+        };
+        let prefix = match r.dst.ip() {
+            IpAddr::V4(v4) => Prefix4 { value: v4, length: r.dst.prefix() },
+            IpAddr::V6(_) => continue,
+        };
+        let sr = StaticRoute4 { nexthop, prefix };
+        rq.routes.list.push(sr);
+    }
+
+    let mg_client: Arc<mg_admin_client::Client> =
+        select_mg_client(&sagactx, &opctx, params.switch_port_id).await?;
+
+    mg_client.inner.static_add_v4_route(&rq).await.map_err(|e| {
+        ActionError::action_failed(format!("mgd static route add {e}"))
+    })?;
+
+    Ok(())
+}
+
 async fn spa_clear_switch_port_bootstore_network_settings(
     sagactx: NexusActionContext,
 ) -> Result<(), ActionError> {
diff --git a/nexus/src/app/sagas/switch_port_settings_common.rs b/nexus/src/app/sagas/switch_port_settings_common.rs
index 9ef23ebf44..9c710d837d 100644
--- a/nexus/src/app/sagas/switch_port_settings_common.rs
+++ b/nexus/src/app/sagas/switch_port_settings_common.rs
@@ -1,12 +1,14 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0.
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + use super::NexusActionContext; use crate::app::map_switch_zone_addrs; use crate::Nexus; use db::datastore::SwitchPortSettingsCombinedResult; use dpd_client::types::{ LinkCreate, LinkId, LinkSettings, PortFec, PortSettings, PortSpeed, - RouteSettingsV4, RouteSettingsV6, }; -use dpd_client::{Ipv4Cidr, Ipv6Cidr}; use internal_dns::ServiceName; use ipnetwork::IpNetwork; use mg_admin_client::types::Prefix4; @@ -85,41 +87,6 @@ pub(crate) fn api_to_dpd_port_settings( ); } - for r in &settings.routes { - match &r.dst { - IpNetwork::V4(n) => { - let gw = match r.gw.ip() { - IpAddr::V4(gw) => gw, - IpAddr::V6(_) => { - return Err( - "IPv4 destination cannot have IPv6 nexthop".into() - ) - } - }; - dpd_port_settings.v4_routes.insert( - Ipv4Cidr { prefix: n.ip(), prefix_len: n.prefix() } - .to_string(), - vec![RouteSettingsV4 { link_id: link_id.0, nexthop: gw }], - ); - } - IpNetwork::V6(n) => { - let gw = match r.gw.ip() { - IpAddr::V6(gw) => gw, - IpAddr::V4(_) => { - return Err( - "IPv6 destination cannot have IPv4 nexthop".into() - ) - } - }; - dpd_port_settings.v6_routes.insert( - Ipv6Cidr { prefix: n.ip(), prefix_len: n.prefix() } - .to_string(), - vec![RouteSettingsV6 { link_id: link_id.0, nexthop: gw }], - ); - } - } - } - Ok(dpd_port_settings) } diff --git a/package-manifest.toml b/package-manifest.toml index fa6bba7a96..3525b121e4 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -438,10 +438,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" +source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt -source.sha256 = "38851c79c85d53e997db748520fb27c82299ce7e58a550e35646a548498f1271" +source.sha256 = "1cf9cb514d11275d93c4e4760500539a778f23039374508ca07528fcaf0ba3f8" output.type = "tarball" [package.mg-ddm] @@ -454,10 +454,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" +source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "8cd94e9a6f6175081ce78f0281085a08a5306cde453d8e21deb28050945b1d88" +source.sha256 = "a9b959b4287ac2ec7b45ed99ccd00e1f134b8e3d501099cd669cee5de9525ae3" output.type = "zone" output.intermediate_only = true @@ -469,10 +469,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" +source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "802636775fa77dc6eec193e65fde87e403f6a11531745d47ef5e7ff13b242890" +source.sha256 = "ab882fbeab54987645492872e67f3351f8d14629a041465cc845ac8583a7002b" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 75958a2f37..acad2b8d3c 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -6,21 +6,23 @@ use anyhow::{anyhow, Context}; use bootstore::schemes::v0 as bootstore; -use ddm_admin_client::{Client as DdmAdminClient, DdmError}; -use dpd_client::types::{Ipv6Entry, RouteSettingsV6}; +use ddm_admin_client::DdmError; use dpd_client::types::{ - LinkCreate, LinkId, LinkSettings, PortId, PortSettings, RouteSettingsV4, + LinkCreate, LinkId, LinkSettings, PortId, PortSettings, }; use dpd_client::Client as DpdClient; use futures::future; use gateway_client::Client as MgsClient; use internal_dns::resolver::{ResolveError, Resolver as DnsResolver}; use internal_dns::ServiceName; -use ipnetwork::{IpNetwork, Ipv6Network}; -use mg_admin_client::types::{ApplyRequest, BgpPeerConfig, Prefix4}; +use ipnetwork::Ipv6Network; +use mg_admin_client::types::{ + AddStaticRoute4Request, ApplyRequest, BgpPeerConfig, Prefix4, StaticRoute4, + StaticRoute4List, +}; use mg_admin_client::Client as MgdClient; -use omicron_common::address::{Ipv6Subnet, MGD_PORT, MGS_PORT}; -use omicron_common::address::{DDMD_PORT, DENDRITE_PORT}; +use omicron_common::address::DENDRITE_PORT; +use omicron_common::address::{MGD_PORT, MGS_PORT}; use omicron_common::api::internal::shared::{ BgpConfig, PortConfigV1, PortFec, PortSpeed, RackNetworkConfig, RackNetworkConfigV1, SwitchLocation, UplinkConfig, @@ -38,7 +40,6 @@ use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; use std::time::{Duration, Instant}; use thiserror::Error; -static BOUNDARY_SERVICES_ADDR: &str = "fd00:99::1"; const BGP_SESSION_RESOLUTION: u64 = 100; /// Errors that can occur during early network setup @@ -421,22 +422,11 @@ impl<'a> EarlyNetworkSetup<'a> { // configure uplink for each requested uplink in configuration that // matches our switch_location for port_config in &our_ports { - let (ipv6_entry, dpd_port_settings, port_id) = + let (dpd_port_settings, port_id) = self.build_port_config(port_config)?; self.wait_for_dendrite(&dpd).await; - info!( - self.log, - "Configuring boundary services loopback address on switch"; - "config" => #?ipv6_entry - ); - dpd.loopback_ipv6_create(&ipv6_entry).await.map_err(|e| { - EarlyNetworkSetupError::Dendrite(format!( - "unable to create inital switch loopback address: {e}" - )) - })?; - info!( self.log, "Configuring default uplink on switch"; @@ -453,13 +443,6 @@ impl<'a> EarlyNetworkSetup<'a> { "unable to apply uplink port configuration: {e}" )) })?; - - info!(self.log, "advertising boundary services loopback address"); - - let ddmd_addr = - SocketAddrV6::new(switch_zone_underlay_ip, DDMD_PORT, 0, 0); - let ddmd_client = DdmAdminClient::new(&self.log, ddmd_addr)?; - ddmd_client.advertise_prefix(Ipv6Subnet::new(ipv6_entry.addr)); } let mgd = MgdClient::new( @@ -548,22 +531,40 @@ impl<'a> EarlyNetworkSetup<'a> { } } + // Iterate through ports and apply static routing config. 
+ let mut rq = AddStaticRoute4Request { + routes: StaticRoute4List { list: Vec::new() }, + }; + for port in &our_ports { + for r in &port.routes { + let nexthop = match r.nexthop { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match r.destination.ip() { + IpAddr::V4(v4) => { + Prefix4 { value: v4, length: r.destination.prefix() } + } + IpAddr::V6(_) => continue, + }; + let sr = StaticRoute4 { nexthop, prefix }; + rq.routes.list.push(sr); + } + } + mgd.inner.static_add_v4_route(&rq).await.map_err(|e| { + EarlyNetworkSetupError::BgpConfigurationError(format!( + "static routing configuration failed: {e}", + )) + })?; + Ok(our_ports) } fn build_port_config( &self, port_config: &PortConfigV1, - ) -> Result<(Ipv6Entry, PortSettings, PortId), EarlyNetworkSetupError> { + ) -> Result<(PortSettings, PortId), EarlyNetworkSetupError> { info!(self.log, "Building Port Configuration"); - let ipv6_entry = Ipv6Entry { - addr: BOUNDARY_SERVICES_ADDR.parse().map_err(|e| { - EarlyNetworkSetupError::BadConfig(format!( - "failed to parse `BOUNDARY_SERVICES_ADDR` as `Ipv6Addr`: {e}" - )) - })?, - tag: OMICRON_DPD_TAG.into(), - }; let mut dpd_port_settings = PortSettings { links: HashMap::new(), v4_routes: HashMap::new(), @@ -600,26 +601,7 @@ impl<'a> EarlyNetworkSetup<'a> { )) })?; - for r in &port_config.routes { - if let (IpNetwork::V4(dst), IpAddr::V4(nexthop)) = - (r.destination, r.nexthop) - { - dpd_port_settings.v4_routes.insert( - dst.to_string(), - vec![RouteSettingsV4 { link_id: link_id.0, nexthop }], - ); - } - if let (IpNetwork::V6(dst), IpAddr::V6(nexthop)) = - (r.destination, r.nexthop) - { - dpd_port_settings.v6_routes.insert( - dst.to_string(), - vec![RouteSettingsV6 { link_id: link_id.0, nexthop }], - ); - } - } - - Ok((ipv6_entry, dpd_port_settings, port_id)) + Ok((dpd_port_settings, port_id)) } async fn wait_for_dendrite(&self, dpd: &DpdClient) { diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index fdc81c0f8f..12cb2afd24 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -103,7 +103,7 @@ bgp = [] # Routes associated with this port. routes = [{nexthop = "192.168.1.199", destination = "0.0.0.0/0"}] # Addresses associated with this port. -addresses = ["192.168.1.30/32"] +addresses = ["192.168.1.30/24"] # Name of the uplink port. This should always be "qsfp0" when using softnpu. port = "qsfp0" # The speed of this port. diff --git a/tools/ci_check_opte_ver.sh b/tools/ci_check_opte_ver.sh index 26382690e1..7f05ec1f36 100755 --- a/tools/ci_check_opte_ver.sh +++ b/tools/ci_check_opte_ver.sh @@ -1,6 +1,11 @@ #!/bin/bash set -euo pipefail +source tools/opte_version_override +if [[ "x$OPTE_COMMIT" != "x" ]]; then + exit 0 +fi + # Grab all the oxidecomputer/opte dependencies' revisions readarray -t opte_deps_revs < <(toml get Cargo.toml workspace.dependencies | jq -r 'to_entries | .[] | select(.value.git? | contains("oxidecomputer/opte")?) | .value.rev') OPTE_REV="${opte_deps_revs[0]}" diff --git a/tools/install_opte.sh b/tools/install_opte.sh index 20a33b05a5..b572c305a7 100755 --- a/tools/install_opte.sh +++ b/tools/install_opte.sh @@ -97,3 +97,13 @@ if [[ "$RC" -ne 0 ]]; then echo "The \`opteadm\` administration tool is not on your path." echo "You may add \"/opt/oxide/opte/bin\" to your path to access it." 
fi
+
+source $OMICRON_TOP/tools/opte_version_override
+
+if [[ "x$OPTE_COMMIT" != "x" ]]; then
+    set +x
+    curl -fOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde
+    pfexec rem_drv xde || true
+    pfexec mv xde /kernel/drv/amd64/xde
+    pfexec add_drv xde || true
+fi
diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version
index 37c099d7f5..be8772b7e6 100644
--- a/tools/maghemite_ddm_openapi_version
+++ b/tools/maghemite_ddm_openapi_version
@@ -1,2 +1,2 @@
-COMMIT="2fd39b75df696961e5ea190c7d74dd91f4849cd3"
-SHA2="9737906555a60911636532f00f1dc2866dc7cd6553beb106e9e57beabad41cdf"
+COMMIT="869cf802efcbd2f0edbb614ed92caa3e3164c1fc"
+SHA2="0b0dbc2f8bbc5d2d9be92d64c4865f8f9335355aae62f7de9f67f81dfb3f1803"
diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version
index 329c05fc42..6bf1999c61 100644
--- a/tools/maghemite_mg_openapi_version
+++ b/tools/maghemite_mg_openapi_version
@@ -1,2 +1,2 @@
-COMMIT="2fd39b75df696961e5ea190c7d74dd91f4849cd3"
-SHA2="931efa310d972b1f8afba2308751fc6a2035afbaebba77b3a40a8358c123ba3c"
+COMMIT="869cf802efcbd2f0edbb614ed92caa3e3164c1fc"
+SHA2="7618511f905d26394ef7c552339dd78835ce36a6def0d85b05b6d1e363a5e7b4"
diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums
index 1d3cf98f94..b5fe84b662 100644
--- a/tools/maghemite_mgd_checksums
+++ b/tools/maghemite_mgd_checksums
@@ -1,2 +1,2 @@
-CIDL_SHA256="802636775fa77dc6eec193e65fde87e403f6a11531745d47ef5e7ff13b242890"
-MGD_LINUX_SHA256="1bcadfd700902e3640843e0bb53d3defdbcd8d86c3279efa0953ae8d6437e2b0"
\ No newline at end of file
+CIDL_SHA256="ab882fbeab54987645492872e67f3351f8d14629a041465cc845ac8583a7002b"
+MGD_LINUX_SHA256="93331c1001e3aa506a8c1b83346abba1995e489910bff2c94a86730b96617a34"
\ No newline at end of file
diff --git a/tools/opte_version b/tools/opte_version
index 82d79dcf28..0a04873e11 100644
--- a/tools/opte_version
+++ b/tools/opte_version
@@ -1 +1 @@
-0.27.214
+0.28.215
diff --git a/tools/opte_version_override b/tools/opte_version_override
new file mode 100644
index 0000000000..80a6529b24
--- /dev/null
+++ b/tools/opte_version_override
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+# only set this if you want to override the version of opte/xde installed by the
+# install_opte.sh script
+OPTE_COMMIT=""

From cc643045191d92e539faa839d662a4198d81d718 Mon Sep 17 00:00:00 2001
From: Kyle Simpson
Date: Wed, 24 Jan 2024 21:05:37 +0000
Subject: [PATCH 15/15] Live attach/detach of external IPs (#4694)

This PR adds new endpoints to attach and detach external IPs to/from an
individual instance at runtime, when instances are either stopped or started.
These new endpoints are:

* POST `/v1/floating-ips/{floating_ip}/attach`
* POST `/v1/floating-ips/{floating_ip}/detach`
* POST `/v1/instances/{instance}/external-ips/ephemeral`
* DELETE `/v1/instances/{instance}/external-ips/ephemeral`

These follow and enforce the same rules as external IPs registered during
instance creation: at most one ephemeral IP, and at most 32 external IPs total.
`/v1/floating-ips/{floating_ip}/attach` includes a `kind` field to account for
future API resources which a FIP may be bound to -- such as internet gateways,
load balancers, and services.

## Interaction with other instance lifecycle changes and sagas

Both external IP modify sagas begin with an atomic update to external IP attach
state conditioned on $\mathit{state} \in \{\mathit{started}, \mathit{stopped}\}$.
As a result, we know that an external IP saga can only ever start before any other instance state change occurs. We then only need to think about how these other sagas/events must behave when called *during* an attach/detach, keeping in mind that these are worst-case orderings: attach/detach are likely to complete quickly. ### Instance start & migrate Both of these sagas alter an instance's functional sled ID, which controls whether NAT entry insertion and OPTE port state updates are performed. If an IP attach/detach is incomplete when either saga reaches `instance_ensure_dpd_config` or `instance_ensure_registered` (e.g., any IP associated with the target instance is in attaching/detaching state), the start/migrate will unwind with an HTTP 503. Generally, neither should undo in practice since IP attach/detach are fast operations -- particularly when an instance is formerly stopped. This is used solely to guarantee that only one saga is accessing a given external IP at a time, and that the update target remains unchanged. ### Instance stop & delete These operations are either not sagaized (stop), or cannot unwind (delete), and so we cannot block them using IP attach state. IP attach/detach will unwind if a given sled-agent is no longer responsible for an instance. Instance delete will force-detach IP addresses bound to an instance, and if this is seen then IP attach will deliberately unwind to potentially clean up NAT state. OPTE/DPD undo operations are best-effort in such a case to prevent stuck sagas. Instance stop and IP attach may interleave such that the latter adds additional NAT entries after other network state is cleared. Because we cannot unwind in this case, `instance_ensure_dpd_config` will now attempt to remove leftover conflicting RPW entries if they are detected, since we know they are a deviation from intended state. ## Additional/supporting changes * Pool/floating IP specifiers in instance create now take `NameOrId`, parameter names changed to match. * External IP create/bind in instance create no longer double-resolves name on saga unwind. * `views::ExternalIp` can now contain `FloatingIp` body. * DPD NAT insert/remove functions now perform single rule update via ID instead of index into the EIP list -- index-based was unstable under live addition/removal. * NAT RPW ensure is now more authoritative, and will remove conflicting entries if an initial insert fails. * Pool `NameOrId` resolution for floating IP allocation pulled up from `Datastore` into `Nexus`. --- Closes #4630 and #4628. 
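For orientation, the begin/complete handshake each saga performs looks roughly like this. The two datastore methods are the ones added in this PR; the free-standing wrapper, its argument plumbing, and the elided DPD/OPTE step are illustrative only:

```rust
use nexus_db_model::{IpAttachState, IpKind};
use nexus_db_queries::authz;
use nexus_db_queries::context::OpContext;
use nexus_db_queries::db::DataStore;
use omicron_common::api::external::Error;
use uuid::Uuid;

// Sketch: move a floating IP Detached -> Attaching -> Attached, skipping the
// dataplane work entirely on an idempotent replay (`do_saga == false`).
async fn attach_floating_ip(
    datastore: &DataStore,
    opctx: &OpContext,
    authz_fip: &authz::FloatingIp,
    instance_id: Uuid,
) -> Result<(), Error> {
    let (eip, do_saga) = datastore
        .floating_ip_begin_attach(opctx, authz_fip, instance_id, false)
        .await?;
    if do_saga {
        // ... program DPD NAT entries and update the sled's OPTE port here ...
        datastore
            .external_ip_complete_op(
                opctx,
                eip.id,
                IpKind::Floating,
                IpAttachState::Attaching, // expected transitional state
                IpAttachState::Attached,  // intended end state
            )
            .await?;
    }
    Ok(())
}
```

If an intermediate step fails, the saga unwinds by driving the same IP back via `external_ip_complete_op(..., Attaching, Detached)`, which is what makes the transitional states safe for other callers to observe.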
--- dev-tools/omdb/src/bin/omdb/db.rs | 3 + end-to-end-tests/src/instance_launch.rs | 13 +- illumos-utils/src/opte/illumos.rs | 10 + illumos-utils/src/opte/non_illumos.rs | 10 + illumos-utils/src/opte/port_manager.rs | 116 +++ nexus/db-model/src/external_ip.rs | 120 ++- nexus/db-model/src/instance.rs | 17 +- nexus/db-model/src/instance_state.rs | 6 + nexus/db-model/src/ipv4_nat_entry.rs | 3 +- nexus/db-model/src/macaddr.rs | 13 +- nexus/db-model/src/schema.rs | 1 + nexus/db-queries/src/db/datastore/disk.rs | 4 +- .../src/db/datastore/external_ip.rs | 707 +++++++++++++++--- nexus/db-queries/src/db/datastore/instance.rs | 102 +-- .../src/db/datastore/ipv4_nat_entry.rs | 29 +- nexus/db-queries/src/db/datastore/mod.rs | 26 +- nexus/db-queries/src/db/pool_connection.rs | 1 + .../db-queries/src/db/queries/external_ip.rs | 131 +++- nexus/src/app/external_ip.rs | 84 ++- nexus/src/app/instance.rs | 78 ++ nexus/src/app/instance_network.rs | 384 ++++++---- nexus/src/app/mod.rs | 4 +- nexus/src/app/sagas/instance_common.rs | 336 ++++++++- nexus/src/app/sagas/instance_create.rs | 141 +++- nexus/src/app/sagas/instance_delete.rs | 2 +- nexus/src/app/sagas/instance_ip_attach.rs | 583 +++++++++++++++ nexus/src/app/sagas/instance_ip_detach.rs | 551 ++++++++++++++ nexus/src/app/sagas/instance_start.rs | 27 +- nexus/src/app/sagas/mod.rs | 8 + nexus/src/external_api/http_entrypoints.rs | 140 ++++ nexus/test-utils/src/resource_helpers.rs | 4 +- nexus/tests/integration_tests/disks.rs | 1 + nexus/tests/integration_tests/endpoints.rs | 62 +- nexus/tests/integration_tests/external_ips.rs | 490 +++++++++++- nexus/tests/integration_tests/instances.rs | 49 +- .../integration_tests/subnet_allocation.rs | 1 + nexus/tests/output/nexus_tags.txt | 4 + nexus/types/src/external_api/params.rs | 45 +- nexus/types/src/external_api/shared.rs | 4 +- nexus/types/src/external_api/views.rs | 53 +- openapi/nexus.json | 363 ++++++++- openapi/sled-agent.json | 115 +++ schema/crdb/25.0.0/up01.sql | 6 + schema/crdb/25.0.0/up02.sql | 4 + schema/crdb/25.0.0/up03.sql | 7 + schema/crdb/25.0.0/up04.sql | 7 + schema/crdb/25.0.0/up05.sql | 2 + schema/crdb/25.0.0/up06.sql | 4 + schema/crdb/25.0.0/up07.sql | 4 + schema/crdb/25.0.0/up08.sql | 2 + schema/crdb/25.0.0/up09.sql | 4 + schema/crdb/dbinit.sql | 38 +- sled-agent/src/http_entrypoints.rs | 36 +- sled-agent/src/instance.rs | 154 +++- sled-agent/src/instance_manager.rs | 37 + sled-agent/src/params.rs | 10 + sled-agent/src/sim/http_entrypoints.rs | 41 +- sled-agent/src/sim/sled_agent.rs | 64 +- sled-agent/src/sled_agent.rs | 40 +- 59 files changed, 4776 insertions(+), 525 deletions(-) create mode 100644 nexus/src/app/sagas/instance_ip_attach.rs create mode 100644 nexus/src/app/sagas/instance_ip_detach.rs create mode 100644 schema/crdb/25.0.0/up01.sql create mode 100644 schema/crdb/25.0.0/up02.sql create mode 100644 schema/crdb/25.0.0/up03.sql create mode 100644 schema/crdb/25.0.0/up04.sql create mode 100644 schema/crdb/25.0.0/up05.sql create mode 100644 schema/crdb/25.0.0/up06.sql create mode 100644 schema/crdb/25.0.0/up07.sql create mode 100644 schema/crdb/25.0.0/up08.sql create mode 100644 schema/crdb/25.0.0/up09.sql diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 23e9206506..a465183351 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -44,6 +44,7 @@ use nexus_db_model::ExternalIp; use nexus_db_model::HwBaseboardId; use nexus_db_model::Instance; use nexus_db_model::InvCollection; +use 
nexus_db_model::IpAttachState; use nexus_db_model::Project; use nexus_db_model::Region; use nexus_db_model::RegionSnapshot; @@ -1705,6 +1706,7 @@ async fn cmd_db_eips( ip: ipnetwork::IpNetwork, ports: PortRange, kind: String, + state: IpAttachState, owner: Owner, } @@ -1789,6 +1791,7 @@ async fn cmd_db_eips( first: ip.first_port.into(), last: ip.last_port.into(), }, + state: ip.state, kind: format!("{:?}", ip.kind), owner, }; diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index b3d1406070..2efd66bf91 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -5,9 +5,9 @@ use anyhow::{ensure, Context as _, Result}; use async_trait::async_trait; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{ - ByteCount, DiskCreate, DiskSource, ExternalIpCreate, InstanceCpuCount, - InstanceCreate, InstanceDiskAttachment, InstanceNetworkInterfaceAttachment, - SshKeyCreate, + ByteCount, DiskCreate, DiskSource, ExternalIp, ExternalIpCreate, + InstanceCpuCount, InstanceCreate, InstanceDiskAttachment, + InstanceNetworkInterfaceAttachment, SshKeyCreate, }; use oxide_client::{ClientDisksExt, ClientInstancesExt, ClientSessionExt}; use russh::{ChannelMsg, Disconnect}; @@ -70,7 +70,7 @@ async fn instance_launch() -> Result<()> { name: disk_name.clone(), }], network_interfaces: InstanceNetworkInterfaceAttachment::Default, - external_ips: vec![ExternalIpCreate::Ephemeral { pool_name: None }], + external_ips: vec![ExternalIpCreate::Ephemeral { pool: None }], user_data: String::new(), start: true, }) @@ -87,7 +87,10 @@ async fn instance_launch() -> Result<()> { .items .first() .context("no external IPs")? - .ip; + .clone(); + let ExternalIp::Ephemeral { ip: ip_addr } = ip_addr else { + anyhow::bail!("IP bound to instance was not ephemeral as required.") + }; eprintln!("instance external IP: {}", ip_addr); // poll serial for login prompt, waiting 5 min max diff --git a/illumos-utils/src/opte/illumos.rs b/illumos-utils/src/opte/illumos.rs index 88e8d343b1..527172b976 100644 --- a/illumos-utils/src/opte/illumos.rs +++ b/illumos-utils/src/opte/illumos.rs @@ -11,6 +11,7 @@ use omicron_common::api::internal::shared::NetworkInterfaceKind; use opte_ioctl::OpteHdl; use slog::info; use slog::Logger; +use std::net::IpAddr; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -46,6 +47,15 @@ pub enum Error { #[error("Tried to release non-existent port ({0}, {1:?})")] ReleaseMissingPort(uuid::Uuid, NetworkInterfaceKind), + + #[error("Tried to update external IPs on non-existent port ({0}, {1:?})")] + ExternalIpUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind), + + #[error("Could not find Primary NIC")] + NoPrimaryNic, + + #[error("Can't attach new ephemeral IP {0}, currently have {1}")] + ImplicitEphemeralIpDetach(IpAddr, IpAddr), } /// Delete all xde devices on the system. 
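These error variants are mirrored for non-illumos builds in the stub module that follows. A caller-side sketch of the update path they guard (the wrapper function and addresses are hypothetical; `external_ips_ensure` is the `PortManager` method added in `port_manager.rs` later in this patch):

```rust
use std::net::{IpAddr, Ipv4Addr};

use illumos_utils::opte::{Error, PortManager};
use omicron_common::api::internal::shared::NetworkInterfaceKind;
use uuid::Uuid;

// Illustrative only: ensure one floating IP and no ephemeral/SNAT config on a
// previously created OPTE port. A vanished port surfaces as the new
// `Error::ExternalIpUpdateMissingPort` variant rather than a panic.
fn ensure_one_floating(
    manager: &PortManager,
    nic_id: Uuid,
    instance_id: Uuid,
) -> Result<(), Error> {
    let floating = [IpAddr::V4(Ipv4Addr::new(203, 0, 113, 10))];
    manager.external_ips_ensure(
        nic_id,
        NetworkInterfaceKind::Instance { id: instance_id },
        None, // leave source NAT unset
        None, // no ephemeral IP
        &floating,
    )
}
```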
diff --git a/illumos-utils/src/opte/non_illumos.rs b/illumos-utils/src/opte/non_illumos.rs
index ccd4990d5f..bf61249fb1 100644
--- a/illumos-utils/src/opte/non_illumos.rs
+++ b/illumos-utils/src/opte/non_illumos.rs
@@ -8,6 +8,7 @@ use slog::Logger;
 
 use crate::addrobj::AddrObject;
 use omicron_common::api::internal::shared::NetworkInterfaceKind;
+use std::net::IpAddr;
 
 #[derive(thiserror::Error, Debug)]
 pub enum Error {
@@ -16,6 +17,15 @@ pub enum Error {
 
     #[error("Tried to release non-existent port ({0}, {1:?})")]
     ReleaseMissingPort(uuid::Uuid, NetworkInterfaceKind),
+
+    #[error("Tried to update external IPs on non-existent port ({0}, {1:?})")]
+    ExternalIpUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind),
+
+    #[error("Could not find Primary NIC")]
+    NoPrimaryNic,
+
+    #[error("Can't attach new ephemeral IP {0}, currently have {1}")]
+    ImplicitEphemeralIpDetach(IpAddr, IpAddr),
 }
 
 pub fn initialize_xde_driver(
diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs
index c472996598..2b2f622070 100644
--- a/illumos-utils/src/opte/port_manager.rs
+++ b/illumos-utils/src/opte/port_manager.rs
@@ -28,6 +28,7 @@ use oxide_vpc::api::MacAddr;
 use oxide_vpc::api::RouterTarget;
 use oxide_vpc::api::SNat4Cfg;
 use oxide_vpc::api::SNat6Cfg;
+use oxide_vpc::api::SetExternalIpsReq;
 use oxide_vpc::api::VpcCfg;
 use slog::debug;
 use slog::error;
@@ -398,6 +399,121 @@ impl PortManager {
         Ok((port, ticket))
     }
 
+    /// Ensure external IPs for an OPTE port are up to date.
+    #[cfg_attr(not(target_os = "illumos"), allow(unused_variables))]
+    pub fn external_ips_ensure(
+        &self,
+        nic_id: Uuid,
+        nic_kind: NetworkInterfaceKind,
+        source_nat: Option<SourceNatConfig>,
+        ephemeral_ip: Option<IpAddr>,
+        floating_ips: &[IpAddr],
+    ) -> Result<(), Error> {
+        let ports = self.inner.ports.lock().unwrap();
+        let port = ports.get(&(nic_id, nic_kind)).ok_or_else(|| {
+            Error::ExternalIpUpdateMissingPort(nic_id, nic_kind)
+        })?;
+
+        // XXX: duplicates parts of macro logic in `create_port`.
+        macro_rules! ext_ip_cfg {
+            ($ip:expr, $log_prefix:literal, $ip_t:path, $cidr_t:path,
+             $ipcfg_e:path, $ipcfg_t:ident, $snat_t:ident) => {{
+                let snat = match source_nat {
+                    Some(snat) => {
+                        let $ip_t(snat_ip) = snat.ip else {
+                            error!(
+                                self.inner.log,
+                                concat!($log_prefix, " SNAT config");
+                                "snat_ip" => ?snat.ip,
+                            );
+                            return Err(Error::InvalidPortIpConfig);
+                        };
+                        let ports = snat.first_port..=snat.last_port;
+                        Some($snat_t { external_ip: snat_ip.into(), ports })
+                    }
+                    None => None,
+                };
+                let ephemeral_ip = match ephemeral_ip {
+                    Some($ip_t(ip)) => Some(ip.into()),
+                    Some(_) => {
+                        error!(
+                            self.inner.log,
+                            concat!($log_prefix, " ephemeral IP");
+                            "ephemeral_ip" => ?ephemeral_ip,
+                        );
+                        return Err(Error::InvalidPortIpConfig);
+                    }
+                    None => None,
+                };
+                let floating_ips: Vec<_> = floating_ips
+                    .iter()
+                    .copied()
+                    .map(|ip| match ip {
+                        $ip_t(ip) => Ok(ip.into()),
+                        _ => {
+                            error!(
+                                self.inner.log,
+                                concat!($log_prefix, " floating IP");
+                                "floating_ip" => ?ip,
+                            );
+                            Err(Error::InvalidPortIpConfig)
+                        }
+                    })
+                    .collect::<Result<Vec<_>, _>>()?;
+
+                ExternalIpCfg {
+                    ephemeral_ip,
+                    snat,
+                    floating_ips,
+                }
+            }}
+        }
+
+        // TODO-completeness: support dual-stack. We'll need to explicitly store
+        // a v4 and a v6 ephemeral IP + SNat + gateway + ... in `InstanceInner`
+        // to have enough info to build both.
+ let mut v4_cfg = None; + let mut v6_cfg = None; + match port.gateway().ip { + IpAddr::V4(_) => { + v4_cfg = Some(ext_ip_cfg!( + ip, + "Expected IPv4", + IpAddr::V4, + IpCidr::Ip4, + IpCfg::Ipv4, + Ipv4Cfg, + SNat4Cfg + )) + } + IpAddr::V6(_) => { + v6_cfg = Some(ext_ip_cfg!( + ip, + "Expected IPv6", + IpAddr::V6, + IpCidr::Ip6, + IpCfg::Ipv6, + Ipv6Cfg, + SNat6Cfg + )) + } + } + + let req = SetExternalIpsReq { + port_name: port.name().into(), + external_ips_v4: v4_cfg, + external_ips_v6: v6_cfg, + }; + + #[cfg(target_os = "illumos")] + let hdl = opte_ioctl::OpteHdl::open(opte_ioctl::OpteHdl::XDE_CTL)?; + + #[cfg(target_os = "illumos")] + hdl.set_external_ips(&req)?; + + Ok(()) + } + #[cfg(target_os = "illumos")] pub fn firewall_rules_ensure( &self, diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index e95185658f..1e9def4182 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -23,6 +23,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadata; use serde::Deserialize; use serde::Serialize; +use sled_agent_client::types::InstanceExternalIpBody; use std::convert::TryFrom; use std::net::IpAddr; use uuid::Uuid; @@ -32,7 +33,7 @@ impl_enum_type!( #[diesel(postgres_type(name = "ip_kind", schema = "public"))] pub struct IpKindEnum; - #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Deserialize, Serialize)] #[diesel(sql_type = IpKindEnum)] pub enum IpKind; @@ -41,6 +42,42 @@ impl_enum_type!( Floating => b"floating" ); +impl_enum_type!( + #[derive(SqlType, Debug, Clone, Copy, QueryId)] + #[diesel(postgres_type(name = "ip_attach_state"))] + pub struct IpAttachStateEnum; + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Deserialize, Serialize)] + #[diesel(sql_type = IpAttachStateEnum)] + pub enum IpAttachState; + + Detached => b"detached" + Attached => b"attached" + Detaching => b"detaching" + Attaching => b"attaching" +); + +impl std::fmt::Display for IpAttachState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + IpAttachState::Detached => "Detached", + IpAttachState::Attached => "Attached", + IpAttachState::Detaching => "Detaching", + IpAttachState::Attaching => "Attaching", + }) + } +} + +impl std::fmt::Display for IpKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + IpKind::Floating => "floating", + IpKind::Ephemeral => "ephemeral", + IpKind::SNat => "SNAT", + }) + } +} + /// The main model type for external IP addresses for instances /// and externally-facing services. /// @@ -51,7 +88,9 @@ impl_enum_type!( /// addresses and port ranges, while source NAT IPs are not discoverable in the /// API at all, and only provide outbound connectivity to instances, not /// inbound. 
-#[derive(Debug, Clone, Selectable, Queryable, Insertable)] +#[derive( + Debug, Clone, Selectable, Queryable, Insertable, Deserialize, Serialize, +)] #[diesel(table_name = external_ip)] pub struct ExternalIp { pub id: Uuid, @@ -76,6 +115,7 @@ pub struct ExternalIp { pub last_port: SqlU16, // Only Some(_) for instance Floating IPs pub project_id: Option, + pub state: IpAttachState, } /// A view type constructed from `ExternalIp` used to represent Floating IP @@ -125,6 +165,7 @@ pub struct IncompleteExternalIp { parent_id: Option, pool_id: Uuid, project_id: Option, + state: IpAttachState, // Optional address requesting that a specific IP address be allocated. explicit_ip: Option, // Optional range when requesting a specific SNAT range be allocated. @@ -137,34 +178,38 @@ impl IncompleteExternalIp { instance_id: Uuid, pool_id: Uuid, ) -> Self { + let kind = IpKind::SNat; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::SNat, + kind, is_service: false, parent_id: Some(instance_id), pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } - pub fn for_ephemeral(id: Uuid, instance_id: Uuid, pool_id: Uuid) -> Self { + pub fn for_ephemeral(id: Uuid, pool_id: Uuid) -> Self { + let kind = IpKind::Ephemeral; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::Ephemeral, + kind, is_service: false, - parent_id: Some(instance_id), + parent_id: None, pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } @@ -175,18 +220,20 @@ impl IncompleteExternalIp { project_id: Uuid, pool_id: Uuid, ) -> Self { + let kind = IpKind::Floating; Self { id, name: Some(name.clone()), description: Some(description.to_string()), time_created: Utc::now(), - kind: IpKind::Floating, + kind, is_service: false, parent_id: None, pool_id, project_id: Some(project_id), explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } @@ -198,18 +245,20 @@ impl IncompleteExternalIp { explicit_ip: IpAddr, pool_id: Uuid, ) -> Self { + let kind = IpKind::Floating; Self { id, name: Some(name.clone()), description: Some(description.to_string()), time_created: Utc::now(), - kind: IpKind::Floating, + kind, is_service: false, parent_id: None, pool_id, project_id: Some(project_id), explicit_ip: Some(explicit_ip.into()), explicit_port_range: None, + state: kind.initial_state(), } } @@ -233,6 +282,7 @@ impl IncompleteExternalIp { project_id: None, explicit_ip: Some(IpNetwork::from(address)), explicit_port_range: None, + state: IpAttachState::Attached, } } @@ -250,18 +300,20 @@ impl IncompleteExternalIp { NUM_SOURCE_NAT_PORTS, ); let explicit_port_range = Some((first_port.into(), last_port.into())); + let kind = IpKind::SNat; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::SNat, + kind, is_service: true, parent_id: Some(service_id), pool_id, project_id: None, explicit_ip: Some(IpNetwork::from(address)), explicit_port_range, + state: kind.initial_state(), } } @@ -272,34 +324,38 @@ impl IncompleteExternalIp { service_id: Uuid, pool_id: Uuid, ) -> Self { + let kind = IpKind::Floating; Self { id, name: Some(name.clone()), description: Some(description.to_string()), time_created: Utc::now(), - kind: IpKind::Floating, + kind, is_service: true, parent_id: Some(service_id), pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: IpAttachState::Attached, } } pub fn for_service_snat(id: Uuid, service_id: Uuid, 
pool_id: Uuid) -> Self { + let kind = IpKind::SNat; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::SNat, + kind, is_service: true, parent_id: Some(service_id), pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } @@ -339,6 +395,10 @@ impl IncompleteExternalIp { &self.project_id } + pub fn state(&self) -> &IpAttachState { + &self.state + } + pub fn explicit_ip(&self) -> &Option { &self.explicit_ip } @@ -348,6 +408,18 @@ impl IncompleteExternalIp { } } +impl IpKind { + /// The initial state which a new non-service IP should + /// be allocated in. + pub fn initial_state(&self) -> IpAttachState { + match &self { + IpKind::SNat => IpAttachState::Attached, + IpKind::Ephemeral => IpAttachState::Detached, + IpKind::Floating => IpAttachState::Detached, + } + } +} + impl TryFrom for shared::IpKind { type Error = Error; @@ -371,8 +443,15 @@ impl TryFrom for views::ExternalIp { "Service IPs should not be exposed in the API", )); } - let kind = ip.kind.try_into()?; - Ok(views::ExternalIp { kind, ip: ip.ip.ip() }) + match ip.kind { + IpKind::Floating => Ok(views::ExternalIp::Floating(ip.try_into()?)), + IpKind::Ephemeral => { + Ok(views::ExternalIp::Ephemeral { ip: ip.ip.ip() }) + } + IpKind::SNat => Err(Error::internal_error( + "SNAT IP addresses should not be exposed in the API", + )), + } } } @@ -450,3 +529,18 @@ impl From for views::FloatingIp { } } } + +impl TryFrom for InstanceExternalIpBody { + type Error = Error; + + fn try_from(value: ExternalIp) -> Result { + let ip = value.ip.ip(); + match value.kind { + IpKind::Ephemeral => Ok(InstanceExternalIpBody::Ephemeral(ip)), + IpKind::Floating => Ok(InstanceExternalIpBody::Floating(ip)), + IpKind::SNat => Err(Error::invalid_request( + "cannot dynamically add/remove SNAT allocation", + )), + } + } +} diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 9252926547..e10f8c2603 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -2,9 +2,11 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use super::{ByteCount, Disk, Generation, InstanceCpuCount, InstanceState};
+use super::{
+    ByteCount, Disk, ExternalIp, Generation, InstanceCpuCount, InstanceState,
+};
 use crate::collection::DatastoreAttachTargetConfig;
-use crate::schema::{disk, instance};
+use crate::schema::{disk, external_ip, instance};
 use chrono::{DateTime, Utc};
 use db_macros::Resource;
 use nexus_types::external_api::params;
@@ -101,6 +103,17 @@ impl DatastoreAttachTargetConfig<Disk> for Instance {
     type ResourceTimeDeletedColumn = disk::dsl::time_deleted;
 }
 
+impl DatastoreAttachTargetConfig<ExternalIp> for Instance {
+    type Id = Uuid;
+
+    type CollectionIdColumn = instance::dsl::id;
+    type CollectionTimeDeletedColumn = instance::dsl::time_deleted;
+
+    type ResourceIdColumn = external_ip::dsl::id;
+    type ResourceCollectionIdColumn = external_ip::dsl::parent_id;
+    type ResourceTimeDeletedColumn = external_ip::dsl::time_deleted;
+}
+
 /// Runtime state of the Instance, including the actual running state and minimal
 /// metadata
 ///
diff --git a/nexus/db-model/src/instance_state.rs b/nexus/db-model/src/instance_state.rs
index 7b98850b43..dca809758f 100644
--- a/nexus/db-model/src/instance_state.rs
+++ b/nexus/db-model/src/instance_state.rs
@@ -65,3 +65,9 @@ impl From<InstanceState> for sled_agent_client::types::InstanceState {
     }
 }
+
+impl From<external::InstanceState> for InstanceState {
+    fn from(state: external::InstanceState) -> Self {
+        Self::new(state)
+    }
+}
diff --git a/nexus/db-model/src/ipv4_nat_entry.rs b/nexus/db-model/src/ipv4_nat_entry.rs
index 570a46b5e9..b0fa2b8eb9 100644
--- a/nexus/db-model/src/ipv4_nat_entry.rs
+++ b/nexus/db-model/src/ipv4_nat_entry.rs
@@ -5,6 +5,7 @@ use crate::{schema::ipv4_nat_entry, Ipv4Net, Ipv6Net, SqlU16, Vni};
 use chrono::{DateTime, Utc};
 use omicron_common::api::external;
 use schemars::JsonSchema;
+use serde::Deserialize;
 use serde::Serialize;
 use uuid::Uuid;
 
@@ -21,7 +22,7 @@ pub struct Ipv4NatValues {
 }
 
 /// Database representation of an Ipv4 NAT Entry.
-#[derive(Queryable, Debug, Clone, Selectable)]
+#[derive(Queryable, Debug, Clone, Selectable, Serialize, Deserialize)]
 #[diesel(table_name = ipv4_nat_entry)]
 pub struct Ipv4NatEntry {
     pub id: Uuid,
diff --git a/nexus/db-model/src/macaddr.rs b/nexus/db-model/src/macaddr.rs
index dceb8acf48..b3329598bd 100644
--- a/nexus/db-model/src/macaddr.rs
+++ b/nexus/db-model/src/macaddr.rs
@@ -8,8 +8,19 @@ use diesel::pg::Pg;
 use diesel::serialize::{self, ToSql};
 use diesel::sql_types;
 use omicron_common::api::external;
+use serde::Deserialize;
+use serde::Serialize;
 
-#[derive(Clone, Copy, Debug, PartialEq, AsExpression, FromSqlRow)]
+#[derive(
+    Clone,
+    Copy,
+    Debug,
+    PartialEq,
+    AsExpression,
+    FromSqlRow,
+    Serialize,
+    Deserialize,
+)]
 #[diesel(sql_type = sql_types::BigInt)]
 pub struct MacAddr(pub external::MacAddr);
 
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index 2e7493716e..11cdf87f6c 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -574,6 +574,7 @@ table! {
         last_port -> Int4,
 
         project_id -> Nullable<Uuid>,
+        state -> crate::IpAttachStateEnum,
     }
 }
 
diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs
index 2055287e62..390376e627 100644
--- a/nexus/db-queries/src/db/datastore/disk.rs
+++ b/nexus/db-queries/src/db/datastore/disk.rs
@@ -206,7 +206,7 @@ impl DataStore {
 
         let (instance, disk) = query.attach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?)
            .await
-            .or_else(|e| {
+            .or_else(|e: AttachError<Disk>| {
                 match e {
                     AttachError::CollectionNotFound => {
                         Err(Error::not_found_by_id(
@@ -348,7 +348,7 @@ impl DataStore {
         )
         .detach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?)
         .await
-        .or_else(|e| {
+        .or_else(|e: DetachError<Disk>| {
             match e {
                 DetachError::CollectionNotFound => {
                     Err(Error::not_found_by_id(
diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs
index 02ce950118..9d4d947476 100644
--- a/nexus/db-queries/src/db/datastore/external_ip.rs
+++ b/nexus/db-queries/src/db/datastore/external_ip.rs
@@ -9,6 +9,10 @@ use crate::authz;
 use crate::authz::ApiResource;
 use crate::context::OpContext;
 use crate::db;
+use crate::db::collection_attach::AttachError;
+use crate::db::collection_attach::DatastoreAttachTarget;
+use crate::db::collection_detach::DatastoreDetachTarget;
+use crate::db::collection_detach::DetachError;
 use crate::db::error::public_error_from_diesel;
 use crate::db::error::retryable;
 use crate::db::error::ErrorHandler;
@@ -22,11 +26,17 @@ use crate::db::model::Name;
 use crate::db::pagination::paginated;
 use crate::db::pool::DbConnection;
 use crate::db::queries::external_ip::NextExternalIp;
+use crate::db::queries::external_ip::MAX_EXTERNAL_IPS_PER_INSTANCE;
+use crate::db::queries::external_ip::SAFE_TO_ATTACH_INSTANCE_STATES;
+use crate::db::queries::external_ip::SAFE_TO_ATTACH_INSTANCE_STATES_CREATING;
+use crate::db::queries::external_ip::SAFE_TRANSIENT_INSTANCE_STATES;
 use crate::db::update_and_check::UpdateAndCheck;
 use crate::db::update_and_check::UpdateStatus;
 use async_bb8_diesel::AsyncRunQueryDsl;
 use chrono::Utc;
 use diesel::prelude::*;
+use nexus_db_model::Instance;
+use nexus_db_model::IpAttachState;
 use nexus_types::external_api::params;
 use nexus_types::identity::Resource;
 use omicron_common::api::external::http_pagination::PaginatedBy;
@@ -35,13 +45,14 @@ use omicron_common::api::external::DeleteResult;
 use omicron_common::api::external::Error;
 use omicron_common::api::external::ListResultVec;
 use omicron_common::api::external::LookupResult;
-use omicron_common::api::external::NameOrId;
 use omicron_common::api::external::ResourceType;
 use omicron_common::api::external::UpdateResult;
 use ref_cast::RefCast;
 use std::net::IpAddr;
 use uuid::Uuid;
 
+const MAX_EXTERNAL_IPS_PLUS_SNAT: u32 = MAX_EXTERNAL_IPS_PER_INSTANCE + 1;
+
 impl DataStore {
     /// Create an external IP address for source NAT for an instance.
     pub async fn allocate_instance_snat_ip(
@@ -60,23 +71,43 @@
     }
 
     /// Create an Ephemeral IP address for an instance.
+    ///
+    /// For consistency between instance create and External IP attach/detach
+    /// operations, this IP will be created in the `Attaching` state to block
+    /// concurrent access.
+    /// Callers must call `external_ip_complete_op` on saga completion to move
+    /// the IP to `Attached`.
+    ///
+    /// To better handle idempotent attachment, this method returns an
+    /// additional bool:
+    /// - true: EIP was detached or attaching. proceed with saga.
+    /// - false: EIP was attached. No-op for remainder of saga.
    pub async fn allocate_instance_ephemeral_ip(
         &self,
         opctx: &OpContext,
         ip_id: Uuid,
         instance_id: Uuid,
-        pool_name: Option<Name>,
-    ) -> CreateResult<ExternalIp> {
-        let pool = match pool_name {
-            Some(name) => {
-                let (.., authz_pool, pool) = LookupPath::new(opctx, &self)
-                    .ip_pool_name(&name)
+        pool: Option<authz::IpPool>,
+        creating_instance: bool,
+    ) -> CreateResult<(ExternalIp, bool)> {
+        // This is slightly hacky: we need to create an unbound ephemeral IP, and
+        // then attempt to bind it to respect two separate constraints:
+        // - At most one Ephemeral IP per instance
+        // - At most MAX external IPs per instance
+        // Naturally, we now *need* to destroy the ephemeral IP if the newly alloc'd
+        // IP was not attached, including on idempotent success.
+        let pool = match pool {
+            Some(authz_pool) => {
+                let (.., pool) = LookupPath::new(opctx, &self)
+                    .ip_pool_id(authz_pool.id())
                     // any authenticated user can CreateChild on an IP pool. this is
                     // meant to represent allocating an IP
                     .fetch_for(authz::Action::CreateChild)
                     .await?;
 
                 // If this pool is not linked to the current silo, 404
+                // As name resolution happens one layer up, we need to use the *original*
+                // authz Pool.
                 if self.ip_pool_fetch_link(opctx, pool.id()).await.is_err() {
                     return Err(authz_pool.not_found());
                 }
@@ -91,9 +122,49 @@
         };
         let pool_id = pool.identity.id;
 
-        let data =
-            IncompleteExternalIp::for_ephemeral(ip_id, instance_id, pool_id);
-        self.allocate_external_ip(opctx, data).await
+        let data = IncompleteExternalIp::for_ephemeral(ip_id, pool_id);
+
+        // We might not be able to acquire a new IP, but in the event of an
+        // idempotent or double attach this failure is allowed.
+        let temp_ip = self.allocate_external_ip(opctx, data).await;
+        if let Err(e) = temp_ip {
+            let eip = self
+                .instance_lookup_ephemeral_ip(opctx, instance_id)
+                .await?
+                .ok_or(e)?;
+
+            return Ok((eip, false));
+        }
+        let temp_ip = temp_ip?;
+
+        match self
+            .begin_attach_ip(
+                opctx,
+                temp_ip.id,
+                instance_id,
+                IpKind::Ephemeral,
+                creating_instance,
+            )
+            .await
+        {
+            Err(e) => {
+                self.deallocate_external_ip(opctx, temp_ip.id).await?;
+                Err(e)
+            }
+            // Idempotent case: attach failed due to a caught UniqueViolation.
+            Ok(None) => {
+                self.deallocate_external_ip(opctx, temp_ip.id).await?;
+                let eip = self
+                    .instance_lookup_ephemeral_ip(opctx, instance_id)
+                    .await?
+                    .ok_or_else(|| Error::internal_error(
+                        "failed to lookup current ephemeral IP for idempotent attach"
+                    ))?;
+                let do_saga = eip.state != IpAttachState::Attached;
+                Ok((eip, do_saga))
+            }
+            Ok(Some(v)) => Ok(v),
+        }
     }
 
     /// Allocates an IP address for internal service usage.
@@ -140,33 +211,34 @@
         opctx: &OpContext,
         project_id: Uuid,
         params: params::FloatingIpCreate,
+        pool: Option<authz::IpPool>,
     ) -> CreateResult<FloatingIp> {
         let ip_id = Uuid::new_v4();
 
-        // TODO: NameOrId resolution should happen a level higher, in the nexus function
-        let (.., authz_pool, pool) = match params.pool {
-            Some(NameOrId::Name(name)) => {
-                LookupPath::new(opctx, self)
-                    .ip_pool_name(&Name(name))
-                    .fetch_for(authz::Action::Read)
-                    .await?
+        // This implements the same pattern as in `allocate_instance_ephemeral_ip` to
+        // check that a chosen pool is valid from within the current silo.
+        let pool = match pool {
+            Some(authz_pool) => {
+                let (.., pool) = LookupPath::new(opctx, &self)
+                    .ip_pool_id(authz_pool.id())
+                    .fetch_for(authz::Action::CreateChild)
+                    .await?;
+
+                if self.ip_pool_fetch_link(opctx, pool.id()).await.is_err() {
+                    return Err(authz_pool.not_found());
+                }
+
+                pool
             }
-            Some(NameOrId::Id(id)) => {
-                LookupPath::new(opctx, self)
-                    .ip_pool_id(id)
-                    .fetch_for(authz::Action::Read)
-                    .await?
+            // If no name given, use the default logic
+            None => {
+                let (.., pool) = self.ip_pools_fetch_default(&opctx).await?;
+                pool
             }
-            None => self.ip_pools_fetch_default(opctx).await?,
         };
 
         let pool_id = pool.id();
 
-        // If this pool is not linked to the current silo, 404
-        if self.ip_pool_fetch_link(opctx, pool_id).await.is_err() {
-            return Err(authz_pool.not_found());
-        }
-
         let data = if let Some(ip) = params.address {
             IncompleteExternalIp::for_floating_explicit(
                 ip_id,
@@ -228,6 +300,7 @@
                     )
                 }
             }
+            // Floating IP: name conflict
             DatabaseError(UniqueViolation, ..) if name.is_some() => {
                 TransactionError::CustomError(public_error_from_diesel(
                     e,
@@ -299,7 +372,266 @@
         self.allocate_external_ip(opctx, data).await
     }
 
-    /// Deallocate the external IP address with the provided ID.
+    /// Attempt to move a target external IP from detached to attaching,
+    /// checking that its parent instance does not have too many addresses
+    /// and is in a valid state.
+    ///
+    /// Returns the `ExternalIp` which was modified, where possible. This
+    /// is only nullable when trying to double-attach ephemeral IPs.
+    /// To better handle idempotent attachment, this method returns an
+    /// additional bool:
+    /// - true: EIP was detached or attaching. proceed with saga.
+    /// - false: EIP was attached. No-op for remainder of saga.
+    async fn begin_attach_ip(
+        &self,
+        opctx: &OpContext,
+        ip_id: Uuid,
+        instance_id: Uuid,
+        kind: IpKind,
+        creating_instance: bool,
+    ) -> Result<Option<(ExternalIp, bool)>, Error> {
+        use db::schema::external_ip::dsl;
+        use db::schema::external_ip::table;
+        use db::schema::instance::dsl as inst_dsl;
+        use db::schema::instance::table as inst_table;
+        use diesel::result::DatabaseErrorKind::UniqueViolation;
+        use diesel::result::Error::DatabaseError;
+
+        let safe_states = if creating_instance {
+            &SAFE_TO_ATTACH_INSTANCE_STATES_CREATING[..]
+        } else {
+            &SAFE_TO_ATTACH_INSTANCE_STATES[..]
+        };
+
+        let query = Instance::attach_resource(
+            instance_id,
+            ip_id,
+            inst_table
+                .into_boxed()
+                .filter(inst_dsl::state.eq_any(safe_states))
+                .filter(inst_dsl::migration_id.is_null()),
+            table
+                .into_boxed()
+                .filter(dsl::state.eq(IpAttachState::Detached))
+                .filter(dsl::kind.eq(kind))
+                .filter(dsl::parent_id.is_null()),
+            MAX_EXTERNAL_IPS_PLUS_SNAT,
+            diesel::update(dsl::external_ip).set((
+                dsl::parent_id.eq(Some(instance_id)),
+                dsl::time_modified.eq(Utc::now()),
+                dsl::state.eq(IpAttachState::Attaching),
+            )),
+        );
+
+        let mut do_saga = true;
+        query.attach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map(|(_, resource)| Some(resource))
+            .or_else(|e: AttachError<ExternalIp>| match e {
+                AttachError::CollectionNotFound => {
+                    Err(Error::not_found_by_id(
+                        ResourceType::Instance,
+                        &instance_id,
+                    ))
+                },
+                AttachError::ResourceNotFound => {
+                    Err(if kind == IpKind::Ephemeral {
+                        Error::internal_error("call-scoped ephemeral IP was lost")
+                    } else {
+                        Error::not_found_by_id(
+                            ResourceType::FloatingIp,
+                            &ip_id,
+                        )
+                    })
+                },
+                AttachError::NoUpdate { attached_count, resource, collection } => {
+                    match resource.state {
+                        // Idempotent errors: is in progress or complete for same resource pair -- this is fine.
+                        IpAttachState::Attaching if resource.parent_id == Some(instance_id) =>
+                            return Ok(Some(resource)),
+                        IpAttachState::Attached if resource.parent_id == Some(instance_id) => {
+                            do_saga = false;
+                            return Ok(Some(resource))
+                        },
+                        IpAttachState::Attached =>
+                            return Err(Error::invalid_request(&format!(
+                                "{kind} IP cannot be attached to one \
+                                instance while still attached to another"
+                            ))),
+                        // User can reattempt depending on how the current saga unfolds.
+                        // NB: only floating IP can return this case, eph will return
+                        // a UniqueViolation.
+                        IpAttachState::Attaching | IpAttachState::Detaching
+                            => return Err(Error::unavail(&format!(
+                                "tried to attach {kind} IP mid-attach/detach: \
+                                attach will be safe to retry once operation on \
+                                same IP resource completes"
+                            ))),
+
+                        IpAttachState::Detached => {},
+                    }
+
+                    if collection.runtime_state.migration_id.is_some() {
+                        return Err(Error::unavail(&format!(
+                            "tried to attach {kind} IP while instance was migrating: \
+                            attach will be safe to retry once migrate completes"
+                        )))
+                    }
+
+                    Err(match &collection.runtime_state.nexus_state {
+                        state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state)
+                            => Error::unavail(&format!(
+                                "tried to attach {kind} IP while instance was changing state: \
+                                attach will be safe to retry once start/stop completes"
+                            )),
+                        state if SAFE_TO_ATTACH_INSTANCE_STATES.contains(&state) => {
+                            if attached_count >= MAX_EXTERNAL_IPS_PLUS_SNAT as i64 {
+                                Error::invalid_request(&format!(
+                                    "an instance may not have more than \
+                                    {MAX_EXTERNAL_IPS_PER_INSTANCE} external IP addresses",
+                                ))
+                            } else {
+                                Error::internal_error(&format!("failed to attach {kind} IP"))
+                            }
+                        },
+                        state => Error::invalid_request(&format!(
+                            "cannot attach {kind} IP to instance in {state} state"
+                        )),
+                    })
+                },
+                // This case occurs for both currently attaching and attached ephemeral IPs:
+                AttachError::DatabaseError(DatabaseError(UniqueViolation, ..))
+                    if kind == IpKind::Ephemeral => {
+                    Ok(None)
+                },
+                AttachError::DatabaseError(e) => {
+                    Err(public_error_from_diesel(e, ErrorHandler::Server))
+                },
+            })
+            .map(|eip| eip.map(|v| (v, do_saga)))
+    }
+
+    /// Attempt to move a target external IP from attached to detaching,
+    /// checking that its parent instance is in a valid state.
+    ///
+    /// Returns the `ExternalIp` which was modified, where possible. This
+    /// is only nullable when trying to double-detach ephemeral IPs.
+    /// To better handle idempotent detachment, this method returns an
+    /// additional bool:
+    /// - true: EIP was attached or detaching. proceed with saga.
+    /// - false: EIP was detached. No-op for remainder of saga.
+    async fn begin_detach_ip(
+        &self,
+        opctx: &OpContext,
+        ip_id: Uuid,
+        instance_id: Uuid,
+        kind: IpKind,
+        creating_instance: bool,
+    ) -> UpdateResult<Option<(ExternalIp, bool)>> {
+        use db::schema::external_ip::dsl;
+        use db::schema::external_ip::table;
+        use db::schema::instance::dsl as inst_dsl;
+        use db::schema::instance::table as inst_table;
+
+        let safe_states = if creating_instance {
+            &SAFE_TO_ATTACH_INSTANCE_STATES_CREATING[..]
+        } else {
+            &SAFE_TO_ATTACH_INSTANCE_STATES[..]
+        };
+
+        let query = Instance::detach_resource(
+            instance_id,
+            ip_id,
+            inst_table
+                .into_boxed()
+                .filter(inst_dsl::state.eq_any(safe_states))
+                .filter(inst_dsl::migration_id.is_null()),
+            table
+                .into_boxed()
+                .filter(dsl::state.eq(IpAttachState::Attached))
+                .filter(dsl::kind.eq(kind)),
+            diesel::update(dsl::external_ip).set((
+                dsl::time_modified.eq(Utc::now()),
+                dsl::state.eq(IpAttachState::Detaching),
+            )),
+        );
+
+        let mut do_saga = true;
+        query.detach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map(Some)
+            .or_else(|e: DetachError<ExternalIp>| Err(match e {
+                DetachError::CollectionNotFound => {
+                    Error::not_found_by_id(
+                        ResourceType::Instance,
+                        &instance_id,
+                    )
+                },
+                DetachError::ResourceNotFound => {
+                    if kind == IpKind::Ephemeral {
+                        return Ok(None);
+                    } else {
+                        Error::not_found_by_id(
+                            ResourceType::FloatingIp,
+                            &ip_id,
+                        )
+                    }
+                },
+                DetachError::NoUpdate { resource, collection } => {
+                    let parent_match = resource.parent_id == Some(instance_id);
+                    match resource.state {
+                        // Idempotent cases: already detached OR detaching from same instance.
+                        IpAttachState::Detached => {
+                            do_saga = false;
+                            return Ok(Some(resource))
+                        },
+                        IpAttachState::Detaching if parent_match => return Ok(Some(resource)),
+                        IpAttachState::Attached if !parent_match
+                            => return Err(Error::invalid_request(&format!(
+                                "{kind} IP is not attached to the target instance",
+                            ))),
+                        // User can reattempt depending on how the current saga unfolds.
+                        IpAttachState::Attaching
+                            | IpAttachState::Detaching => return Err(Error::unavail(&format!(
+                                "tried to detach {kind} IP mid-attach/detach: \
+                                detach will be safe to retry once operation on \
+                                same IP resource completes"
+                            ))),
+                        IpAttachState::Attached => {},
+                    }
+
+                    if collection.runtime_state.migration_id.is_some() {
+                        return Err(Error::unavail(&format!(
+                            "tried to detach {kind} IP while instance was migrating: \
+                            detach will be safe to retry once migrate completes"
+                        )))
+                    }
+
+                    match collection.runtime_state.nexus_state {
+                        state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state) => Error::unavail(&format!(
+                            "tried to detach {kind} IP while instance was changing state: \
+                            detach will be safe to retry once start/stop completes"
+                        )),
+                        state if SAFE_TO_ATTACH_INSTANCE_STATES.contains(&state) => {
+                            Error::internal_error(&format!("failed to detach {kind} IP"))
+                        },
+                        state => Error::invalid_request(&format!(
+                            "cannot detach {kind} IP from instance in {state} state"
+                        )),
+                    }
+                },
+                DetachError::DatabaseError(e) => {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                },
+
+            }))
+            .map(|eip| eip.map(|v| (v, do_saga)))
+    }
+
+    /// Deallocate the external IP address with the provided ID. This is a complete
+    /// removal of the IP entry, in contrast with `begin_deallocate_ephemeral_ip`,
+    /// and should only be used for SNAT entries or cleanup of short-lived ephemeral
+    /// IPs on failure.
     ///
     /// To support idempotency, such as in saga operations, this method returns
     /// an extra boolean, rather than the usual `DeleteResult`. The meaning of
@@ -329,7 +661,34 @@ impl DataStore {
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }
 
-    /// Delete all external IP addresses associated with the provided instance
+    /// Moves an instance's ephemeral IP from 'Attached' to 'Detaching'.
+    ///
+    /// To support idempotency, this method will succeed if the instance
+    /// has no ephemeral IP or one is actively being removed. As a result,
+    /// information on an actual `ExternalIp` is best-effort.
+    pub async fn begin_deallocate_ephemeral_ip(
+        &self,
+        opctx: &OpContext,
+        ip_id: Uuid,
+        instance_id: Uuid,
+    ) -> Result<Option<ExternalIp>, Error> {
+        let _ = LookupPath::new(&opctx, self)
+            .instance_id(instance_id)
+            .lookup_for(authz::Action::Modify)
+            .await?;
+
+        self.begin_detach_ip(
+            opctx,
+            ip_id,
+            instance_id,
+            IpKind::Ephemeral,
+            false,
+        )
+        .await
+        .map(|res| res.map(|(ip, _do_saga)| ip))
+    }
+
+    /// Delete all non-floating IP addresses associated with the provided instance
     /// ID.
     ///
     /// This method returns the number of records deleted, rather than the usual
@@ -347,16 +706,22 @@
             .filter(dsl::is_service.eq(false))
             .filter(dsl::parent_id.eq(instance_id))
             .filter(dsl::kind.ne(IpKind::Floating))
-            .set(dsl::time_deleted.eq(now))
+            .set((
+                dsl::time_deleted.eq(now),
+                dsl::state.eq(IpAttachState::Detached),
+            ))
             .execute_async(&*self.pool_connection_authorized(opctx).await?)
             .await
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }
 
-    /// Detach an individual Floating IP address from its parent instance.
+    /// Detach all Floating IP addresses from their parent instance.
     ///
     /// As in `deallocate_external_ip_by_instance_id`, this method returns the
     /// number of records altered, rather than an `UpdateResult`.
+    ///
+    /// This method ignores ongoing state transitions, and is only safely
+    /// usable from within the instance_delete saga.
    pub async fn detach_floating_ips_by_instance_id(
         &self,
         opctx: &OpContext,
@@ -368,13 +733,18 @@
             .filter(dsl::is_service.eq(false))
             .filter(dsl::parent_id.eq(instance_id))
             .filter(dsl::kind.eq(IpKind::Floating))
-            .set(dsl::parent_id.eq(Option::<Uuid>::None))
+            .set((
+                dsl::time_modified.eq(Utc::now()),
+                dsl::parent_id.eq(Option::<Uuid>::None),
+                dsl::state.eq(IpAttachState::Detached),
+            ))
             .execute_async(&*self.pool_connection_authorized(opctx).await?)
             .await
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }
 
     /// Fetch all external IP addresses of any kind for the provided instance
+    /// in all attachment states.
     pub async fn instance_lookup_external_ips(
         &self,
         opctx: &OpContext,
@@ -391,6 +761,20 @@
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }
 
+    /// Fetch the ephemeral IP address assigned to the provided instance, if this
+    /// has been configured.
+    pub async fn instance_lookup_ephemeral_ip(
+        &self,
+        opctx: &OpContext,
+        instance_id: Uuid,
+    ) -> LookupResult<Option<ExternalIp>> {
+        Ok(self
+            .instance_lookup_external_ips(opctx, instance_id)
+            .await?
+            .into_iter()
+            .find(|v| v.kind == IpKind::Ephemeral))
+    }
+
     /// Fetch all Floating IP addresses for the provided project.
     pub async fn floating_ips_list(
         &self,
@@ -425,26 +809,20 @@
         &self,
         opctx: &OpContext,
         authz_fip: &authz::FloatingIp,
-        db_fip: &FloatingIp,
     ) -> DeleteResult {
         use db::schema::external_ip::dsl;
 
-        // Verify this FIP is not attached to any instances/services.
- if db_fip.parent_id.is_some() { - return Err(Error::invalid_request( - "Floating IP cannot be deleted while attached to an instance", - )); - } - opctx.authorize(authz::Action::Delete, authz_fip).await?; let now = Utc::now(); - let updated_rows = diesel::update(dsl::external_ip) - .filter(dsl::id.eq(db_fip.id())) + let result = diesel::update(dsl::external_ip) + .filter(dsl::id.eq(authz_fip.id())) .filter(dsl::time_deleted.is_null()) .filter(dsl::parent_id.is_null()) + .filter(dsl::state.eq(IpAttachState::Detached)) .set(dsl::time_deleted.eq(now)) - .execute_async(&*self.pool_connection_authorized(opctx).await?) + .check_if_exists::(authz_fip.id()) + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| { public_error_from_diesel( @@ -453,103 +831,208 @@ impl DataStore { ) })?; - if updated_rows == 0 { - return Err(Error::invalid_request( - "deletion failed due to concurrent modification", - )); + match result.status { + // Verify this FIP is not attached to any instances/services. + UpdateStatus::NotUpdatedButExists if result.found.parent_id.is_some() => Err(Error::invalid_request( + "Floating IP cannot be deleted while attached to an instance", + )), + // Only remaining cause of `NotUpdated` is earlier soft-deletion. + // Return success in this case to maintain idempotency. + UpdateStatus::Updated | UpdateStatus::NotUpdatedButExists => Ok(()), } - Ok(()) } /// Attaches a Floating IP address to an instance. - pub async fn floating_ip_attach( + /// + /// This moves a floating IP into the 'attaching' state. Callers are + /// responsible for calling `external_ip_complete_op` to finalise the + /// IP in 'attached' state at saga completion. + /// + /// To better handle idempotent attachment, this method returns an + /// additional bool: + /// - true: EIP was detached or attaching. proceed with saga. + /// - false: EIP was attached. No-op for remainder of saga. + pub async fn floating_ip_begin_attach( &self, opctx: &OpContext, authz_fip: &authz::FloatingIp, - db_fip: &FloatingIp, instance_id: Uuid, - ) -> UpdateResult { - use db::schema::external_ip::dsl; - - // Verify this FIP is not attached to any instances/services. - if db_fip.parent_id.is_some() { - return Err(Error::invalid_request( - "Floating IP cannot be attached to one instance while still attached to another", - )); - } - - let (.., authz_instance, _db_instance) = LookupPath::new(&opctx, self) + creating_instance: bool, + ) -> UpdateResult<(ExternalIp, bool)> { + let (.., authz_instance) = LookupPath::new(&opctx, self) .instance_id(instance_id) - .fetch_for(authz::Action::Modify) + .lookup_for(authz::Action::Modify) .await?; opctx.authorize(authz::Action::Modify, authz_fip).await?; opctx.authorize(authz::Action::Modify, &authz_instance).await?; - diesel::update(dsl::external_ip) - .filter(dsl::id.eq(db_fip.id())) - .filter(dsl::kind.eq(IpKind::Floating)) - .filter(dsl::time_deleted.is_null()) - .filter(dsl::parent_id.is_null()) - .set(( - dsl::parent_id.eq(Some(instance_id)), - dsl::time_modified.eq(Utc::now()), - )) - .returning(ExternalIp::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) 
- .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_fip), + self.begin_attach_ip( + opctx, + authz_fip.id(), + instance_id, + IpKind::Floating, + creating_instance, + ) + .await + .and_then(|v| { + v.ok_or_else(|| { + Error::internal_error( + "floating IP should never return `None` from begin_attach", ) }) - .and_then(|r| FloatingIp::try_from(r)) - .map_err(|e| Error::internal_error(&format!("{e}"))) + }) } /// Detaches a Floating IP address from an instance. - pub async fn floating_ip_detach( + /// + /// This moves a floating IP into the 'detaching' state. Callers are + /// responsible for calling `external_ip_complete_op` to finalise the + /// IP in 'detached' state at saga completion. + /// + /// To better handle idempotent detachment, this method returns an + /// additional bool: + /// - true: EIP was attached or detaching. proceed with saga. + /// - false: EIP was detached. No-op for remainder of saga. + pub async fn floating_ip_begin_detach( &self, opctx: &OpContext, authz_fip: &authz::FloatingIp, - db_fip: &FloatingIp, - ) -> UpdateResult { - use db::schema::external_ip::dsl; - - let Some(instance_id) = db_fip.parent_id else { - return Err(Error::invalid_request( - "Floating IP is not attached to an instance", - )); - }; - - let (.., authz_instance, _db_instance) = LookupPath::new(&opctx, self) + instance_id: Uuid, + creating_instance: bool, + ) -> UpdateResult<(ExternalIp, bool)> { + let (.., authz_instance) = LookupPath::new(&opctx, self) .instance_id(instance_id) - .fetch_for(authz::Action::Modify) + .lookup_for(authz::Action::Modify) .await?; opctx.authorize(authz::Action::Modify, authz_fip).await?; opctx.authorize(authz::Action::Modify, &authz_instance).await?; - diesel::update(dsl::external_ip) - .filter(dsl::id.eq(db_fip.id())) - .filter(dsl::kind.eq(IpKind::Floating)) - .filter(dsl::time_deleted.is_null()) - .filter(dsl::parent_id.eq(instance_id)) - .set(( - dsl::parent_id.eq(Option::::None), - dsl::time_modified.eq(Utc::now()), - )) - .returning(ExternalIp::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_fip), + self.begin_detach_ip( + opctx, + authz_fip.id(), + instance_id, + IpKind::Floating, + creating_instance, + ) + .await + .and_then(|v| { + v.ok_or_else(|| { + Error::internal_error( + "floating IP should never return `None` from begin_detach", ) }) - .and_then(|r| FloatingIp::try_from(r)) - .map_err(|e| Error::internal_error(&format!("{e}"))) + }) + } + + /// Move an external IP from a transitional state (attaching, detaching) + /// to its intended end state. + /// + /// Returns the number of rows modified, this may be zero on: + /// - instance delete by another saga + /// - saga action rerun + /// + /// This is valid in both cases for idempotency. 
+    pub async fn external_ip_complete_op(
+        &self,
+        opctx: &OpContext,
+        ip_id: Uuid,
+        ip_kind: IpKind,
+        expected_state: IpAttachState,
+        target_state: IpAttachState,
+    ) -> Result<usize, Error> {
+        use db::schema::external_ip::dsl;
+
+        if matches!(
+            expected_state,
+            IpAttachState::Attached | IpAttachState::Detached
+        ) {
+            return Err(Error::internal_error(&format!(
+                "{expected_state:?} is not a valid transition state for attach/detach"
+            )));
+        }
+
+        let part_out = diesel::update(dsl::external_ip)
+            .filter(dsl::id.eq(ip_id))
+            .filter(dsl::time_deleted.is_null())
+            .filter(dsl::state.eq(expected_state));
+
+        let now = Utc::now();
+        let conn = self.pool_connection_authorized(opctx).await?;
+        match (ip_kind, expected_state, target_state) {
+            (IpKind::SNat, _, _) => return Err(Error::internal_error(
+                "SNAT should not be removed via `external_ip_complete_op`, \
+                use `deallocate_external_ip`",
+            )),
+
+            (IpKind::Ephemeral, _, IpAttachState::Detached) => {
+                part_out
+                    .set((
+                        dsl::parent_id.eq(Option::<Uuid>::None),
+                        dsl::time_modified.eq(now),
+                        dsl::time_deleted.eq(now),
+                        dsl::state.eq(target_state),
+                    ))
+                    .execute_async(&*conn)
+                    .await
+            }
+
+            (IpKind::Floating, _, IpAttachState::Detached) => {
+                part_out
+                    .set((
+                        dsl::parent_id.eq(Option::<Uuid>::None),
+                        dsl::time_modified.eq(now),
+                        dsl::state.eq(target_state),
+                    ))
+                    .execute_async(&*conn)
+                    .await
+            }
+
+            // Attaching->Attached gets separate logic because we choose to fail
+            // and unwind on instance delete. This covers two cases:
+            // - External IP is deleted.
+            // - Floating IP is suddenly `detached`.
+            (_, IpAttachState::Attaching, IpAttachState::Attached) => {
+                return part_out
+                    .set((
+                        dsl::time_modified.eq(Utc::now()),
+                        dsl::state.eq(target_state),
+                    ))
+                    .check_if_exists::<ExternalIp>(ip_id)
+                    .execute_and_check(
+                        &*self.pool_connection_authorized(opctx).await?,
+                    )
+                    .await
+                    .map_err(|e| {
+                        public_error_from_diesel(e, ErrorHandler::Server)
+                    })
+                    .and_then(|r| match r.status {
+                        UpdateStatus::Updated => Ok(1),
+                        UpdateStatus::NotUpdatedButExists
+                            if r.found.state == IpAttachState::Detached
+                                || r.found.time_deleted.is_some() =>
+                        {
+                            Err(Error::internal_error(
+                                "unwinding due to concurrent instance delete",
+                            ))
+                        }
+                        UpdateStatus::NotUpdatedButExists => Ok(0),
+                    })
+            }
+
+            // Unwind from failed detach.
+ (_, _, IpAttachState::Attached) => { + part_out + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::state.eq(target_state), + )) + .execute_async(&*conn) + .await + } + _ => return Err(Error::internal_error("unreachable")), + } + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } } diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index 188f5c30c9..c01f40e791 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -11,6 +11,7 @@ use crate::context::OpContext; use crate::db; use crate::db::collection_detach_many::DatastoreDetachManyTarget; use crate::db::collection_detach_many::DetachManyError; +use crate::db::collection_detach_many::DetachManyFromCollectionStatement; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; @@ -28,6 +29,7 @@ use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_db_model::Disk; use nexus_db_model::VmmRuntimeState; use omicron_common::api; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -405,59 +407,63 @@ impl DataStore { let ok_to_detach_disk_state_labels: Vec<_> = ok_to_detach_disk_states.iter().map(|s| s.label()).collect(); - let _instance = Instance::detach_resources( - authz_instance.id(), - instance::table.into_boxed().filter( - instance::dsl::state - .eq_any(ok_to_delete_instance_states) - .and(instance::dsl::active_propolis_id.is_null()), - ), - disk::table.into_boxed().filter( - disk::dsl::disk_state.eq_any(ok_to_detach_disk_state_labels), - ), - diesel::update(instance::dsl::instance).set(( - instance::dsl::state.eq(destroyed), - instance::dsl::time_deleted.eq(Utc::now()), - )), - diesel::update(disk::dsl::disk).set(( - disk::dsl::disk_state.eq(detached_label), - disk::dsl::attach_instance_id.eq(Option::::None), - disk::dsl::slot.eq(Option::::None), - )), - ) - .detach_and_get_result_async( - &*self.pool_connection_authorized(opctx).await?, - ) - .await - .map_err(|e| match e { - DetachManyError::CollectionNotFound => Error::not_found_by_id( - ResourceType::Instance, - &authz_instance.id(), - ), - DetachManyError::NoUpdate { collection } => { - if collection.runtime_state.propolis_id.is_some() { - return Error::invalid_request( + let stmt: DetachManyFromCollectionStatement = + Instance::detach_resources( + authz_instance.id(), + instance::table.into_boxed().filter( + instance::dsl::state + .eq_any(ok_to_delete_instance_states) + .and(instance::dsl::active_propolis_id.is_null()), + ), + disk::table.into_boxed().filter( + disk::dsl::disk_state + .eq_any(ok_to_detach_disk_state_labels), + ), + diesel::update(instance::dsl::instance).set(( + instance::dsl::state.eq(destroyed), + instance::dsl::time_deleted.eq(Utc::now()), + )), + diesel::update(disk::dsl::disk).set(( + disk::dsl::disk_state.eq(detached_label), + disk::dsl::attach_instance_id.eq(Option::::None), + disk::dsl::slot.eq(Option::::None), + )), + ); + + let _instance = stmt + .detach_and_get_result_async( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .map_err(|e| match e { + DetachManyError::CollectionNotFound => Error::not_found_by_id( + ResourceType::Instance, + &authz_instance.id(), + ), + DetachManyError::NoUpdate { collection } => { + if collection.runtime_state.propolis_id.is_some() { + return Error::invalid_request( "cannot delete 
instance: instance is running or has \ not yet fully stopped", ); - } - let instance_state = - collection.runtime_state.nexus_state.state(); - match instance_state { - api::external::InstanceState::Stopped - | api::external::InstanceState::Failed => { - Error::internal_error("cannot delete instance") } - _ => Error::invalid_request(&format!( - "instance cannot be deleted in state \"{}\"", - instance_state, - )), + let instance_state = + collection.runtime_state.nexus_state.state(); + match instance_state { + api::external::InstanceState::Stopped + | api::external::InstanceState::Failed => { + Error::internal_error("cannot delete instance") + } + _ => Error::invalid_request(&format!( + "instance cannot be deleted in state \"{}\"", + instance_state, + )), + } } - } - DetachManyError::DatabaseError(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - })?; + DetachManyError::DatabaseError(e) => { + public_error_from_diesel(e, ErrorHandler::Server) + } + })?; Ok(()) } diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs index a44fed4cdf..655a267fe1 100644 --- a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs +++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs @@ -23,12 +23,14 @@ impl DataStore { &self, opctx: &OpContext, nat_entry: Ipv4NatValues, - ) -> CreateResult<()> { + ) -> CreateResult { use db::schema::ipv4_nat_entry::dsl; use diesel::sql_types; // Look up any NAT entries that already have the exact parameters // we're trying to INSERT. + // We want to return any existing entry, but not to mask the UniqueViolation + // when trying to use an existing IP + port range with a different target. let matching_entry_subquery = dsl::ipv4_nat_entry .filter(dsl::external_address.eq(nat_entry.external_address)) .filter(dsl::first_port.eq(nat_entry.first_port)) @@ -58,7 +60,7 @@ impl DataStore { )) .filter(diesel::dsl::not(diesel::dsl::exists(matching_entry_subquery))); - diesel::insert_into(dsl::ipv4_nat_entry) + let out = diesel::insert_into(dsl::ipv4_nat_entry) .values(new_entry_subquery) .into_columns(( dsl::external_address, @@ -68,11 +70,24 @@ impl DataStore { dsl::vni, dsl::mac, )) - .execute_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - - Ok(()) + .returning(Ipv4NatEntry::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await; + + match out { + Ok(o) => Ok(o), + Err(diesel::result::Error::NotFound) => { + // Idempotent ensure. Annoyingly, we can't easily extract + // the existing row as part of the insert query: + // - (SELECT ..) UNION (INSERT INTO .. RETURNING ..) isn't + // allowed by crdb. + // - Can't ON CONFLICT with a partial constraint, so we can't + // do a no-op write and return the row that way either. + // So, we do another lookup. 
+ self.ipv4_nat_find_by_values(opctx, nat_entry).await + } + Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)), + } } pub async fn ipv4_nat_delete( diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index d61ff15a3d..5fd16e2633 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -408,6 +408,7 @@ mod test { use chrono::{Duration, Utc}; use futures::stream; use futures::StreamExt; + use nexus_db_model::IpAttachState; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::params; use omicron_common::api::external::DataPageParams; @@ -1625,7 +1626,8 @@ mod test { // Create a few records. let now = Utc::now(); let instance_id = Uuid::new_v4(); - let ips = (0..4) + let kinds = [IpKind::SNat, IpKind::Ephemeral]; + let ips = (0..2) .map(|i| ExternalIp { id: Uuid::new_v4(), name: None, @@ -1638,12 +1640,13 @@ mod test { project_id: None, is_service: false, parent_id: Some(instance_id), - kind: IpKind::Ephemeral, + kind: kinds[i as usize], ip: ipnetwork::IpNetwork::from(IpAddr::from(Ipv4Addr::new( 10, 0, 0, i, ))), first_port: crate::db::model::SqlU16(0), last_port: crate::db::model::SqlU16(10), + state: nexus_db_model::IpAttachState::Attached, }) .collect::>(); diesel::insert_into(dsl::external_ip) @@ -1705,6 +1708,7 @@ mod test { ))), first_port: crate::db::model::SqlU16(0), last_port: crate::db::model::SqlU16(10), + state: nexus_db_model::IpAttachState::Attached, }; diesel::insert_into(dsl::external_ip) .values(ip.clone()) @@ -1775,6 +1779,7 @@ mod test { ip: addresses.next().unwrap().into(), first_port: crate::db::model::SqlU16(0), last_port: crate::db::model::SqlU16(10), + state: nexus_db_model::IpAttachState::Attached, }; // Combinations of NULL and non-NULL for: @@ -1782,6 +1787,7 @@ mod test { // - description // - parent (instance / service) UUID // - project UUID + // - attach state let names = [None, Some("foo")]; let descriptions = [None, Some("foo".to_string())]; let parent_ids = [None, Some(Uuid::new_v4())]; @@ -1822,6 +1828,12 @@ mod test { continue; } + let state = if parent_id.is_some() { + IpAttachState::Attached + } else { + IpAttachState::Detached + }; + let new_ip = ExternalIp { id: Uuid::new_v4(), name: name_local.clone(), @@ -1830,6 +1842,7 @@ mod test { is_service, parent_id: *parent_id, project_id: *project_id, + state, ..ip }; @@ -1902,6 +1915,11 @@ mod test { let name_local = name.map(|v| { db::model::Name(Name::try_from(v.to_string()).unwrap()) }); + let state = if parent_id.is_some() { + IpAttachState::Attached + } else { + IpAttachState::Detached + }; let new_ip = ExternalIp { id: Uuid::new_v4(), name: name_local, @@ -1911,6 +1929,7 @@ mod test { is_service, parent_id: *parent_id, project_id: *project_id, + state, ..ip }; let res = diesel::insert_into(dsl::external_ip) @@ -1918,9 +1937,10 @@ mod test { .execute_async(&*conn) .await; let ip_type = if is_service { "Service" } else { "Instance" }; + let null_snat_parent = parent_id.is_none() && kind == IpKind::SNat; if name.is_none() && description.is_none() - && parent_id.is_some() + && !null_snat_parent && project_id.is_none() { // Name/description must be NULL, instance ID cannot diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs index 090c6865b7..e8ef721e98 100644 --- a/nexus/db-queries/src/db/pool_connection.rs +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -47,6 +47,7 @@ static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ 
"hw_rot_slot", "identity_type", "instance_state", + "ip_attach_state", "ip_kind", "ip_pool_resource_type", "network_interface_kind", diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 49403aac61..8114b9e363 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -26,10 +26,42 @@ use diesel::Column; use diesel::Expression; use diesel::QueryResult; use diesel::RunQueryDsl; +use nexus_db_model::InstanceState as DbInstanceState; +use nexus_db_model::IpAttachState; +use nexus_db_model::IpAttachStateEnum; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external; +use omicron_common::api::external::InstanceState as ApiInstanceState; use uuid::Uuid; +// Broadly, we want users to be able to attach/detach at will +// once an instance is created and functional. +pub const SAFE_TO_ATTACH_INSTANCE_STATES_CREATING: [DbInstanceState; 3] = [ + DbInstanceState(ApiInstanceState::Stopped), + DbInstanceState(ApiInstanceState::Running), + DbInstanceState(ApiInstanceState::Creating), +]; +pub const SAFE_TO_ATTACH_INSTANCE_STATES: [DbInstanceState; 2] = [ + DbInstanceState(ApiInstanceState::Stopped), + DbInstanceState(ApiInstanceState::Running), +]; +// If we're in a state which will naturally resolve to either +// stopped/running, we want users to know that the request can be +// retried safely via Error::unavail. +// TODO: We currently stop if there's a migration or other state change. +// There may be a good case for RPWing +// external_ip_state -> { NAT RPW, sled-agent } in future. +pub const SAFE_TRANSIENT_INSTANCE_STATES: [DbInstanceState; 5] = [ + DbInstanceState(ApiInstanceState::Starting), + DbInstanceState(ApiInstanceState::Stopping), + DbInstanceState(ApiInstanceState::Creating), + DbInstanceState(ApiInstanceState::Rebooting), + DbInstanceState(ApiInstanceState::Migrating), +]; + +/// The maximum number of disks that can be attached to an instance. +pub const MAX_EXTERNAL_IPS_PER_INSTANCE: u32 = 32; + type FromClause = diesel::internal::table_macro::StaticQueryFragmentInstance; type IpPoolRangeFromClause = FromClause; @@ -99,7 +131,8 @@ const MAX_PORT: u16 = u16::MAX; /// candidate_ip AS ip, /// CAST(candidate_first_port AS INT4) AS first_port, /// CAST(candidate_last_port AS INT4) AS last_port, -/// AS project_id +/// AS project_id, +/// AS state /// FROM /// SELECT * FROM ( /// -- Select all IP addresses by pool and range. @@ -378,6 +411,14 @@ impl NextExternalIp { out.push_bind_param::, Option>(self.ip.project_id())?; out.push_sql(" AS "); out.push_identifier(dsl::project_id::NAME)?; + out.push_sql(", "); + + // Initial state, mainly needed by Ephemeral/Floating IPs. 
+        out.push_bind_param::<IpAttachStateEnum, IpAttachState>(
+            self.ip.state(),
+        )?;
+        out.push_sql(" AS ");
+        out.push_identifier(dsl::state::NAME)?;
 
         out.push_sql(" FROM (");
         self.push_address_sequence_subquery(out.reborrow())?;
@@ -822,10 +863,12 @@ impl RunQueryDsl for NextExternalIp {}
 
 #[cfg(test)]
 mod tests {
+    use crate::authz;
     use crate::context::OpContext;
     use crate::db::datastore::DataStore;
     use crate::db::datastore::SERVICE_IP_POOL_NAME;
     use crate::db::identity::Resource;
+    use crate::db::lookup::LookupPath;
     use crate::db::model::IpKind;
     use crate::db::model::IpPool;
     use crate::db::model::IpPoolRange;
@@ -833,9 +876,13 @@ mod tests {
     use async_bb8_diesel::AsyncRunQueryDsl;
     use diesel::{ExpressionMethods, QueryDsl, SelectableHelper};
     use dropshot::test_util::LogContext;
+    use nexus_db_model::ByteCount;
+    use nexus_db_model::Instance;
+    use nexus_db_model::InstanceCpuCount;
     use nexus_db_model::IpPoolResource;
     use nexus_db_model::IpPoolResourceType;
     use nexus_test_utils::db::test_setup_database;
+    use nexus_types::external_api::params::InstanceCreate;
     use nexus_types::external_api::shared::IpRange;
     use omicron_common::address::NUM_SOURCE_NAT_PORTS;
     use omicron_common::api::external::Error;
@@ -878,7 +925,7 @@ mod tests {
             name: &str,
             range: IpRange,
             is_default: bool,
-        ) {
+        ) -> authz::IpPool {
             let pool = IpPool::new(&IdentityMetadataCreateParams {
                 name: String::from(name).parse().unwrap(),
                 description: format!("ip pool {}", name),
@@ -902,6 +949,13 @@ mod tests {
                 .expect("Failed to associate IP pool with silo");
 
             self.initialize_ip_pool(name, range).await;
+
+            LookupPath::new(&self.opctx, &self.db_datastore)
+                .ip_pool_id(pool.id())
+                .lookup_for(authz::Action::Read)
+                .await
+                .unwrap()
+                .0
         }
 
         async fn initialize_ip_pool(&self, name: &str, range: IpRange) {
@@ -937,6 +991,37 @@ mod tests {
                 .expect("Failed to create IP Pool range");
         }
 
+        async fn create_instance(&self, name: &str) -> Uuid {
+            let instance_id = Uuid::new_v4();
+            let project_id = Uuid::new_v4();
+            let instance = Instance::new(instance_id, project_id, &InstanceCreate {
+                identity: IdentityMetadataCreateParams { name: String::from(name).parse().unwrap(), description: format!("instance {}", name) },
+                ncpus: InstanceCpuCount(omicron_common::api::external::InstanceCpuCount(1)).into(),
+                memory: ByteCount(omicron_common::api::external::ByteCount::from_gibibytes_u32(1)).into(),
+                hostname: "test".into(),
+                user_data: vec![],
+                network_interfaces: Default::default(),
+                external_ips: vec![],
+                disks: vec![],
+                start: false,
+            });
+
+            let conn = self
+                .db_datastore
+                .pool_connection_authorized(&self.opctx)
+                .await
+                .unwrap();
+
+            use crate::db::schema::instance::dsl as instance_dsl;
+            diesel::insert_into(instance_dsl::instance)
+                .values(instance.clone())
+                .execute_async(&*conn)
+                .await
+                .expect("Failed to create Instance");
+
+            instance_id
+        }
+
         async fn default_pool_id(&self) -> Uuid {
             let (.., pool) = self
                 .db_datastore
@@ -1021,7 +1106,7 @@ mod tests {
         // Allocate an Ephemeral IP, which should take the entire port range of
         // the only address in the pool.
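The comment above leans on the port-block arithmetic behind SNAT and ephemeral allocations: an ephemeral or floating IP claims an address's entire port space, while SNAT carves fixed-size blocks. A small runnable sketch of that arithmetic; the block size mirrors `omicron_common::address::NUM_SOURCE_NAT_PORTS` (16384 at the time of writing), but treat the constant here as illustrative:

    const NUM_SOURCE_NAT_PORTS: u32 = 16384;
    const MAX_PORT: u32 = u16::MAX as u32;

    /// First and last port of the `n`th SNAT block on a single address.
    fn snat_block(n: u32) -> (u32, u32) {
        let first = n * NUM_SOURCE_NAT_PORTS;
        (first, first + NUM_SOURCE_NAT_PORTS - 1)
    }

    fn main() {
        // Exactly four SNAT blocks tile one address's port space...
        assert_eq!(MAX_PORT + 1, 4 * NUM_SOURCE_NAT_PORTS);
        assert_eq!(snat_block(3), (49152, 65535));
        // ...whereas one ephemeral IP spans all of [0, MAX_PORT], so nothing
        // else (SNAT block or second ephemeral IP) can share the address.
    }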
- let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("for-eph").await; let ephemeral_ip = context .db_datastore .allocate_instance_ephemeral_ip( @@ -1029,16 +1114,18 @@ mod tests { Uuid::new_v4(), instance_id, /* pool_name = */ None, + true, ) .await - .expect("Failed to allocate Ephemeral IP when there is space"); + .expect("Failed to allocate Ephemeral IP when there is space") + .0; assert_eq!(ephemeral_ip.ip.ip(), range.last_address()); assert_eq!(ephemeral_ip.first_port.0, 0); assert_eq!(ephemeral_ip.last_port.0, super::MAX_PORT); // At this point, we should be able to allocate neither a new Ephemeral // nor any SNAT IPs. - let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("for-snat").await; let res = context .db_datastore .allocate_instance_snat_ip( @@ -1069,6 +1156,7 @@ mod tests { Uuid::new_v4(), instance_id, /* pool_name = */ None, + true, ) .await; assert!( @@ -1203,7 +1291,7 @@ mod tests { .unwrap(); context.create_ip_pool("default", range, true).await; - let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("all-the-ports").await; let id = Uuid::new_v4(); let pool_name = None; @@ -1214,9 +1302,11 @@ mod tests { id, instance_id, pool_name, + true, ) .await - .expect("Failed to allocate instance ephemeral IP address"); + .expect("Failed to allocate instance ephemeral IP address") + .0; assert_eq!(ip.kind, IpKind::Ephemeral); assert_eq!(ip.ip.ip(), range.first_address()); assert_eq!(ip.first_port.0, 0); @@ -1729,13 +1819,12 @@ mod tests { Ipv4Addr::new(10, 0, 0, 6), )) .unwrap(); - context.create_ip_pool("p1", second_range, false).await; + let p1 = context.create_ip_pool("p1", second_range, false).await; // Allocating an address on an instance in the second pool should be // respected, even though there are IPs available in the first. - let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("test").await; let id = Uuid::new_v4(); - let pool_name = Some(Name("p1".parse().unwrap())); let ip = context .db_datastore @@ -1743,10 +1832,12 @@ mod tests { &context.opctx, id, instance_id, - pool_name, + Some(p1), + true, ) .await - .expect("Failed to allocate instance ephemeral IP address"); + .expect("Failed to allocate instance ephemeral IP address") + .0; assert_eq!(ip.kind, IpKind::Ephemeral); assert_eq!(ip.ip.ip(), second_range.first_address()); assert_eq!(ip.first_port.0, 0); @@ -1772,24 +1863,26 @@ mod tests { let last_address = Ipv4Addr::new(10, 0, 0, 6); let second_range = IpRange::try_from((first_address, last_address)).unwrap(); - context.create_ip_pool("p1", second_range, false).await; + let p1 = context.create_ip_pool("p1", second_range, false).await; // Allocate all available addresses in the second pool. 
- let instance_id = Uuid::new_v4(); - let pool_name = Some(Name("p1".parse().unwrap())); let first_octet = first_address.octets()[3]; let last_octet = last_address.octets()[3]; for octet in first_octet..=last_octet { + let instance_id = + context.create_instance(&format!("o{octet}")).await; let ip = context .db_datastore .allocate_instance_ephemeral_ip( &context.opctx, Uuid::new_v4(), instance_id, - pool_name.clone(), + Some(p1.clone()), + true, ) .await - .expect("Failed to allocate instance ephemeral IP address"); + .expect("Failed to allocate instance ephemeral IP address") + .0; println!("{ip:#?}"); if let IpAddr::V4(addr) = ip.ip.ip() { assert_eq!(addr.octets()[3], octet); @@ -1799,13 +1892,15 @@ mod tests { } // Allocating another address should _fail_, and not use the first pool. + let instance_id = context.create_instance("final").await; context .db_datastore .allocate_instance_ephemeral_ip( &context.opctx, Uuid::new_v4(), instance_id, - pool_name, + Some(p1), + true, ) .await .expect_err("Should not use IP addresses from a different pool"); diff --git a/nexus/src/app/external_ip.rs b/nexus/src/app/external_ip.rs index 404f597288..45b05fbb0b 100644 --- a/nexus/src/app/external_ip.rs +++ b/nexus/src/app/external_ip.rs @@ -4,14 +4,18 @@ //! External IP addresses for instances +use std::sync::Arc; + use crate::external_api::views::ExternalIp; use crate::external_api::views::FloatingIp; +use nexus_db_model::IpAttachState; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::IpKind; use nexus_types::external_api::params; +use nexus_types::external_api::views; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; @@ -19,6 +23,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; +use omicron_common::api::external::UpdateResult; impl super::Nexus { pub(crate) async fn instance_list_external_ips( @@ -34,7 +39,9 @@ impl super::Nexus { .await? .into_iter() .filter_map(|ip| { - if ip.kind == IpKind::SNat { + if ip.kind == IpKind::SNat + || ip.state != IpAttachState::Attached + { None } else { Some(ip.try_into().unwrap()) @@ -102,9 +109,19 @@ impl super::Nexus { let (.., authz_project) = project_lookup.lookup_for(authz::Action::CreateChild).await?; + let pool = match ¶ms.pool { + Some(pool) => Some( + self.ip_pool_lookup(opctx, pool)? + .lookup_for(authz::Action::Read) + .await? + .0, + ), + None => None, + }; + Ok(self .db_datastore - .allocate_floating_ip(opctx, authz_project.id(), params) + .allocate_floating_ip(opctx, authz_project.id(), params, pool) .await? 
             .try_into()
             .unwrap())
     }
@@ -115,9 +132,68 @@ impl super::Nexus {
         opctx: &OpContext,
         ip_lookup: lookup::FloatingIp<'_>,
     ) -> DeleteResult {
+        let (.., authz_fip) =
+            ip_lookup.lookup_for(authz::Action::Delete).await?;
+
+        self.db_datastore.floating_ip_delete(opctx, &authz_fip).await
+    }
+
+    pub(crate) async fn floating_ip_attach(
+        self: &Arc<Self>,
+        opctx: &OpContext,
+        fip_selector: params::FloatingIpSelector,
+        target: params::FloatingIpAttach,
+    ) -> UpdateResult<views::FloatingIp> {
+        match target.kind {
+            params::FloatingIpParentKind::Instance => {
+                let instance_selector = params::InstanceSelector {
+                    project: fip_selector.project,
+                    instance: target.parent,
+                };
+                let instance =
+                    self.instance_lookup(opctx, instance_selector)?;
+                let attach_params = &params::ExternalIpCreate::Floating {
+                    floating_ip: fip_selector.floating_ip,
+                };
+                self.instance_attach_external_ip(
+                    opctx,
+                    &instance,
+                    attach_params,
+                )
+                .await
+                .and_then(FloatingIp::try_from)
+            }
+        }
+    }
+
+    pub(crate) async fn floating_ip_detach(
+        self: &Arc<Self>,
+        opctx: &OpContext,
+        ip_lookup: lookup::FloatingIp<'_>,
+    ) -> UpdateResult<views::FloatingIp> {
+        // XXX: Today, this only happens for instances.
+        // In future, we will need to separate out by the *type* of
+        // parent attached to a floating IP. We don't yet store this
+        // in db for user-facing FIPs (is_service => internal-only
+        // at this point).
         let (.., authz_fip, db_fip) =
-            ip_lookup.fetch_for(authz::Action::Delete).await?;
+            ip_lookup.fetch_for(authz::Action::Modify).await?;
+
+        let Some(parent_id) = db_fip.parent_id else {
+            return Ok(db_fip.into());
+        };
+
+        let instance_selector = params::InstanceSelector {
+            project: None,
+            instance: parent_id.into(),
+        };
+        let instance = self.instance_lookup(opctx, instance_selector)?;
+        let attach_params = &params::ExternalIpDetach::Floating {
+            floating_ip: authz_fip.id().into(),
+        };
 
-        self.db_datastore.floating_ip_delete(opctx, &authz_fip, &db_fip).await
+        self.instance_detach_external_ip(opctx, &instance, attach_params)
+            .await
+            .and_then(FloatingIp::try_from)
     }
 }
diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs
index 778c5e2fe1..f924653525 100644
--- a/nexus/src/app/instance.rs
+++ b/nexus/src/app/instance.rs
@@ -17,6 +17,7 @@ use crate::external_api::params;
 use cancel_safe_futures::prelude::*;
 use futures::future::Fuse;
 use futures::{FutureExt, SinkExt, StreamExt};
+use nexus_db_model::IpAttachState;
 use nexus_db_model::IpKind;
 use nexus_db_queries::authn;
 use nexus_db_queries::authz;
@@ -26,6 +27,7 @@ use nexus_db_queries::db::datastore::InstanceAndActiveVmm;
 use nexus_db_queries::db::identity::Resource;
 use nexus_db_queries::db::lookup;
 use nexus_db_queries::db::lookup::LookupPath;
+use nexus_types::external_api::views;
 use omicron_common::address::PROPOLIS_PORT;
 use omicron_common::api::external::http_pagination::PaginatedBy;
 use omicron_common::api::external::ByteCount;
@@ -1052,6 +1054,15 @@ impl super::Nexus {
             ));
         }
 
+        // If there are any external IPs not yet fully attached/detached, then
+        // there are attach/detach sagas in progress. That should complete in
+        // its own time, so return a 503 to indicate a possible retry.
+        if external_ips.iter().any(|v| v.state != IpAttachState::Attached) {
+            return Err(Error::unavail(
+                "External IP attach/detach is in progress during instance_ensure_registered"
+            ));
+        }
+
         // Partition remaining external IPs by class: we can have at most
         // one ephemeral ip.
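The new check above turns an in-progress attach/detach into a retryable 503 rather than a hard failure. Condensed into a hypothetical free function (reusing the `IpAttachState` and `Error` types already imported in this file):

    /// Gate registration on every external IP having settled; a transient
    /// state means a saga is mid-flight, so ask the caller to retry.
    fn ensure_all_attached(states: &[IpAttachState]) -> Result<(), Error> {
        if states.iter().any(|s| *s != IpAttachState::Attached) {
            // 503 says "try again shortly", not "this failed".
            return Err(Error::unavail(
                "External IP attach/detach is in progress",
            ));
        }
        Ok(())
    }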
         let (ephemeral_ips, floating_ips): (Vec<_>, Vec<_>) = external_ips
@@ -1904,6 +1915,73 @@ impl super::Nexus {
 
         Ok(())
     }
+
+    /// Attach an external IP to an instance.
+    pub(crate) async fn instance_attach_external_ip(
+        self: &Arc<Self>,
+        opctx: &OpContext,
+        instance_lookup: &lookup::Instance<'_>,
+        ext_ip: &params::ExternalIpCreate,
+    ) -> UpdateResult<views::ExternalIp> {
+        let (.., authz_project, authz_instance) =
+            instance_lookup.lookup_for(authz::Action::Modify).await?;
+
+        let saga_params = sagas::instance_ip_attach::Params {
+            create_params: ext_ip.clone(),
+            authz_instance,
+            project_id: authz_project.id(),
+            serialized_authn: authn::saga::Serialized::for_opctx(opctx),
+        };
+
+        let saga_outputs = self
+            .execute_saga::<sagas::instance_ip_attach::SagaInstanceIpAttach>(
+                saga_params,
+            )
+            .await?;
+
+        saga_outputs
+            .lookup_node_output::<views::ExternalIp>("output")
+            .map_err(|e| Error::internal_error(&format!("{:#}", &e)))
+            .internal_context("looking up output from ip attach saga")
+    }
+
+    /// Detach an external IP from an instance.
+    pub(crate) async fn instance_detach_external_ip(
+        self: &Arc<Self>,
+        opctx: &OpContext,
+        instance_lookup: &lookup::Instance<'_>,
+        ext_ip: &params::ExternalIpDetach,
+    ) -> UpdateResult<views::ExternalIp> {
+        let (.., authz_project, authz_instance) =
+            instance_lookup.lookup_for(authz::Action::Modify).await?;
+
+        let saga_params = sagas::instance_ip_detach::Params {
+            delete_params: ext_ip.clone(),
+            authz_instance,
+            project_id: authz_project.id(),
+            serialized_authn: authn::saga::Serialized::for_opctx(opctx),
+        };
+
+        let saga_outputs = self
+            .execute_saga::<sagas::instance_ip_detach::SagaInstanceIpDetach>(
+                saga_params,
+            )
+            .await?;
+
+        saga_outputs
+            .lookup_node_output::<Option<views::ExternalIp>>("output")
+            .map_err(|e| Error::internal_error(&format!("{:#}", &e)))
+            .internal_context("looking up output from ip detach saga")
+            .and_then(|eip| {
+                // Saga idempotency means we'll get Ok(None) on double detach
+                // of an ephemeral IP. Convert this case to an error here.
+                eip.ok_or_else(|| {
+                    Error::invalid_request(
+                        "instance does not have an ephemeral IP attached",
+                    )
+                })
+            })
+    }
 }
 
 #[cfg(test)]
diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs
index 8f97642c88..c0bc5d237b 100644
--- a/nexus/src/app/instance_network.rs
+++ b/nexus/src/app/instance_network.rs
@@ -7,6 +7,9 @@
 use crate::app::sagas::retry_until_known_result;
 use ipnetwork::IpNetwork;
 use ipnetwork::Ipv6Network;
+use nexus_db_model::ExternalIp;
+use nexus_db_model::IpAttachState;
+use nexus_db_model::Ipv4NatEntry;
 use nexus_db_model::Ipv4NatValues;
 use nexus_db_model::Vni as DbVni;
 use nexus_db_queries::authz;
@@ -24,7 +27,6 @@ use sled_agent_client::types::DeleteVirtualNetworkInterfaceHost;
 use sled_agent_client::types::SetVirtualNetworkInterfaceHost;
 use std::collections::HashSet;
 use std::str::FromStr;
-use std::sync::Arc;
 use uuid::Uuid;
 
 impl super::Nexus {
@@ -276,6 +278,10 @@ impl super::Nexus {
     /// Ensures that the Dendrite configuration for the supplied instance is
     /// up-to-date.
     ///
+    /// Returns a list of live NAT RPW table entries from this call. Generally
+    /// these should only be needed for specific unwind operations, like in
+    /// the IP attach saga.
+    ///
     /// # Parameters
     ///
     /// - `opctx`: An operation context that grants read and list-children
     /// - `instance_id`: The ID of the instance to act on.
     /// - `sled_ip_address`: The internal IP address assigned to the sled's
     ///   sled agent.
-    /// - `ip_index_filter`: An optional filter on the index into the instance's
+    /// - `ip_filter`: An optional filter on the index into the instance's
     ///   external IP array.
-    ///   - If this is `Some(n)`, this routine configures DPD state for only the
-    ///     Nth external IP in the collection returned from CRDB. The caller is
-    ///     responsible for ensuring that the IP collection has stable indices
-    ///     when making this call.
+    ///   - If this is `Some(id)`, this routine configures DPD state for only the
+    ///     external IP with `id` in the collection returned from CRDB. This will
+    ///     proceed even when the target IP is 'attaching'.
     ///   - If this is `None`, this routine configures DPD for all external
-    ///     IPs.
+    ///     IPs and *will back out* if any IPs are not yet fully attached to
+    ///     the instance.
     pub(crate) async fn instance_ensure_dpd_config(
         &self,
         opctx: &OpContext,
         instance_id: Uuid,
         sled_ip_address: &std::net::SocketAddrV6,
-        ip_index_filter: Option<usize>,
-        dpd_client: &Arc<dpd_client::Client>,
-    ) -> Result<(), Error> {
+        ip_filter: Option<Uuid>,
+    ) -> Result<Vec<Ipv4NatEntry>, Error> {
         let log = &self.log;
 
         info!(log, "looking up instance's primary network interface";
             "instance_id" => %instance_id);
@@ -309,6 +314,9 @@
             .lookup_for(authz::Action::ListChildren)
             .await?;
 
+        // XXX: Need to abstract over v6 and v4 entries here.
+        let mut nat_entries = vec![];
+
         // All external IPs map to the primary network interface, so find that
         // interface. If there is no such interface, there's no way to route
         // traffic destined to those IPs, so there's nothing to configure and
@@ -324,7 +332,7 @@
             None => {
                 info!(log, "Instance has no primary network interface";
                     "instance_id" => %instance_id);
-                return Ok(());
+                return Ok(nat_entries);
             }
         };
@@ -344,49 +352,104 @@
             .instance_lookup_external_ips(&opctx, instance_id)
             .await?;
 
-        if let Some(wanted_index) = ip_index_filter {
-            if let None = ips.get(wanted_index) {
+        let (ips_of_interest, must_all_be_attached) = if let Some(wanted_id) =
+            ip_filter
+        {
+            if let Some(ip) = ips.iter().find(|v| v.id == wanted_id) {
+                (std::slice::from_ref(ip), false)
+            } else {
                 return Err(Error::internal_error(&format!(
-                    "failed to find external ip address at index: {}",
-                    wanted_index
+                    "failed to find external ip address with id: {wanted_id}, saw {ips:?}",
                 )));
             }
+        } else {
+            (&ips[..], true)
+        };
+
+        // This is performed so that an IP attach/detach will block the
+        // instance_start saga. Return service unavailable to indicate
+        // the request is retryable.
+        if must_all_be_attached
+            && ips_of_interest
+                .iter()
+                .any(|ip| ip.state != IpAttachState::Attached)
+        {
+            return Err(Error::unavail(
+                "cannot push all DPD state: IP attach/detach in progress",
+            ));
         }
 
         let sled_address =
             Ipv6Net(Ipv6Network::new(*sled_ip_address.ip(), 128).unwrap());
 
-        for target_ip in ips
-            .iter()
-            .enumerate()
-            .filter(|(index, _)| {
-                if let Some(wanted_index) = ip_index_filter {
-                    *index == wanted_index
-                } else {
-                    true
-                }
-            })
-            .map(|(_, ip)| ip)
-        {
+        // If all of our IPs are attached or are guaranteed to be owned
+        // by the saga calling this fn, then we need to disregard and
+        // remove conflicting rows. No other instance/service should be
+        // using these as its own, and we are dealing with detritus, e.g.,
+        // the case where we have a concurrent stop -> detach followed
+        // by an attach to another instance, or other ongoing attach saga
+        // cleanup.
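The strategy this comment describes, push each entry, clear any conflicting row on failure, retry once, and remember where things stopped so already-pushed entries can be backed out, reduces to a small generic shape. A sketch under those assumptions, with closures as hypothetical stand-ins for the datastore calls:

    /// Try `push`; on failure, run `cleanup` (a no-op for an absent row)
    /// and retry exactly once. A second failure is surfaced for unwind.
    fn ensure_with_cleanup<E>(
        mut push: impl FnMut() -> Result<(), E>,
        mut cleanup: impl FnMut() -> Result<(), E>,
    ) -> Result<(), E> {
        if push().is_ok() {
            return Ok(());
        }
        cleanup()?;
        push()
    }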
+ let mut err_and_limit = None; + for (i, external_ip) in ips_of_interest.iter().enumerate() { // For each external ip, add a nat entry to the database - self.ensure_nat_entry( - target_ip, - sled_address, - &network_interface, - mac_address, - opctx, - ) - .await?; + if let Ok(id) = self + .ensure_nat_entry( + external_ip, + sled_address, + &network_interface, + mac_address, + opctx, + ) + .await + { + nat_entries.push(id); + continue; + } + + // We seem to be blocked by a bad row -- take it out and retry. + // This will return Ok() for a non-existent row. + if let Err(e) = self + .external_ip_delete_dpd_config_inner(opctx, external_ip) + .await + { + err_and_limit = Some((e, i)); + break; + }; + + match self + .ensure_nat_entry( + external_ip, + sled_address, + &network_interface, + mac_address, + opctx, + ) + .await + { + Ok(id) => nat_entries.push(id), + Err(e) => { + err_and_limit = Some((e, i)); + break; + } + } } - // Notify dendrite that there are changes for it to reconcile. - // In the event of a failure to notify dendrite, we'll log an error - // and rely on dendrite's RPW timer to catch it up. - if let Err(e) = dpd_client.ipv4_nat_trigger_update().await { - error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e); - }; + // In the event of an unresolvable failure, we need to remove + // the entries we just added because the undo won't call into + // `instance_delete_dpd_config`. These entries won't stop a + // future caller, but it's better not to pollute switch state. + if let Some((e, max)) = err_and_limit { + for external_ip in &ips_of_interest[..max] { + let _ = self + .external_ip_delete_dpd_config_inner(opctx, external_ip) + .await; + } + return Err(e); + } - Ok(()) + self.notify_dendrite_nat_state(Some(instance_id), true).await?; + + Ok(nat_entries) } async fn ensure_nat_entry( @@ -396,7 +459,7 @@ impl super::Nexus { network_interface: &sled_agent_client::types::NetworkInterface, mac_address: macaddr::MacAddr6, opctx: &OpContext, - ) -> Result<(), Error> { + ) -> Result { match target_ip.ip { IpNetwork::V4(v4net) => { let nat_entry = Ipv4NatValues { @@ -409,9 +472,10 @@ impl super::Nexus { omicron_common::api::external::MacAddr(mac_address), ), }; - self.db_datastore + Ok(self + .db_datastore .ensure_ipv4_nat_entry(opctx, nat_entry) - .await?; + .await?) } IpNetwork::V6(_v6net) => { // TODO: implement handling of v6 nat. @@ -419,13 +483,16 @@ impl super::Nexus { internal_message: "ipv6 nat is not yet implemented".into(), }); } - }; - Ok(()) + } } /// Attempts to delete all of the Dendrite NAT configuration for the /// instance identified by `authz_instance`. /// + /// Unlike `instance_ensure_dpd_config`, this function will disregard the + /// attachment states of any external IPs because likely callers (instance + /// delete) cannot be piecewise undone. + /// /// # Return value /// /// - `Ok(())` if all NAT entries were successfully deleted. @@ -435,6 +502,12 @@ impl super::Nexus { /// - If an operation fails while this routine is walking NAT entries, it /// will continue trying to delete subsequent entries but will return the /// first error it encountered. + /// - `ip_filter`: An optional filter on the index into the instance's + /// external IP array. + /// - If this is `Some(id)`, this routine configures DPD state for only the + /// external IP with `id` in the collection returned from CRDB. + /// - If this is `None`, this routine configures DPD for all external + /// IPs. 
pub(crate) async fn instance_delete_dpd_config( &self, opctx: &OpContext, @@ -451,37 +524,122 @@ impl super::Nexus { .instance_lookup_external_ips(opctx, instance_id) .await?; - let mut errors = vec![]; for entry in external_ips { - // Soft delete the NAT entry - match self - .db_datastore - .ipv4_nat_delete_by_external_ip(&opctx, &entry) - .await - { - Ok(_) => Ok(()), - Err(err) => match err { - Error::ObjectNotFound { .. } => { - warn!(log, "no matching nat entries to soft delete"); - Ok(()) - } - _ => { - let message = format!( - "failed to delete nat entry due to error: {err:?}" - ); - error!(log, "{}", message); - Err(Error::internal_error(&message)) - } - }, - }?; + self.external_ip_delete_dpd_config_inner(opctx, &entry).await?; } + self.notify_dendrite_nat_state(Some(instance_id), false).await + } + + /// Attempts to delete Dendrite NAT configuration for a single external IP. + /// + /// This function is primarily used to detach an IP which currently belongs + /// to a known instance. + pub(crate) async fn external_ip_delete_dpd_config( + &self, + opctx: &OpContext, + external_ip: &ExternalIp, + ) -> Result<(), Error> { + let log = &self.log; + let instance_id = external_ip.parent_id; + + info!(log, "deleting individual NAT entry from dpd configuration"; + "instance_id" => ?instance_id, + "external_ip" => %external_ip.ip); + + self.external_ip_delete_dpd_config_inner(opctx, external_ip).await?; + + self.notify_dendrite_nat_state(instance_id, false).await + } + + /// Attempts to soft-delete Dendrite NAT configuration for a specific entry + /// via ID. + /// + /// This function is needed to safely cleanup in at least one unwind scenario + /// where a potential second user could need to use the same (IP, portset) pair, + /// e.g. a rapid reattach or a reallocated ephemeral IP. + pub(crate) async fn delete_dpd_config_by_entry( + &self, + opctx: &OpContext, + nat_entry: &Ipv4NatEntry, + ) -> Result<(), Error> { + let log = &self.log; + + info!(log, "deleting individual NAT entry from dpd configuration"; + "id" => ?nat_entry.id, + "version_added" => %nat_entry.external_address.0); + + match self.db_datastore.ipv4_nat_delete(&opctx, nat_entry).await { + Ok(_) => {} + Err(err) => match err { + Error::ObjectNotFound { .. } => { + warn!(log, "no matching nat entries to soft delete"); + } + _ => { + let message = format!( + "failed to delete nat entry due to error: {err:?}" + ); + error!(log, "{}", message); + return Err(Error::internal_error(&message)); + } + }, + } + + self.notify_dendrite_nat_state(None, false).await + } + + /// Soft-delete an individual external IP from the NAT RPW, without + /// triggering a Dendrite notification. + async fn external_ip_delete_dpd_config_inner( + &self, + opctx: &OpContext, + external_ip: &ExternalIp, + ) -> Result<(), Error> { + let log = &self.log; + + // Soft delete the NAT entry + match self + .db_datastore + .ipv4_nat_delete_by_external_ip(&opctx, external_ip) + .await + { + Ok(_) => Ok(()), + Err(err) => match err { + Error::ObjectNotFound { .. } => { + warn!(log, "no matching nat entries to soft delete"); + Ok(()) + } + _ => { + let message = format!( + "failed to delete nat entry due to error: {err:?}" + ); + error!(log, "{}", message); + Err(Error::internal_error(&message)) + } + }, + } + } + + /// Informs all available boundary switches that the set of NAT entries + /// has changed. + /// + /// When `fail_fast` is set, this function will return on any error when + /// acquiring a handle to a DPD client. 
Otherwise, it will attempt to notify + /// all clients and then finally return the first error. + async fn notify_dendrite_nat_state( + &self, + instance_id: Option, + fail_fast: bool, + ) -> Result<(), Error> { + // Querying boundary switches also requires fleet access and the use of the + // instance allocator context. let boundary_switches = self.boundary_switches(&self.opctx_alloc).await?; + let mut errors = vec![]; for switch in &boundary_switches { debug!(&self.log, "notifying dendrite of updates"; - "instance_id" => %authz_instance.id(), + "instance_id" => ?instance_id, "switch" => switch.to_string()); let client_result = self.dpd_clients.get(switch).ok_or_else(|| { @@ -494,7 +652,11 @@ impl super::Nexus { Ok(client) => client, Err(new_error) => { errors.push(new_error); - continue; + if fail_fast { + break; + } else { + continue; + } } }; @@ -506,7 +668,7 @@ impl super::Nexus { }; } - if let Some(e) = errors.into_iter().nth(0) { + if let Some(e) = errors.into_iter().next() { return Err(e); } @@ -525,58 +687,9 @@ impl super::Nexus { ) -> Result<(), Error> { self.delete_instance_v2p_mappings(opctx, authz_instance.id()).await?; - let external_ips = self - .datastore() - .instance_lookup_external_ips(opctx, authz_instance.id()) - .await?; - - let boundary_switches = self.boundary_switches(opctx).await?; - for external_ip in external_ips { - match self - .db_datastore - .ipv4_nat_delete_by_external_ip(&opctx, &external_ip) - .await - { - Ok(_) => Ok(()), - Err(err) => match err { - Error::ObjectNotFound { .. } => { - warn!( - self.log, - "no matching nat entries to soft delete" - ); - Ok(()) - } - _ => { - let message = format!( - "failed to delete nat entry due to error: {err:?}" - ); - error!(self.log, "{}", message); - Err(Error::internal_error(&message)) - } - }, - }?; - } - - for switch in &boundary_switches { - debug!(&self.log, "notifying dendrite of updates"; - "instance_id" => %authz_instance.id(), - "switch" => switch.to_string()); - - let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| { - Error::internal_error(&format!( - "unable to find dendrite client for {switch}" - )) - })?; + self.instance_delete_dpd_config(opctx, authz_instance).await?; - // Notify dendrite that there are changes for it to reconcile. - // In the event of a failure to notify dendrite, we'll log an error - // and rely on dendrite's RPW timer to catch it up. 
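`notify_dendrite_nat_state`, defined above, fans out to every boundary switch and surfaces only the first error it collected; `fail_fast` controls whether a failure to obtain a client handle stops the loop early. A condensed model of that control flow, with a hypothetical `Client` type:

    struct Client;
    impl Client {
        // RPW nudge; per the comments above, failures here are only logged.
        fn trigger_update(&self) {}
    }

    fn notify_all(
        clients: Vec<Result<Client, String>>,
        fail_fast: bool,
    ) -> Result<(), String> {
        let mut errors = Vec::new();
        for client in clients {
            match client {
                Ok(c) => c.trigger_update(),
                Err(e) => {
                    errors.push(e);
                    if fail_fast {
                        break;
                    }
                }
            }
        }
        // Report only the first error, as the real code does.
        errors.into_iter().next().map_or(Ok(()), Err)
    }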
- if let Err(e) = dpd_client.ipv4_nat_trigger_update().await { - error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e); - }; - } - - Ok(()) + self.notify_dendrite_nat_state(Some(authz_instance.id()), true).await } /// Given old and new instance runtime states, determines the desired @@ -715,24 +828,13 @@ impl super::Nexus { .fetch() .await?; - let boundary_switches = - self.boundary_switches(&self.opctx_alloc).await?; - - for switch in &boundary_switches { - let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| { - Error::internal_error(&format!( - "could not find dpd client for {switch}" - )) - })?; - self.instance_ensure_dpd_config( - opctx, - instance_id, - &sled.address(), - None, - dpd_client, - ) - .await?; - } + self.instance_ensure_dpd_config( + opctx, + instance_id, + &sled.address(), + None, + ) + .await?; Ok(()) } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 80bfd5ef22..d643969924 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -87,7 +87,9 @@ pub(crate) const MAX_NICS_PER_INSTANCE: usize = 8; // The value here is arbitrary, but we need *a* limit for the instance // create saga to have a bounded DAG. We might want to only enforce // this during instance create (rather than live attach) in future. -pub(crate) const MAX_EXTERNAL_IPS_PER_INSTANCE: usize = 32; +pub(crate) const MAX_EXTERNAL_IPS_PER_INSTANCE: usize = + nexus_db_queries::db::queries::external_ip::MAX_EXTERNAL_IPS_PER_INSTANCE + as usize; pub(crate) const MAX_EPHEMERAL_IPS_PER_INSTANCE: usize = 1; pub const MAX_VCPU_PER_INSTANCE: u16 = 64; diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index 8f9197b03b..445abd5daf 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ b/nexus/src/app/sagas/instance_common.rs @@ -8,12 +8,22 @@ use std::net::{IpAddr, Ipv6Addr}; use crate::Nexus; use chrono::Utc; -use nexus_db_model::{ByteCount, SledReservationConstraints, SledResource}; -use nexus_db_queries::{context::OpContext, db, db::DataStore}; +use nexus_db_model::{ + ByteCount, ExternalIp, IpAttachState, Ipv4NatEntry, + SledReservationConstraints, SledResource, +}; +use nexus_db_queries::authz; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_db_queries::db::queries::external_ip::SAFE_TRANSIENT_INSTANCE_STATES; +use nexus_db_queries::{authn, context::OpContext, db, db::DataStore}; +use omicron_common::api::external::Error; use omicron_common::api::external::InstanceState; +use serde::{Deserialize, Serialize}; use steno::ActionError; use uuid::Uuid; +use super::NexusActionContext; + /// Reserves resources for a new VMM whose instance has `ncpus` guest logical /// processors and `guest_memory` bytes of guest RAM. The selected sled is /// random within the set of sleds allowed by the supplied `constraints`. @@ -133,3 +143,325 @@ pub(super) async fn allocate_vmm_ipv6( .await .map_err(ActionError::action_failed) } + +/// External IP state needed for IP attach/detachment. +/// +/// This holds a record of the mid-processing external IP, where possible. +/// there are cases where this might not be known (e.g., double detach of an +/// ephemeral IP). +/// In particular we need to explicitly no-op if not `do_saga`, to prevent +/// failures borne from instance state changes from knocking out a valid IP binding. 
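Each helper that follows repeats the same two-step guard on `ModifyStateForExternalIp`. As an illustrative refactoring only (not part of the patch), the guard could be factored out like so:

    /// `do_saga == false` marks an idempotent repeat (e.g. a double detach),
    /// so the step no-ops; `do_saga == true` with no IP record is a
    /// programmer error.
    fn active_ip(
        ip: &ModifyStateForExternalIp,
    ) -> Result<Option<&ExternalIp>, Error> {
        if !ip.do_saga {
            return Ok(None);
        }
        match ip.external_ip.as_ref() {
            Some(eip) => Ok(Some(eip)),
            None => Err(Error::internal_error(
                "tried to `do_saga` without valid external IP",
            )),
        }
    }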
+#[derive(Debug, Deserialize, Serialize)]
+pub struct ModifyStateForExternalIp {
+    pub external_ip: Option<ExternalIp>,
+    pub do_saga: bool,
+}
+
+/// Move an external IP from one state to another as a saga operation,
+/// returning `Ok(true)` if the record was successfully moved and `Ok(false)`
+/// if the record was lost.
+///
+/// Returns `Err` if given an illegal state transition or several rows
+/// were updated, which are programmer errors.
+pub async fn instance_ip_move_state(
+    sagactx: &NexusActionContext,
+    serialized_authn: &authn::saga::Serialized,
+    from: IpAttachState,
+    to: IpAttachState,
+    new_ip: &ModifyStateForExternalIp,
+) -> Result<bool, ActionError> {
+    let osagactx = sagactx.user_data();
+    let datastore = osagactx.datastore();
+    let opctx =
+        crate::context::op_context_for_saga_action(&sagactx, serialized_authn);
+
+    if !new_ip.do_saga {
+        return Ok(true);
+    }
+    let Some(new_ip) = new_ip.external_ip.as_ref() else {
+        return Err(ActionError::action_failed(Error::internal_error(
+            "tried to `do_saga` without valid external IP",
+        )));
+    };
+
+    match datastore
+        .external_ip_complete_op(&opctx, new_ip.id, new_ip.kind, from, to)
+        .await
+        .map_err(ActionError::action_failed)?
+    {
+        0 => Ok(false),
+        1 => Ok(true),
+        _ => Err(ActionError::action_failed(Error::internal_error(
+            "ip state change affected > 1 row",
+        ))),
+    }
+}
+
+pub async fn instance_ip_get_instance_state(
+    sagactx: &NexusActionContext,
+    serialized_authn: &authn::saga::Serialized,
+    authz_instance: &authz::Instance,
+    verb: &str,
+) -> Result<Option<Uuid>, ActionError> {
+    // XXX: we can get instance state (but not sled ID) in same transaction
+    //      as attach (but not detach) with current design. We need to re-query
+    //      for sled ID anyhow, so keep consistent between attach/detach.
+    let osagactx = sagactx.user_data();
+    let datastore = osagactx.datastore();
+    let opctx =
+        crate::context::op_context_for_saga_action(&sagactx, serialized_authn);
+
+    let inst_and_vmm = datastore
+        .instance_fetch_with_vmm(&opctx, authz_instance)
+        .await
+        .map_err(ActionError::action_failed)?;
+
+    let found_state = inst_and_vmm.instance().runtime_state.nexus_state.0;
+    let mut sled_id = inst_and_vmm.sled_id();
+
+    // Arriving here means we started in a correct state (running/stopped).
+    // We need to consider how we interact with the other sagas/ops:
+    // - starting: our claim on an IP will block it from moving past
+    //   DPD_ensure and instance_start will undo. If we complete
+    //   before then, it can move past and will fill in routes/opte.
+    //   Act as though we have no sled_id.
+    // - stopping: this is not sagaized, and the propolis/sled-agent might
+    //   go away. Act as though stopped if we catch it here,
+    //   otherwise convert OPTE ensure to 'service unavailable'
+    //   and undo.
+    // - deleting: can only be called from stopped -- we won't push to dpd
+    //   or sled-agent, and IP record might be deleted or forcibly
+    //   detached. Catch here just in case.
+    match found_state {
+        InstanceState::Stopped
+        | InstanceState::Starting
+        | InstanceState::Stopping => {
+            sled_id = None;
+        }
+        InstanceState::Running => {}
+        state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state.into()) => {
+            return Err(ActionError::action_failed(Error::unavail(&format!(
+                "can't {verb} in transient state {state}"
+            ))))
+        }
+        InstanceState::Destroyed => {
+            return Err(ActionError::action_failed(Error::not_found_by_id(
+                omicron_common::api::external::ResourceType::Instance,
+                &authz_instance.id(),
+            )))
+        }
+        // Final cases are repairing/failed.
+ _ => { + return Err(ActionError::action_failed(Error::invalid_request( + "cannot modify instance IPs, instance is in unhealthy state", + ))) + } + } + + Ok(sled_id) +} + +/// Adds a NAT entry to DPD, routing packets bound for `target_ip` to a +/// target sled. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_add_nat( + sagactx: &NexusActionContext, + serialized_authn: &authn::saga::Serialized, + authz_instance: &authz::Instance, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result, ActionError> { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = + crate::context::op_context_for_saga_action(&sagactx, serialized_authn); + + // No physical sled? Don't push NAT. + let Some(sled_uuid) = sled_uuid else { + return Ok(None); + }; + + if !target_ip.do_saga { + return Ok(None); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + // Querying sleds requires fleet access; use the instance allocator context + // for this. + let (.., sled) = LookupPath::new(&osagactx.nexus().opctx_alloc, &datastore) + .sled_id(sled_uuid) + .fetch() + .await + .map_err(ActionError::action_failed)?; + + osagactx + .nexus() + .instance_ensure_dpd_config( + &opctx, + authz_instance.id(), + &sled.address(), + Some(target_ip.id), + ) + .await + .and_then(|v| { + v.into_iter().next().map(Some).ok_or_else(|| { + Error::internal_error( + "NAT RPW failed to return concrete NAT entry", + ) + }) + }) + .map_err(ActionError::action_failed) +} + +/// Remove a single NAT entry from DPD, dropping packets bound for `target_ip`. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_remove_nat( + sagactx: &NexusActionContext, + serialized_authn: &authn::saga::Serialized, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let opctx = + crate::context::op_context_for_saga_action(&sagactx, serialized_authn); + + // No physical sled? Don't push NAT. + if sled_uuid.is_none() { + return Ok(()); + }; + + if !target_ip.do_saga { + return Ok(()); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + osagactx + .nexus() + .external_ip_delete_dpd_config(&opctx, &target_ip) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +/// Inform the OPTE port for a running instance that it should start +/// sending/receiving traffic on a given IP address. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_add_opte( + sagactx: &NexusActionContext, + authz_instance: &authz::Instance, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + // No physical sled? Don't inform OPTE. 
+ let Some(sled_uuid) = sled_uuid else { + return Ok(()); + }; + + if !target_ip.do_saga { + return Ok(()); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + let sled_agent_body = + target_ip.try_into().map_err(ActionError::action_failed)?; + + osagactx + .nexus() + .sled_client(&sled_uuid) + .await + .map_err(|_| { + ActionError::action_failed(Error::unavail( + "sled agent client went away mid-attach/detach", + )) + })? + .instance_put_external_ip(&authz_instance.id(), &sled_agent_body) + .await + .map_err(|e| { + ActionError::action_failed(match e { + progenitor_client::Error::CommunicationError(_) => { + Error::unavail( + "sled agent client went away mid-attach/detach", + ) + } + e => Error::internal_error(&format!("{e}")), + }) + })?; + + Ok(()) +} + +/// Inform the OPTE port for a running instance that it should cease +/// sending/receiving traffic on a given IP address. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_remove_opte( + sagactx: &NexusActionContext, + authz_instance: &authz::Instance, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + // No physical sled? Don't inform OPTE. + let Some(sled_uuid) = sled_uuid else { + return Ok(()); + }; + + if !target_ip.do_saga { + return Ok(()); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + let sled_agent_body = + target_ip.try_into().map_err(ActionError::action_failed)?; + + osagactx + .nexus() + .sled_client(&sled_uuid) + .await + .map_err(|_| { + ActionError::action_failed(Error::unavail( + "sled agent client went away mid-attach/detach", + )) + })? 
+ .instance_delete_external_ip(&authz_instance.id(), &sled_agent_body) + .await + .map_err(|e| { + ActionError::action_failed(match e { + progenitor_client::Error::CommunicationError(_) => { + Error::unavail( + "sled agent client went away mid-attach/detach", + ) + } + e => Error::internal_error(&format!("{e}")), + }) + })?; + + Ok(()) +} diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index c4c9c4e083..3aa491d978 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -10,7 +10,7 @@ use crate::app::{ MAX_NICS_PER_INSTANCE, }; use crate::external_api::params; -use nexus_db_model::NetworkInterfaceKind; +use nexus_db_model::{ExternalIp, NetworkInterfaceKind}; use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::queries::network_interface::InsertError as InsertNicError; @@ -21,7 +21,9 @@ use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::InstanceState; use omicron_common::api::external::Name; +use omicron_common::api::external::NameOrId; use omicron_common::api::internal::shared::SwitchLocation; +use ref_cast::RefCast; use serde::Deserialize; use serde::Serialize; use slog::warn; @@ -223,7 +225,7 @@ impl NexusSaga for SagaInstanceCreate { SagaName::new(&format!("instance-create-external-ip{i}")); let mut subsaga_builder = DagBuilder::new(subsaga_name); subsaga_builder.append(Node::action( - "output", + format!("external-ip-{i}").as_str(), format!("CreateExternalIp{i}").as_str(), CREATE_EXTERNAL_IP.as_ref(), )); @@ -597,7 +599,7 @@ async fn sic_allocate_instance_snat_ip_undo( /// index `ip_index`, and return its ID if one is created (or None). async fn sic_allocate_instance_external_ip( sagactx: NexusActionContext, -) -> Result<(), ActionError> { +) -> Result, ActionError> { // XXX: may wish to restructure partially: we have at most one ephemeral // and then at most $n$ floating. let osagactx = sagactx.user_data(); @@ -607,7 +609,7 @@ async fn sic_allocate_instance_external_ip( let ip_index = repeat_saga_params.which; let Some(ip_params) = saga_params.create_params.external_ips.get(ip_index) else { - return Ok(()); + return Ok(None); }; let opctx = crate::context::op_context_for_saga_action( &sagactx, @@ -615,39 +617,80 @@ async fn sic_allocate_instance_external_ip( ); let instance_id = repeat_saga_params.instance_id; - match ip_params { + // We perform the 'complete_op' in this saga stage because our IPs are + // created in the attaching state, and we need to move them to attached. + // We *can* do so because the `creating` state will block the IP attach/detach + // sagas from running, so we can safely undo in event of later error in this saga + // without worrying they have been detached by another API call. + // Runtime state should never be able to make 'complete_op' fallible. + let ip = match ip_params { // Allocate a new IP address from the target, possibly default, pool - params::ExternalIpCreate::Ephemeral { ref pool_name } => { - let pool_name = - pool_name.as_ref().map(|name| db::model::Name(name.clone())); + params::ExternalIpCreate::Ephemeral { pool } => { + let pool = if let Some(name_or_id) = pool { + Some( + osagactx + .nexus() + .ip_pool_lookup(&opctx, name_or_id) + .map_err(ActionError::action_failed)? + .lookup_for(authz::Action::CreateChild) + .await + .map_err(ActionError::action_failed)? 
+ .0, + ) + } else { + None + }; + let ip_id = repeat_saga_params.new_id; datastore .allocate_instance_ephemeral_ip( &opctx, ip_id, instance_id, - pool_name, + pool, + true, ) .await - .map_err(ActionError::action_failed)?; + .map_err(ActionError::action_failed)? + .0 } // Set the parent of an existing floating IP to the new instance's ID. - params::ExternalIpCreate::Floating { ref floating_ip_name } => { - let floating_ip_name = db::model::Name(floating_ip_name.clone()); - let (.., authz_fip, db_fip) = LookupPath::new(&opctx, &datastore) - .project_id(saga_params.project_id) - .floating_ip_name(&floating_ip_name) - .fetch_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; + params::ExternalIpCreate::Floating { floating_ip } => { + let (.., authz_fip) = match floating_ip { + NameOrId::Name(name) => LookupPath::new(&opctx, datastore) + .project_id(saga_params.project_id) + .floating_ip_name(db::model::Name::ref_cast(name)), + NameOrId::Id(id) => { + LookupPath::new(&opctx, datastore).floating_ip_id(*id) + } + } + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; datastore - .floating_ip_attach(&opctx, &authz_fip, &db_fip, instance_id) + .floating_ip_begin_attach(&opctx, &authz_fip, instance_id, true) .await - .map_err(ActionError::action_failed)?; + .map_err(ActionError::action_failed)? + .0 } - } - Ok(()) + }; + + // Ignore row count here, this is infallible with correct + // (state, state', kind) but may be zero on repeat call for + // idempotency. + _ = datastore + .external_ip_complete_op( + &opctx, + ip.id, + ip.kind, + nexus_db_model::IpAttachState::Attaching, + nexus_db_model::IpAttachState::Attached, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(Some(ip)) } async fn sic_allocate_instance_external_ip_undo( @@ -662,6 +705,16 @@ async fn sic_allocate_instance_external_ip_undo( &sagactx, &saga_params.serialized_authn, ); + + // We store and lookup `ExternalIp` so that we can detach + // and/or deallocate without double name resolution. + let new_ip = sagactx + .lookup::>(&format!("external-ip-{ip_index}"))?; + + let Some(ip) = new_ip else { + return Ok(()); + }; + let Some(ip_params) = saga_params.create_params.external_ips.get(ip_index) else { return Ok(()); @@ -669,18 +722,42 @@ async fn sic_allocate_instance_external_ip_undo( match ip_params { params::ExternalIpCreate::Ephemeral { .. } => { - let ip_id = repeat_saga_params.new_id; - datastore.deallocate_external_ip(&opctx, ip_id).await?; + datastore.deallocate_external_ip(&opctx, ip.id).await?; } - params::ExternalIpCreate::Floating { floating_ip_name } => { - let floating_ip_name = db::model::Name(floating_ip_name.clone()); - let (.., authz_fip, db_fip) = LookupPath::new(&opctx, &datastore) - .project_id(saga_params.project_id) - .floating_ip_name(&floating_ip_name) - .fetch_for(authz::Action::Modify) + params::ExternalIpCreate::Floating { .. 
} => { + let (.., authz_fip) = LookupPath::new(&opctx, &datastore) + .floating_ip_id(ip.id) + .lookup_for(authz::Action::Modify) + .await?; + + datastore + .floating_ip_begin_detach( + &opctx, + &authz_fip, + repeat_saga_params.instance_id, + true, + ) .await?; - datastore.floating_ip_detach(&opctx, &authz_fip, &db_fip).await?; + let n_rows = datastore + .external_ip_complete_op( + &opctx, + ip.id, + ip.kind, + nexus_db_model::IpAttachState::Detaching, + nexus_db_model::IpAttachState::Detached, + ) + .await + .map_err(ActionError::action_failed)?; + + if n_rows != 1 { + error!( + osagactx.log(), + "sic_allocate_instance_external_ip_undo: failed to \ + completely detach ip {}", + ip.id + ); + } } } Ok(()) @@ -953,7 +1030,7 @@ pub mod test { network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: None, + pool: None, }], disks: vec![params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 013bececee..aaf5dcb033 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -240,7 +240,7 @@ mod test { network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: None, + pool: None, }], disks: vec![params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: DISK_NAME.parse().unwrap() }, diff --git a/nexus/src/app/sagas/instance_ip_attach.rs b/nexus/src/app/sagas/instance_ip_attach.rs new file mode 100644 index 0000000000..be7f81368e --- /dev/null +++ b/nexus/src/app/sagas/instance_ip_attach.rs @@ -0,0 +1,583 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::instance_common::{ + instance_ip_add_nat, instance_ip_add_opte, instance_ip_get_instance_state, + instance_ip_move_state, instance_ip_remove_opte, ModifyStateForExternalIp, +}; +use super::{ActionRegistry, NexusActionContext, NexusSaga}; +use crate::app::sagas::declare_saga_actions; +use crate::app::{authn, authz, db}; +use crate::external_api::params; +use nexus_db_model::{IpAttachState, Ipv4NatEntry}; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_types::external_api::views; +use omicron_common::api::external::{Error, NameOrId}; +use ref_cast::RefCast; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use uuid::Uuid; + +// The IP attach/detach sagas do some resource locking -- because we +// allow them to be called in [Running, Stopped], they must contend +// with each other/themselves, instance start, instance delete, and +// the instance stop action (noting the latter is not a saga). +// +// The main means of access control here is an external IP's `state`. +// Entering either saga begins with an atomic swap from Attached/Detached +// to Attaching/Detaching. This prevents concurrent attach/detach on the +// same EIP, and prevents instance start and migrate from completing with an +// Error::unavail via instance_ensure_registered and/or DPD. +// +// Overlap with stop is handled by treating comms failures with +// sled-agent as temporary errors and unwinding. 
For the delete case, we +// allow the detach completion to have a missing record -- both instance delete +// and detach will leave NAT in the correct state. For attach, if we make it +// to completion and an IP is `detached`, we unwind as a precaution. +// See `instance_common::instance_ip_get_instance_state` for more info. +// +// One more consequence of sled state being able to change beneath us +// is that the central undo actions (DPD/OPTE state) *must* be best-effort. +// This is not bad per-se: instance stop does not itself remove NAT routing +// rules. The only reason these should fail is because an instance has stopped, +// or DPD has died. + +declare_saga_actions! { + instance_ip_attach; + ATTACH_EXTERNAL_IP -> "target_ip" { + + siia_begin_attach_ip + - siia_begin_attach_ip_undo + } + + INSTANCE_STATE -> "instance_state" { + + siia_get_instance_state + } + + REGISTER_NAT -> "nat_entry" { + + siia_nat + - siia_nat_undo + } + + ENSURE_OPTE_PORT -> "no_result1" { + + siia_update_opte + - siia_update_opte_undo + } + + COMPLETE_ATTACH -> "output" { + + siia_complete_attach + } +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct Params { + pub create_params: params::ExternalIpCreate, + pub authz_instance: authz::Instance, + pub project_id: Uuid, + /// Authentication context to use to fetch the instance's current state from + /// the database. + pub serialized_authn: authn::saga::Serialized, +} + +async fn siia_begin_attach_ip( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + match ¶ms.create_params { + // Allocate a new IP address from the target, possibly default, pool + params::ExternalIpCreate::Ephemeral { pool } => { + let pool = if let Some(name_or_id) = pool { + Some( + osagactx + .nexus() + .ip_pool_lookup(&opctx, name_or_id) + .map_err(ActionError::action_failed)? + .lookup_for(authz::Action::CreateChild) + .await + .map_err(ActionError::action_failed)? + .0, + ) + } else { + None + }; + + datastore + .allocate_instance_ephemeral_ip( + &opctx, + Uuid::new_v4(), + params.authz_instance.id(), + pool, + false, + ) + .await + .map_err(ActionError::action_failed) + .map(|(external_ip, do_saga)| ModifyStateForExternalIp { + external_ip: Some(external_ip), + do_saga, + }) + } + // Set the parent of an existing floating IP to the new instance's ID. 
+        params::ExternalIpCreate::Floating { floating_ip } => {
+            let (.., authz_fip) = match floating_ip {
+                NameOrId::Name(name) => LookupPath::new(&opctx, datastore)
+                    .project_id(params.project_id)
+                    .floating_ip_name(db::model::Name::ref_cast(name)),
+                NameOrId::Id(id) => {
+                    LookupPath::new(&opctx, datastore).floating_ip_id(*id)
+                }
+            }
+            .lookup_for(authz::Action::Modify)
+            .await
+            .map_err(ActionError::action_failed)?;
+
+            datastore
+                .floating_ip_begin_attach(
+                    &opctx,
+                    &authz_fip,
+                    params.authz_instance.id(),
+                    false,
+                )
+                .await
+                .map_err(ActionError::action_failed)
+                .map(|(external_ip, do_saga)| ModifyStateForExternalIp {
+                    external_ip: Some(external_ip),
+                    do_saga,
+                })
+        }
+    }
+}
+
+async fn siia_begin_attach_ip_undo(
+    sagactx: NexusActionContext,
+) -> Result<(), anyhow::Error> {
+    let log = sagactx.user_data().log();
+    warn!(log, "siia_begin_attach_ip_undo: Reverting detached->attaching");
+    let params = sagactx.saga_params::<Params>()?;
+    let new_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
+    if !instance_ip_move_state(
+        &sagactx,
+        &params.serialized_authn,
+        IpAttachState::Attaching,
+        IpAttachState::Detached,
+        &new_ip,
+    )
+    .await?
+    {
+        error!(log, "siia_begin_attach_ip_undo: external IP was deleted")
+    }
+
+    Ok(())
+}
+
+async fn siia_get_instance_state(
+    sagactx: NexusActionContext,
+) -> Result<Option<Uuid>, ActionError> {
+    let params = sagactx.saga_params::<Params>()?;
+    instance_ip_get_instance_state(
+        &sagactx,
+        &params.serialized_authn,
+        &params.authz_instance,
+        "attach",
+    )
+    .await
+}
+
+// XXX: Need to abstract over v4 and v6 NAT entries when the time comes.
+async fn siia_nat(
+    sagactx: NexusActionContext,
+) -> Result<Option<Ipv4NatEntry>, ActionError> {
+    let params = sagactx.saga_params::<Params>()?;
+    let sled_id = sagactx.lookup::<Option<Uuid>>("instance_state")?;
+    let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
+    instance_ip_add_nat(
+        &sagactx,
+        &params.serialized_authn,
+        &params.authz_instance,
+        sled_id,
+        target_ip,
+    )
+    .await
+}
+
+async fn siia_nat_undo(
+    sagactx: NexusActionContext,
+) -> Result<(), anyhow::Error> {
+    let log = sagactx.user_data().log();
+    let osagactx = sagactx.user_data();
+    let params = sagactx.saga_params::<Params>()?;
+    let nat_entry = sagactx.lookup::<Option<Ipv4NatEntry>>("nat_entry")?;
+    let opctx = crate::context::op_context_for_saga_action(
+        &sagactx,
+        &params.serialized_authn,
+    );
+
+    let Some(nat_entry) = nat_entry else {
+        // Seeing `None` here means that we never pushed DPD state in
+        // the first instance. Nothing to undo.
+        return Ok(());
+    };
+
+    // This requires some explanation in one case, where we can fail because an
+    // instance may have moved running -> stopped -> deleted.
+    // An instance delete will cause us to unwind and return to this stage *but*
+    // the ExternalIp will no longer have a useful parent (or even a
+    // different parent!).
+    //
+    // Internally, we delete the NAT entry *without* checking its instance state because
+    // it may either be `None`, or another instance may have attached. The
+    // first case is fine, but we need to consider NAT RPW semantics for the second:
+    // * The NAT entry table will ensure uniqueness on (external IP, low_port,
+    //   high_port) for non-deleted rows.
+    // * Instance start and IP attach on a running instance will try to insert such
+    //   a row, fail, and then delete this row before moving forwards.
+    //   - Until either side deletes the row, we're polluting switch NAT.
+    //   - We can't guarantee quick reuse to remove this rule via attach.
+    //   - This will lead to a *new* NAT entry we need to protect, so we need to be careful
+    //     that we only remove *our* incarnation. This is likelier to be hit
+    //     if an ephemeral IP is deallocated, reallocated, and reused in a short timeframe.
+    // * Instance create will successfully set parent, since it won't attempt to ensure
+    //   DPD has correct NAT state unless set to `start: true`.
+    // So it is safe/necessary to remove using the old entry here to target the
+    // exact row we created.
+
+    if let Err(e) = osagactx
+        .nexus()
+        .delete_dpd_config_by_entry(&opctx, &nat_entry)
+        .await
+        .map_err(ActionError::action_failed)
+    {
+        error!(log, "siia_nat_undo: failed to notify DPD: {e}");
+    }
+
+    Ok(())
+}
+
+async fn siia_update_opte(
+    sagactx: NexusActionContext,
+) -> Result<(), ActionError> {
+    let params = sagactx.saga_params::<Params>()?;
+    let sled_id = sagactx.lookup::<Option<Uuid>>("instance_state")?;
+    let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
+    instance_ip_add_opte(&sagactx, &params.authz_instance, sled_id, target_ip)
+        .await
+}
+
+async fn siia_update_opte_undo(
+    sagactx: NexusActionContext,
+) -> Result<(), anyhow::Error> {
+    let log = sagactx.user_data().log();
+    let params = sagactx.saga_params::<Params>()?;
+    let sled_id = sagactx.lookup::<Option<Uuid>>("instance_state")?;
+    let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
+    if let Err(e) = instance_ip_remove_opte(
+        &sagactx,
+        &params.authz_instance,
+        sled_id,
+        target_ip,
+    )
+    .await
+    {
+        error!(log, "siia_update_opte_undo: failed to notify sled-agent: {e}");
+    }
+    Ok(())
+}
+
+async fn siia_complete_attach(
+    sagactx: NexusActionContext,
+) -> Result<views::ExternalIp, ActionError> {
+    let log = sagactx.user_data().log();
+    let params = sagactx.saga_params::<Params>()?;
+    let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
+
+    // There is a clause in `external_ip_complete_op` which specifically
+    // causes an unwind here if the instance delete saga fires and an IP is either
+    // detached or deleted.
+    if !instance_ip_move_state(
+        &sagactx,
+        &params.serialized_authn,
+        IpAttachState::Attaching,
+        IpAttachState::Attached,
+        &target_ip,
+    )
+    .await?
+    {
+        warn!(log, "siia_complete_attach: call was idempotent")
+    }
+
+    target_ip
+        .external_ip
+        .ok_or_else(|| {
+            Error::internal_error(
+                "must always have a defined external IP during instance attach",
+            )
+        })
+        .and_then(TryInto::try_into)
+        .map_err(ActionError::action_failed)
+}
+
+#[derive(Debug)]
+pub struct SagaInstanceIpAttach;
+impl NexusSaga for SagaInstanceIpAttach {
+    const NAME: &'static str = "external-ip-attach";
+    type Params = Params;
+
+    fn register_actions(registry: &mut ActionRegistry) {
+        instance_ip_attach_register_actions(registry);
+    }
+
+    fn make_saga_dag(
+        _params: &Self::Params,
+        mut builder: steno::DagBuilder,
+    ) -> Result<steno::Dag, super::SagaInitError> {
+        builder.append(attach_external_ip_action());
+        builder.append(instance_state_action());
+        builder.append(register_nat_action());
+        builder.append(ensure_opte_port_action());
+        builder.append(complete_attach_action());
+        Ok(builder.build()?)
+ } +} + +#[cfg(test)] +pub(crate) mod test { + use super::*; + use crate::app::{saga::create_saga_dag, sagas::test_helpers}; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, + }; + use dropshot::test_util::ClientTestContext; + use nexus_db_model::{ExternalIp, IpKind}; + use nexus_db_queries::context::OpContext; + use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_floating_ip, create_instance, + create_project, + }; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::{Name, SimpleIdentity}; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + const PROJECT_NAME: &str = "cafe"; + const INSTANCE_NAME: &str = "menu"; + const FIP_NAME: &str = "affogato"; + + pub async fn ip_manip_test_setup(client: &ClientTestContext) -> Uuid { + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + create_floating_ip( + client, + FIP_NAME, + &project.identity.id.to_string(), + None, + None, + ) + .await; + + project.id() + } + + pub async fn new_test_params( + opctx: &OpContext, + datastore: &db::DataStore, + use_floating: bool, + ) -> Params { + let create_params = if use_floating { + params::ExternalIpCreate::Floating { + floating_ip: FIP_NAME.parse::().unwrap().into(), + } + } else { + params::ExternalIpCreate::Ephemeral { pool: None } + }; + + let (.., authz_project, authz_instance) = + LookupPath::new(opctx, datastore) + .project_name(&db::model::Name(PROJECT_NAME.parse().unwrap())) + .instance_name(&db::model::Name(INSTANCE_NAME.parse().unwrap())) + .lookup_for(authz::Action::Modify) + .await + .unwrap(); + + Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + project_id: authz_project.id(), + create_params, + authz_instance, + } + } + + #[nexus_test(server = crate::Server)] + async fn test_saga_basic_usage_succeeds( + cptestctx: &ControlPlaneTestContext, + ) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + let sled_agent = &cptestctx.sled_agent.sled_agent; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float in [false, true] { + let params = new_test_params(&opctx, datastore, use_float).await; + + let dag = create_saga_dag::(params).unwrap(); + let saga = nexus.create_runnable_saga(dag).await.unwrap(); + nexus.run_saga(saga).await.expect("Attach saga should succeed"); + } + + let instance_id = instance.id(); + + // Sled agent has a record of the new external IPs. + let mut eips = sled_agent.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + assert!(my_eips.iter().any(|v| matches!( + v, + omicron_sled_agent::params::InstanceExternalIpBody::Floating(_) + ))); + assert!(my_eips.iter().any(|v| matches!( + v, + omicron_sled_agent::params::InstanceExternalIpBody::Ephemeral(_) + ))); + + // DB has records for SNAT plus the new IPs. 
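+        // (Three rows are expected: the SNAT entry is allocated at
+        // instance-create time for outbound traffic and is not managed by
+        // this saga; the ephemeral and floating IPs were just attached.)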
+ let db_eips = datastore + .instance_lookup_external_ips(&opctx, instance_id) + .await + .unwrap(); + assert_eq!(db_eips.len(), 3); + assert!(db_eips.iter().any(|v| v.kind == IpKind::Ephemeral)); + assert!(db_eips.iter().any(|v| v.kind == IpKind::Floating)); + assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat)); + } + + pub(crate) async fn verify_clean_slate( + cptestctx: &ControlPlaneTestContext, + instance_id: Uuid, + ) { + use nexus_db_queries::db::schema::external_ip::dsl; + + let sled_agent = &cptestctx.sled_agent.sled_agent; + let datastore = cptestctx.server.apictx().nexus.datastore(); + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + // No Floating IPs exist in states other than 'detached'. + assert!(dsl::external_ip + .filter(dsl::kind.eq(IpKind::Floating)) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::parent_id.eq(instance_id)) + .filter(dsl::state.ne(IpAttachState::Detached)) + .select(ExternalIp::as_select()) + .first_async::(&*conn) + .await + .optional() + .unwrap() + .is_none()); + + // All ephemeral IPs are removed. + assert!(dsl::external_ip + .filter(dsl::kind.eq(IpKind::Ephemeral)) + .filter(dsl::time_deleted.is_null()) + .select(ExternalIp::as_select()) + .first_async::(&*conn) + .await + .optional() + .unwrap() + .is_none()); + + // No IP bindings remain on sled-agent. + let mut eips = sled_agent.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + assert!(my_eips.is_empty()); + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float in [false, true] { + test_helpers::action_failure_can_unwind::( + nexus, + || Box::pin(new_test_params(&opctx, datastore, use_float) ), + || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + log, + ) + .await; + } + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float in [false, true] { + test_helpers::action_failure_can_unwind_idempotently::< + SagaInstanceIpAttach, + _, + _, + >( + nexus, + || Box::pin(new_test_params(&opctx, datastore, use_float)), + || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + log, + ) + .await; + } + } + + #[nexus_test(server = crate::Server)] + async fn test_actions_succeed_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let _instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float 
in [false, true] {
+            let params = new_test_params(&opctx, datastore, use_float).await;
+            let dag = create_saga_dag::<SagaInstanceIpAttach>(params).unwrap();
+            test_helpers::actions_succeed_idempotently(nexus, dag).await;
+        }
+    }
+}
diff --git a/nexus/src/app/sagas/instance_ip_detach.rs b/nexus/src/app/sagas/instance_ip_detach.rs
new file mode 100644
index 0000000000..da6c92077d
--- /dev/null
+++ b/nexus/src/app/sagas/instance_ip_detach.rs
@@ -0,0 +1,551 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+use super::instance_common::{
+    instance_ip_add_nat, instance_ip_add_opte, instance_ip_get_instance_state,
+    instance_ip_move_state, instance_ip_remove_nat, instance_ip_remove_opte,
+    ModifyStateForExternalIp,
+};
+use super::{ActionRegistry, NexusActionContext, NexusSaga};
+use crate::app::sagas::declare_saga_actions;
+use crate::app::{authn, authz, db};
+use crate::external_api::params;
+use nexus_db_model::IpAttachState;
+use nexus_db_queries::db::lookup::LookupPath;
+use nexus_types::external_api::views;
+use omicron_common::api::external::NameOrId;
+use ref_cast::RefCast;
+use serde::Deserialize;
+use serde::Serialize;
+use steno::ActionError;
+use uuid::Uuid;
+
+// This runs on similar logic to instance IP attach: see its head
+// comment for an explanation of the structure wrt. other sagas.
+
+declare_saga_actions! {
+    instance_ip_detach;
+    DETACH_EXTERNAL_IP -> "target_ip" {
+        + siid_begin_detach_ip
+        - siid_begin_detach_ip_undo
+    }
+
+    INSTANCE_STATE -> "instance_state" {
+        + siid_get_instance_state
+    }
+
+    REMOVE_NAT -> "no_result0" {
+        + siid_nat
+        - siid_nat_undo
+    }
+
+    REMOVE_OPTE_PORT -> "no_result1" {
+        + siid_update_opte
+        - siid_update_opte_undo
+    }
+
+    COMPLETE_DETACH -> "output" {
+        + siid_complete_detach
+    }
+}
+
+#[derive(Debug, Deserialize, Serialize)]
+pub struct Params {
+    pub delete_params: params::ExternalIpDetach,
+    pub authz_instance: authz::Instance,
+    pub project_id: Uuid,
+    /// Authentication context to use to fetch the instance's current state from
+    /// the database.
+ pub serialized_authn: authn::saga::Serialized, +} + +async fn siid_begin_detach_ip( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + match ¶ms.delete_params { + params::ExternalIpDetach::Ephemeral => { + let eip = datastore + .instance_lookup_ephemeral_ip( + &opctx, + params.authz_instance.id(), + ) + .await + .map_err(ActionError::action_failed)?; + + if let Some(eph_ip) = eip { + datastore + .begin_deallocate_ephemeral_ip( + &opctx, + eph_ip.id, + params.authz_instance.id(), + ) + .await + .map_err(ActionError::action_failed) + .map(|external_ip| ModifyStateForExternalIp { + do_saga: external_ip.is_some(), + external_ip, + }) + } else { + Ok(ModifyStateForExternalIp { + do_saga: false, + external_ip: None, + }) + } + } + params::ExternalIpDetach::Floating { floating_ip } => { + let (.., authz_fip) = match floating_ip { + NameOrId::Name(name) => LookupPath::new(&opctx, datastore) + .project_id(params.project_id) + .floating_ip_name(db::model::Name::ref_cast(name)), + NameOrId::Id(id) => { + LookupPath::new(&opctx, datastore).floating_ip_id(*id) + } + } + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; + + datastore + .floating_ip_begin_detach( + &opctx, + &authz_fip, + params.authz_instance.id(), + false, + ) + .await + .map_err(ActionError::action_failed) + .map(|(external_ip, do_saga)| ModifyStateForExternalIp { + external_ip: Some(external_ip), + do_saga, + }) + } + } +} + +async fn siid_begin_detach_ip_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + warn!(log, "siid_begin_detach_ip_undo: Reverting attached->detaching"); + let params = sagactx.saga_params::()?; + let new_ip = sagactx.lookup::("target_ip")?; + if !instance_ip_move_state( + &sagactx, + ¶ms.serialized_authn, + IpAttachState::Detaching, + IpAttachState::Attached, + &new_ip, + ) + .await? 
+ { + error!(log, "siid_begin_detach_ip_undo: external IP was deleted") + } + + Ok(()) +} + +async fn siid_get_instance_state( + sagactx: NexusActionContext, +) -> Result, ActionError> { + let params = sagactx.saga_params::()?; + instance_ip_get_instance_state( + &sagactx, + ¶ms.serialized_authn, + ¶ms.authz_instance, + "detach", + ) + .await +} + +async fn siid_nat(sagactx: NexusActionContext) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + instance_ip_remove_nat( + &sagactx, + ¶ms.serialized_authn, + sled_id, + target_ip, + ) + .await +} + +async fn siid_nat_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + if let Err(e) = instance_ip_add_nat( + &sagactx, + ¶ms.serialized_authn, + ¶ms.authz_instance, + sled_id, + target_ip, + ) + .await + { + error!(log, "siid_nat_undo: failed to notify DPD: {e}"); + } + + Ok(()) +} + +async fn siid_update_opte( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + instance_ip_remove_opte( + &sagactx, + ¶ms.authz_instance, + sled_id, + target_ip, + ) + .await +} + +async fn siid_update_opte_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + if let Err(e) = instance_ip_add_opte( + &sagactx, + ¶ms.authz_instance, + sled_id, + target_ip, + ) + .await + { + error!(log, "siid_update_opte_undo: failed to notify sled-agent: {e}"); + } + Ok(()) +} + +async fn siid_complete_detach( + sagactx: NexusActionContext, +) -> Result, ActionError> { + let log = sagactx.user_data().log(); + let params = sagactx.saga_params::()?; + let target_ip = sagactx.lookup::("target_ip")?; + + if !instance_ip_move_state( + &sagactx, + ¶ms.serialized_authn, + IpAttachState::Detaching, + IpAttachState::Detached, + &target_ip, + ) + .await? + { + warn!( + log, + "siid_complete_detach: external IP was deleted or call was idempotent" + ) + } + + target_ip + .external_ip + .map(TryInto::try_into) + .transpose() + .map_err(ActionError::action_failed) +} + +#[derive(Debug)] +pub struct SagaInstanceIpDetach; +impl NexusSaga for SagaInstanceIpDetach { + const NAME: &'static str = "external-ip-detach"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + instance_ip_detach_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(detach_external_ip_action()); + builder.append(instance_state_action()); + builder.append(remove_nat_action()); + builder.append(remove_opte_port_action()); + builder.append(complete_detach_action()); + Ok(builder.build()?) 
+ } +} + +#[cfg(test)] +pub(crate) mod test { + use super::*; + use crate::{ + app::{ + saga::create_saga_dag, + sagas::{ + instance_ip_attach::{self, test::ip_manip_test_setup}, + test_helpers, + }, + }, + Nexus, + }; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, + }; + use nexus_db_model::{ExternalIp, IpKind}; + use nexus_db_queries::context::OpContext; + use nexus_test_utils::resource_helpers::create_instance; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::{Name, SimpleIdentity}; + use std::sync::Arc; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + const PROJECT_NAME: &str = "cafe"; + const INSTANCE_NAME: &str = "menu"; + const FIP_NAME: &str = "affogato"; + + async fn new_test_params( + opctx: &OpContext, + datastore: &db::DataStore, + use_floating: bool, + ) -> Params { + let delete_params = if use_floating { + params::ExternalIpDetach::Floating { + floating_ip: FIP_NAME.parse::().unwrap().into(), + } + } else { + params::ExternalIpDetach::Ephemeral + }; + + let (.., authz_project, authz_instance) = + LookupPath::new(opctx, datastore) + .project_name(&db::model::Name(PROJECT_NAME.parse().unwrap())) + .instance_name(&db::model::Name(INSTANCE_NAME.parse().unwrap())) + .lookup_for(authz::Action::Modify) + .await + .unwrap(); + + Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + project_id: authz_project.id(), + delete_params, + authz_instance, + } + } + + async fn attach_instance_ips(nexus: &Arc, opctx: &OpContext) { + let datastore = &nexus.db_datastore; + + let proj_name = db::model::Name(PROJECT_NAME.parse().unwrap()); + let inst_name = db::model::Name(INSTANCE_NAME.parse().unwrap()); + let lookup = LookupPath::new(opctx, datastore) + .project_name(&proj_name) + .instance_name(&inst_name); + + for use_float in [false, true] { + let params = instance_ip_attach::test::new_test_params( + opctx, datastore, use_float, + ) + .await; + nexus + .instance_attach_external_ip( + opctx, + &lookup, + ¶ms.create_params, + ) + .await + .unwrap(); + } + } + + #[nexus_test(server = crate::Server)] + async fn test_saga_basic_usage_succeeds( + cptestctx: &ControlPlaneTestContext, + ) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + let sled_agent = &cptestctx.sled_agent.sled_agent; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _ = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + attach_instance_ips(nexus, &opctx).await; + + for use_float in [false, true] { + let params = new_test_params(&opctx, datastore, use_float).await; + + let dag = create_saga_dag::(params).unwrap(); + let saga = nexus.create_runnable_saga(dag).await.unwrap(); + nexus.run_saga(saga).await.expect("Detach saga should succeed"); + } + + let instance_id = instance.id(); + + // Sled agent has removed its records of the external IPs. + let mut eips = sled_agent.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + assert!(my_eips.is_empty()); + + // DB only has record for SNAT. 
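+        // (Ephemeral and floating rows are gone; the SNAT entry remains
+        // because it is tied to the instance itself, not to these sagas.)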
+ let db_eips = datastore + .instance_lookup_external_ips(&opctx, instance_id) + .await + .unwrap(); + assert_eq!(db_eips.len(), 1); + assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat)); + } + + pub(crate) async fn verify_clean_slate( + cptestctx: &ControlPlaneTestContext, + instance_id: Uuid, + ) { + use nexus_db_queries::db::schema::external_ip::dsl; + + let opctx = test_helpers::test_opctx(cptestctx); + let sled_agent = &cptestctx.sled_agent.sled_agent; + let datastore = cptestctx.server.apictx().nexus.datastore(); + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + // No IPs in transitional states w/ current instance. + assert!(dsl::external_ip + .filter(dsl::time_deleted.is_null()) + .filter(dsl::parent_id.eq(instance_id)) + .filter(dsl::state.ne(IpAttachState::Attached)) + .select(ExternalIp::as_select()) + .first_async::(&*conn) + .await + .optional() + .unwrap() + .is_none()); + + // No external IPs in detached state. + assert!(dsl::external_ip + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.eq(IpAttachState::Detached)) + .select(ExternalIp::as_select()) + .first_async::(&*conn) + .await + .optional() + .unwrap() + .is_none()); + + // Instance still has one Ephemeral IP, and one Floating IP. + let db_eips = datastore + .instance_lookup_external_ips(&opctx, instance_id) + .await + .unwrap(); + assert_eq!(db_eips.len(), 3); + assert!(db_eips.iter().any(|v| v.kind == IpKind::Ephemeral)); + assert!(db_eips.iter().any(|v| v.kind == IpKind::Floating)); + assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat)); + + // No IP bindings remain on sled-agent. + let eips = &*sled_agent.external_ips.lock().await; + for (_nic_id, eip_set) in eips { + assert_eq!(eip_set.len(), 2); + } + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + attach_instance_ips(nexus, &opctx).await; + + for use_float in [false, true] { + test_helpers::action_failure_can_unwind::( + nexus, + || Box::pin(new_test_params(&opctx, datastore, use_float) ), + || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + log, + ) + .await; + } + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + attach_instance_ips(nexus, &opctx).await; + + for use_float in [false, true] { + test_helpers::action_failure_can_unwind_idempotently::< + SagaInstanceIpDetach, + _, + _, + >( + nexus, + || Box::pin(new_test_params(&opctx, datastore, use_float)), + || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + log, + ) + .await; + } + } + + #[nexus_test(server = crate::Server)] + async fn test_actions_succeed_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let client 
= &cptestctx.external_client;
+        let apictx = &cptestctx.server.apictx();
+        let nexus = &apictx.nexus;
+
+        let opctx = test_helpers::test_opctx(cptestctx);
+        let datastore = &nexus.db_datastore;
+        let _project_id = ip_manip_test_setup(&client).await;
+        let _instance =
+            create_instance(client, PROJECT_NAME, INSTANCE_NAME).await;
+
+        attach_instance_ips(nexus, &opctx).await;
+
+        for use_float in [false, true] {
+            let params = new_test_params(&opctx, datastore, use_float).await;
+            let dag = create_saga_dag::<SagaInstanceIpDetach>(params).unwrap();
+            test_helpers::actions_succeed_idempotently(nexus, dag).await;
+        }
+    }
+}
diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs
index 9d12bd8031..92c927e1ce 100644
--- a/nexus/src/app/sagas/instance_start.rs
+++ b/nexus/src/app/sagas/instance_start.rs
@@ -405,35 +405,12 @@ async fn sis_dpd_ensure(
         .await
         .map_err(ActionError::action_failed)?;
 
-    // Querying boundary switches also requires fleet access and the use of the
-    // instance allocator context.
-    let boundary_switches = osagactx
+    osagactx
         .nexus()
-        .boundary_switches(&osagactx.nexus().opctx_alloc)
+        .instance_ensure_dpd_config(&opctx, instance_id, &sled.address(), None)
         .await
         .map_err(ActionError::action_failed)?;
 
-    for switch in boundary_switches {
-        let dpd_client =
-            osagactx.nexus().dpd_clients.get(&switch).ok_or_else(|| {
-                ActionError::action_failed(Error::internal_error(&format!(
-                    "unable to find client for switch {switch}"
-                )))
-            })?;
-
-        osagactx
-            .nexus()
-            .instance_ensure_dpd_config(
-                &opctx,
-                instance_id,
-                &sled.address(),
-                None,
-                dpd_client,
-            )
-            .await
-            .map_err(ActionError::action_failed)?;
-    }
-
     Ok(())
 }
 
diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs
index c5918d32ef..1bd85ecf32 100644
--- a/nexus/src/app/sagas/mod.rs
+++ b/nexus/src/app/sagas/mod.rs
@@ -26,6 +26,8 @@ pub mod image_delete;
 mod instance_common;
 pub mod instance_create;
 pub mod instance_delete;
+pub mod instance_ip_attach;
+pub mod instance_ip_detach;
 pub mod instance_migrate;
 pub mod instance_start;
 pub mod loopback_address_create;
@@ -130,6 +132,12 @@ fn make_action_registry() -> ActionRegistry {
     <instance_delete::SagaInstanceDelete as NexusSaga>::register_actions(
         &mut registry,
     );
+    <instance_ip_attach::SagaInstanceIpAttach as NexusSaga>::register_actions(
+        &mut registry,
+    );
+    <instance_ip_detach::SagaInstanceIpDetach as NexusSaga>::register_actions(
+        &mut registry,
+    );
     <instance_migrate::SagaInstanceMigrate as NexusSaga>::register_actions(
         &mut registry,
     );
diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs
index 65b03a9fdf..a6cb9e80fe 100644
--- a/nexus/src/external_api/http_entrypoints.rs
+++ b/nexus/src/external_api/http_entrypoints.rs
@@ -142,6 +142,8 @@ pub(crate) fn external_api() -> NexusApiDescription {
         api.register(floating_ip_create)?;
         api.register(floating_ip_view)?;
         api.register(floating_ip_delete)?;
+        api.register(floating_ip_attach)?;
+        api.register(floating_ip_detach)?;
 
         api.register(disk_list)?;
         api.register(disk_create)?;
@@ -200,6 +202,8 @@ pub(crate) fn external_api() -> NexusApiDescription {
         api.register(instance_network_interface_delete)?;
 
         api.register(instance_external_ip_list)?;
+        api.register(instance_ephemeral_ip_attach)?;
+        api.register(instance_ephemeral_ip_detach)?;
 
         api.register(vpc_router_list)?;
         api.register(vpc_router_view)?;
@@ -1976,6 +1980,69 @@ async fn floating_ip_view(
     apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
 }
 
+/// Attach a floating IP to an instance or other resource
+#[endpoint {
+    method = POST,
+    path = "/v1/floating-ips/{floating_ip}/attach",
+    tags = ["floating-ips"],
+}]
+async fn floating_ip_attach(
+    rqctx: RequestContext<Arc<ServerContext>>,
+    path_params: Path<params::FloatingIpPath>,
+    query_params: Query<params::OptionalProjectSelector>,
+    target: TypedBody<params::FloatingIpAttach>,
+) -> Result<HttpResponseAccepted<views::FloatingIp>, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+        let nexus = &apictx.nexus;
+        let path = path_params.into_inner();
+        let query = query_params.into_inner();
+        let floating_ip_selector = params::FloatingIpSelector {
+            floating_ip: path.floating_ip,
+            project: query.project,
+        };
+        let ip = nexus
+            .floating_ip_attach(
+                &opctx,
+                floating_ip_selector,
+                target.into_inner(),
+            )
+            .await?;
+        Ok(HttpResponseAccepted(ip))
+    };
+    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
+
+/// Detach a floating IP from an instance or other resource
+#[endpoint {
+    method = POST,
+    path = "/v1/floating-ips/{floating_ip}/detach",
+    tags = ["floating-ips"],
+}]
+async fn floating_ip_detach(
+    rqctx: RequestContext<Arc<ServerContext>>,
+    path_params: Path<params::FloatingIpPath>,
+    query_params: Query<params::OptionalProjectSelector>,
+) -> Result<HttpResponseAccepted<views::FloatingIp>, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+        let nexus = &apictx.nexus;
+        let path = path_params.into_inner();
+        let query = query_params.into_inner();
+        let floating_ip_selector = params::FloatingIpSelector {
+            floating_ip: path.floating_ip,
+            project: query.project,
+        };
+        let fip_lookup =
+            nexus.floating_ip_lookup(&opctx, floating_ip_selector)?;
+        let ip = nexus.floating_ip_detach(&opctx, fip_lookup).await?;
+        Ok(HttpResponseAccepted(ip))
+    };
+    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
+
 // Disks
 
 /// List disks
@@ -3884,6 +3951,79 @@ async fn instance_external_ip_list(
     apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
 }
 
+/// Allocate and attach an ephemeral IP to an instance
+#[endpoint {
+    method = POST,
+    path = "/v1/instances/{instance}/external-ips/ephemeral",
+    tags = ["instances"],
+}]
+async fn instance_ephemeral_ip_attach(
+    rqctx: RequestContext<Arc<ServerContext>>,
+    path_params: Path<params::InstancePath>,
+    query_params: Query<params::OptionalProjectSelector>,
+    ip_to_create: TypedBody<params::EphemeralIpCreate>,
+) -> Result<HttpResponseAccepted<views::ExternalIp>, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+        let nexus = &apictx.nexus;
+        let path = path_params.into_inner();
+        let query = query_params.into_inner();
+        let instance_selector = params::InstanceSelector {
+            project: query.project,
+            instance: path.instance,
+        };
+        let instance_lookup =
+            nexus.instance_lookup(&opctx, instance_selector)?;
+        let ip = nexus
+            .instance_attach_external_ip(
+                &opctx,
+                &instance_lookup,
+                &params::ExternalIpCreate::Ephemeral {
+                    pool: ip_to_create.into_inner().pool,
+                },
+            )
+            .await?;
+        Ok(HttpResponseAccepted(ip))
+    };
+    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
+
+/// Detach and deallocate an ephemeral IP from an instance
+#[endpoint {
+    method = DELETE,
+    path = "/v1/instances/{instance}/external-ips/ephemeral",
+    tags = ["instances"],
+}]
+async fn instance_ephemeral_ip_detach(
+    rqctx: RequestContext<Arc<ServerContext>>,
+    path_params: Path<params::InstancePath>,
+    query_params: Query<params::OptionalProjectSelector>,
+) -> Result<HttpResponseDeleted, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+        let nexus = &apictx.nexus;
+        let path = path_params.into_inner();
+        let query = query_params.into_inner();
+        let instance_selector = params::InstanceSelector {
+            project: query.project,
+            instance: path.instance,
+        };
+        let instance_lookup =
+            nexus.instance_lookup(&opctx,
instance_selector)?; + nexus + .instance_detach_external_ip( + &opctx, + &instance_lookup, + ¶ms::ExternalIpDetach::Ephemeral, + ) + .await?; + Ok(HttpResponseDeleted()) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + // Snapshots /// List snapshots diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 4fe03f204c..d82a934686 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -492,6 +492,7 @@ pub async fn create_instance( Vec::::new(), // External IPs= Vec::::new(), + true, ) .await } @@ -504,6 +505,7 @@ pub async fn create_instance_with( nics: ¶ms::InstanceNetworkInterfaceAttachment, disks: Vec, external_ips: Vec, + start: bool, ) -> Instance { let url = format!("/v1/instances?project={}", project_name); object_create( @@ -523,7 +525,7 @@ pub async fn create_instance_with( network_interfaces: nics.clone(), external_ips, disks, - start: true, + start, }, ) .await diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index b9023a8212..379042c849 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -1747,6 +1747,7 @@ async fn create_instance_with_disk(client: &ClientTestContext) { params::InstanceDiskAttach { name: DISK_NAME.parse().unwrap() }, )], Vec::::new(), + true, ) .await; } diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 8beffe43a5..4f606f2bff 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -390,6 +390,12 @@ pub static DEMO_INSTANCE_DISKS_DETACH_URL: Lazy = Lazy::new(|| { *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR ) }); +pub static DEMO_INSTANCE_EPHEMERAL_IP_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/external-ips/ephemeral?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); pub static DEMO_INSTANCE_NICS_URL: Lazy = Lazy::new(|| { format!( "/v1/network-interfaces?project={}&instance={}", @@ -414,7 +420,7 @@ pub static DEMO_INSTANCE_CREATE: Lazy = user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some(DEMO_IP_POOL_NAME.clone()), + pool: Some(DEMO_IP_POOL_NAME.clone().into()), }], disks: vec![], start: true, @@ -720,6 +726,19 @@ pub static DEMO_FLOAT_IP_URL: Lazy = Lazy::new(|| { ) }); +pub static DEMO_FLOATING_IP_ATTACH_URL: Lazy = Lazy::new(|| { + format!( + "/v1/floating-ips/{}/attach?{}", + *DEMO_FLOAT_IP_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_FLOATING_IP_DETACH_URL: Lazy = Lazy::new(|| { + format!( + "/v1/floating-ips/{}/detach?{}", + *DEMO_FLOAT_IP_NAME, *DEMO_PROJECT_SELECTOR + ) +}); + pub static DEMO_FLOAT_IP_CREATE: Lazy = Lazy::new(|| params::FloatingIpCreate { identity: IdentityMetadataCreateParams { @@ -730,6 +749,13 @@ pub static DEMO_FLOAT_IP_CREATE: Lazy = pool: None, }); +pub static DEMO_FLOAT_IP_ATTACH: Lazy = + Lazy::new(|| params::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: DEMO_FLOAT_IP_NAME.clone().into(), + }); +pub static DEMO_EPHEMERAL_IP_ATTACH: Lazy = + Lazy::new(|| params::EphemeralIpCreate { pool: None }); // Identity providers pub const IDENTITY_PROVIDERS_URL: &'static str = "/v1/system/identity-providers?silo=demo-silo"; @@ -1767,6 +1793,18 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { allowed_methods: 
vec![AllowedMethod::Get], }, + VerifyEndpoint { + url: &DEMO_INSTANCE_EPHEMERAL_IP_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_EPHEMERAL_IP_ATTACH).unwrap() + ), + AllowedMethod::Delete, + ], + }, + /* IAM */ VerifyEndpoint { @@ -2240,5 +2278,27 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::Delete, ], }, + + VerifyEndpoint { + url: &DEMO_FLOATING_IP_ATTACH_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_FLOAT_IP_ATTACH).unwrap(), + ), + ], + }, + + VerifyEndpoint { + url: &DEMO_FLOATING_IP_DETACH_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&()).unwrap(), + ), + ], + }, ] }); diff --git a/nexus/tests/integration_tests/external_ips.rs b/nexus/tests/integration_tests/external_ips.rs index 3b6127ceb1..57f813d505 100644 --- a/nexus/tests/integration_tests/external_ips.rs +++ b/nexus/tests/integration_tests/external_ips.rs @@ -7,6 +7,7 @@ use std::net::IpAddr; use std::net::Ipv4Addr; +use crate::integration_tests::instances::fetch_instance_external_ips; use crate::integration_tests::instances::instance_simulate; use dropshot::test_util::ClientTestContext; use dropshot::HttpErrorResponseBody; @@ -30,12 +31,14 @@ use nexus_test_utils::resource_helpers::object_delete_error; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use nexus_types::external_api::shared; +use nexus_types::external_api::views; use nexus_types::external_api::views::FloatingIp; use nexus_types::identity::Resource; use omicron_common::address::IpRange; use omicron_common::address::Ipv4Range; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::Instance; +use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use uuid::Uuid; @@ -47,10 +50,33 @@ const PROJECT_NAME: &str = "rootbeer-float"; const FIP_NAMES: &[&str] = &["vanilla", "chocolate", "strawberry", "pistachio", "caramel"]; +const INSTANCE_NAMES: &[&str] = &["anonymous-diner", "anonymous-restaurant"]; + pub fn get_floating_ips_url(project_name: &str) -> String { format!("/v1/floating-ips?project={project_name}") } +pub fn instance_ephemeral_ip_url( + instance_name: &str, + project_name: &str, +) -> String { + format!("/v1/instances/{instance_name}/external-ips/ephemeral?project={project_name}") +} + +pub fn attach_floating_ip_url( + floating_ip_name: &str, + project_name: &str, +) -> String { + format!("/v1/floating-ips/{floating_ip_name}/attach?project={project_name}") +} + +pub fn detach_floating_ip_url( + floating_ip_name: &str, + project_name: &str, +) -> String { + format!("/v1/floating-ips/{floating_ip_name}/detach?project={project_name}") +} + pub fn get_floating_ip_by_name_url( fip_name: &str, project_name: &str, @@ -392,7 +418,9 @@ async fn test_floating_ip_delete(cptestctx: &ControlPlaneTestContext) { } #[nexus_test] -async fn test_floating_ip_attachment(cptestctx: &ControlPlaneTestContext) { +async fn test_floating_ip_create_attachment( + cptestctx: &ControlPlaneTestContext, +) { let client = &cptestctx.external_client; let apictx = &cptestctx.server.apictx(); let nexus = &apictx.nexus; @@ -410,16 +438,13 @@ async fn test_floating_ip_attachment(cptestctx: 
&ControlPlaneTestContext) { .await; // Bind the floating IP to an instance at create time. - let instance_name = "anonymous-diner"; - let instance = create_instance_with( - &client, - PROJECT_NAME, + let instance_name = INSTANCE_NAMES[0]; + let instance = instance_for_external_ips( + client, instance_name, - ¶ms::InstanceNetworkInterfaceAttachment::Default, - vec![], - vec![params::ExternalIpCreate::Floating { - floating_ip_name: FIP_NAMES[0].parse().unwrap(), - }], + true, + false, + &FIP_NAMES[..1], ) .await; @@ -430,20 +455,12 @@ async fn test_floating_ip_attachment(cptestctx: &ControlPlaneTestContext) { assert_eq!(fetched_fip.instance_id, Some(instance.identity.id)); // Try to delete the floating IP, which should fail. - let error: HttpErrorResponseBody = NexusRequest::new( - RequestBuilder::new( - client, - Method::DELETE, - &get_floating_ip_by_id_url(&fip.identity.id), - ) - .expect_status(Some(StatusCode::BAD_REQUEST)), + let error = object_delete_error( + client, + &get_floating_ip_by_id_url(&fip.identity.id), + StatusCode::BAD_REQUEST, ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + .await; assert_eq!( error.message, format!("Floating IP cannot be deleted while attached to an instance"), @@ -497,6 +514,340 @@ async fn test_floating_ip_attachment(cptestctx: &ControlPlaneTestContext) { .unwrap(); } +#[nexus_test] +async fn test_external_ip_live_attach_detach( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + + // Create 2 instances, and a floating IP for each instance. + // One instance will be started, and one will be stopped. + let mut fips = vec![]; + for i in 0..2 { + fips.push( + create_floating_ip( + client, + FIP_NAMES[i], + project.identity.name.as_str(), + None, + None, + ) + .await, + ); + } + + let mut instances = vec![]; + for (i, start) in [false, true].iter().enumerate() { + let instance = instance_for_external_ips( + client, + INSTANCE_NAMES[i], + *start, + false, + &[], + ) + .await; + + if *start { + instance_simulate(nexus, &instance.identity.id).await; + instance_simulate(nexus, &instance.identity.id).await; + } + + // Verify that each instance has no external IPs. + assert_eq!( + fetch_instance_external_ips( + client, + INSTANCE_NAMES[i], + PROJECT_NAME + ) + .await + .len(), + 0 + ); + + instances.push(instance); + } + + // Attach a floating IP and ephemeral IP to each instance. + let mut recorded_ephs = vec![]; + for (instance, fip) in instances.iter().zip(&fips) { + let instance_name = instance.identity.name.as_str(); + let eph_resp = ephemeral_ip_attach(client, instance_name, None).await; + let fip_resp = floating_ip_attach( + client, + instance_name, + fip.identity.name.as_str(), + ) + .await; + + // Verify both appear correctly. + // This implicitly checks FIP parent_id matches the instance, + // and state has fully moved into 'Attached'. + let eip_list = + fetch_instance_external_ips(client, instance_name, PROJECT_NAME) + .await; + + assert_eq!(eip_list.len(), 2); + assert!(eip_list.contains(&eph_resp)); + assert!(eip_list + .iter() + .any(|v| matches!(v, views::ExternalIp::Floating(..)) + && v.ip() == fip_resp.ip)); + assert_eq!(fip.ip, fip_resp.ip); + + // Check for idempotency: repeat requests should return same values. 
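+        // (Server-side, re-attaching an IP that is already `attached` to
+        // this same instance short-circuits: the begin-attach step can
+        // return the existing row with `do_saga: false`, so the
+        // state-modifying saga nodes have nothing to redo.)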
+ let eph_resp_2 = ephemeral_ip_attach(client, instance_name, None).await; + let fip_resp_2 = floating_ip_attach( + client, + instance_name, + fip.identity.name.as_str(), + ) + .await; + + assert_eq!(eph_resp, eph_resp_2); + assert_eq!(fip_resp.ip, fip_resp_2.ip); + + recorded_ephs.push(eph_resp); + } + + // Detach a floating IP and ephemeral IP from each instance. + for (instance, fip) in instances.iter().zip(&fips) { + let instance_name = instance.identity.name.as_str(); + ephemeral_ip_detach(client, instance_name).await; + let fip_resp = + floating_ip_detach(client, fip.identity.name.as_str()).await; + + // Verify both are removed, and that their bodies match the known FIP/EIP combo. + let eip_list = + fetch_instance_external_ips(client, instance_name, PROJECT_NAME) + .await; + + assert_eq!(eip_list.len(), 0); + assert_eq!(fip.ip, fip_resp.ip); + + // Check for idempotency: repeat requests should return same values for FIP, + // but in ephemeral case there is no currently known IP so we return an error. + let fip_resp_2 = + floating_ip_detach(client, fip.identity.name.as_str()).await; + assert_eq!(fip_resp.ip, fip_resp_2.ip); + + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + let error = + object_delete_error(client, &url, StatusCode::BAD_REQUEST).await; + assert_eq!( + error.message, + "instance does not have an ephemeral IP attached".to_string() + ); + } +} + +#[nexus_test] +async fn test_external_ip_attach_detach_fail_if_in_use_by_other( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + + // Create 2 instances, bind a FIP to each. + let mut instances = vec![]; + let mut fips = vec![]; + for i in 0..2 { + let fip = create_floating_ip( + client, + FIP_NAMES[i], + project.identity.name.as_str(), + None, + None, + ) + .await; + let instance = instance_for_external_ips( + client, + INSTANCE_NAMES[i], + true, + false, + &[FIP_NAMES[i]], + ) + .await; + + instance_simulate(nexus, &instance.identity.id).await; + instance_simulate(nexus, &instance.identity.id).await; + + instances.push(instance); + fips.push(fip); + } + + // Attach in-use FIP to *other* instance should fail. + let url = + attach_floating_ip_url(fips[1].identity.name.as_str(), PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: INSTANCE_NAMES[0].parse::().unwrap().into(), + })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!(error.message, "floating IP cannot be attached to one instance while still attached to another".to_string()); +} + +#[nexus_test] +async fn test_external_ip_attach_fails_after_maximum( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + + // Create 33 floating IPs, and bind the first 32 to an instance. 
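+    // (Nexus enforces a limit of 32 external IPs per instance, as the error
+    // messages asserted below spell out; the 33rd attach must fail.)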
+ let mut fip_names = vec![]; + for i in 0..33 { + let fip_name = format!("fip-{i}"); + create_floating_ip( + client, + &fip_name, + project.identity.name.as_str(), + None, + None, + ) + .await; + fip_names.push(fip_name); + } + + let fip_name_slice = + fip_names.iter().map(String::as_str).collect::>(); + let instance_name = INSTANCE_NAMES[0]; + instance_for_external_ips( + client, + instance_name, + true, + false, + &fip_name_slice[..32], + ) + .await; + + // Attempt to attach the final FIP should fail. + let url = attach_floating_ip_url(fip_name_slice[32], PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: instance_name.parse::().unwrap().into(), + })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!( + error.message, + "an instance may not have more than 32 external IP addresses" + .to_string() + ); + + // Attempt to attach an ephemeral IP should fail. + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::EphemeralIpCreate { pool: None })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!( + error.message, + "an instance may not have more than 32 external IP addresses" + .to_string() + ); +} + +#[nexus_test] +async fn test_external_ip_attach_ephemeral_at_pool_exhaustion( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; + let other_pool_range = IpRange::V4( + Ipv4Range::new(Ipv4Addr::new(10, 1, 0, 1), Ipv4Addr::new(10, 1, 0, 1)) + .unwrap(), + ); + create_ip_pool(&client, "other-pool", Some(other_pool_range)).await; + let silo_id = DEFAULT_SILO.id(); + link_ip_pool(&client, "other-pool", &silo_id, false).await; + + create_project(client, PROJECT_NAME).await; + + // Create two instances, to which we will later add eph IPs from 'other-pool'. + for name in &INSTANCE_NAMES[..2] { + instance_for_external_ips(client, name, false, false, &[]).await; + } + + let pool_name: Name = "other-pool".parse().unwrap(); + + // Attach a new EIP from other-pool to both instances. + // This should succeed for the first, and fail for the second + // due to pool exhaustion. + let eph_resp = ephemeral_ip_attach( + client, + INSTANCE_NAMES[0], + Some(pool_name.as_str()), + ) + .await; + assert_eq!(eph_resp.ip(), other_pool_range.first_address()); + assert_eq!(eph_resp.ip(), other_pool_range.last_address()); + + let url = instance_ephemeral_ip_url(INSTANCE_NAMES[1], PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::ExternalIpCreate::Ephemeral { + pool: Some(pool_name.clone().into()), + })) + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!( + error.message, + "Insufficient capacity: No external IP addresses available".to_string() + ); + + // Idempotent re-add to the first instance should succeed even if + // an internal attempt to alloc a new EIP would fail. 
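+    // (i.e. the attach path returns the instance's existing ephemeral IP
+    // as-is rather than allocating a fresh address from the empty pool.)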
+ let eph_resp_2 = ephemeral_ip_attach( + client, + INSTANCE_NAMES[0], + Some(pool_name.as_str()), + ) + .await; + assert_eq!(eph_resp_2, eph_resp); +} + pub async fn floating_ip_get( client: &ClientTestContext, fip_url: &str, @@ -521,3 +872,96 @@ async fn floating_ip_get_as( panic!("failed to make \"get\" request to {fip_url}: {e}") }) } + +async fn instance_for_external_ips( + client: &ClientTestContext, + instance_name: &str, + start: bool, + use_ephemeral_ip: bool, + floating_ip_names: &[&str], +) -> Instance { + let mut fips: Vec<_> = floating_ip_names + .iter() + .map(|s| params::ExternalIpCreate::Floating { + floating_ip: s.parse::().unwrap().into(), + }) + .collect(); + if use_ephemeral_ip { + fips.push(params::ExternalIpCreate::Ephemeral { pool: None }) + } + create_instance_with( + &client, + PROJECT_NAME, + instance_name, + ¶ms::InstanceNetworkInterfaceAttachment::Default, + vec![], + fips, + start, + ) + .await +} + +async fn ephemeral_ip_attach( + client: &ClientTestContext, + instance_name: &str, + pool_name: Option<&str>, +) -> views::ExternalIp { + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::EphemeralIpCreate { + pool: pool_name.map(|v| v.parse::().unwrap().into()), + })) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap() +} + +async fn ephemeral_ip_detach(client: &ClientTestContext, instance_name: &str) { + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + object_delete(client, &url).await; +} + +async fn floating_ip_attach( + client: &ClientTestContext, + instance_name: &str, + floating_ip_name: &str, +) -> views::FloatingIp { + let url = attach_floating_ip_url(floating_ip_name, PROJECT_NAME); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: instance_name.parse::().unwrap().into(), + })) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap() +} + +async fn floating_ip_detach( + client: &ClientTestContext, + floating_ip_name: &str, +) -> views::FloatingIp { + let url = detach_floating_ip_url(floating_ip_name, PROJECT_NAME); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap() +} diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 2f4e913185..8d97df6cda 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -649,6 +649,7 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { ¶ms::InstanceNetworkInterfaceAttachment::Default, Vec::::new(), Vec::::new(), + true, ) .await; let instance_id = instance.identity.id; @@ -752,6 +753,7 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { // located with their instances. 
Vec::::new(), Vec::::new(), + true, ) .await; let instance_id = instance.identity.id; @@ -1104,6 +1106,7 @@ async fn test_instance_metrics_with_migration( ¶ms::InstanceNetworkInterfaceAttachment::Default, Vec::::new(), Vec::::new(), + true, ) .await; let instance_id = instance.identity.id; @@ -3644,7 +3647,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( let ip = fetch_instance_ephemeral_ip(client, "pool1-inst").await; assert!( - ip.ip >= range1.first_address() && ip.ip <= range1.last_address(), + ip.ip() >= range1.first_address() && ip.ip() <= range1.last_address(), "Expected ephemeral IP to come from pool1" ); @@ -3652,7 +3655,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( create_instance_with_pool(client, "pool2-inst", Some("pool2")).await; let ip = fetch_instance_ephemeral_ip(client, "pool2-inst").await; assert!( - ip.ip >= range2.first_address() && ip.ip <= range2.last_address(), + ip.ip() >= range2.first_address() && ip.ip() <= range2.last_address(), "Expected ephemeral IP to come from pool2" ); @@ -3667,7 +3670,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( create_instance_with_pool(client, "pool2-inst2", None).await; let ip = fetch_instance_ephemeral_ip(client, "pool2-inst2").await; assert!( - ip.ip >= range2.first_address() && ip.ip <= range2.last_address(), + ip.ip() >= range2.first_address() && ip.ip() <= range2.last_address(), "Expected ephemeral IP to come from pool2" ); @@ -3705,7 +3708,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some("pool1".parse().unwrap()), + pool: Some("pool1".parse::().unwrap().into()), }], disks: vec![], start: true, @@ -3769,7 +3772,7 @@ async fn test_instance_ephemeral_ip_from_orphan_pool( user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some("orphan-pool".parse().unwrap()), + pool: Some("orphan-pool".parse::().unwrap().into()), }], disks: vec![], start: true, @@ -3829,7 +3832,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: None, // <--- the only important thing here + pool: None, // <--- the only important thing here }], disks: vec![], start: true, @@ -3845,7 +3848,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( // same deal if you specify a pool that doesn't exist let body = params::InstanceCreate { external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some("nonexistent-pool".parse().unwrap()), + pool: Some("nonexistent-pool".parse::().unwrap().into()), }], ..body }; @@ -3879,7 +3882,7 @@ async fn test_instance_attach_several_external_ips( // Create several floating IPs for the instance, totalling 8 IPs. 
let mut external_ip_create = - vec![params::ExternalIpCreate::Ephemeral { pool_name: None }]; + vec![params::ExternalIpCreate::Ephemeral { pool: None }]; let mut fips = vec![]; for i in 1..8 { let name = format!("fip-{i}"); fips.push( create_floating_ip(&client, &name, PROJECT_NAME, None, None).await, ); external_ip_create.push(params::ExternalIpCreate::Floating { - floating_ip_name: name.parse().unwrap(), + floating_ip: name.parse::<Name>().unwrap().into(), }); } @@ -3900,30 +3903,31 @@ async fn test_instance_attach_several_external_ips( &params::InstanceNetworkInterfaceAttachment::Default, vec![], external_ip_create, + true, ) .await; // Verify that all external IPs are visible on the instance and have // been allocated in order. let external_ips = - fetch_instance_external_ips(&client, instance_name).await; + fetch_instance_external_ips(&client, instance_name, PROJECT_NAME).await; assert_eq!(external_ips.len(), 8); eprintln!("{external_ips:?}"); for (i, eip) in external_ips .iter() - .sorted_unstable_by(|a, b| a.ip.cmp(&b.ip)) + .sorted_unstable_by(|a, b| a.ip().cmp(&b.ip())) .enumerate() { let last_octet = i + if i != external_ips.len() - 1 { - assert_eq!(eip.kind, IpKind::Floating); + assert_eq!(eip.kind(), IpKind::Floating); 1 } else { // SNAT will occupy 10.0.0.8 here, since it is alloc'd before // the ephemeral. - assert_eq!(eip.kind, IpKind::Ephemeral); + assert_eq!(eip.kind(), IpKind::Ephemeral); 2 }; - assert_eq!(eip.ip, Ipv4Addr::new(10, 0, 0, last_octet as u8)); + assert_eq!(eip.ip(), Ipv4Addr::new(10, 0, 0, last_octet as u8)); } // Verify that all floating IPs are bound to their parent instance. @@ -3948,7 +3952,7 @@ async fn test_instance_allow_only_one_ephemeral_ip( // don't need any IP pools because request fails at parse time let ephemeral_create = params::ExternalIpCreate::Ephemeral { - pool_name: Some("default".parse().unwrap()), + pool: Some("default".parse::<Name>().unwrap().into()), }; let create_params = params::InstanceCreate { identity: IdentityMetadataCreateParams { @@ -3992,19 +3996,20 @@ async fn create_instance_with_pool( &params::InstanceNetworkInterfaceAttachment::Default, vec![], vec![params::ExternalIpCreate::Ephemeral { - pool_name: pool_name.map(|name| name.parse().unwrap()), + pool: pool_name.map(|name| name.parse::<Name>().unwrap().into()), }], + true, ) .await } -async fn fetch_instance_external_ips( +pub async fn fetch_instance_external_ips( client: &ClientTestContext, instance_name: &str, + project_name: &str, ) -> Vec<views::ExternalIp> { let ips_url = format!( - "/v1/instances/{}/external-ips?project={}", - instance_name, PROJECT_NAME + "/v1/instances/{instance_name}/external-ips?project={project_name}", ); let ips = NexusRequest::object_get(client, &ips_url) .authn_as(AuthnMode::PrivilegedUser) @@ -4020,10 +4025,10 @@ async fn fetch_instance_ephemeral_ip( client: &ClientTestContext, instance_name: &str, ) -> views::ExternalIp { - fetch_instance_external_ips(client, instance_name) + fetch_instance_external_ips(client, instance_name, PROJECT_NAME) .await .into_iter() - .find(|v| v.kind == IpKind::Ephemeral) + .find(|v| v.kind() == IpKind::Ephemeral) .unwrap() } @@ -4087,7 +4092,7 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some(Name::try_from(String::from("default")).unwrap()), + pool: Some("default".parse::<Name>().unwrap().into()), }],
disks: vec![], start: true, diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 91a933754c..e36b213f7e 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -143,6 +143,7 @@ async fn test_subnet_allocation(cptestctx: &ControlPlaneTestContext) { Vec::<params::InstanceDiskAttachment>::new(), // External IPs= Vec::<params::ExternalIpCreate>::new(), + true, ) .await; } diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index bd79a9c3e9..8bd2f34de5 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -12,8 +12,10 @@ disk_view GET /v1/disks/{disk} API operations found with tag "floating-ips" OPERATION ID METHOD URL PATH +floating_ip_attach POST /v1/floating-ips/{floating_ip}/attach floating_ip_create POST /v1/floating-ips floating_ip_delete DELETE /v1/floating-ips/{floating_ip} +floating_ip_detach POST /v1/floating-ips/{floating_ip}/detach floating_ip_list GET /v1/floating-ips floating_ip_view GET /v1/floating-ips/{floating_ip} @@ -40,6 +42,8 @@ instance_delete DELETE /v1/instances/{instance} instance_disk_attach POST /v1/instances/{instance}/disks/attach instance_disk_detach POST /v1/instances/{instance}/disks/detach instance_disk_list GET /v1/instances/{instance}/disks +instance_ephemeral_ip_attach POST /v1/instances/{instance}/external-ips/ephemeral +instance_ephemeral_ip_detach DELETE /v1/instances/{instance}/external-ips/ephemeral instance_external_ip_list GET /v1/instances/{instance}/external-ips instance_list GET /v1/instances instance_migrate POST /v1/instances/{instance}/migrate diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 750e83c2a2..62c8224461 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -71,7 +71,7 @@ path_param!(VpcPath, vpc, "VPC"); path_param!(SubnetPath, subnet, "subnet"); path_param!(RouterPath, router, "router"); path_param!(RoutePath, route, "route"); -path_param!(FloatingIpPath, floating_ip, "Floating IP"); +path_param!(FloatingIpPath, floating_ip, "floating IP"); path_param!(DiskPath, disk, "disk"); path_param!(SnapshotPath, snapshot, "snapshot"); path_param!(ImagePath, image, "image"); @@ -890,6 +890,23 @@ pub struct FloatingIpCreate { pub pool: Option<NameOrId>, } +/// The type of resource that a floating IP is attached to +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum FloatingIpParentKind { + Instance, +} + +/// Parameters for attaching a floating IP address to another resource +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct FloatingIpAttach { + /// Name or ID of the resource that this IP address should be attached to + pub parent: NameOrId, + + /// The type of `parent`'s resource + pub kind: FloatingIpParentKind, +} + // INSTANCES /// Describes an attachment of an `InstanceNetworkInterface` to an `Instance`, @@ -954,14 +971,30 @@ pub struct InstanceDiskAttach { #[serde(tag = "type", rename_all = "snake_case")] pub enum ExternalIpCreate { /// An IP address providing both inbound and outbound access. The address is - /// automatically-assigned from the provided IP Pool, or all available pools - /// if not specified. - Ephemeral { pool_name: Option<Name> }, + /// automatically-assigned from the provided IP Pool, or the current silo's + /// default pool if not specified.
+ Ephemeral { pool: Option<NameOrId> }, /// An IP address providing both inbound and outbound access. The address is - /// an existing Floating IP object assigned to the current project. + /// an existing floating IP object assigned to the current project. /// /// The floating IP must not be in use by another instance or service. - Floating { floating_ip_name: Name }, + Floating { floating_ip: NameOrId }, +} + +/// Parameters for creating an ephemeral IP address for an instance. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "type", rename_all = "snake_case")] +pub struct EphemeralIpCreate { + /// Name or ID of the IP pool used to allocate an address + pub pool: Option<NameOrId>, +} + +/// Parameters for detaching an external IP from an instance. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ExternalIpDetach { + Ephemeral, + Floating { floating_ip: NameOrId }, } /// Create-time parameters for an `Instance`
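(A quick illustration, not part of the patch itself: given the serde attributes above, with `tag = "type"` on `ExternalIpCreate` and `NameOrId` serializing a name as a bare string, the new create-time parameters produce the JSON shapes sketched below; these match the `ExternalIpCreate` schema in openapi/nexus.json later in this diff. The pool and floating IP names are illustrative only.)

    // Minimal sketch of the new request shapes; names are made up.
    let eph = params::ExternalIpCreate::Ephemeral {
        pool: Some("default".parse::<Name>().unwrap().into()),
    };
    // serializes as: {"type":"ephemeral","pool":"default"}
    let float = params::ExternalIpCreate::Floating {
        floating_ip: "my-fip".parse::<Name>().unwrap().into(),
    };
    // serializes as: {"type":"floating","floating_ip":"my-fip"}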
diff --git a/nexus/types/src/external_api/shared.rs b/nexus/types/src/external_api/shared.rs index a4c5ae1e62..f6b4db18a3 100644 --- a/nexus/types/src/external_api/shared.rs +++ b/nexus/types/src/external_api/shared.rs @@ -221,7 +221,9 @@ pub enum ServiceUsingCertificate { } /// The kind of an external IP address for an instance -#[derive(Debug, Clone, Copy, Deserialize, Serialize, JsonSchema, PartialEq)] +#[derive( + Debug, Clone, Copy, Deserialize, Eq, Serialize, JsonSchema, PartialEq, +)] #[serde(rename_all = "snake_case")] pub enum IpKind { Ephemeral, diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 314dd4ed00..5e31be7af8 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -12,8 +12,8 @@ use api_identity::ObjectIdentity; use chrono::DateTime; use chrono::Utc; use omicron_common::api::external::{ - ByteCount, Digest, IdentityMetadata, InstanceState, Ipv4Net, Ipv6Net, Name, - ObjectIdentity, RoleName, SemverVersion, SimpleIdentity, + ByteCount, Digest, Error, IdentityMetadata, InstanceState, Ipv4Net, + Ipv6Net, Name, ObjectIdentity, RoleName, SemverVersion, SimpleIdentity, }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -337,16 +337,34 @@ pub struct IpPoolRange { // INSTANCE EXTERNAL IP ADDRESSES -#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct ExternalIp { - pub ip: IpAddr, - pub kind: IpKind, +#[derive(Debug, Clone, Deserialize, PartialEq, Serialize, JsonSchema)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum ExternalIp { + Ephemeral { ip: IpAddr }, + Floating(FloatingIp), +} + +impl ExternalIp { + pub fn ip(&self) -> IpAddr { + match self { + Self::Ephemeral { ip } => *ip, + Self::Floating(float) => float.ip, + } + } + + pub fn kind(&self) -> IpKind { + match self { + Self::Ephemeral { .. } => IpKind::Ephemeral, + Self::Floating(_) => IpKind::Floating, + } + } } /// A Floating IP is a well-known IP address which can be attached /// and detached from instances. -#[derive(ObjectIdentity, Debug, Clone, Deserialize, Serialize, JsonSchema)] +#[derive( + ObjectIdentity, Debug, PartialEq, Clone, Deserialize, Serialize, JsonSchema, +)] #[serde(rename_all = "snake_case")] pub struct FloatingIp { #[serde(flatten)] @@ -360,6 +378,25 @@ pub struct FloatingIp { pub instance_id: Option<Uuid>, } +impl From<FloatingIp> for ExternalIp { + fn from(value: FloatingIp) -> Self { + ExternalIp::Floating(value) + } +} + +impl TryFrom<ExternalIp> for FloatingIp { + type Error = Error; + + fn try_from(value: ExternalIp) -> Result<Self, Self::Error> { + match value { + ExternalIp::Ephemeral { .. } => Err(Error::internal_error( + "tried to convert an ephemeral IP into a floating IP", + )), + ExternalIp::Floating(v) => Ok(v), + } + } +} + // RACKS /// View of an Rack diff --git a/openapi/nexus.json b/openapi/nexus.json index 2dd4037430..59206ed010 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -930,7 +930,7 @@ { "in": "path", "name": "floating_ip", - "description": "Name or ID of the Floating IP", + "description": "Name or ID of the floating IP", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -974,7 +974,7 @@ { "in": "path", "name": "floating_ip", - "description": "Name or ID of the Floating IP", + "description": "Name or ID of the floating IP", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -1002,6 +1002,108 @@ } } }, + "/v1/floating-ips/{floating_ip}/attach": { + "post": { + "tags": [ + "floating-ips" + ], + "summary": "Attach a floating IP to an instance or other resource", + "operationId": "floating_ip_attach", + "parameters": [ + { + "in": "path", + "name": "floating_ip", + "description": "Name or ID of the floating IP", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIpAttach" + } + } + }, + "required": true + }, + "responses": { + "202": { + "description": "successfully enqueued operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIp" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/floating-ips/{floating_ip}/detach": { + "post": { + "tags": [ + "floating-ips" + ], + "summary": "Detach a floating IP from an instance or other resource", + "operationId": "floating_ip_detach", + "parameters": [ + { + "in": "path", + "name": "floating_ip", + "description": "Name or ID of the floating IP", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "202": { + "description": "successfully enqueued operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIp" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/groups": { "get": { "tags": [ @@ -1826,6 +1928,99 @@ } } }, + "/v1/instances/{instance}/external-ips/ephemeral": { + "post": { + "tags": [ + "instances" + ], + "summary": "Allocate and attach an ephemeral IP to an instance", + "operationId":
"instance_ephemeral_ip_attach", + "parameters": [ + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EphemeralIpCreate" + } + } + }, + "required": true + }, + "responses": { + "202": { + "description": "successfully enqueued operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ExternalIp" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "tags": [ + "instances" + ], + "summary": "Detach and deallocate an ephemeral IP from an instance", + "operationId": "instance_ephemeral_ip_detach", + "parameters": [ + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/instances/{instance}/migrate": { "post": { "tags": [ @@ -11005,6 +11200,21 @@ } ] }, + "EphemeralIpCreate": { + "description": "Parameters for creating an ephemeral IP address for an instance.", + "type": "object", + "properties": { + "pool": { + "nullable": true, + "description": "Name or ID of the IP pool used to allocate an address", + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + } + } + }, "Error": { "description": "Error information from a response.", "type": "object", @@ -11025,33 +11235,105 @@ ] }, "ExternalIp": { - "type": "object", - "properties": { - "ip": { - "type": "string", - "format": "ip" + "oneOf": [ + { + "type": "object", + "properties": { + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "type": "string", + "enum": [ + "ephemeral" + ] + } + }, + "required": [ + "ip", + "kind" + ] }, - "kind": { - "$ref": "#/components/schemas/IpKind" + { + "description": "A Floating IP is a well-known IP address which can be attached and detached from instances.", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "instance_id": { + "nullable": true, + "description": "The ID of the instance that this Floating IP is attached to, if it is presently in use.", + "type": "string", + "format": "uuid" + }, + "ip": { + "description": "The IP address held by this resource.", + "type": "string", + "format": "ip" + }, + "kind": { + "type": "string", + "enum": [ + "floating" + ] + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "project_id": { + "description": "The project this resource exists within.", + "type": "string", + "format": "uuid" + }, + "time_created": { 
+ "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "id", + "ip", + "kind", + "name", + "project_id", + "time_created", + "time_modified" + ] } - }, - "required": [ - "ip", - "kind" ] }, "ExternalIpCreate": { "description": "Parameters for creating an external IP address for instances.", "oneOf": [ { - "description": "An IP address providing both inbound and outbound access. The address is automatically-assigned from the provided IP Pool, or all available pools if not specified.", + "description": "An IP address providing both inbound and outbound access. The address is automatically-assigned from the provided IP Pool, or the current silo's default pool if not specified.", "type": "object", "properties": { - "pool_name": { + "pool": { "nullable": true, "allOf": [ { - "$ref": "#/components/schemas/Name" + "$ref": "#/components/schemas/NameOrId" } ] }, @@ -11067,11 +11349,11 @@ ] }, { - "description": "An IP address providing both inbound and outbound access. The address is an existing Floating IP object assigned to the current project.\n\nThe floating IP must not be in use by another instance or service.", + "description": "An IP address providing both inbound and outbound access. The address is an existing floating IP object assigned to the current project.\n\nThe floating IP must not be in use by another instance or service.", "type": "object", "properties": { - "floating_ip_name": { - "$ref": "#/components/schemas/Name" + "floating_ip": { + "$ref": "#/components/schemas/NameOrId" }, "type": { "type": "string", @@ -11081,7 +11363,7 @@ } }, "required": [ - "floating_ip_name", + "floating_ip", "type" ] } @@ -11226,6 +11508,32 @@ "time_modified" ] }, + "FloatingIpAttach": { + "description": "Parameters for attaching a floating IP address to another resource", + "type": "object", + "properties": { + "kind": { + "description": "The type of `parent`'s resource", + "allOf": [ + { + "$ref": "#/components/schemas/FloatingIpParentKind" + } + ] + }, + "parent": { + "description": "Name or ID of the resource that this IP address should be attached to", + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + } + }, + "required": [ + "kind", + "parent" + ] + }, "FloatingIpCreate": { "description": "Parameters for creating a new floating IP address for instances.", "type": "object", @@ -11257,6 +11565,13 @@ "name" ] }, + "FloatingIpParentKind": { + "description": "The type of resource that a floating IP is attached to", + "type": "string", + "enum": [ + "instance" + ] + }, "FloatingIpResultsPage": { "description": "A single page of results", "type": "object", @@ -12481,14 +12796,6 @@ } ] }, - "IpKind": { - "description": "The kind of an external IP address for an instance", - "type": "string", - "enum": [ - "ephemeral", - "floating" - ] - }, "IpNet": { "oneOf": [ { diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index b5b9d3fd5b..3e3f6abec6 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -327,6 +327,78 @@ } } }, + "/instances/{instance_id}/external-ip": { + "put": { + "operationId": "instance_put_external_ip", + "parameters": [ + { + "in": "path", + "name": "instance_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": 
{ + "$ref": "#/components/schemas/InstanceExternalIpBody" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "operationId": "instance_delete_external_ip", + "parameters": [ + { + "in": "path", + "name": "instance_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceExternalIpBody" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/instances/{instance_id}/migration-ids": { "put": { "operationId": "instance_put_migration_ids", @@ -4541,6 +4613,49 @@ "vmm_runtime" ] }, + "InstanceExternalIpBody": { + "description": "Used to dynamically update external IPs attached to an instance.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ephemeral" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "floating" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, "InstanceHardware": { "description": "Describes the instance hardware.", "type": "object", diff --git a/schema/crdb/25.0.0/up01.sql b/schema/crdb/25.0.0/up01.sql new file mode 100644 index 0000000000..0cb511fb91 --- /dev/null +++ b/schema/crdb/25.0.0/up01.sql @@ -0,0 +1,6 @@ +CREATE TYPE IF NOT EXISTS omicron.public.ip_attach_state AS ENUM ( + 'detached', + 'attached', + 'detaching', + 'attaching' +); diff --git a/schema/crdb/25.0.0/up02.sql b/schema/crdb/25.0.0/up02.sql new file mode 100644 index 0000000000..324a907dd4 --- /dev/null +++ b/schema/crdb/25.0.0/up02.sql @@ -0,0 +1,4 @@ +-- Intentionally nullable for now as we need to backfill using the current +-- value of parent_id. +ALTER TABLE omicron.public.external_ip +ADD COLUMN IF NOT EXISTS state omicron.public.ip_attach_state; diff --git a/schema/crdb/25.0.0/up03.sql b/schema/crdb/25.0.0/up03.sql new file mode 100644 index 0000000000..ea1d461250 --- /dev/null +++ b/schema/crdb/25.0.0/up03.sql @@ -0,0 +1,7 @@ +-- initialise external ip state for detached IPs. +set + local disallow_full_table_scans = off; + +UPDATE omicron.public.external_ip +SET state = 'detached' +WHERE parent_id IS NULL; diff --git a/schema/crdb/25.0.0/up04.sql b/schema/crdb/25.0.0/up04.sql new file mode 100644 index 0000000000..7bf89d6626 --- /dev/null +++ b/schema/crdb/25.0.0/up04.sql @@ -0,0 +1,7 @@ +-- initialise external ip state for attached IPs. +set + local disallow_full_table_scans = off; + +UPDATE omicron.public.external_ip +SET state = 'attached' +WHERE parent_id IS NOT NULL; diff --git a/schema/crdb/25.0.0/up05.sql b/schema/crdb/25.0.0/up05.sql new file mode 100644 index 0000000000..894806a3dc --- /dev/null +++ b/schema/crdb/25.0.0/up05.sql @@ -0,0 +1,2 @@ +-- Now move the new column to its intended state of non-nullable. 
+ALTER TABLE omicron.public.external_ip ALTER COLUMN state SET NOT NULL; diff --git a/schema/crdb/25.0.0/up06.sql b/schema/crdb/25.0.0/up06.sql new file mode 100644 index 0000000000..ca19081e37 --- /dev/null +++ b/schema/crdb/25.0.0/up06.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.external_ip +ADD CONSTRAINT IF NOT EXISTS detached_null_parent_id CHECK ( + (state = 'detached') OR (parent_id IS NOT NULL) +); diff --git a/schema/crdb/25.0.0/up07.sql b/schema/crdb/25.0.0/up07.sql new file mode 100644 index 0000000000..00f9310c2e --- /dev/null +++ b/schema/crdb/25.0.0/up07.sql @@ -0,0 +1,4 @@ +CREATE UNIQUE INDEX IF NOT EXISTS one_ephemeral_ip_per_instance ON omicron.public.external_ip ( + parent_id +) + WHERE kind = 'ephemeral' AND parent_id IS NOT NULL AND time_deleted IS NULL; diff --git a/schema/crdb/25.0.0/up08.sql b/schema/crdb/25.0.0/up08.sql new file mode 100644 index 0000000000..3d85aaad05 --- /dev/null +++ b/schema/crdb/25.0.0/up08.sql @@ -0,0 +1,2 @@ +ALTER TABLE IF EXISTS omicron.public.external_ip +DROP CONSTRAINT IF EXISTS null_non_fip_parent_id; diff --git a/schema/crdb/25.0.0/up09.sql b/schema/crdb/25.0.0/up09.sql new file mode 100644 index 0000000000..bac963cce5 --- /dev/null +++ b/schema/crdb/25.0.0/up09.sql @@ -0,0 +1,4 @@ +ALTER TABLE IF EXISTS omicron.public.external_ip +ADD CONSTRAINT IF NOT EXISTS null_snat_parent_id CHECK ( + (kind != 'snat') OR (parent_id IS NOT NULL) +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index f3ca5c4b85..86d88f5fe9 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1669,6 +1669,13 @@ CREATE TYPE IF NOT EXISTS omicron.public.ip_kind AS ENUM ( 'floating' ); +CREATE TYPE IF NOT EXISTS omicron.public.ip_attach_state AS ENUM ( + 'detached', + 'attached', + 'detaching', + 'attaching' +); + /* * External IP addresses used for guest instances and externally-facing * services. @@ -1714,6 +1721,12 @@ CREATE TABLE IF NOT EXISTS omicron.public.external_ip ( /* FK to the `project` table. */ project_id UUID, + /* State of this IP with regard to instance attach/detach + * operations. This is mainly used to prevent concurrent use + * across sagas and allow rollback to correct state. + */ + state omicron.public.ip_attach_state NOT NULL, + /* The name must be non-NULL iff this is a floating IP. */ CONSTRAINT null_fip_name CHECK ( (kind != 'floating' AND name IS NULL) OR @@ -1735,16 +1748,27 @@ CREATE TABLE IF NOT EXISTS omicron.public.external_ip ( ), /* - * Only nullable if this is a floating IP, which may exist not - * attached to any instance or service yet. + * Only nullable if this is a floating/ephemeral IP, which may exist not + * attached to any instance or service yet. Ephemeral IPs should not generally + * exist without parent instances/services, but need to temporarily exist in + * this state for live attachment. */ - CONSTRAINT null_non_fip_parent_id CHECK ( - (kind != 'floating' AND parent_id is NOT NULL) OR (kind = 'floating') + CONSTRAINT null_snat_parent_id CHECK ( + (kind != 'snat') OR (parent_id IS NOT NULL) ), /* Ephemeral IPs are not supported for services. */ CONSTRAINT ephemeral_kind_service CHECK ( (kind = 'ephemeral' AND is_service = FALSE) OR (kind != 'ephemeral') + ), + + /* + * (Not detached) => non-null parent_id. + * This is not a two-way implication because SNAT IPs + * cannot have a null parent_id. 
+ */ + CONSTRAINT detached_null_parent_id CHECK ( + (state = 'detached') OR (parent_id IS NOT NULL) ) ); @@ -1777,6 +1801,12 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_external_ip_by_parent ON omicron.public ) WHERE parent_id IS NOT NULL AND time_deleted IS NULL; +/* Enforce a limit of one Ephemeral IP per instance */ +CREATE UNIQUE INDEX IF NOT EXISTS one_ephemeral_ip_per_instance ON omicron.public.external_ip ( + parent_id +) + WHERE kind = 'ephemeral' AND parent_id IS NOT NULL AND time_deleted IS NULL; + /* Enforce name-uniqueness of floating (service) IPs at fleet level. */ CREATE UNIQUE INDEX IF NOT EXISTS lookup_floating_ip_by_name on omicron.public.external_ip ( name diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 39d1ae26a0..0798aed664 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -9,7 +9,7 @@ use crate::bootstrap::early_networking::EarlyNetworkConfig; use crate::bootstrap::params::AddSledRequest; use crate::params::{ CleanupContextUpdate, DiskEnsureBody, InstanceEnsureBody, - InstancePutMigrationIdsBody, InstancePutStateBody, + InstanceExternalIpBody, InstancePutMigrationIdsBody, InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, Inventory, OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRulesEnsureBody, ZoneBundleId, ZoneBundleMetadata, Zpool, @@ -53,6 +53,8 @@ pub fn api() -> SledApiDescription { api.register(instance_issue_disk_snapshot_request)?; api.register(instance_put_migration_ids)?; api.register(instance_put_state)?; + api.register(instance_put_external_ip)?; + api.register(instance_delete_external_ip)?; api.register(instance_register)?; api.register(instance_unregister)?; api.register(omicron_zones_get)?; @@ -467,6 +469,38 @@ async fn instance_put_migration_ids( )) } +#[endpoint { + method = PUT, + path = "/instances/{instance_id}/external-ip", +}] +async fn instance_put_external_ip( + rqctx: RequestContext<SledAgent>, + path_params: Path<InstancePathParam>, + body: TypedBody<InstanceExternalIpBody>, +) -> Result<HttpResponseUpdatedNoContent, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_put_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) +} + +#[endpoint { + method = DELETE, + path = "/instances/{instance_id}/external-ip", +}] +async fn instance_delete_external_ip( + rqctx: RequestContext<SledAgent>, + path_params: Path<InstancePathParam>, + body: TypedBody<InstanceExternalIpBody>, +) -> Result<HttpResponseUpdatedNoContent, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_delete_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) +} + /// Path parameters for Disk requests (sled agent API) #[derive(Deserialize, JsonSchema)] struct DiskPathParam { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 057402c57a..3bbe0762f8 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -10,8 +10,8 @@ use crate::common::instance::{ }; use crate::instance_manager::{InstanceManagerServices, InstanceTicket}; use crate::nexus::NexusClientWithResolver; -use crate::params::ZoneBundleCause; use crate::params::ZoneBundleMetadata; +use crate::params::{InstanceExternalIpBody, ZoneBundleCause}; use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule, @@ -558,6 +558,110 @@ impl InstanceInner { Ok(()) } + + pub async fn add_external_ip( + &mut self, + ip:
&InstanceExternalIpBody, + ) -> Result<(), Error> { + // v4 + v6 handling is delegated to `external_ips_ensure`. + // If OPTE is unhappy, we undo at `Instance` level. + + match ip { + // For idempotency of add/delete, we want to return + // success on 'already done'. + InstanceExternalIpBody::Ephemeral(ip) + if Some(ip) == self.ephemeral_ip.as_ref() => + { + return Ok(()); + } + InstanceExternalIpBody::Floating(ip) + if self.floating_ips.contains(ip) => + { + return Ok(()); + } + // New Ephemeral IP while current exists -- error without + // explicit delete. + InstanceExternalIpBody::Ephemeral(ip) + if self.ephemeral_ip.is_some() => + { + return Err(Error::Opte( + illumos_utils::opte::Error::ImplicitEphemeralIpDetach( + *ip, + self.ephemeral_ip.unwrap(), + ), + )); + } + // Not found, proceed with OPTE update. + InstanceExternalIpBody::Ephemeral(ip) => { + self.ephemeral_ip = Some(*ip); + } + InstanceExternalIpBody::Floating(ip) => { + self.floating_ips.push(*ip); + } + } + + let Some(primary_nic) = self.requested_nics.get(0) else { + return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic)); + }; + + self.port_manager.external_ips_ensure( + primary_nic.id, + primary_nic.kind, + Some(self.source_nat), + self.ephemeral_ip, + &self.floating_ips, + )?; + + Ok(()) + } + + pub async fn delete_external_ip( + &mut self, + ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + // v4 + v6 handling is delegated to `external_ips_ensure`. + // If OPTE is unhappy, we undo at `Instance` level. + + match ip { + // For idempotency of add/delete, we want to return + // success on 'already done'. + // IP Mismatch and 'deleted in past' can't really be + // disambiguated here. + InstanceExternalIpBody::Ephemeral(ip) + if self.ephemeral_ip != Some(*ip) => + { + return Ok(()); + } + InstanceExternalIpBody::Ephemeral(_) => { + self.ephemeral_ip = None; + } + InstanceExternalIpBody::Floating(ip) => { + let floating_index = + self.floating_ips.iter().position(|v| v == ip); + if let Some(pos) = floating_index { + // Swap remove is valid here, OPTE is not sensitive + // to Floating Ip ordering. + self.floating_ips.swap_remove(pos); + } else { + return Ok(()); + } + } + } + + let Some(primary_nic) = self.requested_nics.get(0) else { + return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic)); + }; + + self.port_manager.external_ips_ensure( + primary_nic.id, + primary_nic.kind, + Some(self.source_nat), + self.ephemeral_ip, + &self.floating_ips, + )?; + + Ok(()) + } } /// A reference to a single instance running a running Propolis server. @@ -1094,4 +1198,52 @@ impl Instance { Err(Error::InstanceNotRunning(inner.properties.id)) } } + + pub async fn add_external_ip( + &self, + ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + let mut inner = self.inner.lock().await; + + // The internal call can either fail on adding the IP + // to the list, or on the OPTE step. + // Be cautious and reset state if either fails. + // Note we don't need to re-ensure port manager/OPTE state + // since that's the last call we make internally. 
+ let old_eph = inner.ephemeral_ip; + let out = inner.add_external_ip(ip).await; + + if out.is_err() { + inner.ephemeral_ip = old_eph; + if let InstanceExternalIpBody::Floating(ip) = ip { + inner.floating_ips.retain(|v| v != ip); + } + } + + out + } + + pub async fn delete_external_ip( + &self, + ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + let mut inner = self.inner.lock().await; + + // Similar logic to `add_external_ip`, except here we + // need to readd the floating IP if it was removed. + // OPTE doesn't care about the order of floating IPs. + let old_eph = inner.ephemeral_ip; + let out = inner.delete_external_ip(ip).await; + + if out.is_err() { + inner.ephemeral_ip = old_eph; + if let InstanceExternalIpBody::Floating(ip) = ip { + if !inner.floating_ips.contains(ip) { + inner.floating_ips.push(*ip); + } + } + } + + out + } } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index c1b7e402a4..b66b0400e1 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -7,6 +7,7 @@ use crate::instance::propolis_zone_name; use crate::instance::Instance; use crate::nexus::NexusClientWithResolver; +use crate::params::InstanceExternalIpBody; use crate::params::ZoneBundleMetadata; use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, @@ -434,6 +435,42 @@ impl InstanceManager { }; instance.request_zone_bundle().await } + + pub async fn add_external_ip( + &self, + instance_id: Uuid, + ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + let instance = { + let instances = self.inner.instances.lock().unwrap(); + instances.get(&instance_id).map(|(_id, v)| v.clone()) + }; + + let Some(instance) = instance else { + return Err(Error::NoSuchInstance(instance_id)); + }; + + instance.add_external_ip(ip).await?; + Ok(()) + } + + pub async fn delete_external_ip( + &self, + instance_id: Uuid, + ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + let instance = { + let instances = self.inner.instances.lock().unwrap(); + instances.get(&instance_id).map(|(_id, v)| v.clone()) + }; + + let Some(instance) = instance else { + return Err(Error::NoSuchInstance(instance_id)); + }; + + instance.delete_external_ip(ip).await?; + Ok(()) + } } /// Represents membership of an instance in the [`InstanceManager`]. diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 9120bafa9a..f14a13aa41 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -818,6 +818,16 @@ pub struct CleanupContextUpdate { pub storage_limit: Option, } +/// Used to dynamically update external IPs attached to an instance. +#[derive( + Copy, Clone, Debug, Eq, PartialEq, Hash, Deserialize, JsonSchema, Serialize, +)] +#[serde(rename_all = "snake_case", tag = "type", content = "value")] +pub enum InstanceExternalIpBody { + Ephemeral(IpAddr), + Floating(IpAddr), +} + // Our SledRole and Baseboard types do not have to be identical to the Nexus // ones, but they generally should be, and this avoids duplication. If it // becomes easier to maintain a separate copy, we should do that. 
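(For reference, a minimal sketch of the wire format, not part of the patch: `tag = "type", content = "value"` selects serde's adjacently tagged representation, which is exactly what the `InstanceExternalIpBody` schema in openapi/sled-agent.json above encodes. The addresses are illustrative.)

    // Sketch: how the sled-agent body type round-trips as JSON.
    use std::net::{IpAddr, Ipv4Addr};
    let eph = InstanceExternalIpBody::Ephemeral(IpAddr::V4(Ipv4Addr::new(203, 0, 113, 10)));
    // serializes as: {"type":"ephemeral","value":"203.0.113.10"}
    let fip = InstanceExternalIpBody::Floating(IpAddr::V4(Ipv4Addr::new(203, 0, 113, 11)));
    // serializes as: {"type":"floating","value":"203.0.113.11"}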
diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index e5d7752511..09ffdf5dc4 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -8,9 +8,10 @@ use crate::bootstrap::early_networking::{ EarlyNetworkConfig, EarlyNetworkConfigBody, }; use crate::params::{ - DiskEnsureBody, InstanceEnsureBody, InstancePutMigrationIdsBody, - InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, - Inventory, OmicronZonesConfig, VpcFirewallRulesEnsureBody, + DiskEnsureBody, InstanceEnsureBody, InstanceExternalIpBody, + InstancePutMigrationIdsBody, InstancePutStateBody, + InstancePutStateResponse, InstanceUnregisterResponse, Inventory, + OmicronZonesConfig, VpcFirewallRulesEnsureBody, }; use dropshot::endpoint; use dropshot::ApiDescription; @@ -45,6 +46,8 @@ pub fn api() -> SledApiDescription { api.register(instance_put_state)?; api.register(instance_register)?; api.register(instance_unregister)?; + api.register(instance_put_external_ip)?; + api.register(instance_delete_external_ip)?; api.register(instance_poke_post)?; api.register(disk_put)?; api.register(disk_poke_post)?; @@ -152,6 +155,38 @@ async fn instance_put_migration_ids( )) } +#[endpoint { + method = PUT, + path = "/instances/{instance_id}/external-ip", +}] +async fn instance_put_external_ip( + rqctx: RequestContext<Arc<SledAgent>>, + path_params: Path<InstancePathParam>, + body: TypedBody<InstanceExternalIpBody>, +) -> Result<HttpResponseUpdatedNoContent, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_put_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) +} + +#[endpoint { + method = DELETE, + path = "/instances/{instance_id}/external-ip", +}] +async fn instance_delete_external_ip( + rqctx: RequestContext<Arc<SledAgent>>, + path_params: Path<InstancePathParam>, + body: TypedBody<InstanceExternalIpBody>, +) -> Result<HttpResponseUpdatedNoContent, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_delete_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) +} + #[endpoint { method = POST, path = "/instances/{instance_id}/poke", diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 8a76bf6abc..56cfaf57c8 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -12,9 +12,10 @@ use super::storage::CrucibleData; use super::storage::Storage; use crate::nexus::NexusClient; use crate::params::{ - DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, - InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, Inventory, OmicronZonesConfig, SledRole, + DiskStateRequested, InstanceExternalIpBody, InstanceHardware, + InstanceMigrationSourceParams, InstancePutStateResponse, + InstanceStateRequested, InstanceUnregisterResponse, Inventory, + OmicronZonesConfig, SledRole, }; use crate::sim::simulatable::Simulatable; use crate::updates::UpdateManager; @@ -41,7 +42,7 @@ use propolis_client::{ }; use propolis_mock_server::Context as PropolisContext; use slog::Logger; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::str::FromStr; use std::sync::Arc; @@ -69,6 +70,8 @@ pub struct SledAgent { pub v2p_mappings: Mutex<HashMap<Uuid, Vec<SetVirtualNetworkInterfaceHost>>>, mock_propolis: Mutex<Option<(HttpServer<Arc<PropolisContext>>, PropolisClient)>>, + /// lists of external IPs assigned to instances + pub external_ips: Mutex<HashMap<Uuid, HashSet<InstanceExternalIpBody>>>, config: Config, fake_zones: Mutex<OmicronZonesConfig>, instance_ensure_state_error: Mutex<Option<Error>>, @@ -162,6
+165,7 @@ impl SledAgent { nexus_client, disk_id_to_region_ids: Mutex::new(HashMap::new()), v2p_mappings: Mutex::new(HashMap::new()), + external_ips: Mutex::new(HashMap::new()), mock_propolis: Mutex::new(None), config: config.clone(), fake_zones: Mutex::new(OmicronZonesConfig { @@ -627,6 +631,58 @@ impl SledAgent { Ok(()) } + pub async fn instance_put_external_ip( + &self, + instance_id: Uuid, + body_args: &InstanceExternalIpBody, + ) -> Result<(), Error> { + if !self.instances.contains_key(&instance_id).await { + return Err(Error::internal_error( + "can't alter IP state for nonexistent instance", + )); + } + + let mut eips = self.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + + // High-level behaviour: this should always succeed UNLESS + // trying to add a double ephemeral. + if let InstanceExternalIpBody::Ephemeral(curr_ip) = &body_args { + if my_eips.iter().any(|v| { + if let InstanceExternalIpBody::Ephemeral(other_ip) = v { + curr_ip != other_ip + } else { + false + } + }) { + return Err(Error::invalid_request("cannot replace existing ephemeral IP without explicit removal")); + } + } + + my_eips.insert(*body_args); + + Ok(()) + } + + pub async fn instance_delete_external_ip( + &self, + instance_id: Uuid, + body_args: &InstanceExternalIpBody, + ) -> Result<(), Error> { + if !self.instances.contains_key(&instance_id).await { + return Err(Error::internal_error( + "can't alter IP state for nonexistent instance", + )); + } + + let mut eips = self.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + + my_eips.remove(body_args); + + Ok(()) + } + /// Used for integration tests that require a component to talk to a /// mocked propolis-server API. // TODO: fix schemas so propolis-server's port isn't hardcoded in nexus diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 71fe3584f0..eaf354db26 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -16,10 +16,11 @@ use crate::long_running_tasks::LongRunningTaskHandles; use crate::metrics::MetricsManager; use crate::nexus::{ConvertInto, NexusClientWithResolver, NexusRequestQueue}; use crate::params::{ - DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, - InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, Inventory, OmicronZonesConfig, SledRole, - TimeSync, VpcFirewallRule, ZoneBundleMetadata, Zpool, + DiskStateRequested, InstanceExternalIpBody, InstanceHardware, + InstanceMigrationSourceParams, InstancePutStateResponse, + InstanceStateRequested, InstanceUnregisterResponse, Inventory, + OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRule, + ZoneBundleMetadata, Zpool, }; use crate::services::{self, ServiceManager}; use crate::storage_monitor::UnderlayAccess; @@ -979,6 +980,37 @@ impl SledAgent { .map_err(|e| Error::Instance(e)) } + /// Idempotently ensures that an instance's OPTE/port state includes the + /// specified external IP address. + /// + /// This method will return an error when trying to register an ephemeral IP which + /// does not match the current ephemeral IP.
+ pub async fn instance_put_external_ip( + &self, + instance_id: Uuid, + external_ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + self.inner + .instances + .add_external_ip(instance_id, external_ip) + .await + .map_err(|e| Error::Instance(e)) + } + + /// Idempotently ensures that an instance's OPTE/port state does not include the + /// specified external IP address in either its ephemeral or floating IP set. + pub async fn instance_delete_external_ip( + &self, + instance_id: Uuid, + external_ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + self.inner + .instances + .delete_external_ip(instance_id, external_ip) + .await + .map_err(|e| Error::Instance(e)) + } + /// Idempotently ensures that the given virtual disk is attached (or not) as /// specified. ///
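(Closing note, with a hedged sketch rather than anything from the patch: the sled-agent half of this change treats "already done" as success in both directions, so re-attaching an IP that is present and detaching one that is already gone each return Ok(()), while attaching a second, different ephemeral IP fails with ImplicitEphemeralIpDetach until the old one is explicitly deleted. That makes a detach-then-attach sequence like the following safe to retry; the helper is illustrative only and assumes the `SledAgent`, `InstanceExternalIpBody`, and `Error` types introduced above.)

    // Hedged sketch: swapping an instance's ephemeral IP under the
    // idempotent add/delete contract above.
    async fn swap_ephemeral_ip(
        sa: &SledAgent,
        instance_id: Uuid,
        old: &InstanceExternalIpBody,
        new: &InstanceExternalIpBody,
    ) -> Result<(), Error> {
        // Succeeds even if `old` was already detached.
        sa.instance_delete_external_ip(instance_id, old).await?;
        // The ephemeral slot is now free, so this cannot hit the
        // implicit-detach error; repeating it after a partial failure
        // is also fine, since "already attached" returns Ok.
        sa.instance_put_external_ip(instance_id, new).await
    }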