diff --git a/Cargo.lock b/Cargo.lock index 664facf5e2..f4d2ba1984 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -49,9 +49,9 @@ dependencies = [ [[package]] name = "ansi_term" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" dependencies = [ "winapi", ] @@ -269,9 +269,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.7.1" +version = "3.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9df67f7bf9ef8498769f994239c45613ef0c5899415fb58e9add412d2c1a538" +checksum = "8f1e260c3a9040a7c19a12468758f4c16f31a81a1fe087482be9570ec864bb6c" [[package]] name = "byte-tools" @@ -296,9 +296,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.71" +version = "1.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd" +checksum = "22a9137b95ea06864e018375b72adfb7db6e6f68cfc8df5a04d00288050485ee" [[package]] name = "cfg-if" @@ -331,9 +331,9 @@ dependencies = [ [[package]] name = "clap" -version = "2.33.3" +version = "2.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" dependencies = [ "ansi_term", "atty", @@ -1126,9 +1126,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c06815895acec637cd6ed6e9662c935b866d20a106f8361892893a7d9234964" +checksum = "7fd819562fcebdac5afc5c113c3ec36f902840b70fd4fc458799c8ce4607ae55" dependencies = [ "bytes", "fnv", @@ -1222,9 +1222,9 @@ dependencies = [ [[package]] name = "http-body" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "399c583b2979440c60be0821a6199eca73bc3c8dcd9d070d75ac726e2c6186e5" +checksum = "1ff4f84919677303da5f147645dbea6b1881f368d03ac84e1dc09031ebd7b2c6" dependencies = [ "bytes", "http", @@ -1239,9 +1239,9 @@ checksum = "acd94fdbe1d4ff688b67b04eee2e17bd50995534a61539e45adfefb45e5e5503" [[package]] name = "httpdate" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6456b8a6c8f33fee7d958fcd1b60d55b11940a79e63ae87013e6d22e26034440" +checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" [[package]] name = "hyper" @@ -1955,9 +1955,9 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.36" +version = "0.10.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d9facdb76fec0b73c406f125d44d86fdad818d66fef0531eec9233ca425ff4a" +checksum = "0c7ae222234c30df141154f159066c5093ff73b63204dcda7121eb082fc56a95" dependencies = [ "bitflags", "cfg-if", @@ -1975,9 +1975,9 @@ checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a" [[package]] name = "openssl-sys" -version = "0.9.67" +version = "0.9.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69df2d8dfc6ce3aaf44b40dec6f487d5a886516cf6879c49e98e0710f310a058" +checksum = "7df13d165e607909b363a4757a6f133f8a818a74e9d3a98d09c6128e15fa4c73" dependencies = [ "autocfg", "cc", @@ -2340,9 +2340,9 @@ dependencies = [ [[package]] name = "pkg-config" 
-version = "0.3.20" +version = "0.3.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c9b1041b4387893b91ee6746cddfc28516aff326a3519fb2adf820932c5e6cb" +checksum = "12295df4f294471248581bc09bef3c38a5e46f1e36d6a37353621a0c6c357e1f" [[package]] name = "polar-core" @@ -2406,9 +2406,9 @@ dependencies = [ [[package]] name = "ppv-lite86" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3ca011bd0129ff4ae15cd04c4eef202cadf6c51c21e47aba319b4e0501db741" +checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba" [[package]] name = "pq-sys" @@ -2505,7 +2505,7 @@ dependencies = [ [[package]] name = "progenitor" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/progenitor#f53d122406fa27f48728330b20c96147881bb296" +source = "git+https://github.com/oxidecomputer/progenitor#66b41ba301793b8d720770b2210bee8884446d3f" dependencies = [ "anyhow", "getopts", @@ -2519,7 +2519,7 @@ dependencies = [ [[package]] name = "progenitor-impl" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/progenitor#f53d122406fa27f48728330b20c96147881bb296" +source = "git+https://github.com/oxidecomputer/progenitor#66b41ba301793b8d720770b2210bee8884446d3f" dependencies = [ "anyhow", "convert_case", @@ -2540,7 +2540,7 @@ dependencies = [ [[package]] name = "progenitor-macro" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/progenitor#f53d122406fa27f48728330b20c96147881bb296" +source = "git+https://github.com/oxidecomputer/progenitor#66b41ba301793b8d720770b2210bee8884446d3f" dependencies = [ "openapiv3 1.0.0-beta.5", "proc-macro2", @@ -2892,9 +2892,9 @@ checksum = "61b3909d758bb75c79f23d4736fac9433868679d3ad2ea7a61e3c25cfda9a088" [[package]] name = "ryu" -version = "1.0.5" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +checksum = "3c9613b5a66ab9ba26415184cfc41156594925a9cf3a2057e57f31ff145f6568" [[package]] name = "same-file" @@ -3136,9 +3136,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98c1fcca18d55d1763e1c16873c4bde0ac3ef75179a28c7b372917e0494625be" +checksum = "12e47be9471c72889ebafb5e14d5ff930d89ae7a67bbdb5f8abb564f845a927e" dependencies = [ "darling", "proc-macro2", @@ -3666,9 +3666,9 @@ dependencies = [ [[package]] name = "termtree" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78fbf2dd23e79c28ccfa2472d3e6b3b189866ffef1aeb91f17c2d968b6586378" +checksum = "13a4ec180a2de59b57434704ccfad967f789b12737738798fa08798cd5824c16" [[package]] name = "textwrap" @@ -3779,9 +3779,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83b2a3d4d9091d0abd7eba4dc2710b1718583bd4d8992e2190720ea38f391f7" +checksum = "2c1c1d5a42b6245520c249549ec267180beaffcc0615401ac8e31853d4b6d8d2" dependencies = [ "tinyvec_macros", ] @@ -4015,7 +4015,7 @@ checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" [[package]] name = "typify" version = "0.0.6-dev" -source = "git+https://github.com/oxidecomputer/typify#58bfcd02a2cd74bff047e9e8ad6e4f2b4f84f3af" +source = "git+https://github.com/oxidecomputer/typify#5132e748f91311aadd56011b97f51c4f32373985" 
dependencies = [ "typify-impl", "typify-macro", @@ -4024,7 +4024,7 @@ dependencies = [ [[package]] name = "typify-impl" version = "0.0.6-dev" -source = "git+https://github.com/oxidecomputer/typify#58bfcd02a2cd74bff047e9e8ad6e4f2b4f84f3af" +source = "git+https://github.com/oxidecomputer/typify#5132e748f91311aadd56011b97f51c4f32373985" dependencies = [ "convert_case", "proc-macro2", @@ -4039,7 +4039,7 @@ dependencies = [ [[package]] name = "typify-macro" version = "0.0.6-dev" -source = "git+https://github.com/oxidecomputer/typify#58bfcd02a2cd74bff047e9e8ad6e4f2b4f84f3af" +source = "git+https://github.com/oxidecomputer/typify#5132e748f91311aadd56011b97f51c4f32373985" dependencies = [ "proc-macro2", "quote", diff --git a/sled-agent/README.adoc b/sled-agent/README.adoc index 36321876a1..b997c1e966 100644 --- a/sled-agent/README.adoc +++ b/sled-agent/README.adoc @@ -17,6 +17,8 @@ between the two implementations. == Code Tour * `src/bin`: Contains binaries for the sled agent (and simulated sled agent). +* `src/bootstrap`: Contains bootstrap-related services, operating on a distinct + HTTP endpoint from typical sled operation. * `src/common`: Shared state machine code between the simulated and real sled agent. * `src/sim`: Library code responsible for operating a simulated sled agent. * `src/illumos`: Illumos-specific helpers for accessing OS utilities to manage a sled. @@ -25,6 +27,12 @@ Additionally, there are some noteworthy top-level files used by the sled agent: * `src/instance_manager.rs`: Manages multiple instances on a sled. * `src/instance.rs`: Manages a single instance. +* `src/storage_manager.rs`: Manages storage within a sled. + +As well as some utilities: + +* `src/running_zone.rs`: RAII wrapper around a running Zone owned by the Sled Agent. +* `src/vnic.rs`: RAII wrapper around VNICs owned by the Sled Agent. == Life of an Instance @@ -39,8 +47,8 @@ following steps on initialization to manage OS-local resources. . ... creates a new "base zone", which contains the necessary pieces to execute a Propolis server, and as little else as possible. This base zone is derived from the "sparse" zone template. -. ... identifies all Oxide-controlled zones (with the prefix `propolis_instance_`) -and all Oxide-controlled VNICs (with the prefix `vnic_propolis`), which are +. ... identifies all Oxide-controlled zones (with the prefix `oxz_`) +and all Oxide-controlled VNICs (with the prefix `ox_vnic_`), which are removed from the machine. .To allocate an instance on the Sled, the following steps occur: diff --git a/sled-agent/src/bin/sled-agent.rs b/sled-agent/src/bin/sled-agent.rs index d56d6287f6..fcaa169989 100644 --- a/sled-agent/src/bin/sled-agent.rs +++ b/sled-agent/src/bin/sled-agent.rs @@ -54,20 +54,29 @@ enum Args { }, /// Runs the Sled Agent server. Run { + /// UUID of the Sled Agent. #[structopt(name = "SA_UUID", parse(try_from_str))] uuid: Uuid, + /// Socket address of the bootstrap agent. #[structopt(name = "BA_IP:PORT", parse(try_from_str))] bootstrap_agent_addr: SocketAddr, + /// Socket address of the sled agent. #[structopt(name = "SA_IP:PORT", parse(try_from_str))] sled_agent_addr: SocketAddr, + /// Socket address of Nexus. #[structopt(name = "NEXUS_IP:PORT", parse(try_from_str))] nexus_addr: SocketAddr, + /// Optional VLAN, tagged on all guest NICs. #[structopt(long = "vlan")] vlan: Option, + + /// Optional list of zpools managed by Sled agent. 
+ #[structopt(long = "zpools", name = "zpools", parse(try_from_str))] + zpools: Option>, }, } @@ -98,6 +107,7 @@ async fn do_run() -> Result<(), CmdError> { sled_agent_addr, nexus_addr, vlan, + zpools, } => { // Configure and run the Bootstrap server. let config = BootstrapConfig { @@ -127,6 +137,7 @@ async fn do_run() -> Result<(), CmdError> { level: ConfigLoggingLevel::Info, }, vlan, + zpools, }; let sled_server = sled_server::Server::start(&config) diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index a2e7934cab..1cc26bdb5d 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -23,4 +23,6 @@ pub struct Config { pub log: ConfigLogging, /// Optional VLAN ID to be used for tagging guest VNICs. pub vlan: Option, + /// Optional list of zpools to be used as "discovered disks". + pub zpools: Option>, } diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 718dc7b89a..d7b433118c 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -12,6 +12,7 @@ use dropshot::HttpResponseOk; use dropshot::Path; use dropshot::RequestContext; use dropshot::TypedBody; +use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::api::internal::sled_agent::InstanceEnsureBody; @@ -59,7 +60,8 @@ async fn instance_put( let body_args = body.into_inner(); Ok(HttpResponseOk( sa.instance_ensure(instance_id, body_args.initial, body_args.target) - .await?, + .await + .map_err(|e| Error::from(e))?, )) } @@ -87,6 +89,7 @@ async fn disk_put( body_args.initial_runtime.clone(), body_args.target.clone(), ) - .await?, + .await + .map_err(|e| Error::from(e))?, )) } diff --git a/sled-agent/src/illumos/dladm.rs b/sled-agent/src/illumos/dladm.rs index 1b89f1821c..62db17600c 100644 --- a/sled-agent/src/illumos/dladm.rs +++ b/sled-agent/src/illumos/dladm.rs @@ -5,13 +5,27 @@ //! Utilities for poking at data links. use crate::common::vlan::VlanID; -use crate::illumos::{execute, PFEXEC}; -use omicron_common::api::external::Error; +use crate::illumos::{execute, ExecutionError, PFEXEC}; use omicron_common::api::external::MacAddr; -pub const VNIC_PREFIX: &str = "vnic_propolis"; +pub const VNIC_PREFIX: &str = "ox_vnic_"; +pub const VNIC_PREFIX_GUEST: &str = "ox_vnic_guest"; +pub const VNIC_PREFIX_CONTROL: &str = "ox_vnic_control"; + pub const DLADM: &str = "/usr/sbin/dladm"; +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Device not found")] + NotFound, + + #[error("Subcommand failure: {0}")] + Execution(#[from] ExecutionError), + + #[error("Failed to parse output: {0}")] + Parse(#[from] std::string::FromUtf8Error), +} + /// The name of a physical datalink. #[derive(Debug, Clone)] pub struct PhysicalLink(pub String); @@ -26,22 +40,14 @@ impl Dladm { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[DLADM, "show-phys", "-p", "-o", "LINK"]); let output = execute(cmd)?; - let name = String::from_utf8(output.stdout) - .map_err(|e| Error::InternalError { - internal_message: format!( - "Cannot parse dladm output as UTF-8: {}", - e - ), - })? + let name = String::from_utf8(output.stdout)? .lines() // TODO: This is arbitrary, but we're currently grabbing the first // physical device. Should we have a more sophisticated method for // selection? 
.next() .map(|s| s.trim()) - .ok_or_else(|| Error::InternalError { - internal_message: "No physical devices found".to_string(), - })? + .ok_or_else(|| Error::NotFound)? .to_string(); Ok(PhysicalLink(name)) } @@ -90,13 +96,7 @@ impl Dladm { let cmd = command.args(&[DLADM, "show-vnic", "-p", "-o", "LINK"]); let output = execute(cmd)?; - let vnics = String::from_utf8(output.stdout) - .map_err(|e| Error::InternalError { - internal_message: format!( - "Failed to parse UTF-8 from dladm output: {}", - e - ), - })? + let vnics = String::from_utf8(output.stdout)? .lines() .filter(|vnic| vnic.starts_with(VNIC_PREFIX)) .map(|s| s.to_owned()) diff --git a/sled-agent/src/illumos/mod.rs b/sled-agent/src/illumos/mod.rs index e811d394b9..787ba1c116 100644 --- a/sled-agent/src/illumos/mod.rs +++ b/sled-agent/src/illumos/mod.rs @@ -8,27 +8,33 @@ pub mod dladm; pub mod svc; pub mod zfs; pub mod zone; - -use omicron_common::api::external::Error; +pub mod zpool; const PFEXEC: &str = "/usr/bin/pfexec"; +#[derive(thiserror::Error, Debug)] +pub enum ExecutionError { + #[error("Failed to start execution of process: {0}")] + ExecutionStart(std::io::Error), + + #[error( + "Command executed and failed with status: {status}. Output: {stderr}" + )] + CommandFailure { status: std::process::ExitStatus, stderr: String }, +} + // Helper function for starting the process and checking the // exit code result. fn execute( command: &mut std::process::Command, -) -> Result<std::process::Output, Error> { - let output = command.output().map_err(|e| Error::InternalError { - internal_message: format!("Failed to execute {:?}: {}", command, e), - })?; +) -> Result<std::process::Output, ExecutionError> { + let output = + command.output().map_err(|e| ExecutionError::ExecutionStart(e))?; if !output.status.success() { - return Err(Error::InternalError { - internal_message: format!( - "Command {:?} executed and failed: {}", - command, - String::from_utf8_lossy(&output.stderr) - ), + return Err(ExecutionError::CommandFailure { + status: output.status, + stderr: String::from_utf8_lossy(&output.stderr).to_string(), }); } diff --git a/sled-agent/src/illumos/zfs.rs b/sled-agent/src/illumos/zfs.rs index a3c138a17e..a59b9058b7 100644 --- a/sled-agent/src/illumos/zfs.rs +++ b/sled-agent/src/illumos/zfs.rs @@ -5,41 +5,63 @@ //! Utilities for poking at ZFS. use crate::illumos::{execute, PFEXEC}; -use omicron_common::api::external::Error; +use std::fmt; +use std::path::PathBuf; pub const ZONE_ZFS_DATASET_MOUNTPOINT: &str = "/zone"; pub const ZONE_ZFS_DATASET: &str = "rpool/zone"; const ZFS: &str = "/usr/sbin/zfs"; +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("ZFS execution error: {0}")] + Execution(#[from] crate::illumos::ExecutionError), + + #[error("Does not exist: {0}")] + NotFound(String), + + #[error("Unexpected output from ZFS commands: {0}")] + Output(String), + + #[error("Failed to parse output: {0}")] + Parse(#[from] std::string::FromUtf8Error), +} + /// Wraps commands for interacting with ZFS. pub struct Zfs {} +/// Describes a mountpoint for a ZFS filesystem. +pub enum Mountpoint { + Legacy, + Path(PathBuf), +} + +impl fmt::Display for Mountpoint { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Mountpoint::Legacy => write!(f, "legacy"), + Mountpoint::Path(p) => write!(f, "{}", p.display()), + } + } +} + #[cfg_attr(test, mockall::automock, allow(dead_code))] impl Zfs { /// Creates a new ZFS filesystem named `name`, unless one already exists.
- pub fn ensure_dataset(name: &str) -> Result<(), Error> { + pub fn ensure_filesystem( + name: &str, + mountpoint: Mountpoint, + ) -> Result<(), Error> { // If the dataset exists, we're done. let mut command = std::process::Command::new(ZFS); let cmd = command.args(&["list", "-Hpo", "name,type,mountpoint", name]); // If the list command returns any valid output, validate it. if let Ok(output) = execute(cmd) { - let stdout = String::from_utf8(output.stdout).map_err(|e| { - Error::InternalError { - internal_message: format!( - "Cannot parse 'zfs list' output as UTF-8: {}", - e - ), - } - })?; + let stdout = String::from_utf8(output.stdout)?; let values: Vec<&str> = stdout.trim().split('\t').collect(); - if values != &[name, "filesystem", ZONE_ZFS_DATASET_MOUNTPOINT] { - return Err(Error::InternalError { - internal_message: format!( - "{} exists, but has unexpected values: {:?}", - name, values - ), - }); + if values != &[name, "filesystem", &mountpoint.to_string()] { + return Err(Error::Output(stdout)); } return Ok(()); } @@ -50,10 +72,53 @@ impl Zfs { ZFS, "create", "-o", - &format!("mountpoint={}", ZONE_ZFS_DATASET_MOUNTPOINT), + &format!("mountpoint={}", mountpoint), name, ]); execute(cmd)?; Ok(()) } + + pub fn set_oxide_value( + filesystem_name: &str, + name: &str, + value: &str, + ) -> Result<(), Error> { + Zfs::set_value(filesystem_name, &format!("oxide:{}", name), value) + } + + fn set_value( + filesystem_name: &str, + name: &str, + value: &str, + ) -> Result<(), Error> { + let mut command = std::process::Command::new(PFEXEC); + let value_arg = format!("{}={}", name, value); + let cmd = command.args(&[ZFS, "set", &value_arg, filesystem_name]); + execute(cmd)?; + Ok(()) + } + + pub fn get_oxide_value( + filesystem_name: &str, + name: &str, + ) -> Result<String, Error> { + Zfs::get_value(filesystem_name, &format!("oxide:{}", name)) + } + + fn get_value(filesystem_name: &str, name: &str) -> Result<String, Error> { + let mut command = std::process::Command::new(PFEXEC); + let cmd = + command.args(&[ZFS, "get", "-Ho", "value", &name, filesystem_name]); + let output = execute(cmd)?; + let stdout = String::from_utf8(output.stdout)?; + let value = stdout.trim(); + if value == "-" { + return Err(Error::NotFound(format!( + "Property {}, within filesystem {}", + name, filesystem_name + ))); + } + Ok(value.to_string()) + } } diff --git a/sled-agent/src/illumos/zone.rs b/sled-agent/src/illumos/zone.rs index 713b0d5820..5d47339a25 100644 --- a/sled-agent/src/illumos/zone.rs +++ b/sled-agent/src/illumos/zone.rs @@ -5,31 +5,71 @@ //! API for interacting with Zones running Propolis.
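An aside on the `Zfs::set_oxide_value`/`get_oxide_value` helpers above: ZFS treats any property name containing a colon as a user property, and `zfs get -Ho value` prints `-` for a property that was never set, which is what `get_value` maps to `Error::NotFound`. A hypothetical round trip through these helpers (filesystem name and value invented for illustration):

    // Hypothetical usage of the Zfs helpers introduced in this diff.
    fn tag_dataset() -> Result<(), crate::illumos::zfs::Error> {
        // Runs: pfexec zfs set oxide:owner=sled-agent rpool/zone
        Zfs::set_oxide_value("rpool/zone", "owner", "sled-agent")?;
        // Runs: pfexec zfs get -Ho value oxide:owner rpool/zone
        let owner = Zfs::get_oxide_value("rpool/zone", "owner")?;
        assert_eq!(owner, "sled-agent");
        Ok(())
    }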
use ipnetwork::IpNetwork; -use omicron_common::api::external::Error; use slog::Logger; use std::net::SocketAddr; use uuid::Uuid; +use crate::illumos::dladm::VNIC_PREFIX_CONTROL; use crate::illumos::zfs::ZONE_ZFS_DATASET_MOUNTPOINT; use crate::illumos::{execute, PFEXEC}; -const BASE_ZONE: &str = "propolis_base"; +const PROPOLIS_BASE_ZONE: &str = "oxz_propolis_base"; +const STORAGE_BASE_ZONE: &str = "oxz_storage_base"; const PROPOLIS_SVC_DIRECTORY: &str = "/opt/oxide/propolis-server"; +pub const CRUCIBLE_SVC_DIRECTORY: &str = "/opt/oxide/crucible-agent"; +pub const COCKROACH_SVC_DIRECTORY: &str = "/opt/oxide/cockroachdb"; +const DLADM: &str = "/usr/sbin/dladm"; const IPADM: &str = "/usr/sbin/ipadm"; const SVCADM: &str = "/usr/sbin/svcadm"; const SVCCFG: &str = "/usr/sbin/svccfg"; const ZLOGIN: &str = "/usr/sbin/zlogin"; -pub const ZONE_PREFIX: &str = "propolis_instance_"; +// TODO: These could become enums +pub const ZONE_PREFIX: &str = "oxz_"; +pub const PROPOLIS_ZONE_PREFIX: &str = "oxz_propolis_instance_"; +pub const CRUCIBLE_ZONE_PREFIX: &str = "oxz_crucible_instance_"; +pub const COCKROACH_ZONE_PREFIX: &str = "oxz_cockroach_instance_"; -fn get_zone(name: &str) -> Result<Option<zone::Zone>, Error> { - Ok(zone::Adm::list() - .map_err(|e| Error::InternalError { - internal_message: format!("Cannot list zones: {}", e), - })? - .into_iter() - .find(|zone| zone.name() == name)) +#[derive(thiserror::Error, Debug)] +pub enum Error { + // TODO: These could be grouped into an "operation" error with an enum + // variant, if we want... + #[error("Cannot halt zone: {0}")] + Halt(zone::ZoneError), + + #[error("Cannot uninstall zone: {0}")] + Uninstall(zone::ZoneError), + + #[error("Cannot delete zone: {0}")] + Delete(zone::ZoneError), + + #[error("Cannot install zone: {0}")] + Install(zone::ZoneError), + + #[error("Cannot configure zone: {0}")] + Configure(zone::ZoneError), + + #[error("Cannot clone zone: {0}")] + Clone(zone::ZoneError), + + #[error("Cannot boot zone: {0}")] + Boot(zone::ZoneError), + + #[error("Cannot list zones: {0}")] + List(zone::ZoneError), + + #[error("Zone execution error: {0}")] + Execution(#[from] crate::illumos::ExecutionError), + + #[error("Failed to parse output: {0}")] + Parse(#[from] std::string::FromUtf8Error), + + #[error("Error accessing filesystem: {0}")] + Filesystem(std::io::Error), + + #[error("Value not found")] + NotFound, } /// Wraps commands for interacting with Zones. @@ -39,31 +79,18 @@ pub struct Zones {} impl Zones { /// Ensures a zone is halted before both uninstalling and deleting it. pub fn halt_and_remove(log: &Logger, name: &str) -> Result<(), Error> { - if let Some(zone) = get_zone(name)? { + if let Some(zone) = Self::find(name)?
{ info!(log, "halt_and_remove: Zone state: {:?}", zone.state()); if zone.state() == zone::State::Running { - zone::Adm::new(name).halt().map_err(|e| { - Error::InternalError { - internal_message: format!( - "Cannot halt zone {}: {}", - name, e - ), - } - })?; + zone::Adm::new(name).halt().map_err(Error::Halt)?; } - zone::Adm::new(name).uninstall(/* force= */ true).map_err(|e| { - Error::InternalError { - internal_message: format!( - "Cannot uninstall {}: {}", - name, e - ), - } - })?; - zone::Config::new(name).delete(/* force= */ true).run().map_err( - |e| Error::InternalError { - internal_message: format!("Cannot delete {}: {}", name, e), - }, - )?; + zone::Adm::new(name) + .uninstall(/* force= */ true) + .map_err(Error::Uninstall)?; + zone::Config::new(name) + .delete(/* force= */ true) + .run() + .map_err(Error::Delete)?; } Ok(()) } @@ -96,9 +123,7 @@ impl Zones { log: &Logger, mountpoint: &std::path::Path, ) -> Result<(), Error> { - let tmpdir = tempfile::tempdir().map_err(|e| { - Error::internal_error(&format!("Tempdir err: {}", e)) - })?; + let tmpdir = tempfile::tempdir().map_err(Error::Filesystem)?; let mountpoint = mountpoint.to_str().unwrap(); let repo = format!("{}/repo.db", tmpdir.as_ref().to_string_lossy()); @@ -106,12 +131,7 @@ impl Zones { let manifests = format!("{}/lib/svc/manifest", mountpoint); let installto = format!("{}/etc/svc/repository.db", mountpoint); - std::fs::copy(&seed, &repo).map_err(|e| { - Error::internal_error(&format!( - "Cannot copy seed DB to tempdir: {}", - e - )) - })?; + std::fs::copy(&seed, &repo).map_err(Error::Filesystem)?; let mut env = std::collections::HashMap::new(); let dtd = "/usr/share/lib/xml/dtd/service_bundle.dtd.1".to_string(); @@ -126,20 +146,19 @@ impl Zones { execute(command)?; info!(log, "Seeding SMF repository at {} - Complete", mountpoint); - std::fs::copy(&repo, &installto).map_err(|e| { - Error::internal_error(&format!("Cannot copy SMF DB: {}", e)) - })?; + std::fs::copy(&repo, &installto).map_err(Error::Filesystem)?; Ok(()) } - /// Creates a "base" zone for Propolis, from which other Propolis - /// zones may quickly be cloned. - pub fn create_base(log: &Logger) -> Result<(), Error> { - let name = BASE_ZONE; - + fn create_base( + name: &str, + log: &Logger, + filesystems: &[zone::Fs], + devices: &[zone::Device], + ) -> Result<(), Error> { info!(log, "Querying for prescence of zone: {}", name); - if let Some(zone) = get_zone(name)? { + if let Some(zone) = Self::find(name)? { info!( log, "Found zone: {} in state {:?}", @@ -171,23 +190,17 @@ impl Zones { .set_path(&path) .set_autoboot(false) .set_ip_type(zone::IpType::Exclusive); - cfg.add_fs(&zone::Fs { - ty: "lofs".to_string(), - dir: PROPOLIS_SVC_DIRECTORY.to_string(), - special: PROPOLIS_SVC_DIRECTORY.to_string(), - options: vec!["ro".to_string()], - ..Default::default() - }); - cfg.run().map_err(|e| Error::InternalError { - internal_message: format!("Failed to create base zone: {}", e), - })?; + for fs in filesystems { + cfg.add_fs(&fs); + } + for device in devices { + cfg.add_device(device); + } + cfg.run().map_err(Error::Configure)?; + // TODO: This process takes a little while... Consider optimizing. 
info!(log, "Installing base zone: {}", name); - zone::Adm::new(name).install(&[]).map_err(|e| { - Error::InternalError { - internal_message: format!("Failed to install base zone: {}", e), - } - })?; + zone::Adm::new(name).install(&[]).map_err(Error::Install)?; info!(log, "Seeding base zone: {}", name); let root = format!("{}/{}", path, "root"); @@ -196,15 +209,42 @@ impl Zones { Ok(()) } - /// Sets the configuration for a Propolis zone. - /// - /// This zone will be cloned as a child of the "base propolis zone". - pub fn configure_child_zone( + /// Creates a "base" zone for Propolis, from which other Propolis + /// zones may quickly be cloned. + pub fn create_propolis_base(log: &Logger) -> Result<(), Error> { + Zones::create_base( + PROPOLIS_BASE_ZONE, + log, + &[zone::Fs { + ty: "lofs".to_string(), + dir: PROPOLIS_SVC_DIRECTORY.to_string(), + special: PROPOLIS_SVC_DIRECTORY.to_string(), + options: vec!["ro".to_string()], + ..Default::default() + }], + &[ + zone::Device { name: "/dev/vmm/*".to_string() }, + zone::Device { name: "/dev/vmmctl".to_string() }, + zone::Device { name: "/dev/viona".to_string() }, + ], + ) + } + + /// Creates a "base" zone for storage services, from which other + /// zones may quickly be cloned. + pub fn create_storage_base(log: &Logger) -> Result<(), Error> { + Zones::create_base(STORAGE_BASE_ZONE, log, &[], &[]) + } + + /// Sets the configuration for a zone. + pub fn configure_zone( log: &Logger, name: &str, + filesystems: &[zone::Fs], + devices: &[zone::Device], vnics: Vec, ) -> Result<(), Error> { - info!(log, "Creating child zone: {}", name); + info!(log, "Configuring zone: {}", name); let mut cfg = zone::Config::create( name, /* overwrite= */ true, @@ -215,76 +255,144 @@ impl Zones { .set_path(format!("{}/{}", ZONE_ZFS_DATASET_MOUNTPOINT, name)) .set_autoboot(false) .set_ip_type(zone::IpType::Exclusive); - cfg.add_fs(&zone::Fs { - ty: "lofs".to_string(), - dir: PROPOLIS_SVC_DIRECTORY.to_string(), - special: PROPOLIS_SVC_DIRECTORY.to_string(), - options: vec!["ro".to_string()], - ..Default::default() - }); + for fs in filesystems { + cfg.add_fs(&fs); + } + for device in devices { + cfg.add_device(device); + } for vnic in &vnics { cfg.add_net(&zone::Net { physical: vnic.to_string(), ..Default::default() }); } - cfg.add_device(&zone::Device { name: "/dev/vmm/*".to_string() }); - cfg.add_device(&zone::Device { name: "/dev/vmmctl".to_string() }); - cfg.add_device(&zone::Device { name: "/dev/viona".to_string() }); - cfg.run().map_err(|e| Error::InternalError { - internal_message: format!("Failed to create child zone: {}", e), - })?; - + cfg.run().map_err(Error::Configure)?; Ok(()) } + /// Sets the configuration for a Propolis zone. + /// + /// This zone will be cloned as a child of the "base propolis zone". + pub fn configure_propolis_zone( + log: &Logger, + name: &str, + vnics: Vec, + ) -> Result<(), Error> { + Zones::configure_zone( + log, + name, + &[zone::Fs { + ty: "lofs".to_string(), + dir: PROPOLIS_SVC_DIRECTORY.to_string(), + special: PROPOLIS_SVC_DIRECTORY.to_string(), + options: vec!["ro".to_string()], + ..Default::default() + }], + &[ + zone::Device { name: "/dev/vmm/*".to_string() }, + zone::Device { name: "/dev/vmmctl".to_string() }, + zone::Device { name: "/dev/viona".to_string() }, + ], + vnics, + ) + } + /// Clones a zone (named `name`) from the base Propolis zone. 
- pub fn clone_from_base(name: &str) -> Result<(), Error> { - zone::Adm::new(name).clone(BASE_ZONE).map_err(|e| { - Error::InternalError { - internal_message: format!("Failed to clone zone: {}", e), - } - })?; + fn clone_from_base(name: &str, base: &str) -> Result<(), Error> { + zone::Adm::new(name).clone(base).map_err(Error::Clone)?; Ok(()) } + /// Clones a zone (named `name`) from the base Propolis zone. + pub fn clone_from_base_propolis(name: &str) -> Result<(), Error> { + Zones::clone_from_base(name, PROPOLIS_BASE_ZONE) + } + + /// Clones a zone (named `name`) from the base Crucible zone. + pub fn clone_from_base_storage(name: &str) -> Result<(), Error> { + Zones::clone_from_base(name, STORAGE_BASE_ZONE) + } + /// Boots a zone (named `name`). pub fn boot(name: &str) -> Result<(), Error> { - zone::Adm::new(name).boot().map_err(|e| Error::InternalError { - internal_message: format!("Failed to boot zone: {}", e), - })?; + zone::Adm::new(name).boot().map_err(Error::Boot)?; Ok(()) } /// Returns all zones that may be managed by the Sled Agent. + /// + /// These zones must have names starting with [`ZONE_PREFIX`]. pub fn get() -> Result<Vec<zone::Zone>, Error> { Ok(zone::Adm::list() - .map_err(|e| Error::InternalError { - internal_message: format!("Failed to list zones: {}", e), - })? + .map_err(Error::List)? .into_iter() .filter(|z| z.name().starts_with(ZONE_PREFIX)) .collect()) } - /// Creates an IP address within a Zone. - pub fn create_address( - zone: &str, - interface: &str, - ) -> Result<IpNetwork, Error> { + /// Identical to [`Self::get`], but filters out "base" zones. + pub fn get_non_base_zones() -> Result<Vec<zone::Zone>, Error> { + Self::get().map(|zones| { + zones + .into_iter() + .filter(|z| match z.name() { + PROPOLIS_BASE_ZONE | STORAGE_BASE_ZONE => false, + _ => true, + }) + .collect() + }) + } + + /// Finds a zone with a specified name. + /// + /// Can only return zones that start with [`ZONE_PREFIX`], as they + /// are managed by the Sled Agent. + pub fn find(name: &str) -> Result<Option<zone::Zone>, Error> { + Ok(Self::get()?.into_iter().find(|zone| zone.name() == name)) + } + + /// Returns the name of the VNIC used to communicate with the control plane. + pub fn get_control_interface(zone: &str) -> Result<String, Error> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ ZLOGIN, zone, - IPADM, - "create-addr", - "-t", - "-T", - "dhcp", - interface, + DLADM, + "show-vnic", + "-p", + "-o", + "LINK", ]); - execute(cmd)?; + let output = execute(cmd)?; + String::from_utf8(output.stdout) + .map_err(Error::Parse)? + .lines() + .find_map(|name| { + if name.starts_with(VNIC_PREFIX_CONTROL) { + Some(name.to_string()) + } else { + None + } + }) + .ok_or(Error::NotFound) + } + /// Gets the address if one exists, creates one if one does not exist. + pub fn ensure_address( + zone: &str, + addrobj: &str, + ) -> Result<IpNetwork, Error> { + match Zones::get_address(zone, addrobj) { + Ok(addr) => Ok(addr), + Err(_) => Zones::create_address(zone, addrobj), + } + } + + /// Gets the IP address of an interface within a Zone. + /// + /// TODO: Use types to distinguish "addrobj" from "interface" objects. + pub fn get_address(zone: &str, addrobj: &str) -> Result<IpNetwork, Error> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ ZLOGIN, @@ -294,24 +402,33 @@ "-p", "-o", "ADDR", - interface, + addrobj, ]); let output = execute(cmd)?; - String::from_utf8(output.stdout) - .map_err(|e| Error::InternalError { - internal_message: format!( - "Cannot parse ipadm output as UTF-8: {}", - e - ), - })? + String::from_utf8(output.stdout)?
.lines() .find_map(|s| s.parse().ok()) - .ok_or(Error::InternalError { - internal_message: format!( - "Cannot find a valid IP address on {}", - interface - ), - }) + .ok_or(Error::NotFound) + } + + /// Creates an IP address within a Zone. + pub fn create_address( + zone: &str, + addrobj: &str, + ) -> Result<IpNetwork, Error> { + let mut command = std::process::Command::new(PFEXEC); + let cmd = command.args(&[ + ZLOGIN, + zone, + IPADM, + "create-addr", + "-t", + "-T", + "dhcp", + addrobj, + ]); + execute(cmd)?; + Self::get_address(zone, addrobj) } /// Configures and initializes a Propolis server within the specified Zone. diff --git a/sled-agent/src/illumos/zpool.rs b/sled-agent/src/illumos/zpool.rs new file mode 100644 index 0000000000..4e9d3dadb5 --- /dev/null +++ b/sled-agent/src/illumos/zpool.rs @@ -0,0 +1,216 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Utilities for managing Zpools. + +use crate::illumos::execute; +use omicron_common::api::external::Error as ExternalError; +use std::str::FromStr; + +const ZPOOL: &str = "/usr/sbin/zpool"; + +#[derive(thiserror::Error, Debug, PartialEq, Eq)] +pub enum ParseError { + #[error("Failed to parse output as UTF-8: {0}")] + Utf8(#[from] std::string::FromUtf8Error), + + #[error("Failed to parse output: {0}")] + Parse(String), +} + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Zpool execution error: {0}")] + Execution(#[from] crate::illumos::ExecutionError), + + #[error(transparent)] + Parse(#[from] ParseError), + + #[error("Failed to execute subcommand: {0}")] + Command(ExternalError), +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum ZpoolHealth { + /// The device is online and functioning. + Online, + /// One or more components are degraded or faulted, but sufficient + /// replicas exist to continue functioning. + Degraded, + /// One or more components are degraded or faulted, and insufficient + /// replicas exist to continue functioning. + Faulted, + /// The device was explicitly taken offline by "zpool offline". + Offline, + /// The device was physically removed. + Removed, + /// The device could not be opened. + Unavailable, +} + +impl FromStr for ZpoolHealth { + type Err = ParseError; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s { + "ONLINE" => Ok(ZpoolHealth::Online), + "DEGRADED" => Ok(ZpoolHealth::Degraded), + "FAULTED" => Ok(ZpoolHealth::Faulted), + "OFFLINE" => Ok(ZpoolHealth::Offline), + "REMOVED" => Ok(ZpoolHealth::Removed), + "UNAVAIL" => Ok(ZpoolHealth::Unavailable), + _ => Err(ParseError::Parse(format!( + "Unrecognized zpool 'health': {}", + s + ))), + } + } +} + +/// Describes a Zpool. +#[derive(Clone, Debug)] +pub struct ZpoolInfo { + name: String, + size: u64, + allocated: u64, + free: u64, + health: ZpoolHealth, +} + +impl ZpoolInfo { + pub fn name(&self) -> &str { + &self.name + } + + pub fn size(&self) -> u64 { + self.size + } + + #[allow(dead_code)] + pub fn allocated(&self) -> u64 { + self.allocated + } + + #[allow(dead_code)] + pub fn free(&self) -> u64 { + self.free + } + + #[allow(dead_code)] + pub fn health(&self) -> ZpoolHealth { + self.health + } +} + +impl FromStr for ZpoolInfo { + type Err = ParseError; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + // Lambda helpers for error handling.
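// Context for the parser below: `zpool list -Hpo name,size,allocated,free,health`
// prints one line per pool, with -H suppressing headers and -p forcing exact
// byte counts, e.g. (hypothetical numbers):
//
//     rpool   10737418240   6442450944   4294967296   ONLINE
//
// which is why splitting on whitespace and parsing u64 fields suffices.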
+ let expected_field = |name| { + ParseError::Parse(format!( + "Missing '{}' value in zpool list output", + name + )) + }; + let failed_to_parse = |name, err| { + ParseError::Parse(format!( + "Failed to parse field '{}': {}", + name, err + )) + }; + + let mut values = s.trim().split_whitespace(); + let name = + values.next().ok_or_else(|| expected_field("name"))?.to_string(); + let size = values + .next() + .ok_or_else(|| expected_field("size"))? + .parse::<u64>() + .map_err(|e| failed_to_parse("size", e))?; + let allocated = values + .next() + .ok_or_else(|| expected_field("allocated"))? + .parse::<u64>() + .map_err(|e| failed_to_parse("allocated", e))?; + let free = values + .next() + .ok_or_else(|| expected_field("free"))? + .parse::<u64>() + .map_err(|e| failed_to_parse("free", e))?; + let health = values + .next() + .ok_or_else(|| expected_field("health"))? + .parse::<ZpoolHealth>()?; + + Ok(ZpoolInfo { name, size, allocated, free, health }) + } +} + +/// Wraps commands for interacting with ZFS pools. +pub struct Zpool {} + +#[cfg_attr(test, mockall::automock, allow(dead_code))] +impl Zpool { + pub fn get_info(name: &str) -> Result<ZpoolInfo, Error> { + let mut command = std::process::Command::new(ZPOOL); + let cmd = command.args(&[ + "list", + "-Hpo", + "name,size,allocated,free,health", + name, + ]); + + let output = execute(cmd)?; + let stdout = String::from_utf8(output.stdout) + .map_err(|e| ParseError::Utf8(e))?; + + let zpool = stdout.parse::<ZpoolInfo>()?; + Ok(zpool) + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_parse_zpool() { + let name = "rpool"; + let size = 10000; + let allocated = 6000; + let free = 4000; + let health = "ONLINE"; + + // We should be able to tolerate any whitespace between columns. + let input = format!( + "{} {} {} \t\t\t {} {}", + name, size, allocated, free, health + ); + let output: ZpoolInfo = input.parse().unwrap(); + assert_eq!(output.name(), name); + assert_eq!(output.size(), size); + assert_eq!(output.allocated(), allocated); + assert_eq!(output.free(), free); + assert_eq!(output.health(), ZpoolHealth::Online); + } + + #[test] + fn test_parse_zpool_missing_column() { + let name = "rpool"; + let size = 10000; + let allocated = 6000; + let free = 4000; + let _health = "ONLINE"; + + // Similar to the prior test case, just omit "health".
+ let input = format!("{} {} {} {}", name, size, allocated, free); + let result: Result = input.parse(); + + let expected_err = ParseError::Parse( + "Missing 'health' value in zpool list output".to_owned(), + ); + assert_eq!(result.unwrap_err(), expected_err,); + } +} diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 0a7b240c51..26a54f67c7 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -9,14 +9,10 @@ use crate::common::{ vlan::VlanID, }; use crate::illumos::svc::wait_for_service; -use crate::illumos::{ - dladm::{PhysicalLink, VNIC_PREFIX}, - zone::ZONE_PREFIX, -}; -use crate::instance_manager::{IdAllocator, InstanceTicket}; +use crate::illumos::zone::PROPOLIS_ZONE_PREFIX; +use crate::instance_manager::InstanceTicket; +use crate::vnic::{interface_name, IdAllocator, Vnic}; use futures::lock::Mutex; -use omicron_common::api::external::Error; -use omicron_common::api::external::MacAddr; use omicron_common::api::external::NetworkInterface; use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::api::internal::sled_agent::InstanceHardware; @@ -39,6 +35,29 @@ use crate::mocks::MockNexusClient as NexusClient; #[cfg(not(test))] use nexus_client::Client as NexusClient; +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Failed to wait for service: {0}")] + Timeout(String), + + #[error("Failure accessing data links: {0}")] + Datalink(#[from] crate::illumos::dladm::Error), + + #[error("Error accessing zones: {0}")] + Zone(#[from] crate::illumos::zone::Error), + + #[error("Failure from Propolis Client: {0}")] + Propolis(#[from] propolis_client::Error), + + // TODO: Remove this error; prefer to retry notifications. + #[error("Notifying Nexus failed: {0}")] + Notification(anyhow::Error), + + // TODO: This error type could become more specific + #[error("Error performing a state transition: {0}")] + Transition(omicron_common::api::external::Error), +} + // Issues read-only, idempotent HTTP requests at propolis until it responds with // an acknowledgement. This provides a hacky mechanism to "wait until the HTTP // server is serving requests". @@ -77,9 +96,7 @@ async fn wait_for_http_server( log_notification_failure, ) .await - .map_err(|e| Error::InternalError { - internal_message: format!("Failed to wait for HTTP server: {}", e), - }) + .map_err(|_| Error::Timeout("Propolis".to_string())) } fn service_name() -> &'static str { @@ -94,20 +111,8 @@ fn fmri_name(id: &Uuid) -> String { format!("{}:{}", service_name(), instance_name(id)) } -fn zone_name(id: &Uuid) -> String { - format!("{}{}", ZONE_PREFIX, id) -} - -fn vnic_name(id: u64) -> String { - format!("{}{}", VNIC_PREFIX, id) -} - -fn guest_vnic_name(id: u64) -> String { - format!("{}_guest{}", VNIC_PREFIX, id) -} - -fn interface_name(vnic_name: &str) -> String { - format!("{}/omicron", vnic_name) +fn propolis_zone_name(id: &Uuid) -> String { + format!("{}{}", PROPOLIS_ZONE_PREFIX, id) } // Action to be taken by the Sled Agent after monitoring Propolis for @@ -149,60 +154,6 @@ impl Drop for RunningState { } } -/// Represents an allocated VNIC on the system. -/// The VNIC is de-allocated when it goes out of scope. -/// -/// Note that the "ownership" of the VNIC is based on convention; -/// another process in the global zone could also modify / destroy -/// the VNIC while this object is alive. -#[derive(Debug)] -struct Vnic { - name: String, - deleted: bool, -} - -impl Vnic { - // Creates a new NIC, intended for usage by the guest. 
- fn new_guest( - allocator: &IdAllocator, - physical_dl: &PhysicalLink, - mac: Option<MacAddr>, - vlan: Option<VlanID>, - ) -> Result<Self, Error> { - let name = guest_vnic_name(allocator.next()); - Dladm::create_vnic(physical_dl, &name, mac, vlan)?; - Ok(Vnic { name, deleted: false }) - } - - // Creates a new NIC, intended for allowing Propolis to communicate - // with the control plane. - fn new_control( - allocator: &IdAllocator, - physical_dl: &PhysicalLink, - mac: Option<MacAddr>, - ) -> Result<Self, Error> { - let name = vnic_name(allocator.next()); - Dladm::create_vnic(physical_dl, &name, mac, None)?; - Ok(Vnic { name, deleted: false }) - } - - // Deletes a NIC (if it has not already been deleted). - fn delete(&mut self) -> Result<(), Error> { - if self.deleted { - Ok(()) - } else { - self.deleted = true; - Dladm::delete_vnic(&self.name) - } - } -} - -impl Drop for Vnic { - fn drop(&mut self) { - let _ = self.delete(); - } -} - struct InstanceInner { id: Uuid, @@ -259,7 +210,7 @@ impl InstanceInner { ), ) .await - .map_err(Error::from)?; + .map_err(|e| Error::Notification(e))?; // Take the next action, if any. if let Some(action) = action { @@ -278,13 +229,8 @@ impl InstanceInner { .expect("Propolis client should be initialized before usage") .client .instance_state_put(*self.propolis_id(), request) - .await - .map_err(|e| Error::InternalError { - internal_message: format!( - "Failed to set state of instance: {}", - e - ), - }) + .await?; + Ok(()) } async fn ensure(&self, guest_nics: &Vec<NetworkInterface>) -> Result<(), Error> { @@ -294,7 +240,7 @@ impl InstanceInner { .iter() .enumerate() .map(|(i, _)| propolis_client::api::NetworkInterfaceRequest { - name: guest_nics[i].name.clone(), + name: guest_nics[i].name().to_string(), slot: propolis_client::api::Slot(i as u8), }) .collect(); @@ -312,10 +258,7 @@ impl InstanceInner { .expect("Propolis client should be initialized before usage") .client .instance_ensure(&request) - .await - .map_err(|e| Error::InternalError { - internal_message: format!("Failed to ensure instance: {}", e), - })?; + .await?; Ok(()) } @@ -465,36 +408,45 @@ impl Instance { Some(nic.mac), inner.vlan, ) + .map_err(|e| e.into()) }) .collect::<Result<Vec<_>, Error>>()?; // Create a zone for the propolis instance, using the previously // configured VNICs. - let zname = zone_name(inner.propolis_id()); + let zname = propolis_zone_name(inner.propolis_id()); let nics_to_put_in_zone: Vec<String> = guest_nics .iter() - .map(|nic| nic.name.clone()) - .chain(std::iter::once(control_nic.name.clone())) + .map(|nic| nic.name().to_string()) + .chain(std::iter::once(control_nic.name().to_string())) .collect(); - Zones::configure_child_zone(&inner.log, &zname, nics_to_put_in_zone)?; - info!(inner.log, "Configured child zone: {}", zname); + Zones::configure_propolis_zone( + &inner.log, + &zname, + nics_to_put_in_zone, + )?; + info!(inner.log, "Configured propolis zone: {}", zname); // Clone the zone from a base zone (faster than installing) and // boot it up. - Zones::clone_from_base(&zname)?; + Zones::clone_from_base_propolis(&zname)?; info!(inner.log, "Cloned child zone: {}", zname); Zones::boot(&zname)?; info!(inner.log, "Booted zone: {}", zname); // Wait for the network services to come online, then create an address.
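The `Vnic` type removed above reappears in `src/vnic.rs` (a new file this diff does not display). Its essence, reconstructed from the removed code, is a drop-guard: leaking the handle still deletes the underlying link. A sketch, presumably close to what the new module contains:

    // RAII guard: dropping a Vnic deletes the link unless delete() already ran.
    struct Vnic {
        name: String,
        deleted: bool,
    }

    impl Drop for Vnic {
        fn drop(&mut self) {
            if !self.deleted {
                self.deleted = true;
                // Best-effort cleanup; errors cannot propagate out of drop().
                let _ = Dladm::delete_vnic(&self.name);
            }
        }
    }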
- wait_for_service(Some(&zname), "svc:/milestone/network:default") - .await?; + let fmri = "svc:/milestone/network:default"; + wait_for_service(Some(&zname), fmri) + .await + .map_err(|_| Error::Timeout(fmri.to_string()))?; info!(inner.log, "Network milestone ready for {}", zname); - let network = - Zones::create_address(&zname, &interface_name(&control_nic.name))?; + let network = Zones::create_address( + &zname, + &interface_name(&control_nic.name()), + )?; info!(inner.log, "Created address {} for zone: {}", network, zname); // Run Propolis in the Zone. @@ -507,7 +459,9 @@ impl Instance { // but it helps distinguish "online in SMF" from "responding to HTTP // requests". let fmri = fmri_name(inner.propolis_id()); - wait_for_service(Some(&zname), &fmri).await?; + wait_for_service(Some(&zname), &fmri) + .await + .map_err(|_| Error::Timeout(fmri.to_string()))?; let client = Arc::new(PropolisClient::new( server_addr, @@ -551,7 +505,7 @@ impl Instance { async fn stop(&self) -> Result<(), Error> { let mut inner = self.inner.lock().await; - let zname = zone_name(inner.propolis_id()); + let zname = propolis_zone_name(inner.propolis_id()); warn!(inner.log, "Halting and removing zone: {}", zname); Zones::halt_and_remove(&inner.log, &zname).unwrap(); @@ -591,15 +545,8 @@ impl Instance { loop { // State monitoring always returns the most recent state/gen pair // known to Propolis. - let response = client - .instance_state_monitor(propolis_id, gen) - .await - .map_err(|e| Error::InternalError { - internal_message: format!( - "Failed to monitor propolis: {}", - e - ), - })?; + let response = + client.instance_state_monitor(propolis_id, gen).await?; let reaction = self.inner.lock().await.observe_state(response.state).await?; @@ -629,8 +576,10 @@ impl Instance { target: InstanceRuntimeStateRequested, ) -> Result { let mut inner = self.inner.lock().await; - if let Some(action) = - inner.state.request_transition(target.run_state)? + if let Some(action) = inner + .state + .request_transition(target.run_state) + .map_err(|e| Error::Transition(e))? 
{ info!( &inner.log, @@ -645,8 +594,12 @@ impl Instance { #[cfg(test)] mod test { use super::*; - use crate::illumos::{dladm::MockDladm, zone::MockZones}; + use crate::illumos::{ + dladm::{MockDladm, PhysicalLink}, + zone::MockZones, + }; use crate::mocks::MockNexusClient; + use crate::vnic::control_vnic_name; use chrono::Utc; use dropshot::{ endpoint, ApiDescription, ConfigDropshot, ConfigLogging, @@ -870,37 +823,38 @@ mod test { .in_sequence(&mut seq) .returning(|phys, vnic, _maybe_mac, _maybe_vlan| { assert_eq!(phys.0, "physical"); - assert_eq!(vnic, vnic_name(0)); + assert_eq!(vnic, control_vnic_name(0)); Ok(()) }); - let zone_configure_child_ctx = - MockZones::configure_child_zone_context(); - zone_configure_child_ctx + let zone_configure_propolis_ctx = + MockZones::configure_propolis_zone_context(); + zone_configure_propolis_ctx .expect() .times(1) .in_sequence(&mut seq) .returning(|_, zone, vnics| { - assert_eq!(zone, zone_name(&test_propolis_uuid())); + assert_eq!(zone, propolis_zone_name(&test_propolis_uuid())); assert_eq!(vnics.len(), 1); - assert_eq!(vnics[0], vnic_name(0)); + assert_eq!(vnics[0], control_vnic_name(0)); Ok(()) }); - let zone_clone_from_base_ctx = MockZones::clone_from_base_context(); - zone_clone_from_base_ctx + let zone_clone_from_base_propolis_ctx = + MockZones::clone_from_base_propolis_context(); + zone_clone_from_base_propolis_ctx .expect() .times(1) .in_sequence(&mut seq) .returning(|zone| { - assert_eq!(zone, zone_name(&test_propolis_uuid())); + assert_eq!(zone, propolis_zone_name(&test_propolis_uuid())); Ok(()) }); let zone_boot_ctx = MockZones::boot_context(); zone_boot_ctx.expect().times(1).in_sequence(&mut seq).returning( |zone| { - assert_eq!(zone, zone_name(&test_propolis_uuid())); + assert_eq!(zone, propolis_zone_name(&test_propolis_uuid())); Ok(()) }, ); @@ -909,7 +863,10 @@ mod test { crate::illumos::svc::wait_for_service_context(); wait_for_service_ctx.expect().times(1).in_sequence(&mut seq).returning( |zone, fmri| { - assert_eq!(zone.unwrap(), zone_name(&test_propolis_uuid())); + assert_eq!( + zone.unwrap(), + propolis_zone_name(&test_propolis_uuid()) + ); assert_eq!(fmri, "svc:/milestone/network:default"); Ok(()) }, @@ -921,8 +878,8 @@ mod test { .times(1) .in_sequence(&mut seq) .returning(|zone, iface| { - assert_eq!(zone, zone_name(&test_propolis_uuid())); - assert_eq!(iface, interface_name(&vnic_name(0))); + assert_eq!(zone, propolis_zone_name(&test_propolis_uuid())); + assert_eq!(iface, interface_name(&control_vnic_name(0))); Ok("127.0.0.1/24".parse().unwrap()) }); @@ -932,7 +889,7 @@ mod test { .times(1) .in_sequence(&mut seq) .returning(|zone, id, addr| { - assert_eq!(zone, zone_name(&test_propolis_uuid())); + assert_eq!(zone, propolis_zone_name(&test_propolis_uuid())); assert_eq!(id, &test_propolis_uuid()); assert_eq!( addr, @@ -946,7 +903,7 @@ mod test { wait_for_service_ctx.expect().times(1).in_sequence(&mut seq).returning( |zone, fmri| { let id = test_propolis_uuid(); - assert_eq!(zone.unwrap(), zone_name(&id)); + assert_eq!(zone.unwrap(), propolis_zone_name(&id)); assert_eq!( fmri, format!("{}:{}", service_name(), instance_name(&id)) @@ -1086,7 +1043,7 @@ mod test { .times(1) .in_sequence(&mut seq) .returning(|vnic| { - assert_eq!(vnic, vnic_name(0)); + assert_eq!(vnic, control_vnic_name(0)); Ok(()) }); inst.transition(InstanceRuntimeStateRequested { diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index d7bf3f8d5e..0307762fe6 100644 --- a/sled-agent/src/instance_manager.rs +++ 
b/sled-agent/src/instance_manager.rs @@ -5,17 +5,13 @@ //! API for controlling multiple instances on a sled. use crate::common::vlan::VlanID; -use crate::illumos::zfs::ZONE_ZFS_DATASET; -use omicron_common::api::external::Error; +use crate::vnic::IdAllocator; use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::api::internal::sled_agent::InstanceHardware; use omicron_common::api::internal::sled_agent::InstanceRuntimeStateRequested; use slog::Logger; use std::collections::BTreeMap; -use std::sync::{ - atomic::{AtomicU64, Ordering}, - Arc, Mutex, -}; +use std::sync::{Arc, Mutex}; use uuid::Uuid; #[cfg(test)] @@ -23,35 +19,20 @@ use crate::mocks::MockNexusClient as NexusClient; #[cfg(not(test))] use nexus_client::Client as NexusClient; -#[cfg(not(test))] -use crate::{ - illumos::{dladm::Dladm, zfs::Zfs, zone::Zones}, - instance::Instance, -}; #[cfg(test)] use crate::{ - illumos::{ - dladm::MockDladm as Dladm, zfs::MockZfs as Zfs, - zone::MockZones as Zones, - }, - instance::MockInstance as Instance, + illumos::zone::MockZones as Zones, instance::MockInstance as Instance, }; +#[cfg(not(test))] +use crate::{illumos::zone::Zones, instance::Instance}; -/// A shareable wrapper around an atomic counter. -/// May be used to allocate runtime-unique IDs. -#[derive(Clone, Debug)] -pub struct IdAllocator { - value: Arc<AtomicU64>, -} - -impl IdAllocator { - pub fn new() -> Self { - Self { value: Arc::new(AtomicU64::new(0)) } - } +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Instance error: {0}")] + Instance(#[from] crate::instance::Error), - pub fn next(&self) -> u64 { - self.value.fetch_add(1, Ordering::SeqCst) - } + #[error(transparent)] + Zone(#[from] crate::illumos::zone::Error), } struct InstanceManagerInternal { @@ -79,37 +60,8 @@ impl InstanceManager { vlan: Option<VlanID>, nexus_client: Arc<NexusClient>, ) -> Result<InstanceManager, Error> { - // Before we start creating instances, we need to ensure that the - // necessary ZFS and Zone resources are ready. - Zfs::ensure_dataset(ZONE_ZFS_DATASET)?; - // Create a base zone, from which all running instance zones are cloned. - Zones::create_base(&log)?; - - // Identify all existing zones which should be managed by the Sled - // Agent. - // - // NOTE: Currently, we're removing these zones. In the future, we should - // re-establish contact (i.e., if the Sled Agent crashed, but we wanted - // to leave the running Zones intact). - let zones = Zones::get()?; - for z in zones { - warn!(log, "Deleting zone: {}", z.name()); - Zones::halt_and_remove(&log, z.name())?; - } - - // Identify all VNICs which should be managed by the Sled Agent. - // - // NOTE: Currently, we're removing these VNICs. In the future, we should - // identify if they're being used by the aforementioned existing zones, - // and track them once more.
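// (The IdAllocator removed above also moved to src/vnic.rs: a clonable wrapper
// around Arc<AtomicU64>. Clones share the counter, so fetch_add(1, SeqCst)
// yields process-unique VNIC-name suffixes. Illustrative, assuming the moved
// type keeps the removed API:
//
//     let ids = IdAllocator::new();
//     let clone = ids.clone();
//     assert_eq!(ids.next(), 0);
//     assert_eq!(clone.next(), 1); // same underlying counter
// )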
- // - // (dladm show-vnic -p -o ZONE,LINK) might help - let vnics = Dladm::get_vnics()?; - for vnic in vnics { - warn!(log, "Deleting VNIC: {}", vnic); - Dladm::delete_vnic(&vnic)?; - } + Zones::create_propolis_base(&log)?; Ok(InstanceManager { inner: Arc::new(InstanceManagerInternal { @@ -177,7 +129,7 @@ impl InstanceManager { instance.start(instance_ticket).await?; } - instance.transition(target).await + instance.transition(target).await.map_err(|e| e.into()) } } @@ -221,7 +173,7 @@ impl Drop for InstanceTicket { #[cfg(test)] mod test { use super::*; - use crate::illumos::{dladm::MockDladm, zfs::MockZfs, zone::MockZones}; + use crate::illumos::{dladm::MockDladm, zone::MockZones}; use crate::instance::MockInstance; use crate::mocks::MockNexusClient; use chrono::Utc; @@ -272,14 +224,9 @@ mod test { // checks - creation of the base zone, and cleanup of existing // zones + vnics. - let zfs_ensure_dataset_ctx = MockZfs::ensure_dataset_context(); - zfs_ensure_dataset_ctx.expect().return_once(|pool| { - assert_eq!(pool, ZONE_ZFS_DATASET); - Ok(()) - }); - - let zones_create_base_ctx = MockZones::create_base_context(); - zones_create_base_ctx.expect().return_once(|_| Ok(())); + let zones_create_propolis_base_ctx = + MockZones::create_propolis_base_context(); + zones_create_propolis_base_ctx.expect().return_once(|_| Ok(())); let zones_get_ctx = MockZones::get_context(); zones_get_ctx.expect().return_once(|| Ok(vec![])); @@ -354,14 +301,9 @@ mod test { // Instance Manager creation. - let zfs_ensure_dataset_ctx = MockZfs::ensure_dataset_context(); - zfs_ensure_dataset_ctx.expect().return_once(|pool| { - assert_eq!(pool, ZONE_ZFS_DATASET); - Ok(()) - }); - - let zones_create_base_ctx = MockZones::create_base_context(); - zones_create_base_ctx.expect().return_once(|_| Ok(())); + let zones_create_propolis_base_ctx = + MockZones::create_propolis_base_context(); + zones_create_propolis_base_ctx.expect().return_once(|_| Ok(())); let zones_get_ctx = MockZones::get_context(); zones_get_ctx.expect().return_once(|| Ok(vec![])); diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 6ab2a05fbf..c31e977cc8 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -24,8 +24,11 @@ mod illumos; mod instance; mod instance_manager; mod params; +mod running_zone; pub mod server; mod sled_agent; +mod storage_manager; +mod vnic; #[cfg(test)] mod mocks; diff --git a/sled-agent/src/mocks/mod.rs b/sled-agent/src/mocks/mod.rs index d01db9974b..438a83f455 100644 --- a/sled-agent/src/mocks/mod.rs +++ b/sled-agent/src/mocks/mod.rs @@ -4,9 +4,12 @@ //! Mock structures for testing. +use anyhow::Error; use mockall::mock; -use nexus_client::types::{InstanceRuntimeState, SledAgentStartupInfo}; -use omicron_common::api::external::Error; +use nexus_client::types::{ + DatasetPutRequest, DatasetPutResponse, InstanceRuntimeState, + SledAgentStartupInfo, ZpoolPutRequest, ZpoolPutResponse, +}; use slog::Logger; use uuid::Uuid; @@ -23,5 +26,17 @@ mock! 
{ id: &Uuid, new_runtime_state: &InstanceRuntimeState, ) -> Result<(), Error>; + pub async fn zpool_put( + &self, + sled_id: &Uuid, + zpool_id: &Uuid, + info: &ZpoolPutRequest, + ) -> Result<ZpoolPutResponse, Error>; + pub async fn dataset_put( + &self, + zpool_id: &Uuid, + dataset_id: &Uuid, + info: &DatasetPutRequest, + ) -> Result<DatasetPutResponse, Error>; } } diff --git a/sled-agent/src/running_zone.rs b/sled-agent/src/running_zone.rs new file mode 100644 index 0000000000..9f9fd1b032 --- /dev/null +++ b/sled-agent/src/running_zone.rs @@ -0,0 +1,126 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Utilities to manage running zones. + +use crate::illumos::svc::wait_for_service; +use crate::vnic::{interface_name, Vnic}; +use slog::Logger; +use std::net::SocketAddr; + +#[cfg(test)] +use crate::illumos::zone::MockZones as Zones; +#[cfg(not(test))] +use crate::illumos::zone::Zones; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("Zone not found")] + NotFound, + + #[error("Zone is not running")] + NotRunning, + + #[error("Zone operation failed: {0}")] + Operation(#[from] crate::illumos::zone::Error), + + #[error("Timeout waiting for a service: {0}")] + Timeout(String), +} + +/// Represents a running zone. +pub struct RunningZone { + log: Logger, + + // Name of the zone. + name: String, + + // NIC used for control plane communication. + _nic: Vnic, + address: SocketAddr, } + +impl RunningZone { + pub fn name(&self) -> &str { + &self.name + } + + pub fn address(&self) -> SocketAddr { + self.address + } + + /// Boots a new zone. + /// + /// Note that the zone must already be configured to be booted. + pub async fn boot( + log: &Logger, + zone_name: String, + nic: Vnic, + port: u16, + ) -> Result<Self, Error> { + // Boot the zone. + info!(log, "Zone {} booting", zone_name); + + // TODO: "Ensure booted", to make this more idempotent? + Zones::boot(&zone_name)?; + + // Wait for the network services to come online, then create an address + // to use for communicating with the newly created zone. + let fmri = "svc:/milestone/network:default"; + wait_for_service(Some(&zone_name), fmri) + .await + .map_err(|_| Error::Timeout(fmri.to_string()))?; + + let network = + Zones::ensure_address(&zone_name, &interface_name(&nic.name()))?; + + Ok(RunningZone { + log: log.clone(), + name: zone_name, + _nic: nic, + address: SocketAddr::new(network.ip(), port), + }) + } + + /// Looks up a running zone, if one already exists. + pub async fn get( + log: &Logger, + zone_prefix: &str, + port: u16, + ) -> Result<Self, Error> { + let zone = Zones::get()?
diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs
index dae148ed7b..a13d4d3458 100644
--- a/sled-agent/src/server.rs
+++ b/sled-agent/src/server.rs
@@ -45,13 +45,9 @@ impl Server {
             "component" => "SledAgent",
             "server" => config.id.clone().to_string()
         ));
-        let sled_agent = SledAgent::new(
-            &config.id,
-            sa_log,
-            config.vlan,
-            nexus_client.clone(),
-        )
-        .map_err(|e| e.to_string())?;
+        let sled_agent = SledAgent::new(&config, sa_log, nexus_client.clone())
+            .await
+            .map_err(|e| e.to_string())?;
 
         let dropshot_log = log.new(o!("component" => "dropshot"));
         let http_server = dropshot::HttpServerStarter::new(
@@ -71,7 +67,10 @@ impl Server {
         // return a permanent error from the `notify_nexus` closure.
         let sa_address = http_server.local_addr();
         let notify_nexus = || async {
-            debug!(log, "contacting server nexus");
+            info!(
+                log,
+                "contacting server nexus, registering sled: {}", config.id
+            );
             nexus_client
                 .cpapi_sled_agents_post(
                     &config.id,
diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs
index e77be1fc8c..1ee6680ede 100644
--- a/sled-agent/src/sled_agent.rs
+++ b/sled-agent/src/sled_agent.rs
@@ -4,46 +4,125 @@
 
 //! Sled agent implementation
 
+use crate::config::Config;
+use crate::illumos::zfs::{
+    Mountpoint, ZONE_ZFS_DATASET, ZONE_ZFS_DATASET_MOUNTPOINT,
+};
+use crate::instance_manager::InstanceManager;
 use crate::params::DiskStateRequested;
+use crate::storage_manager::StorageManager;
 use omicron_common::api::{
-    external::Error, internal::nexus::DiskRuntimeState,
-    internal::nexus::InstanceRuntimeState,
+    internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState,
     internal::sled_agent::InstanceHardware,
     internal::sled_agent::InstanceRuntimeStateRequested,
 };
-
 use slog::Logger;
 use std::sync::Arc;
 use uuid::Uuid;
 
-#[cfg(test)]
-use crate::mocks::MockNexusClient as NexusClient;
 #[cfg(not(test))]
-use nexus_client::Client as NexusClient;
+use {
+    crate::illumos::{dladm::Dladm, zfs::Zfs, zone::Zones},
+    nexus_client::Client as NexusClient,
+};
+#[cfg(test)]
+use {
+    crate::illumos::{
+        dladm::MockDladm as Dladm, zfs::MockZfs as Zfs,
+        zone::MockZones as Zones,
+    },
+    crate::mocks::MockNexusClient as NexusClient,
+};
 
-use crate::common::vlan::VlanID;
-use crate::instance_manager::InstanceManager;
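The modules in this change swap in mockall doubles at compile time rather than through dependency injection; the alias trick in isolation, as a sketch built on functions this diff itself introduces:

```rust
// `Zones` names the real implementation in normal builds and the generated
// mock in cfg(test) builds, so call sites are written once against the alias.
#[cfg(not(test))]
use crate::illumos::zone::Zones;
#[cfg(test)]
use crate::illumos::zone::MockZones as Zones;

pub fn cleanup(log: &slog::Logger) -> Result<(), crate::illumos::zone::Error> {
    // In unit tests these calls are intercepted by expectations registered
    // via MockZones::get_non_base_zones_context() and friends.
    for z in Zones::get_non_base_zones()? {
        Zones::halt_and_remove(log, z.name())?;
    }
    Ok(())
}
```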
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error(transparent)]
+    Datalink(#[from] crate::illumos::dladm::Error),
+
+    #[error(transparent)]
+    Zone(#[from] crate::illumos::zone::Error),
+
+    #[error(transparent)]
+    Zfs(#[from] crate::illumos::zfs::Error),
+
+    #[error("Error managing instances: {0}")]
+    Instance(#[from] crate::instance_manager::Error),
+
+    #[error("Error managing storage: {0}")]
+    Storage(#[from] crate::storage_manager::Error),
+}
+
+impl From<Error> for omicron_common::api::external::Error {
+    fn from(err: Error) -> Self {
+        omicron_common::api::external::Error::InternalError {
+            internal_message: err.to_string(),
+        }
+    }
+}
 
 /// Describes an executing Sled Agent object.
 ///
 /// Contains both a connection to the Nexus, as well as managed instances.
 pub struct SledAgent {
+    _storage: StorageManager,
     instances: InstanceManager,
 }
 
 impl SledAgent {
     /// Initializes a new [`SledAgent`] object.
-    pub fn new(
-        id: &Uuid,
+    pub async fn new(
+        config: &Config,
         log: Logger,
-        vlan: Option<VlanID>,
         nexus_client: Arc<NexusClient>,
     ) -> Result<SledAgent, Error> {
+        let id = &config.id;
+        let vlan = config.vlan;
         info!(&log, "created sled agent"; "id" => ?id);
 
-        let instances = InstanceManager::new(log, vlan, nexus_client)?;
+        // Before we start creating zones, we need to ensure that the
+        // necessary ZFS and Zone resources are ready.
+        Zfs::ensure_filesystem(
+            ZONE_ZFS_DATASET,
+            Mountpoint::Path(std::path::PathBuf::from(
+                ZONE_ZFS_DATASET_MOUNTPOINT,
+            )),
+        )?;
+
+        // Identify all existing zones which should be managed by the Sled
+        // Agent.
+        //
+        // NOTE: Currently, we're removing these zones. In the future, we should
+        // re-establish contact (i.e., if the Sled Agent crashed, but we wanted
+        // to leave the running Zones intact).
+        let zones = Zones::get_non_base_zones()?;
+        for z in zones {
+            warn!(log, "Deleting zone: {}", z.name());
+            Zones::halt_and_remove(&log, z.name())?;
+        }
+
+        // Identify all VNICs which should be managed by the Sled Agent.
+        //
+        // NOTE: Currently, we're removing these VNICs. In the future, we should
+        // identify if they're being used by the aforementioned existing zones,
+        // and track them once more.
+        //
+        // (dladm show-vnic -p -o ZONE,LINK) might help
+        let vnics = Dladm::get_vnics()?;
+        for vnic in vnics {
+            warn!(log, "Deleting VNIC: {}", vnic);
+            Dladm::delete_vnic(&vnic)?;
+        }
+
+        let storage =
+            StorageManager::new(&log, *id, nexus_client.clone()).await?;
+        if let Some(pools) = &config.zpools {
+            for pool in pools {
+                storage.upsert_zpool(pool).await?;
+            }
+        }
+        let instances = InstanceManager::new(log, vlan, nexus_client.clone())?;
 
-        Ok(SledAgent { instances })
+        Ok(SledAgent { _storage: storage, instances })
     }
 
     /// Idempotently ensures that a given Instance is running on the sled.
@@ -53,10 +132,13 @@ impl SledAgent {
         initial: InstanceHardware,
         target: InstanceRuntimeStateRequested,
     ) -> Result<InstanceRuntimeState, Error> {
-        self.instances.ensure(instance_id, initial, target).await
+        self.instances
+            .ensure(instance_id, initial, target)
+            .await
+            .map_err(|e| Error::Instance(e))
     }
 
-    /// Idempotently ensures that the given Disk is attached (or not) as
+    /// Idempotently ensures that the given virtual disk is attached (or not) as
     /// specified.
     ///
    /// NOTE: Not yet implemented.
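The new error enums lean on two thiserror patterns: `#[error(transparent)]` forwards the source error's Display and source unchanged, while message variants wrap the cause with extra context; in both cases `#[from]` derives the `From` impl that lets `?` convert automatically. A standalone sketch with invented error types:

```rust
#[derive(thiserror::Error, Debug)]
enum ZoneError {
    #[error("command failed: {0}")]
    Command(String),
}

#[derive(thiserror::Error, Debug)]
enum AgentError {
    // Displays exactly as the underlying ZoneError does.
    #[error(transparent)]
    Zone(#[from] ZoneError),

    // Prefixes the underlying error with context.
    #[error("Error managing storage: {0}")]
    Storage(#[from] std::io::Error),
}

fn run_zoneadm() -> Result<(), ZoneError> {
    Err(ZoneError::Command("zoneadm halt".into()))
}

fn halt_zone() -> Result<(), AgentError> {
    // `?` converts ZoneError into AgentError via the derived From impl.
    run_zoneadm()?;
    Ok(())
}
```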
diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs
new file mode 100644
index 0000000000..d96ead7c22
--- /dev/null
+++ b/sled-agent/src/storage_manager.rs
@@ -0,0 +1,451 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Management of sled-local storage.
+
+use crate::illumos::{
+    zfs::Mountpoint,
+    zone::{
+        COCKROACH_SVC_DIRECTORY, COCKROACH_ZONE_PREFIX, CRUCIBLE_SVC_DIRECTORY,
+        CRUCIBLE_ZONE_PREFIX,
+    },
+    zpool::ZpoolInfo,
+};
+use crate::running_zone::RunningZone;
+use crate::vnic::{IdAllocator, Vnic};
+use futures::stream::FuturesOrdered;
+use futures::StreamExt;
+use nexus_client::types::{DatasetKind, DatasetPutRequest, ZpoolPutRequest};
+use omicron_common::api::external::{ByteCount, ByteCountRangeError};
+use omicron_common::backoff;
+use slog::Logger;
+use std::collections::HashMap;
+use std::convert::TryFrom;
+use std::sync::Arc;
+use tokio::sync::{mpsc, Mutex};
+use tokio::task::JoinHandle;
+use uuid::Uuid;
+
+#[cfg(not(test))]
+use crate::illumos::{dladm::Dladm, zfs::Zfs, zone::Zones, zpool::Zpool};
+#[cfg(test)]
+use crate::illumos::{
+    dladm::MockDladm as Dladm, zfs::MockZfs as Zfs, zone::MockZones as Zones,
+    zpool::MockZpool as Zpool,
+};
+
+#[cfg(test)]
+use crate::mocks::MockNexusClient as NexusClient;
+#[cfg(not(test))]
+use nexus_client::Client as NexusClient;
+
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error(transparent)]
+    Datalink(#[from] crate::illumos::dladm::Error),
+
+    #[error(transparent)]
+    Zfs(#[from] crate::illumos::zfs::Error),
+
+    #[error(transparent)]
+    Zpool(#[from] crate::illumos::zpool::Error),
+
+    #[error("Failed to create base zone: {0}")]
+    BaseZoneCreation(crate::illumos::zone::Error),
+
+    #[error("Failed to configure a zone: {0}")]
+    ZoneConfiguration(crate::illumos::zone::Error),
+
+    #[error("Failed to manage a running zone: {0}")]
+    ZoneManagement(#[from] crate::running_zone::Error),
+
+    #[error("Error parsing pool size: {0}")]
+    BadPoolSize(#[from] ByteCountRangeError),
+
+    #[error("Failed to parse as UUID: {0}")]
+    Parse(uuid::Error),
+
+    #[error("Timed out waiting for service: {0}")]
+    Timeout(String),
+}
+
+/// A ZFS storage pool.
+struct Pool {
+    id: Uuid,
+    info: ZpoolInfo,
+    // ZFS filesystem UUID -> Zone.
+    zones: HashMap<Uuid, RunningZone>,
+}
+
+impl Pool {
+    /// Queries for an existing Zpool by name.
+    ///
+    /// Returns Ok if the pool exists.
+    fn new(name: &str) -> Result<Pool, Error> {
+        let info = Zpool::get_info(name)?;
+
+        // NOTE: This relies on the name being a UUID exactly.
+        // We could be more flexible...
+        let id: Uuid = info.name().parse().map_err(|e| Error::Parse(e))?;
+        Ok(Pool { id, info, zones: HashMap::new() })
+    }
+
+    /// Associate an already running zone with this pool object.
+    ///
+    /// Typically this is used when a dataset within the zone (identified
+    /// by ID) has a running zone (e.g. Crucible, Cockroach) operating on
+    /// behalf of that data.
+    fn add_zone(&mut self, id: Uuid, zone: RunningZone) {
+        self.zones.insert(id, zone);
+    }
+
+    /// Access a zone managing data within this pool.
+    fn get_zone(&self, id: Uuid) -> Option<&RunningZone> {
+        self.zones.get(&id)
+    }
+
+    /// Returns the ID of the pool itself.
+    fn id(&self) -> Uuid {
+        self.id
+    }
+}
+
+// Description of a dataset within a ZFS pool, which should be created
+// by the Sled Agent.
+struct PartitionInfo<'a> {
+    name: &'a str,
+    zone_prefix: &'a str,
+    data_directory: &'a str,
+    svc_directory: &'a str,
+    port: u16,
+    kind: DatasetKind,
+}
+
+async fn ensure_running_zone(
+    log: &Logger,
+    vnic_id_allocator: &IdAllocator,
+    partition_info: &PartitionInfo<'_>,
+    dataset_name: &str,
+) -> Result<RunningZone, Error> {
+    match RunningZone::get(log, partition_info.zone_prefix, partition_info.port)
+        .await
+    {
+        Ok(zone) => {
+            info!(log, "Zone for {} is already running", dataset_name);
+            Ok(zone)
+        }
+        Err(_) => {
+            info!(log, "Zone for {} is not running: Booting", dataset_name);
+            let (nic, zname) = configure_zone(
+                log,
+                vnic_id_allocator,
+                partition_info,
+                dataset_name,
+            )?;
+            RunningZone::boot(log, zname, nic, partition_info.port)
+                .await
+                .map_err(|e| e.into())
+        }
+    }
+}
+
+// Creates a VNIC and configures a zone.
+fn configure_zone(
+    log: &Logger,
+    vnic_id_allocator: &IdAllocator,
+    partition_info: &PartitionInfo<'_>,
+    dataset_name: &str,
+) -> Result<(Vnic, String), Error> {
+    let physical_dl = Dladm::find_physical()?;
+    let nic = Vnic::new_control(vnic_id_allocator, &physical_dl, None)?;
+    let id = Uuid::new_v4();
+    let zname = format!("{}{}", partition_info.zone_prefix, id);
+
+    // Configure the new zone - this should be identical to the base zone,
+    // but with a specified VNIC and pool.
+    Zones::configure_zone(
+        log,
+        &zname,
+        &[
+            zone::Fs {
+                ty: "lofs".to_string(),
+                dir: partition_info.svc_directory.to_string(),
+                special: partition_info.svc_directory.to_string(),
+                options: vec!["ro".to_string()],
+                ..Default::default()
+            },
+            zone::Fs {
+                ty: "zfs".to_string(),
+                dir: partition_info.data_directory.to_string(),
+                special: dataset_name.to_string(),
+                options: vec!["rw".to_string()],
+                ..Default::default()
+            },
+        ],
+        &[],
+        vec![nic.name().to_string()],
+    )
+    .map_err(|e| Error::ZoneConfiguration(e))?;
+
+    // Clone from the base zone installation.
+    Zones::clone_from_base_storage(&zname)
+        .map_err(|e| Error::BaseZoneCreation(e))?;
+
+    Ok((nic, zname))
+}
+
+const PARTITIONS: &[PartitionInfo<'static>] = &[
+    PartitionInfo {
+        name: "crucible",
+        zone_prefix: CRUCIBLE_ZONE_PREFIX,
+        data_directory: "/data",
+        svc_directory: CRUCIBLE_SVC_DIRECTORY,
+        // TODO: Ensure crucible agent uses this port.
+        // Currently, nothing is running in the zone, so it's made up.
+        port: 8080,
+        kind: DatasetKind::Crucible,
+    },
+    PartitionInfo {
+        name: "cockroach",
+        zone_prefix: COCKROACH_ZONE_PREFIX,
+        data_directory: "/data",
+        svc_directory: COCKROACH_SVC_DIRECTORY,
+        // TODO: Ensure cockroach uses this port.
+        // Currently, nothing is running in the zone, so it's made up.
+        port: 8080,
+        kind: DatasetKind::Cockroach,
+    },
+];
+
+// A worker that starts zones for pools as they are received.
+struct StorageWorker {
+    log: Logger,
+    sled_id: Uuid,
+    nexus_client: Arc<NexusClient>,
+    pools: Arc<Mutex<HashMap<String, Pool>>>,
+    new_pools_rx: mpsc::Receiver<String>,
+    vnic_id_allocator: IdAllocator,
+}
+
+impl StorageWorker {
+    // Idempotently ensure the named dataset exists as a filesystem with a UUID.
+    //
+    // Returns the UUID attached to the ZFS filesystem.
+    fn ensure_dataset_with_id(fs_name: &str) -> Result<Uuid, Error> {
+        Zfs::ensure_filesystem(&fs_name, Mountpoint::Legacy)?;
+        // Ensure the dataset has a usable UUID.
+        if let Ok(id_str) = Zfs::get_oxide_value(&fs_name, "uuid") {
+            if let Ok(id) = id_str.parse::<Uuid>() {
+                return Ok(id);
+            }
+        }
+        let id = Uuid::new_v4();
+        Zfs::set_oxide_value(&fs_name, "uuid", &id.to_string())?;
+        Ok(id)
+    }
+
+    // Formats a partition within a zpool, starting a zone for it.
+    // Returns the UUID attached to the underlying ZFS partition.
+    //
+    // For now, we place all "expected" datasets on each new zpool
+    // we see. The decision of "whether or not to actually use the
+    // dataset" is left to both the bootstrapping protocol and Nexus.
+    //
+    // If we had a better signal - from the bootstrapping system - about
+    // where Cockroach nodes should exist, we could be more selective
+    // about this placement.
+    async fn initialize_partition(
+        &self,
+        pool: &mut Pool,
+        partition: &PartitionInfo<'static>,
+    ) -> Result<Uuid, Error> {
+        let name = format!("{}/{}", pool.info.name(), partition.name);
+
+        info!(&self.log, "Ensuring dataset {} exists", name);
+        let id = StorageWorker::ensure_dataset_with_id(&name)?;
+
+        info!(&self.log, "Creating zone for {}", name);
+        let zone = ensure_running_zone(
+            &self.log,
+            &self.vnic_id_allocator,
+            partition,
+            &name,
+        )
+        .await?;
+
+        info!(&self.log, "Created zone with address {}", zone.address());
+        pool.add_zone(id, zone);
+        Ok(id)
+    }
+
+    // Small wrapper around `Self::do_work_internal` that ensures we always
+    // emit info to the log when we exit.
+    async fn do_work(&mut self) -> Result<(), Error> {
+        self.do_work_internal()
+            .await
+            .map(|()| {
+                info!(self.log, "StorageWorker exited successfully");
+            })
+            .map_err(|e| {
+                warn!(self.log, "StorageWorker exited unexpectedly: {}", e);
+                e
+            })
+    }
+
+    async fn do_work_internal(&mut self) -> Result<(), Error> {
+        info!(self.log, "StorageWorker creating storage base zone");
+        // Create a base zone, from which all running storage zones are cloned.
+        Zones::create_storage_base(&self.log)
+            .map_err(|e| Error::BaseZoneCreation(e))?;
+        info!(self.log, "StorageWorker creating storage base zone - DONE");
+
+        let mut nexus_notifications = FuturesOrdered::new();
+
+        loop {
+            tokio::select! {
+                _ = nexus_notifications.next(), if !nexus_notifications.is_empty() => {},
+                Some(pool_name) = self.new_pools_rx.recv() => {
+                    let mut pools = self.pools.lock().await;
+                    let pool = pools.get_mut(&pool_name).unwrap();
+
+                    info!(
+                        &self.log,
+                        "Storage manager processing zpool: {:#?}", pool.info
+                    );
+
+                    let size = ByteCount::try_from(pool.info.size())?;
+
+                    // Initialize all sled-local state.
+                    let mut partitions = vec![];
+                    for partition in PARTITIONS {
+                        let id = self.initialize_partition(pool, partition).await?;
+                        // Unwrap safety: We just put this zone in the pool.
+                        let zone = pool.get_zone(id).unwrap();
+                        partitions.push((id, zone.address(), partition.kind.clone()));
+                    }
+
+                    // Notify Nexus of the zpool and all datasets within.
+                    let pool_id = pool.id();
+                    let sled_id = self.sled_id;
+                    let nexus = self.nexus_client.clone();
+                    let notify_nexus = move || {
+                        let zpool_request = ZpoolPutRequest { size: size.into() };
+                        let nexus = nexus.clone();
+                        let partitions = partitions.clone();
+                        async move {
+                            nexus
+                                .zpool_put(&sled_id, &pool_id, &zpool_request)
+                                .await
+                                .map_err(backoff::BackoffError::Transient)?;
+
+                            for (id, address, kind) in partitions {
+                                let request = DatasetPutRequest {
+                                    address: address.to_string(),
+                                    kind,
+                                };
+                                nexus
+                                    .dataset_put(&pool_id, &id, &request)
+                                    .await
+                                    .map_err(backoff::BackoffError::Transient)?;
+                            }
+
+                            Ok::<(), backoff::BackoffError<anyhow::Error>>(())
+                        }
+                    };
+                    let log = self.log.clone();
+                    let log_post_failure = move |error, delay| {
+                        warn!(
+                            log,
+                            "failed to notify nexus, will retry in {:?}", delay;
+                            "error" => ?error,
+                        );
+                    };
+                    nexus_notifications.push(
+                        backoff::retry_notify(
+                            backoff::internal_service_policy(),
+                            notify_nexus,
+                            log_post_failure,
+                        )
+                    );
+                },
+            }
+        }
+    }
+}
+
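Each queued notification above is a `retry_notify` future. Its shape, reduced to a standalone sketch where `do_put` stands in for the nexus calls, and `retry_notify` / `internal_service_policy` are the omicron_common::backoff wrappers used in the loop above:

```rust
use omicron_common::backoff;

async fn do_put() -> Result<(), anyhow::Error> {
    // Stand-in for nexus.zpool_put / nexus.dataset_put.
    Ok(())
}

async fn notify_with_retry() -> Result<(), anyhow::Error> {
    let op = || async {
        // Mark recoverable failures Transient so the policy retries them;
        // a Permanent error would bubble out of retry_notify immediately.
        do_put().await.map_err(backoff::BackoffError::Transient)
    };
    // Invoked before each retry with the error and the chosen delay.
    let log_failure = |error, delay| {
        eprintln!("failed: {:?}; retrying in {:?}", error, delay);
    };
    backoff::retry_notify(backoff::internal_service_policy(), op, log_failure)
        .await
}
```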
+/// A sled-local view of all attached storage.
+pub struct StorageManager {
+    // A map of "zpool name" to "pool".
+    pools: Arc<Mutex<HashMap<String, Pool>>>,
+    new_pools_tx: mpsc::Sender<String>,
+
+    // A handle to a worker which updates "pools".
+    task: JoinHandle<Result<(), Error>>,
+}
+
+impl StorageManager {
+    /// Creates a new [`StorageManager`] which should manage local storage.
+    pub async fn new(
+        log: &Logger,
+        sled_id: Uuid,
+        nexus_client: Arc<NexusClient>,
+    ) -> Result<Self, Error> {
+        let log = log.new(o!("component" => "sled agent storage manager"));
+        let pools = Arc::new(Mutex::new(HashMap::new()));
+        let (new_pools_tx, new_pools_rx) = mpsc::channel(10);
+        let mut worker = StorageWorker {
+            log,
+            sled_id,
+            nexus_client,
+            pools: pools.clone(),
+            new_pools_rx,
+            vnic_id_allocator: IdAllocator::new(),
+        };
+        Ok(StorageManager {
+            pools,
+            new_pools_tx,
+            task: tokio::task::spawn(async move { worker.do_work().await }),
+        })
+    }
+
+    /// Adds a zpool to the storage manager.
+    pub async fn upsert_zpool(&self, name: &str) -> Result<(), Error> {
+        let zpool = Pool::new(name)?;
+
+        let is_new = {
+            let mut pools = self.pools.lock().await;
+            let entry = pools.entry(name.to_string());
+            let is_new =
+                matches!(entry, std::collections::hash_map::Entry::Vacant(_));
+
+            // Ensure that the pool info is up-to-date.
+            entry
+                .and_modify(|e| {
+                    e.info = zpool.info.clone();
+                })
+                .or_insert_with(|| zpool);
+            is_new
+        };
+
+        // If we hadn't previously been handling this zpool, hand it off to the
+        // worker for management (zone creation).
+        if is_new {
+            self.new_pools_tx.send(name.to_string()).await.unwrap();
+        }
+        Ok(())
+    }
+}
+
+impl Drop for StorageManager {
+    fn drop(&mut self) {
+        // NOTE: Ideally, with async drop, we'd await completion of the worker
+        // somehow.
+        //
+        // Without that option, we instead opt to simply cancel the worker
+        // task to ensure it does not remain alive beyond the StorageManager
+        // itself.
+        self.task.abort();
+    }
+}
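The `is_new` dance in `upsert_zpool` probes the map entry before filling it, so a pool's info is refreshed on every sighting but the worker is only notified once. The core of it, with simplified stand-in types:

```rust
use std::collections::hash_map::{Entry, HashMap};

// Returns true only on the first sighting of `name`; later calls just
// refresh the stored value in place.
fn upsert(pools: &mut HashMap<String, u64>, name: &str, size: u64) -> bool {
    let entry = pools.entry(name.to_string());
    // The pattern binds nothing, so `entry` is inspected, not consumed.
    let is_new = matches!(entry, Entry::Vacant(_));
    entry.and_modify(|e| *e = size).or_insert(size);
    is_new
}

fn main() {
    let mut pools = HashMap::new();
    assert!(upsert(&mut pools, "oxp_1", 32)); // first sighting: notify
    assert!(!upsert(&mut pools, "oxp_1", 64)); // update only
    assert_eq!(pools["oxp_1"], 64);
}
```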
diff --git a/sled-agent/src/vnic.rs b/sled-agent/src/vnic.rs
new file mode 100644
index 0000000000..764812ddb1
--- /dev/null
+++ b/sled-agent/src/vnic.rs
@@ -0,0 +1,118 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! API for managing VNICs.
+
+use crate::common::vlan::VlanID;
+use crate::illumos::dladm::{
+    PhysicalLink, VNIC_PREFIX_CONTROL, VNIC_PREFIX_GUEST,
+};
+use omicron_common::api::external::MacAddr;
+use std::sync::{
+    atomic::{AtomicU64, Ordering},
+    Arc,
+};
+
+#[cfg(not(test))]
+use crate::illumos::dladm::Dladm;
+#[cfg(test)]
+use crate::illumos::dladm::MockDladm as Dladm;
+
+type Error = crate::illumos::dladm::Error;
+
+fn guest_vnic_name(id: u64) -> String {
+    format!("{}{}", VNIC_PREFIX_GUEST, id)
+}
+
+pub fn control_vnic_name(id: u64) -> String {
+    format!("{}{}", VNIC_PREFIX_CONTROL, id)
+}
+
+pub fn interface_name(vnic_name: &str) -> String {
+    format!("{}/omicron", vnic_name)
+}
+
+/// A shareable wrapper around an atomic counter.
+/// May be used to allocate runtime-unique IDs for objects
+/// which have naming constraints - such as VNICs.
+#[derive(Clone, Debug)]
+pub struct IdAllocator {
+    value: Arc<AtomicU64>,
+}
+
+impl IdAllocator {
+    pub fn new() -> Self {
+        Self { value: Arc::new(AtomicU64::new(0)) }
+    }
+
+    pub fn next(&self) -> u64 {
+        self.value.fetch_add(1, Ordering::SeqCst)
+    }
+}
+
+/// Represents an allocated VNIC on the system.
+/// The VNIC is de-allocated when it goes out of scope.
+///
+/// Note that the "ownership" of the VNIC is based on convention;
+/// another process in the global zone could also modify / destroy
+/// the VNIC while this object is alive.
+#[derive(Debug)]
+pub struct Vnic {
+    name: String,
+    deleted: bool,
+}
+
+impl Vnic {
+    /// Takes ownership of an existing VNIC.
+    pub fn wrap_existing(name: String) -> Self {
+        Vnic { name, deleted: false }
+    }
+
+    /// Creates a new NIC, intended for usage by the guest.
+    pub fn new_guest(
+        allocator: &IdAllocator,
+        physical_dl: &PhysicalLink,
+        mac: Option<MacAddr>,
+        vlan: Option<VlanID>,
+    ) -> Result<Self, Error> {
+        let name = guest_vnic_name(allocator.next());
+        Dladm::create_vnic(physical_dl, &name, mac, vlan)?;
+        Ok(Vnic { name, deleted: false })
+    }
+
+    /// Creates a new NIC, intended for allowing Propolis to communicate
+    /// with the control plane.
+    pub fn new_control(
+        allocator: &IdAllocator,
+        physical_dl: &PhysicalLink,
+        mac: Option<MacAddr>,
+    ) -> Result<Self, Error> {
+        let name = control_vnic_name(allocator.next());
+        Dladm::create_vnic(physical_dl, &name, mac, None)?;
+        Ok(Vnic { name, deleted: false })
+    }
+
+    // Deletes a NIC (if it has not already been deleted).
+    pub fn delete(&mut self) -> Result<(), Error> {
+        if self.deleted {
+            Ok(())
+        } else {
+            self.deleted = true;
+            Dladm::delete_vnic(&self.name)
+        }
+    }
+
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+}
+
+impl Drop for Vnic {
+    fn drop(&mut self) {
+        let r = self.delete();
+        if let Err(e) = r {
+            eprintln!("Failed to delete VNIC: {}", e);
+        }
+    }
+}
diff --git a/smf/sled-agent/manifest.xml b/smf/sled-agent/manifest.xml
index b15661acf4..0bdb81951f 100644
--- a/smf/sled-agent/manifest.xml
+++ b/smf/sled-agent/manifest.xml
@@ -34,6 +34,13 @@
+
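Finally, the `IdAllocator`/`Vnic` pairing in vnic.rs above gives every link a process-unique name and ties its lifetime to a guard object. A hypothetical caller, where the physical link name is made up and `PhysicalLink` is assumed to be a thin newtype over the link name:

```rust
use crate::illumos::dladm::{Error, PhysicalLink};
use crate::vnic::{IdAllocator, Vnic};

fn make_two_control_vnics() -> Result<(Vnic, Vnic), Error> {
    let ids = IdAllocator::new();
    let link = PhysicalLink("igb0".to_string());
    // The allocator stamps each name with the next counter value, so the
    // two VNICs get distinct control-prefix names even on the same link.
    let v0 = Vnic::new_control(&ids, &link, None)?;
    let v1 = Vnic::new_control(&ids, &link, None)?;
    Ok((v0, v1))
    // When the returned guards are eventually dropped, each one calls
    // Dladm::delete_vnic, so the links never outlive their owners.
}
```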