diff --git a/Cargo.lock b/Cargo.lock index 579f833a27..588f0200d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11411,6 +11411,7 @@ dependencies = [ "dropshot", "illumos-utils", "omicron-common", + "omicron-sled-agent", "omicron-workspace-hack", "serde_json", "sled-hardware-types", diff --git a/sled-agent/src/profile.rs b/sled-agent/src/profile.rs index af2d05ce54..937fb81466 100644 --- a/sled-agent/src/profile.rs +++ b/sled-agent/src/profile.rs @@ -425,11 +425,11 @@ mod tests { - - + + - + diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index bb557da172..178f2f2f1f 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -92,7 +92,7 @@ use rand::prelude::SliceRandom; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware::SledMode; -// use sled_hardware_types::underlay::BOOTSTRAP_PREFIX; +use sled_hardware_types::underlay::BOOTSTRAP_PREFIX; use sled_hardware_types::Baseboard; use sled_storage::config::MountConfig; use sled_storage::dataset::{ @@ -118,6 +118,7 @@ use illumos_utils::zone::MockZones as Zones; use illumos_utils::zone::Zones; const IPV6_UNSPECIFIED: IpAddr = IpAddr::V6(Ipv6Addr::UNSPECIFIED); +pub const SWITCH_ZONE_BASEBOARD_FILE: &str = "/opt/oxide/baseboard.json"; #[derive(thiserror::Error, Debug, slog_error_chain::SlogInlineError)] pub enum Error { @@ -2480,6 +2481,11 @@ impl ServiceManager { "astring", &format!("[{bootstrap_address}]:{BOOTSTRAP_ARTIFACT_PORT}"), ) + .add_property( + "baseboard-file", + "astring", + SWITCH_ZONE_BASEBOARD_FILE, + ) .add_property( "mgs-address", "astring", @@ -2991,14 +2997,13 @@ impl ServiceManager { .map_err(|err| { Error::io("Failed to setup Switch zone profile", err) })?; - return Ok(RunningZone::boot(installed_zone).await?); + // return Ok(RunningZone::boot(installed_zone).await?); } } // // TODO: Remove from here until end - // let running_zone = RunningZone::boot(installed_zone).await?; + let running_zone = RunningZone::boot(installed_zone).await?; // - // // TODO: Perhaps add a new tiny service that adds these links? // for (link, needs_link_local) in // running_zone.links().iter().zip(links_need_link_local) // { @@ -3017,34 +3022,39 @@ impl ServiceManager { // } // } // - // if let Some((bootstrap_name, bootstrap_address)) = - // bootstrap_name_and_address.as_ref() - // { - // info!( - // self.inner.log, - // "Ensuring bootstrap address {} exists in {} zone", - // bootstrap_address.to_string(), - // &zone_type_str, - // ); - // running_zone.ensure_bootstrap_address(*bootstrap_address).await?; - // info!( - // self.inner.log, - // "Forwarding bootstrap traffic via {} to {}", - // bootstrap_name, - // self.inner.global_zone_bootstrap_link_local_address, - // ); - // running_zone - // .add_bootstrap_route( - // BOOTSTRAP_PREFIX, - // self.inner.global_zone_bootstrap_link_local_address, - // bootstrap_name, - // ) - // .map_err(|err| Error::ZoneCommand { - // intent: "add bootstrap network route".to_string(), - // err, - // })?; - // } + // TODO: Figure out where to best do this. + // It's not possible to have it from the zone itself + // as it needs to be run from the global zone + if let Some((bootstrap_name, bootstrap_address)) = + bootstrap_name_and_address.as_ref() + { + info!( + self.inner.log, + "Ensuring bootstrap address {} exists in {} zone", + bootstrap_address.to_string(), + &zone_type_str, + ); + running_zone.ensure_bootstrap_address(*bootstrap_address).await?; + info!( + self.inner.log, + "Forwarding bootstrap traffic via {} to {}", + bootstrap_name, + self.inner.global_zone_bootstrap_link_local_address, + ); + running_zone + .add_bootstrap_route( + BOOTSTRAP_PREFIX, + self.inner.global_zone_bootstrap_link_local_address, + bootstrap_name, + ) + .map_err(|err| Error::ZoneCommand { + intent: "add bootstrap network route".to_string(), + err, + })?; + } + Ok(running_zone) + // // let addresses = match &request { // ZoneArgs::Omicron(OmicronZoneConfigLocal { // zone: OmicronZoneConfig { underlay_address, .. }, @@ -4561,7 +4571,7 @@ impl ServiceManager { SledLocalZone::Initializing { request, .. }, Some(new_request), ) => { - info!(log, "Enabling {zone_typestr} zone (already underway)"); + info!(log, "Enabling {zone_typestr} zone (already underway)"; "switch-state" => "first",); // The zone has not started yet -- we can simply replace // the next request with our new request. *request = new_request; @@ -4569,6 +4579,60 @@ impl ServiceManager { (SledLocalZone::Running { request, zone }, Some(new_request)) if request.addresses != new_request.addresses => { + +// let req = request.clone(); +// // TODO: make this cleaner +// let switch_zone_config = SwitchZoneConfigLocal { zone: req, root: Utf8PathBuf::new() }; +// let zone_args = ZoneArgs::Switch(&switch_zone_config); +// +// let (bootstrap_vnic, bootstrap_name_and_address) = +// match self.bootstrap_address_needed(&zone_args)? { +// Some((vnic, address)) => { +// let name = vnic.name().to_string(); +// (Some(vnic), Some((name, address))) +// } +// None => (None, None), +// }; +// +// if let Some((bootstrap_name, bootstrap_address)) = +// bootstrap_name_and_address.as_ref() +// { +// +// info!( +// self.inner.log, +// "BOOTSTRAP VNIC {} BOOTSTRAP NAME {} BOOTSTRAP ADDRESS {}", +// bootstrap_vnic.unwrap().name(), +// bootstrap_name.to_string(), +// bootstrap_address.to_string(); +// "switch-state" => "second", +// ); +// +// info!( +// self.inner.log, +// "Ensuring bootstrap address {} exists in switch zone", +// bootstrap_address.to_string(); +// "switch-state" => "second", +// ); +// zone.ensure_bootstrap_address(*bootstrap_address).await?; +// info!( +// self.inner.log, +// "Forwarding bootstrap traffic via {} to {}", +// bootstrap_name, +// self.inner.global_zone_bootstrap_link_local_address; +// "switch-state" => "second", +// ); +// zone +// .add_bootstrap_route( +// BOOTSTRAP_PREFIX, +// self.inner.global_zone_bootstrap_link_local_address, +// bootstrap_name, +// ) +// .map_err(|err| Error::ZoneCommand { +// intent: "add bootstrap network route".to_string(), +// err, +// })?; +// } + // If the switch zone is running but we have new addresses, it // means we're moving from the bootstrap to the underlay // network. We need to add an underlay address and route in the @@ -4585,6 +4649,7 @@ impl ServiceManager { .map(|addr| addr.to_string()) .unwrap_or_else(|| "".to_string()); + // TODO: Need to wait for underlay to be up for this to work for addr in &request.addresses { if *addr == Ipv6Addr::LOCALHOST { continue; @@ -4604,14 +4669,14 @@ impl ServiceManager { ); } - if let Some(info) = self.inner.sled_info.get() { - zone.add_default_route(info.underlay_address).map_err( - |err| Error::ZoneCommand { - intent: "Adding Route".to_string(), - err, - }, - )?; - } + // if let Some(info) = self.inner.sled_info.get() { + // zone.add_default_route(info.underlay_address).map_err( + // |err| Error::ZoneCommand { + // intent: "Adding Route".to_string(), + // err, + // }, + // )?; + // } for service in &request.services { let smfh = SmfHelper::new(&zone, service); @@ -4845,6 +4910,10 @@ impl ServiceManager { let zone_request = SwitchZoneConfigLocal { root, zone: request.clone() }; let zone_args = ZoneArgs::Switch(&zone_request); + info!( + self.inner.log, + "Starting switch zone"; "switch-state" => "first", + ); let zone = self .initialize_zone(zone_args, filesystems, data_links, None) .await?; diff --git a/zone-setup/Cargo.toml b/zone-setup/Cargo.toml index ea51d0a281..05e564df02 100644 --- a/zone-setup/Cargo.toml +++ b/zone-setup/Cargo.toml @@ -16,4 +16,5 @@ sled-hardware-types.workspace = true slog.workspace = true tokio.workspace = true uzers.workspace = true -zone.workspace = true \ No newline at end of file +zone.workspace = true +omicron-sled-agent.workspace = true \ No newline at end of file diff --git a/zone-setup/src/bin/zone-setup.rs b/zone-setup/src/bin/zone-setup.rs index 1dcdf213f0..fb80686e2f 100644 --- a/zone-setup/src/bin/zone-setup.rs +++ b/zone-setup/src/bin/zone-setup.rs @@ -13,8 +13,10 @@ use illumos_utils::svcadm::Svcadm; use illumos_utils::zone::{AddressRequest, Zones}; use omicron_common::cmd::fatal; use omicron_common::cmd::CmdError; +use omicron_common::backoff::{self, BackoffError}; use serde_json::Value; -use sled_hardware_types::underlay::BOOTSTRAP_PREFIX; +// use sled_hardware_types::underlay::BOOTSTRAP_PREFIX; +use omicron_sled_agent::services::SWITCH_ZONE_BASEBOARD_FILE; use slog::{info, Logger}; use std::fs::{ copy, create_dir_all, metadata, read_to_string, set_permissions, write, @@ -29,7 +31,7 @@ use uzers::{get_group_by_name, get_user_by_name}; pub const HOSTS_FILE: &str = "/etc/inet/hosts"; pub const CHRONY_CONFIG_FILE: &str = "/etc/inet/chrony.conf"; pub const LOGADM_CONFIG_FILE: &str = "/etc/logadm.d/chrony.logadm.conf"; -pub const SWITCH_ZONE_BASEBOARD_FILE: &str = "/opt/oxide/baseboard.json"; +//pub const SWITCH_ZONE_BASEBOARD_FILE: &str = "/opt/oxide/baseboard.json"; pub const ROOT: &str = "root"; pub const SYS: &str = "sys"; @@ -626,44 +628,46 @@ async fn switch_zone_setup( } }; +// TODO: This CANNOT happen inside the zone as the vnics live in the global zone + info!(&log, "Ensuring bootstrap address exists in zone"; "bootstrap address" => ?bootstrap_addr, "bootstrap vnic" => ?bootstrap_vnic, "bootstrap address" => ?bootstrap_addr); - let addrtype = - AddressRequest::new_static(std::net::IpAddr::V6(*bootstrap_addr), None); - let addrobj_name = "bootstrap6"; - let addrobj = - AddrObject::new(&bootstrap_vnic, addrobj_name).map_err(|err| { - CmdError::Failure(anyhow!( - "Could not create new addrobj {:?}: {}", - addrobj_name, - err - )) - })?; - // TODO: Fix error - // root@oxz_switch:~# /usr/sbin/ipadm create-addr -t -T static -a fdb0:a8a1:59c7:4e85::2/64 oxBootstrap0/bootstrap6 - // ipadm: Could not create address: Can't assign requested address - let _ = Zones::ensure_address(None, &addrobj, addrtype) - .map_err(|err| { - CmdError::Failure(anyhow!( - "Could not ensure address {} {:?}: {}", - addrobj, - addrtype, - err - )) - })?; - +// let addrtype = +// AddressRequest::new_static(std::net::IpAddr::V6(*bootstrap_addr), None); +// let addrobj_name = "bootstrap6"; +// let addrobj = +// AddrObject::new(&bootstrap_vnic, addrobj_name).map_err(|err| { +// CmdError::Failure(anyhow!( +// "Could not create new addrobj {:?}: {}", +// addrobj_name, +// err +// )) +// })?; +// // TODO: Fix error +// // root@oxz_switch:~# /usr/sbin/ipadm create-addr -t -T static -a fdb0:a8a1:59c7:4e85::2/64 oxBootstrap0/bootstrap6 +// // ipadm: Could not create address: Can't assign requested address +// let _ = Zones::ensure_address(None, &addrobj, addrtype) +// .map_err(|err| { +// CmdError::Failure(anyhow!( +// "Could not ensure address {} {:?}: {}", +// addrobj, +// addrtype, +// err +// )) +// })?; +// info!( &log, "Forwarding bootstrap traffic via {} to {}", bootstrap_name, gz_local_link_addr ); - Route::add_bootstrap_route( - BOOTSTRAP_PREFIX, - *gz_local_link_addr, - &bootstrap_name, - ) - .map_err(|err| CmdError::Failure(anyhow!(err)))?; +// Route::add_bootstrap_route( +// BOOTSTRAP_PREFIX, +// *gz_local_link_addr, +// &bootstrap_name, +// ) +// .map_err(|err| CmdError::Failure(anyhow!(err)))?; Ok(()) } @@ -943,6 +947,7 @@ async fn common_nw_set_up( Ipadm::set_interface_mtu(&datalink) .map_err(|err| CmdError::Failure(anyhow!(err)))?; + // TODO: Log if there are no addresses, or add a flag so that this doesn't run on the switch zone for addr in &static_addrs { if **addr != Ipv6Addr::LOCALHOST { info!(&log, "Ensuring static and auto-configured addresses are set on the IP interface"; "data link" => ?datalink, "static address" => ?addr); @@ -951,9 +956,39 @@ async fn common_nw_set_up( } } - info!(&log, "Ensuring there is a default route"; "gateway" => ?gateway); - Route::ensure_default_route_with_gateway(Gateway::Ipv6(gateway)) - .map_err(|err| CmdError::Failure(anyhow!(err)))?; + + // TODO: Run this enough times to make sure this implementation is solid + // perhaps somehow find out a way to know when the gateway is up? + // perhaps configure this for the switch zone separately? + backoff::retry_notify( + // TODO: Is this the best retry policy? + backoff::retry_policy_local(), + || async { + info!(&log, "Ensuring there is a default route"; "gateway" => ?gateway); + Route::ensure_default_route_with_gateway(Gateway::Ipv6(gateway)) + .or_else(|err| { + // TODO: Only make transient for the following error: + // executed and failed with status: exit status: 128 stdout: add net default: gateway fd00:1122:3344:101::1: Network is unreachable\n stderr: + Err(backoff::BackoffError::transient( + CmdError::Failure(anyhow!(err)), + ) + )}) + }, + |err, delay| { + info!( + &log, + "Cannot ensure there is a default route yet (retrying in {:?})", + delay; + "error" => ?err + ); + }, + ) + .await?; + + // TODO: Route must come after bootstrap + // info!(&log, "Ensuring there is a default route"; "gateway" => ?gateway); + // Route::ensure_default_route_with_gateway(Gateway::Ipv6(gateway)) + // .map_err(|err| CmdError::Failure(anyhow!(err)))?; info!(&log, "Populating hosts file for zone"; "zonename" => ?zonename); let mut hosts_contents = String::from(