From eb8e53919bdfe91d129f0353fef8f88a32afc583 Mon Sep 17 00:00:00 2001
From: Matt Keeter
Date: Mon, 5 Feb 2024 16:25:52 -0500
Subject: [PATCH] Multi region packing (#1599)

This PR modifies the build system to let flash use all available MPU regions,
resulting in tighter packing and smaller total image size. This shrinks the
`sidecar-rev-c` image by 256 KiB, and the `gimlet-f` image by 188 KiB.

In other words, tasks go from having exactly 1 flash region to _at least_ 1
flash region; this is mostly plumbing that change from
`suggest_memory_region_size` all the way through the `kconfig`.

The change does make task packing trickier, because tasks can be placed in one
of two orientations: largest-chunk-first or largest-chunk-last. I went for a
very dumb O(N^2) algorithm that checks every unplaced task and picks the best;
we're far from having > 100 tasks, so I'm not worried about bad scaling
(famous last words!). A small worked example of the chunk splitting appears
after the diff.

In addition, it updates `xtask/src/sizes.rs` to print the individual chunks
when the `-v` flag is specified.
---
 app/sidecar/base.toml     |   2 +-
 build/kconfig/src/lib.rs  |  20 +-
 build/xtask/src/clippy.rs |   9 +-
 build/xtask/src/config.rs |  85 +++++-
 build/xtask/src/dist.rs   | 529 +++++++++++++++++++++++++++++---------
 build/xtask/src/main.rs   |  12 +-
 build/xtask/src/sizes.rs  | 189 ++++++++++----
 sys/kern/build.rs         |  60 ++++-
 8 files changed, 697 insertions(+), 209 deletions(-)

diff --git a/app/sidecar/base.toml b/app/sidecar/base.toml
index e9ce78840..029799ca8 100644
--- a/app/sidecar/base.toml
+++ b/app/sidecar/base.toml
@@ -6,7 +6,7 @@ fwid = true
 
 [kernel]
 name = "sidecar"
-requires = {flash = 24600, ram = 6256}
+requires = {flash = 25184, ram = 6256}
 features = ["dump"]
 
 [caboose]
diff --git a/build/kconfig/src/lib.rs b/build/kconfig/src/lib.rs
index 2b388ade0..5aaed33e9 100644
--- a/build/kconfig/src/lib.rs
+++ b/build/kconfig/src/lib.rs
@@ -51,7 +51,7 @@ pub struct TaskConfig {
     ///
     /// The name is the "output" assignment that generated this region,
     /// typically (but not necessarily!) either `"ram"` or `"flash"`.
-    pub owned_regions: BTreeMap<String, RegionConfig>,
+    pub owned_regions: BTreeMap<String, MultiRegionConfig>,
 
     /// Names of regions (in the app-level `shared_regions`) that this task
     /// needs access to.
@@ -117,6 +117,24 @@ pub struct RegionConfig {
     pub attributes: RegionAttributes,
 }
 
+/// Description of one memory span containing multiple adjacent regions
+///
+/// This is equivalent to [`RegionConfig`], but represents a single memory span
+/// that should be configured as multiple regions in the MPU.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct MultiRegionConfig {
+    /// Address of start of region. The platform likely has alignment
+    /// requirements for this; it must meet them. (For example, on ARMv7-M, it
+    /// must be naturally aligned for the size.)
+    pub base: u32,
+    /// Size of each chunk in the region, in bytes. The platform likely has
+    /// alignment requirements for this; it must meet them. (For example, on
+    /// ARMv7-M, it must be a power of two greater than 16.)
+    pub sizes: Vec<u32>,
+    /// Flags describing what can be done with this region.
+    pub attributes: RegionAttributes,
+}
+
 #[derive(Copy, Clone, Debug, Serialize, Deserialize)]
 pub struct RegionAttributes {
     /// Region can be read by tasks that include it.
diff --git a/build/xtask/src/clippy.rs b/build/xtask/src/clippy.rs index cea586776..6593a184f 100644 --- a/build/xtask/src/clippy.rs +++ b/build/xtask/src/clippy.rs @@ -5,7 +5,6 @@ use std::path::PathBuf; use anyhow::{bail, Result}; -use indexmap::IndexMap; use crate::config::Config; @@ -49,8 +48,10 @@ pub fn run( let build_config = if name == "kernel" { // Build dummy allocations for each task - let fake_sizes: IndexMap<_, _> = - [("flash", 64), ("ram", 64)].into_iter().collect(); + let fake_sizes = crate::dist::TaskRequest { + memory: [("flash", 64), ("ram", 64)].into_iter().collect(), + spare_regions: 0, + }; let task_sizes = toml .tasks .keys() @@ -71,7 +72,7 @@ pub fn run( let mut entry_points: std::collections::HashMap<_, _> = allocs .tasks .iter() - .map(|(k, v)| (k.clone(), v["flash"].start)) + .map(|(k, v)| (k.clone(), v["flash"].start())) .collect(); // add a dummy caboose point diff --git a/build/xtask/src/config.rs b/build/xtask/src/config.rs index ef2d2f2cd..db593073a 100644 --- a/build/xtask/src/config.rs +++ b/build/xtask/src/config.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use std::collections::{hash_map::DefaultHasher, BTreeMap, BTreeSet}; +use std::collections::{hash_map::DefaultHasher, BTreeMap, BTreeSet, VecDeque}; use std::hash::Hasher; use std::ops::Range; use std::path::{Path, PathBuf}; @@ -461,15 +461,24 @@ impl Config { } /// Suggests an appropriate size for the given task (or "kernel"), given - /// its true size. The size depends on MMU implementation, dispatched - /// based on the `target` in the config file. - pub fn suggest_memory_region_size(&self, name: &str, size: u64) -> u64 { + /// its true size and a number of available regions. The size depends on + /// MMU implementation, dispatched based on the `target` in the config file. + /// + /// The returned `Vec` always has the largest value first. + pub fn suggest_memory_region_size( + &self, + name: &str, + size: u64, + regions: usize, + ) -> VecDeque { match name { "kernel" => { // Nearest chunk of 16 - ((size + 15) / 16) * 16 + [((size + 15) / 16) * 16].into_iter().collect() } - _ => self.mpu_alignment().suggest_memory_region_size(size), + _ => self + .mpu_alignment() + .suggest_memory_region_size(size, regions), } } @@ -525,10 +534,68 @@ enum MpuAlignment { impl MpuAlignment { /// Suggests a minimal memory region size fitting the given number of bytes - fn suggest_memory_region_size(&self, size: u64) -> u64 { + /// + /// If multiple regions are available, then we may use them for efficiency. + /// The resulting `Vec` is guaranteed to have the largest value first. 
+ fn suggest_memory_region_size( + &self, + mut size: u64, + regions: usize, + ) -> VecDeque { match self { - MpuAlignment::PowerOfTwo => size.next_power_of_two(), - MpuAlignment::Chunk(c) => ((size + c - 1) / c) * c, + MpuAlignment::PowerOfTwo => { + const MIN_MPU_REGION_SIZE: u64 = 32; + let mut out = VecDeque::new(); + for _ in 0..regions { + let s = + (size.next_power_of_two() / 2).max(MIN_MPU_REGION_SIZE); + out.push_back(s); + size = size.saturating_sub(s); + if size == 0 { + break; + } + } + if size > 0 { + if let Some(s) = out.back_mut() { + *s *= 2; + } else { + out.push_back(size.next_power_of_two()); + } + } + // Merge duplicate regions at the end + while out.len() >= 2 { + let n = out.len(); + if out[n - 1] == out[n - 2] { + out.pop_back(); + *out.back_mut().unwrap() *= 2; + } else { + break; + } + } + // Split the initial (largest) region into as many smaller + // regions as we can fit. This doesn't change total size, but + // can make alignment more flexible, since smaller regions have + // less stringent alignment requirements. + while out[0] > MIN_MPU_REGION_SIZE { + let largest = out[0]; + let n = out.iter().filter(|c| **c == largest).count(); + if out.len() + n > regions { + break; + } + // Replace `n` instances of `largest` at the start of `out` + // with `n * 2` instances of `largest / 2` + for _ in 0..n { + out.pop_front(); + } + for _ in 0..n * 2 { + out.push_front(largest / 2); + } + } + out + } + MpuAlignment::Chunk(c) => { + [((size + c - 1) / c) * c].into_iter().collect() + } } } /// Returns the desired alignment for a region of a particular size diff --git a/build/xtask/src/dist.rs b/build/xtask/src/dist.rs index fc83779fd..033ceb119 100644 --- a/build/xtask/src/dist.rs +++ b/build/xtask/src/dist.rs @@ -210,6 +210,99 @@ pub fn list_tasks(app_toml: &Path) -> Result<()> { /// Represents allocations and free spaces for a particular image type AllocationMap = (Allocations, IndexMap>); +/// Module to prevent people from messing with invariants of checked types +mod checked_types { + use super::*; + + /// Simple data structure to store a set of guaranteed-contiguous ranges + /// + /// This will panic if you violate that constraint! 
+ #[derive(Debug, Clone, Default, Hash)] + pub struct ContiguousRanges(Vec>); + impl ContiguousRanges { + pub fn new(r: Range) -> Self { + Self(vec![r]) + } + pub fn iter(&self) -> impl Iterator> { + self.0.iter() + } + pub fn start(&self) -> u32 { + self.0.first().unwrap().start + } + pub fn end(&self) -> u32 { + self.0.last().unwrap().end + } + pub fn contains(&self, v: &u32) -> bool { + (self.start()..self.end()).contains(v) + } + pub fn push(&mut self, r: Range) { + if let Some(t) = &self.0.last() { + assert_eq!(t.end, r.start, "ranges must be contiguous"); + } + self.0.push(r) + } + } + + impl<'a> IntoIterator for &'a ContiguousRanges { + type Item = &'a Range; + type IntoIter = std::slice::Iter<'a, Range>; + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } + } + + /// Simple wrapper data structure that enforces that values are decreasing + /// + /// Each value must be the same or smaller than the previous value + #[derive(Debug)] + pub struct OrderedVecDeque { + data: VecDeque, + } + impl OrderedVecDeque { + pub fn new() -> Self { + Self { + data: VecDeque::new(), + } + } + pub fn iter(&self) -> impl Iterator { + self.data.iter() + } + pub fn into_iter( + self, + ) -> impl Iterator + DoubleEndedIterator { + self.data.into_iter() + } + pub fn front(&self) -> Option<&u32> { + self.data.front() + } + pub fn pop_front(&mut self) -> Option { + self.data.pop_front() + } + pub fn push_front(&mut self, v: u32) { + if let Some(f) = self.front() { + assert!(v >= *f); + } + self.data.push_front(v) + } + pub fn back(&self) -> Option<&u32> { + self.data.back() + } + pub fn push_back(&mut self, v: u32) { + if let Some(f) = self.back() { + assert!(v <= *f); + } + self.data.push_back(v) + } + } + impl From for VecDeque { + fn from(v: OrderedVecDeque) -> Self { + v.data + } + } +} +// Republish these types for widespread availability +pub use checked_types::{ContiguousRanges, OrderedVecDeque}; + pub fn package( verbose: bool, edges: bool, @@ -279,9 +372,35 @@ pub fn package( }) .collect::>()?; + // Build a set of requests for the memory allocator + let mut task_reqs = HashMap::new(); + for (t, sz) in task_sizes { + let n = sz.len() + + cfg + .toml + .extern_regions_for(t, &cfg.toml.image_names[0]) + .unwrap() + .len() + + cfg.toml.tasks.get(t).unwrap().uses.len() + + cfg + .toml + .caboose + .as_ref() + .map(|c| c.tasks.contains(&t.to_string())) + .unwrap_or(false) as usize; + + task_reqs.insert( + t, + TaskRequest { + memory: sz, + spare_regions: 7 - n, + }, + ); + } + // Allocate memories. let allocated = - allocate_all(&cfg.toml, &task_sizes, cfg.toml.caboose.as_ref())?; + allocate_all(&cfg.toml, &task_reqs, cfg.toml.caboose.as_ref())?; for image_name in &cfg.toml.image_names { // Build each task. @@ -330,7 +449,7 @@ pub fn package( task_entry_point(&cfg, name, image_name) } else { // Dummy entry point - Ok(allocs.tasks[name]["flash"].start) + Ok(allocs.tasks[name]["flash"].start()) }; ep.map(|ep| (name.clone(), ep)) }) @@ -976,6 +1095,7 @@ fn link_dummy_task( .toml .memories(&cfg.toml.image_names[0])? 
.into_iter() + .map(|(name, r)| (name, ContiguousRanges::new(r))) .collect(); let extern_regions = cfg.toml.extern_regions_for(name, image_name)?; @@ -1290,7 +1410,7 @@ fn check_task_priorities(toml: &Config) -> Result<()> { fn generate_task_linker_script( name: &str, - map: &BTreeMap>, + map: &BTreeMap, sections: Option<&IndexMap>, stacksize: u32, images: &IndexMap>, @@ -1310,9 +1430,9 @@ fn generate_task_linker_script( } writeln!(linkscr, "MEMORY\n{{")?; - for (name, range) in map { - let mut start = range.start; - let end = range.end; + for (name, ranges) in map { + let mut start = ranges.start(); + let end = ranges.end(); let name = name.to_ascii_uppercase(); // Our stack comes out of RAM @@ -1735,7 +1855,11 @@ pub struct Allocations { /// Map from memory-name to address-range pub kernel: BTreeMap>, /// Map from task-name to memory-name to address-range - pub tasks: BTreeMap>>, + /// + /// A task may have multiple address ranges in the same memory space for + /// efficient packing; if this is the case, the addresses will be contiguous + /// and each individual range will respect MPU requirements. + pub tasks: BTreeMap>, /// Optional trailing caboose, located in the given region pub caboose: Option<(String, Range)>, } @@ -1761,6 +1885,18 @@ impl Allocations { } } +/// A set of memory requests from a single task +#[derive(Debug, Clone)] +pub struct TaskRequest<'a> { + /// Memory requests, as a map from memory name -> size + pub memory: IndexMap<&'a str, u64>, + + /// Number of extra regions available for more efficient packing + /// + /// If this is zero, then each request in `memory` can only use 1 region + pub spare_regions: usize, +} + /// Allocates address space from all regions for the kernel and all tasks. /// /// The allocation strategy is slightly involved, because of the limitations of @@ -1797,7 +1933,7 @@ impl Allocations { /// requests per alignment size. pub fn allocate_all( toml: &Config, - task_sizes: &HashMap<&str, IndexMap<&str, u64>>, + task_sizes: &HashMap<&str, TaskRequest>, caboose: Option<&CabooseConfig>, ) -> Result> { // Collect all allocation requests into queues, one per memory type, indexed @@ -1807,7 +1943,7 @@ pub fn allocate_all( // We keep kernel and task requests separate so we can always service the // kernel first. // - // The task map is: memory name -> allocation size -> queue of task name. 
+ // The task map is: memory name -> task name -> requested regions // The kernel map is: memory name -> allocation size let kernel = &toml.kernel; let tasks = &toml.tasks; @@ -1821,105 +1957,53 @@ pub fn allocate_all( let mut free = toml.memories(image_name)?; let kernel_requests = &kernel.requires; - let mut task_requests: BTreeMap<&str, BTreeMap>> = + let mut task_requests: BTreeMap<&str, IndexMap<&str, OrderedVecDeque>> = BTreeMap::new(); for name in tasks.keys() { - for (mem, amt) in task_sizes[name.as_str()].iter() { - let bytes = toml.suggest_memory_region_size(name, *amt); + let req = &task_sizes[name.as_str()]; + for (&mem, &amt) in req.memory.iter() { + // Right now, flash is most limited, so it gets to use all of + // our spare regions (if present) + let n = if mem == "flash" { + req.spare_regions + 1 + } else { + 1 + }; + let bytes = toml.suggest_memory_region_size(name, amt, n); if let Some(r) = tasks[name].max_sizes.get(&mem.to_string()) { - if bytes > *r as u64 { + let total_bytes = bytes.iter().sum::(); + if total_bytes > u64::from(*r) { bail!( "task {}: needs {} bytes of {} but max-sizes limits it to {}", - name, bytes, mem, r); + name, total_bytes, mem, r); } } + // Convert from u64 -> u32 + let mut bs = OrderedVecDeque::new(); + for b in bytes { + bs.push_back(b.try_into().unwrap()); + } task_requests .entry(mem) .or_default() - .entry(bytes.try_into().unwrap()) - .or_default() - .push_back(name.as_str()); + .insert(name.as_str(), bs); } } // Okay! Do memory types one by one, fitting kernel first. for (region, avail) in &mut free { let mut k_req = kernel_requests.get(region.as_str()); - let mut t_reqs = task_requests.get_mut(region.as_str()); - - fn reqs_map_not_empty( - om: &Option<&mut BTreeMap>>, - ) -> bool { - om.iter() - .flat_map(|map| map.values()) - .any(|q| !q.is_empty()) - } - - 'fitloop: while k_req.is_some() || reqs_map_not_empty(&t_reqs) { - let align = if avail.start == 0 { - // Lie to keep the masks in range. This could be avoided by - // tracking log2 of masks rather than masks. - 1 << 31 - } else { - 1 << avail.start.trailing_zeros() - }; - - // Search order is: - // - Kernel. - // - Task requests equal to or smaller than this alignment, in - // descending order of size. - // - Task requests larger than this alignment, in ascending - // order of size. - - if let Some(&sz) = k_req.take() { - // The kernel wants in on this. - allocs.kernel.insert( - region.to_string(), - allocate_k(region, sz, avail)?, - ); - continue 'fitloop; - } - - if let Some(t_reqs) = t_reqs.as_mut() { - for (&sz, q) in t_reqs.range_mut(..=align).rev() { - if let Some(task) = q.pop_front() { - // We can pack an equal or smaller one in. - let align = toml.task_memory_alignment(sz); - allocs - .tasks - .entry(task.to_string()) - .or_default() - .insert( - region.to_string(), - allocate_one(region, sz, align, avail)?, - ); - continue 'fitloop; - } - } - - for (&sz, q) in t_reqs.range_mut(align + 1..) { - if let Some(task) = q.pop_front() { - // We've gotta use a larger one. - let align = toml.task_memory_alignment(sz); - allocs - .tasks - .entry(task.to_string()) - .or_default() - .insert( - region.to_string(), - allocate_one(region, sz, align, avail)?, - ); - continue 'fitloop; - } - } - } - - // If we reach this point, it means our loop condition is wrong, - // because one of the above things should really have happened. - // Panic here because otherwise it's a hang. 
- panic!("loop iteration without progess made!"); - } + let t_reqs = task_requests.get_mut(region.as_str()); + let mut t_reqs_empty = IndexMap::new(); + allocate_region( + region, + toml, + &mut k_req, + t_reqs.unwrap_or(&mut t_reqs_empty), + avail, + &mut allocs, + )?; } if let Some(caboose) = caboose { @@ -1944,6 +2028,195 @@ pub fn allocate_all( Ok(result) } +fn allocate_region( + region: &str, + toml: &Config, + k_req: &mut Option<&u32>, + t_reqs: &mut IndexMap<&str, OrderedVecDeque>, + avail: &mut Range, + allocs: &mut Allocations, +) -> Result<()> { + // The kernel gets to go first! + if let Some(&sz) = k_req.take() { + allocs + .kernel + .insert(region.to_string(), allocate_k(region, sz, avail)?); + } + + while !t_reqs.is_empty() { + // At this point, we need to find a task that fits based on our existing + // alignment. This is tricky, because -- for efficient packing -- we + // allow tasks to span multiple regions. For example, a task could look + // like this: + // + // 4444221 + // + // representing three regions of size 4, 2, 1. + // + // Such a task could be placed in two ways: + // + // |4444221 ("forward") + // 122|4444 ("reverse") + // | where this line is the alignment for the largest chunk + + #[derive(Debug)] + enum Direction { + Forward, + Reverse, + } + #[derive(Debug)] + struct Match<'a> { + gap: u32, + align: u32, + size: u32, + name: &'a str, + dir: Direction, + } + impl<'a> Match<'a> { + /// Updates our "current best" with new values, if they're better + /// + /// Our policy is to rank by + /// 1) smallest gap required, and then + /// 2) largest resulting alignment + /// 3) smallest task size (for backwards compatibility) + fn update( + &mut self, + gap: u32, + align: u32, + size: u32, + name: &'a str, + dir: Direction, + ) { + // Ignore any gap that's < 1/8 of final alignment, since that's + // "close enough" + let gap_m = gap.saturating_sub(align / 8); + let our_gap_m = self.gap.saturating_sub(self.align / 8); + if gap_m < our_gap_m + || (gap_m == our_gap_m && align > self.align) + || gap < self.gap + || (gap == self.gap && align > self.align) + || (gap == self.gap + && align == self.align + && size < self.size) + { + self.gap = gap; + self.align = align; + self.size = size; + self.name = name; + self.dir = dir; + } + } + } + + let mut best = Match { + gap: u32::MAX, + align: 0, + size: u32::MAX, + name: "", + dir: Direction::Forward, + }; + + for (&task_name, mem) in t_reqs.iter() { + let align = toml.task_memory_alignment(*mem.front().unwrap()); + let size_mask = align - 1; + + // Place the chunk using reverse orientation, with padding if it's + // not aligned. The alignment (for scoring purposes) is the + // alignment of the largest region, since that's last in memory. + let total_size: u32 = mem.iter().sum(); + let base = (avail.start + total_size + size_mask) & !size_mask; + let gap_reverse = base - avail.start - total_size; + best.update( + gap_reverse, + align, + total_size, + task_name, + Direction::Reverse, + ); + + // Place the chunk using forward orientation, with padding if it's + // not aligned. The alignment (for scoring purposes) is the + // alignment of the last region, since that may be worse than the + // starting alignment. 
+ let base = (avail.start + size_mask) & !size_mask; + let gap_forward = base - avail.start; + best.update( + gap_forward, + toml.task_memory_alignment(*mem.back().unwrap()), + total_size, + task_name, + Direction::Forward, + ); + } + let Some(sizes) = t_reqs.remove(best.name) else { + panic!("could not find a task"); + }; + + // Prepare to pack values either forward or reverse + // + // At this point, we drop the "values must be ordered" constraint, + // because we may combine adjacent regions to reduce the total region + // count. This could violate the ordering constraint, but is still + // valid from the MPU's perspective. + let mut sizes: VecDeque = match best.dir { + Direction::Forward => sizes.into(), + Direction::Reverse => sizes.into_iter().rev().collect(), + }; + avail.start += best.gap; + + while let Some(mut size) = sizes.pop_front() { + // When building the size list, we split the largest size to reduce + // alignment requirements. Now, we try to merge them again, to + // reduce the number of regions stored in the kernel's flash. + // + // For example, [256, 256, 64] => [512, 64] if the initial position + // is aligned for a 512-byte region. + let mut n = sizes.iter().filter(|s| **s == size).count() + 1; + if n > 1 { + n &= !1; // only consider an even number of regions + let possible_align = toml.task_memory_alignment(size * 2); + if avail.start & (possible_align - 1) == 0 { + size *= 2; + for _ in 0..n - 1 { + sizes.pop_front(); + } + for _ in 0..n / 2 - 1 { + sizes.push_front(size); + } + } + } + + // We do our own alignment management, so assert that we haven't + // messed it up: + let align = toml.task_memory_alignment(size); + assert!(avail.start & (align - 1) == 0); + + allocs + .tasks + .entry(best.name.to_string()) + .or_default() + .entry(region.to_string()) + .or_default() + .push(allocate_one(region, size, align, avail)?); + } + + // Check that our allocations are all aligned and contiguous + let mut prev: Option> = None; + for r in &allocs.tasks[best.name][region] { + if let Some(prev) = prev { + assert_eq!(prev.end, r.start); + } + let size = r.end - r.start; + assert!(size >= 32); // minimum MPU size + let align = toml.task_memory_alignment(size); + assert!(r.start.trailing_zeros() >= align.trailing_zeros()); + prev = Some(r.clone()); + } + } + + Ok(()) +} + fn allocate_k( region: &str, size: u32, @@ -2003,7 +2276,7 @@ fn allocate_one( /// system. 
pub fn make_kconfig( toml: &Config, - task_allocations: &BTreeMap>>, + task_allocations: &BTreeMap>, entry_points: &HashMap, image_name: &str, ) -> Result { @@ -2084,7 +2357,7 @@ pub fn make_kconfig( let flash = &task_allocations[name]["flash"]; let entry_offset = if flash.contains(&entry_points[name]) { - entry_points[name] - flash.start + entry_points[name] - flash.start() } else { bail!( "entry point {:#x} is not in flash range {:#x?}", @@ -2111,45 +2384,47 @@ pub fn make_kconfig( } let extern_regions = toml.extern_regions_for(name, image_name)?; - let owned_regions = task_allocations[name] + let mut owned_regions = BTreeMap::new(); + for (out_name, range) in task_allocations[name] .iter() + .flat_map(|(name, chunks)| chunks.iter().map(move |c| (name, c))) .chain(extern_regions.iter()) - .map(|(out_name, range)| { - // Look up region for this image - let mut regions = toml.outputs[out_name] - .iter() - .filter(|o| &o.name == image_name); - let out = regions.next().expect("no region for name"); - if regions.next().is_some() { - bail!("multiple {out_name} regions for name {image_name}"); - } - let size = range.end - range.start; - if p2_required && !size.is_power_of_two() { - bail!( - "memory region for task '{name}' output '{out_name}' \ - is required to be a power of two, but has size {size}" - ); - } + { + // Look up region for this image + let mut regions = toml.outputs[out_name] + .iter() + .filter(|o| &o.name == image_name); + let out = regions.next().expect("no region for name"); + if regions.next().is_some() { + bail!("multiple {out_name} regions for name {image_name}"); + } + let size = range.end - range.start; + if p2_required && !size.is_power_of_two() { + bail!( + "memory region for task '{name}' output '{out_name}' \ + is required to be a power of two, but has size {size}" + ); + } - Ok(( - out_name.to_string(), - build_kconfig::RegionConfig { - base: range.start, - size, - attributes: build_kconfig::RegionAttributes { - read: out.read, - write: out.write, - execute: out.execute, - special_role: if out.dma { - Some(build_kconfig::SpecialRole::Dma) - } else { - None - }, + owned_regions + .entry(out_name.to_string()) + .or_insert(build_kconfig::MultiRegionConfig { + base: range.start, + sizes: vec![], + attributes: build_kconfig::RegionAttributes { + read: out.read, + write: out.write, + execute: out.execute, + special_role: if out.dma { + Some(build_kconfig::SpecialRole::Dma) + } else { + None }, }, - )) - }) - .collect::, _>>()?; + }) + .sizes + .push(size); + } tasks.push(build_kconfig::TaskConfig { owned_regions, diff --git a/build/xtask/src/main.rs b/build/xtask/src/main.rs index 4ebb116fb..294c00121 100644 --- a/build/xtask/src/main.rs +++ b/build/xtask/src/main.rs @@ -92,9 +92,9 @@ enum Xtask { /// Runs `xtask dist` and reports the sizes of resulting tasks Sizes { - /// Request verbosity from tools we shell out to. - #[clap(short)] - verbose: bool, + /// `-v` shows chunk sizes; `-vv` makes build verbose + #[clap(short, action = clap::ArgAction::Count)] + verbose: u8, /// Path to the image configuration file, in TOML. 
cfg: PathBuf, @@ -248,7 +248,7 @@ fn run(xtask: Xtask) -> Result<()> { } => { let allocs = dist::package(verbose, edges, &cfg, None, dirty)?; for (_, (a, _)) in allocs { - sizes::run(&cfg, &a, true, false, false)?; + sizes::run(&cfg, &a, true, false, false, false)?; } } Xtask::Build { @@ -289,9 +289,9 @@ fn run(xtask: Xtask) -> Result<()> { save, dirty, } => { - let allocs = dist::package(verbose, false, &cfg, None, dirty)?; + let allocs = dist::package(verbose >= 2, false, &cfg, None, dirty)?; for (_, (a, _)) in allocs { - sizes::run(&cfg, &a, false, compare, save)?; + sizes::run(&cfg, &a, false, compare, save, verbose >= 1)?; } } Xtask::Humility { args } => { diff --git a/build/xtask/src/sizes.rs b/build/xtask/src/sizes.rs index 48f3c962f..6970c74db 100644 --- a/build/xtask/src/sizes.rs +++ b/build/xtask/src/sizes.rs @@ -8,14 +8,14 @@ use std::io::Write; use std::path::Path; use std::process; -use anyhow::{bail, Result}; +use anyhow::{bail, Context, Result}; use colored::*; use goblin::Object; use indexmap::map::Entry; use indexmap::IndexMap; use crate::{ - dist::{Allocations, DEFAULT_KERNEL_STACK}, + dist::{Allocations, ContiguousRanges, DEFAULT_KERNEL_STACK}, Config, }; @@ -34,6 +34,7 @@ pub fn run( only_suggest: bool, compare: bool, save: bool, + verbose: bool, ) -> Result<()> { let toml = Config::from_file(cfg)?; let sizes = create_sizes(&toml)?; @@ -63,7 +64,7 @@ pub fn run( // Print detailed sizes relative to usage if !only_suggest { let map = build_memory_map(&toml, &sizes, allocs)?; - print_memory_map(&toml, &map)?; + print_memory_map(&toml, &map, verbose)?; print!("\n\n"); print_task_table(&toml, &map)?; } @@ -78,7 +79,10 @@ pub fn run( } let size = toml.kernel.requires[&mem.to_string()]; - let suggestion = toml.suggest_memory_region_size("kernel", used); + let suggestion = toml.suggest_memory_region_size("kernel", used, 1); + assert_eq!(suggestion.len(), 1, "kernel should not be > 1 region"); + let suggestion = suggestion[0]; + if suggestion >= size as u64 { continue; } @@ -117,10 +121,10 @@ enum Recommended { FixedSize(u32), MaxSize(u32), } -#[derive(Copy, Clone, Debug)] +#[derive(Clone, Debug)] struct MemoryChunk<'a> { used_size: u64, - total_size: u32, + total_size: Vec, owner: &'a str, recommended: Option, } @@ -145,11 +149,17 @@ fn build_memory_map<'a>( .chain(std::iter::once(( "kernel", toml.kernel.requires.clone(), - allocs.kernel.clone(), + allocs + .kernel + .iter() + .map(|(name, v)| { + (name.to_owned(), ContiguousRanges::new(v.clone())) + }) + .collect(), ))) .chain(allocs.caboose.iter().map(|(region, size)| { let mut alloc = BTreeMap::new(); - alloc.insert(region.clone(), size.clone()); + alloc.insert(region.clone(), ContiguousRanges::new(size.clone())); let mut requires = IndexMap::new(); requires .insert(region.clone(), toml.caboose.as_ref().unwrap().size); @@ -164,10 +174,10 @@ fn build_memory_map<'a>( } let alloc = &alloc[&mem_name.to_string()]; map.entry(mem_name).or_default().insert( - alloc.start, + alloc.start(), MemoryChunk { used_size: used, - total_size: alloc.end - alloc.start, + total_size: alloc.iter().map(|v| v.end - v.start).collect(), owner: name, recommended: requires .get(mem_name.to_owned()) @@ -198,7 +208,7 @@ fn print_task_table( let mem_pad = map .values() .flat_map(|m| m.values()) - .map(|c| format!("{}", c.total_size).len()) + .map(|c| format!("{}", c.total_size.iter().sum::()).len()) .chain(std::iter::once(4)) .max() .unwrap_or(0) as usize; @@ -215,7 +225,9 @@ fn print_task_table( .map(|(region, map)| { ( *region, - map.iter().map(|(_, 
chunk)| (chunk.owner, *chunk)).collect(), + map.iter() + .map(|(_, chunk)| (chunk.owner, chunk.clone())) + .collect(), ) }) .collect(); @@ -247,7 +259,7 @@ fn print_task_table( print!( "{:(), mem = mem_pad, ); match chunk.recommended { @@ -265,6 +277,7 @@ fn print_task_table( fn print_memory_map( toml: &Config, map: &BTreeMap<&str, BTreeMap>, + verbose: bool, ) -> Result<()> { let task_pad = toml .tasks @@ -277,61 +290,133 @@ fn print_memory_map( let mem_pad = map .values() .flat_map(|m| m.values()) - .map(|c| format!("{}", c.total_size).len()) + .map(|c| format!("{}", c.total_size.iter().sum::()).len()) .max() .unwrap_or(0) as usize; for (mem_name, map) in map { println!("\n{}:", mem_name); - println!( - " ADDRESS | {:^task$} | {:>mem$} | {:>mem$} | LIMIT", + if verbose { + println!( + " ADDRESS | {:^task$} | {:>mem$} | {:>mem$} | {:>mem$} | LIMIT", "PROGRAM", "USED", "SIZE", + "CHUNKS", task = task_pad, mem = mem_pad, ); + } else { + println!( + " ADDRESS | {:^task$} | {:>mem$} | {:>mem$} | LIMIT", + "PROGRAM", + "USED", + "SIZE", + task = task_pad, + mem = mem_pad, + ); + } let next = map.keys().skip(1).map(Some).chain(std::iter::once(None)); for ((start, chunk), next) in map.iter().zip(next) { - print!(" {:#010x} | ", start); - print!( - "{:mem$} | {:>mem$} | ", - chunk.owner, - chunk.used_size, - chunk.total_size, - size = task_pad, - mem = mem_pad, - ); - match chunk.recommended { - None => print!("(auto)"), - Some(Recommended::MaxSize(m)) => print!("{}", m), - Some(Recommended::FixedSize(_)) => print!("(fixed)"), + for (i, mem) in chunk.total_size.iter().enumerate() { + print!( + " {:#010x} | ", + start + chunk.total_size[0..i].iter().sum::() + ); + if verbose { + if i == 0 { + print!( + "{:mem$} | {:>mem$} | {:>mem$} | ", + chunk.owner, + chunk.used_size, + chunk.total_size.iter().sum::(), + mem, + size = task_pad, + mem = mem_pad, + ); + } else { + print!( + "{:mem$} | {:>mem$} | {:>mem$} | ", + "", + "", + "", + mem, + size = task_pad, + mem = mem_pad, + ); + } + } else { + print!( + "{:mem$} | {:>mem$} | ", + chunk.owner, + chunk.used_size, + chunk.total_size.iter().sum::(), + size = task_pad, + mem = mem_pad, + ); + } + if i == 0 { + match chunk.recommended { + None => print!("(auto)"), + Some(Recommended::MaxSize(m)) => print!("{}", m), + Some(Recommended::FixedSize(_)) => print!("(fixed)"), + } + } + println!(); + // Only print the header if we're not being verbose + if !verbose { + break; + } } - println!(); // Print padding, if relevant + let chunk_size = chunk.total_size.iter().sum::(); if let Some(&next) = next { - if next != start + chunk.total_size { - print!(" {:#010x} | ", start + chunk.total_size); + if next != start + chunk_size { + print!(" {:#010x} | ", start + chunk_size); + if verbose { + println!( + "{:mem$} | {:>mem$} | {:>mem$} |", + "-padding-", + "--", + next - (start + chunk_size), + "--", + size = task_pad, + mem = mem_pad, + ); + } else { + println!( + "{:mem$} | {:>mem$} | ", + "-padding-", + "--", + next - (start + chunk_size), + size = task_pad, + mem = mem_pad, + ); + } + } + } else { + print!(" {:#010x} | ", start + chunk_size); + if verbose { + println!( + "{:mem$} | {:>mem$} | {:>mem$} | ", + "--end--", + "", + "", + "", + size = task_pad, + mem = mem_pad, + ); + } else { println!( "{:mem$} | {:>mem$} | ", - "-padding-", - "--", - next - (start + chunk.total_size), + "--end--", + "", + "", size = task_pad, mem = mem_pad, ); } - } else { - print!(" {:#010x} | ", start + chunk.total_size); - println!( - "{:mem$} | {:>mem$} | ", - "--end--", - "", - 
"", - size = task_pad, - mem = mem_pad, - ); } } } @@ -371,25 +456,25 @@ pub fn load_task_size<'a>( let r = memory_sizes.entry(region).or_insert_with(|| start..end); r.start = r.start.min(start); r.end = r.end.max(end); - true + Ok(()) } else { - false + bail!("could not find region at {start}"); } }; for phdr in &elf.program_headers { if phdr.p_type != goblin::elf::program_header::PT_LOAD { continue; } - record_size(phdr.p_vaddr, phdr.p_memsz); + record_size(phdr.p_vaddr, phdr.p_memsz)?; // If the VirtAddr disagrees with the PhysAddr, then this is a // section which is relocated into RAM, so we also accumulate // its FileSiz in the physical address (which is presumably // flash). - if phdr.p_vaddr != phdr.p_paddr - && !record_size(phdr.p_paddr, phdr.p_filesz) - { - bail!("Failed to remap relocated section at {}", phdr.p_paddr); + if phdr.p_vaddr != phdr.p_paddr { + record_size(phdr.p_paddr, phdr.p_filesz).with_context(|| { + format!("Failed to remap relocated section at {}", phdr.p_paddr) + })?; } } diff --git a/sys/kern/build.rs b/sys/kern/build.rs index ad78672cd..fd7cbbf51 100644 --- a/sys/kern/build.rs +++ b/sys/kern/build.rs @@ -33,7 +33,20 @@ struct Generated { enum RegionKey { Null, Shared(String), - Owned(usize, String), + Owned { + /// Index of the task, based on ordering in the kconfig + task_index: usize, + + /// Index of this particular region within the task + /// + /// Each task's memory span can be built from multiple contiguous MPU + /// regions; if that's the case, then `chunk_index` varies. The region + /// with `chunk_index == 0` is at the base address. + chunk_index: usize, + + /// Name of memory which we're using for this region + memory_name: String, + }, } fn process_config() -> Result { @@ -77,7 +90,23 @@ fn process_config() -> Result { // Finally, the task-specific regions. for (i, task) in kconfig.tasks.iter().enumerate() { for (name, region) in &task.owned_regions { - region_table.insert(RegionKey::Owned(i, name.clone()), *region); + let mut base = region.base; + for (j, &size) in region.sizes.iter().enumerate() { + let r = RegionConfig { + base, + size, + attributes: region.attributes, + }; + base += size; + region_table.insert( + RegionKey::Owned { + task_index: i, + chunk_index: j, + memory_name: name.clone(), + }, + r, + ); + } } } @@ -95,12 +124,18 @@ fn process_config() -> Result { region_table.get_index_of(&RegionKey::Null).unwrap(), ]; - for name in task.owned_regions.keys() { - regions.push( - region_table - .get_index_of(&RegionKey::Owned(i, name.clone())) - .unwrap(), - ); + for (name, region) in &task.owned_regions { + for j in 0..region.sizes.len() { + regions.push( + region_table + .get_index_of(&RegionKey::Owned { + task_index: i, + chunk_index: j, + memory_name: name.clone(), + }) + .unwrap(), + ); + } } for name in &task.shared_regions { @@ -292,7 +327,14 @@ fn translate_address( task_index: usize, address: OwnedAddress, ) -> u32 { - let key = RegionKey::Owned(task_index, address.region_name); + // Addresses within a particular task's memory span can be calculated from + // the base address of the task's first memory region, which has a + // chunk_index of 0 (since all chunks within the span are contiguous). + let key = RegionKey::Owned { + task_index, + chunk_index: 0, + memory_name: address.region_name, + }; region_table[&key].base + address.offset }