From bdc0d972561b0ff762a39ebb4c37545af93c48db Mon Sep 17 00:00:00 2001
From: lif <>
Date: Wed, 11 Jan 2023 08:37:52 +0000
Subject: [PATCH] Make the mechanism for escaping discoverable and customizable

This changes the method of entering an escape sequence:
- raw Ctrl+C gets sent to the VM unimpeded.
- by default, the sequence Ctrl+], Ctrl+C is used to quit the program
  (`^]^C`)
- this can be customized or removed via CLI flags, allowing the string
  to be of arbitrary length.
  - i.e. if you `propolis-cli serial -e "beans"` and then type "bea",
    nothing gets sent to the VM yet, not even the "b". Then, if you type:
    1. "k", the VM gets sent "beak"
    2. "ns", the VM doesn't get sent anything else, and the client
       exits.
- the client can be configured to pass through an arbitrary prefix
  length of the escape string before it starts suppressing inputs, such
  that you can, for example, mimic ssh's Enter-tilde-dot sequence
  without temporarily suppressing Enter presses not intended to
  start an escape sequence, which would interfere with function:
  `-e '^M~.' --escape-prefix-length=1` (this also works around ANSI
  escape sequences being sent by xterm-like emulators when Enter is
  pressed in a shell that sends a request for such)
---
 Cargo.lock                   |   1 +
 Cargo.toml                   |   1 +
 bin/propolis-cli/Cargo.toml  |   1 +
 bin/propolis-cli/src/main.rs | 246 +++++++++++++++++++++++++----------
 4 files changed, 179 insertions(+), 70 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 3fc4953b2..471297877 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2813,6 +2813,7 @@ dependencies = [
  "futures",
  "libc",
  "propolis-client",
+ "regex",
  "reqwest",
  "serde",
  "serde_json",
diff --git a/Cargo.toml b/Cargo.toml
index fe9f4b769..5d4e02eaa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -96,6 +96,7 @@ propolis-server-config = { path = "crates/propolis-server-config" }
 propolis_types = { path = "crates/propolis-types" }
 quote = "1.0"
 rand = "0.8"
+regex = "1.7.1"
 reqwest = "0.11.12"
 rfb = { git = "https://github.com/oxidecomputer/rfb", rev = "0cac8d9c25eb27acfa35df80f3b9d371de98ab3b" }
 ring = "0.16"
diff --git a/bin/propolis-cli/Cargo.toml b/bin/propolis-cli/Cargo.toml
index f9e17b258..7463dfc8c 100644
--- a/bin/propolis-cli/Cargo.toml
+++ b/bin/propolis-cli/Cargo.toml
@@ -11,6 +11,7 @@ anyhow.workspace = true
 clap = { workspace = true, features = ["derive"] }
 futures.workspace = true
 libc.workspace = true
+regex.workspace = true
 propolis-client = { workspace = true, features = ["generated"] }
 slog.workspace = true
 slog-async.workspace = true
diff --git a/bin/propolis-cli/src/main.rs b/bin/propolis-cli/src/main.rs
index 80102aaac..cf86e79a7 100644
--- a/bin/propolis-cli/src/main.rs
+++ b/bin/propolis-cli/src/main.rs
@@ -17,6 +17,7 @@ use propolis_client::handmade::{
     },
     Client,
 };
+use regex::bytes::Regex;
 use slog::{o, Drain, Level, Logger};
 use tokio::io::{AsyncReadExt, AsyncWriteExt};
 use tokio_tungstenite::tungstenite::protocol::Role;
@@ -90,6 +91,29 @@ enum Command {
         /// Defaults to the most recent 16 KiB of console output (-16384).
         #[clap(long, short)]
         byte_offset: Option<i64>,
+
+        /// If this sequence of bytes is typed, the client will exit.
+        /// Defaults to "^]^C" (Ctrl+], Ctrl+C). Note that the string passed
+        /// for this argument must be valid UTF-8, and is used verbatim without
+        /// any parsing; in most shells, if you wish to include a special
+        /// character (such as Enter or a Ctrl+letter combo), you can insert
+        /// the character by preceding it with Ctrl+V at the command line.
+        #[clap(long, short, default_value = "\x1d\x03")]
+        escape_string: String,
+
+        /// The number of bytes from the beginning of the escape string to pass
+        /// to the VM before beginning to buffer inputs until a mismatch.
+        /// Defaults to 0, such that input matching the escape string does not
+        /// get sent to the VM at all until a non-matching character is typed.
+        /// To mimic the escape sequence for exiting SSH ("\n~."), you may pass
+        /// `-e '^M~.' --escape-prefix-length=1` such that newlines are sent to
+        /// the VM immediately.
+        #[clap(long, default_value = "0")]
+        escape_prefix_length: usize,
+
+        /// Disable escape string altogether (to exit, use pkill or similar).
+        #[clap(long, short = 'E')]
+        no_escape: bool,
     },
 
     /// Migrate instance to new propolis-server
@@ -225,60 +249,28 @@ async fn put_instance(
 async fn stdin_to_websockets_task(
     mut stdinrx: tokio::sync::mpsc::Receiver<Vec<u8>>,
     wstx: tokio::sync::mpsc::Sender<Vec<u8>>,
+    mut escape: Option<EscapeSequence>,
 ) {
-    // next_raw must live outside loop, because Ctrl-A should work across
-    // multiple inbuf reads.
-    let mut next_raw = false;
-
-    loop {
-        let inbuf = if let Some(inbuf) = stdinrx.recv().await {
-            inbuf
-        } else {
-            continue;
-        };
-
-        // Put bytes from inbuf to outbuf, but don't send Ctrl-A unless
-        // next_raw is true.
-        let mut outbuf = Vec::with_capacity(inbuf.len());
-
-        let mut exit = false;
-        for c in inbuf {
-            match c {
-                // Ctrl-A means send next one raw
-                b'\x01' => {
-                    if next_raw {
-                        // Ctrl-A Ctrl-A should be sent as Ctrl-A
-                        outbuf.push(c);
-                        next_raw = false;
-                    } else {
-                        next_raw = true;
-                    }
-                }
-                b'\x03' => {
-                    if !next_raw {
-                        // Exit on non-raw Ctrl-C
-                        exit = true;
-                        break;
-                    } else {
-                        // Otherwise send Ctrl-C
-                        outbuf.push(c);
-                        next_raw = false;
-                    }
+    match &mut escape {
+        Some(esc_sequence) => {
+            // `recv` yields `None` once stdin closes; stop rather than spin
+            while let Some(inbuf) = stdinrx.recv().await {
+                // withhold bytes that may be part of the escape sequence
+                let (outbuf, exit) = esc_sequence.process(inbuf);
+                // Send what we have, even if we're about to exit.
+                if !outbuf.is_empty() {
+                    wstx.send(outbuf).await.unwrap();
                 }
-                _ => {
-                    outbuf.push(c);
-                    next_raw = false;
+
+                if exit {
+                    break;
                 }
             }
         }
-
-        // Send what we have, even if there's a Ctrl-C at the end.
-        if !outbuf.is_empty() {
-            wstx.send(outbuf).await.unwrap();
-        }
-
-        if exit {
-            break;
+        None => {
+            while let Some(buf) = stdinrx.recv().await {
+                wstx.send(buf).await.unwrap();
+            }
         }
     }
 }
@@ -290,7 +282,10 @@ async fn test_stdin_to_websockets_task() {
     let (stdintx, stdinrx) = tokio::sync::mpsc::channel(16);
     let (wstx, mut wsrx) = tokio::sync::mpsc::channel(16);
 
-    tokio::spawn(async move { stdin_to_websockets_task(stdinrx, wstx).await });
+    let escape = Some(EscapeSequence::new(vec![0x1d, 0x03], 0).unwrap());
+    tokio::spawn(async move {
+        stdin_to_websockets_task(stdinrx, wstx, escape).await
+    });
 
     // send characters, receive characters
     stdintx
@@ -300,33 +295,22 @@ async fn test_stdin_to_websockets_task() {
     let actual = wsrx.recv().await.unwrap();
     assert_eq!(String::from_utf8(actual).unwrap(), "test post please ignore");
 
-    // don't send ctrl-a
-    stdintx.send("\x01".chars().map(|c| c as u8).collect()).await.unwrap();
+    // don't send a started escape sequence
+    stdintx.send("\x1d".chars().map(|c| c as u8).collect()).await.unwrap();
     assert_eq!(wsrx.try_recv(), Err(TryRecvError::Empty));
 
-    // the "t" here is sent "raw" because of last ctrl-a but that doesn't change anything
+    // since we didn't enter the \x03, the previous \x1d shows up here
     stdintx.send("test".chars().map(|c| c as u8).collect()).await.unwrap();
     let actual = wsrx.recv().await.unwrap();
-    assert_eq!(String::from_utf8(actual).unwrap(), "test");
+    assert_eq!(String::from_utf8(actual).unwrap(), "\x1dtest");
 
-    // ctrl-a ctrl-c = only ctrl-c sent
-    stdintx.send("\x01\x03".chars().map(|c| c as u8).collect()).await.unwrap();
-    let actual = wsrx.recv().await.unwrap();
-    assert_eq!(String::from_utf8(actual).unwrap(), "\x03");
-
-    // same as above, across two messages
-    stdintx.send("\x01".chars().map(|c| c as u8).collect()).await.unwrap();
+    // \x03 gets sent if not preceded by \x1d
     stdintx.send("\x03".chars().map(|c| c as u8).collect()).await.unwrap();
-    assert_eq!(wsrx.try_recv(), Err(TryRecvError::Empty));
     let actual = wsrx.recv().await.unwrap();
     assert_eq!(String::from_utf8(actual).unwrap(), "\x03");
 
-    // ctrl-a ctrl-a = only ctrl-a sent
-    stdintx.send("\x01\x01".chars().map(|c| c as u8).collect()).await.unwrap();
-    let actual = wsrx.recv().await.unwrap();
-    assert_eq!(String::from_utf8(actual).unwrap(), "\x01");
-
-    // ctrl-c on its own means exit
+    // \x1d followed by \x03 means exit, even if they're separate messages
+    stdintx.send("\x1d".chars().map(|c| c as u8).collect()).await.unwrap();
     stdintx.send("\x03".chars().map(|c| c as u8).collect()).await.unwrap();
     assert_eq!(wsrx.try_recv(), Err(TryRecvError::Empty));
 
@@ -337,6 +321,7 @@ async fn test_stdin_to_websockets_task() {
 async fn serial(
     addr: SocketAddr,
     byte_offset: Option<i64>,
+    escape: Option<EscapeSequence>,
 ) -> anyhow::Result<()> {
     let client = propolis_client::Client::new(&format!("http://{}", addr));
     let mut req = client.instance_serial();
@@ -379,7 +364,9 @@ async fn serial(
         }
     });
 
-    tokio::spawn(async move { stdin_to_websockets_task(stdinrx, wstx).await });
+    tokio::spawn(async move {
+        stdin_to_websockets_task(stdinrx, wstx, escape).await
+    });
 
     loop {
         tokio::select! {
@@ -574,7 +561,20 @@ async fn main() -> anyhow::Result<()> {
         }
         Command::Get => get_instance(&client).await?,
         Command::State { state } => put_instance(&client, state).await?,
-        Command::Serial { byte_offset } => serial(addr, byte_offset).await?,
+        Command::Serial {
+            byte_offset,
+            escape_string,
+            escape_prefix_length,
+            no_escape,
+        } => {
+            let escape = if no_escape || escape_string.is_empty() {
+                None
+            } else {
+                let escape_vector = escape_string.into_bytes();
+                Some(EscapeSequence::new(escape_vector, escape_prefix_length)?)
+            };
+            serial(addr, byte_offset, escape).await?
+        }
         Command::Migrate { dst_server, dst_port, dst_uuid, crucible_disks } => {
             let dst_addr = SocketAddr::new(dst_server, dst_port);
             let dst_client = Client::new(dst_addr, log.clone());
@@ -628,3 +628,109 @@ impl Drop for RawTermiosGuard {
         }
     }
 }
+
+/// Matcher that watches typed input for the client's exit sequence.
+struct EscapeSequence {
+    /// the byte string that makes the client exit once typed in full
+    bytes: Vec<u8>,
+    /// how many leading bytes of `bytes` are forwarded to the VM immediately
+    prefix_length: usize,
+
+    // the fields below persist between calls to EscapeSequence::process, as
+    // the bytes being checked for likely won't all arrive at once.
+    /// position of next potential match in the escape sequence
+    esc_pos: usize,
+    /// buffer for accumulating bytes that may be part of an ANSI Cursor
+    /// Position Report sent from xterm-likes that we should ignore (else any
+    /// escape sequence containing newlines before its `prefix_length` would
+    /// be unusable in a shell that requests such a report per newline)
+    ansi_curs_check: Vec<u8>,
+    /// pattern matching partial-to-complete versions of the above, stored
+    /// here so that it is only compiled once, at construction time
+    ansi_curs_pat: Regex,
+}
+
+impl EscapeSequence {
+    /// `bytes` must be non-empty, as `process` indexes into it; errors if
+    /// `prefix_length` (bytes let through unbuffered) exceeds `bytes.len()`.
+    fn new(bytes: Vec<u8>, prefix_length: usize) -> anyhow::Result<Self> {
+        let escape_len = bytes.len();
+        if prefix_length > escape_len {
+            anyhow::bail!(
+                "prefix length {} is greater than length of escape string ({})",
+                prefix_length,
+                escape_len
+            );
+        }
+        // matches partial prefixes of 'CSI row ; column R' (e.g. "\x1b[14;30R")
+        let ansi_curs_pat = Regex::new("^\x1b(\\[([0-9]+(;([0-9]+R?)?)?)?)?$")?;
+
+        Ok(EscapeSequence {
+            bytes,
+            prefix_length,
+            esc_pos: 0,
+            ansi_curs_check: Vec::new(),
+            ansi_curs_pat,
+        })
+    }
+
+    /// returns the bytes that can safely be committed to the serial port, and
+    /// whether the escape sequence has now been entered completely (in which
+    /// case the program should exit).
+    fn process(&mut self, inbuf: Vec<u8>) -> (Vec<u8>, bool) {
+        // Put bytes from inbuf to outbuf, but don't send characters in the
+        // escape string sequence unless we bail.
+        let mut outbuf = Vec::with_capacity(inbuf.len());
+
+        for c in inbuf {
+            if !self.ignore_ansi_cpr_seq(&mut outbuf, c) {
+                // is this char a match for the next byte of the sequence?
+                if c == self.bytes[self.esc_pos] {
+                    self.esc_pos += 1;
+                    if self.esc_pos == self.bytes.len() {
+                        // Exit on completed escape string
+                        return (outbuf, true);
+                    } else if self.esc_pos <= self.prefix_length {
+                        // let through incomplete prefix up to the given limit
+                        outbuf.push(c);
+                    }
+                } else {
+                    // they bailed from the sequence: feed through whatever
+                    // matched so far, except bytes below `prefix_length`,
+                    // which were already sent as they were typed
+                    let held_from = self.prefix_length.min(self.esc_pos);
+                    outbuf.extend(&self.bytes[held_from..self.esc_pos]);
+                    self.esc_pos = 0;
+                    outbuf.push(c);
+                }
+            }
+        }
+        (outbuf, false)
+    }
+
+    /// ignore ANSI escape sequence for the Cursor Position Report sent by
+    /// xterm-likes in response to shells requesting one after each newline.
+    /// returns true if further processing of character `c` shouldn't apply
+    /// (i.e. we find a partial or complete match of the ANSI CPR pattern)
+    fn ignore_ansi_cpr_seq(&mut self, outbuf: &mut Vec<u8>, c: u8) -> bool {
+        if self.esc_pos > 0
+            && self.esc_pos <= self.prefix_length
+            && b"\r\n".contains(&self.bytes[self.esc_pos - 1])
+        {
+            self.ansi_curs_check.push(c);
+            if self.ansi_curs_pat.is_match(&self.ansi_curs_check) {
+                // end of the sequence?
+                if c == b'R' {
+                    outbuf.extend(&self.ansi_curs_check);
+                    self.ansi_curs_check.clear();
+                }
+                return true;
+            } else {
+                self.ansi_curs_check.pop(); // we're not `continue`ing
+                outbuf.extend(&self.ansi_curs_check);
+                self.ansi_curs_check.clear();
+            }
+        }
+        false
+    }
+}