diff options
Diffstat (limited to 'src/lib.rs')
| -rw-r--r-- | src/lib.rs | 200 |
1 files changed, 200 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..7977d17 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,200 @@ +use std::io::{self, BufRead as _}; +use std::str; + +/// A single zsh extended history item. +#[derive(Debug)] +pub struct Item<'a> { + pub time: u64, + pub duration: u64, + pub cmd: &'a [u8], +} + +impl Item<'_> { + /// Writes one item in the format b": <timestamp>:<duration>;<command>\n". + pub fn write<W: io::Write>(&self, out: &mut W) -> Result<(), Error> { + write!(out, ": {}:{};", self.time, self.duration)?; + out.write_all(self.cmd)?; + Ok(()) + } +} + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error(transparent)] + Io(#[from] io::Error), + #[error(transparent)] + Utf8(#[from] str::Utf8Error), + #[error(transparent)] + Int(#[from] std::num::ParseIntError), + #[error("Format error: {0}")] + Format(String), +} + +/// A reader that parses extended zsh history items from a stream. +/// It reuses an internal buffer so that items are yielded without copying. +pub struct Reader<R: io::Read> { + reader: io::BufReader<R>, + buf: Vec<u8>, +} + +impl<R: io::Read> Reader<R> { + pub fn new(inner: R) -> Self { + Reader { + reader: io::BufReader::new(inner), + buf: Vec::with_capacity(1024), + } + } + + /// Parses self.buf into an Item. + /// Expected format: b": <timestamp>:<duration>;<command>" + /// This method borrows from self.buf. + fn parse(&self) -> Result<Item<'_>, Error> { + let mut item = &self.buf[..]; + + if let Some(stripped) = item.strip_prefix(b":").or_else(|| item.strip_prefix(b"\\:")) { + item = stripped; + } else { + return Err(Error::Format(format!( + "Item does not start with ':' or '\\:' : {:?}", + str::from_utf8(&item)? + ))); + } + + item = item.strip_prefix(b" ").unwrap_or(item); + + let colon_pos = item.iter().position(|&c| c == b':') + .ok_or_else(|| Error::Format("Missing colon between timestamp and duration.".into()))?; + let time: u64 = str::from_utf8(&item[..colon_pos])?.parse()?; + + let remainder = &item[colon_pos + 1..]; + let semi_pos = remainder.iter().position(|&c| c == b';') + .ok_or_else(|| Error::Format("Missing semicolon in item.".into()))?; + let duration: u64 = str::from_utf8(&remainder[..semi_pos])?.parse()?; + let cmd = &remainder[semi_pos + 1..]; + + Ok(Item { time, duration, cmd }) + } + + // Reads and returns the next item, or None on EOF. + pub fn read_item(&mut self) -> Result<Option<Item<'_>>, Error> { + self.buf.clear(); + + loop { + if self.reader.read_until(b'\n', &mut self.buf)? == 0 { + return if self.buf.is_empty() { + Ok(None) + } else { + Ok(Some(self.parse()?)) + } + } + + // If a command ends with a backslash, zsh makes sure to prepend a space, such that + // backslash-quoted newlines mean: command continues on next line. + if !self.buf.ends_with(b"\\\n") { + break; + } + } + Ok(Some(self.parse()?)) + } +} + +pub fn merge<R1: io::Read, R2: io::Read, R3: io::Read, W: io::Write>( + mut left: Reader<R1>, + mut right: Reader<R2>, + mut ancestor: Reader<R3>, + out: &mut W, +) -> Result<(), Error> { + // Get the first item from each stream. + let mut l = left.read_item()?; + let mut r = right.read_item()?; + let mut a = ancestor.read_item()?; + + let out = &mut io::BufWriter::new(out); + + // While any stream still has an item: + while l.is_some() || r.is_some() || a.is_some() { + // Determine the current earliest time over all non-None items. + let current_time = [l.as_ref(), r.as_ref(), a.as_ref()] + .iter() + .filter_map(|opt| opt.map(|i| i.time)) + .min() + .unwrap(); + + // Grab the "chunk": all items whose time equals the current time. + // (After this step, each of left, right, and ancestor is either Some(item) + // with the current time or None.) + let l_chunk = l.as_ref().filter(|i| i.time == current_time); + let r_chunk = r.as_ref().filter(|i| i.time == current_time); + let a_chunk = a.as_ref().filter(|i| i.time == current_time); + + match (l_chunk, r_chunk, a_chunk) { + // Case 1: Both left and right are present. + (Some(l), Some(r), a_opt) => { + if l.cmd == r.cmd { + // They agree: output it. + l.write(out)?; + } else { + // They differ. Check against ancestor. + if let Some(a) = a_opt { + if l.cmd == a.cmd && r.cmd != a.cmd { + r.write(out)?; + } else if r.cmd == a.cmd && l.cmd != a.cmd { + l.write(out)?; + } else { + // Otherwise both differ, so output both. + l.write(out)?; + r.write(out)?; + } + } else { + // No ancestor: output both. + l.write(out)?; + r.write(out)?; + } + } + } + + // Case 2a: Left is present; right is missing. + (Some(l), None, a_opt) => { + if let Some(a) = a_opt { + if l.cmd == a.cmd { + // Left == ancestor: deletion. + } else { + l.write(out)?; + } + } else { + l.write(out)?; + } + } + + // Case 2b: Right is present; left is missing. + (None, Some(r), a_opt) => { + if let Some(a) = a_opt { + if r.cmd == a.cmd { + } else { + r.write(out)?; + } + } else { + r.write(out)?; + } + } + + // Case 3: Neither left nor right have a item. + (None, None, Some(_)) => { + // Interpret as a deletion. + } + (None, None, None) => unreachable!(), + } + + // Advance the readers. + if l_chunk.is_some() { + l = left.read_item()?; + } + if r_chunk.is_some() { + r = right.read_item()?; + } + if a_chunk.is_some() { + a = ancestor.read_item()?; + } + } + Ok(()) +} |
