From 1c157cdf7da21f02172088cdc9616e709b51b46b Mon Sep 17 00:00:00 2001
From: Christoph Groth
Date: Mon, 24 Mar 2025 17:51:01 +0100
Subject: Initial version

---
Note: the code below has been revised relative to commit 1c157cd (explicit
flush of the merged output, normalised final newline, format errors for
non-UTF-8 lines, non-zero exit status on misuse, added documentation). The
blob ids on the "index" lines are those of the original commit and do not
match the revised contents.

 src/lib.rs  | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.rs |  45 ++++++++++++
 2 files changed, 277 insertions(+)
 create mode 100644 src/lib.rs
 create mode 100644 src/main.rs

(limited to 'src')

diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..7977d17
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,232 @@
+//! Reading, writing and three-way merging of zsh extended history files.
+//!
+//! An item has the form `: <time>:<duration>;<cmd>` and ends at the first newline that is not
+//! preceded by a backslash.
+
+use std::io::{self, BufRead as _, Write as _};
+use std::str;
+
+/// A single zsh extended history item.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Item<'a> {
+    /// Value of the `<time>` field; [`merge`] orders items by it.
+    pub time: u64,
+    /// Value of the `<duration>` field.
+    pub duration: u64,
+    /// Everything after the first `;`. Items produced by [`Reader`] include the terminating
+    /// newline here.
+    pub cmd: &'a [u8],
+}
+
+impl Item<'_> {
+    /// Writes the item to `out` as `: <time>:<duration>;<cmd>`.
+    ///
+    /// `cmd` is written verbatim, so the item ends in a newline only if `cmd` does.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::Io`] if writing to `out` fails.
+    pub fn write<W: io::Write>(&self, out: &mut W) -> Result<(), Error> {
+        write!(out, ": {}:{};", self.time, self.duration)?;
+        out.write_all(self.cmd)?;
+        Ok(())
+    }
+}
+
+/// Errors produced while reading, parsing or writing history items.
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    /// The underlying reader or writer failed.
+    #[error(transparent)]
+    Io(#[from] io::Error),
+    /// The `<time>` or `<duration>` field is not valid UTF-8.
+    #[error(transparent)]
+    Utf8(#[from] str::Utf8Error),
+    /// The `<time>` or `<duration>` field is not a valid `u64`.
+    #[error(transparent)]
+    Int(#[from] std::num::ParseIntError),
+    /// The item lacks the leading `:` or one of the field separators.
+    #[error("Format error: {0}")]
+    Format(String),
+}
+
+/// A reader that parses extended zsh history items from a stream.
+///
+/// It reuses an internal buffer so that items are yielded without copying; each [`Item`]
+/// therefore borrows from the reader until the next call to [`Reader::read_item`].
+pub struct Reader<R> {
+    reader: io::BufReader<R>,
+    buf: Vec<u8>,
+}
+
+impl<R: io::Read> Reader<R> {
+    /// Creates a reader over `inner`, which is wrapped in an [`io::BufReader`].
+    pub fn new(inner: R) -> Self {
+        Reader {
+            reader: io::BufReader::new(inner),
+            buf: Vec::with_capacity(1024),
+        }
+    }
+
+    /// Parses `self.buf` into an [`Item`] that borrows from it.
+    ///
+    /// Expected format: `: <time>:<duration>;<cmd>`. The leading `:` may also be spelled `\:`,
+    /// and the space after it is optional.
+    fn parse(&self) -> Result<Item<'_>, Error> {
+        let raw = &self.buf[..];
+
+        let item = raw
+            .strip_prefix(b":")
+            .or_else(|| raw.strip_prefix(b"\\:"))
+            .ok_or_else(|| {
+                // Convert lossily: a malformed line that is also not valid UTF-8 must still be
+                // reported as a format error rather than as a UTF-8 error.
+                Error::Format(format!(
+                    "Item does not start with ':' or '\\:' : {:?}",
+                    String::from_utf8_lossy(raw)
+                ))
+            })?;
+        let item = item.strip_prefix(b" ").unwrap_or(item);
+
+        let colon_pos = item
+            .iter()
+            .position(|&c| c == b':')
+            .ok_or_else(|| Error::Format("Missing colon between timestamp and duration.".into()))?;
+        let time: u64 = str::from_utf8(&item[..colon_pos])?.parse()?;
+
+        let remainder = &item[colon_pos + 1..];
+        let semi_pos = remainder
+            .iter()
+            .position(|&c| c == b';')
+            .ok_or_else(|| Error::Format("Missing semicolon in item.".into()))?;
+        let duration: u64 = str::from_utf8(&remainder[..semi_pos])?.parse()?;
+        let cmd = &remainder[semi_pos + 1..];
+
+        Ok(Item { time, duration, cmd })
+    }
+
+    /// Reads and returns the next item, or `None` on EOF.
+    ///
+    /// An item extends over every line that ends in a backslash. If the input ends without a
+    /// final newline, one is appended so that the item compares equal to, and is written like,
+    /// its newline-terminated counterpart in another stream.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::Io`] if reading fails, and a parse error if the item is malformed.
+    pub fn read_item(&mut self) -> Result<Option<Item<'_>>, Error> {
+        self.buf.clear();
+
+        // If a command ends with a backslash, zsh makes sure to prepend a space, such that
+        // backslash-quoted newlines mean: command continues on next line.
+        while self.reader.read_until(b'\n', &mut self.buf)? != 0 {
+            if !self.buf.ends_with(b"\\\n") {
+                break;
+            }
+        }
+
+        if self.buf.is_empty() {
+            return Ok(None);
+        }
+        if !self.buf.ends_with(b"\n") {
+            self.buf.push(b'\n');
+        }
+        self.parse().map(Some)
+    }
+}
+
+/// Writes `item` to `out` unless `base` holds an item with the same command.
+///
+/// Used when only one side still has the item: if it matches the ancestor, the other side
+/// deleted it and the deletion wins.
+fn write_unless_unchanged<W: io::Write>(
+    item: &Item<'_>,
+    base: Option<&Item<'_>>,
+    out: &mut W,
+) -> Result<(), Error> {
+    match base {
+        Some(base) if base.cmd == item.cmd => Ok(()),
+        _ => item.write(out),
+    }
+}
+
+/// Merges the histories `left` and `right`, using `ancestor` as their common base, into `out`.
+///
+/// Items are processed in order of increasing time, so each input is expected to be sorted by
+/// time. Items with the same time are compared by command:
+///
+/// * present on both sides and equal: written once;
+/// * present on both sides and different: the side that differs from the ancestor is written,
+///   or both if there is no ancestor item or both differ from it;
+/// * present on one side only: written, unless it equals the ancestor item (a deletion);
+/// * present in the ancestor only: dropped.
+///
+/// Pass a reader over [`io::empty`] as `ancestor` for a two-way merge.
+///
+/// # Errors
+///
+/// Returns an error if an input cannot be read or parsed, or if writing to or flushing `out`
+/// fails.
+pub fn merge<L: io::Read, R: io::Read, A: io::Read, W: io::Write>(
+    mut left: Reader<L>,
+    mut right: Reader<R>,
+    mut ancestor: Reader<A>,
+    out: &mut W,
+) -> Result<(), Error> {
+    let mut out = io::BufWriter::new(out);
+
+    // Get the first item from each stream.
+    let mut l = left.read_item()?;
+    let mut r = right.read_item()?;
+    let mut a = ancestor.read_item()?;
+
+    // While any stream still has an item:
+    while l.is_some() || r.is_some() || a.is_some() {
+        // Determine the current earliest time over all non-None items.
+        let current_time = [l.as_ref(), r.as_ref(), a.as_ref()]
+            .iter()
+            .filter_map(|pending| pending.map(|item| item.time))
+            .min()
+            .expect("the loop condition guarantees at least one pending item");
+
+        // Grab the "chunk": the pending items whose time equals the current time.
+        let l_chunk = l.as_ref().filter(|item| item.time == current_time);
+        let r_chunk = r.as_ref().filter(|item| item.time == current_time);
+        let a_chunk = a.as_ref().filter(|item| item.time == current_time);
+
+        match (l_chunk, r_chunk, a_chunk) {
+            // Both sides agree: output the item once.
+            (Some(x), Some(y), _) if x.cmd == y.cmd => x.write(&mut out)?,
+            // The sides differ and exactly one of them still matches the ancestor: the other
+            // side carries the change.
+            (Some(x), Some(y), Some(base)) if x.cmd == base.cmd => y.write(&mut out)?,
+            (Some(x), Some(y), Some(base)) if y.cmd == base.cmd => x.write(&mut out)?,
+            // Both sides differ from the ancestor, or there is no ancestor: output both.
+            (Some(x), Some(y), _) => {
+                x.write(&mut out)?;
+                y.write(&mut out)?;
+            }
+            // Only one side has an item.
+            (Some(x), None, base) => write_unless_unchanged(x, base, &mut out)?,
+            (None, Some(y), base) => write_unless_unchanged(y, base, &mut out)?,
+            // Only the ancestor has an item: both sides deleted it.
+            (None, None, Some(_)) => {}
+            (None, None, None) => unreachable!("some stream holds an item at `current_time`"),
+        }
+
+        // Advance the readers whose item was consumed.
+        if l_chunk.is_some() {
+            l = left.read_item()?;
+        }
+        if r_chunk.is_some() {
+            r = right.read_item()?;
+        }
+        if a_chunk.is_some() {
+            a = ancestor.read_item()?;
+        }
+    }
+
+    // `BufWriter` ignores errors when it flushes on drop, so flush explicitly.
+    out.flush()?;
+    Ok(())
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..044852b
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,45 @@
+use std::fs::File;
+use std::io;
+use zsh_history as zh;
+
+/// Exit status used when the command line is malformed.
+const EXIT_USAGE: i32 = 2;
+
+/// Prints a usage summary for `prog` to standard error.
+fn print_usage(prog: &str) {
+    eprintln!("Usage:");
+    eprintln!("  {} merge <left> <right>             # two–way merge", prog);
+    eprintln!("  {} merge <left> <right> <ancestor>  # three–way merge", prog);
+}
+
+/// Opens the history file at `path` and wraps it in a reader.
+fn open_reader(path: &str) -> Result<zh::Reader<File>, zh::Error> {
+    let file = File::open(path)?;
+    Ok(zh::Reader::new(file))
+}
+
+/// Merges the history files named on the command line and writes the result to stdout.
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let args: Vec<String> = std::env::args().collect();
+
+    // Expect `merge` followed by two or three file names.
+    if !(4..=5).contains(&args.len()) || args[1] != "merge" {
+        // The argument list may be empty, so do not index it for the program name.
+        let prog = args.first().map_or("zsh_history", String::as_str);
+        print_usage(prog);
+        // Misuse must not look like a successful merge to the caller.
+        std::process::exit(EXIT_USAGE);
+    }
+
+    let left = open_reader(&args[2])?;
+    let right = open_reader(&args[3])?;
+
+    let mut out = io::stdout().lock();
+
+    match args.get(4) {
+        Some(path) => zh::merge(left, right, open_reader(path)?, &mut out)?,
+        // Two-way merge: with an empty ancestor no item is ever treated as deleted.
+        None => zh::merge(left, right, zh::Reader::new(io::empty()), &mut out)?,
+    }
+    Ok(())
+}
-- 
cgit v1.2.3-74-g4815