1 files changed, 200 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..7977d17
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,200 @@
+use std::io::{self, BufRead as _};
+use std::str;
+
+/// A single zsh extended history item.
+#[derive(Debug)]
+pub struct Item<'a> {
+    pub time: u64,
+    pub duration: u64,
+    pub cmd: &'a [u8],
+}
+
+impl Item<'_> {
+    /// Writes one item in the format b": <timestamp>:<duration>;<command>\n".
+    pub fn write<W: io::Write>(&self, out: &mut W) -> Result<(), Error> {
+        write!(out, ": {}:{};", self.time, self.duration)?;
+        out.write_all(self.cmd)?;
+        Ok(())
+    }
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error(transparent)]
+    Io(#[from] io::Error),
+    #[error(transparent)]
+    Utf8(#[from] str::Utf8Error),
+    #[error(transparent)]
+    Int(#[from] std::num::ParseIntError),
+    #[error("Format error: {0}")]
+    Format(String),
+}
+
+/// A reader that parses extended zsh history items from a stream.
+/// It reuses an internal buffer so that items are yielded without copying.
+pub struct Reader<R: io::Read> {
+    reader: io::BufReader<R>,
+    buf: Vec<u8>,
+}
+
+impl<R: io::Read> Reader<R> {
+    pub fn new(inner: R) -> Self {
+        Reader {
+            reader: io::BufReader::new(inner),
+            buf: Vec::with_capacity(1024),
+        }
+    }
+
+    /// Parses self.buf into an Item.
+    /// Expected format: b": <timestamp>:<duration>;<command>"
+    /// This method borrows from self.buf.
+    fn parse(&self) -> Result<Item<'_>, Error> {
+        let mut item = &self.buf[..];
+
+        if let Some(stripped) = item.strip_prefix(b":").or_else(|| item.strip_prefix(b"\\:")) {
+            item = stripped;
+        } else {
+            return Err(Error::Format(format!(
+                "Item does not start with ':' or '\\:' : {:?}",
+                str::from_utf8(&item)?
+            )));
+        }
+
+        item = item.strip_prefix(b" ").unwrap_or(item);
+
+        let colon_pos = item.iter().position(|&c| c == b':')
+            .ok_or_else(|| Error::Format("Missing colon between timestamp and duration.".into()))?;
+        let time: u64 = str::from_utf8(&item[..colon_pos])?.parse()?;
+
+        let remainder = &item[colon_pos + 1..];
+        let semi_pos = remainder.iter().position(|&c| c == b';')
+            .ok_or_else(|| Error::Format("Missing semicolon in item.".into()))?;
+        let duration: u64 = str::from_utf8(&remainder[..semi_pos])?.parse()?;
+        let cmd = &remainder[semi_pos + 1..];
+
+        Ok(Item { time, duration, cmd })
+    }
+
+    // Reads and returns the next item, or None on EOF.
+    pub fn read_item(&mut self) -> Result<Option<Item<'_>>, Error> {
+        self.buf.clear();
+
+        loop {
+            if self.reader.read_until(b'\n', &mut self.buf)? == 0 {
+                return if self.buf.is_empty() {
+                    Ok(None)
+                } else {
+                    Ok(Some(self.parse()?))
+                }
+            }
+
+            // If a command ends with a backslash, zsh makes sure to prepend a space, such that
+            // backslash-quoted newlines mean: command continues on next line.
+            if !self.buf.ends_with(b"\\\n") {
+                break;
+            }
+        }
+        Ok(Some(self.parse()?))
+    }
+}
+
+pub fn merge<R1: io::Read, R2: io::Read, R3: io::Read, W: io::Write>(
+    mut left: Reader<R1>,
+    mut right: Reader<R2>,
+    mut ancestor: Reader<R3>,
+    out: &mut W,
+) -> Result<(), Error> {
+    // Get the first item from each stream.
+    let mut l = left.read_item()?;
+    let mut r = right.read_item()?;
+    let mut a = ancestor.read_item()?;
+
+    let out = &mut io::BufWriter::new(out);
+
+    // While any stream still has an item:
+    while l.is_some() || r.is_some() || a.is_some() {
+        // Determine the current earliest time over all non-None items.
+        let current_time = [l.as_ref(), r.as_ref(), a.as_ref()]
+            .iter()
+            .filter_map(|opt| opt.map(|i| i.time))
+            .min()
+            .unwrap();
+
+        // Grab the "chunk": all items whose time equals the current time.
+        // (After this step, each of left, right, and ancestor is either Some(item)
+        // with the current time or None.)
+        let l_chunk = l.as_ref().filter(|i| i.time == current_time);
+        let r_chunk = r.as_ref().filter(|i| i.time == current_time);
+        let a_chunk = a.as_ref().filter(|i| i.time == current_time);
+
+        match (l_chunk, r_chunk, a_chunk) {
+            // Case 1: Both left and right are present.
+            (Some(l), Some(r), a_opt) => {
+                if l.cmd == r.cmd {
+                    // They agree: output it.
+                    l.write(out)?;
+                } else {
+                    // They differ.  Check against ancestor.
+                    if let Some(a) = a_opt {
+                        if l.cmd == a.cmd && r.cmd != a.cmd {
+                            r.write(out)?;
+                        } else if r.cmd == a.cmd && l.cmd != a.cmd {
+                            l.write(out)?;
+                        } else {
+                            // Otherwise both differ, so output both.
+                            l.write(out)?;
+                            r.write(out)?;
+                        }
+                    } else {
+                        // No ancestor: output both.
+                        l.write(out)?;
+                        r.write(out)?;
+                    }
+                }
+            }
+
+            // Case 2a: Left is present; right is missing.
+            (Some(l), None, a_opt) => {
+                if let Some(a) = a_opt {
+                    if l.cmd == a.cmd {
+                        // Left == ancestor: deletion.
+                    } else {
+                        l.write(out)?;
+                    }
+                } else {
+                    l.write(out)?;
+                }
+            }
+
+            // Case 2b: Right is present; left is missing.
+            (None, Some(r), a_opt) => {
+                if let Some(a) = a_opt {
+                    if r.cmd == a.cmd {
+                    } else {
+                        r.write(out)?;
+                    }
+                } else {
+                    r.write(out)?;
+                }
+            }
+
+            // Case 3: Neither left nor right have a item.
+            (None, None, Some(_)) => {
+                // Interpret as a deletion.
+            }
+            (None, None, None) => unreachable!(),
+        }
+
+        // Advance the readers.
+        if l_chunk.is_some() {
+            l = left.read_item()?;
+        }
+        if r_chunk.is_some() {
+            r = right.read_item()?;
+        }
+        if a_chunk.is_some() {
+            a = ancestor.read_item()?;
+        }
+    }
+    Ok(())
+}