From 81516ffa14bc06f91b7e51cafe175cb620dc1be5 Mon Sep 17 00:00:00 2001 From: Toby Vincent Date: Fri, 2 Dec 2022 19:17:45 -0600 Subject: feat: impl nonprintable parsing --- Cargo.lock | 2 +- Cargo.toml | 3 +- src/cli.rs | 105 +++++++++++------------------------------------------------- src/lib.rs | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 24 ++++++++++---- 5 files changed, 138 insertions(+), 96 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5855f2b..3f56043 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,5 +3,5 @@ version = 3 [[package]] -name = "cat" +name = "catr" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 8356e5d..6d2cea2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,8 @@ [package] -name = "cat" +name = "catr" version = "0.1.0" edition = "2021" +description = "Rust port of GNU CoreUtils/Unix command `cat`" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/src/cli.rs b/src/cli.rs index bab0b84..12f1cfc 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,36 +1,14 @@ -use std::{ - env, - io::{BufRead, BufReader, Write}, -}; +use std::env; use crate::{Error, Input, Result}; -const HELP: &str = r#"Usage: cat [OPTION]... [FILE]... -Concatenate FILE(s) to standard output. - -With no FILE, or when FILE is -, read standard input. - - -A, --show-all equivalent to -vET - -b, --number-nonblank number nonempty output lines, overrides -n - -e equivalent to -vE - -E, --show-ends display $ at end of each line - -n, --number number all output lines - -s, --squeeze-blank suppress repeated empty output lines - -t equivalent to -vT - -T, --show-tabs display TAB characters as ^I - -u (ignored) - -v, --show-nonprinting use ^ and M- notation, except for LFD and TAB - --help display this help and exit - --version output version information and exit - -Examples: - cat f - g Output f's contents, then standard input, then g's contents. 
- cat Copy standard input to standard output."#; +// NOTE: I assumed I should keep this "DIY". I would most likely use some "de facto" external +// libraries like `clap`, `serde`, ect. #[derive(Debug, Default)] pub struct Cli { - files: Vec<Input>, - opts: Opts, + pub files: Vec<Input>, + pub opts: Opts, } impl Cli { @@ -46,87 +24,40 @@ impl Cli { }; Ok(cli) }) - } - - pub fn run(mut self) -> Result<()> { - if self.opts.help { - println!("{}", HELP); - std::process::exit(0); - } - if self.opts.version { - println!("cat (ported to Rust) {}", env!("CARGO_PKG_VERSION")); - std::process::exit(0); - } - - if self.files.is_empty() { - self.files.push(Input::Stdin); - } - - let stdout = std::io::stdout(); - let mut writer = stdout.lock(); - - for file in self.files.into_iter() { - let reader = BufReader::new(file.reader()?); - - let mut nonblank_line_nr = 0; - - for (index, res) in reader.lines().enumerate() { - let mut line = res?; - - if line.is_empty() && self.opts.squeeze_blank { - continue; - } - - if self.opts.show_ends { - line.push('$'); - } - - if self.opts.show_tabs { - line = line.replace('\t', "^I"); - } - - if self.opts.number_nonblank { - if !line.is_empty() { - nonblank_line_nr += 1; - write!(writer, "{:>6} ", nonblank_line_nr)?; - } - } else if self.opts.number { - write!(writer, "{:>6} ", index + 1)?; + .map(|mut cli| { + if cli.files.is_empty() { + cli.files.push(Input::Stdin); } - - writeln!(writer, "{}", line)?; - } - } - - Ok(()) + cli + }) } } #[derive(Debug, Default)] pub struct Opts { /// number nonempty output lines, overrides -n - number_nonblank: bool, + pub number_nonblank: bool, /// display $ at end of each line - show_ends: bool, + pub show_ends: bool, /// number all output lines - number: bool, + pub number: bool, /// suppress repeated empty output lines - squeeze_blank: bool, + pub squeeze_blank: bool, /// display TAB characters as ^I - show_tabs: bool, + pub show_tabs: bool, /// use ^ and M- notation, except for LFD and TAB - show_nonprinting: bool, + 
pub show_nonprinting: bool, /// display help and exit - help: bool, + pub help: bool, /// output version information and exit - version: bool, + pub version: bool, } impl Opts { diff --git a/src/lib.rs b/src/lib.rs index ab2ff99..0d45606 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,3 +5,103 @@ pub use crate::input::Input; mod cli; mod error; mod input; + +pub const HELP: &str = r#"Usage: cat [OPTION]... [FILE]... +Concatenate FILE(s) to standard output. + +With no FILE, or when FILE is -, read standard input. + + -A, --show-all equivalent to -vET + -b, --number-nonblank number nonempty output lines, overrides -n + -e equivalent to -vE + -E, --show-ends display $ at end of each line + -n, --number number all output lines + -s, --squeeze-blank suppress repeated empty output lines + -t equivalent to -vT + -T, --show-tabs display TAB characters as ^I + -u (ignored) + -v, --show-nonprinting use ^ and M- notation, except for LFD and TAB + --help display this help and exit + --version output version information and exit + +Examples: + cat f - g Output f's contents, then standard input, then g's contents. 
+ cat Copy standard input to standard output."#; + +pub fn run(cli: Cli) -> Result<()> { + use std::io::{BufReader, Read, Write}; + + let stdout = std::io::stdout(); + let mut writer = stdout.lock(); + + for file in cli.files.into_iter() { + let mut reader = BufReader::new(file.reader()?); + + let mut nonblank_line_nr = 0; + let mut buf = Vec::new(); + reader.read_to_end(&mut buf)?; + + for (index, arr) in buf.split(|c| *c == b'\n').enumerate() { + if arr.is_empty() && cli.opts.squeeze_blank { + continue; + } + + let mut line = Vec::new(); + for c in arr { + let parsed = match *c { + b'\t' if cli.opts.show_tabs => vec![b'^', b'I'], + b'\t' => vec![b'\t'], + c if cli.opts.show_nonprinting => parse_nonprinting_char(c), + c => vec![c], + }; + line.extend(parsed) + } + + if cli.opts.show_ends { + line.push(b'$'); + } + + line.push(b'\n'); + + if cli.opts.number_nonblank { + if !line.is_empty() { + nonblank_line_nr += 1; + write!(writer, "{:>6} ", nonblank_line_nr)?; + } + } else if cli.opts.number { + write!(writer, "{:>6} ", index + 1)?; + } + + writer.write_all(&line)? + } + } + + Ok(()) +} + +fn parse_nonprinting_char(c: u8) -> Vec<u8> { + match c { + c @ 0..=31 => into_ctrl_char(c), + c @ 32..=126 => vec![c], + 127 => into_unknown_char(), + c @ 128.. => into_meta_char(c), + } +} + +fn into_ctrl_char(c: u8) -> Vec<u8> { + vec![b'^', c + 64] +} + +fn into_meta_char(c: u8) -> Vec<u8> { + let mut buf = "M-^".as_bytes().to_vec(); + match c - 128 { + c @ 0..=31 => buf.extend(vec![b'^', c + 64]), + c @ 32..=127 => buf.push(c), + 128.. => buf.extend(into_unknown_char()), + }; + buf +} + +fn into_unknown_char() -> Vec<u8> { + vec![b'^', b'?'] +} diff --git a/src/main.rs b/src/main.rs index 290ddda..8966028 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,20 @@ -use cat::{Cli, Result}; +use catr::{run, Cli, HELP}; -/// I assumed I should keep this "DIY". I would most likely use some "de facto" external -/// libraries like `clap`, `serde`, ect. 
+fn main() { + let cli = match Cli::parse() { + Ok(cli) => cli, + Err(err) => { + eprintln!("{}\n{}", err, HELP); + std::process::exit(1); + } + }; -fn main() -> Result<()> { - let cli = Cli::parse()?; - - cli.run() + if cli.opts.help { + println!("{}", HELP) + } else if cli.opts.version { + println!("catr (rust port of cat) v{}", env!("CARGO_PKG_VERSION")) + } else if let Err(err) = run(cli) { + eprintln!("{}", err); + std::process::exit(1); + } } -- cgit v1.2.3-70-g09d2