From 53af93aaab74caf1a3c4c7b960d01f16c300491c Mon Sep 17 00:00:00 2001 From: Nick Krichevsky Date: Sat, 22 Jan 2022 16:54:51 -0500 Subject: [PATCH] Add parsing of the most basic crontab entries --- Cargo.toml | 14 ++++ src/lib.rs | 70 +++++++++++++++++++ src/parse.rs | 194 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 278 insertions(+) create mode 100644 Cargo.toml create mode 100644 src/lib.rs create mode 100644 src/parse.rs diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..8c30f17 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "ehron" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +nom = "7.1" +thiserror = "1.0" +itertools = "0.10" + +[dev-dependencies] +test-case = "1.2" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..afb18d8 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,70 @@ +#![warn(clippy::all, clippy::pedantic)] + +use std::str::FromStr; + +mod parse; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum CronSpecifier { + Any, + Specifically(u8), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct CronEntry { + minute: CronSpecifier, + hour: CronSpecifier, + day_of_month: CronSpecifier, + month: CronSpecifier, + day_of_week: CronSpecifier, +} + +impl FromStr for CronEntry { + type Err = parse::Error; + + fn from_str(entry: &str) -> Result { + parse::parse_entry(entry) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::mem; + use test_case::test_case; + use CronSpecifier::{Any, Specifically}; + + #[test_case("* * * * *", &CronEntry{minute: Any, hour: Any, day_of_month: Any, month: Any, day_of_week: Any})] + #[test_case("0 4 * * *", &CronEntry{minute: Specifically(0), hour: Specifically(4), day_of_month: Any, month: Any, day_of_week: Any})] + #[test_case("0 4 10 5 4", &CronEntry{minute: Specifically(0), hour: Specifically(4), day_of_month: 
Specifically(10), month: Specifically(5), day_of_week: Specifically(4)})] + #[test_case("* * 10 5 *", &CronEntry{minute: Any, hour: Any, day_of_month: Specifically(10), month: Specifically(5), day_of_week: Any})] + fn test_successful_parse(to_parse: &str, expected: &CronEntry) { + let parse_res = CronEntry::from_str(to_parse); + + match parse_res { + Ok(parsed_entry) => assert_eq!(expected, &parsed_entry), + Err(err) => panic!("Got error: '{}'", err), + } + } + + #[test_case("1 2 3 4 5 6 7", &parse::Error::ExtraData("Extra data found".to_string()))] + #[test_case("* * * *", &parse::Error::NotEnoughData)] + #[test_case("* * * * a", &parse::Error::InvalidSpecifier("Parsing a letter isn't gonna work".to_string()))] + #[test_case("100 * * * *", &parse::Error::InvalidMinute(100))] + #[test_case("* * 35 * *", &parse::Error::InvalidDayOfMonth(35))] + #[test_case("* * * 13 *", &parse::Error::InvalidMonth(13))] + #[test_case("* * * * 7", &parse::Error::InvalidDayOfWeek(7))] + fn test_failed_parse(to_parse: &str, expected_error: &parse::Error) { + let parse_res = CronEntry::from_str(to_parse); + match parse_res { + Ok(_) => panic!("Parsing unexpectedly succeeded"), + Err(err) => assert_eq!( + mem::discriminant(expected_error), + mem::discriminant(&err), + "Expected error '{}', got '{}'", + expected_error, + err + ), + } + } +} diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..fbf26d3 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,194 @@ +use std::str::FromStr; + +use itertools::Itertools; +use nom::{ + branch::alt, + character::complete::{char, digit1}, + combinator::map_res, + IResult, +}; +use thiserror::Error; + +use crate::{CronEntry, CronSpecifier}; + +#[derive(Debug, Clone, Error, PartialEq, Eq)] +#[non_exhaustive] +pub enum Error { + #[error("An unparsable specifier of '{0}' was given")] + InvalidSpecifier(String), + #[error("Extra data found after entry: '{0}'")] + ExtraData(String), + #[error("Not enough data to parse a full entry")] + 
NotEnoughData, + #[error("{0} is an out of range minute")] + InvalidMinute(u8), + #[error("{0} is an out of range hour")] + InvalidHour(u8), + #[error("{0} is an out of range day of month")] + InvalidDayOfMonth(u8), + #[error("{0} is an out of range month")] + InvalidMonth(u8), + #[error("{0} is an out of range day of week")] + InvalidDayOfWeek(u8), +} + +impl From for Error { + fn from(adapter: ParseErrorAdapter) -> Self { + adapter.0 + } +} + +/// `ParseErrorAdapter` is used to produce [`Error`]s from [`nom::Error`]s. +/// This uses the newtype idiom specifically so that the +/// implementation isn't visible externally, without having to duplicate +/// the enum variants on an internal type. +struct ParseErrorAdapter(Error); + +impl From> for ParseErrorAdapter { + /// When given a nom error that otherwise couldn't be converted to + /// a parse error, we will get as much of the current specifier as possible + /// and declare it an invalid specifier. + fn from(err: nom::error::Error<&str>) -> Self { + // If the failed input part is empty, the only thing we can really assume is that there wasn't + // enough data, as the parser can't really consume nothing + if err.input.is_empty() { + return Self(Error::NotEnoughData); + } + + let specifier = err + .input + .chars() + .take_while(|&c| c != ' ') + .collect::(); + + Self(Error::InvalidSpecifier(specifier)) + } +} + +impl From>> for ParseErrorAdapter { + fn from(err: nom::Err>) -> Self { + match err { + nom::Err::Incomplete(_) => { + panic!("streaming parser was used, but not expected to be. This is a bug.") + } + nom::Err::Error(underlying) | nom::Err::Failure(underlying) => underlying.into(), + } + } +} + +/// `parse_entry` parses a crontab string into a `CronEntry`. 
These are of the +/// form of five strings matching the regular expression `(\d+|\*)`, separated by spaces +pub(crate) fn parse_entry(entry: &str) -> Result { + let res: IResult<&str, CronEntry> = perform_entry_parse(entry); + match res { + Ok((remaining, result)) => { + ensure_all_data_parsed(remaining, &result)?; + ensure_ranges_valid(&result)?; + + Ok(result) + } + Err(err) => { + let err_adapter: ParseErrorAdapter = err.into(); + Err(err_adapter.into()) + } + } +} + +fn ensure_all_data_parsed(remaining: &str, parsed_entry: &CronEntry) -> Result<(), Error> { + if remaining.is_empty() { + Ok(()) + } else { + Err(Error::ExtraData(remaining.to_string())) + } +} + +fn ensure_ranges_valid(parsed_entry: &CronEntry) -> Result<(), Error> { + let check_range = |specifier, min, max, error_variant: fn(u8) -> Error| match specifier { + CronSpecifier::Any => Ok(()), + CronSpecifier::Specifically(value) => { + if value >= min && value <= max { + Ok(()) + } else { + Err(error_variant(value)) + } + } + }; + + check_range(parsed_entry.minute, 0, 59, Error::InvalidMinute)?; + check_range(parsed_entry.hour, 0, 23, Error::InvalidHour)?; + check_range(parsed_entry.day_of_month, 1, 31, Error::InvalidDayOfMonth)?; + check_range(parsed_entry.month, 1, 12, Error::InvalidMonth)?; + check_range(parsed_entry.day_of_week, 0, 6, Error::InvalidDayOfWeek)?; + + Ok(()) +} + +fn perform_entry_parse(entry: &str) -> IResult<&str, CronEntry> { + let (remaining, specifiers) = separated_five_tuple(char(' '), parse_specifier)(entry)?; + let parsed_entry = CronEntry { + minute: specifiers.0, + hour: specifiers.1, + day_of_month: specifiers.2, + month: specifiers.3, + day_of_week: specifiers.4, + }; + + Ok((remaining, parsed_entry)) +} + +fn parse_specifier(chunk: &str) -> IResult<&str, CronSpecifier> { + alt((parse_star_specifier, parse_numeric_specifier))(chunk) +} + +fn parse_star_specifier(chunk: &str) -> IResult<&str, CronSpecifier> { + let (remaining, _) = char('*')(chunk)?; + + Ok((remaining, 
CronSpecifier::Any)) +} + +fn parse_numeric_specifier(chunk: &str) -> IResult<&str, CronSpecifier> { + map_res::<_, _, _, _, ::Err, _, _>(digit1, |raw_num: &str| { + let num = raw_num.parse()?; + Ok(CronSpecifier::Specifically(num)) + })(chunk) +} + +/// Parse five elements separated by some kind of constant specifier. +/// This is similar to [`nom::sequence::separated_pair`], but with five elements, specifically +fn separated_five_tuple( + mut sep: S, + mut parser: P, +) -> impl FnMut(I) -> IResult +where + E: nom::error::ParseError, + S: nom::Parser, + P: nom::Parser, +{ + const NUM_ELEMENTS: usize = 5; + + move |data: I| { + // TODO: This heap allocation is a bit overkill, and this could likely be + // done with some unsafe array initialization, but I wasn't sure how to ensure + // that I handled the dropping properly, so I shied away from it. + let mut parser_outputs = Vec::with_capacity(NUM_ELEMENTS); + let mut remaining = data; + for i in 0..NUM_ELEMENTS { + let (remaining_from_parser, parsed) = parser.parse(remaining)?; + remaining = remaining_from_parser; + parser_outputs.push(parsed); + + if i != NUM_ELEMENTS - 1 { + let (remaining_from_sep, _) = sep.parse(remaining)?; + remaining = remaining_from_sep; + } + } + + assert_eq!(NUM_ELEMENTS, parser_outputs.len()); + let res_tuple = parser_outputs + .into_iter() + .collect_tuple::<(OP, OP, OP, OP, OP)>() + .expect("not enough elements to pack into tuple"); + + Ok((remaining, res_tuple)) + } +}