From 3100b770037b6f63c519e8556cbbb10409983a4f Mon Sep 17 00:00:00 2001 From: Nick Krichevsky Date: Wed, 15 May 2024 15:23:20 -0400 Subject: [PATCH] Add first pass at parser from parsing chapter --- src/lex.rs | 8 ++- src/lib.rs | 99 ++++++++++++++------------- src/parse.rs | 189 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 246 insertions(+), 50 deletions(-) create mode 100644 src/parse.rs diff --git a/src/lex.rs b/src/lex.rs index fb7f4e9..8c15afd 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -3,9 +3,9 @@ use std::iter::{self, Peekable}; use thiserror::Error; -use crate::{ScriptError, ScriptErrors}; +use crate::ScriptError; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub enum TokenKind { // Punctuation LeftParen, @@ -93,6 +93,10 @@ impl Token { &self.kind } + pub fn into_kind(self) -> TokenKind { + self.kind + } + pub fn lexeme(&self) -> &str { &self.lexeme } diff --git a/src/lib.rs b/src/lib.rs index b44183c..f99d0f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,11 @@ #![warn(clippy::pedantic)] +use crate::ast::{Expr, Visitor}; use std::fmt::{self, Display, Formatter}; mod ast; mod lex; +mod parse; #[derive(thiserror::Error, Debug, Clone)] #[error("[line {line}] Error {location}: {message}")] @@ -36,18 +38,63 @@ pub fn run(script: &str) -> Result<(), ScriptErrors> { let mut errors = Vec::new(); let tokens = lex::scan_source(script, |err| errors.push(err)); if !errors.is_empty() { - dbg!(tokens); return Err(errors.into()); } - for token in &tokens { - print!("{}", token.lexeme()); + let parsed = parse::parse_expression(tokens.into_iter(), |err| errors.push(err)); + if !errors.is_empty() { + return Err(errors.into()); } - println!(); + + println!("{}", ASTPrinter.visit_expr(&parsed)); Ok(()) } +struct ASTPrinter; +impl ASTPrinter { + // meh + #[allow(clippy::format_collect)] + fn parenthesize(&mut self, name: &str, exprs: &[&Expr]) -> String { + format!( + "({name}{})", + exprs + .iter() + .map(|expr| format!(" {}", self.visit_expr(expr))) + .collect::() + ) + } +} + +impl Visitor for ASTPrinter { + fn visit_binary( + &mut self, + left: &ast::Expr, + operator: &lex::Token, + right: &ast::Expr, + ) -> String { + self.parenthesize(operator.lexeme(), &[left, right]) + } + + fn visit_grouping(&mut self, expr: &Expr) -> String { + self.parenthesize("group", &[expr]) + } + + fn visit_unary(&mut self, expr: &Expr, operator: &lex::Token) -> String { + self.parenthesize(operator.lexeme(), &[expr]) + } + + fn visit_literal(&mut self, value: &ast::LiteralValue) -> String { + match value { + ast::LiteralValue::Nil => "nil".to_string(), + ast::LiteralValue::False => "false".to_string(), + ast::LiteralValue::True => "true".to_string(), + ast::LiteralValue::Number(n) => n.to_string(), + ast::LiteralValue::String(s) => format!("\"{}\"", s.clone()), + } + } +} + #[cfg(test)] mod tests { use self::{ @@ -57,50 +104,6 @@ mod tests { use super::*; - struct ASTPrinter; - impl ASTPrinter { - // meh - #[allow(clippy::format_collect)] - fn parenthesize(&mut self, name: &str, exprs: &[&Expr]) -> String { - format!( - "({name}{})", - exprs - .iter() - .map(|expr| format!(" {}", self.visit_expr(expr))) - .collect::() - ) - } - } - - impl Visitor for ASTPrinter { - fn visit_binary( - &mut self, - left: &ast::Expr, - operator: &lex::Token, - right: &ast::Expr, - ) -> String { - self.parenthesize(operator.lexeme(), &[left, right]) - } - - fn visit_grouping(&mut self, expr: &Expr) -> String { - self.parenthesize("group", &[expr]) - } - - fn visit_unary(&mut self, expr: &Expr, operator: &lex::Token) -> String { - self.parenthesize(operator.lexeme(), &[expr]) - } - - fn visit_literal(&mut self, value: &ast::LiteralValue) -> String { - match value { - ast::LiteralValue::Nil => "nil".to_string(), - ast::LiteralValue::False => "false".to_string(), - ast::LiteralValue::True => "true".to_string(), - ast::LiteralValue::Number(n) => n.to_string(), - ast::LiteralValue::String(s) => format!("\"{}\"", s.clone()), - } - } - } - #[test] fn test_simple_add() { let result = ASTPrinter.visit_expr(&Expr::Binary { diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..680dd56 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,189 @@ +use std::iter::Peekable; + +use crate::{ + ast::{Expr, LiteralValue}, + lex::{Token, TokenKind}, + ScriptError, +}; + +#[derive(thiserror::Error, Debug, Clone)] +#[error("{message}")] +struct ParseError { + message: String, + line: Option, +} + +pub fn parse_expression, F: FnMut(ScriptError)>( + iter: I, + mut on_error: F, +) -> Expr { + let parse_res = parse_equality(&mut iter.peekable()); + match parse_res { + Ok(expr) => expr, + Err(error) => { + on_error(ScriptError { + message: error.message, + // TODO: This sucks and we should make ScriptError handle optional line numbers somehow + line: error.line.unwrap_or_default(), + location: String::new(), + }); + todo!() + } + } +} + +// To be used later? +fn synchronize_to_next_statement>(iter: &mut I) { + for item in iter { + if item.kind() == &TokenKind::SemiColon { + return; + } + } +} + +fn parse_expression_>( + iter: &mut Peekable, +) -> Result { + parse_equality(iter) +} + +fn parse_equality>(iter: &mut Peekable) -> Result { + parse_binary( + iter, + &[TokenKind::EqualEqual, TokenKind::BangEqual], + parse_comparison, + ) +} + +fn parse_comparison>(iter: &mut Peekable) -> Result { + parse_binary( + iter, + &[ + TokenKind::Greater, + TokenKind::GreaterEqual, + TokenKind::Less, + TokenKind::LessEqual, + ], + parse_term, + ) +} + +fn parse_term>(iter: &mut Peekable) -> Result { + parse_binary(iter, &[TokenKind::Minus, TokenKind::Plus], parse_factor) +} + +fn parse_factor>(iter: &mut Peekable) -> Result { + parse_binary(iter, &[TokenKind::Star, TokenKind::Slash], parse_unary) +} + +fn parse_binary, F: Fn(&mut Peekable) -> Result>( + iter: &mut Peekable, + valid_operators: &[TokenKind], + parse_operand: F, +) -> Result { + let mut expr = parse_operand(iter)?; + + while let Some(token) = match_token(iter, valid_operators) { + let operator = token; + let right = parse_operand(iter)?; + expr = Expr::Binary { + left: Box::new(expr), + operator, + right: Box::new(right), + } + } + + Ok(expr) +} + +fn parse_unary>(iter: &mut Peekable) -> Result { + if let Some(token) = match_token(iter, &[TokenKind::Bang, TokenKind::Minus]) { + let operator = token; + let right = parse_unary(iter)?; + Ok(Expr::Unary { + expr: Box::new(right), + operator, + }) + } else { + parse_primary(iter) + } +} + +fn parse_primary>(iter: &mut Peekable) -> Result { + let Some(token) = iter.next() else { + return Err(ParseError { + message: "Unexpected end of input".to_string(), + line: None, + }); + }; + + match token.kind() { + TokenKind::False => Ok(Expr::Literal { + value: LiteralValue::False, + }), + TokenKind::True => Ok(Expr::Literal { + value: LiteralValue::True, + }), + TokenKind::Nil => Ok(Expr::Literal { + value: LiteralValue::Nil, + }), + TokenKind::Number(number) => Ok(Expr::Literal { + value: LiteralValue::Number(*number), + }), + TokenKind::String(_) => { + // special case to avoid cloning + let next_token = iter.next().unwrap(); + match next_token.into_kind() { + TokenKind::String(string) => Ok(Expr::Literal { + value: LiteralValue::String(string), + }), + _ => unreachable!(), + } + } + TokenKind::LeftParen => { + // special case so we can check for right paren + // consume the left paren + iter.next(); + let expr = parse_expression_(iter)?; + if match_token(iter, &[TokenKind::RightParen]).is_some() { + Ok(Expr::Grouping { + expr: Box::new(expr), + }) + } else { + Err(ParseError { + message: "Missing right parenthesis".to_string(), + line: Some(token.line()), + }) + } + } + + _ => Err(ParseError { + message: "Expected an expression".to_string(), + line: Some(token.line()), + }), + } +} + +fn match_token>( + iter: &mut Peekable, + valid_kinds: &[TokenKind], +) -> Option { + valid_kinds + .iter() + .find_map(|kind| match_next_token(iter, kind)) +} + +fn match_next_token>( + tokens: &mut Peekable, + kind: &TokenKind, +) -> Option { + match tokens.peek() { + Some(peeked) if peeked.kind() == kind => { + // Force the iterator to iterate, we know there will be a value + // and we move out of the iterator to return it + let result = tokens.next().unwrap(); + Some(result) + } + None | Some(_) => None, + } +}