Add first pass at parser from parsing chapter

master
Nick Krichevsky 2024-05-15 15:23:20 -04:00
parent 70625afcaf
commit 3100b77003
3 changed files with 246 additions and 50 deletions

View File

@ -3,9 +3,9 @@ use std::iter::{self, Peekable};
use thiserror::Error; use thiserror::Error;
use crate::{ScriptError, ScriptErrors}; use crate::ScriptError;
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialEq)]
pub enum TokenKind { pub enum TokenKind {
// Punctuation // Punctuation
LeftParen, LeftParen,
@ -93,6 +93,10 @@ impl Token {
&self.kind &self.kind
} }
pub fn into_kind(self) -> TokenKind {
self.kind
}
pub fn lexeme(&self) -> &str { pub fn lexeme(&self) -> &str {
&self.lexeme &self.lexeme
} }

View File

@ -1,9 +1,11 @@
#![warn(clippy::pedantic)] #![warn(clippy::pedantic)]
use crate::ast::{Expr, Visitor};
use std::fmt::{self, Display, Formatter}; use std::fmt::{self, Display, Formatter};
mod ast; mod ast;
mod lex; mod lex;
mod parse;
#[derive(thiserror::Error, Debug, Clone)] #[derive(thiserror::Error, Debug, Clone)]
#[error("[line {line}] Error {location}: {message}")] #[error("[line {line}] Error {location}: {message}")]
@ -36,18 +38,63 @@ pub fn run(script: &str) -> Result<(), ScriptErrors> {
let mut errors = Vec::new(); let mut errors = Vec::new();
let tokens = lex::scan_source(script, |err| errors.push(err)); let tokens = lex::scan_source(script, |err| errors.push(err));
if !errors.is_empty() { if !errors.is_empty() {
dbg!(tokens);
return Err(errors.into()); return Err(errors.into());
} }
for token in &tokens { let parsed = parse::parse_expression(tokens.into_iter(), |err| errors.push(err));
print!("{}", token.lexeme()); if !errors.is_empty() {
return Err(errors.into());
} }
println!();
println!("{}", ASTPrinter.visit_expr(&parsed));
Ok(()) Ok(())
} }
/// Visitor that renders an expression tree as a parenthesized, prefix-notation string.
struct ASTPrinter;

impl ASTPrinter {
    /// Renders `name` followed by each expression in `exprs`, space separated
    /// and wrapped in parentheses: `(name expr1 expr2 ...)`.
    ///
    /// Each sub-expression is rendered by visiting it with this printer.
    fn parenthesize(&mut self, name: &str, exprs: &[&Expr]) -> String {
        use std::fmt::Write as _;

        let mut rendered = format!("({name}");
        for expr in exprs {
            // Appending straight into the buffer avoids a temporary `String` per
            // operand. Writing to a `String` cannot fail, so the result is ignored.
            let _ = write!(rendered, " {}", self.visit_expr(expr));
        }
        rendered.push(')');
        rendered
    }
}
impl Visitor<String> for ASTPrinter {
    /// Renders a binary expression as `(operator left right)`.
    fn visit_binary(
        &mut self,
        left: &ast::Expr,
        operator: &lex::Token,
        right: &ast::Expr,
    ) -> String {
        self.parenthesize(operator.lexeme(), &[left, right])
    }

    /// Renders a grouping as `(group expr)`.
    fn visit_grouping(&mut self, expr: &Expr) -> String {
        self.parenthesize("group", &[expr])
    }

    /// Renders a unary expression as `(operator expr)`.
    fn visit_unary(&mut self, expr: &Expr, operator: &lex::Token) -> String {
        self.parenthesize(operator.lexeme(), &[expr])
    }

    /// Renders a literal: keywords as-is, numbers via `to_string`, and strings
    /// wrapped in double quotes.
    fn visit_literal(&mut self, value: &ast::LiteralValue) -> String {
        match value {
            ast::LiteralValue::Nil => "nil".to_string(),
            ast::LiteralValue::False => "false".to_string(),
            ast::LiteralValue::True => "true".to_string(),
            ast::LiteralValue::Number(n) => n.to_string(),
            // Formatting borrows `s`, so no clone is needed.
            ast::LiteralValue::String(s) => format!("\"{s}\""),
        }
    }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use self::{ use self::{
@ -57,50 +104,6 @@ mod tests {
use super::*; use super::*;
struct ASTPrinter;
impl ASTPrinter {
// meh
#[allow(clippy::format_collect)]
fn parenthesize(&mut self, name: &str, exprs: &[&Expr]) -> String {
format!(
"({name}{})",
exprs
.iter()
.map(|expr| format!(" {}", self.visit_expr(expr)))
.collect::<String>()
)
}
}
impl Visitor<String> for ASTPrinter {
fn visit_binary(
&mut self,
left: &ast::Expr,
operator: &lex::Token,
right: &ast::Expr,
) -> String {
self.parenthesize(operator.lexeme(), &[left, right])
}
fn visit_grouping(&mut self, expr: &Expr) -> String {
self.parenthesize("group", &[expr])
}
fn visit_unary(&mut self, expr: &Expr, operator: &lex::Token) -> String {
self.parenthesize(operator.lexeme(), &[expr])
}
fn visit_literal(&mut self, value: &ast::LiteralValue) -> String {
match value {
ast::LiteralValue::Nil => "nil".to_string(),
ast::LiteralValue::False => "false".to_string(),
ast::LiteralValue::True => "true".to_string(),
ast::LiteralValue::Number(n) => n.to_string(),
ast::LiteralValue::String(s) => format!("\"{}\"", s.clone()),
}
}
}
#[test] #[test]
fn test_simple_add() { fn test_simple_add() {
let result = ASTPrinter.visit_expr(&Expr::Binary { let result = ASTPrinter.visit_expr(&Expr::Binary {

189
src/parse.rs Normal file
View File

@ -0,0 +1,189 @@
use std::iter::Peekable;
use crate::{
ast::{Expr, LiteralValue},
lex::{Token, TokenKind},
ScriptError,
};
/// Error produced by the parsing functions in this module.
///
/// `parse_expression` converts it into a `ScriptError` before passing it to
/// the caller-supplied error callback.
#[derive(thiserror::Error, Debug, Clone)]
#[error("{message}")]
struct ParseError {
/// Human-readable description of the problem; also the `Display` output.
message: String,
/// Line of the token the error was detected at, or `None` when there was no
/// token to take a line from (unexpected end of input).
line: Option<usize>,
}
pub fn parse_expression<I: Iterator<Item = Token>, F: FnMut(ScriptError)>(
iter: I,
mut on_error: F,
) -> Expr {
let parse_res = parse_equality(&mut iter.peekable());
match parse_res {
Ok(expr) => expr,
Err(error) => {
on_error(ScriptError {
message: error.message,
// TODO: This sucks and we should make ScriptError handle optional line numbers somehow
line: error.line.unwrap_or_default(),
location: String::new(),
});
todo!()
}
}
}
/// Discards tokens from `iter` up to and including the first semicolon, or
/// until the iterator is exhausted if there is none.
///
/// Not called anywhere yet; presumably intended for error recovery later.
fn synchronize_to_next_statement<I: Iterator<Item = Token>>(iter: &mut I) {
    // `find` stops right after the first match, which is exactly the point we
    // want to resume from; the matched token itself is not needed.
    let _ = iter.find(|token| token.kind() == &TokenKind::SemiColon);
}
/// Internal entry point for parsing a full expression; delegates to the
/// equality level of the grammar.
fn parse_expression_<I>(iter: &mut Peekable<I>) -> Result<Expr, ParseError>
where
    I: Iterator<Item = Token>,
{
    parse_equality(iter)
}
/// Parses a chain of `==` / `!=` operations whose operands are comparisons.
fn parse_equality<I>(iter: &mut Peekable<I>) -> Result<Expr, ParseError>
where
    I: Iterator<Item = Token>,
{
    let operators = [TokenKind::EqualEqual, TokenKind::BangEqual];
    parse_binary(iter, &operators, parse_comparison)
}
/// Parses a chain of `>`, `>=`, `<`, `<=` operations whose operands are terms.
fn parse_comparison<I>(iter: &mut Peekable<I>) -> Result<Expr, ParseError>
where
    I: Iterator<Item = Token>,
{
    let operators = [
        TokenKind::Greater,
        TokenKind::GreaterEqual,
        TokenKind::Less,
        TokenKind::LessEqual,
    ];
    parse_binary(iter, &operators, parse_term)
}
/// Parses a chain of `-` / `+` operations whose operands are factors.
fn parse_term<I>(iter: &mut Peekable<I>) -> Result<Expr, ParseError>
where
    I: Iterator<Item = Token>,
{
    let operators = [TokenKind::Minus, TokenKind::Plus];
    parse_binary(iter, &operators, parse_factor)
}
/// Parses a chain of `*` / `/` operations whose operands are unary expressions.
fn parse_factor<I>(iter: &mut Peekable<I>) -> Result<Expr, ParseError>
where
    I: Iterator<Item = Token>,
{
    let operators = [TokenKind::Star, TokenKind::Slash];
    parse_binary(iter, &operators, parse_unary)
}
/// Parses a left-associative chain of binary operations.
///
/// An operand is parsed with `parse_operand`; then, for as long as the next
/// token is one of `valid_operators`, another operand is parsed and folded
/// into the expression built so far as its right-hand side.
///
/// Returns the first error produced by `parse_operand`.
fn parse_binary<I: Iterator<Item = Token>, F: Fn(&mut Peekable<I>) -> Result<Expr, ParseError>>(
    iter: &mut Peekable<I>,
    valid_operators: &[TokenKind],
    parse_operand: F,
) -> Result<Expr, ParseError> {
    let mut accumulated = parse_operand(iter)?;
    loop {
        // No further operator at this precedence level: the chain is complete.
        let Some(operator) = match_token(iter, valid_operators) else {
            return Ok(accumulated);
        };
        let rhs = parse_operand(iter)?;
        accumulated = Expr::Binary {
            left: Box::new(accumulated),
            operator,
            right: Box::new(rhs),
        };
    }
}
/// Parses a unary expression: any number of leading `!` / `-` operators
/// applied to a primary expression.
fn parse_unary<I>(iter: &mut Peekable<I>) -> Result<Expr, ParseError>
where
    I: Iterator<Item = Token>,
{
    match match_token(iter, &[TokenKind::Bang, TokenKind::Minus]) {
        Some(operator) => {
            // Recurse so that stacked operators such as `!!x` nest correctly.
            let operand = parse_unary(iter)?;
            Ok(Expr::Unary {
                expr: Box::new(operand),
                operator,
            })
        }
        None => parse_primary(iter),
    }
}
/// Parses a primary expression: a literal (`false`, `true`, `nil`, a number,
/// a string) or a parenthesized expression.
///
/// Exactly one token is consumed for a literal. For a grouping, the opening
/// parenthesis, the inner expression and the closing parenthesis are consumed.
///
/// # Errors
///
/// Returns a `ParseError` when the input is exhausted, when the next token
/// cannot start an expression, or when a grouping is not closed by `)`.
fn parse_primary<I: Iterator<Item = Token>>(iter: &mut Peekable<I>) -> Result<Expr, ParseError> {
    let Some(token) = iter.next() else {
        return Err(ParseError {
            message: "Unexpected end of input".to_string(),
            line: None,
        });
    };

    // Remember the line now: `into_kind` consumes the token below.
    let line = token.line();

    // The token has already been taken off the iterator, so take ownership of
    // its kind directly. This moves a string payload out without cloning and
    // without pulling a second token from the stream.
    let value = match token.into_kind() {
        TokenKind::False => LiteralValue::False,
        TokenKind::True => LiteralValue::True,
        TokenKind::Nil => LiteralValue::Nil,
        TokenKind::Number(number) => LiteralValue::Number(number),
        TokenKind::String(string) => LiteralValue::String(string),
        TokenKind::LeftParen => {
            // The left parenthesis is the token consumed above; what follows
            // is the inner expression and then the closing parenthesis.
            let expr = parse_expression_(iter)?;
            return if match_token(iter, &[TokenKind::RightParen]).is_some() {
                Ok(Expr::Grouping {
                    expr: Box::new(expr),
                })
            } else {
                Err(ParseError {
                    message: "Missing right parenthesis".to_string(),
                    line: Some(line),
                })
            };
        }
        _ => {
            return Err(ParseError {
                message: "Expected an expression".to_string(),
                line: Some(line),
            })
        }
    };

    Ok(Expr::Literal { value })
}
/// Consumes and returns the next token if its kind equals any entry of
/// `valid_kinds`; otherwise leaves the iterator untouched and returns `None`.
fn match_token<I: Iterator<Item = Token>>(
    iter: &mut Peekable<I>,
    valid_kinds: &[TokenKind],
) -> Option<Token> {
    for kind in valid_kinds {
        if let Some(token) = match_next_token(iter, kind) {
            return Some(token);
        }
    }
    None
}
/// Consumes and returns the next token when its kind equals `kind`.
///
/// Returns `None`, consuming nothing, when the kinds differ or the input is
/// exhausted.
fn match_next_token<I: Iterator<Item = Token>>(
    tokens: &mut Peekable<I>,
    kind: &TokenKind,
) -> Option<Token> {
    // `next_if` only advances the iterator when the predicate holds.
    tokens.next_if(|candidate| candidate.kind() == kind)
}