Add first pass at parser from parsing chapter
parent
70625afcaf
commit
3100b77003
|
@ -3,9 +3,9 @@ use std::iter::{self, Peekable};
|
|||
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::{ScriptError, ScriptErrors};
|
||||
use crate::ScriptError;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum TokenKind {
|
||||
// Punctuation
|
||||
LeftParen,
|
||||
|
@ -93,6 +93,10 @@ impl Token {
|
|||
&self.kind
|
||||
}
|
||||
|
||||
pub fn into_kind(self) -> TokenKind {
|
||||
self.kind
|
||||
}
|
||||
|
||||
pub fn lexeme(&self) -> &str {
|
||||
&self.lexeme
|
||||
}
|
||||
|
|
99
src/lib.rs
99
src/lib.rs
|
@ -1,9 +1,11 @@
|
|||
#![warn(clippy::pedantic)]
|
||||
|
||||
use crate::ast::{Expr, Visitor};
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
|
||||
mod ast;
|
||||
mod lex;
|
||||
mod parse;
|
||||
|
||||
#[derive(thiserror::Error, Debug, Clone)]
|
||||
#[error("[line {line}] Error {location}: {message}")]
|
||||
|
@ -36,18 +38,63 @@ pub fn run(script: &str) -> Result<(), ScriptErrors> {
|
|||
let mut errors = Vec::new();
|
||||
let tokens = lex::scan_source(script, |err| errors.push(err));
|
||||
if !errors.is_empty() {
|
||||
dbg!(tokens);
|
||||
return Err(errors.into());
|
||||
}
|
||||
|
||||
for token in &tokens {
|
||||
print!("{}", token.lexeme());
|
||||
let parsed = parse::parse_expression(tokens.into_iter(), |err| errors.push(err));
|
||||
if !errors.is_empty() {
|
||||
return Err(errors.into());
|
||||
}
|
||||
println!();
|
||||
|
||||
println!("{}", ASTPrinter.visit_expr(&parsed));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct ASTPrinter;
|
||||
impl ASTPrinter {
|
||||
// meh
|
||||
#[allow(clippy::format_collect)]
|
||||
fn parenthesize(&mut self, name: &str, exprs: &[&Expr]) -> String {
|
||||
format!(
|
||||
"({name}{})",
|
||||
exprs
|
||||
.iter()
|
||||
.map(|expr| format!(" {}", self.visit_expr(expr)))
|
||||
.collect::<String>()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Visitor<String> for ASTPrinter {
|
||||
fn visit_binary(
|
||||
&mut self,
|
||||
left: &ast::Expr,
|
||||
operator: &lex::Token,
|
||||
right: &ast::Expr,
|
||||
) -> String {
|
||||
self.parenthesize(operator.lexeme(), &[left, right])
|
||||
}
|
||||
|
||||
fn visit_grouping(&mut self, expr: &Expr) -> String {
|
||||
self.parenthesize("group", &[expr])
|
||||
}
|
||||
|
||||
fn visit_unary(&mut self, expr: &Expr, operator: &lex::Token) -> String {
|
||||
self.parenthesize(operator.lexeme(), &[expr])
|
||||
}
|
||||
|
||||
fn visit_literal(&mut self, value: &ast::LiteralValue) -> String {
|
||||
match value {
|
||||
ast::LiteralValue::Nil => "nil".to_string(),
|
||||
ast::LiteralValue::False => "false".to_string(),
|
||||
ast::LiteralValue::True => "true".to_string(),
|
||||
ast::LiteralValue::Number(n) => n.to_string(),
|
||||
ast::LiteralValue::String(s) => format!("\"{}\"", s.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use self::{
|
||||
|
@ -57,50 +104,6 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
|
||||
struct ASTPrinter;
|
||||
impl ASTPrinter {
|
||||
// meh
|
||||
#[allow(clippy::format_collect)]
|
||||
fn parenthesize(&mut self, name: &str, exprs: &[&Expr]) -> String {
|
||||
format!(
|
||||
"({name}{})",
|
||||
exprs
|
||||
.iter()
|
||||
.map(|expr| format!(" {}", self.visit_expr(expr)))
|
||||
.collect::<String>()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl Visitor<String> for ASTPrinter {
|
||||
fn visit_binary(
|
||||
&mut self,
|
||||
left: &ast::Expr,
|
||||
operator: &lex::Token,
|
||||
right: &ast::Expr,
|
||||
) -> String {
|
||||
self.parenthesize(operator.lexeme(), &[left, right])
|
||||
}
|
||||
|
||||
fn visit_grouping(&mut self, expr: &Expr) -> String {
|
||||
self.parenthesize("group", &[expr])
|
||||
}
|
||||
|
||||
fn visit_unary(&mut self, expr: &Expr, operator: &lex::Token) -> String {
|
||||
self.parenthesize(operator.lexeme(), &[expr])
|
||||
}
|
||||
|
||||
fn visit_literal(&mut self, value: &ast::LiteralValue) -> String {
|
||||
match value {
|
||||
ast::LiteralValue::Nil => "nil".to_string(),
|
||||
ast::LiteralValue::False => "false".to_string(),
|
||||
ast::LiteralValue::True => "true".to_string(),
|
||||
ast::LiteralValue::Number(n) => n.to_string(),
|
||||
ast::LiteralValue::String(s) => format!("\"{}\"", s.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_simple_add() {
|
||||
let result = ASTPrinter.visit_expr(&Expr::Binary {
|
||||
|
|
|
@ -0,0 +1,189 @@
|
|||
use std::iter::Peekable;
|
||||
|
||||
use crate::{
|
||||
ast::{Expr, LiteralValue},
|
||||
lex::{Token, TokenKind},
|
||||
ScriptError,
|
||||
};
|
||||
|
||||
/// Error raised by the parsing functions in this module; `parse_expression`
/// converts it into a `ScriptError` before reporting it.
#[derive(thiserror::Error, Debug, Clone)]
|
||||
#[error("{message}")]
|
||||
struct ParseError {
|
||||
/// Human-readable description; this is the entire `Display` output.
message: String,
|
||||
/// Line of the offending token, or `None` when no token was available
/// (for example at end of input).
line: Option<usize>,
|
||||
}
|
||||
|
||||
pub fn parse_expression<I: Iterator<Item = Token>, F: FnMut(ScriptError)>(
|
||||
iter: I,
|
||||
mut on_error: F,
|
||||
) -> Expr {
|
||||
let parse_res = parse_equality(&mut iter.peekable());
|
||||
match parse_res {
|
||||
Ok(expr) => expr,
|
||||
Err(error) => {
|
||||
on_error(ScriptError {
|
||||
message: error.message,
|
||||
// TODO: This sucks and we should make ScriptError handle optional line numbers somehow
|
||||
line: error.line.unwrap_or_default(),
|
||||
location: String::new(),
|
||||
});
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// To be used later?
|
||||
fn synchronize_to_next_statement<I: Iterator<Item = Token>>(iter: &mut I) {
|
||||
for item in iter {
|
||||
if item.kind() == &TokenKind::SemiColon {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_expression_<I: Iterator<Item = Token>>(
|
||||
iter: &mut Peekable<I>,
|
||||
) -> Result<Expr, ParseError> {
|
||||
parse_equality(iter)
|
||||
}
|
||||
|
||||
fn parse_equality<I: Iterator<Item = Token>>(iter: &mut Peekable<I>) -> Result<Expr, ParseError> {
|
||||
parse_binary(
|
||||
iter,
|
||||
&[TokenKind::EqualEqual, TokenKind::BangEqual],
|
||||
parse_comparison,
|
||||
)
|
||||
}
|
||||
|
||||
fn parse_comparison<I: Iterator<Item = Token>>(iter: &mut Peekable<I>) -> Result<Expr, ParseError> {
|
||||
parse_binary(
|
||||
iter,
|
||||
&[
|
||||
TokenKind::Greater,
|
||||
TokenKind::GreaterEqual,
|
||||
TokenKind::Less,
|
||||
TokenKind::LessEqual,
|
||||
],
|
||||
parse_term,
|
||||
)
|
||||
}
|
||||
|
||||
fn parse_term<I: Iterator<Item = Token>>(iter: &mut Peekable<I>) -> Result<Expr, ParseError> {
|
||||
parse_binary(iter, &[TokenKind::Minus, TokenKind::Plus], parse_factor)
|
||||
}
|
||||
|
||||
fn parse_factor<I: Iterator<Item = Token>>(iter: &mut Peekable<I>) -> Result<Expr, ParseError> {
|
||||
parse_binary(iter, &[TokenKind::Star, TokenKind::Slash], parse_unary)
|
||||
}
|
||||
|
||||
fn parse_binary<I: Iterator<Item = Token>, F: Fn(&mut Peekable<I>) -> Result<Expr, ParseError>>(
|
||||
iter: &mut Peekable<I>,
|
||||
valid_operators: &[TokenKind],
|
||||
parse_operand: F,
|
||||
) -> Result<Expr, ParseError> {
|
||||
let mut expr = parse_operand(iter)?;
|
||||
|
||||
while let Some(token) = match_token(iter, valid_operators) {
|
||||
let operator = token;
|
||||
let right = parse_operand(iter)?;
|
||||
expr = Expr::Binary {
|
||||
left: Box::new(expr),
|
||||
operator,
|
||||
right: Box::new(right),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
fn parse_unary<I: Iterator<Item = Token>>(iter: &mut Peekable<I>) -> Result<Expr, ParseError> {
|
||||
if let Some(token) = match_token(iter, &[TokenKind::Bang, TokenKind::Minus]) {
|
||||
let operator = token;
|
||||
let right = parse_unary(iter)?;
|
||||
Ok(Expr::Unary {
|
||||
expr: Box::new(right),
|
||||
operator,
|
||||
})
|
||||
} else {
|
||||
parse_primary(iter)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_primary<I: Iterator<Item = Token>>(iter: &mut Peekable<I>) -> Result<Expr, ParseError> {
|
||||
let Some(token) = iter.next() else {
|
||||
return Err(ParseError {
|
||||
message: "Unexpected end of input".to_string(),
|
||||
line: None,
|
||||
});
|
||||
};
|
||||
|
||||
match token.kind() {
|
||||
TokenKind::False => Ok(Expr::Literal {
|
||||
value: LiteralValue::False,
|
||||
}),
|
||||
TokenKind::True => Ok(Expr::Literal {
|
||||
value: LiteralValue::True,
|
||||
}),
|
||||
TokenKind::Nil => Ok(Expr::Literal {
|
||||
value: LiteralValue::Nil,
|
||||
}),
|
||||
TokenKind::Number(number) => Ok(Expr::Literal {
|
||||
value: LiteralValue::Number(*number),
|
||||
}),
|
||||
TokenKind::String(_) => {
|
||||
// special case to avoid cloning
|
||||
let next_token = iter.next().unwrap();
|
||||
match next_token.into_kind() {
|
||||
TokenKind::String(string) => Ok(Expr::Literal {
|
||||
value: LiteralValue::String(string),
|
||||
}),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
TokenKind::LeftParen => {
|
||||
// special case so we can check for right paren
|
||||
// consume the left paren
|
||||
iter.next();
|
||||
let expr = parse_expression_(iter)?;
|
||||
if match_token(iter, &[TokenKind::RightParen]).is_some() {
|
||||
Ok(Expr::Grouping {
|
||||
expr: Box::new(expr),
|
||||
})
|
||||
} else {
|
||||
Err(ParseError {
|
||||
message: "Missing right parenthesis".to_string(),
|
||||
line: Some(token.line()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
_ => Err(ParseError {
|
||||
message: "Expected an expression".to_string(),
|
||||
line: Some(token.line()),
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn match_token<I: Iterator<Item = Token>>(
|
||||
iter: &mut Peekable<I>,
|
||||
valid_kinds: &[TokenKind],
|
||||
) -> Option<Token> {
|
||||
valid_kinds
|
||||
.iter()
|
||||
.find_map(|kind| match_next_token(iter, kind))
|
||||
}
|
||||
|
||||
fn match_next_token<I: Iterator<Item = Token>>(
|
||||
tokens: &mut Peekable<I>,
|
||||
kind: &TokenKind,
|
||||
) -> Option<Token> {
|
||||
match tokens.peek() {
|
||||
Some(peeked) if peeked.kind() == kind => {
|
||||
// Force the iterator to iterate, we know there will be a value
|
||||
// and we move out of the iterator to return it
|
||||
let result = tokens.next().unwrap();
|
||||
Some(result)
|
||||
}
|
||||
None | Some(_) => None,
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue