way better parser errors

This commit is contained in:
brevalferrari 2025-06-07 02:18:46 +02:00
parent f5448e0e78
commit 8ce0676e7d
2 changed files with 283 additions and 117 deletions

View file

@ -1,6 +1,6 @@
mod cli;
use std::{
collections::HashMap,
collections::{HashMap, HashSet},
fs::File,
io::{Cursor, Write, read_to_string, stdout},
iter::once,
@ -9,7 +9,7 @@ use std::{
use anyhow::{Context as _, anyhow};
use bliplib::{
compiler::{Compiler, Context, SAMPLE_RATE, VariableChange},
parser::Parser,
parser::{LocatedVerboseError, Parser},
};
use clap::Parser as _;
use cli::Cli;
@ -102,10 +102,12 @@ fn parse_and_compile(opts: &PlayOpts) -> anyhow::Result<Vec<f64>> {
opts.slopes()
.map(|(s, (v, e))| (s, VariableChange(*v, e.clone())))
.collect::<Vec<_>>(),
HashMap::from(default_variables)
default_variables
.iter()
.chain(opts.variables())
.map(|(v, _)| *v)
.chain(opts.variables().map(|(v, _)| *v))
.collect::<HashSet<_>>()
.into_iter()
.collect::<Vec<_>>(),
);
info!("reading input");
@ -113,18 +115,31 @@ fn parse_and_compile(opts: &PlayOpts) -> anyhow::Result<Vec<f64>> {
info!("parsing tokens");
let tokens = parser
.parse_all(&input)
.map_err(|nom::error::Error { input, code }| {
anyhow!(
"{code:?} line {line} column {column} (at \"{at}\")",
code = code,
line = input.location_line(),
column = input.get_utf8_column(),
at = input
.chars()
.chain("...".chars())
.take(10)
.collect::<String>()
)
.map_err(|e| match e {
nom::Err::Incomplete(n) => {
anyhow!("nom parsers said the input was incomplete and needed {n:?} bytes")
}
nom::Err::Error(LocatedVerboseError { location, error })
| nom::Err::Failure(LocatedVerboseError { location, error }) => error
.unwrap_or(anyhow!("input did not match any known grammar (typo?)"))
.context(format!(
"line {line} column {column} (at \"{at}\")",
line = location.location_line(),
column = location.get_utf8_column(),
at = {
if location.len() > 10 {
location
.chars()
.take(10)
.chain("...".chars())
.collect::<String>()
} else if location.is_empty() {
String::from("EOF")
} else {
location.to_string()
}
}
)),
})
.context("Failed to parse input")?;
info!("found {} tokens", tokens.as_ref().len());
@ -138,9 +153,10 @@ fn parse_and_compile(opts: &PlayOpts) -> anyhow::Result<Vec<f64>> {
'L'.to_string(),
'n'.to_string(),
default_variables
.map(|(c, v)| (c.to_string(), v))
.into_iter()
.chain(opts.variables().map(|(a, b)| (a.to_string(), *b))),
.chain(opts.variables().map(|(a, b)| (*a, *b)))
.map(|(c, v)| (c.to_string(), v))
.collect::<HashMap<_, _>>(),
opts.instrument().clone(),
opts.slopes()
.map(|(_, (a, b))| (a.to_string(), b.clone()))

View file

@ -1,20 +1,19 @@
use std::{any::type_name, borrow::Borrow, collections::BTreeMap, marker::PhantomData};
use std::{
any::type_name,
borrow::{Borrow, Cow},
collections::BTreeMap,
marker::PhantomData,
};
use anyhow::anyhow;
use derive_new::new;
use fasteval::Evaler;
use log::{debug, error, trace};
use log::{debug, trace, warn};
use nom::{
AsChar, Compare, Finish, IResult, Input, Parser as NomParser,
branch::alt,
bytes::complete::{tag, take, take_till},
character::{
branch::alt, bytes::complete::{tag, take, take_till}, character::{
complete::{char, space1, usize},
streaming::one_of,
},
combinator::{all_consuming, cut, opt, value},
error::{Error, ParseError},
multi::many0,
sequence::{delimited, preceded},
}, combinator::{all_consuming, cut, opt, value}, error::{ErrorKind, FromExternalError, ParseError}, multi::many0, sequence::{delimited, preceded}, AsChar, Compare, Input, Parser as _
};
use nom_locate::LocatedSpan;
@ -23,6 +22,82 @@ use crate::compiler::{
VariableChange,
};
/// Parser result alias for this module: identical to [`nom::IResult`], except that the error
/// type defaults to [`LocatedVerboseError`] over the same input type instead of nom's own default.
pub type IResult<I, O, E = LocatedVerboseError<I>> = nom::IResult<I, O, E>;
/// Wraps a nom parser together with a context message.
///
/// The `nom::Parser` implementation for this type runs the wrapped parser and, when it fails
/// with `Error` or `Failure`, stacks `context` onto the returned [`LocatedVerboseError`].
/// The constructor is generated by `derive_new`; `phantom` is filled in with its default.
#[derive(new)]
pub struct VerboseParser<P: nom::Parser<I, Error = LocatedVerboseError<I>>, I> {
/// The parser whose errors get annotated.
parser: P,
/// Message attached to every error produced by `parser`.
context: Cow<'static, str>,
/// Marker tying the otherwise unused input type `I` to the struct.
#[new(default)]
phantom: PhantomData<I>,
}
impl<I> ParseError<I> for LocatedVerboseError<I> {
fn from_error_kind(input: I, _kind: ErrorKind) -> Self {
Self {
location: input,
error: None,
}
}
fn append(_input: I, _kind: ErrorKind, other: Self) -> Self {
other
}
}
impl<I, P: nom::Parser<I, Error = LocatedVerboseError<I>>> nom::Parser<I> for VerboseParser<P, I> {
type Output = P::Output;
type Error = LocatedVerboseError<I>;
fn process<OM: nom::OutputMode>(
&mut self,
input: I,
) -> nom::PResult<OM, I, Self::Output, Self::Error> {
use nom::Err::*;
use nom::Mode;
let stack_verbose_error = |e: LocatedVerboseError<I>| -> LocatedVerboseError<I> {
LocatedVerboseError {
error: Some(if let Some(cause) = e.error {
cause.context(self.context.clone())
} else {
anyhow::Error::msg(self.context.clone())
}),
..e
}
};
match self.parser.process::<OM>(input) {
Ok(o) => Ok(o),
Err(Error(e)) => Err(Error(OM::Error::map(e, stack_verbose_error))),
Err(Failure(e)) => Err(Failure(stack_verbose_error(e))),
Err(Incomplete(e)) => Err(Incomplete(e)),
}
}
}
/// Parse error that records where in the input it was raised and, optionally, why.
#[derive(Debug)]
pub struct LocatedVerboseError<I> {
/// The input as it was at the point where the error was produced.
pub location: I,
/// Descriptive error, if any; `None` when the error was built from a bare nom `ErrorKind`.
pub error: Option<anyhow::Error>,
}
pub fn expect<P, I>(
parser: P,
error_message: impl Into<Cow<'static, str>>,
) -> impl nom::Parser<I, Output = P::Output, Error = LocatedVerboseError<I>>
where
P: nom::Parser<I, Error = LocatedVerboseError<I>>,
{
VerboseParser::new(parser, error_message.into())
}
impl<I> FromExternalError<I, anyhow::Error> for LocatedVerboseError<I> {
fn from_external_error(input: I, _kind: ErrorKind, e: anyhow::Error) -> Self {
Self { location: input, error: Some(e) }
}
}
#[derive(new)]
pub struct Parser<N, NS, S, SS, SV, V>
where
@ -39,7 +114,7 @@ where
phantom: PhantomData<(NS, SS)>,
}
impl<'a, N, NS, S, SS, SV, V> Parser<N, NS, S, SS, SV, V>
impl<'a, 'p, N, NS, S, SS, SV, V> Parser<N, NS, S, SS, SV, V>
where
N: AsRef<[NS]>,
NS: AsRef<str>,
@ -47,32 +122,34 @@ where
SS: AsRef<str>,
SV: Borrow<VariableChange>,
V: AsRef<[char]>,
'p: 'a,
{
pub fn parse_all(
&self,
&'p self,
input: &'a str,
) -> Result<TokenVec, Error<nom_locate::LocatedSpan<&'a str>>> {
) -> Result<TokenVec, nom::Err<LocatedVerboseError<nom_locate::LocatedSpan<&'a str>>>> {
debug!("parsing input \"{input}\"");
all_consuming(token_parser(self))
.parse_complete(LocatedSpan::new(input))
.finish()
.map(|(_, o)| o)
.map(move |(_, o)| o)
}
}
fn token_parser<'a, I, N, NS, S, SS, SV, V>(
parser: &Parser<N, NS, S, SS, SV, V>,
) -> impl NomParser<I, Output = TokenVec, Error = nom::error::Error<I>>
fn token_parser<'a, 'p, N, NS, S, SS, SV, V>(
parser: &'p Parser<N, NS, S, SS, SV, V>,
) -> impl nom::Parser<
LocatedSpan<&'a str>,
Output = TokenVec,
Error = LocatedVerboseError<LocatedSpan<&'a str>>,
>
where
I: Input + AsRef<str> + for<'z> nom::Compare<&'z str> + Copy,
<I as Input>::Item: AsChar,
<I as Input>::Item: PartialEq<char>,
N: AsRef<[NS]>,
NS: AsRef<str>,
S: IntoIterator<Item = (SS, SV)> + Clone,
SS: AsRef<str>,
SV: Borrow<VariableChange>,
V: AsRef<[char]>,
'p: 'a,
{
trace!("making the TOKEN parser");
let space_or_comment = || {
@ -84,13 +161,20 @@ where
many0(delimited(
space_or_comment(),
alt((
Silence::parser().map(into_box),
Marker::parser().map(into_box),
Note::parser(parser.notes.as_ref()).map(into_box),
VariableChange::parser(&parser.variables).map(into_box),
Loop::parser(parser).map(into_box),
Tuplet::parser(parser).map(into_box),
Slope::parser(parser).map(into_box),
expect(Silence::parser(), "expected a silence").map(into_box),
expect(Marker::parser(), "expected a marker").map(into_box),
expect(
VariableChange::parser(&parser.variables).map(into_box),
"variable assignment",
),
expect(Loop::parser(parser).map(into_box), "expected a loop"),
expect(Tuplet::parser(parser).map(into_box), "expected a tuplet"),
expect(Slope::parser(parser).map(into_box), "expected a slope"),
expect(
Note::parser(parser.notes.as_ref()),
"expected a note as last appeal (input didn't match anything known)",
)
.map(into_box),
)),
space_or_comment(),
))
@ -102,7 +186,7 @@ fn into_box<'a>(token: impl Token + 'a) -> Box<dyn Token + 'a> {
}
impl Silence {
fn parser<I>() -> impl NomParser<I, Output = Self, Error = nom::error::Error<I>>
fn parser<I>() -> impl nom::Parser<I, Output = Self, Error = LocatedVerboseError<I>>
where
I: Input,
<I as Input>::Item: AsChar,
@ -113,7 +197,7 @@ impl Silence {
}
impl Marker {
fn parser<I>() -> impl NomParser<I, Output = Self, Error = nom::error::Error<I>>
fn parser<I>() -> impl nom::Parser<I, Output = Self, Error = LocatedVerboseError<I>>
where
I: Input,
<I as Input>::Item: AsChar,
@ -126,7 +210,7 @@ impl Marker {
impl Note {
fn parser<'a, N, NS, I>(
notes: N,
) -> impl NomParser<I, Output = Self, Error = nom::error::Error<I>> + 'a
) -> impl nom::Parser<I, Output = Self, Error = LocatedVerboseError<I>> + 'a
where
N: IntoIterator<Item = NS>,
NS: AsRef<str>,
@ -168,14 +252,26 @@ impl VariableChange {
V: AsRef<[char]>,
{
trace!("making the {} parser", type_name::<Self>());
let variables_string = variables.as_ref().iter().collect::<String>();
move |i: I| {
preceded(
char('$'),
cut(
one_of(variables.as_ref().iter().collect::<String>().as_str())
.and(expression_parser(variables.as_ref()))
.map(|(name, change)| VariableChange(name, change)),
),
cut(expect(
one_of(variables_string.as_str()),
format!(
"got unknown variable '{}', expected one of these instead: {:?}",
i.as_ref().chars().nth(1).unwrap_or('?'),
variables_string.chars().collect::<Vec<_>>()
),
)
.and(cut(expect(
expression_parser(variables.as_ref()),
format!(
"expected a valid expression to assign variable {} to",
i.as_ref().chars().nth(1).unwrap_or('?')
),
)))
.map(|(name, change)| VariableChange(name, change))),
)
.parse(i)
}
@ -183,19 +279,17 @@ impl VariableChange {
}
impl Loop {
fn parser<I, N, NS, S, SS, SV, V>(
parser: &Parser<N, NS, S, SS, SV, V>,
) -> impl Fn(I) -> IResult<I, Self>
fn parser<'a, 'p, N, NS, S, SS, SV, V>(
parser: &'p Parser<N, NS, S, SS, SV, V>,
) -> impl Fn(LocatedSpan<&'a str>) -> IResult<LocatedSpan<&'a str>, Self>
where
I: Input + AsRef<str> + for<'z> nom::Compare<&'z str> + Copy,
<I as Input>::Item: AsChar,
<I as Input>::Item: PartialEq<char>,
N: AsRef<[NS]>,
NS: AsRef<str>,
S: IntoIterator<Item = (SS, SV)> + Clone,
SS: AsRef<str>,
SV: Borrow<VariableChange>,
V: AsRef<[char]>,
'p: 'a,
{
trace!("making the {} parser", type_name::<Self>());
move |input| {
@ -213,8 +307,17 @@ impl Loop {
)
.map(LoopCount::Variable),
)))
.and(token_parser(parser)),
cut(char(')')),
.and(cut(take_till(|c| c == ')').and_then(cut(expect(all_consuming(token_parser(parser)), "input did not match any known grammar for inner tokens (typo?)"))))),
cut(
expect(
char(')'),
format!(
"the loop started at line {line} column {column} was not closed at this point",
line = input.location_line(),
column = input.get_utf8_column()
)
)
),
)
.map(|(c, v)| Self(c.unwrap_or_default(), v))
.parse(input)
@ -223,23 +326,30 @@ impl Loop {
}
impl Tuplet {
fn parser<I, N, NS, S, SS, SV, V>(
parser: &Parser<N, NS, S, SS, SV, V>,
) -> impl Fn(I) -> IResult<I, Self>
fn parser<'a, 'p, N, NS, S, SS, SV, V>(
parser: &'p Parser<N, NS, S, SS, SV, V>,
) -> impl Fn(LocatedSpan<&'a str>) -> IResult<LocatedSpan<&'a str>, Self>
where
I: Input + for<'z> Compare<&'z str> + AsRef<str> + Copy,
<I as Input>::Item: AsChar,
<I as Input>::Item: PartialEq<char>,
N: AsRef<[NS]>,
NS: AsRef<str>,
S: IntoIterator<Item = (SS, SV)> + Clone,
SS: AsRef<str>,
SV: Borrow<VariableChange>,
V: AsRef<[char]>,
'p: 'a,
{
trace!("making the {} parser", type_name::<Self>());
|input| {
delimited(char('['), token_parser(parser), cut(char(']')))
delimited(char('['), cut(take_till(|c| c == ']').and_then(cut(expect(all_consuming(token_parser(parser)), "input did not match any known grammar for inner tokens (typo?)")))), cut(
expect(
char(']'),
format!(
"the tuplet started at line {line} column {column} was not closed at this point",
line = input.location_line(),
column = input.get_utf8_column()
)
)
))
.map(Self)
.parse(input)
}
@ -247,19 +357,17 @@ impl Tuplet {
}
impl Slope {
fn parser<'p, I, N, NS, S, SS, SV, V>(
fn parser<'a, 'p, N, NS, S, SS, SV, V>(
parser: &'p Parser<N, NS, S, SS, SV, V>,
) -> impl Fn(I) -> IResult<I, Self>
) -> impl Fn(LocatedSpan<&'a str>) -> IResult<LocatedSpan<&'a str>, Self>
where
I: Input + for<'z> Compare<&'z str> + AsRef<str> + Copy,
<I as Input>::Item: AsChar,
<I as Input>::Item: PartialEq<char>,
N: AsRef<[NS]>,
NS: AsRef<str>,
S: IntoIterator<Item = (SS, SV)> + Clone,
SS: AsRef<str>,
SV: Borrow<VariableChange>,
V: AsRef<[char]>,
'p: 'a,
{
trace!("making the {} parser", type_name::<Self>());
move |input| {
@ -279,15 +387,36 @@ impl Slope {
let iter: std::vec::IntoIter<(String, VariableChange)> = slopes.into_iter();
delimited(
char('{'),
cut(alt(iter
cut(expect(alt(iter
.map(|(k, v)| {
Box::new(move |input: I| value(v.clone(), tag(k.as_str())).parse(input))
as Box<dyn Fn(I) -> IResult<I, VariableChange>>
Box::new(move |input| value(v.clone(), tag(k.as_str())).parse(input))
as Box<
dyn Fn(
LocatedSpan<&'a str>,
)
-> IResult<LocatedSpan<&'a str>, VariableChange>,
>
})
.collect::<Vec<Box<dyn Fn(I) -> IResult<I, VariableChange>>>>()
.as_mut_slice()))
.and(token_parser(parser)),
cut(char('}')),
.collect::<Vec<
Box<
dyn Fn(
LocatedSpan<&'a str>,
)
-> IResult<LocatedSpan<&'a str>, VariableChange>,
>,
>>()
.as_mut_slice()), format!("expected a slope name from available slope names ({:?})", parser.slopes.clone().into_iter().map(|(s1, _)| s1.as_ref().to_string()).collect::<Vec<_>>())))
.and(cut(take_till(|c| c == '}').and_then(cut(expect(all_consuming(token_parser(parser)), "input did not match any known grammar for inner tokens (typo?)"))))),
cut(
expect(
char('}'),
format!(
"the slope started at line {line} column {column} was not closed at this point",
line = input.location_line(),
column = input.get_utf8_column()
)
)
),
)
.map(|(i, v)| Self::new(i, v))
.parse(input)
@ -342,9 +471,7 @@ where
}
}
pub fn take_while_map<F, I, O, Error: ParseError<I>>(
cond: F,
) -> impl FnMut(I) -> IResult<I, O, Error>
pub fn take_while_map<F, I, O>(cond: F) -> impl FnMut(I) -> IResult<I, O, LocatedVerboseError<I>>
where
I: Input + Copy,
F: Fn(I) -> Option<O>,
@ -364,24 +491,25 @@ where
len -= 1;
}
}
error!("take_while_map found no match");
Err(nom::Err::Incomplete(nom::Needed::Unknown))
warn!("take_while_map found no match");
Err(nom::Err::Error(LocatedVerboseError {
location: input,
error: Some(anyhow!("invalid expression")),
}))
}
}
#[cfg(test)]
mod tests {
use super::Parser;
use super::{IResult, Parser, expression_parser};
use std::collections::HashMap;
use std::{borrow::Borrow, collections::HashMap};
use nom::{IResult, Parser as _};
use nom::Parser as _;
use nom_locate::LocatedSpan;
use crate::{
compiler::{
Loop, LoopCount, Marker, Note, Silence, Slope, TokenVec, Tuplet, VariableChange,
},
parser::expression_parser,
use crate::compiler::{
Loop, LoopCount, Marker, Note, Silence, Slope, TokenVec, Tuplet, VariableChange,
};
fn very_fancy_slope() -> VariableChange {
@ -568,15 +696,26 @@ mod tests {
#[test]
fn r#loop() {
let slopes = Default::default();
fn parser_builder<'s>(
slopes: &'s HashMap<String, VariableChange>,
) -> impl Fn(&str) -> IResult<&str, Loop> {
move |input: &str| {
Loop::parser(&Parser::new(["do", "", "mi"], slopes, ['n'])).parse(input)
}
let parser = Parser::new(
["do", "", "mi"],
HashMap::<String, VariableChange>::default(),
['n'],
);
fn parser_builder<'a, 'p, N, NS, S, SS, SV, V>(
parser: &'p Parser<N, NS, S, SS, SV, V>,
) -> impl Fn(LocatedSpan<&'a str>) -> IResult<LocatedSpan<&'a str>, Loop>
where
N: AsRef<[NS]>,
NS: AsRef<str>,
S: IntoIterator<Item = (SS, SV)> + Clone,
SS: AsRef<str>,
SV: Borrow<VariableChange>,
V: AsRef<[char]>,
'p: 'a,
{
move |input| Loop::parser(parser).parse(input)
}
let parser = parser_builder(&slopes);
let parser = parser_builder(&parser);
let mut working_cases = vec![
(
"(.%)",
@ -604,7 +743,7 @@ mod tests {
];
let mut not_working_cases = vec!["", "(", ")", "(2", "(p)"];
for (test, expected) in working_cases.drain(..) {
let output = parser(test);
let output = parser(test.into()).map(|(ls, o)| (*ls, o));
if let Ok(result) = output {
assert_eq!(expected, result, "case \"{test}\"");
} else {
@ -612,7 +751,7 @@ mod tests {
}
}
for test in not_working_cases.drain(..) {
let output = parser(test);
let output = parser(test.into()).map(|(ls, o)| (*ls, o));
assert!(
output.is_err(),
"result of \"{test}\" was not Err: {output:?}"
@ -622,15 +761,26 @@ mod tests {
#[test]
fn tuplet() {
let slopes = Default::default();
fn parser_builder<'s>(
slopes: &'s HashMap<String, VariableChange>,
) -> impl Fn(&str) -> IResult<&str, Tuplet> {
move |input: &str| {
Tuplet::parser(&Parser::new(["do", "", "mi"], slopes, ['n'])).parse(input)
}
let parser = Parser::new(
["do", "", "mi"],
HashMap::<String, VariableChange>::default(),
['n'],
);
fn parser_builder<'a, 'p, N, NS, S, SS, SV, V>(
parser: &'p Parser<N, NS, S, SS, SV, V>,
) -> impl Fn(LocatedSpan<&'a str>) -> IResult<LocatedSpan<&'a str>, Tuplet>
where
N: AsRef<[NS]>,
NS: AsRef<str>,
S: IntoIterator<Item = (SS, SV)> + Clone,
SS: AsRef<str>,
SV: Borrow<VariableChange>,
V: AsRef<[char]>,
'p: 'a,
{
move |input| Tuplet::parser(parser).parse(input)
}
let parser = parser_builder(&slopes);
let parser = parser_builder(&parser);
let mut working_cases = vec![
(
"[.%]",
@ -644,7 +794,7 @@ mod tests {
];
let mut not_working_cases = vec!["", "[", "]", "[2", "[p]"];
for (test, expected) in working_cases.drain(..) {
let output = parser(test);
let output = parser(test.into()).map(|(ls, o)| (*ls, o));
if let Ok(result) = output {
assert_eq!(expected, result, "case \"{test}\"");
} else {
@ -652,7 +802,7 @@ mod tests {
}
}
for test in not_working_cases.drain(..) {
let output = parser(test);
let output = parser(test.into()).map(|(ls, o)| (*ls, o));
assert!(
output.is_err(),
"result of \"{test}\" was not Err: {output:?}"
@ -699,7 +849,7 @@ mod tests {
"{}",
];
for (test, expected) in working_cases.drain(..) {
let output = parser(test);
let output = parser(test.into()).map(|(ls, o)| (*ls, o));
if let Ok(result) = output {
assert_eq!(expected, result, "case \"{test}\"");
} else {
@ -707,7 +857,7 @@ mod tests {
}
}
for test in not_working_cases.drain(..) {
let output = parser(test);
let output = parser(test.into()).map(|(ls, o)| (*ls, o));
assert!(
output.is_err(),
"result of \"{test}\" was not Err: {output:?}"