From 2f771c55ac1331f51578dfa022982a8498d0db8a Mon Sep 17 00:00:00 2001 From: trimill Date: Wed, 5 Feb 2025 21:33:07 -0500 Subject: [PATCH] docs and defs --- Cargo.lock | 28 ++++---- docs/src/SUMMARY.md | 4 +- docs/src/lang/grammar.md | 95 ++++++++++++++++++++++++ docs/src/lang/lists.md | 78 +++++++++++++++++++- docs/src/lang/operators.md | 2 +- docs/src/lang/reference.md | 4 ++ docs/theme/highlight.js | 32 +++++++++ talc-lang/src/parser/parser.rs | 128 ++++++++++++++++----------------- 8 files changed, 290 insertions(+), 81 deletions(-) create mode 100644 docs/src/lang/grammar.md create mode 100644 docs/src/lang/reference.md diff --git a/Cargo.lock b/Cargo.lock index f034f27..6b01cc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,9 +31,9 @@ checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "byteorder" @@ -55,9 +55,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "clap" -version = "4.5.26" +version = "4.5.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8eb5e908ef3a6efbe1ed62520fb7287959888c88485abe072543190ecc66783" +checksum = "3e77c3243bd94243c03672cb5154667347c457ca271254724f9f393aee1c05ff" dependencies = [ "clap_builder", "clap_derive", @@ -65,9 +65,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.26" +version = "4.5.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96b01801b5fc6a0a232407abc821660c9c6d25a1cafc0d4f85f29fb8d9afc121" +checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7" dependencies = [ "anstyle", "clap_lex", @@ -75,9 +75,9 @@ dependencies = [ 
[[package]] name = "clap_derive" -version = "4.5.24" +version = "4.5.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" +checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed" dependencies = [ "heck", "proc-macro2", @@ -410,9 +410,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustix" -version = "0.38.43" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a78891ee6bf2340288408954ac787aa063d8e8817e9f53abb37c695c6d834ef6" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ "bitflags", "errno", @@ -457,9 +457,9 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "syn" -version = "2.0.96" +version = "2.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" +checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" dependencies = [ "proc-macro2", "quote", @@ -519,9 +519,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" [[package]] name = "unicode-segmentation" diff --git a/docs/src/SUMMARY.md b/docs/src/SUMMARY.md index 1d7ef76..4eae6c8 100644 --- a/docs/src/SUMMARY.md +++ b/docs/src/SUMMARY.md @@ -10,7 +10,6 @@ - [Arithmetic](./lang/arithmetic.md) - [Bitwise operators](./lang/bitwise.md) - - [Operators reference](./lang/operators.md) - [Variables and scope](./lang/variables.md) - [Functions](./lang/functions.md) - [Partial functions and pipes](./lang/partial.md) @@ -21,5 +20,8 @@ - 
[Iterators](./lang/iterators.md) - [Ranges](./lang/ranges.md) - [Exceptions](./lang/exceptions.md) +- [Language reference](./lang/reference.md) + - [Grammar](./lang/grammar.md) + - [Operators reference](./lang/operators.md) diff --git a/docs/src/lang/grammar.md b/docs/src/lang/grammar.md new file mode 100644 index 0000000..3c43758 --- /dev/null +++ b/docs/src/lang/grammar.md @@ -0,0 +1,95 @@ +# Grammar + +Some additional restrictions, such as the precedence of operators or the kinds +of expressions that are valid to assign to, are not represented here. See the +[operators reference](operators.md) for a description of operator precedence. + +```grammar +program := block EOF + +# LINESEP: either a semicolon or newline +block := LINESEP* (expr LINESEP+)* expr? + +expr := assign + | "return" assign? + | "break" assign? + | "continue" + +assign := pipeline | pipeline assign_op assign + +assign_op := "=" | "++=" | "&=" | "#|=" | "#^=" | "#&=" | "<<=" | ">>=" | "+=" + | "-=" | "*=" | "/=" | "//=" | "%=" | "^=" + +pipeline := lambda | pipeline "|" lambda + +lambda := prec_expr + | "\" ident_list "->" lambda + | "&" lambda + +# See the operators reference +prec_expr := access + | prec_expr infix_op prec_expr + | prec_expr postfix_op + | prefix_op prec_expr + +infix_op := "==" | "!=" | ">" | ">=" | "<" | "<=" | "++" | "&" | ".." | "..=" + | "#|" | "#^" | "#&" | "<<" | ">>" | "+" | "-" | "*" | "/" | "//" + | "%" | "^" | "and" | "or" + +prefix_op := "-" | "!" | "~" | "*.." | "*..=" + +postfix_op := "..*" + +access := var access_op* +access_op := "(" expr_list ")" + | "[" expr "]" + | "." IDENT + | "->" IDENT "(" expr_list ")" + +var := term + | "global" IDENT + | "var" IDENT + | fn_decl +fn_decl := "fn" IDENT? 
"(" ident_list ")" fn_body +fn_body := "do" block "end" + | "=" expr + +term := IDENT + | "(" expr ")" + | "[" list_items "]" + | "{" table_items "}" + | "$" + | "do" block "end" + | "if" if_stmt_chain + | "while" expr "do" block "end" + | "for" IDENT "in" expr "do" block "end" + | "try" block catch_block* "end" + | INTEGER + | FLOAT + | IMAGINARY + | STRING + | SYMBOL + | "true" + | "false" + | "nil" + | "*..*" + +if_stmt_chain := expr "then" block if_stmt_end +if_stmt_end := "elif" if_stmt_chain + | "else" block "end" + | "end" + +catch_block := "catch" ("*" | symbol_list) ("in" IDENT)? "do" block +symbol_list := (SYMBOL ",")* SYMBOL? + +ident_list := (IDENT ",")* IDENT? + +expr_list := (expr ",")* expr? + +list_items := (list_item ",")* list_item? +list_item := ".."? expr + +table_items := (table_item ",")* table_item? +table_item := ".." expr + | term "=" expr +``` diff --git a/docs/src/lang/lists.md b/docs/src/lang/lists.md index 3e2c58d..5657af1 100644 --- a/docs/src/lang/lists.md +++ b/docs/src/lang/lists.md @@ -4,7 +4,8 @@ Lists in Talc are written as comma-separated items between square brackets. Lists may be *heterogeneous* (contain items of different types). Similarly to strings, lists may be concatenated using `++`. Lists can be indexed by writing the index in square brackets after the list. The first element of the list is -at index zero. +at index zero. The `&` operator will add a single element to the end of a list, +returning a new list. ```talc >> numbers = [1, 5, 6, 4, 2, 3] @@ -13,6 +14,15 @@ at index zero. 4 >> numbers ++ [8, 9] [1, 5, 6, 4, 2, 3, 8, 9] +>> numbers & 7 +[1, 5, 6, 4, 2, 3, 7] +``` + +Lists are heterogeneous, so they may contain values of different types. + +```talc +>> things = [1, 2.0, "three", 4/1, :five] +[1, 2.0, "three", 4/1, :five] ``` Lists are *mutable* and their elements can be assigned to or modified. @@ -46,3 +56,69 @@ ranges further in their own chapter. 
>> squares[*..-3] [0, 1, 4, 9, 16] ``` + +## Interpolation + +Using the `..` operator, a list can be created by combining the elements from +multiple different collections. + +```talc +>> even = [2, 4, 6, 8] +[2, 4, 6, 8] +>> odd = [1, 3, 5, 7, ..even] +[1, 3, 5, 7, 2, 4, 6, 8] +>> n = [1, ..[2, 3, 4], 5, ..[6, ..[7, 8]], ..[9]] +[1, 2, 3, 4, 5, 6, 7, 8, 9] +``` + +Any iterator or iterable value can be used. + +```talc +>> squares = [..(0..10 | map(\x -> x*x))] +[0, 1, 4, 9, 16, 25, 36, 49, 64, 81] +``` + +## Destructuring + +A list can be destructured in an assignment expression as follows: + +```talc +>> vals = [6, 2, 8] +[6, 2, 8] +>> [a, b, c] = vals +[6, 2, 8] +>> a +6 +>> b +2 +>> c +8 +``` + +If the number of elements in the list does not match the number in the +destructure, an error is thrown. + +A single interpolation may also be used to collect all remaining values. The +interpolation may occur in the beginning, middle, or end of the assignment. + +```talc +>> ns = [1, 2, 3, 4, 5] +[1, 2, 3, 4, 5] +>> [a, b, ..rest] = ns +[1, 2, 3, 4, 5] +>> a +1 +>> b +2 +>> rest +[3, 4, 5] +>> [a, ..middle, z] = ns +[1, 2, 3, 4, 5] +>> a +1 +>> middle +[2, 3, 4] +>> z +5 +``` + diff --git a/docs/src/lang/operators.md b/docs/src/lang/operators.md index d2805d1..557fbb1 100644 --- a/docs/src/lang/operators.md +++ b/docs/src/lang/operators.md @@ -18,7 +18,7 @@ In increasing order of precedence: | `..*` | Suffix | Range from | | `#\|` | Left | Bitwise OR | | `#^` | Left | Bitwise XOR | -| `#&` | Left | Bitwise AND | +| `#&` | Left | Bitwise AND | | `<< >>` | Left | Shift left and right | | `+ -` | Left | Add and subtract | | `* / // %` | Left | Multiply, divide, integer divide, modulo | diff --git a/docs/src/lang/reference.md b/docs/src/lang/reference.md new file mode 100644 index 0000000..cbf3220 --- /dev/null +++ b/docs/src/lang/reference.md @@ -0,0 +1,4 @@ +# Language reference + +These pages present a more thorough (and less guided) description of the +Talc language. 
diff --git a/docs/theme/highlight.js b/docs/theme/highlight.js index 08c22f1..271eed1 100644 --- a/docs/theme/highlight.js +++ b/docs/theme/highlight.js @@ -105,3 +105,35 @@ hljs.registerLanguage('talc', function() { ] } }) + +hljs.registerLanguage('grammar', function() { + const STR_RE = "'[^']*'|\"[^\"]*\""; + const TERMINAL_RE = "\\b[A-Z_]+\\b" + const OP_RE = ":=|\\||\\+|\\*|\\?|\\(|\\)" + + return { + name: "Grammar", + contains: [ + { + className: 'string', + begin: STR_RE, + relevance: 0 + }, + { + className: 'variable', + begin: TERMINAL_RE, + relevance: 0 + }, + { + className: 'literal', + begin: OP_RE, + relevance: 0 + }, + hljs.COMMENT( + '#', // begin + '\n', // end + {} + ) + ] + } +}) diff --git a/talc-lang/src/parser/parser.rs b/talc-lang/src/parser/parser.rs index 14e9c96..c16feda 100644 --- a/talc-lang/src/parser/parser.rs +++ b/talc-lang/src/parser/parser.rs @@ -272,8 +272,15 @@ impl<'s> Parser<'s> { let ext = self.parse_expr()?; items.push(TableItem::Interpolate(b(ext))); } - k if k.expr_first() => { - let key = self.parse_term_not_ident()?; + T::True + | T::False + | T::Nil + | T::Dollar + | T::Integer + | T::String + | T::Symbol + | T::LParen => { + let key = self.parse_term()?; expect!(self, T::Equal); let value = self.parse_expr()?; items.push(TableItem::Pair(b(key), b(value))); @@ -399,9 +406,10 @@ impl<'s> Parser<'s> { } } - fn parse_term_not_ident(&mut self) -> Result { + fn parse_term(&mut self) -> Result { let tok = self.next()?; match tok.kind { + T::Identifier => Ok(E::Ident(Symbol::get(tok.content)).span(tok.span)), T::LParen => { let e = self.parse_expr()?; expect!(self, T::RParen); @@ -485,26 +493,46 @@ impl<'s> Parser<'s> { } } - fn parse_term(&mut self) -> Result { - let Some(tok) = try_next!(self, T::Identifier | T::Var | T::Global) else { - return self.parse_term_not_ident() - }; - match tok.kind { - T::Identifier => Ok(E::Ident(Symbol::get(tok.content)).span(tok.span)), + fn parse_fn_decl(&mut self) -> Result { + let tok_fn = 
expect!(self, T::Fn); + let name = try_next!(self, T::Identifier).map(|t| Symbol::get(t.content)); + expect!(self, T::LParen); + let args = self.parse_ident_list()?; + expect!(self, T::RParen); + match expect!(self, T::Do | T::Equal).kind { + T::Do => { + let content = self.parse_block()?; + let end = expect!(self, T::End); + Ok(E::FnDef(name, args, b(content)).span(tok_fn.span + end.span)) + } + T::Equal => { + let content = self.parse_expr()?; + let span = tok_fn.span + content.span; + Ok(E::FnDef(name, args, b(content)).span(span)) + } + _ => unreachable!("parse_fn_decl: guaranteed by try_next!"), + } + } + + fn parse_var(&mut self) -> Result { + match self.peek()?.kind { T::Global => { + let tok = self.next()?; let ident = expect!(self, T::Identifier); Ok(E::Global(Symbol::get(ident.content)).span(tok.span + ident.span)) } T::Var => { + let tok = self.next()?; let ident = expect!(self, T::Identifier); Ok(E::Var(Symbol::get(ident.content)).span(tok.span + ident.span)) } - _ => unreachable!("guarenteed by try_next"), + T::Fn => self.parse_fn_decl(), + _ => self.parse_term(), } } fn parse_access(&mut self) -> Result { - let mut lhs = self.parse_term()?; + let mut lhs = self.parse_var()?; loop { let tok = try_next!(self, T::LParen | T::LBrack | T::Arrow | T::Dot); match tok.map(|t| t.kind) { @@ -651,7 +679,7 @@ impl<'s> Parser<'s> { if let Some(op) = self.peek()?.kind.assign_op() { let lval = LValue::from_expr(lhs)?; self.next()?; - let rhs = self.parse_decl()?; + let rhs = self.parse_assign()?; let rhs_span = rhs.span; Ok(E::Assign(op, b(lval), b(rhs)).span(lhs_span + rhs_span)) } else { @@ -659,61 +687,33 @@ impl<'s> Parser<'s> { } } - fn parse_fn_decl(&mut self) -> Result { - let tok_fn = expect!(self, T::Fn); - let name = try_next!(self, T::Identifier).map(|t| Symbol::get(t.content)); - expect!(self, T::LParen); - let args = self.parse_ident_list()?; - expect!(self, T::RParen); - match expect!(self, T::Do | T::Equal).kind { - T::Do => { - let content = 
self.parse_block()?; - let end = expect!(self, T::End); - Ok(E::FnDef(name, args, b(content)).span(tok_fn.span + end.span)) - } - T::Equal => { - let content = self.parse_expr()?; - let span = tok_fn.span + content.span; - Ok(E::FnDef(name, args, b(content)).span(span)) - } - _ => unreachable!("parse_fn_decl: guaranteed by try_next!"), - } - } - - fn parse_decl(&mut self) -> Result { - match self.peek()?.kind { - T::Fn => self.parse_fn_decl(), - _ => self.parse_assign(), - } - } - fn parse_expr(&mut self) -> Result { - let tok = try_next!(self, T::Return | T::Break | T::Continue); - if let Some(tok) = tok { - match tok.kind { - T::Return => { - if self.peek()?.kind.expr_first() { - let expr = self.parse_decl()?; - let span = expr.span; - Ok(E::Return(Some(b(expr))).span(tok.span + span)) - } else { - Ok(E::Return(None).span(tok.span)) - } + match self.peek()?.kind { + T::Return => { + let tok = self.next()?; + if self.peek()?.kind.expr_first() { + let expr = self.parse_assign()?; + let span = expr.span; + Ok(E::Return(Some(b(expr))).span(tok.span + span)) + } else { + Ok(E::Return(None).span(tok.span)) } - T::Break => { - if self.peek()?.kind.expr_first() { - let expr = self.parse_decl()?; - let span = expr.span; - Ok(E::Break(Some(b(expr))).span(tok.span + span)) - } else { - Ok(E::Break(None).span(tok.span)) - } - } - T::Continue => Ok(E::Continue.span(tok.span)), - _ => unreachable!("parse_expr: guaranteed by try_next!"), } - } else { - self.parse_decl() + T::Break => { + let tok = self.next()?; + if self.peek()?.kind.expr_first() { + let expr = self.parse_assign()?; + let span = expr.span; + Ok(E::Break(Some(b(expr))).span(tok.span + span)) + } else { + Ok(E::Break(None).span(tok.span)) + } + } + T::Continue => { + let tok = self.next()?; + Ok(E::Continue.span(tok.span)) + } + _ => self.parse_assign(), } }