From 0560847753297fb66719c10b16e42dc9a0e524ee Mon Sep 17 00:00:00 2001 From: trimill Date: Sun, 3 Nov 2024 12:50:36 -0500 Subject: [PATCH] rewrite parser, checked arithmetic --- Cargo.lock | 358 +--------- examples/closures.talc | 2 + examples/closures2.talc | 2 + examples/totient.talc | 4 +- talc-bin/src/helper.rs | 148 ++--- talc-bin/src/main.rs | 5 +- talc-bin/src/repl.rs | 5 +- talc-lang/Cargo.toml | 2 - talc-lang/build.rs | 3 - talc-lang/src/ast.rs | 59 -- talc-lang/src/chunk.rs | 10 +- talc-lang/src/compiler.rs | 436 ++++++------ talc-lang/src/lib.rs | 46 +- talc-lang/src/parser/ast.rs | 284 ++++++++ talc-lang/src/parser/lexer.rs | 559 ++++++++++++++++ talc-lang/src/parser/mod.rs | 14 + .../parser.lalrpop.OLD} | 1 + talc-lang/src/parser/parser.rs | 618 ++++++++++++++++++ talc-lang/src/parser/pos.rs | 100 +++ .../src/{parser_util.rs => parser/util.rs} | 56 +- talc-lang/src/value/mod.rs | 65 +- talc-lang/src/value/ops.rs | 203 ++++-- talc-lang/src/vm.rs | 62 +- talc-macros/src/lib.rs | 44 +- talc-macros/src/native_func.rs | 47 ++ talc-std/src/file.rs | 4 +- talc-std/src/iter.rs | 2 + talc-std/src/lib.rs | 2 + talc-std/src/num.rs | 2 +- talc-std/src/regex.rs | 2 +- talc-std/src/value.rs | 16 +- 31 files changed, 2267 insertions(+), 894 deletions(-) mode change 100644 => 100755 examples/closures.talc mode change 100644 => 100755 examples/closures2.talc mode change 100644 => 100755 examples/totient.talc delete mode 100644 talc-lang/build.rs delete mode 100644 talc-lang/src/ast.rs create mode 100644 talc-lang/src/parser/ast.rs create mode 100644 talc-lang/src/parser/lexer.rs create mode 100644 talc-lang/src/parser/mod.rs rename talc-lang/src/{parser.lalrpop => parser/parser.lalrpop.OLD} (99%) create mode 100644 talc-lang/src/parser/parser.rs create mode 100644 talc-lang/src/parser/pos.rs rename talc-lang/src/{parser_util.rs => parser/util.rs} (65%) create mode 100644 talc-macros/src/native_func.rs diff --git a/Cargo.lock b/Cargo.lock index 4ec6217..bf08894 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,18 +13,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.9" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56" - -[[package]] -name = "ascii-canvas" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef1e3e699d84ab1b0911a1010c5c106aa34ae89aeac103be5ce0c3859db1e891" -dependencies = [ - "term", -] +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "autocfg" @@ -32,36 +23,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" -[[package]] -name = "bit-set" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" -dependencies = [ - "bit-vec", -] - -[[package]] -name = "bit-vec" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" - [[package]] name = "bitflags" version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "byteorder" version = "1.5.0" @@ -133,25 +100,6 @@ dependencies = [ "error-code", ] -[[package]] -name = "cpufeatures" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" -dependencies = [ - "libc", -] - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - [[package]] name = "ctrlc" version = "3.4.5" @@ -162,43 +110,12 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - -[[package]] -name = "either" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" - -[[package]] -name = "ena" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d248bdd43ce613d87415282f69b9bb99d947d290b10962dd6c56233312c2ad5" -dependencies = [ - "log", -] - [[package]] name = "endian-type" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" -[[package]] -name = "equivalent" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" - [[package]] name = "errno" version = "0.3.9" @@ -226,22 +143,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "fixedbitset" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" version = "0.2.15" @@ -253,12 +154,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "hashbrown" -version = "0.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" - [[package]] name = "heck" version = "0.5.0" @@ -274,65 +169,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "indexmap" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "keccak" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecc2af9a1119c51f12a14607e783cb977bde58bc069ff0c3da1095e635d70654" -dependencies = [ - "cpufeatures", -] - -[[package]] -name = "lalrpop" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06093b57658c723a21da679530e061a8c25340fa5a6f98e313b542268c7e2a1f" -dependencies = [ - "ascii-canvas", - "bit-set", - "ena", - "itertools", - "lalrpop-util", - "petgraph", - "regex", - "regex-syntax", - "sha3", - "string_cache", - "term", - "unicode-xid", - "walkdir", -] - -[[package]] -name = "lalrpop-util" -version = "0.22.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "feee752d43abd0f4807a921958ab4131f692a44d4d599733d4419c5d586176ce" -dependencies = [ - "regex-automata", - "rustversion", -] - [[package]] name = "lazy_static" version = "1.5.0" @@ -351,16 +187,6 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" -[[package]] -name = "lock_api" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" -dependencies = [ - "autocfg", - "scopeguard", -] - [[package]] name = "log" version = "0.4.22" @@ -373,12 +199,6 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" -[[package]] -name = "new_debug_unreachable" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" - [[package]] name = "nibble_vec" version = "0.1.0" @@ -449,54 +269,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "once_cell" -version = "1.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" - -[[package]] -name = "parking_lot" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets", -] - -[[package]] -name = "petgraph" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" -dependencies = [ - "fixedbitset", - "indexmap", -] - -[[package]] -name = "phf_shared" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" -dependencies = [ - "siphasher", -] - [[package]] name = "ppv-lite86" version = "0.2.20" @@ -506,12 +278,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "precomputed-hash" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" - [[package]] name = "proc-macro2" version = "1.0.89" @@ -570,15 +336,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "redox_syscall" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" -dependencies = [ - "bitflags", -] - [[package]] name = "regex" version = "1.11.1" @@ -621,12 +378,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rustversion" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" - [[package]] name = "rustyline" version = "14.0.0" @@ -649,61 +400,17 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "sha3" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75872d278a8f37ef87fa0ddbda7802605cb18344497949862c0d4dcb291eba60" -dependencies = [ - "digest", - "keccak", -] - -[[package]] -name = "siphasher" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" - [[package]] name = "smallvec" version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" -[[package]] -name = "string_cache" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b" -dependencies = [ - "new_debug_unreachable", - "once_cell", - "parking_lot", - "phf_shared", - "precomputed-hash", -] - [[package]] name = "syn" -version = "2.0.85" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5023162dfcd14ef8f32034d8bcd4cc5ddc61ef7a247c024a33e24e1f24d21b56" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -725,8 +432,6 @@ dependencies = [ name = "talc-lang" version = "0.2.0" dependencies = [ - "lalrpop", - "lalrpop-util", "lazy_static", "num-complex", "num-rational", @@ -754,42 +459,26 @@ dependencies = [ "talc-macros", ] -[[package]] -name = "term" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4df4175de05129f31b80458c6df371a15e7fc3fd367272e6bf938e5c351c7ea0" -dependencies = [ - "home", - "windows-sys 0.52.0", -] - [[package]] name = "thiserror" -version = "1.0.65" +version = "1.0.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d11abd9594d9b38965ef50805c5e469ca9cc6f197f883f717e0269a3057b3d5" +checksum = "3b3c6efbfc763e64eb85c11c25320f0737cb7364c4b6336db90aa9ebe27a0bbd" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.65" +version = "1.0.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae71770322cbd277e69d762a16c444af02aa0575ac0d174f0b9562d3b37f8602" +checksum = "b607164372e89797d78b8e23a6d67d5d1038c1c65efd52e1389ef8b77caba2a6" dependencies = [ "proc-macro2", "quote", "syn", ] -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - [[package]] name = "unicode-ident" version = "1.0.13" @@ -808,49 +497,18 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" -[[package]] -name = "unicode-xid" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" - [[package]] name = "utf8parse" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" -[[package]] -name = "winapi-util" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" -dependencies = [ - "windows-sys 0.59.0", -] - [[package]] name = "windows-sys" version = "0.52.0" diff --git a/examples/closures.talc b/examples/closures.talc old mode 100644 new mode 100755 index 2acbf6d..9a9d53e --- a/examples/closures.talc +++ b/examples/closures.talc @@ -1,3 +1,5 @@ +#!/usr/bin/env talc + -- adapted from Crafting Interpreters 10.6 make_counter = \-> do diff --git a/examples/closures2.talc b/examples/closures2.talc old mode 100644 new mode 100755 index 1f50bb8..96a2494 --- a/examples/closures2.talc +++ b/examples/closures2.talc @@ -1,3 +1,5 @@ +#!/usr/bin/env talc + var i = 0; var outer = \n -> do diff --git a/examples/totient.talc b/examples/totient.talc old mode 100644 new mode 100755 index 5c705ec..f076cab --- a/examples/totient.talc +++ b/examples/totient.talc @@ -1,6 +1,8 @@ +#!/usr/bin/env talc + totient = \n -> do count(factors(n)) | pairs | map(\v -> do - (v!0)^(v!1) - (v!0)^(v!1 - 1) + v[0]^v[1] - v[0]^(v[1] - 1) end) | prod end diff --git a/talc-bin/src/helper.rs b/talc-bin/src/helper.rs index 037667b..e73ac94 100644 --- a/talc-bin/src/helper.rs +++ b/talc-bin/src/helper.rs @@ -1,41 +1,16 @@ -use std::{borrow::Cow, cell::RefCell, collections::HashMap, rc::Rc}; +use std::{borrow::Cow, cell::RefCell, rc::Rc}; use rustyline::{completion::Completer, highlight::Highlighter, hint::Hinter, validate::{ValidationContext, ValidationResult, Validator}, Helper, Result}; -use talc_lang::{lstring::LStr, Lexer, Vm}; - -#[derive(Clone, Copy)] -enum TokenType { - String, Symbol, Number, Literal -} +use talc_lang::{lstring::LStr, parser::{Lexer, Pos, Span, TokenKind}, Vm}; pub struct TalcHelper { vm: Rc>, - lex: Lexer, - token_types: HashMap, -} - -macro_rules! load_tokens { - ($token_types:expr, $lex:expr, {$($($tok:literal)|+ => $ty:expr,)*}) => {{ - $($( - $token_types.insert($lex.lex($tok).next().unwrap().unwrap().1.0, $ty); - )*)* - }}; } impl TalcHelper { pub fn new(vm: Rc>) -> Self { - let lex = Lexer::new(); - let mut token_types = HashMap::new(); - load_tokens!(token_types, lex, { - "\"\"" | "''" => TokenType::String, - ":a" | ":''" | ":\"\"" => TokenType::Symbol, - "0" | "0.0" | "0x0" | "0b0" | "0o0" | "0s0" => TokenType::Number, - "true" | "false" | "nil" => TokenType::Literal, - }); Self { vm, - lex, - token_types, } } } @@ -76,24 +51,28 @@ impl Hinter for TalcHelper { impl Highlighter for TalcHelper { fn highlight<'l>(&self, line: &'l str, _pos: usize) -> Cow<'l, str> { - let mut tokens = self.lex.lex(line).peekable(); + let mut lexer = Lexer::new(line); let mut buf = String::new(); - let mut last = 0; - while let Some(Ok((l, tok, r))) = tokens.next() { - buf += &line[last..l]; - last = r; - let tokty = self.token_types.get(&tok.0); - buf += match tokty { - Some(TokenType::Literal) => "\x1b[93m", - Some(TokenType::Number) => "\x1b[93m", - Some(TokenType::String) => "\x1b[92m", - Some(TokenType::Symbol) => "\x1b[96m", - None => "", + let mut last = Pos::new(); + while let Some(Ok(token)) = lexer.next() { + if token.kind == TokenKind::Eof { break } + buf += Span::new(last, token.span.start).of(line); + last = token.span.end; + let format = match token.kind { + TokenKind::Nil + | TokenKind::True + | TokenKind::False + | TokenKind::Integer + | TokenKind::Float => "\x1b[93m", + TokenKind::String => "\x1b[92m", + TokenKind::Symbol => "\x1b[96m", + _ => "", }; - buf += tok.1; - if tokty.is_some() { buf += "\x1b[0m" } + buf += format; + buf += token.content; + if !format.is_empty() { buf += "\x1b[0m" } } - buf += &line[last..]; + buf += &line[(last.idx as usize)..]; Cow::Owned(buf) } @@ -116,62 +95,73 @@ impl Highlighter for TalcHelper { impl Validator for TalcHelper { fn validate(&self, ctx: &mut ValidationContext) -> Result { - let tokens = self.lex.lex(ctx.input()); + use TokenKind as K; + let lexer = Lexer::new(ctx.input()); let mut delims = Vec::new(); let mut mismatch = None; - for token in tokens { + for token in lexer { let token = match token { Ok(t) => t, - Err(e) => return Ok(ValidationResult::Invalid( - Some(e.to_string()))), + Err(e) => { + return Ok(ValidationResult::Invalid( + Some(format!(" {e}")))) + } }; - let t = token.1.1; - match t { - "(" | "{" | "[" | "if" | "while" | "for" | "try" - => delims.push(token.1.1), - ")" => match delims.pop() { - Some("(") => (), - v => { mismatch = Some((v, t)); break } + let k = token.kind; + let s = token.span; + match k { + K::Eof => break, + K::LParen + | K::LBrack + | K::LBrace + | K::If + | K::While + | K::For + | K::Try + => delims.push(token.kind), + K::RParen => match delims.pop() { + Some(K::LParen) => (), + v => { mismatch = Some((v, k, s)); break } }, - "}" => match delims.pop() { - Some("{") => (), - v => { mismatch = Some((v, t)); break } + K::RBrack => match delims.pop() { + Some(K::LBrack) => (), + v => { mismatch = Some((v, k, s)); break } }, - "]" => match delims.pop() { - Some("[") => (), - v => { mismatch = Some((v, t)); break } + K::RBrace => match delims.pop() { + Some(K::LBrace) => (), + v => { mismatch = Some((v, k, s)); break } }, - "then" => match delims.pop() { - Some("if" | "elif") => delims.push(t), - v => { mismatch = Some((v, t)); break } + K::Then => match delims.pop() { + Some(K::If | K::Elif) => delims.push(k), + v => { mismatch = Some((v, k, s)); break } } - "catch" => match delims.pop() { - Some("try") => delims.push(t), - v => { mismatch = Some((v, t)); break } + K::Catch => match delims.pop() { + Some(K::Try) => delims.push(k), + v => { mismatch = Some((v, k, s)); break } } - "do" => match delims.last().copied() { - Some("while" | "for" | "catch") => { + K::Do => match delims.last().copied() { + Some(K::While | K::For | K::Catch) => { delims.pop(); - delims.push(t); + delims.push(k); }, - _ => delims.push(t) + _ => delims.push(k) }, - "elif" | "else" => match delims.pop() { - Some("then") => delims.push(t), - v => { mismatch = Some((v, t)); break } + K::Elif | K::Else => match delims.pop() { + Some(K::Then) => delims.push(k), + v => { mismatch = Some((v, k, s)); break } }, - "end" => match delims.pop() { - Some("then" | "elif" | "else" | "do" | "try") => (), - v => { mismatch = Some((v, t)); break } + K::End => match delims.pop() { + Some(K::Then | K::Else | K::Do | K::Try | K::Catch) => (), + v => { mismatch = Some((v, k, s)); break } }, _ => (), } } match mismatch { - Some((None, b)) => return Ok(ValidationResult::Invalid(Some( - format!(" found unmatched {b}")))), - Some((Some(a), b)) => return Ok(ValidationResult::Invalid(Some( - format!(" found {a} matched with {b}")))), + Some((None, b, s)) => return Ok(ValidationResult::Invalid(Some( + format!(" found unmatched {b} at {s}")))), + Some((Some(a), b, s)) => return Ok(ValidationResult::Invalid(Some( + format!(" found {a} matched with {b} at {s}")))), _ => (), } diff --git a/talc-bin/src/main.rs b/talc-bin/src/main.rs index 3b1d1fa..2333265 100644 --- a/talc-bin/src/main.rs +++ b/talc-bin/src/main.rs @@ -1,5 +1,5 @@ use clap::{ColorChoice, Parser}; -use talc_lang::{compiler::compile, value::function::disasm_recursive, Vm}; +use talc_lang::{compiler::compile, parser, value::function::disasm_recursive, Vm}; use std::{path::PathBuf, process::ExitCode, rc::Rc}; mod repl; @@ -29,11 +29,10 @@ struct Args { } fn exec(src: &str, args: &Args) -> ExitCode { - let parser = talc_lang::Parser::new(); let mut vm = Vm::new(256); talc_std::load_all(&mut vm); - let ex = match parser.parse(src) { + let ex = match parser::parse(src) { Ok(ex) => ex, Err(e) => { eprintln!("Error: {e}"); diff --git a/talc-bin/src/repl.rs b/talc-bin/src/repl.rs index 063c47b..4847495 100644 --- a/talc-bin/src/repl.rs +++ b/talc-bin/src/repl.rs @@ -2,7 +2,7 @@ use std::{cell::RefCell, io::IsTerminal, process::ExitCode, rc::Rc}; use clap::ColorChoice; use rustyline::{error::ReadlineError, history::{FileHistory, History}, ColorMode, Config, Editor}; -use talc_lang::{compiler::compile_repl, symbol::Symbol, value::{function::disasm_recursive, Value}, Vm}; +use talc_lang::{compiler::compile_repl, parser, symbol::Symbol, value::{function::disasm_recursive, Value}, Vm}; use crate::{helper::TalcHelper, Args}; @@ -74,7 +74,6 @@ pub fn repl(args: &Args) -> ExitCode { eprintln!("input disassembly enabled"); } - let parser = talc_lang::Parser::new(); let mut compiler_globals = Vec::new(); let mut vm = Vm::new(256); talc_std::load_all(&mut vm); @@ -121,7 +120,7 @@ pub fn repl(args: &Args) -> ExitCode { }, }; - let ex = match parser.parse(&line) { + let ex = match parser::parse(&line) { Ok(ex) => ex, Err(e) => { eprintln!("{}Error:{} {e}", c.error, c.reset); diff --git a/talc-lang/Cargo.toml b/talc-lang/Cargo.toml index 0e4ec17..78ebd80 100644 --- a/talc-lang/Cargo.toml +++ b/talc-lang/Cargo.toml @@ -4,7 +4,6 @@ version = "0.2.0" edition = "2021" [dependencies] -lalrpop-util = { version = "0.22", features = ["lexer", "unicode"] } num-complex = "0.4" num-rational = { version = "0.4", default-features = false, features = [] } num-traits = "0.2" @@ -13,4 +12,3 @@ lazy_static = "1.5" unicode-ident = "1.0" [build-dependencies] -lalrpop = { version = "0.22", default-features = false, features = ["lexer", "unicode"]} diff --git a/talc-lang/build.rs b/talc-lang/build.rs deleted file mode 100644 index 143899f..0000000 --- a/talc-lang/build.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() -> Result<(), Box> { - lalrpop::process_root() -} diff --git a/talc-lang/src/ast.rs b/talc-lang/src/ast.rs deleted file mode 100644 index cc5c306..0000000 --- a/talc-lang/src/ast.rs +++ /dev/null @@ -1,59 +0,0 @@ -use crate::{lstring::LStr, symbol::Symbol, value::Value}; - -#[derive(Clone, Copy, Debug)] -pub enum BinaryOp { - Add, Sub, Mul, Div, Mod, Pow, IntDiv, - Shr, Shl, BitAnd, BitXor, BitOr, - Eq, Ne, Gt, Lt, Ge, Le, - Concat, Append, - Range, RangeIncl, -} - -#[derive(Clone, Copy, Debug)] -pub enum UnaryOp { - Neg, Not, RangeEndless, -} - -#[derive(Debug)] -pub enum Expr<'s> { - Literal(Value), - Ident(&'s LStr), - - UnaryOp(UnaryOp, Box>), - BinaryOp(BinaryOp, Box>, Box>), - - Assign(Option, Box>, Box>), - AssignVar(&'s LStr, Box>), - AssignGlobal(&'s LStr, Box>), - - Index(Box>, Box>), - FnCall(Box>, Vec>), - AssocFnCall(Box>, Symbol, Vec>), - Pipe(Box>, Box>), - - Block(Vec>), - List(Vec>), - Table(Vec<(Expr<'s>, Expr<'s>)>), - - Return(Box>), - And(Box>, Box>), - Or(Box>, Box>), - If(Box>, Box>, Option>>), - While(Box>, Box>), - For(&'s LStr, Box>, Box>), - Lambda(Vec<&'s LStr>, Box>), - Try(Box>, Vec>), -} - -#[derive(Debug)] -pub struct CatchBlock<'s> { - pub name: Option<&'s LStr>, - pub types: Option>, - pub body: Expr<'s>, -} - -#[derive(Debug)] -pub enum LValue<'s> { - Ident(&'s LStr), - Index(Box>, Box>), -} diff --git a/talc-lang/src/chunk.rs b/talc-lang/src/chunk.rs index 2465db6..a4b1945 100644 --- a/talc-lang/src/chunk.rs +++ b/talc-lang/src/chunk.rs @@ -1,4 +1,4 @@ -use crate::{value::Value, ast::{UnaryOp, BinaryOp}, symbol::Symbol}; +use crate::{value::Value, parser::ast::{UnaryOp, BinaryOp}, symbol::Symbol}; #[derive(Clone, Copy, Debug, Default)] pub struct Arg24([u8; 3]); @@ -100,8 +100,9 @@ pub enum Instruction { CloseOver(Arg24), // load nth local and convert to cell, write back a copy Closure(Arg24), // load constant function and fill state from stack - LoadUpvalue(Arg24), // load a cell from closure state to new local - StoreUpvalue(Arg24), // load a cell from closure state to new local + LoadUpvalue(Arg24), // load + StoreUpvalue(Arg24), // store a cell from closure state to new local + ContinueUpvalue(Arg24), // LoadClosedLocal(Arg24), // load through cell in nth local StoreClosedLocal(Arg24), // store through cell in nth local @@ -115,7 +116,7 @@ pub enum Instruction { DupTwo, Drop(Arg24), Swap, - + UnaryOp(UnaryOp), BinaryOp(BinaryOp), @@ -157,6 +158,7 @@ impl std::fmt::Display for Instruction { Self::Closure(c) => write!(f, "closure {}", usize::from(c)), Self::LoadUpvalue(n) => write!(f, "load_upval {}", usize::from(n)), Self::StoreUpvalue(n) => write!(f, "store_upval {}", usize::from(n)), + Self::ContinueUpvalue(n) => write!(f, "cont_upval {}", usize::from(n)), Self::LoadClosedLocal(n) => write!(f, "load_closed {}", usize::from(n)), Self::StoreClosedLocal(n) => write!(f, "store_closed {}", usize::from(n)), Self::Const(c) => write!(f, "const {}", usize::from(c)), diff --git a/talc-lang/src/compiler.rs b/talc-lang/src/compiler.rs index 623b0f1..5166673 100644 --- a/talc-lang/src/compiler.rs +++ b/talc-lang/src/compiler.rs @@ -1,6 +1,7 @@ +use std::collections::{BTreeMap, HashMap}; use std::rc::Rc; -use crate::ast::{BinaryOp, Expr, LValue, CatchBlock}; +use crate::parser::ast::{BinaryOp, CatchBlock, Expr, ExprKind, LValue, LValueKind}; use crate::chunk::{Instruction as I, Chunk, Arg24, Catch}; use crate::lstr; use crate::lstring::LStr; @@ -8,11 +9,21 @@ use crate::symbol::Symbol; use crate::value::function::{FuncAttrs, Function}; use crate::value::Value; +enum ResolveOutcome { + Var(VarKind), + InParent, + None, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum VarKind { + Local(usize), Closed(usize), Global +} + #[derive(Debug, Clone)] -pub struct Local { +pub struct Var { name: Rc, - scope: usize, - closed: bool, + kind: VarKind, } #[derive(Clone, Copy, PartialEq, Eq)] @@ -25,9 +36,10 @@ struct Compiler<'a> { parent: Option<&'a Compiler<'a>>, chunk: Chunk, attrs: FuncAttrs, - scope: usize, - locals: Vec, - globals: Vec, + scope: HashMap, Var>, + shadowed: Vec<(Rc, Option)>, + closes: BTreeMap, usize>, + local_count: usize, } pub fn compile(expr: &Expr) -> Function { @@ -36,7 +48,7 @@ pub fn compile(expr: &Expr) -> Function { comp.finish() } -pub fn compile_repl(expr: &Expr, globals: &[Local]) -> (Function, Vec) { +pub fn compile_repl(expr: &Expr, globals: &[Rc]) -> (Function, Vec>) { let mut comp = Compiler::new_repl(globals); comp.expr(expr); comp.finish_repl() @@ -44,30 +56,35 @@ pub fn compile_repl(expr: &Expr, globals: &[Local]) -> (Function, Vec) { impl<'a> Default for Compiler<'a> { fn default() -> Self { - let locals = vec![Local { - name: lstr!("self").into(), - scope: 0, - closed: false, - }]; + let mut scope = HashMap::new(); + let self_name: Rc = lstr!("self").into(); + scope.insert(self_name.clone(), Var { + name: self_name, + kind: VarKind::Local(0), + }); Self { mode: CompilerMode::Function, parent: None, chunk: Chunk::new(), attrs: FuncAttrs::default(), - scope: 0, - locals, - globals: Vec::new(), + scope, + shadowed: Vec::new(), + local_count: 1, + closes: BTreeMap::new(), } } } impl<'a> Compiler<'a> { - fn new_repl(globals: &[Local]) -> Self { - Self { + fn new_repl(globals: &'a [Rc]) -> Self { + let mut new = Self { mode: CompilerMode::Repl, - globals: globals.to_vec(), ..Self::default() - } + }; + for g in globals { + new.declare_global(g); + } + new } fn new_module(parent: Option<&'a Self>) -> Self { @@ -86,12 +103,9 @@ impl<'a> Compiler<'a> { }; new.attrs.arity = args.len(); + for arg in args { - new.locals.push(Local { - name: (*arg).into(), - scope: 0, - closed: false, - }); + new.declare_local(arg); } new @@ -99,16 +113,26 @@ impl<'a> Compiler<'a> { pub fn finish(mut self) -> Function { self.emit(I::Return); - // TODO closure - Function::from_parts(Rc::new(self.chunk), self.attrs, Vec::new().into()) + Function::new(Rc::new(self.chunk), self.attrs, self.closes.len()) } - pub fn finish_repl(mut self) -> (Function, Vec) { + pub fn finish_inner(mut self) -> (Function, BTreeMap, usize>) { + self.emit(I::Return); + // TODO closure + ( + Function::new(Rc::new(self.chunk), self.attrs, self.closes.len()), + self.closes + ) + } + + pub fn finish_repl(mut self) -> (Function, Vec>) { self.emit(I::Return); ( // TODO closure - Function::from_parts(Rc::new(self.chunk), self.attrs, Vec::new().into()), - self.globals + Function::new(Rc::new(self.chunk), self.attrs, self.closes.len()), + self.scope.into_iter().filter_map(|(_,v)| { + (v.kind == VarKind::Global).then_some(v.name) + }).collect() ) } @@ -133,7 +157,7 @@ impl<'a> Compiler<'a> { && matches!(instrs.get(instrs.len() - 2), Some(I::Dup)) && matches!(instrs.last(), Some( I::NewLocal | I::StoreLocal(_) | I::StoreGlobal(_) - | I::StoreClosedLocal(_) + | I::StoreClosedLocal(_) | I::StoreUpvalue(_) )) { // can't panic: checked that instrs.len() >= 2 @@ -151,6 +175,7 @@ impl<'a> Compiler<'a> { I::Dup | I::Const(_) | I::Int(_) | I::Nil | I::Bool(_) | I::Symbol(_) | I::LoadLocal(_) | I::LoadClosedLocal(_) + | I::LoadUpvalue(_) ) ); if poppable { @@ -176,130 +201,142 @@ impl<'a> Compiler<'a> { self.chunk.instrs[n] = new; } - fn begin_scope(&mut self) { - self.scope += 1; + #[must_use] + fn begin_scope(&mut self) -> usize { + self.shadowed.len() } - fn end_scope(&mut self) { - self.scope -= 1; + fn end_scope(&mut self, scope: usize) { + let mut locals = 0; + while self.shadowed.len() > scope { + let (name, var) = self.shadowed.pop().expect("scope bad"); - // no need to clean up at bottom scope - if self.scope == 0 { return; } + if let Some(var) = var { + if var.kind != VarKind::Global { + locals += 1; + } + self.scope.insert(name, var); + } else { + self.scope.remove(&name); + } + } - for i in (0..self.globals.len()).rev() { - if self.globals[i].scope <= self.scope { - break; - } - self.globals.pop(); - } - - let mut count = 0; - for i in (0..self.locals.len()).rev() { - if self.locals[i].scope <= self.scope { - break; - } - self.locals.pop(); - count += 1; - } - - if count > 0 && self.scope > 0 { - self.emit(I::DropLocal(Arg24::from_usize(count))); - } + if locals > 0 { + self.emit(I::DropLocal(Arg24::from_usize(locals))); + self.local_count -= locals; + } } // // variables // + + fn resolve_name(&self, name: &LStr) -> ResolveOutcome { + if let Some(v) = self.scope.get(name) { + return ResolveOutcome::Var(v.kind) + } + let Some(parent) = self.parent else { + return ResolveOutcome::None + }; + if let ResolveOutcome::None = parent.resolve_name(name) { + return ResolveOutcome::None + } + ResolveOutcome::InParent + } - fn resolve_local(&mut self, name: &LStr) -> Option { - self.locals.iter().rev() - .position(|v| v.name.as_ref() == name) - .map(|x| self.locals.len() - x - 1) - } - - fn resolve_global(&mut self, name: &LStr) -> Option { - self.globals.iter().rev() - .position(|v| v.name.as_ref() == name) - .map(|x| self.globals.len() - x - 1) - } - - - fn load_var(&mut self, name: &LStr) { - match (self.resolve_local(name), self.resolve_global(name)) { - (Some(n), None) => self.load_local(n), - (Some(n), Some(m)) if n >= m => self.load_local(n), - _ => { - let sym = Symbol::get(name); - self.emit(I::LoadGlobal(Arg24::from_symbol(sym))); - } - } - } - - fn load_local(&mut self, n: usize) { - if self.locals[n].closed { - self.emit(I::LoadClosedLocal(Arg24::from_usize(n))); - } else { - self.emit(I::LoadLocal(Arg24::from_usize(n))); + fn load_var(&mut self, name: &LStr) { + match self.resolve_name(name) { + ResolveOutcome::Var(VarKind::Local(n)) => { + self.emit(I::LoadLocal(Arg24::from_usize(n))); + } + ResolveOutcome::Var(VarKind::Closed(n)) => { + self.emit(I::LoadClosedLocal(Arg24::from_usize(n))); + } + ResolveOutcome::InParent => { + let n = match self.closes.get(name) { + Some(n) => *n, + None => { + let n = self.closes.len(); + self.closes.insert(name.into(), n); + n + } + }; + self.emit(I::LoadUpvalue(Arg24::from_usize(n))); + } + ResolveOutcome::None | ResolveOutcome::Var(VarKind::Global) => { + let s = Symbol::get(name); + self.emit(I::LoadGlobal(Arg24::from_symbol(s))); + } } } - fn declare_local(&mut self, name: &LStr) -> usize { - if let Some(i) = self.resolve_local(name) { - if self.locals[i].scope == self.scope && !self.locals[i].closed { - self.emit(I::StoreLocal(Arg24::from_usize(i))); - return i; - } - } + fn declare_local(&mut self, name: &LStr) -> usize { + let name: Rc = name.into(); + let local = Var { + name: name.clone(), + kind: VarKind::Local(self.local_count) + }; + self.local_count += 1; + let shadowed = self.scope.insert(name.clone(), local); + self.shadowed.push((name, shadowed)); + self.local_count - 1 + } - self.locals.push(Local { - name: name.into(), - scope: self.scope, - closed: false, - }); + fn assign_local(&mut self, name: &LStr) -> usize { + let n = self.declare_local(name); + self.emit(I::NewLocal); + n + } - let i = self.locals.len() - 1; - self.emit(I::NewLocal); - i - } + fn assign_global(&mut self, name: &LStr) { + self.declare_global(name); + self.store_var(name); + } - fn store_local(&mut self, i: usize) { - if self.locals[i].closed { - self.emit(I::StoreLocal(Arg24::from_usize(i))); - } else { - self.emit(I::StoreClosedLocal(Arg24::from_usize(i))); + fn declare_global(&mut self, name: &LStr) { + let name: Rc = name.into(); + let global = Var { + name: name.clone(), + kind: VarKind::Global + }; + let shadowed = self.scope.insert(name.clone(), global); + self.shadowed.push((name, shadowed)); + } + + fn store_var(&mut self, name: &LStr) { + match self.resolve_name(name) { + ResolveOutcome::Var(VarKind::Local(n)) => { + self.emit(I::StoreLocal(Arg24::from_usize(n))); + } + ResolveOutcome::Var(VarKind::Closed(n)) => { + self.emit(I::StoreClosedLocal(Arg24::from_usize(n))); + } + ResolveOutcome::InParent => { + let n = match self.closes.get(name) { + Some(n) => *n, + None => { + let n = self.closes.len(); + self.closes.insert(name.into(), n); + n + } + }; + self.emit(I::StoreUpvalue(Arg24::from_usize(n))); + } + ResolveOutcome::Var(VarKind::Global) => { + let s = Symbol::get(name); + self.emit(I::StoreGlobal(Arg24::from_symbol(s))); + } + ResolveOutcome::None if self.mode == CompilerMode::Repl => { + let s = Symbol::get(name); + self.emit(I::StoreGlobal(Arg24::from_symbol(s))); + } + ResolveOutcome::None => { + self.assign_local(name); + } } - } + } - fn store_global(&mut self, name: &LStr) { - let sym = Symbol::get(name); - self.emit(I::StoreGlobal(Arg24::from_symbol(sym))); - if let Some(i) = self.resolve_global(name) { - if self.globals[i].scope == self.scope { - return - } - } - self.globals.push(Local { - name: name.into(), - scope: self.scope, - closed: false, - }); - } - - fn store_default(&mut self, name: &LStr) { - match (self.resolve_local(name), self.resolve_global(name)) { - (Some(n), None) => self.store_local(n), - (Some(n), Some(m)) if n >= m => self.store_local(n), - (_, Some(_)) => self.store_global(name), - (None, None) => { - if self.mode == CompilerMode::Repl && self.scope == 1 { - self.store_global(name); - } else { - self.declare_local(name); - } - } - } - } // @@ -307,43 +344,44 @@ impl<'a> Compiler<'a> { // fn expr(&mut self, e: &Expr) { - match e { - Expr::Block(xs) if xs.is_empty() => { self.emit(I::Nil); }, - Expr::Block(xs) => { - self.begin_scope(); + let Expr { kind, .. } = e; + match kind { + ExprKind::Block(xs) if xs.is_empty() => { self.emit(I::Nil); }, + ExprKind::Block(xs) => { + let scope = self.begin_scope(); for x in &xs[0..xs.len()-1] { self.expr(x); self.emit_discard(1); } self.expr(&xs[xs.len()-1]); - self.end_scope(); + self.end_scope(scope); }, - Expr::Literal(v) => self.expr_literal(v), - Expr::Ident(ident) => self.load_var(ident), - Expr::UnaryOp(o, a) => { + ExprKind::Literal(v) => self.expr_literal(v), + ExprKind::Ident(ident) => self.load_var(ident), + ExprKind::UnaryOp(o, a) => { self.expr(a); self.emit(I::UnaryOp(*o)); }, - Expr::BinaryOp(o, a, b) => { + ExprKind::BinaryOp(o, a, b) => { self.expr(a); self.expr(b); self.emit(I::BinaryOp(*o)); }, - Expr::Assign(o, lv, a) => self.expr_assign(*o, lv, a), - Expr::AssignVar(name, a) => { + ExprKind::Assign(o, lv, a) => self.expr_assign(*o, lv, a), + ExprKind::AssignVar(name, a) => { self.expr(a); self.emit(I::Dup); - self.declare_local(name); + self.assign_local(name); }, - Expr::AssignGlobal(name, a) => { + ExprKind::AssignGlobal(name, a) => { self.expr(a); self.emit(I::Dup); - self.store_global(name); + self.assign_global(name); }, - Expr::List(xs) if xs.is_empty() => { + ExprKind::List(xs) if xs.is_empty() => { self.emit(I::NewList(0)); }, - Expr::List(xs) => { + ExprKind::List(xs) => { let mut first = true; for chunk in xs.chunks(16) { for e in chunk { @@ -357,10 +395,10 @@ impl<'a> Compiler<'a> { } } }, - Expr::Table(xs) if xs.is_empty() => { + ExprKind::Table(xs) if xs.is_empty() => { self.emit(I::NewTable(0)); }, - Expr::Table(xs) => { + ExprKind::Table(xs) => { let mut first = true; for chunk in xs.chunks(8) { for (k, v) in chunk { @@ -375,19 +413,19 @@ impl<'a> Compiler<'a> { } } }, - Expr::Index(ct, idx) => { + ExprKind::Index(ct, idx) => { self.expr(ct); self.expr(idx); self.emit(I::Index); }, - Expr::FnCall(f, args) => { + ExprKind::FnCall(f, args) => { self.expr(f); for a in args { self.expr(a); } self.emit(I::Call(args.len() as u8)); }, - Expr::AssocFnCall(o, f, args) => { + ExprKind::AssocFnCall(o, f, args) => { self.expr(o); self.emit(I::Dup); self.emit(I::Symbol(Arg24::from_symbol(*f))); @@ -398,18 +436,18 @@ impl<'a> Compiler<'a> { } self.emit(I::Call((args.len() + 1) as u8)); }, - Expr::Return(e) => { + ExprKind::Return(e) => { self.expr(e); self.emit(I::Return); }, - Expr::Pipe(a, f) => { + ExprKind::Pipe(a, f) => { self.expr(a); self.expr(f); self.emit(I::Swap); self.emit(I::Call(1)); }, - Expr::Lambda(args, body) => self.expr_lambda(args, body), - Expr::And(a, b) => { + ExprKind::Lambda(args, body) => self.expr_lambda(args, body), + ExprKind::And(a, b) => { self.expr(a); self.emit(I::Dup); let j1 = self.emit(I::JumpFalse(Arg24::from_usize(0))); @@ -417,7 +455,7 @@ impl<'a> Compiler<'a> { self.expr(b); self.update_instr(j1, I::JumpFalse(Arg24::from_usize(self.ip()))); }, - Expr::Or(a, b) => { + ExprKind::Or(a, b) => { self.expr(a); self.emit(I::Dup); let j1 = self.emit(I::JumpTrue(Arg24::from_usize(0))); @@ -425,7 +463,7 @@ impl<'a> Compiler<'a> { self.expr(b); self.update_instr(j1, I::JumpTrue(Arg24::from_usize(self.ip()))); }, - Expr::If(cond, b1, b2) => { + ExprKind::If(cond, b1, b2) => { self.expr(cond); let j1 = self.emit(I::JumpFalse(Arg24::from_usize(0))); self.expr(b1); @@ -440,7 +478,7 @@ impl<'a> Compiler<'a> { I::Jump(Arg24::from_usize(self.ip())) ); }, - Expr::While(cond, body) => { + ExprKind::While(cond, body) => { let start = self.ip(); self.expr(cond); @@ -452,13 +490,13 @@ impl<'a> Compiler<'a> { self.emit(I::Nil); }, - Expr::For(name, iter, body) => self.expr_for(name, iter, body), - Expr::Try(body, catches) => self.expr_try(body, catches), + ExprKind::For(name, iter, body) => self.expr_for(name, iter, body), + ExprKind::Try(body, catches) => self.expr_try(body, catches), } } fn expr_try(&mut self, body: &Expr, catch_blocks: &[CatchBlock]) { - let (idx, mut table) = self.chunk.begin_try_table(self.locals.len()); + let (idx, mut table) = self.chunk.begin_try_table(self.local_count); self.emit(I::BeginTry(Arg24::from_usize(idx))); self.expr(body); @@ -472,16 +510,16 @@ impl<'a> Compiler<'a> { types: catch_block.types.clone(), }); - self.begin_scope(); + let scope = self.begin_scope(); if let Some(name) = catch_block.name { - self.declare_local(name); + self.assign_local(name); } else { self.emit_discard(1); } self.expr(&catch_block.body); - self.end_scope(); + self.end_scope(scope); let end_addr = self.emit(I::Jump(Arg24::from_usize(0))); catch_end_addrs.push(end_addr); @@ -502,9 +540,9 @@ impl<'a> Compiler<'a> { self.emit(I::IterBegin); // declare loop variable - self.begin_scope(); + let scope = self.begin_scope(); self.emit(I::Nil); - let local = self.declare_local(name); + self.assign_local(name); // begin loop let start = self.ip(); @@ -513,7 +551,7 @@ impl<'a> Compiler<'a> { self.emit(I::Dup); self.emit(I::Call(0)); let j1 = self.emit(I::IterTest(Arg24::from_usize(0))); - self.store_local(local); + self.store_var(name); // body self.expr(body); @@ -523,7 +561,7 @@ impl<'a> Compiler<'a> { self.emit(I::Jump(Arg24::from_usize(start))); self.update_instr(j1, I::IterTest(Arg24::from_usize(self.ip()))); - self.end_scope(); + self.end_scope(scope); self.emit(I::Nil); } @@ -531,9 +569,39 @@ impl<'a> Compiler<'a> { let mut inner = self.new_function(args); inner.parent = Some(self); inner.expr(body); - let func = inner.finish(); - let n = self.add_const(func.into()); - self.emit(I::Const(Arg24::from_usize(n))); + + + let (func, closes) = inner.finish_inner(); + let func_const = self.add_const(func.into()); + + let num_closed = closes.len(); + + for (name, _) in closes { + match self.resolve_name(&name) { + ResolveOutcome::Var(VarKind::Local(n)) => { + self.emit(I::CloseOver(Arg24::from_usize(n))); + self.scope.entry(name).and_modify(|v| { + v.kind = VarKind::Closed(n); + }); + }, + ResolveOutcome::Var(VarKind::Closed(n)) => { + self.emit(I::LoadLocal(Arg24::from_usize(n))); + }, + ResolveOutcome::InParent => { + let n = self.closes.len(); + self.closes.insert(name, n); + self.emit(I::ContinueUpvalue(Arg24::from_usize(n))); + }, + ResolveOutcome::None | ResolveOutcome::Var(VarKind::Global) + => panic!("upvalue resolved to none or global"), + } + } + + if num_closed == 0 { + self.emit(I::Const(Arg24::from_usize(func_const))); + } else { + self.emit(I::Closure(Arg24::from_usize(func_const))); + } } fn expr_literal(&mut self, val: &Value) { @@ -554,26 +622,26 @@ impl<'a> Compiler<'a> { } fn expr_assign(&mut self, o: Option, lv: &LValue, a: &Expr) { - match (lv, o) { - (LValue::Ident(i), None) => { + match (&lv.kind, o) { + (LValueKind::Ident(i), None) => { self.expr(a); self.emit(I::Dup); - self.store_default(i); + self.store_var(i); }, - (LValue::Ident(i), Some(o)) => { + (LValueKind::Ident(i), Some(o)) => { self.load_var(i); self.expr(a); self.emit(I::BinaryOp(o)); self.emit(I::Dup); - self.store_default(i); + self.store_var(i); }, - (LValue::Index(ct, i), None) => { + (LValueKind::Index(ct, i), None) => { self.expr(ct); self.expr(i); self.expr(a); self.emit(I::StoreIndex); }, - (LValue::Index(ct, i), Some(o)) => { + (LValueKind::Index(ct, i), Some(o)) => { self.expr(ct); self.expr(i); self.emit(I::DupTwo); diff --git a/talc-lang/src/lib.rs b/talc-lang/src/lib.rs index 8e2fcae..219db23 100644 --- a/talc-lang/src/lib.rs +++ b/talc-lang/src/lib.rs @@ -1,50 +1,14 @@ +#![allow(clippy::mutable_key_type)] +#![warn(clippy::semicolon_if_nothing_returned)] +#![warn(clippy::allow_attributes)] -#[rustfmt::skip] -#[allow(clippy::extra_unused_lifetimes)] -#[allow(clippy::needless_lifetimes)] -#[allow(clippy::let_unit_value)] -#[allow(clippy::just_underscores_and_digits)] -#[allow(clippy::pedantic)] -mod parser { - pub use __intern_token::new_builder as new_builder; - include!(concat!(env!("OUT_DIR"),"/parser.rs")); -} -mod parser_util; -mod vm; pub mod symbol; - -pub mod ast; +pub mod parser; pub mod value; pub mod exception; pub mod chunk; pub mod compiler; pub mod lstring; -pub use parser::BlockParser as Parser; +mod vm; pub use vm::Vm; - -pub use parser_util::{parse_int, parse_float, parse_str_escapes}; - -type LexResult<'input> = Result< - (usize, Token<'input>, usize), - ParseError, parser_util::ParseError> ->; - -use lalrpop_util::{ParseError, lexer::{Token, MatcherBuilder}}; - -pub struct Lexer { - builder: MatcherBuilder, -} - -impl Default for Lexer { - fn default() -> Self { Self::new() } -} - -impl Lexer { - pub fn new() -> Self { - Self { builder: crate::parser::new_builder() } - } - pub fn lex<'s>(&'s self, input: &'s str) -> impl Iterator> { - self.builder.matcher::(input) - } -} diff --git a/talc-lang/src/parser/ast.rs b/talc-lang/src/parser/ast.rs new file mode 100644 index 0000000..6d22784 --- /dev/null +++ b/talc-lang/src/parser/ast.rs @@ -0,0 +1,284 @@ +use core::fmt; + +use crate::{lstring::LStr, symbol::Symbol, value::Value}; + +use super::Span; + +#[derive(Clone, Copy, Debug)] +pub enum BinaryOp { + Add, Sub, Mul, Div, Mod, Pow, IntDiv, + Shr, Shl, BitAnd, BitXor, BitOr, + Eq, Ne, Gt, Lt, Ge, Le, + Concat, Append, + Range, RangeIncl, +} + +#[derive(Clone, Copy, Debug)] +pub enum UnaryOp { + Neg, Not, RangeEndless, +} + +#[derive(Debug)] +pub struct Expr<'s> { + pub span: Span, + pub kind: ExprKind<'s>, +} + +#[derive(Debug)] +pub enum ExprKind<'s> { + Literal(Value), + Ident(&'s LStr), + + UnaryOp(UnaryOp, Box>), + BinaryOp(BinaryOp, Box>, Box>), + + Assign(Option, Box>, Box>), + AssignVar(&'s LStr, Box>), + AssignGlobal(&'s LStr, Box>), + + Index(Box>, Box>), + FnCall(Box>, Vec>), + AssocFnCall(Box>, Symbol, Vec>), + Pipe(Box>, Box>), + + Block(Vec>), + List(Vec>), + Table(Vec<(Expr<'s>, Expr<'s>)>), + + Return(Box>), + And(Box>, Box>), + Or(Box>, Box>), + If(Box>, Box>, Option>>), + While(Box>, Box>), + For(&'s LStr, Box>, Box>), + Lambda(Vec<&'s LStr>, Box>), + Try(Box>, Vec>), +} + +impl<'s> ExprKind<'s> { + pub fn span(self, span: Span) -> Expr<'s> { + Expr { kind: self, span } + } +} + +#[derive(Debug)] +pub struct CatchBlock<'s> { + pub span: Span, + pub name: Option<&'s LStr>, + pub types: Option>, + pub body: Expr<'s>, +} + +#[derive(Debug)] +pub struct LValue<'s> { + pub span: Span, + pub kind: LValueKind<'s>, +} + +#[derive(Debug)] +pub enum LValueKind<'s> { + Ident(&'s LStr), + Index(Box>, Box>), +} + +impl<'s> LValueKind<'s> { + pub fn span(self, span: Span) -> LValue<'s> { + LValue { kind: self, span } + } +} + +impl<'s> LValue<'s> { + pub fn from_expr(e: Expr<'s>) -> Option> { + let Expr { span, kind } = e; + match kind { + ExprKind::Ident(i) => Some(LValueKind::Ident(i).span(span)), + ExprKind::Index(l, r) => Some(LValueKind::Index(l, r).span(span)), + _ => None, + } + } +} + +impl<'s> CatchBlock<'s> { + pub fn write_to(&self, w: &mut impl fmt::Write, depth: usize) -> fmt::Result { + write!(w, "{0: >1$}catch", "", depth*2)?; + if let Some(name) = self.name { + write!(w, " ${name}")?; + } + if let Some(types) = &self.types { + write!(w, ":")?; + for ty in types { + write!(w, " {}", ty.name())?; + } + } + writeln!(w)?; + self.body.write_to(w, depth + 1) + } +} + +impl fmt::Display for CatchBlock<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.write_to(f, 0) + } +} + +impl<'s> LValue<'s> { + pub fn write_to(&self, w: &mut impl fmt::Write, depth: usize) -> fmt::Result { + write!(w, "{0: >1$}", "", depth*2)?; + let depth = depth + 1; + match &self.kind { + LValueKind::Ident(n) => writeln!(w, "${n}"), + LValueKind::Index(l, r) => { + writeln!(w, "index")?; + l.write_to(w, depth)?; + r.write_to(w, depth) + }, + } + } +} + +impl fmt::Display for LValue<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.write_to(f, 0) + } +} + +impl<'s> Expr<'s> { + pub fn write_to(&self, w: &mut impl fmt::Write, depth: usize) -> fmt::Result { + write!(w, "{0: >1$}", "", depth*2)?; + let depth = depth + 1; + match &self.kind { + ExprKind::Literal(val) => writeln!(w, "{val}"), + ExprKind::Ident(n) => writeln!(w, "${n}"), + ExprKind::UnaryOp(op, e) => { + writeln!(w, "uop {op:?}")?; + e.write_to(w, depth) + }, + ExprKind::BinaryOp(op, l, r) => { + writeln!(w, "bop {op:?}")?; + l.write_to(w, depth)?; + r.write_to(w, depth) + }, + ExprKind::Assign(op, l, r) => { + if let Some(op) = op { + writeln!(w, "asgn {op:?}")?; + } else { + writeln!(w, "asgn =")?; + } + l.write_to(w, depth)?; + r.write_to(w, depth) + }, + ExprKind::AssignVar(l, r) => { + writeln!(w, "var {l}")?; + r.write_to(w, depth) + }, + ExprKind::AssignGlobal(l, r) => { + writeln!(w, "global {l}")?; + r.write_to(w, depth) + }, + ExprKind::Index(l, r) => { + writeln!(w, "index")?; + l.write_to(w, depth)?; + r.write_to(w, depth) + }, + ExprKind::FnCall(f, a) => { + writeln!(w, "call")?; + f.write_to(w, depth)?; + for arg in a { + arg.write_to(w, depth)?; + } + Ok(()) + }, + ExprKind::AssocFnCall(d, f, a) => { + writeln!(w, "assoc call {}", f.name())?; + d.write_to(w, depth)?; + for arg in a { + arg.write_to(w, depth)?; + } + Ok(()) + }, + ExprKind::Pipe(l, r) => { + writeln!(w, "pipe")?; + l.write_to(w, depth)?; + r.write_to(w, depth) + }, + ExprKind::Block(b) => { + writeln!(w, "block")?; + for e in b { + e.write_to(w, depth)?; + } + Ok(()) + }, + ExprKind::List(l) => { + writeln!(w, "list")?; + for e in l { + e.write_to(w, depth)?; + } + Ok(()) + }, + ExprKind::Table(t) => { + writeln!(w, "list")?; + for (k, v) in t { + k.write_to(w, depth)?; + v.write_to(w, depth)?; + } + Ok(()) + }, + ExprKind::Return(e) => { + writeln!(w, "return")?; + e.write_to(w, depth) + } + ExprKind::And(l, r) => { + writeln!(w, "and")?; + l.write_to(w, depth)?; + r.write_to(w, depth) + } + ExprKind::Or(l, r) => { + writeln!(w, "or")?; + l.write_to(w, depth)?; + r.write_to(w, depth) + } + ExprKind::If(c, b, e) => { + writeln!(w, "if")?; + c.write_to(w, depth)?; + b.write_to(w, depth)?; + if let Some(e) = e { + e.write_to(w, depth)?; + } + Ok(()) + } + ExprKind::While(c, b) => { + writeln!(w, "while")?; + c.write_to(w, depth)?; + b.write_to(w, depth) + } + ExprKind::For(v, i, b) => { + writeln!(w, "for {v}")?; + i.write_to(w, depth)?; + b.write_to(w, depth) + } + ExprKind::Lambda(a, b) => { + write!(w, "lambda")?; + for arg in a { + write!(w, " {arg}")?; + } + writeln!(w)?; + b.write_to(w, depth) + } + ExprKind::Try(t, c) => { + write!(w, "try")?; + t.write_to(w, depth)?; + for catch in c { + catch.write_to(w, depth)?; + } + Ok(()) + } + } + } +} + +impl fmt::Display for Expr<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.write_to(f, 0) + } +} + diff --git a/talc-lang/src/parser/lexer.rs b/talc-lang/src/parser/lexer.rs new file mode 100644 index 0000000..f1a38f3 --- /dev/null +++ b/talc-lang/src/parser/lexer.rs @@ -0,0 +1,559 @@ +use std::{fmt, iter::Peekable, str::Chars}; + +use unicode_ident::{is_xid_continue, is_xid_start}; + +use super::{ParserError, Pos, Span}; + +type Result = std::result::Result; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TokenKind { + Eof, + LineSeparator, + + Bang, + BangEqual, + HashAmper, + HashAmperEqual, + HashCaret, + HashCaretEqual, + HashPipe, + HashPipeEqual, + Dollar, + Percent, + PercentEqual, + Amper, + AmperEqual, + Star, + StarEqual, + Plus, + PlusPlus, + PlusPlusEqual, + PlusEqual, + Comma, + Minus, + MinusEqual, + Arrow, + Dot, + DotDot, + DotDotStar, + DotDotEqual, + Slash, + SlashSlash, + SlashSlashEqual, + SlashEqual, + Colon, + Less, + LessLess, + LessLessEqual, + LessEqual, + Equal, + EqualEqual, + Greater, + GreaterEqual, + GreaterGreater, + GreaterGreaterEqual, + LParen, + RParen, + LBrack, + RBrack, + LBrace, + RBrace, + Backslash, + Caret, + CaretEqual, + Pipe, + + Identifier, + Integer, + Float, + String, + Symbol, + And, + Break, + Catch, + Continue, + Do, + Elif, + Else, + End, + False, + For, + Global, + If, + In, + Nil, + Not, + Or, + Return, + Then, + True, + Try, + Var, + While, +} +use TokenKind as K; + +impl TokenKind { + pub fn name(self) -> &'static str { + match self { + K::Eof => "end of file", + K::LineSeparator => "line separator", + K::Bang => "'!'", + K::BangEqual => "'!='", + K::HashAmper => "'#&'", + K::HashAmperEqual => "'#&='", + K::HashCaret => "'#^'", + K::HashCaretEqual => "'#^='", + K::HashPipe => "'#|'", + K::HashPipeEqual => "'#|='", + K::Dollar => "'$'", + K::Percent => "'%'", + K::PercentEqual => "'%='", + K::Amper => "'&'", + K::AmperEqual => "'&='", + K::Star => "'*'", + K::StarEqual => "'*='", + K::Plus => "'+'", + K::PlusPlus => "'++'", + K::PlusPlusEqual => "'++='", + K::PlusEqual => "'+='", + K::Comma => "','", + K::Minus => "'-'", + K::MinusEqual => "'-='", + K::Arrow => "'=>'", + K::Dot => "'.'", + K::DotDot => "'..'", + K::DotDotStar => "'..*'", + K::DotDotEqual => "'..='", + K::Slash => "'/'", + K::SlashSlash => "'//'", + K::SlashSlashEqual => "'//='", + K::SlashEqual => "'/='", + K::Colon => "':'", + K::Less => "'<'", + K::LessLess => "'<<'", + K::LessLessEqual => "'<<='", + K::LessEqual => "'<='", + K::Equal => "'='", + K::EqualEqual => "'=='", + K::Greater => "'>'", + K::GreaterEqual => "'>='", + K::GreaterGreater => "'>>'", + K::GreaterGreaterEqual => "'>>='", + K::LParen => "'('", + K::RParen => "')'", + K::LBrack => "'['", + K::RBrack => "']'", + K::LBrace => "'{'", + K::RBrace => "'}'", + K::Backslash => "'\\'", + K::Caret => "'^'", + K::CaretEqual => "'^='", + K::Pipe => "'|'", + K::Identifier => "identifier", + K::Integer => "integer", + K::Float => "float", + K::String => "string", + K::Symbol => "symbol", + K::And => "'and'", + K::Break => "'break'", + K::Catch => "'catch'", + K::Continue => "'continue'", + K::Do => "'do'", + K::Elif => "'elif'", + K::Else => "'else'", + K::End => "'end'", + K::False => "'false'", + K::For => "'for'", + K::Global => "'global'", + K::If => "'if'", + K::In => "'in'", + K::Nil => "'nil'", + K::Not => "'not'", + K::Or => "'or'", + K::Return => "'return'", + K::Then => "'then'", + K::True => "'true'", + K::Try => "'try'", + K::Var => "'var'", + K::While => "'while'", + } + } +} + +impl fmt::Display for TokenKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.name()) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct Token<'s> { + pub span: Span, + pub content: &'s str, + pub kind: TokenKind, +} + +pub struct Lexer<'s> { + src: &'s str, + chars: Peekable>, + start_pos: Pos, + pos: Pos, +} + +impl<'s> Lexer<'s> { + pub fn new(src: &'s str) -> Self { + Self { + src, + chars: src.chars().peekable(), + start_pos: Pos::new(), + pos: Pos::new(), + } + } + + fn invalid_char(&self, c: char) -> ParserError { + let span = Span::new(self.pos, self.pos.advance(c)); + let msg = match c as u32 { + c @ 0x00..=0x7f => format!("invalid character (codepoint 0x{:2x})", c), + c => format!("invalid character (codepoint U+{:04x})", c), + }; + ParserError { span, msg } + } + + fn filter_char(&mut self, c: Option, advance: bool) -> Result { + match c { + Some(c) if c.is_control() && !matches!(c, '\n' | '\r' | '\t') => Err(self.invalid_char(c)), + Some(c) => { + if advance { self.pos = self.pos.advance(c); } + Ok(c) + }, + None => Ok('\0'), + } + } + + fn peek(&mut self) -> Result { + let c = self.chars.peek().copied(); + self.filter_char(c, false) + } + + fn peek_n(&mut self, n: usize) -> Option { + let chars = &self.src[(self.pos.idx as usize)..]; + chars.chars().nth(n) + } + + fn next(&mut self) -> Result { + let c = self.chars.next(); + self.filter_char(c, true) + } + + fn and_peek(&mut self) -> Result { + self.next()?; self.peek() + } + + fn emit(&self, kind: TokenKind) -> Result> { + let span = Span::new(self.start_pos, self.pos); + Ok(Token { + span, + content: span.of(self.src), + kind, + }) + } + + fn and_emit(&mut self, kind: TokenKind) -> Result> { + self.next()?; + self.emit(kind) + } + + fn unexpected(&mut self) -> Result> { + let c = self.peek()?; + let span = Span::new(self.pos, self.pos.advance(c)); + let msg = match c { + '\0' => "unexpected end of file".to_owned(), + '\n' => "unexpected newline character".to_owned(), + '\t' => "unexpected tab character".to_owned(), + '\r' => "unexpected return character".to_owned(), + c => format!("unexpected character {c}"), + }; + Err(ParserError { span, msg }) + } + + fn next_ident(&mut self) -> Result> { + self.next()?; + while is_xid_continue(self.peek()?) { + self.next()?; + } + let kind = match Span::new(self.start_pos, self.pos).of(self.src) { + "and" => K::And, + "break" => K::Break, + "catch" => K::Catch, + "continue" => K::Continue, + "do" => K::Do, + "elif" => K::Elif, + "else" => K::Else, + "end" => K::End, + "false" => K::False, + "for" => K::For, + "global" => K::Global, + "if" => K::If, + "in" => K::In, + "nil" => K::Nil, + "not" => K::Not, + "or" => K::Or, + "return" => K::Return, + "then" => K::Then, + "true" => K::True, + "try" => K::Try, + "var" => K::Var, + "while" => K::While, + _ => K::Identifier, + }; + self.emit(kind) + } + + fn next_int_base(&mut self, radix: u32) -> Result> { + loop { + let c = self.peek()?; + if c == '_' || c.is_digit(radix) { + self.next()?; + } else if is_xid_start(c) { + return self.unexpected() + } else { + return self.emit(K::Integer) + } + } + } + + fn next_float(&mut self, mut has_e: bool) -> Result> { + while !has_e { + while matches!(self.peek()?, '_' | '0'..='9') { + self.next()?; + } + match self.peek()? { + 'e' => { self.next()?; has_e = true } + c if is_xid_start(c) => return self.unexpected(), + _ => return self.emit(K::Float) + } + } + if matches!(self.peek()?, '+' | '-') { + self.next()?; + } + while matches!(self.peek()?, '_' | '0'..='9') { + self.next()?; + } + if is_xid_start(self.peek()?) { + self.unexpected() + } else { + self.emit(K::Float) + } + } + + fn next_number(&mut self) -> Result> { + if self.next()? == '0' { + while self.peek()? == '_' { self.next()?; } + match self.peek()? { + 'x' => { self.next()?; return self.next_int_base(16) }, + 'o' => { self.next()?; return self.next_int_base(8) }, + 's' => { self.next()?; return self.next_int_base(6) }, + 'b' => { self.next()?; return self.next_int_base(2) }, + c if is_xid_start(c) => return self.unexpected(), + '0'..='9' => (), + _ => return self.emit(K::Integer) + } + } + while matches!(self.peek()?, '_' | '0'..='9') { + self.next()?; + } + match self.peek()? { + 'r' => todo!("arbitrary radix integer literals"), + 'e' => { self.next()?; self.next_float(true) }, + '.' => { + if self.peek_n(1) == Some('.') { + self.emit(K::Integer) + } else { + self.next()?; + self.next_float(false) + } + }, + c if is_xid_start(c) => self.unexpected(), + _ => self.emit(K::Integer) + } + } + + fn next_string(&mut self) -> Result> { + let double_quote = self.next()? == '"'; + loop { + match self.next()? { + '\0' => return self.unexpected(), + '"' if double_quote => break, + '\'' if !double_quote => break, + '\\' if double_quote => { self.next()?; }, + _ => (), + } + } + self.emit(K::String) + } + + fn next_symbol(&mut self) -> Result> { + if matches!(self.peek()?, '\'' | '"') { + self.next_string()?; + } else { + self.next_ident()?; + } + self.emit(K::Symbol) + } + + fn line_comment(&mut self) -> Result> { + while !matches!(self.peek()?, '\0' | '\n') { + self.next()?; + } + self.next_token() + } + + fn next_token(&mut self) -> Result> { + while matches!(self.peek()?, ' ' | '\t' | '\r') { + self.next()?; + } + self.start_pos = self.pos; + match self.peek()? { + // misc + '\0' => self.emit(K::Eof), + ';' | '\n' => self.and_emit(K::LineSeparator), + '(' => self.and_emit(K::LParen), + ')' => self.and_emit(K::RParen), + '{' => self.and_emit(K::LBrace), + '}' => self.and_emit(K::RBrace), + '[' => self.and_emit(K::LBrack), + ']' => self.and_emit(K::RBrack), + ',' => self.and_emit(K::Comma), + '$' => self.and_emit(K::Dollar), + '|' => self.and_emit(K::Pipe), + '\\' => match self.and_peek()? { + '\n' => { + self.next()?; + self.next_token() + } + _ => self.emit(K::Backslash), + } + // arithmetic + '+' => match self.and_peek()? { + '+' => match self.and_peek()? { + '=' => self.and_emit(K::PlusPlusEqual), + _ => self.emit(K::PlusPlus), + }, + '=' => self.and_emit(K::PlusEqual), + _ => self.emit(K::Plus), + }, + '-' => match self.and_peek()? { + '-' => self.line_comment(), + '=' => self.and_emit(K::MinusEqual), + '>' => self.and_emit(K::Arrow), + _ => self.emit(K::Minus), + }, + '*' => match self.and_peek()? { + '=' => self.and_emit(K::StarEqual), + _ => self.emit(K::Star), + }, + '/' => match self.and_peek()? { + '/' => match self.and_peek()? { + '=' => self.and_emit(K::SlashSlashEqual), + _ => self.emit(K::SlashSlash), + }, + '=' => self.and_emit(K::SlashEqual), + _ => self.emit(K::Slash), + }, + '%' => match self.and_peek()? { + '=' => self.and_emit(K::PercentEqual), + _ => self.emit(K::Percent), + }, + '^' => match self.and_peek()? { + '=' => self.and_emit(K::CaretEqual), + _ => self.emit(K::Caret), + }, + // logical + '#' => match self.and_peek()? { + '&' => match self.and_peek()? { + '=' => self.and_emit(K::HashAmperEqual), + _ => self.emit(K::HashAmper), + } + '^' => match self.and_peek()? { + '=' => self.and_emit(K::HashCaretEqual), + _ => self.emit(K::HashCaret), + } + '|' => match self.and_peek()? { + '=' => self.and_emit(K::HashPipeEqual), + _ => self.emit(K::HashPipe), + } + '!' => self.line_comment(), + _ => self.unexpected(), + }, + // lists + '!' => match self.and_peek()? { + '=' => self.and_emit(K::BangEqual), + _ => self.emit(K::Bang), + }, + '&' => match self.and_peek()? { + '=' => self.and_emit(K::AmperEqual), + _ => self.emit(K::Amper), + }, + // comparison + '<' => match self.and_peek()? { + '<' => match self.and_peek()? { + '=' => self.and_emit(K::LessLessEqual), + _ => self.emit(K::LessLess), + }, + '=' => self.and_emit(K::LessEqual), + _ => self.emit(K::Less), + }, + '>' => match self.and_peek()? { + '>' => match self.and_peek()? { + '=' => self.and_emit(K::GreaterGreaterEqual), + _ => self.emit(K::GreaterGreater), + }, + '=' => self.and_emit(K::GreaterEqual), + _ => self.emit(K::Greater), + }, + '=' => match self.and_peek()? { + '=' => self.and_emit(K::EqualEqual), + _ => self.emit(K::Equal), + }, + // range + '.' => match self.and_peek()? { + '.' => match self.and_peek()? { + '*' => self.and_emit(K::DotDotStar), + '=' => self.and_emit(K::DotDotEqual), + _ => self.emit(K::DotDot), + }, + _ => self.emit(K::Dot), + }, + ':' => match self.and_peek()? { + c if is_xid_start(c) || c == '"' || c == '\'' => self.next_symbol(), + _ => self.emit(K::Colon), + } + '0'..='9' => self.next_number(), + c if is_xid_start(c) => self.next_ident(), + '"' | '\'' => self.next_string(), + _ => self.unexpected(), + } + } + + pub fn tokens(mut self) -> Result>> { + let mut res = Vec::new(); + loop { + let t = self.next_token()?; + let k = t.kind; + res.push(t); + if k == TokenKind::Eof { + return Ok(res) + } + } + } +} + +impl<'s> Iterator for Lexer<'s> { + type Item = Result>; + + fn next(&mut self) -> Option { + Some(self.next_token()) + } +} diff --git a/talc-lang/src/parser/mod.rs b/talc-lang/src/parser/mod.rs new file mode 100644 index 0000000..7312b3b --- /dev/null +++ b/talc-lang/src/parser/mod.rs @@ -0,0 +1,14 @@ +pub mod ast; + +mod lexer; +pub use lexer::*; + +#[expect(clippy::module_inception)] +mod parser; +pub use parser::*; + +mod pos; +pub use pos::*; + +mod util; +pub use util::*; diff --git a/talc-lang/src/parser.lalrpop b/talc-lang/src/parser/parser.lalrpop.OLD similarity index 99% rename from talc-lang/src/parser.lalrpop rename to talc-lang/src/parser/parser.lalrpop.OLD index 985b238..ebcf19a 100644 --- a/talc-lang/src/parser.lalrpop +++ b/talc-lang/src/parser/parser.lalrpop.OLD @@ -1,3 +1,4 @@ +// vim: set syn=rust: use std::rc::Rc; use crate::ast::*; use crate::value::Value; diff --git a/talc-lang/src/parser/parser.rs b/talc-lang/src/parser/parser.rs new file mode 100644 index 0000000..fa19f5f --- /dev/null +++ b/talc-lang/src/parser/parser.rs @@ -0,0 +1,618 @@ +use std::iter::Peekable; + + +use crate::{lstr, lstring::LStr, symbol::Symbol, value::Value}; + +use super::{ast::{BinaryOp, CatchBlock, Expr, ExprKind, LValue, UnaryOp}, parse_float, parse_int_literal, parse_str_escapes, Lexer, ParserError, Span, SpanParserError, Token, TokenKind}; +use TokenKind as T; +use ExprKind as E; + +type Result = std::result::Result; + +macro_rules! expect { + ($self:expr, $($t:tt)*) => {{ + let t = $self.next()?; + match t.kind { + $($t)* => t, + e => return Err(ParserError { span: t.span, msg: expect_inner!(e, $($t)*) }) + } + }}; +} + +macro_rules! expect_inner { + ($e:expr, $($tok:path)|*) => { + { + let mut s = format!("unexpected token {}, expected ", $e.name()) + + $($tok.name() + ", " +)* ""; + s.truncate(s.len() - 2); + s + } + }; +} + +macro_rules! try_next { + ($self:expr, $pat:pat) => {{ + let t = $self.peek()?; + match t.kind { + $pat => Some($self.next()?), + _ => None, + + } + }}; +} + +macro_rules! throw { + ($span:expr, $($t:tt)*) => { + return Err(ParserError { span: $span, msg: format!($($t)*) }) + }; +} + +impl TokenKind { + pub fn assign_op(self) -> Option> { + Some(match self { + T::PlusPlusEqual => Some(BinaryOp::Concat), + T::AmperEqual => Some(BinaryOp::Append), + T::HashPipeEqual => Some(BinaryOp::BitOr), + T::HashCaretEqual => Some(BinaryOp::BitXor), + T::HashAmperEqual => Some(BinaryOp::BitAnd), + T::LessLessEqual => Some(BinaryOp::Shl), + T::GreaterGreaterEqual => Some(BinaryOp::Shr), + T::PlusEqual => Some(BinaryOp::Add), + T::MinusEqual => Some(BinaryOp::Sub), + T::StarEqual => Some(BinaryOp::Mul), + T::SlashEqual => Some(BinaryOp::Div), + T::SlashSlashEqual => Some(BinaryOp::IntDiv), + T::PercentEqual => Some(BinaryOp::Mod), + T::CaretEqual => Some(BinaryOp::Pow), + T::Equal => None, + _ => return None, + }) + } + + pub fn binary_op(self) -> Option { + Some(match self { + T::EqualEqual => BinaryOp::Eq, + T::BangEqual => BinaryOp::Ne, + T::Greater => BinaryOp::Gt, + T::GreaterEqual => BinaryOp::Ge, + T::Less => BinaryOp::Lt, + T::LessEqual => BinaryOp::Le, + T::PlusPlus => BinaryOp::Concat, + T::Amper => BinaryOp::Append, + T::DotDot => BinaryOp::Range, + T::DotDotEqual => BinaryOp::RangeIncl, + T::HashPipe => BinaryOp::BitOr, + T::HashCaret => BinaryOp::BitXor, + T::HashAmper => BinaryOp::BitAnd, + T::LessLess => BinaryOp::Shl, + T::GreaterGreater => BinaryOp::Shr, + T::Plus => BinaryOp::Add, + T::Minus => BinaryOp::Sub, + T::Star => BinaryOp::Mul, + T::Slash => BinaryOp::Div, + T::SlashSlash => BinaryOp::IntDiv, + T::Percent => BinaryOp::Mod, + T::Caret => BinaryOp::Pow, + _ => return None, + }) + } + + pub fn unary_op(self) -> Option { + match self { + T::Minus => Some(UnaryOp::Neg), + T::Not => Some(UnaryOp::Not), + _ => None, + } + } + + pub fn postfix_unary_op(self) -> Option { + match self { + T::DotDotStar => Some(UnaryOp::RangeEndless), + _ => None, + } + } +} + +impl UnaryOp { + pub fn precedence(self) -> u8 { + match self { + UnaryOp::Not => 0, + UnaryOp::RangeEndless => 40, + UnaryOp::Neg => 110, + } + } +} + +impl BinaryOp { + pub fn precedence(self) -> (u8, u8) { + match self { + BinaryOp::Eq + | BinaryOp::Ne + | BinaryOp::Gt + | BinaryOp::Ge + | BinaryOp::Lt + | BinaryOp::Le => (10, 10), + BinaryOp::Concat => (20, 25), + BinaryOp::Append => (30, 35), + BinaryOp::Range + | BinaryOp::RangeIncl => (40, 40), + BinaryOp::BitOr => (50, 55), + BinaryOp::BitXor => (60, 65), + BinaryOp::BitAnd => (70, 75), + BinaryOp::Shl + | BinaryOp::Shr => (80, 85), + BinaryOp::Add + | BinaryOp::Sub => (90, 95), + BinaryOp::Mul + | BinaryOp::Div + | BinaryOp::IntDiv + | BinaryOp::Mod => (100, 105), + BinaryOp::Pow => (125, 120), + } + } +} + +#[inline(always)] +fn b(t: T) -> Box { + Box::new(t) +} + +impl TokenKind { + fn expr_first(self) -> bool { + matches!(self, + | T::Return + | T::Var + | T::Global + | T::Not + | T::Backslash + | T::Colon + | T::Minus + | T::Identifier + | T::LParen + | T::LBrack + | T::LBrace + | T::Dollar + | T::Do + | T::If + | T::While + | T::For + | T::Try + | T::Integer + | T::Float + | T::String + | T::Symbol + | T::True + | T::False + | T::Nil + ) + } +} + + +struct Parser<'s> { + lexer: Peekable>, +} + +impl<'s> Parser<'s> { + fn new(src: &'s str) -> Self { + Self { lexer: Lexer::new(src).peekable() } + } + + fn next(&mut self) -> Result> { + self.lexer.next().unwrap() + } + + fn peek(&mut self) -> Result> { + self.lexer.peek().unwrap().clone() + } + + + fn parse_table_items(&mut self) -> Result, Expr<'s>)>> { + let mut items = Vec::new(); + while self.peek()?.kind.expr_first() { + let key = if let Some(id) = try_next!(self, T::Identifier) { + E::Literal(Symbol::get(id.content).into()).span(id.span) + } else { + self.parse_term_not_ident()? + }; + + expect!(self, T::Equal); + + let value = self.parse_expr()?; + + items.push((key, value)); + + if try_next!(self, T::Comma).is_none() { + break + } + } + Ok(items) + } + + fn parse_expr_list(&mut self) -> Result>> { + let mut exprs = Vec::new(); + while self.peek()?.kind.expr_first() { + exprs.push(self.parse_expr()?); + if try_next!(self, T::Comma).is_none() { + break + } + } + Ok(exprs) + } + + fn parse_ident_list(&mut self) -> Result> { + let mut idents = Vec::new(); + while let Some(tok) = try_next!(self, T::Identifier) { + idents.push(tok.content.into()); + if try_next!(self, T::Comma).is_none() { + break + } + } + Ok(idents) + } + + fn parse_symbol_list(&mut self) -> Result> { + let mut syms = Vec::new(); + while let Some(tok) = try_next!(self, T::Symbol) { + syms.push(Symbol::get(tok.content)); + if try_next!(self, T::Comma).is_none() { + break + } + } + Ok(syms) + } + + fn parse_catch_blocks(&mut self) -> Result<(Vec>, Span)> { + let mut blocks = Vec::new(); + let mut outer_span = self.peek()?.span; + loop { + let tok = expect!(self, T::Catch | T::End); + if tok.kind == T::End { break } + + let types = match try_next!(self, T::Star) { + Some(_) => None, + None => Some(self.parse_symbol_list()?) + }; + + let name = match try_next!(self, T::In) { + Some(_) => Some(expect!(self, T::Identifier).content.into()), + None => None, + }; + + expect!(self, T::Do); + let body = self.parse_block()?; + + let span = tok.span + body.span; + blocks.push(CatchBlock { span, name, types, body }); + outer_span += span; + } + Ok((blocks, outer_span)) + } + + fn parse_if_stmt_chain(&mut self) -> Result> { + let cond = self.parse_expr()?; + expect!(self, T::Then); + let body = self.parse_block()?; + let tok = expect!(self, T::End | T::Else | T::Elif); + let span = cond.span + tok.span; + match tok.kind { + T::End => Ok(E::If(b(cond), b(body), None).span(span)), + T::Else => { + let else_body = self.parse_block()?; + expect!(self, T::End); + let span = span + else_body.span; + Ok(E::If(b(cond), b(body), Some(b(else_body))).span(span)) + } + T::Elif => { + let elif_body = self.parse_if_stmt_chain()?; + let span = span + elif_body.span; + Ok(E::If(b(cond), b(body), Some(b(elif_body))).span(span)) + } + _ => unreachable!("parse_if_stmt_chain: guaranteed by expect!") + } + + } + + fn parse_term_not_ident(&mut self) -> Result> { + let tok = self.next()?; + match tok.kind { + T::LParen => { + let e = self.parse_expr()?; + expect!(self, T::RParen); + Ok(e) + }, + T::LBrack => { + let args = self.parse_expr_list()?; + let end = expect!(self, T::RBrack); + Ok(E::List(args).span(tok.span + end.span)) + }, + T::LBrace => { + let args = self.parse_table_items()?; + let end = expect!(self, T::RBrace); + Ok(E::Table(args).span(tok.span + end.span)) + }, + T::Dollar => Ok(E::Ident("$".into()).span(tok.span)), + T::Do => { + let b = self.parse_block()?; + expect!(self, T::End); + Ok(b) + }, + T::If => self.parse_if_stmt_chain(), + T::While => { + let cond = self.parse_expr()?; + expect!(self, T::Do); + let body = self.parse_block()?; + let end = expect!(self, T::End); + let span = cond.span + end.span; + Ok(E::While(b(cond), b(body)).span(span)) + }, + T::For => { + let var = expect!(self, T::Identifier); + expect!(self, T::In); + let iter = self.parse_expr()?; + expect!(self, T::Do); + let body = self.parse_block()?; + let end = expect!(self, T::End); + let span = var.span + end.span; + Ok(E::For(var.content.into(), b(iter), b(body)).span(span)) + }, + T::Try => { + let body = self.parse_block()?; + let (catch, span) = self.parse_catch_blocks()?; + Ok(E::Try(b(body), catch).span(tok.span + span)) + }, + T::Integer => { + let n = parse_int_literal(tok.content) + .span_err(tok.span)?; + Ok(E::Literal(n.into()).span(tok.span)) + }, + T::Float => { + let x = parse_float(tok.content) + .span_err(tok.span)?; + Ok(E::Literal(x.into()).span(tok.span)) + }, + T::String => { + let inner = &tok.content[1..tok.content.len()-1]; + let s = if &tok.content[..1] == "\"" { + parse_str_escapes(inner).span_err(tok.span)? + } else { + inner.into() + }; + Ok(E::Literal(s.into()).span(tok.span)) + }, + T::Symbol => { + let inner = &tok.content[1..]; + let s = match inner.chars().next() { + Some('\'') => Symbol::get(&inner[1..inner.len()-1]), + Some('\"') => Symbol::get( + &parse_str_escapes(&inner[1..inner.len()-1]) + .span_err(tok.span)? + ), + _ => Symbol::get(inner), + }; + Ok(E::Literal(s.into()).span(tok.span)) + }, + T::True => Ok(E::Literal(Value::Bool(true)).span(tok.span)), + T::False => Ok(E::Literal(Value::Bool(false)).span(tok.span)), + T::Nil => Ok(E::Literal(Value::Nil).span(tok.span)), + t => throw!(tok.span, "unexpected token {}, expected expression", t.name()), + } + } + + fn parse_term(&mut self) -> Result> { + if let Some(tok) = try_next!(self, T::Identifier) { + Ok(E::Ident(tok.content.into()).span(tok.span)) + } else { + self.parse_term_not_ident() + } + } + + fn parse_access(&mut self) -> Result> { + let mut lhs = self.parse_term()?; + loop { + let tok = try_next!(self, T::LParen | T::LBrack | T::Arrow | T::Dot); + match tok.map(|t| t.kind) { + Some(T::LParen) => { + let args = self.parse_expr_list()?; + let end = expect!(self, T::RParen); + let lhs_span = lhs.span; + lhs = E::FnCall(b(lhs), args).span(lhs_span + end.span); + } + Some(T::LBrack) => { + let idx = self.parse_expr()?; + let end = expect!(self, T::RBrack); + let lhs_span = lhs.span; + lhs = E::Index(b(lhs), b(idx)).span(lhs_span + end.span); + } + Some(T::Arrow) => { + let field = expect!(self, T::Identifier); + let symbol = Symbol::get(field.content); + expect!(self, T::LParen); + let args = self.parse_expr_list()?; + let end = expect!(self, T::RParen); + let lhs_span = lhs.span; + lhs = E::AssocFnCall(b(lhs), symbol, args).span(lhs_span + end.span); + } + Some(T::Dot) => { + let field = expect!(self, T::Identifier); + let symbol = Symbol::get(field.content); + let idx = E::Literal(symbol.into()).span(field.span); + let lhs_span = lhs.span; + lhs = E::Index(b(lhs), b(idx)).span(lhs_span + field.span); + } + None => break, + _ => unreachable!("parse_access: guaranteed by try_next!"), + } + } + Ok(lhs) + } + + fn parse_precedence(&mut self, min_prec: u8) -> Result> { + let mut lhs = if let Some(op) = self.peek()?.kind.unary_op() { + let tok = self.next()?; + let rhs = self.parse_precedence(op.precedence())?; + let span = tok.span + rhs.span; + E::UnaryOp(op, b(rhs)).span(span) + } else { + self.parse_access()? + }; + let mut span = lhs.span; + + loop { + let tok = self.peek()?; + if let Some(op) = tok.kind.postfix_unary_op() { + if op.precedence() < min_prec { + break + } + self.next()?; + span += tok.span; + lhs = E::UnaryOp(op, b(lhs)).span(span); + continue + }; + let Some(op) = tok.kind.binary_op() else { + break + }; + let (lp, rp) = op.precedence(); + if lp < min_prec { + break + } + + self.next()?; + let rhs = self.parse_precedence(rp)?; + span += rhs.span; + + lhs = E::BinaryOp(op, Box::new(lhs), Box::new(rhs)) + .span(span); + } + + Ok(lhs) + } + + fn parse_lambda(&mut self) -> Result> { + let tok = try_next!(self, T::Backslash | T::Colon); + match tok { + Some(Token { kind: T::Backslash, span, .. }) => { + let args = self.parse_ident_list()?; + expect!(self, T::Arrow); + let body = self.parse_lambda()?; + let body_span = body.span; + Ok(E::Lambda(args, b(body)).span(span + body_span)) + }, + Some(Token { kind: T::Colon, span, .. }) => { + let args = vec![lstr!("$")]; + let body = self.parse_lambda()?; + let body_span = body.span; + Ok(E::Lambda(args, b(body)).span(span + body_span)) + }, + None => self.parse_precedence(0), + _ => unreachable!("parse_lambda: guaranteed by try_next!"), + } + } + + fn parse_pipeline(&mut self) -> Result> { + let mut lhs = self.parse_lambda()?; + let mut span = lhs.span; + while try_next!(self, T::Pipe).is_some() { + let rhs = self.parse_lambda()?; + span += rhs.span; + lhs = E::Pipe(b(lhs), b(rhs)).span(span); + } + Ok(lhs) + } + + fn parse_not(&mut self) -> Result> { + if let Some(tok) = try_next!(self, T::Not) { + let expr = self.parse_not()?; + let span = tok.span + expr.span; + Ok(E::UnaryOp(UnaryOp::Not, b(expr)).span(span)) + } else { + self.parse_pipeline() + } + } + + fn parse_and(&mut self) -> Result> { + let mut lhs = self.parse_not()?; + let mut span = lhs.span; + while try_next!(self, T::And).is_some() { + let rhs = self.parse_not()?; + span += rhs.span; + lhs = E::And(b(lhs), b(rhs)).span(span); + } + Ok(lhs) + } + + fn parse_or(&mut self) -> Result> { + let mut lhs = self.parse_and()?; + let mut span = lhs.span; + while try_next!(self, T::Or).is_some() { + let rhs = self.parse_and()?; + span += rhs.span; + lhs = E::Or(b(lhs), b(rhs)).span(span); + } + Ok(lhs) + } + + fn parse_assign(&mut self) -> Result> { + if let Some(tok) = try_next!(self, T::Global | T::Var) { + self.next()?; + let name = expect!(self, T::Identifier); + expect!(self, T::Equal); + let val = self.parse_or()?; + let val_span = val.span; + let kind = if tok.kind == T::Global { E::AssignGlobal } else { E::AssignVar }; + return Ok(kind(name.content.into(), b(val)) + .span(tok.span + val_span)) + } + let lhs = self.parse_or()?; + let lhs_span = lhs.span; + if let Some(op) = self.peek()?.kind.assign_op() { + let Some(lval) = LValue::from_expr(lhs) else { + throw!(lhs_span, "invalid lvalue for assingment") + }; + self.next()?; + let rhs = self.parse_assign()?; + let rhs_span = rhs.span; + Ok(E::Assign(op, b(lval), b(rhs)).span(lhs_span + rhs_span)) + } else { + Ok(lhs) + } + } + + fn parse_expr(&mut self) -> Result> { + if let Some(tok) = try_next!(self, T::Return) { + let expr = self.parse_assign()?; + let span = expr.span; + Ok(E::Return(b(expr)).span(tok.span + span)) + } else { + self.parse_assign() + } + } + + fn parse_block(&mut self) -> Result> { + while try_next!(self, T::LineSeparator).is_some() {} + + let mut span = self.peek()?.span; + let mut exprs = Vec::new(); + while self.peek()?.kind.expr_first() { + let expr = self.parse_expr()?; + span += expr.span; + exprs.push(expr); + + if try_next!(self, T::LineSeparator).is_none() { + break + } + while try_next!(self, T::LineSeparator).is_some() {} + } + Ok(E::Block(exprs).span(span)) + } + + fn parse(mut self) -> Result> { + let block = self.parse_block()?; + expect!(self, T::Eof); + Ok(block) + } +} + +pub fn parse(src: &str) -> Result { + Parser::new(src).parse() +} diff --git a/talc-lang/src/parser/pos.rs b/talc-lang/src/parser/pos.rs new file mode 100644 index 0000000..d8b637f --- /dev/null +++ b/talc-lang/src/parser/pos.rs @@ -0,0 +1,100 @@ +use std::fmt; + +#[derive(Clone, Copy, Default, Debug, PartialEq, Eq)] +pub struct Pos { + pub idx: u32, + pub line: u32, + pub col: u32, +} + +impl std::cmp::PartialOrd for Pos { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl std::cmp::Ord for Pos { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.idx.cmp(&other.idx) + } +} + +impl Pos { + pub const fn new() -> Pos { + Pos { idx: 0, line: 1, col: 1 } + } + + #[must_use] + pub fn advance(self, c: char) -> Pos { + let idx = self.idx.checked_add(c.len_utf8() as u32) + .expect("source file contains more than u32::MAX chars"); + if c == '\n' { + Pos { + idx, + line: self.line + 1, + col: 1, + } + } else { + Pos { + idx, + line: self.line, + col: self.col + 1, + } + } + } +} + +impl fmt::Display for Pos { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}", self.line, self.col) + } +} + +#[derive(Clone, Copy, Debug)] +pub struct Span { + pub start: Pos, + pub end: Pos, +} + +impl Span { + pub const fn new(start: Pos, end: Pos) -> Self { + if end.idx < start.idx { + Self { start: end, end: start } + } else { + Self { start, end } + } + } + + pub fn of<'a>(&self, s: &'a str) -> &'a str { + &s[(self.start.idx as usize)..(self.end.idx as usize)] + } +} + +impl std::ops::Add for Span { + type Output = Span; + + fn add(self, rhs: Self) -> Self::Output { + let start = self.start.min(rhs.start); + let end = self.end.max(rhs.end); + Self::new(start, end) + } +} + +impl std::ops::AddAssign for Span { + fn add_assign(&mut self, rhs: Self) { + self.start = self.start.min(rhs.start); + self.end = self.end.max(rhs.end); + } +} + +impl From<(Pos, Pos)> for Span { + fn from((start, end): (Pos, Pos)) -> Self { + Self { start, end } + } +} + +impl fmt::Display for Span { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}-{}", self.start, self.end) + } +} diff --git a/talc-lang/src/parser_util.rs b/talc-lang/src/parser/util.rs similarity index 65% rename from talc-lang/src/parser_util.rs rename to talc-lang/src/parser/util.rs index 8193db8..e03da70 100644 --- a/talc-lang/src/parser_util.rs +++ b/talc-lang/src/parser/util.rs @@ -1,17 +1,34 @@ +use core::fmt; use std::num::{ParseIntError, ParseFloatError}; use thiserror::Error; use crate::lstring::{LStr, LString}; +use super::Span; + #[derive(Clone, Debug, Error)] -pub enum ParseError { - #[error("{0}")] - StrEscape(#[from] StrEscapeError), - #[error("{0}")] - Integer(#[from] ParseIntError), - #[error("{0}")] - Float(#[from] ParseFloatError), +pub struct ParserError { + pub span: Span, + pub msg: String, +} + +impl fmt::Display for ParserError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} | {}", self.span, self.msg) + } +} + +pub trait SpanParserError { + type Output; + fn span_err(self, span: Span) -> Self::Output; +} + +impl SpanParserError for Result { + type Output = Result; + fn span_err(self, span: Span) -> Self::Output { + self.map_err(|e| ParserError { span, msg: e.to_string() }) + } } #[derive(Clone, Copy, Debug, Error)] @@ -22,15 +39,15 @@ pub enum StrEscapeError { HexEof, #[error("EOF in string escape \\u")] UnicodeEof, - #[error("Invalid string escape \\{0}")] + #[error("invalid string escape \\{0}")] Invalid(char), - #[error("Invalid hex digit '{0}' in string escape")] + #[error("invalid hex digit '{0}' in string escape")] InvalidHex(char), - #[error("Missing brace after string escape \\u")] + #[error("missing brace after string escape \\u")] MissingBrace, - #[error("Invalid codepoint in string escape: {0:x}")] + #[error("invalid codepoint in string escape: {0:x}")] InvalidCodepoint(u32), - #[error("Codepoint in string escape too large")] + #[error("codepoint in string escape too large")] CodepointTooLarge, } @@ -63,13 +80,13 @@ pub fn parse_str_escapes(src: &str) -> Result { let Some('{') = chars.next() else { return Err(StrEscapeError::MissingBrace) }; - let mut n = 0u32; + let mut n = 0_u32; loop { let Some(c) = chars.next() else { return Err(StrEscapeError::UnicodeEof) }; if c == '}' { break } - if n >= 0x1000_0000u32 { + if n > 0x10ffff { return Err(StrEscapeError::CodepointTooLarge) } n = n * 16 + c.to_digit(16).ok_or(StrEscapeError::InvalidHex(c))?; @@ -104,3 +121,14 @@ pub fn parse_int<'a, S: Into<&'a LStr>>(f: S, radix: u32) -> Result>(f: S) -> Result { + let f = f.into(); + match f.chars().nth(2) { + Some('x') => parse_int(&f[2..], 16), + Some('o') => parse_int(&f[2..], 8), + Some('s') => parse_int(&f[2..], 6), + Some('b') => parse_int(&f[2..], 2), + _ => parse_int(f, 10), + } +} diff --git a/talc-lang/src/value/mod.rs b/talc-lang/src/value/mod.rs index b567890..f84bf04 100644 --- a/talc-lang/src/value/mod.rs +++ b/talc-lang/src/value/mod.rs @@ -49,7 +49,7 @@ pub trait NativeValue: std::fmt::Debug + Any { fn get_type(&self) -> Symbol; fn as_any(&self) -> &dyn Any; - fn to_lstring(&self, w: &mut LString, _repr: bool) -> io::Result<()> { + fn to_lstring(&self, w: &mut LString, _repr: bool, _recur: &mut Vec<*const ()>) -> io::Result<()> { w.extend(b""); Ok(()) } @@ -81,7 +81,7 @@ impl Value { table.into() } - pub fn write_to_lstring(&self, w: &mut LString, repr: bool) -> io::Result<()> { + pub fn write_to_lstring(&self, w: &mut LString, repr: bool, recur: &mut Vec<*const ()>) -> io::Result<()> { use std::io::Write; match self { Self::Nil => write!(w, "nil"), @@ -112,62 +112,89 @@ impl Value { write!(w, "{:?}i", z.im()) }, Self::Cell(v) if repr => { + if recur.contains(&(v.as_ptr() as _)) { + return w.write_all(b"cell(...)") + } w.write_all(b"cell(")?; - v.borrow().write_to_lstring(w, repr)?; + recur.push(v.as_ptr() as _); + v.borrow().write_to_lstring(w, repr, recur)?; + recur.pop(); w.write_all(b")") }, - Self::Cell(v) => v.borrow().write_to_lstring(w, false), - + Self::Cell(v) => { + if recur.contains(&(v.as_ptr() as _)) { + return w.write_all(b"cell(...)") + } + recur.push(v.as_ptr() as _); + v.borrow().write_to_lstring(w, true, recur)?; + recur.pop(); + Ok(()) + }, Self::String(s) if repr => write!(w, "{s:?}"), Self::String(s) => w.write_all(s.as_bytes()), Self::List(l) => { + if recur.contains(&(l.as_ptr() as _)) { + return w.write_all(b"[...]") + } w.write_all(b"[")?; + recur.push(l.as_ptr() as _); for (i, item) in l.borrow().iter().enumerate() { if i != 0 { w.write_all(b", ")?; } - item.write_to_lstring(w, true)?; + item.write_to_lstring(w, true, recur)?; } + recur.pop(); w.write_all(b"]") }, Self::Table(t) => { + if recur.contains(&(t.as_ptr() as _)) { + return w.write_all(b"{...}") + } w.write_all(b"{ ")?; + recur.push(t.as_ptr() as _); for (i, (k, v)) in t.borrow().iter().enumerate() { if i != 0 { w.write_all(b", ")?; } - k.0.write_table_key_repr(w)?; + k.0.write_table_key_repr(w, recur)?; w.write_all(b" = ")?; - v.write_to_lstring(w, true)?; + v.write_to_lstring(w, true, recur)?; } + recur.pop(); w.write_all(b" }") }, - Self::Function(g) - => write!(w, "", Rc::as_ptr(g)), + Self::Function(g) => { + if g.state.is_empty() { + write!(w, "", g.attrs.arity, Rc::as_ptr(g)) + } else { + write!(w, "", g.attrs.arity, Rc::as_ptr(g)) + } + } Self::NativeFunc(g) - => write!(w, "", Rc::as_ptr(g)), - Self::Native(n) => n.to_lstring(w, repr), + => write!(w, "", g.attrs.arity, Rc::as_ptr(g)), + Self::Native(n) => n.to_lstring(w, repr, recur), } } - fn write_table_key_repr(&self, w: &mut LString) -> std::io::Result<()> { + fn write_table_key_repr(&self, w: &mut LString, recur: &mut Vec<*const ()>) -> std::io::Result<()> { match self { Self::Nil | Self::Bool(_) | Self::Int(_) | Self::String(_) - => self.write_to_lstring(w, true), + => self.write_to_lstring(w, true, recur), Self::Symbol(s) => { let name = s.name(); if name.is_identifier() { w.push_lstr(name); Ok(()) } else { - self.write_to_lstring(w, true) + self.write_to_lstring(w, true, recur) } }, _ => { w.push_byte(b'('); - self.write_to_lstring(w, true)?; + self.write_to_lstring(w, true, recur)?; w.push_byte(b')'); Ok(()) } @@ -179,14 +206,16 @@ impl Value { Cow::Borrowed(s) } else { let mut s = LString::new(); - self.write_to_lstring(&mut s, false).expect("write_to_lstring failed"); + let mut recur = Vec::new(); + self.write_to_lstring(&mut s, false, &mut recur).expect("write_to_lstring failed"); Cow::Owned(s) } } pub fn repr(&self) -> LString { let mut s = LString::new(); - self.write_to_lstring(&mut s, true).expect("write_to_lstring failed"); + let mut recur = Vec::new(); + self.write_to_lstring(&mut s, true, &mut recur).expect("write_to_lstring failed"); s } diff --git a/talc-lang/src/value/ops.rs b/talc-lang/src/value/ops.rs index aad92db..3c20c76 100644 --- a/talc-lang/src/value/ops.rs +++ b/talc-lang/src/value/ops.rs @@ -1,9 +1,11 @@ + use std::{cell::RefCell, cmp::Ordering, ops::{Add, BitAnd, BitOr, BitXor, Div, Mul, Neg, Shl, Shr, Sub}, rc::Rc}; use num_complex::{Complex64, ComplexFloat}; use num_rational::Rational64; +use num_traits::{CheckedAdd, CheckedDiv, CheckedMul, CheckedSub, Signed, Zero}; -use crate::{exception::{throw, Result}, lstring::LString, symbol::{SYM_END_ITERATION, SYM_TYPE_ERROR, SYM_VALUE_ERROR}, value::range::RangeType, Vm}; +use crate::{exception::{exception, throw, Result}, lstring::LString, symbol::{SYM_END_ITERATION, SYM_TYPE_ERROR, SYM_VALUE_ERROR}, value::range::RangeType, Vm}; use super::{function::{FuncAttrs, NativeFunc}, range::Range, HashValue, Value}; @@ -38,7 +40,7 @@ impl Value { } } -#[allow(clippy::cast_precision_loss)] +#[expect(clippy::cast_precision_loss)] pub fn promote(a: Value, b: Value) -> (Value, Value) { use Value as V; match (&a, &b) { @@ -64,8 +66,16 @@ impl Neg for Value { fn neg(self) -> Self::Output { use Value as V; match self { - V::Int(x) => Ok(V::Int(-x)), - V::Ratio(x) => Ok(V::Ratio(-x)), + V::Int(x) => if let Some(x) = x.checked_neg() { + Ok(V::Int(x)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when negating {self}") + }, + V::Ratio(x) => if let Some(x) = Rational64::ZERO.checked_sub(&x) { + Ok(V::Ratio(x)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when negating {self}") + } V::Float(x) => Ok(V::Float(-x)), V::Complex(x) => Ok(V::Complex(-x)), a => throw!(*SYM_TYPE_ERROR, "cannot negate {a:#}") @@ -77,9 +87,18 @@ impl Add for Value { type Output = Result; fn add(self, rhs: Value) -> Self::Output { use Value as V; - match promote(self, rhs) { - (V::Int(x), V::Int(y)) => Ok(V::Int(x + y)), - (V::Ratio(x), V::Ratio(y)) => Ok(V::Ratio(x + y)), + let (a, b) = promote(self, rhs); + match (&a, &b) { + (V::Int(x), V::Int(y)) => if let Some(v) = x.checked_add(y) { + Ok(V::Int(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when adding {a} and {b}") + }, + (V::Ratio(x), V::Ratio(y)) => if let Some(v) = x.checked_add(y) { + Ok(V::Ratio(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when adding {a} and {b}") + }, (V::Float(x), V::Float(y)) => Ok(V::Float(x + y)), (V::Complex(x), V::Complex(y)) => Ok(V::Complex(x + y)), (l, r) => throw!(*SYM_TYPE_ERROR, "cannot add {l:#} and {r:#}") @@ -91,9 +110,18 @@ impl Sub for Value { type Output = Result; fn sub(self, rhs: Value) -> Self::Output { use Value as V; - match promote(self, rhs) { - (V::Int(x), V::Int(y)) => Ok(V::Int(x - y)), - (V::Ratio(x), V::Ratio(y)) => Ok(V::Ratio(x - y)), + let (a, b) = promote(self, rhs); + match (&a, &b) { + (V::Int(x), V::Int(y)) => if let Some(v) = x.checked_sub(y) { + Ok(V::Int(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when subtracting {a} and {b}") + }, + (V::Ratio(x), V::Ratio(y)) => if let Some(v) = x.checked_sub(y) { + Ok(V::Ratio(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when subtracting {a} and {b}") + }, (V::Float(x), V::Float(y)) => Ok(V::Float(x - y)), (V::Complex(x), V::Complex(y)) => Ok(V::Complex(x - y)), (l, r) => throw!(*SYM_TYPE_ERROR, "cannot subtract {l:#} and {r:#}") @@ -105,9 +133,18 @@ impl Mul for Value { type Output = Result; fn mul(self, rhs: Value) -> Self::Output { use Value as V; - match promote(self, rhs) { - (V::Int(x), V::Int(y)) => Ok(V::Int(x * y)), - (V::Ratio(x), V::Ratio(y)) => Ok(V::Ratio(x * y)), + let (a, b) = promote(self, rhs); + match (&a, &b) { + (V::Int(x), V::Int(y)) => if let Some(v) = x.checked_mul(y) { + Ok(V::Int(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when multiplying {a} and {b}") + }, + (V::Ratio(x), V::Ratio(y)) => if let Some(v) = x.checked_mul(y) { + Ok(V::Ratio(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when multiplying {a} and {b}") + }, (V::Float(x), V::Float(y)) => Ok(V::Float(x * y)), (V::Complex(x), V::Complex(y)) => Ok(V::Complex(x * y)), (l, r) => throw!(*SYM_TYPE_ERROR, "cannot multiply {l:#} and {r:#}") @@ -119,12 +156,17 @@ impl Div for Value { type Output = Result; fn div(self, rhs: Value) -> Self::Output { use Value as V; - match promote(self, rhs) { + let (a, b) = promote(self, rhs); + match (&a, &b) { (V::Int(_), V::Int(0)) => throw!(*SYM_VALUE_ERROR, "integer division by 0"), - (V::Int(x), V::Int(y)) => Ok(V::Ratio(Rational64::new(x, y))), - (V::Ratio(_), V::Ratio(r)) if *r.numer() == 0 && *r.denom() != 0 + (V::Int(x), V::Int(y)) => Ok(Value::Ratio((*x,*y).into())), + (V::Ratio(_), V::Ratio(r)) if r.is_zero() => throw!(*SYM_VALUE_ERROR, "rational division by 0"), - (V::Ratio(x), V::Ratio(y)) => Ok(V::Ratio(x / y)), + (V::Ratio(x), V::Ratio(y)) => if let Some(v) = x.checked_div(y) { + Ok(V::Ratio(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when dividing {a} and {b}") + }, (V::Float(x), V::Float(y)) => Ok(V::Float(x / y)), (V::Complex(x), V::Complex(y)) => Ok(V::Complex(x / y)), (l, r) => throw!(*SYM_TYPE_ERROR, "cannot divide {l:#} and {r:#}") @@ -132,44 +174,63 @@ impl Div for Value { } } -#[allow(clippy::cast_sign_loss)] #[inline] -fn ipow(n: i64, p: u64) -> Result { +fn ipow(n: i64, p: u64) -> Option { match (n, p) { - (0, 0) => throw!(*SYM_VALUE_ERROR, "integer 0 raised to power 0"), - (0, _) => Ok(0), - (_, 0) => Ok(1), - (n, p) if p > u32::MAX as u64 => { - let (lo, hi) = (p as u32, (p >> 32) as u32); - let (a, b) = (n.pow(lo), n.pow(hi)); - Ok(a * b.pow(0x1_0000).pow(0x1_0000)) - } - (n, p) => Ok(n.pow(p as u32)), + (0, 0) => None, + (0, _) => Some(0), + (_, 0) => Some(1), + (1, _) => Some(1), + (-1, p) => (-1_i64).checked_pow((p % 2) as u32), + (_, p) if p > u32::MAX as u64 => None, + (n, p) => n.checked_pow(p as u32), } } -#[allow(clippy::cast_sign_loss)] +#[expect(clippy::cast_sign_loss)] #[inline] -fn rpow(n: i64, d: i64, p: i64) -> Result<(i64, i64)> { - Ok(match p { - 0.. => (ipow(n, p as u64)?, ipow(d, p as u64)?), - _ => (ipow(d, (-p) as u64)?, ipow(n, (-p) as u64)?), - }) +fn rpow(n: i64, d: i64, p: i64) -> Option<(i64, i64)> { + match p { + i64::MIN => match (n, d) { + (0, _) => Some((0, 1)), + (1, 1) => Some((1, 1)), + (-1, 1) => Some((-1, 1)), + _ => None, + } + 0.. => Some((ipow(n, p as u64)?, ipow(d, p as u64)?)), + _ => Some((ipow(d, (-p) as u64)?, ipow(n, (-p) as u64)?)), + } +} + +fn ratio_checked_rem_euclid(r1: &Rational64, r2: &Rational64) -> Option { + todo!() +} + +fn ratio_checked_div_euclid(r1: &Rational64, r2: &Rational64) -> Option { + todo!() } impl Value { pub fn modulo(self, rhs: Value) -> Result { use Value as V; - match promote(self, rhs) { + let (a, b) = promote(self, rhs); + match (&a, &b) { (V::Int(_), V::Int(0)) => throw!(*SYM_VALUE_ERROR, "integer modulo by 0"), - (V::Int(x), V::Int(y)) => Ok(V::Int(x.rem_euclid(y))), - (V::Ratio(_), V::Ratio(y)) if *y.numer() == 0 && *y.denom() != 0 + (V::Int(x), V::Int(y)) => if let Some(v) = x.checked_rem_euclid(*y) { + Ok(V::Int(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when calculating {a} modulo {b}") + }, + + (V::Ratio(_), V::Ratio(y)) if y.is_zero() => throw!(*SYM_VALUE_ERROR, "rational modulo by 0"), - (V::Ratio(x), V::Ratio(y)) => { - let n = (x / y).floor(); - Ok(Value::Ratio(x - n * y)) + (V::Ratio(x), V::Ratio(y)) => if let Some(v) = ratio_checked_rem_euclid(x, y) { + Ok(V::Ratio(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when calculating {a} modulo {b}") } - (V::Float(x), V::Float(y)) => Ok(V::Float(x.rem_euclid(y))), + + (V::Float(x), V::Float(y)) => Ok(V::Float(x.rem_euclid(*y))), (V::Complex(x), V::Complex(y)) => { let n = x / y; let n = Complex64::new(n.re().floor(), n.im().floor()); @@ -181,13 +242,24 @@ impl Value { pub fn int_div(self, rhs: Value) -> Result { use Value as V; - match promote(self, rhs) { - (V::Int(_), V::Int(0)) => throw!(*SYM_VALUE_ERROR, "integer division by 0"), - (V::Int(x), V::Int(y)) => Ok(V::Int(x.div_euclid(y))), - (V::Ratio(_), V::Ratio(r)) if *r.numer() == 0 && *r.denom() != 0 - => throw!(*SYM_VALUE_ERROR, "rational division by 0"), - (V::Ratio(x), V::Ratio(y)) => Ok(V::Ratio((x / y).floor())), - (V::Float(x), V::Float(y)) => Ok(V::Float(x.div_euclid(y))), + let (a, b) = promote(self, rhs); + match (&a, &b) { + (V::Int(_), V::Int(0)) => throw!(*SYM_VALUE_ERROR, "integer divsion by 0"), + (V::Int(x), V::Int(y)) => if let Some(v) = x.checked_div_euclid(*y) { + Ok(V::Int(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when integer dividing {a} and {b}") + }, + + (V::Ratio(_), V::Ratio(y)) if y.is_zero() + => throw!(*SYM_VALUE_ERROR, "integer division by 0"), + (V::Ratio(x), V::Ratio(y)) => if let Some(v) = ratio_checked_div_euclid(x, y) { + Ok(V::Ratio(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when integer dividing {a} and {b}") + }, + + (V::Float(x), V::Float(y)) => Ok(V::Float(x.div_euclid(*y))), (V::Complex(x), V::Complex(y)) => { let n = x / y; Ok(V::from(Complex64::new(n.re().floor(), n.im().floor()))) @@ -199,17 +271,34 @@ impl Value { pub fn pow(self, rhs: Value) -> Result { use Value as V; if let (V::Ratio(x), V::Int(y)) = (&self, &rhs) { - return Ok(V::Ratio(rpow(*(*x).numer(), *(*x).denom(), *y)?.into())); + if x.is_zero() && *y == 0 { + throw!(*SYM_VALUE_ERROR, "rational zero to integer zero power") + } + let Some(v) = rpow(*(*x).numer(), *(*x).denom(), *y) else { + throw!(*SYM_VALUE_ERROR, "overflow when raising {self} to the power {rhs}") + }; + return Ok(V::Ratio(v.into())) } - match promote(self, rhs) { - (V::Int(x), V::Int(y)) if y >= 0 => Ok(V::Int(ipow(x, y as u64)?)), - (V::Int(x), V::Int(y)) => Ok(V::Ratio(rpow(x, 1, y)?.into())), - (V::Float(x), V::Float(y)) - => Ok(V::Float(x.powf(y))), + let (a, b) = promote(self, rhs); + match (&a, &b) { + (V::Int(0), V::Int(0)) + => throw!(*SYM_VALUE_ERROR, "integer zero to integer zero power"), + (V::Int(x), V::Int(y @ 0..)) => if let Some(v) = ipow(*x, *y as u64) { + Ok(V::Int(v)) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when raising {a} to the power {b}") + }, + (V::Int(x), V::Int(y)) => if let Some(v) = rpow(*x, 1, *y) { + Ok(V::Ratio(v.into())) + } else { + throw!(*SYM_VALUE_ERROR, "overflow when raising {a} to the power {b}") + }, (V::Ratio(x), V::Ratio(y)) => Ok(V::Float(x.to_f64().powf(y.to_f64()))), + (V::Float(x), V::Float(y)) + => Ok(V::Float(x.powf(*y))), (V::Complex(x), V::Complex(y)) - => Ok(V::Complex(x.powc(y))), + => Ok(V::Complex(x.powc(*y))), (l, r) => throw!(*SYM_TYPE_ERROR, "cannot exponentiate {l:#} and {r:#}") } } @@ -276,7 +365,7 @@ impl BitOr for Value { } impl PartialEq for Value { - #[allow(clippy::cast_precision_loss)] + #[expect(clippy::cast_precision_loss)] fn eq(&self, other: &Self) -> bool { use Value as V; use super::range::RangeType as Rty; @@ -319,7 +408,7 @@ impl PartialEq for Value { } impl PartialOrd for Value { - #[allow(clippy::cast_precision_loss)] + #[expect(clippy::cast_precision_loss)] fn partial_cmp(&self, other: &Self) -> Option { use Value as V; match (self, other) { diff --git a/talc-lang/src/vm.rs b/talc-lang/src/vm.rs index b6f9d2a..38df830 100644 --- a/talc-lang/src/vm.rs +++ b/talc-lang/src/vm.rs @@ -1,6 +1,6 @@ use std::{cmp::Ordering, collections::HashMap, rc::Rc, sync::{atomic::AtomicBool, Arc}}; -use crate::{ast::{BinaryOp, UnaryOp}, chunk::Instruction, exception::{throw, Exception, Result}, lstring::LStr, symbol::{Symbol, SYM_CALL_STACK_OVERFLOW, SYM_INTERRUPTED, SYM_NAME_ERROR, SYM_TYPE_ERROR}, value::{function::{FuncAttrs, Function, NativeFunc}, Value}}; +use crate::{parser::ast::{BinaryOp, UnaryOp}, chunk::Instruction, exception::{throw, Exception, Result}, lstring::LStr, symbol::{Symbol, SYM_CALL_STACK_OVERFLOW, SYM_INTERRUPTED, SYM_NAME_ERROR, SYM_TYPE_ERROR}, value::{function::{FuncAttrs, Function, NativeFunc}, Value}}; struct TryFrame { idx: usize, stack_len: usize } @@ -78,25 +78,25 @@ fn get_call_outcome(args: Vec) -> Result { throw!(*SYM_TYPE_ERROR, "cannot call non-function {f:#}") }; let argc = args.len() - 1; - if argc == attrs.arity { - Ok(CallOutcome::Call(args)) - } else if argc > attrs.arity { - throw!(*SYM_TYPE_ERROR, "too many arguments for function") - } else { - let remaining = attrs.arity - argc; - let f = f.clone(); - let nf = move |vm: &mut Vm, inner_args: Vec| { - let mut ia = inner_args.into_iter(); - ia.next(); - let args: Vec = args.clone().into_iter().chain(ia).collect(); - vm.call_value(f.clone(), args) - }; - let nf = NativeFunc { - attrs: FuncAttrs { arity: remaining }, - func: Box::new(nf), - }; - Ok(CallOutcome::Partial(nf.into())) - } + match argc.cmp(&attrs.arity) { + Ordering::Equal => Ok(CallOutcome::Call(args)), + Ordering::Greater => throw!(*SYM_TYPE_ERROR, "too many arguments for function"), + Ordering::Less => { + let remaining = attrs.arity - argc; + let f = f.clone(); + let nf = move |vm: &mut Vm, inner_args: Vec| { + let mut ia = inner_args.into_iter(); + ia.next(); + let args: Vec = args.clone().into_iter().chain(ia).collect(); + vm.call_value(f.clone(), args) + }; + let nf = NativeFunc { + attrs: FuncAttrs { arity: remaining }, + func: Box::new(nf), + }; + Ok(CallOutcome::Partial(nf.into())) + } + } } impl Vm { @@ -261,14 +261,16 @@ impl Vm { panic!("attempt to build closure from non-closure constant") }; let mut f = f.as_ref().clone(); - let mut args = Vec::with_capacity(f.state.len()); - for _ in 0..f.state.len() { - let Value::Cell(c) = self.pop() else { - panic!("attempt to build closure from non-cell local"); - }; - args.push(c); - } - f.state = args.into_boxed_slice(); + + let captured: Vec<_> = self.pop_n(f.state.len()).into_iter() + .map(|v| { + let Value::Cell(v) = v else { + panic!("attempt to build closure from non-cell local"); + }; + v + }).collect(); + + f.state = captured.into_boxed_slice(); self.push(f.into()); }, I::LoadUpvalue(n) => { @@ -279,6 +281,10 @@ impl Vm { let v = frame.func.state[usize::from(n)].clone(); *v.borrow_mut() = self.pop(); }, + I::ContinueUpvalue(n) => { + let v = frame.func.state[usize::from(n)].clone(); + self.push(Value::Cell(v)); + }, I::LoadClosedLocal(n) => { let Value::Cell(c) = &frame.locals[usize::from(n)] else { panic!("attempt to load from closed non-cell local"); diff --git a/talc-macros/src/lib.rs b/talc-macros/src/lib.rs index 7e1a1f6..3d701d7 100644 --- a/talc-macros/src/lib.rs +++ b/talc-macros/src/lib.rs @@ -1,48 +1,8 @@ use proc_macro::TokenStream; -use syn::{parse::Parse, parse_macro_input, ItemFn, LitInt}; -use quote::quote; -struct NativeFuncArgs { - arity: LitInt, -} - -impl Parse for NativeFuncArgs { - fn parse(input: syn::parse::ParseStream) -> syn::Result { - let arity = input.parse()?; - Ok(Self { arity }) - } -} +mod native_func; #[proc_macro_attribute] pub fn native_func(input: TokenStream, annotated_item: TokenStream) -> TokenStream { - let Ok(itemfn) = syn::parse::(annotated_item.clone()) else { - return annotated_item - }; - let args: NativeFuncArgs = parse_macro_input!(input as NativeFuncArgs); - - let visibility = itemfn.vis; - let block = itemfn.block; - let name = itemfn.sig.ident; - let inputs = itemfn.sig.inputs; - let output = itemfn.sig.output; - let arity = args.arity; - - assert!(itemfn.sig.constness.is_none(), "item must not be const"); - assert!(itemfn.sig.asyncness.is_none(), "item must not be async"); - assert!(itemfn.sig.unsafety.is_none(), "item must not be unsafe"); - assert!(itemfn.sig.abi.is_none(), "item must not contain an ABI specifier"); - assert!(itemfn.sig.variadic.is_none(), "item must not be variadic"); - - let expanded = quote! { - #visibility fn #name() -> ::talc_lang::value::function::NativeFunc { - ::talc_lang::value::function::NativeFunc { - attrs: ::talc_lang::value::function::FuncAttrs{ - arity: #arity, - }, - func: Box::new(|#inputs| #output #block) - } - } - }; - - TokenStream::from(expanded) + native_func::native_func(input, annotated_item) } diff --git a/talc-macros/src/native_func.rs b/talc-macros/src/native_func.rs new file mode 100644 index 0000000..186b31c --- /dev/null +++ b/talc-macros/src/native_func.rs @@ -0,0 +1,47 @@ +use proc_macro::TokenStream; +use syn::{parse::Parse, parse_macro_input, ItemFn, LitInt}; +use quote::quote; + +struct NativeFuncArgs { + arity: LitInt, +} + +impl Parse for NativeFuncArgs { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let arity = input.parse()?; + Ok(Self { arity }) + } +} + +pub fn native_func(input: TokenStream, annotated_item: TokenStream) -> TokenStream { + let Ok(itemfn) = syn::parse::(annotated_item.clone()) else { + return annotated_item + }; + let args: NativeFuncArgs = parse_macro_input!(input as NativeFuncArgs); + + let visibility = itemfn.vis; + let block = itemfn.block; + let name = itemfn.sig.ident; + let inputs = itemfn.sig.inputs; + let output = itemfn.sig.output; + let arity = args.arity; + + assert!(itemfn.sig.constness.is_none(), "item must not be const"); + assert!(itemfn.sig.asyncness.is_none(), "item must not be async"); + assert!(itemfn.sig.unsafety.is_none(), "item must not be unsafe"); + assert!(itemfn.sig.abi.is_none(), "item must not contain an ABI specifier"); + assert!(itemfn.sig.variadic.is_none(), "item must not be variadic"); + + let expanded = quote! { + #visibility fn #name() -> ::talc_lang::value::function::NativeFunc { + ::talc_lang::value::function::NativeFunc { + attrs: ::talc_lang::value::function::FuncAttrs{ + arity: #arity, + }, + func: Box::new(|#inputs| #output #block) + } + } + }; + + TokenStream::from(expanded) +} diff --git a/talc-std/src/file.rs b/talc-std/src/file.rs index 9b5968a..8257247 100644 --- a/talc-std/src/file.rs +++ b/talc-std/src/file.rs @@ -154,7 +154,7 @@ impl From for ValueFile { impl NativeValue for ValueFile { fn get_type(&self) -> Symbol { *SYM_STD_FILE } fn as_any(&self) -> &dyn std::any::Any { self } - fn to_lstring(&self, w: &mut LString, _repr: bool) -> std::io::Result<()> { + fn to_lstring(&self, w: &mut LString, _repr: bool, _recur: &mut Vec<*const ()>) -> std::io::Result<()> { w.push_str(""); Ok(()) } @@ -172,7 +172,7 @@ impl From for ValueProcess { impl NativeValue for ValueProcess { fn get_type(&self) -> Symbol { *SYM_STD_PROCESS } fn as_any(&self) -> &dyn std::any::Any { self } - fn to_lstring(&self, w: &mut LString, _repr: bool) -> std::io::Result<()> { + fn to_lstring(&self, w: &mut LString, _repr: bool, _recur: &mut Vec<*const ()>) -> std::io::Result<()> { let id = self.0.borrow().id(); write!(w, "") } diff --git a/talc-std/src/iter.rs b/talc-std/src/iter.rs index 3ac685b..a899df1 100644 --- a/talc-std/src/iter.rs +++ b/talc-std/src/iter.rs @@ -914,6 +914,8 @@ pub fn pstdev(vm: &mut Vm, args: Vec) -> Result { #[derive(PartialEq, PartialOrd)] struct OrdValue(Value); impl std::cmp::Eq for OrdValue {} + +#[allow(clippy::derive_ord_xor_partial_ord)] impl std::cmp::Ord for OrdValue { fn cmp(&self, other: &Self) -> std::cmp::Ordering { self.partial_cmp(other).unwrap_or(std::cmp::Ordering::Less) diff --git a/talc-std/src/lib.rs b/talc-std/src/lib.rs index 0893a9e..8708d71 100644 --- a/talc-std/src/lib.rs +++ b/talc-std/src/lib.rs @@ -1,3 +1,5 @@ +#![allow(clippy::mutable_key_type)] + use talc_lang::{symbol::{symbol, Symbol}, Vm}; pub mod value; diff --git a/talc-std/src/num.rs b/talc-std/src/num.rs index dbff4e9..1a93655 100644 --- a/talc-std/src/num.rs +++ b/talc-std/src/num.rs @@ -1,7 +1,7 @@ use std::cmp::Ordering; use lazy_static::lazy_static; -use talc_lang::{exception::Result, lstring::LString, parse_int, symbol::{Symbol, SYM_TYPE_ERROR, SYM_VALUE_ERROR}, throw, value::{ops::RatioExt, Complex64, Value}, vmcalliter, Vm}; +use talc_lang::{exception::Result, lstring::LString, parser::parse_int, symbol::{Symbol, SYM_TYPE_ERROR, SYM_VALUE_ERROR}, throw, value::{ops::RatioExt, Complex64, Value}, vmcalliter, Vm}; use talc_macros::native_func; use crate::unpack_args; diff --git a/talc-std/src/regex.rs b/talc-std/src/regex.rs index 84b068e..d5deff9 100644 --- a/talc-std/src/regex.rs +++ b/talc-std/src/regex.rs @@ -28,7 +28,7 @@ impl From for Regex { impl NativeValue for ValueRegex { fn get_type(&self) -> Symbol { *SYM_STD_REGEX } fn as_any(&self) -> &dyn std::any::Any { self } - fn to_lstring(&self, w: &mut LString, repr: bool) -> std::io::Result<()> { + fn to_lstring(&self, w: &mut LString, repr: bool, _recur: &mut Vec<*const ()>) -> std::io::Result<()> { use std::io::Write; if repr { write!(w, "/{}/", self.0) diff --git a/talc-std/src/value.rs b/talc-std/src/value.rs index eaddb3c..b8a614e 100644 --- a/talc-std/src/value.rs +++ b/talc-std/src/value.rs @@ -1,6 +1,6 @@ use std::{cell::RefCell, collections::HashMap, rc::Rc}; -use talc_lang::{exception::{exception, Result}, lformat, parse_float, parse_int, symbol::{SYM_TYPE_ERROR, SYM_VALUE_ERROR}, throw, value::{ops::RatioExt, HashValue, Rational64, Value}, Vm}; +use talc_lang::{exception::{exception, Result}, lformat, parser::{parse_float, parse_int}, symbol::{SYM_TYPE_ERROR, SYM_VALUE_ERROR}, throw, value::{ops::RatioExt, HashValue, Rational64, Value}, Vm}; use talc_macros::native_func; use crate::unpack_args; @@ -17,7 +17,9 @@ pub fn load(vm: &mut Vm) { vm.set_global_name("cell", cell().into()); vm.set_global_name("uncell", uncell().into()); vm.set_global_name("cell_replace", cell_replace().into()); - vm.set_global_name("cell_take", cell_replace().into()); + vm.set_global_name("cell_take", cell_take().into()); + + vm.set_global_name("closure_state", closure_state().into()); } // @@ -181,3 +183,13 @@ pub fn cell_take(_: &mut Vm, args: Vec) -> Result { Ok(cell.replace(Value::Nil)) } +#[native_func(1)] +pub fn closure_state(_: &mut Vm, args: Vec) -> Result { + let [_, func] = unpack_args!(args); + let Value::Function(func) = func else { + throw!(*SYM_TYPE_ERROR, "closure_state: value is not a function") + }; + let l: Vec = func.state.iter().map(|v| Value::Cell(v.clone())).collect(); + Ok(l.into()) +} +