diff --git a/docs/grammar.md b/docs/grammar.md
new file mode 100644
index 0000000..23ef737
--- /dev/null
+++ b/docs/grammar.md
@@ -0,0 +1,50 @@
+# Boolia Grammar
+
+This document captures the lexical rules, operator precedence, and expression grammar used by the Boolia parser.
+
+## Lexical grammar
+
+- **Whitespace**: spaces, tabs, and newlines are ignored between tokens.
+- **Identifiers**: `[A-Za-z_][A-Za-z0-9_]*` (case-sensitive). Bare identifiers may be treated as tags when they resolve to `None` but exist in the tag set.
+- **Numbers**: decimal integers or floats: `\d+`, `\d+\.\d*`, or `\d*\.\d+` (no exponents).
+- **Strings**: single- or double-quoted with Python `unicode_escape` semantics inside the quotes; escapes like `\n`, `\"`, and `\\` are allowed.
+- **Keywords** (case-insensitive): `true`, `false`, `null`, `none`, `not`. Operator keywords: `and`, `or`, `in` (plus any custom operators you register with keywords).
+- **Symbols**: `(`, `)`, `,`, `.`, and operator symbols (`==`, `!=`, `>`, `>=`, `<`, `<=`, plus any custom symbols you register).
+- Any unrecognized character causes a `SyntaxError` at tokenization time.
+
+## Operator precedence (higher binds tighter)
+
+1. Comparisons and membership: `==`, `!=`, `>`, `>=`, `<`, `<=`, `in`, and any custom operators registered at precedence 40 or higher.
+2. Unary `not` (binding power 30).
+3. Logical `and` (20).
+4. Logical `or` (10).
+
+The list above runs from tightest to loosest binding (a larger binding power binds tighter). Operators of the same precedence group are left-associative.
+
+## Expression grammar (EBNF)
+
+```
+expression   ::= or_expr
+or_expr      ::= and_expr { "or" and_expr }
+and_expr     ::= not_expr { "and" not_expr }
+not_expr     ::= [ "not" ] compare_expr
+compare_expr ::= primary { binary_op primary } (* binary_op tokens come from OperatorRegistry; precedence decides grouping *)
+primary      ::= literal
+               | identifier call_args
+               | dotted_name
+               | "(" expression ")"
+
+dotted_name  ::= identifier { "." identifier }
+call_args    ::= "(" [ expression { "," expression } ] ")"
+literal      ::= number | string | "true" | "false" | "null" | "none"
+identifier   ::= /[A-Za-z_][A-Za-z0-9_]*/
+number       ::= int | float (* decimal only; no exponent *)
+string       ::= single_quoted | double_quoted (* with Python-style escapes *)
+```
+
+### Semantics notes
+
+- **Name resolution**: dotted names walk the provided context using mapping keys, then attributes; zero-argument bound methods are invoked automatically. A missing path follows the chosen policy (`raise`, `none`, `false`, or `default` value).
+- **Tags**: if a bare identifier resolves to `None` and its name is present in the tag set, it evaluates to `True`.
+- **Functions**: a bare identifier followed by `(` is a function call looked up in the active `FunctionRegistry`; unknown names raise `NameError`.
+- **Custom operators**: you can register new binary operators with custom precedence, keywords, and/or symbols via `OperatorRegistry`.
diff --git a/syntaxes/boolia.tmLanguage.json b/syntaxes/boolia.tmLanguage.json
new file mode 100644
index 0000000..a0a8688
--- /dev/null
+++ b/syntaxes/boolia.tmLanguage.json
@@ -0,0 +1,92 @@
+{
+  "name": "Boolia",
+  "scopeName": "source.boolia",
+  "fileTypes": ["boolia", "bool"],
+  "patterns": [
+    { "include": "#strings" },
+    { "include": "#numbers" },
+    { "include": "#keywords" },
+    { "include": "#operators" },
+    { "include": "#functions" },
+    { "include": "#dotted-names" },
+    { "include": "#identifiers" }
+  ],
+  "repository": {
+    "strings": {
+      "patterns": [
+        {
+          "name": "string.quoted.single.boolia",
+          "match": "'(?:[^\\\\']|\\\\.)*'"
+        },
+        {
+          "name": "string.quoted.double.boolia",
+          "match": "\"(?:[^\\\\\"]|\\\\.)*\""
+        }
+      ]
+    },
+    "numbers": {
+      "patterns": [
+        {
+          "name": "constant.numeric.boolia",
+          "match": "(?<!\\w)(?:\\d+\\.\\d*|\\d*\\.\\d+|\\d+)(?!\\w)"
+        }
+      ]
+    },
+    "keywords": {
+      "patterns": [
+        {
+          "name": "keyword.control.boolia",
+          "match": "(?i:\\b(?:and|or|not|in)\\b)"
+        },
+        {
+          "name": "constant.language.boolia",
+          "match": "(?i:\\b(?:true|false|null|none)\\b)"
+        }
+      ]
+    },
+    "operators": {
+      "patterns": [
+        {
+          "name": "keyword.operator.comparison.boolia",
+          "match": "==|!=|>=|<=|>|<"
+        },
+        {
+          "name": "punctuation.section.parens.boolia",
+          "match": "[()]"
+        },
+        {
+          "name": "punctuation.separator.comma.boolia",
+          "match": ","
+        },
+        {
+          "name": "punctuation.accessor.dot.boolia",
+          "match": "\\."
+        }
+      ]
+    },
+    "functions": {
+      "patterns": [
+        {
+          "name": "support.function.boolia",
+          "match": "\\b([A-Za-z_]\\w*)(?=\\s*\\()"
+        }
+      ]
+    },
+    "dotted-names": {
+      "patterns": [
+        {
+          "name": "variable.other.member.boolia",
+          "match": "\\b[A-Za-z_]\\w*(?:\\.[A-Za-z_]\\w*)+\\b"
+        }
+      ]
+    },
+    "identifiers": {
+      "patterns": [
+        {
+          "name": "variable.other.boolia",
+          "match": "\\b[A-Za-z_]\\w*\\b"
+        }
+      ]
+    }
+  }
+}