diff --git a/src/main/rascal/Syntax.rsc b/src/main/rascal/Syntax.rsc index 5479a42..c6fbdc7 100644 --- a/src/main/rascal/Syntax.rsc +++ b/src/main/rascal/Syntax.rsc @@ -4,23 +4,51 @@ extend lang::std::Whitespace; extend lang::std::Id; extend lang::std::ASCII; +/* + The grammar of MiniPas +*/ + +/* + A program consists of an id, followed by declarations followed by its body +*/ start syntax Program = 'program' Id ";" ConstDecl* VarDecl* SubProgDecl* CompoundStatement "." ; +/* + A constant's type is implicit by the value it's assigned +*/ + syntax ConstDecl = 'const' Id "=" Number ";" ; +/* + You can declare multiple variables of the same type, in a simple declaration +*/ syntax VarDecl = 'var' IdList ":" TypeSpec ";" ; syntax IdList = Id ("," Id)* ; +/* + Observe that there is no boolean type. Guards for if-else blocks and while-blocks have a special + non-terminal that defines boolean expressions, essentially. Thus, normal expressions (which are (potentially array) of basic types) + don't need to be Boolean. +*/ syntax TypeSpec = BasicType | 'array' "[" Number ".." Number "]" 'of' BasicType ; syntax BasicType = 'integer' | 'real' ; +/* + There are two key differences between a function and a procedure. + 1. A function returns a value, a procedure returns nothing. + 2. A procedure can have no parameters. + + Observe that you can have local variables. + + Also note that we use SubProg to refer to both functions / procedures. +*/ syntax SubProgDecl = 'function' Id Parameters ":" BasicType ";" VarDecl* CompoundStatement ";" | 'procedure' Id Parameters? ";" VarDecl* CompoundStatement ";" ; @@ -30,6 +58,14 @@ syntax Parameters = "(" ParameterList ")" syntax ParameterList = ParamList | ParameterList ";" ParamList ; +/* + Observe you can declare multiple parameters in a single declaration. + Also note the 'var'. 'var' parameters are passed by reference, whereas non 'var' parameters + are passed by value. + + Furthermore, you can have (reference) array parameters. 
Array arguments must have the same + size +*/ syntax ParamList = 'var'? IdList ":" TypeSpec ; @@ -37,8 +73,16 @@ syntax CompoundStatement = 'begin' StatementList? 'end'; syntax StatementList = Statement | StatementList ";" Statement; +/* + We have: + 1. Assignment statements + 2. Nested blocks + 3. A no-op + 4. if-else blocks + 5. while blocks +*/ syntax Statement = Lhs ":=" Expr - | ProcedureCall + | SubProgCall | CompoundStatement | 'skip' | 'if' Guard 'then' Statement 'else' Statement @@ -54,18 +98,25 @@ syntax Guard = "(" Guard ")" ) ; +/* + These define expressions that can be assigned to +*/ syntax LhsList = Lhs ("," Lhs)* ; syntax Lhs = Id ("[" Expr "]")? ; -syntax ProcedureCall = 'readln'"(" LhsList ")" +syntax SubProgCall = 'readln'"(" LhsList ")" | 'writeln'"(" ExprList ")" > Id \ 'writeln' \ 'readln' \ 'skip' ("(" ExprList ")")? ; syntax ExprList = Expr ("," Expr)* ; +/* + A fairly normal set of expressions, perhaps with the exception of Id [Expr .. Expr]. + This is syntax for array slicing. +*/ syntax Expr = "(" Expr ")" | Id | Id "[" Expr (".." Expr)?"]" @@ -84,15 +135,51 @@ syntax Expr = "(" Expr ")" ) ; +/* + Basic lexical constructs. +*/ lexical Relop = "\<"|"\<="|"\>"|"\>="|"="|"\<\>"; lexical Number = Digit+ | Digit* "." Digit+ ; +/* + A comment is anything between a "{" and a "}" +*/ lexical Comment = "{" ![}]* "}"; - +/* + Layout essentially captures all characters between our main syntactic objects. You can read this as follows: + + Between any two syntactic elements, eat as much whitespace or comments as you can. + + Now, interestingly, if you remove everything after !>>, this makes parsing a program ambiguous. This is because + what happens is that Standard is silently placed between all symbols in our production rule and so + our Program Non-Terminal is actually as follows: + + start syntax Program = Standard 'program' Standard Id Standard ";" + Standard ConstDecl* Standard VarDecl* + Standard SubProgDecl* Standard CompoundStatement "." 
+ + In this way, any white space or comments get eaten up between our actual definitions. However, what happens if there are no const decls + or var decls, let's say? Then we have at some point ... Standard Standard Standard SubProgDecl* ... + + And since Standard can be the empty string, this means that if there is a comment between the program id and the first function, Rascal + doesn't know _which_ Standard to use when reducing the comment. If there are multiple comments, it can go into any of the Standards. + + To fix this, we essentially force the first Standard to eat _all_ of the comments / whitespace. To do this, we say that Standard cannot + have whitespace or our comment delimiters ("{", "}") follow it, which is what !>> [...] does. + + A !>> [...] says that A cannot be matched if there's something in ... after it, so if ... can be in A, it will be placed in there. + + Note: The UTF characters in the character class define whitespace characters (I believe). I could be wrong, I stole this from + lang::std::Layout ;) + + Idea from: https://stackoverflow.com/a/75458702 + + This is also in the documentation. See: https://www.rascal-mpl.org/docs/Rascal/Declarations/SyntaxDefinition/Disambiguation/Follow/ +*/ layout Standard - = WhitespaceOrComment* !>> [\u0009-\u000D \u0020 \u0085 \u00A0 \u1680 \u180E \u2000-\u200A \u2028 \u2029 \u202F \u205F \u3000 \ \t\n\r] !>> [{}]; + = WhitespaceOrComment* !>> [\u0009-\u000D \u0020 \u0085 \u00A0 \u1680 \u180E \u2000-\u200A \u2028 \u2029 \u202F \u205F \u3000 {}]; lexical WhitespaceOrComment = whitespace: Whitespace