Commented Syntax.rsc

10 months ago · bbb3b5f55a
parent 89e60b4136
commit bbb3b5f55a
1 changed files with 91 additions and 4 deletions
--- a/src/main/rascal/Syntax.rsc
+++ b/src/main/rascal/Syntax.rsc
@ -4,23 +4,51 @@ extend lang::std::Whitespace;
 extend lang::std::Id;
 extend lang::std::ASCII;

+/*
+  The grammar of MiniPas
+*/
+
+/*
+  A program consists of an id, followed by declarations followed by its body
+*/
 start syntax Program      = 'program' Id ";" ConstDecl* VarDecl* SubProgDecl* CompoundStatement "."
                          ;

+/*
+  A constant's type is implied by the value it is assigned
+*/
+
 syntax ConstDecl          = 'const' Id "=" Number ";"
                          ;

+/*
+  You can declare multiple variables of the same type, in a simple declaration
+*/
 syntax VarDecl            =  'var' IdList ":" TypeSpec ";"
                          ;

 syntax IdList             = Id ("," Id)*
                          ;

+/*
+  Observe that there is no boolean type. Guards for if-else blocks and while-blocks use a dedicated
+  non-terminal (Guard) that defines boolean expressions. Thus, ordinary expressions (whose types are basic types,
+  or arrays of basic types) never need to be Boolean.
+*/
 syntax TypeSpec           = BasicType | 'array' "[" Number ".." Number "]" 'of' BasicType
                          ;
 syntax BasicType          = 'integer' | 'real'
                          ;

+/*
+  There are two key differences between a function and a procedure.
+  1. A function returns a value, a procedure returns nothing. 
+  2. A procedure may omit its parameter list entirely; a function must always have one.
+
+  Observe that you can have local variables.
+
+  Also note that we use SubProg to refer to both functions / procedures.
+*/
 syntax SubProgDecl        = 'function' Id Parameters ":" BasicType ";" VarDecl* CompoundStatement ";"
                          | 'procedure' Id Parameters? ";" VarDecl* CompoundStatement ";"
                          ;
@ -30,6 +58,14 @@ syntax Parameters         = "(" ParameterList ")"
 syntax ParameterList      = ParamList
                          | ParameterList ";" ParamList
                          ;
+/*
+  Observe that you can declare multiple parameters in a single declaration.
+  Also note the 'var'. 'var' parameters are passed by reference, whereas non 'var' parameters
+  are passed by value.
+
+  Furthermore, you can have (reference) array parameters. Array arguments must have the same
+  size.
+*/
 syntax ParamList          = 'var'? IdList ":" TypeSpec
                          ;

@ -37,8 +73,16 @@ syntax CompoundStatement  = 'begin' StatementList? 'end';

 syntax StatementList      = Statement 
                          | StatementList ";" Statement;
+/*
+  We have:
+  1. Assignment statements
+  2. Subprogram calls
+  3. Nested blocks
+  4. A no-op
+  5. if-else blocks
+  6. while blocks
+*/
 syntax Statement          = Lhs ":=" Expr
-                          | ProcedureCall                                 
+                          | SubProgCall                                
                          | CompoundStatement                             
                          | 'skip'
                          | 'if' Guard 'then' Statement 'else' Statement 
@ -54,18 +98,25 @@ syntax Guard              = "(" Guard ")"
                            )
                          ;

+/*
+  These define expressions that can be assigned to
+*/
 syntax LhsList            = Lhs ("," Lhs)*
                          ;
 syntax Lhs                = Id ("[" Expr "]")?
                          ;

-syntax ProcedureCall      = 'readln'"(" LhsList ")"
+syntax SubProgCall        = 'readln'"(" LhsList ")"
                          | 'writeln'"(" ExprList ")"
                          > Id \ 'writeln' \ 'readln' \ 'skip' ("(" ExprList ")")?
                          ;

 syntax ExprList           = Expr ("," Expr)*
                          ;
+/*
+  A fairly normal set of expressions, perhaps with the exception of Id [Expr .. Expr].
+  This is syntax for array slicing.
+*/
 syntax Expr               = "(" Expr ")"                    
                          | Id
                          | Id "[" Expr (".." Expr)?"]"
@ -84,15 +135,51 @@ syntax Expr               = "(" Expr ")"
                            )
                          ;
 
+/*
+  Basic lexical constructs. 
+*/
 lexical Relop   = "\<"|"\<="|"\>"|"\>="|"="|"\<\>";
 lexical Number  = Digit+
                | Digit* "." Digit+
                ;
+/*
+  A comment is anything between a "{" and a "}"
+*/
 lexical Comment = "{" ![}]* "}";

- 
+/*
+  Layout essentially captures all characters between our main syntactic objects. You can read this as follows:
+
+  Between any two syntactic elements, eat as much whitespace or comments as you can.
+
+  Interestingly, if you remove the follow restriction (everything from !>> onwards), parsing a program becomes ambiguous.
+  This is because Standard is silently inserted between all symbols of every production rule, so
+  our Program non-terminal is actually as follows:
+
+  start syntax Program      = Standard 'program' Standard Id Standard ";" 
+                              Standard ConstDecl* Standard VarDecl* 
+                              Standard SubProgDecl* Standard CompoundStatement Standard "."
+
+  In this way, any white space or comments get eaten up between our actual definitions. However, what happens if there are no const decls
+  or var decls, let's say? Then we have at some point ... Standard Standard Standard SubProgDecl* ...
+
+  And since Standard can be the empty string, this means that if there is a comment between the program id and the first function, Rascal
+  doesn't know _which_ Standard to use when reducing the comment. If there are multiple comments, it can go into any of the Standards.
+
+  To fix this, we essentially force the first Standard to eat _all_ of the comments / whitespace. To do this, we say that Standard cannot
+  be followed by whitespace or by our comment delimiters ("{", "}"), which is what !>> [...] does.
+
+  A !>> [...] says that A cannot be matched if the next character after it is in [...]. So any such character that can be part of A must be consumed by A.
+
+  Note: The Unicode code points in the character class define whitespace characters (I believe). I could be wrong, I stole this from
+  lang::std::Layout ;)
+
+  Idea from: https://stackoverflow.com/a/75458702
+
+  This is also in the documentation. See: https://www.rascal-mpl.org/docs/Rascal/Declarations/SyntaxDefinition/Disambiguation/Follow/
+*/
 layout Standard 
-  = WhitespaceOrComment* !>> [\u0009-\u000D \u0020 \u0085 \u00A0 \u1680 \u180E \u2000-\u200A \u2028 \u2029 \u202F \u205F \u3000 \ \t\n\r] !>> [{}];
+  = WhitespaceOrComment* !>> [\u0009-\u000D \u0020 \u0085 \u00A0 \u1680 \u180E \u2000-\u200A \u2028 \u2029 \u202F \u205F \u3000 {}];
  
 lexical WhitespaceOrComment 
  = whitespace: Whitespace