|
|
|
@ -4,23 +4,51 @@ extend lang::std::Whitespace;
|
|
|
|
|
extend lang::std::Id;
|
|
|
|
|
extend lang::std::ASCII;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
The grammar of MiniPas
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
A program consists of an id, followed by declarations followed by its body
|
|
|
|
|
*/
|
|
|
|
|
/*
Start production of the grammar. A program is the keyword 'program', the program's Id and ";",
then zero or more ConstDecls, zero or more VarDecls and zero or more SubProgDecls (in that order),
and finally the main CompoundStatement followed by ".".
Single-quoted keywords such as 'program' are Rascal case-insensitive literals.
*/
start syntax Program = 'program' Id ";" ConstDecl* VarDecl* SubProgDecl* CompoundStatement "."
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
A constant's type is implied by the value assigned to it
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
One constant declaration: 'const', the constant's Id, "=", a Number literal and a closing ";".
No type is written, and only a Number literal is accepted on the right-hand side.
*/
syntax ConstDecl = 'const' Id "=" Number ";"
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
You can declare multiple variables of the same type in a single declaration
|
|
|
|
|
*/
|
|
|
|
|
/*
One variable declaration: 'var', an IdList naming the variables, ":", their shared TypeSpec
and a closing ";".
*/
syntax VarDecl = 'var' IdList ":" TypeSpec ";"
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
/*
A non-empty list of Ids separated by ",".
*/
syntax IdList = Id ("," Id)*
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
Observe that there is no boolean type. Guards for if-else blocks and while-blocks have a special
|
|
|
|
|
non-terminal that defines boolean expressions, essentially. Thus, normal expressions (which are of basic types, or arrays of basic types)
|
|
|
|
|
don't need to be Boolean.
|
|
|
|
|
*/
|
|
|
|
|
/*
A type is either a BasicType, or an array of BasicType whose index range is written as
"[" Number ".." Number "]".
NOTE(review): the Number lexical in this file also matches fractional literals, so the grammar
accepts non-integer bounds here - confirm they are rejected in a later phase.
*/
syntax TypeSpec = BasicType | 'array' "[" Number ".." Number "]" 'of' BasicType
|
|
|
|
|
;
|
|
|
|
|
/*
The two scalar type keywords of the language: 'integer' and 'real'.
*/
syntax BasicType = 'integer' | 'real'
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
There are two key differences between a function and a procedure.
|
|
|
|
|
1. A function returns a value, a procedure returns nothing.
|
|
|
|
|
2. A procedure may omit its parameter list; a function must have one.
|
|
|
|
|
|
|
|
|
|
Observe that you can have local variables.
|
|
|
|
|
|
|
|
|
|
Also note that we use SubProg to refer to both functions / procedures.
|
|
|
|
|
*/
|
|
|
|
|
/*
Two alternatives:
- a function: 'function', its Id, a mandatory Parameters, ":" and a BasicType, ";",
  then zero or more local VarDecls, the body (CompoundStatement) and a closing ";";
- a procedure: the same shape, except that Parameters is optional and no ":" BasicType is written.
*/
syntax SubProgDecl = 'function' Id Parameters ":" BasicType ";" VarDecl* CompoundStatement ";"
|
|
|
|
|
| 'procedure' Id Parameters? ";" VarDecl* CompoundStatement ";"
|
|
|
|
|
;
|
|
|
|
@ -30,6 +58,14 @@ syntax Parameters = "(" ParameterList ")"
|
|
|
|
|
/*
One or more ParamList groups separated by ";", written left-recursively.
*/
syntax ParameterList = ParamList
|
|
|
|
|
| ParameterList ";" ParamList
|
|
|
|
|
;
|
|
|
|
|
/*
|
|
|
|
|
Observe that you can declare multiple parameters in a single declaration.
|
|
|
|
|
Also note the 'var'. 'var' parameters are passed by reference, whereas non 'var' parameters
|
|
|
|
|
are passed by value.
|
|
|
|
|
|
|
|
|
|
Furthermore, you can have (reference) array parameters. Array arguments must have the same
|
|
|
|
|
size
|
|
|
|
|
*/
|
|
|
|
|
/*
One parameter group: an optional 'var' keyword, an IdList naming the parameters, ":" and the
TypeSpec they all share.
*/
syntax ParamList = 'var'? IdList ":" TypeSpec
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
@ -37,8 +73,16 @@ syntax CompoundStatement = 'begin' StatementList? 'end';
|
|
|
|
|
|
|
|
|
|
/*
One or more Statements separated by ";", written left-recursively.
Here ";" sits between statements; this rule does not accept a trailing ";".
*/
syntax StatementList = Statement
|
|
|
|
|
| StatementList ";" Statement;
|
|
|
|
|
/*
|
|
|
|
|
We have:
|
|
|
|
|
1. Assignment statements
|
|
|
|
|
2. Nested blocks
|
|
|
|
|
3. A no-op
|
|
|
|
|
4. if-else blocks
|
|
|
|
|
5. while blocks
|
|
|
|
|
*/
|
|
|
|
|
syntax Statement = Lhs ":=" Expr
|
|
|
|
|
| ProcedureCall
|
|
|
|
|
| SubProgCall
|
|
|
|
|
| CompoundStatement
|
|
|
|
|
| 'skip'
|
|
|
|
|
| 'if' Guard 'then' Statement 'else' Statement
|
|
|
|
@ -54,18 +98,25 @@ syntax Guard = "(" Guard ")"
|
|
|
|
|
)
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
These define expressions that can be assigned to
|
|
|
|
|
*/
|
|
|
|
|
/*
A non-empty list of Lhs separated by ",".
*/
syntax LhsList = Lhs ("," Lhs)*
|
|
|
|
|
;
|
|
|
|
|
/*
An assignable target: an Id, optionally followed by a single index expression in "[" "]".
*/
syntax Lhs = Id ("[" Expr "]")?
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
syntax ProcedureCall = 'readln'"(" LhsList ")"
|
|
|
|
|
/*
A call. The two built-ins come first: 'readln' takes a list of assignable targets (LhsList) and
'writeln' takes a list of expressions (ExprList). The last alternative, placed after ">", is a
call by name: an Id from which the words writeln, readln and skip are excluded with "\",
optionally followed by a parenthesised ExprList.
NOTE(review): the ">" is presumably meant to prefer the built-in alternatives over the generic
Id call - confirm against the Rascal priority documentation.
*/
syntax SubProgCall = 'readln'"(" LhsList ")"
|
|
|
|
|
| 'writeln'"(" ExprList ")"
|
|
|
|
|
> Id \ 'writeln' \ 'readln' \ 'skip' ("(" ExprList ")")?
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
/*
A non-empty list of Exprs separated by ",".
*/
syntax ExprList = Expr ("," Expr)*
|
|
|
|
|
;
|
|
|
|
|
/*
|
|
|
|
|
A fairly normal set of expressions, perhaps with the exception of Id [Expr .. Expr].
|
|
|
|
|
This is syntax for array slicing.
|
|
|
|
|
*/
|
|
|
|
|
syntax Expr = "(" Expr ")"
|
|
|
|
|
| Id
|
|
|
|
|
| Id "[" Expr (".." Expr)?"]"
|
|
|
|
@ -84,15 +135,51 @@ syntax Expr = "(" Expr ")"
|
|
|
|
|
)
|
|
|
|
|
;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
Basic lexical constructs.
|
|
|
|
|
*/
|
|
|
|
|
/*
Relational operators: <, <=, >, >=, = and <>. The angle brackets are backslash-escaped
inside the literals.
*/
lexical Relop = "\<"|"\<="|"\>"|"\>="|"="|"\<\>";
|
|
|
|
|
/*
A numeric literal: either one or more Digits, or an optional run of Digits followed by "." and
at least one Digit. So ".5" is accepted, while "5." is not.
Digit is not defined in the part of the file visible here.
*/
lexical Number = Digit+
|
|
|
|
|
| Digit* "." Digit+
|
|
|
|
|
;
|
|
|
|
|
/*
|
|
|
|
|
A comment is anything between a "{" and a "}"
|
|
|
|
|
*/
|
|
|
|
|
/*
"{", then any run of characters other than "}", then "}". The comment therefore ends at the
first "}" after the opening brace.
*/
lexical Comment = "{" ![}]* "}";
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
Layout essentially captures all characters between our main syntactic objects. You can read this as follows:
|
|
|
|
|
|
|
|
|
|
Between any two syntactic elements, eat as much whitespace or comments as you can.
|
|
|
|
|
|
|
|
|
|
Now, interestingly, if you remove everything after !>>, this makes parsing a program ambiguous. This is because
|
|
|
|
|
what happens is that Standard is silently placed between all symbols in our production rule and so
|
|
|
|
|
our Program Non-Terminal is actually as follows:
|
|
|
|
|
|
|
|
|
|
start syntax Program = Standard 'program' Standard Id Standard ";"
|
|
|
|
|
Standard ConstDecl* Standard VarDecl*
|
|
|
|
|
Standard SubProgDecl* Standard CompoundStatement "."
|
|
|
|
|
|
|
|
|
|
In this way, any white space or comments get eaten up between our actual definitions. However, what happens if there are no const decls
|
|
|
|
|
or var decls, let's say? Then we have at some point ... Standard Standard Standard SubProgDecl* ...
|
|
|
|
|
|
|
|
|
|
And since Standard can be the empty string, this means that if there is a comment between the program id and the first function, Rascal
|
|
|
|
|
doesn't know _which_ Standard to use when reducing the comment. If there are multiple comments, it can go into any of the Standards.
|
|
|
|
|
|
|
|
|
|
To fix this, we essentially force the first Standard to eat _all_ of the comments / whitespace. To do this, we say that Standard cannot
|
|
|
|
|
have whitespace or our comment delimiters ("{", "}") follow it, which is what !>> [...] does.
|
|
|
|
|
|
|
|
|
|
A !>> [...] says that A cannot be matched if there's something in ... after it, so if ... can be in A, it will be placed in there.
|
|
|
|
|
|
|
|
|
|
Note: The UTF characters in the character class define whitespace characters (I believe). I could be wrong, I stole this from
|
|
|
|
|
lang::std::Layout ;)
|
|
|
|
|
|
|
|
|
|
Idea from: https://stackoverflow.com/a/75458702
|
|
|
|
|
|
|
|
|
|
This is also in the documentation. See: https://www.rascal-mpl.org/docs/Rascal/Declarations/SyntaxDefinition/Disambiguation/Follow/
|
|
|
|
|
*/
|
|
|
|
|
/*
Layout non-terminal: zero or more WhitespaceOrComment, with follow restrictions (!>>) so that
it cannot end while a whitespace character or a brace comes next.
NOTE(review): two "=" bodies are visible below, and the first is already closed by ";". They look
like the before/after lines of a diff; confirm which one is intended and keep only that one.
*/
layout Standard
|
|
|
|
|
= WhitespaceOrComment* !>> [\u0009-\u000D \u0020 \u0085 \u00A0 \u1680 \u180E \u2000-\u200A \u2028 \u2029 \u202F \u205F \u3000 \ \t\n\r] !>> [{}];
|
|
|
|
|
= WhitespaceOrComment* !>> [\u0009-\u000D \u0020 \u0085 \u00A0 \u1680 \u180E \u2000-\u200A \u2028 \u2029 \u202F \u205F \u3000 {}];
|
|
|
|
|
|
|
|
|
|
lexical WhitespaceOrComment
|
|
|
|
|
= whitespace: Whitespace
|
|
|
|
|