Commented Syntax.rsc

main
channa 8 months ago
parent 89e60b4136
commit bbb3b5f55a

@ -4,23 +4,51 @@ extend lang::std::Whitespace;
extend lang::std::Id;
extend lang::std::ASCII;
/*
The grammar of MiniPas
*/
/*
Top-level rule. A program is the 'program' keyword and its name, then the
constant, variable and sub-program declarations (in that order, each group
possibly empty), then the main body, closed by a full stop.
*/
start syntax Program
    = 'program' Id ";"
      ConstDecl* VarDecl* SubProgDecl*
      CompoundStatement "."
    ;
/*
Constant declaration. No type is written down: the constant's type is implied
by the Number literal assigned to it.
*/
syntax ConstDecl
    = 'const' Id "=" Number ";"
    ;
/*
Variable declaration. A single declaration may introduce several names at
once; all of them share the one TypeSpec that follows the colon.
*/
syntax VarDecl
    = 'var' IdList ":" TypeSpec ";"
    ;
/*
One or more identifiers, separated by commas.
*/
syntax IdList
    = Id ("," Id)*
    ;
/*
A type is either a basic type, or an array of a basic type whose bounds are
given as two Number literals.
Note that there is no boolean type. The conditions of if-else and while
statements are parsed by a dedicated non-terminal for boolean expressions, so
ordinary expressions (whose values are basic types, or arrays of them) never
need to be boolean.
*/
syntax TypeSpec
    = BasicType
    | 'array' "[" Number ".." Number "]" 'of' BasicType
    ;
/*
The two basic (non-array) types.
*/
syntax BasicType
    = 'integer'
    | 'real'
    ;
/*
Sub-program declarations. "SubProg" is the umbrella term used in this grammar
for both functions and procedures, which differ in two ways:
1. A function declares a return type (a BasicType); a procedure returns nothing.
2. A function always has a parameter section; a procedure may leave it out.
Both kinds may declare local variables before their body.
*/
syntax SubProgDecl
    = 'function' Id Parameters ":" BasicType ";" VarDecl* CompoundStatement ";"
    | 'procedure' Id Parameters? ";" VarDecl* CompoundStatement ";"
    ;
@ -30,6 +58,14 @@ syntax Parameters = "(" ParameterList ")"
/*
One or more parameter groups, separated by semicolons (left-recursive).
*/
syntax ParameterList
    = ParamList
    | ParameterList ";" ParamList
    ;
/*
A single parameter group: several parameters can be declared in one
declaration, sharing one type.
Note the optional 'var': 'var' parameters are passed by reference, whereas
parameters without it are passed by value.
Array parameters (passed by reference) are possible as well; an array argument
must have the same size as the parameter it is passed to.
*/
syntax ParamList
    = 'var'? IdList ":" TypeSpec
    ;
@ -37,8 +73,16 @@ syntax CompoundStatement = 'begin' StatementList? 'end';
/*
One or more statements; semicolons separate statements rather than terminate
them (left-recursive).
*/
syntax StatementList
    = Statement
    | StatementList ";" Statement
    ;
/*
We have:
1. Assignment statements
2. Nested blocks
3. A no-op
4. if-else blocks
5. while blocks
*/
syntax Statement = Lhs ":=" Expr
| ProcedureCall
| SubProgCall
| CompoundStatement
| 'skip'
| 'if' Guard 'then' Statement 'else' Statement
@ -54,18 +98,25 @@ syntax Guard = "(" Guard ")"
)
;
/*
LhsList and Lhs define the expressions that can be assigned to.
An LhsList is one or more such targets, separated by commas.
*/
syntax LhsList
    = Lhs ("," Lhs)*
    ;
/*
A single assignment target: an identifier, optionally followed by one index
expression in square brackets.
*/
syntax Lhs
    = Id ("[" Expr "]")?
    ;
/*
A call used as a statement.
- 'readln' takes a list of assignable targets (LhsList).
- 'writeln' takes a list of expressions (ExprList).
- Any other identifier, with an optional parenthesised argument list, is a
  call by name (presumably to a declared sub-program).
The keywords 'writeln', 'readln' and 'skip' are subtracted from Id in the last
alternative so that they cannot be read as a call by name.
*/
syntax SubProgCall
    = 'readln' "(" LhsList ")"
    | 'writeln' "(" ExprList ")"
    > Id \ 'writeln' \ 'readln' \ 'skip' ("(" ExprList ")")?
    ;
/*
One or more expressions, separated by commas.
*/
syntax ExprList
    = Expr ("," Expr)*
    ;
/*
A fairly normal set of expressions, perhaps with the exception of Id [Expr .. Expr].
This is syntax for array slicing.
*/
syntax Expr = "(" Expr ")"
| Id
| Id "[" Expr (".." Expr)?"]"
@ -84,15 +135,51 @@ syntax Expr = "(" Expr ")"
)
;
/*
Basic lexical constructs.
Relop lists the relational operators. The "<" and ">" characters are written
with a backslash because they are special inside Rascal string literals.
*/
lexical Relop
    = "\<"
    | "\<="
    | "\>"
    | "\>="
    | "="
    | "\<\>"
    ;
/*
A numeric literal: either a run of digits, or a decimal fraction whose integer
part may be omitted (e.g. 42, 3.14, .5). At least one digit must follow the
decimal point.
*/
lexical Number
    = Digit+
    | Digit* "." Digit+
    ;
/*
A comment is anything between a "{" and the first "}" after it. The body may
not contain "}", so comments do not nest.
*/
lexical Comment
    = "{" ![}]* "}"
    ;
/*
Layout essentially captures all characters between our main syntactic objects. You can read this as follows:
Between any two syntactic elements, eat as much whitespace or comments as you can.
Now, interestingly, if you remove everything after !>>, this makes parsing a program ambiguous. This is because
what happens is that Standard is silently placed between all symbols in our production rule and so
our Program Non-Terminal is actually as follows:
start syntax Program = Standard 'program' Standard Id Standard ";"
Standard ConstDecl* Standard VarDecl*
Standard SubProgDecl* Standard CompoundStatement Standard "."
In this way, any white space or comments get eaten up between our actual definitions. However, what happens if there are no const decls
or var decls, let's say? Then we have at some point ... Standard Standard Standard SubProgDecl* ...
And since Standard can be the empty string, this means that if there is a comment between the program id and the first function, Rascal
doesn't know _which_ Standard to use when reducing the comment. If there are multiple comments, it can go into any of the Standards.
To fix this, we essentially force the first Standard to eat _all_ of the comments / whitespace. To do this, we say that Standard cannot
have whitespace or our comment delimiters ("{", "}") follow it, which is what !>> [...] does.
A !>> [...] says that A cannot be matched if there's something in ... after it, so if ... can be in A, it will be placed in there.
Note: The Unicode escapes in the character class denote whitespace characters (I believe). I could be wrong, I stole this from
lang::std::Layout ;)
Idea from: https://stackoverflow.com/a/75458702
This is also in the documentation. See: https://www.rascal-mpl.org/docs/Rascal/Declarations/SyntaxDefinition/Disambiguation/Follow/
*/
/*
Layout placed between the symbols of every syntax rule: zero or more
whitespace runs or comments. The follow restriction (!>>) forbids a whitespace
character or a comment delimiter ("{" or "}") directly after the layout, so
one layout position has to swallow all adjacent whitespace and comments
instead of sharing them with a neighbouring (possibly empty) layout position.
*/
layout Standard
    = WhitespaceOrComment* !>> [\u0009-\u000D \u0020 \u0085 \u00A0 \u1680 \u180E \u2000-\u200A \u2028 \u2029 \u202F \u205F \u3000 {}]
    ;
lexical WhitespaceOrComment
= whitespace: Whitespace

Loading…
Cancel
Save