# Grammar for minijava+ (adopted from MYOCC version 2014) # This package includes this grammar, enumabsyn.rs and main.rs. # There are also several "minijava" programs that can be parsed including # QuickSort.mj and BinaryTree.mj # The grammar defines a scaled-down version of Java similar to the "Tiger" # language in Andrew Appel's compiler textbooks. This language was originally # used in a compilers class taught at Hofstra University in 2014, during which # students wrote their own LR(1) parser generator in Java. It's easier to # implement full LR(1) as opposed to LALR(1), which is all that's needed for # this grammar. !use rustlr::LBox; !use crate::enumabsyn::*; !use crate::enumabsyn::Declaration::*; !use crate::enumabsyn::Expr::*; !use crate::enumabsyn::Stat::*; lifetime 'lt absyntype Program<'lt> typedterminal ID &'lt str typedterminal STRING &'lt str typedterminal BOOL bool typedterminal INTEGER i32 terminal class public static void main String extends return length terminal ( ) [ ] ; DOT ! , new this terminal LBR RBR OROR terminal int boolean if else while == = + - * / < && MOD nonterminal Program Program<'lt> nonterminal MainCl Mainclass<'lt> nonterminal ClassDec ClassDec<'lt> nonterminal ClassDecl Vec>> nonterminal Extension &'lt str nonterminal VarDec VarDec<'lt> nonterminal MethodDec MethodDec<'lt> nonterminal Decl Vec>> nonterminal FormalLst Vec>> nonterminal FormalRst Vec>> nonterminal Type &'lt str nonterminal Stat Stat<'lt> nonterminal Stats Vec>> nonterminal Exp Expr<'lt> nonterminal Rxp Expr<'lt> nonterminal Dxp Expr<'lt> nonterminal Bxp Expr<'lt> nonterminal ExpLst Vec>> nonterminal ExpRst Vec>> topsym Program resync ; # precedence/associativity declarations for common binary operators left * 700 left / 700 left MOD 700 left + 500 left - 500 left == 450 left < 450 left && 400 left OROR 350 right = 200 # other operators are defined by different levels of "Expression": from # loosest to tightest: Exp, Bxp, Rxp, Dxp. These include unary operators, # array expressions and "." expressions. # to deal with the dangling-else problem, else is given higher precedence # than if. Reduction by (Stat --> if (Exp) Stat) will be delayed if the # the lookahead symbol is 'else'. nonassoc if 30 nonassoc else 40 Program --> MainCl:[mc] ClassDecl:cs { Program {mainclass:mc, otherclasses:cs } } MainCl ==> class ID:cn LBR public static void main ( String [ ] ID:an ) LBR Stats:thebody RBR RBR { Mainclass{classname:cn, argvname:an, body: Blockst(thebody), } } <== ClassDecl --> { Vec::new() } ClassDecl --> ClassDecl:cs ClassDec:[cl] { cs.push(cl); cs } ClassDec ==> class ID:name Extension:sup LBR Decl:ds RBR { let mut vdecs=Vec::new(); let mut mdecs=Vec::new(); separatedecs(ds,&mut vdecs,&mut mdecs); /*split var and method declarations*/ ClassDec {superclass:sup, classname:name, vars:vdecs, methods:mdecs} } <== Extension --> extends Type:sup { sup } Extension --> { "Object" } VarDec --> Type:t ID:v ; { VarDec{dname:v,dtype:t,initval:Nothing,} } VarDec --> Type:t ID:v = Exp:e ; {VarDec{dname:v,dtype:t,initval:e}} MethodDec ==> public Type:ty ID:name ( FormalLst:args ) LBR Stats:mbody RBR { MethodDec{ formals:args, body: mbody, classname:ty, methodname:name, } } <== Decl --> { Vec::new() } Decl --> Decl:ds VarDec:v { ds.push(parser.lbx(1,Vdec(v))); ds } Decl --> Decl:ds MethodDec:m { ds.push(parser.lbx(1,Mdec(m))); ds } FormalLst --> { Vec::new() } FormalLst --> FormalRst:v {v} FormalRst ==> Type:ty ID:a { vec![ parser.lb(VarDec{dname:a,dtype:ty,initval:Nothing}) ] } <== FormalRst ==> FormalRst:v , Type:ty ID:a { v.push(parser.lb(VarDec{dname:a,dtype:ty,initval:Nothing})); v } <== Type --> int [ ] { return "int[]"; } Type --> boolean { return "boolean"; } Type --> String { return "String"; } Type --> int { return "int"; } Type --> void { return "void"; } Type --> ID:c { c } Stats --> { Vec::new() } Stats --> Stats:sv Stat:[s] { sv.push(s); sv } Stat --> LBR Stats:sv RBR { Blockst(sv) } Stat --> if ( Exp:[c] ) Stat:[a] else Stat:[b] { Ifstat(c, a, b) } Stat --> if ( Exp:[c] ) Stat:[a] { Ifstat(c,a,parser.lb(Nopst)) } Stat --> while ( Exp:[c] ) Stat:[s] { Whilest(c,s) } Stat --> ID:v = Exp:[e] ; { Assignst(v,e) } Dxp --> Dxp:[obj] DOT ID:field { Field(field,obj) } Rxp --> Dxp:e {e} Rxp --> Rxp:[a] [ Exp:[i] ] { Binop("[]",a,i) } Bxp --> Rxp:e {e} Bxp --> ! Bxp:[a] { Notexp(a) } Exp --> Bxp:e {e} Stat --> Rxp:[v] [ Exp:[i] ] = Exp:[e] ; { ArAssignst(v,i,e) } Stat --> Dxp:[obj] DOT ID:m ( ExpLst:args ) ; {Callstat(obj,m,args)} Stat --> return Exp:[e] ; { Returnst(e) } Stat --> VarDec:v {Vardecst(v.dname,v.dtype,parser.lb(v.initval))} Exp --> Exp:[a] * Exp:[b] { Binop("*",a,b) } Exp --> Exp:[a] + Exp:[b] { Binop("+",a,b) } Exp --> Exp:[a] / Exp:[b] { Binop("/",a,b) } Exp --> Exp:[a] - Exp:[b] { Binop("-",a,b) } Exp --> Exp:[a] && Exp:[b] { Binop("&&",a,b) } Exp --> Exp:[a] OROR Exp:[b] { Binop("OROR",a,b) } Exp --> Exp:[a] < Exp:[b] { Binop("<",a,b) } Exp --> Exp:[a] MOD Exp:[b] { Binop("%",a,b) } Exp --> Exp:[a] == Exp:[b] { Binop("==",a,b) } Bxp --> Dxp:[obj] DOT ID:f ( ExpLst:args ) { Callexp(obj,f,args) } Bxp --> INTEGER:i { Int(i) } Bxp --> STRING:s { Strlit(s) } Bxp --> BOOL:b { Bool(b) } Dxp --> ID:x { Var(x) } Dxp --> this { Thisptr } Exp --> new int [ Exp:[s] ] { Newarray(s) } Exp --> new Type:x ( ) { Newobj(x) } Dxp --> ( Exp:e ) { e } # zero or more comma-separated expressions with no trailing comma: ExpLst --> { Vec::new() } ExpLst --> ExpRst:v {v} ExpRst --> Exp:[e] { vec![e] } ExpRst --> ExpRst:v , Exp:[e] { v.push(e); v } # With automatic AST generation, this can now be done with: # ExpLst --> Exp<,*> (see Chapter 4 and 5 of the tutorial). # Lexical scanner setup using StrTokenizer lexname DOT . lexname MOD % lexname LBR { lexname RBR } lexname OROR || lexvalue BOOL Alphanum("true") true lexvalue BOOL Alphanum("false") false lexvalue ID Alphanum(x) x lexvalue INTEGER Num(n) (n as i32) lexvalue STRING Strlit(s) s EOF