# grammar for "C-plus-minus" - zc Tokenizer version

# Anything on a ! line will be inserted verbatim into the generated parser
! // still need to put into a crate
!use std::io::{Write};
!
!// Reads one line from standard input and returns it as an owned String.
!// The text is returned untrimmed; main splits it on whitespace, so no
!// trimming is needed here.
!// A read failure is reported on stderr and whatever was read so far
!// (possibly nothing) is returned, so the demo keeps going instead of
!// silently ignoring the error or panicking.
!fn readln()-> String {
!  let mut s = String::new();
!  if let Err(e) = std::io::stdin().read_line(&mut s) {
!    eprintln!("warning: could not read from stdin: {}", e);
!  }
!  s
!}
!
!// Minimal tokenizer for the demo: it wraps a whitespace-splitting iterator
!// over the input text and hands each word to the parser as one token.
!struct Slex<'t> {
!  split: std::str::SplitWhitespace<'t>,
!}
!
!impl<'t> Tokenizer<'t,i32> for Slex<'t> {
!  // Yields the next token, or None once the input words are exhausted.
!  // The word itself is the first argument to TerminalToken::new; the other
!  // arguments (0, 1, 0) are fixed placeholders -- presumably value, line and
!  // column; confirm against TerminalToken::new.
!  fn nextsym(&mut self) -> Option<TerminalToken<'t,i32>> {
!    self.split
!      .next()
!      .map(|word| TerminalToken::new(word.trim(),0,1,0))
!  }
!
!  // Source-line display is not provided by this lexer; a fixed notice is
!  // returned instead.
!  fn current_line(&self) -> &str {
!    "current_line function not re-implemented for tokenizer"
!  }
!
!  // Always reports line 1.
!  fn linenum(&self) -> usize { 1 }
!}
!
!// Demo driver: prompts for one line of C+- input, tokenizes it by
!// whitespace, runs the generated parser over it in training mode and prints
!// whether the parse finished without an error being recorded.
!fn main() {
!   // print! emits no newline, so flush explicitly to make the prompt
!   // visible before blocking on input.
!   print!("Write something in C+- : ");
!   std::io::stdout().flush().unwrap();
!   let line = readln();
!   let mut tokens = Slex { split: line.split_whitespace() };
!   let mut cpm_parser = new_parser();
!   // NOTE(review): the second argument is a file path handed to parse_train,
!   // presumably the parser source used for training -- confirm against the
!   // Rustlr documentation.
!   cpm_parser.parse_train(&mut tokens, "src/main.rs");
!   // Alternative entry point, kept for reference:
!   //cpm_parser.train_from_script( &mut tokens,"cpmparser.rs","cpmparser.rs_script.txt" );
!   let succeeded = !cpm_parser.error_occurred();
!   println!("parsing success: {}", succeeded);
!}//main

# ---- Grammar declarations ----
# The semantic value type is i32, matching the Tokenizer<'t,i32> and
# TerminalToken<'t,i32> used by the embedded lexer above.
grammarname cpmz
absyntype i32
nonterminals STAT STATLIST EXPR
# NOTE(review): the trailing "mut" on this declaration is not explained
# anywhere in this file -- confirm its meaning against the Rustlr docs.
nonterminal EXPRLIST mut
# ERROR is listed here as an ordinary terminal; the whitespace lexer above
# only produces it if the input literally contains the word ERROR.
terminals x y z cin cout ; ( ) << >> ERROR
# Parsing starts from STATLIST.
topsym STATLIST
# The errsym directive below is commented out, so ERROR is not registered as
# the error-recovery symbol; only resynchronization on ; is declared.
#errsym ERROR
resync ;

# ---- Productions ----
# A program is one or more statements (left-recursive list).
STATLIST --> STAT  |  STATLIST STAT 
# Input statement: cin >> EXPR ;
STAT --> cin >> EXPR ; 
# Output statement: cout << followed by an expression list, ended by ;
# (the label s is bound but there is no semantic action that uses it).
STAT --> cout << EXPRLIST:s ;

# An expression is one of the names x, y, z, or a parenthesized expression.
EXPR --> x | y | z 
EXPR --> ( EXPR:s ) 
# One or more expressions separated by << (left-recursive list).
EXPRLIST --> EXPR | EXPRLIST << EXPR

# Error productions: each one reports a specific message through
# parser.report and yields 0 as its semantic value.
# NOTE(review): the errsym directive earlier in this file is commented out,
# so these presumably only match a literal ERROR token in the input --
# confirm whether errsym should be re-enabled for them to take effect.
STAT --> ERROR ; { parser.report("invalid statement, skipping to ;"); 0}
EXPR --> ( EXPR ERROR { parser.report("unmatched ("); 0 }
EXPR --> EXPR ERROR ) { parser.report("unmatched )"); 0 }

EOF


Everything after 'EOF' is ignored and can be used for comments.

Rustlr uses two methods of error recovery, which this toy language
experiments with.  The first method requires the designation of a
special terminal symbol as an error recovery symbol, using the
directive "errsym" or "errorsymbol".  This symbol is assumed to not
conflict with actual input tokens.  The error symbol can appear at
most once on the right-hand side of a production rule, followed by
zero or more terminal symbols.  When an error is encountered (a
lookahead symbol with no defined transition in the LR/LALR
finite-state machine), the parser looks down the parse stack for a
state that has a transition defined on the symbol.  It truncates the
stack and performs all possible reductions until a state that can
"shift" the error symbol (ERROR in this example) is found, and
simulates a shift of the next state associated with the error symbol
onto the stack.  It then skips lookaheads until a valid transition is
found for the new state.  Since only terminal symbols may follow the
error symbol, eventually one of the error productions will be reduced.
These productions can have semantic actions that report specific
errors.

The second method of error recovery is rather straightforward: the
grammar can define one or more "resynchronization" terminal symbols
using the "resynch" directive.  When an error is encountered, the
parser skips ahead past the first resynchronization symbol.  Then it
looks down the parse stack for a state that has a valid transition on
the next symbol.  For languages that end statements with a ;
(semicolon), the ; is the natural choice as the resynch symbol.  The
parser will report an error message for the current statement, then
skip over to the next statement.

The second (resynch) method of error recovery is attempted if the
first method fails or if no error symbol is defined.

If both methods of error-recovery fail, the parser simply skips input
tokens until a suitable action is found.

One can experiment with this grammar with "C+-" input such as
cout << x ; cout y ; cin >> ; cout << y << z ;