# This part of the Rustlr tutorial is under construction and now only
# contains the grammar.

# ANSI C YACC Grammar 2011 version re-written in Rustlr
# Compare to the original from http://www.quut.com/c/ANSI-C-grammar-y.html

# This grammar can only be processed with rustlr -lrsd 3, as it is
# a "Selective Marcus-Leermakers" or selML(3,1) grammar, which is a class
# of unambiguous grammars properly larger than LR(1).

# Notation used below: X<Comma+> is a non-empty, comma-separated sequence
# of X; X<Comma*> is a possibly empty one.  X+, X* and X? are one-or-more,
# zero-or-more and optional occurrences of X.  A "..." at the end of a
# semantic action leaves the rest of the action to be generated.

auto
lifetime 'lt

# The following structure is shared between the parser and lexer and represents
# a rudimentary kind of symbol table, to keep track of identifiers that should
# be tokenized as TYPE_NAME or ENUMERATION_CONSTANT instead of IDENTIFIER.
# It also contains a stateful flag that triggers the recognition of typedefs:
# this simplifies the process of unraveling the abstract syntax.
$pub use std::collections::HashSet;
$#[derive(Debug,Default)]
$pub struct defined_id_table<'t> {
$  pub typedefs: HashSet<&'t str>,
$  pub enum_ids: HashSet<&'t str>,
$  pub anticipate_typedef: bool,
$  pub in_function:bool,
$  pub warnings_issued:bool,
$}

externtype defined_id_table<'lt>

# new keywords
lexterminal FUNC_NAME __func__
lexterminal INLINE inline
lexterminal RESTRICT restrict
lexterminal ALIGNAS _Alignas
lexterminal ALIGNOF _Alignof
lexterminal ATOMIC _Atomic
lexterminal BOOL _Bool
lexterminal COMPLEX _Complex
lexterminal GENERIC _Generic
lexterminal IMAGINARY _Imaginary
lexterminal NORETURN _Noreturn
lexterminal STATIC_ASSERT _Static_assert
lexterminal THREAD_LOCAL _Thread_local

# from original
lexterminal PTR_OP ->
lexterminal INC_OP ++
lexterminal DEC_OP --
lexterminal LEFT_OP <<
lexterminal RIGHT_OP >>
lexterminal LE_OP <=
lexterminal GE_OP >=
lexterminal EQ_OP ==
lexterminal NE_OP !=
lexterminal AND_OP &&
lexterminal OR_OP ||
lexterminal MUL_ASSIGN *=
lexterminal DIV_ASSIGN /=
lexterminal MOD_ASSIGN %=
lexterminal ADD_ASSIGN +=
lexterminal SUB_ASSIGN -=
lexterminal LEFT_ASSIGN <<=
lexterminal RIGHT_ASSIGN >>=
lexterminal AND_ASSIGN &=
lexterminal XOR_ASSIGN ^=
lexterminal OR_ASSIGN |=
lexterminal SIZEOF sizeof
lexterminal TYPEDEF typedef
lexterminal EXTERN extern
lexterminal STATIC static
lexterminal AUTO auto
lexterminal REGISTER register
lexterminal CHAR char
lexterminal SHORT short
lexterminal INT int
lexterminal LONG long
lexterminal SIGNED signed
lexterminal UNSIGNED unsigned
lexterminal FLOAT float
lexterminal DOUBLE double
lexterminal CONST const
lexterminal VOLATILE volatile
lexterminal VOID void
lexterminal STRUCT struct
lexterminal UNION union
lexterminal ENUM enum
lexterminal CASE case
lexterminal DEFAULT default
lexterminal IF if
lexterminal ELSE else
lexterminal SWITCH switch
lexterminal WHILE while
lexterminal DO do
lexterminal FOR for
lexterminal GOTO goto
lexterminal CONTINUE continue
lexterminal BREAK break
lexterminal RETURN return
lexterminal ELLIPSIS ...
lexterminal Lbrace {
lexterminal Rbrace }
lexterminal Amp &
lexterminal Bang !
lexterminal Bar |
lexterminal Hat ^
lexterminal Dot .
lexterminal Comma ,
lexterminal Colon :
lexterminal Star *
lexterminal Div /
lexterminal Percent %
lexterminal Lt <
lexterminal Gt >
lexterminal ASSIGN =
lexterminal Quest ?
lexterminal SEMICOLON ;
terminals [ ] ( ) + - ~

# self below refers to the generated lexer, which will always have a
# shared_state with the parser (via an Rc<RefCell<...>>)
# The order matters: an alphanumeric token is first checked against the
# typedef and enum tables and only falls through to IDENTIFIER if neither
# guard holds.
valueterminal TYPEDEF_NAME~ &'lt str~ Alphanum(n) if self.shared_state.borrow().typedefs.contains(n)~ n
valueterminal ENUMERATION_CONSTANT~ &'lt str~ Alphanum(n) if self.shared_state.borrow().enum_ids.contains(n) ~ n
valueterminal IDENTIFIER~ &'lt str~ Alphanum(n)~ n
valueterminal I_CONSTANT~ i32~ Num(n)~ (n as i32)
valueterminal F_CONSTANT~ f64~ Float(n)~ n
valueterminal STRING_LITERAL~ &'lt str~ Strlit(s)~ s
lexattribute add_custom("directive",r"^(?m)^#.*$")
valueterminal COMPILER_DIRECTIVE~ &'lt str~ Custom("directive",d)~ d

nonterminals unary_operator assignment_operator expression statement
nonterminal primary_expression : expression
nonterminal postfix_expression : expression
nonterminal unary_expression : expression
nonterminal cast_expression : expression
nonterminal multiplicative_expression : expression
nonterminal additive_expression : expression
nonterminal shift_expression : expression
nonterminal relational_expression : expression
nonterminal equality_expression : expression
nonterminal and_expression : expression
nonterminal exclusive_or_expression : expression
nonterminal inclusive_or_expression : expression
nonterminal logical_and_expression : expression
nonterminal logical_or_expression : expression
nonterminal conditional_expression : expression
nonterminal assignment_expression : expression
nonterminal constant_expression : expression
nonterminal expressions
nonterminal labeled_statement : statement
nonterminal compound_statement : statement
nonterminal expression_statement : statement
nonterminal selection_statement : statement
nonterminal iteration_statement : statement
nonterminal jump_statement : statement
nonterminals declaration declaration_specifiers init_declarator
nonterminals storage_class_specifier type_specifier struct_or_union_specifier
nonterminals struct_or_union struct_declaration translation_unit
nonterminals enum_specifier enumerator type_qualifier
nonterminals pointer struct_declarator elseopt
nonterminals parameter_type_list parameter_declaration
nonterminals type_name abstract_declarator
nonterminals direct_abstract_declarator initializer Constant Stringlit
nonterminals external_declaration generic_selection function_header
nonterminals generic_association static_assert_declaration designation
nonterminals function_specifier alignment_specifier atomic_type_specifier
nonterminals designator block_item declaration_specifier
nonterminals specifier_qualifier initializer_designation
nonterminals direct_declarator direct_contents declarator da_base
nonterminal direct_base
nonterminal function_definition : external_declaration
terminal errorterm
topsym translation_unit
errsym errorterm

# added for Rustlr edition to resolve dangling else problem:
nonassoc IF 20
nonassoc ELSE 30
# Known issue with ANSI grammar:
nonassoc ( 2
nonassoc ATOMIC 1

resynch SEMICOLON )

############# expressions

primary_expression:Var --> IDENTIFIER
primary_expression --> ( expression )
primary_expression:Constant --> Constant
primary_expression:Stringlit --> Stringlit
primary_expression:Generic --> generic_selection

# ENUMERATION CONSTANT needs to be treated like TYPE_NAME
Constant --> I_CONSTANT | F_CONSTANT | ENUMERATION_CONSTANT
Stringlit --> STRING_LITERAL | FUNC_NAME

generic_selection --> GENERIC ( assignment_expression Comma generic_association<Comma+> )
generic_association:by_type --> type_name Colon assignment_expression
generic_association:by_default --> DEFAULT Colon assignment_expression

postfix_expression --> primary_expression
postfix_expression:Indexing --> postfix_expression [ expression ]
# <Comma*> so that calls with no arguments, such as f(), are accepted
postfix_expression:Function_call --> postfix_expression ( assignment_expression<Comma*> )
postfix_expression:Dotaccess --> postfix_expression Dot IDENTIFIER
postfix_expression:Ptraccess --> postfix_expression PTR_OP IDENTIFIER
postfix_expression:PlusPlus --> postfix_expression INC_OP
postfix_expression:MinusMinus --> postfix_expression DEC_OP
postfix_expression:dont_know_what_this_is --> ( type_name ) Lbrace initializer_designation<Comma+> Comma? Rbrace

unary_expression --> postfix_expression
unary_expression:Unaryop --> unary_operator cast_expression
unary_expression ==>
          INC_OP unary_expression
        | DEC_OP unary_expression
        | SIZEOF unary_expression
        | SIZEOF ( type_name )
        | ALIGNOF ( type_name )
        <==

unary_operator --> Amp
unary_operator --> Star
unary_operator:UniPlus --> +
unary_operator:Neg --> -
unary_operator:BinComp --> ~
unary_operator:Not --> Bang

cast_expression --> unary_expression
cast_expression:Typecast --> ( type_name ) cast_expression

multiplicative_expression --> cast_expression
multiplicative_expression:Times --> multiplicative_expression Star cast_expression
multiplicative_expression:Divide --> multiplicative_expression Div cast_expression
multiplicative_expression:Mod --> multiplicative_expression Percent cast_expression

additive_expression --> multiplicative_expression
additive_expression:Plus --> additive_expression + multiplicative_expression
additive_expression:Minus --> additive_expression - multiplicative_expression

shift_expression --> additive_expression
shift_expression:Lshift --> shift_expression LEFT_OP additive_expression
shift_expression:Rshift --> shift_expression RIGHT_OP additive_expression

relational_expression --> shift_expression
relational_expression:Lt --> relational_expression Lt shift_expression
relational_expression:Gt --> relational_expression Gt shift_expression
relational_expression:Leq --> relational_expression LE_OP shift_expression
relational_expression:Geq --> relational_expression GE_OP shift_expression

equality_expression --> relational_expression
equality_expression:Equals --> equality_expression EQ_OP relational_expression
equality_expression:NotEquals --> equality_expression NE_OP relational_expression

and_expression --> equality_expression
and_expression:BitAnd --> and_expression Amp equality_expression

exclusive_or_expression --> and_expression
exclusive_or_expression:BitXor --> exclusive_or_expression Hat and_expression

inclusive_or_expression --> exclusive_or_expression
inclusive_or_expression:BitOr --> inclusive_or_expression Bar exclusive_or_expression

logical_and_expression --> inclusive_or_expression
logical_and_expression:And --> logical_and_expression AND_OP inclusive_or_expression

logical_or_expression --> logical_and_expression
logical_or_expression:Or --> logical_or_expression OR_OP logical_and_expression

conditional_expression --> logical_or_expression
conditional_expression:Quest --> logical_or_expression Quest expression Colon conditional_expression

assignment_expression --> conditional_expression
assignment_expression:Assignment --> unary_expression assignment_operator assignment_expression

assignment_operator ==>
          ASSIGN
        | MUL_ASSIGN
        | DIV_ASSIGN
        | MOD_ASSIGN
        | ADD_ASSIGN
        | SUB_ASSIGN
        | LEFT_ASSIGN
        | RIGHT_ASSIGN
        | AND_ASSIGN
        | XOR_ASSIGN
        | OR_ASSIGN
        <==

# unifying point for expression-type in AST
expression:expr_list --> expressions
expressions --> assignment_expression<Comma+>
# intentionally ambiguous
#expression --> assignment_expression

# the following rule was marked "with constraints"
constant_expression:const_expr --> conditional_expression

############# declarations

!use crate::c11_ast::declaration_specifier::*;
!use crate::c11_ast::storage_class_specifier::*;
!use crate::c11_ast::direct_base::*;

declaration:DecSpec --> declaration_specifier+ SEMICOLON
declaration:Statassert --> static_assert_declaration
declaration:fundef --> function_definition

# TYPEDEF CASES
declaration:DecSpecList --> declaration_specifier+:dsl init_declarator<Comma+>:initdc SEMICOLON

declaration_specifier:storage --> storage_class_specifier
declaration_specifier:typespec --> type_specifier
declaration_specifier:typequal --> type_qualifier
declaration_specifier:funcspec --> function_specifier
declaration_specifier:alignspec --> alignment_specifier

init_declarator --> declarator:decl (ASSIGN initializer)?:initializer

storage_class_specifier ==>
          EXTERN
        | STATIC
        | AUTO
        | REGISTER
        | THREAD_LOCAL
        <==
# Seeing "typedef" raises the flag that makes the next declared identifier
# (see direct_base) get recorded as a type name.
storage_class_specifier --> TYPEDEF { parser.shared_state.borrow_mut().anticipate_typedef=true; ...}

type_specifier:Typename --> TYPEDEF_NAME
type_specifier ==>
          VOID
        | CHAR
        | SHORT
        | INT
        | LONG
        | FLOAT
        | DOUBLE
        | SIGNED
        | UNSIGNED
        | BOOL
        | COMPLEX
        | IMAGINARY
        | struct_or_union_specifier
        | enum_specifier
        | atomic_type_specifier
        <==

struct_or_union_specifier ==>
          struct_or_union IDENTIFIER? Lbrace struct_declaration+ Rbrace
        | struct_or_union IDENTIFIER
        <==

# labels ensure enum generated.
struct_or_union --> STRUCT | UNION

# <Comma*> also admits a member declaration without declarators
# (anonymous struct/union members)
struct_declaration ==>
          specifier_qualifier+ struct_declarator<Comma*> SEMICOLON
        | static_assert_declaration
        <==

specifier_qualifier --> type_specifier | type_qualifier

struct_declarator:declare --> declarator (Colon constant_expression)?
struct_declarator:nodeclare --> Colon constant_expression

enum_specifier ==>
          ENUM IDENTIFIER? Lbrace enumerator<Comma+> Comma? Rbrace
        | ENUM IDENTIFIER
        <==

# Each enumerator name is recorded so the lexer subsequently returns it as
# ENUMERATION_CONSTANT rather than IDENTIFIER.
enumerator ==> IDENTIFIER:id (ASSIGN constant_expression)? {
          parser.shared_state.borrow_mut().enum_ids.insert(id);
          ... }
        <==

atomic_type_specifier --> ATOMIC ( type_name )

type_qualifier --> CONST | VOLATILE | ATOMIC | RESTRICT

function_specifier --> INLINE | NORETURN

alignment_specifier --> ALIGNAS ( type_name )
alignment_specifier --> ALIGNAS ( constant_expression )

# PROBLEMATIC LINE
declarator --> pointer? direct_declarator
#declarator:DEC -->
#  pointer? direct_declarator
#

# If a typedef is being declared, the identifier is entered into the typedef
# table and the flag is cleared, so only the first identifier after "typedef"
# is recorded.
direct_base ==> IDENTIFIER:id {
          let mut table = parser.shared_state.borrow_mut();
          if table.anticipate_typedef {
            table.typedefs.insert(id);
            table.anticipate_typedef = false;
          }
          ... }
        <==
direct_base:DEC --> ( declarator )

direct_contents ==>
          [ Star?:star ]
        | [ STATIC?:is_static assignment_expression ]
        | [ type_qualifier+ Star ]
        | [ STATIC?:is_static type_qualifier+ assignment_expression ]
        | [ type_qualifier+ (STATIC assignment_expression)? ]
        | ( parameter_type_list )
        | ( IDENTIFIER<Comma*> )
        <==

flatten direct_declarator
direct_declarator --> direct_base direct_contents*

pointer --> Star type_qualifier*:tql pointer?:ptr

parameter_type_list --> parameter_declaration<Comma+>:parameters (Comma ELLIPSIS)?:ellipsis

parameter_declaration ==>
          declaration_specifier+ declarator
        | declaration_specifier+ abstract_declarator
        | declaration_specifier+
        <==

type_name --> specifier_qualifier+ abstract_declarator?

abstract_declarator --> pointer
abstract_declarator --> pointer? ( abstract_declarator ) da_base*
abstract_declarator --> pointer? da_base+

da_base --> [ Star?:star ]
da_base --> ( parameter_type_list )
da_base --> [ expression ]
da_base --> [ STATIC type_qualifier* assignment_expression ]
da_base --> [ type_qualifier+ STATIC?:is_static assignment_expression ]
da_base --> [ type_qualifier+ ]

initializer:expr --> assignment_expression
initializer:values --> Lbrace initializer_designation<Comma+> Comma? Rbrace

# replace initializer_list with initializer_designation
initializer_designation --> designation? initializer

designation --> designator+ ASSIGN

designator:brackexpr --> [ constant_expression ]
designator:dotid --> Dot IDENTIFIER

static_assert_declaration --> STATIC_ASSERT ( constant_expression Comma STRING_LITERAL ) SEMICOLON

################ statements

# unifying point for statement types in AST
statement ==>
          labeled_statement
        | compound_statement
        | expression_statement
        | selection_statement
        | iteration_statement
        | jump_statement
        <==

labeled_statement:Labelstat --> IDENTIFIER Colon statement
labeled_statement:Casestat --> CASE expression Colon statement
labeled_statement:Defaultcase --> DEFAULT Colon statement

compound_statement:Blockstat --> Lbrace block_item* Rbrace

block_item:Declaration --> declaration
block_item:Statement --> statement

expression_statement:Expr_list --> expression? SEMICOLON

#selection_statement:Ifstat --> IF ( expression ) statement (ELSE statement)?
#elseopt -->
#elseopt --> ELSE statement
selection_statement:Ifstat --> IF ( expression ) statement
selection_statement:Ifelse --> IF ( expression ) statement ELSE statement
selection_statement:Switchstat --> SWITCH ( expression ) statement

iteration_statement:Whileloop --> WHILE ( expression ) statement
iteration_statement:Dowhileloop --> DO statement WHILE ( expression ) SEMICOLON
iteration_statement:Forloop --> FOR ( expression_statement expression_statement expression? ) statement
iteration_statement:ForDecloop --> FOR ( declaration expression_statement expression? ) statement

jump_statement:Goto_oh_no --> GOTO IDENTIFIER SEMICOLON
jump_statement:Continuestat --> CONTINUE SEMICOLON
jump_statement:Breakstat --> BREAK SEMICOLON
# NOTE(review): as written a bare "return;" is not accepted; making the
# expression optional would change the generated Returnstat AST node.
jump_statement:Returnstat --> RETURN assignment_expression SEMICOLON

translation_unit --> external_declaration+

#external_declaration --> function_definition
external_declaration:declaration --> declaration
external_declaration:directive --> COMPILER_DIRECTIVE

# The in_function flag is set when a function header has been recognized and
# cleared again by function_definition once the body is complete; a header
# seen while the flag is still set is reported as a nested definition.
function_header ==> declaration_specifier+:[ds] declarator declaration* {
          let mut table=parser.shared_state.borrow_mut();
          if table.in_function {
            println!("WARNING: NESTED FUNCTION DEFINITIONS ARE NOT TECHNICALLY ALLOWED IN ANSI C, line {}, column {}",ds.line,ds.column);
            table.warnings_issued=true;
          }
          else {
            table.in_function=true;
          }
          ... }
        <==

function_definition:functiondef ==> function_header compound_statement {
          parser.shared_state.borrow_mut().in_function=false;
          ... }
        <==
#function_definition:functiondef --> declaration_specifier+ declarator declaration* compound_statement

EOF

everything after EOF is ignored

alternative rule without the anticipate_typedef flag: this would look nicer
if Rust allows deep pattern matching on recursive structures:

declaration:DecSpecList ==> declaration_specifier+:dsl init_declarator<Comma+>:initdc SEMICOLON {
   if let (dslvec,idvec) = (&dsl,&initdc) { // both are vectors of boxed AST nodes
     if let storage(TYPEDEF) = &*dslvec[0] {
       if let init_declarator{decl,initializer} = &*idvec[0] {
         if let direct_declarator(dbid,_) = &*(decl.1) {
           if let IDENTIFIER(id) = &**dbid {
             parser.shared_state.borrow_mut().typedefs.insert(id);
           }
           else if let DEC(lbxdecl) = &**dbid {
             if let direct_declarator(dbid2,_) = &*lbxdecl.1 {
               if let IDENTIFIER(id) = &**dbid2 {
                 parser.shared_state.borrow_mut().typedefs.insert(id);
               }
             }
           }
         }
       }
     }
   }
   ... }
  <==