(* CSC 123/252 Assignment: Completing an Interpreter for a Programming Language This assignment can be completed at several levels. The most basic level only asks you to implement booleans and if-else expressions. The next level requires you to implement variables, assignment and while-loops. The most advanced level asks you to implement mutable closures and recursive functions. The programming language `Lambda7b` (version 7e8) is an interpreted, non-statically typed language that supports first class mutable closures, as in the ability to pass and return lambda-closure as values. Lambda7b has two primitive types, integers and strings, though strings are only used for output: there are no operations allowed on strings and thus there can only be string literals (constants). Booleans are represented by integer 0 for false and any non-zero integer for true. If-else is in the functional style: if (a) b else c, which evaluates to either b or c. There is no stand-alone if statement, only if-else. A series of expressions can be separated using ; and grouped together using parentheses (). The value of such a series is the value of the last expresssion. There is a while loop and destructive assignment is allowed. Functions are written using the lambda keyword, such as `lambda x:x+1`. Programs are evaluated as scripts (no main). The following is a sample lambda7b program: # function to compute base**n by binary factorization on n: define power = lambda base:lambda n:( define ax = 1; define factor = base; while (00" one writes "0;; // bindings are lists of pairs let rec lookup (B:bindings) (x:string) = // lookup stack for binding for x match B with | (y,e)::bs when x=y -> Some(e) | _::bs -> lookup bs x | [] -> None let lookupval B x = (lookup B x) |> map (fun r -> !r) // dereferences // to add to environment, do non-destructive cons: (x,ref Num(0))::bindings //// The AST incorporates its own monad for error handling. let numap f = function // equiv to let numap f arg = match arg with ... | Num(n) -> Num(f(n)) | RuntimeError(e) -> RuntimeError(e) | _ -> RuntimeError("cannot map function to non-numerical value");; let numap2 f a b = match (a,b) with | (Num(x),Num(y)) -> Num(f(x,y)) | _ -> RuntimeError("function requires two numerical normal forms");; let numbind2 f a b = match (a,b) with | (Num(x),Num(y)) -> f(x,y) | _ -> RuntimeError("function requires two numerical normal forms");; // sample use: let safediv(x,y) = if y<>0 then Num(x/y) else RuntimeError("div by zero") // numbind2 safediv (Num 6) (Num 2) will return Num(3) let numtest p a = // test predicate on num match a with | Num(x) -> p(x) | _ -> false;; let istrue = function // for convenience, not as safe to use | Num(x) when x<>0 -> true | _ -> false let iserr = function | RuntimeError(_) -> true | _ -> false ///// eval maps AST to another AST in normal form let normal_form = function | Num(_) | Str(_) | Void | Closure(_,_) | RuntimeError(_) -> true | _ -> false // evaluation defined simultaneously by eval and eval_seq: let rec eval_seq (stack:bindings) (seq:expr list) = match seq with | Define(x,e)::es -> let ev = eval stack e if (iserr ev) then ev else eval_seq ((x,ref ev)::stack) es | e::e2::es -> eval stack e |> ignore eval_seq stack (e2::es) | [e] -> eval stack e | [] -> Void; and eval (stack:bindings) expression = match expression with | n when normal_form(n) -> n // normal forms eval to themselves | Uniop("-",e) -> (eval stack e) |> numap (fun x -> -1*x) | Binop("+",a,b) -> match ((eval stack a), (eval stack b)) with | (Num(x),Num(y)) -> Num(x+y) | _ -> RuntimeError("+ requires two numbers") | Binop("-",a,b) -> numap2 (fun (x,y) -> x-y) (eval stack a) (eval stack b) | Sequence es -> // sequences are constructed in reverse by parser eval_seq stack (List.rev es) | Sym("cin") -> let cin = Console.ReadLine(); let x = ref 0; if Int32.TryParse(cin,x) then Num(!x) else Str(cin) | Uniop("cout",Str(s)) -> let fixeds = s.Substring(1,s.Length-2).Replace("\\n","\n"); printf "%s" fixeds; Void // cout always returns Void | Uniop("cout",Num(n)) -> printf "%d" n; Void; | Uniop("cout",n) when normal_form(n) -> printf "%A" n; Void; | Uniop("cout",e) -> eval stack (Uniop("cout",(eval stack e))); | Var x -> match (lookup stack x) with | Some(v) -> eval stack !v | None -> RuntimeError(sprintf "%s not found in scope" x) | e -> RuntimeError(sprintf "no rule to evaluate %A" e);; //////////////////////////// PARSING (leave alone if you're not exageek) let precedence = function | Sym("+") -> 100 | Sym("-") -> 100 | Sym("u-") -> 210 | Sym("*") -> 200 | Sym("/") -> 200 | Sym("%") -> 200 | Sym("||") -> 400 | Sym("&&") -> 400 | Sym("==") -> 400 | Sym("!") -> 500 | Sym("(") -> 1000 | Sym("in") -> 70 // forces shift on let x=1 in x+x | Sym("=") -> 50 | Sym(")") -> 20 | EOF -> 0 // don't ever shift EOF | _ -> 20;; let binops = ["+";"-";"*";"/";"%";"||";"&&";"=="]; let uniops = ["-";"!";"cout"]; let inlist(x,L) = List.exists (fun y -> y=x) L let proper_expr = function // identify pre-parsing symbols | EOF -> false | Sym("cin") -> true | Sym(_) -> false | _ -> true; // assume all operators are left-associative, for now let mutable leftassoc = fun x -> true;; // change this later with: let leftassoc_base = leftassoc; leftassoc <- fun x -> // illustrates how to "extend" a method dynamically match x with | Sym("=") -> false // assignment operator is right-associative | _ -> leftassoc_base x;; // check for precedence and proper expressions let check(a,b,es) = match (a,b) with | (a,b) when a=b -> leftassoc(a) && List.forall(fun e -> proper_expr(e)) es | (a,b) -> let (pa,pb) = (precedence(a),precedence(b)); (pa >= pb) && List.forall(fun e -> proper_expr(e)) es let applicable = function // things that can be applied as functions | Var(_) | Lambda(_,_) | Apply(_,_) -> true | _ -> false let error_report (stack:expr list) (input:expr list) = let mutable report = sprintf "Parse error on reading token %A. Top of parse stack: " input.[0] let mutable i = 0 while i<4 && i ast // ACCEPT | (Sym(")")::e::Sym("(")::f::pst, la::inp) when (applicable f) && (proper_expr e) -> parse (Apply(f,e)::pst, la::inp) | (Sym(")")::e::Sym("(")::pst, la::inp) when check(Sym("("),la,[e]) -> parse (e::pst, la::inp) | (fcase::Sym("else")::tcase::cond::Sym("if")::pst, la::inp) when check(Sym("if"),la,[cond;tcase;fcase]) -> parse (Ternop("if",cond,tcase,fcase)::pst, la::inp) | (e2::Sym(s)::e1::pst, la::inp) when inlist(s,binops) && check(Sym(s),la,[e1;e2]) -> parse (Binop(s,e1,e2)::pst, la::inp) | (body::cond::Sym("while")::pst, la::inp) when check(Sym("while"),la,[body;cond]) -> parse (Binop("while",cond,body)::pst, la::inp) | (e::Sym("-")::pst, la::inp) when check(Sym("u-"),la,[e]) -> parse (Uniop("-",e)::pst, la::inp) // unary - has higher prec than * | (e::Sym(s)::pst, la::inp) when inlist(s,uniops) && check(Sym(s),la,[e]) -> parse (Uniop(s,e)::pst, la::inp) | (e::Sym("=")::Var(x)::Sym("define")::pst, la::inp) when check(Sym("="),la,[e]) -> parse (Define(x,e)::pst, la::inp) | (body::Sym("in")::init::Sym("=")::Var(x)::Sym("let")::pst, la::inp) when check(Sym("let"),la,[init;body]) -> parse (Let(x,init,body)::pst, la::inp) | (e::Sym("=")::Var(x)::pst, la::inp) when check(Sym("="),la,[e]) -> parse (Assign(lvalue=Var(x),rvalue=e)::pst, la::inp) | (body::Sym(":")::Var(x)::Sym("lambda")::pst, la::inp) when check(Sym("lambda"),la,[body]) -> parse(Lambda(x,body)::pst, la::inp) | (Sym(";")::e::(Sequence es)::pst, la::inp) when (proper_expr e) -> parse ((Sequence (e::es))::pst, la::inp) | (Sym(";")::e::pst, la::inp) when (proper_expr e) -> parse ((Sequence [e])::pst, la::inp) | (pst, la::inp) when la<>EOF -> parse (la::pst, inp) // shift | (p,i) -> RuntimeError(error_report p i);; /////// lexical tokenizer // function to convert C# lexToken structures to F# type expr let convert_token (token:lexToken) = match token.token_type with | "Integer" -> Num(token.token_value :?> int) // :?> downcasts from obj | "Alphanumeric" -> Var(token.token_value :?> string) | "Symbol" | "Keyword" -> Sym(token.token_value :?> string) | "StringLiteral" -> Str(token.token_value :?> string) | _ -> EOF;; // if can't tokenize, force end of stream // reverses list m while applying function f to each value let rec revmap f M stack = match (M,stack) with | ([],s) -> s | (a::b,s) -> revmap f b ((f a)::s);; let rec gettokens (lexer:simpleLexer) ax = let next_token = lexer.next() if next_token=null then ax else gettokens lexer (next_token::ax);; // collect all tokens into a list, which will enable pattern matching let tokenize(lexer:simpleLexer) = let tokens = gettokens lexer [] revmap convert_token tokens [EOF];; //////////////////////////// "main" let main(print_final_result:bool) = let argv = Environment.GetCommandLineArgs(); //command-line args let mutable lexer = null; // no choice but to use null here if argv.Length>1 then lexer <- simpleLexer(argv.[1],""); else // if no filename given, read from stdin let mutable inp = ""; // input let mutable input_line = "abc" while input_line <> null && input_line <> "EOF" do input_line <- Console.ReadLine() if input_line<>null && input_line<>"EOF" && (input_line.Length=0 || input_line.[0]<>'#') then inp <- inp + input_line lexer <- simpleLexer(inp); // create lexical tokenizer for k in ["cout";"cin";"define";"let";"in";"lambda"] do lexer.addKeyword(k) // keywords are recognized as symbols instead of variables (Sym, not Var) let input_tokens = tokenize lexer //printfn "input_tokens: %A" input_tokens //trace let ast = parse([],input_tokens) //printfn "\nAST: %A" ast // trace let result = eval [] ast if print_final_result then printfn "%A" result;; main(false); // run main