(* This file converts a file containing a PCF term into an abstract *) (* syntax tree suitable for interpreting. The function "parsefile" *) (* takes a file name as input and returns an abstract syntax tree. *) (* *) (* Created by Jon Riecke for Programming Paradigms Short Course. *) (* 3/7/98: Modified by Jay Sachs for SML 110. Added parsestr *) datatype term = AST_ID of string | AST_NUM of int | AST_BOOL of bool | AST_FUN of (string * term) | AST_APP of (term * term) | AST_SUCC | AST_PRED | AST_ISZERO | AST_IF of (term * term * term) | AST_REC of (string * term) | AST_ERROR datatype token = ID of string | NUM of int | IFSYM | THENSYM | ELSESYM | TRUESYM | FALSESYM | SUCCSYM | PREDSYM | ISZEROSYM | FNSYM | RECSYM | EQUAL | LPAREN | RPAREN | FNARROW | LETSYM | INSYM | EOF signature PCFLEXER = sig val lex : string -> token list val lexstr : string -> token list end structure PCFlexer: PCFLEXER = struct open TextIO; fun skipwhite strm = if (endOfStream strm) then () else case (lookahead strm) of (SOME(#" ")) => (input1 strm;(skipwhite strm)) | (SOME(#"\n")) => (input1 strm;(skipwhite strm)) | (SOME(#"\t")) => (input1 strm;(skipwhite strm)) | _ => () fun alpha str = ((ord str > 64) andalso (ord str < 91)) orelse ((ord str > 96) andalso (ord str < 123)) fun numeric str = ((ord str > 47) andalso (ord str < 58)) fun alphanum str = (alpha str) orelse (numeric str) fun getid strm id = case (lookahead strm) of NONE => id | (SOME(c)) => if (alphanum c) then (input1 strm;getid strm (id ^ (str(c)))) else id fun getnum strm num = case (lookahead strm) of NONE => num | (SOME(c)) => if (numeric c) then (input1 strm;getnum strm (num*10 + ((ord c)-48))) else num fun nextsymbol strm = case (skipwhite strm; input1 strm) of NONE => EOF | (SOME(c)) => if (alpha c) then let val id = (getid strm (str c)) in case id of "fn" => FNSYM | "if" => IFSYM | "then" => THENSYM | "else" => ELSESYM | "true" => TRUESYM | "false" => FALSESYM | "succ" => SUCCSYM | "pred" => PREDSYM | "iszero" => ISZEROSYM | "rec" => RECSYM | "let" => LETSYM | "in" => INSYM | _ => (ID id) end else if (numeric c) then (NUM (getnum strm ((ord c) - 48))) else case c of #"=" => (case (lookahead strm) of (SOME(#">")) => (input1 strm; FNARROW) | _ => EQUAL) | #"(" => LPAREN | #")" => RPAREN | _ => (print ("Skipping illegal character "^ (str c) ^"."); nextsymbol strm) fun lexit strm = let fun gettokens lst = let val nxt = (nextsymbol strm) in if nxt = EOF then (closeIn strm; rev (EOF::lst)) else gettokens (nxt::lst) end in gettokens [] end fun lexstr str = lexit (openString str) fun lex file = lexit (openIn file) end; (* Now define the parsing part of the program, which takes a list of *) (* tokens and returns an abstract syntax tree. *) (* Turn off the checking for case statements, since this will take a *) (* very long time for this program! *) (* #CheckMatch Debug := false; -- put back in if run on Mac *) signature PCFPARSER = sig val parse : token list -> term end structure PCFparser : PCFPARSER = struct fun error (msg:string) = print msg fun parseit [] = [] | parseit [EOF] = [] | parseit ((ID v)::tl) = (AST_ID v)::(parseit tl) | parseit ((NUM n)::tl) = (AST_NUM n)::(parseit tl) | parseit (TRUESYM::tl) = (AST_BOOL true)::(parseit tl) | parseit (FALSESYM::tl) = (AST_BOOL false)::(parseit tl) | parseit (SUCCSYM::tl) = AST_SUCC::(parseit tl) | parseit (PREDSYM::tl) = AST_PRED::(parseit tl) | parseit (ISZEROSYM::tl)= AST_ISZERO::(parseit tl) | parseit (FNSYM::(ID v)::FNARROW::tl) = [AST_FUN (v,parse tl)] | parseit (RECSYM::(ID v)::FNARROW::tl) = [AST_REC (v,parse tl)] | parseit (LPAREN::tl) = parse_paren tl [LPAREN] [] | parseit (IFSYM::tl) = parse_if tl [IFSYM] [] | parseit (LETSYM::tl) = parse_let tl [LETSYM] [] | parseit _ = (error "Fatal error in parse---got too confused.\n"; [AST_ERROR]) and parse_paren (RPAREN::tl) [] lst = (error "Unbalanced parentheses.\n"; [AST_ERROR]) | parse_paren (RPAREN::tl) [LPAREN] [] = (error "Empty expression between parentheses.\n"; [AST_ERROR]) | parse_paren (RPAREN::tl) [LPAREN] lst = (parse (rev lst)) :: (parseit tl) | parse_paren (RPAREN::tl) (LPAREN::stk) lst = parse_paren tl stk (RPAREN::lst) | parse_paren (LPAREN::tl) stk lst = parse_paren tl (LPAREN::stk) (LPAREN::lst) | parse_paren (hd::tl) stk lst = parse_paren tl stk (hd::lst) | parse_paren _ _ _ = (error "Unbalanced parentheses.\n"; [AST_ERROR]) and parse_if (THENSYM::tl) [] lst = (error "No matching IF for THEN.\n"; [AST_ERROR]) | parse_if (THENSYM::tl) [IFSYM] [] = (error "Empty boolean expression in IF.\n"; [AST_ERROR]) | parse_if (THENSYM::tl) [IFSYM] lst = parse_then tl [THENSYM,IFSYM] [] lst | parse_if (THENSYM::tl) (IFSYM::stk) lst = parse_if tl stk (THENSYM::lst) | parse_if (IFSYM::tl) stk lst = parse_if tl (IFSYM::stk) (IFSYM::lst) | parse_if (x::tl) stk lst = parse_if tl stk (x::lst) | parse_if [] stk lst = (error "No matching THEN for IF.\n"; [AST_ERROR]) and parse_then (ELSESYM::tl) [] lst blst = (error "No matching IF for ELSE.\n"; [AST_ERROR]) | parse_then [ELSESYM] [THENSYM,IFSYM] [] blst = (error "Empty ELSE expression.\n"; [AST_ERROR]) | parse_then (ELSESYM::tl) [THENSYM,IFSYM] [] blst = (error "Empty THEN expression.\n"; [AST_ERROR]) | parse_then (ELSESYM::tl) [THENSYM,IFSYM] lst blst = [AST_IF (parse (rev blst), parse (rev lst), parse tl)] | parse_then (ELSESYM::tl) (THENSYM::IFSYM::stk) lst blst = parse_then tl stk (ELSESYM::lst) blst | parse_then (IFSYM::tl) stk lst blst = parse_then tl (IFSYM::stk) (IFSYM::lst) blst | parse_then (THENSYM::tl) (IFSYM::stk) lst blst = parse_then tl (THENSYM::IFSYM::stk) (THENSYM::lst) blst | parse_then (THENSYM::tl) (THENSYM::stk) lst blst = (error "No matching IF for THEN.\n"; [AST_ERROR]) | parse_then (x::tl) stk lst blst = parse_then tl stk (x::lst) blst | parse_then _ _ _ _ = (error "Got confused trying to parse a THEN clause.\n"; [AST_ERROR]) and parse_let (INSYM::tl) [] lst = (error "No matching LET for IN.\n"; [AST_ERROR]) | parse_let (INSYM::tl) [LETSYM] [] = (error "Empty val expression in LET.\n"; [AST_ERROR]) | parse_let (INSYM::tl) [LETSYM] lst = let_to_fun (rev lst) tl | parse_let (INSYM::tl) (LETSYM::stk) lst = parse_let tl stk (INSYM::lst) | parse_let (LETSYM::tl) stk lst = parse_let tl (LETSYM::stk) (LETSYM::lst) | parse_let (x::tl) stk lst = parse_let tl stk (x::lst) | parse_let [] stk lst = (error "No matching IN for LET.\n"; [AST_ERROR]) and let_to_fun ((ID v)::EQUAL::tl) exp = [AST_APP( AST_FUN(v, parse exp), parse tl)] | let_to_fun _ _ = (error "Val clause missing vble = at beginning.\n"; [AST_ERROR]) and makeapp rator [] = rator | makeapp rator (arg1::args) = makeapp (AST_APP (rator,arg1)) args and parse lst = let val trmlst = (parseit lst) in case trmlst of [] => (error "Fatal error---missing an expression.\n"; AST_ERROR) | (hd::tl) => makeapp hd tl end end; (* #CheckMatch Debug := true; -- put back in if run on Mac *) (* The final definition of the function puts the pieces together. *) fun parsefile str = PCFparser.parse (PCFlexer.lex str); fun parsestr str = PCFparser.parse (PCFlexer.lexstr str)