From 42198578566be256bbdebf22757f41edef4aa6ee Mon Sep 17 00:00:00 2001 From: Adam Chlipala Date: Sun, 23 Jul 2006 23:57:40 +0000 Subject: [PATCH] Parsing expressions --- src/.cvsignore | 3 + src/ast.sml | 77 ++++++++++++++++++++++++ src/dataStructures.sml | 37 ++++++++++++ src/domtool.cm | 15 +++++ src/domtool.grm | 98 +++++++++++++++++++++++++++++++ src/domtool.lex | 129 +++++++++++++++++++++++++++++++++++++++++ src/errormsg.sig | 23 ++++++++ src/errormsg.sml | 48 +++++++++++++++ 8 files changed, 430 insertions(+) create mode 100644 src/.cvsignore create mode 100644 src/ast.sml create mode 100644 src/dataStructures.sml create mode 100644 src/domtool.cm create mode 100644 src/domtool.grm create mode 100644 src/domtool.lex create mode 100644 src/errormsg.sig create mode 100644 src/errormsg.sml diff --git a/src/.cvsignore b/src/.cvsignore new file mode 100644 index 0000000..3d38035 --- /dev/null +++ b/src/.cvsignore @@ -0,0 +1,3 @@ +.cm +*.lex.* +*.grm.* diff --git a/src/ast.sml b/src/ast.sml new file mode 100644 index 0000000..3fdc22f --- /dev/null +++ b/src/ast.sml @@ -0,0 +1,77 @@ +(* HCoop Domtool (http://hcoop.sourceforge.net/) + * Copyright (c) 2006, Adam Chlipala + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*) + +(* Configuration language abstract syntax *) + +structure Ast = struct + +open DataStructures + +(* A description of a predicate on configuration block stacks *) +datatype context' = + CRoot + (* The stack is empty. *) + | CConst of string + (* The given context name is on top of the stack. *) + | CPrefix of context + (* Some prefix of the stack matches the context. *) + | CNot of context + (* The context does not match. *) + | CAnd of context * context + (* Both contexts match. *) +withtype context = context' * position + +datatype typ' = + TBase of string + (* Base type *) + | TList of typ + (* SML 'a list *) + | TArrow of typ * typ + (* SML -> *) + | TAction of context * record * record + (* An action that: + * - Is valid in the given context + * - Expects an environment compatible with the first record + * - Modifies it according to the second record *) +withtype typ = typ' * position + and record = typ StringMap.map + +datatype exp' = + EInt of int + (* Constant integer *) + | EString of string + (* Constant string *) + | EList of exp list + (* Basic list constructor *) + + | ELam of string * typ * exp + (* Function abstraction *) + | EApp of exp * exp + (* Function application *) + + | ESet of string * exp + (* Set an environment variable *) + | ESeq of exp list + (* Monad sequencer; execute a number of commands in order *) + | ELocal of exp + (* Local execution; execute the action and then restore the previous + * environment. *) +withtype exp = exp' * position + + +end diff --git a/src/dataStructures.sml b/src/dataStructures.sml new file mode 100644 index 0000000..75531fa --- /dev/null +++ b/src/dataStructures.sml @@ -0,0 +1,37 @@ +(* HCoop Domtool (http://hcoop.sourceforge.net/) + * Copyright (c) 2006, Adam Chlipala + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*) + +(* Some useful data structures *) + +structure DataStructures = struct + +type position = int * int + +structure StringOrdKey = struct +type ord_key = string +val compare = String.compare +end + +structure StringMap = BinaryMapFn(StringOrdKey) +structure StringSet = BinarySetFn(StringOrdKey) + +datatype 'a hierarchy = + Leaf of 'a + | Internal of 'a hierarchy StringMap.map + +end diff --git a/src/domtool.cm b/src/domtool.cm new file mode 100644 index 0000000..a3ee84f --- /dev/null +++ b/src/domtool.cm @@ -0,0 +1,15 @@ +Group is + +$/basis.cm +$/smlnj-lib.cm +$/ml-yacc-lib.cm + +errormsg.sig +errormsg.sml + +dataStructures.sml + +ast.sml + +domtool.lex +domtool.grm diff --git a/src/domtool.grm b/src/domtool.grm new file mode 100644 index 0000000..79d13a1 --- /dev/null +++ b/src/domtool.grm @@ -0,0 +1,98 @@ +(* HCoop Domtool (http://hcoop.sourceforge.net/) + * Copyright (c) 2006, Adam Chlipala + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*) + +(* Parser for Domtool configuration files *) +open Ast + +%% +%header (functor DomtoolLrValsFn(structure Token : TOKEN)) + +%term + EOF + | SYMBOL of string | CSYMBOL of string + | STRING of string + | INT of int + | ARROW | DARROW | COLON + | LPAREN | RPAREN | LBRACK | RBRACK | LBRACE | RBRACE + | EQ | COMMA | BSLASH | SEMI | LET | IN | END + + +%nonterm + file of exp + | exp of exp + | apps of exp + | term of exp + | elist of exp list + | elistNe of exp list + | clist of exp list + | typ of typ + +%verbose (* print summary of errors *) +%pos int (* positions *) +%start file +%pure +%eop EOF +%noshift EOF + +%name Domtool + +%right SEMI +%nonassoc IN +%right ARROW +%right COMMA +%nonassoc EQ + +%% + +file : exp (exp) + +exp : apps (apps) + | BSLASH SYMBOL COLON typ ARROW exp (ELam (SYMBOL, typ, exp), (BSLASHleft, expright)) + | CSYMBOL EQ exp (ESet (CSYMBOL, exp), (CSYMBOLleft, expright)) + | exp SEMI exp (let + val ls = case #1 exp2 of + ESeq ls => exp :: ls + | _ => [exp1, exp2] + in + (ESeq ls, (exp1left, exp2right)) + end) + +apps : term (term) + | apps term (EApp (apps, term), (appsleft, termright)) + +term : LPAREN exp RPAREN (exp) + | INT (EInt INT, (INTleft, INTright)) + | STRING (EString STRING, (STRINGleft, STRINGright)) + | LBRACK elist RBRACK (EList elist, (LBRACKleft, RBRACKright)) + | LET exp IN exp END (let + val ls = case (#1 exp1, #1 exp2) of + (ESeq ls1, ESeq ls2) => ls1 @ ls2 + | (ESeq ls, _) => ls @ [exp2] + | (_, ESeq ls) => exp1 :: ls + | _ => [exp1, exp2] + in + (ESeq ls, (exp1left, exp2right)) + end) + +elist : ([]) + | elistNe (elistNe) + +elistNe: exp ([exp]) + | exp COMMA elistNe (exp :: elistNe) + +typ : SYMBOL (TBase SYMBOL, (SYMBOLleft, SYMBOLright)) diff --git a/src/domtool.lex b/src/domtool.lex new file mode 100644 index 0000000..91af342 --- /dev/null +++ b/src/domtool.lex @@ -0,0 +1,129 @@ +(* HCoop Domtool (http://hcoop.sourceforge.net/) + * Copyright (c) 2006, Adam Chlipala + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*) + +(* Lexer for Domtool configuration files *) + +type pos = int +type svalue = Tokens.svalue +type ('a,'b) token = ('a,'b) Tokens.token +type lexresult = (svalue,pos) Tokens.token + +val lineNum = ErrorMsg.lineNum +val linePos = ErrorMsg.linePos + +local + val commentLevel = ref 0 + val commentPos = ref 0 +in + fun enterComment yypos = (commentLevel := !commentLevel + 1; commentPos := yypos) + + fun exitComment () = + let val _ = commentLevel := !commentLevel - 1 in + !commentLevel = 0 + end + + fun eof () = + let + val pos = hd (!linePos) + in + if (!commentLevel > 0) then + (ErrorMsg.error (SOME (!commentPos,!commentPos)) "Unterminated comment") + else (); + Tokens.EOF (pos,pos) + end +end + +val str = ref ([] : char list) +val strStart = ref 0 + +%% +%header (functor DomtoolLexFn(structure Tokens : Domtool_TOKENS)); +%full +%s COMMENT STRING; + +id = [a-z_][A-Za-z0-9_]*; +cid = [A-Z][A-Za-z0-9_]*; +intconst = [0-9]+; +ws = [\ \t\012]; +lineComment = #[^\n]*\n; + +%% + + \n => (lineNum := !lineNum + 1; + linePos := yypos :: ! linePos; + continue ()); + \n => (lineNum := !lineNum + 1; + linePos := yypos :: ! linePos; + continue ()); + + {ws}+ => (lex ()); + + lineComment => (lex ()); + + "(*" => (YYBEGIN COMMENT; enterComment yypos; continue()); + "*)" => (ErrorMsg.error (SOME (yypos, yypos)) "Unbalanced comments"; + continue()); + + "(*" => (enterComment yypos; continue()); + "*)" => (if exitComment () then YYBEGIN INITIAL else (); + continue()); + + "\"" => (YYBEGIN STRING; strStart := yypos; str := []; continue()); + "\\\"" => (str := #"\"" :: !str; continue()); + "\"" => (YYBEGIN INITIAL; + Tokens.STRING (String.implode (List.rev (!str)), !strStart, yypos + 1)); + "\n" => (lineNum := !lineNum + 1; + linePos := yypos :: ! linePos; + str := #"\n" :: !str; continue()); + . => (str := String.sub (yytext, 0) :: !str; continue()); + + "(" => (Tokens.LPAREN (yypos, yypos + size yytext)); + ")" => (Tokens.RPAREN (yypos, yypos + size yytext)); + + "[" => (Tokens.LBRACK (yypos, yypos + size yytext)); + "]" => (Tokens.RBRACK (yypos, yypos + size yytext)); + + "{" => (Tokens.LBRACE (yypos, yypos + size yytext)); + "}" => (Tokens.RBRACE (yypos, yypos + size yytext)); + + "->" => (Tokens.ARROW (yypos, yypos + size yytext)); + "=>" => (Tokens.DARROW (yypos, yypos + size yytext)); + + "=" => (Tokens.EQ (yypos, yypos + size yytext)); + "," => (Tokens.COMMA (yypos, yypos + size yytext)); + "\\" => (Tokens.BSLASH (yypos, yypos + size yytext)); + ":" => (Tokens.COLON (yypos, yypos + size yytext)); + ";" => (Tokens.SEMI (yypos, yypos + size yytext)); + + "let" => (Tokens.LET (yypos, yypos + size yytext)); + "in" => (Tokens.IN (yypos, yypos + size yytext)); + "end" => (Tokens.END (yypos, yypos + size yytext)); + + {id} => (Tokens.SYMBOL (yytext, yypos, yypos + size yytext)); + {cid} => (Tokens.CSYMBOL (yytext, yypos, yypos + size yytext)); + {intconst} => (case Int.fromString yytext of + SOME x => Tokens.INT (x, yypos, yypos + size yytext) + | NONE => (ErrorMsg.error (SOME (yypos, yypos)) + ("Expected int, received: " ^ yytext); + continue ())); + + . => (continue()); + + . => (ErrorMsg.error (SOME (yypos,yypos)) + ("illegal character: \"" ^ yytext ^ "\""); + continue ()); diff --git a/src/errormsg.sig b/src/errormsg.sig new file mode 100644 index 0000000..5404f2e --- /dev/null +++ b/src/errormsg.sig @@ -0,0 +1,23 @@ +(* This file comes mostly from "Modern Compiler Implementation in ML," by Andrew Appel + * http://www.cs.princeton.edu/~appel/modern/ml/ + *) + +signature ERRORMSG = + sig + val reset : unit -> unit + + val anyErrors : bool ref + val errorText : string ref + + val fileName : string ref + val sourceStream : TextIO.instream ref + + val lineNum : int ref + val linePos : int list ref + + val error : (int * int) option -> string -> unit + + val dummyLoc : int * int + + exception Error +end diff --git a/src/errormsg.sml b/src/errormsg.sml new file mode 100644 index 0000000..25b7161 --- /dev/null +++ b/src/errormsg.sml @@ -0,0 +1,48 @@ +(* This file comes mostly from "Modern Compiler Implementation in ML," by Andrew Appel + * http://www.cs.princeton.edu/~appel/modern/ml/ + *) + +structure ErrorMsg :> ERRORMSG = + struct + (* Initial values of compiler state variables *) + val anyErrors = ref false + val errorText = ref "" + val fileName = ref "" + val lineNum = ref 1 + val linePos = ref [1] + val sourceStream = ref TextIO.stdIn + + fun print msg = (errorText := !errorText ^ msg; + TextIO.print msg) + + (* Reset compiler to initial state *) + fun reset() = (anyErrors:=false; + errorText:=""; + fileName:=""; + lineNum:=1; + linePos:=[1]; + sourceStream:=TextIO.stdIn) + + (* Print the given error message *) + fun error posopt (msg:string) = + let + val (startpos, endpos) = Option.getOpt (posopt, (0, 0)) + fun look(pos,a::rest,n) = + if a