case /^:/:
return ":" token
case /^"/:
- return reader_read_string(token)
+		if (token ~ /^"(\\[^\r\n]|[^\\"\r\n])*"$/) {
+ return reader_read_string(token)
+ } else {
+ return "!\"Expected '\"', got EOF."
+ }
case /^-?[0-9]+$/:
return "+" token
default:
function reader_tokenizer(str, reader, len, r)
{
- for (len = 0; match(str, /^[ \t\r\n,]*(~@|[\[\]{}()'`~^@]|\"(\\[^\r\n]|[^\\"\r\n])*\"|;[^\r\n]*|[^ \t\r\n\[\]{}('"`,;)^~@][^ \t\r\n\[\]{}('"`,;)]*)/, r); ) {
+ for (len = 0; match(str, /^[ \t\r\n,]*(~@|[\[\]{}()'`~^@]|\"(\\[^\r\n]|[^\\"\r\n])*\"?|;[^\r\n]*|[^ \t\r\n\[\]{}('"`,;)^~@][^ \t\r\n\[\]{}('"`,;)]*)/, r); ) {
if (substr(r[1], 1, 1) != ";") {
reader[len++] = r[1]
}
READ_STRING:
REM PRINT "READ_STRING"
C=ASC(MID$(T$,LEN(T$),1))
- IF C<>34 THEN R=-1:ER=-1:E$="expected '"+CHR$(34)+"'":GOTO READ_FORM_RETURN
+ IF C<>34 THEN R=-1:ER=-1:E$="expected '"+CHR$(34)+"', got EOF":GOTO READ_FORM_RETURN
R$=MID$(T$,2,LEN(T$)-2)
S1$=CHR$(92)+CHR$(92):S2$=CHR$(127):GOSUB REPLACE: REM protect backslashes
S1$=CHR$(92)+CHR$(34):S2$=CHR$(34):GOSUB REPLACE: REM unescape quotes
Reader *reader = reader_new();
- regex = g_regex_new ("[\\s ,]*(~@|[\\[\\]{}()'`~@]|\"(?:[\\\\].|[^\\\\\"])*\"|;.*|[^\\s \\[\\]{}()'\"`~@,;]*)", 0, 0, &err);
+ regex = g_regex_new ("[\\s ,]*(~@|[\\[\\]{}()'`~@]|\"(?:[\\\\].|[^\\\\\"])*\"?|;.*|[^\\s \\[\\]{}()'\"`~@,;]*)", 0, 0, &err);
g_regex_match (regex, line, 0, &matchInfo);
if (err != NULL) {
token = reader_next(reader);
//g_print("read_atom token: %s\n", token);
- regex = g_regex_new ("(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"$|:(.*)|(^[^\"]*$)", 0, 0, &err);
+ regex = g_regex_new ("(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"?$|:(.*)|(^[^\"]*$)", 0, 0, &err);
g_regex_match (regex, token, 0, &matchInfo);
if (g_match_info_fetch_pos(matchInfo, 1, &pos, NULL) && pos != -1) {
atom = &mal_false;
} else if (g_match_info_fetch_pos(matchInfo, 6, &pos, NULL) && pos != -1) {
//g_print("read_atom string: %s\n", token);
+ int end = strlen(token)-1;
+		if (end < 1 || token[end] != '"') { abort("expected '\"', got EOF"); }
+ token[end] = '\0';
atom = malval_new_string(g_strcompress(g_match_info_fetch(matchInfo, 6)));
} else if (g_match_info_fetch_pos(matchInfo, 7, &pos, NULL) && pos != -1) {
//g_print("read_atom keyword\n");
(def tok-re #"[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:[\\].|[^\\\"])*\"?|;.*|[^\s\[\]{}()'\"`@,;]+)")
(def int-re #"^-?[0-9]+$")
+(def badstr-re #"^\"(|.*[^\"])$")
(def str-re #"^\"(.*)\"$")
(defn tokenize [s]
(defn read-atom [rdr]
(let [token (rdr-next rdr)]
(cond
- (re-seq int-re token) #?(:cljs (js/parseInt token)
- :clj (Integer/parseInt token))
- (re-seq str-re token) (unescape (second (re-find str-re token)))
- (= \: (get token 0)) (keyword (subs token 1))
- (= "nil" token) nil
- (= "true" token) true
- (= "false" token) false
- :else (symbol token))))
+ (re-seq int-re token) #?(:cljs (js/parseInt token)
+ :clj (Integer/parseInt token))
+ (re-seq badstr-re token) (throw-str (str "expected '\"', got EOF"))
+ (re-seq str-re token) (unescape (second (re-find str-re token)))
+ (= \: (get token 0)) (keyword (subs token 1))
+ (= "nil" token) nil
+ (= "true" token) true
+ (= "false" token) false
+ :else (symbol token))))
(declare read-form)
when token == "true" then true
when token == "false" then false
when token == "nil" then nil
- when token[0] == '"' then token[1..-2].gsub(/\\(.)/, {"\\\"" => "\"",
- "\\n" => "\n",
- "\\\\" => "\\"})
+ when token[0] == '"'
+      parse_error "expected '\"', got EOF" if token.size < 2 || token[-1] != '"'
+ token[1..-2].gsub(/\\(.)/, {"\\\"" => "\"",
+ "\\n" => "\n",
+ "\\\\" => "\\"})
when token[0] == ':' then "\u029e#{token[1..-1]}"
else Mal::Symbol.new token
end
end
def tokenize(str)
- regex = /[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)/
+ regex = /[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)/
str.scan(regex).map { |m| m[1] }.reject(&.empty?)
end
public static List<string> tokenize(string str) {
List<string> tokens = new List<string>();
- string pattern = @"[\s ,]*(~@|[\[\]{}()'`~@]|""(?:[\\].|[^\\""])*""|;.*|[^\s \[\]{}()'""`~@,;]*)";
+ string pattern = @"[\s ,]*(~@|[\[\]{}()'`~@]|""(?:[\\].|[^\\""])*""?|;.*|[^\s \[\]{}()'""`~@,;]*)";
Regex regex = new Regex(pattern);
foreach (Match match in regex.Matches(str)) {
string token = match.Groups[1].Value;
public static MalVal read_atom(Reader rdr) {
string token = rdr.next();
- string pattern = @"(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^("".*"")$|:(.*)|(^[^""]*$)";
+ string pattern = @"(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^("".*)|:(.*)|(^[^""]*$)";
Regex regex = new Regex(pattern);
Match match = regex.Match(token);
//Console.WriteLine("token: ^" + token + "$");
return Mal.types.False;
} else if (match.Groups[6].Value != String.Empty) {
string str = match.Groups[6].Value;
+            if (str.Length < 2 || str[str.Length-1] != '"') {
+ throw new ParseError("expected '\"', got EOF");
+ }
str = str.Substring(1, str.Length-2)
.Replace("\\\\", "\u029e")
.Replace("\\\"", "\"")
}
}
-auto tokenize_ctr = ctRegex!(r"[\s,]*(~@|[\[\]{}()'`~^@]|" `"` `(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"` r"`,;)]*)");
+auto tokenize_ctr = ctRegex!(r"[\s,]*(~@|[\[\]{}()'`~^@]|" `"` `(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"` r"`,;)]*)");
string[] tokenize(string str)
{
case ':':
return new MalString("\u029e" ~ token[1..$]);
case '"':
+ if (token[$-1] != '"')
+ {
+ throw new Exception("expected '\"', got EOF");
+ }
return parse_string(token);
default:
auto captures = matchFirst(token, integer_ctr);
import 'types.dart';
final malRegExp = new RegExp(
- r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)""");
+ r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)""");
class Reader {
final List<String> tokens;
}
if (token[0] == '"') {
+    if (token.length < 2 || token[token.length - 1] != '"') {
+ throw new ParseException("expected '\"', got EOF");
+ }
var sanitizedToken = token
// remove surrounding quotes
.substring(1, token.length - 1)
make lib.types math.parser regexp sequences splitting strings ;
IN: lib.reader
-CONSTANT: token-regex R/ (~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)~^@]+)/
+CONSTANT: token-regex R/ (~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)~^@]+)/
DEFER: read-form
: (read-string) ( str -- maltype )
- rest but-last R/ \\./ [
- {
- { [ dup >string "\\\\" = ] [ drop "\\" ] }
- { [ dup >string "\\n" = ] [ drop "\n" ] }
- { [ dup >string "\\\"" = ] [ drop "\"" ] }
- [ ]
- } cond
- ] re-replace-with ;
+ dup last CHAR: " = [
+ rest but-last R/ \\./ [
+ {
+ { [ dup >string "\\\\" = ] [ drop "\\" ] }
+ { [ dup >string "\\n" = ] [ drop "\n" ] }
+ { [ dup >string "\\\"" = ] [ drop "\"" ] }
+ [ ]
+ } cond
+ ] re-replace-with
+ ] [
+ "expected '\"', got EOF" throw
+ ] if ;
: (read-atom) ( str -- maltype )
{
{
private static Str[] tokenize(Str s)
{
- r := Regex <|[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)|>
+ r := Regex <|[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)|>
m := r.matcher(s)
tokens := Str[,]
while (m.find())
{
token := reader.next
intRegex := Regex <|^-?\d+$|>
+ strRegex := Regex <|^".*"|>
+ strBadRegex := Regex <|^".*|>
if (token == "nil") return MalNil.INSTANCE
if (token == "true") return MalTrue.INSTANCE
if (token == "false") return MalFalse.INSTANCE
if (intRegex.matches(token)) return MalInteger(token.toInt)
+ if (strRegex.matches(token)) return MalString.make(unescape_str(token[1..-2]))
+ if (strBadRegex.matches(token)) throw Err("expected '\"', got EOF")
if (token[0] == '"') return MalString.make(unescape_str(token[1..-2]))
if (token[0] == ':') return MalString.makeKeyword(token[1..-1])
return MalSymbol(token)
results := make([]string, 0, 1)
// Work around lack of quoting in backtick
re := regexp.MustCompile(`[\s,]*(~@|[\[\]{}()'` + "`" +
- `~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"` + "`" +
+ `~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"` + "`" +
`,;)]*)`)
for _, group := range re.FindAllStringSubmatch(str, -1) {
if (group[1] == "") || (group[1][0] == ';') {
}
return i, nil
} else if (*token)[0] == '"' {
+		if len(*token) < 2 || (*token)[len(*token)-1] != '"' {
+ return nil, errors.New("expected '\"', got EOF")
+ }
str := (*token)[1 : len(*token)-1]
return strings.Replace(
strings.Replace(
}
def static tokenizer(String str) {
- def m = str =~ /[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)/
+ def m = str =~ /[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)/
def tokens = []
while (m.find()) {
String token = m.group(1)
def static read_atom(Reader rdr) {
def token = rdr.next()
- def m = token =~ /(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^"(.*)"$|:(.*)|(^[^"]*$)/
+ def m = token =~ /(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^"(.*)"$|^"(.*)$|:(.*)|(^[^"]*$)/
if (!m.find()) {
throw new MalException("unrecognized token '$token'")
}
} else if (m.group(5) != null) {
false
} else if (m.group(6) != null) {
+ if (token[token.length() - 1] != '"') {
+ throw new MalException("expected '\"', got EOF")
+ }
StringEscapeUtils.unescapeJava(m.group(6))
} else if (m.group(7) != null) {
- "\u029e" + m.group(7)
+ throw new MalException("expected '\"', got EOF")
} else if (m.group(8) != null) {
- new MalSymbol(m.group(8))
+ "\u029e" + m.group(8)
+ } else if (m.group(9) != null) {
+ new MalSymbol(m.group(9))
} else {
throw new MalException("unrecognized '${m.group(0)}'")
}
"\n"),
"\""),
"\\"));
- case _ if (re_str.match(token)):
+ case _ if (re_str_bad.match(token)):
throw 'expected \'"\', got EOF';
case _:
MalSymbol(token);
(def tok-re (.compile re "[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:[\\\\].|[^\\\\\"])*\"?|;.*|[^\\s\\[\\]{}()'\"`@,;]+)"))
(def int-re (.compile re "-?[0-9]+$"))
+(def str-re (.compile re "^\".*\"$"))
+(def str-bad-re (.compile re "^\".*$"))
(defn tokenize [str]
(list-comp
(setv token (.next rdr))
(if
(.match re int-re token) (int token)
- (= "\"" (get token 0)) (Str (unescape (cut token 1 -1)))
+ (.match re str-re token) (Str (unescape (cut token 1 -1)))
+ (.match re str-bad-re token) (raise (Exception (+ "expected '\"', got EOF")))
(= ":" (get token 0)) (Keyword token)
(= "nil" token) None
(= "true" token) True
)
)
- tokenizerRegex := Regex with("[\\s ,]*(~@|[\\[\\]{}()'`~@]|\"(?:[\\\\].|[^\\\\\"])*\"|;.*|[^\\s \\[\\]{}()'\"`~@,;]*)")
+ tokenizerRegex := Regex with("[\\s ,]*(~@|[\\[\\]{}()'`~@]|\"(?:[\\\\].|[^\\\\\"])*\"?|;.*|[^\\s \\[\\]{}()'\"`~@,;]*)")
tokenize := method(str,
tokenizerRegex matchesIn(str) \
numberRegex := Regex with("^-?[0-9]+$")
read_string := method(token,
+ (token endsWithSeq("\"")) ifFalse(Exception raise("expected '\"', got EOF"))
placeholder := 127 asCharacter
token exSlice(1, -1) replaceSeq("\\\\", placeholder) replaceSeq("\\\"", "\"") replaceSeq("\\n", "\n") replaceSeq(placeholder, "\\")
)
public static ArrayList<String> tokenize(String str) {
ArrayList<String> tokens = new ArrayList<String>();
- Pattern pattern = Pattern.compile("[\\s ,]*(~@|[\\[\\]{}()'`~@]|\"(?:[\\\\].|[^\\\\\"])*\"|;.*|[^\\s \\[\\]{}()'\"`~@,;]*)");
+ Pattern pattern = Pattern.compile("[\\s ,]*(~@|[\\[\\]{}()'`~@]|\"(?:[\\\\].|[^\\\\\"])*\"?|;.*|[^\\s \\[\\]{}()'\"`~@,;]*)");
Matcher matcher = pattern.matcher(str);
while (matcher.find()) {
String token = matcher.group(1);
public static MalVal read_atom(Reader rdr)
throws ParseError {
String token = rdr.next();
- Pattern pattern = Pattern.compile("(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"$|:(.*)|(^[^\"]*$)");
+ Pattern pattern = Pattern.compile("(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"$|^\"(.*)$|:(.*)|(^[^\"]*$)");
Matcher matcher = pattern.matcher(token);
if (!matcher.find()) {
throw new ParseError("unrecognized token '" + token + "'");
} else if (matcher.group(6) != null) {
return new MalString(StringEscapeUtils.unescapeJson(matcher.group(6)));
} else if (matcher.group(7) != null) {
- return new MalString("\u029e" + matcher.group(7));
+ throw new ParseError("expected '\"', got EOF");
} else if (matcher.group(8) != null) {
- return new MalSymbol(matcher.group(8));
+ return new MalString("\u029e" + matcher.group(8));
+ } else if (matcher.group(9) != null) {
+ return new MalSymbol(matcher.group(9));
} else {
throw new ParseError("unrecognized '" + matcher.group(0) + "'");
}
function tokenize(str)
- re = r"[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:\\.|[^\\\"])*\"|;.*|[^\s\[\]{}('\"`,;)]*)"
+ re = r"[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:\\.|[^\\\"])*\"?|;.*|[^\s\[\]{}('\"`,;)]*)"
tokens = map((m) -> m.captures[1], eachmatch(re, str))
filter((t) -> t != "" && t[1] != ';', tokens)
end
replace(token[2:end-1], r"\\.", (r) -> get(Dict("\\n"=>"\n",
"\\\""=>"\"",
"\\\\"=>"\\"), r, r))
+ elseif ismatch(r"^\".*$", token)
+ error("expected '\"', got EOF")
elseif token[1] == ':'
"\u029e$(token[2:end])"
elseif token == "nil"
import kotlin.text.Regex
-val TOKEN_REGEX = Regex("[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\\s\\[\\]{}('\"`,;)]*)")
-val ATOM_REGEX = Regex("(^-?[0-9]+$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"$|:(.*)|(^[^\"]*$)")
+val TOKEN_REGEX = Regex("[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"?|;.*|[^\\s\\[\\]{}('\"`,;)]*)")
+val ATOM_REGEX = Regex("(^-?[0-9]+$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"$|^\"(.*)$|:(.*)|(^[^\"]*$)")
class Reader(sequence: Sequence<String>) {
val tokens = sequence.iterator()
else m.groups[1]?.value.toString()
})
} else if (groups[6]?.value != null) {
- MalKeyword(groups[6]?.value as String)
+ throw MalReaderException("expected '\"', got EOF")
} else if (groups[7]?.value != null) {
- MalSymbol(groups[7]?.value as String)
+ MalKeyword(groups[7]?.value as String)
+ } else if (groups[8]?.value != null) {
+ MalSymbol(groups[8]?.value as String)
} else {
throw MalReaderException("Unrecognized token: " + next)
}
[\s,]* # whitespace or commas
( ~@ # special two-char ~@
| [\[\]{}()'`~^@] # special single char one of []{}'`~^@
- | "(?:\\.| [^\\"])*" # double-quoted string
+ | "(?:\\.| [^\\"])*"? # double-quoted string
| ;.* # any seq of chars starting ;
| [^\s\[\]{}('"`,;)]+ # seq of non-special chars: symbols, numbers,
) # "true", "false" and "nil".
if token in constants
{type: \const, value: reader.next!}
else if token[0] == '"'
+ if not token.endsWith '"'
+ parse-error "expected '\"', got EOF"
{type: \string, value: decode-string reader.next!}
else if token.match /^-?\d+$/
{type: \int, value: parseInt reader.next!}
make "w word :w :c
make "rest butfirst :rest
]
-(throw "error [Expected closing quotes])
+(throw "error [Expected closing quotes, not EOF])
end
to read_next_token :s
$(call __string,$(strip $(call READ_STRING,$(1))))\
$(eval $(if $(filter $(DQUOTE),$(word 1,$($(1)))),\
$(eval $(1) := $(wordlist 2,$(words $($(1))),$($(1)))),\
- $(call _error,Expected '$(DQUOTE)' in; $($(1))))),\
+ $(call _error,Expected '$(DQUOTE)' in; $($(1))$(COMMA) got EOF))),\
$(if $(filter $(COLON),$(ch)),\
$(eval $(1) := $(wordlist 2,$(words $($(1))),$($(1))))\
$(call _keyword,$(call READ_KEYWORD,$(1))),\
classdef reader
methods (Static = true)
function tokens = tokenize(str)
- re = '[\s,]*(~@|[\[\]{}()''`~^@]|"(?:\\.|[^\\"])*"|;[^\n]*|[^\s\[\]{}(''"`,;)]*)';
+ re = '[\s,]*(~@|[\[\]{}()''`~^@]|"(?:\\.|[^\\"])*"?|;[^\n]*|[^\s\[\]{}(''"`,;)]*)';
% extract the capture group (to ignore spaces and commas)
tokens = cellfun(@(x) x(1), regexp(str, re, 'tokens'));
comments = cellfun(@(x) length(x) > 0 && x(1) == ';', tokens);
if not(isempty(regexp(token, '^-?[0-9]+$', 'match')))
atm = str2double(token);
elseif strcmp(token(1), '"')
+ if not(token(end) == '"')
+ error('expected ''"'', got EOF');
+ end
atm = token(2:length(token)-1);
atm = strrep(atm, '\\', char(255));
atm = strrep(atm, '\"', '"');
["re-matches", "re", "strn", ["concat", "acc", "g1"]]]]]],
["def", "tokenize", ["fn", ["strn"],
- ["let", ["re-str", ["`", "[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\\s\\[\\]{}('\"`,;)]*)"],
+ ["let", ["re-str", ["`", "[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"?|;.*|[^\\s\\[\\]{}('\"`,;)]*)"],
"re", ["RegExp", "re-str", ["`", "g"]]],
[".",
["re-matches", "re", "strn", ["`", []]],
["if", [".", "token", ["`", "match"], ["RegExp", ["`", "^-?[0-9]+$"]]],
["parseInt", "token", 10],
["if", ["=", ["`", "\""], ["get", "token", 0]],
- [".",
- ["slice", "token", 1, ["-", ["count", "token"], 1]],
- ["`", "replace"], ["RegExp", ["`", "\\\\(.)"], ["`", "g"]],
- ["fn", ["_", "c"],
- ["if", ["=", "c", ["`", "n"]],
- ["`", "\n"],
- "c"]]],
+ ["if", ["=", ["`", "\""], ["get", "token", ["-", ["count", "token"], 1]]],
+ [".",
+ ["slice", "token", 1, ["-", ["count", "token"], 1]],
+ ["`", "replace"], ["RegExp", ["`", "\\\\(.)"], ["`", "g"]],
+ ["fn", ["_", "c"],
+ ["if", ["=", "c", ["`", "n"]],
+ ["`", "\n"],
+ "c"]]],
+ ["throw", ["`", "expected '\"', got EOF"]]],
["if", ["=", ["`", ":"], ["get", "token", 0]],
["keyword", ["slice", "token", 1]],
["if", ["=", ["`", "nil"], "token"],
import re, strutils, sequtils, types
let
- tokenRE = re"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)"""
+ tokenRE = re"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)"""
intRE = re"-?[0-9]+$"
type
proc read_atom(r: var Reader): MalType =
let t = r.next
if t.match(intRE): number t.parseInt
- elif t[0] == '"': str t[1 .. <t.high].multiReplace(("\\\"", "\""), ("\\n", "\n"), ("\\\\", "\\"))
+ elif t[0] == '"':
+ if t[^1] != '"': raise newException(ValueError, "expected '\"', got EOF")
+ str t[1 .. <t.high].multiReplace(("\\\"", "\""), ("\\n", "\n"), ("\\\\", "\\"))
elif t[0] == ':': keyword t[1 .. t.high]
elif t == "nil": nilObj
elif t == "true": trueObj
NSObject * read_atom(Reader * rdr) {
NSRegularExpression *regex = [NSRegularExpression
- regularExpressionWithPattern:@"(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"$|:(.*)|(^[^\"]*$)"
+ regularExpressionWithPattern:@"(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^\"(.*)\"$|^\"(.*)$|:(.*)|(^[^\"]*$)"
options:0
error:NULL];
NSNumberFormatter *numf = [[NSNumberFormatter alloc] init];
stringByReplacingOccurrencesOfString:@"\\\"" withString:@"\""]
stringByReplacingOccurrencesOfString:@"\\n" withString:@"\n"]
stringByReplacingOccurrencesOfString:@"\u029e" withString:@"\\"];
- } else if ([match rangeAtIndex:7].location < -1ULL/2) { // keyword
+ } else if ([match rangeAtIndex:7].location < -1ULL/2) { // string
+ @throw @"read_atom: expected '\"', got EOF";
+ } else if ([match rangeAtIndex:8].location < -1ULL/2) { // keyword
return [NSString stringWithFormat:@"\u029e%@",
- [token substringWithRange:[match rangeAtIndex:7]]];
- } else if ([match rangeAtIndex:8].location < -1ULL/2) { // symbol
+ [token substringWithRange:[match rangeAtIndex:8]]];
+ } else if ([match rangeAtIndex:9].location < -1ULL/2) { // symbol
return [MalSymbol stringWithString:token];
}
}
Str : string;
begin
RE := TRegExpr.Create;
- RE.Expression := '(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^(\".*\")$|:(.*)|(^[^\"]*$)';
+ RE.Expression := '(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^(\".*\")$|^(\".*)$|:(.*)|(^[^\"]*$)';
Token := Reader.Next();
//WriteLn('token: ' + Token);
if RE.Exec(Token) then
read_atom := TMalString.Create(Str)
end
else if RE.Match[7] <> '' then
- read_atom := TMalString.Create(#127 + RE.Match[7])
+ raise Exception.Create('expected ''"'', got EOF')
else if RE.Match[8] <> '' then
+ read_atom := TMalString.Create(#127 + RE.Match[8])
+ else if RE.Match[9] <> '' then
read_atom := TMalSymbol.Create(Token);
end
else
"" (List.map (function | Str.Delim x -> f x | Str.Text x -> x)
(Str.full_split re str))
-let token_re = (Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"\\|;.*\\|[^][ \n{}('\"`,;)]*")
+let token_re = (Str.regexp "~@\\|[][{}()'`~^@]\\|\"\\(\\\\.\\|[^\"]\\)*\"?\\|;.*\\|[^][ \n{}('\"`,;)]*")
type reader = {
form : Types.mal_type;
| _ -> (match token.[1] with
| '0'..'9' -> T.Int (int_of_string token)
| _ -> Types.symbol token))
- | '"' -> T.String (gsub (Str.regexp "\\\\.")
- (function
- | "\\n" -> "\n"
- | x -> String.sub x 1 1)
- (String.sub token 1 ((String.length token) - 2)))
+ | '"' -> (match token.[String.length token - 1] with
+ | '"' -> T.String (gsub (Str.regexp "\\\\.")
+ (function
+ | "\\n" -> "\n"
+ | x -> String.sub x 1 1)
+ (String.sub token 1 ((String.length token) - 2)))
+ | _ -> output_string stderr ("expected '\"', got EOF\n");
+ flush stderr;
+ raise End_of_file)
| ':' -> T.Keyword (Str.replace_first (Str.regexp "^:") "" token)
| _ -> Types.symbol token
}
function tokenize($str) {
- $pat = "/[\s,]*(php\/|~@|[\[\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\s\[\]{}('\"`,;)]*)/";
+ $pat = "/[\s,]*(php\/|~@|[\[\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"?|;.*|[^\s\[\]{}('\"`,;)]*)/";
preg_match_all($pat, $str, $matches);
return array_values(array_filter($matches[1], '_real_token'));
}
if (preg_match("/^-?[0-9]+$/", $token)) {
return intval($token, 10);
} elseif ($token[0] === "\"") {
+        if (strlen($token) < 2 || substr($token, -1) !== "\"") {
+ throw new Exception("expected '\"', got EOF");
+ }
$str = substr($token, 1, -1);
$str = str_replace('\\\\', chr(0x7f), $str);
$str = str_replace('\\"', '"', $str);
CREATE FUNCTION reader.tokenize(str varchar) RETURNS varchar[] AS $$
DECLARE
- re varchar = E'[[:space:] ,]*(~@|[\\[\\]{}()\'`~@]|"(?:[\\\\].|[^\\\\"])*"|;[^\n]*|[^\\s \\[\\]{}()\'"`~@,;]*)';
+ re varchar = E'[[:space:] ,]*(~@|[\\[\\]{}()\'`~@]|"(?:[\\\\].|[^\\\\"])*"?|;[^\n]*|[^\\s \\[\\]{}()\'"`~@,;]*)';
BEGIN
RETURN ARRAY(SELECT tok FROM
(SELECT (regexp_matches(str, re, 'g'))[1] AS tok) AS x
str := replace(str, '\n', E'\n');
str := replace(str, chr(CAST(x'7f' AS integer)), E'\\');
result := types._stringv(str);
+ ELSIF token ~ '^".*' THEN -- unclosed string
+ RAISE EXCEPTION 'expected ''"'', got EOF';
ELSIF token ~ '^:.*' THEN -- keyword
-- keyword
result := types._keywordv(substring(token FROM 2 FOR (char_length(token)-1)));
-- tokenize:
-- takes a string and returns a nested table of token strings
FUNCTION tokenize(str varchar) RETURN tokens IS
- re varchar2(100) := '[[:space:] ,]*(~@|[][{}()''`~@]|"(([\].|[^\"])*)"|;[^' || chr(10) || ']*|[^][[:space:] {}()''"`~@,;]*)';
+ re varchar2(100) := '[[:space:] ,]*(~@|[][{}()''`~@]|"(([\].|[^\"])*)"?|;[^' || chr(10) || ']*|[^][[:space:] {}()''"`~@,;]*)';
tok CLOB;
toks tokens := tokens();
cnt integer;
str := REPLACE(str, '\n', chr(10));
str := REPLACE(str, '\\', chr(92));
result := types.string(M, str);
+ ELSIF REGEXP_LIKE(token, '^".*') THEN -- unclosed string
+ raise_application_error(-20003,
+ 'expected ''"'', got EOF', TRUE);
ELSIF REGEXP_LIKE(token, '^:.*') THEN -- keyword
-- keyword
result := types.keyword(M, SUBSTR(token, 2, LENGTH(token)-1));
token := rdr.peek();
IF token IS NULL THEN
raise_application_error(-20003,
- 'expected ''' || last || '''', TRUE);
+ 'expected ''' || last || ''', got EOF', TRUE);
END IF;
IF token = last THEN EXIT; END IF;
items.EXTEND();
function tokenize {
- $r = [regex]"[\s,]*(~@|[\[\]{}()'``~^@]|`"(?:\\.|[^\\`"])*`"|;.*|[^\s\[\]{}('`"``,;)]*)"
+ $r = [regex]"[\s,]*(~@|[\[\]{}()'``~^@]|`"(?:\\.|[^\\`"])*`"?|;.*|[^\s\[\]{}('`"``,;)]*)"
$r.Matches($args) |
Where-Object { $_.Groups.Item(1).Value.Length -gt 0 -and
$_.Groups.Item(1).Value[0] -ne ";" } |
$s = $s -replace "\\n", "`n"
$s = $s -replace "$([char]0x29e)", "\"
return $s
+ } elseif ($token -match "^`".*") {
+ throw "expected '`"', got EOF"
} elseif ($token -match ":.*") {
return "$([char]0x29e)$($token.substring(1))"
} elseif ($token -eq "true") {
of all the tokens (strings) in it. The following regular expression
(PCRE) will match all mal tokens.
```
-[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)
+[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)
```
* For each match captured within the parenthesis starting at char 6 of the
regular expression a new token will be created.
* ```[\[\]{}()'`~^@]```: Captures any special single character, one of
```[]{}()'`~^@``` (tokenized).
- * `"(?:\\.|[^\\"])*"`: Starts capturing at a double-quote and stops at the
+ * `"(?:\\.|[^\\"])*"?`: Starts capturing at a double-quote and stops at the
next double-quote unless it was proceeded by a backslash in which case it
- includes it until the next double-quote (tokenized).
+ includes it until the next double-quote (tokenized). It will also
+ match unbalanced strings (no ending double-quote) which should be
+ reported as an error.
* `;.*`: Captures any sequence of characters starting with `;` (tokenized).
}
tokenize <- function(str) {
- re <- "[\\s,]*(~@|[\\[\\]\\{\\}\\(\\)'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\\s\\[\\]\\{\\}\\('\"`,;\\)]*)"
+ re <- "[\\s,]*(~@|[\\[\\]\\{\\}\\(\\)'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"?|;.*|[^\\s\\[\\]\\{\\}\\('\"`,;\\)]*)"
m <- lapply(regmatches(str, gregexpr(re, str, perl=TRUE)),
function(e) sub("^[\\s,]+", "", e, perl=TRUE))
res <- list()
} else if (re_match("^-?[0-9][0-9.]*$", token)) {
as.double(token)
} else if (substr(token,1,1) == "\"") {
+ if (substr(token, nchar(token), nchar(token)) != "\"") {
+ throw("expected '\"', got EOF")
+ }
gsub("\x7f", "\\\\",
gsub("\\\\n", "\n",
gsub("\\\\\"", "\"",
(define (tokenize str)
(filter-not (lambda (s) (or (equal? s "") (equal? (substring s 0 1) ";")))
- (regexp-match* #px"[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;[^\n]*|[^\\s\\[\\]{}('\"`,;)]*)"
+ (regexp-match* #px"[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"?|;[^\n]*|[^\\s\\[\\]{}('\"`,;)]*)"
str #:match-select cadr)))
(define (read_atom rdr)
(string->number token)]
[(regexp-match #px"^\".*\"$" token)
(with-input-from-string token read)]
+ [(regexp-match #px"^\".*$" token)
+ (raise "expected '\"', got EOF")]
[(regexp-match #px"^:" token) (_keyword (substring token 1))]
[(equal? "nil" token) nil]
[(equal? "true" token) #t]
when token == "true" then return new_true()
when token == "false" then return new_false()
when substr(token, 1, 1) == ':' then return new_keyword(parse_keyword(token))
- when substr(token, 1, 1) == '"' then return new_string(parse_string(token))
+ when substr(token, 1, 1) == '"' then do
+ if substr(token, length(token), 1) \== '"' then do
+      err = "expected '" || '"' || "', got EOF"
+ return "ERR"
+ end
+ return new_string(parse_string(token))
+ end
otherwise
return new_symbol(token)
end
return None
def tokenize(str):
- re_str = "[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:[\\\\].|[^\\\\\"])*\"|;.*|[^\s\[\]{}()'\"`@,;]+)"
+ re_str = "[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:[\\\\].|[^\\\\\"])*\"?|;.*|[^\s\[\]{}()'\"`@,;]+)"
if IS_RPYTHON:
tok_re = re_str
else:
## elif re.match(float_re, token): return int(token)
elif token[0] == '"':
end = len(token)-1
- if end < 2:
+ if end == 1:
return MalStr(u"")
+ elif end < 1 or token[end] != '"':
+ types.throw_str("expected '\"', got EOF")
else:
s = unicode(token[1:end])
s = types._replace(u'\\\\', u"\u029e", s)
def tokenize(str)
- re = /[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)/
+ re = /[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)/
return str.scan(re).map{|m| m[0]}.select{ |t|
t != "" && t[0..0] != ";"
}
when /^-?[0-9]+$/ then token.to_i # integer
when /^-?[0-9][0-9.]*$/ then token.to_f # float
when /^".*"$/ then parse_str(token) # string
+ when /^".*$/ then raise "expected '\"', got EOF"
when /^:/ then "\u029e" + token[1..-1] # keyword
when "nil" then nil
when "true" then true
}
def tokenize(str: String): Array[String] = {
- val re = """[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)""".r
+ val re = """[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)""".r
re.findAllMatchIn(str).map{ _.group(1) }
.filter{ s => s != "" && s(0) != ';' }
.toArray
val re_int = """^(-?[0-9]+)$""".r
val re_flt = """^(-?[0-9][0-9.]*)$""".r
val re_str = """^"(.*)"$""".r
+ val re_str_bad = """^"(.*)$""".r
val re_key = """^:(.*)$""".r
return token match {
case re_int(i) => i.toLong // integer
case re_flt(f) => f.toDouble // float
case re_str(s) => parse_str(s) // string
+ case re_str_bad(s) =>
+ throw new Exception("expected '\"', got EOF")
case re_key(k) => "\u029e" + k // keyword
case "nil" => null
case "true" => true
}
def tokenize(str string) List<string> {
- var re = RegExp.new("[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;.*|[^\\s\\[\\]{}('\"`,;)]*)", "g")
+ var re = RegExp.new("[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"?|;.*|[^\\s\\[\\]{}('\"`,;)]*)", "g")
var tokens List<string> = []
var match string
while (match = re.exec(str)[1]) != "" {
"|" +
"[\\[\\]{}()`'~^@]" + // Punctuation: Any one of []{}()`'~^@
"|" +
- "\"(?:\\\\.|[^\\\\\"])*\"" + // Quoted string: characters other than \ or ", or any escaped characters
+ "\"(?:\\\\.|[^\\\\\"])*\"?" + // Quoted string: characters other than \ or ", or any escaped characters
"|" +
";.*" + // Comment: semicolon followed by anything
"|" +
"|" +
"(^\".*\"$)" + // String
"|" +
+ "(^\".*$)" + // Invalid/unclosed string
+ "|" +
"(:.*)" + // Keyword
"|" +
"(^[^\"]*$)" // Symbol
return make_false()
} else if have_match(match, at_index: 7) { // String
return make_string(unescape(token))
- } else if have_match(match, at_index: 8) { // Keyword
+ } else if have_match(match, at_index: 8) { // Invalid/unclosed string
+ try throw_error("expected '\"', got EOF")
+ } else if have_match(match, at_index: 9) { // Keyword
return make_keyword(token[token.startIndex.successor() ..< token.endIndex])
- } else if have_match(match, at_index: 9) { // Symbol
+ } else if have_match(match, at_index: 10) { // Symbol
return make_symbol(token)
}
}
if rdr.str[cidx] == "\"" { break }
cidx = rdr.pos
}
- if rdr.pos > rdr.str.endIndex {
+ if rdr.str[rdr.str.index(before: rdr.pos)] != "\"" {
throw MalError.Reader(msg: "Expected '\"', got EOF")
}
let matchStr = rdr.str.substring(with:
}
proc tokenize str {
- set re {[\s,]*(~@|[\[\]\{\}()'`~^@]|\"(?:\\.|[^\\\"])*\"|;.*|[^\s\[\]\{\}('\"`~^@,;)]*)}
+ set re {[\s,]*(~@|[\[\]\{\}()'`~^@]|\"(?:\\.|[^\\\"])*\"?|;.*|[^\s\[\]\{\}('\"`~^@,;)]*)}
set tokens {}
foreach {_ capture} [regexp -line -all -inline $re $str] {
if {[string length $capture] > 0 && [string range $capture 0 0] != ";"} {
^false$ { return $::mal_false }
^: { return [keyword_new [parse_keyword $token]] }
^\".*\"$ { return [string_new [parse_string $token]] }
+ ^\".*$ { error "expected '\"', got EOF" }
default { return [symbol_new $token] }
}
}
;;; These should throw some error with no return value
"abc
-;/.+
+;/.*(EOF|end of input|unbalanced).*
(1 "abc
-;/.+
+;/.*(EOF|end of input|unbalanced).*
(1 "abc"
-;/.+
+;/.*(EOF|end of input|unbalanced).*
;; Testing read of quoting
'1
}
function tokenizer(input: string): string[] {
- const regexp = /[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"|;.*|[^\s\[\]{}('"`,;)]*)/g;
+ const regexp = /[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)/g;
const tokens: string[] = [];
while (true) {
const matches = regexp.exec(input);
return new MalNumber(v);
}
if (token[0] === '"') {
+        if (token.length < 2 || token.slice(-1) !== '"') {
+ throw new Error("expected '\"', got EOF");
+ }
const v = token.slice(1, token.length - 1)
.replace(/\\(.)/g, (_, c: string) => c == 'n' ? '\n' : c)
return new MalString(v);
Shared Function tokenize(str As String) As List(Of String)
Dim tokens As New List(Of String)
- Dim pattern As String = "[\s ,]*(~@|[\[\]{}()'`~@]|""(?:[\\].|[^\\""])*""|;.*|[^\s \[\]{}()'""`~@,;]*)"
+ Dim pattern As String = "[\s ,]*(~@|[\[\]{}()'`~@]|""(?:[\\].|[^\\""])*""?|;.*|[^\s \[\]{}()'""`~@,;]*)"
Dim regex As New Regex(pattern)
For Each match As Match In regex.Matches(str)
Dim token As String = match.Groups(1).Value
Shared Function read_atom(rdr As Reader) As MalVal
Dim token As String = rdr.get_next()
- Dim pattern As String = "(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^("".*"")$|^:(.*)|(^[^""]*$)"
+ Dim pattern As String = "(^-?[0-9]+$)|(^-?[0-9][0-9.]*$)|(^nil$)|(^true$)|(^false$)|^("".*)|^:(.*)|(^[^""]*$)"
Dim regex As Regex = New Regex(pattern)
Dim match As Match = regex.Match(token)
'Console.WriteLine("token: ^" + token + "$")
return Mal.types.MalFalse
Else If match.Groups(6).Value <> String.Empty Then
Dim str As String = match.Groups(6).Value
+                If str.Length < 2 OrElse str(str.Length-1) <> """" Then
+ throw New ParseError("expected '""', got EOF")
+ End If
return New Mal.types.MalString(
str.Substring(1, str.Length-2) _
.Replace("\\", ChrW(&H029e)) _
deallocate(s);
end procedure unescape_string_token;
- procedure read_atom(r: inout reader_class; result: out mal_val_ptr) is
+ procedure read_atom(r: inout reader_class; result: out mal_val_ptr; err: out mal_val_ptr) is
variable token, s: line;
variable num: integer;
variable ch: character;
s(1 to s'length) := token(2 to token'length);
new_keyword(s, result);
when '"' =>
+ if token(token'length) /= '"' then
+ new_string("expected '""', got EOF", err);
+ result := null;
+ return;
+ end if;
unescape_string_token(token, s);
new_string(s, result);
when others =>
when ']' => new_string("unexcepted ']'", err);
when '{' => read_sequence(mal_hashmap, "}", r, result, err);
when '}' => new_string("unexcepted '}'", err);
- when others => read_atom(r, result);
+ when others => read_atom(r, result, err);
end case;
end procedure read_form;
(local.set $slen ($strlen (i32.add $tok 1)))
(if (i32.ne (i32.load8_u (i32.add $tok $slen)) (CHR "\""))
(then
- ($THROW_STR_0 "expected '\"'")
+ ($THROW_STR_0 "expected '\"', got EOF")
(return 0))
(else
;; unescape backslashes, quotes, and newlines
#include "yeti_regex.i"
require, "types.i"
-TOKENIZER_REGEXP = regcomp("[[:space:],]*(~@|[][{}()'`~@]|\"([\\].|[^\\\"])*\"|;.*|[^][[:space:]{}()'\"`~@,;]*)", newline=1)
+TOKENIZER_REGEXP = regcomp("[[:space:],]*(~@|[][{}()'`~@]|\"([\\].|[^\\\"])*\"?|;.*|[^][[:space:]{}()'\"`~@,;]*)", newline=1)
func tokenize(str)
{
}
NUMBER_REGEXP = regcomp("^-?[0-9]+$")
+STR_REGEXP = regcomp("^\".*\"$")
+STR_BAD_REGEXP = regcomp("^\".*$")
func unescape(s)
{
else if (token == "true") return MAL_TRUE
else if (token == "false") return MAL_FALSE
else if (regmatch(NUMBER_REGEXP, token)) return MalNumber(val=tonum(token))
- else if (strpart(token, 1:1) == "\"") return MalString(val=unescape(token))
+ else if (regmatch(STR_REGEXP, token)) return MalString(val=unescape(token))
+ else if (regmatch(STR_BAD_REGEXP, token)) return MalError(message=("expected '\"', got EOF"))
else if (strpart(token, 1:1) == ":") return MalKeyword(val=strpart(token, 2:))
else return MalSymbol(val=token)
}