All: fix read/print of \\, and \n

author Joel Martin <github@martintribe.org>

Sat, 31 Oct 2015 03:05:49 +0000 (22:05 -0500)

committer Joel Martin <github@martintribe.org>

Sat, 31 Oct 2015 03:05:49 +0000 (22:05 -0500)
author Joel Martin <github@martintribe.org>
Sat, 31 Oct 2015 03:05:49 +0000 (22:05 -0500)
committer Joel Martin <github@martintribe.org>
Sat, 31 Oct 2015 03:05:49 +0000 (22:05 -0500)
diff --git a/awk/reader.awk b/awk/reader.awk

index 948ff2a..95824ee 100644 (file)
--- a/awk/reader.awk
+++ b/awk/reader.awk
@@ -1,20 +1,10 @@
  function reader_read_string(token,    v, r)
  {
         token = substr(token, 1, length(token) - 1)
-       while (match(token, /\\["n\\]?/, r)) {
-               switch (r[0]) {
-               case "\\":
-                       return "!\"Invalid escape character '" substr(token, RSTART, 2) "'."
-               case "\\n":
-                       v = v substr(token, 1, RSTART - 1) "\n"
-                       break
-               default:
-                       v = v substr(token, 1, RSTART - 1) substr(r[0], 2, 1)
-                       break
-               }
-               token = substr(token, RSTART + RLENGTH)
-       }
-       return v token
+       gsub(/\\\\/, "\\", token)
+       gsub(/\\"/, "\"", token)
+       gsub(/\\n/, "\n", token)
+       return token
  }
  
  function reader_read_atom(token)
diff --git a/bash/printer.sh b/bash/printer.sh

index ca40c77..125b568 100644 (file)
--- a/bash/printer.sh
+++ b/bash/printer.sh
@@ -42,7 +42,8 @@ _raw_string_pr_str () {
          r=":${s:2}"
      elif [ "${print_readably}" == "yes" ]; then
          s="${s//\\/\\\\}"
-        r="\"${s//\"/\\\"}\""
+        s="${s//\"/\\\"}"
+        r="\"${s//$'\n'/\\n}\""
      else
          r="${s}"
      fi
diff --git a/bash/reader.sh b/bash/reader.sh

index 3fcd36c..359a6a0 100644 (file)
--- a/bash/reader.sh
+++ b/bash/reader.sh
@@ -13,7 +13,9 @@ READ_ATOM () {
      case "${token}" in
          [0-9]*) _number "${token}" ;;
          \"*)    token="${token:1:-1}"
+                token="${token//\\\\/\\}"
                  token="${token//\\\"/\"}"
+                token="${token//\\n/$'\n'}"
                  _string "${token}" ;;
          :*)     _keyword "${token:1}" ;;
          nil)    r="${__nil}" ;;
diff --git a/c/reader.c b/c/reader.c

index ae16321..107ec4c 100644 (file)
--- a/c/reader.c
+++ b/c/reader.c
@@ -78,36 +78,12 @@ Reader *tokenize(char *line) {
  }
  
  
-// From http://creativeandcritical.net/str-replace-c/ - Laird Shaw
  char *replace_str(const char *str, const char *old, const char *new)
  {
-    char *ret, *r;
-    const char *p, *q;
-    size_t oldlen = strlen(old);
-    size_t count, retlen, newlen = strlen(new);
-
-    if (oldlen != newlen) {
-        for (count = 0, p = str; (q = strstr(p, old)) != NULL; p = q + oldlen)
-            count++;
-        /* this is undefined if p - str > PTRDIFF_MAX */
-        retlen = p - str + strlen(p) + count * (newlen - oldlen);
-    } else
-        retlen = strlen(str);
-
-    if ((ret = malloc(retlen + 1)) == NULL)
-        return NULL;
-
-    for (r = ret, p = str; (q = strstr(p, old)) != NULL; p = q + oldlen) {
-        /* this is undefined if q - p > PTRDIFF_MAX */
-        ptrdiff_t l = q - p;
-        memcpy(r, p, l);
-        r += l;
-        memcpy(r, new, newlen);
-        r += newlen;
-    }
-    strcpy(r, p);
-
-    return ret;
+    GRegex *reg = g_regex_new (old, 0, 0, NULL);
+    char *str_tmp = g_regex_replace_literal(reg, str, -1, 0, new, 0, NULL);
+    free(reg);
+    return str_tmp;
  }
  
  
@@ -142,8 +118,12 @@ MalVal *read_atom(Reader *reader) {
          atom = &mal_false;
      } else if (g_match_info_fetch_pos(matchInfo, 6, &pos, NULL) && pos != -1) {
          //g_print("read_atom string: %s\n", token);
-        char *str_tmp = replace_str(g_match_info_fetch(matchInfo, 6), "\\\"", "\"");
-        atom = malval_new_string(str_tmp);
+        char *str_tmp = replace_str(g_match_info_fetch(matchInfo, 6), "\\\\\"", "\"");
+        char *str_tmp2 = replace_str(str_tmp, "\\\\n", "\n");
+        free(str_tmp);
+        char *str_tmp3 = replace_str(str_tmp2, "\\\\\\\\", "\\");
+        free(str_tmp2);
+        atom = malval_new_string(str_tmp3);
      } else if (g_match_info_fetch_pos(matchInfo, 7, &pos, NULL) && pos != -1) {
          //g_print("read_atom keyword\n");
          atom = malval_new_keyword(g_match_info_fetch(matchInfo, 7));
diff --git a/coffee/reader.coffee b/coffee/reader.coffee

index 83d24d2..1c9ab3e 100644 (file)
--- a/coffee/reader.coffee
+++ b/coffee/reader.coffee
@@ -26,6 +26,7 @@ read_atom = (rdr) ->
      token.slice(1, token.length-1)
        .replace(/\\"/g, '"')
        .replace(/\\n/g, "\n")
+      .replace(/\\\\/g, "\\")
    else if token[0] == ':' then types._keyword(token[1..])
    else if token == "nil" then null
    else if token == "true" then true
diff --git a/crystal/reader.cr b/crystal/reader.cr

index 74a5605..60d86a0 100644 (file)
--- a/crystal/reader.cr
+++ b/crystal/reader.cr
@@ -82,6 +82,8 @@ class Reader
      when token == "false"   then false
      when token == "nil"     then nil
      when token[0] == '"'    then token[1..-2].gsub(/\\"/, "\"")
+                                             .gsub(/\\n/, "\n")
+                                             .gsub(/\\\\/, "\\")
      when token[0] == ':'    then "\u029e#{token[1..-1]}"
      else                         Mal::Symbol.new token
      end
diff --git a/cs/reader.cs b/cs/reader.cs

index 10973aa..a644309 100644 (file)
--- a/cs/reader.cs
+++ b/cs/reader.cs
@@ -72,7 +72,8 @@ namespace Mal {
                  string str = match.Groups[6].Value;
                  str = str.Substring(1, str.Length-2)
                      .Replace("\\\"", "\"")
-                    .Replace("\\n", "\n");
+                    .Replace("\\n", "\n")
+                    .Replace("\\\\", "\\");
                  return new Mal.types.MalString(str);
              } else if (match.Groups[7].Value != String.Empty) {
                  return new Mal.types.MalString("\u029e" + match.Groups[7].Value);
diff --git a/elixir/lib/mal/reader.ex b/elixir/lib/mal/reader.ex

index 13ea507..59365c7 100644 (file)
--- a/elixir/lib/mal/reader.ex
+++ b/elixir/lib/mal/reader.ex
@@ -87,6 +87,8 @@ defmodule Mal.Reader do
          token
            |> String.slice(1..-2)
            |> String.replace("\\\"", "\"")
+          |> String.replace("\\n", "\n")
+          |> String.replace("\\\\", "\\")
  
        integer?(token) ->
          Integer.parse(token)
diff --git a/es6/reader.js b/es6/reader.js

index 84bd38d..98b9861 100644 (file)
--- a/es6/reader.js
+++ b/es6/reader.js
@@ -32,7 +32,8 @@ function read_atom (reader) {
      } else if (token[0] === "\"") {
          return token.slice(1,token.length-1)
              .replace(/\\"/g, '"')
-            .replace(/\\n/g, "\n"); // string
+            .replace(/\\n/g, "\n")
+            .replace(/\\\\/g, "\\"); // string
      } else if (token[0] === ":") {
          return _keyword(token.slice(1));
      } else if (token === "nil") {
diff --git a/factor/mal/printer/printer.factor b/factor/mal/printer/printer.factor

index 8a540d9..0235dc2 100644 (file)
--- a/factor/mal/printer/printer.factor
+++ b/factor/mal/printer/printer.factor
@@ -16,6 +16,7 @@ M: string (pr-str)
      [
          "\\" "\\\\" replace
          "\"" "\\\"" replace
+        "\n" "\\n" replace
          "\"" dup surround
      ] when ;
  M: array (pr-str) '[ _ (pr-str) ] map " " join "(" ")" surround ;
diff --git a/factor/mal/reader/reader.factor b/factor/mal/reader/reader.factor

index 69380af..cd2fe8b 100644 (file)
--- a/factor/mal/reader/reader.factor
+++ b/factor/mal/reader/reader.factor
@@ -10,7 +10,9 @@ DEFER: read-form
  
  : (read-atom) ( str -- maltype )
      {
-        { [ dup first CHAR: " = ] [ rest but-last "\\\"" "\"" replace ] }
+        { [ dup first CHAR: " = ] [ rest but-last "\\\"" "\"" replace
+                                                  "\\n"  "\n" replace
+                                                  "\\\\" "\\" replace ] }
          { [ dup first CHAR: : = ] [ rest <malkeyword> ] }
          { [ dup "false" = ]       [ drop f ] }
          { [ dup "true" = ]        [ drop t ] }
diff --git a/go/src/reader/reader.go b/go/src/reader/reader.go

index ee64f56..6411d72 100644 (file)
--- a/go/src/reader/reader.go
+++ b/go/src/reader/reader.go
@@ -68,8 +68,10 @@ func read_atom(rdr Reader) (MalType, error) {
         } else if (*token)[0] == '"' {
                 str := (*token)[1 : len(*token)-1]
                 return strings.Replace(
-                       strings.Replace(str, `\"`, `"`, -1),
-                       `\n`, "\n", -1), nil
+                       strings.Replace(
+                        strings.Replace(str, `\"`, `"`, -1),
+                        `\n`, "\n", -1),
+                       `\\`, "\\", -1), nil
         } else if (*token)[0] == ':' {
                 return NewKeyword((*token)[1:len(*token)])
         } else if *token == "nil" {
diff --git a/guile/printer.scm b/guile/printer.scm

index 2461ba5..b5c7173 100644 (file)
--- a/guile/printer.scm
+++ b/guile/printer.scm
@@ -37,7 +37,7 @@
       (string-sub
        (string-sub s "\\\\" "\\\\")
        "\"" "\\\"")
-     "\n" "\\\n"))
+     "\n" "\\n"))
    (define (%pr_str o) (pr_str o readable?))
    (match obj
      ((? box?) (%pr_str (unbox obj)))
diff --git a/guile/reader.scm b/guile/reader.scm

index 3bbe632..c734759 100644 (file)
--- a/guile/reader.scm
+++ b/guile/reader.scm
@@ -80,8 +80,10 @@
  (define (read_atom reader)
    (define (->str s)
      (string-sub
-     (string-sub s "\\\\\"" "\"")
-     "\\\\\n" "\n"))
+     (string-sub
+      (string-sub s "\\\\\"" "\"")
+      "\\\\n" "\n")
+     "\\\\\\\\" "\\"))
    (let ((token (reader 'next)))
      (cond
       ((string-match "^-?[0-9][0-9.]*$" token)
diff --git a/js/reader.js b/js/reader.js

index dd4de9a..d2bcd91 100644 (file)
--- a/js/reader.js
+++ b/js/reader.js
@@ -34,7 +34,8 @@ function read_atom (reader) {
      } else if (token[0] === "\"") {
          return token.slice(1,token.length-1) 
              .replace(/\\"/g, '"')
-            .replace(/\\n/g, "\n"); // string
+            .replace(/\\n/g, "\n")
+            .replace(/\\\\/g, "\\"); // string
      } else if (token[0] === ":") {
          return types._keyword(token.slice(1));
      } else if (token === "nil") {
diff --git a/julia/reader.jl b/julia/reader.jl

index caabb94..487dfe8 100644 (file)
--- a/julia/reader.jl
+++ b/julia/reader.jl
@@ -39,9 +39,11 @@ function read_atom(rdr)
          float(token)
      elseif ismatch(r"^\".*\"$", token)
          replace(
-            replace(token[2:end-1],
-                    "\\\"", "\""),
-            "\\n", "\n")
+            replace(
+                replace(token[2:end-1],
+                        "\\\"", "\""),
+                "\\n", "\n"),
+            "\\\\", "\\")
      elseif token[1] == ':'
          "\u029e$(token[2:end])"
      elseif token == "nil"
diff --git a/kotlin/src/mal/printer.kt b/kotlin/src/mal/printer.kt

index e9f8703..a6c904e 100644 (file)
--- a/kotlin/src/mal/printer.kt
+++ b/kotlin/src/mal/printer.kt
@@ -7,7 +7,7 @@ fun pr_str(malType: MalType, print_readably: Boolean = false): String =
              ":" + malType.value.substring(1)
          } else if (malType is MalString) {
              if (print_readably) {
-                "\"" + malType.value.replace("\\", "\\\\").replace("\"", "\\\"") + "\""
+                "\"" + malType.value.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n") + "\""
              } else malType.value
          } else if (malType is MalConstant) {
              malType.value
diff --git a/kotlin/src/mal/reader.kt b/kotlin/src/mal/reader.kt

index 145c167..a8a33f0 100644 (file)
--- a/kotlin/src/mal/reader.kt
+++ b/kotlin/src/mal/reader.kt
@@ -139,7 +139,7 @@ fun read_atom(reader: Reader): MalType {
      } else if (groups[4]?.value != null) {
          FALSE
      } else if (groups[5]?.value != null) {
-        MalString((groups[5]?.value as String).replace("\\n", "\n").replace("\\\"", "\""))
+        MalString((groups[5]?.value as String).replace("\\n", "\n").replace("\\\"", "\"").replace("\\\\", "\\"))
      } else if (groups[6]?.value != null) {
          MalKeyword(groups[6]?.value as String)
      } else if (groups[7]?.value != null) {
diff --git a/lua/reader.lua b/lua/reader.lua

index 2d29a52..ee0a61e 100644 (file)
--- a/lua/reader.lua
+++ b/lua/reader.lua
@@ -47,6 +47,7 @@ function M.read_atom(rdr)
          local sval = string.sub(token,2,string.len(token)-1)
          sval = string.gsub(sval, '\\"', '"')
          sval = string.gsub(sval, '\\n', '\n')
+        sval = string.gsub(sval, '\\\\', '\\')
          return sval
      elseif string.sub(token,1,1) == ':' then
          return "\177" .. string.sub(token,2)
diff --git a/make/printer.mk b/make/printer.mk

index 10b9789..dda5ee6 100644 (file)
--- a/make/printer.mk
+++ b/make/printer.mk
@@ -32,7 +32,7 @@ symbol_pr_str = $($(1)_value)
  
  keyword_pr_str = $(COLON)$(patsubst $(__keyword)%,%,$(call str_decode,$($(1)_value)))
  
-string_pr_str = $(if $(filter $(__keyword)%,$(call str_decode,$($(1)_value))),$(COLON)$(patsubst $(__keyword)%,%,$(call str_decode,$($(1)_value))),$(if $(2),"$(subst $(DQUOTE),$(ESC_DQUOTE),$(subst $(SLASH),$(SLASH)$(SLASH),$(call str_decode,$($(1)_value))))",$(call str_decode,$($(1)_value))))
+string_pr_str = $(if $(filter $(__keyword)%,$(call str_decode,$($(1)_value))),$(COLON)$(patsubst $(__keyword)%,%,$(call str_decode,$($(1)_value))),$(if $(2),"$(subst $(NEWLINE),$(ESC_N),$(subst $(DQUOTE),$(ESC_DQUOTE),$(subst $(SLASH),$(SLASH)$(SLASH),$(call str_decode,$($(1)_value)))))",$(call str_decode,$($(1)_value))))
  
  function_pr_str = <$(if $(word 6,$(value $(1)_value)),$(wordlist 1,5,$(value $(1)_value))...,$(value $(1)_value))>
  
diff --git a/make/reader.mk b/make/reader.mk

index 8571785..5bf0937 100755 (executable)
--- a/make/reader.mk
+++ b/make/reader.mk
@@ -27,6 +27,9 @@ $(foreach ch,$(word 1,$($(1))),\
      ))
  endef
  
+# $(_NL) is used here instead of $(NEWLINE) because $(strip) removes
+# $(NEWLINE). str_encode will just pass through $(_NL) so str_decode
+# later will restore a correct newline
  define READ_STRING
  $(foreach ch,$(word 1,$($(1))),\
    $(if $(ch),\
@@ -34,11 +37,19 @@ $(foreach ch,$(word 1,$($(1))),\
        $(eval $(1) := $(wordlist 3,$(words $($(1))),$($(1))))\
        $(and $(READER_DEBUG),$(info READ_STRING ch: \$(word 1,$($(1))) | $($(1))))\
        $(DQUOTE) $(strip $(call READ_STRING,$(1))),\
+    $(if $(and $(filter \,$(ch)),$(filter n,$(word 2,$($(1))))),\
+      $(eval $(1) := $(wordlist 3,$(words $($(1))),$($(1))))\
+      $(and $(READER_DEBUG),$(info READ_STRING ch: \$(word 1,$($(1))) | $($(1))))\
+      $(_NL) $(strip $(call READ_STRING,$(1))),\
+    $(if $(and $(filter \,$(ch)),$(filter \,$(word 2,$($(1))))),\
+      $(eval $(1) := $(wordlist 3,$(words $($(1))),$($(1))))\
+      $(and $(READER_DEBUG),$(info READ_STRING ch: \$(word 1,$($(1))) | $($(1))))\
+      \ $(strip $(call READ_STRING,$(1))),\
      $(if $(filter $(DQUOTE),$(ch)),\
        ,\
        $(eval $(1) := $(wordlist 2,$(words $($(1))),$($(1))))\
        $(and $(READER_DEBUG),$(info READ_STRING ch: $(ch) | $($(1))))\
-      $(ch) $(strip $(call READ_STRING,$(1))))),))
+      $(ch) $(strip $(call READ_STRING,$(1))))))),))
  endef
  
  define READ_SYMBOL
diff --git a/make/util.mk b/make/util.mk

index ffe635d..9ba78f6 100644 (file)
--- a/make/util.mk
+++ b/make/util.mk
@@ -20,6 +20,7 @@ RBRACKET := ]
  DQUOTE := "# "
  SLASH := $(strip \ )
  ESC_DQUOTE := $(SLASH)$(DQUOTE)
+ESC_N := $(SLASH)n
  SQUOTE := '# '
  QQUOTE := `# `
  SPACE := 
diff --git a/miniMAL/reader.json b/miniMAL/reader.json

index 5fa113b..d9ceedd 100644 (file)
--- a/miniMAL/reader.json
+++ b/miniMAL/reader.json
@@ -39,9 +39,11 @@
      ["if", ["=", ["`", "\""], ["get", "token", 0]],
        [".",
          [".",
-          ["slice", "token", 1, ["-", ["count", "token"], 1]],
-          ["`", "replace"], ["RegExp", ["`", "\\\\\""], ["`", "g"]], ["`", "\""]],
-        ["`", "replace"], ["RegExp", ["`", "\\\\n"], ["`", "g"]], ["`", "\n"]],
+          [".",
+            ["slice", "token", 1, ["-", ["count", "token"], 1]],
+            ["`", "replace"], ["RegExp", ["`", "\\\\\""], ["`", "g"]], ["`", "\""]],
+          ["`", "replace"], ["RegExp", ["`", "\\\\n"], ["`", "g"]], ["`", "\n"]],
+        ["`", "replace"], ["RegExp", ["`", "\\\\\\\\"], ["`", "g"]], ["`", "\\"]],
      ["if", ["=", ["`", ":"], ["get", "token", 0]],
        ["keyword", ["slice", "token", 1]],
      ["if", ["=", ["`", "nil"], "token"],
diff --git a/nim/printer.nim b/nim/printer.nim

index 912b8a9..5aab6f2 100644 (file)
--- a/nim/printer.nim
+++ b/nim/printer.nim
@@ -3,7 +3,7 @@ import strutils, sequtils, tables, types
  proc str_handle(x: string, pr = true): string =
    if x.len > 0 and x[0] == '\xff':
              result = ":" & x[1 .. x.high]
-  elif pr:  result = "\"" & x.replace("\\", "\\\\").replace("\"", "\\\"") & "\""
+  elif pr:  result = "\"" & x.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n") & "\""
    else:     result = x
  
  proc pr_str*(m: MalType, pr = true): string =
diff --git a/nim/reader.nim b/nim/reader.nim

index 40f3352..323b3d0 100644 (file)
--- a/nim/reader.nim
+++ b/nim/reader.nim
@@ -61,7 +61,7 @@ proc read_hash_map(r: var Reader): MalType =
  proc read_atom(r: var Reader): MalType =
    let t = r.next
    if t.match(intRE): number t.parseInt
-  elif t[0] == '"':  str t[1 .. <t.high].replace("\\\"", "\"")
+  elif t[0] == '"':  str t[1 .. <t.high].replace("\\\"", "\"").replace("\\n", "\n").replace("\\\\", "\\")
    elif t[0] == ':':  keyword t[1 .. t.high]
    elif t == "nil":   nilObj
    elif t == "true":  trueObj
diff --git a/perl/reader.pm b/perl/reader.pm

index 501f992..9527231 100644 (file)
--- a/perl/reader.pm
+++ b/perl/reader.pm
@@ -35,6 +35,7 @@ sub read_atom {
              my $str = substr $token, 1, -1;
              $str =~ s/\\"/"/g;
              $str =~ s/\\n/\n/g;
+            $str =~ s/\\\\/\\/g;
              return String->new($str)
          }
          when(/^:/) { return _keyword(substr($token,1)) }
diff --git a/php/printer.php b/php/printer.php

index 130d31b..d4d53e0 100644 (file)
--- a/php/printer.php
+++ b/php/printer.php
@@ -26,7 +26,7 @@ function _pr_str($obj, $print_readably=True) {
          if (strpos($obj, chr(0x7f)) === 0) {
              return ":".substr($obj,1);
          } elseif ($print_readably) {
-            $obj = preg_replace('/"/', '\\"', preg_replace('/\\\\/', '\\\\\\\\', $obj));
+            $obj = preg_replace('/\n/', '\\n', preg_replace('/"/', '\\"', preg_replace('/\\\\/', '\\\\\\\\', $obj)));
              return '"' . $obj . '"';
          } else {
              return $obj;
diff --git a/php/reader.php b/php/reader.php

index ed9063f..68c21ea 100644 (file)
--- a/php/reader.php
+++ b/php/reader.php
@@ -39,6 +39,8 @@ function read_atom($reader) {
      } elseif ($token[0] === "\"") {
          $str = substr($token, 1, -1);
          $str = preg_replace('/\\\\"/', '"', $str);
+        $str = preg_replace('/\\\\n/', "\n", $str);
+        $str = preg_replace('/\\\\\\\\/', "\\", $str);
          return $str;
      } elseif ($token[0] === ":") {
          return _keyword(substr($token,1));
diff --git a/process/guide.md b/process/guide.md

index 1baa847..db751ff 100644 (file)
--- a/process/guide.md
+++ b/process/guide.md
@@ -365,15 +365,17 @@ and each step will give progressively more bang for the buck.
  
  * Add support for the other basic data type to your reader and printer
    functions: string, nil, true, and false. These become mandatory at
-  step 4. When a string is read, a slash followed by a doublequote is
-  translated into a plain doublequote character and a slash followed by
-  "n" is translated into a newline. To properly print a string (for
-  step 4 string functions), the `pr_str` function needs another
-  parameter called `print_readably`. When `print_readably` is true,
-  doublequotes and newlines are translated into their printed
-  representations (the reverse of the reader). The `PRINT` function in
-  the main program should call `pr_str` with print_readably set to
-  true.
+  step 4. When a string is read, the following transformations are
+  applied: a backslash followed by a doublequote is translated into
+  a plain doublequote character, a backslash followed by "n" is
+  translated into a newline, and a backslash followed by another
+  backslash is translated into a single backslash. To properly print
+  a string (for step 4 string functions), the `pr_str` function needs
+  another parameter called `print_readably`.  When `print_readably` is
+  true, doublequotes, newlines, and backslashes are translated into
+  their printed representations (the reverse of the reader). The
+  `PRINT` function in the main program should call `pr_str` with
+  print_readably set to true.
  
  * Add support for the other mal types: keyword, vector, hash-map, and
    atom.
diff --git a/ps/printer.ps b/ps/printer.ps

index 52d6c1e..b33ee5b 100644 (file)
--- a/ps/printer.ps
+++ b/ps/printer.ps
@@ -53,6 +53,7 @@
                  (")
                  obj (\\) (\\\\) replace
                      (") (\\") replace
+                    (\n) (\\n) replace
                  (") concatenate concatenate
              }{
                  obj
diff --git a/ps/reader.ps b/ps/reader.ps

index 4b268c0..ebfa963 100644 (file)
--- a/ps/reader.ps
+++ b/ps/reader.ps
@@ -103,6 +103,8 @@ end } def
      } loop
      str start cnt getinterval % the matched string
      (\\") (") replace
+    (\\n) (\n) replace
+    (\\\\) (\\) replace
      str idx % return: new_string string new_idx
  end } def
  
diff --git a/python/Dockerfile b/python/Dockerfile

index d101d2e..65ada90 100644 (file)
--- a/python/Dockerfile
+++ b/python/Dockerfile
@@ -22,3 +22,4 @@ WORKDIR /mal
  ##########################################################
  
  # Nothing additional needed for python
+RUN apt-get -y install python3
diff --git a/python/printer.py b/python/printer.py

index 98e3e90..e36d0ea 100644 (file)
--- a/python/printer.py
+++ b/python/printer.py
@@ -1,5 +1,8 @@
  import mal_types as types
  
+def _escape(s):
+    return s.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
+
  def _pr_str(obj, print_readably=True):
      _r = print_readably
      if types._list_Q(obj):
@@ -15,7 +18,7 @@ def _pr_str(obj, print_readably=True):
          if len(obj) > 0 and obj[0] == types.u('\u029e'):
              return ':' + obj[1:]
          elif print_readably:
-            return '"' + obj.encode('unicode_escape').decode('latin1').replace('"', '\\"') + '"'
+            return '"' + _escape(obj) + '"'
          else:
              return obj
      elif types._nil_Q(obj):
diff --git a/python/reader.py b/python/reader.py

index 71ad3d6..d1c4d27 100644 (file)
--- a/python/reader.py
+++ b/python/reader.py
@@ -19,16 +19,21 @@ class Reader():
              return None
  
  def tokenize(str):
-    tre = re.compile(r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"|;.*|[^\s\[\]{}()'"`@,;]+)""");
+    tre = re.compile(r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"?|;.*|[^\s\[\]{}()'"`@,;]+)""");
      return [t for t in re.findall(tre, str) if t[0] != ';']
  
+def _unescape(s):
+    return s.replace('\\"', '"').replace('\\n', '\n').replace('\\\\', '\\')
+
  def read_atom(reader):
      int_re = re.compile(r"-?[0-9]+$")
      float_re = re.compile(r"-?[0-9][0-9.]*$")
      token = reader.next()
      if re.match(int_re, token):     return int(token)
      elif re.match(float_re, token): return int(token)
-    elif token[0] == '"':           return token[1:-1].replace('\\"', '"')
+    elif token[0] == '"':
+        if token[-1] == '"':        return _unescape(token[1:-1])
+        else:                       raise Exception("expected '\"', got EOF")
      elif token[0] == ':':           return _keyword(token[1:])
      elif token == "nil":            return None
      elif token == "true":           return True
diff --git a/r/reader.r b/r/reader.r

index 7f20288..0305810 100644 (file)
--- a/r/reader.r
+++ b/r/reader.r
@@ -43,9 +43,10 @@ read_atom <- function(rdr) {
      } else if (re_match("^-?[0-9][0-9.]*$", token)) {
          as.double(token)
      } else if (substr(token,1,1) == "\"") {
-        gsub("\\\\n", "\\n",
-             gsub("\\\\\"", "\"",
-                  substr(token, 2, nchar(token)-1)))
+        gsub("\\\\\\\\", "\\\\",
+            gsub("\\\\n", "\n",
+                 gsub("\\\\\"", "\"",
+                      substr(token, 2, nchar(token)-1))))
      } else if (substr(token,1,1) == ":") {
          new.keyword(substring(token,2))
      } else if (token == "nil") {
diff --git a/racket/reader.rkt b/racket/reader.rkt

index 6db2e67..280b9af 100644 (file)
--- a/racket/reader.rkt
+++ b/racket/reader.rkt
@@ -34,9 +34,11 @@
            [(regexp-match #px"^\".*\"$" token)
             (string-replace
               (string-replace
-               (substring token 1 (- (string-length token) 1))
-               "\\\"" "\"")
-             "\\n" "\n")]
+               (string-replace
+                 (substring token 1 (- (string-length token) 1))
+                 "\\\"" "\"")
+               "\\n" "\n")
+             "\\\\" "\\")]
            [(regexp-match #px"^:" token) (_keyword (substring token 1))]
            [(equal? "nil" token) nil]
            [(equal? "true" token) #t]
diff --git a/rpython/printer.py b/rpython/printer.py

index 7df03ce..67b607b 100644 (file)
--- a/rpython/printer.py
+++ b/rpython/printer.py
@@ -14,9 +14,9 @@ def _pr_a_str(s, print_readably=True):
      if len(s) > 0 and s[0] == u'\u029e':
          return u':' + s[1:]
      elif print_readably:
-        return u'"' + types._replace(u'\\n', u'\\n',
+        return u'"' + types._replace(u'\n', u'\\n',
                          types._replace(u'\"', u'\\"',
-                        types._replace(u'\\', u'\\\\', s))) + u'"'
+                          types._replace(u'\\', u'\\\\', s))) + u'"'
      else:
          return s
  
diff --git a/rpython/reader.py b/rpython/reader.py

index bec422b..1e5acf6 100644 (file)
--- a/rpython/reader.py
+++ b/rpython/reader.py
@@ -50,7 +50,11 @@ def read_atom(reader):
          if end < 2:
              return MalStr(u"")
          else:
-            return MalStr(types._replace(u'\\"', u'"', unicode(token[1:end])))
+            s = unicode(token[1:end])
+            s = types._replace(u'\\"', u'"', s)
+            s = types._replace(u'\\n', u"\n", s)
+            s = types._replace(u'\\\\', u"\\", s)
+            return MalStr(s)
      elif token[0] == ':':           return _keywordu(unicode(token[1:]))
      elif token == "nil":            return types.nil
      elif token == "true":           return types.true
diff --git a/ruby/reader.rb b/ruby/reader.rb

index badc6ec..1e60174 100644 (file)
--- a/ruby/reader.rb
+++ b/ruby/reader.rb
@@ -22,8 +22,8 @@ def tokenize(str)
      }
  end
  
-def parse_str(t)
-    return t[1..-2].gsub(/\\"/, '"').gsub(/\\n/, "\n") # unescape
+def parse_str(t) # trim and unescape
+    return t[1..-2].gsub(/\\"/, '"').gsub(/\\n/, "\n").gsub(/\\\\/, "\\")
  end
  
  def read_atom(rdr)
diff --git a/runtest.py b/runtest.py

index a79f9da..2803989 100755 (executable)
--- a/runtest.py
+++ b/runtest.py
@@ -167,7 +167,7 @@ class TestReader:
              while self.data:
                  line = self.data[0]
                  if line[0:3] == ";=>":
-                    self.ret = line[3:].replace('\\r', '\r').replace('\\n', '\n')
+                    self.ret = line[3:]
                      self.line_num += 1
                      self.data.pop(0)
                      break
diff --git a/rust/Makefile b/rust/Makefile

index 39b0de8..ae5f2ee 100644 (file)
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -21,8 +21,9 @@ mal: ${SOURCES_BASE} $(word $(words ${SOURCES_LISP}),${SOURCES_LISP})
         cargo build --release
         cp $(word $(words ${BINS}),${BINS})  $@
  
-#$(BINS): target/release/%: src/%.rs
-#      cargo build --release $*
+# TODO: would be nice to build just the step requested
+$(BINS): target/release/%: src/bin/%.rs $(wildcard src/*.rs)
+       cargo build --release
  
  clean:
         cargo clean
diff --git a/rust/src/printer.rs b/rust/src/printer.rs

index 591822b..4150d39 100644 (file)
--- a/rust/src/printer.rs
+++ b/rust/src/printer.rs
@@ -23,8 +23,9 @@ pub fn escape_str(s: &str) -> String {
  
  pub fn unescape_str(s: &str) -> String {
      let re1 = regex!(r#"\\""#);
-    let re2 = regex!(r#"\n"#);
-    re2.replace_all(&re1.replace_all(&s, "\""), "\n")
+    let re2 = regex!(r#"\\n"#);
+    let re3 = regex!(r#"\\\\"#);
+    re3.replace_all(&re2.replace_all(&re1.replace_all(&s, "\""), "\n"), "\\")
  }
  
  pub fn pr_list(lst: &Vec<MalVal>, pr: bool,
diff --git a/scala/reader.scala b/scala/reader.scala

index de45923..c8d75e4 100644 (file)
--- a/scala/reader.scala
+++ b/scala/reader.scala
@@ -26,7 +26,7 @@ object reader {
    }
  
    def parse_str(s: String): String = {
-    s.replace("\\\"", "\"").replace("\\n", "\n")
+    s.replace("\\\"", "\"").replace("\\n", "\n").replace("\\\\", "\\")
    }
  
    def read_atom(rdr: Reader): Any = {
diff --git a/tests/step4_if_fn_do.mal b/tests/step4_if_fn_do.mal

index b6ae4b5..15da56a 100644 (file)
--- a/tests/step4_if_fn_do.mal
+++ b/tests/step4_if_fn_do.mal
@@ -243,6 +243,11 @@ a
  "\""
  ;=>"\""
  
+"abc\ndef\nghi"
+;=>"abc\ndef\nghi"
+
+"abc\\def\\ghi"
+;=>"abc\\def\\ghi"
  
  ;; Testing pr-str
  
@@ -264,6 +269,12 @@ a
  (pr-str (list 1 2 "abc" "\"") "def")
  ;=>"(1 2 \"abc\" \"\\\"\") \"def\""
  
+(pr-str "abc\ndef\nghi")
+;=>"\"abc\\ndef\\nghi\""
+
+(pr-str "abc\\def\\ghi")
+;=>"\"abc\\\\def\\\\ghi\""
+
  
  ;; Testing str
  
@@ -285,6 +296,12 @@ a
  (str "abc  def" "ghi jkl")
  ;=>"abc  defghi jkl"
  
+(str "abc\ndef\nghi")
+;=>"abc\ndef\nghi"
+
+(str "abc\\def\\ghi")
+;=>"abc\\def\\ghi"
+
  ;;; TODO: get this working properly
  ;;;(str (list 1 2 "abc" "\"") "def")
  ;;;;=>"(1 2 \"abc\" \"\\\"\")def"
@@ -310,6 +327,14 @@ a
  ; "\""
  ;=>nil
  
+(prn "abc\ndef\nghi")
+; "abc\ndef\nghi"
+;=>nil
+
+(prn "abc\\def\\ghi")
+; "abc\\def\\ghi"
+;=>nil
+
  (prn (list 1 2 "abc" "\"") "def")
  ; (1 2 "abc" "\"") "def"
  ;=>nil
@@ -335,6 +360,16 @@ a
  ; "
  ;=>nil
  
+(println "abc\ndef\nghi")
+; abc
+; def
+; ghi
+;=>nil
+
+(println "abc\\def\\ghi")
+; abc\def\ghi
+;=>nil
+
  (println (list 1 2 "abc" "\"") "def")
  ; (1 2 abc ") def
  ;=>nil
diff --git a/vb/reader.vb b/vb/reader.vb

index 9d4e03d..cc42a14 100644 (file)
--- a/vb/reader.vb
+++ b/vb/reader.vb
@@ -84,7 +84,8 @@ Namespace Mal
                  return New Mal.types.MalString(
                          str.Substring(1, str.Length-2) _
                          .Replace("\""", """") _
-                        .Replace("\n", Environment.NewLine))
+                        .Replace("\n", Environment.NewLine) _
+                        .Replace("\\", "\"))
              Else If match.Groups(7).Value <> String.Empty Then
                  return New Mal.types.MalString(ChrW(&H029e) & match.Groups(7).Value)
              Else If match.Groups(8).Value <> String.Empty Then
diff --git a/vimscript/reader.vim b/vimscript/reader.vim

index c3712ba..38510cd 100644 (file)
--- a/vimscript/reader.vim
+++ b/vimscript/reader.vim
@@ -46,6 +46,7 @@ function ParseString(token)
    let str = a:token[1:-2]
    let str = substitute(str, '\\"', '"', "g")
    let str = substitute(str, '\\n', "\n", "g")
+  let str = substitute(str, '\\\\', "\\", "g")
    return str
  endfunction
author	Joel Martin <github@martintribe.org>
	Sat, 31 Oct 2015 03:05:49 +0000 (22:05 -0500)
committer	Joel Martin <github@martintribe.org>
	Sat, 31 Oct 2015 03:05:49 +0000 (22:05 -0500)
awk/reader.awk		patch \| blob \| blame \| history
bash/printer.sh		patch \| blob \| blame \| history
bash/reader.sh		patch \| blob \| blame \| history
c/reader.c		patch \| blob \| blame \| history
coffee/reader.coffee		patch \| blob \| blame \| history
crystal/reader.cr		patch \| blob \| blame \| history
cs/reader.cs		patch \| blob \| blame \| history
elixir/lib/mal/reader.ex		patch \| blob \| blame \| history
es6/reader.js		patch \| blob \| blame \| history
factor/mal/printer/printer.factor		patch \| blob \| blame \| history
factor/mal/reader/reader.factor		patch \| blob \| blame \| history
go/src/reader/reader.go		patch \| blob \| blame \| history
guile/printer.scm		patch \| blob \| blame \| history
guile/reader.scm		patch \| blob \| blame \| history
js/reader.js		patch \| blob \| blame \| history
julia/reader.jl		patch \| blob \| blame \| history
kotlin/src/mal/printer.kt		patch \| blob \| blame \| history
kotlin/src/mal/reader.kt		patch \| blob \| blame \| history
lua/reader.lua		patch \| blob \| blame \| history
make/printer.mk		patch \| blob \| blame \| history
make/reader.mk		patch \| blob \| blame \| history
make/util.mk		patch \| blob \| blame \| history
miniMAL/reader.json		patch \| blob \| blame \| history
nim/printer.nim		patch \| blob \| blame \| history
nim/reader.nim		patch \| blob \| blame \| history
perl/reader.pm		patch \| blob \| blame \| history
php/printer.php		patch \| blob \| blame \| history
php/reader.php		patch \| blob \| blame \| history
process/guide.md		patch \| blob \| blame \| history
ps/printer.ps		patch \| blob \| blame \| history
ps/reader.ps		patch \| blob \| blame \| history
python/Dockerfile		patch \| blob \| blame \| history
python/printer.py		patch \| blob \| blame \| history
python/reader.py		patch \| blob \| blame \| history
r/reader.r		patch \| blob \| blame \| history
racket/reader.rkt		patch \| blob \| blame \| history
rpython/printer.py		patch \| blob \| blame \| history
rpython/reader.py		patch \| blob \| blame \| history
ruby/reader.rb		patch \| blob \| blame \| history
runtest.py		patch \| blob \| blame \| history
rust/Makefile		patch \| blob \| blame \| history
rust/src/printer.rs		patch \| blob \| blame \| history
scala/reader.scala		patch \| blob \| blame \| history
tests/step4_if_fn_do.mal		patch \| blob \| blame \| history
vb/reader.vb		patch \| blob \| blame \| history
vimscript/reader.vim		patch \| blob \| blame \| history