# Decode the escape sequences of a string token.
#
# The last character of `token` is dropped first (presumably the closing
# double quote; the opening quote is presumably removed by the caller --
# confirm against the call site).  The remaining text is then scanned once,
# left to right:
#   backslash backslash -> backslash
#   backslash quote     -> quote
#   backslash n         -> newline
# Any other backslash is copied through unchanged.
#
# A single pass is required: running one global substitution per escape
# lets the output of one pass be re-interpreted by the next, so an escaped
# backslash followed by "n" would wrongly become a newline.
#
# `v` and `r` are local variables (awk's extra-parameter convention).
function reader_read_string(token, v, r)
{
	token = substr(token, 1, length(token) - 1)
	v = ""
	while (match(token, /\\[\\"n]/)) {
		# r is the character that follows the backslash.
		r = substr(token, RSTART + 1, 1)
		v = v substr(token, 1, RSTART - 1) (r == "n" ? "\n" : r)
		# Continue after the consumed escape so its output is never rescanned.
		token = substr(token, RSTART + RLENGTH)
	}
	return v token
}
function reader_read_atom(token)
r=":${s:2}"
elif [ "${print_readably}" == "yes" ]; then
s="${s//\\/\\\\}"
- r="\"${s//\"/\\\"}\""
+ s="${s//\"/\\\"}"
+ r="\"${s//$'\n'/\\n}\""
else
r="${s}"
fi
case "${token}" in
[0-9]*) _number "${token}" ;;
\"*) token="${token:1:-1}"
+ token="${token//\\\\/\\}"
token="${token//\\\"/\"}"
+ token="${token//\\n/$'\n'}"
_string "${token}" ;;
:*) _keyword "${token:1}" ;;
nil) r="${__nil}" ;;
}
-// From http://creativeandcritical.net/str-replace-c/ - Laird Shaw
char *replace_str(const char *str, const char *old, const char *new)
{
- char *ret, *r;
- const char *p, *q;
- size_t oldlen = strlen(old);
- size_t count, retlen, newlen = strlen(new);
-
- if (oldlen != newlen) {
- for (count = 0, p = str; (q = strstr(p, old)) != NULL; p = q + oldlen)
- count++;
- /* this is undefined if p - str > PTRDIFF_MAX */
- retlen = p - str + strlen(p) + count * (newlen - oldlen);
- } else
- retlen = strlen(str);
-
- if ((ret = malloc(retlen + 1)) == NULL)
- return NULL;
-
- for (r = ret, p = str; (q = strstr(p, old)) != NULL; p = q + oldlen) {
- /* this is undefined if q - p > PTRDIFF_MAX */
- ptrdiff_t l = q - p;
- memcpy(r, p, l);
- r += l;
- memcpy(r, new, newlen);
- r += newlen;
- }
- strcpy(r, p);
-
- return ret;
+ GRegex *reg = g_regex_new (old, 0, 0, NULL);
+ char *str_tmp = g_regex_replace_literal(reg, str, -1, 0, new, 0, NULL);
+ free(reg);
+ return str_tmp;
}
atom = &mal_false;
} else if (g_match_info_fetch_pos(matchInfo, 6, &pos, NULL) && pos != -1) {
//g_print("read_atom string: %s\n", token);
- char *str_tmp = replace_str(g_match_info_fetch(matchInfo, 6), "\\\"", "\"");
- atom = malval_new_string(str_tmp);
+ char *str_tmp = replace_str(g_match_info_fetch(matchInfo, 6), "\\\\\"", "\"");
+ char *str_tmp2 = replace_str(str_tmp, "\\\\n", "\n");
+ free(str_tmp);
+ char *str_tmp3 = replace_str(str_tmp2, "\\\\\\\\", "\\");
+ free(str_tmp2);
+ atom = malval_new_string(str_tmp3);
} else if (g_match_info_fetch_pos(matchInfo, 7, &pos, NULL) && pos != -1) {
//g_print("read_atom keyword\n");
atom = malval_new_keyword(g_match_info_fetch(matchInfo, 7));
token.slice(1, token.length-1)
.replace(/\\"/g, '"')
.replace(/\\n/g, "\n")
+ .replace(/\\\\/g, "\\")
else if token[0] == ':' then types._keyword(token[1..])
else if token == "nil" then null
else if token == "true" then true
when token == "false" then false
when token == "nil" then nil
when token[0] == '"' then token[1..-2].gsub(/\\"/, "\"")
+ .gsub(/\\n/, "\n")
+ .gsub(/\\\\/, "\\")
when token[0] == ':' then "\u029e#{token[1..-1]}"
else Mal::Symbol.new token
end
string str = match.Groups[6].Value;
str = str.Substring(1, str.Length-2)
.Replace("\\\"", "\"")
- .Replace("\\n", "\n");
+ .Replace("\\n", "\n")
+ .Replace("\\\\", "\\");
return new Mal.types.MalString(str);
} else if (match.Groups[7].Value != String.Empty) {
return new Mal.types.MalString("\u029e" + match.Groups[7].Value);
token
|> String.slice(1..-2)
|> String.replace("\\\"", "\"")
+ |> String.replace("\\n", "\n")
+ |> String.replace("\\\\", "\\")
integer?(token) ->
Integer.parse(token)
} else if (token[0] === "\"") {
return token.slice(1,token.length-1)
.replace(/\\"/g, '"')
- .replace(/\\n/g, "\n"); // string
+ .replace(/\\n/g, "\n")
+ .replace(/\\\\/g, "\\"); // string
} else if (token[0] === ":") {
return _keyword(token.slice(1));
} else if (token === "nil") {
[
"\\" "\\\\" replace
"\"" "\\\"" replace
+ "\n" "\\n" replace
"\"" dup surround
] when ;
M: array (pr-str) '[ _ (pr-str) ] map " " join "(" ")" surround ;
: (read-atom) ( str -- maltype )
{
- { [ dup first CHAR: " = ] [ rest but-last "\\\"" "\"" replace ] }
+ { [ dup first CHAR: " = ] [ rest but-last "\\\"" "\"" replace
+ "\\n" "\n" replace
+ "\\\\" "\\" replace ] }
{ [ dup first CHAR: : = ] [ rest <malkeyword> ] }
{ [ dup "false" = ] [ drop f ] }
{ [ dup "true" = ] [ drop t ] }
} else if (*token)[0] == '"' {
str := (*token)[1 : len(*token)-1]
return strings.Replace(
- strings.Replace(str, `\"`, `"`, -1),
- `\n`, "\n", -1), nil
+ strings.Replace(
+ strings.Replace(str, `\"`, `"`, -1),
+ `\n`, "\n", -1),
+ `\\`, "\\", -1), nil
} else if (*token)[0] == ':' {
return NewKeyword((*token)[1:len(*token)])
} else if *token == "nil" {
(string-sub
(string-sub s "\\\\" "\\\\")
"\"" "\\\"")
- "\n" "\\\n"))
+ "\n" "\\n"))
(define (%pr_str o) (pr_str o readable?))
(match obj
((? box?) (%pr_str (unbox obj)))
(define (read_atom reader)
(define (->str s)
(string-sub
- (string-sub s "\\\\\"" "\"")
- "\\\\\n" "\n"))
+ (string-sub
+ (string-sub s "\\\\\"" "\"")
+ "\\\\n" "\n")
+ "\\\\\\\\" "\\"))
(let ((token (reader 'next)))
(cond
((string-match "^-?[0-9][0-9.]*$" token)
} else if (token[0] === "\"") {
return token.slice(1,token.length-1)
.replace(/\\"/g, '"')
- .replace(/\\n/g, "\n"); // string
+ .replace(/\\n/g, "\n")
+ .replace(/\\\\/g, "\\"); // string
} else if (token[0] === ":") {
return types._keyword(token.slice(1));
} else if (token === "nil") {
float(token)
elseif ismatch(r"^\".*\"$", token)
replace(
- replace(token[2:end-1],
- "\\\"", "\""),
- "\\n", "\n")
+ replace(
+ replace(token[2:end-1],
+ "\\\"", "\""),
+ "\\n", "\n"),
+ "\\\\", "\\")
elseif token[1] == ':'
"\u029e$(token[2:end])"
elseif token == "nil"
":" + malType.value.substring(1)
} else if (malType is MalString) {
if (print_readably) {
- "\"" + malType.value.replace("\\", "\\\\").replace("\"", "\\\"") + "\""
+ "\"" + malType.value.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n") + "\""
} else malType.value
} else if (malType is MalConstant) {
malType.value
} else if (groups[4]?.value != null) {
FALSE
} else if (groups[5]?.value != null) {
- MalString((groups[5]?.value as String).replace("\\n", "\n").replace("\\\"", "\""))
+ MalString((groups[5]?.value as String).replace("\\n", "\n").replace("\\\"", "\"").replace("\\\\", "\\"))
} else if (groups[6]?.value != null) {
MalKeyword(groups[6]?.value as String)
} else if (groups[7]?.value != null) {
local sval = string.sub(token,2,string.len(token)-1)
sval = string.gsub(sval, '\\"', '"')
sval = string.gsub(sval, '\\n', '\n')
+ sval = string.gsub(sval, '\\\\', '\\')
return sval
elseif string.sub(token,1,1) == ':' then
return "\177" .. string.sub(token,2)
keyword_pr_str = $(COLON)$(patsubst $(__keyword)%,%,$(call str_decode,$($(1)_value)))
-string_pr_str = $(if $(filter $(__keyword)%,$(call str_decode,$($(1)_value))),$(COLON)$(patsubst $(__keyword)%,%,$(call str_decode,$($(1)_value))),$(if $(2),"$(subst $(DQUOTE),$(ESC_DQUOTE),$(subst $(SLASH),$(SLASH)$(SLASH),$(call str_decode,$($(1)_value))))",$(call str_decode,$($(1)_value))))
+string_pr_str = $(if $(filter $(__keyword)%,$(call str_decode,$($(1)_value))),$(COLON)$(patsubst $(__keyword)%,%,$(call str_decode,$($(1)_value))),$(if $(2),"$(subst $(NEWLINE),$(ESC_N),$(subst $(DQUOTE),$(ESC_DQUOTE),$(subst $(SLASH),$(SLASH)$(SLASH),$(call str_decode,$($(1)_value)))))",$(call str_decode,$($(1)_value))))
function_pr_str = <$(if $(word 6,$(value $(1)_value)),$(wordlist 1,5,$(value $(1)_value))...,$(value $(1)_value))>
))
endef
+# $(_NL) is used here instead of $(NEWLINE) because $(strip) removes
+# $(NEWLINE). str_encode will just pass through $(_NL) so str_decode
+# later will restore a correct newline
define READ_STRING
$(foreach ch,$(word 1,$($(1))),\
$(if $(ch),\
$(eval $(1) := $(wordlist 3,$(words $($(1))),$($(1))))\
$(and $(READER_DEBUG),$(info READ_STRING ch: \$(word 1,$($(1))) | $($(1))))\
$(DQUOTE) $(strip $(call READ_STRING,$(1))),\
+ $(if $(and $(filter \,$(ch)),$(filter n,$(word 2,$($(1))))),\
+ $(eval $(1) := $(wordlist 3,$(words $($(1))),$($(1))))\
+ $(and $(READER_DEBUG),$(info READ_STRING ch: \$(word 1,$($(1))) | $($(1))))\
+ $(_NL) $(strip $(call READ_STRING,$(1))),\
+ $(if $(and $(filter \,$(ch)),$(filter \,$(word 2,$($(1))))),\
+ $(eval $(1) := $(wordlist 3,$(words $($(1))),$($(1))))\
+ $(and $(READER_DEBUG),$(info READ_STRING ch: \$(word 1,$($(1))) | $($(1))))\
+ \ $(strip $(call READ_STRING,$(1))),\
$(if $(filter $(DQUOTE),$(ch)),\
,\
$(eval $(1) := $(wordlist 2,$(words $($(1))),$($(1))))\
$(and $(READER_DEBUG),$(info READ_STRING ch: $(ch) | $($(1))))\
- $(ch) $(strip $(call READ_STRING,$(1))))),))
+ $(ch) $(strip $(call READ_STRING,$(1))))))),))
endef
define READ_SYMBOL
DQUOTE := "# "
SLASH := $(strip \ )
ESC_DQUOTE := $(SLASH)$(DQUOTE)
+ESC_N := $(SLASH)n
SQUOTE := '# '
QQUOTE := `# `
SPACE :=
["if", ["=", ["`", "\""], ["get", "token", 0]],
[".",
[".",
- ["slice", "token", 1, ["-", ["count", "token"], 1]],
- ["`", "replace"], ["RegExp", ["`", "\\\\\""], ["`", "g"]], ["`", "\""]],
- ["`", "replace"], ["RegExp", ["`", "\\\\n"], ["`", "g"]], ["`", "\n"]],
+ [".",
+ ["slice", "token", 1, ["-", ["count", "token"], 1]],
+ ["`", "replace"], ["RegExp", ["`", "\\\\\""], ["`", "g"]], ["`", "\""]],
+ ["`", "replace"], ["RegExp", ["`", "\\\\n"], ["`", "g"]], ["`", "\n"]],
+ ["`", "replace"], ["RegExp", ["`", "\\\\\\\\"], ["`", "g"]], ["`", "\\"]],
["if", ["=", ["`", ":"], ["get", "token", 0]],
["keyword", ["slice", "token", 1]],
["if", ["=", ["`", "nil"], "token"],
proc str_handle(x: string, pr = true): string =
if x.len > 0 and x[0] == '\xff':
result = ":" & x[1 .. x.high]
- elif pr: result = "\"" & x.replace("\\", "\\\\").replace("\"", "\\\"") & "\""
+ elif pr: result = "\"" & x.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n") & "\""
else: result = x
proc pr_str*(m: MalType, pr = true): string =
proc read_atom(r: var Reader): MalType =
let t = r.next
if t.match(intRE): number t.parseInt
- elif t[0] == '"': str t[1 .. <t.high].replace("\\\"", "\"")
+ elif t[0] == '"': str t[1 .. <t.high].replace("\\\"", "\"").replace("\\n", "\n").replace("\\\\", "\\")
elif t[0] == ':': keyword t[1 .. t.high]
elif t == "nil": nilObj
elif t == "true": trueObj
my $str = substr $token, 1, -1;
$str =~ s/\\"/"/g;
$str =~ s/\\n/\n/g;
+ $str =~ s/\\\\/\\/g;
return String->new($str)
}
when(/^:/) { return _keyword(substr($token,1)) }
if (strpos($obj, chr(0x7f)) === 0) {
return ":".substr($obj,1);
} elseif ($print_readably) {
- $obj = preg_replace('/"/', '\\"', preg_replace('/\\\\/', '\\\\\\\\', $obj));
+ $obj = preg_replace('/\n/', '\\n', preg_replace('/"/', '\\"', preg_replace('/\\\\/', '\\\\\\\\', $obj)));
return '"' . $obj . '"';
} else {
return $obj;
} elseif ($token[0] === "\"") {
$str = substr($token, 1, -1);
$str = preg_replace('/\\\\"/', '"', $str);
+ $str = preg_replace('/\\\\n/', "\n", $str);
+ $str = preg_replace('/\\\\\\\\/', "\\", $str);
return $str;
} elseif ($token[0] === ":") {
return _keyword(substr($token,1));
* Add support for the other basic data types to your reader and printer
functions: string, nil, true, and false. These become mandatory at
- step 4. When a string is read, a slash followed by a doublequote is
- translated into a plain doublequote character and a slash followed by
- "n" is translated into a newline. To properly print a string (for
- step 4 string functions), the `pr_str` function needs another
- parameter called `print_readably`. When `print_readably` is true,
- doublequotes and newlines are translated into their printed
- representations (the reverse of the reader). The `PRINT` function in
- the main program should call `pr_str` with print_readably set to
- true.
+ step 4. When a string is read, the following transformations are
+ applied: a backslash followed by a doublequote is translated into
+ a plain doublequote character, a backslash followed by "n" is
+ translated into a newline, and a backslash followed by another
+ backslash is translated into a single backslash. To properly print
+ a string (for step 4 string functions), the `pr_str` function needs
+ another parameter called `print_readably`. When `print_readably` is
+ true, doublequotes, newlines, and backslashes are translated into
+ their printed representations (the reverse of the reader). The
+ `PRINT` function in the main program should call `pr_str` with
+ print_readably set to true.
* Add support for the other mal types: keyword, vector, hash-map, and
atom.
(")
obj (\\) (\\\\) replace
(") (\\") replace
+ (\n) (\\n) replace
(") concatenate concatenate
}{
obj
} loop
str start cnt getinterval % the matched string
(\\") (") replace
+ (\\n) (\n) replace
+ (\\\\) (\\) replace
str idx % return: new_string string new_idx
end } def
##########################################################
# Nothing additional needed for python
+RUN apt-get -y install python3
import mal_types as types
+def _escape(s):
+ return s.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
+
def _pr_str(obj, print_readably=True):
_r = print_readably
if types._list_Q(obj):
if len(obj) > 0 and obj[0] == types.u('\u029e'):
return ':' + obj[1:]
elif print_readably:
- return '"' + obj.encode('unicode_escape').decode('latin1').replace('"', '\\"') + '"'
+ return '"' + _escape(obj) + '"'
else:
return obj
elif types._nil_Q(obj):
return None
def tokenize(str):
- tre = re.compile(r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"|;.*|[^\s\[\]{}()'"`@,;]+)""");
+ tre = re.compile(r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"?|;.*|[^\s\[\]{}()'"`@,;]+)""");
return [t for t in re.findall(tre, str) if t[0] != ';']
+def _unescape(s):
+ return s.replace('\\"', '"').replace('\\n', '\n').replace('\\\\', '\\')
+
def read_atom(reader):
int_re = re.compile(r"-?[0-9]+$")
float_re = re.compile(r"-?[0-9][0-9.]*$")
token = reader.next()
if re.match(int_re, token): return int(token)
elif re.match(float_re, token): return int(token)
- elif token[0] == '"': return token[1:-1].replace('\\"', '"')
+ elif token[0] == '"':
+ if token[-1] == '"': return _unescape(token[1:-1])
+ else: raise Exception("expected '\"', got EOF")
elif token[0] == ':': return _keyword(token[1:])
elif token == "nil": return None
elif token == "true": return True
} else if (re_match("^-?[0-9][0-9.]*$", token)) {
as.double(token)
} else if (substr(token,1,1) == "\"") {
- gsub("\\\\n", "\\n",
- gsub("\\\\\"", "\"",
- substr(token, 2, nchar(token)-1)))
+ gsub("\\\\\\\\", "\\\\",
+ gsub("\\\\n", "\n",
+ gsub("\\\\\"", "\"",
+ substr(token, 2, nchar(token)-1))))
} else if (substr(token,1,1) == ":") {
new.keyword(substring(token,2))
} else if (token == "nil") {
[(regexp-match #px"^\".*\"$" token)
(string-replace
(string-replace
- (substring token 1 (- (string-length token) 1))
- "\\\"" "\"")
- "\\n" "\n")]
+ (string-replace
+ (substring token 1 (- (string-length token) 1))
+ "\\\"" "\"")
+ "\\n" "\n")
+ "\\\\" "\\")]
[(regexp-match #px"^:" token) (_keyword (substring token 1))]
[(equal? "nil" token) nil]
[(equal? "true" token) #t]
if len(s) > 0 and s[0] == u'\u029e':
return u':' + s[1:]
elif print_readably:
- return u'"' + types._replace(u'\\n', u'\\n',
+ return u'"' + types._replace(u'\n', u'\\n',
types._replace(u'\"', u'\\"',
- types._replace(u'\\', u'\\\\', s))) + u'"'
+ types._replace(u'\\', u'\\\\', s))) + u'"'
else:
return s
if end < 2:
return MalStr(u"")
else:
- return MalStr(types._replace(u'\\"', u'"', unicode(token[1:end])))
+ s = unicode(token[1:end])
+ s = types._replace(u'\\"', u'"', s)
+ s = types._replace(u'\\n', u"\n", s)
+ s = types._replace(u'\\\\', u"\\", s)
+ return MalStr(s)
elif token[0] == ':': return _keywordu(unicode(token[1:]))
elif token == "nil": return types.nil
elif token == "true": return types.true
}
end
# Strip the first and last character of the token +t+ (t[1..-2]) and decode
# its escape sequences: backslash-quote -> quote, backslash-n -> newline,
# backslash-backslash -> backslash.  Other backslashes are left as they are.
#
# A single gsub pass is used so that the result of one escape is never
# re-read as part of another (an escaped backslash followed by "n" must stay
# a backslash and an "n", not become a newline).
def parse_str(t) # trim and unescape
    return t[1..-2].gsub(/\\[\\"n]/) { |esc| esc == "\\n" ? "\n" : esc[1..-1] }
end
def read_atom(rdr)
while self.data:
line = self.data[0]
if line[0:3] == ";=>":
- self.ret = line[3:].replace('\\r', '\r').replace('\\n', '\n')
+ self.ret = line[3:]
self.line_num += 1
self.data.pop(0)
break
cargo build --release
cp $(word $(words ${BINS}),${BINS}) $@
-#$(BINS): target/release/%: src/%.rs
-# cargo build --release $*
+# TODO: would be nice to build just the step requested
+$(BINS): target/release/%: src/bin/%.rs $(wildcard src/*.rs)
+ cargo build --release
clean:
cargo clean
/// Decode the escape sequences in `s`.
///
/// `\"` becomes `"`, `\n` becomes a newline and `\\` becomes a single
/// backslash. A backslash followed by any other character, or a trailing
/// backslash, is copied through unchanged.
///
/// The input is scanned once, left to right, so the output of one escape
/// is never re-interpreted by a later replacement pass (an escaped
/// backslash followed by `n` stays a backslash and an `n`).
pub fn unescape_str(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            out.push(c);
            continue;
        }
        match chars.next() {
            Some('n') => out.push('\n'),
            Some('"') => out.push('"'),
            Some('\\') => out.push('\\'),
            // Unknown escape: keep both characters as written.
            Some(other) => {
                out.push('\\');
                out.push(other);
            }
            // Lone trailing backslash.
            None => out.push('\\'),
        }
    }
    out
}
pub fn pr_list(lst: &Vec<MalVal>, pr: bool,
}
/** Decode the escape sequences in `s`.
  *
  * Backslash-quote becomes a double quote, backslash-n becomes a newline
  * and backslash-backslash becomes a single backslash; any other backslash
  * is copied through unchanged.
  *
  * The text is scanned once, left to right, so the output of one escape is
  * never re-read by a later replacement (an escaped backslash followed by
  * `n` stays a backslash and an `n`).
  */
def parse_str(s: String): String = {
  val out = new StringBuilder(s.length)
  var i = 0
  while (i < s.length) {
    val c = s.charAt(i)
    val isEscape =
      c == '\\' && i + 1 < s.length && "\\\"n".indexOf(s.charAt(i + 1)) >= 0
    if (isEscape) {
      val e = s.charAt(i + 1)
      out.append(if (e == 'n') '\n' else e)
      i += 2
    } else {
      out.append(c)
      i += 1
    }
  }
  out.toString
}
def read_atom(rdr: Reader): Any = {
"\""
;=>"\""
+"abc\ndef\nghi"
+;=>"abc\ndef\nghi"
+
+"abc\\def\\ghi"
+;=>"abc\\def\\ghi"
;; Testing pr-str
(pr-str (list 1 2 "abc" "\"") "def")
;=>"(1 2 \"abc\" \"\\\"\") \"def\""
+(pr-str "abc\ndef\nghi")
+;=>"\"abc\\ndef\\nghi\""
+
+(pr-str "abc\\def\\ghi")
+;=>"\"abc\\\\def\\\\ghi\""
+
;; Testing str
(str "abc def" "ghi jkl")
;=>"abc defghi jkl"
+(str "abc\ndef\nghi")
+;=>"abc\ndef\nghi"
+
+(str "abc\\def\\ghi")
+;=>"abc\\def\\ghi"
+
;;; TODO: get this working properly
;;;(str (list 1 2 "abc" "\"") "def")
;;;;=>"(1 2 \"abc\" \"\\\"\")def"
; "\""
;=>nil
+(prn "abc\ndef\nghi")
+; "abc\ndef\nghi"
+;=>nil
+
+(prn "abc\\def\\ghi")
+; "abc\\def\\ghi"
+;=>nil
+
(prn (list 1 2 "abc" "\"") "def")
; (1 2 "abc" "\"") "def"
;=>nil
; "
;=>nil
+(println "abc\ndef\nghi")
+; abc
+; def
+; ghi
+;=>nil
+
+(println "abc\\def\\ghi")
+; abc\def\ghi
+;=>nil
+
(println (list 1 2 "abc" "\"") "def")
; (1 2 abc ") def
;=>nil
return New Mal.types.MalString(
str.Substring(1, str.Length-2) _
.Replace("\""", """") _
- .Replace("\n", Environment.NewLine))
+ .Replace("\n", Environment.NewLine) _
+ .Replace("\\", "\"))
Else If match.Groups(7).Value <> String.Empty Then
return New Mal.types.MalString(ChrW(&H029e) & match.Groups(7).Value)
Else If match.Groups(8).Value <> String.Empty Then
let str = a:token[1:-2]
let str = substitute(str, '\\"', '"', "g")
let str = substitute(str, '\\n', "\n", "g")
+ let str = substitute(str, '\\\\', "\\", "g")
return str
endfunction