Detect more cases where strings are unterminated.
(define *token-re*
(new-pcre "[\\s,]*(~@|[\\[\\]{}()'`~^@]|\"(?:\\\\.|[^\\\\\"])*\"|;[^\n]*|[^\\s\\[\\]{}('\"`,;)]*)"))
+(define *str-re*
+ (new-pcre "^(\"(?:\\\\.|[^\\\\\"])*\")$"))
+
(define (tokenizer str)
(filter (lambda (s) (and (not (string-null? s)) (not (string=? (substring s 0 1) ";"))))
(pcre-search *token-re* str)))
(cond
((string-match "^-?[0-9][0-9.]*$" token)
=> (lambda (m) (string->number (match:substring m 0))))
+ ((> (length (pcre-search *str-re* token)) 0)
+ (with-input-from-string token read))
((eqv? (string-ref token 0) #\")
- (if (eqv? (string-ref token (- (string-length token) 1)) #\")
- (with-input-from-string token read)
- (throw 'mal-error "expected '\"', got EOF")))
+ (throw 'mal-error "expected '\"', got EOF"))
((string-match "^:(.*)" token)
=> (lambda (m) (string->keyword (match:substring m 1))))
((string=? "nil" token) nil)
atm = str2double(token);
elseif not(isempty(regexp(token, '^"(?:\\.|[^\\"])*"$', 'match')))
atm = token(2:length(token)-1);
- atm = strrep(atm, '\\', char(255));
+ % If 'overlaps' is enabled here, only the first '\\' is
+ % replaced. This is probably a GNU Octave bug, since the
+ % other repeated pairs are substituted correctly.
+ atm = strrep(atm, '\\', char(255), 'overlaps', false);
atm = strrep(atm, '\"', '"');
atm = strrep(atm, '\n', char(10));
atm = strrep(atm, char(255), '\');
let
tokenRE = re"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]*)"""
intRE = re"-?[0-9]+$"
+ strRE = re"""^"(?:\\.|[^\\"])*"$"""
type
Blank* = object of Exception
let t = r.next
if t.match(intRE): number t.parseInt
elif t[0] == '"':
- if t[^1] != '"': raise newException(ValueError, "expected '\"', got EOF")
+ if not t.match(strRE):
+ raise newException(ValueError, "expected '\"', got EOF")
str t[1 .. <t.high].multiReplace(("\\\"", "\""), ("\\n", "\n"), ("\\\\", "\\"))
elif t[0] == ':': keyword t[1 .. t.high]
elif t == "nil": nilObj
/ch str idx get def % current character
/idx idx 1 add def
ch 92 eq { % if \
- str idx get 34 eq { %if \"
- /idx idx 1 add def
- /cnt cnt 1 add def % 1 more below
- } if
+ /idx idx 1 add def
+ /cnt cnt 1 add def % 1 more below
} if
ch 34 eq { exit } if % '"' is end of string
/cnt cnt 1 add def