Fix unescaping in bash, basic, es6, js, ruby, rust.
[jackhill/mal.git] / bash / reader.sh
1 #
2 # mal (Make Lisp) Parser/Reader
3 #
4
5 if [ -z "${__mal_readerr_included__}" ]; then
6 __mal_readerr_included=true
7
8 source $(dirname $0)/types.sh
9
# Read one atom: take the token at the current reader position, advance the
# position by one and build the matching object.
# Globals:   __reader_tokens (read), __reader_idx (read and incremented),
#            __keyw, __nil, __true, __false (read)
# Returns:   the resulting object in r
READ_ATOM () {
    local token="${__reader_tokens[${__reader_idx}]}"
    local newline=$'\n'
    __reader_idx=$(( __reader_idx + 1 ))

    # Only a complete, optionally negative, run of digits is a number.  A
    # token that merely starts with a digit (e.g. "1abc") falls through to
    # the symbol case instead of being handed to _number.
    if [[ "${token}" =~ ^-?[0-9]+$ ]]; then
        _number "${token}"
        return
    fi

    case "${token}" in
        \"*)
            # String literal: drop the surrounding quotes, then unescape.
            token="${token#\"}"
            token="${token%\"}"
            # Escaped backslashes are parked behind the ${__keyw} placeholder
            # first, so the second half of a "\\" pair cannot be mistaken for
            # the start of a \" or \n escape in the following passes.
            token="${token//\\\\/${__keyw}}"
            token="${token//\\\"/\"}"
            token="${token//\\n/${newline}}"
            token="${token//${__keyw}/\\}"
            _string "${token}" ;;
        :*)    _keyword "${token:1}" ;;
        nil)   r="${__nil}" ;;
        true)  r="${__true}" ;;
        false) r="${__false}" ;;
        *)     _symbol "${token}" ;;
    esac
}
29
# Read a delimited sequence of forms.
# Arguments: ${1} - opening token expected at the current reader position
#            ${2} - closing token that terminates the sequence
# Globals:   __reader_tokens (read), __reader_idx (read and advanced)
# Returns:   in r, the space-separated results of READ_FORM for every
#            element; r is left empty when the sequence is malformed
READ_SEQ () {
    local start="${1}"
    local end="${2}"
    local items=""
    local before
    local token="${__reader_tokens[${__reader_idx}]}"

    __reader_idx=$(( __reader_idx + 1 ))
    if [[ "${token}" != "${start}" ]]; then
        r=
        _error "expected '${start}'"
        return
    fi

    token="${__reader_tokens[${__reader_idx}]}"
    while [[ "${token}" != "${end}" ]]; do
        if [[ -z "${token}" ]]; then
            # Ran off the end of the token array before the closer.
            r=
            _error "expected '${end}', got EOF"
            return
        fi
        before=${__reader_idx}
        READ_FORM
        if (( __reader_idx == before )); then
            # READ_FORM consumed nothing, which it only does when it rejects
            # the token (e.g. a closer of the wrong kind).  Stop here;
            # otherwise the same token would be re-read forever.
            r=
            return
        fi
        items="${items} ${r}"
        token="${__reader_tokens[${__reader_idx}]}"
    done
    __reader_idx=$(( __reader_idx + 1 ))   # step over the closing token
    r="${items:1}"                         # drop the leading separator
}
59
# Helper for READ_FORM: consume the reader-macro token at the current
# position, read the form that follows it and return the two-element list
# (<symbol named ${1}> <form>) in r.
_READ_MACRO_FORM () {
    local head form
    __reader_idx=$(( __reader_idx + 1 ))   # step over the macro token
    _symbol "${1}"; head="${r}"
    READ_FORM; form="${r}"
    _list "${head}" "${form}"
}

# Read the next form, dispatching on the token at the current reader
# position: reader macros, delimited sequences or a plain atom.
# Globals:   __reader_tokens (read), __reader_idx (advanced by the callees)
# Returns:   the form in r; problems are reported through _error
READ_FORM () {
    local token="${__reader_tokens[${__reader_idx}]}"
    local wm meta obj
    case "${token}" in
        "")    # Past the last token, e.g. a reader macro with nothing after
               # it; report it rather than inventing an empty symbol.
               _error "unexpected EOF" ;;
        \')    _READ_MACRO_FORM quote ;;
        \`)    _READ_MACRO_FORM quasiquote ;;
        \~)    _READ_MACRO_FORM unquote ;;
        \~\@)  _READ_MACRO_FORM splice-unquote ;;
        @)     _READ_MACRO_FORM deref ;;
        ^)     __reader_idx=$(( __reader_idx + 1 ))
               _symbol with-meta; wm="${r}"
               # The metadata is written first, but the resulting list
               # carries the object before the metadata.
               READ_FORM; meta="${r}"
               READ_FORM; obj="${r}"
               _list "${wm}" "${obj}" "${meta}" ;;
        \))    _error "unexpected ')'" ;;
        \()    READ_SEQ "(" ")"
               # r holds space-separated items; it is left unquoted so that
               # each item becomes its own argument.
               _list ${r} ;;
        \])    _error "unexpected ']'" ;;
        \[)    READ_SEQ "[" "]"
               _vector ${r} ;;
        \})    _error "unexpected '}'" ;;
        \{)    READ_SEQ "{" "}"
               _hash_map ${r} ;;
        *)     READ_ATOM ;;
    esac
}
101
# Split the input text into reader tokens.
# Arguments: all arguments, joined with "$*", form the text to tokenize
# Globals:   __reader_tokens (written: indexed array of tokens)
#            __reader_idx    (written: reset to 0)
# Returns:   0 on success; 1, after calling _error, when the text at the
#            current position matches no token (e.g. an unterminated string)
TOKENIZE () {
    local data="${*}"
    local datalen=${#data}
    local idx=0
    local chunk=0
    local chunksz=500
    local match=
    local str=
    local nl=$'\n'

    # One alternative per token class.  The regex lives in variables so no
    # shell quoting is needed inside [[ =~ ]].
    local re_special='[][{}()^@]'           # single-character tokens
    local re_string='"(\\.|[^\\"])*"'       # "..." with backslash escapes
    local re_comment=";[^${nl}]*"           # ; up to end of line
    local re_quote="[~'\`]"                 # reader-macro characters
    # Symbols, numbers, keywords: anything up to the next delimiter.  All
    # whitespace is excluded, so a line break always separates two tokens.
    local re_symbol="[^][~\`'\";{}()^@,[:space:]]+"
    local re="^(${re_special}|~@|${re_string}|${re_comment}|${re_quote}|${re_symbol}|,|[[:space:]]+)"

    __reader_idx=0
    __reader_tokens=()
    while true; do
        # Only a window of the input is matched at a time; top it up
        # whenever it runs low and input remains.
        if (( ${#str} < ( chunksz / 2 ) )) && (( chunk < datalen )); then
            str="${str}${data:${chunk}:${chunksz}}"
            chunk=$(( chunk + chunksz ))
        fi
        (( ${#str} == 0 )) && break

        if [[ "${str}" =~ ${re} ]]; then
            match="${BASH_REMATCH[0]}"
        else
            match=
        fi

        # No match, or a match that reaches the end of the window while
        # input remains, may mean the token continues past the window
        # (a long string or comment): pull in more input and match again.
        if (( chunk < datalen )) &&
           { [[ -z "${match}" ]] || (( ${#match} == ${#str} )); }; then
            str="${str}${data:${chunk}:${chunksz}}"
            chunk=$(( chunk + chunksz ))
            continue
        fi

        if [[ -z "${match}" ]]; then
            _error "Tokenizing error at: ${str:0:50}"
            return 1
        fi
        str="${str:${#match}}"

        case "${match}" in
            ,|\;*|[[:space:]]*) ;;   # commas, comments, whitespace: no token
            *)  __reader_tokens[${idx}]="${match}"
                idx=$(( idx + 1 )) ;;
        esac
    done
    return 0
}
136
# Parse text into a form.
# Arguments: joined with "$*" and passed to TOKENIZE
# Returns:   the form read by READ_FORM in r, with READ_FORM's status;
#            status 1 when tokenizing fails, and status 1 with r emptied
#            when the text contains no tokens
READ_STR () {
    # Function-local token array; TOKENIZE and READ_FORM, called from here,
    # operate on this copy.
    local -a __reader_tokens

    if ! TOKENIZE "${*}"; then
        return 1
    fi

    if [[ -n "${__reader_tokens[0]}" ]]; then
        READ_FORM
        return
    fi

    # Nothing but whitespace, commas or comments.
    r=
    return 1
}
151
# Prompt for one line of input with line editing and a persistent history.
# Arguments: ${1} - prompt string
# Globals:   READLINE_HISTORY_FILE (read) - file the history is stored in
#            __readline_history_loaded (written) - set once the file is read
# Returns:   the line in r; the non-zero status of `read` when no line
#            could be read (end of input)
READLINE_EOF=
READLINE_HISTORY_FILE=${HOME}/.mal-history
READLINE () {
    # Load the saved history only on the first call: `history -r` appends
    # the file to the in-memory list, so repeating it for every prompt
    # would duplicate all earlier entries.
    if [[ -z "${__readline_history_loaded:-}" ]]; then
        # Best effort: a missing or unreadable history file is not an error.
        history -r "${READLINE_HISTORY_FILE}" 2>/dev/null || true
        __readline_history_loaded=true
    fi
    read -r -e -p "${1}" r || return "$?"
    history -s -- "${r}"
    # Best effort as well; the line has already been returned in r.
    history -a "${READLINE_HISTORY_FILE}" 2>/dev/null || true
}
161
162 fi