fsharp: step 2: Factored out funcs into Core module.
[jackhill/mal.git] / fsharp / tokenizer.fs
CommitLineData
327bd967
PS
1module Tokenizer
2
3 open System
8f1ee487 4 open Types
327bd967 5
327bd967
PS
/// A lexical token produced by the tokenizer.
type Token =
    /// End of the input text.
    | EOF
    /// '['
    | OpenBracket
    /// ']'
    | CloseBracket
    /// '{'
    | OpenBrace
    /// '}'
    | CloseBrace
    /// '('
    | OpenParen
    /// ')'
    | CloseParen
    /// A single quote character.
    | SingleQuote
    /// '`'
    | Backtick
    /// '~' that is not immediately followed by '@'.
    | Tilde
    /// The two-character sequence '~@'.
    | SpliceUnquote
    /// '^'
    | Caret
    /// '@'
    | At
    /// Contents of a double-quoted string literal, with escapes already decoded.
    | String of string
    /// A bare run of token characters.
    | Token of string
    /// Text following a ':' (the colon itself is not stored).
    | Keyword of string
    /// A run of digit characters, kept as text.
    | Number of string
20
6c47cf67
PS
/// Builds the ReaderError used when the input ends while `tok` was still expected.
let errExpectedButEOF tok =
    let message = sprintf "expected %s, got EOF" tok
    ReaderError(message)
/// Builds the ReaderError used when something other than `tok` was found.
let errExpected tok =
    let message = sprintf "expected %s" tok
    ReaderError(message)
/// Builds the ReaderError used when a character cannot start any token.
let errUnexpected () =
    let message = "unexpected char"
    ReaderError(message)
24
327bd967
PS
/// Splits source text into a list of tokens.
///
/// Whitespace and commas separate tokens and are discarded; a ';' starts a
/// comment that runs up to the next '\r' or '\n'.  The EOF marker is consumed
/// internally and is not part of the returned list.
///
/// Raises ReaderError when:
///   - a string literal is not closed before the end of input,
///   - a backslash inside a string is followed by end of input or by a
///     character that is not a supported escape,
///   - a ':' is followed by end of input or by a non-token character,
///   - a character is met that cannot start any token.
let tokenize (str : string) =
    let len = str.Length

    // Commas are treated exactly like whitespace.
    let inline isWhiteSpace ch = ch = ',' || Char.IsWhiteSpace(ch)
    let inline isNotNewline ch = ch <> '\r' && ch <> '\n'
    let inline isDigit ch = Char.IsDigit(ch)
    // True for every character that is not a bracket, brace, paren, quote,
    // backtick, comma, semicolon or whitespace.
    let inline isTokenChar ch =
        match ch with
        | '[' | ']' | '{' | '}' | '(' | ')'
        | '\'' | '"' | '`' | ',' | ';' -> false
        | ch when Char.IsWhiteSpace(ch) -> false
        | _ -> true

    // Advances from p while pred holds; returns the first index where it does
    // not (or len when the input runs out).
    let rec skipWhile pred p =
        if p >= len then p
        elif pred (str.[p]) then p + 1 |> skipWhile pred
        else p

    // Advances from p while pred holds, then wraps the text in [start, stop)
    // with f.  Returns the token together with the stop index.
    let rec accumulateWhile pred (f : string -> Token) start p =
        if p >= len then str.Substring(start, p - start) |> f, p
        elif pred (str.[p]) then p + 1 |> accumulateWhile pred f start
        else str.Substring(start, p - start) |> f, p

    // Reads a string literal body; p is the index just after the opening '"'.
    // Returns the decoded String token and the index just after the closing '"'.
    let accumulateString p =
        let b = System.Text.StringBuilder()
        let rec accChar (ch : char) n =
            b.Append(ch) |> ignore
            accChars n
        and accChars p =
            let n = p + 1
            if p >= len then
                raise <| errExpectedButEOF "'\"'"
            else
                match str.[p] with
                | '\\' -> accEscaped n
                | '"' -> n
                | ch -> accChar ch n
        and accEscaped p =
            let n = p + 1
            if p >= len then
                raise <| errExpectedButEOF "char"
            else
                match str.[p] with
                | 't' -> accChar '\t' n
                | 'b' -> accChar '\b' n
                | 'n' -> accChar '\n' n
                | 'r' -> accChar '\r' n
                | 'f' -> accChar '\f' n
                | '\'' -> accChar '\'' n
                | '"' -> accChar '"' n
                | '\\' -> accChar '\\' n
                // A character is present here, it is just not a supported
                // escape, so this must not be reported as an EOF condition.
                | _ -> raise <| errExpected "valid escape char"
        let n = accChars p
        String(b.ToString()), n

    // Reads a keyword; p is the index just after the ':'.  The stored text
    // starts at p, so it does not include the colon.
    let accumulateKeyword p =
        let n = p + 1
        if p >= len then raise <| errExpectedButEOF "keyword"
        elif isTokenChar str.[p] then accumulateWhile isTokenChar Keyword p n
        else raise <| errExpected "keyword char"

    // p is the index just after a '~'; a following '@' turns it into '~@'.
    let accumulateSpliceUnquote p =
        if p >= len then Tilde, p
        elif str.[p] = '@' then SpliceUnquote, (p + 1)
        else Tilde, p

    // Reads one token starting at p; returns it with the index to resume from.
    let rec getToken p =
        if p >= len then
            EOF, p
        else
            let n = p + 1
            match str.[p] with
            | ch when isWhiteSpace ch -> getToken n
            | ';' -> skipWhile isNotNewline n |> getToken
            | '[' -> OpenBracket, n
            | ']' -> CloseBracket, n
            | '{' -> OpenBrace, n
            | '}' -> CloseBrace, n
            | '(' -> OpenParen, n
            | ')' -> CloseParen, n
            | '\'' -> SingleQuote, n
            | '`' -> Backtick, n
            | '~' -> accumulateSpliceUnquote n
            | '^' -> Caret, n
            | '@' -> At, n
            | '"' -> accumulateString n
            | ':' -> accumulateKeyword n
            | ch when isDigit ch -> accumulateWhile isDigit Number p n
            | ch when isTokenChar ch -> accumulateWhile isTokenChar Token p n
            | _ -> raise <| errUnexpected ()

    // Collects tokens in reverse until EOF, then restores source order.
    let rec accumulate acc p =
        match getToken p with
        | EOF, _ -> List.rev acc
        | tok, p -> accumulate (tok::acc) p

    accumulate [] 0