Commit | Line | Data |
---|---|---|
327bd967 PS |
1 | module Tokenizer |
2 | ||
3 | open System | |
8f1ee487 | 4 | open Types |
327bd967 | 5 | |
327bd967 PS |
/// Lexical tokens produced by `tokenize`.
type Token =
    /// End of input; returned by the scanner but never placed in the
    /// list that `tokenize` returns.
    | EOF
    /// `[`
    | OpenBracket
    /// `]`
    | CloseBracket
    /// `{`
    | OpenBrace
    /// `}`
    | CloseBrace
    /// `(`
    | OpenParen
    /// `)`
    | CloseParen
    /// `'`
    | SingleQuote
    /// `` ` ``
    | Backtick
    /// `~` that is not immediately followed by `@`
    | Tilde
    /// `~@`
    | SpliceUnquote
    /// `^`
    | Caret
    /// `@`
    | At
    /// Contents of a double-quoted literal, with escape sequences decoded
    /// and the surrounding quotes removed.
    | String of string
    /// Any other run of token characters (symbols and the like).
    | Token of string
    /// Keyword text, without its leading `:`.
    | Keyword of string
    /// A run of digit characters, kept as text.
    | Number of string
20 | ||
6c47cf67 PS |
/// Error for input that ended where `tok` was required.
let errExpectedButEOF tok =
    let msg = sprintf "expected %s, got EOF" tok
    ReaderError msg

/// Error for input that contained something other than the required `tok`.
let errExpected tok =
    let msg = sprintf "expected %s" tok
    ReaderError msg

/// Error for a character that no tokenizer rule accepts.
let errUnexpected () =
    ReaderError "unexpected char"
24 | ||
327bd967 PS |
/// Splits `str` into a list of tokens, in source order.
///
/// Commas and whitespace separate tokens and are discarded. A `;` starts a
/// comment that runs up to the next `\r` or `\n`. The final `EOF` token is
/// not included in the returned list.
///
/// Raises `ReaderError` when a string literal is unterminated, when a string
/// contains an unsupported escape sequence, or when `:` is not followed by a
/// token character.
let tokenize (str : string) =
    let len = str.Length

    // Commas count as whitespace.
    let inline isWhiteSpace ch = ch = ',' || Char.IsWhiteSpace(ch)
    let inline isNotNewline ch = ch <> '\r' && ch <> '\n'
    let inline isDigit ch = Char.IsDigit(ch)
    // A token character is anything that is not a delimiter, quote,
    // comment marker, comma or whitespace.
    let inline isTokenChar ch =
        match ch with
        | '[' | ']' | '{' | '}' | '(' | ')'
        | '\'' | '"' | '`' | ',' | ';' -> false
        | ch when Char.IsWhiteSpace(ch) -> false
        | _ -> true

    /// Returns the first position at or after `p` whose character fails
    /// `pred`, or `len` if there is none.
    let rec skipWhile pred p =
        if p >= len then p
        elif pred (str.[p]) then p + 1 |> skipWhile pred
        else p

    /// Extends the span that begins at `start` for as long as characters
    /// from `p` onward satisfy `pred`; returns `f` applied to the span's
    /// text, paired with the position just past it.
    let rec accumulateWhile pred (f : string -> Token) start p =
        if p >= len then str.Substring(start, p - start) |> f, p
        elif pred (str.[p]) then p + 1 |> accumulateWhile pred f start
        else str.Substring(start, p - start) |> f, p

    /// Reads a string literal body starting at `p` (just past the opening
    /// quote); returns the decoded `String` token and the position just
    /// past the closing quote.
    let accumulateString p =
        let b = System.Text.StringBuilder()
        let rec accChar (ch : char) n =
            b.Append(ch) |> ignore
            accChars n
        and accChars p =
            let n = p + 1
            if p >= len then raise <| errExpectedButEOF "'\"'"
            match str.[p] with
            | '\\' -> accEscaped n
            | '"' -> n
            | ch -> accChar ch n
        and accEscaped p =
            let n = p + 1
            if p >= len then raise <| errExpectedButEOF "char"
            match str.[p] with
            | 't' -> accChar '\t' n
            | 'b' -> accChar '\b' n
            | 'n' -> accChar '\n' n
            | 'r' -> accChar '\r' n
            | 'f' -> accChar '\f' n
            | '\'' -> accChar '\'' n
            | '"' -> accChar '"' n
            | '\\' -> accChar '\\' n
            // A character was read here, so this is not an EOF condition;
            // EOF is handled by the length check above.
            | _ -> raise <| errExpected "valid escape char"
        let n = accChars p
        String(b.ToString()), n

    /// Reads a keyword whose first character is at `p` (just past the `:`);
    /// the resulting `Keyword` text does not include the colon.
    let accumulateKeyword p =
        let n = p + 1
        if p >= len then raise <| errExpectedButEOF "keyword"
        elif isTokenChar str.[p] then accumulateWhile isTokenChar Keyword p n
        else raise <| errExpected "keyword char"

    /// Called with `p` just past a `~`: yields `SpliceUnquote` when the next
    /// character is `@` (consuming it), otherwise `Tilde`.
    let accumulateSpliceUnquote p =
        if p >= len then Tilde, p
        elif str.[p] = '@' then SpliceUnquote, (p + 1)
        else Tilde, p

    /// Scans one token starting at `p`, skipping leading whitespace and
    /// comments; returns the token and the position just past it.
    let rec getToken p =
        if p >= len then
            EOF, p
        else
            let n = p + 1
            match str.[p] with
            | ch when isWhiteSpace ch -> getToken n
            | ';' -> skipWhile isNotNewline n |> getToken
            | '[' -> OpenBracket, n
            | ']' -> CloseBracket, n
            | '{' -> OpenBrace, n
            | '}' -> CloseBrace, n
            | '(' -> OpenParen, n
            | ')' -> CloseParen, n
            | '\'' -> SingleQuote, n
            | '`' -> Backtick, n
            | '~' -> accumulateSpliceUnquote n
            | '^' -> Caret, n
            | '@' -> At, n
            | '"' -> accumulateString n
            | ':' -> accumulateKeyword n
            | ch when isDigit ch -> accumulateWhile isDigit Number p n
            | ch when isTokenChar ch -> accumulateWhile isTokenChar Token p n
            // Defensive fallback: every character that isTokenChar rejects
            // is already matched by an earlier case.
            | _ -> raise <| errUnexpected ()

    // Tokens are consed onto the accumulator and reversed once at EOF.
    let rec accumulate acc p =
        match getToken p with
        | EOF, _ -> List.rev acc
        | tok, next -> accumulate (tok::acc) next

    accumulate [] 0