Common Lisp: Add documentation
[jackhill/mal.git] / fsharp / tokenizer.fs
1 module Tokenizer
2
3 open System
4 open Types
5
/// Lexical tokens produced by `tokenize`.
type Token =
    /// End of input; used internally as the stop marker.
    | EOF
    /// '['
    | OpenBracket
    /// ']'
    | CloseBracket
    /// '{'
    | OpenBrace
    /// '}'
    | CloseBrace
    /// '('
    | OpenParen
    /// ')'
    | CloseParen
    /// '\''
    | SingleQuote
    /// '`'
    | Backtick
    /// '~' not followed by '@'
    | Tilde
    /// "~@"
    | SpliceUnquote
    /// '^'
    | Caret
    /// '@'
    | At
    /// Contents of a double-quoted literal, with escapes already decoded.
    | String of string
    /// A bare run of token characters.
    | Token of string
    /// The text following a ':' (the colon itself is not included).
    | Keyword of string
    /// Digits of an integer literal, with a leading '-' when present.
    | Number of string
20
21
/// Splits source text into a list of tokens.
///
/// Whitespace and commas separate tokens and are dropped, as is everything
/// from ';' up to the end of the line. The EOF marker that ends scanning is
/// not part of the returned list.
///
/// Raises the exceptions built by `Error.expectedXButEOF` (unterminated
/// string, input ending right after '\\' or ':'), `Error.expectedX` (invalid
/// escape character, ':' not followed by a token character) and
/// `Error.unexpectedChar`.
let tokenize (str : string) =
    let len = str.Length

    // Commas are treated exactly like whitespace.
    let inline isWhiteSpace ch = ch = ',' || Char.IsWhiteSpace(ch)
    let inline isNotNewline ch = ch <> '\r' && ch <> '\n'
    let inline isDigit ch = Char.IsDigit(ch)
    // True for characters that may appear inside a bare token or keyword.
    let inline isTokenChar ch =
        match ch with
        | '[' | ']' | '{' | '}' | '(' | ')'
        | '\'' | '"' | '`' | ',' | ';' -> false
        | ch when Char.IsWhiteSpace(ch) -> false
        | _ -> true

    // Returns the first index at or after p whose character fails pred,
    // or len when every remaining character satisfies it.
    let rec skipWhile pred p =
        if p < len && pred str.[p] then skipWhile pred (p + 1)
        else p

    // Advances p while pred holds, then wraps str.[start .. p-1] with f.
    // Returns the token together with the index of the first unread char.
    let rec accumulateWhile pred (f : string -> Token) start p =
        if p < len && pred str.[p] then accumulateWhile pred f start (p + 1)
        else f (str.Substring(start, p - start)), p

    // Reads a string literal body; p is the index just past the opening '"'.
    // Returns the String token and the index just past the closing '"'.
    let accumulateString p =
        let b = System.Text.StringBuilder()
        let rec accChar (ch : char) n =
            b.Append(ch) |> ignore
            accChars n
        and accChars p =
            let n = p + 1
            if p >= len then raise <| Error.expectedXButEOF "'\"'"
            match str.[p] with
            | '\\' -> accEscaped n
            | '"' -> n
            | ch -> accChar ch n
        and accEscaped p =
            let n = p + 1
            if p >= len then raise <| Error.expectedXButEOF "char"
            match str.[p] with
            | 't' -> accChar '\t' n
            | 'b' -> accChar '\b' n
            | 'n' -> accChar '\n' n
            | 'r' -> accChar '\r' n
            | 'f' -> accChar '\f' n
            | '\'' -> accChar '\'' n
            | '"' -> accChar '"' n
            | '\\' -> accChar '\\' n
            // A character is present here (p < len), so this is not an EOF
            // condition: report it with the plain "expected" error.
            | _ -> raise <| Error.expectedX "valid escape char"
        let n = accChars p
        String(b.ToString()), n

    // Reads a keyword; p is the index just past the ':'.
    let accumulateKeyword p =
        let n = p + 1
        if p >= len then raise <| Error.expectedXButEOF "keyword"
        elif isTokenChar str.[p] then accumulateWhile isTokenChar Keyword p n
        else raise <| Error.expectedX "keyword char"

    // Distinguishes "~@" from a lone '~'; p is the index just past the '~'.
    let accumulateSpliceUnquote p =
        if p < len && str.[p] = '@' then SpliceUnquote, (p + 1)
        else Tilde, p

    // Reads one token starting at p, skipping whitespace and comments.
    // Returns the token and the index at which the next read should start.
    let rec getToken p =
        if p >= len then
            EOF, p
        else
            let n = p + 1
            match str.[p] with
            | ch when isWhiteSpace ch -> getToken n
            | ';' -> skipWhile isNotNewline n |> getToken
            | '[' -> OpenBracket, n
            | ']' -> CloseBracket, n
            | '{' -> OpenBrace, n
            | '}' -> CloseBrace, n
            | '(' -> OpenParen, n
            | ')' -> CloseParen, n
            | '\'' -> SingleQuote, n
            | '`' -> Backtick, n
            | '~' -> accumulateSpliceUnquote n
            | '^' -> Caret, n
            | '@' -> At, n
            | '"' -> accumulateString n
            | ':' -> accumulateKeyword n
            // Guard the lookahead: a '-' at the very end of the input must
            // not index past the string; it falls through to the bare-token
            // case below instead.
            | '-' when n < len && isDigit str.[n] -> accumulateWhile isDigit Number p n
            | ch when isDigit ch -> accumulateWhile isDigit Number p n
            | ch when isTokenChar ch -> accumulateWhile isTokenChar Token p n
            | _ -> raise <| Error.unexpectedChar ()

    // Collects tokens in reverse until EOF, then restores source order.
    let rec accumulate acc p =
        match getToken p with
        | EOF, _ -> List.rev acc
        | tok, next -> accumulate (tok :: acc) next

    accumulate [] 0