Commit | Line | Data |
---|---|---|
7f918cf1 CE |
1 | # -*- coding: utf-8 -*- |
2 | """ | |
3 | Standard ML Lexer for Pygments. | |
4 | """ | |
5 | ||
6 | import re | |
7 | ||
8 | from pygments.lexer import RegexLexer, bygroups | |
9 | from pygments.token import * | |
10 | ||
11 | ||
12 | __all__ = ['StandardMLLexer'] | |
13 | ||
14 | ||
class StandardMLLexer(RegexLexer):
    """
    A Standard ML lexer.

    Identifiers (optionally qualified with dotted structure names) are
    matched by a single rule and classified afterwards by
    `long_id_callback`, which compares each component against the
    reserved-word tables below.  Comments nest via the ``comment`` state;
    strings, including backslash "gaps", are handled by the ``string`` and
    ``gap`` states.
    """
    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun', '*.ML']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    # DOTALL lets the catch-all '.' rules also consume newlines.
    flags = re.DOTALL | re.MULTILINE

    # Alphanumeric identifiers that are reserved words.
    alphanumid_reserved = [
        ## Core
        'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        ## Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig',
        'signature', 'struct', 'structure', 'where'
    ]
    # Symbolic identifiers that are reserved (matched by symbolicid_re and
    # then reclassified as punctuation by long_id_callback).
    symbolicid_reserved = [
        ## Core
        ':', '|', '=', '=>', '->', '#',
        ## Modules
        ':>'
    ]
    # Reserved tokens that are not identifiers at all; these are matched by
    # their own alternation rule in the 'root' state.
    nonid_reserved = [
        ## Core
        '(', ')', '[', ']', '{', '}', ',', ';', '...', '_'
        ## Modules
    ]

    alphanumid_re = r"[a-zA-Z][a-zA-Z0-9_']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"
    # Group 1: the (possibly empty) run of "Strid." qualifiers.
    # Group 3: the final, unqualified identifier (alphanumeric or symbolic).
    # long_id_callback relies on exactly this group numbering.
    long_id_re = r"((%s\.)*)((%s)|(%s))" % (alphanumid_re, alphanumid_re,
                                            symbolicid_re)
    primed_alphanumid_re = r"'[a-zA-Z0-9_']*"

    def long_id_callback(self, match):
        """
        Split a match of `long_id_re` into tokens.

        Yields ``(index, tokentype, value)`` tuples where ``index`` is the
        absolute offset of ``value`` in the text being lexed.

        * Every ``Strid`` qualifier yields `Name` followed by a
          `Punctuation` token for the dot; a qualifier that is a reserved
          word yields `Error` instead of `Name`.
        * An unqualified final identifier yields `Keyword` if it is an
          alphanumeric reserved word, `Punctuation` if it is a reserved
          symbol, and `Name` otherwise.
        * A qualified final identifier yields `Error` if it is reserved in
          either table, and `Name` otherwise.
        """
        strids = match.group(1)
        # Offsets must be relative to the whole input, not to this match,
        # so anchor everything on the match's own start positions.
        base = match.start(1)
        for m in re.finditer(r'(%s)(\.)' % self.alphanumid_re, strids):
            strid = m.group(1)
            if strid in self.alphanumid_reserved:
                token = Error
            else:
                token = Name
            yield base + m.start(1), token, strid
            yield base + m.start(2), Punctuation, m.group(2)

        nqid = match.group(3)
        qualified = strids != ""
        if nqid in self.alphanumid_reserved:
            # Reserved words may not be qualified.
            token = Error if qualified else Keyword
        elif nqid in self.symbolicid_reserved:
            # Neither may reserved symbols.
            token = Error if qualified else Punctuation
        else:
            token = Name
        yield match.start(3), token, nqid

    printable_re = r'[^\x00-\x1F"\\\x7F]'
    # The caret in the control-character alternative must be escaped: a bare
    # '^' is a line anchor, which can never match right after the backslash,
    # so escapes of the form \^A would otherwise never be recognised.
    escape_re = r'\\("|\\|a|b|t|n|v|f|r|\^[@-_]|[0-9]{3}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Numeric literals; each accepts an optional leading '~' except
            # the word ('0w' / '0wx') forms.  Floats are tried first so the
            # integer rules do not consume their integral part.
            (r'~?[0-9]+\.[0-9]+((e|E)~?[0-9]+)?', Number.Float),
            (r'~?[0-9]+(e|E)~?[0-9]+', Number.Float),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0w[0-9]+', Number.Integer),
            (r'~?[0-9]+', Number.Integer),

            (r'"', String, 'string'),
            # '#' immediately followed by a string; must precede the
            # identifier rule, whose symbolic class also contains '#'.
            (r'(#)(")', bygroups(Punctuation, String), 'string'),

            (long_id_re, long_id_callback),
            (r'(%s)' % '|'.join([re.escape(z) for z in nonid_reserved]),
             Punctuation),
            (primed_alphanumid_re, Name),

            # Anything else is an error.  The 'error' state has no way
            # out, so all remaining input is emitted as Error as well.
            (r'.', Error, 'error')
        ],
        'error': [
            (r'.', Error)
        ],
        'comment': [
            # '#push' / '#pop' keep track of the nesting depth.
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'.', Comment.Multiline),
        ],
        'string': [
            (printable_re, String),
            (escape_re, String.Escape),
            # Backslash followed by whitespace opens a gap, which runs
            # until the next backslash.
            (r'\\\s', String, 'gap'),
            (r'"', String, '#pop'),
            (r'.', Error),
        ],
        'gap': [
            (r'\s+', String),
            (r'\\', String, '#pop'),
            (r'.', Error),
        ],
    }