Backport from sid to buster
[hcoop/debian/mlton.git] / ide / pygments / sml_lexer / __init__.py
1 # -*- coding: utf-8 -*-
2 """
3 Standard ML Lexer for Pygments.
4 """
5
6 import re
7
8 from pygments.lexer import RegexLexer, bygroups
9 from pygments.token import *
10
11
12 __all__ = ['StandardMLLexer']
13
14
class StandardMLLexer(RegexLexer):
    """
    A Standard ML lexer.

    Tokenizes numeric constants, string and character constants, (nested)
    comments, reserved words and possibly-qualified identifiers.

    Once a character is seen in the ``root`` state that no rule recognizes,
    the lexer enters the ``error`` state, which has no exit: the remainder
    of the input is then reported as ``Error`` tokens.
    """
    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun', '*.ML']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    # DOTALL lets '.' consume newlines (needed by the catch-all rules in the
    # 'comment', 'string', 'gap' and 'error' states).
    flags = re.DOTALL | re.MULTILINE

    # Reserved words that have the shape of an alphanumeric identifier.
    alphanumid_reserved = [
        # Core
        'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        # Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig',
        'signature', 'struct', 'structure', 'where'
    ]
    # Reserved words that have the shape of a symbolic identifier.
    symbolicid_reserved = [
        # Core
        ':', '|', '=', '=>', '->', '#',
        # Modules
        ':>'
    ]
    # Reserved words that cannot be mistaken for any identifier.
    nonid_reserved = [
        # Core
        '(', ')', '[', ']', '{', '}', ',', ';', '...', '_'
        # Modules
    ]

    alphanumid_re = r"[a-zA-Z][a-zA-Z0-9_']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"
    # group(1): zero or more 'StrId.' qualifiers; group(3): the final id.
    long_id_re = r"((%s\.)*)((%s)|(%s))" % (alphanumid_re, alphanumid_re, symbolicid_re)
    primed_alphanumid_re = r"'[a-zA-Z0-9_']*"

    def long_id_callback(self, match):
        """
        Token callback for a (possibly qualified) identifier.

        ``match`` is a match of ``long_id_re``.  Yields
        ``(index, tokentype, value)`` triples, where ``index`` is the
        absolute offset of ``value`` in the text being lexed:

        * each structure qualifier is ``Name`` (``Error`` if it is an
          alphanumeric reserved word), followed by ``Punctuation`` for
          its dot;
        * an unqualified final identifier is ``Keyword`` if it is an
          alphanumeric reserved word, ``Punctuation`` if it is a symbolic
          reserved word, and ``Name`` otherwise;
        * a qualified final identifier is ``Error`` if it is any reserved
          word, and ``Name`` otherwise.
        """
        # Pygments expects absolute offsets from a callback, so start the
        # running index at the beginning of the match rather than at 0.
        pos = match.start()
        strids = match.group(1)
        for m in re.finditer(r'(%s)(\.)' % self.alphanumid_re, strids):
            strid = m.group(1)
            if strid in self.alphanumid_reserved:
                token = Error
            else:
                token = Name
            yield pos, token, strid
            pos += len(strid)
            dot = m.group(2)
            yield pos, Punctuation, dot
            pos += len(dot)

        nqid = match.group(3)
        if strids == "":
            if nqid in self.alphanumid_reserved:
                token = Keyword
            elif nqid in self.symbolicid_reserved:
                token = Punctuation
            else:
                token = Name
        else:
            # A reserved word may not appear as the tail of a qualified id.
            if (nqid in self.alphanumid_reserved
                    or nqid in self.symbolicid_reserved):
                token = Error
            else:
                token = Name
        yield pos, token, nqid

    # One ordinary string character: anything except control characters,
    # the double quote, the backslash and DEL.
    printable_re = r'[^\x00-\x1F"\\\x7F]'
    # One escape sequence.  The caret of the control escape (\^A ... \^_)
    # must be escaped in the pattern: a bare '^' is a line-start anchor and
    # would never match right after the backslash.
    escape_re = r'\\("|\\|a|b|t|n|v|f|r|\^[@-_]|[0-9]{3}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Numeric constants; '~' is the negation sign.  The more
            # specific forms must come before the plain integer rule.
            (r'~?[0-9]+\.[0-9]+((e|E)~?[0-9]+)?', Number.Float),
            (r'~?[0-9]+(e|E)~?[0-9]+', Number.Float),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0w[0-9]+', Number.Integer),
            (r'~?[0-9]+', Number.Integer),

            # String constants, and character constants of the form #"c".
            (r'"', String, 'string'),
            (r'(#)(")', bygroups(Punctuation, String), 'string'),

            (long_id_re, long_id_callback),
            (r'(%s)' % '|'.join([re.escape(z) for z in nonid_reserved]), Punctuation),
            (primed_alphanumid_re, Name),

            # Anything else is an error; the 'error' state is never left.
            (r'.', Error, 'error')
        ],
        'error': [
            (r'.', Error)
        ],
        'comment': [
            # Comments nest: every '(*' pushes another 'comment' state.
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'.', Comment.Multiline),
        ],
        'string': [
            (printable_re, String),
            (escape_re, String.Escape),
            # Backslash followed by whitespace opens a gap (\ ... \).
            (r'\\\s', String, 'gap'),
            (r'"', String, '#pop'),
            (r'.', Error),
        ],
        'gap': [
            (r'\s+', String),
            (r'\\', String, '#pop'),
            (r'.', Error),
        ],
    }