| 1 | # -*- coding: utf-8 -*- |
| 2 | """ |
| 3 | Standard ML Lexer for Pygments. |
| 4 | """ |
| 5 | |
| 6 | import re |
| 7 | |
| 8 | from pygments.lexer import RegexLexer, bygroups |
| 9 | from pygments.token import * |
| 10 | |
| 11 | |
| 12 | __all__ = ['StandardMLLexer'] |
| 13 | |
| 14 | |
class StandardMLLexer(RegexLexer):
    """
    A lexer for the Standard ML programming language (core language plus
    the module system: signatures, structures, functors).
    """
    name = 'Standard ML'
    aliases = ['sml']
    filenames = ['*.sml', '*.sig', '*.fun', '*.ML']
    mimetypes = ['text/x-standardml', 'application/x-standardml']

    # DOTALL so `.` rules consume newlines inside comments/error runs;
    # MULTILINE is the usual Pygments default for line-oriented anchors.
    flags = re.DOTALL | re.MULTILINE

    # Reserved words that are lexically ordinary alphanumeric identifiers
    # (Definition of Standard ML, section 2.1 / appendix B).
    alphanumid_reserved = [
        ## Core
        'abstype', 'and', 'andalso', 'as', 'case', 'do', 'datatype', 'else',
        'end', 'exception', 'fn', 'fun', 'handle', 'if', 'in', 'infix',
        'infixr', 'let', 'local', 'nonfix', 'of', 'op', 'open', 'orelse',
        'raise', 'rec', 'then', 'type', 'val', 'with', 'withtype', 'while',
        ## Modules
        'eqtype', 'functor', 'include', 'sharing', 'sig',
        'signature', 'struct', 'structure', 'where'
    ]
    # Reserved words that are lexically symbolic identifiers.
    symbolicid_reserved = [
        ## Core
        ':', '|', '=', '=>', '->', '#',
        ## Modules
        ':>'
    ]
    # Reserved tokens that are not identifiers at all (pure punctuation).
    nonid_reserved = [
        ## Core
        '(', ')', '[', ']', '{', '}', ',', ';', '...', '_'
        ## Modules
    ]

    # Lexical classes of identifiers.
    alphanumid_re = r"[a-zA-Z][a-zA-Z0-9_']*"
    symbolicid_re = r"[!%&$#+\-/:<=>?@\\~`^|*]+"
    # A long (possibly qualified) identifier: zero or more `Strid.` prefixes
    # followed by an alphanumeric or symbolic identifier.
    long_id_re = r"((%s\.)*)((%s)|(%s))" % (alphanumid_re, alphanumid_re, symbolicid_re)
    # Type variables: 'a, ''eq, etc.
    primed_alphanumid_re = r"'[a-zA-Z0-9_']*"

    def long_id_callback(self, match):
        """Tokenize a possibly-qualified long identifier.

        Yields one ``(index, token, value)`` triple per structure-identifier
        prefix, per ``.`` separator, and for the final unqualified id.
        Reserved words are only legal unqualified; inside a qualified name
        they are flagged as ``Error``.
        """
        strids = match.group(1)
        # Lexer callbacks must yield ABSOLUTE offsets into the input text
        # (as bygroups does via match.start(i)), not match-relative ones.
        pos = match.start()
        for m in re.finditer(r'(%s)(\.)' % self.alphanumid_re, strids):
            strid = m.group(1)
            # A reserved word cannot serve as a structure identifier.
            if strid in self.alphanumid_reserved:
                token = Error
            else:
                token = Name
            yield pos, token, strid
            pos += len(strid)
            dot = m.group(2)
            yield pos, Punctuation, dot
            pos += len(dot)
        nqid = match.group(3)
        if strids == "":
            # Unqualified: reserved words keep their keyword/punctuation role.
            if nqid in self.alphanumid_reserved:
                token = Keyword
            elif nqid in self.symbolicid_reserved:
                token = Punctuation
            else:
                token = Name
        else:
            # Qualified: reserved words may not appear after a `Strid.`.
            if nqid in self.alphanumid_reserved:
                token = Error
            elif nqid in self.symbolicid_reserved:
                token = Error
            else:
                token = Name
        yield pos, token, nqid
        pos += len(nqid)

    # Characters allowed verbatim inside a string literal.
    printable_re = r'[^\x00-\x1F"\\\x7F]'
    # String escapes: \" \\ \a \b \t \n \v \f \r, control escapes \^C,
    # decimal \ddd, and Unicode \uXXXX / \UXXXXXXXX.
    # NOTE: the caret must be escaped — a bare `^` is a line anchor, not
    # a literal caret, so `\^C` escapes would otherwise never match.
    escape_re = r'\\("|\\|a|b|t|n|v|f|r|\^[@-_]|[0-9]{3}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})'

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'\(\*', Comment.Multiline, 'comment'),

            # Numeric literals; SML uses `~` for unary minus.
            (r'~?[0-9]+\.[0-9]+((e|E)~?[0-9]+)?', Number.Float),
            (r'~?[0-9]+(e|E)~?[0-9]+', Number.Float),
            (r'0wx[0-9a-fA-F]+', Number.Hex),
            (r'~?0x[0-9a-fA-F]+', Number.Hex),
            (r'0w[0-9]+', Number.Integer),
            (r'~?[0-9]+', Number.Integer),

            # String and character literals (#"c" starts like a string).
            (r'"', String, 'string'),
            (r'(#)(")', bygroups(Punctuation, String), 'string'),

            (long_id_re, long_id_callback),
            (r'(%s)' % '|'.join([re.escape(z) for z in nonid_reserved]), Punctuation),
            (primed_alphanumid_re, Name),

            # Anything else is a lexical error; once entered, the `error`
            # state consumes the remainder of the input as Error.
            (r'.', Error, 'error')
        ],
        'error': [
            (r'.', Error)
        ],
        'comment': [
            # SML comments nest.
            (r'\(\*', Comment.Multiline, '#push'),
            (r'\*\)', Comment.Multiline, '#pop'),
            (r'.', Comment.Multiline),
        ],
        'string': [
            (printable_re, String),
            (escape_re, String.Escape),
            # A backslash followed by whitespace opens a line-continuation gap.
            (r'\\\s', String, 'gap'),
            (r'"', String, '#pop'),
            (r'.', Error),
        ],
        'gap': [
            (r'\s+', String),
            (r'\\', String, '#pop'),
            (r'.', Error),
        ],
    }