1 (* Copyright (C
) 2009 Matthew Fluet
.
2 * Copyright (C
) 1999-2006 Henry Cejtin
, Matthew Fluet
, Suresh
3 * Jagannathan
, and Stephen Weeks
.
5 * MLton is released under a BSD
-style license
.
6 * See the file MLton
-LICENSE for details
.
9 (* Many
of the algorithms
in this module are based on
10 * Compilers
: Principles
, Techniques
, and Tools by Aho
, Sethi
, and Ullman
,
11 * which I will refer to
in comments
as the Dragon Book
.
14 fun ++ (r
: int ref
): int =
21 val numChars
: int = Char.maxOrd
+ 1
24 val validCharsString
=
25 "\n\t@abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ()[]<>!?-&#;'/=\"$.\\"
28 Vector.tabulate (numChars
, fn i
=>
29 String.contains (validCharsString
, Char.fromInt i
))
31 fun edgeLabel (cs
: char list
): string =
33 val chars
= implode cs
34 val n
= String.size chars
35 val numValidChars
= String.size validCharsString
40 (if n
>= Int.quot (numValidChars
, 2)
41 then (* Character complement
. *)
45 (validCharsString
, fn c
=>
49 else if (1 = String.size chars
50 andalso not (String.contains
51 (". ", String.sub (chars
, 0))))
53 else concat
["[", chars
, "]"])
56 Trace
.trace ("Regexp.edgeLabel", List.layout
Char.layout
, String.layout
)
62 datatype t
= T
of {index
: int ref
}
64 fun layout (T
{index
, ...}) =
68 seq
[str
"Save ", Int.layout (!index
)]
71 fun new () = T
{index
= ref ~
1}
73 fun equals (T
{index
= i
, ...}, T
{index
= i
', ...}) = i
= i
'
75 fun assign (T
{index
, ...}, i
) = index
:= i
77 fun index (T
{index
, ...}) = !index
79 val index
= Trace
.trace ("Regexp.Save.index", layout
, Int.layout
) index
87 | CharSet
of char
-> bool
93 fun layout (r
: t
): Layout
.t
=
98 AnchorFinish
=> str
"AnchorFinish"
99 | AnchorStart
=> str
"AnchorStart"
102 str (edgeLabel (Int.foldDown
103 (0, numChars
, [], fn (i
, ac
) =>
105 val c
= Char.fromInt i
112 | Or rs
=> seq
[str
"Or ", List.layout layout rs
]
113 | Seq rs
=> seq
[str
"Seq ", List.layout layout rs
]
114 |
Save (r
, s
) => seq
[str
"Save ",
115 Layout
.tuple
[layout r
, Save
.layout s
]]
116 | Star r
=> seq
[str
"Star ", layout r
]
119 val toString
= Layout
.toString
o layout
126 val clear
: 'a t
-> unit
127 val foreach
: 'a t
* ('a
-> unit
) -> unit
128 val new
: int * 'a
-> 'a t
129 val peekMap
: 'a t
* ('a
->'b option
) -> 'b option
130 val push
: 'a t
* 'a
-> unit
133 datatype 'a t
= T
of {elts
: 'a array
,
136 fun new (size
: int, dummy
: 'a
): 'a t
=
137 T
{elts
= Array
.new (size
, dummy
),
140 fun push (T
{elts
, size
}, x
) =
143 val _
= Array
.update (elts
, n
, x
)
144 val _
= size
:= n
+ 1
148 fun clear (T
{size
, ...}) = size
:= 0
150 fun foreach (T
{elts
, size
, ...}, f
) =
151 Int.for (0, !size
, fn i
=> f (Array
.sub (elts
, i
)))
153 fun peekMap (T
{elts
, size
, ...}, f
) =
159 else (case f (Array
.sub (elts
, i
)) of
168 (* The states
in an NFA are indexed from
0 to n
-1, where n is the number
175 val layout
= Int.layout
178 structure MatchAction
=
185 fn (Finish s
, Finish s
') => Save
.equals (s
, s
')
186 |
(Start s
, Start s
') => Save
.equals (s
, s
')
194 Finish s
=> seq
[str
"Finish ", Save
.layout s
]
195 | Start s
=> seq
[str
"Start ", Save
.layout s
]
201 datatype t
= T
of {all
: Substring
.t
,
202 matches
: (Save
.t
* Substring
.t
) array
}
204 fun all (T
{all
, ...}) = all
206 val startLength
= #
2 o Substring
.base
o all
208 val endOf
= Substring
.endOf
o all
210 val length
= Substring
.length
o all
212 fun layout (T
{all
, matches
}) =
214 in tuple
[Substring
.layout all
,
215 Array
.layout (Layout
.tuple2
216 (Save
.layout
, Substring
.layout
)) matches
]
219 fun funs (T
{matches
, ...}) =
221 fun peek (s
: Save
.t
): Substring
.t option
=
222 Option
.map (Array
.peek (matches
, fn (s
', _
) =>
223 Save
.equals (s
, s
')),
225 in {exists
= Option
.isSome
o peek
,
226 lookup
= valOf
o peek
,
232 val {peek
, lookup
, exists
} = funs m
235 lookup
= Substring
.toString
o lookup
,
236 peek
= fn s
=> Option
.map (peek s
, Substring
.toString
)}
240 fun make
sel (m
, s
) = sel (funs m
) s
242 val peek
= make #peek
243 val lookup
= make #lookup
244 val exists
= make #exists
247 fun peekString (m
, s
) = Option
.map (peek (m
, s
), Substring
.toString
)
248 val lookupString
= Substring
.toString
o lookup
253 datatype t
= T
of (int * MatchAction
.t vector
) list
256 List.layout (Layout
.tuple2 (Int.layout
,
257 Vector.layout MatchAction
.layout
))
262 fun add (a
as T l
, i
, v
: MatchAction
.t vector
) =
270 structure State
= State
271 (* State i is final iff
isSome (Array
.sub (final
, i
)).
272 * Characters are grouped into equivalence classes
, represented by
273 * integers
in [0, numCharClasses
).
274 * The equivalence class
of c is Array
.sub (charClass
, Char.toInt c
).
275 * The dimensions
of next is numStates x numCharClasses
.
276 * The outgoing states from state i on input char c are given by
277 * Array2
.sub (next
, i
, Array
.sub (charClass
, Char.to
Int c
)).
278 * seen
, stack1
, and stack2 are used
in the two stack simulation
of
279 * the
NFA (see
fun match
). We preallocate them
as part
of the NFA
280 * so they don
't have to be allocated on each call to match
.
283 T
of {anchorStarts
: (State
.t
* MatchAction
.t vector
) vector
,
284 charClass
: int array
, (* of length numChars
*)
285 final
: {actions
: MatchAction
.t vector
,
286 requireFinish
: bool} option array
,
287 next
: (State
.t
* MatchAction
.t vector
) array Array2
.t
,
288 saves
: Save
.t vector
,
290 stack1
: (State
.t
* Actions
.t
) Stack
.t
,
291 stack2
: (State
.t
* Actions
.t
) Stack
.t
,
295 (* Non
-deterministic Finite Automaton
. *)
302 val layout
: t
-> Layout
.t
305 datatype t
= datatype NFA
.t
307 val fromRegexp
: Regexp
.t
-> t
308 val layoutDot
: t
* string (* title
*) -> Layout
.t
312 startPos
: int} -> (int * Actions
.t
) option
313 val numCharClasses
: t
-> int
314 val numStates
: t
-> int
315 val saves
: t
-> Save
.t vector
320 fun numStates (T
{next
, ...}) = Array2
.nRows next
321 fun numCharClasses (T
{next
, ...}) = Array2
.nCols next
322 fun saves (T
{saves
, ...}) = saves
324 (* Simulating an NFA
with two stacks
and a bit vector
, as in Algorithm
325 * 3.4 (page
126) of the Dragon Book
.
327 fun match
{nfa
as T
{anchorStarts
, charClass
, final
,
328 next
, stack1
, stack2
, start
, ...},
331 startPos
}: (int * Actions
.t
) option
=
333 val numStates
= numStates nfa
334 val n
= String.size s
335 val seen
= Array
.array (numStates
, false)
336 fun loop (current
, nextStates
, i
: int,
337 last
: (int * Actions
.t
) option
)
338 : (int * Actions
.t
) option
=
342 (current
, fn (s
, a
) =>
343 case Array
.sub (final
, s
) of
345 | SOME
{actions
, requireFinish
} =>
346 if requireFinish
andalso i
< n
348 else SOME (i
, Actions
.add (a
, i
, actions
)))) of
354 orelse (short
andalso isSome last
)
355 then (Stack
.clear current
359 val _
= Array
.modify (seen
, fn _
=> false)
360 val c
= Array
.sub (charClass
,
361 Char.toInt (String.sub (s
, i
)))
364 (current
, fn (s
, a
) =>
366 (Array2
.sub (next
, s
, c
),
368 if Array
.sub (seen
, s
')
370 else (Array
.update (seen
, s
', true)
373 (s
', Actions
.add (a
, i
, v
)))))))
374 val _
= Stack
.clear current
375 in loop (nextStates
, current
, i
+ 1, last
)
378 val _
= Stack
.push (stack1
, (start
, Actions
.empty
))
382 (anchorStarts
, fn (s
, v
) =>
385 (s
, Actions
.add (Actions
.empty
, startPos
, v
)))))
388 loop (stack1
, stack2
, startPos
, NONE
)
391 (* This conversion from a regular expression to an NFA is based on
392 * Section
3.9 (pages
134 -- 140) of the Dragon Book
.
394 * It creates one NFA state for each
CharSet (called a
"position") that
395 * is
in the regexp
. There is also one extra state for the start state
.
396 * It adds edges
as in rules
1 and 2 (page
138) for the followpos
399 fun fromRegexp (r
: Regexp
.t
): t
=
401 fun loop (r
, ac
as (saves
, numPos
)) =
406 AnchorFinish
=> (saves
, numPos
+ 1)
407 | AnchorStart
=> (saves
, numPos
+ 1)
408 | CharSet _
=> (saves
, numPos
+ 1)
409 | Or rs
=> List.fold (rs
, ac
, loop
)
410 |
Save (r
, s
) => loop (r
, (s
:: saves
, numPos
))
411 | Seq rs
=> List.fold (rs
, ac
, loop
)
412 | Star r
=> loop (r
, ac
)
414 val (saves
, numPos
) = loop (r
, ([], 0))
415 val saves
= Vector.fromList saves
416 val numStates
= numPos
+ 1
418 val posCounter
= ref ~
1
419 val follow
: MatchAction
.t vector option Array2
.t
=
420 Array2
.new (numStates
, numStates
, NONE
)
421 val posChars
= Array2
.tabulate (numPos
, numChars
, fn _
=> false)
423 (* Sets
of positions represented
as vectors
of length numPos
.
425 datatype t
= T
of MatchAction
.t vector option vector
428 fun lookup (T v
, s
) = Vector.sub (v
, s
)
429 val empty
: t
= T (Vector.new (numPos
, NONE
))
430 fun addActions (T v
, a
) =
433 Option
.map (opt
, fn a
' => Vector.concat
[a
, a
'])))
434 fun addAction (s
, a
) = addActions (s
, Vector.new1 a
)
435 fun union (T v
, T v
'): t
=
438 case (Vector.sub (v
, i
), Vector.sub (v
', i
)) of
441 | _
=> Error
.bug
"Regexp.NFA.fromRegexp.union"))
442 fun singleton (i
: int): t
=
443 T (Vector.tabulate (numPos
, fn j
=>
445 then SOME (Vector.new0 ())
447 fun foreach (T v
, f
) =
448 Vector.foreachi (v
, fn (i
, opt
) =>
451 | SOME a
=> f (i
, a
))
453 fun connect (v
, v
') =
458 Array2
.update (follow
, s
, s
',
459 SOME (Vector.concat
[a
, a
']))))
460 val anchorFinishes
= ref
[]
461 val anchorStarts
= ref
[]
464 val i
= ++ posCounter
465 val _
= List.push (r
, i
)
466 val first
= singleton i
472 (* The following loop fills
in follow
and posChars
.
473 * first set
of positions that
474 * nullable is SOME v iff the regexp is nullable
, where v is the
475 * sequence
of actions to perform
if the expression is null
.
477 fun loop (r
: Regexp
.t
): {first
: set
,
479 nullable
: MatchAction
.t vector option
} =
481 Regexp
.AnchorFinish
=> anchor anchorFinishes
482 | Regexp
.AnchorStart
=> anchor anchorStarts
483 | Regexp
.CharSet f
=>
485 val i
= ++ posCounter
488 (0, numChars
, fn c
=>
490 then Array2
.update (posChars
, i
, c
, true)
492 val first
= singleton i
502 fn (r
, {first
= f
, last
= l
, nullable
= n
}) =>
504 val {first
= f
', last
= l
', nullable
= n
'} =
507 {first
= union (f
, f
'),
508 last
= union (l
, l
'),
509 nullable
= if isSome n
then n
else n
'}
511 | Regexp
.Save (r
, s
) =>
513 val {first
= f
, last
= l
, nullable
= n
} = loop r
514 val start
= MatchAction
.Start s
515 val finish
= MatchAction
.Finish s
517 {first
= addAction (f
, start
),
518 last
= addAction (l
, finish
),
519 nullable
= Option
.map (n
, fn v
=>
523 Vector.new1 finish
])}
529 nullable
= SOME (Vector.new0 ())},
530 fn (r
, {first
= f
, last
= l
, nullable
= n
}) =>
532 val {first
= f
', last
= l
', nullable
= n
'} =
534 val _
= connect (l
, f
')
538 | SOME v
=> union (f
, addActions (f
', v
))
542 | SOME v
=> union (l
', addActions (l
, v
))
546 nullable
= (case (n
, n
') of
548 SOME (Vector.concat
[v
, v
'])
553 val {first
= f
, last
= l
, ...} = loop r
554 val _
= connect (l
, f
)
556 {first
= f
, last
= l
,
557 nullable
= SOME (Vector.new0 ())}
559 val {first
, last
, nullable
} = loop r
561 fun extract (anchors
, positions
) =
565 Option
.map (lookup (positions
, s
), fn v
=> (s
, v
))),
568 (* Any anchor starts
in first should be anchor starts
. *)
569 val anchorStarts
= extract (anchorStarts
, first
)
570 (* Any anchor finishes
in last should be anchor finishes
*)
571 val anchorFinishes
= extract (anchorFinishes
, last
)
573 (* The positions
in first are reachable from the start state
. *)
574 val _
= foreach (first
, fn (i
, a
) =>
575 Array2
.update (follow
, start
, i
, SOME a
))
576 val final
= Array
.array (numStates
, NONE
)
577 (* The positions that are followed by an anchorFinish are final
,
578 * with requireFinish
= true.
582 (anchorFinishes
, fn (j
, _
) =>
584 (0, numStates
, fn i
=>
585 case Array2
.sub (follow
, i
, j
) of
588 Array
.update (final
, i
, SOME
{actions
= a
,
589 requireFinish
= true})))
590 (* The positions
in last are all final
. *)
592 foreach (last
, fn (i
, a
) =>
593 Array
.update (final
, i
, SOME
{actions
= a
,
594 requireFinish
= false}))
595 (* The start state is final iff the whole regexp is nullable
. *)
600 Array
.update (final
, start
,
602 requireFinish
= false})
603 (* Compute the transition table
, "next". *)
604 val tmp
: MatchAction
.t vector option Array
.t
=
605 Array
.new (numStates
, NONE
)
608 (numStates
, numChars
, fn (i
, c
) =>
613 case Array2
.sub (follow
, i
, j
) of
616 if Array2
.sub (posChars
, j
, c
)
617 then Array
.update (tmp
, j
, SOME a
)
620 Array
.keepAllMapi (tmp
, fn (i
, opt
) =>
621 Option
.map (opt
, fn v
=> (i
, v
)))
622 val _
= Int.for (0, numStates
, fn j
=>
623 Array
.update (tmp
, j
, NONE
))
627 (* Two characters are equivalent
if all states treat them the
630 fun charEquiv (c
: int, c
': int) =
632 (0, numStates
, fn i
=>
634 (Array2
.sub (next
, i
, c
),
635 Array2
.sub (next
, i
, c
'),
636 fn ((j
, v
), (j
', v
')) =>
637 j
= j
' andalso Vector.equals (v
, v
', MatchAction
.equals
)))
638 (* Compute charClass
. *)
639 val repCounter
= ref ~
1
640 val reps
= ref
[] (* representative
of each char class
*)
641 val charClass
= Array
.new (numChars
, ~
1)
643 Int.for (0, numChars
, fn c
=>
646 case List.peek (!reps
, fn {char
, ...} =>
647 charEquiv (c
, char
)) of
650 val rep
= ++ repCounter
651 in List.push (reps
, {char
= c
, rep
= rep
})
654 | SOME
{rep
, ...} => rep
655 in Array
.update (charClass
, c
, rep
)
657 val numClasses
= 1 + !repCounter
658 (* Compute
"next" for the charClasses
. *)
660 Array2
.new (numStates
, numClasses
, Array
.fromList
[])
663 (!reps
, fn {char
, rep
} =>
664 Int.for (0, numStates
, fn state
=>
665 Array2
.update (next
', state
, rep
,
666 Array2
.sub (next
, state
, char
))))
668 T
{anchorStarts
= anchorStarts
,
669 charClass
= charClass
,
673 seen
= Array
.new (numStates
, false),
674 stack1
= Stack
.new (numStates
, (~
1, Actions
.empty
)),
675 stack2
= Stack
.new (numStates
, (~
1, Actions
.empty
)),
679 structure Graph
= DirectedGraph
680 fun layoutDot (T
{anchorStarts
, charClass
, final
, next
, start
, ...},
681 title
: string): Layout
.t
=
683 val numStates
= Array2
.nRows next
686 val nodes
= Vector.tabulate (numStates
, fn _
=> Graph
.newNode g
)
687 fun node i
= Vector.sub (nodes
, i
)
688 val {get
= nodeOptions
, ...} =
689 Property
.get (Graph
.Node
.plist
,
691 (fn _
=> let open NodeOption
694 val {get
= edgeOptions
, ...} =
695 Property
.get (Graph
.Edge
.plist
,
697 (fn _
=> let open EdgeOption
700 fun addNodeOption (i
, opts
) =
701 let val r
= nodeOptions (node i
)
704 val _
= addNodeOption (start
, [NodeOption
.label
"start"])
707 (0, numStates
, fn src
=>
710 case (isSome (Array
.sub (final
, src
)),
711 Vector.exists (anchorStarts
, fn (s
, _
) =>
713 (false, false) => Ellipse
714 |
(true, false) => Box
715 |
(false, true) => Diamond
716 |
(true, true) => Polygon
{sides
= 5, options
= []}
718 addNodeOption (src
, let open NodeOption
721 val dsts
= Array
.new (numStates
, [])
724 (0, numChars
, fn c
=>
725 if Vector.sub (validChars
, c
)
728 val char
= Char.fromInt c
729 val class
= Array
.sub (charClass
, c
)
731 (Array2
.sub (next
, src
, class
), fn (dst
, _
) =>
732 (Array
.update (dsts
, dst
,
733 char
:: Array
.sub (dsts
, dst
))))
738 (dsts
, fn (dst
, cs
) =>
743 val edge
= Graph
.addEdge (g
, {from
= node src
,
745 in List.push (edgeOptions edge
,
746 EdgeOption
.label (edgeLabel cs
))
750 Graph
.layoutDot (g
, fn {nodeName
} =>
756 Rank (Min
, [{nodeName
= nodeName (node start
)}])
759 edgeOptions
= ! o edgeOptions
,
760 nodeOptions
= ! o nodeOptions
})
768 val fromNFA
: NFA
.t
-> t
769 val layoutDot
: {dfa
: t
,
771 title
: string} -> Layout
.t
776 anchorStart
: bool} -> (int * Actions
.t
) option
778 val saves
: t
-> Save
.t vector
781 (* The states
in a DFA are indexed from
0 to n
-1, where n is the number
788 val layout
= Int.layout
793 structure EdgeAction
=
798 actions
: MatchAction
.t vector
}
800 actions
: MatchAction
.t vector
}
803 fn (Add
{from
= f
, to
= t
, actions
= a
},
804 Add
{from
= f
', to
= t
', actions
= a
'}) =>
805 f
= f
' andalso t
= t
'
806 andalso Vector.equals (a
, a
', MatchAction
.equals
)
807 |
(Init
{to
= t
, actions
= a
},
808 Init
{to
= t
', actions
= a
'}) =>
809 t
= t
' andalso Vector.equals (a
, a
', MatchAction
.equals
)
813 fn Add
{from
, to
, actions
} =>
815 Int.toString from
, ", ",
816 Int.toString to
, ", ",
818 (Vector.layout MatchAction
.layout actions
),
820 | Init
{to
, actions
} =>
822 Int.toString to
, ", ",
824 (Vector.layout MatchAction
.layout actions
),
830 fn Add
{from
, to
, actions
} =>
832 [("from", Int.layout from
),
833 ("to", Int.layout to
),
835 Vector.layout MatchAction
.layout actions
)]
836 | Init
{actions
, to
} =>
838 [("to", Int.layout to
),
840 Vector.layout MatchAction
.layout actions
)]
844 (* State i is final iff Array
.sub (final
, i
).
845 * Characters are grouped into equivalence classes
, represented by
846 * integers
in [0, numCharClasses
).
847 * The equivalence class
of c is Array
.sub (charClass
, Char.toInt c
).
848 * The dimensions
of next are numStates x numCharClasses
849 * The outgoing state from state i on input char c is
850 * Array2
.sub (next
, i
, Array
.sub (charClass
, Char.toInt c
)).
851 * actions1
and actions2 are used only during matching
. They
852 * represent the actions associated
with each NFA state
. They are
of
853 * the same length
as the number
of states
in the NFA
.
856 T
of {anchorStart
: State
.t
,
857 anchorStartStack
: MatchAction
.t vector vector
,
858 charClass
: int array
, (* of length numChars
*)
860 final
: {actions
: MatchAction
.t vector
,
862 slot
: int} option array
,
863 next
: (State
.t
* EdgeAction
.t vector
) Array2
.t
,
864 saves
: Save
.t vector
,
865 stack1
: Actions
.t array
, (* of size maxNumNFAStates
*)
866 stack2
: Actions
.t array
, (* of size maxNumNFAStates
*)
868 startStack
: MatchAction
.t vector vector
}
870 fun numStates (T
{next
, ...}): int = Array2
.nRows next
871 fun saves (T
{saves
, ...}) = saves
873 fun dead (numStates
, numCharClasses
, final
, next
) =
876 not (isSome (Array
.sub (final
, i
)))
877 andalso Int.forall (0, numCharClasses
, fn c
=>
878 let val (j
, v
) = Array2
.sub (next
, i
, c
)
879 in i
= j
andalso Vector.isEmpty v
882 (* To build a DFA from an NFA
, I use the usual
"subset construction",
883 * as in algorithm
3.2 (page
118) of the Dragon Book
.
885 * It associates
each (reachable
) set
of states
in the NFA
with a single
888 fun fromNFA (nfa
as NFA
.T
{anchorStarts
, charClass
,
889 final
, next
, saves
, start
, ...}) =
891 val numNFAStates
= NFA
.numStates nfa
892 val numCharClasses
= NFA
.numCharClasses nfa
893 (* Determine the NFA states that have save info
.
895 val nfaStateSave
= Array
.array (numNFAStates
, false)
896 fun visit (s
: NFA
.State
.t
): unit
=
897 if Array
.sub (nfaStateSave
, s
)
899 else (Array
.update (nfaStateSave
, s
, true)
900 ; Int.for (0, numCharClasses
, fn c
=>
902 (Array2
.sub (next
, s
, c
), fn (s
', _
) =>
906 (anchorStarts
, fn (s
, v
) =>
911 Int.for (0, numNFAStates
, fn s
=>
912 if Array
.sub (nfaStateSave
, s
)
915 Int.for (0, numCharClasses
, fn c
=>
917 (Array2
.sub (next
, s
, c
), fn (s
', v
) =>
921 (* Sets
of states are represented
as arrays
, sorted
in increasing
922 * order
of state index
.
924 type states
= NFA
.State
.t array
929 out
: (State
.t
* EdgeAction
.t vector
) vector option ref
}
930 val cache
: work list ref
= ref
[]
931 val todo
: work list ref
= ref
[]
932 val maxNumStates
: int ref
= ref
0
933 fun statesToState (ss
: states
): State
.t
=
935 val n
= Array
.length ss
936 val _
= if n
> !maxNumStates
937 then maxNumStates
:= n
940 case List.peek (!cache
, fn {states
, ...} =>
941 Array
.equals (ss
, states
, op =)) of
944 val state
= ++ counter
945 val work
= {out
= ref NONE
,
948 val _
= List.push (cache
, work
)
949 val _
= List.push (todo
, work
)
953 | SOME
{state
, ...} => state
956 Trace
.trace ("Regexp.DFA.fromNFA.statesToState",
957 Array
.layout NFA
.State
.layout
,
961 val seen
= Array
.array (NFA
.numStates nfa
, NONE
)
963 fun computeOut states
=
965 (numCharClasses
, fn c
=>
967 val _
= Array
.modify (seen
, fn _
=> NONE
)
970 (states
, fn (fromSlot
: slot
,
971 fromState
: NFA
.State
.t
) =>
973 (Array2
.sub (next
, fromState
, c
),
974 fn (toState
: NFA
.State
.t
, v
) =>
975 case Array
.sub (seen
, toState
) of
979 SOME
{fromSlot
= fromSlot
,
980 fromState
= fromState
,
984 val toStates
= Array
.keepAllMap (seen
, fn opt
=> opt
)
985 val edgeActions
= ref
[]
988 (toStates
, fn (toSlot
: slot
,
989 {fromSlot
, fromState
, toState
,
991 (if Array
.sub (nfaStateSave
, toState
)
995 if Array
.sub (nfaStateSave
, fromState
)
1001 else (EdgeAction
.Init
1003 actions
= actions
}))
1006 in (statesToState toStates
,
1007 Vector.fromList (!edgeActions
))
1013 |
{states
, out
, ...} :: rest
=>
1015 ; out
:= SOME (computeOut states
)
1017 (* These calls to statesToState initialize the worklist
. *)
1018 val start
' = statesToState (Array
.fromList
[start
])
1019 val startStack
= Vector.new1 (Vector.new0 ())
1020 val anchorStartStates
=
1023 (Vector.toListMap (anchorStarts
, #
1), start
, op <=))
1024 val anchorStart
' = statesToState anchorStartStates
1025 val anchorStartStack
=
1027 (Array
.length anchorStartStates
,
1030 val s
= Array
.sub (anchorStartStates
, i
)
1032 case Vector.peek (anchorStarts
, fn (s
', _
) => s
= s
') of
1033 NONE
=> Vector.new0 ()
1037 (* The worklist is empty
. Compute the transition table
. *)
1038 val numStates
= 1 + !counter
1039 val next
' = Array2
.new (numStates
, numCharClasses
,
1040 (~
1, Vector.new0 ()))
1041 val final
' = Array
.new (numStates
, NONE
)
1044 (!cache
, fn {states
, state
= i
, out
, ...}: work
=>
1048 (valOf (! out
), fn (c
, j
) =>
1049 Array2
.update (next
', i
, c
, j
))
1051 case Array
.sub (final
', i
) of
1052 SOME
{requireFinish
= false, ...} => ()
1054 case Array
.peekMapi (states
, fn s
=>
1055 Array
.sub (final
, s
)) of
1057 |
SOME (slot
, {actions
, requireFinish
}) =>
1060 SOME
{actions
= actions
,
1061 requireFinish
= requireFinish
,
1066 fun newStack () = Array
.new (!maxNumStates
, Actions
.empty
)
1067 in T
{anchorStart
= anchorStart
',
1068 anchorStartStack
= anchorStartStack
,
1069 charClass
= charClass
,
1070 dead
= dead (numStates
, numCharClasses
, final
', next
'),
1074 stack1
= newStack (),
1075 stack2
= newStack (),
1077 startStack
= startStack
}
1081 * match could be sped up some by doing the match
in two passes
.
1082 * The first pass just determines
if the match will succeed
.
1083 * The second pass computes all the edge actions
.
1085 fun match
{dfa
= T
{anchorStart
= ancSt
, anchorStartStack
,
1086 charClass
, dead
, final
, next
, stack1
, stack2
,
1087 start
, startStack
, ...},
1091 anchorStart
: bool}: (int * Actions
.t
) option
=
1093 val n
= String.size s
1097 last
: (int * Actions
.t
) option
)
1098 : (int * Actions
.t
) option
=
1101 case Array
.sub (final
, state
) of
1103 | SOME
{actions
, requireFinish
, slot
} =>
1104 if requireFinish
andalso i
< n
1107 SOME (i
, Actions
.add (Array
.sub (stack1
, slot
),
1110 if Array
.sub (dead
, state
)
1112 orelse (short
andalso isSome last
)
1116 val (state
, edgeActions
) =
1117 Array2
.sub (next
, state
,
1120 Char.toInt (String.sub (s
, i
))))
1124 fn EdgeAction
.Add
{from
, to
, actions
} =>
1127 Actions
.add (Array
.sub (stack1
, from
),
1129 | EdgeAction
.Init
{to
, actions
} =>
1132 Actions
.add (Actions
.empty
, i
, actions
)))
1134 loop (i
+ 1, state
, stack2
, stack1
, last
)
1137 val (state
, initStack
) =
1139 then (ancSt
, anchorStartStack
)
1140 else (start
, startStack
)
1143 (initStack
, fn (slot
, v
) =>
1144 Array
.update (stack1
, slot
,
1145 Actions
.add (Actions
.empty
, startPos
, v
)))
1146 val res
= loop (startPos
, state
, stack1
, stack2
, NONE
)
1152 Trace
.trace ("Regexp.DFA.match",
1153 fn {string, startPos
, ...} =>
1154 Layout
.tuple
[String.layout
string,
1155 Int.layout startPos
],
1156 Option
.layout (Layout
.tuple2
1157 (Int.layout
, Actions
.layout
)))
1160 structure Graph
= DirectedGraph
1161 structure Env
= Env (structure Domain
= MonoVector (EdgeAction
))
1162 fun layoutDot
{dfa
as T
{anchorStart
, charClass
, dead
, final
,
1165 showDead
: bool}: Layout
.t
=
1167 val numStates
= numStates dfa
1169 val g
= Graph
.new ()
1170 val nodes
= Vector.tabulate (numStates
, fn _
=> Graph
.newNode g
)
1171 fun node i
= Vector.sub (nodes
, i
)
1172 val {get
= nodeOptions
, ...} =
1173 Property
.get (Graph
.Node
.plist
,
1175 (fn _
=> let open NodeOption
1178 val {get
= edgeOptions
, ...} =
1179 Property
.get (Graph
.Edge
.plist
,
1181 (fn _
=> let open EdgeOption
1184 fun addNodeOption (i
, opts
) =
1185 let val r
= nodeOptions (node i
)
1188 val _
= addNodeOption (start
, [NodeOption
.label
"start"])
1191 (0, numStates
, fn src
=>
1194 case (isSome (Array
.sub (final
, src
)),
1195 src
= anchorStart
) of
1196 (false, false) => Ellipse
1197 |
(true, false) => Box
1198 |
(false, true) => Diamond
1199 |
(true, true) => Polygon
{sides
= 5, options
= []}
1201 addNodeOption (src
, let open NodeOption
1204 val dsts
= Array
.new (numStates
, Env
.empty ())
1207 (0, numChars
, fn c
=>
1208 if Vector.sub (validChars
, c
)
1212 Array2
.sub (next
, src
,
1213 Array
.sub (charClass
, c
))
1214 val e
= Array
.sub (dsts
, dst
)
1215 val c
= Char.fromInt c
1217 case Env
.peek (e
, v
) of
1219 | SOME cs
=> c
:: cs
1221 (dsts
, dst
, Env
.extend (e
, v
, cs
))
1227 (dsts
, fn (dst
, e
) =>
1228 if not showDead
andalso Array
.sub (dead
, dst
)
1234 val edge
= Graph
.addEdge (g
, {from
= src
,
1237 concat
[edgeLabel cs
,
1240 (Vector.layout (Layout
.str
o
1241 EdgeAction
.toString
)
1243 in List.push (edgeOptions edge
,
1244 EdgeOption
.label label
)
1248 Graph
.layoutDot (g
, fn {nodeName
} =>
1251 let open GraphOption
1253 RankDir LeftToRight
,
1254 Rank (Min
, [{nodeName
= nodeName (node start
)}])
1257 edgeOptions
= ! o edgeOptions
,
1258 nodeOptions
= ! o nodeOptions
})
1262 (* This DFA minimization algorithm is based on algorithm
3.6 (page
142)
1263 * of the Dragon Book
.
1265 * It maintains an array
, r
, that stores for each state s the
1266 * representative
of the class to which s belongs
.
1267 * It repeatedly refines an equivalence relation
, represented by a list
1268 * of classes
, where each class is a list
of states
.
1270 (* fun minimize (dfa
as T
{anchorStart
, charClass
, final
,
1271 * start
, next
, ...}): t
=
1273 * val numStates
= numStates dfa
1274 * val numCharClasses
= numCharClasses dfa
1275 * type class
= int list
1276 * type classes
= class list
1277 * val repCounter
= ref ~
1
1278 * val change
= ref
false
1279 * fun newRep () = (change
:= true; ++ repCounter
)
1280 * val finRep
= newRep ()
1281 * val nonfinRep
= newRep ()
1282 * val r
= Array
.tabulate (numStates
, fn i
=>
1283 * if Array
.sub (final
, i
)
1286 * fun rep s
= Array
.sub (r
, s
)
1287 * fun trans (s
, c
) = rep (Array2
.sub (next
, s
, c
))
1288 * fun refine (class
: class
, ac
: classes
): classes
=
1292 * (class
, [], fn (state
, classes
) =>
1294 * fun loop (classes
, ac
) =
1298 * [] => [{class
= [state
],
1303 * val _
= Array
.update (r
, state
, s
)
1304 * in {class
= [state
],
1305 * old
= state
} :: ac
1307 * |
(z
as {class
, old
}) :: classes
=>
1309 * (0, numCharClasses
, fn c
=>
1310 * trans (old
, c
) = trans (state
, c
))
1312 * (Array
.update (r
, state
, rep old
)
1313 * ; {class
= state
:: class
,
1314 * old
= old
} :: (List.appendRev
1316 * else loop (classes
, z
:: ac
)
1317 * in loop (classes
, [])
1319 * in List.fold (r
, ac
, fn ({class
, ...}, ac
) =>
1322 * | _
=> class
:: ac
)
1325 * Trace
.trace ("refine",
1326 * (List.layout
Int.layout
o #
1),
1329 * fun refineAll (classes
: classes
): unit
=
1334 * val _
= change
:= false
1336 * List.fold (classes
, [], fn (class
, ac
) =>
1339 * | _
=> refine (class
, ac
))
1341 * then refineAll classes
1344 * val (fin
, nonfin
) =
1345 * Int.fold (0, numStates
, ([], []), fn (i
, (f
, n
)) =>
1346 * if Array
.sub (final
, i
)
1349 * val _
= refineAll
[fin
, nonfin
]
1350 * val numStates
' = 1 + !repCounter
1351 * (* Compute reachable states
. *)
1352 * val reached
= Array
.new (numStates
', false)
1353 * fun visit (s
: int (* an old state
*)): unit
=
1357 * if Array
.sub (reached
, s
')
1359 * else (Array
.update (reached
, s
', true)
1360 * ; Int.for (0, numCharClasses
, fn c
=>
1361 * visit (Array2
.sub (next
, s
, c
))))
1363 * val _
= visit start
1364 * val _
= visit anchorStart
1365 * (* Compute new representatives
. *)
1367 * val newR
= Array
.tabulate (numStates
', fn s
=>
1368 * if Array
.sub (reached
, s
)
1371 * val numStates
' = 1 + !c
1372 * val _
= Array
.modify (r
, fn s
=> Array
.sub (newR
, s
))
1373 * val next
' = Array2
.new (numStates
', numCharClasses
, ~
1)
1376 * (next
, fn (s
, c
, s
') =>
1377 * Array2
.update (next
', rep s
, c
, rep s
'))
1378 * val final
' = Array
.array (numStates
', false)
1381 * (final
, fn (i
, b
) =>
1382 * if b
then Array
.update (final
', rep i
, true) else ())
1383 * in T
{anchorStart
= rep anchorStart
,
1384 * charClass
= charClass
,
1385 * dead
= dead (numStates
', numCharClasses
, final
', next
'),
1387 * start
= rep start
,
1393 structure Regexp
: REGEXP
=
1395 structure Save
= Save
1396 structure Match
= Match
1400 val anchorFinish
= AnchorFinish
1401 val anchorStart
= AnchorStart
1402 val isChar
= CharSet
1403 fun isNotChar f
= isChar (not
o f
)
1404 fun char c
= isChar (fn c
' => c
= c
')
1405 fun notChar c
= isChar (fn c
' => c
<> c
')
1410 val zeroOrMore
= star
1412 val dquote
= char #
"\""
1414 val any
= isChar (fn _
=> true)
1416 val ascii
= isChar (fn c
=> ord c
<= 127)
1417 val asciis
= star ascii
1419 val none
= isChar (fn _
=> false)
1420 fun oneOf s
= isChar (fn c
=> String.contains (s
, c
))
1421 fun notOneOf s
= isNotChar (fn c
=> String.contains (s
, c
))
1422 val digit
= isChar
Char.isDigit
1423 val digits
= star digit
1424 val nonDigit
= isNotChar
Char.isDigit
1425 val space
= isChar
Char.isSpace
1426 val spaces
= star space
1428 fun string (s
: string): t
=
1429 seq (Int.foldDown (0, String.size s
, [], fn (i
, ac
) =>
1430 char (String.sub (s
, i
)) :: ac
))
1432 fun stringIgnoreCase (s
: string): t
=
1434 (0, String.size s
, [], fn (i
, ac
) =>
1436 val c
= Char.toLower (String.sub (s
, i
))
1438 isChar (fn c
' => c
= Char.toLower c
')
1441 val null
= seq
[] (* Language containing the empty
string only
. *)
1442 fun oneOrMore r
= seq
[r
, star r
]
1443 fun optional r
= or
[null
, r
]
1444 fun repeat (r
, n
: int) = seq (List.tabulate (n
, fn _
=> r
))
1445 fun lower (r
, n
: int) = seq
[repeat (r
, n
), star r
]
1446 fun upper (r
, n
: int) =
1449 else or
[null
, seq
[r
, upper (r
, n
- 1)]]
1450 fun range (r
, n
: int, m
: int) =
1451 seq
[repeat (r
, n
), upper (r
, m
- n
)]
1453 structure Compiled
=
1459 datatype t
= T
of {regexp
: Regexp
.t
,
1462 fun layoutDot (T
{machine
, ...}) =
1464 DFA m
=> DFA
.layoutDot
{dfa
= m
, showDead
= false,
1466 | NFA m
=> NFA
.layoutDot (m
, "nfa")
1468 fun layoutDotToFile (c
: t
, f
: File
.t
) =
1469 File
.withOut (f
, fn out
=> Layout
.output (layoutDot c
, out
))
1471 fun layout (T
{regexp
, ...}) =
1475 Regexp
.layout regexp
1477 align
[case machine
of
1478 DFA dfa
=> DFA
.layout dfa
1479 | NFA nfa
=> NFA
.layout nfa
1480 (* str
"implementing", Regexp
.layout regexp
*)
1485 fun match
{compiled
= T
{machine
, ...},
1486 short
, startPos
, string} =
1488 val anchorStart
= startPos
= 0
1493 DFA
.match
{anchorStart
= anchorStart
,
1497 startPos
= startPos
})
1501 NFA
.match
{nfa
= nfa
,
1504 startPos
= startPos
})
1508 (opt
, fn (stop
, Actions
.T actions
) =>
1510 val _
= Vector.foreachi (saves
, fn (i
, s
) =>
1512 val n
= Vector.length saves
1513 val starts
= Array
.array (n
, ~
1)
1514 val matches
= Array
.array (n
, NONE
)
1517 (rev actions
, fn (i
, v
) =>
1521 MatchAction
.Finish s
=>
1523 val index
= Save
.index s
1524 val start
= Array
.sub (starts
, index
)
1528 SOME (Substring
.substring
1529 (string, {start
= start
,
1530 length
= i
- start
})))
1532 | MatchAction
.Start s
=>
1533 Array
.update (starts
, Save
.index s
, i
)))
1536 (matches
, fn (i
, sso
) =>
1539 | SOME ss
=> SOME (Vector.sub (saves
, i
), ss
))
1542 (string, {start
= startPos
,
1543 length
= stop
- startPos
})
1547 end) handle No
=> NONE
1552 ("Regexp.Compiled.match",
1553 fn {compiled
, short
, startPos
, string} =>
1555 [("short", Bool.layout short
),
1556 ("startPos", Int.layout startPos
),
1557 ("string", String.layout
string),
1558 ("compiled", layout compiled
)],
1559 Option
.layout Match
.layout
)
1562 fun matchLong (c
, s
, i
) =
1563 match
{compiled
= c
,
1568 fun matchShort (c
, s
, i
) =
1569 match
{compiled
= c
,
1574 fun matchAll (r
, s
) =
1575 case matchLong (r
, s
, 0) of
1577 | SOME m
=> if String.size s
= Substring
.length (Match
.all m
)
1581 val matchesAll
= isSome
o matchAll
1583 fun matchPrefix (r
, s
) = matchShort (r
, s
, 0)
1585 val matchesPrefix
= isSome
o matchPrefix
1587 fun find (c
: t
, s
: string, startPos
, short
: bool) =
1589 val n
= String.size s
1594 case match
{compiled
= c
,
1598 NONE
=> loop (i
+ 1)
1603 fun findLong (c
, s
, i
) = find (c
, s
, i
, false)
1604 fun findShort (c
, s
, i
) = find (c
, s
, i
, true)
1606 fun foreachMatchShort (c
, s
, f
: Match
.t
-> unit
) =
1609 case findShort (c
, s
, i
) of
1611 | SOME m
=> (f m
; loop (Match
.endOf m
))
1619 val nfa
= NFA
.fromRegexp r
1623 machine
= Compiled
.DFA (DFA
.minimize (DFA
.fromNFA nfa
))}
1627 Trace
.trace ("Regexp.compileDFA", layout
, Compiled
.layout
) compileDFA
1632 machine
= Compiled
.NFA (NFA
.fromRegexp r
)}
1635 Trace
.trace ("Regexp.compileNFA", layout
, Compiled
.layout
) compileNFA
1637 (* POSIX
1003.2 regular expressions
1638 * caveats
: does not support back references
'\N
'
1639 * does not support unmatched
')'
1640 * does not support
'[=' style coallating elements
1641 * does not support coallating elements
as range endpoints
1646 * Re0
::= e |
'|
' Br Re0
1650 * P0
::= e |
'*' |
'+' |
'?
' | Bnd
1651 * Bnd
::= '{' N Bnd0
'}'
1652 * Bnd0
::= e |
',' Bnd1
1663 * Be0
::= '^
' Be1 | Be1
1664 * Be1
::= ']' Be2 | Be2
1665 * Be2
::= '-' Be3 | Be3
1669 * |
'[' '.' Ce
'.' ']' Be3
1670 * |
'[' ':' Cl
':' ']' Be3
1673 * Cl
::= 'alnum
' |
... |
'xdigit
'
1676 exception X
of string
1677 type res
= t
* Save
.t vector
1679 fun S (s
: char list
) : res
=
1680 Re (s
, fn (s
, re
, saves
) =>
1683 | _
=> raise (X
"S"))
1684 and Re (s
: char list
,
1685 k
: char list
* t
* Save
.t vector
-> res
) =
1686 Br (s
, fn (s
, re
, saves
) =>
1687 Re0 (s
, [re
], [saves
], k
))
1688 and Re0 (s
: char list
, res
: t list
, savess
: Save
.t vector list
,
1689 k
: char list
* t
* Save
.t vector
-> res
) =
1692 k (s
, or (List.rev res
), Vector.concat (List.rev savess
))
1696 | #
")"::_
=> finish s
1697 | #
"|"::s
=> Br (s
, fn (s
, re
, saves
) =>
1698 Re0 (s
, re
::res
, saves
::savess
, k
))
1699 | _
=> raise (X
"Re0")
1701 and Br (s
: char list
,
1702 k
: char list
* t
* Save
.t vector
-> res
) =
1703 P (s
, fn (s
, re
, saves
) =>
1704 Br0 (s
, [re
], [saves
], k
))
1705 and Br0 (s
: char list
, res
: t list
, savess
: Save
.t vector list
,
1706 k
: char list
* t
* Save
.t vector
-> res
) =
1709 k (s
, seq (List.rev res
), Vector.concat (List.rev savess
))
1713 | #
")"::_
=> finish s
1714 | #
"|"::_
=> finish s
1715 | _
=> P (s
, fn (s
, re
, saves
) =>
1716 Br0 (s
, re
::res
, saves
::savess
, k
))
1718 and P (s
: char list
,
1719 k
: char list
* t
* Save
.t vector
-> res
) =
1720 A (s
, fn (s
, re
, saves
) => P0 (s
, re
, saves
, [], [], k
))
1721 and P0 (s
: char list
,
1722 re
: t
, saves
: Save
.t vector
,
1723 res
: t list
, savess
: Save
.t vector list
,
1724 k
: char list
* t
* Save
.t vector
-> res
) =
1726 fun finish (s
, re
) =
1727 k (s
, seq (List.rev (re
::res
)),
1728 Vector.concat (List.rev (saves
::savess
)))
1732 val savess
= saves
::savess
1734 A (s
, fn (s
, re
, saves
) =>
1735 P0 (s
, re
, saves
, res
, savess
, k
))
1739 [] => finish (s
, re
)
1740 | #
")"::_
=> finish (s
, re
)
1741 | #
"|"::_
=> finish (s
, re
)
1742 | #
"*"::s
=> finish (s
, star re
)
1743 | #
"+"::s
=> finish (s
, oneOrMore re
)
1744 | #
"?"::s
=> finish (s
, optional re
)
1745 | #
"{"::(c
::s
) => if Char.isDigit c
1746 then Bnd (c
::s
, fn (s
, f
) =>
1751 and Bnd (s
: char list
,
1752 k
: char list
* (t
-> t
) -> res
) =
1754 Bnd0 (s
, n
, fn (s
, f
) =>
1757 | _
=> raise (X
"Bnd")))
1758 and Bnd0 (s
: char list
, n
: int,
1759 k
: char list
* (t
-> t
) -> res
) =
1761 fun finish (s
, f
) = k (s
, f
)
1764 #
"}"::_
=> finish (s
, fn re
=> repeat (re
, n
))
1765 | #
","::s
=> Bnd1 (s
, n
, k
)
1766 | _
=> raise (X
"Bnd0")
1768 and Bnd1 (s
: char list
, n
: int,
1769 k
: char list
* (t
-> t
) -> res
) =
1771 fun finish (s
, f
) = k (s
, f
)
1774 #
"}"::_
=> finish (s
, fn re
=> lower (re
, n
))
1775 | _
=> N (s
, fn (s
, m
) =>
1777 then raise (X
"Bnd1")
1778 else finish (s
, fn re
=> range (re
, n
, m
)))
1780 and N (s
: char list
,
1781 k
: char list
* int -> res
) =
1785 d
::s
' => (case Char.digitToInt d
of
1786 SOME d
=> N1 (s
', d
, k
)
1787 | NONE
=> raise (X
"N"))
1788 | _
=> raise (X
"N")
1790 and N1 (s
: char list
, n
: int,
1791 k
: char list
* int -> res
) =
1798 | d
::s
' => (case Char.digitToInt d
of
1799 SOME d
=> N1 (s
', n
* 10 + d
, k
)
1802 and A (s
: char list
,
1803 k
: char list
* t
* Save
.t vector
-> res
) =
1805 fun finish (s
, re
, saves
) =
1807 fun finishR (s
, re
) =
1808 finish (s
, re
, Vector.new0 ())
1811 fun finishC (s
, c
) =
1815 #
"("::(#
")"::s
) => finishN s
1817 val save
' = Save
.new ()
1819 Re (s
, fn (s
, re
, saves
) =>
1821 #
")"::s
=> k (s
, save (re
, save
'),
1823 [Vector.new1 save
', saves
])
1824 | _
=> raise (X
"A"))
1828 Be (s
, fn (s
, re
) =>
1830 #
"]"::s
=> finishR (s
, re
)
1831 | _
=> raise (X
"A"))
1833 | #
"."::s
=> finishR (s
, any
)
1834 | #
"^"::s
=> finishR (s
, anchorStart
)
1835 | #
"$"::s
=> finishR (s
, anchorFinish
)
1836 | #
"\\"::(c
::s
) => finishC (s
, c
)
1837 | c
::s
=> if String.contains (")|*+?{", c
)
1840 | _
=> raise (X
"A")
1842 and Be (s
: char list
,
1843 k
: char list
* t
-> res
) =
1845 and Be0 (s
: char list
,
1846 k
: char list
* t
-> res
) =
1850 #
"^"::s
=> Be1 (s
, true, k
)
1851 | _
=> Be1 (s
, false, k
)
1853 and Be1 (s
: char list
, inv
: bool,
1854 k
: char list
* t
-> res
) =
1858 #
"]"::s
=> Be2 (s
, inv
, [#
"]"], k
)
1859 | _
=> Be2 (s
, inv
, [], k
)
1861 and Be2 (s
: char list
, inv
: bool, cs
: char list
,
1862 k
: char list
* t
-> res
) =
1866 #
"-"::s
=> Be3 (s
, inv
, #
"-"::cs
, [], [], k
)
1867 | _
=> Be3 (s
, inv
, cs
, [], [], k
)
1869 and Be3 (s
: char list
, inv
: bool,
1870 cs
: char list
, cps
: (char
-> bool) list
, ces
: string list
,
1871 k
: char list
* t
-> res
) =
1873 fun finish (s
: char list
,
1875 cps
: (char
-> bool) list
,
1878 fun finish
' re
= k (s
, re
)
1880 val cp
= fn c
=> List.exists (cps
, fn cp
=> cp c
)
1889 cp c
orelse String.contains (s
, c
)))
1890 | _
=> Error
.bug
"Regexp.fromString: can't handle collating elements in negated bracket expressions")
1891 else finish
' (List.fold
1892 (ces
, or
[isChar cp
,
1895 or
[string ce
, re
]))
1899 #
"]"::_
=> finish (s
, cs
, cps
, ces
)
1900 | #
"-"::s
=> (case s
of
1901 #
"]"::_
=> finish (s
, #
"-"::cs
, cps
, ces
)
1902 | _
=> raise (X
"Be3"))
1903 | c1
::(#
"-"::(c2
::s
)) =>
1905 val r1
= Char.ord c1
1906 val r2
= Char.ord c2
1908 let val r
= Char.ord c
1909 in r1
<= r
andalso r
<= r2
1912 Be3 (s
, inv
, cs
, cp
::cps
, ces
, k
)
1914 | #
"["::(#
"."::s
) =>
1915 Ce (s
, [], fn (s
, ce
) =>
1917 #
"."::(#
"]"::s
) => Be3 (s
, inv
, cs
, cps
, ce
::ces
, k
)
1918 | _
=> raise (X
"Be3"))
1919 | #
"["::(#
":"::s
) =>
1920 Cl (s
, fn (s
, cp
) =>
1922 #
":"::(#
"]"::s
) => Be3 (s
, inv
, cs
, cp
::cps
, ces
, k
)
1923 | _
=> raise (X
"Be3"))
1924 | c
::s
=> Be3 (s
, inv
, c
::cs
, cps
, ces
, k
)
1925 | _
=> raise (X
"Be3")
1927 and Ce (s
: char list
, ce
: char list
,
1928 k
: char list
* string -> res
) =
1931 k (s
, implode (List.rev ce
))
1935 | c
::s
=> Ce (s
, c
::ce
, k
)
1936 | _
=> raise (X
"Ce")
1938 and Cl (s
: char list
,
1939 k
: char list
* (char
-> bool) -> res
) =
1943 #
"a"::(#
"l"::(#
"n"::(#
"u"::(#
"m"::s
)))) =>
1944 k (s
, Char.isAlphaNum
)
1945 | #
"a"::(#
"l"::(#
"p"::(#
"h"::(#
"a"::s
)))) =>
1947 | #
"b"::(#
"l"::(#
"a"::(#
"n"::(#
"k"::_
)))) =>
1948 raise (X
"Cl:blank")
1949 | #
"c"::(#
"n"::(#
"t"::(#
"r"::(#
"l"::s
)))) =>
1951 | #
"d"::(#
"i"::(#
"g"::(#
"i"::(#
"t"::s
)))) =>
1953 | #
"g"::(#
"r"::(#
"a"::(#
"p"::(#
"h"::s
)))) =>
1955 | #
"l"::(#
"o"::(#
"w"::(#
"e"::(#
"r"::s
)))) =>
1957 | #
"p"::(#
"r"::(#
"i"::(#
"n"::(#
"t"::s
)))) =>
1959 | #
"p"::(#
"u"::(#
"n"::(#
"c"::(#
"t"::_
)))) =>
1960 raise (X
"Cl:punct")
1961 | #
"s"::(#
"p"::(#
"a"::(#
"c"::(#
"e"::s
)))) =>
1963 | #
"u"::(#
"p"::(#
"p"::(#
"e"::(#
"r"::s
)))) =>
1965 | #
"x"::(#
"d"::(#
"i"::(#
"g"::(#
"i"::(#
"t"::s
))))) =>
1966 k (s
, Char.isHexDigit
)
1967 | _
=> raise (X
"Cl")
1970 val fromString
: string -> (t
* Save
.t vector
) option
=
1971 fn s
=> (SOME (S (explode s
))) handle X _
=> NONE
1973 Trace
.trace ("Regexp.fromString",
1975 Option
.layout (layout
o #
1))
1982 * let open Trace
.Immediate
1985 * ; debug
:= Out Out
.error
1999 * val r
= seq
[a
, b
, c
, d
]
2000 * val r
= or
[seq
[a
, b
, c
],
2003 * seq
[star (or
[a
, b
]),
2006 * val eol
= char #
"#"
2007 * val space
= oneOf
" \t"
2009 * seq
[or
[anchorStart
, notOneOf
"0123456789("],
2010 * or
[seq
[char #
"(", d
, d
, d
, char #
")"],
2016 * or
[eol
, nonDigit
]]
2018 * fun doit (name
, lay
) =
2020 * val dot
= concat
["/tmp/", name
, ".dot"]
2021 * val ps
= concat
["/tmp/", name
, ".ps"]
2022 * val _
= File
.withOut (dot
, fn out
=> Layout
.output (lay
, out
))
2023 * val _
= OS
.Process
.system (concat
["dot ", dot
, " >", ps
])
2026 * val nfa
= NFA
.fromRegexp r
2027 * val _
= doit ("nfa", NFA
.layoutDot (nfa
, "nfa"))
2028 * val _
= Out
.output (Out
.error
,
2029 * concat
["numCharClasses = ",
2030 * Int.toString (NFA
.numCharClasses nfa
),
2032 * val dfa
= DFA
.fromNFA nfa
2033 * val _
= doit ("dfa",
2034 * DFA
.layoutDot
{dfa
= dfa
, title
= "dfa", showDead
= false})
2035 * val min
= DFA
.minimize dfa
2036 * val _
= doit ("min",
2037 * DFA
.layoutDot
{dfa
= min
, title
= "min", showDead
= false})
2046 * open Trace
.Immediate
2048 * debug
:= Out Out
.error
2050 * ; on
["Regexp.match"]
2052 * structure Z
= TestRegexp (Regexp
)