1 (* Copyright (C) 2009-2010 Matthew Fluet.
2 * Copyright (C) 1999-2008 Henry Cejtin, Matthew Fluet, Suresh
3 * Jagannathan, and Stephen Weeks.
4 * Copyright (C) 1997-2000 NEC Research Institute.
6 * MLton is released under a BSD-style license.
7 * See the file MLton-LICENSE for details.
10 functor PolyHash (S: SSA_TRANSFORM_STRUCTS): SSA_TRANSFORM =
16 * This pass implements polymorphic, structural hashing.
18 * For each datatype tycon and vector type, it builds a hashing function and
19 * translates calls to MLton_hash into calls to that function.
21 * For tuples, it does the hashing inline. I.E. it does not create
22 * a separate hashing function for each tuple type.
24 * All hashing functions are only created if necessary, i.e. if hashing
25 * is actually used at a type.
36 fun wordFromWord (w: word, ws: WordSize.t): t =
37 word (WordX.fromIntInf (Word.toIntInf w, ws))
40 word (WordX.fromIntInf (i, WordSize.shiftArg))
41 fun shiftBits b = shiftInt (Bits.toIntInf b)
45 fn (e1: t, e2: t, s) =>
46 primApp {prim = prim s,
47 targs = Vector.new0 (),
48 args = Vector.new2 (e1, e2),
51 val add = mk Prim.wordAdd
52 val andb = mk Prim.wordAndb
53 val rshift = mk (fn s => Prim.wordRshift (s, {signed = false}))
54 val xorb = mk Prim.wordXorb
58 fn (e1: t, e2: t, s, sg) =>
59 primApp {prim = prim (s, sg),
60 targs = Vector.new0 (),
61 args = Vector.new2 (e1, e2),
64 val mul = mk Prim.wordMul
67 fun wordEqual (e1: t, e2: t, s): t =
68 primApp {prim = Prim.wordEqual s,
69 targs = Vector.new0 (),
70 args = Vector.new2 (e1, e2),
76 val resWordSize = WordSize.word32
77 val resTy = Type.word resWordSize
79 fun mkWordBytes {stateTy: Type.t,
80 workWordSize: WordSize.t,
81 combByte: Dexp.t * Dexp.t -> Dexp.t,
82 mix: Dexp.t -> Dexp.t} =
84 val workBits = WordSize.bits workWordSize
85 val workTy = Type.word workWordSize
86 fun wordBytes (st,w,ws) =
89 if WordSize.equals (ws, workWordSize)
91 else Dexp.primApp {prim = Prim.wordExtdToWord
94 targs = Vector.new0 (),
99 (Dexp.word o WordX.resize)
100 (WordX.allOnes WordSize.word8,
103 fun loop (st, w, b) =
104 if Bits.<= (b, Bits.zero)
108 val w0 = Var.newNoname ()
109 val dw0 = Dexp.var (w0, workTy)
110 val bw = Var.newNoname ()
111 val dbw = Dexp.var (bw, workTy)
112 val st1 = Var.newNoname ()
113 val dst1 = Dexp.var (st1, stateTy)
114 val st2 = Var.newNoname ()
115 val dst2 = Dexp.var (st2, stateTy)
118 {decs = [{var = w0, exp = w},
120 Dexp.andb (dw0, mask, workWordSize)},
122 combByte (dst0, dbw)},
127 Dexp.shiftBits Bits.inWord8,
129 Bits.- (b, Bits.inWord8))}
132 if Bits.<= (b, Bits.zero)
136 val w0 = Var.newNoname ()
137 val dw0 = Dexp.var (w0, Type.word ws)
138 val ew = Var.newNoname ()
139 val dew = Dexp.var (ew, workTy)
140 val loopBits = Bits.min (b, workBits)
141 val st1 = Var.newNoname ()
142 val dst1 = Dexp.var (st1, stateTy)
145 {decs = [{var = w0, exp = w},
146 {var = ew, exp = extdW dw0},
147 {var = st1, exp = loop (dst0, dew, loopBits)}],
150 Dexp.shiftBits workBits,
152 Bits.- (b, workBits))}
154 val st0 = Var.newNoname ()
155 val dst0 = Dexp.var (st0, stateTy)
158 {decs = [{var = st0, exp = st}],
159 body = lp (dst0, w, WordSize.bits ws)}
166 (* Jenkins hash function
167 * http://en.wikipedia.org/wiki/Jenkins_hash_function (20100315)
169 val {stateTy: Type.t,
170 init: unit -> Dexp.t,
171 wordBytes: Dexp.t * Dexp.t * WordSize.t -> Dexp.t,
172 fini: Dexp.t -> Dexp.t} =
174 val stateWordSize = resWordSize
175 val stateTy = Type.word stateWordSize
176 val workWordSize = resWordSize
177 val workTy = Type.word workWordSize
181 fn (w1, w2) => prim (w1, w2, stateWordSize)
183 val add = mk Dexp.add
184 val lshift = mk Dexp.lshift
185 val rshift = mk Dexp.rshift
186 val xorb = mk Dexp.xorb
189 fun init () = Dexp.word (WordX.zero stateWordSize)
190 fun combByte (hexp, wexp) =
192 val h0 = Var.newNoname ()
193 val dh0 = Dexp.var (h0, stateTy)
194 val w0 = Var.newNoname ()
195 val dw0 = Dexp.var (w0, workTy)
196 val h1 = Var.newNoname ()
197 val dh1 = Dexp.var (h1, stateTy)
200 {decs = [{var = h0, exp = hexp},
201 {var = w0, exp = wexp},
202 {var = h1, exp = add (dh0, dw0)}],
207 val h0 = Var.newNoname ()
208 val dh0 = Dexp.var (h0, stateTy)
209 val h1 = Var.newNoname ()
210 val dh1 = Dexp.var (h1, stateTy)
211 val h2 = Var.newNoname ()
212 val dh2 = Dexp.var (h2, stateTy)
215 {decs = [{var = h0, exp = hexp},
216 {var = h1, exp = add (dh0, lshift (dh0, Dexp.shiftInt 10))},
217 {var = h2, exp = xorb (dh1, rshift (dh1, Dexp.shiftInt 6))}],
223 workWordSize = workWordSize,
228 val h0 = Var.newNoname ()
229 val dh0 = Dexp.var (h0, stateTy)
230 val h1 = Var.newNoname ()
231 val dh1 = Dexp.var (h1, stateTy)
232 val h2 = Var.newNoname ()
233 val dh2 = Dexp.var (h2, stateTy)
234 val h3 = Var.newNoname ()
235 val dh3 = Dexp.var (h3, stateTy)
238 {decs = [{var = h0, exp = hexp},
239 {var = h1, exp = add (dh0, lshift (dh0, Dexp.shiftInt 3))},
240 {var = h2, exp = xorb (dh1, rshift (dh1, Dexp.shiftInt 11))},
241 {var = h3, exp = add (dh2, lshift (dh2, Dexp.shiftInt 15))}],
247 wordBytes = wordBytes,
252 (* FNV-1a hash function
253 * http://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function (20100315)
255 val {stateTy: Type.t,
256 init: unit -> Dexp.t,
257 wordBytes: Dexp.t * Dexp.t * WordSize.t -> Dexp.t,
258 fini: Dexp.t -> Dexp.t} =
260 val stateWordSize = resWordSize
261 val stateTy = Type.word stateWordSize
262 val workWordSize = resWordSize
263 val workTy = Type.word workWordSize
267 fn (w1, w2) => prim (w1, w2, stateWordSize)
269 val mul = mk (fn (w1,w2,s) => Dexp.mul (w1,w2,s,{signed = false}))
270 val xorb = mk Dexp.xorb
273 val fnv_prime = WordX.fromIntInf (16777619, stateWordSize)
274 val fnv_offset_bias = WordX.fromIntInf (2166136261, stateWordSize)
276 fun init () = Dexp.word fnv_offset_bias
277 fun combByte (hexp, wexp) =
279 val h0 = Var.newNoname ()
280 val dh0 = Dexp.var (h0, stateTy)
281 val w0 = Var.newNoname ()
282 val dw0 = Dexp.var (w0, workTy)
283 val h1 = Var.newNoname ()
284 val dh1 = Dexp.var (h1, stateTy)
287 {decs = [{var = h0, exp = hexp},
288 {var = w0, exp = wexp},
289 {var = h1, exp = xorb (dh0, dw0)}],
294 val h0 = Var.newNoname ()
295 val dh0 = Dexp.var (h0, stateTy)
296 val p = Dexp.word fnv_prime
297 val h1 = Var.newNoname ()
298 val dh1 = Dexp.var (h1, stateTy)
301 {decs = [{var = h0, exp = hexp},
302 {var = h1, exp = mul (dh0, p)}],
308 workWordSize = workWordSize,
315 wordBytes = wordBytes,
318 fun wordBytesFromWord (st: Dexp.t, w:word, ws: WordSize.t) =
319 wordBytes (st, Dexp.wordFromWord (w, ws), ws)
322 fun transform (Program.T {datatypes, globals, functions, main}) =
324 val {get = funcInfo: Func.t -> {hasHash: bool},
325 set = setFuncInfo, ...} =
326 Property.getSet (Func.plist, Property.initConst {hasHash = false})
327 val {get = labelInfo: Label.t -> {hasHash: bool},
328 set = setLabelInfo, ...} =
329 Property.getSet (Label.plist, Property.initConst {hasHash = false})
330 val {get = tyconInfo: Tycon.t -> {cons: {con: Con.t,
331 args: Type.t vector} vector},
332 set = setTyconInfo, ...} =
334 (Tycon.plist, Property.initRaise ("PolyHash.info", Tycon.layout))
335 val tyconCons = #cons o tyconInfo
336 val {get = getHashFunc: Type.t -> Func.t option,
338 destroy = destroyHashFunc} =
339 Property.destGetSet (Type.plist, Property.initConst NONE)
340 val {get = getTyconHashFunc: Tycon.t -> Func.t option,
341 set = setTyconHashFunc, ...} =
342 Property.getSet (Tycon.plist, Property.initConst NONE)
343 val {get = getVectorHashFunc: Type.t -> Func.t option,
344 set = setVectorHashFunc,
345 destroy = destroyVectorHashFunc} =
346 Property.destGetSet (Type.plist, Property.initConst NONE)
347 val returns = SOME (Vector.new1 Hash.stateTy)
348 val seqIndexWordSize = WordSize.seqIndex ()
349 val seqIndexTy = Type.word seqIndexWordSize
350 val newFunctions: Function.t list ref = ref []
352 List.push (newFunctions,
353 Function.profile (Function.new z,
354 SourceInfo.polyHash))
355 fun hashTyconFunc (tycon: Tycon.t): Func.t =
356 case getTyconHashFunc tycon of
361 Func.newString (concat ["hash_", Tycon.originalName tycon])
362 val _ = setTyconHashFunc (tycon, SOME name)
363 val ty = Type.datatypee tycon
364 val st = (Var.newNoname (), Hash.stateTy)
365 val x = (Var.newNoname (), ty)
366 val args = Vector.new2 (st, x)
367 val dst = Dexp.var st
369 val cons = tyconCons tycon
376 (Dexp.Con o Vector.map)
377 (cons, fn {con, args} =>
382 (Var.newNoname (), ty))
389 Hash.wordBytesFromWord
390 (dst, Con.hash con, WordSize.word32),
391 fn ((x,ty), dstate) =>
392 hashExp (dstate, Dexp.var (x, ty), ty))}
394 val (start, blocks) = Dexp.linearize (body, Handler.Caller)
395 val blocks = Vector.fromList blocks
397 newFunction {args = args,
407 and vectorHashFunc (ty: Type.t): Func.t =
408 case getVectorHashFunc ty of
412 (* Build two functions, one that hashes the length and the
415 val name = Func.newString "vectorHash"
416 val _ = setVectorHashFunc (ty, SOME name)
417 val loop = Func.newString "vectorHashLoop"
418 val vty = Type.vector ty
420 val st = (Var.newNoname (), Hash.stateTy)
421 val vec = (Var.newNoname (), vty)
422 val args = Vector.new2 (st, vec)
423 val dst = Dexp.var st
424 val dvec = Dexp.var vec
425 val len = (Var.newNoname (), seqIndexTy)
426 val dlen = Dexp.var len
429 {decs = [{var = #1 len, exp =
430 Dexp.primApp {prim = Prim.vectorLength,
431 targs = Vector.new1 ty,
432 args = Vector.new1 dvec,
438 (Hash.wordBytes (dst, dlen, seqIndexWordSize),
439 dvec, dlen, Dexp.word (WordX.zero seqIndexWordSize))),
441 val (start, blocks) = Dexp.linearize (body, Handler.Caller)
442 val blocks = Vector.fromList blocks
445 newFunction {args = args,
454 val st = (Var.newNoname (), Hash.stateTy)
455 val vec = (Var.newNoname (), vty)
456 val len = (Var.newNoname (), seqIndexTy)
457 val i = (Var.newNoname (), seqIndexTy)
458 val args = Vector.new4 (st, vec, len, i)
459 val dst = Dexp.var st
460 val dvec = Dexp.var vec
461 val dlen = Dexp.var len
469 Dexp.primApp {prim = Prim.vectorSub,
470 targs = Vector.new1 ty,
471 args = Vector.new2 (dvec, di),
477 Dexp.word (WordX.one seqIndexWordSize),
481 {test = Dexp.wordEqual
482 (di, dlen, seqIndexWordSize),
485 cases = (Dexp.Con o Vector.new2)
487 args = Vector.new0 (),
490 args = Vector.new0 (),
491 body = Dexp.call {args = args,
493 ty = Hash.stateTy}})}
495 val (start, blocks) = Dexp.linearize (body, Handler.Caller)
496 val blocks = Vector.fromList blocks
499 newFunction {args = args,
510 and hashExp (st: Dexp.t, x: Dexp.t, ty: Type.t): Dexp.t =
511 Dexp.name (st, fn st =>
512 Dexp.name (x, fn x => hash (st, x, ty)))
513 and hash (st: Var.t, x: Var.t, ty: Type.t): Dexp.t =
515 val dst = Dexp.var (st, Hash.stateTy)
516 val dx = Dexp.var (x, ty)
518 Hash.wordBytesFromWord
519 (dst, Type.hash ty, WordSize.word32)
523 Type.Array _ => stateful ()
526 val ws = WordSize.cpointer ()
529 {prim = Prim.cpointerToWord,
530 targs = Vector.new0 (),
531 args = Vector.new1 dx,
534 Hash.wordBytes (dst, toWord, ws)
536 | Type.Datatype tycon =>
537 Dexp.call {func = hashTyconFunc tycon,
538 args = Vector.new2 (dst, dx),
542 val sws = WordSize.smallIntInfWord ()
543 val bws = WordSize.bigIntInfWord ()
546 {prim = Prim.intInfToWord,
547 targs = Vector.new0 (),
548 args = Vector.new1 dx,
552 {prim = Prim.intInfToVector,
553 targs = Vector.new0 (),
554 args = Vector.new1 dx,
555 ty = Type.vector (Type.word bws)}
556 val w = Var.newNoname ()
557 val dw = Dexp.var (w, Type.word sws)
558 val one = Dexp.word (WordX.one sws)
561 {decs = [{var = w, exp = toWord}],
564 {test = Dexp.wordEqual (Dexp.andb (dw, one, sws), one, sws),
568 (Dexp.Con o Vector.new2)
570 args = Vector.new0 (),
571 body = Hash.wordBytes (dst, dw, sws)},
573 args = Vector.new0 (),
575 Dexp.call {func = vectorHashFunc (Type.word bws),
576 args = Vector.new2 (dst, toVector),
577 ty = Hash.stateTy}})}}
581 val ws = WordSize.fromBits (RealSize.bits rs)
584 {prim = Prim.realCastToWord (rs, ws),
585 targs = Vector.new0 (),
586 args = Vector.new1 dx,
589 Hash.wordBytes (dst, toWord, ws)
591 | Type.Ref _ => stateful ()
592 | Type.Thread => stateful ()
595 val max = Vector.length tys - 1
596 (* hash components i, i+1, ... *)
597 fun loop (i: int, dst): Dexp.t =
601 val ty = Vector.sub (tys, i)
603 Dexp.select {tuple = dx,
609 hashExp (dst, select, ty))
615 Dexp.call {func = vectorHashFunc ty,
616 args = Vector.new2 (dst, dx),
618 | Type.Weak _ => stateful ()
619 | Type.Word ws => Hash.wordBytes (dst, dx, ws)
623 fun hashFunc (ty: Type.t): Func.t =
624 case getHashFunc ty of
628 val name = Func.newString "hash"
629 val _ = setHashFunc (ty, SOME name)
630 val x = (Var.newNoname (), ty)
631 val args = Vector.new1 x
632 val sti = Var.newNoname ()
633 val dsti = Dexp.var (sti, Hash.stateTy)
635 val stf = Var.newNoname ()
636 val dstf = Dexp.var (stf, Hash.stateTy)
637 val w = Var.newNoname ()
638 val dw = Dexp.var (w, Hash.resTy)
641 {decs = [{var = sti, exp = Hash.init ()},
642 {var = stf, exp = hashExp (dsti, dx, ty)},
643 {var = w, exp = Hash.fini dstf}],
645 val (start, blocks) = Dexp.linearize (body, Handler.Caller)
646 val blocks = Vector.fromList blocks
648 newFunction {args = args,
661 (datatypes, fn Datatype.T {tycon, cons} =>
668 val {name, blocks, ...} = Function.dest f
671 (blocks, fn Block.T {label, statements, ...} =>
674 (setFuncInfo (name, {hasHash = true})
675 ; setLabelInfo (label, {hasHash = true}))
678 (statements, fn Statement.T {exp, ...} =>
680 PrimApp {prim, ...} =>
681 (case Prim.name prim of
682 Prim.Name.MLton_hash => setHasHash ()
692 fn (block as Block.T {label, args, statements, transfer}, blocks) =>
693 if not (#hasHash (labelInfo label))
697 fun finish ({label, args, statements}, transfer) =
698 Block.T {label = label,
700 statements = Vector.fromListRev statements,
705 (blocks, {label = label, args = args, statements = []}),
706 fn (stmt as Statement.T {exp, var, ...},
707 (blocks, las as {label, args, statements})) =>
709 fun normal () = (blocks,
712 statements = stmt::statements})
715 PrimApp {prim, targs, args, ...} =>
716 (case (Prim.name prim, Vector.length targs) of
717 (Prim.Name.MLton_hash, 1) =>
719 val ty = Vector.first targs
720 val x = Vector.first args
721 val l = Label.newNoname ()
725 Call {args = Vector.new1 x,
727 return = Return.NonTail
729 handler = Handler.Caller}})
732 args = Vector.new1 (valOf var, Hash.resTy),
739 finish (las, transfer)
743 Vector.fromList blocks
749 val {args, blocks, mayInline, name, raises, returns, start} =
752 if #hasHash (funcInfo name)
753 then Function.new {args = args,
754 blocks = doit blocks,
755 mayInline = mayInline,
761 val () = Function.clear f
766 Program.T {datatypes = datatypes,
768 functions = (!newFunctions) @ functions,
770 val _ = destroyHashFunc ()
771 val _ = destroyVectorHashFunc ()
772 val _ = Program.clearTop program