| 1 | ;;; soundex.el --- implement Soundex algorithm |
| 2 | |
| 3 | ;; Copyright (C) 1993, 2001-2014 Free Software Foundation, Inc. |
| 4 | |
| 5 | ;; Author: Christian Plaunt <chris@bliss.berkeley.edu> |
| 6 | ;; Maintainer: emacs-devel@gnu.org |
| 7 | ;; Keywords: matching |
| 8 | ;; Created: Sat May 15 14:48:18 1993 |
| 9 | |
| 10 | ;; This file is part of GNU Emacs. |
| 11 | |
| 12 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 13 | ;; it under the terms of the GNU General Public License as published by |
| 14 | ;; the Free Software Foundation, either version 3 of the License, or |
| 15 | ;; (at your option) any later version. |
| 16 | |
| 17 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 18 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 19 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 20 | ;; GNU General Public License for more details. |
| 21 | |
| 22 | ;; You should have received a copy of the GNU General Public License |
| 23 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 24 | |
| 25 | ;;; Commentary: |
| 26 | |
| 27 | ;; The Soundex algorithm maps English words into representations of |
| 28 | ;; how they sound. Words with vaguely similar sound map to the same string. |
| 29 | |
| 30 | ;;; Code: |
| 31 | |
| 32 | (defvar soundex-alist |
| 33 | '((?B . "1") (?F . "1") (?P . "1") (?V . "1") |
| 34 | (?C . "2") (?G . "2") (?J . "2") (?K . "2") (?Q . "2") (?S . "2") |
| 35 | (?X . "2") (?Z . "2") (?D . "3") (?T . "3") (?L . "4") (?M . "5") |
| 36 | (?N . "5") (?R . "6")) |
| 37 | "Alist of chars-to-key-code for building Soundex keys.") |
| 38 | |
| 39 | (defun soundex (word) |
| 40 | "Return a Soundex key for WORD. |
| 41 | Implemented as described in: |
| 42 | Knuth, Donald E. \"The Art of Computer Programming, Vol. 3: Sorting |
| 43 | and Searching\", Addison-Wesley (1973), pp. 391-392." |
| 44 | (let* ((word (upcase word)) (length (length word)) |
| 45 | (code (cdr (assq (aref word 0) soundex-alist))) |
| 46 | (key (substring word 0 1)) (index 1) (prev-code code)) |
| 47 | ;; once we have a four char key, we're done |
| 48 | (while (and (> 4 (length key)) (< index length)) |
| 49 | ;; look up the code for each letter in word at index |
| 50 | (setq code (cdr (assq (aref word index) soundex-alist)) |
| 51 | index (1+ index) |
| 52 | ;; append code to key unless the same codes belong to |
| 53 | ;; adjacent letters in the original string |
| 54 | key (concat key (if (or (null code) (string= code prev-code)) |
| 55 | () |
| 56 | code)) |
| 57 | prev-code code)) |
| 58 | ;; return a key that is 4 chars long and padded by "0"s if needed |
| 59 | (if (> 4 (length key)) |
| 60 | (substring (concat key "000") 0 4) |
| 61 | key))) |
| 62 | |
| 63 | ;(defvar soundex-test |
| 64 | ; '("Euler" "Gauss" "Hilbert" "Knuth" "Lloyd" "Lukasiewicz" |
| 65 | ; "Ellery" "Ghosh" "Heilbronn" "Kant" "Ladd" "Lissajous") |
| 66 | ; "\n Knuth's names to demonstrate the Soundex algorithm.") |
| 67 | ; |
| 68 | ;(mapcar 'soundex soundex-test) |
| 69 | ;("E460" "G200" "H416" "K530" "L300" "L222" |
| 70 | ; "E460" "G200" "H416" "K530" "L300" "L222") |
| 71 | |
| 72 | (provide 'soundex) |
| 73 | |
| 74 | ;;; soundex.el ends here |