| 1 | ;;; soundex.el --- implement Soundex algorithm |
| 2 | |
| 3 | ;; Copyright (C) 1993, 2002, 2003, 2004, 2005, |
| 4 | ;; 2006 Free Software Foundation, Inc. |
| 5 | |
| 6 | ;; Author: Christian Plaunt <chris@bliss.berkeley.edu> |
| 7 | ;; Maintainer: FSF |
| 8 | ;; Keywords: matching |
| 9 | ;; Created: Sat May 15 14:48:18 1993 |
| 10 | |
| 11 | ;; This file is part of GNU Emacs. |
| 12 | |
| 13 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
| 14 | ;; it under the terms of the GNU General Public License as published by |
| 15 | ;; the Free Software Foundation; either version 2, or (at your option) |
| 16 | ;; any later version. |
| 17 | |
| 18 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 21 | ;; GNU General Public License for more details. |
| 22 | |
| 23 | ;; You should have received a copy of the GNU General Public License |
| 24 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 25 | ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
| 26 | ;; Boston, MA 02110-1301, USA. |
| 27 | |
| 28 | ;;; Commentary: |
| 29 | |
| 30 | ;; The Soundex algorithm maps English words into representations of |
| 31 | ;; how they sound. Words with vaguely similar sound map to the same string. |
| 32 | |
| 33 | ;;; Code: |
| 34 | |
| 35 | (defvar soundex-alist |
| 36 | '((?B . "1") (?F . "1") (?P . "1") (?V . "1") |
| 37 | (?C . "2") (?G . "2") (?J . "2") (?K . "2") (?Q . "2") (?S . "2") |
| 38 | (?X . "2") (?Z . "2") (?D . "3") (?T . "3") (?L . "4") (?M . "5") |
| 39 | (?N . "5") (?R . "6")) |
| 40 | "Alist of chars-to-key-code for building Soundex keys.") |
| 41 | |
| 42 | (defun soundex (word) |
| 43 | "Return a Soundex key for WORD. |
| 44 | Implemented as described in: |
| 45 | Knuth, Donald E. \"The Art of Computer Programming, Vol. 3: Sorting |
| 46 | and Searching\", Addison-Wesley (1973), pp. 391-392." |
| 47 | (let* ((word (upcase word)) (length (length word)) |
| 48 | (code (cdr (assq (aref word 0) soundex-alist))) |
| 49 | (key (substring word 0 1)) (index 1) (prev-code code)) |
| 50 | ;; once we have a four char key, we're done |
| 51 | (while (and (> 4 (length key)) (< index length)) |
| 52 | ;; look up the code for each letter in word at index |
| 53 | (setq code (cdr (assq (aref word index) soundex-alist)) |
| 54 | index (1+ index) |
| 55 | ;; append code to key unless the same codes belong to |
| 56 | ;; adjacent letters in the original string |
| 57 | key (concat key (if (or (null code) (string= code prev-code)) |
| 58 | () |
| 59 | code)) |
| 60 | prev-code code)) |
| 61 | ;; return a key that is 4 chars long and padded by "0"s if needed |
| 62 | (if (> 4 (length key)) |
| 63 | (substring (concat key "000") 0 4) |
| 64 | key))) |
| 65 | |
| 66 | ;(defvar soundex-test |
| 67 | ; '("Euler" "Gauss" "Hilbert" "Knuth" "Lloyd" "Lukasiewicz" |
| 68 | ; "Ellery" "Ghosh" "Heilbronn" "Kant" "Ladd" "Lissajous") |
| 69 | ; "\n Knuth's names to demonstrate the Soundex algorithm.") |
| 70 | ; |
| 71 | ;(mapcar 'soundex soundex-test) |
| 72 | ;("E460" "G200" "H416" "K530" "L300" "L222" |
| 73 | ; "E460" "G200" "H416" "K530" "L300" "L222") |
| 74 | |
| 75 | (provide 'soundex) |
| 76 | |
| 77 | ;;; arch-tag: b2615a98-feb7-430e-a717-171086738953 |
| 78 | ;;; soundex.el ends here |