/*
 * GT CLIPPER STANDARD HEADER
 *
 * File......: levensht.prg
 * Author....: Andy M Leighton
 * BBS.......: The Dark Knight Returns
 * Net/Node..: 050/069
 * User Name.: Andy Leighton
 * Date......: $Date$
 * Revision..: $Revision$
 *
 * This is an original work by Andy Leighton and is placed in the
 * public domain.
 *
 * Modification history:
 * ---------------------
 *
 * $Log$
 *
 */

/*  $DOC$
 *  $FUNCNAME$
 *       GT_LEVDIST()
 *  $CATEGORY$
 *       String
 *  $ONELINER$
 *       Return the Levenshtein distance between two strings
 *  $SYNTAX$
 *       GT_LevDist(<cStrA>, <cStrB>) --> nDist
 *  $ARGUMENTS$
 *       <cStrA>     -  First string
 *       <cStrB>     -  Second String
 *  $RETURNS$
 *       nDist       -  Levenshtein Distance.
 *  $DESCRIPTION$
 *       Return the Levenshtein distance between two strings
 *
 *       The  Levenshtein  distance  between  two  strings  is  the
 *       minimum  number  of  operations  (Insertions, Deletions or
 *       Substitutions) to transform one string to the other.   The
 *       algoritm used to solve this problem is a prime example  of
 *       dynamic  programming.   It  is  very  similar  approach to
 *       solving  the   shortest  distance   between  two    cities
 *       connected by a network of roads.
 *
 *       The algorithm  is of  order O(nm)  where n  and m  are the
 *       lengths  of  the  two  strings.   Therefore  when  the two
 *       strings are equal  the algoritm is  of order O(n)  so you
 *       should   check   for   equality   first   before   calling
 *       GT_LevDist()
 *
 *       REFERENCES
 *       Doctor Dobb's Journal #187, April 1992
 *
 *       IMPROVEMENTS
 *       The  main  improvements  in  this  routine will be made by
 *       introducing a more complex operation costing.
 *
 *       ie. have three matrices that record the cost of adding  or
 *           deleting  a  specific   letter,  and  substituting   a
 *           specified letter with another.
 *
 *       More improvements can be achieved  but at the loss of  the
 *       general purpose of the routine.
 *
 *       This is left as an exercise for the foolhardy or brave.
 *  $EXAMPLES$
 *       ? GT_LevDist("Axolotl", "Axl Rose")      // prints 5
 *
 *       Axolotl -> Axl Rose in 5 operations
 *
 *          Axolotl
 *          Axolote     SUB e for l
 *          Axolose     SUB s for t
 *          AxoRose     SUB R for l
 *          Ax Rose     SUB space for o
 *          Axl Rose    INS L
 *  $SEEALSO$
 *       GT_LEVCOST()
 *  $END$
 */

#include "gt_LIB.ch"
#include "levensht.ch"

static matrix := {}

function GT_LevDist(cA, cB)

   local nDistance

   gGT_initMatrix(cA, cB)
   gGT_calcMatrix(cA, cB)

   nDistance := DIST(len(cB) + 1, len(cA) + 1)

   matrix := NIL

return nDistance

/*
 * INTERNAL FUNCTION: gGT_initMatrix()
 *
 * initialise the matrix before calculating Levenshtein distance
 */

static function gGT_initMatrix(cA, cB)

   local i, j

   matrix := array(len(cA) + 1, len(cB) + 1, 2)

   for i := 1 to len(cA) + 1
      matrix[i][1][1] := i - 1
      matrix[i][1][2] := LV_INS
   next
   for i := 1 to len(cB) + 1
      matrix[1][i][1] := i - 1
      matrix[1][i][2] := LV_DEL

      for j := 2 to len(cA) + 1
         matrix[j][i][1] := 0
         matrix[j][i][2] := 0
      next
   next

return NIL

/*
 * INTERNAL FUNCTION: gGT_calcMatrix()
 *
 * calculate the matrix of distances
 */

static function gGT_calcMatrix(cA, cB)

   local nRow
   local nCol

   for nCol := 2 to len(cA) + 1
      for nRow := 2 to len(cB) + 1
         gGT_calcCell(cA, cB, nRow, nCol)
      next
   next

return NIL

/*
 * INTERNAL FUNCTION: gGT_calcCell()
 *
 * calculate just one cell of the matrix.
 * this is where all the work is done.
 *
 * To step through this whole algorithm in english text would
 * be much longer than the code below and IMHO would be much
 * harder to follow.  It also involves a mathematical expression
 * that is difficult to represent in straight ASCII.
 */

static function gGT_calcCell(cA, cB, row, col)

   local nDistNW  := DIST(row - 1, col - 1)
   local nDistW   := DIST(row, col - 1)
   local nDistN   := DIST(row - 1, col)
   local nOp

   if nDistW < nDistN
      if nDistW < nDistNW
         matrix[col][row][2] := LV_INS
         matrix[col][row][1] := nDistW + gGT_Cost(LV_INS)
      else
         if substr(cA, col - 1, 1) == substr(cB, row - 1, 1)
            nOp := LV_MATCH
         else
            nOp := LV_SUB
         endif
         matrix[col][row][2] := nOp
         matrix[col][row][1] := nDistNW + gGT_Cost(nOp)
      endif
   else
      if nDistN < nDistNW
         matrix[col][row][2] := LV_DEL
         matrix[col][row][1] := nDistN + gGT_Cost(LV_DEL)
      else
         if substr(cA, col - 1, 1) == substr(cB, row - 1, 1)
            nOp := LV_MATCH
         else
            nOp := LV_SUB
         endif
         matrix[col][row][2] := nOp
         matrix[col][row][1] := nDistNW + gGT_Cost(nOp)
      endif
   endif

return NIL
