Ninja
edit_distance.cc
Go to the documentation of this file.
00001 // Copyright 2011 Google Inc. All Rights Reserved.
00002 //
00003 // Licensed under the Apache License, Version 2.0 (the "License");
00004 // you may not use this file except in compliance with the License.
00005 // You may obtain a copy of the License at
00006 //
00007 //     http://www.apache.org/licenses/LICENSE-2.0
00008 //
00009 // Unless required by applicable law or agreed to in writing, software
00010 // distributed under the License is distributed on an "AS IS" BASIS,
00011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00012 // See the License for the specific language governing permissions and
00013 // limitations under the License.
00014 
#include "edit_distance.h"

#include <algorithm>
#include <vector>
00018 
00019 int EditDistance(const StringPiece& s1,
00020                  const StringPiece& s2,
00021                  bool allow_replacements,
00022                  int max_edit_distance) {
00023   // The algorithm implemented below is the "classic"
00024   // dynamic-programming algorithm for computing the Levenshtein
00025   // distance, which is described here:
00026   //
00027   //   http://en.wikipedia.org/wiki/Levenshtein_distance
00028   //
00029   // Although the algorithm is typically described using an m x n
00030   // array, only two rows are used at a time, so this implemenation
00031   // just keeps two separate vectors for those two rows.
00032   int m = s1.len_;
00033   int n = s2.len_;
00034 
00035   vector<int> previous(n + 1);
00036   vector<int> current(n + 1);
00037 
00038   for (int i = 0; i <= n; ++i)
00039     previous[i] = i;
00040 
00041   for (int y = 1; y <= m; ++y) {
00042     current[0] = y;
00043     int best_this_row = current[0];
00044 
00045     for (int x = 1; x <= n; ++x) {
00046       if (allow_replacements) {
00047         current[x] = min(previous[x-1] + (s1.str_[y-1] == s2.str_[x-1] ? 0 : 1),
00048                          min(current[x-1], previous[x])+1);
00049       }
00050       else {
00051         if (s1.str_[y-1] == s2.str_[x-1])
00052           current[x] = previous[x-1];
00053         else
00054           current[x] = min(current[x-1], previous[x]) + 1;
00055       }
00056       best_this_row = min(best_this_row, current[x]);
00057     }
00058 
00059     if (max_edit_distance && best_this_row > max_edit_distance)
00060       return max_edit_distance + 1;
00061 
00062     current.swap(previous);
00063   }
00064 
00065   return previous[n];
00066 }