// Ninja
00001 // Copyright 2011 Google Inc. All Rights Reserved. 00002 // 00003 // Licensed under the Apache License, Version 2.0 (the "License"); 00004 // you may not use this file except in compliance with the License. 00005 // You may obtain a copy of the License at 00006 // 00007 // http://www.apache.org/licenses/LICENSE-2.0 00008 // 00009 // Unless required by applicable law or agreed to in writing, software 00010 // distributed under the License is distributed on an "AS IS" BASIS, 00011 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00012 // See the License for the specific language governing permissions and 00013 // limitations under the License. 00014 00015 #include "edit_distance.h" 00016 00017 #include <vector> 00018 00019 int EditDistance(const StringPiece& s1, 00020 const StringPiece& s2, 00021 bool allow_replacements, 00022 int max_edit_distance) { 00023 // The algorithm implemented below is the "classic" 00024 // dynamic-programming algorithm for computing the Levenshtein 00025 // distance, which is described here: 00026 // 00027 // http://en.wikipedia.org/wiki/Levenshtein_distance 00028 // 00029 // Although the algorithm is typically described using an m x n 00030 // array, only two rows are used at a time, so this implemenation 00031 // just keeps two separate vectors for those two rows. 00032 int m = s1.len_; 00033 int n = s2.len_; 00034 00035 vector<int> previous(n + 1); 00036 vector<int> current(n + 1); 00037 00038 for (int i = 0; i <= n; ++i) 00039 previous[i] = i; 00040 00041 for (int y = 1; y <= m; ++y) { 00042 current[0] = y; 00043 int best_this_row = current[0]; 00044 00045 for (int x = 1; x <= n; ++x) { 00046 if (allow_replacements) { 00047 current[x] = min(previous[x-1] + (s1.str_[y-1] == s2.str_[x-1] ? 
0 : 1), 00048 min(current[x-1], previous[x])+1); 00049 } 00050 else { 00051 if (s1.str_[y-1] == s2.str_[x-1]) 00052 current[x] = previous[x-1]; 00053 else 00054 current[x] = min(current[x-1], previous[x]) + 1; 00055 } 00056 best_this_row = min(best_this_row, current[x]); 00057 } 00058 00059 if (max_edit_distance && best_this_row > max_edit_distance) 00060 return max_edit_distance + 1; 00061 00062 current.swap(previous); 00063 } 00064 00065 return previous[n]; 00066 }