レーベンシュタイン距離?あなたがすでに受け入れた最初の解決策には欠陥があると思います。ちょっとしたことが挿入されたとしても、すべてが故障していることがわかります。
string[] list1 = { "a", "c", "b", "d", "j", "e", "f" };
string[] list2 = { "a", "d", "e", "f", "h", "j" };
これは、j が挿入されたため、j、e、f が順不同であることを示しています。
これはあなたが直面している問題を指摘しています。何が故障しているかという問題には、複数の解決策があり、最適な解決策が 1 つ以上あることもあります。J が順番に並んでいますか、それとも e と f ですか? それらはすべて故障していますか?セット A から開始し、セット B で終了するために必要な最小数の挿入操作と削除操作を見つけるレーベンシュタイン距離アルゴリズムと呼ばれるものがあります。
この次のアルゴリズムは、list1 に j が挿入され、e、f がシフトされているが、正しい順序であることを正しく出力します。
using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using Math = System.Math;
namespace LevCompareLists {
class Program {
static void Main(string[] args) {
string[] list1 = { "a", "c", "b", "d", "j", "e", "f" };
string[] list2 = { "a", "d", "e", "f", "h", "j" };
int?[] aMap21 = Levenshtein(list2, list1);
int?[] aMap12 = Levenshtein(list1, list2);
}
public static int?[] Levenshtein(string[] Src, String[] Dst) {
// this finds a minimum difference solution of inserts and deletes that maps Src to Dst
// it returns the map from the perspective of Dst, i.e.:
// each element of the return array contains the Src index of the corresponging element in B
// a null value means the element in B was inserted, and never existed in A
//
// Given A = {"a", "c", "b", "d", "j", "e", "f"}
// B = {"a", "d", "e", "f", "h", "j"};
//
// Levenshtein(B, A):
// a c b d j e f <-- A
// 0 1 2 3 <-- aMap
// a d e f h j <-- B
//
// Levenshtein(A, B):
// a d e f h j <-- B
// 0 3 5 6 <-- aMap
// a c b d j e f <-- A
//
// see: http://en.wikipedia.org/wiki/Levenshtein_distance
int cSrc = Src.Length; //length of s
int cDst = Dst.Length; //length of t
if (cSrc == 0 || cDst == 0) return null;
//**** create the Levenshtein matrix
// it has at 1 extra element in each dimension to contain the edges
int[,] aLev = new int[cSrc + 1, cDst + 1]; // the matrix
int iSrc, iDst;
// Load the horizontal and vertical edges
for (iSrc = 0; iSrc <= cSrc; aLev[iSrc, 0] = iSrc++) ;
for (iDst = 0; iDst <= cDst; aLev[0, iDst] = iDst++) ;
// load the interior
for (iSrc = 1; iSrc <= cSrc; iSrc++)
for (iDst = 1; iDst <= cDst; iDst++)
aLev[iSrc, iDst] = Math.Min(Math.Min(aLev[iSrc - 1, iDst] + 1, aLev[iSrc, iDst - 1] + 1),
aLev[iSrc - 1, iDst - 1] + ((Dst[iDst - 1] == Src[iSrc - 1]) ? 0 : 2));
DumpLevMatrix(aLev, Src, Dst); // Debug
//**** create the return map, using the Levenshtein matrix
int?[] aMap = new int?[cDst]; // this is the return map
iSrc = cSrc; // start in lower right corner of the Levenshtein matrix
iDst = cDst; // start in lower right corner of the Levenshtein matrix
// work backwards to pick best solution
while ((iSrc >= 0) || (iDst >= 0)) {
if ((iSrc > 0) && (iDst > 0)) {
// enter here if iSrc and iDst are in the lev matrix and not on its edge
int nCur = aLev[iSrc, iDst];
int nIns = nCur - aLev[iSrc, iDst - 1]; // if move along B to find match, it was an insert
int nDel = nCur - aLev[iSrc - 1, iDst]; // if move along A to find match, it was a deletion
if (nIns == 1) // this char was NOT in A, but was inserted into B
iDst--; // Leave map of B[j] to nowher, scan to previous B (--j)
else if (nDel == 1) // this char was in A, but is missing in B
iSrc--; // Don't map any B, scan to previous A (--i)
else // Match
aMap[iDst-- - 1] = iSrc-- - 1; // After map B[j] to A[i], scan to prev A,B (--i, --j)
} else {
if (iDst > 0) // remaining chars are inserts, Leave map of B[j] to nowher, scan to previous B (--j)
iDst--;
else if (iSrc > 0) // Delete to the end, deletes do nothing
iSrc--;
else
break;
}
}
DumpMap(aMap, Dst); // Debug
return aMap;
}
// just for debugging
static void DumpLevMatrix(int[,] aLev, string[] Src, string[] Dst) {
StringBuilder sb = new StringBuilder();
int cSrc = Src.Length;
int cDst = Dst.Length;
int iSrc, iDst;
sb.Length = 6;
for (iDst = 0; iDst < cDst; ++iDst)
sb.AppendFormat("{0,-3}", Dst[iDst]);
Console.WriteLine(sb.ToString());
for (iSrc = 0; iSrc <= cSrc; ++iSrc) {
if (iSrc == 0)
sb.Length = 3;
else {
sb.Length = 0;
sb.AppendFormat("{0,-3}", Src[iSrc - 1]);
}
for (iDst = 0; iDst <= cDst; ++iDst)
sb.AppendFormat("{0:00}", aLev[iSrc, iDst]).Append(" ");
Console.WriteLine(sb.ToString());
}
}
// just for debugging
static void DumpMap(int?[] aMap, string[] Dst) {
StringBuilder sb = new StringBuilder();
for (int iMap = 0; iMap < aMap.Length; ++iMap)
sb.AppendFormat("{0,-3}", Dst[iMap]); // dst and map are same size
Console.WriteLine(sb.ToString());
sb.Length = 0;
for (int iMap = 0; iMap < aMap.Length; ++iMap)
if (aMap[iMap] == null)
sb.Append(" ");
else
sb.AppendFormat("{0:00}", aMap[iMap]).Append(" ");
Console.WriteLine(sb.ToString());
}
}
}