2 弦の類似点を見つけようとしているようですね。私はこのコードを見つけて、何年も前に Web のどこかでわずかに変更し (申し訳ありませんが、ソースを引用することはできません)、頻繁に使用しています。それは非常に速く動作します(とにかく文字列の場合)。目的に応じて変更する必要がある場合があります。VBですみません。
Private Shared piScore As Integer
''' <summary>
''' Compares two not-empty strings regardless of case.
''' Returns a numeric indication of their similarity
''' (0 = not at all similar, 100 = identical)
''' </summary>
''' <param name="psStr1">String to compare</param>
''' <param name="psStr2">String to compare</param>
''' <returns>0-100 (0 = not at all similar, 100 = identical)</returns>
''' <remarks></remarks>
Public Shared Function Similar(ByVal psStr1 As String, ByVal psStr2 As String) As Integer
If psStr1 Is Nothing Or psStr2 Is Nothing Then Return 0
' Convert each string to simplest form (letters
' and digits only, all upper case)
psStr1 = ReplaceSpecial(psStr1.ToUpper)
psStr2 = ReplaceSpecial(psStr2.ToUpper)
If psStr1.Trim = "" Or psStr2.Trim = "" Then
' One or both of the strings is now empty
Return 0
End If
If psStr1 = psStr2 Then
' Strings are identical
Return 100
End If
' Initialize cumulative score (this will be the
' total length of all the common substrings)
piScore = 0
' Find all common sub-strings
FindCommon(psStr1, psStr2)
' We now have the cumulative score. Return this
' as a percent of the maximum score. The maximum
' score is the average length of the two strings.
Return piScore * 200 / (Len(psStr1) + Len(psStr2))
End Function
''' <summary>USED BY SIMILAR FUNCTION</summary>
Private Shared Sub FindCommon(ByVal psS1 As String, ByVal psS2 As String)
' Finds longest common substring (other than single
' characters) in psS1 and psS2, then recursively
' finds longest common substring in left-hand
' portion and right-hand portion. Updates the
' cumulative score.
Dim iLongest As Integer = 0, iStartPos1 As Integer = 0, iStartPos2 As Integer = 0, iJ As Integer = 0
Dim sHoldStr As String = "", sTestStr As String = "", sLeftStr1 As String = "", sLeftStr2 As String = ""
Dim sRightStr1 As String = "", sRightStr2 As String = ""
sHoldStr = psS2
Do While Len(sHoldStr) > iLongest
sTestStr = sHoldStr
Do While Len(sTestStr) > 1
iJ = InStr(psS1, sTestStr)
If iJ > 0 Then
' Test string is sub-set of the other string
If Len(sTestStr) > iLongest Then
' Test string is longer than previous
' longest. Store its length and position.
iLongest = Len(sTestStr)
iStartPos1 = iJ
iStartPos2 = InStr(psS2, sTestStr)
End If
' No point in going further with this string
Exit Do
Else
' Test string is not a sub-set of the other
' string. Discard final character of test
' string and try again.
sTestStr = Left(sTestStr, Len(sTestStr) - 1)
End If
Loop
' Now discard first char of test string and
' repeat the process.
sHoldStr = Right(sHoldStr, Len(sHoldStr) - 1)
Loop
' Update the cumulative score with the length of
' the common sub-string.
piScore = piScore + iLongest
' We now have the longest common sub-string, so we
' can isolate the sub-strings to the left and right
' of it.
If iStartPos1 > 3 And iStartPos2 > 3 Then
sLeftStr1 = Left(psS1, iStartPos1 - 1)
sLeftStr2 = Left(psS2, iStartPos2 - 1)
If sLeftStr1.Trim <> "" And sLeftStr2.Trim <> "" Then
' Get longest common substring from left strings
FindCommon(sLeftStr1, sLeftStr2)
End If
Else
sLeftStr1 = ""
sLeftStr2 = ""
End If
If iLongest > 0 Then
sRightStr1 = Mid(psS1, iStartPos1 + iLongest)
sRightStr2 = Mid(psS2, iStartPos2 + iLongest)
If sRightStr1.Trim <> "" And sRightStr2.Trim <> "" Then
' Get longest common substring from right strings
FindCommon(sRightStr1, sRightStr2)
End If
Else
sRightStr1 = ""
sRightStr2 = ""
End If
End Sub
''' <summary>USED BY SIMILAR FUNCTION</summary>
Private Shared Function ReplaceSpecial(ByVal sString As String) As String
Dim iPos As Integer
Dim sReturn As String = ""
Dim iAsc As Integer
For iPos = 1 To sString.Length
iAsc = Asc(Mid(sString, iPos, 1))
If (iAsc >= 48 And iAsc <= 57) Or (iAsc >= 65 And iAsc <= 90) Then
sReturn &= Chr(iAsc)
End If
Next
Return sReturn
End Function
Similar 関数を呼び出すだけで、0 から 100 までの結果が得られます。
お役に立てれば