1

次の問題を解決する必要があります。任意の長さの任意の内容の 2 つの文字列があります。両方の文字列に表示される、最大長の順序付けられたすべてのシーケンスを見つける必要があります。

例 1: 入力: "a1b2c3" "1a2b3c" 出力: "123" "12c" "1b3" "1bc" "a23" "a2c" "ab3" "abc"

例 2: 入力: "cadb" "abcd" 出力: "ab" "ad" "cd"

私は2つのループ、再帰、そしてより大きな結果の一部である重複と結果を削除するという簡単な方法でそれを書きました(たとえば、「abc」シーケンスには「ab」「ac」および「bc」シーケンスが含まれているため、それらをフィルタリングしています)

// "match" argument here used as temporary buffer
void match_recursive(set<string> &matches, string &match, const string &a_str1, const string &a_str2, size_t a_pos1, size_t a_pos2)
{
    bool added = false;

    for(size_t i = a_pos1; i < a_str1.length(); ++i)
    {
        for(size_t j = a_pos2; j < a_str2.length(); ++j)
        {
            if(a_str1[i] == a_str2[j])
            {
                match.push_back(a_str1[i]);

                if(i < a_str1.length() - 1 && j < a_str2.length() - 1)
                    match_recursive(matches, match, a_str1, a_str2, i + 1, j + 1);
                else
                    matches.emplace(match);
                added = true;

                match.pop_back();
            }
        }
    }

    if(!added)
        matches.emplace(match);
}

この関数は問題を解決しますが、複雑さは受け入れられません。たとえば、「0q0e0t0c0a0d0a0d0i0e0o0p0z0」「0w0r0y0d0s0a0b0w0k0f0.0k0x0」のソリューションは、私のマシンで 28 秒かかります (デバッグ ターゲットですが、とにかくこれは非常に遅いです)。この問題には簡単なアルゴリズムが必要だと思いますが、どういうわけかネット上で見つけることができません。

皆さんは私を正しい方向に向けることができますか?

4

3 に答える 3

0

2 弦の類似点を見つけようとしているようですね。私はこのコードを見つけて、何年も前に Web のどこかでわずかに変更し (申し訳ありませんが、ソースを引用することはできません)、頻繁に使用しています。それは非常に速く動作します(とにかく文字列の場合)。目的に応じて変更する必要がある場合があります。VBですみません。

Private Shared piScore As Integer
''' <summary>
''' Compares two not-empty strings regardless of case. 
''' Returns a numeric indication of their similarity 
''' (0 = not at all similar, 100 = identical)
''' </summary>
''' <param name="psStr1">String to compare</param>
''' <param name="psStr2">String to compare</param>
''' <returns>0-100 (0 = not at all similar, 100 = identical)</returns>
''' <remarks></remarks>
Public Shared Function Similar(ByVal psStr1 As String, ByVal psStr2 As String) As Integer
    If psStr1 Is Nothing Or psStr2 Is Nothing Then Return 0

    ' Convert each string to simplest form (letters
    ' and digits only, all upper case)
    psStr1 = ReplaceSpecial(psStr1.ToUpper)
    psStr2 = ReplaceSpecial(psStr2.ToUpper)

    If psStr1.Trim = "" Or psStr2.Trim = "" Then
        ' One or both of the strings is now empty
        Return 0
    End If

    If psStr1 = psStr2 Then
        ' Strings are identical
        Return 100
    End If

    ' Initialize cumulative score (this will be the
    ' total length of all the common substrings)
    piScore = 0

    ' Find all common sub-strings
    FindCommon(psStr1, psStr2)

    ' We now have the cumulative score. Return this
    ' as a percent of the maximum score. The maximum
    ' score is the average length of the two strings.
    Return piScore * 200 / (Len(psStr1) + Len(psStr2))

End Function

''' <summary>USED BY SIMILAR FUNCTION</summary>
Private Shared Sub FindCommon(ByVal psS1 As String, ByVal psS2 As String)
    ' Finds longest common substring (other than single
    ' characters) in psS1 and psS2, then recursively
    ' finds longest common substring in left-hand
    ' portion and right-hand portion. Updates the
    ' cumulative score.

    Dim iLongest As Integer = 0, iStartPos1 As Integer = 0, iStartPos2 As Integer = 0, iJ As Integer = 0
    Dim sHoldStr As String = "", sTestStr As String = "", sLeftStr1 As String = "", sLeftStr2 As String = ""
    Dim sRightStr1 As String = "", sRightStr2 As String = ""

    sHoldStr = psS2
    Do While Len(sHoldStr) > iLongest

        sTestStr = sHoldStr
        Do While Len(sTestStr) > 1
            iJ = InStr(psS1, sTestStr)
            If iJ > 0 Then
                ' Test string is sub-set of the other string

                If Len(sTestStr) > iLongest Then
                    ' Test string is longer than previous
                    ' longest. Store its length and position.
                    iLongest = Len(sTestStr)
                    iStartPos1 = iJ
                    iStartPos2 = InStr(psS2, sTestStr)
                End If

                ' No point in going further with this string
                Exit Do

            Else
                ' Test string is not a sub-set of the other
                ' string. Discard final character of test
                ' string and try again.
                sTestStr = Left(sTestStr, Len(sTestStr) - 1)
            End If

        Loop

        ' Now discard first char of test string and
        ' repeat the process.
        sHoldStr = Right(sHoldStr, Len(sHoldStr) - 1)

    Loop

    ' Update the cumulative score with the length of
    ' the common sub-string.
    piScore = piScore + iLongest

    ' We now have the longest common sub-string, so we
    ' can isolate the sub-strings to the left and right
    ' of it.

    If iStartPos1 > 3 And iStartPos2 > 3 Then
        sLeftStr1 = Left(psS1, iStartPos1 - 1)
        sLeftStr2 = Left(psS2, iStartPos2 - 1)

        If sLeftStr1.Trim <> "" And sLeftStr2.Trim <> "" Then
            ' Get longest common substring from left strings
            FindCommon(sLeftStr1, sLeftStr2)
        End If
    Else
        sLeftStr1 = ""
        sLeftStr2 = ""
    End If
    If iLongest > 0 Then
        sRightStr1 = Mid(psS1, iStartPos1 + iLongest)
        sRightStr2 = Mid(psS2, iStartPos2 + iLongest)

        If sRightStr1.Trim <> "" And sRightStr2.Trim <> "" Then
            ' Get longest common substring from right strings
            FindCommon(sRightStr1, sRightStr2)
        End If
    Else
        sRightStr1 = ""
        sRightStr2 = ""
    End If
End Sub

''' <summary>USED BY SIMILAR FUNCTION</summary>
Private Shared Function ReplaceSpecial(ByVal sString As String) As String
    Dim iPos As Integer
    Dim sReturn As String = ""
    Dim iAsc As Integer
    For iPos = 1 To sString.Length
        iAsc = Asc(Mid(sString, iPos, 1))
        If (iAsc >= 48 And iAsc <= 57) Or (iAsc >= 65 And iAsc <= 90) Then
            sReturn &= Chr(iAsc)
        End If
    Next
    Return sReturn
End Function

Similar 関数を呼び出すだけで、0 から 100 までの結果が得られます。

お役に立てれば

于 2013-08-12T15:54:14.063 に答える