1

文字列から一意の単語と数値を抽出する必要があります。この時点で、すべてを取り除き、英数字の単語のみを返す関数があります。また、単語が実際に日付または数字である場合を認識し、テキストが分割されないようにする必要もあります。これどうやってするの?

これが私が現在持っているスプリッター機能です:

Public Function GetAlphaNumericWords(ByVal InputText As String) As Collection
' This function splits the rich text input into unique alpha-numeric only strings
    Dim words() As String
    Dim characters() As Byte
    Dim text As Variant
    Dim i As Long

    Set GetAlphaNumericWords = New Collection

    text = Trim(PlainText(InputText))
    If Len(text) > 0 Then
    ' Replace any non alphanumeric characters with a space
        characters = StrConv(text, vbFromUnicode)
        For i = LBound(characters) To UBound(characters)
            If Not (Chr(characters(i)) Like "[A-Za-z0-9 ]") Then
                characters(i) = 32 ' Space character
            End If
        Next
        ' Merge the byte array back to a string and then split on spaces
        words = VBA.Split(StrConv(characters, vbUnicode))

        ' Add each unique word to the output collection
        On Error Resume Next
        For Each text In words
            If (text <> vbNullString) Then GetAlphaNumericWords.Add CStr(text), CStr(text)
            If Err Then Err.Clear
        Next
    End If
End Function

この関数が現在返す出力の例:

GetAlphaNumericWords("Hello World!  Test 1. 123.45 8/22/2013 August 22, 2013")

Hello
World
Test
1
123
45
8
22
2013
August

私が本当に欲しいのは:

Hello
World
Test
1
123.45
8/22/2013
4

1 に答える 1

3

正規表現を使用できる場合、これは大変な作業のように思えます。良い出発点については、ここここを参照してください。

「Microsoft VBScript Regular Expressions 5.5」への参照を追加し、次の関数を追加するとします (他の場所で役立つ場合に備えて、必要以上の関数をいくつか含めました)。

Public Function RegEx(strInput As String, strRegEx As String, Optional bIgnoreCase As Boolean = True, Optional bMultiLine As Boolean = False) As Boolean
    Dim RegExp As VBScript_RegExp_55.RegExp
    Set RegExp = New VBScript_RegExp_55.RegExp
    With RegExp
        .MultiLine = bMultiLine
        .IgnoreCase = bIgnoreCase
        .Pattern = strRegEx
    End With
    RegEx = RegExp.test(strInput)
    Set RegExp = Nothing
End Function

Public Function RegExMatch(strInput As String, strRegEx As String, Optional MatchNo As Long = 0, Optional FirstIDX As Long, Optional Lgth As Long, Optional bIgnoreCase As Boolean = True, Optional bMultiLine As Boolean = False) As String
    Dim RegExp As VBScript_RegExp_55.RegExp, Matches As VBScript_RegExp_55.MatchCollection
    Set RegExp = New VBScript_RegExp_55.RegExp
    With RegExp
        .Global = True
        .MultiLine = bMultiLine
        .IgnoreCase = bIgnoreCase
        .Pattern = strRegEx
    End With
    If RegExp.test(strInput) Then
        Set Matches = RegExp.Execute(strInput)
        If MatchNo > Matches.Count - 1 Then
            RegExMatch = ""
        Else
            RegExMatch = Matches(MatchNo).value
            FirstIDX = Matches(MatchNo).FirstIndex
            Lgth = Matches(MatchNo).Length
        End If
    Else
        RegExMatch = ""
    End If
    Set RegExp = Nothing
End Function

Public Function RegexMatches(strInput As String, strRegEx As String, Optional bIgnoreCase As Boolean = True, Optional bMultiLine As Boolean = False) As VBScript_RegExp_55.MatchCollection
    Dim RegExp As VBScript_RegExp_55.RegExp
    Set RegExp = New VBScript_RegExp_55.RegExp
    With RegExp
        .Global = True
        .MultiLine = bMultiLine
        .IgnoreCase = bIgnoreCase
        .Pattern = strRegEx
    End With
    Set RegexMatches = RegExp.Execute(strInput)
    Set RegExp = Nothing
End Function

Public Function RegExReplace(strInput As String, strRegEx As String, strReplace As String, Optional bGlobal As Boolean = True, Optional bIgnoreCase As Boolean = True, Optional bMultiLine As Boolean = False) As String
    Dim RegExp As VBScript_RegExp_55.RegExp
    Set RegExp = New VBScript_RegExp_55.RegExp
    With RegExp
        .MultiLine = bMultiLine
        .IgnoreCase = bIgnoreCase
        .Pattern = strRegEx
        .Global = bGlobal
    End With
    RegExReplace = RegExp.Replace(strInput, strReplace)
    Set RegExp = Nothing
End Function

それらを使用して、はるかに便利でエレガントなソリューションを作成できるはずです。

次のような正規表現パターンを検討する必要があります。

\b(\w+)\b

および次のようなコード - を使用した各一致およびサブマッチに対して、 aおよび aをRegexMatches試し、エラーが発生しない場合は拒否します (エラーがない場合は、正当な日付または番号を示します)。CDecCDate

Dim Matches As VBScript_RegExp_55.MatchCollection
...
Set Matches = RegexMatches(InputText , "\b(\w+)\b")
                If Matches.Count > 0 Then
                    For CtrA = 0 To Matches.Count - 1
                        For CtrB = 0 To Matches(CtrA).SubMatches.Count - 1
                            On Error Resume Next
                            TestVariant = Null
                            TestVariant = CDec(Matches(CtrA).Submatches(CtrB))
                            TestVariant = CDate(Matches(CtrA).Submatches(CtrB))
                            On Error Goto 0
                            If IsNull(TestVariant) Then
                                ' Do further processing to check if the submatch can be split on non-alphanumeric characters... 
                            Else
                                GetAlphaNumericWords.Add Matches(CtrA).Submatches(CtrB), Matches(CtrA).Submatches(CtrB)
                            End If
                        Next
                    Next
                End If
于 2013-08-23T00:58:16.867 に答える