4

毎週、数千行のテキスト行を解析して要約し、バッチ処理する必要があります。Excel のワイルドカードは十分に柔軟ではなかったので、処理のために Notepad++ に貼り付けたり、スクリプトにフィードしたりする余分な手順を削除したいと考えました。

思いついた道具はこちら。それらはまだ少し遅いです -- 会社のラップトップで 1 秒あたりおそらく 3000 行です -- しかし、便利です。

RXMatch -- 最初の一致を返します。サブグループを返すオプション。

=RXMatch("Apple","A(..)",1) -> "pp"

RXCount -- 一致数を数える

=RXCount("Apple","p") -> 2

RXPrint -- 最初の一致および/またはサブグループをテンプレート文字列に埋め込みます

=RXPrint("Apple","(\S)\S+","\1 is for \0") -> "A is for Apple"

RXPrintAll -- 各一致をテンプレート文字列に埋め込み、結果を結合します

=RXPrintAll("Apple Banana","(\S)\S+","\1 is for \0") -> "A is for Apple, B is for Banana"

RXMatches -- 一致の垂直配列を返します。サブグループを返すオプション

=RXMatches("Apple Banana","\S+") -> {"Apple";"Banana"}
4

2 に答える 2

3

RXマッチ

Public Function RXMatch(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns the matching text
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Pattern = Pattern

    Set Matches = RE.Execute(Text)

    If (Matches.Count > 0) Then
        If (Group > 0) Then
            retval = Matches(0).submatches(Group - 1)
        Else
            retval = Matches(0)
        End If
    Else
        retval = ""
    End If

    RXMatch = retval
End Function

RXカウント

Public Function RXCount(Text As String, Pattern As String, Optional IgnoreCase As Boolean = True) As Integer
    Dim retval As Integer
    ' Counts the number of matches
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Global = True

    RE.Pattern = Pattern
    Set Matches = RE.Execute(Text)

    retval = Matches.Count

    RXCount = retval
End Function

RXPrint

Public Function RXPrint(Text As String, Pattern As String, Optional Template As String = "\0", Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns a new string formatted according to the given template, using the first match found
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim REText, RETemplate As Object
    Dim MatchesText, MatchesTemplate As Object

    Set REText = CreateObject("vbscript.regexp")
    REText.IgnoreCase = IgnoreCase
    REText.Pattern = Pattern

    Set MatchesText = REText.Execute(Text)

    Set RETemplate = CreateObject("vbscript.regexp")
    RETemplate.Global = True
    RETemplate.Pattern = "(?:\\(.))|([^\\]+)"

    Set MatchesTemplate = RETemplate.Execute(Template)

    If (MatchesText.Count > 0) Then
        ReDim retArray(0 To MatchesTemplate.Count - 1) As String
        Dim escaped As String
        Dim plaintext As String
        For i = 0 To MatchesTemplate.Count - 1
            escaped = MatchesTemplate(i).submatches(0)
            plaintext = MatchesTemplate(i).submatches(1)
            If (Len(escaped) > 0) Then
                If (IsNumeric(escaped)) Then
                    Dim groupnum As Integer
                    groupnum = CInt(escaped)
                    If groupnum = 0 Then
                        retArray(i) = MatchesText(0)
                    ElseIf (groupnum > MatchesText(0).submatches.Count) Then
                        retArray(i) = "?"
                    Else
                        retArray(i) = MatchesText(0).submatches(groupnum - 1)
                    End If
                Else
                    retArray(i) = escaped
                End If
            Else
                retArray(i) = plaintext
            End If
        Next i
        retval = Join(retArray, "")
    Else
        retval = ""
    End If

    RXPrint = retval
End Function

RXPrintAll

Public Function RXPrintAll(Text As String, Pattern As String, Optional Template As String = "\0", Optional Delimiter As String = ", ", Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns a new string formatted according to the given template, repeated for each match
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
    ' Delimiter (optional) specified how the results will be joined
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim REText, RETemplate As Object
    Dim MatchesText, MatchesTemplate As Object

    Set REText = CreateObject("vbscript.regexp")
    REText.IgnoreCase = IgnoreCase
    REText.Global = True
    REText.Pattern = Pattern

    Set MatchesText = REText.Execute(Text)

    Set RETemplate = CreateObject("vbscript.regexp")
    RETemplate.Global = True
    RETemplate.Pattern = "(?:\\(.))|([^\\]+)"

    Set MatchesTemplate = RETemplate.Execute(Template)

    If (MatchesText.Count > 0) Then
        ReDim retArrays(0 To MatchesText.Count - 1)
        For j = 0 To MatchesText.Count - 1
            ReDim retArray(0 To MatchesTemplate.Count - 1) As String
            Dim escaped As String
            Dim plaintext As String
            For i = 0 To MatchesTemplate.Count - 1
                escaped = MatchesTemplate(i).submatches(0)
                plaintext = MatchesTemplate(i).submatches(1)
                If (Len(escaped) > 0) Then
                    If (IsNumeric(escaped)) Then
                        Dim groupnum As Integer
                        groupnum = CInt(escaped)
                        If groupnum = 0 Then
                            retArray(i) = MatchesText(j)
                        ElseIf (groupnum > MatchesText(j).submatches.Count) Then
                            retArray(i) = "?"
                        Else
                            retArray(i) = MatchesText(j).submatches(groupnum - 1)
                        End If
                    Else
                        retArray(i) = escaped
                    End If
                Else
                    retArray(i) = plaintext
                End If
            Next i
            retArrays(j) = Join(retArray, "")
        Next j
        retval = Join(retArrays, Delimiter)
    Else
        retval = ""
    End If
    RXPrintAll = retval
End Function

RXMatches

Public Function RXMatches(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As Variant
    Dim retval() As String
    ' Takes a string and returns all matches in a vertical array
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Global = True
    RE.Pattern = Pattern

    Set Matches = RE.Execute(Text)

    If (Matches.Count > 0) Then
        ReDim retval(0 To Matches.Count - 1)
        For i = 0 To Matches.Count - 1
            If (Group > 0) Then
                retval(i) = Matches(i).submatches(Group - 1)
            Else
                retval(i) = Matches(i)
            End If
        Next i
    Else
        ReDim retval(1)
        retval(0) = ""
    End If

    RXMatches = Application.Transpose(retval)
End Function
于 2013-10-20T18:28:39.310 に答える