3

SQL スクリプトから各挿入行を取得するための正規表現を書こうとしています。Regex Hero の .NET Regex Tester を使用すると、期待どおり 8 件の一致が得られます。しかし、このスニペットをコンソール アプリとして実行すると、一致が 1 件も返されません。

const string text =
@"INSERT INTO [AdminPrefs] ( [SpayClinic] , [VaxClinic] , [ShelterClinic] , [DateModified] , [Prefix] , [UpdateCounter] , [LockedRecs] , [dbName] , [Timer] , [MedCtrClinic] , [OtherClinic] , [Da2PPPx] , [Da2PPEPx] , [FVRCPPx] , [FVRCPEPx] , [FELVTPx] , [FELVTEPx] , [FELVVPx] , [FELVVEPx] , [HWTPx] , [HWTEPx] , [RabiesPx] , [RabiesEPx] , [FIVTest] , [FIVTestE] , [OnePlusChar] , [XSHWMPx] , [XSHWMEPx] , [SHWMPx] , [SHWMEPx] , [MHWMPx] , [MHWMEPx] , [LHWMPx] , [LHWMEPx] , [DebuggerOn] , [PayThisAmount] , [free6] , [XSHWMPillPx] , [XSHWMPillEPx] , [SHWMPillPx] , [SHWMPillEPx] , [MHWMPillPx] , [MHWMPillEPx] , [LHWMPillPx] , [LHWMPillEPx] , [free7] , [free8] , [free9] , [XSPMPx] , [XSPMEPx] , [SPMPx] , [SPMEPx] , [MPMPx] , [MPMEPx] , [LPMPx] , [LPMEPx] , [ReceiptFooter] , [MonthsUntilBenefits] , [free12] , [XSPMPillPx] , [XSPMPillEPx] , [SPMPillPx] , [SPMPillEPx] , [MPMPillPx] , [MPMPillEPx] , [LPMPillPx] , [LPMPillEPx] , [free14] , [ClinicName] , [ShelterName] , [ShelterAbbr] , [Address1] , [Address2] , [City] , [State] , [ZipCode] , [MainPhone] , [MainFax] , [SplashPict] , [free17] , [free18] , [LicenseNo] , [SerialNo] , [free20] , [free21] , [free22] , [VLogCC] , [SNLogCC] , [free23] , [free24] , [free25] , [AgeAndBDay] , [free26] , [free27] , [free28] , [CurrRouteNum] )
VALUES
(12 , 7 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(15 , 53 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(20 , 216 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(16 , 8 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0);

INSERT INTO [AdminPrefs] ( [SpayClinic] , [VaxClinic] , [ShelterClinic] , [DateModified] , [Prefix] , [UpdateCounter] , [LockedRecs] , [dbName] , [Timer] , [MedCtrClinic] , [OtherClinic] , [Da2PPPx] , [Da2PPEPx] , [FVRCPPx] , [FVRCPEPx] , [FELVTPx] , [FELVTEPx] , [FELVVPx] , [FELVVEPx] , [HWTPx] , [HWTEPx] , [RabiesPx] , [RabiesEPx] , [FIVTest] , [FIVTestE] , [OnePlusChar] , [XSHWMPx] , [XSHWMEPx] , [SHWMPx] , [SHWMEPx] , [MHWMPx] , [MHWMEPx] , [LHWMPx] , [LHWMEPx] , [DebuggerOn] , [PayThisAmount] , [free6] , [XSHWMPillPx] , [XSHWMPillEPx] , [SHWMPillPx] , [SHWMPillEPx] , [MHWMPillPx] , [MHWMPillEPx] , [LHWMPillPx] , [LHWMPillEPx] , [free7] , [free8] , [free9] , [XSPMPx] , [XSPMEPx] , [SPMPx] , [SPMEPx] , [MPMPx] , [MPMEPx] , [LPMPx] , [LPMEPx] , [ReceiptFooter] , [MonthsUntilBenefits] , [free12] , [XSPMPillPx] , [XSPMPillEPx] , [SPMPillPx] , [SPMPillEPx] , [MPMPillPx] , [MPMPillEPx] , [LPMPillPx] , [LPMPillEPx] , [free14] , [ClinicName] , [ShelterName] , [ShelterAbbr] , [Address1] , [Address2] , [City] , [State] , [ZipCode] , [MainPhone] , [MainFax] , [SplashPict] , [free17] , [free18] , [LicenseNo] , [SerialNo] , [free20] , [free21] , [free22] , [VLogCC] , [SNLogCC] , [free23] , [free24] , [free25] , [AgeAndBDay] , [free26] , [free27] , [free28] , [CurrRouteNum] )
VALUES
(26 , 5 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(18 , 12 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(9 , 10 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(2 , 72 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0);
";

/// <summary>
/// Runs the row-matching pattern against the sample SQL text and prints how
/// many rows were found next to the expected count.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // Singleline lets '.' cross line breaks; Multiline lets ^ and $ anchor at each line.
    const RegexOptions options = RegexOptions.Singleline | RegexOptions.Multiline;
    string query = @"^\(.*?\)(,|;)$";

    MatchCollection matches = new Regex(query, options).Matches(text);
    int found = matches.Count;

    Console.WriteLine("Expected Matches: 8");
    Console.WriteLine("Matches Found: {0}", found);

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}

私のオプションは、Web サイトとコード (Multiline と Singleline) でまったく同じです。どちらも同じ .NET 正規表現エンジンを使用しているはずです。この 2 つの違いの原因は何でしょうか?


最終結果:

好奇心旺盛なすべての人のために、私の最終的な正規表現は

@"(?<=^\()                       # The beginning of a line followed by a (
((('(?<c>.*?)'(?!')(?=[\s\)])) | # Text string in SQL supports line breaks
  (?<c>-?[\d\.]+) |              # Any numbers
  (X'(?<c>[0-9a-f]*)')           # Something formatted like X'0123456789abcdef'
  )(\s,\s)?                      # Spaces and commas between the records
)+                               # Repeat the pattern at least one time
(?=(?<!'')\)[;,]\r?$)            # The End of the line ending with ); or ), and not immediately proceeded by ''";     

これを R&D (rip-off and deploy) 開発に使用しようと考えているすべての人に注意してください。私の SQL は形式が非常に規則的であるため、この正規表現は私の SQL でのみ機能します。サード パーティ プログラムによって生成されたものではない SQL を使用する場合は、私が対処する必要のなかった多くのエッジ ケースを処理できるように微調整が必要です。

パーサーのコード全体を次に示します。似たようなことで行き詰まっている他の誰かの助けになれば幸いです。

// Imports every exported *.sql file into SQL Server. Each sub-folder of
// _exportFolder is named after its destination table and holds one or more
// files of "INSERT INTO [table] ( [col] , ... ) VALUES (...),(...);" statements.
// The rows are parsed with regular expressions, converted to the destination
// column types and pushed to the server with SqlBulkCopy, one batch per file.
foreach (var tableFolder in Directory.GetDirectories(_exportFolder))
{
    // The folder name doubles as the destination table name.
    string tableName = Path.GetFileName(tableFolder);

    // Populate the schema of the DataTable ("top 0" returns the columns but no rows).
    DataTable table = new DataTable();
    using (SqlDataAdapter ada = new SqlDataAdapter(String.Format("Select top 0 * from [{0}]", tableName), conn))
    {
        ada.Fill(table);
    }

    // All of the files to import for this table
    foreach (string file in Directory.GetFiles(tableFolder, "*.sql"))
    {
        string text = File.ReadAllText(file);

        const string recordRegex =
                        @"(?<=^\()                       #The begining of a line followed by a (
                        ((('(?<s>.*?)'(?!')(?=[\s\)])) | # Something formatted like 'some text' supports line breaks
                            (?<n>-?[\d\.]+) |            # Any numbers
                            (X'(?<h>[0-9a-f]*)')         # Something formatted like X'0123456789abcdef'
                            )(\s,\s)?                    # Spaces and commas between the records
                        )+                               # Repeat the pattern at least one time
                        (?=(?<!'')\)[;,]\r?$)            # The End of the line ending with ); or ), and not immedatly proceded by ''";

        // Creates one match per row in the database
        var records = Regex.Matches(text, recordRegex, RegexOptions.Singleline | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture);

        // Column names, in declaration order, taken from the first INSERT statement of the file.
        const string headerRegex = @"^INSERT\sINTO\s\[[\w_\-\s]+\]\s\(\s(?:\[([\w_\-\s]+)\]\s(?:,\s)?)+\)";
        string[] header = Regex.Match(text, headerRegex).Groups[1].Captures
                               .Cast<Capture>()
                               .Select(capture => capture.Value)
                               .ToArray();

        // Values are machine-generated, so they must be parsed independently of the current culture.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        foreach (Match record in records)
        {
            // The values were captured by three separate named groups, so merge
            // their captures and restore the order in which they appear in the text.
            var columns = record.Groups.Cast<Group>()
                                .Skip(1)  // Groups[0] contains the entire record.
                                .SelectMany(group => group.Captures.Cast<Capture>())
                                .OrderBy(capture => capture.Index)
                                .ToArray();

            // A mismatch means the header or the row was not parsed correctly;
            // fail loudly instead of loading a misaligned or partial row.
            if (columns.Length != header.Length)
            {
                throw new InvalidDataException(String.Format(
                    "'{0}': row at offset {1} has {2} value(s) but the header declares {3} column(s).",
                    file, record.Index, columns.Length, header.Length));
            }

            DataRow row = table.NewRow();
            for (int i = 0; i < columns.Length; i++)
            {
                string name = header[i];
                string value = columns[i].Value;
                Type columnType = table.Columns[name].DataType;

                if (columnType == typeof(String))
                {
                    row[name] = value;
                }
                else if (columnType == typeof(Int32))
                {
                    row[name] = Convert.ToInt32(value, invariant);
                }
                else if (columnType == typeof(Double))
                {
                    row[name] = Convert.ToDouble(value, invariant);
                }
                else if (columnType == typeof(Boolean))
                {
                    // Booleans are exported as the literals 0 and 1.
                    if (value == "0")
                    {
                        row[name] = false;
                    }
                    else if (value == "1")
                    {
                        row[name] = true;
                    }
                    else
                    {
                        throw new InvalidDataException(String.Format(
                            "Column '{0}': expected 0 or 1 for a Boolean but found '{1}'.", name, value));
                    }
                }
                else if (columnType == typeof(Int16))
                {
                    row[name] = Convert.ToInt16(value, invariant);
                }
                else if (columnType == typeof(Byte[]))
                {
                    row[name] = StringToByteArray(value);
                }
                else
                {
                    throw new NotImplementedException(String.Format(
                        "Column '{0}': no conversion implemented for type {1}.", name, columnType));
                }
            }
            table.Rows.Add(row);
        }

        using (var bulkCopy = new SqlBulkCopy(conn))
        {
            bulkCopy.DestinationTableName = tableName;
            bulkCopy.BulkCopyTimeout = 0; // 0 = no timeout
            bulkCopy.WriteToServer(table);
        }

        // The table is shared by every file of this folder: drop the rows that
        // were just written (the schema is kept) so the next file does not
        // insert them a second time.
        table.Clear();
    }
}

アップデート:

キャプチャ グループの名前をすべて同じ名前に変更すると、.NET の正規表現エンジンがそれらのキャプチャを 1 つのグループにまとめてくれます。そのため、次の 1 つ目の文は 2 つ目の文に置き換えることができます。

// Flattens the captures of every group except Groups[0] (the whole record) and restores their order of appearance.
// Note: Cast<Group>() must be applied to the GroupCollection (record.Groups); a single Group is not enumerable.
var columns = record.Groups.Cast<Group>().Skip(1).SelectMany(group => group.Captures.Cast<Capture>()).OrderBy(capture => capture.Index).ToArray();

// Reads the captures of Groups[1] directly; no merging or re-sorting step is needed here.
// NOTE(review): presumably Groups[1] is the single shared named group of the pattern — confirm against the regex in use.
var columns = record.Groups[1].Captures.Cast<Capture>().ToArray();
4

1 に答える 1

6

Note that toggling the "CrLf marks a line ending" setting on the Regex Hero page causes the 8 lines to stop being matched; this is a clue as to what's causing the problem.

In your C# code, the line breaks within the literal string are encoded as a CR/LF pair ("\r\n"). The $ in the regex (which matches an end-of-line in Multiline mode) only matches immediately before the \n character. Thus, there is an extra \r character between the final comma (or semicolon) and the \n, which the regex doesn't account for, and the match fails.

Some ways you could address this problem include:

  1. Strip the carriage returns: text = text.Replace("\r\n", "\n");, or
  2. Match the carriage returns: string query = @"^\(.*?\)(,|;)\r$";
于 2012-04-04T14:44:52.590 に答える