0

I need to split a text file whose values are separated by commas and wrapped in a text qualifier such as ¨|¨.

I was trying to use this function:

    /// <summary>
    /// Splits <paramref name="expression"/> on every occurrence of
    /// <paramref name="delimiter"/> that lies outside a qualified section, i.e. that is
    /// followed by an even number of <paramref name="qualifier"/> occurrences.
    /// </summary>
    /// <param name="expression">The text to split.</param>
    /// <param name="delimiter">Literal delimiter text (regex-escaped internally).</param>
    /// <param name="qualifier">
    /// Literal text qualifier; it may be longer than one character (for example ¨|¨).
    /// An empty qualifier means "no qualifier": every delimiter splits.
    /// </param>
    /// <param name="ignoreCase">When true, delimiter and qualifier are matched case-insensitively.</param>
    /// <returns>The pieces between delimiters; qualifiers are left in place, not stripped.</returns>
    /// <exception cref="ArgumentNullException">Any string argument is null.</exception>
    public string[] Split(string expression, string delimiter, 
                string qualifier, bool ignoreCase)
    {
        if (expression == null) throw new ArgumentNullException("expression");
        if (delimiter == null) throw new ArgumentNullException("delimiter");
        if (qualifier == null) throw new ArgumentNullException("qualifier");

        string escapedDelimiter = Regex.Escape(delimiter);
        string pattern;

        if (qualifier.Length == 0)
        {
            // Nothing can be "inside" an empty qualifier, so split on the bare delimiter.
            pattern = escapedDelimiter;
        }
        else
        {
            string escapedQualifier = Regex.Escape(qualifier);

            // "Any run of characters that does not contain the qualifier".
            // A negated character class ([^...]) cannot express this for a
            // multi-character qualifier because it excludes each character individually,
            // so a lone '|' inside a ¨|¨-qualified field would break the pairing.
            string unqualified = "(?:(?!" + escapedQualifier + ").)*";

            // Delimiter, followed by zero or more complete qualifier pairs and then
            // no further qualifier: the delimiter is outside every qualified section.
            pattern = escapedDelimiter
                + "(?=(?:" + unqualified + escapedQualifier + unqualified + escapedQualifier + ")*"
                + "(?!" + unqualified + escapedQualifier + "))";
        }

        // Singleline lets '.' cross line breaks, matching what the negated character
        // class of the earlier pattern did. RegexOptions.Compiled is intentionally not
        // used: the Regex is rebuilt on every call, so compiling it would cost more
        // than it saves.
        RegexOptions options = RegexOptions.Singleline;
        if (ignoreCase)
        {
            options |= RegexOptions.IgnoreCase;
        }

        return new Regex(pattern, options).Split(expression);
    } 

to process a text file with rows like this one:

¨|¨column 1¨|¨,¨|¨column 2¨|¨,¨|¨column 3¨|¨,¨|¨column 4¨|¨

But my regex expression is not working... Any ideas that could help me to make this work?

Thanks in advance

4

3 に答える 3

0

正規表現なしでもこれを行うことができます。文字列を ¨|¨ で分割し、各項目をスペースで分割して、個々のキー/値を取得します。

// Break the row on the ¨|¨ qualifier (empty pieces are discarded), then break each
// piece on spaces and print its first two tokens.
foreach (var item in str.Split(new[] { "¨|¨" }, StringSplitOptions.RemoveEmptyEntries))
{
    var tokens = item.Split(' ');
    Console.WriteLine(tokens[0]);

    // A piece without a space (for example the "," left between two qualifiers)
    // yields a single token, so tokens[1] must not be read unconditionally.
    if (tokens.Length > 1)
    {
        Console.WriteLine(tokens[1]);
    }
}
于 2013-11-13T17:57:25.383 に答える
0

Not really sure why you need Regex for something like this, string.Split can give you the output you need like:

// Sample row: four values, each wrapped in the ¨|¨ qualifier and separated by commas.
string str = "¨|¨column 1¨|¨,¨|¨column 2¨|¨,¨|¨column 3¨|¨,¨|¨column 4¨|¨";

// Split on "qualifier + comma" as well as on the bare qualifier, and discard the
// empty pieces that result.
string[] separators = { "¨|¨,", "¨|¨" };
string[] splitArray = str.Split(separators, StringSplitOptions.RemoveEmptyEntries);

For output:

// Write every extracted value on its own line, in order.
for (int index = 0; index < splitArray.Length; index++)
{
    Console.WriteLine(splitArray[index]);
}

Output:

column 1
column 2
column 3
column 4
于 2013-11-13T17:57:45.827 に答える
0

.net では、これが可能です。:)

私はそれを押し通しただけで、共有したい気がします。

これは、区切られたファイル行を分割するためのかなり完全な正規表現ソリューションです。

    /// <summary>
    /// Splits one delimited row into its column values using a single anchored
    /// regular expression.
    /// </summary>
    /// <remarks>
    /// Each column may be one of:
    /// bare text containing neither the delimiter nor the qualifier;
    /// text wrapped in the qualifier, in which the delimiter may appear and a doubled
    /// qualifier is accepted;
    /// or text of at least two characters that contains the qualifier anywhere except
    /// as its first or last character.
    /// Whitespace around a column is accepted and is part of the returned value;
    /// qualifiers are not stripped either.
    /// NOTE(review): delimiter and qualifier are placed inside regex character classes,
    /// so a value longer than one character is treated there as a set of characters.
    /// Such values are inserted into the pattern unchanged; confirm whether any caller
    /// passes multi-character or pre-escaped values.
    /// </remarks>
    /// <param name="strColumnDelimiter">Column delimiter; a single character is regex-escaped automatically.</param>
    /// <param name="strTextQualifier">Text qualifier; a single character is regex-escaped automatically.</param>
    /// <param name="strInput">The row to split.</param>
    /// <param name="strSplitOutput">Receives the column values, or null when the row does not match.</param>
    /// <param name="strResultMessage">Receives "SUCCESS" or a short failure description.</param>
    /// <returns>True when the whole row matched the pattern; otherwise false.</returns>
    /// <exception cref="ApplicationException">The pattern did not produce the two expected capture groups.</exception>
    private bool RowMe(string strColumnDelimiter, string strTextQualifier, string strInput, out string[] strSplitOutput, out string strResultMessage)
    {
        string[] retVal = null;
        bool blnResult = false;
        strResultMessage = "";

        //---- Escape regex metacharacters in the delimiter & qualifier ----
        string strDelimiter = EscapeSingleCharForRegex(strColumnDelimiter);
        string strQualifier = EscapeSingleCharForRegex(strTextQualifier);

        //---- One column: optional whitespace, one of three value shapes, optional whitespace ----
        // [^\S...] is a double negative: "whitespace, except the delimiter & qualifier".
        string strPadding = "[^\\S" + strDelimiter + strQualifier + "]*";

        string strColumn = String.Concat(
            "(",
                strPadding,
                "(?:",
                    // any amount of characters that are neither delimiter nor qualifier
                    "(?:[^" + strDelimiter + strQualifier + "]*)",
                    "|",
                    // qualifier, then non-qualifier characters or doubled qualifiers, then qualifier
                    // (the delimiter is allowed inside)
                    "(?:" + strQualifier + "(?:(?:[^" + strQualifier + "])|(?:" + strQualifier + strQualifier + "))*" + strQualifier + ")",
                    "|",
                    // qualifier allowed inside the value as long as it is neither the first nor the last character
                    "(?:(?:[^" + strDelimiter + strQualifier + "]{1})(?:[^" + strDelimiter + "]*)(?:[^" + strDelimiter + strQualifier + "]{1}))",
                ")",
                strPadding,
            ")");

        //---- Whole row: an optional first column (no leading delimiter), then any
        //     number of delimiter-prefixed columns, anchored at both ends ----
        string strPattern = String.Concat(
            "^",
            "(?:", strColumn, "){0,1}",
            "(?:(?:", strDelimiter, "(?:", strColumn, "))){0,}",
            "$");

        //---- And do the regex matching ----
        // The pattern is anchored with ^ and Multiline is off, so there is at most one
        // match; a single Match call is sufficient.
        System.Text.RegularExpressions.Regex objRegex = new System.Text.RegularExpressions.Regex(strPattern);
        System.Text.RegularExpressions.Match objMatch = objRegex.Match(strInput);

        //---- So what did we get? ----
        if (!objMatch.Success) {
            blnResult = false;
            strResultMessage = "--NO-- no overall match";
        }
        else if (objMatch.Groups.Count != 3) {
            // Group 0 plus the two capturing groups built above; anything else means
            // the pattern itself was changed incorrectly.
            blnResult = false;
            strResultMessage = "--NO-- pattern not correct";
            throw new ApplicationException("ERROR SPLITTING FLAT FILE ROW!  The hardcoded regular expression appears to be broken.  This should not happen!!!  What's up??");
        }
        else {
            System.Text.RegularExpressions.Group objFirstColumn = objMatch.Groups[1];
            System.Text.RegularExpressions.CaptureCollection objOtherColumns = objMatch.Groups[2].Captures;

            retVal = new string[1 + objOtherColumns.Count];

            // Group 1 captures at most once, so Value is that capture; Value is ""
            // (instead of throwing) should the group not have participated.
            retVal[0] = objFirstColumn.Value;

            for (int i = 0; i < objOtherColumns.Count; i++) {
                retVal[i + 1] = objOtherColumns[i].Value;
            }

            blnResult = true;
            strResultMessage = "SUCCESS";
        }

        strSplitOutput = retVal;

        return blnResult;

    }

    /// <summary>
    /// Escapes a single-character delimiter or qualifier so it is taken literally both
    /// inside and outside a regex character class. Null and longer values are returned
    /// unchanged so that values a caller has already escaped keep working.
    /// </summary>
    private static string EscapeSingleCharForRegex(string strLiteral)
    {
        if (strLiteral == null || strLiteral.Length != 1) {
            return strLiteral;
        }

        // Regex.Escape leaves ']' and '-' alone, but both are significant inside [...].
        if (strLiteral == "]" || strLiteral == "-") {
            return "\\" + strLiteral;
        }

        return System.Text.RegularExpressions.Regex.Escape(strLiteral);
    }
于 2014-03-21T15:59:25.010 に答える