c# - C# を使用してテキストファイルからフィールド名と最大長を抽出する

Question

SQL Server の結果セットをテキストファイルとして保存したファイルがあります。

ファイルがどのように見えるかのサンプルを次に示します。

RWS_DMP_ID      RV1_DMP_NUM      CUS_NAME
3192            3957             THE ACME COMPANY                          
3192            3957             THE ACME COMPANY                          
3192            3957             THE ACME COMPANY

このファイルを読み取り、次のデータテーブルを作成する C# プログラムを作成したいと考えています。

Field       MaxSize
-----       -------
RWS_DMP_ID  17
RV1_DMP_NUM 17
CUS_NAME    42

これは、フィールド名とその最大長のリストです。最大長は、フィールドの先頭から次のフィールドの先頭の直前のスペースまでです。

ところで、コードのパフォーマンスは気にしません。これはめったに使用されないファイル処理ユーティリティです。

次のコードでこれを解決しました。

// Reads the file line by line. Line 1 is scanned character by character to record each
// field's name and width in strProcessedData; line 3 is then sliced using those widths.
// Line 2 and every line after line 3 are read but otherwise ignored.
// NOTE(review): all variables used here (strProcessedData, intFieldCnt, intCharCnt,
// bolNewField, bolFieldEnd, ...) and SplitWords are declared outside this fragment; the
// scan presumably relies on the counters starting at 0 and the flags at false — TODO confirm.
// NOTE(review): objFile is never closed or disposed within this fragment.
objFile = new StreamReader(strPath + strFileName);
            strLine = objFile.ReadLine();
            intLineCnt = 0;
            while (strLine != null)
            {
                intLineCnt++;

                // Only the first three lines are processed.
                if (intLineCnt <= 3)
                {                       
                    if (intLineCnt == 1)
                    {
                        // Header line: split out the field names, then walk the characters
                        // to measure how wide each field is.
                        strWords = SplitWords(strLine);
                        intNumberOfFields = strWords.Length;
                        foreach (char c in strLine)
                        {
                            // The previous character started a new field: clear both flags
                            // so this character is treated as being inside the field name.
                            if (bolNewField == true)
                            {
                                bolFieldEnd = false;
                                bolNewField = false;
                            }
                            if (bolFieldEnd == false)
                            {
                                // Still inside the field name; the first space ends it.
                                if (c == ' ')
                                {
                                    bolFieldEnd = true;
                                }
                            }
                            else
                            {
                                // In the padding after a name; the first non-space character
                                // is the start of the next field.
                                if (c != ' ')
                                {
                                    // Record the field that just ended: its name and the
                                    // character count so far minus one.
                                    if (intFieldCnt < strWords.Length)
                                    {
                                        strProcessedData[intFieldCnt, 0] = strWords[intFieldCnt];
                                        strProcessedData[intFieldCnt, 1] = (intCharCnt - 1).ToString();
                                    }
                                    intFieldCnt++;
                                    // Restart the count at 1 for the character that began the
                                    // new field (the increment below is skipped this iteration).
                                    intCharCnt = 1;
                                    bolNewField = true;
                                }
                            }
                            if (bolNewField == false)
                            {
                                intCharCnt++;
                            }
                        }
                        // The last field has no following field to trigger the recording
                        // above, so it is stored here with the remaining character count.
                        // NOTE(review): unlike the in-loop write, this index is not checked
                        // against strWords.Length.
                        strProcessedData[intFieldCnt, 0] = strWords[intFieldCnt];
                        strProcessedData[intFieldCnt, 1] = intCharCnt.ToString();                               
                    }
                    else if (intLineCnt == 3)
                    {
                        // Third line: cut it into fields using the widths stored from the header.
                        // NOTE(review): line 2 is skipped — presumably it is the dashed separator
                        // row of the SQL Server output; confirm against the real file.
                        intLine2Cnt= 0;
                        intTotalLength = 0;
                        while(intLine2Cnt < intNumberOfFields)
                        {
                            intSize = Convert.ToInt32(strProcessedData[intLine2Cnt, 1]);
                            // Shorten the slice when the line ends before the stored width does.
                            // NOTE(review): if intTotalLength already exceeds strLine.Length,
                            // intSize becomes negative and Substring will throw.
                            if (intSize + intTotalLength > strLine.Length)
                            {
                                intSize = strLine.Length - intTotalLength;
                            }
                            strField = strLine.Substring(intTotalLength, intSize);
                            strField = strField.Trim();
                            // intLineCnt is 3 here, so the trimmed value goes into column index 2.
                            strProcessedData[intLine2Cnt, intLineCnt - 1] = strField;
                            // Advance past this field plus one extra character.
                            intTotalLength = intTotalLength + intSize + 1;                                

                            intLine2Cnt++;
                        }
                    }                       
                }
                strLine = objFile.ReadLine();
            }`enter code here`

このコードが完全にその場しのぎのやっつけ仕事であることは承知しています。この問題を解決するための、より良い方法を探しています。

この問題を解決するより良い方法はありますか?

ありがとう

score 0 · Accepted Answer

これがどれほどメモリ効率が良いかはわかりませんが、少しすっきりしたと思います (フィールドがタブ区切りであると仮定します):

// Computes, for every field named in the header row of a tab-separated file, the length
// of the longest trimmed value found in that field's column, then prints "name length"
// per field.
var COL_DELIMITER = new[] { '\t' };
string[] lines = File.ReadAllLines(strPath + strFileName);

// Read the field names from the first line. An empty file yields no fields instead of
// throwing on lines[0].
var fields = lines.Length > 0
    ? lines[0].Split(COL_DELIMITER, StringSplitOptions.RemoveEmptyEntries).ToList()
    : new List<string>();

// Jagged array of the data rows (everything after the header row), split into columns.
string[][] columnsArray = lines.Skip(1).Select(l => l.Split(COL_DELIMITER)).ToArray();

// Maps each field name to the longest trimmed value length seen in its column.
var max = new Dictionary<string, int>();

// Walk the fields by position so the column index is known directly (no per-row
// IndexOf lookup, which also resolved duplicate names to the wrong column).
for (int i = 0; i < fields.Count; i++)
{
    int column = i;

    // Rows shorter than the header (e.g. a trailing blank line) are skipped for this
    // column, and DefaultIfEmpty keeps Max from throwing when there are no data rows.
    int longest = columnsArray
        .Where(row => column < row.Length)
        .Select(row => row[column].Trim().Length)
        .DefaultIfEmpty(0)
        .Max();

    // A repeated column name keeps the widest value instead of throwing from Add.
    int previous;
    max[fields[i]] = max.TryGetValue(fields[i], out previous)
        ? Math.Max(previous, longest)
        : longest;
}

// Output per requirement: one "field length" pair per line.
Console.WriteLine(string.Join(Environment.NewLine,
        max.Select(entry => entry.Key + " " + entry.Value)
    ));

score 0 · Accepted Answer

これが私が思いついたものです。大きなポイントは、IndexOf 文字列関数を使用することです。

 /// <summary>
 /// Console utility that reads a fixed-width text file and reports, for every field named
 /// in the header line, the number of characters from the start of that field to the start
 /// of the next one (the last field runs to the end of the longest line in the file).
 /// </summary>
 class Program
    {
        /// <summary>
        /// Entry point. Expects the path of the file to analyse as the first argument.
        /// </summary>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the file path.</param>
        static void Main(string[] args)
        {
            if (args == null || args.Length == 0)
            {
                Console.WriteLine("Usage: Program <file path and name>");
                return;
            }

            // Read the file once; this replaces the separate StreamReader that was
            // opened for the header line and never disposed.
            string[] lines = File.ReadAllLines(args[0]);
            if (lines.Length == 0)
            {
                Console.WriteLine("The file is empty.");
                return;
            }

            // The last field extends to the end of the longest line in the file.
            int intMaxLineSize = lines.Max(line => line.Length);

            GetFieldNameAndFieldLengh(lines[0], intMaxLineSize);

            Console.WriteLine("Press <enter> to continue.");
            Console.ReadLine();
        }

        /// <summary>
        /// Writes one "name size" line to the console for every field in the header line.
        /// </summary>
        /// <param name="strLine">The header line containing the field names.</param>
        /// <param name="intMaxSize">Length of the longest line in the file; bounds the last field.</param>
        public static void GetFieldNameAndFieldLengh(String strLine, Int32 intMaxSize)
        {
            if (string.IsNullOrEmpty(strLine))
            {
                return;
            }

            string[] fields = SplitWords(strLine);
            int[] starts = new int[fields.Length];

            // Locate each name starting after the previous one, so a name that also occurs
            // inside an earlier field (e.g. "ID" inside "ORDER_ID") is not matched too early.
            int searchFrom = 0;
            for (int i = 0; i < fields.Length; i++)
            {
                starts[i] = strLine.IndexOf(fields[i], searchFrom, StringComparison.Ordinal);
                searchFrom = starts[i] + fields[i].Length;
            }

            for (int i = 0; i < fields.Length; i++)
            {
                int size;
                if (i < fields.Length - 1)
                {
                    size = starts[i + 1] - starts[i];
                }
                else
                {
                    // Never report the last field as narrower than its own name, even if
                    // the caller passes a maximum shorter than the header line.
                    size = Math.Max(intMaxSize - starts[i], fields[i].Length);
                }

                Console.WriteLine(fields[i] + " " + size);
            }
        }

        /// <summary>
        /// Splits a line into its words (runs of word characters), dropping the empty
        /// entries Regex.Split produces for leading or trailing separators.
        /// </summary>
        /// <param name="s">The line to split.</param>
        /// <returns>The non-empty words in the order they appear in the line.</returns>
        static string[] SplitWords(string s)
        {
            return Regex.Split(s, @"\W+").Where(word => word.Length > 0).ToArray();
        }
    }

score 0 · Accepted Answer

    /// <summary>
    /// Reads tab-separated lines from <paramref name="reader"/>, treats the first line as
    /// the header, and tracks the longest value seen in each column of the remaining lines.
    /// </summary>
    /// <param name="reader">Reader positioned at the start of the tab-separated text.</param>
    void MaximumWidth(StreamReader reader)
    {
        string[] columns = null;
        int[] maxWidth = null;

        string line;

        while ((line = reader.ReadLine()) != null)
        {
            string[] cols = line.Split('\t');

            if (columns == null)
            {
                // First line: remember the header and allocate one width slot per column.
                columns = cols;
                maxWidth = new int[cols.Length];
                continue;
            }

            // A row may have fewer columns than the header (e.g. a trailing blank line);
            // only inspect the columns that actually exist so cols[i] cannot go out of range.
            int shared = columns.Length < cols.Length ? columns.Length : cols.Length;

            for (int i = 0; i < shared; i++)
            {
                int width = cols[i].Length;

                if (maxWidth[i] < width)
                {
                    maxWidth[i] = width;
                }
            }
        }

        // ...
    }

c# - C# を使用してテキスト ファイルからフィールド名と最大長を抽出する

回答 3 件

Related

Reference

c# - C# を使用してテキストファイルからフィールド名と最大長を抽出する