c# - yield パターン、ステートマシンのフロー

Question

次のようなファイルがあり、イテレータブロックを使用して、ファイル内に繰り返し現れる特定のノード（パーツ）を解析しています。最初は正規表現でファイル全体を解析していましたが、ノードに特定のフィールドが存在しない場合に一致しませんでした。そのため、yield パターンを使おうとしています。ファイル形式と、現在使用しているコードは以下のとおりです。ファイルから必要なのは、個々のパーツとしての REPLICATE ノードだけです。これにより、キー文字列を使ってその中のフィールドを取得し、オブジェクトのコレクションに格納できます。最初の REPLICATE が現れた位置から解析を開始することはできますが、REPLICATE ノードが終わる位置で解析を終了することができません。

ファイル形式：

X_HEADER
{
    DATA_MANAGEMENT_FIELD_2     NA
    DATA_MANAGEMENT_FIELD_3     NA
    DATA_MANAGEMENT_FIELD_4     NA
    SYSTEM_SOFTWARE_VERSION     NA
}
Y_HEADER
{
    DATA_MANAGEMENT_FIELD_2     NA
    DATA_MANAGEMENT_FIELD_3     NA
    DATA_MANAGEMENT_FIELD_4     NA
    SYSTEM_SOFTWARE_VERSION     NA
}
COMPLETION
{
    NUMBER          877
    VERSION         4
    CALIBRATION_VERSION 1
    CONFIGURATION_ID    877    
}
REPLICATE
{
    REPLICATE_ID            1985
    ASSAY_NUMBER            656
    ASSAY_VERSION           4
    ASSAY_STATUS            Research
    DILUTION_ID         1
}
REPLICATE
{
    REPLICATE_ID            1985
    ASSAY_NUMBER            656
    ASSAY_VERSION           4
    ASSAY_STATUS            Research
}

コード：

/// <summary>
/// Iterator that reads the text file at <paramref name="path"/> line by line and
/// yields dictionaries as it encounters lines starting with "REPLICATE".
/// </summary>
/// <remarks>
/// NOTE(review): nothing is ever added to <c>current</c> (the result of
/// <c>Split</c> is assigned to <c>parts</c> and then discarded), so every
/// dictionary yielded from this version is empty.
/// </remarks>
static IEnumerable<IDictionary<string, string>> ReadParts(string path)
{
    using (var reader = File.OpenText(path))
    {
        var current = new Dictionary<string, string>();
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Blank lines carry no data.
            if (string.IsNullOrWhiteSpace(line)) continue;

            if (line.StartsWith("REPLICATE"))
            {
                // Yields whatever was collected BEFORE this header, then starts a
                // fresh dictionary; the closing "}" of a section is never examined.
                yield return current;
                current = new Dictionary<string, string>();
            }
            else
            {
                // The split result is never stored into 'current'.
                var parts = line.Split('\t');
            }

            // This check sits inside the loop, so it runs after every non-blank line
            // rather than once at end of file.
            if (current.Count > 0) yield return current;
        }
    }
}

/// <summary>
/// Enumerates every dictionary produced by ReadParts for <paramref name="fileName"/>.
/// The loop body is a placeholder; no per-part work is done yet.
/// </summary>
public static void parseFile(string fileName)
    {
        foreach (var part in ReadParts(fileName))
        {
           // part["fIELD1"] will retrieve certain values from the REPLICATE part here
        }
    }

score 3 · Accepted Answer

まあ、閉じ中括弧に到達したとき、そしてその時点でのみ、セクションを「閉じて」yield return する必要があるようですね。例えば：

/// <summary>
/// Reads the file at <paramref name="path"/> line by line and lazily yields one
/// name/value dictionary for every REPLICATE section, at the moment the section's
/// closing brace is read.
/// </summary>
/// <param name="path">Path of the text file to parse.</param>
/// <returns>
/// A lazily evaluated sequence with one dictionary per REPLICATE section, in file
/// order. Sections with any other name are validated but not returned.
/// </returns>
/// <exception cref="BadDataException">
/// A section name, brace or name/value line appears where the current parser
/// state does not allow it, or a name/value line does not have exactly two
/// tab-separated parts after its leading tab.
/// </exception>
static IEnumerable<IDictionary<string, string>> ReadParts(string path)
{
    using (var reader = File.OpenText(path))
    {
        // Parser state:
        //   currentName == null                       -> between sections
        //   currentName != null, currentMap == null   -> name read, "{" expected
        //   currentName != null, currentMap != null   -> inside the braces
        string currentName = null;
        IDictionary<string, string> currentMap = null;
        // The loop variable has to be declared before it is assigned in the
        // while condition; without this declaration the method does not compile.
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }
            if (line == "{")
            {
                if (currentName == null || currentMap != null)
                {
                    throw new BadDataException("Open brace at wrong place");
                }
                currentMap = new Dictionary<string, string>();
            }
            else if (line == "}")
            {
                if (currentName == null || currentMap == null)
                {
                    throw new BadDataException("Closing brace at wrong place");
                }
                // Isolate the "REPLICATE-only" requirement to a single
                // line - if you ever need other bits, you can change this.
                if (currentName == "REPLICATE")
                {
                    yield return currentMap;
                }
                currentName = null;
                currentMap = null;
            }
            else if (!line.StartsWith("\t"))
            {
                // Any non-brace line that is not tab-indented is a section name.
                if (currentName != null || currentMap != null)
                {
                    throw new BadDataException("Section name at wrong place");
                }
                currentName = line;
            }
            else
            {
                if (currentName == null || currentMap == null)
                {
                    throw new BadDataException("Name/value pair at wrong place");
                }
                // Drop the leading tab, then expect exactly "name<TAB>value".
                var parts = line.Substring(1).Split('\t');
                if (parts.Length != 2)
                {
                    throw new BadDataException("Invalid name/value pair");
                }
                // Indexer assignment: a repeated name overwrites the earlier value.
                currentMap[parts[0]] = parts[1];
            }
        }
    }
}

正直なところ、これはかなりひどい関数です。私なら、これを独自のクラス（おそらくネストされたクラス）に入れて状態を保持し、各ハンドラーをそれぞれ独立したメソッドにすると思います。実のところ、これは State パターンが本当に理にかなっている状況です :)

score 2 · Accepted Answer

/// <summary>
/// Walks <paramref name="reader"/> line by line and lazily yields the parsed
/// contents of each block whose header line starts with "REPLICATE".
/// </summary>
/// <param name="reader">Text source positioned at the start of the data.</param>
/// <returns>One dictionary per REPLICATE block, in the order they are read.</returns>
private IEnumerable<IDictionary<string, string>> ParseFile(System.IO.TextReader reader)
{
    string lookahead = reader.ReadLine();

    while (lookahead != null)
    {
        // Remember the line under inspection, then advance so the lookahead is
        // already on the following line whether or not the header is of interest.
        string header = lookahead;
        lookahead = reader.ReadLine();

        if (!header.StartsWith("REPLICATE"))
        {
            continue;
        }

        // ParseBlock consumes the whole block and leaves the lookahead on the
        // first line after the closing brace.
        yield return ParseBlock(ref lookahead, reader);
    }
}

/// <summary>
/// Parses one brace-delimited block. On entry <paramref name="token"/> must be the
/// "{" line; on return it holds the line read after the matching "}".
/// </summary>
/// <param name="token">Current line; advanced as the block is consumed.</param>
/// <param name="reader">Source of the following lines.</param>
/// <returns>The name/value pairs found between the braces.</returns>
/// <exception cref="Exception">The opening or closing brace line is missing.</exception>
private IDictionary<string, string> ParseBlock(ref string token, System.IO.TextReader reader)
{
    bool startsWithBrace = token == "{";
    if (!startsWithBrace)
    {
        throw new Exception("Missing opening brace.");
    }

    // Step past "{" and collect everything up to the closing brace or end of input.
    token = reader.ReadLine();
    IDictionary<string, string> fields = ParseValues(ref token, reader);

    bool endsWithBrace = token == "}";
    if (!endsWithBrace)
    {
        throw new Exception("Missing closing brace.");
    }

    // Step past "}" so the caller resumes on the next line.
    token = reader.ReadLine();
    return fields;
}

/// <summary>
/// Reads consecutive tab-separated "name&lt;TAB&gt;value" lines, starting with
/// <paramref name="token"/>, until a "}" line or the end of the input is reached.
/// </summary>
/// <param name="token">
/// Current line on entry; on return it is the "}" line that stopped the loop, or
/// <c>null</c> if the input ended first.
/// </param>
/// <param name="reader">Source of the following lines.</param>
/// <returns>The collected pairs, keyed by field name.</returns>
/// <exception cref="Exception">A line has fewer than two tab-separated parts.</exception>
/// <exception cref="ArgumentException">The same field name occurs twice.</exception>
private IDictionary<string, string> ParseValues(ref string token, System.IO.TextReader reader)
{
    IDictionary<string, string> result = new Dictionary<string, string>();

    // "&&" is the C# conditional-AND operator; the null check comes first so the
    // end-of-input case is handled before anything else looks at the token.
    while (token != null && token != "}")
    {
        // Field lines may be indented. Without trimming the indent, a leading tab
        // would make args[0] an empty string and the field name would land in the
        // value slot.
        var args = token.TrimStart().Split('\t');

        if (args.Length < 2)
        {
            throw new Exception("Invalid name/value pair: \"" + token + "\"");
        }

        result.Add(args[0], args[1]);

        token = reader.ReadLine();
    }

    return result;
}

score 1 · Accepted Answer

whileループが終了した後に yield return current; を追加すると、最後の辞書が得られます。

現在のブロックの終わりとして「}」をチェックし、そこに yield return を配置する方がよいと思います。正規表現でファイル全体を解析することはできませんが、行内のキーと値のペアを検索するために正規表現を使用することはできます。次のイテレータコードでうまくいくはずです。REPLICATE ブロックの辞書だけを返します。

 // Check for lines that are a key-value pair, separated by whitespace.
// Note that value is optional. The Value group uses an unescaped '.' so that it
// captures whatever text follows the separating whitespace; an escaped "\." would
// only accept literal dots, and ordinary lines such as "ASSAY_NUMBER 656" would
// then fail to match at all.
static string partPattern = @"^(?<Key>\w*)(\s+(?<Value>.*))?$";

/// <summary>
/// Opens the file at <paramref name="path"/> and lazily yields one dictionary for
/// each block introduced by a line that, once trimmed, compares equal to "REPLICATE".
/// </summary>
/// <param name="path">Path of the text file to read.</param>
/// <returns>The parsed REPLICATE blocks, in file order.</returns>
static IEnumerable<IDictionary<string, string>> ReadParts(string path)
{
    using (var reader = File.OpenText(path))
    {
        for (string text = reader.ReadLine(); text != null; text = reader.ReadLine())
        {
            // Whitespace-only lines are never headers; anything else is a header
            // only when the trimmed text is the REPLICATE keyword.
            bool isReplicateHeader =
                !string.IsNullOrWhiteSpace(text) &&
                text.Trim().CompareTo("REPLICATE") == 0;

            if (isReplicateHeader)
            {
                // The helper consumes the braces and everything between them.
                yield return parseReplicateBlock(reader);
            }
        }
    }
}

/// <summary>
/// Reads one block from <paramref name="reader"/>: checks for the opening brace,
/// then collects every line matched by <c>partPattern</c> with a non-empty key
/// until a "}" line is reached.
/// </summary>
/// <param name="reader">Reader positioned just after the block's header line.</param>
/// <returns>The key/value pairs collected from the block.</returns>
/// <exception cref="ApplicationException">
/// The opening brace is missing, or the input ends before a closing brace.
/// </exception>
private static IDictionary<string, string> parseReplicateBlock(StreamReader reader)
{
    // Fails fast unless the next non-blank line is "{".
    VerifyOpening(reader);

    var fields = new Dictionary<string, string>();
    for (string raw = reader.ReadLine(); raw != null; raw = reader.ReadLine())
    {
        if (string.IsNullOrWhiteSpace(raw))
        {
            continue;
        }

        string text = raw.Trim();

        // Named groups keep this independent of the group order in the pattern.
        Match match = Regex.Match(text, partPattern);
        Group key = match.Groups["Key"];
        if (key.Length > 0)
        {
            fields.Add(key.Value, match.Groups["Value"].Value);
            continue;
        }

        if (text.CompareTo("}") == 0)
        {
            return fields;
        }
        // Lines that are neither a pair nor the closing brace are skipped.
    }

    // Input ran out before the block was closed.
    throw new ApplicationException("Missing closing brace");
}

/// <summary>
/// Consumes lines from <paramref name="reader"/> up to and including the first
/// line that is not blank, and requires that line, once trimmed, to be "{".
/// </summary>
/// <param name="reader">Reader positioned before the expected opening brace.</param>
/// <exception cref="ApplicationException">
/// The first non-blank line is not "{", or the input ends before one is found.
/// </exception>
private static void VerifyOpening(StreamReader reader)
{
    // Advance to the first line that has visible content (or to end of input).
    string candidate = reader.ReadLine();
    while (candidate != null && string.IsNullOrWhiteSpace(candidate))
    {
        candidate = reader.ReadLine();
    }

    bool foundBrace = candidate != null && candidate.Trim().CompareTo("{") == 0;
    if (!foundBrace)
    {
        throw new ApplicationException("Missing opening brace");
    }
}

更新：正規表現文字列に値がない場合が含まれていることを確認しました。さらに、正規表現文字列が変更された場合の問題を回避するために、グループ名を使用するようにグループインデックスがすべて変更されました。

c# - yield パターン、ステートマシンのフロー

3 に答える 3

Related

Reference