d - この wc の例で文字列が null に設定されているのはなぜですか?

Question

今日、D 言語に興味を持ってその Web サイトを見ていたところ、次の wc 実装を見つけました。

import std.stdio;
import std.stream;

/**
 * Word-count utility: for every file named on the command line, prints its
 * line, word and byte counts, then prints how often each word occurred
 * across all files.
 *
 * A word starts at an ASCII letter and continues over ASCII letters and
 * digits; any other character ends it. Lines are counted as occurrences
 * of '\n'. The byte count is the file size reported by std.file.getSize.
 *
 * Files smaller than 10_000_000 bytes are read into memory in one go and
 * words are taken as slices of that buffer; larger files are read one
 * character at a time through a BufferedFile.
 *
 * Params:
 *     args = command-line arguments; args[0] is skipped, every remaining
 *            element is treated as a file path.
 *
 * Returns: always 0.
 */
int main (string[] args)
{
    int w_total;
    int l_total;
    ulong c_total;
    int[string] dictionary;     // word -> number of occurrences, all files

    writefln("   lines   words   bytes file");
    foreach (arg; args[1 .. args.length])
    {
        int w_cnt, l_cnt;
        bool inword;            // true while the scan is inside a word

        auto c_cnt = std.file.getSize(arg);
        if (c_cnt < 10_000_000)
        {
            // Small file: read it whole and count words as slices of it.
            size_t wstart;      // index in `input` where the current word began
            auto input = cast(string)std.file.read(arg);

            foreach (j, c; input)
            {
                if (c == '\n')
                    ++l_cnt;
                if (c >= '0' && c <= '9')
                {
                    // A digit neither starts nor ends a word; if a word is
                    // in progress it simply stays part of the slice.
                }
                else if (c >= 'a' && c <= 'z' ||
                    c >= 'A' && c <= 'Z')
                {
                    if (!inword)
                    {
                        wstart = j;
                        inword = true;
                        ++w_cnt;
                    }
                }
                else if (inword)
                {
                    // Any other character terminates the current word.
                    auto word = input[wstart .. j];

                    dictionary[word]++;
                    inword = false;
                }
            }
            if (inword)
            {
                // The file ended in the middle of a word.
                auto w = input[wstart .. input.length];
                dictionary[w]++;
            }
        }
        else
        {
            // Large file: read one character at a time and build each word
            // in `buf`.
            auto f = new BufferedFile(arg);
            // Close the file when this block is left (normally or via an
            // exception) instead of leaving the handle open until the
            // object is finalized.
            scope (exit) f.close();
            string buf;

            while (!f.eof())
            {
                char c;

                f.read(c);
                if (c == '\n')
                    ++l_cnt;
                if (c >= '0' && c <= '9')
                {
                    // Digits extend a word in progress but cannot start one.
                    if (inword)
                        buf ~= c;
                }
                else if (c >= 'a' && c <= 'z' ||
                    c >= 'A' && c <= 'Z')
                {
                    if (!inword)
                    {
                        buf.length = 0;
                        buf ~= c;
                        inword = 1;
                        ++w_cnt;
                    }
                    else
                        buf ~= c;
                }
                else if (inword)
                {
                    // A count of 1 means this word was just inserted, so the
                    // associative array presumably holds `buf` itself as the
                    // key. Dropping the reference makes the next word start
                    // in a fresh allocation rather than possibly reusing the
                    // key's storage through `buf.length = 0` above.
                    // NOTE(review): whether that reuse can actually happen
                    // depends on the compiler/runtime version - confirm.
                    if (++dictionary[buf] == 1)
                        buf = null;
                    inword = 0;
                }
            }
            if (inword)
            {
                // The file ended in the middle of a word.
                dictionary[buf]++;
            }
        }
        writefln("%8s%8s%8s %s\n", l_cnt, w_cnt, c_cnt, arg);
        l_total += l_cnt;
        w_total += w_cnt;
        c_total += c_cnt;
    }

    // A totals line is printed only when more than one file was given.
    if (args.length > 2)
    {
        writefln("--------------------------------------\n%8s%8s%8s total",
            l_total, w_total, c_total);
    }

    writefln("--------------------------------------");

    // Word frequencies, listed in sorted key order.
    foreach (word1; dictionary.keys.sort)
    {
        writefln("%3s %s", dictionary[word1], word1);
    }
    return 0;
}

いずれにせよ、元のコードの 86 行目（大きなファイルを処理する else 分岐の中）では、ある単語が初めて辞書に現れたときに buf を null に設定しています。

             if (++dictionary[buf] == 1)
                buf = null;
             inword = 0;
            }

こうすることの利点は何でしょうか？その部分を省いてプログラムを試してみましたが、同じ結果が得られました。

score 0 · Accepted Answer

ここからは推測です。ただ、これはかなり古いコードなので、理由はおそらく不変性（immutable）に関係しています。古いバージョンの D では型システムに不変（immutable）という概念がなかったため、string は単なる char[] のエイリアスでした。

連想配列では、キーの内容を変更するとハッシュが一致しなくなるため、配列が壊れる可能性があります。たとえば、ツリー内に 2 つのエントリがあるのに 1 つしか見つけられない、といった見つけにくいバグが発生し得ます（そのため、新しいバージョンの D では int[char[]] を使おうとすると、キーは不変でなければならないというエラーになります）。

このコードは新しい単語を読み始めるときに buf の長さをゼロに設定していますが、そうすると既存のバッファーが再利用される可能性があります。現在のバージョンではそうはならないと確信していますが、当時は再利用されていたのかもしれません。その場合、ハッシュテーブル内の既存のエントリ（キー）が上書きされてしまう可能性があります。null を設定しておけば、確実に新しいバッファーが割り当てられます。

結論：このコードが書かれた当時は、その行がないと、場合によって正しく動作しないことがあったのでしょう。

d - この wc の例で文字列が null に設定されているのはなぜですか?

回答 1 件

Related

Reference