c# - Aforge.net のパーセプトロンニューラルネットワークによる OCR の回答が間違っている

Question

C#でAforge.Netを使ってパーセプトロンによるOCRを作ってみました。私は自分のネットワークを 9 つの 30*30 画像をバイナリで学習しました。しかし、結果では、すべて「C」として認識されます。これはコードです：

    private void button1_Click(object sender, EventArgs e)
    {
        AForge.Neuro.ActivationNetwork network = new AForge.Neuro.ActivationNetwork(new AForge.Neuro.BipolarSigmoidFunction(2), 900, 3);
        network.Randomize();
        AForge.Neuro.Learning.PerceptronLearning learning = new AForge.Neuro.Learning.PerceptronLearning(network);
        learning.LearningRate =1 ;
        double[][] input = new double[9][];
        for (int i = 0; i < 9; i++)
        {
            input[i] = new double[900];
        }
   //Reading A images
        for (int i = 1; i <= 3; i++)
        {
            Bitmap a = AForge.Imaging.Image.FromFile(path + "\\a" + i + ".bmp");
            for (int j = 0; j < 30; j++)
                for (int k = 0; k < 30; k++)
                {
                    if (a.GetPixel(j, k).ToKnownColor() == KnownColor.White)
                    {
                        input[i-1][j * 10 + k] = -1;
                    }
                    else
                        input[i-1][j * 10 + k] = 1;
                }
           // showImage(a);

        }
   //Reading B images
        for (int i = 1; i <= 3; i++)
        {
            Bitmap a = AForge.Imaging.Image.FromFile(path + "\\b" + i + ".bmp");
            for (int j = 0; j < 30; j++)
                for (int k = 0; k < 30; k++)
                {
                    if (a.GetPixel(j , k).ToKnownColor() == KnownColor.White)
                    {
                        input[i + 2][j * 10 + k] = -1;
                    }
                    else
                        input[i + 2][j * 10 + k] = 1;
                }
           // showImage(a);

        }
   //Reading C images
        for (int i = 1; i <= 3; i++)
        {
            Bitmap a = AForge.Imaging.Image.FromFile(path + "\\c" + i + ".bmp");
            for (int j = 0; j < 30; j++)
                for (int k = 0; k < 30; k++)
                {
                    if (a.GetPixel(j , k ).ToKnownColor() == KnownColor.White)
                    {
                        input[i + 5][j * 10 + k] = -1;
                    }
                    else
                        input[i + 5][j * 10 + k] = 1;
                }
           // showImage(a);

        }

        bool needToStop = false;
        int iteration = 0;
        while (!needToStop)
        {
            double error = learning.RunEpoch(input, new double[9][] { new double[3] { 1, -1, -1 },new double[3] { 1, -1, -1 },new double[3] { 1, -1, -1 },//A
                new double[3] { -1, 1, -1 },new double[3] { -1, 1, -1 },new double[3] { -1, 1, -1 },//B
                new double[3] { -1, -1, 1 },new double[3] { -1, -1, 1 },new double[3] { -1, -1, 1 } }//C
                    /*new double[9][]{ input[0],input[0],input[0],input[1],input[1],input[1],input[2],input[2],input[2]}*/
                );
            //learning.LearningRate -= learning.LearningRate / 1000;
            if (error == 0)
                break;
            else if (iteration < 1000)
                iteration++;
            else
                needToStop = true;
            System.Diagnostics.Debug.WriteLine("{0} {1}", error, iteration);
        }
        Bitmap b = AForge.Imaging.Image.FromFile(path + "\\b1.bmp");
    //Reading A Sample to test Netwok
        double[] sample = new double[900];
        for (int j = 0; j < 30; j++)
            for (int k = 0; k < 30; k++)
            {
                if (b.GetPixel(j , k ).ToKnownColor() == KnownColor.White)
                {
                    sample[j * 30 + k] = -1;
                }
                else
                    sample[j * 30 + k] = 1;
            }
        foreach (double d in network.Compute(sample))
            System.Diagnostics.Debug.WriteLine(d);//Output is Always C = {-1,-1,1}
    }

なぜそれが間違って答えるのか知りたかったのです。

score 3 · Accepted Answer

最初の 30x30 画像をinput構造体の double[900] 配列にロードする際に、次の計算を使用しています。

for (int j = 0; j < 30; j++)
    for (int k = 0; k < 30; k++)
    {
        if (a.GetPixel(j, k).ToKnownColor() == KnownColor.White)
            input[i-1][j * 10 + k] = -1;
        else
            input[i-1][j * 10 + k] = 1;
    }

あなたのオフセット計算はここで間違っています。j * 10 + kに変更する必要があります。そうしj * 30 + kないと、無効な結果が得られます。後で、テストイメージをロードするときに正しいオフセット計算を使用します。これが、破損したサンプルに対して正しく一致していない理由です。

double[900]同じコードを複数回記述するのではなく、ビットマップを配列にロードしてイメージごとに呼び出すメソッドを記述する必要があります。これは、同じ結果を返すはずの 2 つのコードによって異なる結果が得られる、このような問題を軽減するのに役立ちます。

score 2 · Accepted Answer

私はあなたのコードを試しました。それも私を助けてくれてありがとう。画像からビット配列を取得するためにいくつかの変更を加えることで、コードを機能させることができました。これが私が使用した方法です。

`
        private double[] GetImageData(Bitmap bmp)
        {
        double[] imageData = null;

        //Make the image grayscale
        Grayscale filter = new Grayscale(0.2125, 0.7154, 0.0721);
        bmp = filter.Apply(bmp);

        //Binarize the image
        AForge.Imaging.Filters.Threshold thFilter = new AForge.Imaging.Filters.Threshold(128);
        thFilter.ApplyInPlace(bmp);

        int height = bmp.Height;
        int width = bmp.Width;
        imageData = new double[height * width];
        int imagePointer = 0;
        System.Diagnostics.Debug.WriteLine("Height : " + height);
        System.Diagnostics.Debug.WriteLine("Width  : " + width);

        for (int i = 0; i < height; i++)
        {
            for (int j = 0; j < width; j++)
            {
                System.Diagnostics.Debug.Write(string.Format("({0}  , {1})     Color : {2}\n", i, j, bmp.GetPixel(i, j)));

                //Identify the black points of the image
                if (bmp.GetPixel(i, j) == Color.FromArgb(255, 0,  0, 0))
                {
                    imageData[imagePointer] = 1;
                }
                else
                {
                    imageData[imagePointer] = 0;
                }
                imagePointer++;
            }
            System.Diagnostics.Debug.WriteLine("");
        }
        System.Diagnostics.Debug.WriteLine("Bits  : " + imagePointer );
        return imageData;
    }`

これが役立つことを願っています。ありがとう。

score 0 · Accepted Answer

これを試して

double error = learning.RunEpoch(input, new double[9][] { new double[3] **{ 1, -1, -1 }**,new double[3] **{ -1, 1, -1 }**,new double[3] **{ -1, -1, 1 }**,//A
                new double[3] **{ 1, -1, -1 }**,new double[3] **{ -1, 1, -1 }**,new double[3] **{ -1, -1, 1 }**,//B
                new double[3] **{ 1, -1, -1 }**,new double[3] **{ -1, 1, -1 }**,new double[3] **{ -1, -1, 1 }** }//C

                );

またはこの方法

double[][] output = new double[patterns][];
            for (int j = 0; j < patterns; j++)
            {
                output[j] = new double[patterns];
                for (int i = 0; i < patterns; i++)
                {
                    if (i != j)
                    {
                        output[j][i] = -1;
                    }
                    else
                    {
                        output[j][i] = 1;
                    }
                }
            }


double error = learning.RunEpoch(input,output)

double[] netout = neuralNet.Compute(pattern);

 int maxIndex = 0;
            double max = netout[0];

            for (int i = 1; i < netout.Length; i++)
            {
                if (netout[i] > max)
                {
                    max = netout[i];
                    maxIndex = i;
                }
            }

maxIndex=0 の場合、答えは A です

maxIndex=1 の場合、答えは B

maxIndex=2 の場合、答えは C です

また、画像からマトリックスを作成し、それをパターンとして使用する必要があると思います。たとえば、20/20 または 15/15 または小さい、30/30 は大きいです。

イメージスキームを取得するために別の方法を使用します。I 画像を 20/20 に分割し、長方形のピクセルの 1 つが黒 (または必要な別の色) の場合、マトリックスに 1 を保存し、それ以外の場合は 0 を保存します。

私はすべてのピクセルを置き換えます。この後、白と黒の2色しかありません。輪郭で操作できます。

private void Cmd_ReplaceColors(ref WriteableBitmap Par_WriteableBitmap,int Par_Limit=180)
        {

            for (int y = 0; y < Par_WriteableBitmap.PixelHeight; y++)
            {
                for (int x = 0; x < Par_WriteableBitmap.PixelWidth; x++)
                {

                    Color color = Par_WriteableBitmap.GetPixel(x, y);

                    if (color == Colors.White)
                    {

                    }
                    else
                    {
                        if (color.R < Par_Limit)
                        {
                            Par_WriteableBitmap.SetPixel(x, y, Colors.Black);
                        }
                        else
                        {
                            Par_WriteableBitmap.SetPixel(x, y, Colors.White);
                        }

                    }

                }
            }

            Par_WriteableBitmap.Invalidate();
        }

私の意見では、1000回の反復は小さく、100 000の方が優れています:)

c# - Aforge.net のパーセプトロン ニューラル ネットワークによる OCR の回答が間違っている

3 に答える 3

Related

Reference

c# - Aforge.net のパーセプトロンニューラルネットワークによる OCR の回答が間違っている