3

私はこれに数時間取り組んでいます。ポーランド語の発音区別符号付き文字(ąśółńźć など)を含む文字列をファイルに保存しているのですが、そのファイルを読み取るソフトウェアは Mazovia エンコーディングでしか読み取れません。Mazovia はかなり古いエンコーディングで、Microsoft の Encoding クラスではサポートされていません。

.Net文字列はUTF-16文字で構成されているため、このコードを使用してUnicodeからMazoviaに変換しています。

// Drop the last character of the input line and terminate the record with the platform newline.
string rekord = (linia.Substring(0, linia.Length - 1)) + Environment.NewLine;
// Swap each Polish letter for the char whose numeric value equals its Mazovia byte (0x86..0xA7).
string rekordMazovia = Kodowanie.UnicodeNaMazovia(rekord);
// NOTE(review): this overload takes no Encoding argument, so the text is written as UTF-8.
// Chars in the 0x80-0xFF range are emitted as two bytes each in UTF-8, which is most likely
// the extra character seen in front of every diacritic when the file is read as Mazovia.
File.AppendAllText(sciezka, rekordMazovia);
/// <summary>
/// Helpers for rewriting text so that Polish letters carry Mazovia code values.
/// </summary>
public static class Kodowanie {
  /// <summary>
  /// Returns a copy of <paramref name="tekst"/> in which every Polish letter with a
  /// diacritic is replaced by the char whose numeric value is that letter's Mazovia byte.
  /// All other characters are copied through unchanged.
  /// </summary>
  /// <param name="tekst">Text to convert; must not be null.</param>
  /// <returns>A string of the same length with the substitutions applied.</returns>
  public static string UnicodeNaMazovia(string tekst) {
    char[] znaki = tekst.ToCharArray();
    for (int pozycja = 0; pozycja < znaki.Length; pozycja++) {
      znaki[pozycja] = ZnakNaMazovia(znaki[pozycja]);
    }
    return new string(znaki);
  }

  // Maps one char to its Mazovia code value; chars without an entry are returned as-is.
  private static char ZnakNaMazovia(char znak) {
    switch (znak) {
      // Upper-case letters.
      case '\u0104': return '\u008F'; // Ą
      case '\u0106': return '\u0095'; // Ć
      case '\u0118': return '\u0090'; // Ę
      case '\u0141': return '\u009C'; // Ł
      case '\u0143': return '\u00A5'; // Ń
      case '\u00D3': return '\u00A3'; // Ó
      case '\u015A': return '\u0098'; // Ś
      case '\u0179': return '\u00A0'; // Ź
      case '\u017B': return '\u00A1'; // Ż
      // Lower-case letters.
      case '\u0105': return '\u0086'; // ą
      case '\u0107': return '\u008D'; // ć
      case '\u0119': return '\u0091'; // ę
      case '\u0142': return '\u0092'; // ł
      case '\u0144': return '\u00A4'; // ń
      case '\u00F3': return '\u00A2'; // ó
      case '\u015B': return '\u009E'; // ś
      case '\u017A': return '\u00A6'; // ź
      case '\u017C': return '\u00A7'; // ż
      default: return znak;
    }
  }
}

アプリケーションで生成されたファイルを読み込むと、発音区別符号付き文字の前に余分な文字が1つ入ってしまう以外は、すべて問題ありません。こんな感じです: http://imgur.com/q7DZo

それを取り除く方法は?それをより良くする方法は?

4

1 に答える 1

10

Mazovia エンコーディングはコード ページ 437 に似ていますが、一部の位置で文字が異なるため、437 を使用できません。

MazoviaEncoding を実装すると、簡単に使用できます

// Sample text consisting of Polish letters with diacritics.
const string sample = "ąśółńźć";
// Hand the custom encoding to the writer as the third argument so each letter is
// stored using that encoding instead of the default.
File.WriteAllText(@"test.txt", sample, new MazoviaEncoding());
// File.AppendAllText takes the same (path, contents, encoding) arguments and can be
// used in exactly the same way when the text should be appended instead.

ファイルには次が含まれます。

0x86 0x9E 0xA2 0x92 0xA4 0xA6 0x8D

これは http://en.wikipedia.org/wiki/Mazovia_encoding によると正しい値です。

この実装は、C# の他の Encoding と同じように使用できます。たとえば、ファイルを読み戻す場合も同様に機能します。

// Decode the file back into a .NET string by passing the same custom encoding to the reader.
string result = File.ReadAllText(@"test.txt", new MazoviaEncoding());

これが私の実装です:

using System.Collections.Generic;
using System.Text;

namespace System.Text {
    /// <summary>
    /// Single-byte <see cref="Encoding"/> for the Mazovia code page: every char maps to
    /// exactly one byte and every byte maps to exactly one char, using the table below.
    /// </summary>
    /// <remarks>
    /// Characters that have no entry in the table are written as '?' (0x3F); the
    /// encoder fallback configured on the instance is not consulted.
    /// </remarks>
    class MazoviaEncoding : Encoding
    {
        // Byte emitted for characters that are missing from the table: '?'.
        private const byte ReplacementByte = 0x3F;

        // Decoding table: index = encoded byte value, element = Unicode code point.
        // Entries 0x00-0x7F are the identity mapping; 0x80-0xFF hold the code-page specific characters.
        private static readonly int[] codePoints =  {
            0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F
            ,0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F
            ,0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F
            ,0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F
            ,0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F
            ,0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F
            ,0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F
            ,0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F
            ,0x00C7,0x00FC,0x00E9,0x00E2,0x00E4,0x00E0,0x0105,0x00E7,0x00EA,0x00EB,0x00E8,0x00EF,0x00EE,0x0107,0x00C4,0x0104
            ,0x0118,0x0119,0x0142,0x00F4,0x00F6,0x0106,0x00FB,0x00F9,0x015A,0x00D6,0x00DC,0x00A2,0x0141,0x00A5,0x015B,0x0192
            ,0x0179,0x017B,0x00F3,0x00D3,0x0144,0x0143,0x017A,0x017C,0x00BF,0x2310,0x00AC,0x00BD,0x00BC,0x00A1,0x00AB,0x00BB
            ,0x2591,0x2592,0x2593,0x2502,0x2524,0x2561,0x2562,0x2556,0x2555,0x2563,0x2551,0x2557,0x255D,0x255C,0x255B,0x2510
            ,0x2514,0x2534,0x252C,0x251C,0x2500,0x253C,0x255E,0x255F,0x255A,0x2554,0x2569,0x2566,0x2560,0x2550,0x256C,0x2567
            ,0x2568,0x2564,0x2565,0x2559,0x2558,0x2552,0x2553,0x256B,0x256A,0x2518,0x250C,0x2588,0x2584,0x258C,0x2590,0x2580
            ,0x03B1,0x00DF,0x0393,0x03C0,0x03A3,0x03C3,0x00B5,0x03C4,0x03A6,0x0398,0x03A9,0x03B4,0x221E,0x03C6,0x03B5,0x2229
            ,0x2261,0x00B1,0x2265,0x2264,0x2320,0x2321,0x00F7,0x2248,0x00B0,0x2219,0x00B7,0x221A,0x207F,0x00B2,0x25A0,0x00A0
        };

        // Encoding table: the inverse of codePoints, built once in the static constructor.
        private static readonly Dictionary<char, byte> unicodeToByte;

        static MazoviaEncoding()
        {
            unicodeToByte = new Dictionary<char, byte>(codePoints.Length);

            for (int i = 0; i < codePoints.Length; ++i)
            {
                unicodeToByte.Add((char)codePoints[i], (byte)i);
            }
        }

        // Throws ArgumentOutOfRangeException unless [index, index + count) lies inside an
        // array of the given length. Written as "count > length - index" so that the check
        // cannot be defeated by integer overflow of index + count.
        private static void CheckRange(int length, int index, int count, string indexName, string countName)
        {
            if (index < 0)
            {
                throw new ArgumentOutOfRangeException(indexName);
            }
            if (count < 0 || count > length - index)
            {
                throw new ArgumentOutOfRangeException(countName);
            }
        }

        /// <summary>Returns the largest number of bytes that encoding <paramref name="charCount"/> chars can produce.</summary>
        /// <param name="charCount">Number of chars to encode.</param>
        /// <returns><paramref name="charCount"/>, because every char becomes exactly one byte.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="charCount"/> is negative.</exception>
        public override int GetMaxByteCount(int charCount)
        {
            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException("charCount");
            }
            return charCount;
        }

        /// <summary>Returns the largest number of chars that decoding <paramref name="byteCount"/> bytes can produce.</summary>
        /// <param name="byteCount">Number of bytes to decode.</param>
        /// <returns><paramref name="byteCount"/>, because every byte becomes exactly one char.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="byteCount"/> is negative.</exception>
        public override int GetMaxCharCount(int byteCount)
        {
            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException("byteCount");
            }
            return byteCount;
        }

        /// <summary>
        /// Encodes <paramref name="charCount"/> chars starting at <paramref name="charIndex"/> into
        /// <paramref name="bytes"/> starting at <paramref name="byteIndex"/>.
        /// </summary>
        /// <param name="chars">Source characters.</param>
        /// <param name="charIndex">Index of the first char to encode.</param>
        /// <param name="charCount">Number of chars to encode.</param>
        /// <param name="bytes">Destination buffer.</param>
        /// <param name="byteIndex">Index in <paramref name="bytes"/> at which writing starts.</param>
        /// <returns>The number of bytes written, which equals <paramref name="charCount"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="chars"/> or <paramref name="bytes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or the count is negative, the source range is outside <paramref name="chars"/>,
        /// or <paramref name="bytes"/> has too little room after <paramref name="byteIndex"/>.
        /// </exception>
        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            CheckRange(chars.Length, charIndex, charCount, "charIndex", "charCount");
            // The output needs exactly charCount slots starting at byteIndex.
            CheckRange(bytes.Length, byteIndex, charCount, "byteIndex", "byteIndex");

            for (int offset = 0; offset < charCount; ++offset)
            {
                byte asMazovia;
                if (!unicodeToByte.TryGetValue(chars[charIndex + offset], out asMazovia))
                {
                    // No representation in this code page: substitute '?'.
                    asMazovia = ReplacementByte;
                }
                bytes[byteIndex + offset] = asMazovia;
            }
            return charCount;
        }

        /// <summary>
        /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="byteIndex"/> into
        /// <paramref name="chars"/> starting at <paramref name="charIndex"/>.
        /// </summary>
        /// <param name="bytes">Source bytes.</param>
        /// <param name="byteIndex">Index of the first byte to decode.</param>
        /// <param name="byteCount">Number of bytes to decode.</param>
        /// <param name="chars">Destination buffer.</param>
        /// <param name="charIndex">Index in <paramref name="chars"/> at which writing starts.</param>
        /// <returns>The number of chars written, which equals <paramref name="byteCount"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or the count is negative, the source range is outside <paramref name="bytes"/>,
        /// or <paramref name="chars"/> has too little room after <paramref name="charIndex"/>.
        /// </exception>
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex )
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            CheckRange(bytes.Length, byteIndex, byteCount, "byteIndex", "byteCount");
            // The output needs exactly byteCount slots starting at charIndex.
            CheckRange(chars.Length, charIndex, byteCount, "charIndex", "charIndex");

            for (int offset = 0; offset < byteCount; ++offset)
            {
                // A byte is always a valid index into the 256-entry table.
                chars[charIndex + offset] = (char)codePoints[bytes[byteIndex + offset]];
            }
            return byteCount;
        }

        /// <summary>Returns the number of bytes needed to encode the given range of chars.</summary>
        /// <param name="charArray">Source characters.</param>
        /// <param name="index">Index of the first char.</param>
        /// <param name="count">Number of chars.</param>
        /// <returns><paramref name="count"/>, because every char becomes exactly one byte.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="charArray"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The range is negative or outside <paramref name="charArray"/>.</exception>
        public override int GetByteCount(char[] charArray, int index, int count)
        {
            if (charArray == null)
            {
                throw new ArgumentNullException("charArray");
            }
            CheckRange(charArray.Length, index, count, "index", "count");
            return count;
        }

        /// <summary>Returns the number of chars produced by decoding the given range of bytes.</summary>
        /// <param name="bytes">Source bytes.</param>
        /// <param name="index">Index of the first byte.</param>
        /// <param name="count">Number of bytes.</param>
        /// <returns><paramref name="count"/>, because every byte becomes exactly one char.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">The range is negative or outside <paramref name="bytes"/>.</exception>
        public override int GetCharCount( byte[] bytes, int index, int count )
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            CheckRange(bytes.Length, index, count, "index", "count");
            return count;
        }
    }
}
2012-12-01T09:51:22.997 に回答