ここでの私の理解には明らかに不完全なものがあります。以下のコードを実行すると、次のことが期待できます。
translateTest:: start
start_korean: (6) c0 af c8 f1 c8 c6
expected_utf8: (6) c7 20 d7 6c d6 c8
found_utf8: (6) c7 20 d7 6c d6 c8
expected utf8 matches found? true
私が得るものは次のとおりです。
translateTest:: start
start_korean: (6) c0 af c8 f1 c8 c6
expected_utf8: (6) c7 20 d7 6c d6 c8
found_utf8: (9) ec 9c a0 ed 9d ac ed 9b 88
expected utf8 matches found? false
文字列を作成し、バイトを x-windows-949 として宣言し、バイトを utf-8 として取得すると、それらが一方から他方に変換されると思います。どうやら、私はこれについて正しくありません。
public class translateTest {
public static void main (String [] Argv) {
(new translateTest()).translate();
}
void translate() {
System.out.println("translateTest:: start");
try {
// pages below linked from http://msdn.microsoft.com/en-US/goglobal/cc305154
// Please ignore the lame bytesToHex helper method. Including it for completeness.
// from http://msdn.microsoft.com/en-US/goglobal/gg696909
//
// 0xC0AF = U+C720 = HANGUL SYLLABLE IEUNG YU
// from http://msdn.microsoft.com/en-US/goglobal/gg696960
//
// 0xC8F1 = U+D76C = HANGUL SYLLABLE HIEUH YI
// also from http://msdn.microsoft.com/en-US/goglobal/gg696960
//
// 0xC8C6 = U+D6C8 = HANGUL SYLLABLE HIEUH U NIEUN
byte[] start_korean = new byte[] { (byte)0xC0, (byte)0xAF, (byte)0xC8, (byte)0xF1, (byte)0xC8, (byte)0xC6 };
byte[] expected_utf8 = new byte[] { (byte)0xC7, (byte)0x20, (byte)0xD7, (byte)0x6C, (byte)0xD6, (byte)0xC8 };
String str = new String(start_korean, "x-windows-949");
byte[] found_utf8 = str.getBytes("utf8");
boolean isEqual = java.util.Arrays.equals(expected_utf8, found_utf8);
System.out.println(" start_korean: "+bytesToHex(start_korean));
System.out.println("expected_utf8: "+bytesToHex(expected_utf8));
System.out.println(" found_utf8: "+bytesToHex(found_utf8));
System.out.println("expected utf8 matches found? "+isEqual);
} catch (java.io.UnsupportedEncodingException uee) {
System.err.println(uee.getMessage());
}
}
public static String bytesToHex(byte[] b) {
StringBuffer str = new StringBuffer("("+b.length+") ");
for (int idx = 0; idx < b.length; idx++) {
str.append(" "+byteToHex(b[idx]));
}
return str.toString();
}
public static String byteToHex(byte b) {
String hex = Integer.toHexString(b);
while (hex.length() < 2) hex = "0"+hex;
if (hex.length() > 2)
hex = hex.substring(hex.length()-2);
return hex;
}
}