JAVA学习网首页 把生活查询网加入收藏 把JAVA学习网设为首页 联系方式
    Hi,JAVA学习
JAVA基础 设计模式 数据库 JavaBeans J2EE JavaDelphi 用户界面 综合文章  
Java输入输出过程中的字节流到字符流的转换
时间:15/08/2007
作者:未知
来源:网络
小提示点这里把文章加入您的收藏夹,方便下次查看
设置文章字体大小:[ ]

/*
 * Copyright (c) 2002-2003 Che, Dong Email: chedongATbigfoot.com/chedongATchedong.com
 * $Id: HelloUnicode.java,v 1.3 2003/03/09 08:41:46 chedong Exp $
 */

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;

/**
 * Purpose:
 *     Demonstrates how different character encoding/decoding choices affect
 *     multi-byte (Chinese) text.
 * Input:
 *     An optional test string passed as the first command line argument.
 * Output:
 *     Test 1: converts the string in several ways and writes each result to a file.
 *     Test 2: reads the strings back from those files and decodes them in several ways.
 * @author Che, Dong
 */
class HelloUnicode {
    /**
     * Runs both experiments and prints every intermediate string together
     * with a per-character dump.
     *
     * Any exception (unsupported charset name, unreadable file, ...) is
     * printed to standard output and ends the run.
     *
     * @param args command line arguments; args[0], when present, replaces
     *             the built-in test string
     */
    public static void main(String[] args) {
        String hello = "Hello world 世界你好";

        // take the test string from the command line when one is given
        if (args.length > 0) {
            hello = args[0];
        }

        try {
            /*
             * Experiment 1: start from the test string, convert it in
             * several ways and write each result to a file.
             */
            System.out.println(">>>>testing1: write hello world to files<<<<");
            System.out.println("[test 1-1]: with system default encoding="
                + System.getProperty("file.encoding") + "\nstring=" + hello
                + "\tlength=" + hello.length());
            printCharArray(hello);
            writeFile("hello.orig.html", hello);

            // encode with the platform default charset, then decode the bytes as GB2312
            hello = new String(hello.getBytes(), "GB2312");
            System.out.println(
                "[test 1-2]: getBytes with platform default encoding and decoding as gb2312:\nstring="
                + hello + "\tlength=" + hello.length());
            writeFile("hello.gb2312.html", hello);
            printCharArray(hello);

            // encode to UTF-8 bytes, then decode those bytes with the platform
            // default charset; the mismatch is deliberate and garbles the text
            // whenever the default charset is not UTF-8
            hello = new String(hello.getBytes("UTF8"));
            System.out.println("[test 1-3]: convert string to UTF8\nstring="
                + hello + "\tlength=" + hello.length());
            writeFile("hello.utf8.html", hello);
            printCharArray(hello);

            /*
             * Experiment 2: read the files written by experiment 1 and
             * decode their content in different ways.
             */
            System.out.println(
                ">>>>testing2: reading and decoding from files<<<<");

            // first file: read with the platform default charset
            hello = readFile("hello.orig.html");
            System.out.println(
                "[test 2-1]: read hello.orig.html: decoding with system default encoding\nstring="
                + hello + "\tlength=" + hello.length());
            printCharArray(hello);

            // second file: also read with the platform default charset; no
            // explicit GB2312 decoding step is applied here
            // NOTE(review): the printed label says "decoding as GB2312" - confirm
            // whether an explicit GB2312 decode was intended.
            hello = readFile("hello.gb2312.html");
            System.out.println(
                "[test 2-2]: read hello.gb2312.html: decoding as GB2312\nstring="
                + hello + "\tlength=" + hello.length());
            printCharArray(hello);

            // third file: re-encode what was read with the platform default
            // charset and decode those bytes as UTF-8
            hello = readFile("hello.utf8.html");
            hello = new String(hello.getBytes(), "UTF8");
            System.out.println(
                "[test 2-3]: read hello.utf8.html: decoding as UTF8\nstring="
                + hello + "\tlength=" + hello.length());
            printCharArray(hello);
        } catch (Exception e) {
            System.out.println(e.toString());
        }
    }

    /**
     * Prints one line per character of the given string: the character, its
     * value narrowed to a byte (decimal and hex), its value narrowed to a
     * short (decimal and hex) and its Unicode block. A blank line follows.
     *
     * The decimal values are signed; the hex values show only the low 8 and
     * low 16 bits respectively.
     *
     * @param inStr input string
     */
    public static void printCharArray(String inStr) {
        char[] myBuffer = inStr.toCharArray();

        for (int i = 0; i < myBuffer.length; i++) {
            byte b = (byte) myBuffer[i];
            short s = (short) myBuffer[i];

            // Mask before converting: Integer.toHexString takes an int, so a
            // negative byte/short would be sign-extended to eight hex digits.
            String hexB = Integer.toHexString(b & 0xFF).toUpperCase();
            String hexS = Integer.toHexString(s & 0xFFFF).toUpperCase();
            StringBuffer sb = new StringBuffer();

            // the character itself
            sb.append("char[");
            sb.append(i);
            sb.append("]='");
            sb.append(myBuffer[i]);
            sb.append("'\t");

            // low 8 bits
            sb.append("byte=");
            sb.append(b);
            sb.append(" \\u");
            sb.append(hexB);
            sb.append('\t');

            // full 16 bits
            sb.append("short=");
            sb.append(s);
            sb.append(" \\u");
            sb.append(hexS);
            sb.append('\t');

            // Unicode block the character belongs to
            sb.append(Character.UnicodeBlock.of(myBuffer[i]));

            System.out.println(sb.toString());
        }

        System.out.println();
    }

    /**
     * Writes the content to the named file using the platform default
     * charset, replacing any previous content.
     *
     * This is best effort: an I/O failure is printed to standard output and
     * the method returns normally.
     *
     * @param fileName output file name
     * @param content  file content to write
     */
    private static void writeFile(String fileName, String content) {
        FileWriter fw = null;

        try {
            // opening without the append flag truncates an existing file, so
            // no separate delete step is needed
            fw = new FileWriter(fileName);
            fw.write(content);
        } catch (IOException e) {
            System.out.println(e.toString());
        } finally {
            if (fw != null) {
                try {
                    fw.close();
                } catch (IOException e) {
                    // close flushes buffered data, so a failure here means
                    // the file may be incomplete
                    System.out.println(e.toString());
                }
            }
        }
    }

    /**
     * Reads the whole named file using the platform default charset.
     * Line terminators are kept as part of the returned content.
     *
     * @param fileName input file name
     * @return String file content
     * @throws IOException if the file cannot be opened or read
     */
    private static String readFile(String fileName) throws IOException {
        BufferedReader fr = new BufferedReader(new FileReader(fileName));

        try {
            StringBuffer out = new StringBuffer();
            char[] chunk = new char[1024];
            int count = fr.read(chunk);

            while (count != -1) {
                out.append(chunk, 0, count);
                count = fr.read(chunk);
            }

            return out.toString();
        } finally {
            // release the file handle on success and on failure alike
            fr.close();
        }
    }
}

运行结果:

C:\ja>java    HelloUnicode
>>>>testing1: write hello world to files<<<<
[test 1-1]: with system default encoding=GBK
string=Hello world 世界你好     length=16
char[0]='H'     byte=72 \u48    short=72 \u48   BASIC_LATIN
char[1]='e'     byte=101 \u65   short=101 \u65  BASIC_LATIN
char[2]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[3]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[4]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[5]=' '     byte=32 \u20    short=32 \u20   BASIC_LATIN
char[6]='w'     byte=119 \u77   short=119 \u77  BASIC_LATIN
char[7]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[8]='r'     byte=114 \u72   short=114 \u72  BASIC_LATIN
char[9]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[10]='d'    byte=100 \u64   short=100 \u64  BASIC_LATIN
char[11]=' '    byte=32 \u20    short=32 \u20   BASIC_LATIN
char[12]='世'   byte=22 \u16    short=19990 \u4E16      CJK_UNIFIED_IDEOGRAPHS
char[13]='界'   byte=76 \u4C    short=30028 \u754C      CJK_UNIFIED_IDEOGRAPHS
char[14]='你'   byte=96 \u60    short=20320 \u4F60      CJK_UNIFIED_IDEOGRAPHS
char[15]='好'   byte=125 \u7D   short=22909 \u597D      CJK_UNIFIED_IDEOGRAPHS

[test 1-2]: getBytes with platform default encoding and decoding as gb2312:
string=Hello world 世界你好     length=16
char[0]='H'     byte=72 \u48    short=72 \u48   BASIC_LATIN
char[1]='e'     byte=101 \u65   short=101 \u65  BASIC_LATIN
char[2]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[3]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[4]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[5]=' '     byte=32 \u20    short=32 \u20   BASIC_LATIN
char[6]='w'     byte=119 \u77   short=119 \u77  BASIC_LATIN
char[7]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[8]='r'     byte=114 \u72   short=114 \u72  BASIC_LATIN
char[9]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[10]='d'    byte=100 \u64   short=100 \u64  BASIC_LATIN
char[11]=' '    byte=32 \u20    short=32 \u20   BASIC_LATIN
char[12]='世'   byte=22 \u16    short=19990 \u4E16      CJK_UNIFIED_IDEOGRAPHS
char[13]='界'   byte=76 \u4C    short=30028 \u754C      CJK_UNIFIED_IDEOGRAPHS
char[14]='你'   byte=96 \u60    short=20320 \u4F60      CJK_UNIFIED_IDEOGRAPHS
char[15]='好'   byte=125 \u7D   short=22909 \u597D      CJK_UNIFIED_IDEOGRAPHS

[test 1-3]: convert string to UTF8
string=Hello world 涓栫晫浣犲ソ length=18
char[0]='H'     byte=72 \u48    short=72 \u48   BASIC_LATIN
char[1]='e'     byte=101 \u65   short=101 \u65  BASIC_LATIN
char[2]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[3]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[4]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[5]=' '     byte=32 \u20    short=32 \u20   BASIC_LATIN
char[6]='w'     byte=119 \u77   short=119 \u77  BASIC_LATIN
char[7]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[8]='r'     byte=114 \u72   short=114 \u72  BASIC_LATIN
char[9]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[10]='d'    byte=100 \u64   short=100 \u64  BASIC_LATIN
char[11]=' '    byte=32 \u20    short=32 \u20   BASIC_LATIN
char[12]='涓'   byte=-109 \uFFFFFF93    short=28051 \u6D93      CJK_UNIFIED_IDEOGRAPHS
char[13]='栫'   byte=43 \u2B    short=26667 \u682B      CJK_UNIFIED_IDEOGRAPHS
char[14]='晫'   byte=107 \u6B   short=26219 \u666B      CJK_UNIFIED_IDEOGRAPHS
char[15]='浣'   byte=99 \u63    short=28003 \u6D63      CJK_UNIFIED_IDEOGRAPHS
char[16]='犲'   byte=-78 \uFFFFFFB2     short=29362 \u72B2      CJK_UNIFIED_IDEOGRAPHS
char[17]='ソ'   byte=-67 \uFFFFFFBD     short=12477 \u30BD      KATAKANA

>>>>testing2: reading and decoding from files<<<<
[test 2-1]: read hello.orig.html: decoding with system default encoding
string=Hello world 世界你好     length=16
char[0]='H'     byte=72 \u48    short=72 \u48   BASIC_LATIN
char[1]='e'     byte=101 \u65   short=101 \u65  BASIC_LATIN
char[2]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[3]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[4]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[5]=' '     byte=32 \u20    short=32 \u20   BASIC_LATIN
char[6]='w'     byte=119 \u77   short=119 \u77  BASIC_LATIN
char[7]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[8]='r'     byte=114 \u72   short=114 \u72  BASIC_LATIN
char[9]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[10]='d'    byte=100 \u64   short=100 \u64  BASIC_LATIN
char[11]=' '    byte=32 \u20    short=32 \u20   BASIC_LATIN
char[12]='世'   byte=22 \u16    short=19990 \u4E16      CJK_UNIFIED_IDEOGRAPHS
char[13]='界'   byte=76 \u4C    short=30028 \u754C      CJK_UNIFIED_IDEOGRAPHS
char[14]='你'   byte=96 \u60    short=20320 \u4F60      CJK_UNIFIED_IDEOGRAPHS
char[15]='好'   byte=125 \u7D   short=22909 \u597D      CJK_UNIFIED_IDEOGRAPHS

[test 2-2]: read hello.gb2312.html: decoding as GB2312
string=Hello world 世界你好     length=16
char[0]='H'     byte=72 \u48    short=72 \u48   BASIC_LATIN
char[1]='e'     byte=101 \u65   short=101 \u65  BASIC_LATIN
char[2]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[3]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[4]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[5]=' '     byte=32 \u20    short=32 \u20   BASIC_LATIN
char[6]='w'     byte=119 \u77   short=119 \u77  BASIC_LATIN
char[7]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[8]='r'     byte=114 \u72   short=114 \u72  BASIC_LATIN
char[9]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[10]='d'    byte=100 \u64   short=100 \u64  BASIC_LATIN
char[11]=' '    byte=32 \u20    short=32 \u20   BASIC_LATIN
char[12]='世'   byte=22 \u16    short=19990 \u4E16      CJK_UNIFIED_IDEOGRAPHS
char[13]='界'   byte=76 \u4C    short=30028 \u754C      CJK_UNIFIED_IDEOGRAPHS
char[14]='你'   byte=96 \u60    short=20320 \u4F60      CJK_UNIFIED_IDEOGRAPHS
char[15]='好'   byte=125 \u7D   short=22909 \u597D      CJK_UNIFIED_IDEOGRAPHS

[test 2-3]: read hello.utf8.html: decoding as UTF8
string=Hello world 世界你好     length=16
char[0]='H'     byte=72 \u48    short=72 \u48   BASIC_LATIN
char[1]='e'     byte=101 \u65   short=101 \u65  BASIC_LATIN
char[2]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[3]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[4]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[5]=' '     byte=32 \u20    short=32 \u20   BASIC_LATIN
char[6]='w'     byte=119 \u77   short=119 \u77  BASIC_LATIN
char[7]='o'     byte=111 \u6F   short=111 \u6F  BASIC_LATIN
char[8]='r'     byte=114 \u72   short=114 \u72  BASIC_LATIN
char[9]='l'     byte=108 \u6C   short=108 \u6C  BASIC_LATIN
char[10]='d'    byte=100 \u64   short=100 \u64  BASIC_LATIN
char[11]=' '    byte=32 \u20    short=32 \u20   BASIC_LATIN
char[12]='世'   byte=22 \u16    short=19990 \u4E16      CJK_UNIFIED_IDEOGRAPHS
char[13]='界'   byte=76 \u4C    short=30028 \u754C      CJK_UNIFIED_IDEOGRAPHS
char[14]='你'   byte=96 \u60    short=20320 \u4F60      CJK_UNIFIED_IDEOGRAPHS
char[15]='好'   byte=125 \u7D   short=22909 \u597D      CJK_UNIFIED_IDEOGRAPHS

上一篇:自定义事件与监听器

下一篇:整型int和字节数组byte相互转换

  • struts-helloapp 学习小记
  • JSP连接SQL Server 2000系统配置
  • 用装饰者(Decorator)模式添加功能
  • minij2ee常见问题(FAQ)
  • JDK1.5中新的语言特征
  • Javamail中的常见中文乱码问题与解决办法(综合)
  • J2ME技术在手机开发领域的优势和不足
  • JSF - Request Processing Lifecycle
  • 基于容器的用户安全管理系统和JMS(6)
  • 随机数类Random
  • JAVA与正则表达式(2年级之1)
  • 一个MIDP俄罗斯方块游戏的设计和实现
  • Design Patterns 线路图
  • javac - Java 编程语言编译器的使用文档
  • 移动设备最优化(直逼网络j2me的应用性能和开发时间)一
  • 基于 J2EE 的企业应用系统 - 工具一览表
  • Apache下 配置WAP + OTA方法
  • Jbuilder快捷键
  • 【返回】 【顶部】 【关闭】
    Copyright © 2005-2010 www.594k.com All Rights Reserved.
    版权所有:JAVA学习网 备案序号:皖ICP备06004238号