Java判断中文字符

package com.jsoft.test;

import java.util.regex.Pattern;

/**
 * Helpers for detecting Chinese text in a string, with a small demo in {@link #main}.
 *
 * <p>Three strategies are offered:
 * <ul>
 *   <li>{@code hasChineseByRange} / {@code isChineseByRange}: compare each UTF-16 code unit
 *       against the CJK Unified Ideographs block (U+4E00 to U+9FFF). Punctuation is not
 *       matched.</li>
 *   <li>{@code hasChineseByReg} / {@code isChineseByReg}: the same range, expressed as a
 *       regular expression.</li>
 *   <li>{@code hasChinese} / {@code isChinese}: classify each code point by its
 *       {@link Character.UnicodeBlock}; this also accepts CJK punctuation, general
 *       punctuation and full-width forms.</li>
 * </ul>
 *
 * <p>Every method returns {@code false} for {@code null}. The "is all Chinese" methods also
 * return {@code false} for the empty string, so all three strategies agree on that input.
 *
 * @author jim
 * @date 2017-12-22
 */
public class ChineseHelper {

    /** First code point of the CJK Unified Ideographs block. */
    private static final int HAN_RANGE_START = 0x4E00;

    /** Last code point of the CJK Unified Ideographs block. */
    private static final int HAN_RANGE_END = 0x9FFF;

    /**
     * One or more characters from the CJK Unified Ideographs block. Compiled once because
     * {@link Pattern} is immutable and safe to share between calls.
     */
    private static final Pattern HAN_PATTERN = Pattern.compile("[\\u4E00-\\u9FFF]+");

    /** Line printed between the sections of the demo output. */
    private static final String SEPARATOR = "-------分割线-------";

    /**
     * Demo: runs every detection strategy over nine sample strings and prints the results.
     *
     * <p>Expected output per sample ("has" / "is all"):
     * <pre>
     *                          byRange / byReg     by Unicode block
     * s1 ASCII                 false / false       false / false
     * s2 Chinese               true  / true        true  / true
     * s3 Chinese + punctuation true  / false       true  / true
     * s4 Korean                false / false       false / false
     * s5 Japanese kana         false / false       false / false
     * s6..s8 special symbols   false / false       false / false
     * s9 Traditional Chinese   true  / true        true  / true
     * </pre>
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String[] samples = {
            // s1: ASCII letters and punctuation only
            "Hello,Tom.!@#$%^&*()_+-={}|[];':\"?",
            // s2: Chinese ideographs only
            "你好中国",
            // s3: Chinese ideographs with Chinese punctuation. The closing single quote is
            // U+2019; an ASCII apostrophe here would make isChinese(s3) false.
            "你好，中国。《》：“”‘’；（）【】！￥、",
            // s4: Korean
            "한국어난",
            // s5: Japanese hiragana
            "ぎじゅつ",
            // s6..s8: special characters
            "��",
            "╃",
            "╂",
            // s9: Traditional Chinese
            "蒼老師"
        };

        // 1. Plain code-unit range comparison.
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否包含中文：" + hasChineseByRange(samples[i]));
        }
        System.out.println(SEPARATOR);
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否全是中文：" + isChineseByRange(samples[i]));
        }
        System.out.println(SEPARATOR);

        // 2. Same range as a regular expression (results match strategy 1).
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否包含中文：" + hasChineseByReg(samples[i]));
        }
        System.out.println(SEPARATOR);
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否全是中文：" + isChineseByReg(samples[i]));
        }
        System.out.println(SEPARATOR);

        // 3. Unicode block classification (Chinese punctuation counts as Chinese).
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否包含中文：" + hasChinese(samples[i]));
        }
        System.out.println(SEPARATOR);
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否全是中文：" + isChinese(samples[i]));
        }
    }

    /**
     * Tells whether the string contains at least one Chinese character, where Chinese
     * punctuation counts as Chinese (see {@link #isChineseCodePoint(int)}).
     *
     * <p>The string is walked by code point, so ideographs outside the Basic Multilingual
     * Plane (stored as surrogate pairs) are recognized.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if any code point is Chinese; {@code false} otherwise, including
     *         for {@code null} and the empty string
     */
    public static boolean hasChinese(String str) {
        if (str == null) {
            return false;
        }
        int index = 0;
        while (index < str.length()) {
            int codePoint = str.codePointAt(index);
            if (isChineseCodePoint(codePoint)) {
                return true;
            }
            // A supplementary code point occupies two chars; step over both.
            index += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Tells whether the string consists only of Chinese characters, where Chinese
     * punctuation counts as Chinese (see {@link #isChineseCodePoint(int)}).
     *
     * <p>The string is walked by code point, so ideographs outside the Basic Multilingual
     * Plane (stored as surrogate pairs) are recognized.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if the string is non-empty and every code point is Chinese;
     *         {@code false} otherwise, including for {@code null} and the empty string
     */
    public static boolean isChinese(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        int index = 0;
        while (index < str.length()) {
            int codePoint = str.codePointAt(index);
            if (!isChineseCodePoint(codePoint)) {
                return false;
            }
            index += Character.charCount(codePoint);
        }
        return true;
    }

    /**
     * Classifies a single code point by its Unicode block.
     *
     * <p>Accepted blocks: CJK Unified Ideographs and its extensions A to D, CJK
     * Compatibility Ideographs, CJK Symbols and Punctuation, General Punctuation, and
     * Halfwidth and Fullwidth Forms. The last two are accepted so that punctuation used in
     * Chinese text passes; note that they also contain characters that are not specific to
     * Chinese.
     *
     * @param codePoint a Unicode code point, as returned by {@link String#codePointAt(int)}
     * @return {@code true} if the code point belongs to one of the accepted blocks
     */
    private static boolean isChineseCodePoint(int codePoint) {
        // of(int) returns null for code points outside every known block; the identity
        // comparisons below are then simply all false.
        Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
                || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Tells whether the string contains at least one Han ideograph, using a regular
     * expression over the CJK Unified Ideographs block (U+4E00 to U+9FFF). Chinese
     * punctuation is not matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if any character falls in the range; {@code false} otherwise,
     *         including for {@code null} and the empty string
     */
    public static boolean hasChineseByReg(String str) {
        if (str == null) {
            return false;
        }
        return HAN_PATTERN.matcher(str).find();
    }

    /**
     * Tells whether the string consists only of Han ideographs, using a regular expression
     * over the CJK Unified Ideographs block (U+4E00 to U+9FFF). Chinese punctuation is not
     * matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if the string is non-empty and every character falls in the
     *         range; {@code false} otherwise, including for {@code null} and the empty string
     */
    public static boolean isChineseByReg(String str) {
        if (str == null) {
            return false;
        }
        return HAN_PATTERN.matcher(str).matches();
    }

    /**
     * Tells whether the string contains at least one Han ideograph, by comparing each char
     * against the CJK Unified Ideographs block (U+4E00 to U+9FFF). Chinese punctuation is
     * not matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if any char falls in the range; {@code false} otherwise,
     *         including for {@code null} and the empty string
     */
    public static boolean hasChineseByRange(String str) {
        if (str == null) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            if (inHanRange(str.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the string consists only of Han ideographs, by comparing each char
     * against the CJK Unified Ideographs block (U+4E00 to U+9FFF). Chinese punctuation is
     * not matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if the string is non-empty and every char falls in the range;
     *         {@code false} otherwise, including for {@code null} and the empty string
     */
    public static boolean isChineseByRange(String str) {
        // The empty string is rejected so this method agrees with isChineseByReg.
        if (str == null || str.isEmpty()) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            if (!inHanRange(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Returns whether the char lies inside the CJK Unified Ideographs block. */
    private static boolean inHanRange(char c) {
        return c >= HAN_RANGE_START && c <= HAN_RANGE_END;
    }

}

如果只需要判断是否是汉字，而不需要判断中文标点，推荐使用正则去匹配（Pattern 预编译后复用），可能更高效一点。

还有另外一种投机取巧的方法：尝试把字符串转成 int 类型，然后用 try...catch 捕获转换失败时抛出的异常。

参考：

http://www.jb51.net/article/79101.htm（以上内容转自此篇文章）

http://blog.csdn.net/h082602/article/details/73251446

http://blog.csdn.net/u011240877/article/details/49907751

http://blog.csdn.net/l1028386804/article/details/43764073

http://blog.csdn.net/qwkxq/article/details/53508736

https://www.cnblogs.com/jinc/archive/2013/02/26/2933766.html

Java判断中文字符的更多相关文章

Java 判断中文字符
Java判断一个字符串中是否有中文字符有两种方法,但是原理都一样,就是通过Unicode编码来判断,因为中文在Unicode中的编码区间为:0x4e00--0x9fa5 第一种: String chi ...
Java判断一个字符是否是数字的几种方法的代码
在工作期间,将写内容过程经常用到的一些内容段做个记录,下面内容是关于Java判断一个字符是否是数字的几种方法的内容,希望能对码农们有好处. public class Test{ public stat ...
JAVA的中文字符乱码问题
来源:http://luzefengoo.blog.163.com/blog/static/1403593882012754428536/ JAVA的中文字符乱码问题一直很让人头疼.特别是在WEB应用 ...
Java 完美判断中文字符
Java判断一个字符串是否有中文一般情况是利用Unicode编码(CJK统一汉字的编码区间:0x4e00–0x9fbb)的正则来做判断,但是其实这个区间来判断中文不是非常精确,因为有些中文的标点符号比 ...
Java 完美判断中文字符的方法
Java判断一个字符串是否有中文一般情况是利用Unicode编码(CJK统一汉字的编码区间:0x4e00–0x9fbb)的正则来做判断,但是其实这个区间来判断中文不是非常精确,因为有些中文的标点符号比 ...
C# 判断中文字符（字符串）
在unicode 字符串中,中文的范围是在4E00..9FFF:CJK Unified Ideographs.通过对字符的unicode编码进行判断来确定字符是否为中文.protected bool ...
python利用utf-8编码判断中文字符
下面这个小工具包含了判断unicode是否是汉字,数字,英文,或者其他字符. 全角符号转半角符号. unicode字符串归一化等工作. 还有一个能处理多音字的汉字转拼音的程序,还在整理中. #!/u ...
MySQL判断中文字符的方法（转）
准备: 2.1.环境 MySQL mysql> SHOW VARIABLES LIKE "%version%"; +-------------------------+--- ...
java 获取中文字符的首字母
原理: GB2312编码中的中文是按照拼音排序的注意: 一些生僻的字无法获得正确的首字母,原因是这些字都是后加入的. import java.io.UnsupportedEncodingExcept ...

随机推荐

linux中帮助参数 man whatis which info区别？
在linux终端,面对命令不知道怎么用,或不记得命令的拼写及参数时,我们需要求助于系统的帮助文档: linux系统内置的帮助文档很详细,通常能解决我们的问题,我们需要掌握如何正确的去使用它们: 在只记 ...
第五章 Internet协议
写在开头: 埋头学习也差不多半个月了,达到了这半个月每天都会去图书馆的目标.确实挺忙的,不管在学习上,部门社团上,党建上.有时候为了多学一些总是会挤掉了其他事情的一些时间.但是自己时刻提醒着自己不要太 ...
J2EE的十三个技术——EJB之概述
含义: 企业级的JavaBeans(Enterprise JavaBean),其设计目标是部署分布式应用程序. EJB是J2EE的一部分,称为Java企业Bean,它把使用Java开发的服务器组件的部 ...
虚机中访问外网；NAT中的POSTROUTING是怎么搞的？
看下docker中是怎么配置的网络在虚机中访问外网:设定了qemu,在主机上添加路由:sudo iptables -t nat -I POSTROUTING -s 192.168.1.110 -j ...
解决IDEA2018.1.5或者Android Studio 3.0版本的输入法不跟随光标问题
问题1:IDEA2018.1.5版本的输入法不跟随光标解决办法1:修改JDK版本,步骤如下: 1. 使用快捷键ctrl+shift+A,在输入框中输入Switch Boot JDK,如图所示 2.替 ...
冒泡排序 [组合数学+dp]
题面思路一眼看过去以为NOI2018的题出出来了= =贼吓人首先,对于这个难度,我们有一个比较明显的结论: 一个序列的难度,等于这个东西: $hard=max(\sum_{j=i+1}^n[a_ ...
ZOJ 1081 Points Within | 判断点在多边形内
题目: 给个n个点的多边形,n个点按顺序给出,给个点m,判断m在不在多边形内部题解: 网上有两种方法,这里写一种:射线法大体的思想是:以这个点为端点,做一条平行与x轴的射线(代码中射线指向x轴正方 ...
hdu 2993 斜率dp
思路:直接通过斜率优化进行求解. #include<iostream> #include<cstdio> #include<algorithm> #include& ...
关于PDA、GPS等动态资源的几种GIS解决方案
关于PDA.GPS等动态资源的几种GIS解决方案(原创) 今年来GIS发展迅速,特别是实时监控中引入了GPS,PDA等动态资源,使得GIS在各个行业的应用更为广泛. 1.在这些动态资源资源的监控中主要 ...
在GitHub多个帐号上添加SSH公钥
GitHub后台可以添加多个SSH Keys,但是同一个SSH Keys只能在添加在一个帐号上(添加时提示“Key is already in use”).理由很容易想到,SSH公钥使用时相当于用户名 ...

Java判断中文字符

Java判断中文字符的更多相关文章

随机推荐

热门专题