java 清除 bom
package cn.com.do1.component.common.util;import java.io.*;import java.nio.charset.Charset;public class Utf8BomRemover {/*** 读取流中前面的字符,看是否有bom,如果有bom,将bom头先读掉丢弃** @param in* @return* @throws java.io.IOException*/public static InputStream getInputStream(InputStream in) throws IOException {PushbackInputStream testin = new PushbackInputStream(in);int ch = testin.read();if (ch != 0xEF) {testin.unread(ch);} else if ((ch = testin.read()) != 0xBB) {testin.unread(ch);testin.unread(0xef);} else if ((ch = testin.read()) != 0xBF) {throw new IOException("错误的UTF-8格式文件");} else {// 不需要做,这里是bom头被读完了// // System.out.println("still exist bom");}return testin;}/*** 根据一个文件名,读取完文件,干掉bom头。** @param fileName* @throws java.io.IOException*/public static void trimBom(String fileName) throws IOException {FileInputStream fin = new FileInputStream(fileName);// 开始写临时文件InputStream in = getInputStream(fin);ByteArrayOutputStream bos = new ByteArrayOutputStream();byte b[] = new byte[4096];int len = 0;while (in.available() > 0) {len = in.read(b, 0, 4096);// out.write(b, 0, len);bos.write(b, 0, len);}in.close();fin.close();bos.close();// 临时文件写完,开始将临时文件写回本文件。System.out.println("[" + fileName + "]");FileOutputStream out = new FileOutputStream(fileName);out.write(bos.toByteArray());out.close();System.out.println("处理文件" + fileName);}public static void main(String[] args) throws IOException {//刪除指定文件夾下(含子文件夾)所有java文件的BOM,若構造器中參數為null則刪除所有文件頭部BOMnew Utf8BomRemover( "java" ).start( "\"F:\\\\flwork\\\\gmmsDGYH\\\\src\\\\com");}/*** 根据一个文件名,读取完文件,干掉bom头2 这里使用了第三方类UnicodeReader** @throws java.io.IOException*/public void saveFile(String file) throws IOException {InputStream in= new FileInputStream( file);BufferedReader bre = null;OutputStreamWriter pw = null;//定义一个流CharArrayWriter writer = new CharArrayWriter();//这一句会读取BOM头//bre = new BufferedReader(new InputStreamReader(in, "UTF-8"));//这一句会干掉BOM头bre = new BufferedReader(new UnicodeReader(in, Charset.defaultCharset().name()));//String line = 
bre.readLine();while(line != null){writer.write(line);/*加上这段代码可以查看更详细的16进制byte[] allbytes = line.getBytes("UTF-8");for (int i=0; i < allbytes.length; i++){int tmp = allbytes[i];String hexString = Integer.toHexString(tmp);// 1个byte变成16进制的,只需要2位就可以表示了,取后面两位,去掉前面的符号填充hexString = hexString.substring(hexString.length() -2);System.out.print(hexString.toUpperCase());System.out.print(" ");}*/line = bre.readLine();}writer.flush();bre.close();FileWriter f2 = new FileWriter(file);writer.writeTo(f2);f2.close();writer.close();}private String extension = null ;public Utf8BomRemover(String extension) {super ();this .extension = extension;}/** 啟動對某個文件夾的篩選 */@SuppressWarnings ( "unchecked" )public void start(String rootDir) throws IOException {traverseFolder2(rootDir);}public void traverseFolder2(String path) throws IOException {File file = new File(path);if (file.exists()) {File[] files = file.listFiles();if (files.length == 0) {System.out.println("文件夹是空的!");return;} else {for (File file2 : files) {if (file2.isDirectory()) {System.out.println("文件夹:" + file2.getAbsolutePath());traverseFolder2(file2.getAbsolutePath());} else {remove(file2.getAbsolutePath());}}}} else {System.out.println("文件不存在!");}}/** 移除UTF-8的BOM */private void remove(String path) throws IOException {saveFile(path);trimBom(path);}}
package cn.com.do1.component.common.util;import java.io.IOException;import java.io.InputStream;import java.io.InputStreamReader;import java.io.PushbackInputStream;import java.io.Reader;/**version: 1.1 / 2007-01-25- changed BOM recognition ordering (longer boms first)网络地址:http://koti.mbnet.fi/akini/java/unicodereader/UnicodeReader.java.txtOriginal pseudocode : Thomas WeidenfellerImplementation tweaked: Aki Nieminenhttp://www.unicode.org/unicode/faq/utf_bom.htmlBOMs:00 00 FE FF = UTF-32, big-endianFF FE 00 00 = UTF-32, little-endianEF BB BF = UTF-8,FE FF = UTF-16, big-endianFF FE = UTF-16, little-endianWin2k Notepad:Unicode format = UTF-16LE***//*** Generic unicode textreader, which will use BOM mark* to identify the encoding to be used. If BOM is not found* then use a given default or system encoding.*/public class UnicodeReader extends Reader {PushbackInputStream internalIn;InputStreamReader internalIn2 = null;String defaultEnc;private static final int BOM_SIZE = 4;/**** @param in inputstream to be read* @param defaultEnc default encoding if stream does not have* BOM marker. Give NULL to use system-level default.*/UnicodeReader(InputStream in, String defaultEnc) {internalIn = new PushbackInputStream(in, BOM_SIZE);this.defaultEnc = defaultEnc;}public String getDefaultEncoding() {return defaultEnc;}/*** Get stream encoding or NULL if stream is uninitialized.* Call init() or read() method to initialize it.*/public String getEncoding() {if (internalIn2 == null) return null;return internalIn2.getEncoding();}/*** Read-ahead four bytes and check for BOM marks. 
Extra bytes are* unread back to the stream, only BOM bytes are skipped.*/protected void init() throws IOException {if (internalIn2 != null) return;String encoding;byte bom[] = new byte[BOM_SIZE];int n, unread;n = internalIn.read(bom, 0, bom.length);if ( (bom[0] == (byte)0x00) && (bom[1] == (byte)0x00) &&(bom[2] == (byte)0xFE) && (bom[3] == (byte)0xFF) ) {encoding = "UTF-32BE";unread = n - 4;} else if ( (bom[0] == (byte)0xFF) && (bom[1] == (byte)0xFE) &&(bom[2] == (byte)0x00) && (bom[3] == (byte)0x00) ) {encoding = "UTF-32LE";unread = n - 4;} else if ( (bom[0] == (byte)0xEF) && (bom[1] == (byte)0xBB) &&(bom[2] == (byte)0xBF) ) {encoding = "UTF-8";unread = n - 3;} else if ( (bom[0] == (byte)0xFE) && (bom[1] == (byte)0xFF) ) {encoding = "UTF-16BE";unread = n - 2;} else if ( (bom[0] == (byte)0xFF) && (bom[1] == (byte)0xFE) ) {encoding = "UTF-16LE";unread = n - 2;} else {// Unicode BOM mark not found, unread all bytesencoding = defaultEnc;unread = n;}//System.out.println("read=" + n + ", unread=" + unread);if (unread > 0) internalIn.unread(bom, (n - unread), unread);// Use given encodingif (encoding == null) {internalIn2 = new InputStreamReader(internalIn);} else {internalIn2 = new InputStreamReader(internalIn, encoding);}}public void close() throws IOException {init();internalIn2.close();}public int read(char[] cbuf, int off, int len) throws IOException {init();return internalIn2.read(cbuf, off, len);}}
java 清除 bom的更多相关文章
- Linux下查找包含BOM头的文件和清除BOM头命令 2014-08-16 12:30:50
Linux下查找包含BOM头的文件和清除BOM头命令 2014-08-16 12:30:50 分类: 系统运维 查找包含BOM头的文件,命令如下: 点击(此处)折叠或打开 grep -r -I -l ...
- 批量清除BOM头
批量清除BOM头 (2012-03-05 13:28:30) 转载▼ 标签: 杂谈 有些php文件由于不小心保存成了含bom头的格式而导致出现一系列的问题.以下是批量清除bom头的代码,复制代码, ...
- Java清除:收尾和垃圾收集
垃圾收收集器(GC)只知道释放由new关键字分配的内存,所以不知道如何释放对象的"特殊"内存.为了解决这个问题,Java提供了一个名为:finalize()的方法,可为我们的类定义 ...
- 2018-03-21 11:34:44 java脚本批量转换java utf-8 bom源码文件为utf-8编码文件
package com.springbootdubbo; import java.io.*;import java.util.ArrayList;import java.util.List; /** ...
- java清除所有微博短链接 Java问题通用解决代码
java实现微博短链接清除,利用正则,目前只支持微博短链接格式为"http://域名/字母或数字8位以内"的链接格式,现在基本通用 如果链接有多个,返回结果中会有多出的空格,请注意 ...
- linux下查找包含BOM头的文件和清除BOM头命令
查找包含BOM头的文件,命令如下: grep -r -I -l $'^\xEF\xBB\xBF' ./ 这条命令会查找当前目录及子目录下所有包含BOM头的文件,并把文件名在屏幕上输出. 但 ...
- 清除BOM头源码
BOM: Byte Order Mark UTF-8 BOM又叫UTF-8 签名,其实UTF-8 的BOM对UTF-8没有作用,是为了支援UTF-16,UTF-32才加上的BOM,BOM签名的意思就是 ...
- Linux 查找bom头文件,清除bom头命令
1.查找bom头文件 grep -r -I -l $'^\xEF\xBB\xBF' ./ 2.替换bom头文件 find . -type f -exec sed -i 's/\xEF\xBB\xBF/ ...
- JAVA输出带BOM的UTF-8编码的文件
当从http 的response输出CSV文件的时候,设置为utf8的时候默认是不带bom的,可是windows的Excel是使用bom来确认utf8编码的,全部须要把bom写到文件的开头. 微软在 ...
随机推荐
- OpenCV学习(10) 图像的腐蚀与膨胀(1)
建议大家看看网络视频教程:http://www.opencvchina.com/thread-886-1-1.html 腐蚀与膨胀都是针对灰度图的形态学操作,比如下面的一副16*16的灰度图. ...
- C#常见算法题目
//冒泡排序 public class bubblesorter { public void sort(int[] list) { ...
- J2EE 中 用 El表达式 和 Jsp 方式 取得 URL 中的参数方法
使用 el表达式方法: var urlParamValue = "${param.urlVarName}"; 使用 Jsp 表达式 var urlParamValue2 = &qu ...
- [Node.js] Level 6. Socket.io
6.2 Setting Up socket.io Server-Side So far we've created an Express server. Now we want to start bu ...
- 委托批量处理Excel
在以前的博文中--CAD批量处理工具--BatchProc,即只要用户输入处理单个文件的代码,即可批量处理多个文件.使用起来特别方便. 在现在的地籍处理中,处理Excel的情况比较多,尤其需要反反复复 ...
- Java中解压文件名有中文的rar包出现乱码问题的解决
import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import j ...
- 算法笔记_009:字符串匹配(Java)
1 问题描述 给定一个n个字符组成的串(称为文本),一个m(m <= n)的串(称为模式),从文本中寻找匹配模式的子串. 2 解决方案 2.1 蛮力法 package com.liuzhen.c ...
- Android 再按一次退出应用的代码
private long exitTime = 0; @Override public boolean onKeyDown(int keyCode, KeyEvent event) { if (key ...
- vue vm.$attrs 使用
1.vm.$attrs 说明 https://cn.vuejs.org/v2/api/#vm-attrs 将父组件的属性(除去在props中传入的属性)传递给子组件. 2.代码分析 以下是elemen ...
- Warning: cast to/from pointer from/to integer of different size
将int变量转为(void*)时出现错误 error: cast to pointer from integer of different size [-Werror=int-to-pointer-c ...