多字节(一般指GBK) utf8 Unicode 编码互转

// c:\Program Files\Microsoft SDKs\Windows\v7.0A\Include\WinNls.h
// Code page identifiers excerpted from the SDK header named above. In this
// file they are passed as the first (code page) argument of
// MultiByteToWideChar / WideCharToMultiByte; only CP_ACP and CP_UTF8 are used.

#define CP_ACP                    0           // default to ANSI code page

#define CP_OEMCP                  1           // default to OEM  code page

#define CP_MACCP                  2           // default to MAC  code page

#define CP_THREAD_ACP             3           // current thread's ANSI code page

#define CP_SYMBOL                 42          // SYMBOL translations

#define CP_UTF7                   65000       // UTF-7 translation

#define CP_UTF8                   65001       // UTF-8 translation

// -- c:\Program Files\Microsoft SDKs\Windows\v7.0A\Include\WinNT.h
// Character and string-pointer typedefs excerpted from the SDK header named
// above. Lines consisting of "......" mark declarations elided from the excerpt.

//

// UNICODE (Wide Character) types

//

// WCHAR is wchar_t unless _MAC is defined, in which case it is unsigned short.
#ifndef _MAC

typedef wchar_t WCHAR;    // wc,   16-bit UNICODE character

#else

// some Macintosh compilers don't define wchar_t in a convenient location, or define it as a char

typedef unsigned short WCHAR;    // wc,   16-bit UNICODE character

#endif

// Pointers to mutable wide characters.
typedef WCHAR *PWCHAR, *LPWCH, *PWCH;

// Pointers to CONST wide characters.
typedef CONST WCHAR *LPCWCH, *PCWCH;

// Pointers to mutable wide characters, annotated __nullterminated.
typedef __nullterminated WCHAR *NWPSTR, *LPWSTR, *PWSTR;

......

// Pointers to CONST wide characters, annotated __nullterminated.
typedef __nullterminated CONST WCHAR *LPCWSTR, *PCWSTR;

......

//

// ANSI (Multi-byte Character) types

//

// Pointers to mutable CHAR.
typedef CHAR *PCHAR, *LPCH, *PCH;

// Pointers to CONST CHAR.
typedef CONST CHAR *LPCCH, *PCCH;

// Pointers to mutable CHAR, annotated __nullterminated.
typedef __nullterminated CHAR *NPSTR, *LPSTR, *PSTR;

......

// Pointers to CONST CHAR, annotated __nullterminated.
typedef __nullterminated CONST CHAR *LPCSTR, *PCSTR;

......

//

// 多字节(一般指GBK) utf8 Unicode 编码互转

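/*

MultiByteToWideChar: converts a multi-byte string (the ANSI code page, CP_ACP, e.g. GBK; or UTF-8, CP_UTF8) into a wide-character (Unicode) string.

    e.g. MultiByteToWideChar(CP_ACP, 0, old_str, old_str_len, new_str, new_Len);

    converts old_str from the CP_ACP encoding to wide characters. The first argument (CP_ACP here) names the multi-byte encoding being converted FROM.

WideCharToMultiByte: converts a wide-character (Unicode) string into a multi-byte string (the ANSI code page, CP_ACP, e.g. GBK; or UTF-8, CP_UTF8).

    e.g. WideCharToMultiByte(CP_UTF8, 0, old_str, old_str_len, new_str, new_Len, NULL, NULL);

    converts the wide-character old_str into a UTF-8 multi-byte string. The first argument (CP_UTF8 here) names the multi-byte encoding being converted TO.

To convert MultiByte1 into MultiByte2, first convert MultiByte1 to wide characters, then convert the wide characters to MultiByte2.

*/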

// Converts a NUL-terminated UTF-8 string to the system ANSI code page (CP_ACP).
//
// strUtf8: NUL-terminated input; a null pointer yields an empty string.
// Returns: the converted string, the input unchanged when the lead-byte
//          heuristic below decides it is not UTF-8, or an empty string when a
//          conversion call fails.
std::string Utf8ToGBK(const char* strUtf8)
{
    if (strUtf8 == NULL)
    {
        return std::string();
    }

    // Heuristic kept from the original: a string whose first byte is <= 0xE0
    // is treated as "not UTF-8" and returned as-is.
    // NOTE(review): this also skips UTF-8 text that starts with an ASCII or a
    // two-byte sequence (lead bytes 0xC2-0xDF) -- confirm callers rely on it.
    const unsigned char code_mark = static_cast<unsigned char>(strUtf8[0]);
    if (code_mark <= 0xE0)
    {
        return strUtf8;
    }

    // Pass 1 (UTF-8 -> wide): a zero-sized output buffer makes the API report
    // the required length; with an input length of -1 that count includes the
    // terminating NUL.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, strUtf8, -1, NULL, 0);
    if (wideLen <= 0)
    {
        return std::string();
    }

    std::wstring wide(wideLen, L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, strUtf8, -1, &wide[0], wideLen) <= 0)
    {
        return std::string();
    }

    // Pass 2 (wide -> ANSI code page), same size-then-convert pattern.
    const int gbkLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
    if (gbkLen <= 0)
    {
        return std::string();
    }

    std::string gbk(gbkLen, '\0');
    if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &gbk[0], gbkLen, NULL, NULL) <= 0)
    {
        return std::string();
    }

    // The buffer ends with the NUL written by the API; rebuild from c_str()
    // so the returned string stops at that terminator, as the original did.
    return std::string(gbk.c_str());
}

// Converts a multi-byte string in the ANSI code page (CP_ACP) to UTF-8.
//
// pu8:  receives the UTF-8 bytes on success; cleared if the final conversion
//       writes an unexpected number of bytes; otherwise left as it was.
// pmb:  source bytes in the ANSI code page.
// mLen: length of pmb as passed to MultiByteToWideChar (per the Win32
//       documentation, -1 means "NUL-terminated" and the output then includes
//       the terminator).
// Returns true on success, false if any conversion step or an allocation fails.
bool MBToUTF8(vector<char>& pu8, const char* pmb, int32 mLen)
{
    try
    {
        // Sizing pass: a zero-sized output buffer makes the API return the
        // number of wide characters required.
        const int32 wideLen = MultiByteToWideChar(CP_ACP, 0, pmb, mLen, NULL, 0);
        if (wideLen <= 0)
        {
            return false;
        }

        // The scratch buffer is owned by a vector so every early return below
        // releases it.
        vector<wchar_t> wide(wideLen);
        if (MultiByteToWideChar(CP_ACP, 0, pmb, mLen, &wide[0], wideLen) != wideLen)
        {
            return false;
        }

        // Wide characters -> UTF-8, same size-then-convert pattern.
        const int32 utf8Len = WideCharToMultiByte(CP_UTF8, 0, &wide[0], wideLen, NULL, 0, NULL, NULL);
        if (utf8Len <= 0)
        {
            return false;
        }

        pu8.resize(utf8Len);
        if (WideCharToMultiByte(CP_UTF8, 0, &wide[0], wideLen, &pu8[0], utf8Len, NULL, NULL) != utf8Len)
        {
            pu8.clear();
            return false;
        }
        return true;
    }
    catch (const bad_alloc&)
    {
        // Allocation failure is reported through the return value.
        return false;
    }
}

// Converts a UTF-8 string to the multi-byte ANSI code page (CP_ACP).
//
// pmb:     receives the ANSI-code-page bytes on success; cleared if the final
//          conversion writes an unexpected number of bytes; otherwise left as
//          it was.
// pu8:     source bytes in UTF-8.
// utf8Len: length of pu8 as passed to MultiByteToWideChar (per the Win32
//          documentation, -1 means "NUL-terminated" and the output then
//          includes the terminator).
// Returns true on success, false if any conversion step or an allocation fails.
bool UTF8ToMB(vector<char>& pmb, const char* pu8, int32 utf8Len)
{
    try
    {
        // Sizing pass: a zero-sized output buffer makes the API return the
        // number of wide characters required.
        const int32 wideLen = MultiByteToWideChar(CP_UTF8, 0, pu8, utf8Len, NULL, 0);
        if (wideLen <= 0)
        {
            return false;
        }

        // The scratch buffer is owned by a vector so every early return below
        // releases it.
        vector<wchar_t> wide(wideLen);
        if (MultiByteToWideChar(CP_UTF8, 0, pu8, utf8Len, &wide[0], wideLen) != wideLen)
        {
            return false;
        }

        // Wide characters -> ANSI code page, same size-then-convert pattern.
        const int32 MBLen = WideCharToMultiByte(CP_ACP, 0, &wide[0], wideLen, NULL, 0, NULL, NULL);
        if (MBLen <= 0)
        {
            return false;
        }

        pmb.resize(MBLen);
        if (WideCharToMultiByte(CP_ACP, 0, &wide[0], wideLen, &pmb[0], MBLen, NULL, NULL) != MBLen)
        {
            pmb.clear();
            return false;
        }
        return true;
    }
    catch (const bad_alloc&)
    {
        // Allocation failure is reported through the return value.
        return false;
    }
}

// Converts a multi-byte string in the ANSI code page (CP_ACP) to wide
// characters.
//
// pun:  receives the wide characters on success; cleared if the conversion
//       pass writes an unexpected count; left untouched if sizing fails.
// pmb:  source bytes in the ANSI code page.
// mLen: length of pmb as passed to MultiByteToWideChar.
// Returns true on success, false otherwise.
bool MBToUnicode(vector<wchar_t>& pun, const char* pmb, int32 mLen)
{
    // Sizing pass: a zero-sized output buffer makes the API return the number
    // of wide characters required.
    const int32 uLen = MultiByteToWideChar(CP_ACP, 0, pmb, mLen, NULL, 0);
    if (uLen <= 0)
    {
        return false;
    }

    pun.resize(uLen);

    // Conversion pass straight into the caller's vector.
    const int32 nRtn = MultiByteToWideChar(CP_ACP, 0, pmb, mLen, &pun[0], uLen);
    if (nRtn != uLen)
    {
        pun.clear();
        return false;
    }
    return true;
}

// Converts a wide-character string to the multi-byte ANSI code page (CP_ACP).
//
// pmb:  receives the ANSI-code-page bytes on success; cleared if the
//       conversion pass writes an unexpected count; left untouched if sizing
//       fails.
// pun:  source wide characters.
// uLen: length of pun as passed to WideCharToMultiByte.
// Returns true on success, false otherwise.
bool UnicodeToMB(vector<char>& pmb, const wchar_t* pun, int32 uLen)
{
    // Sizing pass: a zero-sized output buffer makes the API return the number
    // of bytes required.
    const int32 MBLen = WideCharToMultiByte(CP_ACP, 0, pun, uLen, NULL, 0, NULL, NULL);
    if (MBLen <= 0)
    {
        return false;
    }

    pmb.resize(MBLen);

    // Conversion pass straight into the caller's vector.
    const int32 nRtn = WideCharToMultiByte(CP_ACP, 0, pun, uLen, &pmb[0], MBLen, NULL, NULL);
    if (nRtn != MBLen)
    {
        pmb.clear();
        return false;
    }
    return true;
}

// Converts a UTF-8 string to wide characters.
//
// pun:     receives the wide characters on success; cleared if the conversion
//          pass writes an unexpected count; left untouched if sizing fails.
// pu8:     source bytes in UTF-8.
// utf8Len: length of pu8 as passed to MultiByteToWideChar.
// Returns true on success, false otherwise.
bool UTF8ToUnicode(vector<wchar_t>& pun, const char* pu8, int32 utf8Len)
{
    // Sizing pass: a zero-sized output buffer makes the API return the number
    // of wide characters required.
    const int32 nLen = MultiByteToWideChar(CP_UTF8, 0, pu8, utf8Len, NULL, 0);
    if (nLen <= 0)
    {
        return false;
    }

    pun.resize(nLen);

    // Conversion pass straight into the caller's vector.
    const int32 nRtn = MultiByteToWideChar(CP_UTF8, 0, pu8, utf8Len, &pun[0], nLen);
    if (nRtn != nLen)
    {
        pun.clear();
        return false;
    }
    return true;
}

// Converts a wide-character string to UTF-8.
//
// pu8:  receives the UTF-8 bytes on success; cleared if the conversion pass
//       writes an unexpected count; left untouched if sizing fails.
// pun:  source wide characters.
// uLen: length of pun as passed to WideCharToMultiByte.
// Returns true on success, false otherwise.
bool UnicodeToUTF8(vector<char>& pu8, const wchar_t* pun, int32 uLen)
{
    // Sizing pass: a zero-sized output buffer makes the API return the number
    // of bytes required.
    const int32 utf8Len = WideCharToMultiByte(CP_UTF8, 0, pun, uLen, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
    {
        return false;
    }

    pu8.resize(utf8Len);

    // Conversion pass straight into the caller's vector.
    const int32 nRtn = WideCharToMultiByte(CP_UTF8, 0, pun, uLen, &pu8[0], utf8Len, NULL, NULL);
    if (nRtn != utf8Len)
    {
        pu8.clear();
        return false;
    }
    return true;
}

多字节(一般指GBK) utf8 Unicode 编码互转的更多相关文章

utf8 unicode 编码互转
static function utf8_to_unicode($c) { switch(strlen($c)) { case 1: return ord($c); case 2: $n = (ord ...
MySQL 解决 emoji表情的方法，使用utf8mb4 字符集(4字节 UTF-8 Unicode 编码)
p.p1 {margin: 0.0px 0.0px 0.0px 0.0px; font: 12.0px 'Helvetica Neue'; color: #454545} span.s1 {font: ...
GBK/ UTF-8/ UNICODE（字符编码）
在python2中:如果执行程序,在编译器中,因为默认的编码是ASCII码(英文),所以如果输入中文就会出现乱码,因此为了避免这种乱码的情况发生,在输入中文字符串之后,必须进行手动转码,将GBK/ U ...
C# unicode GBK UTF-8和汉字互转
界面: 源码: using System; using System.Collections.Generic; using System.ComponentModel; using System.Da ...
emoji表情与unicode编码互转(JS,JAVA,C#)
1.表情字符转编码 [C#] Encoding.UTF32.GetBytes("
字符编码-UNICODE,GBK,UTF-8区别【转转】
字符编码介绍及不同编码区别今天看到这篇关于字符编码的文章,抑制不住喜悦(总结的好详细)所以转到这里来.转自:祥龙之子http://www.cnblogs.com/cy163/archive/2007 ...
【JAVA编码专题】UNICODE,GBK,UTF-8区别
简单来说,unicode,gbk和大五码就是编码的值,而utf-8,uft-16之类就是这个值的表现形式．而前面那三种编码是一兼容的,同一个汉字,那三个码值是完全不一样的．如＂汉＂的uncode值与g ...
BIG5, GB(GB2312, GBK, ...), Unicode编码, UTF8, WideChar, MultiByte, Char说明与区别
汉语unicode编译方式,BIG5是繁体规范,GB是简体规范 GB是大陆使用的国标码,BIG5码,又叫大五码,是台湾使用的繁体码. BIG5编码, GB编码(GB2312, GBK, ...), U ...
一文读懂所有的编码方式(UTF-8、GBK、Unicode、宽字节...)
编码方式就分两类:ANSI编码.Unicode编码.这两类编码都兼容ASC码. ------------------------------------------------------------ ...

随机推荐

tipask 不能正常解析
<? if(!defined('IN_TIPASK')) exit('Access Denied'); include template('header'); ?> 代码如上,经查询为ph ...
从sql中image类型字段中导出图片
using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.I ...
vue项目引入社交分享插件
vshare 基于百度分享开发的支持VUE2.X的分享插件,为您带来更多的流量!提供多种风格按钮,代码加载更快,引入社会化流量,提升网页抓取速度等优点.github地址:https://github. ...
SpringMVC HandlerMethodArgumentResolver自定义参数转换器针对HashMap失效的问题
自定义Spring MVC3的参数映射和返回值映射 + fastjson 自定义Spring MVC3的参数映射和返回值映射 + fastjson首先说一下场景:在一些富客户端Web应用程序中我们会有 ...
HTML——图片自动轮换和手动轮换
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/ ...
用Jquery获取checkbox多个选项
1,下拉框: var cc1 = $(".formc select[@name='country'] option[@selected]").text(); //得到下拉菜单的 ...
免费 web api 接口大全
下面的接口来自互联网,部分功能需要付费查询手机 http://www.yodao.com/s-martresult-xml/search.s?type=mobile&q= 手机号码查询 I ...
一个小bug
如果提交表单给按钮一个名字,就会报错... <html> <body> <form action="{:U('Index/login')}" meth ...
Intellij IDEA 使用学习
Intellij中名词解释: Project,就是一个完整的项目,类似Eclipse中的WorkSet(虽然WorkSet是人为归类的). Module,是Project中的模块,类似Eclipse中 ...
Mysql数据库中InnoDB和MyISAM的差别
Mysql数据库中InnoDB和MyISAM的差别 InnoDB和MyISAM是在使用MySQL最常用的两个表类型,各有优缺点,视具体应用而定.基本的差别为:MyISAM类型不支持事务处理等高级处理, ...

多字节(一般指GBK) utf8 Unicode 编码互转

多字节(一般指GBK) utf8 Unicode 编码互转的更多相关文章

随机推荐

热门专题