17 votes — accepted answer

Here's some code. It is only lightly tested and there are probably a few improvements to be made. Call this function to convert a UTF-8 string to a UTF-16 wstring. If it decides that the input string is not valid UTF-8 it throws an exception (std::logic_error); otherwise it returns the equivalent UTF-16 wstring.

/// Converts a UTF-8 encoded byte string into a UTF-16 encoded wide string.
///
/// Every code point above U+FFFF is emitted as a surrogate pair, regardless
/// of how wide wchar_t is on the current platform.
///
/// @param utf8  The bytes to decode.
/// @return      The decoded text as UTF-16 code units stored in wchar_t.
/// @throws std::logic_error("not a UTF-8 string") if the input contains
///         a stray continuation byte, an invalid lead byte, a truncated
///         sequence, an overlong encoding, an encoded surrogate
///         (U+D800..U+DFFF) or a value above U+10FFFF.
std::wstring utf8_to_utf16(const std::string& utf8)
{
    std::wstring utf16;
    // A UTF-16 rendering never needs more code units than the UTF-8 input
    // has bytes, so one reservation avoids repeated growth.
    utf16.reserve(utf8.size());

    size_t i = 0;
    while (i < utf8.size())
    {
        const unsigned char lead = static_cast<unsigned char>(utf8[i++]);
        unsigned long uni;       // code point being assembled
        unsigned long smallest;  // smallest code point that legitimately needs this length
        size_t todo;             // number of continuation bytes still expected

        if (lead <= 0x7F)
        {
            uni = lead;
            smallest = 0;
            todo = 0;
        }
        else if (lead <= 0xBF)
        {
            // 10xxxxxx: continuation byte with no lead byte in front of it.
            throw std::logic_error("not a UTF-8 string");
        }
        else if (lead <= 0xDF)
        {
            uni = lead & 0x1F;
            smallest = 0x80;
            todo = 1;
        }
        else if (lead <= 0xEF)
        {
            uni = lead & 0x0F;
            smallest = 0x800;
            todo = 2;
        }
        else if (lead <= 0xF7)
        {
            uni = lead & 0x07;
            smallest = 0x10000;
            todo = 3;
        }
        else
        {
            throw std::logic_error("not a UTF-8 string");
        }

        for (size_t j = 0; j < todo; ++j)
        {
            if (i == utf8.size())
                throw std::logic_error("not a UTF-8 string");
            const unsigned char next = static_cast<unsigned char>(utf8[i++]);
            if (next < 0x80 || next > 0xBF)
                throw std::logic_error("not a UTF-8 string");
            uni = (uni << 6) + (next & 0x3F);
        }

        // Overlong form: the value would have fit in a shorter sequence
        // (e.g. C0 80 for U+0000). Such input is ill-formed UTF-8.
        if (uni < smallest)
            throw std::logic_error("not a UTF-8 string");
        // Surrogates are reserved for UTF-16 and must not be encoded in UTF-8.
        if (uni >= 0xD800 && uni <= 0xDFFF)
            throw std::logic_error("not a UTF-8 string");
        if (uni > 0x10FFFF)
            throw std::logic_error("not a UTF-8 string");

        if (uni <= 0xFFFF)
        {
            utf16 += static_cast<wchar_t>(uni);
        }
        else
        {
            // Split the 20 bits above the BMP into a high/low surrogate pair.
            const unsigned long offset = uni - 0x10000;
            utf16 += static_cast<wchar_t>((offset >> 10) + 0xD800);
            utf16 += static_cast<wchar_t>((offset & 0x3FF) + 0xDC00);
        }
    }
    return utf16;
}

http://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring

#pragma once
#include <string> #ifdef tstring
#error "\"tstring\" Macro has been defined."
#else
#ifdef _UNICODE
#define tstring wstring
#else
#define tstring string
#endif
#endif class EncodingConverter
{
public:
static int AnsiStrToWideStr(std::string& strSrc, std::wstring& strDest)
{
int nLen = strSrc.length() + ;
int nRet = ; nLen *= sizeof(wchar_t); wchar_t* pszW = new wchar_t[nLen];
memset(pszW, , nLen); nRet = MultiByteToWideChar(CP_ACP, , strSrc.c_str(), -, pszW, nLen); strDest = pszW;
delete[] pszW; return nRet;
}; static int WideStrToAnsiStr(std::wstring& strSrc, std::string& strDest)
{
int nLen = strSrc.length() + ;
int nRet = ; nLen *= sizeof(wchar_t); char* pszA = new char[nLen];
memset(pszA, , nLen); nRet = WideCharToMultiByte(CP_ACP, , strSrc.c_str(), -, pszA, nLen, NULL, NULL); strDest = pszA;
delete[] pszA; return nRet;
}; static int AnsiStrToTStr(std::string& strSrc, std::tstring& strDest)
{
int nRet = ; #ifdef _UNICODE
nRet = AnsiStrToWideStr(strSrc, strDest);
#else
strDest = strSrc;
nRet = strDest.length();
#endif return nRet;
}; static int TStrToAnsiStr(std::tstring& strSrc, std::string& strDest)
{
int nRet = ; #ifdef _UNICODE
nRet = WideStrToAnsiStr(strSrc, strDest);
#else
strDest = strSrc;
nRet = strDest.length();
#endif return nRet;
}; static int WideStrToTStr(std::wstring& strSrc, std::tstring& strDest)
{
int nRet = ; #ifdef _UNICODE
strDest = strSrc;
nRet = strDest.length();
#else
nRet = WideStrToAnsiStr(strSrc, strDest);
#endif return nRet;
}; static int TStrToWideStr(std::tstring& strSrc, std::wstring& strDest)
{
int nRet = ; #ifdef _UNICODE
strDest = strSrc;
nRet = strDest.length();
#else
nRet = AnsiStrToWideStr(strSrc, strDest);
#endif return nRet;
}; static std::string ToAnsiString(const wchar_t* lpStr)
{
std::wstring wide_string = lpStr;
std::string ansi_string; WideStrToAnsiStr(wide_string, ansi_string);
return ansi_string;
}; static std::string ToAnsiString(const char* lpStr)
{
return std::string(lpStr);
}; static std::wstring ToWideString(const wchar_t* lpStr)
{
return std::wstring(lpStr);
}; static std::wstring ToWideString(const char* lpStr)
{
std::string ansi_string = lpStr;
std::wstring wide_string; AnsiStrToWideStr(ansi_string, wide_string);
return wide_string;
}; static std::tstring ToTString(const char* lpStr)
{
#ifdef _UNICODE
return ToWideString(lpStr);
#else
return ToAnsiString(lpStr);
#endif
}; static std::tstring ToTString(const wchar_t* lpStr)
{
#ifdef _UNICODE
return ToWideString(lpStr);
#else
return ToAnsiString(lpStr);
#endif
}; static int WideStrToUtf8Str(std::wstring& strSrc, std::string& strDest)
{
int nRet = ;
int nLen = ; nLen = WideCharToMultiByte(CP_UTF8, , strSrc.c_str(), -, NULL, , NULL, NULL); char * lpUtf8Str = new char[nLen+];
memset(lpUtf8Str, , nLen);
nRet = WideCharToMultiByte(CP_UTF8, , strSrc.c_str(), -, lpUtf8Str, nLen, NULL, NULL);
strDest = lpUtf8Str;
delete[] lpUtf8Str; return nRet;
}; static int AnsiStrToUtf8Str(std::string& strSrc, std::string& strDest)
{
int nRet = ;
std::wstring wide_string; nRet = AnsiStrToWideStr(strSrc, wide_string);
nRet = WideStrToUtf8Str(wide_string, strDest); return nRet;
}; static int Utf8StrToWideStr(const std::string& strSrc, std::wstring& strDest)
{
int nRet = ;
int nLen = ; nLen = MultiByteToWideChar(CP_UTF8, , strSrc.c_str(), -, NULL, ); wchar_t* lpWideStr = new wchar_t[nLen];
memset(lpWideStr, , nLen*sizeof(lpWideStr[]));
nRet = MultiByteToWideChar(CP_UTF8, , strSrc.c_str(), -, lpWideStr, nLen);
strDest = lpWideStr;
delete[] lpWideStr; return nRet;
}; static int Utf8StrToAnsiStr(const std::string& strSrc, std::string& strDest)
{
int nRet = ;
std::wstring wide_string; nRet = Utf8StrToWideStr(strSrc, wide_string);
nRet = WideStrToAnsiStr(wide_string, strDest); return nRet;
}; static int Utf8StrToTStr(const std::string& strSrc, std::tstring& strDest)
{
#ifdef UNICODE
return Utf8StrToWideStr(strSrc, strDest);
#else
return Utf8StrToAnsiStr(strSrc, strDest);
#endif
}; static std::string ToUtf8String(const std::string& str)
{
std::string ansi_string = str;
std::string utf8_string; AnsiStrToUtf8Str(ansi_string, utf8_string);
return utf8_string;
}; static std::string ToUtf8String(const std::wstring& str)
{
std::wstring wide_string = str;
std::string utf8_string; WideStrToUtf8Str(wide_string, utf8_string);
return utf8_string;
};
};

https://github.com/yaocoder/utility/blob/master/src/common/EncodingConverter.h

utf8_to_utf16的更多相关文章

  1. boost::xml——基本操作以及中文乱码解决方案 (续)

    本博文主要想说明以下两点: 1.对于上一篇的<boost::xml——基本操作以及中文乱码解决方案>解释,这篇博文基本解决了正确输入输出中英文问题,但是好像还没有解决修改中文出现乱码的问题 ...

  2. C++ MFC std::string转为 std::wstring

    std::string转为 std::wstring std::wstring UTF8_To_UTF16(const std::string& source) { unsigned long ...

  3. 谷歌拼音自带lua

    function fast_string_banji(argument) return {"快捷1", "快捷2", "快捷3", &quo ...

  4. 谷歌拼音输入法扩展API开发指南

    为了帮助开发者在谷歌拼音输入法的基本输入功能基础上,开发和定义更丰富的扩展输入功能,谷歌拼音输入法提供了以Lua脚本编程语言为基础的输入法扩展API.利用输入法扩展API,开发者可以编写自定义的输入功 ...

随机推荐

  1. iOS组件化思路-大神博客研读和思考

    一.大神博客研读 随着应用需求逐步迭代,应用的代码体积将会越来越大,为了更好的管理应用工程,我们开始借助CocoaPods版本管理工具对原有应用工程进行拆分.但是仅仅完成代码拆分还不足以解决业务之间的 ...

  2. PHP表单验证内容是否为空

    内容为空效果图为: 填写内容效果图: 下面是验证程序的代码: <!doctype html> <html> <head> <meta http-equiv=& ...

  3. Oracle中not exists 与not in 的使用情况

    1.在oracle11g以上版本,oracle已经做了优化,能够自动将in优化成exists方式,因此oracle11g以上版本,使用in和exists效果是一样的. 2.在oracle中,使用not ...

  4. SQL 中case when then else 用法

    SQL如下: SELECT DISTINCTsy_haken_type,sy_sagyo_type,sy_kokyaku_cdFROm tbl_syukeiWHERE (sy_sagyo_ymd be ...

  5. (ASP页面查询等待提示效果)GridViewなどで検索中に「処理中メッセージ」を表示する方法(※他の長い時間処理も参照できる)

    原博客 http://ino1970.blog119.fc2.com/blog-entry-163.html GridViewなどで検索中に「処理中メッセージ」を表示する方法 「GridViewなどで ...

  6. Js 上传文件 页面不刷新

    html控件代码: <form id="form1"> <p><input type="file" name="mfil ...

  7. Java多线程读书笔记之一

    今天开始陆续将这几天跟进Java多线程知识的成果记录下来分享. Java多线程的知识是一直想要系统彻底的看完的,但是懒惰加无聊早就了我每天都没有进展,这回下决心一定要把这块知识系统梳理完. 我的知识来 ...

  8. python基础知识十

    特殊的方法 在类中有一些特殊的方法具有特殊的意义,比如__init__和__del__方法,它们的重要性我们已经学习过了. 一般说来,特殊的方法都被用来模仿某个行为.例如,如果你想要为你的类使用x[k ...

  9. sql - 选出指定范围的行

    Select no=Identity(int,1,1),* Into #temptable From dbo.tName order by fName --利用Identity函数生成记录序号 Sel ...

  10. KMP算法_读书笔记

    下面是KMP算法的实现伪代码: KMP_MATCHER ( T, P ) . n = T.length . m = P.length . next = COMPUTE_PREFIX_FUNCTION ...