Linux 平台和 Windows平台下 Unicode与UTF-8互转

Windows 平台：

/*
 * Read one Unicode code point from `text` starting at `*pos` and advance
 * `*pos` past the unit(s) consumed. The caller guarantees text[*pos] != 0.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * a single supplementary code point. An unpaired surrogate, or a value above
 * U+10FFFF, yields U+FFFD so that the encoder never emits invalid UTF-8.
 */
static unsigned long utf8_code_point_at(const wchar_t *text, size_t *pos)
{
    unsigned long unit = (unsigned long)text[(*pos)++];

    if (unit >= 0xD800 && unit <= 0xDBFF)
    {
        /* Peeking at text[*pos] is safe: at worst it is the terminator. */
        unsigned long low = (unsigned long)text[*pos];

        if (low >= 0xDC00 && low <= 0xDFFF)
        {
            ++(*pos);
            return 0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00);
        }
        return 0xFFFD;
    }
    if ((unit >= 0xDC00 && unit <= 0xDFFF) || unit > 0x10FFFF)
        return 0xFFFD;
    return unit;
}

/* Number of UTF-8 bytes needed to encode code point `cp` (1 to 4). */
static size_t utf8_encoded_length(unsigned long cp)
{
    if (cp < 0x0080)
        return 1;
    if (cp < 0x0800)
        return 2;
    if (cp < 0x10000)
        return 3;
    return 4;
}

/*
 * Encode a NUL-terminated wide-character string as UTF-8.
 *
 * Returns a NUL-terminated buffer obtained from malloc(), which the caller
 * must release with free(). Returns NULL when `unicode` is NULL or when the
 * allocation fails.
 */
unsigned char * make_utf8_string(const wchar_t *unicode)
{
    size_t size = 0, index = 0, out_index = 0;
    unsigned char *out;
    unsigned long c;

    if (unicode == NULL)
        return NULL;

    /* first calculate the size of the target string */
    while (unicode[index] != 0)
        size += utf8_encoded_length(utf8_code_point_at(unicode, &index));

    out = (unsigned char*)malloc(size + 1);
    if (out == NULL)
        return NULL;

    /* second pass: emit the bytes; it walks the input exactly like the first */
    index = 0;
    while (unicode[index] != 0)
    {
        c = utf8_code_point_at(unicode, &index);

        if (c < 0x0080)
        {
            out[out_index++] = (unsigned char)c;
        }
        else if (c < 0x0800)
        {
            out[out_index++] = (unsigned char)(0xc0 | (c >> 6));
            out[out_index++] = (unsigned char)(0x80 | (c & 0x3f));
        }
        else if (c < 0x10000)
        {
            out[out_index++] = (unsigned char)(0xe0 | (c >> 12));
            out[out_index++] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
            out[out_index++] = (unsigned char)(0x80 | (c & 0x3f));
        }
        else
        {
            out[out_index++] = (unsigned char)(0xf0 | (c >> 18));
            out[out_index++] = (unsigned char)(0x80 | ((c >> 12) & 0x3f));
            out[out_index++] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
            out[out_index++] = (unsigned char)(0x80 | (c & 0x3f));
        }
    }
    out[out_index] = 0x00;

    return out;
}

/*
 * Decode one UTF-8 sequence starting at `s` (s[0] must not be NUL), store
 * the code point in `*code_point` and return the number of bytes consumed
 * (always at least 1).
 *
 * Malformed input (a stray continuation byte, an invalid lead byte, a
 * truncated sequence, an overlong form or a value above U+10FFFF) produces
 * U+FFFD. A truncated sequence consumes only the bytes that were valid, so
 * the string terminator is never skipped. Encoded surrogates (ED A0..BF xx)
 * are passed through unchanged, which keeps strings that encode each
 * UTF-16 unit as its own 3-byte sequence decodable.
 */
static size_t utf8_decode_sequence(const unsigned char *s, unsigned long *code_point)
{
    unsigned long cp, min;
    size_t extra, i;
    unsigned char lead = s[0];

    if (lead < 0x80)
    {
        *code_point = lead;
        return 1;
    }
    else if ((lead & 0xe0) == 0xc0)
    {
        cp = lead & 0x1f; extra = 1; min = 0x80;
    }
    else if ((lead & 0xf0) == 0xe0)
    {
        cp = lead & 0x0f; extra = 2; min = 0x800;
    }
    else if ((lead & 0xf8) == 0xf0)
    {
        cp = lead & 0x07; extra = 3; min = 0x10000;
    }
    else
    {
        *code_point = 0xFFFD;
        return 1;
    }

    for (i = 1; i <= extra; i++)
    {
        /* A NUL terminator also fails this test, so we stop in front of it. */
        if ((s[i] & 0xc0) != 0x80)
        {
            *code_point = 0xFFFD;
            return i;
        }
        cp = (cp << 6) | (unsigned long)(s[i] & 0x3f);
    }

    if (cp < min || cp > 0x10FFFF)
        cp = 0xFFFD;
    *code_point = cp;
    return extra + 1;
}

/*
 * Decode a NUL-terminated UTF-8 string into a wide-character string.
 *
 * Returns a NUL-terminated buffer obtained from malloc(), which the caller
 * must release with free(). Returns NULL when `utf8` is NULL or when the
 * allocation fails. Where wchar_t is narrower than 4 bytes, code points
 * above U+FFFF are stored as a surrogate pair.
 */
wchar_t * make_unicode_string(const unsigned char *utf8)
{
    size_t size = 0, index = 0, out_index = 0;
    wchar_t *out;
    unsigned long c;
    const int needs_surrogates = (sizeof(wchar_t) < 4);

    if (utf8 == NULL)
        return NULL;

    /* first calculate the size of the target string */
    while (utf8[index] != 0)
    {
        index += utf8_decode_sequence(utf8 + index, &c);
        size += (needs_surrogates && c > 0xFFFF) ? 2 : 1;
    }

    out = (wchar_t*)malloc((size + 1) * sizeof(wchar_t));
    if (out == NULL)
        return NULL;

    /* second pass: store the code points; it walks the input like the first */
    index = 0;
    while (utf8[index] != 0)
    {
        index += utf8_decode_sequence(utf8 + index, &c);

        if (needs_surrogates && c > 0xFFFF)
        {
            c -= 0x10000;
            out[out_index++] = (wchar_t)(0xD800 + (c >> 10));
            out[out_index++] = (wchar_t)(0xDC00 + (c & 0x3FF));
        }
        else
        {
            out[out_index++] = (wchar_t)c;
        }
    }
    out[out_index] = 0;

    return out;
}

int StrUtil::utf8_encode(const char *from, char **to)

{

    wchar_t *unicode;

    int wchars, err;

    wchars = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,

        strlen(from), NULL, );

    if (wchars == )

    {

        fprintf(stderr, "Unicode translation error %d\n", GetLastError());

        return -;

    }

    unicode = (wchar_t*)calloc(wchars + , sizeof(unsigned short));

    if(unicode == NULL)

    {

        fprintf(stderr, "Out of memory processing string to UTF8\n");

        return -;

    }

    err = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,

        strlen(from), unicode, wchars);

    if(err != wchars)

    {

        free(unicode);

        fprintf(stderr, "Unicode encode error %d\n", GetLastError());

        return -;

    }

    /* On NT-based windows systems, we could use WideCharToMultiByte(), but

    * MS doesn't actually have a consistent API across win32.

    */

    *to = (char *)make_utf8_string(unicode);

    free(unicode);

    return ;

}

int StrUtil::utf8_decode(const char *from, char **to)

{

    wchar_t *unicode;

    int chars, err;

    /* On NT-based windows systems, we could use MultiByteToWideChar(CP_UTF8), but

    * MS doesn't actually have a consistent API across win32.

    */

    unicode = make_unicode_string((unsigned char*)from);

    if(unicode == NULL)

    {

        fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n");

        return -;

    }

    chars = ::WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,

        -, NULL, , NULL, NULL);

    if(chars == )

    {

        fprintf(stderr, "Unicode translation error %d\n", GetLastError());

        free(unicode);

        return -;

    }

    *to = (char *)calloc(chars + , sizeof(unsigned char));

    if(*to == NULL)

    {

        fprintf(stderr, "Out of memory processing string to local charset\n");

        free(unicode);

        return -;

    }

    err = ::WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,

        -, *to, chars, NULL, NULL);

    if(err != chars)

    {

        fprintf(stderr, "Unicode decode error %d\n", GetLastError());

        free(unicode);

        free(*to);

        *to = NULL;

        return -;

    }

    free(unicode);

    return ;

}

Linux 平台：

/*
 * Read one Unicode code point from `text` starting at `*pos` and advance
 * `*pos` past the unit(s) consumed. The caller guarantees text[*pos] != 0.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * a single supplementary code point. An unpaired surrogate, or a value above
 * U+10FFFF, yields U+FFFD so that the encoder never emits invalid UTF-8.
 */
static unsigned long utf8_code_point_at(const wchar_t *text, size_t *pos)
{
    unsigned long unit = (unsigned long)text[(*pos)++];

    if (unit >= 0xD800 && unit <= 0xDBFF)
    {
        /* Peeking at text[*pos] is safe: at worst it is the terminator. */
        unsigned long low = (unsigned long)text[*pos];

        if (low >= 0xDC00 && low <= 0xDFFF)
        {
            ++(*pos);
            return 0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00);
        }
        return 0xFFFD;
    }
    if ((unit >= 0xDC00 && unit <= 0xDFFF) || unit > 0x10FFFF)
        return 0xFFFD;
    return unit;
}

/* Number of UTF-8 bytes needed to encode code point `cp` (1 to 4). */
static size_t utf8_encoded_length(unsigned long cp)
{
    if (cp < 0x0080)
        return 1;
    if (cp < 0x0800)
        return 2;
    if (cp < 0x10000)
        return 3;
    return 4;
}

/*
 * Encode a NUL-terminated wide-character string as UTF-8.
 *
 * Each element is read at full wchar_t width, so code points above U+FFFF
 * are encoded as 4-byte sequences instead of being truncated to 16 bits.
 *
 * Returns a NUL-terminated buffer obtained from malloc(), which the caller
 * must release with free(). Returns NULL when `unicode` is NULL or when the
 * allocation fails.
 */
unsigned char * make_utf8_string(const wchar_t *unicode)
{
    size_t size = 0, index = 0, out_index = 0;
    unsigned char *out;
    unsigned long c;

    if (unicode == NULL)
        return NULL;

    /* first calculate the size of the target string */
    while (unicode[index] != 0)
        size += utf8_encoded_length(utf8_code_point_at(unicode, &index));

    out = (unsigned char*)malloc(size + 1);
    if (out == NULL)
        return NULL;

    /* second pass: emit the bytes; it walks the input exactly like the first */
    index = 0;
    while (unicode[index] != 0)
    {
        c = utf8_code_point_at(unicode, &index);

        if (c < 0x0080)
        {
            out[out_index++] = (unsigned char)c;
        }
        else if (c < 0x0800)
        {
            out[out_index++] = (unsigned char)(0xc0 | (c >> 6));
            out[out_index++] = (unsigned char)(0x80 | (c & 0x3f));
        }
        else if (c < 0x10000)
        {
            out[out_index++] = (unsigned char)(0xe0 | (c >> 12));
            out[out_index++] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
            out[out_index++] = (unsigned char)(0x80 | (c & 0x3f));
        }
        else
        {
            out[out_index++] = (unsigned char)(0xf0 | (c >> 18));
            out[out_index++] = (unsigned char)(0x80 | ((c >> 12) & 0x3f));
            out[out_index++] = (unsigned char)(0x80 | ((c >> 6) & 0x3f));
            out[out_index++] = (unsigned char)(0x80 | (c & 0x3f));
        }
    }
    out[out_index] = 0x00;

    return out;
}

/*
 * Decode one UTF-8 sequence starting at `s` (s[0] must not be NUL), store
 * the code point in `*code_point` and return the number of bytes consumed
 * (always at least 1).
 *
 * Malformed input (a stray continuation byte, an invalid lead byte, a
 * truncated sequence, an overlong form or a value above U+10FFFF) produces
 * U+FFFD. A truncated sequence consumes only the bytes that were valid, so
 * the string terminator is never skipped. Encoded surrogates (ED A0..BF xx)
 * are passed through unchanged.
 */
static size_t utf8_decode_sequence(const unsigned char *s, unsigned long *code_point)
{
    unsigned long cp, min;
    size_t extra, i;
    unsigned char lead = s[0];

    if (lead < 0x80)
    {
        *code_point = lead;
        return 1;
    }
    else if ((lead & 0xe0) == 0xc0)
    {
        cp = lead & 0x1f; extra = 1; min = 0x80;
    }
    else if ((lead & 0xf0) == 0xe0)
    {
        cp = lead & 0x0f; extra = 2; min = 0x800;
    }
    else if ((lead & 0xf8) == 0xf0)
    {
        cp = lead & 0x07; extra = 3; min = 0x10000;
    }
    else
    {
        *code_point = 0xFFFD;
        return 1;
    }

    for (i = 1; i <= extra; i++)
    {
        /* A NUL terminator also fails this test, so we stop in front of it. */
        if ((s[i] & 0xc0) != 0x80)
        {
            *code_point = 0xFFFD;
            return i;
        }
        cp = (cp << 6) | (unsigned long)(s[i] & 0x3f);
    }

    if (cp < min || cp > 0x10FFFF)
        cp = 0xFFFD;
    *code_point = cp;
    return extra + 1;
}

/*
 * Decode a NUL-terminated UTF-8 string into a wide-character string.
 *
 * Returns a NUL-terminated buffer obtained from malloc(), which the caller
 * must release with free() (not delete). Returns NULL when `utf8` is NULL or
 * when the allocation fails. Where wchar_t is narrower than 4 bytes, code
 * points above U+FFFF are stored as a surrogate pair.
 */
wchar_t * make_unicode_string(const unsigned char *utf8)
{
    size_t size = 0, index = 0, out_index = 0;
    wchar_t *out;
    unsigned long c;
    const int needs_surrogates = (sizeof(wchar_t) < 4);

    if (utf8 == NULL)
        return NULL;

    /* first calculate the size of the target string */
    while (utf8[index] != 0)
    {
        index += utf8_decode_sequence(utf8 + index, &c);
        size += (needs_surrogates && c > 0xFFFF) ? 2 : 1;
    }

    out = (wchar_t*)malloc((size + 1) * sizeof(wchar_t));
    if (out == NULL)
        return NULL;

    /* second pass: store the code points; it walks the input like the first */
    index = 0;
    while (utf8[index] != 0)
    {
        index += utf8_decode_sequence(utf8 + index, &c);

        if (needs_surrogates && c > 0xFFFF)
        {
            c -= 0x10000;
            out[out_index++] = (wchar_t)(0xD800 + (c >> 10));
            out[out_index++] = (wchar_t)(0xDC00 + (c & 0x3FF));
        }
        else
        {
            out[out_index++] = (wchar_t)c;
        }
    }
    out[out_index] = 0;

    return out;
}

/*
 * Convert `from` (multibyte text in the environment's locale) to UTF-8.
 *
 * On success stores a malloc()'d, NUL-terminated UTF-8 string in `*to`
 * (release it with free()) and returns 0. On failure writes a diagnostic to
 * stderr where one is available, leaves `*to` set to NULL and returns -1.
 *
 * NOTE: the function switches the process locale to the environment's
 * locale for the conversion and leaves it set to "C" on every path after
 * that switch, as the original code did on success.
 */
int utf8_encode(const char *from, char **to)
{
    wchar_t *unicode = NULL;
    size_t wchars, converted;

    if (from == NULL || to == NULL)
        return -1;
    *to = NULL;

    setlocale(LC_ALL,"");

    /* A NULL destination makes mbstowcs() report the required length
     * (terminator excluded), or (size_t)-1 for an invalid sequence. */
    wchars = mbstowcs(NULL, from, 0);
    if (wchars == (size_t)-1)
    {
        setlocale(LC_ALL,"C");
        fprintf(stderr, "Unicode encode error \n");
        return -1;
    }

    unicode = new wchar_t[wchars + 1];
    converted = mbstowcs(unicode, from, wchars + 1);
    setlocale(LC_ALL,"C");

    if (converted == (size_t)-1)
    {
        delete[] unicode;
        fprintf(stderr, "Unicode encode error \n");
        return -1;
    }
    unicode[wchars] = 0;   /* guarantee termination */

    *to = (char *)make_utf8_string(unicode);
    delete[] unicode;

    if (*to == NULL)
    {
        fprintf(stderr, "Out of memory processing string to UTF8\n");
        return -1;
    }
    return 0;
}

/*
 * Convert the UTF-8 string `from` to multibyte text in the environment's
 * locale.
 *
 * On success stores a NUL-terminated string allocated with new char[] in
 * `*to` (release it with delete[]) and returns 0. On failure writes a
 * diagnostic to stderr where one is available, leaves `*to` set to NULL and
 * returns -1.
 *
 * NOTE: the function switches the process locale to the environment's
 * locale for the conversion and leaves it set to "C" on every path after
 * that switch, as the original code did.
 */
int utf8_decode(const char *from, char **to)
{
    wchar_t *unicode = NULL;
    size_t chars, converted;

    if (from == NULL || to == NULL)
        return -1;
    *to = NULL;

    unicode = make_unicode_string((const unsigned char*)from);
    if (unicode == NULL)
    {
        fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n");
        return -1;
    }

    setlocale(LC_ALL,"");

    /* A NULL destination makes wcstombs() report the required byte count
     * (terminator excluded), or (size_t)-1 for an unconvertible character. */
    chars = wcstombs(NULL, unicode, 0);
    if (chars == (size_t)-1)
    {
        setlocale(LC_ALL,"C");
        free(unicode);
        fprintf(stderr, "Unicode decode error \n");
        return -1;
    }

    *to = new char[chars + 1];
    memset(*to, 0, chars + 1);

    converted = wcstombs(*to, unicode, chars + 1);
    setlocale(LC_ALL,"C");

    /* make_unicode_string() allocates with malloc(), so release with free() */
    free(unicode);

    if (converted == (size_t)-1)
    {
        fprintf(stderr, "Unicode decode error \n");
        delete[] *to;
        *to = NULL;
        return -1;
    }
    return 0;
}

Linux 平台和 Windows平台下 Unicode与UTF-8互转的更多相关文章

【NodeJs】Ctrl+C在Linux平台和Windows平台下的TCP连接中的不同表现
Linux平台:CentOS release 6.5 (Final) Windows平台:Windows 7 旗舰版服务器端代码如下: var net = require('net'); var s ...
Mac平台与Windows平台下AndroidStudio增量升级
Android Studio增量升级什么情况下使用最合适呢? 比如现在的as版本是2.2版本,而你的as版本2.0版本,这个时候点Check For Updates就没有反应了,因为你已经2个有版本没 ...
windows平台下nginx+PHP环境安装
因为日常工作在windows下,为方便在window是下进行PHP开发,需要在windows平台下搭建PHP开发环境,web服务器选择nginx,不过windows版本的nginx性能要比Linux/ ...
利用zabbix监控ogg进程(Windows平台下)
本文给大家介绍如何监控windows平台下的ogg程序.(注:所有操作都在administrator用户下面进行操作) 监控linux平台下的ogg程序请看:https://www.cnblogs.c ...
[转]Windows平台下Makefile学习笔记
Windows平台下Makefile学习笔记(一) 作者:朱金灿来源:http://blog.csdn.net/clever101 决心学习Makefile,一方面是为了解决编译开源代码时需要跨编译 ...
windows平台下VLC2.0.5编译
windows平台下VLC2.0.5编译说明时隔一年多,又要搞流媒体了,不过这次是要做流媒体服务器. 暂时决定使用vlc+ffmpeg+live555,虽然听有些前辈说这个组合的性能较差,只能作为学 ...
【转】Windows平台下Git服务器搭建
Windows平台下Git服务器搭建 Posted on 2015-05-18 21:29 阿祥当码农阅读(7637) 评论(0) 编辑收藏该文章转自:http://www.codeceo.co ...
windows平台下的oracle ORA-01031的解决方法
今天下午遇到一个很怪异的问题,在windows平台下sqlplus / as sysdba登陆数据库,提示权限不足, 当时就纳闷了,sys用户登陆数据库还能权限不足,问题出现了,就开始寻找解决方法呗 ...
Windows平台下MySQL常用操作与命令
Windows平台下MySQL常用操作与命令 Windows平台下MySQL常用操作与命令,学习mysql的朋友可以参考下. 1.导出整个数据库 mysqldump -u 用户名 -p --defau ...

随机推荐

Python+Selenium笔记（十五）调用JS
(一) 方法方法简单说明 execute_async_script(script, args) 异步执行JS代码 script:被执行的JS代码 args:js代码中的任意参数 execute_s ...
Problem2-Project Euler
Even Fibonacci numbers Each new term in the Fibonacci sequence is generated by adding the previous ...
使用 PowerShell 将数据磁盘附加到 Windows VM
本文介绍如何使用 PowerShell 将新磁盘和现有磁盘附加到 Windows 虚拟机. 在开始之前,请查看以下提示: 虚拟机的大小决定了可以附加多少个磁盘. 有关详细信息,请参阅虚拟机大小. 若要 ...
Qt与PyQT中设置ToolBar在AllowedArea的显示
因为个人对传统的软件GUI界面不是太喜欢,最近又在学习Qt和PyQt5,所以就有了设置ToolBar在窗口的不同地方的想法,经过浪里淘沙,最终在Qt官网里找到了,原来再添加toolBar的时候是由设置 ...
【爬坑】运行 Hadoop 的 MapReduce 示例卡住了
1. 问题说明在以伪分布式模式运行 Hadoop 自带的 MapReduce 示例,卡在了 Running job ,如图所示 2. 解决过程查看日志没得到有用的信息再次确认配置信息没有错误信息 ...
4星|《门口的野蛮人2》：美国宝万之争专业户KKR公司的疯狂借债收购史
门口的野蛮人2:KKR与资本暴利的崛起(珍藏版) 英文版是1992年出的.主要内容是1977-1998年之间KKR在美国的杠杆收购简史.从KKR创立开始,讲到1990年KKR差点倒闭.国内A股市场上前 ...
Alpha事后诸葛亮(阳光普照队)
Alpha事后诸葛亮设想和目标 1.实现文字识别,以用户喜欢的图片做背景将其保存,生成新的图片. 2.时间比较赶,主要是因为队员对于Android开发方面的了解不多,可以说是几乎没有,需要一步一步的 ...
Rx编程的第一步是将native对象转换为monad对象
Rx编程的第一步是将native对象转换为monad对象将基础类型转换为高阶类型,以便使用函数式编程的特性.
php api接口安全设计 sign理论
一. url请求的参数包括:timestamp,token, username,sign 1. timestamp: 时间戮 2. token: 登陆验证时,验证成功后,生成唯一的token(可以为u ...
[luogu2172] 部落战争
题面我们可以将'.'抽象为一个可以通过的点, 将'x'抽象为一个不可通过的点. 那么题意便可以转化为: 一条路径可以看做从任意一个没有到达过的可通过的点出发到任意一个其他的可以通过却没有被到 ...

Linux 平台和 Windows平台下 Unicode与UTF-8互转

Linux 平台和 Windows平台下 Unicode与UTF-8互转的更多相关文章

随机推荐

热门专题