2012-01-31 01:40:22 +00:00
|
|
|
|
/*
|
|
|
|
|
Copyright (C) 2006, 2008 Burkhard Plaum <plaum@ipf.uni-stuttgart.de>
|
2014-10-31 18:52:23 +00:00
|
|
|
|
Copyright (C) 2011, 2014 Rocky Bernstein <rocky@gnu.org>
|
2012-01-31 01:40:22 +00:00
|
|
|
|
Copyright (C) 2012 Pete Batard <pete@akeo.ie>
|
|
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
*/
|
|
|
|
|
/* UTF-8 support */
|
|
|
|
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
|
# define __CDIO_CONFIG_H__ 1
|
|
|
|
|
# include "config.h"
|
|
|
|
|
#endif
|
|
|
|
|
|
2012-11-04 01:33:54 +00:00
|
|
|
|
#ifdef HAVE_STDIO_H
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
2012-01-31 01:40:22 +00:00
|
|
|
|
#ifdef HAVE_STRING_H
|
2012-11-04 01:33:54 +00:00
|
|
|
|
#include <string.h>
|
2012-01-31 01:40:22 +00:00
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef HAVE_STDLIB_H
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef HAVE_ERRNO_H
|
|
|
|
|
#include <errno.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#include <cdio/utf8.h>
|
|
|
|
|
#include <cdio/logging.h>
|
2014-10-31 18:52:23 +00:00
|
|
|
|
#include <cdio/memory.h>
|
2017-03-10 18:07:48 +00:00
|
|
|
|
#include "cdio_assert.h"
|
2012-01-31 01:40:22 +00:00
|
|
|
|
|
2012-03-03 23:12:48 +00:00
|
|
|
|
/* Windows requires some basic UTF-8 support outside of Joliet */
|
|
|
|
|
#if defined(_WIN32)
|
|
|
|
|
#include <windows.h>
|
|
|
|
|
|
|
|
|
|
/*
 * Buffer-to-buffer conversions through the Win32 code page API.  Both
 * expand to a single call that passes -1 as the input length and writes
 * into a destination buffer supplied by the caller; the call's return
 * value is the value of the macro.
 */
#define wchar_to_utf8_no_alloc(wsrc, dest, dest_size) \
  WideCharToMultiByte(CP_UTF8, 0, (wsrc), -1, (dest), (dest_size), NULL, NULL)
#define utf8_to_wchar_no_alloc(src, wdest, wdest_size) \
  MultiByteToWideChar(CP_UTF8, 0, (src), -1, (wdest), (wdest_size))
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Converts an UTF-16 string to UTF8 (allocate returned string)
|
|
|
|
|
* Returns NULL on error
|
|
|
|
|
*/
|
2012-11-04 01:33:54 +00:00
|
|
|
|
static inline char* cdio_wchar_to_utf8(const wchar_t* wstr)
|
2012-03-03 23:12:48 +00:00
|
|
|
|
{
|
|
|
|
|
int size = 0;
|
|
|
|
|
char* str = NULL;
|
|
|
|
|
|
|
|
|
|
/* Find out the size we need to allocate for our converted string */
|
|
|
|
|
size = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
|
|
|
|
|
if (size <= 1) /* An empty string would be size 1 */
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
if ((str = (char*)calloc(size, 1)) == NULL)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
if (wchar_to_utf8_no_alloc(wstr, str, size) != size) {
|
|
|
|
|
free(str);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return str;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Converts an UTF8 string to UTF-16 (allocate returned string)
|
|
|
|
|
* Returns NULL on error
|
|
|
|
|
*/
|
|
|
|
|
wchar_t* cdio_utf8_to_wchar(const char* str)
|
|
|
|
|
{
|
|
|
|
|
int size = 0;
|
|
|
|
|
wchar_t* wstr = NULL;
|
|
|
|
|
|
|
|
|
|
/* Find out the size we need to allocate for our converted string */
|
|
|
|
|
size = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
|
|
|
|
|
if (size <= 1) /* An empty string would be size 1 */
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
if ((wstr = (wchar_t*)calloc(size, sizeof(wchar_t))) == NULL)
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
if (utf8_to_wchar_no_alloc(str, wstr, size) != size) {
|
|
|
|
|
free(wstr);
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
return wstr;
|
|
|
|
|
}
|
2012-11-04 01:33:54 +00:00
|
|
|
|
|
|
|
|
|
/*
 * UTF-8 compliant version of fopen().
 *
 * filename and mode are UTF-8 strings; both are converted to wide
 * characters and handed to _wfopen().
 *
 * Returns the stream opened by _wfopen(), or NULL if either argument
 * could not be converted (cdio_utf8_to_wchar() returns NULL for empty or
 * unconvertible input) or if _wfopen() itself fails.
 */
FILE* fopen_utf8(const char* filename, const char* mode)
{
  FILE* ret = NULL;
  wchar_t* wfilename = cdio_utf8_to_wchar(filename);
  wchar_t* wmode = cdio_utf8_to_wchar(mode);

  /* Never hand a NULL path or mode to _wfopen() */
  if (wfilename != NULL && wmode != NULL)
    ret = _wfopen(wfilename, wmode);

  cdio_free(wfilename);
  cdio_free(wmode);
  return ret;
}
|
2012-03-03 23:12:48 +00:00
|
|
|
|
#endif
|
|
|
|
|
|
2012-01-31 01:40:22 +00:00
|
|
|
|
#ifdef HAVE_ICONV
|
|
|
|
|
#include <iconv.h>
|
|
|
|
|
/* Charset converter object: a thin wrapper around one iconv descriptor. */
struct cdio_charset_coverter_s {
  iconv_t ic;  /* conversion descriptor used for every conversion */
};
|
|
|
|
|
|
|
|
|
|
cdio_charset_coverter_t *
|
|
|
|
|
cdio_charset_converter_create(const char * src_charset,
|
|
|
|
|
const char * dst_charset)
|
|
|
|
|
{
|
|
|
|
|
cdio_charset_coverter_t * ret;
|
|
|
|
|
ret = calloc(1, sizeof(*ret));
|
|
|
|
|
ret->ic = iconv_open(dst_charset, src_charset);
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
|
/*
 * Debug helper: write len bytes of data to stderr as a hex dump with
 * linebreak bytes per row.  Each row lists the bytes in hex, pads a short
 * final row so the text column stays aligned, then repeats the bytes as
 * characters, printing '.' for anything below 32 or with the high bit
 * set.
 *
 * Nothing is printed when len or linebreak is not positive.
 */
static void bgav_hexdump(const uint8_t * data, int len, int linebreak)
{
  int i;
  int bytes_written = 0;
  int imax;

  /* A non-positive row width would never advance bytes_written */
  if (linebreak <= 0)
    return;

  while(bytes_written < len)
    {
    /* Bytes on this row; compared by difference so the sum cannot overflow */
    imax = (len - bytes_written < linebreak) ? len - bytes_written : linebreak;

    for(i = 0; i < imax; i++)
      fprintf(stderr, "%02x ", data[bytes_written + i]);
    /* One hex cell is three columns wide ("%02x ") */
    for(i = imax; i < linebreak; i++)
      fprintf(stderr, "   ");
    for(i = 0; i < imax; i++)
      {
      if(!(data[bytes_written + i] & 0x80) && (data[bytes_written + i] >= 32))
        fprintf(stderr, "%c", data[bytes_written + i]);
      else
        fprintf(stderr, ".");
      }
    bytes_written += imax;
    fprintf(stderr, "\n");
    }
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/*
 * Release a charset converter: close its iconv descriptor and free the
 * object.  Passing NULL is a no-op.  A descriptor that never opened
 * ((iconv_t)-1) is not passed to iconv_close().
 */
void cdio_charset_converter_destroy(cdio_charset_coverter_t*cnv)
{
  if (cnv == NULL)
    return;

  if (cnv->ic != (iconv_t)-1)
    iconv_close(cnv->ic);
  free(cnv);
}
|
|
|
|
|
|
|
|
|
|
#define BYTES_INCREMENT 16
|
|
|
|
|
|
|
|
|
|
static bool
|
2012-11-04 01:33:54 +00:00
|
|
|
|
do_convert(iconv_t cd, const char * src, int src_len,
|
2012-01-31 01:40:22 +00:00
|
|
|
|
char ** dst, int *dst_len)
|
|
|
|
|
{
|
|
|
|
|
char * ret;
|
|
|
|
|
|
|
|
|
|
char *inbuf;
|
|
|
|
|
char *outbuf;
|
|
|
|
|
int alloc_size;
|
|
|
|
|
int output_pos;
|
|
|
|
|
size_t inbytesleft;
|
|
|
|
|
size_t outbytesleft;
|
|
|
|
|
|
|
|
|
|
if(src_len < 0)
|
|
|
|
|
src_len = strlen(src);
|
|
|
|
|
#if 0
|
|
|
|
|
fprintf(stderr, "Converting:\n");
|
|
|
|
|
bgav_hexdump(src, src_len, 16);
|
2014-10-31 18:52:23 +00:00
|
|
|
|
#endif
|
2012-01-31 01:40:22 +00:00
|
|
|
|
alloc_size = src_len + BYTES_INCREMENT;
|
|
|
|
|
|
|
|
|
|
inbytesleft = src_len;
|
2014-10-31 18:52:23 +00:00
|
|
|
|
|
2012-01-31 01:40:22 +00:00
|
|
|
|
/* We reserve space here to add a final '\0' */
|
|
|
|
|
outbytesleft = alloc_size-1;
|
|
|
|
|
|
|
|
|
|
ret = malloc(alloc_size);
|
|
|
|
|
|
2012-11-04 01:33:54 +00:00
|
|
|
|
inbuf = (char *)src;
|
2012-01-31 01:40:22 +00:00
|
|
|
|
outbuf = ret;
|
2014-10-31 18:52:23 +00:00
|
|
|
|
|
2012-01-31 01:40:22 +00:00
|
|
|
|
while(1)
|
|
|
|
|
{
|
2014-10-31 18:52:23 +00:00
|
|
|
|
|
2012-01-31 01:40:22 +00:00
|
|
|
|
if(iconv(cd, (ICONV_CONST char **)&inbuf, &inbytesleft,
|
|
|
|
|
&outbuf, &outbytesleft) == (size_t)-1)
|
|
|
|
|
{
|
|
|
|
|
switch(errno)
|
|
|
|
|
{
|
|
|
|
|
case E2BIG:
|
|
|
|
|
output_pos = (int)(outbuf - ret);
|
|
|
|
|
|
|
|
|
|
alloc_size += BYTES_INCREMENT;
|
|
|
|
|
outbytesleft += BYTES_INCREMENT;
|
|
|
|
|
|
|
|
|
|
ret = realloc(ret, alloc_size);
|
|
|
|
|
if (ret == NULL)
|
|
|
|
|
{
|
|
|
|
|
cdio_warn("Can't realloc(%d).", alloc_size);
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
outbuf = ret + output_pos;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
cdio_warn("Iconv failed: %s", strerror(errno));
|
|
|
|
|
if (ret != NULL)
|
|
|
|
|
free(ret);
|
|
|
|
|
return false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if(!inbytesleft)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
/* Zero terminate */
|
|
|
|
|
*outbuf = '\0';
|
|
|
|
|
|
|
|
|
|
/* Set return values */
|
|
|
|
|
*dst = ret;
|
|
|
|
|
if(dst_len)
|
|
|
|
|
*dst_len = (int)(outbuf - ret);
|
|
|
|
|
#if 0
|
|
|
|
|
fprintf(stderr, "Conversion done, src:\n");
|
|
|
|
|
bgav_hexdump(src, src_len, 16);
|
|
|
|
|
fprintf(stderr, "dst:\n");
|
|
|
|
|
bgav_hexdump((uint8_t*)(ret), (int)(outbuf - ret), 16);
|
2014-10-31 18:52:23 +00:00
|
|
|
|
#endif
|
2012-01-31 01:40:22 +00:00
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Convert src with an existing charset converter.
 *
 *   cnv      converter to use; NULL makes the call fail
 *   src      input bytes
 *   src_len  number of input bytes, passed through to do_convert()
 *   dst      receives the converted buffer on success
 *   dst_len  receives the converted length; passed through to do_convert()
 *
 * Returns the result of do_convert(), or false when cnv is NULL.
 */
bool cdio_charset_convert(cdio_charset_coverter_t*cnv,
                          char * src, int src_len,
                          char ** dst, int * dst_len)
{
  if (cnv == NULL)
    return false;

  return do_convert(cnv->ic, src, src_len, dst, dst_len);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Convert the zero-terminated UTF-8 string src to dst_charset using a
 * temporary iconv descriptor.
 *
 *   src          UTF-8 input; its length is taken with strlen() inside
 *                do_convert() because -1 is passed as the length
 *   dst          receives the converted buffer on success
 *   dst_len      passed through to do_convert() for the converted length
 *   dst_charset  name of the target charset, as understood by iconv_open()
 *
 * Returns false if an argument other than dst_len is NULL, if
 * iconv_open() does not accept the charset (a warning is logged), or if
 * the conversion fails.
 */
bool cdio_charset_from_utf8(cdio_utf8_t * src, char ** dst,
                            int * dst_len, const char * dst_charset)
{
  iconv_t ic;
  bool result;

  if (src == NULL || dst == NULL || dst_charset == NULL)
    return false;

  ic = iconv_open(dst_charset, "UTF-8");
  if (ic == (iconv_t)-1)
    {
    /* Nothing was opened, so there is nothing to close */
    cdio_warn("iconv_open failed: %s", strerror(errno));
    return false;
    }

  result = do_convert(ic, src, -1, dst, dst_len);
  iconv_close(ic);
  return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2012-11-04 01:33:54 +00:00
|
|
|
|
/*
 * Convert src from src_charset to UTF-8 using a temporary iconv
 * descriptor.
 *
 *   src          input bytes
 *   src_len      number of input bytes; it is narrowed to the int that
 *                do_convert() takes, where a negative value means
 *                "use strlen(src)"
 *   dst          receives the converted UTF-8 buffer on success
 *   src_charset  name of the source charset, as understood by iconv_open()
 *
 * Returns false if an argument is NULL, if iconv_open() does not accept
 * the charset (a warning is logged), or if the conversion fails.
 */
bool cdio_charset_to_utf8(const char *src, size_t src_len, cdio_utf8_t **dst,
                          const char * src_charset)
{
  iconv_t ic;
  bool result;

  if (src == NULL || dst == NULL || src_charset == NULL)
    return false;

  ic = iconv_open("UTF-8", src_charset);
  if (ic == (iconv_t)-1)
    {
    /* Nothing was opened, so there is nothing to close */
    cdio_warn("iconv_open failed: %s", strerror(errno));
    return false;
    }

  result = do_convert(ic, src, (int)src_len, dst, NULL);
  iconv_close(ic);
  return result;
}
|
|
|
|
|
#elif defined(_WIN32)
|
2012-02-16 20:44:16 +00:00
|
|
|
|
|
|
|
|
|
/*
 * Windows fallback used when iconv is not available: convert the UTF-8
 * string src to byte-swapped (big-endian) 16-bit units.
 *
 *   src          UTF-8 input
 *   dst          receives a calloc()ed buffer holding the swapped units
 *                followed by a zero terminator unit
 *   dst_len      receives the number of bytes written to *dst, not
 *                counting the terminator
 *   dst_charset  must compare equal to "UTF-8"
 *
 * Returns false if any argument is NULL, if dst_charset is not accepted,
 * if src is empty or cannot be converted, or if memory runs out.
 *
 * The byte swap works on pairs of bytes, so it assumes wchar_t is two
 * bytes wide, as it is on Windows.
 */
bool cdio_charset_from_utf8(cdio_utf8_t * src, char ** dst,
                            int * dst_len, const char * dst_charset)
{
  wchar_t* le_dst;
  size_t i, len;

  /* NOTE(review): the output is byte-swapped UTF-16 yet the accepted
     charset name is "UTF-8", while the matching cdio_charset_to_utf8()
     expects "UCS-2BE".  Kept as is because callers may depend on it --
     confirm which name is intended. */
  if (src == NULL || dst == NULL || dst_len == NULL || dst_charset == NULL || strcmp(dst_charset, "UTF-8") != 0)
    return false;

  /* Eliminate empty strings */
  le_dst = cdio_utf8_to_wchar(src);
  if ((le_dst == NULL) || (le_dst[0] == 0)) {
    cdio_free(le_dst);
    return false;
  }

  len = wcslen(le_dst);
  *dst = (char*)calloc(len+1, sizeof(wchar_t));
  if (*dst == NULL) {
    free(le_dst);
    return false;
  }

  /* Perform byte reversal, one two-byte unit per iteration */
  for (i=0; i<2*len; i+=2) {
    (*dst)[i] = ((char*)le_dst)[i+1];
    (*dst)[i+1] = ((char*)le_dst)[i];
  }
  *dst_len = (int)(2*len);
  free(le_dst);

  return true;
}
|
2012-01-31 01:40:22 +00:00
|
|
|
|
|
2012-11-04 01:33:54 +00:00
|
|
|
|
/*
 * Windows fallback used when iconv is not available: convert big-endian
 * UCS-2 data to a newly allocated UTF-8 string.
 *
 *   src          big-endian 16-bit units
 *   src_len      length of src in bytes, or (size_t)-1 to scan for a
 *                terminating zero unit
 *   dst          receives the UTF-8 string; set to NULL when the input is
 *                empty, memory runs out or the conversion fails
 *   src_charset  must compare equal to "UCS-2BE"
 *
 * Returns true only if a UTF-8 string was produced.  When an argument is
 * NULL or the charset is not accepted, false is returned and *dst is not
 * written.
 *
 * The byte swap fills a wchar_t buffer two bytes per unit, so it assumes
 * wchar_t is two bytes wide, as it is on Windows.
 */
bool cdio_charset_to_utf8(const char *src, size_t src_len, cdio_utf8_t **dst,
                          const char *src_charset)
{
  wchar_t* le_src;
  size_t i;

  if (src == NULL || dst == NULL || src_charset == NULL || strcmp(src_charset, "UCS-2BE") != 0)
    return false;

  /* Compute UCS-2 src length, in 16-bit units */
  if (src_len == (size_t)-1) {
    /* Scan byte pairs rather than casting src, which may not be aligned */
    for (src_len = 0; src[2*src_len] != 0 || src[2*src_len+1] != 0; src_len++);
  } else {
    src_len >>=1;
  }

  /* Eliminate empty strings */
  if ((src_len < 1) || ((src[0] == 0) && (src[1] == 0))) {
    *dst = NULL;
    return false;
  }

  /* Room for src_len units plus a terminator */
  le_src = (wchar_t*)malloc(2*src_len+2);
  if (le_src == NULL) {
    *dst = NULL;
    return false;
  }

  /* Perform byte reversal */
  for (i=0; i<src_len; i++) {
    ((char*)le_src)[2*i] = src[2*i+1];
    ((char*)le_src)[2*i+1] = src[2*i];
  }
  le_src[src_len] = 0;
  *dst = cdio_wchar_to_utf8(le_src);
  free(le_src);

  return (*dst != NULL);
}
|
|
|
|
|
#endif /* HAVE_ICONV */
|