Here is a short program that converts UTF-16 text to an array of wide characters and then prints it.
#include <endian.h>
#include <errno.h>
#include <iconv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#define FROMCODE "UTF-16"

/*
 * Pick the UTF-32 flavour that matches the host byte order, so the converted
 * bytes can be read back directly as wchar_t.  This assumes wchar_t is a
 * 4-byte UTF-32 code unit (true on glibc; not guaranteed by the C standard).
 *
 * The compiler-predefined macros are tried first: <endian.h> may hide
 * BYTE_ORDER/LITTLE_ENDIAN in strict ISO C mode, and comparing two undefined
 * macros in #if evaluates 0 == 0, which would silently select little endian.
 */
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    defined(__ORDER_BIG_ENDIAN__)
#  if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#    define TOCODE "UTF-32LE"
#  elif (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#    define TOCODE "UTF-32BE"
#  else
#    error Unsupported byte order
#  endif
#elif defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN)
#  if (BYTE_ORDER == LITTLE_ENDIAN)
#    define TOCODE "UTF-32LE"
#  elif (BYTE_ORDER == BIG_ENDIAN)
#    define TOCODE "UTF-32BE"
#  else
#    error Unsupported byte order
#  endif
#else
#  error Unsupported byte order
#endif

/*
 * Convert a hard-coded UTF-16 string to a wchar_t array with iconv and print
 * it to stdout.
 *
 * The output buffer starts at a guessed size and is doubled whenever iconv
 * reports E2BIG, resuming at the byte offset already converted.
 *
 * Returns EXIT_SUCCESS when the text was converted and printed, EXIT_FAILURE
 * if the conversion descriptor could not be opened, memory ran out, or iconv
 * reported an unrecoverable error.
 */
int main(void)
{
    /*
     * "Hello, World!" in UTF-16LE, preceded by the byte-order mark FF FE.
     * Stored as unsigned char so 0xff/0xfe are in range for the element type.
     */
    static const unsigned char in[] = {
        0xff, 0xfe,
        'H', 0x0, 'e', 0x0, 'l', 0x0, 'l', 0x0, 'o', 0x0, ',', 0x0,
        ' ', 0x0, 'W', 0x0, 'o', 0x0, 'r', 0x0, 'l', 0x0, 'd', 0x0,
        '!', 0x0
    };

    int status = EXIT_FAILURE;
    wchar_t *out = NULL;
    /*
     * iconv() takes a non-const char ** for its input; the cast is only there
     * to satisfy that prototype, the input bytes are not written to.
     */
    char *inbuf = (char *)in;
    char *outbuf = NULL;
    size_t inbytesleft = sizeof(in);
    size_t outbytesleft = 0;
    size_t size = sizeof(in) * sizeof(wchar_t); /* output capacity in bytes */
    size_t converted = 0;                       /* output bytes produced so far */
    iconv_t cd;

    cd = iconv_open(TOCODE, FROMCODE);
    if (cd == (iconv_t)-1) {
        if (errno == EINVAL) {
            fprintf(stderr, "iconv: cannot convert from %s to %s\n",
                    FROMCODE, TOCODE);
        } else {
            fprintf(stderr, "iconv: %s\n", strerror(errno));
        }
        goto cleanup;
    }

    for (;;) {
        size_t rc;
        /* One extra wchar_t is reserved for the terminator added below. */
        void *tmp = realloc(out, size + sizeof(wchar_t));

        if (!tmp) {
            fprintf(stderr, "realloc: %s\n", strerror(errno));
            goto cleanup;
        }
        out = tmp;

        /* Resume writing right after the bytes produced by earlier passes. */
        outbuf = (char *)out + converted;
        outbytesleft = size - converted;

        rc = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
        if (rc != (size_t)-1) {
            break; /* whole input consumed */
        }
        if (errno == EINVAL) {
            break; /* incomplete sequence at the end of the input, ignore it */
        }
        if (errno != E2BIG) {
            /* unrecoverable error */
            fprintf(stderr, "iconv: %s\n", strerror(errno));
            goto cleanup;
        }

        /* Output buffer full: remember the progress and double the capacity. */
        converted = size - outbytesleft;
        size <<= 1;
    }

    /*
     * Flush the conversion state before terminating the buffer, so that any
     * bytes iconv emits here are part of the printed text.  This also resets
     * cd to its initial state.
     */
    if (iconv(cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t)-1) {
        fprintf(stderr, "iconv: %s\n", strerror(errno));
        goto cleanup;
    }

    /* iconv does not terminate the output; do it by hand. */
    converted = (size - outbytesleft) / sizeof(wchar_t);
    out[converted] = L'\0';

    /*
     * Note: no setlocale() call is made, so %ls runs in the "C" locale; that
     * is sufficient for the ASCII-only sample text above.
     */
    fprintf(stdout, "%ls\n", out);
    status = EXIT_SUCCESS;

cleanup:
    free(out);
    /* iconv_open() signals failure with (iconv_t)-1, not with NULL. */
    if (cd != (iconv_t)-1) {
        iconv_close(cd);
    }
    return status;
}
Since UTF-16 is a variable-length encoding, you can only guess how large your output buffer needs to be. A correct program must handle the case where the output buffer is not large enough to hold the converted data.
You should also notice that iconv does not NUL-terminate your output buffer for you.
Iconv is a stream-oriented processor, so you need to reset the iconv_t conversion descriptor if you want to reuse it for another conversion (the code example does this near the end). If you want to process a stream, you must handle the EINVAL error by copying all the bytes remaining in the input buffer to the beginning of the new input buffer before calling iconv again.