Display wide characters with printf

I am trying to understand how printf works with wide characters (wchar_t).

I made the following code examples:

Example 1:

#include <stdio.h>
#include <stdlib.h>

/*
 * Example 1: build a one-character wide string holding '*' (code 42)
 * and print it with the %ls conversion.
 *
 * Returns 0 on success, EXIT_FAILURE if the allocation fails.
 */
int     main(void)
{
    wchar_t     *s;

    /* One wide character plus the terminating null wide character. */
    s = malloc(sizeof(*s) * 2);
    if (s == NULL)
    {
        perror("malloc");
        return (EXIT_FAILURE);
    }
    s[0] = 42;
    s[1] = 0;
    printf("%ls\n", s);
    free(s);
    return (0);
}

Output:

*

Everything is fine here: my character (*) is displayed correctly.

Example 2:

I wanted to display a different kind of character. On my system, wchar_t seems to be encoded on 4 bytes. So I tried to display the following character: É

#include <stdio.h>
#include <stdlib.h>

/*
 * Example 2: same program as Example 1, but the wide character is set
 * to 0xC389, one of the values tried in order to display 'É'.
 * The return value of printf is intentionally not checked here.
 *
 * Returns 0 on success, EXIT_FAILURE if the allocation fails.
 */
int     main(void)
{
    wchar_t     *s;

    /* One wide character plus the terminating null wide character. */
    s = malloc(sizeof(*s) * 2);
    if (s == NULL)
    {
        perror("malloc");
        return (EXIT_FAILURE);
    }
    s[0] = 0xC389;
    s[1] = 0;
    printf("%ls\n", s);
    free(s);
    return (0);
}

But this time there is no output. I tried many values from the "encoding" section (see the previous link) for s[0] (0xC389, 201, 0xC9)... but I never get the É character displayed. I also tried using %S instead of %ls.

If I call printf like this: printf("<%ls>\n", s), the only character printed is '<'; the output is truncated.

Why do I have this problem? What should I do?

+4
2

Why do I have this problem?

Make sure you check errno and the return value of printf!

#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>

/*
 * Prints a one-character wide string (value 0xC389) with %ls and
 * reports a printf failure through perror, so the reason for the
 * missing output becomes visible.
 *
 * Returns 0 after the print attempt, EXIT_FAILURE if allocation fails.
 */
int main(void)
{
    wchar_t *s;

    /* One wide character plus the terminating null wide character. */
    s = malloc(sizeof *s * 2);
    if (s == NULL) {
        perror("malloc");
        return (EXIT_FAILURE);
    }
    s[0] = 0xC389;
    s[1] = 0;

    /* printf returns a negative value when the conversion fails. */
    if (printf("%ls\n", s) < 0) {
        perror("printf");
    }

    free(s);
    return (0);
}

Output:

$ gcc test.c && ./a.out
printf: Invalid or incomplete multibyte or wide character

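First of all, the default locale of a C program is C (also known as POSIX), which is ASCII-only. You need to add a call to setlocale, specifically setlocale(LC_ALL, "").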

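If your LC_ALL, LC_CTYPE, or LANG environment variables are not set to a locale that allows UTF-8, you will have to select a locale explicitly. setlocale(LC_ALL, "C.UTF-8") works on most systems: C is the standard locale, and its UTF-8 variant is commonly implemented.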

#include <stdio.h>
#include <stdlib.h>
#include <locale.h>
#include <wchar.h>

/*
 * Same program as before, but the locale is taken from the environment
 * via setlocale(LC_ALL, "") before printing. The wide character is
 * still 0xC389, i.e. the UTF-8 bytes of 'É' (C3 89) packed into one
 * value rather than the character's code point.
 *
 * Returns 0 after the print attempt, EXIT_FAILURE if allocation fails.
 */
int main(void)
{
    wchar_t *s;

    /* One wide character plus the terminating null wide character. */
    s = malloc(sizeof *s * 2);
    if (s == NULL) {
        perror("malloc");
        return (EXIT_FAILURE);
    }
    s[0] = 0xC389;
    s[1] = 0;

    /* Switch from the default "C" locale to the one set in the environment. */
    setlocale(LC_ALL, "");

    /* printf returns a negative value when the conversion fails. */
    if (printf("%ls\n", s) < 0) {
        perror("printf");
    }

    free(s);
    return (0);
}

Output:

$ gcc test.c && ./a.out

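The reason the wrong character is printed is that wchar_t represents a wide character (e.g., UTF-32), not a multibyte character (e.g., UTF-8). Note that wchar_t is always 32 bits wide in the GNU C Library, but the C standard does not require it to be. If you initialize the character using its UTF-32BE encoding (i.e. 0x000000C9), it is printed correctly: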

#include <stdio.h>
#include <stdlib.h>
#include <locale.h>
#include <wchar.h>

/*
 * Final version: the wide character is set to 0xC9, the code point of
 * 'É' (U+00C9), and the locale is taken from the environment before
 * printing with %ls.
 *
 * Returns 0 after the print attempt, EXIT_FAILURE if allocation fails.
 */
int main(void)
{
    wchar_t *s;

    /* One wide character plus the terminating null wide character. */
    s = malloc(sizeof *s * 2);
    if (s == NULL) {
        perror("malloc");
        return (EXIT_FAILURE);
    }
    s[0] = 0xC9;
    s[1] = 0;

    /* Switch from the default "C" locale to the one set in the environment. */
    setlocale(LC_ALL, "");

    /* printf returns a negative value when the conversion fails. */
    if (printf("%ls\n", s) < 0) {
        perror("printf");
    }

    free(s);
    return (0);
}

Output:

$ gcc test.c && ./a.out
É

Note that you can also set the LC (locale) environment variables from the command line:

$ LC_ALL=C.UTF-8
$ ./a.out
É
+4

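One problem is that you are trying to encode UTF-8, which is a single-byte encoding scheme, as a multibyte encoding. For UTF-8, use plain char.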

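Also note that, because you are trying to pack the UTF-8 sequence into a multibyte type, you have endianness (byte-order) problems (in memory, 0xC389 may be stored as 0x89 and 0xC3, in that order). The compiler will also sign-extend your number (if sizeof(wchar_t) == 4 and you look at s[0] in a debugger, it may be 0xFFFFC389).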

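Another problem is the terminal or console you are using for output. Maybe it simply does not support UTF-8 or the other encodings you tried?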

+3

Source: https://habr.com/ru/post/1660693/


All Articles