xref: /drstd/dlibc/src/unix/header/wchar/utf8.rs (revision a1cd34728e2d4a5d4cf41974e4db28602cbb1b1c)
1 //UTF implementation parts for wchar.h.
2 //Partially ported from the Sortix libc
3 
4 use core::{char, slice, str, usize};
5 
6 use super::mbstate_t;
7 use crate::unix::header::errno;
8 use crate::unix::platform;
9 
10 // Based on
11 // https://github.com/rust-lang/rust/blob/f24ce9b/library/core/src/str/validations.rs#L232-L257,
12 // because apparently somebody removed the `pub use` statement from `core::str`.
13 
// https://tools.ietf.org/html/rfc3629
//
// Lookup table indexed by the first byte of a UTF-8 sequence. Each entry is
// the total length in bytes of the sequence that byte introduces; 0 marks a
// byte that cannot start a sequence.
static UTF8_CHAR_WIDTH: [u8; 256] = [
    // 0x00..=0x7F: single-byte (ASCII) characters
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x7F
    // 0x80..=0xBF: continuation bytes, never a first byte
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xAF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xBF
    // 0xC0 and 0xC1 are rejected; 0xC2..=0xDF start two-byte sequences
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xCF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xDF
    // 0xE0..=0xEF start three-byte sequences
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xEF
    // 0xF0..=0xF4 start four-byte sequences; 0xF5..=0xFF are rejected
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xFF
];
33 
34 // Given a first byte, determines how many bytes are in this UTF-8 character.
35 #[inline]
utf8_char_width(b: u8) -> usize36 fn utf8_char_width(b: u8) -> usize {
37     UTF8_CHAR_WIDTH[usize::from(b)].into()
38 }
39 
40 //It's guaranteed that we don't have any nullpointers here
mbrtowc( pwc: *mut ::wchar_t, s: *const ::c_char, n: usize, _ps: *mut mbstate_t, ) -> usize41 pub unsafe fn mbrtowc(
42     pwc: *mut ::wchar_t,
43     s: *const ::c_char,
44     n: usize,
45     _ps: *mut mbstate_t,
46 ) -> usize {
47     let size = utf8_char_width(*s as u8);
48     if size > n {
49         platform::errno = errno::EILSEQ;
50         return -2isize as usize;
51     }
52     if size == 0 {
53         platform::errno = errno::EILSEQ;
54         return -1isize as usize;
55     }
56 
57     let slice = slice::from_raw_parts(s as *const u8, size);
58     let decoded = str::from_utf8(slice);
59     if decoded.is_err() {
60         platform::errno = errno::EILSEQ;
61         return -1isize as usize;
62     }
63 
64     let wc = decoded.unwrap();
65 
66     let result: ::wchar_t = wc.chars().next().unwrap() as ::wchar_t;
67 
68     if !pwc.is_null() {
69         *pwc = result;
70     }
71 
72     if result != 0 {
73         size
74     } else {
75         0
76     }
77 }
78 
79 //It's guaranteed that we don't have any nullpointers here
wcrtomb(s: *mut ::c_char, wc: ::wchar_t, _ps: *mut mbstate_t) -> usize80 pub unsafe fn wcrtomb(s: *mut ::c_char, wc: ::wchar_t, _ps: *mut mbstate_t) -> usize {
81     let dc = char::from_u32(wc as u32);
82 
83     if dc.is_none() {
84         ::errno = errno::EILSEQ;
85         return -1isize as usize;
86     }
87 
88     let c = dc.unwrap();
89     let size = c.len_utf8();
90     let slice = slice::from_raw_parts_mut(s as *mut u8, size);
91 
92     c.encode_utf8(slice);
93 
94     size
95 }
96