1#![allow(unsafe_code)]
2
3use crate::printable::is_printable;
4use crate::slice::advance;
5use crate::slice::subslice;
6
/// UTF-8 encoding of a single `char`, stored inline in a fixed-size buffer.
pub struct CharEncodeUtf8 {
    /// Encoded bytes; only the first `len` entries are meaningful, the rest
    /// stay zero.
    buf: [u8; 4],
    /// Number of bytes of `buf` in use (the value of `char::len_utf8`).
    len: u8,
}
11
12impl CharEncodeUtf8 {
13 pub const fn new(ch: char) -> Self {
15 const TAG_CONT: u8 = 0b1000_0000;
17 const TAG_TWO_B: u8 = 0b1100_0000;
18 const TAG_THREE_B: u8 = 0b1110_0000;
19 const TAG_FOUR_B: u8 = 0b1111_0000;
20
21 let mut buf = [0; 4];
22 let len = ch.len_utf8();
23 let code = ch as u32;
24
25 match len {
26 1 => {
27 buf[0] = code as u8;
28 }
29 2 => {
30 buf[0] = ((code >> 6) & 0x1F) as u8 | TAG_TWO_B;
31 buf[1] = (code & 0x3F) as u8 | TAG_CONT;
32 }
33 3 => {
34 buf[0] = ((code >> 12) & 0x0F) as u8 | TAG_THREE_B;
35 buf[1] = ((code >> 6) & 0x3F) as u8 | TAG_CONT;
36 buf[2] = (code & 0x3F) as u8 | TAG_CONT;
37 }
38 4 => {
39 buf[0] = ((code >> 18) & 0x07) as u8 | TAG_FOUR_B;
40 buf[1] = ((code >> 12) & 0x3F) as u8 | TAG_CONT;
41 buf[2] = ((code >> 6) & 0x3F) as u8 | TAG_CONT;
42 buf[3] = (code & 0x3F) as u8 | TAG_CONT;
43 }
44 _ => {}
45 };
46
47 CharEncodeUtf8 {
48 buf,
49 len: len as u8,
50 }
51 }
52
53 pub const fn as_bytes(&self) -> &[u8] {
54 subslice(&self.buf, 0..self.len as usize)
55 }
56
57 pub const fn as_str(&self) -> &str {
59 unsafe { core::str::from_utf8_unchecked(self.as_bytes()) }
60 }
61}
62
/// A `\u{…}` escape sequence for a single `char`, stored inline as ASCII bytes.
pub struct CharEscapeUnicode {
    /// Escape text: `\u{`, the hexadecimal digits, then `}`; only the first
    /// `len` bytes are meaningful.
    buf: [u8; 10],
    /// Number of bytes of `buf` in use.
    len: u8,
}
67
68impl CharEscapeUnicode {
69 const unsafe fn from_code_point(code: u32) -> Self {
70 let mut hex_buf = [0; 10];
71 let mut hex_pos = 0;
72
73 let mut x = code;
74 loop {
75 hex_buf[hex_pos] = crate::ascii::num_to_hex_digit((x as u8) & 0x0f);
76 hex_pos += 1;
77 x >>= 4;
78 if x == 0 {
79 break;
80 }
81 }
82
83 let mut buf = [b'\\', b'u', b'{', 0, 0, 0, 0, 0, 0, 0];
84 let mut pos = 3;
85
86 while hex_pos > 0 {
87 hex_pos -= 1;
88 buf[pos] = hex_buf[hex_pos];
89 pos += 1;
90 }
91
92 buf[pos] = b'}';
93 pos += 1;
94
95 Self {
96 buf,
97 len: pos as u8,
98 }
99 }
100
101 pub const fn new(ch: char) -> Self {
102 unsafe { Self::from_code_point(ch as u32) }
103 }
104
105 #[cfg(test)]
106 pub fn as_str(&self) -> &str {
107 unsafe { core::str::from_utf8_unchecked(&self.buf[..self.len as usize]) }
108 }
109}
110
/// Debug-style escape of a single `char`, stored inline as bytes.
///
/// Depending on the character this holds a two-byte backslash escape, the
/// character's own UTF-8 encoding, or a `\u{…}` escape.
pub struct CharEscapeDebug {
    /// Escaped bytes; only the first `len` entries are meaningful.
    buf: [u8; 10],
    /// Number of bytes of `buf` in use.
    len: u8,
}
115
/// Options controlling which quote characters `CharEscapeDebug::new` escapes.
pub struct CharEscapeDebugArgs {
    /// When `true`, `'` is emitted as `\'`.
    pub escape_single_quote: bool,
    /// When `true`, `"` is emitted as `\"`.
    pub escape_double_quote: bool,
}

impl CharEscapeDebugArgs {
    /// Escapes both single and double quotes (only available in test builds).
    #[cfg(test)]
    pub const ESCAPE_ALL: Self = Self {
        escape_single_quote: true,
        escape_double_quote: true,
    };
}
128
129impl CharEscapeDebug {
130 pub const fn new(ch: char, args: CharEscapeDebugArgs) -> Self {
131 match ch {
132 '\0' => Self::backslash_ascii(b'0'),
133 '\t' => Self::backslash_ascii(b't'),
134 '\r' => Self::backslash_ascii(b'r'),
135 '\n' => Self::backslash_ascii(b'n'),
136 '\\' => Self::backslash_ascii(b'\\'),
137 '"' if args.escape_double_quote => Self::backslash_ascii(b'"'),
138 '\'' if args.escape_single_quote => Self::backslash_ascii(b'\''),
139 _ if is_printable(ch) => Self::printable(ch),
140 _ => Self::unicode(ch),
141 }
142 }
143
144 const fn printable(ch: char) -> Self {
145 let e = CharEncodeUtf8::new(ch);
146 Self {
147 buf: [e.buf[0], e.buf[1], e.buf[2], e.buf[3], 0, 0, 0, 0, 0, 0],
148 len: e.len,
149 }
150 }
151
152 const fn backslash_ascii(ch: u8) -> Self {
153 Self {
154 buf: [b'\\', ch, 0, 0, 0, 0, 0, 0, 0, 0],
155 len: 2,
156 }
157 }
158
159 const fn unicode(ch: char) -> Self {
160 let e = CharEscapeUnicode::new(ch);
161 Self {
162 buf: e.buf,
163 len: e.len,
164 }
165 }
166
167 pub const fn as_bytes(&self) -> &[u8] {
168 subslice(&self.buf, 0..self.len as usize)
169 }
170
171 #[cfg(test)]
172 pub fn as_str(&self) -> &str {
173 unsafe { core::str::from_utf8_unchecked(&self.buf[..self.len as usize]) }
174 }
175
176 }
181
/// Decodes the first character of `bytes`.
///
/// Returns `None` when `bytes` is empty. Otherwise returns the decoded
/// character together with the number of bytes that were read for it.
///
/// No validation is performed: a continuation byte that is missing because
/// the slice ends early is treated as `0`, and the resulting code point is
/// converted with `crate::str::char_from_u32` inside an `unsafe` block.
/// NOTE(review): this presumably requires `bytes` to be the bytes of a valid
/// UTF-8 string — confirm against the contract of `char_from_u32`.
pub const fn next_char(bytes: &[u8]) -> Option<(char, usize)> {
    // Decodes one code point from the front of `bytes`, returning it as a raw
    // `u32` together with the number of bytes consumed.
    #[allow(clippy::many_single_char_names)]
    const fn next_code_point(bytes: &[u8]) -> Option<(u32, usize)> {
        // Mask selecting the six payload bits of a continuation byte.
        const CONT_MASK: u8 = 0b0011_1111;

        // Keeps the low `7 - width` bits of a leading byte.
        const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
            (byte & (0x7F >> width)) as u32
        }

        // Shifts `ch` left by six bits and appends the payload of `byte`.
        const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
            (ch << 6) | (byte & CONT_MASK) as u32
        }

        // Substitutes `0` for a byte that could not be read.
        #[allow(clippy::manual_unwrap_or_default)] const fn unwrap_or_0(opt: Option<u8>) -> u8 {
            match opt {
                Some(byte) => byte,
                None => 0,
            }
        }

        // Cursor into `bytes`; also the consumed-byte count that is returned.
        let mut i = 0;

        // Yields `Some(bytes[i])` and advances `i`, or yields `None` (leaving
        // `i` unchanged) once `i` has reached the end of the slice.
        macro_rules! next {
            () => {{
                if i < bytes.len() {
                    let x = Some(bytes[i]);
                    i += 1;
                    x
                } else {
                    None
                }
            }};
        }

        let x = match next!() {
            Some(x) => x,
            None => return None,
        };
        // Single-byte (ASCII) case.
        if x < 128 {
            return Some((x as u32, i));
        }

        // Multi-byte case: start by decoding as a two-byte sequence [x y].
        let init = utf8_first_byte(x, 2);
        let y = unwrap_or_0(next!());
        let mut ch = utf8_acc_cont_byte(init, y);
        if x >= 0xE0 {
            // Three-byte sequence [x y z]: rebuild from `init` and the
            // combined payload of `y` and `z`.
            let z = unwrap_or_0(next!());
            let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
            ch = (init << 12) | y_z;
            if x >= 0xF0 {
                // Four-byte sequence [x y z w]: only the low three bits of
                // `init` contribute.
                let w = unwrap_or_0(next!());
                ch = ((init & 7) << 18) | utf8_acc_cont_byte(y_z, w);
            }
        }

        Some((ch, i))
    }

    match next_code_point(bytes) {
        // NOTE(review): `ch` is not range-checked before the unchecked
        // conversion; soundness presumably relies on valid UTF-8 input.
        Some((ch, count)) => Some((unsafe { crate::str::char_from_u32(ch) }, count)),
        None => None,
    }
}
247
248pub const fn str_count_chars(s: &str) -> usize {
249 let mut s = s.as_bytes();
250 let mut ans = 0;
251 while let Some((_, count)) = next_char(s) {
252 s = advance(s, count);
253 ans += 1;
254 }
255 ans
256}
257
258pub const fn str_chars<const N: usize>(s: &str) -> [char; N] {
259 let mut s = s.as_bytes();
260 let mut buf: [char; N] = ['\0'; N];
261 let mut pos = 0;
262 while let Some((ch, count)) = next_char(s) {
263 s = advance(s, count);
264 buf[pos] = ch;
265 pos += 1;
266 }
267 assert!(pos == N);
268 buf
269}
270
#[cfg(test)]
mod tests {
    use super::*;

    /// `CharEncodeUtf8` must agree with `char::encode_utf8`.
    #[test]
    fn test_char_encode_utf8() {
        fn check(ch: char) {
            let encoded = CharEncodeUtf8::new(ch);
            let mut scratch = [0; 4];
            let expected = ch.encode_utf8(&mut scratch);
            assert_eq!(encoded.as_str(), expected);
        }

        check('\0');
        check('我');
        check('\u{10ffff}');
    }

    /// `CharEscapeUnicode` must agree with `char::escape_unicode`.
    #[test]
    fn test_char_escape_unicode() {
        fn check(ch: char) {
            let escaped = CharEscapeUnicode::new(ch);
            let expected = ch.escape_unicode().to_string();
            assert_eq!(escaped.as_str(), expected);
        }

        check('\0');
        check('我');
        check('\u{10ffff}');
    }

    /// `CharEscapeDebug` with both quotes escaped must agree with
    /// `char::escape_debug` on all of ASCII and on the largest scalar value.
    #[test]
    fn test_char_escape_debug() {
        fn check(ch: char) {
            let escaped = CharEscapeDebug::new(ch, CharEscapeDebugArgs::ESCAPE_ALL);
            let expected = ch.escape_debug().to_string();
            assert_eq!(escaped.as_str(), expected);
        }

        ('\0'..='\u{7f}').for_each(check);

        check('\u{10ffff}');
    }

    /// `str_count_chars` and `str_chars` evaluated at compile time must agree
    /// with `str::chars`.
    #[test]
    fn test_str_chars() {
        const INPUT: &str = "唐可可";
        const CHAR_COUNT: usize = str_count_chars(INPUT);
        const CHARS: [char; CHAR_COUNT] = str_chars::<CHAR_COUNT>(INPUT);

        let expected: Vec<char> = INPUT.chars().collect();
        assert_eq!(CHARS, expected.as_slice());
    }
}