1use crate::slice::advance;
2use crate::slice::subslice;
3use crate::utf8::CharEncodeUtf8;
4
5use core::str;
6
/// Shared implementation behind the `&str`- and `char`-pattern splitters.
///
/// Both `Split` and `SplitInclusive` build one of these via their `to_impl`
/// helpers and delegate `output_len` / `const_eval` to it.
struct SplitImpl<'input, 'pat> {
    /// The string being split; returned pieces borrow from it.
    input: &'input str,
    /// The separator. An empty pattern is handled as a special case
    /// (per-char splitting).
    pattern: &'pat str,
    /// When `true`, each piece keeps the separator that terminated it and an
    /// empty remainder after the last separator is not reported.
    inclusive: bool,
}
12
13impl<'input> SplitImpl<'input, '_> {
14 const fn output_len(&self) -> usize {
15 let mut input = self.input;
16 let pat = self.pattern;
17
18 if pat.is_empty() {
19 crate::utf8::str_count_chars(input) + 2
20 } else {
21 let mut ans = 0;
22 while let Some((_, remain)) = crate::str::next_match(input, pat) {
23 ans += 1;
24 input = remain
25 }
26 if self.inclusive {
27 if !input.is_empty() {
28 ans += 1;
29 }
30 } else {
31 ans += 1;
32 }
33 ans
34 }
35 }
36
37 #[allow(unsafe_code)]
38 const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
39 let mut input = self.input;
40 let pat = self.pattern;
41
42 let mut buf: [&str; N] = [""; N];
43 let mut pos = 0;
44
45 if pat.is_empty() {
46 let mut input = input.as_bytes();
47
48 {
49 buf[pos] = unsafe { str::from_utf8_unchecked(subslice(input, 0..0)) };
50 pos += 1;
51 }
52
53 while let Some((_, count)) = crate::utf8::next_char(input) {
54 buf[pos] = unsafe { str::from_utf8_unchecked(subslice(input, 0..count)) };
55 pos += 1;
56 input = advance(input, count);
57 }
58
59 {
60 buf[pos] = unsafe { str::from_utf8_unchecked(subslice(input, 0..0)) };
61 pos += 1;
62 }
63 } else {
64 while let Some((m, remain)) = crate::str::next_match(input, pat) {
65 let substr = if self.inclusive {
66 subslice(input.as_bytes(), 0..m + pat.len())
67 } else {
68 subslice(input.as_bytes(), 0..m)
69 };
70 buf[pos] = unsafe { str::from_utf8_unchecked(substr) };
71 pos += 1;
72 input = remain;
73 }
74 if self.inclusive {
75 if !input.is_empty() {
76 buf[pos] = input;
77 pos += 1;
78 }
79 } else {
80 buf[pos] = input;
81 pos += 1;
82 }
83 }
84 assert!(pos == N);
85 buf
86 }
87}
88
/// Compile-time splitter: `Split(input, pattern)`.
///
/// Field `0` is the input and field `1` the pattern. The impls in this file
/// cover `&str`, `char` and `&[char]` patterns; the `split!` macro drives them.
pub struct Split<T, P>(pub T, pub P);
90
91impl<'input, 'pat> Split<&'input str, &'pat str> {
92 const fn to_impl(&self) -> SplitImpl<'input, 'pat> {
93 SplitImpl {
94 input: self.0,
95 pattern: self.1,
96 inclusive: false,
97 }
98 }
99
100 pub const fn output_len(&self) -> usize {
101 self.to_impl().output_len()
102 }
103
104 pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
105 self.to_impl().const_eval()
106 }
107}
108
109impl<'input> Split<&'input str, char> {
110 const fn to_impl<'pat>(&self, ch: &'pat CharEncodeUtf8) -> SplitImpl<'input, 'pat> {
111 SplitImpl {
112 input: self.0,
113 pattern: ch.as_str(),
114 inclusive: false,
115 }
116 }
117
118 pub const fn output_len(&self) -> usize {
119 let ch = CharEncodeUtf8::new(self.1);
120 self.to_impl(&ch).output_len()
121 }
122
123 pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
124 let ch = CharEncodeUtf8::new(self.1);
125 self.to_impl(&ch).const_eval()
126 }
127}
128
129impl<'input> Split<&'input str, &[char]> {
130 const fn char_in_slice(&self, ch: char) -> bool {
131 let chars = self.1;
132 let mut i = 0;
133 while i < chars.len() {
134 if chars[i] == ch {
135 return true;
136 }
137 i += 1;
138 }
139 false
140 }
141
142 pub const fn output_len(&self) -> usize {
143 let mut input = self.0.as_bytes();
144 let mut ans = 0;
145
146 if self.1.is_empty() {
147 return 1; }
149
150 while let Some((ch, count)) = crate::utf8::next_char(input) {
151 if self.char_in_slice(ch) {
152 ans += 1;
153 }
154 input = advance(input, count);
155 }
156 ans + 1 }
158
159 #[allow(unsafe_code)]
160 pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
161 let mut input = self.0.as_bytes();
162 let input_str = self.0;
163 let mut buf: [&str; N] = [""; N];
164 let mut pos = 0;
165 let mut start_byte_pos = 0;
166 let mut current_byte_pos = 0;
167
168 if self.1.is_empty() {
169 buf[0] = input_str;
171 assert!(1 == N);
172 return buf;
173 }
174
175 while let Some((ch, count)) = crate::utf8::next_char(input) {
176 if self.char_in_slice(ch) {
177 let substr_bytes = subslice(input_str.as_bytes(), start_byte_pos..current_byte_pos);
179 buf[pos] = unsafe { core::str::from_utf8_unchecked(substr_bytes) };
180 pos += 1;
181
182 start_byte_pos = current_byte_pos + count;
184 }
185 current_byte_pos += count;
186 input = advance(input, count);
187 }
188
189 let substr_bytes = subslice(input_str.as_bytes(), start_byte_pos..input_str.len());
191 buf[pos] = unsafe { core::str::from_utf8_unchecked(substr_bytes) };
192 pos += 1;
193
194 assert!(pos == N);
195 buf
196 }
197}
198
/// Splits a constant string by a pattern at compile time.
///
/// Takes the input string expression and a pattern expression; the impls of
/// `Split` in this file accept `&str`, `char` and `&[char]` patterns.
///
/// Expands to a block that first computes the number of pieces with
/// `Split::output_len`, then evaluates `Split::const_eval` into a
/// `[&str; OUTPUT_LEN]` constant and yields that array. Both arguments are
/// used inside `const` items, so they must be constant expressions; the
/// pattern expression is evaluated twice.
#[macro_export]
macro_rules! split {
    ($s: expr, $pat: expr) => {{
        const INPUT: &str = $s;
        const OUTPUT_LEN: usize = $crate::__ctfe::Split(INPUT, $pat).output_len();
        const OUTPUT_BUF: [&str; OUTPUT_LEN] = $crate::__ctfe::Split(INPUT, $pat).const_eval();
        OUTPUT_BUF
    }};
}
241
/// Compile-time inclusive splitter: `SplitInclusive(input, pattern)`.
///
/// Field `0` is the input and field `1` the pattern. Unlike `Split`, each
/// piece keeps the separator that terminated it. The impls in this file cover
/// `&str`, `char` and `&[char]` patterns; the `split_inclusive!` macro drives them.
pub struct SplitInclusive<T, P>(pub T, pub P);
243
244impl<'input, 'pat> SplitInclusive<&'input str, &'pat str> {
245 const fn to_impl(&self) -> SplitImpl<'input, 'pat> {
246 SplitImpl {
247 input: self.0,
248 pattern: self.1,
249 inclusive: true,
250 }
251 }
252
253 pub const fn output_len(&self) -> usize {
254 self.to_impl().output_len()
255 }
256
257 pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
258 self.to_impl().const_eval()
259 }
260}
261
262impl<'input> SplitInclusive<&'input str, char> {
263 const fn to_impl<'pat>(&self, ch: &'pat CharEncodeUtf8) -> SplitImpl<'input, 'pat> {
264 SplitImpl {
265 input: self.0,
266 pattern: ch.as_str(),
267 inclusive: true,
268 }
269 }
270
271 pub const fn output_len(&self) -> usize {
272 let ch = CharEncodeUtf8::new(self.1);
273 self.to_impl(&ch).output_len()
274 }
275
276 pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
277 let ch = CharEncodeUtf8::new(self.1);
278 self.to_impl(&ch).const_eval()
279 }
280}
281
282impl<'input> SplitInclusive<&'input str, &[char]> {
283 const fn char_in_slice(&self, ch: char) -> bool {
284 let chars = self.1;
285 let mut i = 0;
286 while i < chars.len() {
287 if chars[i] == ch {
288 return true;
289 }
290 i += 1;
291 }
292 false
293 }
294
295 pub const fn output_len(&self) -> usize {
296 if self.0.is_empty() {
297 return 0; }
299
300 let mut input = self.0.as_bytes();
301 let mut ans = 0;
302
303 if self.1.is_empty() {
304 return 1; }
306
307 let mut found_any_split = false;
308 while let Some((ch, count)) = crate::utf8::next_char(input) {
309 if self.char_in_slice(ch) {
310 ans += 1;
311 found_any_split = true;
312 }
313 input = advance(input, count);
314 }
315
316 if !found_any_split {
317 return 1; }
319
320 let mut input_check = self.0.as_bytes();
322 let mut last_was_split = false;
323
324 while let Some((ch, count)) = crate::utf8::next_char(input_check) {
325 let remaining = advance(input_check, count);
326 if remaining.is_empty() {
327 last_was_split = self.char_in_slice(ch);
329 break;
330 }
331 input_check = remaining;
332 }
333
334 if !last_was_split {
335 ans += 1;
336 }
337
338 ans
339 }
340
341 #[allow(unsafe_code)]
342 pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
343 if self.0.is_empty() {
344 let buf: [&str; N] = [""; N];
346 assert!(N == 0);
347 return buf;
348 }
349
350 let mut input = self.0.as_bytes();
351 let input_str = self.0;
352 let mut buf: [&str; N] = [""; N];
353 let mut pos = 0;
354 let mut start_byte_pos = 0;
355 let mut current_byte_pos = 0;
356
357 if self.1.is_empty() {
358 buf[0] = input_str;
360 assert!(1 == N);
361 return buf;
362 }
363
364 while let Some((ch, count)) = crate::utf8::next_char(input) {
365 current_byte_pos += count;
366 if self.char_in_slice(ch) {
367 let substr_bytes = subslice(input_str.as_bytes(), start_byte_pos..current_byte_pos);
369 buf[pos] = unsafe { core::str::from_utf8_unchecked(substr_bytes) };
370 pos += 1;
371
372 start_byte_pos = current_byte_pos;
374 }
375 input = advance(input, count);
376 }
377
378 if start_byte_pos < input_str.len() {
380 let substr_bytes = subslice(input_str.as_bytes(), start_byte_pos..input_str.len());
381 buf[pos] = unsafe { core::str::from_utf8_unchecked(substr_bytes) };
382 pos += 1;
383 }
384
385 assert!(pos == N);
386 buf
387 }
388}
389
/// Splits a constant string by a pattern at compile time, keeping each
/// separator at the end of the piece it terminates.
///
/// Takes the input string expression and a pattern expression; the impls of
/// `SplitInclusive` in this file accept `&str`, `char` and `&[char]` patterns.
///
/// Expands to a block that first computes the number of pieces with
/// `SplitInclusive::output_len`, then evaluates `SplitInclusive::const_eval`
/// into a `[&str; OUTPUT_LEN]` constant and yields that array. Both arguments
/// are used inside `const` items, so they must be constant expressions; the
/// pattern expression is evaluated twice.
#[macro_export]
macro_rules! split_inclusive {
    ($s: expr, $pat: expr) => {{
        const INPUT: &str = $s;
        const OUTPUT_LEN: usize = $crate::__ctfe::SplitInclusive(INPUT, $pat).output_len();
        const OUTPUT_BUF: [&str; OUTPUT_LEN] =
            $crate::__ctfe::SplitInclusive(INPUT, $pat).const_eval();
        OUTPUT_BUF
    }};
}
439
/// Compile-time ASCII-whitespace splitter: `SplitAsciiWhitespace(input)`.
///
/// Field `0` is the input string; the `split_ascii_whitespace!` macro drives
/// the `&str` impl in this file.
pub struct SplitAsciiWhitespace<T>(pub T);
441
442impl SplitAsciiWhitespace<&'_ str> {
443 pub const fn output_len(&self) -> usize {
444 let bytes = self.0.as_bytes();
445 let mut count = 0;
446 let mut i = 0;
447 let mut in_word = false;
448
449 while i < bytes.len() {
450 if bytes[i].is_ascii_whitespace() {
451 if in_word {
452 count += 1;
453 in_word = false;
454 }
455 } else {
456 in_word = true;
457 }
458 i += 1;
459 }
460
461 if in_word {
462 count += 1;
463 }
464
465 count
466 }
467
468 #[allow(unsafe_code)]
469 pub const fn const_eval<const N: usize>(&self) -> [&'_ str; N] {
470 let bytes = self.0.as_bytes();
471 let mut buf: [&str; N] = [""; N];
472 let mut pos = 0;
473 let mut i = 0;
474
475 while i < bytes.len() {
476 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
478 i += 1;
479 }
480
481 if i >= bytes.len() {
482 break;
483 }
484
485 let start = i;
487
488 while i < bytes.len() && !bytes[i].is_ascii_whitespace() {
490 i += 1;
491 }
492
493 let word_bytes = subslice(bytes, start..i);
495 buf[pos] = unsafe { core::str::from_utf8_unchecked(word_bytes) };
496 pos += 1;
497 }
498
499 assert!(pos == N);
500 buf
501 }
502}
503
504pub const fn map_lines<const N: usize>(mut lines: [&str; N]) -> [&str; N] {
505 let mut i = 0;
506 while i < N {
507 let s = lines[i];
508 match crate::str::strip_suffix(s, "\r\n") {
509 Some(s) => lines[i] = s,
510 None => match crate::str::strip_suffix(s, "\n") {
511 Some(s) => lines[i] = s,
512 None => lines[i] = s,
513 },
514 }
515 i += 1;
516 }
517 lines
518}
519
/// Splits a constant string into lines at compile time.
///
/// Expands to `split_inclusive!($s, "\n")` followed by `map_lines`, which
/// strips a trailing `"\r\n"` or `"\n"` from each piece. The result is an
/// array of `&str`, one element per line.
#[macro_export]
macro_rules! split_lines {
    ($s: expr) => {{
        $crate::__ctfe::map_lines($crate::split_inclusive!($s, "\n"))
    }};
}
559
/// Splits a constant string by ASCII whitespace at compile time.
///
/// Expands to a block that first computes the number of words with
/// `SplitAsciiWhitespace::output_len`, then evaluates
/// `SplitAsciiWhitespace::const_eval` into a `[&str; OUTPUT_LEN]` constant and
/// yields that array. The argument is used inside a `const` item, so it must
/// be a constant expression.
#[macro_export]
macro_rules! split_ascii_whitespace {
    ($s: expr) => {{
        const INPUT: &str = $s;
        const OUTPUT_LEN: usize = $crate::__ctfe::SplitAsciiWhitespace(INPUT).output_len();
        const OUTPUT_BUF: [&str; OUTPUT_LEN] =
            $crate::__ctfe::SplitAsciiWhitespace(INPUT).const_eval();
        OUTPUT_BUF
    }};
}
598
/// Each test evaluates a macro at compile time and compares the resulting
/// array with what the corresponding `std` method yields at run time.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_split() {
        macro_rules! testcase {
            ($input: expr, $pat: expr) => {{
                const OUTPUT: &[&str] = &$crate::split!($input, $pat);

                let ans = $input.split($pat).collect::<Vec<_>>();
                assert_eq!(OUTPUT.len(), ans.len());
                assert_eq!(OUTPUT, &*ans, "ans = {:?}", ans);
            }};
        }

        // `&str` patterns, including the empty pattern and overlapping matches.
        testcase!("", "");
        testcase!("a中1😂1!", "");
        testcase!("a中1😂1!", "a");
        testcase!("a中1😂1!", "中");
        testcase!("a中1😂1!", "1");
        testcase!("a中1😂1!", "😂");
        testcase!("a中1😂1!", "!");
        testcase!("11111", "1");
        testcase!("222", "22");
        testcase!("啊哈哈哈", "哈哈");
        testcase!("some string:another string", ":");

        // `char` patterns.
        testcase!("11111", '1');
        testcase!("a中1😂1!", 'a');
        testcase!("a中1😂1!", '中');
        testcase!("a中1😂1!", '1');
        testcase!("a中1😂1!", '😂');
        testcase!("a中1😂1!", '!');
    }

    #[test]
    fn test_split_char_slice() {
        macro_rules! testcase_char_slice {
            ($input: expr, $chars: expr) => {{
                const CHARS: &[char] = $chars;
                const OUTPUT: &[&str] = &$crate::split!($input, CHARS);

                let ans = $input.split(CHARS).collect::<Vec<_>>();
                assert_eq!(
                    OUTPUT.len(),
                    ans.len(),
                    "Length mismatch for input: {:?}, chars: {:?}",
                    $input,
                    CHARS
                );
                assert_eq!(
                    OUTPUT, &*ans,
                    "Content mismatch for input: {:?}, chars: {:?}, expected: {:?}",
                    $input, CHARS, ans
                );
            }};
        }

        testcase_char_slice!("a,b,c", &[',']);
        testcase_char_slice!("hello", &[]);
        testcase_char_slice!("", &[]);
        testcase_char_slice!("", &[',']);

        testcase_char_slice!("hello,world;test", &[',', ';']);
        testcase_char_slice!("hello", &['x', 'y', 'z']);
        testcase_char_slice!("a,b,,c,", &[',']);
        testcase_char_slice!(";;;", &[';']);

        testcase_char_slice!("a中1😂1!", &['中', '😂']);
        testcase_char_slice!("hello世界test", &['世', '界']);

        testcase_char_slice!("one:two;three,four", &[':', ';', ',']);
    }

    #[test]
    fn test_split_inclusive() {
        macro_rules! testcase {
            ($input: expr, $pat: expr) => {{
                const OUTPUT: &[&str] = &$crate::split_inclusive!($input, $pat);

                let ans = $input.split_inclusive($pat).collect::<Vec<_>>();
                assert_eq!(OUTPUT.len(), ans.len());
                assert_eq!(OUTPUT, &*ans, "ans = {:?}", ans);
            }};
        }

        // Non-empty `&str` patterns.
        testcase!("", "a");
        testcase!("a中1😂1!", "a");
        testcase!("a中1😂1!", "中");
        testcase!("a中1😂1!", "1");
        testcase!("a中1😂1!", "😂");
        testcase!("a中1😂1!", "!");
        testcase!("11111", "1");
        testcase!("222", "22");
        testcase!("啊哈哈哈", "哈哈");
        testcase!("some string:another string", ":");

        // `char` patterns.
        testcase!("11111", '1');
        testcase!("a中1😂1!", 'a');
        testcase!("a中1😂1!", '中');
        testcase!("a中1😂1!", '1');
        testcase!("a中1😂1!", '😂');
        testcase!("a中1😂1!", '!');
    }

    #[test]
    fn test_split_inclusive_char_slice() {
        macro_rules! testcase_inclusive_char_slice {
            ($input: expr, $chars: expr) => {{
                const CHARS: &[char] = $chars;
                const OUTPUT: &[&str] = &$crate::split_inclusive!($input, CHARS);

                let ans = $input.split_inclusive(CHARS).collect::<Vec<_>>();
                assert_eq!(
                    OUTPUT.len(),
                    ans.len(),
                    "Length mismatch for input: {:?}, chars: {:?}",
                    $input,
                    CHARS
                );
                assert_eq!(
                    OUTPUT, &*ans,
                    "Content mismatch for input: {:?}, chars: {:?}, expected: {:?}",
                    $input, CHARS, ans
                );
            }};
        }

        testcase_inclusive_char_slice!("a,b,c", &[',']);
        testcase_inclusive_char_slice!("hello", &[]);
        testcase_inclusive_char_slice!("", &[]);
        // Same empty-input case the non-inclusive test covers.
        testcase_inclusive_char_slice!("", &[',']);

        testcase_inclusive_char_slice!("hello,world;test", &[',', ';']);
        testcase_inclusive_char_slice!("hello", &['x', 'y', 'z']);
        testcase_inclusive_char_slice!("a,b,,c,", &[',']);
        testcase_inclusive_char_slice!(";;;", &[';']);

        testcase_inclusive_char_slice!("a中1😂1!", &['中', '😂']);
        testcase_inclusive_char_slice!("hello世界test", &['世', '界']);

        testcase_inclusive_char_slice!("one:two;three,four", &[':', ';', ',']);
    }

    #[test]
    fn test_split_ascii_whitespace() {
        macro_rules! testcase {
            ($input: expr) => {{
                const OUTPUT: &[&str] = &$crate::split_ascii_whitespace!($input);

                let ans = $input.split_ascii_whitespace().collect::<Vec<_>>();
                assert_eq!(
                    OUTPUT.len(),
                    ans.len(),
                    "Length mismatch for input: {:?}",
                    $input
                );
                assert_eq!(
                    OUTPUT, &*ans,
                    "Content mismatch for input: {:?}, expected: {:?}",
                    $input, ans
                );
            }};
        }

        testcase!("");
        testcase!(" ");
        testcase!(" ");
        testcase!("hello");
        testcase!(" hello ");
        testcase!(" hello ");
        testcase!("hello world");
        testcase!(" hello world ");
        testcase!(" hello world ");

        testcase!("a\tb\nc\rd\x0Cf");
        testcase!(" \t\n\r\x0C ");
        testcase!("word1\t\t\tword2\n\n\nword3");

        testcase!("foo bar baz");
        testcase!("\tfoo\nbar\rbaz\x0C");
        testcase!(" a b c ");
        testcase!("\t\n\r\x0C");

        testcase!("single");
        testcase!("a");
        testcase!("a b");
        testcase!(" a b ");
    }
}