xref: /drstd/dlibc/src/unix/header/regex/mod.rs (revision 86982c5e9b2eaa583327251616ee822c36288824)
1 //! regex.h implementation, following http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html
2 
3 use crate::unix::header::string::strlen;
4 use alloc::{borrow::Cow, vec::Vec};
5 use core::{mem, ptr, slice};
6 use posix_regex::{
7     compile::{Error as CompileError, Range, Token},
8     PosixRegex, PosixRegexBuilder,
9 };
10 
/// Byte offset type used by the `rm_so` / `rm_eo` fields of `regmatch_t`.
///
/// This alias is unsigned here; `regexec` stores `!0` (all bits set) in
/// both offsets of a `regmatch_t` slot that has no corresponding match.
pub type regoff_t = ::size_t;
12 
/// Compiled regular expression as seen from C.
///
/// `regcomp` fills this in, `regexec` reads it and `regfree` releases the
/// allocation described by `ptr` / `length` / `capacity`.
#[repr(C)]
pub struct regex_t {
    // Can't be a normal Vec<T> because then the struct size won't be known
    // from C.
    // These three fields are the raw parts (data pointer, length, capacity)
    // of the `Vec<Vec<(Token, Range)>>` produced by the regex compiler;
    // `regfree` rebuilds the Vec from them in order to drop it.
    ptr: *mut ::c_void,
    length: ::size_t,
    capacity: ::size_t,

    // The `cflags` value that was passed to `regcomp`; `regexec` ORs it with
    // its own `eflags`.
    cflags: ::c_int,
    // Group count reported by `PosixRegex::count_groups` at compile time.
    re_nsub: ::size_t,
}
/// One match slot written by `regexec` into the caller's `pmatch` array.
///
/// Both offsets are set to `!0` when the slot has no corresponding match.
#[repr(C)]
pub struct regmatch_t {
    // Start offset of the match, as reported by the matcher.
    rm_so: regoff_t,
    // End offset of the match, as reported by the matcher.
    rm_eo: regoff_t,
}
29 
// Flag bits accepted by `regcomp` (cflags) and `regexec` (eflags). The two
// sets are ORed together inside `regexec`, so every bit must stay distinct.

/// Extended syntax request; not supported here (`regcomp` and `regexec`
/// return `REG_ENOSYS` when it is set).
pub const REG_EXTENDED: ::c_int = 1;
/// Enables case-insensitive matching.
pub const REG_ICASE: ::c_int = 2;
/// Asks `regexec` not to fill in the `pmatch` array.
pub const REG_NOSUB: ::c_int = 4;
/// Enables the matcher's newline-sensitive mode.
pub const REG_NEWLINE: ::c_int = 8;
/// Forwarded to the matcher's `no_start` option.
pub const REG_NOTBOL: ::c_int = 16;
/// Forwarded to the matcher's `no_end` option.
pub const REG_NOTEOL: ::c_int = 32;
36 
// Error codes returned by `regcomp` / `regexec`; `regerror` maps each of them
// to a human-readable message. `0` means success.

/// `regexec` found no match.
pub const REG_NOMATCH: ::c_int = 1;
/// Invalid regexp (also the fallback for compile errors without a more
/// specific code).
pub const REG_BADPAT: ::c_int = 2;
/// Unknown collating element.
pub const REG_ECOLLATE: ::c_int = 3;
/// Unknown character class name.
pub const REG_ECTYPE: ::c_int = 4;
/// Trailing backslash.
pub const REG_EESCAPE: ::c_int = 5;
/// Invalid back reference.
pub const REG_ESUBREG: ::c_int = 6;
/// Missing ']'.
pub const REG_EBRACK: ::c_int = 7;
/// Unsupported operation (returned when `REG_EXTENDED` is requested).
pub const REG_ENOSYS: ::c_int = 8;
/// Missing ')'.
pub const REG_EPAREN: ::c_int = 9;
/// Missing '}'.
pub const REG_EBRACE: ::c_int = 10;
/// Invalid contents of {}.
pub const REG_BADBR: ::c_int = 11;
/// Invalid character range.
pub const REG_ERANGE: ::c_int = 12;
/// Out of memory.
pub const REG_ESPACE: ::c_int = 13;
/// Repetition not preceded by valid expression.
pub const REG_BADRPT: ::c_int = 14;
51 
52 #[no_mangle]
53 #[linkage = "weak"] // redefined in GIT
54 pub unsafe extern "C" fn regcomp(out: *mut regex_t, pat: *const ::c_char, cflags: ::c_int) -> ::c_int {
55     if cflags & REG_EXTENDED == REG_EXTENDED {
56         return REG_ENOSYS;
57     }
58 
59     let pat = slice::from_raw_parts(pat as *const u8, strlen(pat));
60     let res = PosixRegexBuilder::new(pat)
61         .with_default_classes()
62         .compile_tokens();
63 
64     match res {
65         Ok(mut branches) => {
66             let re_nsub = PosixRegex::new(Cow::Borrowed(&branches)).count_groups();
67             *out = regex_t {
68                 ptr: branches.as_mut_ptr() as *mut ::c_void,
69                 length: branches.len(),
70                 capacity: branches.capacity(),
71 
72                 cflags,
73                 re_nsub,
74             };
75             mem::forget(branches);
76             0
77         }
78         Err(CompileError::EmptyRepetition)
79         | Err(CompileError::IntegerOverflow)
80         | Err(CompileError::IllegalRange) => REG_BADBR,
81         Err(CompileError::UnclosedRepetition) => REG_EBRACE,
82         Err(CompileError::LeadingRepetition) => REG_BADRPT,
83         Err(CompileError::UnknownCollation) => REG_ECOLLATE,
84         Err(CompileError::UnknownClass(_)) => REG_ECTYPE,
85         Err(_) => REG_BADPAT,
86     }
87 }
88 
89 #[no_mangle]
90 #[linkage = "weak"] // redefined in GIT
91 pub unsafe extern "C" fn regfree(regex: *mut regex_t) {
92     Vec::from_raw_parts(
93         (*regex).ptr as *mut Vec<(Token, Range)>,
94         (*regex).length,
95         (*regex).capacity,
96     );
97 }
98 
99 #[no_mangle]
100 #[linkage = "weak"] // redefined in GIT
101 pub unsafe extern "C" fn regexec(
102     regex: *const regex_t,
103     input: *const ::c_char,
104     nmatch: ::size_t,
105     pmatch: *mut regmatch_t,
106     eflags: ::c_int,
107 ) -> ::c_int {
108     if eflags & REG_EXTENDED == REG_EXTENDED {
109         return REG_ENOSYS;
110     }
111 
112     let regex = &*regex;
113 
114     // Allow specifying a compiler argument to the executor and vise versa
115     // because why not?
116     let flags = regex.cflags | eflags;
117 
118     let input = slice::from_raw_parts(input as *const u8, strlen(input));
119     let branches = slice::from_raw_parts(regex.ptr as *const Vec<(Token, Range)>, regex.length);
120 
121     let matches = PosixRegex::new(Cow::Borrowed(&branches))
122         .case_insensitive(flags & REG_ICASE == REG_ICASE)
123         .newline(flags & REG_NEWLINE == REG_NEWLINE)
124         .no_start(flags & REG_NOTBOL == REG_NOTBOL)
125         .no_end(flags & REG_NOTEOL == REG_NOTEOL)
126         .matches(input, Some(1));
127 
128     if !matches.is_empty() && eflags & REG_NOSUB != REG_NOSUB && !pmatch.is_null() && nmatch > 0 {
129         let first = &matches[0];
130 
131         for i in 0..nmatch {
132             let (start, end) = first.get(i).and_then(|&range| range).unwrap_or((!0, !0));
133             *pmatch.add(i) = regmatch_t {
134                 rm_so: start,
135                 rm_eo: end,
136             };
137         }
138     }
139 
140     if matches.is_empty() {
141         REG_NOMATCH
142     } else {
143         0
144     }
145 }
146 
147 #[no_mangle]
148 #[linkage = "weak"] // redefined in GIT
149 pub extern "C" fn regerror(
150     code: ::c_int,
151     _regex: *const regex_t,
152     out: *mut ::c_char,
153     max: ::size_t,
154 ) -> ::size_t {
155     let string = match code {
156         0 => "No error\0",
157         REG_NOMATCH => "No match\0",
158         REG_BADPAT => "Invalid regexp\0",
159         REG_ECOLLATE => "Unknown collating element\0",
160         REG_ECTYPE => "Unknown character class name\0",
161         REG_EESCAPE => "Trailing backslash\0",
162         REG_ESUBREG => "Invalid back reference\0",
163         REG_EBRACK => "Missing ']'\0",
164         REG_ENOSYS => "Unsupported operation\0",
165         REG_EPAREN => "Missing ')'\0",
166         REG_EBRACE => "Missing '}'\0",
167         REG_BADBR => "Invalid contents of {}\0",
168         REG_ERANGE => "Invalid character range\0",
169         REG_ESPACE => "Out of memory\0",
170         REG_BADRPT => "Repetition not preceded by valid expression\0",
171         _ => "Unknown error\0",
172     };
173 
174     unsafe {
175         ptr::copy_nonoverlapping(
176             string.as_ptr(),
177             out as *mut u8,
178             string.len().min(max as usize),
179         );
180     }
181 
182     string.len()
183 }
184