1 //! regex.h implementation, following http://pubs.opengroup.org/onlinepubs/7908799/xsh/regex.h.html 2 3 use crate::unix::header::string::strlen; 4 use alloc::{borrow::Cow, vec::Vec}; 5 use core::{mem, ptr, slice}; 6 use posix_regex::{ 7 compile::{Error as CompileError, Range, Token}, 8 PosixRegex, PosixRegexBuilder, 9 }; 10 11 pub type regoff_t = ::size_t; 12 13 #[repr(C)] 14 pub struct regex_t { 15 // Can't be a normal Vec<T> because then the struct size won't be known 16 // from C. 17 ptr: *mut ::c_void, 18 length: ::size_t, 19 capacity: ::size_t, 20 21 cflags: ::c_int, 22 re_nsub: ::size_t, 23 } 24 #[repr(C)] 25 pub struct regmatch_t { 26 rm_so: regoff_t, 27 rm_eo: regoff_t, 28 } 29 30 pub const REG_EXTENDED: ::c_int = 1; 31 pub const REG_ICASE: ::c_int = 2; 32 pub const REG_NOSUB: ::c_int = 4; 33 pub const REG_NEWLINE: ::c_int = 8; 34 pub const REG_NOTBOL: ::c_int = 16; 35 pub const REG_NOTEOL: ::c_int = 32; 36 37 pub const REG_NOMATCH: ::c_int = 1; 38 pub const REG_BADPAT: ::c_int = 2; 39 pub const REG_ECOLLATE: ::c_int = 3; 40 pub const REG_ECTYPE: ::c_int = 4; 41 pub const REG_EESCAPE: ::c_int = 5; 42 pub const REG_ESUBREG: ::c_int = 6; 43 pub const REG_EBRACK: ::c_int = 7; 44 pub const REG_ENOSYS: ::c_int = 8; 45 pub const REG_EPAREN: ::c_int = 9; 46 pub const REG_EBRACE: ::c_int = 10; 47 pub const REG_BADBR: ::c_int = 11; 48 pub const REG_ERANGE: ::c_int = 12; 49 pub const REG_ESPACE: ::c_int = 13; 50 pub const REG_BADRPT: ::c_int = 14; 51 52 #[no_mangle] 53 #[linkage = "weak"] // redefined in GIT 54 pub unsafe extern "C" fn regcomp(out: *mut regex_t, pat: *const ::c_char, cflags: ::c_int) -> ::c_int { 55 if cflags & REG_EXTENDED == REG_EXTENDED { 56 return REG_ENOSYS; 57 } 58 59 let pat = slice::from_raw_parts(pat as *const u8, strlen(pat)); 60 let res = PosixRegexBuilder::new(pat) 61 .with_default_classes() 62 .compile_tokens(); 63 64 match res { 65 Ok(mut branches) => { 66 let re_nsub = PosixRegex::new(Cow::Borrowed(&branches)).count_groups(); 67 *out = regex_t { 68 ptr: branches.as_mut_ptr() as *mut ::c_void, 69 length: branches.len(), 70 capacity: branches.capacity(), 71 72 cflags, 73 re_nsub, 74 }; 75 mem::forget(branches); 76 0 77 } 78 Err(CompileError::EmptyRepetition) 79 | Err(CompileError::IntegerOverflow) 80 | Err(CompileError::IllegalRange) => REG_BADBR, 81 Err(CompileError::UnclosedRepetition) => REG_EBRACE, 82 Err(CompileError::LeadingRepetition) => REG_BADRPT, 83 Err(CompileError::UnknownCollation) => REG_ECOLLATE, 84 Err(CompileError::UnknownClass(_)) => REG_ECTYPE, 85 Err(_) => REG_BADPAT, 86 } 87 } 88 89 #[no_mangle] 90 #[linkage = "weak"] // redefined in GIT 91 pub unsafe extern "C" fn regfree(regex: *mut regex_t) { 92 Vec::from_raw_parts( 93 (*regex).ptr as *mut Vec<(Token, Range)>, 94 (*regex).length, 95 (*regex).capacity, 96 ); 97 } 98 99 #[no_mangle] 100 #[linkage = "weak"] // redefined in GIT 101 pub unsafe extern "C" fn regexec( 102 regex: *const regex_t, 103 input: *const ::c_char, 104 nmatch: ::size_t, 105 pmatch: *mut regmatch_t, 106 eflags: ::c_int, 107 ) -> ::c_int { 108 if eflags & REG_EXTENDED == REG_EXTENDED { 109 return REG_ENOSYS; 110 } 111 112 let regex = &*regex; 113 114 // Allow specifying a compiler argument to the executor and vise versa 115 // because why not? 116 let flags = regex.cflags | eflags; 117 118 let input = slice::from_raw_parts(input as *const u8, strlen(input)); 119 let branches = slice::from_raw_parts(regex.ptr as *const Vec<(Token, Range)>, regex.length); 120 121 let matches = PosixRegex::new(Cow::Borrowed(&branches)) 122 .case_insensitive(flags & REG_ICASE == REG_ICASE) 123 .newline(flags & REG_NEWLINE == REG_NEWLINE) 124 .no_start(flags & REG_NOTBOL == REG_NOTBOL) 125 .no_end(flags & REG_NOTEOL == REG_NOTEOL) 126 .matches(input, Some(1)); 127 128 if !matches.is_empty() && eflags & REG_NOSUB != REG_NOSUB && !pmatch.is_null() && nmatch > 0 { 129 let first = &matches[0]; 130 131 for i in 0..nmatch { 132 let (start, end) = first.get(i).and_then(|&range| range).unwrap_or((!0, !0)); 133 *pmatch.add(i) = regmatch_t { 134 rm_so: start, 135 rm_eo: end, 136 }; 137 } 138 } 139 140 if matches.is_empty() { 141 REG_NOMATCH 142 } else { 143 0 144 } 145 } 146 147 #[no_mangle] 148 #[linkage = "weak"] // redefined in GIT 149 pub extern "C" fn regerror( 150 code: ::c_int, 151 _regex: *const regex_t, 152 out: *mut ::c_char, 153 max: ::size_t, 154 ) -> ::size_t { 155 let string = match code { 156 0 => "No error\0", 157 REG_NOMATCH => "No match\0", 158 REG_BADPAT => "Invalid regexp\0", 159 REG_ECOLLATE => "Unknown collating element\0", 160 REG_ECTYPE => "Unknown character class name\0", 161 REG_EESCAPE => "Trailing backslash\0", 162 REG_ESUBREG => "Invalid back reference\0", 163 REG_EBRACK => "Missing ']'\0", 164 REG_ENOSYS => "Unsupported operation\0", 165 REG_EPAREN => "Missing ')'\0", 166 REG_EBRACE => "Missing '}'\0", 167 REG_BADBR => "Invalid contents of {}\0", 168 REG_ERANGE => "Invalid character range\0", 169 REG_ESPACE => "Out of memory\0", 170 REG_BADRPT => "Repetition not preceded by valid expression\0", 171 _ => "Unknown error\0", 172 }; 173 174 unsafe { 175 ptr::copy_nonoverlapping( 176 string.as_ptr(), 177 out as *mut u8, 178 string.len().min(max as usize), 179 ); 180 } 181 182 string.len() 183 } 184