1 use crate::{ 2 c_str::{CStr, CString}, 3 core_io::{prelude::*, BufReader, SeekFrom}, 4 fs::File, 5 header::{fcntl, string::strlen}, 6 platform::{ 7 sys::{S_ISGID, S_ISUID}, 8 types::*, 9 }, 10 }; 11 12 use redox_exec::{ExtraInfo, FdGuard, FexecResult}; 13 use syscall::{data::Stat, error::*, flag::*}; 14 15 fn fexec_impl( 16 file: File, 17 path: &[u8], 18 args: &[&[u8]], 19 envs: &[&[u8]], 20 total_args_envs_size: usize, 21 extrainfo: &ExtraInfo, 22 interp_override: Option<redox_exec::InterpOverride>, 23 ) -> Result<usize> { 24 let fd = *file; 25 core::mem::forget(file); 26 let image_file = FdGuard::new(fd as usize); 27 28 let open_via_dup = FdGuard::new(syscall::open("thisproc:current/open_via_dup", 0)?); 29 let memory = FdGuard::new(syscall::open("memory:", 0)?); 30 31 let addrspace_selection_fd = match redox_exec::fexec_impl( 32 image_file, 33 open_via_dup, 34 &memory, 35 path, 36 args.iter().rev(), 37 envs.iter().rev(), 38 total_args_envs_size, 39 extrainfo, 40 interp_override, 41 )? { 42 FexecResult::Normal { addrspace_handle } => addrspace_handle, 43 FexecResult::Interp { 44 image_file, 45 open_via_dup, 46 path, 47 interp_override: new_interp_override, 48 } => { 49 drop(image_file); 50 drop(open_via_dup); 51 drop(memory); 52 53 // According to elf(5), PT_INTERP requires that the interpreter path be 54 // null-terminated. Violating this should therefore give the "format error" ENOEXEC. 55 let path_cstr = CStr::from_bytes_with_nul(&path).map_err(|_| Error::new(ENOEXEC))?; 56 57 return execve( 58 path_cstr, 59 ArgEnv::Parsed { 60 total_args_envs_size, 61 args, 62 envs, 63 }, 64 Some(new_interp_override), 65 ); 66 } 67 }; 68 drop(memory); 69 70 // Dropping this FD will cause the address space switch. 71 drop(addrspace_selection_fd); 72 73 unreachable!(); 74 } 75 pub enum ArgEnv<'a> { 76 C { 77 argv: *const *mut c_char, 78 envp: *const *mut c_char, 79 }, 80 Parsed { 81 args: &'a [&'a [u8]], 82 envs: &'a [&'a [u8]], 83 total_args_envs_size: usize, 84 }, 85 } 86 pub fn execve( 87 path: &CStr, 88 arg_env: ArgEnv, 89 interp_override: Option<redox_exec::InterpOverride>, 90 ) -> Result<usize> { 91 // NOTE: We must omit O_CLOEXEC and close manually, otherwise it will be closed before we 92 // have even read it! 93 let mut image_file = File::open(path, O_RDONLY as c_int).map_err(|_| Error::new(ENOENT))?; 94 95 // With execve now being implemented in userspace, we need to check ourselves that this 96 // file is actually executable. While checking for read permission is unnecessary as the 97 // scheme will not allow us to read otherwise, the execute bit is completely unenforced. We 98 // have the permission to mmap executable memory and fill it with the program even if it is 99 // unset, so the best we can do is check that nothing is executed by accident. 100 // 101 // TODO: At some point we might have capabilities limiting the ability to allocate 102 // executable memory, and in that case we might use the `escalate:` scheme as we already do 103 // when the binary needs setuid/setgid. 104 105 let mut stat = Stat::default(); 106 syscall::fstat(*image_file as usize, &mut stat)?; 107 let uid = syscall::getuid()?; 108 let gid = syscall::getuid()?; 109 110 let mode = if uid == stat.st_uid as usize { 111 (stat.st_mode >> 3 * 2) & 0o7 112 } else if gid == stat.st_gid as usize { 113 (stat.st_mode >> 3 * 1) & 0o7 114 } else { 115 stat.st_mode & 0o7 116 }; 117 118 if mode & 0o1 == 0o0 { 119 return Err(Error::new(EPERM)); 120 } 121 let wants_setugid = stat.st_mode & ((S_ISUID | S_ISGID) as u16) != 0; 122 123 let cwd: Box<[u8]> = super::path::clone_cwd().unwrap_or_default().into(); 124 125 // Count arguments 126 let mut len = 0; 127 128 match arg_env { 129 ArgEnv::C { argv, .. } => unsafe { 130 while !(*argv.add(len)).is_null() { 131 len += 1; 132 } 133 }, 134 ArgEnv::Parsed { args, .. } => len = args.len(), 135 } 136 137 let mut args: Vec<&[u8]> = Vec::with_capacity(len); 138 139 // Read shebang (for example #!/bin/sh) 140 let mut _interpreter_path = None; 141 let is_interpreted = { 142 let mut read = 0; 143 let mut shebang = [0; 2]; 144 145 while read < 2 { 146 match image_file 147 .read(&mut shebang) 148 .map_err(|_| Error::new(ENOEXEC))? 149 { 150 0 => break, 151 i => read += i, 152 } 153 } 154 shebang == *b"#!" 155 }; 156 // Since the fexec implementation is almost fully done in userspace, the kernel can no longer 157 // set UID/GID accordingly, and this code checking for them before using interfaces to upgrade 158 // UID/GID, can not be trusted. So we ask the `escalate:` scheme for help. Note that 159 // `escalate:` can be deliberately excluded from the scheme namespace to deny privilege 160 // escalation (such as su/sudo/doas) for untrusted processes. 161 // 162 // According to execve(2), Linux and most other UNIXes ignore setuid/setgid for interpreted 163 // executables and thereby simply keep the privileges as is. For compatibility we do that 164 // too. 165 166 if is_interpreted { 167 // TODO: Does this support prepending args to the interpreter? E.g. 168 // #!/usr/bin/env python3 169 170 // So, this file is interpreted. 171 // Then, read the actual interpreter: 172 let mut interpreter = Vec::new(); 173 BufReader::new(&mut image_file) 174 .read_until(b'\n', &mut interpreter) 175 .map_err(|_| Error::new(EIO))?; 176 if interpreter.ends_with(&[b'\n']) { 177 interpreter.pop().unwrap(); 178 } 179 let cstring = CString::new(interpreter).map_err(|_| Error::new(ENOEXEC))?; 180 image_file = File::open(&cstring, O_RDONLY as c_int).map_err(|_| Error::new(ENOENT))?; 181 182 // Make sure path is kept alive long enough, and push it to the arguments 183 _interpreter_path = Some(cstring); 184 let path_ref = _interpreter_path.as_ref().unwrap(); 185 args.push(path_ref.as_bytes()); 186 } else { 187 image_file 188 .seek(SeekFrom::Start(0)) 189 .map_err(|_| Error::new(EIO))?; 190 } 191 192 let (total_args_envs_size, args, envs): (usize, Vec<_>, Vec<_>) = match arg_env { 193 ArgEnv::C { mut argv, mut envp } => unsafe { 194 let mut args_envs_size_without_nul = 0; 195 196 // Arguments 197 while !argv.read().is_null() { 198 let arg = argv.read(); 199 200 let len = strlen(arg); 201 args.push(core::slice::from_raw_parts(arg as *const u8, len)); 202 args_envs_size_without_nul += len; 203 argv = argv.add(1); 204 } 205 206 // Environment variables 207 let mut len = 0; 208 while !envp.add(len).read().is_null() { 209 len += 1; 210 } 211 212 let mut envs: Vec<&[u8]> = Vec::with_capacity(len); 213 while !envp.read().is_null() { 214 let env = envp.read(); 215 216 let len = strlen(env); 217 envs.push(core::slice::from_raw_parts(env as *const u8, len)); 218 args_envs_size_without_nul += len; 219 envp = envp.add(1); 220 } 221 ( 222 args_envs_size_without_nul + args.len() + envs.len(), 223 args, 224 envs, 225 ) 226 }, 227 ArgEnv::Parsed { 228 args: new_args, 229 envs, 230 total_args_envs_size, 231 } => { 232 let prev_size: usize = args.iter().map(|a| a.len()).sum(); 233 args.extend(new_args); 234 (total_args_envs_size + prev_size, args, Vec::from(envs)) 235 } 236 }; 237 238 // Close all O_CLOEXEC file descriptors. TODO: close_range? 239 { 240 // NOTE: This approach of implementing O_CLOEXEC will not work in multithreaded 241 // scenarios. While execve() is undefined according to POSIX if there exist sibling 242 // threads, it could still be allowed by keeping certain file descriptors and instead 243 // set the active file table. 244 let files_fd = File::new(syscall::open("thisproc:current/filetable", O_RDONLY)? as c_int); 245 for line in BufReader::new(files_fd).lines() { 246 let line = match line { 247 Ok(l) => l, 248 Err(_) => break, 249 }; 250 let fd = match line.parse::<usize>() { 251 Ok(f) => f, 252 Err(_) => continue, 253 }; 254 255 let flags = syscall::fcntl(fd, F_GETFD, 0)?; 256 257 if flags & O_CLOEXEC == O_CLOEXEC { 258 let _ = syscall::close(fd); 259 } 260 } 261 } 262 263 if !is_interpreted && wants_setugid { 264 // Make sure the last file descriptor not covered by O_CLOEXEC is not leaked. 265 drop(image_file); 266 267 // We are now going to invoke `escalate:` rather than loading the program ourselves. 268 let escalate_fd = FdGuard::new(syscall::open("escalate:", O_WRONLY)?); 269 270 // First, we write the path. 271 // 272 // TODO: For improved security, use a hypothetical SYS_DUP_FORWARD syscall to give the 273 // scheme our file descriptor. It can check through the kernel-overwritten stat.st_dev 274 // field that it pertains to a "trusted" scheme (i.e. of at least the privilege the 275 // new uid/gid has), although for now only root can open schemes. Passing a file 276 // descriptor and not a path will allow escalated to run in a limited namespace. 277 // 278 // TODO: Plus, at this point fexecve is not implemented (but specified in 279 // POSIX.1-2008), and to avoid bad syscalls such as fpath, passing a file descriptor 280 // would be better. 281 let _ = syscall::write(*escalate_fd, path.to_bytes()); 282 283 // Second, we write the flattened args and envs with NUL characters separating 284 // individual items. This can be copied directly into the new executable's memory. 285 let _ = syscall::write(*escalate_fd, &flatten_with_nul(args))?; 286 let _ = syscall::write(*escalate_fd, &flatten_with_nul(envs))?; 287 let _ = syscall::write(*escalate_fd, &cwd)?; 288 289 // Closing will notify the scheme, and from that point we will no longer have control 290 // over this process (unless it fails). We do this manually since drop cannot handle 291 // errors. 292 let fd = *escalate_fd as usize; 293 core::mem::forget(escalate_fd); 294 295 syscall::close(fd)?; 296 297 unreachable!() 298 } else { 299 let extrainfo = ExtraInfo { cwd: Some(&cwd) }; 300 fexec_impl( 301 image_file, 302 path.to_bytes(), 303 &args, 304 &envs, 305 total_args_envs_size, 306 &extrainfo, 307 interp_override, 308 ) 309 } 310 } 311 fn flatten_with_nul<T>(iter: impl IntoIterator<Item = T>) -> Box<[u8]> 312 where 313 T: AsRef<[u8]>, 314 { 315 let mut vec = Vec::new(); 316 for item in iter { 317 vec.extend(item.as_ref()); 318 vec.push(b'\0'); 319 } 320 vec.into_boxed_slice() 321 } 322