xref: /relibc/src/platform/redox/exec.rs (revision 041d1604b526a0b908d4222e39baa0030a94fd8f)
1 use crate::{
2     c_str::{CStr, CString},
3     core_io::{prelude::*, BufReader, SeekFrom},
4     fs::File,
5     header::{fcntl, string::strlen},
6     platform::{
7         sys::{S_ISGID, S_ISUID},
8         types::*,
9     },
10 };
11 
12 use redox_exec::{ExtraInfo, FdGuard, FexecResult};
13 use syscall::{data::Stat, error::*, flag::*};
14 
15 fn fexec_impl(
16     file: File,
17     path: &[u8],
18     args: &[&[u8]],
19     envs: &[&[u8]],
20     total_args_envs_size: usize,
21     extrainfo: &ExtraInfo,
22     interp_override: Option<redox_exec::InterpOverride>,
23 ) -> Result<usize> {
24     let fd = *file;
25     core::mem::forget(file);
26     let image_file = FdGuard::new(fd as usize);
27 
28     let open_via_dup = FdGuard::new(syscall::open("thisproc:current/open_via_dup", 0)?);
29     let memory = FdGuard::new(syscall::open("memory:", 0)?);
30 
31     let addrspace_selection_fd = match redox_exec::fexec_impl(
32         image_file,
33         open_via_dup,
34         &memory,
35         path,
36         args.iter().rev(),
37         envs.iter().rev(),
38         total_args_envs_size,
39         extrainfo,
40         interp_override,
41     )? {
42         FexecResult::Normal { addrspace_handle } => addrspace_handle,
43         FexecResult::Interp {
44             image_file,
45             open_via_dup,
46             path,
47             interp_override: new_interp_override,
48         } => {
49             drop(image_file);
50             drop(open_via_dup);
51             drop(memory);
52 
53             // According to elf(5), PT_INTERP requires that the interpreter path be
54             // null-terminated. Violating this should therefore give the "format error" ENOEXEC.
55             let path_cstr = CStr::from_bytes_with_nul(&path).map_err(|_| Error::new(ENOEXEC))?;
56 
57             return execve(
58                 path_cstr,
59                 ArgEnv::Parsed {
60                     total_args_envs_size,
61                     args,
62                     envs,
63                 },
64                 Some(new_interp_override),
65             );
66         }
67     };
68     drop(memory);
69 
70     // Dropping this FD will cause the address space switch.
71     drop(addrspace_selection_fd);
72 
73     unreachable!();
74 }
/// The two forms in which arguments and environment variables can be handed to `execve`.
pub enum ArgEnv<'a> {
    /// Raw C pointer arrays. `execve` walks both arrays until it reaches a null pointer and
    /// measures every entry with `strlen`.
    C {
        /// Null-terminated array of argument strings.
        argv: *const *mut c_char,
        /// Null-terminated array of environment strings.
        envp: *const *mut c_char,
    },
    /// Arguments and environment already split into byte slices.
    Parsed {
        /// Combined size of all arguments and environment strings, as computed by the
        /// producer of this value.
        total_args_envs_size: usize,
        /// Argument strings.
        args: &'a [&'a [u8]],
        /// Environment strings.
        envs: &'a [&'a [u8]],
    },
}
86 pub fn execve(
87     path: &CStr,
88     arg_env: ArgEnv,
89     interp_override: Option<redox_exec::InterpOverride>,
90 ) -> Result<usize> {
91     // NOTE: We must omit O_CLOEXEC and close manually, otherwise it will be closed before we
92     // have even read it!
93     let mut image_file = File::open(path, O_RDONLY as c_int).map_err(|_| Error::new(ENOENT))?;
94 
95     // With execve now being implemented in userspace, we need to check ourselves that this
96     // file is actually executable. While checking for read permission is unnecessary as the
97     // scheme will not allow us to read otherwise, the execute bit is completely unenforced. We
98     // have the permission to mmap executable memory and fill it with the program even if it is
99     // unset, so the best we can do is check that nothing is executed by accident.
100     //
101     // TODO: At some point we might have capabilities limiting the ability to allocate
102     // executable memory, and in that case we might use the `escalate:` scheme as we already do
103     // when the binary needs setuid/setgid.
104 
105     let mut stat = Stat::default();
106     syscall::fstat(*image_file as usize, &mut stat)?;
107     let uid = syscall::getuid()?;
108     let gid = syscall::getuid()?;
109 
110     let mode = if uid == stat.st_uid as usize {
111         (stat.st_mode >> 3 * 2) & 0o7
112     } else if gid == stat.st_gid as usize {
113         (stat.st_mode >> 3 * 1) & 0o7
114     } else {
115         stat.st_mode & 0o7
116     };
117 
118     if mode & 0o1 == 0o0 {
119         return Err(Error::new(EPERM));
120     }
121     let wants_setugid = stat.st_mode & ((S_ISUID | S_ISGID) as u16) != 0;
122 
123     let cwd: Box<[u8]> = super::path::clone_cwd().unwrap_or_default().into();
124 
125     // Count arguments
126     let mut len = 0;
127 
128     match arg_env {
129         ArgEnv::C { argv, .. } => unsafe {
130             while !(*argv.add(len)).is_null() {
131                 len += 1;
132             }
133         },
134         ArgEnv::Parsed { args, .. } => len = args.len(),
135     }
136 
137     let mut args: Vec<&[u8]> = Vec::with_capacity(len);
138 
139     // Read shebang (for example #!/bin/sh)
140     let mut _interpreter_path = None;
141     let is_interpreted = {
142         let mut read = 0;
143         let mut shebang = [0; 2];
144 
145         while read < 2 {
146             match image_file
147                 .read(&mut shebang)
148                 .map_err(|_| Error::new(ENOEXEC))?
149             {
150                 0 => break,
151                 i => read += i,
152             }
153         }
154         shebang == *b"#!"
155     };
156     // Since the fexec implementation is almost fully done in userspace, the kernel can no longer
157     // set UID/GID accordingly, and this code checking for them before using interfaces to upgrade
158     // UID/GID, can not be trusted. So we ask the `escalate:` scheme for help. Note that
159     // `escalate:` can be deliberately excluded from the scheme namespace to deny privilege
160     // escalation (such as su/sudo/doas) for untrusted processes.
161     //
162     // According to execve(2), Linux and most other UNIXes ignore setuid/setgid for interpreted
163     // executables and thereby simply keep the privileges as is. For compatibility we do that
164     // too.
165 
166     if is_interpreted {
167         // TODO: Does this support prepending args to the interpreter? E.g.
168         // #!/usr/bin/env python3
169 
170         // So, this file is interpreted.
171         // Then, read the actual interpreter:
172         let mut interpreter = Vec::new();
173         BufReader::new(&mut image_file)
174             .read_until(b'\n', &mut interpreter)
175             .map_err(|_| Error::new(EIO))?;
176         if interpreter.ends_with(&[b'\n']) {
177             interpreter.pop().unwrap();
178         }
179         let cstring = CString::new(interpreter).map_err(|_| Error::new(ENOEXEC))?;
180         image_file = File::open(&cstring, O_RDONLY as c_int).map_err(|_| Error::new(ENOENT))?;
181 
182         // Make sure path is kept alive long enough, and push it to the arguments
183         _interpreter_path = Some(cstring);
184         let path_ref = _interpreter_path.as_ref().unwrap();
185         args.push(path_ref.as_bytes());
186     } else {
187         image_file
188             .seek(SeekFrom::Start(0))
189             .map_err(|_| Error::new(EIO))?;
190     }
191 
192     let (total_args_envs_size, args, envs): (usize, Vec<_>, Vec<_>) = match arg_env {
193         ArgEnv::C { mut argv, mut envp } => unsafe {
194             let mut args_envs_size_without_nul = 0;
195 
196             // Arguments
197             while !argv.read().is_null() {
198                 let arg = argv.read();
199 
200                 let len = strlen(arg);
201                 args.push(core::slice::from_raw_parts(arg as *const u8, len));
202                 args_envs_size_without_nul += len;
203                 argv = argv.add(1);
204             }
205 
206             // Environment variables
207             let mut len = 0;
208             while !envp.add(len).read().is_null() {
209                 len += 1;
210             }
211 
212             let mut envs: Vec<&[u8]> = Vec::with_capacity(len);
213             while !envp.read().is_null() {
214                 let env = envp.read();
215 
216                 let len = strlen(env);
217                 envs.push(core::slice::from_raw_parts(env as *const u8, len));
218                 args_envs_size_without_nul += len;
219                 envp = envp.add(1);
220             }
221             (
222                 args_envs_size_without_nul + args.len() + envs.len(),
223                 args,
224                 envs,
225             )
226         },
227         ArgEnv::Parsed {
228             args: new_args,
229             envs,
230             total_args_envs_size,
231         } => {
232             let prev_size: usize = args.iter().map(|a| a.len()).sum();
233             args.extend(new_args);
234             (total_args_envs_size + prev_size, args, Vec::from(envs))
235         }
236     };
237 
238     // Close all O_CLOEXEC file descriptors. TODO: close_range?
239     {
240         // NOTE: This approach of implementing O_CLOEXEC will not work in multithreaded
241         // scenarios. While execve() is undefined according to POSIX if there exist sibling
242         // threads, it could still be allowed by keeping certain file descriptors and instead
243         // set the active file table.
244         let files_fd = File::new(syscall::open("thisproc:current/filetable", O_RDONLY)? as c_int);
245         for line in BufReader::new(files_fd).lines() {
246             let line = match line {
247                 Ok(l) => l,
248                 Err(_) => break,
249             };
250             let fd = match line.parse::<usize>() {
251                 Ok(f) => f,
252                 Err(_) => continue,
253             };
254 
255             let flags = syscall::fcntl(fd, F_GETFD, 0)?;
256 
257             if flags & O_CLOEXEC == O_CLOEXEC {
258                 let _ = syscall::close(fd);
259             }
260         }
261     }
262 
263     if !is_interpreted && wants_setugid {
264         // Make sure the last file descriptor not covered by O_CLOEXEC is not leaked.
265         drop(image_file);
266 
267         // We are now going to invoke `escalate:` rather than loading the program ourselves.
268         let escalate_fd = FdGuard::new(syscall::open("escalate:", O_WRONLY)?);
269 
270         // First, we write the path.
271         //
272         // TODO: For improved security, use a hypothetical SYS_DUP_FORWARD syscall to give the
273         // scheme our file descriptor. It can check through the kernel-overwritten stat.st_dev
274         // field that it pertains to a "trusted" scheme (i.e. of at least the privilege the
275         // new uid/gid has), although for now only root can open schemes. Passing a file
276         // descriptor and not a path will allow escalated to run in a limited namespace.
277         //
278         // TODO: Plus, at this point fexecve is not implemented (but specified in
279         // POSIX.1-2008), and to avoid bad syscalls such as fpath, passing a file descriptor
280         // would be better.
281         let _ = syscall::write(*escalate_fd, path.to_bytes());
282 
283         // Second, we write the flattened args and envs with NUL characters separating
284         // individual items. This can be copied directly into the new executable's memory.
285         let _ = syscall::write(*escalate_fd, &flatten_with_nul(args))?;
286         let _ = syscall::write(*escalate_fd, &flatten_with_nul(envs))?;
287         let _ = syscall::write(*escalate_fd, &cwd)?;
288 
289         // Closing will notify the scheme, and from that point we will no longer have control
290         // over this process (unless it fails). We do this manually since drop cannot handle
291         // errors.
292         let fd = *escalate_fd as usize;
293         core::mem::forget(escalate_fd);
294 
295         syscall::close(fd)?;
296 
297         unreachable!()
298     } else {
299         let extrainfo = ExtraInfo { cwd: Some(&cwd) };
300         fexec_impl(
301             image_file,
302             path.to_bytes(),
303             &args,
304             &envs,
305             total_args_envs_size,
306             &extrainfo,
307             interp_override,
308         )
309     }
310 }
/// Concatenates every item of `iter` into one byte buffer, appending a NUL byte after each
/// item (including the last one). An empty iterator yields an empty buffer.
fn flatten_with_nul<T>(iter: impl IntoIterator<Item = T>) -> Box<[u8]>
where
    T: AsRef<[u8]>,
{
    iter.into_iter()
        .fold(Vec::new(), |mut flat, item| {
            flat.extend(item.as_ref());
            flat.push(b'\0');
            flat
        })
        .into_boxed_slice()
}
322