diff --git a/fs/filesystems/procfs/src/task_nodes/mounts.rs b/fs/filesystems/procfs/src/task_nodes/mounts.rs index 415a41d4548be8e75404cd8659cc71e42b298548..0ae303f0db25a61b30240564a080c2e69b43c500 100644 --- a/fs/filesystems/procfs/src/task_nodes/mounts.rs +++ b/fs/filesystems/procfs/src/task_nodes/mounts.rs @@ -10,15 +10,14 @@ use alloc::{ }; use kthread::current_process_state; -use kvfs::{MountFlags, Mountpoint, ST_RDONLY}; +use kvfs::{DeviceId, MountFlags, Mountpoint, ST_RDONLY}; use kvfs_simple::{DirMapping, SeqFileNode, SeqIterator, SimpleFs}; #[derive(Clone)] pub(crate) struct ProcMountEntry { mount_id: u64, parent_id: u64, - major: u32, - minor: u32, + device: DeviceId, root: String, mount_point: String, fs_type: String, @@ -70,7 +69,10 @@ fn show_mounts(item: &ProcMountEntry, buf: &mut String) -> core::fmt::Result { let mount_options = format_mount_options(item.mnt_flags, item.mount_ro || item.super_ro); buf.push_str(&format!( "{} {} {} {} 0 0\n", - item.source, item.mount_point, item.fs_type, mount_options + escape_mount_field(&item.source, EscapeMountField::Devname), + escape_mount_field(&item.mount_point, EscapeMountField::Path), + escape_mount_field(&item.fs_type, EscapeMountField::Devname), + mount_options )); Ok(()) } @@ -82,13 +84,13 @@ fn show_mountinfo(item: &ProcMountEntry, buf: &mut String) -> core::fmt::Result "{} {} {}:{} {} {} {} - {} {} {}\n", item.mount_id, item.parent_id, - item.major, - item.minor, - item.root, - item.mount_point, + item.device.major(), + item.device.minor(), + escape_mount_field(&item.root, EscapeMountField::Path), + escape_mount_field(&item.mount_point, EscapeMountField::Path), mount_options, - item.fs_type, - item.source, + escape_mount_field(&item.fs_type, EscapeMountField::Devname), + escape_mount_field(&item.source, EscapeMountField::Devname), super_options, )); Ok(()) @@ -142,50 +144,63 @@ fn mount_source_for(is_root: bool, fs_type: &str) -> String { } fn mount_source(mount: &Arc, fs_type: &str) -> String { - mount_source_for(mount.is_root(), fs_type) + let source = mount.source(); + if source.is_empty() { + mount_source_for(mount.is_root(), fs_type) + } else { + source.to_string() + } +} + +#[derive(Clone, Copy)] +enum EscapeMountField { + Devname, + Path, } -fn push_mount_option(options: &mut Vec<&'static str>, enabled: bool, name: &'static str) { - if enabled { - options.push(name); +fn escape_mount_field(value: &str, field: EscapeMountField) -> String { + let mut result = String::new(); + let mut unescaped_start = 0; + + for (index, byte) in value.bytes().enumerate() { + let should_escape_hash = matches!(field, EscapeMountField::Devname) && byte == b'#'; + if matches!(byte, b' ' | b'\t' | b'\n' | b'\\') || should_escape_hash { + result.push_str(&value[unescaped_start..index]); + push_octal_escape(&mut result, byte); + unescaped_start = index + 1; + } } + + result.push_str(&value[unescaped_start..]); + result } +fn push_octal_escape(buf: &mut String, byte: u8) { + buf.push('\\'); + buf.push((b'0' + ((byte >> 6) & 0o7)) as char); + buf.push((b'0' + ((byte >> 3) & 0o7)) as char); + buf.push((b'0' + (byte & 0o7)) as char); +} + +const MOUNT_OPTION_NAMES: &[(MountFlags, &str)] = &[ + (MountFlags::NOSUID, "nosuid"), + (MountFlags::NODEV, "nodev"), + (MountFlags::NOEXEC, "noexec"), + (MountFlags::NOATIME, "noatime"), + (MountFlags::NODIRATIME, "nodiratime"), + (MountFlags::RELATIME, "relatime"), + (MountFlags::NOSYMFOLLOW, "nosymfollow"), +]; + /// Build the mount-option string from per-mount flags and the supplied readonly state. fn format_mount_options(mnt_flags: MountFlags, is_ro: bool) -> String { - let mut options = Vec::with_capacity(8); - options.push(if is_ro { "ro" } else { "rw" }); - push_mount_option( - &mut options, - mnt_flags.contains(MountFlags::NOSUID), - "nosuid", - ); - push_mount_option(&mut options, mnt_flags.contains(MountFlags::NODEV), "nodev"); - push_mount_option( - &mut options, - mnt_flags.contains(MountFlags::NOEXEC), - "noexec", - ); - push_mount_option( - &mut options, - mnt_flags.contains(MountFlags::NOATIME), - "noatime", - ); - push_mount_option( - &mut options, - mnt_flags.contains(MountFlags::NODIRATIME), - "nodiratime", - ); - push_mount_option( - &mut options, - mnt_flags.contains(MountFlags::RELATIME), - "relatime", - ); - push_mount_option( - &mut options, - mnt_flags.contains(MountFlags::NOSYMFOLLOW), - "nosymfollow", - ); + let options = core::iter::once(if is_ro { "ro" } else { "rw" }) + .chain( + MOUNT_OPTION_NAMES + .iter() + .filter_map(|(flag, name)| mnt_flags.contains(*flag).then_some(*name)), + ) + .collect::>(); options.join(",") } @@ -204,8 +219,7 @@ fn make_mount_entry(mount: &Arc, parent_id: u64, mount_id: u64) -> P ProcMountEntry { mount_id, parent_id, - major: 0, - minor: mount.device() as u32, + device: DeviceId::new(0, mount.device() as u32), root: "/".to_string(), mount_point: mount_point.clone(), source: mount_source(mount, &fs_type), @@ -258,7 +272,7 @@ pub(crate) fn add_root_entries(root: &mut DirMapping, fs: Arc) { #[cfg(unittest)] mod tests { - use kvfs::MountFlags; + use kvfs::{DeviceId, MountFlags}; use unittest::{assert_eq, def_test}; use super::ProcMountEntry; @@ -317,13 +331,24 @@ mod tests { assert!(opts.contains("nosymfollow")); } + #[def_test] + fn test_escape_mount_field_uses_octal_escapes() { + assert_eq!( + super::escape_mount_field("a b\tc\nd\\e#f", super::EscapeMountField::Path), + "a\\040b\\011c\\012d\\134e#f" + ); + assert_eq!( + super::escape_mount_field("a b\tc\nd\\e#f", super::EscapeMountField::Devname), + "a\\040b\\011c\\012d\\134e\\043f" + ); + } + #[def_test] fn test_mounts_uses_effective_readonly_options() { let entry = ProcMountEntry { mount_id: 1, parent_id: 0, - major: 0, - minor: 1, + device: DeviceId::new(0, 1), root: "/".into(), mount_point: "/".into(), fs_type: "ext4".into(), @@ -339,13 +364,33 @@ mod tests { assert_eq!(buf, "rootfs / ext4 ro 0 0\n"); } + #[def_test] + fn test_mounts_escapes_mount_table_fields() { + let entry = ProcMountEntry { + mount_id: 1, + parent_id: 0, + device: DeviceId::new(0, 1), + root: "/".into(), + mount_point: "/mnt/a b".into(), + fs_type: "tmp fs".into(), + source: "dev#x\\y".into(), + mnt_flags: MountFlags::empty(), + mount_ro: false, + super_ro: false, + }; + let mut buf = alloc::string::String::new(); + + super::show_mounts(&entry, &mut buf).unwrap(); + + assert_eq!(buf, "dev\\043x\\134y /mnt/a\\040b tmp\\040fs rw 0 0\n"); + } + #[def_test] fn test_mountinfo_separates_mount_and_super_readonly() { let entry = ProcMountEntry { mount_id: 1, parent_id: 0, - major: 0, - minor: 1, + device: DeviceId::new(0, 1), root: "/".into(), mount_point: "/".into(), fs_type: "ext4".into(), @@ -360,4 +405,28 @@ mod tests { assert_eq!(buf, "1 0 0:1 / / rw - ext4 rootfs ro\n"); } + + #[def_test] + fn test_mountinfo_escapes_mount_table_fields() { + let entry = ProcMountEntry { + mount_id: 1, + parent_id: 0, + device: DeviceId::new(0, 1), + root: "/root dir".into(), + mount_point: "/mnt/a b".into(), + fs_type: "tmp fs".into(), + source: "dev#x\\y".into(), + mnt_flags: MountFlags::empty(), + mount_ro: false, + super_ro: false, + }; + let mut buf = alloc::string::String::new(); + + super::show_mountinfo(&entry, &mut buf).unwrap(); + + assert_eq!( + buf, + "1 0 0:1 /root\\040dir /mnt/a\\040b rw - tmp\\040fs dev\\043x\\134y rw\n" + ); + } } diff --git a/fs/foundation/kvfs/src/mount.rs b/fs/foundation/kvfs/src/mount.rs index 02e66ce5497e70ae383fc5739077e02e7f78c31d..cd7673f15bd75805a2848dd520929ed2df501814 100644 --- a/fs/foundation/kvfs/src/mount.rs +++ b/fs/foundation/kvfs/src/mount.rs @@ -66,6 +66,8 @@ pub struct Mountpoint { device: u64, /// Per-mount flags. flags: MountFlags, + /// User-visible source string for this mount. + source: String, /// Mount that this one covers (for overmount). /// /// When a new mount is created at a location that already has a mount, @@ -84,6 +86,21 @@ impl Mountpoint { fs: &Filesystem, location_in_parent: Option, flags: MountFlags, + ) -> Arc { + let source = if location_in_parent.is_none() { + "rootfs" + } else { + fs.name() + }; + Self::new_with_flags_and_source(fs, location_in_parent, flags, source) + } + + /// Creates a new mountpoint with per-mount flags and source metadata. + pub fn new_with_flags_and_source( + fs: &Filesystem, + location_in_parent: Option, + flags: MountFlags, + source: impl Into, ) -> Arc { static DEVICE_COUNTER: AtomicU64 = AtomicU64::new(1); @@ -94,6 +111,7 @@ impl Mountpoint { child_mounts: Mutex::default(), device: DEVICE_COUNTER.fetch_add(1, Ordering::Relaxed), flags, + source: source.into(), covers: Mutex::default(), }) } @@ -108,6 +126,15 @@ impl Mountpoint { Self::new_with_flags(fs, None, flags) } + /// Creates a root mountpoint with per-mount flags and source metadata. + pub fn new_root_with_flags_and_source( + fs: &Filesystem, + flags: MountFlags, + source: impl Into, + ) -> Arc { + Self::new_with_flags_and_source(fs, None, flags, source) + } + /// Return a `Location` representing the mountpoint root. pub fn root_location(self: &Arc) -> Location { Location::new(self.clone(), self.root.clone()) @@ -164,6 +191,11 @@ impl Mountpoint { self.flags } + /// Returns this mountpoint's user-visible source string. + pub fn source(&self) -> &str { + &self.source + } + /// Returns whether this mountpoint is mounted read-only. pub fn is_readonly(&self) -> bool { self.flags.contains(MountFlags::RDONLY) @@ -479,9 +511,19 @@ impl Location { &self, fs: &Filesystem, flags: MountFlags, + ) -> VfsResult> { + self.mount_with_flags_and_source(fs, flags, fs.name()) + } + + /// Mounts a filesystem with per-mount flags and source metadata. + pub fn mount_with_flags_and_source( + &self, + fs: &Filesystem, + flags: MountFlags, + source: impl Into, ) -> VfsResult> { let mut mountpoint = self.entry.as_dir()?.mount_at_this_dir.lock(); - let result = Mountpoint::new_with_flags(fs, Some(self.clone()), flags); + let result = Mountpoint::new_with_flags_and_source(fs, Some(self.clone()), flags, source); // Overmount: the new mount covers any existing mount at this location. // The old mount is stashed so it can be restored on unmount. if let Some(old) = mountpoint.take() { @@ -504,6 +546,15 @@ impl Location { /// tail matches `mount_with_flags`; `self.child_mounts` is a distinct /// instance so no deadlock. The `child_mounts` check is done under /// `mount_at_this_dir` to close the TOCTOU window. + /// + /// `flush()` also runs before releasing `mount_at_this_dir`. This keeps the + /// mount visible if writeback fails, which matches the current VFS contract + /// better than detaching first and reattaching on error. Linux can split + /// this more cleanly because `umount_tree()` detaches mounts while mnt/super + /// references keep teardown alive, and `generic_shutdown_super()` later + /// performs filesystem sync. This VFS does not have that state machine yet; + /// once it grows detached-mount state and active superblock references, this + /// long filesystem callback should move out from under the mountpoint lock. pub fn unmount(&self) -> VfsResult<()> { if !self.is_root_of_mount() { return Err(VfsError::InvalidInput); @@ -511,34 +562,39 @@ impl Location { if !self.entry.ptr_eq(&self.mountpoint.root) { return Err(VfsError::InvalidInput); } - if let Some(parent_loc) = &self.mountpoint.location { - let mut mount_slot = parent_loc.entry.as_dir()?.mount_at_this_dir.lock(); - // Guard against overmount race: self must still be the visible mount - // at this dentry after we acquired the lock. - if !mount_slot - .as_ref() - .is_some_and(|m| Arc::ptr_eq(m, &self.mountpoint)) - { - return Err(VfsError::InvalidInput); - } - // Re-check child_mounts under mount_slot to close the TOCTOU window - // between the earlier check and acquiring mount_slot. - if !self.mountpoint.child_mounts.lock().is_empty() { - return Err(VfsError::ResourceBusy); + let Some(parent_loc) = &self.mountpoint.location else { + return Err(VfsError::InvalidInput); + }; + + let mut mount_slot = parent_loc.entry.as_dir()?.mount_at_this_dir.lock(); + // Guard against overmount race: self must still be the visible mount + // at this dentry after we acquired the lock. + if !mount_slot + .as_ref() + .is_some_and(|m| Arc::ptr_eq(m, &self.mountpoint)) + { + return Err(VfsError::InvalidInput); + } + // Re-check child_mounts under mount_slot to close the TOCTOU window + // between the earlier check and acquiring mount_slot. + if !self.mountpoint.child_mounts.lock().is_empty() { + return Err(VfsError::ResourceBusy); + } + + self.filesystem().flush()?; + + let mut parent_children = parent_loc.mountpoint.child_mounts.lock(); + let covered = self.mountpoint.covers.lock().take(); + *mount_slot = covered.clone(); + match covered { + Some(ref m) => { + parent_children.insert(parent_loc.entry.key(), Arc::downgrade(m)); } - let mut parent_children = parent_loc.mountpoint.child_mounts.lock(); - let covered = self.mountpoint.covers.lock().take(); - *mount_slot = covered.clone(); - match covered { - Some(ref m) => { - parent_children.insert(parent_loc.entry.key(), Arc::downgrade(m)); - } - None => { - parent_children.remove(&parent_loc.entry.key()); - } + None => { + parent_children.remove(&parent_loc.entry.key()); } - // Drop parent locks before forget. Order matches mount_with_flags. } + // Drop parent locks before forget. Order matches mount_with_flags. // forget() after parent state is committed — if the parent update // failed we must not have destroyed the dentry cache prematurely. self.entry.as_dir()?.forget(); @@ -546,6 +602,11 @@ impl Location { } /// Recursively unmount this filesystem and all children. + /// + /// This is the mount-tree teardown path used by kernel shutdown. Unlike + /// [`Self::unmount`], it may be called on the global root mount. The root + /// mount has no parent mountpoint to detach from, so this method only + /// detaches its children and leaves root filesystem flushing to the caller. pub fn unmount_all(&self) -> VfsResult<()> { if !self.is_root_of_mount() { return Err(VfsError::InvalidInput); @@ -553,12 +614,14 @@ impl Location { let mut children = self.mountpoint.child_mounts.lock(); let remaining = mem::take(&mut *children); drop(children); - let mut failed = false; + let mut first_error = None; for (key, child) in remaining { if let Some(m) = child.upgrade() - && let Err(_e) = m.root_location().unmount_all() + && let Err(error) = m.root_location().unmount_all() { - failed = true; + if first_error.is_none() { + first_error = Some(error); + } // Re-insert so the child is not orphaned. self.mountpoint .child_mounts @@ -566,10 +629,13 @@ impl Location { .insert(key, Arc::downgrade(&m)); } } - if failed { - return Err(VfsError::ResourceBusy); + if let Some(error) = first_error { + return Err(error); + } + if self.mountpoint.location.is_some() { + self.unmount()?; } - self.unmount() + Ok(()) } } @@ -585,7 +651,11 @@ mod tests { extern crate alloc; use alloc::{string::String, sync::Arc}; - use core::{any::Any, time::Duration}; + use core::{ + any::Any, + sync::atomic::{AtomicUsize, Ordering}, + time::Duration, + }; use unittest::{assert, assert_eq, def_test}; @@ -597,6 +667,40 @@ mod tests { struct MockFilesystem { mount_flags: u32, + flush_state: Arc, + } + + struct MockFlushState { + count: AtomicUsize, + error: Option, + } + + impl MockFlushState { + fn ok() -> Arc { + Arc::new(Self { + count: AtomicUsize::new(0), + error: None, + }) + } + + fn with_error(error: VfsError) -> Arc { + Arc::new(Self { + count: AtomicUsize::new(0), + error: Some(error), + }) + } + + fn count(&self) -> usize { + self.count.load(Ordering::Relaxed) + } + + fn flush(&self) -> VfsResult<()> { + self.count.fetch_add(1, Ordering::Relaxed); + if let Some(error) = self.error { + return Err(error); + } + Ok(()) + } } impl FilesystemOps for MockFilesystem { @@ -606,7 +710,13 @@ mod tests { fn root_dir(&self) -> DirEntry { DirEntry::new_dir( - |_| DirNode::new(Arc::new(MockDirNodeOps::new(self.mount_flags, 1))), + |_| { + DirNode::new(Arc::new(MockDirNodeOps::new( + self.mount_flags, + self.flush_state.clone(), + 1, + ))) + }, Reference::root(), ) } @@ -614,16 +724,25 @@ mod tests { fn stat(&self) -> VfsResult { statfs(self.mount_flags) } + + fn flush(&self) -> VfsResult<()> { + self.flush_state.flush() + } } struct MockDirNodeOps { mount_flags: u32, + flush_state: Arc, inode: u64, } impl MockDirNodeOps { - fn new(mount_flags: u32, inode: u64) -> Self { - Self { mount_flags, inode } + fn new(mount_flags: u32, flush_state: Arc, inode: u64) -> Self { + Self { + mount_flags, + flush_state, + inode, + } } } @@ -680,6 +799,10 @@ mod tests { fn stat(&self) -> VfsResult { statfs(self.mount_flags) } + + fn flush(&self) -> VfsResult<()> { + self.flush_state.flush() + } } impl DirNodeOps for MockDirNodeOps { @@ -696,6 +819,7 @@ mod tests { |_| { DirNode::new(Arc::new(MockDirNodeOps::new( self.mount_flags, + self.flush_state.clone(), self.inode + 1, ))) }, @@ -726,7 +850,20 @@ mod tests { } fn mock_filesystem(mount_flags: u32) -> Filesystem { - Filesystem::new(Arc::new(MockFilesystem { mount_flags })) + mock_filesystem_with_flush(mount_flags, MockFlushState::ok()).0 + } + + fn mock_filesystem_with_flush( + mount_flags: u32, + flush_state: Arc, + ) -> (Filesystem, Arc) { + ( + Filesystem::new(Arc::new(MockFilesystem { + mount_flags, + flush_state: flush_state.clone(), + })), + flush_state, + ) } fn statfs(mount_flags: u32) -> VfsResult { @@ -763,6 +900,14 @@ mod tests { assert_eq!(root.check_writable_mount(), Ok(())); } + #[def_test] + fn test_root_mount_source_defaults_to_rootfs() { + let fs = mock_filesystem(0); + let mount = Mountpoint::new_root(&fs); + + assert_eq!(mount.source(), "rootfs"); + } + #[def_test] fn test_root_mount_can_be_readonly() { let fs = mock_filesystem(0); @@ -796,6 +941,19 @@ mod tests { assert!(!child_root.is_effectively_readonly()); } + #[def_test] + fn test_mount_with_explicit_source_records_source() { + let root_fs = mock_filesystem(0); + let child_fs = mock_filesystem(0); + let root_mount = Mountpoint::new_root(&root_fs); + let mount_dir = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + let child_mount = mount_dir + .mount_with_flags_and_source(&child_fs, MountFlags::empty(), "none") + .unwrap(); + + assert_eq!(child_mount.source(), "none"); + } + #[def_test] fn test_filesystem_stat_readonly_makes_location_effectively_readonly() { let fs = mock_filesystem(crate::ST_RDONLY); @@ -854,6 +1012,115 @@ mod tests { assert!(Arc::ptr_eq(&loc_a_again.mountpoint().clone(), &mount_a)); } + #[def_test] + fn test_unmount_flushes_mounted_filesystem_before_detach() { + let root_fs = mock_filesystem(0); + let (child_fs, child_flush) = mock_filesystem_with_flush(0, MockFlushState::ok()); + let root_mount = Mountpoint::new_root(&root_fs); + let mount_dir = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + let child_mount = mount_dir.mount(&child_fs).unwrap(); + + child_mount.root_location().unmount().unwrap(); + + assert_eq!(child_flush.count(), 1); + let visible = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + assert!(!Arc::ptr_eq(&visible.mountpoint().clone(), &child_mount)); + } + + #[def_test] + fn test_unmount_flush_error_keeps_mount_attached() { + let root_fs = mock_filesystem(0); + let (child_fs, child_flush) = + mock_filesystem_with_flush(0, MockFlushState::with_error(VfsError::Unsupported)); + let root_mount = Mountpoint::new_root(&root_fs); + let mount_dir = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + let child_mount = mount_dir.mount(&child_fs).unwrap(); + + let result = child_mount.root_location().unmount(); + + assert_eq!(result, Err(VfsError::Unsupported)); + assert_eq!(child_flush.count(), 1); + let visible = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + assert!(Arc::ptr_eq(&visible.mountpoint().clone(), &child_mount)); + } + + #[def_test] + fn test_unmount_busy_mount_does_not_flush() { + let root_fs = mock_filesystem(0); + let (child_fs, child_flush) = mock_filesystem_with_flush(0, MockFlushState::ok()); + let grandchild_fs = mock_filesystem(0); + let root_mount = Mountpoint::new_root(&root_fs); + let mount_dir = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + let child_mount = mount_dir.mount(&child_fs).unwrap(); + let grandchild_dir = child_mount.root_location().lookup_no_follow("mnt").unwrap(); + grandchild_dir.mount(&grandchild_fs).unwrap(); + + let result = child_mount.root_location().unmount(); + + assert_eq!(result, Err(VfsError::ResourceBusy)); + assert_eq!(child_flush.count(), 0); + } + + #[def_test] + fn test_unmount_global_root_fails() { + let fs = mock_filesystem(0); + let mount = Mountpoint::new_root(&fs); + + assert_eq!(mount.root_location().unmount(), Err(VfsError::InvalidInput)); + } + + #[def_test] + fn test_unmount_all_on_global_root_detaches_children_only() { + let (root_fs, root_flush) = mock_filesystem_with_flush(0, MockFlushState::ok()); + let (child_fs, child_flush) = mock_filesystem_with_flush(0, MockFlushState::ok()); + let root_mount = Mountpoint::new_root(&root_fs); + let mount_dir = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + let child_mount = mount_dir.mount(&child_fs).unwrap(); + + root_mount.root_location().unmount_all().unwrap(); + + assert_eq!(root_flush.count(), 0); + assert_eq!(child_flush.count(), 1); + let visible = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + assert!(!Arc::ptr_eq(&visible.mountpoint().clone(), &child_mount)); + } + + #[def_test] + fn test_unmount_all_on_global_root_recursively_detaches_children() { + let root_fs = mock_filesystem(0); + let (child_fs, child_flush) = mock_filesystem_with_flush(0, MockFlushState::ok()); + let grandchild_fs = mock_filesystem(0); + let root_mount = Mountpoint::new_root(&root_fs); + let mount_dir = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + let child_mount = mount_dir.mount(&child_fs).unwrap(); + let grandchild_dir = child_mount.root_location().lookup_no_follow("mnt").unwrap(); + grandchild_dir.mount(&grandchild_fs).unwrap(); + + let result = root_mount.root_location().unmount_all(); + + assert_eq!(result, Ok(())); + assert_eq!(child_flush.count(), 1); + let visible = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + assert!(!Arc::ptr_eq(&visible.mountpoint().clone(), &child_mount)); + } + + #[def_test] + fn test_unmount_all_on_global_root_keeps_child_on_flush_error() { + let root_fs = mock_filesystem(0); + let (child_fs, child_flush) = + mock_filesystem_with_flush(0, MockFlushState::with_error(VfsError::Unsupported)); + let root_mount = Mountpoint::new_root(&root_fs); + let mount_dir = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + let child_mount = mount_dir.mount(&child_fs).unwrap(); + + let result = root_mount.root_location().unmount_all(); + + assert_eq!(result, Err(VfsError::Unsupported)); + assert_eq!(child_flush.count(), 1); + let visible = root_mount.root_location().lookup_no_follow("mnt").unwrap(); + assert!(Arc::ptr_eq(&visible.mountpoint().clone(), &child_mount)); + } + #[def_test] fn test_readonly_mount_blocks_create() { let fs = mock_filesystem(0); diff --git a/posix/fs/src/mount.rs b/posix/fs/src/mount.rs index b52ab9e4cafa32aef2f49af8738c5d989fc73760..7d06e474c72049c5462a506f6fe53f50464fe733 100644 --- a/posix/fs/src/mount.rs +++ b/posix/fs/src/mount.rs @@ -8,98 +8,317 @@ use core::ffi::{c_char, c_void}; use kerrno::{KError, KResult}; use kthread::current_process_state; -use kvfs::{MountFlags, ST_RDONLY}; +use kvfs::{MetadataUpdate, MountFlags, NodePermission, ST_RDONLY}; +use linux_raw_sys::general; use memfs::MemoryFs; use posix_types::UserConstPtr; -fn superblock_flags_from_sys_mount(flags: i32) -> u32 { - let f = flags as u32; - let mut sb_flags = 0; - - if f & linux_raw_sys::general::MS_RDONLY != 0 { - sb_flags |= ST_RDONLY; +bitflags::bitflags! { + #[derive(Clone, Copy, Debug, Eq, PartialEq)] + struct RawMountFlags: u32 { + const RDONLY = general::MS_RDONLY; + const NOSUID = general::MS_NOSUID; + const NODEV = general::MS_NODEV; + const NOEXEC = general::MS_NOEXEC; + const NOATIME = general::MS_NOATIME; + const NODIRATIME = general::MS_NODIRATIME; + const RELATIME = general::MS_RELATIME; + const STRICTATIME = general::MS_STRICTATIME; + const NOSYMFOLLOW = general::MS_NOSYMFOLLOW; + const SILENT = general::MS_SILENT; + const SYNCHRONOUS = general::MS_SYNCHRONOUS; + const MANDLOCK = general::MS_MANDLOCK; + const DIRSYNC = general::MS_DIRSYNC; + const POSIXACL = general::MS_POSIXACL; + const I_VERSION = general::MS_I_VERSION; + const LAZYTIME = general::MS_LAZYTIME; + const SHARED = general::MS_SHARED; + const PRIVATE = general::MS_PRIVATE; + const SLAVE = general::MS_SLAVE; + const UNBINDABLE = general::MS_UNBINDABLE; + const REC = general::MS_REC; + const REMOUNT = general::MS_REMOUNT; + const BIND = general::MS_BIND; + const MOVE = general::MS_MOVE; + const NOUSER = general::MS_NOUSER; } - sb_flags } -/// Map `mount(2)` MS_* flags to per-mount [`MountFlags`]. -fn per_mount_flags(flags: i32) -> MountFlags { - // `flags` is a non-negative bitmask from mount(2); safe to reinterpret. - let f = flags as u32; - let mut mnt_flags = MountFlags::empty(); +impl RawMountFlags { + fn from_syscall(flags: i32) -> KResult { + let mut bits = flags as u32; + + if (bits & general::MS_MGC_MSK) == general::MS_MGC_VAL { + bits &= !general::MS_MGC_MSK; + } + + let flags = Self::from_bits_retain(bits); + if flags.contains(Self::NOUSER) { + return Err(KError::InvalidInput); + } - // Default to relatime unless NOATIME is set. - if f & linux_raw_sys::general::MS_NOATIME == 0 { - mnt_flags |= MountFlags::RELATIME; + Ok(flags) } - if f & linux_raw_sys::general::MS_RDONLY != 0 { - mnt_flags |= MountFlags::RDONLY; + fn to_mount_flags(self) -> MountFlags { + let mut flags = ordinary_mount_flags(self); + apply_atime_policy(self, &mut flags); + flags } - if f & linux_raw_sys::general::MS_NOSUID != 0 { - mnt_flags |= MountFlags::NOSUID; +} + +fn implemented_new_mount_flags() -> RawMountFlags { + RawMountFlags::RDONLY + | RawMountFlags::NOSUID + | RawMountFlags::NODEV + | RawMountFlags::NOEXEC + | RawMountFlags::NOATIME + | RawMountFlags::NODIRATIME + | RawMountFlags::RELATIME + | RawMountFlags::STRICTATIME + | RawMountFlags::NOSYMFOLLOW + | RawMountFlags::SILENT +} + +fn known_linux_new_mount_flags() -> RawMountFlags { + implemented_new_mount_flags() + | RawMountFlags::SYNCHRONOUS + | RawMountFlags::MANDLOCK + | RawMountFlags::DIRSYNC + | RawMountFlags::POSIXACL + | RawMountFlags::I_VERSION + | RawMountFlags::LAZYTIME +} + +fn propagation_flags() -> RawMountFlags { + RawMountFlags::SHARED + | RawMountFlags::PRIVATE + | RawMountFlags::SLAVE + | RawMountFlags::UNBINDABLE +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum PropagationKind { + Shared, + Private, + Slave, + Unbindable, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum MountRequest { + BindRemount(RawMountFlags), + Remount(RawMountFlags), + Bind { + recursive: bool, + }, + Propagation { + kind: PropagationKind, + recursive: bool, + }, + Move(RawMountFlags), + NewMount(RawMountFlags), +} + +fn parse_mount_request(flags: i32) -> KResult { + let flags = RawMountFlags::from_syscall(flags)?; + + if flags.contains(RawMountFlags::REMOUNT | RawMountFlags::BIND) { + return Ok(MountRequest::BindRemount(flags)); } - if f & linux_raw_sys::general::MS_NODEV != 0 { - mnt_flags |= MountFlags::NODEV; + if flags.contains(RawMountFlags::REMOUNT) { + return Ok(MountRequest::Remount(flags)); } - if f & linux_raw_sys::general::MS_NOEXEC != 0 { - mnt_flags |= MountFlags::NOEXEC; + if flags.contains(RawMountFlags::BIND) { + return Ok(MountRequest::Bind { + recursive: flags.contains(RawMountFlags::REC), + }); } - if f & linux_raw_sys::general::MS_NOATIME != 0 { - mnt_flags |= MountFlags::NOATIME; + if flags.intersects(propagation_flags()) { + return parse_propagation_request(flags); } - if f & linux_raw_sys::general::MS_NODIRATIME != 0 { - mnt_flags |= MountFlags::NODIRATIME; + if flags.contains(RawMountFlags::MOVE) { + return Ok(MountRequest::Move(flags)); } - // No explicit MS_RELATIME → MNT_RELATIME mapping: RELATIME is controlled - // solely by the default logic above. - // STRICTATIME takes priority — clear both RELATIME and NOATIME. - if f & linux_raw_sys::general::MS_STRICTATIME != 0 { - mnt_flags &= !(MountFlags::RELATIME | MountFlags::NOATIME); + + validate_new_mount_flags(flags)?; + let mut flags = flags; + flags.remove(RawMountFlags::SILENT); + Ok(MountRequest::NewMount(flags)) +} + +fn validate_new_mount_flags(flags: RawMountFlags) -> KResult<()> { + if flags.bits() & !known_linux_new_mount_flags().bits() != 0 { + return Err(KError::InvalidInput); } - if f & linux_raw_sys::general::MS_NOSYMFOLLOW != 0 { - mnt_flags |= MountFlags::NOSYMFOLLOW; + if flags.bits() & !implemented_new_mount_flags().bits() != 0 { + return Err(KError::InvalidInput); } - mnt_flags + Ok(()) } -/// Mount a filesystem at the specified target path. -pub fn sys_mount( - source: UserConstPtr, - target: UserConstPtr, - fs_type: UserConstPtr, - flags: i32, - _data: UserConstPtr, -) -> KResult { - let f = flags as u32; - - // MS_NOUSER is never allowed from userspace. - if f & linux_raw_sys::general::MS_NOUSER != 0 { +fn parse_propagation_request(flags: RawMountFlags) -> KResult { + let propagation_flags = flags & propagation_flags(); + if propagation_flags.bits().count_ones() != 1 { return Err(KError::InvalidInput); } - // Reject operation types that aren't yet implemented. - // Each of these dispatches to a separate handler. - if f & linux_raw_sys::general::MS_REMOUNT != 0 { + let allowed_flags = propagation_flags | RawMountFlags::REC | RawMountFlags::SILENT; + if flags.bits() & !allowed_flags.bits() != 0 { return Err(KError::InvalidInput); } - if f & linux_raw_sys::general::MS_BIND != 0 { + + let kind = if propagation_flags == RawMountFlags::SHARED { + PropagationKind::Shared + } else if propagation_flags == RawMountFlags::PRIVATE { + PropagationKind::Private + } else if propagation_flags == RawMountFlags::SLAVE { + PropagationKind::Slave + } else if propagation_flags == RawMountFlags::UNBINDABLE { + PropagationKind::Unbindable + } else { return Err(KError::InvalidInput); + }; + Ok(MountRequest::Propagation { + kind, + recursive: flags.contains(RawMountFlags::REC), + }) +} + +struct TmpfsMountOptions { + mode: NodePermission, +} + +impl Default for TmpfsMountOptions { + fn default() -> Self { + Self { + mode: NodePermission::from_bits_truncate(0o1777), + } } - if f & linux_raw_sys::general::MS_MOVE != 0 { - return Err(KError::InvalidInput); +} + +fn parse_tmpfs_mount_data(data: UserConstPtr) -> KResult { + if data.is_null() { + return Ok(TmpfsMountOptions::default()); } - if f & (linux_raw_sys::general::MS_SHARED - | linux_raw_sys::general::MS_PRIVATE - | linux_raw_sys::general::MS_SLAVE - | linux_raw_sys::general::MS_UNBINDABLE - | linux_raw_sys::general::MS_REC) - != 0 - { + + let data = data.cast::().load_string()?; + let mut options = TmpfsMountOptions::default(); + if data.is_empty() { + return Ok(options); + } + + for option in data.split(',') { + let Some(mode) = option.strip_prefix("mode=") else { + return Err(KError::InvalidInput); + }; + options.mode = parse_tmpfs_mode(mode)?; + } + Ok(options) +} + +fn parse_tmpfs_mode(mode: &str) -> KResult { + if mode.is_empty() { return Err(KError::InvalidInput); } - let source = source.load_string()?; + let mode = u32::from_str_radix(mode, 8).map_err(|_| KError::InvalidInput)? & 0o7777; + Ok(NodePermission::from_bits_truncate(mode as u16)) +} + +fn superblock_flags_from_mount_flags(flags: RawMountFlags) -> u32 { + let mut sb_flags = 0; + + if flags.contains(RawMountFlags::RDONLY) { + sb_flags |= ST_RDONLY; + } + sb_flags +} + +struct MountFlagMap { + raw: RawMountFlags, + mount: MountFlags, +} + +const ORDINARY_MOUNT_FLAG_MAP: &[MountFlagMap] = &[ + MountFlagMap { + raw: RawMountFlags::RDONLY, + mount: MountFlags::RDONLY, + }, + MountFlagMap { + raw: RawMountFlags::NOSUID, + mount: MountFlags::NOSUID, + }, + MountFlagMap { + raw: RawMountFlags::NODEV, + mount: MountFlags::NODEV, + }, + MountFlagMap { + raw: RawMountFlags::NOEXEC, + mount: MountFlags::NOEXEC, + }, + MountFlagMap { + raw: RawMountFlags::NOATIME, + mount: MountFlags::NOATIME, + }, + MountFlagMap { + raw: RawMountFlags::NODIRATIME, + mount: MountFlags::NODIRATIME, + }, + MountFlagMap { + raw: RawMountFlags::NOSYMFOLLOW, + mount: MountFlags::NOSYMFOLLOW, + }, +]; + +fn ordinary_mount_flags(flags: RawMountFlags) -> MountFlags { + let mut mount_flags = MountFlags::empty(); + + for map in ORDINARY_MOUNT_FLAG_MAP { + if flags.contains(map.raw) { + mount_flags |= map.mount; + } + } + + mount_flags +} + +fn apply_atime_policy(flags: RawMountFlags, mount_flags: &mut MountFlags) { + // Match Linux legacy mount behavior: relatime is the default unless the + // caller explicitly requested noatime. MS_RELATIME is a bit in the same + // unordered flag set, so it must not cancel a simultaneous MS_NOATIME. + if !flags.contains(RawMountFlags::NOATIME) { + *mount_flags |= MountFlags::RELATIME; + } + + if flags.contains(RawMountFlags::STRICTATIME) { + *mount_flags &= !(MountFlags::RELATIME | MountFlags::NOATIME); + } +} + +/// Mount a filesystem at the specified target path. +pub fn sys_mount( + source: UserConstPtr, + target: UserConstPtr, + fs_type: UserConstPtr, + flags: i32, + data: UserConstPtr, +) -> KResult { + let flags = match parse_mount_request(flags)? { + MountRequest::NewMount(flags) => flags, + MountRequest::BindRemount(_) | MountRequest::Remount(_) | MountRequest::Move(_) => { + return Err(KError::InvalidInput); + } + MountRequest::Bind { .. } | MountRequest::Propagation { .. } => { + return Err(KError::InvalidInput); + } + }; + + let source = if source.is_null() { + None + } else { + Some(source.load_string()?) + }; let target = target.load_string()?; let fs_type = fs_type.load_string()?; debug!("sys_mount <= source: {source:?}, target: {target:?}, fs_type: {fs_type:?}"); @@ -107,14 +326,21 @@ pub fn sys_mount( if fs_type != "tmpfs" { return Err(KError::NoSuchDevice); } + let tmpfs_options = parse_tmpfs_mount_data(data)?; - let mount_flags = per_mount_flags(flags); - let fs = MemoryFs::new_with_name_and_flags("tmpfs", superblock_flags_from_sys_mount(flags)); - let target = current_process_state() - .fs_context() - .lock() - .resolve(target)?; - target.mount_with_flags(&fs, mount_flags)?; + let source = source.unwrap_or_else(|| "none".into()); + let mount_flags = flags.to_mount_flags(); + let process_state = current_process_state(); + let fs_context = process_state.fs_context(); + let fs_context = fs_context.lock(); + let target = fs_context.resolve(target)?; + target.check_is_dir()?; + let fs = MemoryFs::new_with_name_and_flags("tmpfs", superblock_flags_from_mount_flags(flags)); + fs.root_dir().update_metadata(MetadataUpdate { + mode: Some(tmpfs_options.mode), + ..Default::default() + })?; + target.mount_with_flags_and_source(&fs, mount_flags, source)?; Ok(0) } @@ -149,37 +375,110 @@ pub fn sys_umount2(target: UserConstPtr, flags: i32) -> KResult { #[cfg(unittest)] mod tests { use kvfs::{MountFlags, ST_RDONLY}; + use linux_raw_sys::general; use unittest::{assert, assert_eq, def_test}; + fn raw(bits: u32) -> super::RawMountFlags { + super::RawMountFlags::from_bits_retain(bits) + } + #[def_test] fn test_superblock_flags_from_mount_only_options_are_filtered() { - let flags = (linux_raw_sys::general::MS_NODEV - | linux_raw_sys::general::MS_NOEXEC - | linux_raw_sys::general::MS_NOSUID - | linux_raw_sys::general::MS_NOATIME - | linux_raw_sys::general::MS_NODIRATIME - | linux_raw_sys::general::MS_NOSYMFOLLOW) as i32; + let flags = general::MS_NODEV + | general::MS_NOEXEC + | general::MS_NOSUID + | general::MS_NOATIME + | general::MS_NODIRATIME + | general::MS_NOSYMFOLLOW; - assert_eq!(super::superblock_flags_from_sys_mount(flags), 0); + assert_eq!(super::superblock_flags_from_mount_flags(raw(flags)), 0); } #[def_test] fn test_superblock_flags_preserve_readonly() { assert_eq!( - super::superblock_flags_from_sys_mount(linux_raw_sys::general::MS_RDONLY as i32), + super::superblock_flags_from_mount_flags(raw(general::MS_RDONLY)), ST_RDONLY ); } #[def_test] fn test_per_mount_flags_preserve_mount_options() { - let flags = (linux_raw_sys::general::MS_RDONLY - | linux_raw_sys::general::MS_NODEV - | linux_raw_sys::general::MS_NOEXEC) as i32; - let result = super::per_mount_flags(flags); + let flags = general::MS_RDONLY | general::MS_NODEV | general::MS_NOEXEC; + let result = raw(flags).to_mount_flags(); assert!(result.contains(MountFlags::RDONLY)); assert!(result.contains(MountFlags::NODEV)); assert!(result.contains(MountFlags::NOEXEC)); } + + #[def_test] + fn test_per_mount_flags_noatime_is_not_cleared_by_relatime() { + let flags = general::MS_NOATIME | general::MS_RELATIME; + let result = raw(flags).to_mount_flags(); + + assert!(!result.contains(MountFlags::RELATIME)); + assert!(result.contains(MountFlags::NOATIME)); + } + + #[def_test] + fn test_per_mount_flags_strictatime_overrides_atime_flags() { + let flags = general::MS_NOATIME | general::MS_RELATIME | general::MS_STRICTATIME; + let result = raw(flags).to_mount_flags(); + + assert!(!result.contains(MountFlags::RELATIME)); + assert!(!result.contains(MountFlags::NOATIME)); + } + + #[def_test] + fn test_parse_mount_request_accepts_supported_new_mount_flags() { + let flags = (general::MS_RDONLY + | general::MS_NOSUID + | general::MS_NODEV + | general::MS_NOEXEC + | general::MS_NOATIME + | general::MS_NODIRATIME + | general::MS_RELATIME + | general::MS_STRICTATIME + | general::MS_NOSYMFOLLOW) as i32; + + assert_eq!( + super::parse_mount_request(flags), + Ok(super::MountRequest::NewMount(raw(flags as u32))) + ); + } + + #[def_test] + fn test_parse_mount_request_ignores_compatibility_flags() { + let flags = (general::MS_MGC_VAL | general::MS_SILENT | general::MS_RDONLY) as i32; + + assert_eq!( + super::parse_mount_request(flags), + Ok(super::MountRequest::NewMount(raw(general::MS_RDONLY))) + ); + } + + #[def_test] + fn test_parse_mount_request_recognizes_operation_flags() { + assert_eq!( + super::parse_mount_request(general::MS_BIND as i32), + Ok(super::MountRequest::Bind { recursive: false }) + ); + assert_eq!( + super::parse_mount_request(general::MS_REMOUNT as i32), + Ok(super::MountRequest::Remount(raw(general::MS_REMOUNT))) + ); + } + + #[def_test] + fn test_parse_mount_request_rejects_unimplemented_plain_flags() { + assert_eq!( + super::parse_mount_request(general::MS_SYNCHRONOUS as i32), + Err(kerrno::KError::InvalidInput) + ); + assert_eq!( + super::parse_mount_request(general::MS_LAZYTIME as i32), + Err(kerrno::KError::InvalidInput) + ); + } }