kernel/fs/file.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2024 Google LLC.
//! Files and file descriptors.
//!
//! C headers: [`include/linux/fs.h`](srctree/include/linux/fs.h) and
//! [`include/linux/file.h`](srctree/include/linux/file.h)
use crate::{
bindings,
cred::Credential,
error::{code::*, Error, Result},
types::{ARef, AlwaysRefCounted, NotThreadSafe, Opaque},
};
use core::ptr;
/// Flags associated with a [`File`].
///
/// Every constant in this module is a `u32` re-export of the value of the same (or, where noted,
/// the equivalent) name in `bindings`, so the constants can be combined and tested with plain
/// bitwise operators.
pub mod flags {
/// File is opened in append mode.
pub const O_APPEND: u32 = bindings::O_APPEND;
/// Signal-driven I/O is enabled.
///
/// This is taken from the `FASYNC` binding.
pub const O_ASYNC: u32 = bindings::FASYNC;
/// Close-on-exec flag is set.
pub const O_CLOEXEC: u32 = bindings::O_CLOEXEC;
/// File was created if it didn't already exist.
pub const O_CREAT: u32 = bindings::O_CREAT;
/// Direct I/O is enabled for this file.
pub const O_DIRECT: u32 = bindings::O_DIRECT;
/// File must be a directory.
pub const O_DIRECTORY: u32 = bindings::O_DIRECTORY;
/// Like [`O_SYNC`] except metadata is not synced.
pub const O_DSYNC: u32 = bindings::O_DSYNC;
/// Ensure that this file is created with the `open(2)` call.
pub const O_EXCL: u32 = bindings::O_EXCL;
/// Large file size enabled (`off64_t` over `off_t`).
pub const O_LARGEFILE: u32 = bindings::O_LARGEFILE;
/// Do not update the file last access time.
pub const O_NOATIME: u32 = bindings::O_NOATIME;
/// File should not be used as process's controlling terminal.
pub const O_NOCTTY: u32 = bindings::O_NOCTTY;
/// If basename of path is a symbolic link, fail open.
pub const O_NOFOLLOW: u32 = bindings::O_NOFOLLOW;
/// File is using nonblocking I/O.
pub const O_NONBLOCK: u32 = bindings::O_NONBLOCK;
/// File is using nonblocking I/O.
///
/// This is effectively the same flag as [`O_NONBLOCK`] on all architectures
/// except SPARC64.
pub const O_NDELAY: u32 = bindings::O_NDELAY;
/// Used to obtain a path file descriptor.
pub const O_PATH: u32 = bindings::O_PATH;
/// Write operations on this file will flush data and metadata.
pub const O_SYNC: u32 = bindings::O_SYNC;
/// This file is an unnamed temporary regular file.
pub const O_TMPFILE: u32 = bindings::O_TMPFILE;
/// File should be truncated to length 0.
pub const O_TRUNC: u32 = bindings::O_TRUNC;
/// Bitmask for access mode flags.
///
/// Mask a flags value with this constant before comparing it against [`O_RDONLY`],
/// [`O_WRONLY`] or [`O_RDWR`].
///
/// # Examples
///
/// ```
/// use kernel::fs::file;
/// # fn do_something() {}
/// # let flags = 0;
/// if (flags & file::flags::O_ACCMODE) == file::flags::O_RDONLY {
///     do_something();
/// }
/// ```
pub const O_ACCMODE: u32 = bindings::O_ACCMODE;
/// File is read only.
pub const O_RDONLY: u32 = bindings::O_RDONLY;
/// File is write only.
pub const O_WRONLY: u32 = bindings::O_WRONLY;
/// File can be both read and written.
pub const O_RDWR: u32 = bindings::O_RDWR;
}
/// Wraps the kernel's `struct file`. Thread safe.
///
/// This represents an open file rather than a file on a filesystem. Processes generally reference
/// open files using file descriptors. However, file descriptors are not the same as files. A file
/// descriptor is just an integer that corresponds to a file, and a single file may be referenced
/// by multiple file descriptors.
///
/// # Refcounting
///
/// Instances of this type are reference-counted. The reference count is incremented by the
/// `fget`/`get_file` functions and decremented by `fput`. The Rust type `ARef<File>` represents a
/// pointer that owns a reference count on the file.
///
/// Whenever a process opens a file descriptor (fd), it stores a pointer to the file in its fd
/// table (`struct files_struct`). This pointer owns a reference count to the file, ensuring the
/// file isn't prematurely deleted while the file descriptor is open. In Rust terminology, the
/// pointers in `struct files_struct` are `ARef<File>` pointers.
///
/// ## Light refcounts
///
/// Whenever a process has an fd to a file, it may use something called a "light refcount" as a
/// performance optimization. Light refcounts are acquired by calling `fdget` and released with
/// `fdput`. The idea behind light refcounts is that if the fd is not closed between the calls to
/// `fdget` and `fdput`, then the refcount cannot hit zero during that time, as the `struct
/// files_struct` holds a reference until the fd is closed. This means that it's safe to access the
/// file even if `fdget` does not increment the refcount.
///
/// The requirement that the fd is not closed during a light refcount applies globally across all
/// threads - not just on the thread using the light refcount. For this reason, light refcounts are
/// only used when the `struct files_struct` is not shared with other threads, since this ensures
/// that other unrelated threads cannot suddenly start using the fd and close it. Therefore,
/// calling `fdget` on a shared `struct files_struct` creates a normal refcount instead of a light
/// refcount.
///
/// Light reference counts must be released with `fdput` before the system call returns to
/// userspace. This means that if you wait until the current system call returns to userspace, then
/// all light refcounts that existed at the time have gone away.
///
/// ### The file position
///
/// Each `struct file` has a position integer, which is protected by the `f_pos_lock` mutex.
/// However, if the `struct file` is not shared, then the kernel may avoid taking the lock as a
/// performance optimization.
///
/// The condition for avoiding the `f_pos_lock` mutex is different from the condition for using
/// `fdget`. With `fdget`, you may avoid incrementing the refcount as long as the current fd table
/// is not shared; it is okay if there are other fd tables that also reference the same `struct
/// file`. However, `fdget_pos` can only avoid taking the `f_pos_lock` if the entire `struct file`
/// is not shared, as different processes with an fd to the same `struct file` share the same
/// position.
///
/// To represent files that are not thread safe due to this optimization, the [`LocalFile`] type is
/// used.
///
/// ## Rust references
///
/// The reference type `&File` is similar to light refcounts:
///
/// * `&File` references don't own a reference count. They can only exist as long as the reference
/// count stays positive, and can only be created when there is some mechanism in place to ensure
/// this.
///
/// * The Rust borrow-checker normally ensures this by enforcing that the `ARef<File>` from which
/// a `&File` is created outlives the `&File`.
///
/// * Using the unsafe [`File::from_raw_file`] means that it is up to the caller to ensure that the
/// `&File` only exists while the reference count is positive.
///
/// * You can think of `fdget` as using an fd to look up an `ARef<File>` in the `struct
/// files_struct` and create an `&File` from it. The "fd cannot be closed" rule is like the Rust
/// rule "the `ARef<File>` must outlive the `&File`".
///
/// # Invariants
///
/// * All instances of this type are refcounted using the `f_count` field.
/// * There must not be any active calls to `fdget_pos` on this file that did not take the
/// `f_pos_lock` mutex.
// `repr(transparent)` gives `File` the same layout as its single field. The pointer casts between
// `File` and `bindings::file` elsewhere in this file rely on that.
#[repr(transparent)]
pub struct File {
/// The wrapped C `struct file`.
inner: Opaque<bindings::file>,
}
// SAFETY: By the type invariants of `File`, there are no active `fdget_pos` calls on this file
// that did not take the `f_pos_lock` mutex, so it is safe to transfer it between threads.
unsafe impl Send for File {}
// SAFETY: By the type invariants of `File`, there are no active `fdget_pos` calls on this file
// that did not take the `f_pos_lock` mutex, so it is safe to access its methods from several
// threads in parallel.
unsafe impl Sync for File {}
// SAFETY: By the type invariants, every `File` is reference-counted. The two functions below make
// an `ARef<File>` own one normal refcount on the underlying `struct file`.
unsafe impl AlwaysRefCounted for File {
    /// Takes an additional normal refcount on the file via `get_file`.
    #[inline]
    fn inc_ref(&self) {
        let file = self.as_ptr();
        // SAFETY: A live shared reference implies that the refcount is currently nonzero.
        unsafe { bindings::get_file(file) };
    }

    /// Gives up one normal refcount on the file via `fput`.
    #[inline]
    unsafe fn dec_ref(obj: ptr::NonNull<File>) {
        let file = obj.as_ptr().cast::<bindings::file>();
        // SAFETY: The caller hands us ownership of a normal refcount, which we are therefore
        // allowed to release. The cast is okay since `File` has the same representation as
        // `struct file`.
        unsafe { bindings::fput(file) }
    }
}
/// Wraps the kernel's `struct file`. Not thread safe.
///
/// This type represents a file that is not known to be safe to transfer across thread boundaries.
/// To obtain a thread-safe [`File`], use the [`assume_no_fdget_pos`] conversion.
///
/// See the documentation for [`File`] for more information.
///
/// # Invariants
///
/// * All instances of this type are refcounted using the `f_count` field.
/// * If there is an active call to `fdget_pos` that did not take the `f_pos_lock` mutex, then it
///   must be on the same thread as this file.
///
/// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos
// `repr(transparent)` is needed for soundness. This file reinterprets `*const bindings::file` as
// `&LocalFile` and converts `ARef<LocalFile>` into `ARef<File>` by pointer cast, which is only
// well-defined when this wrapper is guaranteed to have the same layout as its single field. The
// default Rust representation makes no such guarantee.
#[repr(transparent)]
pub struct LocalFile {
    /// The wrapped C `struct file`.
    inner: Opaque<bindings::file>,
}
// SAFETY: By the type invariants, every `LocalFile` is reference-counted. The two functions below
// make an `ARef<LocalFile>` own one normal refcount on the underlying `struct file`.
unsafe impl AlwaysRefCounted for LocalFile {
    /// Takes an additional normal refcount on the file via `get_file`.
    #[inline]
    fn inc_ref(&self) {
        let file = self.as_ptr();
        // SAFETY: A live shared reference implies that the refcount is currently nonzero.
        unsafe { bindings::get_file(file) };
    }

    /// Gives up one normal refcount on the file via `fput`.
    #[inline]
    unsafe fn dec_ref(obj: ptr::NonNull<LocalFile>) {
        let file = obj.as_ptr().cast::<bindings::file>();
        // SAFETY: The caller hands us ownership of a normal refcount, which we are therefore
        // allowed to release. The pointer is cast back to the `struct file` that `LocalFile`
        // wraps.
        unsafe { bindings::fput(file) }
    }
}
impl LocalFile {
    /// Obtains a refcounted [`LocalFile`] from a file descriptor.
    ///
    /// The file descriptor belongs to the current process, and there might be active local calls
    /// to `fdget_pos` on the same file.
    ///
    /// Returns [`BadFdError`] if `fget` yields a null pointer for `fd`.
    ///
    /// To obtain an `ARef<File>`, use the [`assume_no_fdget_pos`] function to convert.
    ///
    /// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos
    #[inline]
    pub fn fget(fd: u32) -> Result<ARef<LocalFile>, BadFdError> {
        // SAFETY: FFI call, there are no requirements on `fd`.
        let raw = unsafe { bindings::fget(fd) };
        match ptr::NonNull::new(raw) {
            // SAFETY: `bindings::fget` created a refcount, and we pass ownership of it to the
            // `ARef`.
            //
            // INVARIANT: This file is in the fd table on this thread, so either all `fdget_pos`
            // calls are on this thread, or the file is shared, in which case `fdget_pos` calls
            // took the `f_pos_lock` mutex.
            Some(file) => Ok(unsafe { ARef::from_raw(file.cast()) }),
            None => Err(BadFdError),
        }
    }

    /// Creates a reference to a [`LocalFile`] from a valid pointer.
    ///
    /// # Safety
    ///
    /// * The caller must ensure that `ptr` points at a valid file and that the file's refcount is
    ///   positive for the duration of 'a.
    /// * The caller must ensure that if there is an active call to `fdget_pos` that did not take
    ///   the `f_pos_lock` mutex, then that call is on the current thread.
    #[inline]
    pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a LocalFile {
        // Reinterpret the `struct file` pointer as a pointer to the wrapper type around it.
        let file = ptr.cast::<LocalFile>();
        // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the
        // duration of 'a.
        //
        // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls.
        unsafe { &*file }
    }

    /// Assume that there are no active `fdget_pos` calls that prevent us from sharing this file.
    ///
    /// This makes it safe to transfer this file to other threads. No checks are performed, and
    /// using it incorrectly may lead to a data race on the file position if the file is shared
    /// with another thread.
    ///
    /// This method is intended to be used together with [`LocalFile::fget`] when the caller knows
    /// statically that there are no `fdget_pos` calls on the current thread. For example, you
    /// might use it when calling `fget` from an ioctl, since ioctls usually do not touch the file
    /// position.
    ///
    /// # Safety
    ///
    /// There must not be any active `fdget_pos` calls on the current thread.
    #[inline]
    pub unsafe fn assume_no_fdget_pos(me: ARef<LocalFile>) -> ARef<File> {
        // Take the refcount out of the `ARef<LocalFile>` without dropping it.
        let raw = ARef::into_raw(me);
        // INVARIANT: There are no `fdget_pos` calls on the current thread, and by the type
        // invariants, if there is a `fdget_pos` call on another thread, then it took the
        // `f_pos_lock` mutex.
        //
        // SAFETY: `LocalFile` and `File` both consist solely of an `Opaque<bindings::file>`, and
        // the refcount taken out of `me` above is handed to the new `ARef<File>`.
        unsafe { ARef::from_raw(raw.cast()) }
    }

    /// Returns a raw pointer to the inner C struct.
    #[inline]
    pub fn as_ptr(&self) -> *mut bindings::file {
        self.inner.get()
    }

    /// Returns the credentials of the task that originally opened the file.
    pub fn cred(&self) -> &Credential {
        let file = self.as_ptr();
        // SAFETY: It's okay to read the `f_cred` field without synchronization because `f_cred` is
        // never changed after initialization of the file.
        let cred = unsafe { (*file).f_cred };
        // SAFETY: The signature of this function ensures that the caller will only access the
        // returned credential while the file is still valid, and the C side ensures that the
        // credential stays valid at least as long as the file.
        unsafe { Credential::from_ptr(cred) }
    }

    /// Returns the flags associated with the file.
    ///
    /// The flags are a combination of the constants in [`flags`].
    #[inline]
    pub fn flags(&self) -> u32 {
        let file = self.as_ptr();
        // SAFETY: The file is valid because the shared reference guarantees a nonzero refcount,
        // so computing the address of its `f_flags` field is okay.
        let f_flags = unsafe { ptr::addr_of!((*file).f_flags) };
        // This `read_volatile` is intended to correspond to a READ_ONCE call.
        //
        // SAFETY: `f_flags` points into the file, which is valid as explained above.
        //
        // FIXME(read_once): Replace with `read_once` when available on the Rust side.
        unsafe { f_flags.read_volatile() }
    }
}
impl File {
    /// Creates a reference to a [`File`] from a valid pointer.
    ///
    /// # Safety
    ///
    /// * The caller must ensure that `ptr` points at a valid file and that the file's refcount is
    ///   positive for the duration of 'a.
    /// * The caller must ensure that if there are active `fdget_pos` calls on this file, then they
    ///   took the `f_pos_lock` mutex.
    #[inline]
    pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a File {
        // The cast is okay because `File` is `repr(transparent)`.
        let file = ptr.cast::<File>();
        // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the
        // duration of 'a.
        //
        // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls.
        unsafe { &*file }
    }
}
// Every method defined on `LocalFile` is also usable on `File` through this `Deref` impl.
impl core::ops::Deref for File {
    type Target = LocalFile;

    /// Views this thread-safe `File` as a [`LocalFile`].
    #[inline]
    fn deref(&self) -> &LocalFile {
        let this: *const File = self;
        // SAFETY: The caller provides a `&File`, and since it is a reference, it must point at a
        // valid file for the desired duration.
        //
        // By the type invariants, there are no `fdget_pos` calls that did not take the
        // `f_pos_lock` mutex.
        unsafe { LocalFile::from_raw_file(this.cast()) }
    }
}
/// A file descriptor reservation.
///
/// This allows the creation of a file descriptor in two steps: first, we reserve a slot for it,
/// then we commit or drop the reservation. The first step may fail (e.g., the current process ran
/// out of available slots), but commit and drop never fail (and are mutually exclusive).
///
/// Committing is done with `fd_install`, which consumes the reservation. Dropping the reservation
/// happens in the destructor of this type, which hands the fd back with `put_unused_fd`.
///
/// # Invariants
///
/// The fd stored in this struct must correspond to a reserved file descriptor of the current task.
pub struct FileDescriptorReservation {
/// The reserved file descriptor number, as returned by `get_unused_fd_flags`.
fd: u32,
/// Prevent values of this type from being moved to a different task.
///
/// The `fd_install` and `put_unused_fd` functions assume that the value of `current` is
/// unchanged since the call to `get_unused_fd_flags`. By adding this marker to this type, we
/// prevent it from being moved across task boundaries, which ensures that `current` does not
/// change while this value exists.
_not_send: NotThreadSafe,
}
impl FileDescriptorReservation {
/// Creates a new file descriptor reservation.
pub fn get_unused_fd_flags(flags: u32) -> Result<Self> {
// SAFETY: FFI call, there are no safety requirements on `flags`.
let fd: i32 = unsafe { bindings::get_unused_fd_flags(flags) };
if fd < 0 {
return Err(Error::from_errno(fd));
}
Ok(Self {
fd: fd as u32,
_not_send: NotThreadSafe,
})
}
/// Returns the file descriptor number that was reserved.
pub fn reserved_fd(&self) -> u32 {
self.fd
}
/// Commits the reservation.
///
/// The previously reserved file descriptor is bound to `file`. This method consumes the
/// [`FileDescriptorReservation`], so it will not be usable after this call.
pub fn fd_install(self, file: ARef<File>) {
// SAFETY: `self.fd` was previously returned by `get_unused_fd_flags`. We have not yet used
// the fd, so it is still valid, and `current` still refers to the same task, as this type
// cannot be moved across task boundaries.
//
// Furthermore, the file pointer is guaranteed to own a refcount by its type invariants,
// and we take ownership of that refcount by not running the destructor below.
// Additionally, the file is known to not have any non-shared `fdget_pos` calls, so even if
// this process starts using the file position, this will not result in a data race on the
// file position.
unsafe { bindings::fd_install(self.fd, file.as_ptr()) };
// `fd_install` consumes both the file descriptor and the file reference, so we cannot run
// the destructors.
core::mem::forget(self);
core::mem::forget(file);
}
}
impl Drop for FileDescriptorReservation {
    /// Releases a reservation that was never committed by handing the fd to `put_unused_fd`.
    fn drop(&mut self) {
        let fd = self.fd;
        // SAFETY: By the type invariants of this type, `fd` was previously returned by
        // `get_unused_fd_flags`. We have not yet used the fd, so it is still valid, and `current`
        // still refers to the same task, as this type cannot be moved across task boundaries.
        unsafe { bindings::put_unused_fd(fd) };
    }
}
/// Represents the `EBADF` error code.
///
/// Used for methods that can only fail with `EBADF`.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct BadFdError;

/// Lets a [`BadFdError`] be propagated with `?` from functions that return the general
/// [`Error`] type; the result is always `EBADF`.
impl From<BadFdError> for Error {
    #[inline]
    fn from(_err: BadFdError) -> Self {
        EBADF
    }
}

/// Formats as the literal text `EBADF`, honouring any width or alignment given by the caller.
impl core::fmt::Debug for BadFdError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // `Display` for `str` pads the string according to the formatter's options.
        <str as core::fmt::Display>::fmt("EBADF", f)
    }
}