mirror of
https://github.com/bootandy/dust.git
synced 2025-12-12 15:49:58 -08:00
Previously, the function `get_metadata` in platform.rs used `fs::metadata`, which follows symbolic links and returns metadata for the target file. This caused issue #421 (du / dust disagreement) whenever the properties of the symbolic link itself had to be determined.
201 lines
7.6 KiB
Rust
201 lines
7.6 KiB
Rust
#[allow(unused_imports)]
|
|
use std::fs;
|
|
|
|
use std::path::Path;
|
|
|
|
/// Returns the number of bytes represented by one block in `MetadataExt::blocks()`.
#[cfg(target_family = "unix")]
fn get_block_size() -> u64 {
    // Every OS-specific implementation of MetadataExt appears to count blocks in units of
    // 512 bytes, see:
    // https://doc.rust-lang.org/std/os/linux/fs/trait.MetadataExt.html#tymethod.st_blocks
    const BYTES_PER_BLOCK: u64 = 512;
    BYTES_PER_BLOCK
}
|
|
|
|
/// Identity of a file: `(inode, device)` on Unix, `(file index, volume serial number)` on Windows.
type InodeAndDevice = (u64, u64);
|
|
/// File timestamps: `(mtime, atime, ctime)` on Unix,
/// `(last write, last access, creation)` on Windows.
type FileTime = (i64, i64, i64);
|
|
|
|
#[cfg(target_family = "unix")]
|
|
pub fn get_metadata<P: AsRef<Path>>(
|
|
path: P,
|
|
use_apparent_size: bool,
|
|
follow_links: bool,
|
|
) -> Option<(u64, Option<InodeAndDevice>, FileTime)> {
|
|
use std::os::unix::fs::MetadataExt;
|
|
let metadata = if follow_links {
|
|
path.as_ref().metadata()
|
|
} else {
|
|
path.as_ref().symlink_metadata()
|
|
};
|
|
match metadata {
|
|
Ok(md) => {
|
|
if use_apparent_size {
|
|
Some((
|
|
md.len(),
|
|
Some((md.ino(), md.dev())),
|
|
(md.mtime(), md.atime(), md.ctime()),
|
|
))
|
|
} else {
|
|
Some((
|
|
md.blocks() * get_block_size(),
|
|
Some((md.ino(), md.dev())),
|
|
(md.mtime(), md.atime(), md.ctime()),
|
|
))
|
|
}
|
|
}
|
|
Err(_e) => None,
|
|
}
|
|
}
|
|
|
|
/// Reads the size, identity and timestamps of `path` on Windows.
///
/// * `use_apparent_size` - forwarded to the slow path to select which size is reported, and
///   used to force the slow path for files that look like OneDrive placeholders.
/// * `follow_links` - when true the metadata is read with `Path::metadata` (symbolic links
///   are followed); otherwise with `Path::symlink_metadata`.
///
/// Returns `None` when the slow path cannot open or query the file. On the fast path the
/// result is `(len, None, (last write, last access, creation))`; on the slow path the second
/// element is `Some((file index, volume serial number))`. A timestamp that the slow path
/// cannot obtain is reported as `0`.
#[cfg(target_family = "windows")]
pub fn get_metadata<P: AsRef<Path>>(
    path: P,
    use_apparent_size: bool,
    follow_links: bool,
) -> Option<(u64, Option<InodeAndDevice>, FileTime)> {
    // On windows opening the file to get size, file ID and volume can be very
    // expensive because 1) it causes a few system calls, and more importantly 2) it can cause
    // windows defender to scan the file.
    // Therefore we try to avoid doing that for common cases, mainly those of
    // plain files:

    // The idea is to make do with the file size that we get from the OS for
    // free as part of iterating a folder. Therefore we want to make sure that
    // it makes sense to use that free size information:

    // Volume boundaries:
    // The user can ask us not to cross volume boundaries. If the DirEntry is a
    // plain file and not a reparse point or other non-trivial stuff, we assume
    // that the file is located on the same volume as the directory that
    // contains it.

    // File ID:
    // This optimization does deprive us of access to a file ID. As a
    // workaround, we just make one up that hopefully does not collide with real
    // file IDs.
    // Hard links: Unresolved. We don't get inode/file index, so hard links
    // count once for each link. Hopefully they are not too commonly in use on
    // windows.

    // Size:
    // We assume (naively?) that for the common cases the free size info is the
    // same as one would get by doing the expensive thing. Sparse, encrypted and
    // compressed files are not included in the common cases, as one can imagine
    // there being more than one view on their size.

    // Savings in orders of magnitude in terms of time, io and cpu have been
    // observed on hdd, windows 10, some 100Ks files taking up some hundreds of
    // GBs:
    // Consistently opening the file: 30 minutes.
    // With this optimization: 8 sec.

    use std::io;
    use winapi_util::Handle;

    /// Opens `path` requesting only the FILE_READ_ATTRIBUTES access right.
    fn handle_from_path_limited(path: &Path) -> io::Result<Handle> {
        use std::fs::OpenOptions;
        use std::os::windows::fs::OpenOptionsExt;
        const FILE_READ_ATTRIBUTES: u32 = 0x0080;

        // So, it seems that it does not have to be that expensive to open
        // files to get their info: Avoiding opening the file with the full
        // GENERIC_READ is key:

        // https://docs.microsoft.com/en-us/windows/win32/secauthz/generic-access-rights:
        // "For example, a Windows file object maps the GENERIC_READ bit to the
        // READ_CONTROL and SYNCHRONIZE standard access rights and to the
        // FILE_READ_DATA, FILE_READ_EA, and FILE_READ_ATTRIBUTES
        // object-specific access rights"

        // The flag FILE_READ_DATA seems to be the expensive one, so we'll avoid
        // that, and most of the other ones. Simply because it seems that we
        // don't need them.

        let file = OpenOptions::new()
            .access_mode(FILE_READ_ATTRIBUTES)
            .open(path)?;
        Ok(Handle::from_file(file))
    }

    /// Slow path: opens the file and queries its handle for size, file index, volume serial
    /// number and timestamps. Returns `None` if the file cannot be opened or queried.
    fn get_metadata_expensive(
        path: &Path,
        use_apparent_size: bool,
    ) -> Option<(u64, Option<InodeAndDevice>, FileTime)> {
        use winapi_util::file::information;

        let h = handle_from_path_limited(path).ok()?;
        let info = information(&h).ok()?;

        // NOTE(review): with `use_apparent_size` set this reports `size_on_disk()`, and the
        // logical `file_size()` otherwise. The flag name suggests the opposite mapping;
        // behaviour is kept as-is here - confirm against the callers before changing it.
        let size = if use_apparent_size {
            use filesize::PathExt;
            path.size_on_disk().ok()?
        } else {
            info.file_size()
        };

        // The timestamp accessors return `Option`; fall back to 0 rather than panicking
        // (and aborting the whole scan) when a timestamp is not available for a file.
        let times = (
            info.last_write_time().unwrap_or(0) as i64,
            info.last_access_time().unwrap_or(0) as i64,
            info.creation_time().unwrap_or(0) as i64,
        );

        Some((
            size,
            Some((info.file_index(), info.volume_serial_number())),
            times,
        ))
    }

    use std::os::windows::fs::MetadataExt;
    let path = path.as_ref();
    let metadata = if follow_links {
        path.metadata()
    } else {
        path.symlink_metadata()
    };
    match metadata {
        Ok(ref md) => {
            const FILE_ATTRIBUTE_ARCHIVE: u32 = 0x20;
            const FILE_ATTRIBUTE_READONLY: u32 = 0x01;
            const FILE_ATTRIBUTE_HIDDEN: u32 = 0x02;
            const FILE_ATTRIBUTE_SYSTEM: u32 = 0x04;
            const FILE_ATTRIBUTE_NORMAL: u32 = 0x80;
            const FILE_ATTRIBUTE_DIRECTORY: u32 = 0x10;
            const FILE_ATTRIBUTE_SPARSE_FILE: u32 = 0x00000200;
            const FILE_ATTRIBUTE_PINNED: u32 = 0x00080000;
            const FILE_ATTRIBUTE_UNPINNED: u32 = 0x00100000;
            const FILE_ATTRIBUTE_RECALL_ON_OPEN: u32 = 0x00040000;
            const FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS: u32 = 0x00400000;
            const FILE_ATTRIBUTE_OFFLINE: u32 = 0x00001000;
            // normally FILE_ATTRIBUTE_SPARSE_FILE would be enough, however Windows sometimes likes to mask it out. see: https://stackoverflow.com/q/54560454
            const IS_PROBABLY_ONEDRIVE: u32 = FILE_ATTRIBUTE_SPARSE_FILE
                | FILE_ATTRIBUTE_PINNED
                | FILE_ATTRIBUTE_UNPINNED
                | FILE_ATTRIBUTE_RECALL_ON_OPEN
                | FILE_ATTRIBUTE_RECALL_ON_DATA_ACCESS
                | FILE_ATTRIBUTE_OFFLINE;
            // Hidden / read-only / system bits do not affect which path is taken.
            let attr_filtered = md.file_attributes()
                & !(FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_READONLY | FILE_ATTRIBUTE_SYSTEM);
            // Fast path: archive files, directories and "normal" files, unless the entry looks
            // like a OneDrive placeholder and `use_apparent_size` is set.
            if ((attr_filtered & FILE_ATTRIBUTE_ARCHIVE) != 0
                || (attr_filtered & FILE_ATTRIBUTE_DIRECTORY) != 0
                || md.file_attributes() == FILE_ATTRIBUTE_NORMAL)
                && !((attr_filtered & IS_PROBABLY_ONEDRIVE != 0) && use_apparent_size)
            {
                Some((
                    md.len(),
                    None,
                    (
                        md.last_write_time() as i64,
                        md.last_access_time() as i64,
                        md.creation_time() as i64,
                    ),
                ))
            } else {
                get_metadata_expensive(path, use_apparent_size)
            }
        }
        // The cheap metadata lookup failed: let the slow path try (it yields `None` on error).
        _ => get_metadata_expensive(path, use_apparent_size),
    }
}
|