Merge pull request #51 from bootandy/ab-exclude

Support excluding filesystems with -x
This commit is contained in:
andy boot
2020-01-18 21:54:41 +00:00
committed by GitHub
3 changed files with 116 additions and 24 deletions

View File

@@ -54,6 +54,12 @@ fn main() {
.long("full-paths")
.help("If set sub directories will not have their path shortened"),
)
.arg(
Arg::with_name("limit_filesystem")
.short("x")
.long("limit-filesystem")
.help("Only count the files and directories in the same filesystem as the supplied directory"),
)
.arg(
Arg::with_name("display_apparent_size")
.short("s")
@@ -110,9 +116,15 @@ fn main() {
}
let use_apparent_size = options.is_present("display_apparent_size");
let limit_filesystem = options.is_present("limit_filesystem");
let simplified_dirs = simplify_dir_names(target_dirs);
let (permissions, nodes) = get_dir_tree(&simplified_dirs, use_apparent_size, threads);
let (permissions, nodes) = get_dir_tree(
&simplified_dirs,
use_apparent_size,
limit_filesystem,
threads,
);
let sorted_data = sort(nodes);
let biggest_ones = {
match depth {

View File

@@ -1,3 +1,4 @@
use jwalk::DirEntry;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::collections::HashSet;
@@ -37,7 +38,8 @@ impl PartialEq for Node {
}
/// Returns `true` when `parent` is an ancestor directory of `child`.
///
/// The root directory `"/"` is treated as a parent of every path. For any
/// other `parent`, `child` must start with `parent` and the character that
/// immediately follows that prefix must be a `/`, so `"/usr"` is a parent of
/// `"/usr/bin"` but not of `"/usrx/bin"` or of `"/usr"` itself.
pub fn is_a_parent_of(parent: &str, child: &str) -> bool {
    if parent == "/" {
        return true;
    }
    // `parent` is a prefix of `child` here, so `parent.len()` is guaranteed to
    // fall on a character boundary and the slice cannot panic.
    child.starts_with(parent) && child[parent.len()..].starts_with('/')
}
pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
@@ -69,16 +71,23 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
pub fn get_dir_tree(
top_level_names: &HashSet<String>,
apparent_size: bool,
limit_filesystem: bool,
threads: Option<usize>,
) -> (bool, HashMap<String, u64>) {
let mut permissions = 0;
let mut inodes: HashSet<(u64, u64)> = HashSet::new();
let mut data: HashMap<String, u64> = HashMap::new();
let restricted_filesystems = if limit_filesystem {
get_allowed_filesystems(top_level_names)
} else {
None
};
for b in top_level_names.iter() {
examine_dir(
&b,
apparent_size,
&restricted_filesystems,
&mut inodes,
&mut data,
&mut permissions,
@@ -88,6 +97,16 @@ pub fn get_dir_tree(
(permissions == 0, data)
}
/// Collects the filesystem identifiers of every top-level directory.
///
/// Each name is passed to `get_filesystem`; names for which that lookup fails
/// are skipped rather than reported. The result is always `Some`, possibly
/// wrapping an empty set.
fn get_allowed_filesystems(top_level_names: &HashSet<String>) -> Option<HashSet<u64>> {
    let allowed: HashSet<u64> = top_level_names
        .iter()
        .filter_map(|name| get_filesystem(name).ok())
        .collect();
    Some(allowed)
}
pub fn strip_end_slash(mut new_name: &str) -> &str {
while (new_name.ends_with('/') || new_name.ends_with("/.")) && new_name.len() > 1 {
new_name = &new_name[..new_name.len() - 1];
@@ -98,6 +117,7 @@ pub fn strip_end_slash(mut new_name: &str) -> &str {
fn examine_dir(
top_dir: &str,
apparent_size: bool,
filesystems: &Option<HashSet<u64>>,
inodes: &mut HashSet<(u64, u64)>,
data: &mut HashMap<String, u64>,
file_count_no_permission: &mut u64,
@@ -115,27 +135,8 @@ fn examine_dir(
match maybe_size_and_inode {
Some((size, maybe_inode)) => {
if !apparent_size {
if let Some(inode_dev_pair) = maybe_inode {
if inodes.contains(&inode_dev_pair) {
continue;
}
inodes.insert(inode_dev_pair);
}
}
// This path and all its parent paths have their counter incremented
for path_name in e.path().ancestors() {
// This is required due to bug in Jwalk that adds '/' to all sub dir lists
// see: https://github.com/jessegrosjean/jwalk/issues/13
if path_name.to_string_lossy() == "/" && top_dir != "/" {
continue
}
let path_name = path_name.to_string_lossy();
let s = data.entry(path_name.to_string()).or_insert(0);
*s += size;
if path_name == top_dir {
break;
}
if !should_ignore_file(apparent_size, filesystems, inodes, maybe_inode) {
process_file_with_size_and_inode(top_dir, data, e, size)
}
}
None => *file_count_no_permission += 1,
@@ -146,6 +147,55 @@ fn examine_dir(
}
}
/// Decides whether a file must be left out of the size totals.
///
/// * `apparent_size` - when `true`, entries are not de-duplicated by inode.
/// * `restricted_filesystems` - when `Some`, only files whose device id is in
///   the set are counted.
/// * `inodes` - the `(inode, device)` pairs counted so far; a newly accepted
///   pair is recorded here unless `apparent_size` is set.
/// * `maybe_inode` - the `(inode, device)` pair of the file, if known.
///
/// Returns `true` when the file should be skipped. A file without inode
/// information is never skipped because neither check can be applied to it.
fn should_ignore_file(
    apparent_size: bool,
    restricted_filesystems: &Option<HashSet<u64>>,
    inodes: &mut HashSet<(u64, u64)>,
    maybe_inode: Option<(u64, u64)>,
) -> bool {
    let inode_dev_pair = match maybe_inode {
        Some(pair) => pair,
        None => return false,
    };

    // Ignore files on different devices (if flag applied). This check is
    // independent of `apparent_size`: nesting it under the de-duplication
    // branch would make the filesystem limit a no-op in apparent-size mode.
    if let Some(allowed) = restricted_filesystems {
        if !allowed.contains(&inode_dev_pair.1) {
            return true;
        }
    }

    // Ignore files already visited or hard-linked. `insert` returns `false`
    // when the pair was already present. In apparent-size mode the set is
    // left untouched and every entry is counted.
    !apparent_size && !inodes.insert(inode_dev_pair)
}
/// Adds `size` to the running total of the entry's own path and of each of
/// its ancestors, stopping once `top_dir` has been updated.
///
/// * `top_dir` - the directory the walk started from; the climb ends there.
/// * `data` - running totals keyed by path string.
/// * `e` - the directory entry whose size is being attributed.
/// * `size` - the amount to add to every path on the way up.
fn process_file_with_size_and_inode(
    top_dir: &str,
    data: &mut HashMap<String, u64>,
    e: DirEntry,
    size: u64,
) {
    let entry_path = e.path();
    for ancestor in entry_path.ancestors() {
        let name = ancestor.to_string_lossy();
        // This is required due to bug in Jwalk that adds '/' to all sub dir lists
        // see: https://github.com/jessegrosjean/jwalk/issues/13
        let is_spurious_root = name == "/" && top_dir != "/";
        if is_spurious_root {
            continue;
        }
        *data.entry(name.to_string()).or_insert(0) += size;
        if name == top_dir {
            break;
        }
    }
}
pub fn sort_by_size_first_name_second(a: &(String, u64), b: &(String, u64)) -> Ordering {
let result = b.1.cmp(&a.1);
if result == Ordering::Equal {

View File

@@ -1,4 +1,6 @@
use jwalk::DirEntry;
use std::fs;
use std::io;
#[cfg(target_family = "unix")]
fn get_block_size() -> u64 {
@@ -20,7 +22,16 @@ pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Optio
})
}
/// Windows variant of `get_metadata`.
///
/// Returns the file size reported by the entry's metadata together with a
/// `(file index, volume serial number)` pair that stands in for the Unix
/// `(inode, device)` pair. The pair is `None` when either value is not
/// available from the metadata. `use_apparent_size` is not consulted here.
///
/// Returns `None` when the entry's metadata could not be read.
///
/// # Panics
///
/// Panics if `d.metadata` is `None`.
///
/// NOTE(review): `file_index` and `volume_serial_number` are believed to be
/// nightly-only (`windows_by_handle`) and to return `Option` values — confirm
/// against the toolchain this project builds with.
#[cfg(target_family = "windows")]
pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
    use std::os::windows::fs::MetadataExt;
    d.metadata.as_ref().unwrap().as_ref().ok().map(|md| {
        // Both accessors are optional; only build the pair when both exist,
        // widening the 32-bit serial number to match the Unix device id type.
        let windows_equivalent_of_inode = match (md.file_index(), md.volume_serial_number()) {
            (Some(index), Some(serial)) => Some((index, u64::from(serial))),
            _ => None,
        };
        (md.file_size(), windows_equivalent_of_inode)
    })
}
#[cfg(all(not(target_family = "windows"), not(target_family = "unix")))]
pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, u64)>)> {
d.metadata
.as_ref()
@@ -29,3 +40,22 @@ pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64,
.ok()
.map(|md| (md.len(), None))
}
/// Returns the device id of the filesystem that contains `file_path`.
///
/// # Errors
///
/// Propagates the `io::Error` from `fs::metadata` when the path's metadata
/// cannot be read.
#[cfg(target_family = "unix")]
pub fn get_filesystem(file_path: &str) -> Result<u64, io::Error> {
    use std::os::unix::fs::MetadataExt;
    fs::metadata(file_path).map(|md| md.dev())
}
/// Returns the volume serial number of the volume that contains `file_path`,
/// widened to `u64`.
///
/// This is the Windows counterpart of the Unix `get_filesystem` and carries
/// the same name so that platform-independent callers resolve on Windows too.
///
/// # Errors
///
/// Propagates the `io::Error` from `fs::metadata`, and returns an error of
/// kind `Other` when the metadata carries no volume serial number.
///
/// NOTE(review): `volume_serial_number` is believed to be nightly-only
/// (`windows_by_handle`) and to return `Option<u32>` — confirm against the
/// toolchain this project builds with.
#[cfg(target_family = "windows")]
pub fn get_filesystem(file_path: &str) -> Result<u64, io::Error> {
    use std::os::windows::fs::MetadataExt;
    let metadata = fs::metadata(file_path)?;
    metadata
        .volume_serial_number()
        .map(u64::from)
        .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "volume serial number unavailable"))
}

/// Alias of `get_filesystem`, kept so the earlier name keeps resolving.
#[cfg(target_family = "windows")]
pub fn get_device(file_path: &str) -> Result<u64, io::Error> {
    get_filesystem(file_path)
}
/// Fallback for platforms that are neither Unix nor Windows, where no
/// filesystem identifier is available.
///
/// Carries the same name as the Unix `get_filesystem` so that
/// platform-independent callers resolve here too.
///
/// # Errors
///
/// Always returns an error of kind `Other`.
#[cfg(all(not(target_family = "windows"), not(target_family = "unix")))]
pub fn get_filesystem(_file_path: &str) -> Result<u64, io::Error> {
    Err(io::Error::new(
        io::ErrorKind::Other,
        "filesystem identification is not supported on this platform",
    ))
}

/// Alias of `get_filesystem`, kept so the earlier name keeps resolving.
#[cfg(all(not(target_family = "windows"), not(target_family = "unix")))]
pub fn get_device(file_path: &str) -> Result<u64, io::Error> {
    get_filesystem(file_path)
}