From 9d4531d48b3e5941cbc816000d76d3f5648a3a88 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Thu, 23 Jan 2020 18:48:22 -0600 Subject: [PATCH 1/2] Refactor ~ use PathBuf instead of String --- src/display.rs | 28 +++++--- src/main.rs | 8 ++- src/utils/mod.rs | 153 ++++++++++++++++++++---------------------- src/utils/platform.rs | 5 +- 4 files changed, 98 insertions(+), 96 deletions(-) diff --git a/src/display.rs b/src/display.rs index 3eb725d..43ae0ca 100644 --- a/src/display.rs +++ b/src/display.rs @@ -4,6 +4,8 @@ use self::ansi_term::Colour::Fixed; use self::ansi_term::Style; use crate::utils::Node; +use std::path::Path; + static UNITS: [char; 4] = ['T', 'G', 'M', 'K']; pub struct DisplayData { @@ -103,7 +105,7 @@ fn display_node(node: Node, is_biggest: bool, indent: &str, display_data: &Displ let size = node.size; if !display_data.is_reversed { - print_this_node(&*name, size, is_biggest, display_data, indent); + print_this_node(&name, size, is_biggest, display_data, indent); } for c in display_data.get_children_from_node(node) { @@ -115,7 +117,7 @@ fn display_node(node: Node, is_biggest: bool, indent: &str, display_data: &Displ } if display_data.is_reversed { - print_this_node(&*name, size, is_biggest, display_data, indent); + print_this_node(&name, size, is_biggest, display_data, indent); } } @@ -136,8 +138,8 @@ fn clean_indentation_string(s: &str) -> String { is } -fn print_this_node( - name: &str, +fn print_this_node>( + name: P, size: u64, is_biggest: bool, display_data: &DisplayData, @@ -150,19 +152,23 @@ fn print_this_node( ) } -pub fn format_string( - dir_name: &str, +pub fn format_string>( + dir_name: P, is_biggest: bool, display_data: &DisplayData, size: &str, indentation: &str, ) -> String { + let dir_name = dir_name.as_ref(); let printable_name = { if display_data.short_paths { - dir_name - .split(std::path::is_separator) - .last() - .unwrap_or(dir_name) + match dir_name.parent() { + Some(prefix) => match dir_name.strip_prefix(prefix) { + Ok(base) => base, + Err(_) => dir_name, + }, + None => dir_name, + } } else { dir_name } @@ -175,7 +181,7 @@ pub fn format_string( Style::new().paint(size) }, indentation, - printable_name, + printable_name.display(), ) } diff --git a/src/main.rs b/src/main.rs index 5ea2481..bb25603 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ extern crate clap; use self::display::draw_it; use crate::utils::is_a_parent_of; use clap::{App, AppSettings, Arg}; +use std::path::PathBuf; use utils::{find_big_ones, get_dir_tree, simplify_dir_names, sort, trim_deep_ones, Node}; mod display; @@ -137,7 +138,10 @@ fn main() { let use_apparent_size = options.is_present("display_apparent_size"); let limit_filesystem = options.is_present("limit_filesystem"); - let ignore_directories = options.values_of("ignore_directory").map(|r| r.collect()); + let ignore_directories = match options.values_of("ignore_directory") { + Some(i) => Some(i.map(PathBuf::from).collect()), + None => None, + }; let simplified_dirs = simplify_dir_names(target_dirs); let (permissions, nodes) = get_dir_tree( @@ -165,7 +169,7 @@ fn main() { ); } -fn build_tree(biggest_ones: Vec<(String, u64)>, depth: Option) -> Node { +fn build_tree(biggest_ones: Vec<(PathBuf, u64)>, depth: Option) -> Node { let mut top_parent = Node::default(); // assume sorted order diff --git a/src/utils/mod.rs b/src/utils/mod.rs index e32db52..a73325e 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -2,6 +2,7 @@ use jwalk::DirEntry; use std::cmp::Ordering; use std::collections::HashMap; use std::collections::HashSet; +use std::path::{Path, PathBuf}; use jwalk::WalkDir; @@ -10,7 +11,7 @@ use self::platform::*; #[derive(Debug, Default, Eq)] pub struct Node { - pub name: String, + pub name: PathBuf, pub size: u64, pub children: Vec, } @@ -37,15 +38,15 @@ impl PartialEq for Node { } } -pub fn is_a_parent_of(parent: &str, child: &str) -> bool { - let path_parent = std::path::Path::new(parent); - let path_child = std::path::Path::new(child); - (path_child.starts_with(path_parent) && !path_parent.starts_with(path_child)) +pub fn is_a_parent_of>(parent: P, child: P) -> bool { + let parent = parent.as_ref(); + let child = child.as_ref(); + (child.starts_with(parent) && !parent.starts_with(child)) } -pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet { - let mut top_level_names: HashSet = HashSet::with_capacity(filenames.len()); - let mut to_remove: Vec = Vec::with_capacity(filenames.len()); +pub fn simplify_dir_names>(filenames: Vec

) -> HashSet { + let mut top_level_names: HashSet = HashSet::with_capacity(filenames.len()); + let mut to_remove: Vec = Vec::with_capacity(filenames.len()); for t in filenames { let top_level_name = normalize_path(t); @@ -53,7 +54,7 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet { for tt in top_level_names.iter() { if is_a_parent_of(&top_level_name, tt) { - to_remove.push(tt.to_string()); + to_remove.push(tt.to_path_buf()); } else if is_a_parent_of(tt, &top_level_name) { can_add = false; } @@ -62,7 +63,7 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet { top_level_names.retain(|tr| to_remove.binary_search(tr).is_err()); to_remove.clear(); if can_add { - top_level_names.insert(normalize_path(t).to_owned()); + top_level_names.insert(top_level_name); } } @@ -70,14 +71,14 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet { } pub fn get_dir_tree( - top_level_names: &HashSet, - ignore_directories: Option>, + top_level_names: &HashSet, + ignore_directories: Option>, apparent_size: bool, limit_filesystem: bool, threads: Option, -) -> (bool, HashMap) { +) -> (bool, HashMap) { let mut permissions = 0; - let mut data: HashMap = HashMap::new(); + let mut data: HashMap = HashMap::new(); let restricted_filesystems = if limit_filesystem { get_allowed_filesystems(top_level_names) } else { @@ -86,7 +87,7 @@ pub fn get_dir_tree( for b in top_level_names.iter() { examine_dir( - &b, + b, apparent_size, &restricted_filesystems, &ignore_directories, @@ -98,7 +99,7 @@ pub fn get_dir_tree( (permissions == 0, data) } -fn get_allowed_filesystems(top_level_names: &HashSet) -> Option> { +fn get_allowed_filesystems(top_level_names: &HashSet) -> Option> { let mut limit_filesystems: HashSet = HashSet::new(); for file_name in top_level_names.iter() { if let Ok(a) = get_filesystem(file_name) { @@ -108,26 +109,22 @@ fn get_allowed_filesystems(top_level_names: &HashSet) -> Option>(path: P) -> std::string::String { +pub fn normalize_path>(path: P) -> PathBuf { // normalize path ... // 1. removing repeated separators // 2. removing interior '.' ("current directory") path segments // 3. removing trailing extra separators and '.' ("current directory") path segments // * `Path.components()` does all the above work; ref: // 4. changing to os preferred separator (automatically done by recollecting components back into a PathBuf) - path.as_ref() - .components() - .collect::() - .to_string_lossy() - .to_string() + path.as_ref().components().collect::() } fn examine_dir( - top_dir: &str, + top_dir: &PathBuf, apparent_size: bool, filesystems: &Option>, - ignore_directories: &Option>, - data: &mut HashMap, + ignore_directories: &Option>, + data: &mut HashMap, file_count_no_permission: &mut u64, threads: Option, ) { @@ -141,9 +138,15 @@ fn examine_dir( 'entry: for entry in iter { if let Ok(e) = entry { let maybe_size_and_inode = get_metadata(&e, apparent_size); - if let Some(d) = ignore_directories { - for s in d { - if e.path().to_string_lossy().contains(*s) { + if let Some(dirs) = ignore_directories { + let path = e.path(); + let parts = path.components().collect::>(); + for d in dirs { + let seq = d.components().collect::>(); + if parts + .windows(seq.len()) + .any(|window| window.iter().collect::() == *d) + { continue 'entry; } } @@ -152,7 +155,7 @@ fn examine_dir( match maybe_size_and_inode { Some((size, maybe_inode)) => { if !should_ignore_file(apparent_size, filesystems, &mut inodes, maybe_inode) { - process_file_with_size_and_inode(top_dir, data, e, size) + process_file_with_size_and_inode(&top_dir, data, e, size) } } None => *file_count_no_permission += 1, @@ -191,28 +194,27 @@ fn should_ignore_file( } fn process_file_with_size_and_inode( - top_dir: &str, - data: &mut HashMap, + top_dir: &PathBuf, + data: &mut HashMap, e: DirEntry, size: u64, ) { // This path and all its parent paths have their counter incremented - for path_name in e.path().ancestors() { + for path in e.path().ancestors() { // This is required due to bug in Jwalk that adds '/' to all sub dir lists // see: https://github.com/jessegrosjean/jwalk/issues/13 - if path_name.to_string_lossy() == "/" && top_dir != "/" { + if path.to_string_lossy() == "/" && top_dir.to_string_lossy() != "/" { continue; } - let path_name = path_name.to_string_lossy(); - let s = data.entry(path_name.to_string()).or_insert(0); + let s = data.entry(normalize_path(path)).or_insert(0); *s += size; - if path_name == top_dir { + if path.starts_with(top_dir) && top_dir.starts_with(path) { break; } } } -pub fn sort_by_size_first_name_second(a: &(String, u64), b: &(String, u64)) -> Ordering { +pub fn sort_by_size_first_name_second(a: &(PathBuf, u64), b: &(PathBuf, u64)) -> Ordering { let result = b.1.cmp(&a.1); if result == Ordering::Equal { a.0.cmp(&b.0) @@ -221,13 +223,13 @@ pub fn sort_by_size_first_name_second(a: &(String, u64), b: &(String, u64)) -> O } } -pub fn sort(data: HashMap) -> Vec<(String, u64)> { - let mut new_l: Vec<(String, u64)> = data.iter().map(|(a, b)| (a.clone(), *b)).collect(); +pub fn sort(data: HashMap) -> Vec<(PathBuf, u64)> { + let mut new_l: Vec<(PathBuf, u64)> = data.iter().map(|(a, b)| (a.clone(), *b)).collect(); new_l.sort_unstable_by(sort_by_size_first_name_second); new_l } -pub fn find_big_ones(new_l: Vec<(String, u64)>, max_to_show: usize) -> Vec<(String, u64)> { +pub fn find_big_ones(new_l: Vec<(PathBuf, u64)>, max_to_show: usize) -> Vec<(PathBuf, u64)> { if max_to_show > 0 && new_l.len() > max_to_show { new_l[0..max_to_show].to_vec() } else { @@ -236,18 +238,31 @@ pub fn find_big_ones(new_l: Vec<(String, u64)>, max_to_show: usize) -> Vec<(Stri } pub fn trim_deep_ones( - input: Vec<(String, u64)>, + input: Vec<(PathBuf, u64)>, max_depth: u64, - top_level_names: &HashSet, -) -> Vec<(String, u64)> { - let mut result: Vec<(String, u64)> = Vec::with_capacity(input.len() * top_level_names.len()); + top_level_names: &HashSet, +) -> Vec<(PathBuf, u64)> { + let mut result: Vec<(PathBuf, u64)> = Vec::with_capacity(input.len() * top_level_names.len()); for name in top_level_names { - let my_max_depth = name.matches(std::path::is_separator).count() + max_depth as usize; - let name_ref: &str = name.as_ref(); + let my_max_depth = name + .components() + .filter(|&c| match c { + std::path::Component::Prefix(_) => false, + _ => true, + }) + .count() + + max_depth as usize; for &(ref k, ref v) in input.iter() { - if k.starts_with(name_ref) && k.matches(std::path::is_separator).count() <= my_max_depth + if k.starts_with(name) + && k.components() + .filter(|&c| match c { + std::path::Component::Prefix(_) => false, + _ => true, + }) + .count() + <= my_max_depth { result.push((k.clone(), *v)); } @@ -263,34 +278,22 @@ mod tests { #[test] fn test_simplify_dir() { let mut correct = HashSet::new(); - correct.insert("a".to_string()); + correct.insert(PathBuf::from("a")); assert_eq!(simplify_dir_names(vec!["a"]), correct); } #[test] fn test_simplify_dir_rm_subdir() { let mut correct = HashSet::new(); - correct.insert( - ["a", "b"] - .iter() - .collect::() - .to_string_lossy() - .to_string(), - ); + correct.insert(["a", "b"].iter().collect::()); assert_eq!(simplify_dir_names(vec!["a/b", "a/b/c", "a/b/d/f"]), correct); } #[test] fn test_simplify_dir_duplicates() { let mut correct = HashSet::new(); - correct.insert( - ["a", "b"] - .iter() - .collect::() - .to_string_lossy() - .to_string(), - ); - correct.insert("c".to_string()); + correct.insert(["a", "b"].iter().collect::()); + correct.insert(PathBuf::from("c")); assert_eq!( simplify_dir_names(vec![ "a/b", @@ -308,36 +311,24 @@ mod tests { #[test] fn test_simplify_dir_rm_subdir_and_not_substrings() { let mut correct = HashSet::new(); - correct.insert("b".to_string()); - correct.insert( - ["c", "a", "b"] - .iter() - .collect::() - .to_string_lossy() - .to_string(), - ); - correct.insert( - ["a", "b"] - .iter() - .collect::() - .to_string_lossy() - .to_string(), - ); + correct.insert(PathBuf::from("b")); + correct.insert(["c", "a", "b"].iter().collect::()); + correct.insert(["a", "b"].iter().collect::()); assert_eq!(simplify_dir_names(vec!["a/b", "c/a/b/", "b"]), correct); } #[test] fn test_simplify_dir_dots() { let mut correct = HashSet::new(); - correct.insert("src".to_string()); + correct.insert(PathBuf::from("src")); assert_eq!(simplify_dir_names(vec!["src/."]), correct); } #[test] fn test_simplify_dir_substring_names() { let mut correct = HashSet::new(); - correct.insert("src".to_string()); - correct.insert("src_v2".to_string()); + correct.insert(PathBuf::from("src")); + correct.insert(PathBuf::from("src_v2")); assert_eq!(simplify_dir_names(vec!["src/", "src_v2"]), correct); } diff --git a/src/utils/platform.rs b/src/utils/platform.rs index 259f388..b35687e 100644 --- a/src/utils/platform.rs +++ b/src/utils/platform.rs @@ -2,6 +2,7 @@ use jwalk::DirEntry; #[allow(unused_imports)] use std::fs; use std::io; +use std::path::PathBuf; #[cfg(target_family = "unix")] fn get_block_size() -> u64 { @@ -48,14 +49,14 @@ pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, } #[cfg(target_family = "unix")] -pub fn get_filesystem(file_path: &str) -> Result { +pub fn get_filesystem(file_path: &PathBuf) -> Result { use std::os::unix::fs::MetadataExt; let metadata = fs::metadata(file_path)?; Ok(metadata.dev()) } #[cfg(target_family = "windows")] -pub fn get_filesystem(file_path: &str) -> Result { +pub fn get_filesystem(file_path: &PathBuf) -> Result { use winapi_util::file::information; use winapi_util::Handle; From a3d8fc00e1946388c3e77ad7d26c0872c358bd53 Mon Sep 17 00:00:00 2001 From: Roy Ivy III Date: Wed, 29 Jan 2020 22:25:35 -0600 Subject: [PATCH 2/2] Refactor ~ use `AsRef` where possible --- src/main.rs | 2 +- src/utils/mod.rs | 24 +++++++++++++----------- src/utils/platform.rs | 6 +++--- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/main.rs b/src/main.rs index bb25603..aec5656 100644 --- a/src/main.rs +++ b/src/main.rs @@ -146,7 +146,7 @@ fn main() { let simplified_dirs = simplify_dir_names(target_dirs); let (permissions, nodes) = get_dir_tree( &simplified_dirs, - ignore_directories, + &ignore_directories, use_apparent_size, limit_filesystem, threads, diff --git a/src/utils/mod.rs b/src/utils/mod.rs index a73325e..5586e29 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -70,9 +70,9 @@ pub fn simplify_dir_names>(filenames: Vec

) -> HashSet top_level_names } -pub fn get_dir_tree( - top_level_names: &HashSet, - ignore_directories: Option>, +pub fn get_dir_tree>( + top_level_names: &HashSet

, + ignore_directories: &Option>, apparent_size: bool, limit_filesystem: bool, threads: Option, @@ -90,7 +90,7 @@ pub fn get_dir_tree( b, apparent_size, &restricted_filesystems, - &ignore_directories, + ignore_directories, &mut data, &mut permissions, threads, @@ -99,7 +99,7 @@ pub fn get_dir_tree( (permissions == 0, data) } -fn get_allowed_filesystems(top_level_names: &HashSet) -> Option> { +fn get_allowed_filesystems>(top_level_names: &HashSet

) -> Option> { let mut limit_filesystems: HashSet = HashSet::new(); for file_name in top_level_names.iter() { if let Ok(a) = get_filesystem(file_name) { @@ -109,7 +109,7 @@ fn get_allowed_filesystems(top_level_names: &HashSet) -> Option>(path: P) -> PathBuf { +pub fn normalize_path>(path: P) -> PathBuf { // normalize path ... // 1. removing repeated separators // 2. removing interior '.' ("current directory") path segments @@ -119,8 +119,8 @@ pub fn normalize_path>(path: P) -> PathBuf { path.as_ref().components().collect::() } -fn examine_dir( - top_dir: &PathBuf, +fn examine_dir>( + top_dir: P, apparent_size: bool, filesystems: &Option>, ignore_directories: &Option>, @@ -128,6 +128,7 @@ fn examine_dir( file_count_no_permission: &mut u64, threads: Option, ) { + let top_dir = top_dir.as_ref(); let mut inodes: HashSet<(u64, u64)> = HashSet::new(); let mut iter = WalkDir::new(top_dir) .preload_metadata(true) @@ -155,7 +156,7 @@ fn examine_dir( match maybe_size_and_inode { Some((size, maybe_inode)) => { if !should_ignore_file(apparent_size, filesystems, &mut inodes, maybe_inode) { - process_file_with_size_and_inode(&top_dir, data, e, size) + process_file_with_size_and_inode(top_dir, data, e, size) } } None => *file_count_no_permission += 1, @@ -193,12 +194,13 @@ fn should_ignore_file( false } -fn process_file_with_size_and_inode( - top_dir: &PathBuf, +fn process_file_with_size_and_inode>( + top_dir: P, data: &mut HashMap, e: DirEntry, size: u64, ) { + let top_dir = top_dir.as_ref(); // This path and all its parent paths have their counter incremented for path in e.path().ancestors() { // This is required due to bug in Jwalk that adds '/' to all sub dir lists diff --git a/src/utils/platform.rs b/src/utils/platform.rs index b35687e..d2b1720 100644 --- a/src/utils/platform.rs +++ b/src/utils/platform.rs @@ -2,7 +2,7 @@ use jwalk::DirEntry; #[allow(unused_imports)] use std::fs; use std::io; -use std::path::PathBuf; +use std::path::Path; #[cfg(target_family = "unix")] fn get_block_size() -> u64 { @@ -49,14 +49,14 @@ pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, } #[cfg(target_family = "unix")] -pub fn get_filesystem(file_path: &PathBuf) -> Result { +pub fn get_filesystem>(file_path: P) -> Result { use std::os::unix::fs::MetadataExt; let metadata = fs::metadata(file_path)?; Ok(metadata.dev()) } #[cfg(target_family = "windows")] -pub fn get_filesystem(file_path: &PathBuf) -> Result { +pub fn get_filesystem>(file_path: P) -> Result { use winapi_util::file::information; use winapi_util::Handle;