Merge pull request #64 from rivy/rf.path

Refactor ~ use Path/PathBuf instead of &str/String
This commit is contained in:
andy.boot
2020-02-09 13:52:11 +00:00
committed by GitHub
4 changed files with 104 additions and 100 deletions

View File

@@ -4,6 +4,8 @@ use self::ansi_term::Colour::Fixed;
use self::ansi_term::Style; use self::ansi_term::Style;
use crate::utils::Node; use crate::utils::Node;
use std::path::Path;
static UNITS: [char; 4] = ['T', 'G', 'M', 'K']; static UNITS: [char; 4] = ['T', 'G', 'M', 'K'];
pub struct DisplayData { pub struct DisplayData {
@@ -103,7 +105,7 @@ fn display_node(node: Node, is_biggest: bool, indent: &str, display_data: &Displ
let size = node.size; let size = node.size;
if !display_data.is_reversed { if !display_data.is_reversed {
print_this_node(&*name, size, is_biggest, display_data, indent); print_this_node(&name, size, is_biggest, display_data, indent);
} }
for c in display_data.get_children_from_node(node) { for c in display_data.get_children_from_node(node) {
@@ -115,7 +117,7 @@ fn display_node(node: Node, is_biggest: bool, indent: &str, display_data: &Displ
} }
if display_data.is_reversed { if display_data.is_reversed {
print_this_node(&*name, size, is_biggest, display_data, indent); print_this_node(&name, size, is_biggest, display_data, indent);
} }
} }
@@ -136,8 +138,8 @@ fn clean_indentation_string(s: &str) -> String {
is is
} }
fn print_this_node( fn print_this_node<P: AsRef<Path>>(
name: &str, name: P,
size: u64, size: u64,
is_biggest: bool, is_biggest: bool,
display_data: &DisplayData, display_data: &DisplayData,
@@ -150,19 +152,23 @@ fn print_this_node(
) )
} }
pub fn format_string( pub fn format_string<P: AsRef<Path>>(
dir_name: &str, dir_name: P,
is_biggest: bool, is_biggest: bool,
display_data: &DisplayData, display_data: &DisplayData,
size: &str, size: &str,
indentation: &str, indentation: &str,
) -> String { ) -> String {
let dir_name = dir_name.as_ref();
let printable_name = { let printable_name = {
if display_data.short_paths { if display_data.short_paths {
dir_name match dir_name.parent() {
.split(std::path::is_separator) Some(prefix) => match dir_name.strip_prefix(prefix) {
.last() Ok(base) => base,
.unwrap_or(dir_name) Err(_) => dir_name,
},
None => dir_name,
}
} else { } else {
dir_name dir_name
} }
@@ -175,7 +181,7 @@ pub fn format_string(
Style::new().paint(size) Style::new().paint(size)
}, },
indentation, indentation,
printable_name, printable_name.display(),
) )
} }

View File

@@ -4,6 +4,7 @@ extern crate clap;
use self::display::draw_it; use self::display::draw_it;
use crate::utils::is_a_parent_of; use crate::utils::is_a_parent_of;
use clap::{App, AppSettings, Arg}; use clap::{App, AppSettings, Arg};
use std::path::PathBuf;
use utils::{find_big_ones, get_dir_tree, simplify_dir_names, sort, trim_deep_ones, Node}; use utils::{find_big_ones, get_dir_tree, simplify_dir_names, sort, trim_deep_ones, Node};
mod display; mod display;
@@ -137,12 +138,15 @@ fn main() {
let use_apparent_size = options.is_present("display_apparent_size"); let use_apparent_size = options.is_present("display_apparent_size");
let limit_filesystem = options.is_present("limit_filesystem"); let limit_filesystem = options.is_present("limit_filesystem");
let ignore_directories = options.values_of("ignore_directory").map(|r| r.collect()); let ignore_directories = match options.values_of("ignore_directory") {
Some(i) => Some(i.map(PathBuf::from).collect()),
None => None,
};
let simplified_dirs = simplify_dir_names(target_dirs); let simplified_dirs = simplify_dir_names(target_dirs);
let (permissions, nodes) = get_dir_tree( let (permissions, nodes) = get_dir_tree(
&simplified_dirs, &simplified_dirs,
ignore_directories, &ignore_directories,
use_apparent_size, use_apparent_size,
limit_filesystem, limit_filesystem,
threads, threads,
@@ -165,7 +169,7 @@ fn main() {
); );
} }
fn build_tree(biggest_ones: Vec<(String, u64)>, depth: Option<u64>) -> Node { fn build_tree(biggest_ones: Vec<(PathBuf, u64)>, depth: Option<u64>) -> Node {
let mut top_parent = Node::default(); let mut top_parent = Node::default();
// assume sorted order // assume sorted order

View File

@@ -2,6 +2,7 @@ use jwalk::DirEntry;
use std::cmp::Ordering; use std::cmp::Ordering;
use std::collections::HashMap; use std::collections::HashMap;
use std::collections::HashSet; use std::collections::HashSet;
use std::path::{Path, PathBuf};
use jwalk::WalkDir; use jwalk::WalkDir;
@@ -10,7 +11,7 @@ use self::platform::*;
#[derive(Debug, Default, Eq)] #[derive(Debug, Default, Eq)]
pub struct Node { pub struct Node {
pub name: String, pub name: PathBuf,
pub size: u64, pub size: u64,
pub children: Vec<Node>, pub children: Vec<Node>,
} }
@@ -37,15 +38,15 @@ impl PartialEq for Node {
} }
} }
pub fn is_a_parent_of(parent: &str, child: &str) -> bool { pub fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
let path_parent = std::path::Path::new(parent); let parent = parent.as_ref();
let path_child = std::path::Path::new(child); let child = child.as_ref();
(path_child.starts_with(path_parent) && !path_parent.starts_with(path_child)) (child.starts_with(parent) && !parent.starts_with(child))
} }
pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> { pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf> {
let mut top_level_names: HashSet<String> = HashSet::with_capacity(filenames.len()); let mut top_level_names: HashSet<PathBuf> = HashSet::with_capacity(filenames.len());
let mut to_remove: Vec<String> = Vec::with_capacity(filenames.len()); let mut to_remove: Vec<PathBuf> = Vec::with_capacity(filenames.len());
for t in filenames { for t in filenames {
let top_level_name = normalize_path(t); let top_level_name = normalize_path(t);
@@ -53,7 +54,7 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
for tt in top_level_names.iter() { for tt in top_level_names.iter() {
if is_a_parent_of(&top_level_name, tt) { if is_a_parent_of(&top_level_name, tt) {
to_remove.push(tt.to_string()); to_remove.push(tt.to_path_buf());
} else if is_a_parent_of(tt, &top_level_name) { } else if is_a_parent_of(tt, &top_level_name) {
can_add = false; can_add = false;
} }
@@ -62,22 +63,22 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
top_level_names.retain(|tr| to_remove.binary_search(tr).is_err()); top_level_names.retain(|tr| to_remove.binary_search(tr).is_err());
to_remove.clear(); to_remove.clear();
if can_add { if can_add {
top_level_names.insert(normalize_path(t).to_owned()); top_level_names.insert(top_level_name);
} }
} }
top_level_names top_level_names
} }
pub fn get_dir_tree( pub fn get_dir_tree<P: AsRef<Path>>(
top_level_names: &HashSet<String>, top_level_names: &HashSet<P>,
ignore_directories: Option<Vec<&str>>, ignore_directories: &Option<Vec<PathBuf>>,
apparent_size: bool, apparent_size: bool,
limit_filesystem: bool, limit_filesystem: bool,
threads: Option<usize>, threads: Option<usize>,
) -> (bool, HashMap<String, u64>) { ) -> (bool, HashMap<PathBuf, u64>) {
let mut permissions = 0; let mut permissions = 0;
let mut data: HashMap<String, u64> = HashMap::new(); let mut data: HashMap<PathBuf, u64> = HashMap::new();
let restricted_filesystems = if limit_filesystem { let restricted_filesystems = if limit_filesystem {
get_allowed_filesystems(top_level_names) get_allowed_filesystems(top_level_names)
} else { } else {
@@ -86,10 +87,10 @@ pub fn get_dir_tree(
for b in top_level_names.iter() { for b in top_level_names.iter() {
examine_dir( examine_dir(
&b, b,
apparent_size, apparent_size,
&restricted_filesystems, &restricted_filesystems,
&ignore_directories, ignore_directories,
&mut data, &mut data,
&mut permissions, &mut permissions,
threads, threads,
@@ -98,7 +99,7 @@ pub fn get_dir_tree(
(permissions == 0, data) (permissions == 0, data)
} }
fn get_allowed_filesystems(top_level_names: &HashSet<String>) -> Option<HashSet<u64>> { fn get_allowed_filesystems<P: AsRef<Path>>(top_level_names: &HashSet<P>) -> Option<HashSet<u64>> {
let mut limit_filesystems: HashSet<u64> = HashSet::new(); let mut limit_filesystems: HashSet<u64> = HashSet::new();
for file_name in top_level_names.iter() { for file_name in top_level_names.iter() {
if let Ok(a) = get_filesystem(file_name) { if let Ok(a) = get_filesystem(file_name) {
@@ -108,29 +109,26 @@ fn get_allowed_filesystems(top_level_names: &HashSet<String>) -> Option<HashSet<
Some(limit_filesystems) Some(limit_filesystems)
} }
pub fn normalize_path<P: AsRef<std::path::Path>>(path: P) -> std::string::String { pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
// normalize path ... // normalize path ...
// 1. removing repeated separators // 1. removing repeated separators
// 2. removing interior '.' ("current directory") path segments // 2. removing interior '.' ("current directory") path segments
// 3. removing trailing extra separators and '.' ("current directory") path segments // 3. removing trailing extra separators and '.' ("current directory") path segments
// * `Path.components()` does all the above work; ref: <https://doc.rust-lang.org/std/path/struct.Path.html#method.components> // * `Path.components()` does all the above work; ref: <https://doc.rust-lang.org/std/path/struct.Path.html#method.components>
// 4. changing to os preferred separator (automatically done by recollecting components back into a PathBuf) // 4. changing to os preferred separator (automatically done by recollecting components back into a PathBuf)
path.as_ref() path.as_ref().components().collect::<PathBuf>()
.components()
.collect::<std::path::PathBuf>()
.to_string_lossy()
.to_string()
} }
fn examine_dir( fn examine_dir<P: AsRef<Path>>(
top_dir: &str, top_dir: P,
apparent_size: bool, apparent_size: bool,
filesystems: &Option<HashSet<u64>>, filesystems: &Option<HashSet<u64>>,
ignore_directories: &Option<Vec<&str>>, ignore_directories: &Option<Vec<PathBuf>>,
data: &mut HashMap<String, u64>, data: &mut HashMap<PathBuf, u64>,
file_count_no_permission: &mut u64, file_count_no_permission: &mut u64,
threads: Option<usize>, threads: Option<usize>,
) { ) {
let top_dir = top_dir.as_ref();
let mut inodes: HashSet<(u64, u64)> = HashSet::new(); let mut inodes: HashSet<(u64, u64)> = HashSet::new();
let mut iter = WalkDir::new(top_dir) let mut iter = WalkDir::new(top_dir)
.preload_metadata(true) .preload_metadata(true)
@@ -141,9 +139,15 @@ fn examine_dir(
'entry: for entry in iter { 'entry: for entry in iter {
if let Ok(e) = entry { if let Ok(e) = entry {
let maybe_size_and_inode = get_metadata(&e, apparent_size); let maybe_size_and_inode = get_metadata(&e, apparent_size);
if let Some(d) = ignore_directories { if let Some(dirs) = ignore_directories {
for s in d { let path = e.path();
if e.path().to_string_lossy().contains(*s) { let parts = path.components().collect::<Vec<std::path::Component>>();
for d in dirs {
let seq = d.components().collect::<Vec<std::path::Component>>();
if parts
.windows(seq.len())
.any(|window| window.iter().collect::<PathBuf>() == *d)
{
continue 'entry; continue 'entry;
} }
} }
@@ -190,29 +194,29 @@ fn should_ignore_file(
false false
} }
fn process_file_with_size_and_inode( fn process_file_with_size_and_inode<P: AsRef<Path>>(
top_dir: &str, top_dir: P,
data: &mut HashMap<String, u64>, data: &mut HashMap<PathBuf, u64>,
e: DirEntry, e: DirEntry,
size: u64, size: u64,
) { ) {
let top_dir = top_dir.as_ref();
// This path and all its parent paths have their counter incremented // This path and all its parent paths have their counter incremented
for path_name in e.path().ancestors() { for path in e.path().ancestors() {
// This is required due to bug in Jwalk that adds '/' to all sub dir lists // This is required due to bug in Jwalk that adds '/' to all sub dir lists
// see: https://github.com/jessegrosjean/jwalk/issues/13 // see: https://github.com/jessegrosjean/jwalk/issues/13
if path_name.to_string_lossy() == "/" && top_dir != "/" { if path.to_string_lossy() == "/" && top_dir.to_string_lossy() != "/" {
continue; continue;
} }
let path_name = path_name.to_string_lossy(); let s = data.entry(normalize_path(path)).or_insert(0);
let s = data.entry(path_name.to_string()).or_insert(0);
*s += size; *s += size;
if path_name == top_dir { if path.starts_with(top_dir) && top_dir.starts_with(path) {
break; break;
} }
} }
} }
pub fn sort_by_size_first_name_second(a: &(String, u64), b: &(String, u64)) -> Ordering { pub fn sort_by_size_first_name_second(a: &(PathBuf, u64), b: &(PathBuf, u64)) -> Ordering {
let result = b.1.cmp(&a.1); let result = b.1.cmp(&a.1);
if result == Ordering::Equal { if result == Ordering::Equal {
a.0.cmp(&b.0) a.0.cmp(&b.0)
@@ -221,13 +225,13 @@ pub fn sort_by_size_first_name_second(a: &(String, u64), b: &(String, u64)) -> O
} }
} }
pub fn sort(data: HashMap<String, u64>) -> Vec<(String, u64)> { pub fn sort(data: HashMap<PathBuf, u64>) -> Vec<(PathBuf, u64)> {
let mut new_l: Vec<(String, u64)> = data.iter().map(|(a, b)| (a.clone(), *b)).collect(); let mut new_l: Vec<(PathBuf, u64)> = data.iter().map(|(a, b)| (a.clone(), *b)).collect();
new_l.sort_unstable_by(sort_by_size_first_name_second); new_l.sort_unstable_by(sort_by_size_first_name_second);
new_l new_l
} }
pub fn find_big_ones(new_l: Vec<(String, u64)>, max_to_show: usize) -> Vec<(String, u64)> { pub fn find_big_ones(new_l: Vec<(PathBuf, u64)>, max_to_show: usize) -> Vec<(PathBuf, u64)> {
if max_to_show > 0 && new_l.len() > max_to_show { if max_to_show > 0 && new_l.len() > max_to_show {
new_l[0..max_to_show].to_vec() new_l[0..max_to_show].to_vec()
} else { } else {
@@ -236,18 +240,31 @@ pub fn find_big_ones(new_l: Vec<(String, u64)>, max_to_show: usize) -> Vec<(Stri
} }
pub fn trim_deep_ones( pub fn trim_deep_ones(
input: Vec<(String, u64)>, input: Vec<(PathBuf, u64)>,
max_depth: u64, max_depth: u64,
top_level_names: &HashSet<String>, top_level_names: &HashSet<PathBuf>,
) -> Vec<(String, u64)> { ) -> Vec<(PathBuf, u64)> {
let mut result: Vec<(String, u64)> = Vec::with_capacity(input.len() * top_level_names.len()); let mut result: Vec<(PathBuf, u64)> = Vec::with_capacity(input.len() * top_level_names.len());
for name in top_level_names { for name in top_level_names {
let my_max_depth = name.matches(std::path::is_separator).count() + max_depth as usize; let my_max_depth = name
let name_ref: &str = name.as_ref(); .components()
.filter(|&c| match c {
std::path::Component::Prefix(_) => false,
_ => true,
})
.count()
+ max_depth as usize;
for &(ref k, ref v) in input.iter() { for &(ref k, ref v) in input.iter() {
if k.starts_with(name_ref) && k.matches(std::path::is_separator).count() <= my_max_depth if k.starts_with(name)
&& k.components()
.filter(|&c| match c {
std::path::Component::Prefix(_) => false,
_ => true,
})
.count()
<= my_max_depth
{ {
result.push((k.clone(), *v)); result.push((k.clone(), *v));
} }
@@ -263,34 +280,22 @@ mod tests {
#[test] #[test]
fn test_simplify_dir() { fn test_simplify_dir() {
let mut correct = HashSet::new(); let mut correct = HashSet::new();
correct.insert("a".to_string()); correct.insert(PathBuf::from("a"));
assert_eq!(simplify_dir_names(vec!["a"]), correct); assert_eq!(simplify_dir_names(vec!["a"]), correct);
} }
#[test] #[test]
fn test_simplify_dir_rm_subdir() { fn test_simplify_dir_rm_subdir() {
let mut correct = HashSet::new(); let mut correct = HashSet::new();
correct.insert( correct.insert(["a", "b"].iter().collect::<PathBuf>());
["a", "b"]
.iter()
.collect::<std::path::PathBuf>()
.to_string_lossy()
.to_string(),
);
assert_eq!(simplify_dir_names(vec!["a/b", "a/b/c", "a/b/d/f"]), correct); assert_eq!(simplify_dir_names(vec!["a/b", "a/b/c", "a/b/d/f"]), correct);
} }
#[test] #[test]
fn test_simplify_dir_duplicates() { fn test_simplify_dir_duplicates() {
let mut correct = HashSet::new(); let mut correct = HashSet::new();
correct.insert( correct.insert(["a", "b"].iter().collect::<PathBuf>());
["a", "b"] correct.insert(PathBuf::from("c"));
.iter()
.collect::<std::path::PathBuf>()
.to_string_lossy()
.to_string(),
);
correct.insert("c".to_string());
assert_eq!( assert_eq!(
simplify_dir_names(vec![ simplify_dir_names(vec![
"a/b", "a/b",
@@ -308,36 +313,24 @@ mod tests {
#[test] #[test]
fn test_simplify_dir_rm_subdir_and_not_substrings() { fn test_simplify_dir_rm_subdir_and_not_substrings() {
let mut correct = HashSet::new(); let mut correct = HashSet::new();
correct.insert("b".to_string()); correct.insert(PathBuf::from("b"));
correct.insert( correct.insert(["c", "a", "b"].iter().collect::<PathBuf>());
["c", "a", "b"] correct.insert(["a", "b"].iter().collect::<PathBuf>());
.iter()
.collect::<std::path::PathBuf>()
.to_string_lossy()
.to_string(),
);
correct.insert(
["a", "b"]
.iter()
.collect::<std::path::PathBuf>()
.to_string_lossy()
.to_string(),
);
assert_eq!(simplify_dir_names(vec!["a/b", "c/a/b/", "b"]), correct); assert_eq!(simplify_dir_names(vec!["a/b", "c/a/b/", "b"]), correct);
} }
#[test] #[test]
fn test_simplify_dir_dots() { fn test_simplify_dir_dots() {
let mut correct = HashSet::new(); let mut correct = HashSet::new();
correct.insert("src".to_string()); correct.insert(PathBuf::from("src"));
assert_eq!(simplify_dir_names(vec!["src/."]), correct); assert_eq!(simplify_dir_names(vec!["src/."]), correct);
} }
#[test] #[test]
fn test_simplify_dir_substring_names() { fn test_simplify_dir_substring_names() {
let mut correct = HashSet::new(); let mut correct = HashSet::new();
correct.insert("src".to_string()); correct.insert(PathBuf::from("src"));
correct.insert("src_v2".to_string()); correct.insert(PathBuf::from("src_v2"));
assert_eq!(simplify_dir_names(vec!["src/", "src_v2"]), correct); assert_eq!(simplify_dir_names(vec!["src/", "src_v2"]), correct);
} }

View File

@@ -2,6 +2,7 @@ use jwalk::DirEntry;
#[allow(unused_imports)] #[allow(unused_imports)]
use std::fs; use std::fs;
use std::io; use std::io;
use std::path::Path;
#[cfg(target_family = "unix")] #[cfg(target_family = "unix")]
fn get_block_size() -> u64 { fn get_block_size() -> u64 {
@@ -48,14 +49,14 @@ pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64,
} }
#[cfg(target_family = "unix")] #[cfg(target_family = "unix")]
pub fn get_filesystem(file_path: &str) -> Result<u64, io::Error> { pub fn get_filesystem<P: AsRef<Path>>(file_path: P) -> Result<u64, io::Error> {
use std::os::unix::fs::MetadataExt; use std::os::unix::fs::MetadataExt;
let metadata = fs::metadata(file_path)?; let metadata = fs::metadata(file_path)?;
Ok(metadata.dev()) Ok(metadata.dev())
} }
#[cfg(target_family = "windows")] #[cfg(target_family = "windows")]
pub fn get_filesystem(file_path: &str) -> Result<u64, io::Error> { pub fn get_filesystem<P: AsRef<Path>>(file_path: P) -> Result<u64, io::Error> {
use winapi_util::file::information; use winapi_util::file::information;
use winapi_util::Handle; use winapi_util::Handle;