Merge pull request #64 from rivy/rf.path

Refactor ~ use Path/PathBuf instead of &str/String
2025-12-12 15:49:58 -08:00 · 2020-02-09 13:52:11 +00:00
parent edf300893c a3d8fc00e1
commit 871b7e90d8
4 changed files with 104 additions and 100 deletions
--- a/src/display.rs
+++ b/src/display.rs
@@ -4,6 +4,8 @@ use self::ansi_term::Colour::Fixed;
 use self::ansi_term::Style;
 use crate::utils::Node;
 use std::path::Path;
 static UNITS: [char; 4] = ['T', 'G', 'M', 'K'];
 pub struct DisplayData {
@@ -103,7 +105,7 @@ fn display_node(node: Node, is_biggest: bool, indent: &str, display_data: &Displ
    let size = node.size;
    if !display_data.is_reversed {
-        print_this_node(&*name, size, is_biggest, display_data, indent);
+        print_this_node(&name, size, is_biggest, display_data, indent);
    }
    for c in display_data.get_children_from_node(node) {
@@ -115,7 +117,7 @@ fn display_node(node: Node, is_biggest: bool, indent: &str, display_data: &Displ
    }
    if display_data.is_reversed {
-        print_this_node(&*name, size, is_biggest, display_data, indent);
+        print_this_node(&name, size, is_biggest, display_data, indent);
    }
 }
@@ -136,8 +138,8 @@ fn clean_indentation_string(s: &str) -> String {
    is
 }
-fn print_this_node(
+fn print_this_node<P: AsRef<Path>>(
-    name: &str,
+    name: P,
    size: u64,
    is_biggest: bool,
    display_data: &DisplayData,
@@ -150,19 +152,23 @@ fn print_this_node(
    )
 }
-pub fn format_string(
+pub fn format_string<P: AsRef<Path>>(
-    dir_name: &str,
+    dir_name: P,
    is_biggest: bool,
    display_data: &DisplayData,
    size: &str,
    indentation: &str,
 ) -> String {
    let dir_name = dir_name.as_ref();
    let printable_name = {
        if display_data.short_paths {
-            dir_name
+            match dir_name.parent() {
-                .split(std::path::is_separator)
+                Some(prefix) => match dir_name.strip_prefix(prefix) {
-                .last()
+                    Ok(base) => base,
-                .unwrap_or(dir_name)
+                    Err(_) => dir_name,
                },
                None => dir_name,
            }
        } else {
            dir_name
        }
@@ -175,7 +181,7 @@ pub fn format_string(
            Style::new().paint(size)
        },
        indentation,
-        printable_name,
+        printable_name.display(),
    )
 }
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,7 @@ extern crate clap;
 use self::display::draw_it;
 use crate::utils::is_a_parent_of;
 use clap::{App, AppSettings, Arg};
 use std::path::PathBuf;
 use utils::{find_big_ones, get_dir_tree, simplify_dir_names, sort, trim_deep_ones, Node};
 mod display;
@@ -137,12 +138,15 @@ fn main() {
    let use_apparent_size = options.is_present("display_apparent_size");
    let limit_filesystem = options.is_present("limit_filesystem");
-    let ignore_directories = options.values_of("ignore_directory").map(|r| r.collect());
+    let ignore_directories = match options.values_of("ignore_directory") {
        Some(i) => Some(i.map(PathBuf::from).collect()),
        None => None,
    };
    let simplified_dirs = simplify_dir_names(target_dirs);
    let (permissions, nodes) = get_dir_tree(
        &simplified_dirs,
-        ignore_directories,
+        &ignore_directories,
        use_apparent_size,
        limit_filesystem,
        threads,
@@ -165,7 +169,7 @@ fn main() {
    );
 }
-fn build_tree(biggest_ones: Vec<(String, u64)>, depth: Option<u64>) -> Node {
+fn build_tree(biggest_ones: Vec<(PathBuf, u64)>, depth: Option<u64>) -> Node {
    let mut top_parent = Node::default();
    // assume sorted order
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -2,6 +2,7 @@ use jwalk::DirEntry;
 use std::cmp::Ordering;
 use std::collections::HashMap;
 use std::collections::HashSet;
 use std::path::{Path, PathBuf};
 use jwalk::WalkDir;
@@ -10,7 +11,7 @@ use self::platform::*;
 #[derive(Debug, Default, Eq)]
 pub struct Node {
-    pub name: String,
+    pub name: PathBuf,
    pub size: u64,
    pub children: Vec<Node>,
 }
@@ -37,15 +38,15 @@ impl PartialEq for Node {
    }
 }
-pub fn is_a_parent_of(parent: &str, child: &str) -> bool {
+pub fn is_a_parent_of<P: AsRef<Path>>(parent: P, child: P) -> bool {
-    let path_parent = std::path::Path::new(parent);
+    let parent = parent.as_ref();
-    let path_child = std::path::Path::new(child);
+    let child = child.as_ref();
-    (path_child.starts_with(path_parent) && !path_parent.starts_with(path_child))
+    (child.starts_with(parent) && !parent.starts_with(child))
 }
-pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
+pub fn simplify_dir_names<P: AsRef<Path>>(filenames: Vec<P>) -> HashSet<PathBuf> {
-    let mut top_level_names: HashSet<String> = HashSet::with_capacity(filenames.len());
+    let mut top_level_names: HashSet<PathBuf> = HashSet::with_capacity(filenames.len());
-    let mut to_remove: Vec<String> = Vec::with_capacity(filenames.len());
+    let mut to_remove: Vec<PathBuf> = Vec::with_capacity(filenames.len());
    for t in filenames {
        let top_level_name = normalize_path(t);
@@ -53,7 +54,7 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
        for tt in top_level_names.iter() {
            if is_a_parent_of(&top_level_name, tt) {
-                to_remove.push(tt.to_string());
+                to_remove.push(tt.to_path_buf());
            } else if is_a_parent_of(tt, &top_level_name) {
                can_add = false;
            }
@@ -62,22 +63,22 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
        top_level_names.retain(|tr| to_remove.binary_search(tr).is_err());
        to_remove.clear();
        if can_add {
-            top_level_names.insert(normalize_path(t).to_owned());
+            top_level_names.insert(top_level_name);
        }
    }
    top_level_names
 }
-pub fn get_dir_tree(
+pub fn get_dir_tree<P: AsRef<Path>>(
-    top_level_names: &HashSet<String>,
+    top_level_names: &HashSet<P>,
-    ignore_directories: Option<Vec<&str>>,
+    ignore_directories: &Option<Vec<PathBuf>>,
    apparent_size: bool,
    limit_filesystem: bool,
    threads: Option<usize>,
-) -> (bool, HashMap<String, u64>) {
+) -> (bool, HashMap<PathBuf, u64>) {
    let mut permissions = 0;
-    let mut data: HashMap<String, u64> = HashMap::new();
+    let mut data: HashMap<PathBuf, u64> = HashMap::new();
    let restricted_filesystems = if limit_filesystem {
        get_allowed_filesystems(top_level_names)
    } else {
@@ -86,10 +87,10 @@ pub fn get_dir_tree(
    for b in top_level_names.iter() {
        examine_dir(
-            &b,
+            b,
            apparent_size,
            &restricted_filesystems,
-            &ignore_directories,
+            ignore_directories,
            &mut data,
            &mut permissions,
            threads,
@@ -98,7 +99,7 @@ pub fn get_dir_tree(
    (permissions == 0, data)
 }
-fn get_allowed_filesystems(top_level_names: &HashSet<String>) -> Option<HashSet<u64>> {
+fn get_allowed_filesystems<P: AsRef<Path>>(top_level_names: &HashSet<P>) -> Option<HashSet<u64>> {
    let mut limit_filesystems: HashSet<u64> = HashSet::new();
    for file_name in top_level_names.iter() {
        if let Ok(a) = get_filesystem(file_name) {
@@ -108,29 +109,26 @@ fn get_allowed_filesystems(top_level_names: &HashSet<String>) -> Option<HashSet<
    Some(limit_filesystems)
 }
-pub fn normalize_path<P: AsRef<std::path::Path>>(path: P) -> std::string::String {
+pub fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
    // normalize path ...
    // 1. removing repeated separators
    // 2. removing interior '.' ("current directory") path segments
    // 3. removing trailing extra separators and '.' ("current directory") path segments
    // * `Path.components()` does all the above work; ref: <https://doc.rust-lang.org/std/path/struct.Path.html#method.components>
    // 4. changing to os preferred separator (automatically done by recollecting components back into a PathBuf)
-    path.as_ref()
+    path.as_ref().components().collect::<PathBuf>()
        .components()
        .collect::<std::path::PathBuf>()
        .to_string_lossy()
        .to_string()
 }
-fn examine_dir(
+fn examine_dir<P: AsRef<Path>>(
-    top_dir: &str,
+    top_dir: P,
    apparent_size: bool,
    filesystems: &Option<HashSet<u64>>,
-    ignore_directories: &Option<Vec<&str>>,
+    ignore_directories: &Option<Vec<PathBuf>>,
-    data: &mut HashMap<String, u64>,
+    data: &mut HashMap<PathBuf, u64>,
    file_count_no_permission: &mut u64,
    threads: Option<usize>,
 ) {
    let top_dir = top_dir.as_ref();
    let mut inodes: HashSet<(u64, u64)> = HashSet::new();
    let mut iter = WalkDir::new(top_dir)
        .preload_metadata(true)
@@ -141,9 +139,15 @@ fn examine_dir(
    'entry: for entry in iter {
        if let Ok(e) = entry {
            let maybe_size_and_inode = get_metadata(&e, apparent_size);
-            if let Some(d) = ignore_directories {
+            if let Some(dirs) = ignore_directories {
-                for s in d {
+                let path = e.path();
-                    if e.path().to_string_lossy().contains(*s) {
+                let parts = path.components().collect::<Vec<std::path::Component>>();
                for d in dirs {
                    let seq = d.components().collect::<Vec<std::path::Component>>();
                    if parts
                        .windows(seq.len())
                        .any(|window| window.iter().collect::<PathBuf>() == *d)
                    {
                        continue 'entry;
                    }
                }
@@ -190,29 +194,29 @@ fn should_ignore_file(
    false
 }
-fn process_file_with_size_and_inode(
+fn process_file_with_size_and_inode<P: AsRef<Path>>(
-    top_dir: &str,
+    top_dir: P,
-    data: &mut HashMap<String, u64>,
+    data: &mut HashMap<PathBuf, u64>,
    e: DirEntry,
    size: u64,
 ) {
    let top_dir = top_dir.as_ref();
    // This path and all its parent paths have their counter incremented
-    for path_name in e.path().ancestors() {
+    for path in e.path().ancestors() {
        // This is required due to bug in Jwalk that adds '/' to all sub dir lists
        // see: https://github.com/jessegrosjean/jwalk/issues/13
-        if path_name.to_string_lossy() == "/" && top_dir != "/" {
+        if path.to_string_lossy() == "/" && top_dir.to_string_lossy() != "/" {
            continue;
        }
-        let path_name = path_name.to_string_lossy();
+        let s = data.entry(normalize_path(path)).or_insert(0);
        let s = data.entry(path_name.to_string()).or_insert(0);
        *s += size;
-        if path_name == top_dir {
+        if path.starts_with(top_dir) && top_dir.starts_with(path) {
            break;
        }
    }
 }
-pub fn sort_by_size_first_name_second(a: &(String, u64), b: &(String, u64)) -> Ordering {
+pub fn sort_by_size_first_name_second(a: &(PathBuf, u64), b: &(PathBuf, u64)) -> Ordering {
    let result = b.1.cmp(&a.1);
    if result == Ordering::Equal {
        a.0.cmp(&b.0)
@@ -221,13 +225,13 @@ pub fn sort_by_size_first_name_second(a: &(String, u64), b: &(String, u64)) -> O
    }
 }
-pub fn sort(data: HashMap<String, u64>) -> Vec<(String, u64)> {
+pub fn sort(data: HashMap<PathBuf, u64>) -> Vec<(PathBuf, u64)> {
-    let mut new_l: Vec<(String, u64)> = data.iter().map(|(a, b)| (a.clone(), *b)).collect();
+    let mut new_l: Vec<(PathBuf, u64)> = data.iter().map(|(a, b)| (a.clone(), *b)).collect();
    new_l.sort_unstable_by(sort_by_size_first_name_second);
    new_l
 }
-pub fn find_big_ones(new_l: Vec<(String, u64)>, max_to_show: usize) -> Vec<(String, u64)> {
+pub fn find_big_ones(new_l: Vec<(PathBuf, u64)>, max_to_show: usize) -> Vec<(PathBuf, u64)> {
    if max_to_show > 0 && new_l.len() > max_to_show {
        new_l[0..max_to_show].to_vec()
    } else {
@@ -236,18 +240,31 @@ pub fn find_big_ones(new_l: Vec<(String, u64)>, max_to_show: usize) -> Vec<(Stri
 }
 pub fn trim_deep_ones(
-    input: Vec<(String, u64)>,
+    input: Vec<(PathBuf, u64)>,
    max_depth: u64,
-    top_level_names: &HashSet<String>,
+    top_level_names: &HashSet<PathBuf>,
-) -> Vec<(String, u64)> {
+) -> Vec<(PathBuf, u64)> {
-    let mut result: Vec<(String, u64)> = Vec::with_capacity(input.len() * top_level_names.len());
+    let mut result: Vec<(PathBuf, u64)> = Vec::with_capacity(input.len() * top_level_names.len());
    for name in top_level_names {
-        let my_max_depth = name.matches(std::path::is_separator).count() + max_depth as usize;
+        let my_max_depth = name
-        let name_ref: &str = name.as_ref();
+            .components()
            .filter(|&c| match c {
                std::path::Component::Prefix(_) => false,
                _ => true,
            })
            .count()
            + max_depth as usize;
        for &(ref k, ref v) in input.iter() {
-            if k.starts_with(name_ref) && k.matches(std::path::is_separator).count() <= my_max_depth
+            if k.starts_with(name)
                && k.components()
                    .filter(|&c| match c {
                        std::path::Component::Prefix(_) => false,
                        _ => true,
                    })
                    .count()
                    <= my_max_depth
            {
                result.push((k.clone(), *v));
            }
@@ -263,34 +280,22 @@ mod tests {
    #[test]
    fn test_simplify_dir() {
        let mut correct = HashSet::new();
-        correct.insert("a".to_string());
+        correct.insert(PathBuf::from("a"));
        assert_eq!(simplify_dir_names(vec!["a"]), correct);
    }
    #[test]
    fn test_simplify_dir_rm_subdir() {
        let mut correct = HashSet::new();
-        correct.insert(
+        correct.insert(["a", "b"].iter().collect::<PathBuf>());
            ["a", "b"]
                .iter()
                .collect::<std::path::PathBuf>()
                .to_string_lossy()
                .to_string(),
        );
        assert_eq!(simplify_dir_names(vec!["a/b", "a/b/c", "a/b/d/f"]), correct);
    }
    #[test]
    fn test_simplify_dir_duplicates() {
        let mut correct = HashSet::new();
-        correct.insert(
+        correct.insert(["a", "b"].iter().collect::<PathBuf>());
-            ["a", "b"]
+        correct.insert(PathBuf::from("c"));
                .iter()
                .collect::<std::path::PathBuf>()
                .to_string_lossy()
                .to_string(),
        );
        correct.insert("c".to_string());
        assert_eq!(
            simplify_dir_names(vec![
                "a/b",
@@ -308,36 +313,24 @@ mod tests {
    #[test]
    fn test_simplify_dir_rm_subdir_and_not_substrings() {
        let mut correct = HashSet::new();
-        correct.insert("b".to_string());
+        correct.insert(PathBuf::from("b"));
-        correct.insert(
+        correct.insert(["c", "a", "b"].iter().collect::<PathBuf>());
-            ["c", "a", "b"]
+        correct.insert(["a", "b"].iter().collect::<PathBuf>());
                .iter()
                .collect::<std::path::PathBuf>()
                .to_string_lossy()
                .to_string(),
        );
        correct.insert(
            ["a", "b"]
                .iter()
                .collect::<std::path::PathBuf>()
                .to_string_lossy()
                .to_string(),
        );
        assert_eq!(simplify_dir_names(vec!["a/b", "c/a/b/", "b"]), correct);
    }
    #[test]
    fn test_simplify_dir_dots() {
        let mut correct = HashSet::new();
-        correct.insert("src".to_string());
+        correct.insert(PathBuf::from("src"));
        assert_eq!(simplify_dir_names(vec!["src/."]), correct);
    }
    #[test]
    fn test_simplify_dir_substring_names() {
        let mut correct = HashSet::new();
-        correct.insert("src".to_string());
+        correct.insert(PathBuf::from("src"));
-        correct.insert("src_v2".to_string());
+        correct.insert(PathBuf::from("src_v2"));
        assert_eq!(simplify_dir_names(vec!["src/", "src_v2"]), correct);
    }
--- a/src/utils/platform.rs
+++ b/src/utils/platform.rs
@@ -2,6 +2,7 @@ use jwalk::DirEntry;
 #[allow(unused_imports)]
 use std::fs;
 use std::io;
 use std::path::Path;
 #[cfg(target_family = "unix")]
 fn get_block_size() -> u64 {
@@ -48,14 +49,14 @@ pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64,
 }
 #[cfg(target_family = "unix")]
-pub fn get_filesystem(file_path: &str) -> Result<u64, io::Error> {
+pub fn get_filesystem<P: AsRef<Path>>(file_path: P) -> Result<u64, io::Error> {
    use std::os::unix::fs::MetadataExt;
    let metadata = fs::metadata(file_path)?;
    Ok(metadata.dev())
 }
 #[cfg(target_family = "windows")]
-pub fn get_filesystem(file_path: &str) -> Result<u64, io::Error> {
+pub fn get_filesystem<P: AsRef<Path>>(file_path: P) -> Result<u64, io::Error> {
    use winapi_util::file::information;
    use winapi_util::Handle;