use std::fs::OpenOptions; use std::io::{self, IsTerminal, Write}; use std::path::{Path, PathBuf}; use std::process::ExitCode; use anyhow::{Context, bail}; use clap::Parser; use dialoguer::{Confirm, Select, theme::ColorfulTheme}; use disk_checker::{DuplicateGroup, ScanConfig, parse_byte_count, scan_paths, write_human_report}; #[derive(Debug, Parser)] #[command( author, version, about = "Fast folder scanner for sizes, symlinks, hard links, and possible duplicate files", long_about = "disk-checker scans one or more folders, groups files by size, hashes only the first N bytes of same-size candidates, and reports possible duplicates plus symlinks, hard links, special files, and scan errors." )] struct Cli { /// Folder or file paths to scan. Defaults to the current directory. #[arg(value_name = "PATH")] paths: Vec, /// Number of bytes to hash from each same-size candidate file. /// Accepts plain bytes or units like 512KiB, 1MiB, 2MB. #[arg(long, default_value = "1MiB", value_parser = parse_byte_count)] hash_bytes: u64, /// Follow symlinks while scanning. Symlinks are still reported separately. #[arg(long)] follow_links: bool, /// Fully hash possible duplicate groups after the fast partial-hash pass. #[arg(long)] verify_full: bool, /// Group duplicate candidates by size only. Fastest mode for huge triage; less precise. #[arg(long)] size_only: bool, /// Ignore duplicate candidates smaller than this size. Accepts units like 100MiB or 1GB. #[arg(long, default_value = "0", value_parser = parse_min_size)] min_size: u64, /// Maximum directory depth to scan. Depth 0 means only the provided path itself. #[arg(long, value_parser = parse_nonzero_or_zero_usize)] max_depth: Option, /// Number of worker threads used for scanning and hashing. Defaults to CPU parallelism. #[arg(long, value_parser = parse_thread_count)] threads: Option, /// Print machine-readable JSON instead of the human summary. #[arg(long)] json: bool, /// Disable progress output. #[arg(long)] no_progress: bool, /// Interactively review duplicate groups and choose which path to keep. #[arg(long)] interactive: bool, /// Shell script path for planned deletes when --interactive is used. #[arg(long, default_value = "disk-checker-delete-plan.sh")] delete_plan: PathBuf, } fn parse_thread_count(input: &str) -> Result { let threads = input .parse::() .map_err(|error| format!("invalid thread count {input:?}: {error}"))?; if threads == 0 { Err("thread count must be greater than zero".to_string()) } else { Ok(threads) } } fn parse_nonzero_or_zero_usize(input: &str) -> Result { input .parse::() .map_err(|error| format!("invalid depth {input:?}: {error}")) } fn parse_min_size(input: &str) -> Result { if input.trim() == "0" { Ok(0) } else { parse_byte_count(input) } } fn main() -> anyhow::Result { let cli = Cli::parse(); if cli.interactive && cli.json { bail!( "--interactive cannot be combined with --json because prompts would contaminate JSON output" ); } let verify_full = cli.verify_full || cli.interactive; if let Some(threads) = cli.threads { rayon::ThreadPoolBuilder::new() .num_threads(threads) .build_global() .context("failed to initialize hashing thread pool")?; } let paths = if cli.paths.is_empty() { vec![PathBuf::from(".")] } else { cli.paths }; let report = scan_paths(ScanConfig { paths, hash_bytes: cli.hash_bytes, follow_links: cli.follow_links, verify_full, threads: cli.threads, size_only: cli.size_only, min_size: cli.min_size, max_depth: cli.max_depth, progress: !cli.no_progress && !cli.json && io::stderr().is_terminal(), }); let stdout = io::stdout(); let mut out = stdout.lock(); if cli.json { serde_json::to_writer_pretty(&mut out, &report).context("failed to write JSON report")?; writeln!(out).context("failed to finish JSON report")?; } else { write_human_report(&mut out, &report).context("failed to write report")?; } drop(out); if cli.interactive { run_interactive_resolver(&report.verified_duplicates, true, &cli.delete_plan)?; } if report.summary.errors > 0 { Ok(ExitCode::from(2)) } else { Ok(ExitCode::SUCCESS) } } fn run_interactive_resolver( groups: &[DuplicateGroup], verified: bool, delete_plan: &PathBuf, ) -> anyhow::Result<()> { if groups.is_empty() { println!("No duplicate groups to resolve."); return Ok(()); } let theme = ColorfulTheme::default(); let mut planned_deletes = Vec::new(); let mut skipped = 0usize; for (group_index, group) in groups.iter().enumerate() { println!(); println!( "Duplicate group {}/{} — {} across {} files", group_index + 1, groups.len(), disk_checker::format_bytes(group.size), group.paths.len() ); let mut choices = group .paths .iter() .map(|path| path.display().to_string()) .collect::>(); choices.push("Skip this group".to_string()); let selection = Select::with_theme(&theme) .with_prompt("Choose the version to keep") .items(&choices) .default(0) .interact() .context("interactive selection failed")?; if selection == group.paths.len() { skipped += 1; continue; } let keep_path = &group.paths[selection]; let delete_paths = group .paths .iter() .filter(|path| *path != keep_path) .cloned() .collect::>(); println!("Keeping: {}", keep_path.display()); for path in &delete_paths { println!(" remove: {}", path.display()); } let confirmed = Confirm::with_theme(&theme) .with_prompt("Add these files to the deletion plan?") .default(false) .interact() .context("interactive confirmation failed")?; if !confirmed { skipped += 1; continue; } planned_deletes.extend(delete_paths); } if !planned_deletes.is_empty() { write_delete_plan(delete_plan, &planned_deletes, verified)?; println!( "Wrote deletion plan for {} files: {}", planned_deletes.len(), delete_plan.display() ); println!("Review it, then run: sh {}", delete_plan.display()); } println!( "Interactive resolver complete: {} planned, {} groups skipped.", planned_deletes.len(), skipped ); Ok(()) } fn write_delete_plan( path: &PathBuf, delete_paths: &[PathBuf], verified: bool, ) -> anyhow::Result<()> { let mut file = OpenOptions::new() .write(true) .create_new(true) .open(path) .with_context(|| format!("failed to create delete plan {}", path.display()))?; writeln!(file, "#!/bin/sh")?; writeln!(file, "set -eu")?; writeln!( file, "# Review carefully before running. Generated by disk-checker." )?; if verified { writeln!( file, "# Source groups were fully verified with --verify-full." )?; } else { writeln!( file, "# WARNING: Source groups were possible duplicates only, not fully verified." )?; } for delete_path in delete_paths { writeln!(file, "rm -- {}", shell_quote(delete_path)?)?; } Ok(()) } fn shell_quote(path: &Path) -> anyhow::Result { let value = path.to_str().with_context(|| { format!( "delete plan cannot safely encode non-UTF-8 path: {}", path.display() ) })?; Ok(format!("'{}'", value.replace('\'', "'\\''"))) } #[cfg(test)] mod tests { use super::shell_quote; use std::path::Path; #[test] fn shell_quote_escapes_single_quotes() { assert_eq!( shell_quote(Path::new("/tmp/it's-here.txt")).expect("quote path"), "'/tmp/it'\\''s-here.txt'" ); } }