Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
293 lines
8.4 KiB
Rust
293 lines
8.4 KiB
Rust
use std::fs::OpenOptions;
|
|
use std::io::{self, IsTerminal, Write};
|
|
use std::path::{Path, PathBuf};
|
|
use std::process::ExitCode;
|
|
|
|
use anyhow::{Context, bail};
|
|
use clap::Parser;
|
|
use dialoguer::{Confirm, Select, theme::ColorfulTheme};
|
|
use disk_checker::{DuplicateGroup, ScanConfig, parse_byte_count, scan_paths, write_human_report};
|
|
|
|
#[derive(Debug, Parser)]
|
|
#[command(
|
|
author,
|
|
version,
|
|
about = "Fast folder scanner for sizes, symlinks, hard links, and possible duplicate files",
|
|
long_about = "disk-checker scans one or more folders, groups files by size, hashes only the first N bytes of same-size candidates, and reports possible duplicates plus symlinks, hard links, special files, and scan errors."
|
|
)]
|
|
struct Cli {
|
|
/// Folder or file paths to scan. Defaults to the current directory.
|
|
#[arg(value_name = "PATH")]
|
|
paths: Vec<PathBuf>,
|
|
|
|
/// Number of bytes to hash from each same-size candidate file.
|
|
/// Accepts plain bytes or units like 512KiB, 1MiB, 2MB.
|
|
#[arg(long, default_value = "1MiB", value_parser = parse_byte_count)]
|
|
hash_bytes: u64,
|
|
|
|
/// Follow symlinks while scanning. Symlinks are still reported separately.
|
|
#[arg(long)]
|
|
follow_links: bool,
|
|
|
|
/// Fully hash possible duplicate groups after the fast partial-hash pass.
|
|
#[arg(long)]
|
|
verify_full: bool,
|
|
|
|
/// Group duplicate candidates by size only. Fastest mode for huge triage; less precise.
|
|
#[arg(long)]
|
|
size_only: bool,
|
|
|
|
/// Ignore duplicate candidates smaller than this size. Accepts units like 100MiB or 1GB.
|
|
#[arg(long, default_value = "0", value_parser = parse_min_size)]
|
|
min_size: u64,
|
|
|
|
/// Maximum directory depth to scan. Depth 0 means only the provided path itself.
|
|
#[arg(long, value_parser = parse_nonzero_or_zero_usize)]
|
|
max_depth: Option<usize>,
|
|
|
|
/// Number of worker threads used for scanning and hashing. Defaults to CPU parallelism.
|
|
#[arg(long, value_parser = parse_thread_count)]
|
|
threads: Option<usize>,
|
|
|
|
/// Print machine-readable JSON instead of the human summary.
|
|
#[arg(long)]
|
|
json: bool,
|
|
|
|
/// Disable progress output.
|
|
#[arg(long)]
|
|
no_progress: bool,
|
|
|
|
/// Interactively review duplicate groups and choose which path to keep.
|
|
#[arg(long)]
|
|
interactive: bool,
|
|
|
|
/// Shell script path for planned deletes when --interactive is used.
|
|
#[arg(long, default_value = "disk-checker-delete-plan.sh")]
|
|
delete_plan: PathBuf,
|
|
}
|
|
|
|
/// Clap value parser for `--threads`.
///
/// Accepts any `usize` strictly greater than zero.
///
/// # Errors
///
/// Returns a human-readable message when `input` is not a valid `usize`
/// or when it parses to zero.
fn parse_thread_count(input: &str) -> Result<usize, String> {
    match input.parse::<usize>() {
        // A pool with zero workers is rejected with a dedicated message.
        Ok(0) => Err("thread count must be greater than zero".to_string()),
        Ok(count) => Ok(count),
        Err(error) => Err(format!("invalid thread count {input:?}: {error}")),
    }
}
|
|
|
|
/// Clap value parser for `--max-depth`.
///
/// Any value that parses as `usize` is accepted, zero included.
///
/// # Errors
///
/// Returns a message quoting the offending input when it is not a valid `usize`.
fn parse_nonzero_or_zero_usize(input: &str) -> Result<usize, String> {
    match input.parse::<usize>() {
        Ok(depth) => Ok(depth),
        Err(error) => Err(format!("invalid depth {input:?}: {error}")),
    }
}
|
|
|
|
fn parse_min_size(input: &str) -> Result<u64, String> {
|
|
if input.trim() == "0" {
|
|
Ok(0)
|
|
} else {
|
|
parse_byte_count(input)
|
|
}
|
|
}
|
|
|
|
fn main() -> anyhow::Result<ExitCode> {
|
|
let cli = Cli::parse();
|
|
if cli.interactive && cli.json {
|
|
bail!(
|
|
"--interactive cannot be combined with --json because prompts would contaminate JSON output"
|
|
);
|
|
}
|
|
let verify_full = cli.verify_full || cli.interactive;
|
|
|
|
if let Some(threads) = cli.threads {
|
|
rayon::ThreadPoolBuilder::new()
|
|
.num_threads(threads)
|
|
.build_global()
|
|
.context("failed to initialize hashing thread pool")?;
|
|
}
|
|
|
|
let paths = if cli.paths.is_empty() {
|
|
vec![PathBuf::from(".")]
|
|
} else {
|
|
cli.paths
|
|
};
|
|
|
|
let report = scan_paths(ScanConfig {
|
|
paths,
|
|
hash_bytes: cli.hash_bytes,
|
|
follow_links: cli.follow_links,
|
|
verify_full,
|
|
threads: cli.threads,
|
|
size_only: cli.size_only,
|
|
min_size: cli.min_size,
|
|
max_depth: cli.max_depth,
|
|
progress: !cli.no_progress && !cli.json && io::stderr().is_terminal(),
|
|
});
|
|
|
|
let stdout = io::stdout();
|
|
let mut out = stdout.lock();
|
|
if cli.json {
|
|
serde_json::to_writer_pretty(&mut out, &report).context("failed to write JSON report")?;
|
|
writeln!(out).context("failed to finish JSON report")?;
|
|
} else {
|
|
write_human_report(&mut out, &report).context("failed to write report")?;
|
|
}
|
|
drop(out);
|
|
|
|
if cli.interactive {
|
|
run_interactive_resolver(&report.verified_duplicates, true, &cli.delete_plan)?;
|
|
}
|
|
|
|
if report.summary.errors > 0 {
|
|
Ok(ExitCode::from(2))
|
|
} else {
|
|
Ok(ExitCode::SUCCESS)
|
|
}
|
|
}
|
|
|
|
fn run_interactive_resolver(
|
|
groups: &[DuplicateGroup],
|
|
verified: bool,
|
|
delete_plan: &PathBuf,
|
|
) -> anyhow::Result<()> {
|
|
if groups.is_empty() {
|
|
println!("No duplicate groups to resolve.");
|
|
return Ok(());
|
|
}
|
|
|
|
let theme = ColorfulTheme::default();
|
|
let mut planned_deletes = Vec::new();
|
|
let mut skipped = 0usize;
|
|
|
|
for (group_index, group) in groups.iter().enumerate() {
|
|
println!();
|
|
println!(
|
|
"Duplicate group {}/{} — {} across {} files",
|
|
group_index + 1,
|
|
groups.len(),
|
|
disk_checker::format_bytes(group.size),
|
|
group.paths.len()
|
|
);
|
|
|
|
let mut choices = group
|
|
.paths
|
|
.iter()
|
|
.map(|path| path.display().to_string())
|
|
.collect::<Vec<_>>();
|
|
choices.push("Skip this group".to_string());
|
|
|
|
let selection = Select::with_theme(&theme)
|
|
.with_prompt("Choose the version to keep")
|
|
.items(&choices)
|
|
.default(0)
|
|
.interact()
|
|
.context("interactive selection failed")?;
|
|
|
|
if selection == group.paths.len() {
|
|
skipped += 1;
|
|
continue;
|
|
}
|
|
|
|
let keep_path = &group.paths[selection];
|
|
let delete_paths = group
|
|
.paths
|
|
.iter()
|
|
.filter(|path| *path != keep_path)
|
|
.cloned()
|
|
.collect::<Vec<_>>();
|
|
|
|
println!("Keeping: {}", keep_path.display());
|
|
for path in &delete_paths {
|
|
println!(" remove: {}", path.display());
|
|
}
|
|
|
|
let confirmed = Confirm::with_theme(&theme)
|
|
.with_prompt("Add these files to the deletion plan?")
|
|
.default(false)
|
|
.interact()
|
|
.context("interactive confirmation failed")?;
|
|
|
|
if !confirmed {
|
|
skipped += 1;
|
|
continue;
|
|
}
|
|
|
|
planned_deletes.extend(delete_paths);
|
|
}
|
|
|
|
if !planned_deletes.is_empty() {
|
|
write_delete_plan(delete_plan, &planned_deletes, verified)?;
|
|
println!(
|
|
"Wrote deletion plan for {} files: {}",
|
|
planned_deletes.len(),
|
|
delete_plan.display()
|
|
);
|
|
println!("Review it, then run: sh {}", delete_plan.display());
|
|
}
|
|
|
|
println!(
|
|
"Interactive resolver complete: {} planned, {} groups skipped.",
|
|
planned_deletes.len(),
|
|
skipped
|
|
);
|
|
Ok(())
|
|
}
|
|
|
|
fn write_delete_plan(
|
|
path: &PathBuf,
|
|
delete_paths: &[PathBuf],
|
|
verified: bool,
|
|
) -> anyhow::Result<()> {
|
|
let mut file = OpenOptions::new()
|
|
.write(true)
|
|
.create_new(true)
|
|
.open(path)
|
|
.with_context(|| format!("failed to create delete plan {}", path.display()))?;
|
|
writeln!(file, "#!/bin/sh")?;
|
|
writeln!(file, "set -eu")?;
|
|
writeln!(
|
|
file,
|
|
"# Review carefully before running. Generated by disk-checker."
|
|
)?;
|
|
if verified {
|
|
writeln!(
|
|
file,
|
|
"# Source groups were fully verified with --verify-full."
|
|
)?;
|
|
} else {
|
|
writeln!(
|
|
file,
|
|
"# WARNING: Source groups were possible duplicates only, not fully verified."
|
|
)?;
|
|
}
|
|
for delete_path in delete_paths {
|
|
writeln!(file, "rm -- {}", shell_quote(delete_path)?)?;
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn shell_quote(path: &Path) -> anyhow::Result<String> {
|
|
let value = path.to_str().with_context(|| {
|
|
format!(
|
|
"delete plan cannot safely encode non-UTF-8 path: {}",
|
|
path.display()
|
|
)
|
|
})?;
|
|
Ok(format!("'{}'", value.replace('\'', "'\\''")))
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::shell_quote;
    use std::path::Path;

    // An embedded single quote must come out as `'\''` inside the quoted path.
    #[test]
    fn shell_quote_escapes_single_quotes() {
        let input = Path::new("/tmp/it's-here.txt");
        let quoted = shell_quote(input).expect("quote path");
        assert_eq!(quoted, "'/tmp/it'\\''s-here.txt'");
    }
}
|