Add duplicate resolver and real progress
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
217
src/main.rs
217
src/main.rs
@@ -1,10 +1,12 @@
|
||||
use std::io::{self, Write};
|
||||
use std::path::PathBuf;
|
||||
use std::fs::OpenOptions;
|
||||
use std::io::{self, IsTerminal, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::ExitCode;
|
||||
|
||||
use anyhow::Context;
|
||||
use anyhow::{Context, bail};
|
||||
use clap::Parser;
|
||||
use disk_checker::{ScanConfig, parse_byte_count, scan_paths, write_human_report};
|
||||
use dialoguer::{Confirm, Select, theme::ColorfulTheme};
|
||||
use disk_checker::{DuplicateGroup, ScanConfig, parse_byte_count, scan_paths, write_human_report};
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
#[command(
|
||||
@@ -31,6 +33,18 @@ struct Cli {
|
||||
#[arg(long)]
|
||||
verify_full: bool,
|
||||
|
||||
/// Group duplicate candidates by size only. Fastest mode for huge triage; less precise.
|
||||
#[arg(long)]
|
||||
size_only: bool,
|
||||
|
||||
/// Ignore duplicate candidates smaller than this size. Accepts units like 100MiB or 1GB.
|
||||
#[arg(long, default_value = "0", value_parser = parse_min_size)]
|
||||
min_size: u64,
|
||||
|
||||
/// Maximum directory depth to scan. Depth 0 means only the provided path itself.
|
||||
#[arg(long, value_parser = parse_nonzero_or_zero_usize)]
|
||||
max_depth: Option<usize>,
|
||||
|
||||
/// Number of worker threads used for scanning and hashing. Defaults to CPU parallelism.
|
||||
#[arg(long, value_parser = parse_thread_count)]
|
||||
threads: Option<usize>,
|
||||
@@ -38,6 +52,18 @@ struct Cli {
|
||||
/// Print machine-readable JSON instead of the human summary.
|
||||
#[arg(long)]
|
||||
json: bool,
|
||||
|
||||
/// Disable progress output.
|
||||
#[arg(long)]
|
||||
no_progress: bool,
|
||||
|
||||
/// Interactively review duplicate groups and choose which path to keep.
|
||||
#[arg(long)]
|
||||
interactive: bool,
|
||||
|
||||
/// Shell script path for planned deletes when --interactive is used.
|
||||
#[arg(long, default_value = "disk-checker-delete-plan.sh")]
|
||||
delete_plan: PathBuf,
|
||||
}
|
||||
|
||||
fn parse_thread_count(input: &str) -> Result<usize, String> {
|
||||
@@ -51,8 +77,32 @@ fn parse_thread_count(input: &str) -> Result<usize, String> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Clap value parser for `--max-depth`: accepts any non-negative integer.
///
/// Zero is a valid depth (scan only the provided path itself), so no lower
/// bound is enforced here. Input is not trimmed; anything `usize::from_str`
/// rejects is reported as an `invalid depth` error string.
fn parse_nonzero_or_zero_usize(input: &str) -> Result<usize, String> {
    match input.parse::<usize>() {
        Ok(depth) => Ok(depth),
        Err(error) => Err(format!("invalid depth {input:?}: {error}")),
    }
}
|
||||
|
||||
fn parse_min_size(input: &str) -> Result<u64, String> {
|
||||
if input.trim() == "0" {
|
||||
Ok(0)
|
||||
} else {
|
||||
parse_byte_count(input)
|
||||
}
|
||||
}
|
||||
|
||||
fn main() -> anyhow::Result<ExitCode> {
|
||||
let cli = Cli::parse();
|
||||
if cli.interactive && !cli.verify_full {
|
||||
bail!(
|
||||
"--interactive requires --verify-full so keep/delete plans are based on fully verified duplicates"
|
||||
);
|
||||
}
|
||||
if cli.interactive && cli.json {
|
||||
bail!(
|
||||
"--interactive cannot be combined with --json because prompts would contaminate JSON output"
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(threads) = cli.threads {
|
||||
rayon::ThreadPoolBuilder::new()
|
||||
@@ -73,6 +123,10 @@ fn main() -> anyhow::Result<ExitCode> {
|
||||
follow_links: cli.follow_links,
|
||||
verify_full: cli.verify_full,
|
||||
threads: cli.threads,
|
||||
size_only: cli.size_only,
|
||||
min_size: cli.min_size,
|
||||
max_depth: cli.max_depth,
|
||||
progress: !cli.no_progress && !cli.json && io::stderr().is_terminal(),
|
||||
});
|
||||
|
||||
let stdout = io::stdout();
|
||||
@@ -83,6 +137,16 @@ fn main() -> anyhow::Result<ExitCode> {
|
||||
} else {
|
||||
write_human_report(&mut out, &report).context("failed to write report")?;
|
||||
}
|
||||
drop(out);
|
||||
|
||||
if cli.interactive {
|
||||
let groups = if cli.verify_full {
|
||||
&report.verified_duplicates
|
||||
} else {
|
||||
&report.possible_duplicates
|
||||
};
|
||||
run_interactive_resolver(groups, cli.verify_full, &cli.delete_plan)?;
|
||||
}
|
||||
|
||||
if report.summary.errors > 0 {
|
||||
Ok(ExitCode::from(2))
|
||||
@@ -90,3 +154,148 @@ fn main() -> anyhow::Result<ExitCode> {
|
||||
Ok(ExitCode::SUCCESS)
|
||||
}
|
||||
}
|
||||
|
||||
fn run_interactive_resolver(
|
||||
groups: &[DuplicateGroup],
|
||||
verified: bool,
|
||||
delete_plan: &PathBuf,
|
||||
) -> anyhow::Result<()> {
|
||||
if groups.is_empty() {
|
||||
println!("No duplicate groups to resolve.");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let theme = ColorfulTheme::default();
|
||||
let mut planned_deletes = Vec::new();
|
||||
let mut skipped = 0usize;
|
||||
|
||||
for (group_index, group) in groups.iter().enumerate() {
|
||||
println!();
|
||||
println!(
|
||||
"Duplicate group {}/{} — {} across {} files",
|
||||
group_index + 1,
|
||||
groups.len(),
|
||||
disk_checker::format_bytes(group.size),
|
||||
group.paths.len()
|
||||
);
|
||||
|
||||
let mut choices = group
|
||||
.paths
|
||||
.iter()
|
||||
.map(|path| path.display().to_string())
|
||||
.collect::<Vec<_>>();
|
||||
choices.push("Skip this group".to_string());
|
||||
|
||||
let selection = Select::with_theme(&theme)
|
||||
.with_prompt("Choose the version to keep")
|
||||
.items(&choices)
|
||||
.default(0)
|
||||
.interact()
|
||||
.context("interactive selection failed")?;
|
||||
|
||||
if selection == group.paths.len() {
|
||||
skipped += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
let keep_path = &group.paths[selection];
|
||||
let delete_paths = group
|
||||
.paths
|
||||
.iter()
|
||||
.filter(|path| *path != keep_path)
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
println!("Keeping: {}", keep_path.display());
|
||||
for path in &delete_paths {
|
||||
println!(" remove: {}", path.display());
|
||||
}
|
||||
|
||||
let confirmed = Confirm::with_theme(&theme)
|
||||
.with_prompt("Add these files to the deletion plan?")
|
||||
.default(false)
|
||||
.interact()
|
||||
.context("interactive confirmation failed")?;
|
||||
|
||||
if !confirmed {
|
||||
skipped += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
planned_deletes.extend(delete_paths);
|
||||
}
|
||||
|
||||
if !planned_deletes.is_empty() {
|
||||
write_delete_plan(delete_plan, &planned_deletes, verified)?;
|
||||
println!(
|
||||
"Wrote deletion plan for {} files: {}",
|
||||
planned_deletes.len(),
|
||||
delete_plan.display()
|
||||
);
|
||||
println!("Review it, then run: sh {}", delete_plan.display());
|
||||
}
|
||||
|
||||
println!(
|
||||
"Interactive resolver complete: {} planned, {} groups skipped.",
|
||||
planned_deletes.len(),
|
||||
skipped
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn write_delete_plan(
|
||||
path: &PathBuf,
|
||||
delete_paths: &[PathBuf],
|
||||
verified: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
let mut file = OpenOptions::new()
|
||||
.write(true)
|
||||
.create_new(true)
|
||||
.open(path)
|
||||
.with_context(|| format!("failed to create delete plan {}", path.display()))?;
|
||||
writeln!(file, "#!/bin/sh")?;
|
||||
writeln!(file, "set -eu")?;
|
||||
writeln!(
|
||||
file,
|
||||
"# Review carefully before running. Generated by disk-checker."
|
||||
)?;
|
||||
if verified {
|
||||
writeln!(
|
||||
file,
|
||||
"# Source groups were fully verified with --verify-full."
|
||||
)?;
|
||||
} else {
|
||||
writeln!(
|
||||
file,
|
||||
"# WARNING: Source groups were possible duplicates only, not fully verified."
|
||||
)?;
|
||||
}
|
||||
for delete_path in delete_paths {
|
||||
writeln!(file, "rm -- {}", shell_quote(delete_path)?)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn shell_quote(path: &Path) -> anyhow::Result<String> {
|
||||
let value = path.to_str().with_context(|| {
|
||||
format!(
|
||||
"delete plan cannot safely encode non-UTF-8 path: {}",
|
||||
path.display()
|
||||
)
|
||||
})?;
|
||||
Ok(format!("'{}'", value.replace('\'', "'\\''")))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::shell_quote;
    use std::path::Path;

    /// An apostrophe inside a path must be emitted as the `'\''` sequence so
    /// the surrounding single-quoted shell word stays intact.
    #[test]
    fn shell_quote_escapes_single_quotes() {
        let input = Path::new("/tmp/it's-here.txt");
        let quoted = shell_quote(input).expect("quote path");
        assert_eq!(quoted, "'/tmp/it'\\''s-here.txt'");
    }
}
|
||||
|
||||
Reference in New Issue
Block a user