Add duplicate resolver and real progress

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
2026-06-04 15:30:22 +01:00
parent 72906ed4f3
commit 4dafcac9dc
2 changed files with 579 additions and 63 deletions

View File

@@ -6,6 +6,7 @@ use std::sync::mpsc;
use std::thread;
use ignore::{WalkBuilder, WalkState};
use indicatif::{ProgressBar, ProgressStyle};
use rayon::prelude::*;
use serde::{Serialize, Serializer};
@@ -22,6 +23,10 @@ pub struct ScanConfig {
pub follow_links: bool,
pub verify_full: bool,
pub threads: Option<usize>,
pub size_only: bool,
pub min_size: u64,
pub max_depth: Option<usize>,
pub progress: bool,
}
#[derive(Debug, Clone, Serialize)]
@@ -30,6 +35,9 @@ pub struct ScanReport {
pub scanned_paths: Vec<PathBuf>,
pub hash_bytes: u64,
pub worker_threads: usize,
pub size_only: bool,
pub min_size: u64,
pub max_depth: Option<usize>,
pub followed_symlinks: bool,
pub full_verification: bool,
pub summary: ScanSummary,
@@ -139,6 +147,16 @@ enum HashOutcome {
Issue(ScanIssue),
}
/// Running totals for a single walked root.
///
/// One accumulator is filled by `collect_scanned_entry` as classified walk
/// entries arrive, then merged into the overall scan state by `scan_paths`.
#[derive(Debug, Default)]
struct ScanAccumulator {
/// File entries received from the walker.
files: Vec<FileEntry>,
/// Symlink entries received from the walker.
symlinks: Vec<SymlinkInfo>,
/// Entries the walker classified as special.
special_entries: Vec<SpecialEntry>,
/// Issues reported while walking.
errors: Vec<ScanIssue>,
/// Number of directory entries seen.
directories: usize,
/// Saturating sum of the sizes of all entries in `files`.
total_file_bytes: u64,
}
#[derive(Debug, Clone)]
enum ScannedEntry {
File(FileEntry),
@@ -198,12 +216,13 @@ pub fn parse_byte_count(input: &str) -> Result<u64, String> {
pub fn scan_paths(config: ScanConfig) -> ScanReport {
let hash_bytes = config.hash_bytes.max(1);
let worker_threads = worker_threads(config.threads);
let min_size = config.min_size;
let mut files = Vec::new();
let mut symlinks = Vec::new();
let mut special_entries = Vec::new();
let mut errors = Vec::new();
let mut directories = 0;
let mut total_file_bytes = 0;
let mut directories: usize = 0;
let mut total_file_bytes: u64 = 0;
for root in &config.paths {
let mut builder = WalkBuilder::new(root);
@@ -216,33 +235,21 @@ pub fn scan_paths(config: ScanConfig) -> ScanReport {
.git_global(false)
.git_exclude(false)
.parents(false);
let (sender, receiver) = mpsc::channel();
builder.build_parallel().run(|| {
let sender = sender.clone();
let follow_links = config.follow_links;
Box::new(move |entry| {
for scanned_entry in classify_walk_entry(entry, follow_links) {
if sender.send(scanned_entry).is_err() {
return WalkState::Quit;
if let Some(max_depth) = config.max_depth {
builder.max_depth(Some(max_depth));
}
}
WalkState::Continue
})
});
drop(sender);
for scanned_entry in receiver {
collect_scanned_entry(
scanned_entry,
&mut files,
&mut symlinks,
&mut special_entries,
&mut errors,
&mut directories,
&mut total_file_bytes,
let accumulator = walk_root_parallel(
&mut builder,
config.follow_links,
walk_progress(config.progress, root),
);
}
files.extend(accumulator.files);
symlinks.extend(accumulator.symlinks);
special_entries.extend(accumulator.special_entries);
errors.extend(accumulator.errors);
directories += accumulator.directories;
total_file_bytes = total_file_bytes.saturating_add(accumulator.total_file_bytes);
}
files.sort_by(|left, right| left.path.cmp(&right.path));
@@ -250,17 +257,43 @@ pub fn scan_paths(config: ScanConfig) -> ScanReport {
special_entries.sort_by(|left, right| left.path.cmp(&right.path));
let hard_links = find_hard_links(&files);
let same_size_candidates = same_size_candidates(&files);
let same_size_candidates = same_size_candidates(&files, min_size);
let same_size_candidate_files = same_size_candidates.len();
let partial_outcomes = hash_files(&same_size_candidates, hash_bytes, false);
let possible_duplicates = if config.size_only {
size_only_duplicate_groups(same_size_candidates.clone())
} else {
let partial_outcomes = hash_files(
&same_size_candidates,
hash_bytes,
false,
hash_progress(
config.progress,
&same_size_candidates,
hash_bytes,
false,
"Hashing file prefixes",
),
);
let mut partial_hashes = Vec::new();
collect_hash_outcomes(partial_outcomes, &mut partial_hashes, &mut errors);
let possible_duplicates = duplicate_groups(partial_hashes);
duplicate_groups(partial_hashes)
};
let verified_duplicates = if config.verify_full {
let full_candidates = files_from_duplicate_groups(&possible_duplicates);
let full_outcomes = hash_files(&full_candidates, hash_bytes, true);
let full_outcomes = hash_files(
&full_candidates,
hash_bytes,
true,
hash_progress(
config.progress,
&full_candidates,
hash_bytes,
true,
"Full verification hashing",
),
);
let mut full_hashes = Vec::new();
collect_hash_outcomes(full_outcomes, &mut full_hashes, &mut errors);
duplicate_groups(full_hashes)
@@ -291,6 +324,9 @@ pub fn scan_paths(config: ScanConfig) -> ScanReport {
scanned_paths: config.paths,
hash_bytes,
worker_threads,
size_only: config.size_only,
min_size,
max_depth: config.max_depth,
followed_symlinks: config.follow_links,
full_verification: config.verify_full,
summary: ScanSummary {
@@ -325,6 +361,123 @@ fn worker_threads(configured_threads: Option<usize>) -> usize {
})
}
/// Walks one root with the parallel walker and gathers the results.
///
/// Walker threads classify each entry and push the results over a channel.
/// A dedicated collector thread drains that channel into a
/// [`ScanAccumulator`] and refreshes the optional progress spinner, so the
/// walkers never contend on shared state.
///
/// # Panics
///
/// Panics if the collector thread itself panicked.
fn walk_root_parallel(
    builder: &mut WalkBuilder,
    follow_links: bool,
    progress: Option<ProgressBar>,
) -> ScanAccumulator {
    let (entry_tx, entry_rx) = mpsc::channel();

    let collector = thread::spawn(move || {
        let mut totals = ScanAccumulator::default();
        // `recv` fails only once every sender has been dropped, i.e. the walk is over.
        while let Ok(scanned_entry) = entry_rx.recv() {
            collect_scanned_entry(scanned_entry, &mut totals);
            update_walk_progress(progress.as_ref(), &totals, false);
        }
        update_walk_progress(progress.as_ref(), &totals, true);
        totals
    });

    builder.build_parallel().run(|| {
        let entry_tx = entry_tx.clone();
        Box::new(move |entry| {
            // Stop at the first failed send: the collector is gone, so walking on is pointless.
            let all_delivered = classify_walk_entry(entry, follow_links)
                .into_iter()
                .all(|scanned_entry| entry_tx.send(scanned_entry).is_ok());
            if all_delivered {
                WalkState::Continue
            } else {
                WalkState::Quit
            }
        })
    });

    // Release the last sender so the collector's receive loop can terminate.
    drop(entry_tx);

    collector
        .join()
        .expect("scan result collector thread should not panic")
}
/// Builds the traversal spinner for `root`, or `None` when progress is disabled.
///
/// The initial message lists the same counters, in the same order, as the
/// messages written later by `update_walk_progress`, so the line keeps a
/// stable shape from the first draw to the final summary.
///
/// # Panics
///
/// Panics only if the hard-coded spinner template is rejected, which would be
/// a programming error.
fn walk_progress(enabled: bool, root: &Path) -> Option<ProgressBar> {
    if !enabled {
        return None;
    }

    let progress = ProgressBar::new_spinner();
    progress.set_style(
        ProgressStyle::with_template("{spinner:.green} {msg}")
            .expect("valid traversal progress template"),
    );
    progress.set_message(format!(
        "Scanning {} — 0 files, 0 dirs, 0 symlinks, 0 special entries, 0 errors",
        root.display()
    ));
    Some(progress)
}
/// Refreshes the traversal spinner from the accumulator's current counters.
///
/// Does nothing when there is no progress bar. With `done` set, the bar is
/// finished with a final summary. Otherwise the message is only redrawn on
/// the first recorded entry and on every count that is a multiple of 100, to
/// keep terminal updates cheap.
fn update_walk_progress(progress: Option<&ProgressBar>, accumulator: &ScanAccumulator, done: bool) {
    let Some(bar) = progress else {
        return;
    };

    let seen = accumulator.interactions();
    // Intermediate updates are throttled; the final summary is always written.
    if !done && seen != 1 && !seen.is_multiple_of(100) {
        return;
    }

    let file_count = accumulator.files.len();
    let dir_count = accumulator.directories;
    let symlink_count = accumulator.symlinks.len();
    let special_count = accumulator.special_entries.len();
    let error_count = accumulator.errors.len();

    if done {
        bar.finish_with_message(format!(
            "Scanned {} files, {} dirs, {} symlinks, {} special entries, {} errors",
            file_count, dir_count, symlink_count, special_count, error_count
        ));
        return;
    }

    bar.tick();
    bar.set_message(format!(
        "Scanning — {} files, {} dirs, {} symlinks, {} special entries, {} errors",
        file_count, dir_count, symlink_count, special_count, error_count
    ));
}
/// Builds a byte-based progress bar for a hashing pass.
///
/// The bar length is the number of bytes the pass is expected to read: the
/// whole file when `full_file` is set, otherwise at most `hash_bytes` per
/// file. Returns `None` when progress is disabled, when there are no files,
/// or when the expected byte total is zero.
///
/// # Panics
///
/// Panics only if the hard-coded bar template is rejected, which would be a
/// programming error.
fn hash_progress(
    enabled: bool,
    files: &[FileEntry],
    hash_bytes: u64,
    full_file: bool,
    message: &'static str,
) -> Option<ProgressBar> {
    if !enabled || files.is_empty() {
        return None;
    }

    // Saturate instead of overflowing, matching how file byte totals are
    // accumulated elsewhere in this file; a plain `sum` would panic in debug
    // builds and wrap in release builds on overflow.
    let total_bytes = files
        .iter()
        .map(|file| {
            if full_file {
                file.size
            } else {
                file.size.min(hash_bytes)
            }
        })
        .fold(0u64, u64::saturating_add);
    if total_bytes == 0 {
        return None;
    }

    let progress = ProgressBar::new(total_bytes);
    progress.set_style(
        ProgressStyle::with_template(
            "{msg} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {binary_bytes}/{binary_total_bytes} {binary_bytes_per_sec}",
        )
        .expect("valid hashing progress template")
        .progress_chars("=>-"),
    );
    progress.set_message(message);
    Some(progress)
}
fn classify_walk_entry(
entry: Result<ignore::DirEntry, ignore::Error>,
follow_links: bool,
@@ -386,24 +539,16 @@ fn non_symlink_entry(path: PathBuf, metadata: &Metadata) -> ScannedEntry {
}
}
fn collect_scanned_entry(
entry: ScannedEntry,
files: &mut Vec<FileEntry>,
symlinks: &mut Vec<SymlinkInfo>,
special_entries: &mut Vec<SpecialEntry>,
errors: &mut Vec<ScanIssue>,
directories: &mut usize,
total_file_bytes: &mut u64,
) {
fn collect_scanned_entry(entry: ScannedEntry, accumulator: &mut ScanAccumulator) {
match entry {
ScannedEntry::File(file) => {
*total_file_bytes = total_file_bytes.saturating_add(file.size);
files.push(file);
accumulator.total_file_bytes = accumulator.total_file_bytes.saturating_add(file.size);
accumulator.files.push(file);
}
ScannedEntry::Directory => *directories += 1,
ScannedEntry::Symlink(symlink) => symlinks.push(symlink),
ScannedEntry::Special(special_entry) => special_entries.push(special_entry),
ScannedEntry::Issue(error) => errors.push(error),
ScannedEntry::Directory => accumulator.directories += 1,
ScannedEntry::Symlink(symlink) => accumulator.symlinks.push(symlink),
ScannedEntry::Special(special_entry) => accumulator.special_entries.push(special_entry),
ScannedEntry::Issue(error) => accumulator.errors.push(error),
}
}
@@ -465,10 +610,13 @@ fn find_hard_links(files: &[FileEntry]) -> Vec<HardLinkGroup> {
.collect()
}
fn same_size_candidates(files: &[FileEntry]) -> Vec<FileEntry> {
fn same_size_candidates(files: &[FileEntry], min_size: u64) -> Vec<FileEntry> {
let files = unique_file_id_entries(files);
let mut by_size: BTreeMap<u64, Vec<FileEntry>> = BTreeMap::new();
for file in files {
if file.size < min_size {
continue;
}
by_size.entry(file.size).or_default().push(file);
}
@@ -479,6 +627,25 @@ fn same_size_candidates(files: &[FileEntry]) -> Vec<FileEntry> {
.collect()
}
/// Groups files purely by size, without reading any file contents.
///
/// Every size shared by at least two files becomes one [`DuplicateGroup`]
/// whose `hash` is the literal marker `"size-only"`. Groups come out in
/// ascending size order, with the paths inside each group sorted.
fn size_only_duplicate_groups(files: Vec<FileEntry>) -> Vec<DuplicateGroup> {
    let mut paths_by_size: BTreeMap<u64, Vec<PathBuf>> = BTreeMap::new();
    for entry in files {
        paths_by_size.entry(entry.size).or_default().push(entry.path);
    }

    let mut groups = Vec::new();
    for (size, mut paths) in paths_by_size {
        // A size seen only once cannot be a duplicate.
        if paths.len() < 2 {
            continue;
        }
        paths.sort();
        groups.push(DuplicateGroup {
            size,
            hash: "size-only".to_string(),
            paths,
        });
    }
    groups
}
fn unique_file_id_entries(files: &[FileEntry]) -> Vec<FileEntry> {
let mut by_file_id: BTreeMap<(u64, u64), &FileEntry> = BTreeMap::new();
for file in files {
@@ -488,14 +655,20 @@ fn unique_file_id_entries(files: &[FileEntry]) -> Vec<FileEntry> {
by_file_id.into_values().cloned().collect()
}
fn hash_files(files: &[FileEntry], hash_bytes: u64, full_file: bool) -> Vec<HashOutcome> {
files
fn hash_files(
files: &[FileEntry],
hash_bytes: u64,
full_file: bool,
progress: Option<ProgressBar>,
) -> Vec<HashOutcome> {
let outcomes = files
.par_iter()
.map(|file| {
let file_progress = progress.clone();
let hash_result = if full_file {
hash_full_file(&file.path)
hash_full_file(&file.path, file_progress.as_ref())
} else {
hash_file_prefix(&file.path, hash_bytes)
hash_file_prefix(&file.path, hash_bytes, file_progress.as_ref())
};
match hash_result {
@@ -510,7 +683,13 @@ fn hash_files(files: &[FileEntry], hash_bytes: u64, full_file: bool) -> Vec<Hash
)),
}
})
.collect()
.collect();
if let Some(progress) = progress {
progress.finish_and_clear();
}
outcomes
}
fn collect_hash_outcomes(
@@ -558,7 +737,11 @@ fn files_from_duplicate_groups(groups: &[DuplicateGroup]) -> Vec<FileEntry> {
.collect()
}
fn hash_file_prefix(path: &Path, hash_bytes: u64) -> io::Result<String> {
fn hash_file_prefix(
path: &Path,
hash_bytes: u64,
progress: Option<&ProgressBar>,
) -> io::Result<String> {
let file = File::open(path)?;
let mut reader = BufReader::new(file);
let mut hasher = blake3::Hasher::new();
@@ -571,6 +754,9 @@ fn hash_file_prefix(path: &Path, hash_bytes: u64) -> io::Result<String> {
if bytes_read == 0 {
break;
}
if let Some(progress) = progress {
progress.inc(bytes_read as u64);
}
hasher.update(&buffer[..bytes_read]);
remaining -= bytes_read as u64;
}
@@ -578,7 +764,7 @@ fn hash_file_prefix(path: &Path, hash_bytes: u64) -> io::Result<String> {
Ok(hasher.finalize().to_hex().to_string())
}
fn hash_full_file(path: &Path) -> io::Result<String> {
fn hash_full_file(path: &Path, progress: Option<&ProgressBar>) -> io::Result<String> {
let file = File::open(path)?;
let mut reader = BufReader::new(file);
let mut hasher = blake3::Hasher::new();
@@ -589,6 +775,9 @@ fn hash_full_file(path: &Path) -> io::Result<String> {
if bytes_read == 0 {
break;
}
if let Some(progress) = progress {
progress.inc(bytes_read as u64);
}
hasher.update(&buffer[..bytes_read]);
}
@@ -609,6 +798,28 @@ pub fn write_human_report(mut writer: impl Write, report: &ScanReport) -> io::Re
)?;
writeln!(writer, "Hash window: {}", format_bytes(report.hash_bytes))?;
writeln!(writer, "Worker threads: {}", report.worker_threads)?;
writeln!(
writer,
"Duplicate mode: {}",
if report.size_only {
"size only"
} else {
"size + partial hash"
}
)?;
writeln!(
writer,
"Minimum duplicate size: {}",
format_bytes(report.min_size)
)?;
writeln!(
writer,
"Maximum depth: {}",
report
.max_depth
.map(|depth| depth.to_string())
.unwrap_or_else(|| "unlimited".to_string())
)?;
writeln!(
writer,
"Symlink traversal: {}",
@@ -640,7 +851,7 @@ pub fn write_human_report(mut writer: impl Write, report: &ScanReport) -> io::Re
)?;
writeln!(
writer,
"Same-size files hashed: {}",
"Same-size duplicate candidates: {}",
report.summary.same_size_candidate_files
)?;
writeln!(
@@ -675,7 +886,11 @@ pub fn write_human_report(mut writer: impl Write, report: &ScanReport) -> io::Re
write_duplicate_section(
&mut writer,
"Possible duplicates (same size + partial hash)",
if report.size_only {
"Possible duplicates (same size only)"
} else {
"Possible duplicates (same size + partial hash)"
},
&report.possible_duplicates,
)?;
@@ -898,6 +1113,16 @@ impl SpecialEntryKind {
}
}
impl ScanAccumulator {
    /// Total number of entries recorded so far, across every category
    /// (files, directories, symlinks, special entries and errors).
    fn interactions(&self) -> usize {
        let per_category = [
            self.files.len(),
            self.directories,
            self.symlinks.len(),
            self.special_entries.len(),
            self.errors.len(),
        ];
        per_category.iter().sum()
    }
}
#[cfg(test)]
mod tests {
use super::*;
@@ -929,6 +1154,10 @@ mod tests {
follow_links: false,
verify_full: false,
threads: None,
size_only: false,
min_size: 0,
max_depth: None,
progress: false,
});
assert_eq!(report.summary.files, 3);
@@ -953,12 +1182,75 @@ mod tests {
follow_links: false,
verify_full: true,
threads: None,
size_only: false,
min_size: 0,
max_depth: None,
progress: false,
});
assert_eq!(report.possible_duplicates.len(), 1);
assert!(report.verified_duplicates.is_empty());
}
/// Two files with equal length but different bytes must still be grouped
/// when `size_only` is set, and the group carries the "size-only" marker.
#[test]
fn size_only_mode_groups_same_size_without_hashing_prefixes() {
    let scratch = TempDir::new().expect("temp dir");
    let root = scratch.path();
    let first = root.join("first.bin");
    let second = root.join("second.bin");
    fs::write(&first, b"abcdef").expect("write first");
    fs::write(&second, b"uvwxyz").expect("write second");

    let config = ScanConfig {
        paths: vec![root.to_path_buf()],
        hash_bytes: DEFAULT_HASH_BYTES,
        follow_links: false,
        verify_full: false,
        threads: None,
        size_only: true,
        min_size: 0,
        max_depth: None,
        progress: false,
    };
    let report = scan_paths(config);

    assert_eq!(report.possible_duplicates.len(), 1);
    let group = &report.possible_duplicates[0];
    assert_eq!(group.hash, "size-only");
    assert!(group.paths.contains(&first));
    assert!(group.paths.contains(&second));
}
/// Identical 3-byte files fall below `min_size: 4` and must be ignored, while
/// the identical 6-byte files are still reported as one duplicate group.
#[test]
fn min_size_filters_duplicate_candidates_before_hashing() {
    let scratch = TempDir::new().expect("temp dir");
    let root = scratch.path();
    let small_first = root.join("small-first.bin");
    let small_second = root.join("small-second.bin");
    let large_first = root.join("large-first.bin");
    let large_second = root.join("large-second.bin");
    fs::write(&small_first, b"abc").expect("write small first");
    fs::write(&small_second, b"abc").expect("write small second");
    fs::write(&large_first, b"abcdef").expect("write large first");
    fs::write(&large_second, b"abcdef").expect("write large second");

    let config = ScanConfig {
        paths: vec![root.to_path_buf()],
        hash_bytes: DEFAULT_HASH_BYTES,
        follow_links: false,
        verify_full: false,
        threads: None,
        size_only: false,
        min_size: 4,
        max_depth: None,
        progress: false,
    };
    let report = scan_paths(config);

    assert_eq!(report.possible_duplicates.len(), 1);
    let group = &report.possible_duplicates[0];
    for expected in [&large_first, &large_second] {
        assert!(group.paths.contains(expected));
    }
    for filtered in [&small_first, &small_second] {
        assert!(!group.paths.contains(filtered));
    }
}
#[cfg(unix)]
#[test]
fn reports_symlinks_without_following_them() {
@@ -976,6 +1268,10 @@ mod tests {
follow_links: false,
verify_full: false,
threads: None,
size_only: false,
min_size: 0,
max_depth: None,
progress: false,
});
assert_eq!(report.summary.files, 1);
@@ -999,6 +1295,10 @@ mod tests {
follow_links: false,
verify_full: false,
threads: None,
size_only: false,
min_size: 0,
max_depth: None,
progress: false,
});
assert_eq!(report.summary.files, 2);
@@ -1024,6 +1324,10 @@ mod tests {
follow_links: false,
verify_full: false,
threads: None,
size_only: false,
min_size: 0,
max_depth: None,
progress: false,
});
let json = serde_json::to_string(&report).expect("serialize report with lossy path");
@@ -1037,6 +1341,9 @@ mod tests {
scanned_paths: vec![PathBuf::from(".")],
hash_bytes: DEFAULT_HASH_BYTES,
worker_threads: 1,
size_only: false,
min_size: 0,
max_depth: None,
followed_symlinks: false,
full_verification: false,
summary: ScanSummary {

View File

@@ -1,10 +1,12 @@
use std::io::{self, Write};
use std::path::PathBuf;
use std::fs::OpenOptions;
use std::io::{self, IsTerminal, Write};
use std::path::{Path, PathBuf};
use std::process::ExitCode;
use anyhow::Context;
use anyhow::{Context, bail};
use clap::Parser;
use disk_checker::{ScanConfig, parse_byte_count, scan_paths, write_human_report};
use dialoguer::{Confirm, Select, theme::ColorfulTheme};
use disk_checker::{DuplicateGroup, ScanConfig, parse_byte_count, scan_paths, write_human_report};
#[derive(Debug, Parser)]
#[command(
@@ -31,6 +33,18 @@ struct Cli {
#[arg(long)]
verify_full: bool,
/// Group duplicate candidates by size only. Fastest mode for huge triage; less precise.
#[arg(long)]
size_only: bool,
/// Ignore duplicate candidates smaller than this size. Accepts units like 100MiB or 1GB.
#[arg(long, default_value = "0", value_parser = parse_min_size)]
min_size: u64,
/// Maximum directory depth to scan. Depth 0 means only the provided path itself.
#[arg(long, value_parser = parse_nonzero_or_zero_usize)]
max_depth: Option<usize>,
/// Number of worker threads used for scanning and hashing. Defaults to CPU parallelism.
#[arg(long, value_parser = parse_thread_count)]
threads: Option<usize>,
@@ -38,6 +52,18 @@ struct Cli {
/// Print machine-readable JSON instead of the human summary.
#[arg(long)]
json: bool,
/// Disable progress output.
#[arg(long)]
no_progress: bool,
/// Interactively review duplicate groups and choose which path to keep.
#[arg(long)]
interactive: bool,
/// Shell script path for planned deletes when --interactive is used.
#[arg(long, default_value = "disk-checker-delete-plan.sh")]
delete_plan: PathBuf,
}
fn parse_thread_count(input: &str) -> Result<usize, String> {
@@ -51,8 +77,32 @@ fn parse_thread_count(input: &str) -> Result<usize, String> {
}
}
/// Clap value parser for `--max-depth`: accepts any non-negative integer,
/// including zero.
///
/// # Errors
///
/// Returns a message quoting the rejected input together with the underlying
/// integer parse error.
fn parse_nonzero_or_zero_usize(input: &str) -> Result<usize, String> {
    match input.parse::<usize>() {
        Ok(depth) => Ok(depth),
        Err(error) => Err(format!("invalid depth {input:?}: {error}")),
    }
}
/// Clap value parser for `--min-size`.
///
/// A bare `0` (surrounding whitespace ignored) is answered directly with
/// zero; every other input is handed unchanged to `parse_byte_count`.
fn parse_min_size(input: &str) -> Result<u64, String> {
    match input.trim() {
        "0" => Ok(0),
        _ => parse_byte_count(input),
    }
}
fn main() -> anyhow::Result<ExitCode> {
let cli = Cli::parse();
if cli.interactive && !cli.verify_full {
bail!(
"--interactive requires --verify-full so keep/delete plans are based on fully verified duplicates"
);
}
if cli.interactive && cli.json {
bail!(
"--interactive cannot be combined with --json because prompts would contaminate JSON output"
);
}
if let Some(threads) = cli.threads {
rayon::ThreadPoolBuilder::new()
@@ -73,6 +123,10 @@ fn main() -> anyhow::Result<ExitCode> {
follow_links: cli.follow_links,
verify_full: cli.verify_full,
threads: cli.threads,
size_only: cli.size_only,
min_size: cli.min_size,
max_depth: cli.max_depth,
progress: !cli.no_progress && !cli.json && io::stderr().is_terminal(),
});
let stdout = io::stdout();
@@ -83,6 +137,16 @@ fn main() -> anyhow::Result<ExitCode> {
} else {
write_human_report(&mut out, &report).context("failed to write report")?;
}
drop(out);
if cli.interactive {
let groups = if cli.verify_full {
&report.verified_duplicates
} else {
&report.possible_duplicates
};
run_interactive_resolver(groups, cli.verify_full, &cli.delete_plan)?;
}
if report.summary.errors > 0 {
Ok(ExitCode::from(2))
@@ -90,3 +154,148 @@ fn main() -> anyhow::Result<ExitCode> {
Ok(ExitCode::SUCCESS)
}
}
fn run_interactive_resolver(
groups: &[DuplicateGroup],
verified: bool,
delete_plan: &PathBuf,
) -> anyhow::Result<()> {
if groups.is_empty() {
println!("No duplicate groups to resolve.");
return Ok(());
}
let theme = ColorfulTheme::default();
let mut planned_deletes = Vec::new();
let mut skipped = 0usize;
for (group_index, group) in groups.iter().enumerate() {
println!();
println!(
"Duplicate group {}/{}{} across {} files",
group_index + 1,
groups.len(),
disk_checker::format_bytes(group.size),
group.paths.len()
);
let mut choices = group
.paths
.iter()
.map(|path| path.display().to_string())
.collect::<Vec<_>>();
choices.push("Skip this group".to_string());
let selection = Select::with_theme(&theme)
.with_prompt("Choose the version to keep")
.items(&choices)
.default(0)
.interact()
.context("interactive selection failed")?;
if selection == group.paths.len() {
skipped += 1;
continue;
}
let keep_path = &group.paths[selection];
let delete_paths = group
.paths
.iter()
.filter(|path| *path != keep_path)
.cloned()
.collect::<Vec<_>>();
println!("Keeping: {}", keep_path.display());
for path in &delete_paths {
println!(" remove: {}", path.display());
}
let confirmed = Confirm::with_theme(&theme)
.with_prompt("Add these files to the deletion plan?")
.default(false)
.interact()
.context("interactive confirmation failed")?;
if !confirmed {
skipped += 1;
continue;
}
planned_deletes.extend(delete_paths);
}
if !planned_deletes.is_empty() {
write_delete_plan(delete_plan, &planned_deletes, verified)?;
println!(
"Wrote deletion plan for {} files: {}",
planned_deletes.len(),
delete_plan.display()
);
println!("Review it, then run: sh {}", delete_plan.display());
}
println!(
"Interactive resolver complete: {} planned, {} groups skipped.",
planned_deletes.len(),
skipped
);
Ok(())
}
fn write_delete_plan(
path: &PathBuf,
delete_paths: &[PathBuf],
verified: bool,
) -> anyhow::Result<()> {
let mut file = OpenOptions::new()
.write(true)
.create_new(true)
.open(path)
.with_context(|| format!("failed to create delete plan {}", path.display()))?;
writeln!(file, "#!/bin/sh")?;
writeln!(file, "set -eu")?;
writeln!(
file,
"# Review carefully before running. Generated by disk-checker."
)?;
if verified {
writeln!(
file,
"# Source groups were fully verified with --verify-full."
)?;
} else {
writeln!(
file,
"# WARNING: Source groups were possible duplicates only, not fully verified."
)?;
}
for delete_path in delete_paths {
writeln!(file, "rm -- {}", shell_quote(delete_path)?)?;
}
Ok(())
}
/// Wraps `path` in single quotes for safe use as one POSIX shell word.
///
/// Each embedded single quote is rewritten as `'\''` (close the quote, emit
/// an escaped quote, reopen), the usual way to carry a quote through a
/// single-quoted string.
///
/// # Errors
///
/// Fails when the path is not valid UTF-8, since it could not be written
/// into the script faithfully.
fn shell_quote(path: &Path) -> anyhow::Result<String> {
    let utf8 = path.to_str().with_context(|| {
        format!(
            "delete plan cannot safely encode non-UTF-8 path: {}",
            path.display()
        )
    })?;

    let mut quoted = String::with_capacity(utf8.len() + 2);
    quoted.push('\'');
    for ch in utf8.chars() {
        if ch == '\'' {
            quoted.push_str("'\\''");
        } else {
            quoted.push(ch);
        }
    }
    quoted.push('\'');
    Ok(quoted)
}
#[cfg(test)]
mod tests {
    use super::shell_quote;
    use std::path::Path;

    /// An embedded single quote must be rewritten as `'\''` inside the quoted word.
    #[test]
    fn shell_quote_escapes_single_quotes() {
        let input = Path::new("/tmp/it's-here.txt");
        let quoted = shell_quote(input).expect("quote path");
        assert_eq!(quoted, "'/tmp/it'\\''s-here.txt'");
    }
}