Add disk checker CLI

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
2026-06-04 14:44:37 +01:00
commit 37730b3269
4 changed files with 1904 additions and 0 deletions

91
src/main.rs Normal file
View File

@@ -0,0 +1,91 @@
use std::io::{self, Write};
use std::path::PathBuf;
use std::process::ExitCode;
use anyhow::Context;
use clap::Parser;
use disk_checker::{ScanConfig, parse_byte_count, scan_paths, write_human_report};
// Command-line arguments for the disk-checker binary, parsed via clap's derive API.
//
// NOTE: the `///` comments on the fields below are picked up by clap as the
// per-argument help text, so editing them changes the `--help` output. Keep
// maintainer-only notes in plain `//` comments like this one.
//
// `author` and `version` are given without values; clap's derive fills them in
// from the Cargo package metadata at compile time.
#[derive(Debug, Parser)]
#[command(
author,
version,
about = "Fast folder scanner for sizes, symlinks, hard links, and possible duplicate files",
long_about = "disk-checker scans one or more folders, groups files by size, hashes only the first N bytes of same-size candidates, and reports possible duplicates plus symlinks, hard links, special files, and scan errors."
)]
struct Cli {
// Positional arguments. When none are given, `main` substitutes ".".
/// Folder or file paths to scan. Defaults to the current directory.
#[arg(value_name = "PATH")]
paths: Vec<PathBuf>,
// Converted by `disk_checker::parse_byte_count`; the "1MiB" default is a
// string that goes through the same parser.
/// Number of bytes to hash from each same-size candidate file.
/// Accepts plain bytes or units like 512KiB, 1MiB, 2MB.
#[arg(long, default_value = "1MiB", value_parser = parse_byte_count)]
hash_bytes: u64,
// Forwarded unchanged to `ScanConfig::follow_links`.
/// Follow symlinks while scanning. Symlinks are still reported separately.
#[arg(long)]
follow_links: bool,
// Forwarded unchanged to `ScanConfig::verify_full`.
/// Fully hash possible duplicate groups after the fast partial-hash pass.
#[arg(long)]
verify_full: bool,
// Validated by `parse_thread_count`, which rejects 0. When this is `None`,
// `main` does not configure Rayon's global pool at all.
/// Number of worker threads used for hashing. Defaults to Rayon automatic sizing.
#[arg(long, value_parser = parse_thread_count)]
threads: Option<usize>,
// Selects `serde_json` pretty output in `main` instead of `write_human_report`.
/// Print machine-readable JSON instead of the human summary.
#[arg(long)]
json: bool,
}
/// Clap value parser for `--threads`.
///
/// Accepts any string that parses as a `usize` strictly greater than zero and
/// returns that value.
///
/// # Errors
///
/// Returns a human-readable message when `input` does not parse as a `usize`,
/// or when it parses to zero.
fn parse_thread_count(input: &str) -> Result<usize, String> {
    match input.parse::<usize>() {
        // A pool with zero workers is meaningless, so reject it up front.
        Ok(0) => Err(String::from("thread count must be greater than zero")),
        Ok(count) => Ok(count),
        Err(cause) => Err(format!("invalid thread count {input:?}: {cause}")),
    }
}
fn main() -> anyhow::Result<ExitCode> {
let cli = Cli::parse();
if let Some(threads) = cli.threads {
rayon::ThreadPoolBuilder::new()
.num_threads(threads)
.build_global()
.context("failed to initialize hashing thread pool")?;
}
let paths = if cli.paths.is_empty() {
vec![PathBuf::from(".")]
} else {
cli.paths
};
let report = scan_paths(ScanConfig {
paths,
hash_bytes: cli.hash_bytes,
follow_links: cli.follow_links,
verify_full: cli.verify_full,
});
let stdout = io::stdout();
let mut out = stdout.lock();
if cli.json {
serde_json::to_writer_pretty(&mut out, &report).context("failed to write JSON report")?;
writeln!(out).context("failed to finish JSON report")?;
} else {
write_human_report(&mut out, &report).context("failed to write report")?;
}
if report.summary.errors > 0 {
Ok(ExitCode::from(2))
} else {
Ok(ExitCode::SUCCESS)
}
}