commit 37730b3269cab9d0efadb4346bfada2b2268bede Author: repi Date: Thu Jun 4 14:44:37 2026 +0100 Add disk checker CLI Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..1c35755 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,783 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + 
+[[package]] +name = "anyhow" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" + +[[package]] +name = "arrayref" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" + +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + +[[package]] +name = "bitflags" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84d7ced0ae9557296835c32bf1b1e02b44c746701f898460fb000d7eaa84f00a" + +[[package]] +name = "blake3" +version = "1.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0aa83c34e62843d924f905e0f5c866eb1dd6545fc4d719e803d9ba6030371fce" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "cpufeatures", +] + +[[package]] +name = "bstr" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" +dependencies = [ + "memchr", + "serde", +] + +[[package]] +name = "cc" +version = "1.2.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + 
"clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "constant_time_eq" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d52eff69cd5e647efe296129160853a42795992097e8af39800e1060caeea9b" + +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "disk-checker" +version = "0.1.0" +dependencies = [ + "anyhow", + "blake3", + "clap", + "ignore", + "rayon", + "serde", + "serde_json", + "tempfile", +] + +[[package]] +name = "either" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "errno" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "fastrand" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasip2", + "wasip3", +] + +[[package]] +name = "globset" +version = "0.4.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + +[[package]] +name = "ignore" +version = "0.4.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3d782a365a015e0f5c04902246139249abf769125006fbe7649e2ee88169b4a" +dependencies = [ + "crossbeam-deque", + "globset", + "log", + "memchr", + "regex-automata", + "same-file", + "walkdir", + "winapi-util", +] + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", + "serde", + "serde_core", +] + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "linux-raw-sys" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" + +[[package]] +name = "log" +version = "0.4.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a" + +[[package]] +name = "memchr" +version = "2.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8" + +[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = 
"1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rustix" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "semver" +version = "1.0.28" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix", + "windows-sys", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasip2" +version = "1.0.3+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" +dependencies = [ + "wit-bindgen 0.57.1", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", +] + +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" 
+dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + 
"anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..4d3c39d --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "disk-checker" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1" +blake3 = "1" +clap = { version = "4", features = ["derive"] } +ignore = "0.4" +rayon = "1" +serde = { version = "1", features = ["derive"] } +serde_json = "1" + +[dev-dependencies] +tempfile = "3" diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..c3123cb --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,1014 @@ +use 
/// Aggregate counters for one scan, filled in by `scan_paths` once the walk
/// and hashing passes have finished.
#[derive(Debug, Clone, Serialize)]
pub struct ScanSummary {
    /// Entries whose metadata reports a directory (including symlink targets
    /// when links are followed).
    pub directories: usize,
    /// Entries whose metadata reports a regular file.
    pub files: usize,
    /// Saturating sum of the lengths of all counted files.
    pub total_file_bytes: u64,
    /// Files that share their size with at least one other file, counting each
    /// (device, inode) pair only once.
    pub same_size_candidate_files: usize,
    /// Groups of files with equal size and equal prefix hash.
    pub possible_duplicate_groups: usize,
    /// Total number of paths across all possible-duplicate groups.
    pub possible_duplicate_files: usize,
    /// Groups confirmed by hashing the whole file; zero when full
    /// verification was not requested.
    pub verified_duplicate_groups: usize,
    /// Total number of paths across all verified-duplicate groups.
    pub verified_duplicate_files: usize,
    /// Symbolic links encountered during the walk.
    pub symlinks: usize,
    /// Symbolic links whose target kind was classified as broken.
    pub broken_symlinks: usize,
    /// Groups of two or more paths sharing one (device, inode) pair.
    pub hard_link_groups: usize,
    /// Entries that are neither regular files, directories, nor symlinks.
    pub special_entries: usize,
    /// Issues recorded while walking, reading metadata, or hashing.
    pub errors: usize,
}
/// Parses a human-friendly byte count such as `4096`, `64k`, `10 MB`, or
/// `1_048_576`.
///
/// The value is a run of ASCII digits, optionally followed by a unit.
/// Underscores are ignored so they can be used as digit separators.
/// Leading/trailing whitespace and whitespace between the number and the unit
/// are ignored. Units are case-insensitive:
///
/// * `b` (or no unit) — bytes
/// * `k`/`kb`, `m`/`mb`, `g`/`gb` — decimal multiples (powers of 1000)
/// * `kib`, `mib`, `gib` — binary multiples (powers of 1024)
///
/// # Errors
///
/// Returns a human-readable message when the input is empty, does not start
/// with digits, is zero, uses an unknown unit, or does not fit in a `u64`.
pub fn parse_byte_count(input: &str) -> Result<u64, String> {
    let trimmed = input.trim();
    if trimmed.is_empty() {
        return Err("value cannot be empty".to_string());
    }

    let compact: String = trimmed
        .chars()
        .filter(|character| *character != '_')
        .collect();

    // Everything before the first non-digit is the number; the rest is the unit.
    let number_end = compact
        .find(|character: char| !character.is_ascii_digit())
        .unwrap_or(compact.len());
    if number_end == 0 {
        return Err(format!("{input:?} must start with a positive byte count"));
    }
    // `number_end` is a char boundary: only ASCII digits precede it.
    let (digits, unit) = compact.split_at(number_end);

    let number = digits
        .parse::<u64>()
        .map_err(|error| format!("invalid byte count {input:?}: {error}"))?;
    if number == 0 {
        return Err("byte count must be greater than zero".to_string());
    }

    // Accept "10 MB" as well as "10MB": drop the gap between number and unit
    // instead of treating it as part of the unit name.
    let suffix = unit.trim_start().to_ascii_lowercase();
    let multiplier: u64 = match suffix.as_str() {
        "" | "b" => 1,
        "k" | "kb" => 1_000,
        "m" | "mb" => 1_000_000,
        "g" | "gb" => 1_000_000_000,
        "kib" => 1024,
        "mib" => 1024 * 1024,
        "gib" => 1024 * 1024 * 1024,
        _ => {
            return Err(format!(
                "unsupported unit {suffix:?}; use bytes, KB, MB, GB, KiB, MiB, or GiB"
            ));
        }
    };

    number
        .checked_mul(multiplier)
        .ok_or_else(|| format!("byte count {input:?} is too large"))
}
special_entries.sort_by(|left, right| left.path.cmp(&right.path)); + + let hard_links = find_hard_links(&files); + let same_size_candidates = same_size_candidates(&files); + let same_size_candidate_files = same_size_candidates.len(); + + let partial_outcomes = hash_files(&same_size_candidates, hash_bytes, false); + let mut partial_hashes = Vec::new(); + collect_hash_outcomes(partial_outcomes, &mut partial_hashes, &mut errors); + let possible_duplicates = duplicate_groups(partial_hashes); + + let verified_duplicates = if config.verify_full { + let full_candidates = files_from_duplicate_groups(&possible_duplicates); + let full_outcomes = hash_files(&full_candidates, hash_bytes, true); + let mut full_hashes = Vec::new(); + collect_hash_outcomes(full_outcomes, &mut full_hashes, &mut errors); + duplicate_groups(full_hashes) + } else { + Vec::new() + }; + + errors.sort_by(|left, right| { + left.path + .cmp(&right.path) + .then(left.message.cmp(&right.message)) + }); + + let broken_symlinks = symlinks + .iter() + .filter(|symlink| symlink.target_kind == SymlinkTargetKind::Broken) + .count(); + let possible_duplicate_files = possible_duplicates + .iter() + .map(|group| group.paths.len()) + .sum(); + let verified_duplicate_files = verified_duplicates + .iter() + .map(|group| group.paths.len()) + .sum(); + + ScanReport { + scanned_paths: config.paths, + hash_bytes, + followed_symlinks: config.follow_links, + full_verification: config.verify_full, + summary: ScanSummary { + directories, + files: files.len(), + total_file_bytes, + same_size_candidate_files, + possible_duplicate_groups: possible_duplicates.len(), + possible_duplicate_files, + verified_duplicate_groups: verified_duplicates.len(), + verified_duplicate_files, + symlinks: symlinks.len(), + broken_symlinks, + hard_link_groups: hard_links.len(), + special_entries: special_entries.len(), + errors: errors.len(), + }, + possible_duplicates, + verified_duplicates, + symlinks, + hard_links, + special_entries, + errors, + 
}
+}
+
+/// Records one non-symlink directory entry into the scan accumulators.
+///
+/// Regular files are appended to `files` and their length is added to
+/// `total_file_bytes` (saturating instead of overflowing). Directories only
+/// increment `directories`. Everything else is stored in `special_entries`
+/// together with the kind reported by `special_entry_kind`.
+fn process_non_symlink_entry(
+    path: PathBuf,
+    metadata: &Metadata,
+    files: &mut Vec<FileEntry>,
+    special_entries: &mut Vec<SpecialEntry>,
+    directories: &mut usize,
+    total_file_bytes: &mut u64,
+) {
+    let file_type = metadata.file_type();
+    if file_type.is_file() {
+        *total_file_bytes = total_file_bytes.saturating_add(metadata.len());
+        files.push(FileEntry {
+            path,
+            size: metadata.len(),
+            device: metadata.dev(),
+            inode: metadata.ino(),
+        });
+    } else if file_type.is_dir() {
+        *directories += 1;
+    } else {
+        special_entries.push(SpecialEntry {
+            path,
+            kind: special_entry_kind(&file_type),
+        });
+    }
+}
+
+/// Builds the report entry for the symlink at `path`.
+///
+/// The link target comes from `fs::read_link` (`None` when that call fails).
+/// The target kind is classified with `fs::metadata`, which follows the link:
+/// a `NotFound` error is reported as broken, any other error as unreadable.
+fn describe_symlink(path: &Path) -> SymlinkInfo {
+    let target = fs::read_link(path).ok();
+    let target_kind = match fs::metadata(path) {
+        Ok(metadata) if metadata.file_type().is_file() => SymlinkTargetKind::File,
+        Ok(metadata) if metadata.file_type().is_dir() => SymlinkTargetKind::Directory,
+        Ok(_) => SymlinkTargetKind::Other,
+        Err(error) if error.kind() == io::ErrorKind::NotFound => SymlinkTargetKind::Broken,
+        Err(_) => SymlinkTargetKind::Unreadable,
+    };
+
+    SymlinkInfo {
+        path: path.to_path_buf(),
+        target,
+        target_kind,
+    }
+}
+
+/// Maps a Unix file type that is neither a regular file nor a directory to its
+/// `SpecialEntryKind`, falling back to `Other` when no specific test matches.
+#[cfg(unix)]
+fn special_entry_kind(file_type: &fs::FileType) -> SpecialEntryKind {
+    if file_type.is_block_device() {
+        SpecialEntryKind::BlockDevice
+    } else if file_type.is_char_device() {
+        SpecialEntryKind::CharacterDevice
+    } else if file_type.is_fifo() {
+        SpecialEntryKind::Fifo
+    } else if file_type.is_socket() {
+        SpecialEntryKind::Socket
+    } else {
+        SpecialEntryKind::Other
+    }
+}
+
+/// Groups files that share a `(device, inode)` pair.
+///
+/// Only pairs reachable through more than one path are returned, in ascending
+/// key order. The reported size is taken from the first entry of each group.
+fn find_hard_links(files: &[FileEntry]) -> Vec<HardLinkGroup> {
+    let mut by_inode: BTreeMap<(u64, u64), Vec<&FileEntry>> = BTreeMap::new();
+    for file in files {
+        by_inode
+            .entry((file.device, file.inode))
+            .or_default()
+            .push(file);
+    }
+
+    by_inode
+        .into_iter()
+        .filter_map(|((device, inode), group)| {
+            (group.len() > 1).then(|| {
+                let size = group.first().map(|file| file.size).unwrap_or_default();
+                let paths = group.into_iter().map(|file| file.path.clone()).collect();
+                HardLinkGroup {
+                    device,
+                    inode,
+                    size,
+                    paths,
+                }
+            })
+        })
+        .collect()
+}
+
+/// Returns the files worth hashing.
+///
+/// Entries sharing a `(device, inode)` pair are first collapsed by
+/// `unique_file_id_entries`; of the remainder, only files whose size is shared
+/// with at least one other file are kept.
+fn same_size_candidates(files: &[FileEntry]) -> Vec<FileEntry> {
+    let files = unique_file_id_entries(files);
+    let mut by_size: BTreeMap<u64, Vec<FileEntry>> = BTreeMap::new();
+    for file in files {
+        by_size.entry(file.size).or_default().push(file);
+    }
+
+    by_size
+        .into_values()
+        .filter(|group| group.len() > 1)
+        .flatten()
+        .collect()
+}
+
+/// Keeps one entry per `(device, inode)` pair: the first one found in `files`.
+fn unique_file_id_entries(files: &[FileEntry]) -> Vec<FileEntry> {
+    let mut by_file_id: BTreeMap<(u64, u64), &FileEntry> = BTreeMap::new();
+    for file in files {
+        by_file_id.entry((file.device, file.inode)).or_insert(file);
+    }
+
+    by_file_id.into_values().cloned().collect()
+}
+
+/// Hashes every file in parallel.
+///
+/// With `full_file` set the whole file is hashed; otherwise only its first
+/// `hash_bytes` bytes. A failure becomes a `HashOutcome::Issue` for that file
+/// instead of aborting the run.
+fn hash_files(files: &[FileEntry], hash_bytes: u64, full_file: bool) -> Vec<HashOutcome> {
+    files
+        .par_iter()
+        .map(|file| {
+            let hash_result = if full_file {
+                hash_full_file(&file.path)
+            } else {
+                hash_file_prefix(&file.path, hash_bytes)
+            };
+
+            match hash_result {
+                Ok(hash) => HashOutcome::Hashed(HashedFile {
+                    path: file.path.clone(),
+                    size: file.size,
+                    hash,
+                }),
+                Err(error) => HashOutcome::Issue(issue(
+                    file.path.clone(),
+                    format!("could not hash file: {error}"),
+                )),
+            }
+        })
+        .collect()
+}
+
+/// Splits hash outcomes into successes and issues, appending each to the
+/// matching output vector.
+fn collect_hash_outcomes(
+    outcomes: Vec<HashOutcome>,
+    hashes: &mut Vec<HashedFile>,
+    errors: &mut Vec<ScanIssue>,
+) {
+    for outcome in outcomes {
+        match outcome {
+            HashOutcome::Hashed(hash) => hashes.push(hash),
+            HashOutcome::Issue(error) => errors.push(error),
+        }
+    }
+}
+
+/// Groups hashed files by `(size, hash)` and returns every group holding more
+/// than one path, with the paths of each group sorted.
+fn duplicate_groups(files: Vec<HashedFile>) -> Vec<DuplicateGroup> {
+    let mut by_hash: BTreeMap<(u64, String), Vec<PathBuf>> = BTreeMap::new();
+    for file in files {
+        by_hash
+            .entry((file.size, file.hash))
+            .or_default()
+            .push(file.path);
+    }
+
+    by_hash
+        .into_iter()
+        .filter_map(|((size, hash), mut paths)| {
+            paths.sort();
+            (paths.len() > 1).then_some(DuplicateGroup { size, hash, paths })
+        })
+        .collect()
+}
+
+/// Flattens duplicate groups back into `FileEntry` values.
+///
+/// Groups carry no device or inode information, so both fields are set to 0.
+fn files_from_duplicate_groups(groups: &[DuplicateGroup]) -> Vec<FileEntry> {
+    groups
+        .iter()
+        .flat_map(|group| {
+            group.paths.iter().map(|path| FileEntry {
+                path: path.clone(),
+                size: group.size,
+                device: 0,
+                inode: 0,
+            })
+        })
+        .collect()
+}
+
+/// Returns the hex-encoded BLAKE3 hash of at most the first `hash_bytes` bytes
+/// of the file at `path`.
+///
+/// # Errors
+///
+/// Returns the I/O error raised while opening or reading the file.
+fn hash_file_prefix(path: &Path, hash_bytes: u64) -> io::Result<String> {
+    let file = File::open(path)?;
+    let mut reader = BufReader::new(file);
+    let mut hasher = blake3::Hasher::new();
+    // An `as usize` cast wraps where `usize` is narrower than `u64`; saturate
+    // instead so a huge window is clamped rather than truncated to zero.
+    let window = usize::try_from(hash_bytes).unwrap_or(usize::MAX);
+    let mut buffer = vec![0; HASH_BUFFER_BYTES.min(window).max(1)];
+    let mut remaining = hash_bytes;
+
+    while remaining > 0 {
+        // Same saturation here: a wrapped value of 0 would request an empty
+        // read, which is indistinguishable from end of file.
+        let read_limit = buffer.len().min(usize::try_from(remaining).unwrap_or(usize::MAX));
+        let bytes_read = reader.read(&mut buffer[..read_limit])?;
+        if bytes_read == 0 {
+            break;
+        }
+        hasher.update(&buffer[..bytes_read]);
+        remaining -= bytes_read as u64;
+    }
+
+    Ok(hasher.finalize().to_hex().to_string())
+}
+
+/// Returns the hex-encoded BLAKE3 hash of the whole file at `path`.
+///
+/// # Errors
+///
+/// Returns the I/O error raised while opening or reading the file.
+fn hash_full_file(path: &Path) -> io::Result<String> {
+    let file = File::open(path)?;
+    let mut reader = BufReader::new(file);
+    let mut hasher = blake3::Hasher::new();
+    let mut buffer = vec![0; HASH_BUFFER_BYTES];
+
+    loop {
+        let bytes_read = reader.read(&mut buffer)?;
+        if bytes_read == 0 {
+            break;
+        }
+        hasher.update(&buffer[..bytes_read]);
+    }
+
+    Ok(hasher.finalize().to_hex().to_string())
+}
+
+/// Convenience constructor for a `ScanIssue`.
+fn issue(path: PathBuf, message: String) -> ScanIssue {
+    ScanIssue { path, message }
+}
+
+/// Writes the human-readable form of `report` to `writer`.
+///
+/// The output is a header, a summary, and one detail section each for
+/// possible duplicates, symlinks, hard links, special entries and errors. The
+/// verified-duplicate summary line and section are written only when
+/// `report.full_verification` is set.
+///
+/// # Errors
+///
+/// Returns the first error reported by `writer`.
+pub fn write_human_report(mut writer: impl Write, report: &ScanReport) -> io::Result<()> {
+    writeln!(writer, "disk-checker report")?;
+    writeln!(writer, "===================")?;
+    writeln!(
+        writer,
+        "Scanned paths: {}",
+        join_paths(&report.scanned_paths)
+    )?;
+    writeln!(writer, "Hash window: {}", format_bytes(report.hash_bytes))?;
+    writeln!(
+        writer,
+        "Symlink traversal: {}",
+        if report.followed_symlinks {
+            "followed"
+        } else {
+            "not followed"
+        }
+    )?;
+    writeln!(
+        writer,
+        "Full verification: {}",
+        if report.full_verification {
+            "yes"
+        } else {
+            "no"
+        }
+    )?;
+    writeln!(writer)?;
+
+    writeln!(writer, "Summary")?;
+    writeln!(writer, "-------")?;
+    writeln!(writer, "Directories: {}", report.summary.directories)?;
+    writeln!(
+        writer,
+        "Files: {} ({})",
+        report.summary.files,
+        format_bytes(report.summary.total_file_bytes)
+    )?;
+    writeln!(
+        writer,
+        "Same-size files hashed: {}",
+        report.summary.same_size_candidate_files
+    )?;
+    writeln!(
+        writer,
+        "Possible duplicate groups: {} ({} files)",
+        report.summary.possible_duplicate_groups, report.summary.possible_duplicate_files
+    )?;
+    if report.full_verification {
+        writeln!(
+            writer,
+            "Verified duplicate groups: {} ({} files)",
+            report.summary.verified_duplicate_groups, report.summary.verified_duplicate_files
+        )?;
+    }
+    writeln!(
+        writer,
+        "Symlinks: {} ({} broken)",
+        report.summary.symlinks, report.summary.broken_symlinks
+    )?;
+    writeln!(
+        writer,
+        "Hard link groups: {}",
+        report.summary.hard_link_groups
+    )?;
+    writeln!(
+        writer,
+        "Special entries: {}",
+        report.summary.special_entries
+    )?;
+    writeln!(writer, "Errors: {}", report.summary.errors)?;
+    writeln!(writer)?;
+
+    write_duplicate_section(
+        &mut writer,
+        "Possible duplicates (same size + partial hash)",
+        &report.possible_duplicates,
+    )?;
+
+    if report.full_verification {
+        write_duplicate_section(
+            &mut writer,
+            "Verified duplicates (same size + full hash)",
+            &report.verified_duplicates,
+        )?;
+    }
+
+    write_symlink_section(&mut writer, &report.symlinks)?;
+    write_hard_link_section(&mut writer, &report.hard_links)?;
+    write_special_section(&mut writer, &report.special_entries)?;
+    write_error_section(&mut writer, &report.errors)?;
+
+    Ok(())
+}
+
+/// Writes one duplicate section: `title`, an underline of the same length, and
+/// then either "None found." or a numbered entry per group followed by its
+/// paths. The section ends with a blank line.
+fn write_duplicate_section(
+    writer: &mut impl Write,
+    title: &str,
+    groups: &[DuplicateGroup],
+) -> io::Result<()> {
+    writeln!(writer, "{title}")?;
+    writeln!(writer, "{}", "-".repeat(title.len()))?;
+    if groups.is_empty() {
+        writeln!(writer, "None found.")?;
+        writeln!(writer)?;
+        return Ok(());
+    }
+
+    for (index, group) in groups.iter().enumerate() {
+        writeln!(
+            writer,
+            "{}. size={} hash={} files={}",
+            index + 1,
+            format_bytes(group.size),
+            group.hash,
+            group.paths.len()
+        )?;
+        for path in &group.paths {
+            writeln!(writer, " - {}", path.display())?;
+        }
+    }
+    writeln!(writer)?;
+    Ok(())
+}
+
+/// Writes the "Symlinks" section: one line per link with its target (when it
+/// could be read) and the label of its target kind, then a blank line.
+fn write_symlink_section(writer: &mut impl Write, symlinks: &[SymlinkInfo]) -> io::Result<()> {
+    writeln!(writer, "Symlinks")?;
+    writeln!(writer, "--------")?;
+    if symlinks.is_empty() {
+        writeln!(writer, "None found.")?;
+        writeln!(writer)?;
+        return Ok(());
+    }
+
+    for symlink in symlinks {
+        match &symlink.target {
+            Some(target) => writeln!(
+                writer,
+                "- {} -> {} ({})",
+                symlink.path.display(),
+                target.display(),
+                symlink.target_kind.label()
+            )?,
+            None => writeln!(
+                writer,
+                "- {} -> ({})",
+                symlink.path.display(),
+                symlink.target_kind.label()
+            )?,
+        }
+    }
+    writeln!(writer)?;
+    Ok(())
+}
+
+/// Writes the hard-link section: one entry per group with its device, inode,
+/// size and path count, followed by the paths, then a blank line.
+fn write_hard_link_section(writer: &mut impl Write, groups: &[HardLinkGroup]) -> io::Result<()> {
+    writeln!(
+        writer,
+        "Hard links (same device + inode; not extra disk copies)"
+    )?;
+    writeln!(
+        writer,
+        "------------------------------------------------------"
+    )?;
+    if groups.is_empty() {
+        writeln!(writer, "None found.")?;
+        writeln!(writer)?;
+        return Ok(());
+    }
+
+    for group in groups {
+        writeln!(
+            writer,
+            "- dev={} inode={} size={} paths={}",
+            group.device,
+            group.inode,
+            format_bytes(group.size),
+            group.paths.len()
+        )?;
+        for path in &group.paths {
+            writeln!(writer, " - {}", path.display())?;
+        }
+    }
+    writeln!(writer)?;
+    Ok(())
+}
+
+/// Writes the "Special entries" section: one line per entry with its path and
+/// kind label, then a blank line.
+fn write_special_section(writer: &mut impl Write, entries: &[SpecialEntry]) -> io::Result<()> {
+    writeln!(writer, "Special entries")?;
+    writeln!(writer, "---------------")?;
+    if entries.is_empty() {
+        writeln!(writer, "None found.")?;
+        writeln!(writer)?;
+        return Ok(());
+    }
+
+    for entry in entries {
+        writeln!(
+            writer,
+            "- {} ({})",
+            entry.path.display(),
+            entry.kind.label()
+        )?;
+    }
+    writeln!(writer)?;
+    Ok(())
+}
+
+/// Writes the "Errors" section: one line per issue with its path and message.
+/// Unlike the other sections, no trailing blank line is written.
+fn write_error_section(writer: &mut impl Write, errors: &[ScanIssue]) -> io::Result<()> {
+    writeln!(writer, "Errors")?;
+    writeln!(writer, "------")?;
+    if errors.is_empty() {
+        writeln!(writer, "None.")?;
+        return Ok(());
+    }
+
+    for error in errors {
+        writeln!(writer, "- {}: {}", error.path.display(), error.message)?;
+    }
+    Ok(())
+}
+
+/// Renders `paths` for display as a single string separated by `", "`.
+fn join_paths(paths: &[PathBuf]) -> String {
+    paths
+        .iter()
+        .map(|path| path.display().to_string())
+        .collect::<Vec<_>>()
+        .join(", ")
+}
+
+/// Serializes a path as a string, converting it with `to_string_lossy`.
+fn serialize_path<S>(path: &Path, serializer: S) -> Result<S::Ok, S::Error>
+where
+    S: Serializer,
+{
+    serializer.serialize_str(&path.to_string_lossy())
+}
+
+/// Serializes a list of paths as a sequence of lossily converted strings.
+fn serialize_paths<S>(paths: &[PathBuf], serializer: S) -> Result<S::Ok, S::Error>
+where
+    S: Serializer,
+{
+    let lossy_paths = paths
+        .iter()
+        .map(|path| path.to_string_lossy().into_owned())
+        .collect::<Vec<_>>();
+    lossy_paths.serialize(serializer)
+}
+
+/// Serializes an optional path as an optional lossily converted string.
+fn serialize_optional_path<S>(path: &Option<PathBuf>, serializer: S) -> Result<S::Ok, S::Error>
+where
+    S: Serializer,
+{
+    path.as_ref()
+        .map(|path| path.to_string_lossy().into_owned())
+        .serialize(serializer)
+}
+
+/// Formats a byte count for display with binary (1024-based) units.
+///
+/// Counts below 1024 are printed exactly as `"<n> B"`. Larger counts are
+/// printed with two decimals in the largest unit from KiB to EiB that keeps the
+/// printed number below 1024, so a value just under a boundary reads
+/// `"1.00 MiB"` rather than `"1024.00 KiB"`.
+pub fn format_bytes(bytes: u64) -> String {
+    const UNITS: [&str; 6] = ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB"];
+
+    if bytes < 1024 {
+        return format!("{bytes} B");
+    }
+
+    let mut value = bytes as f64 / 1024.0;
+    let mut unit = 0;
+    // Compare the value as it will be printed (rounded to two decimals), so
+    // rounding can never produce "1024.00" of a smaller unit.
+    while unit + 1 < UNITS.len() && (value * 100.0).round() >= 102_400.0 {
+        value /= 1024.0;
+        unit += 1;
+    }
+
+    format!("{value:.2} {}", UNITS[unit])
+}
+
+impl SymlinkTargetKind {
+    /// Returns the lowercase label used for this kind in the human report.
+    fn label(self) -> &'static str {
+        match self {
+            SymlinkTargetKind::File => "file",
+            SymlinkTargetKind::Directory => "directory",
+            SymlinkTargetKind::Other => "other",
+            SymlinkTargetKind::Broken => "broken",
+            SymlinkTargetKind::Unreadable => "unreadable",
+        }
+    }
+}
+
+impl SpecialEntryKind {
+    /// Returns the lowercase label used for this kind in the human report.
+    fn label(self) -> &'static str {
+        match self {
+            SpecialEntryKind::BlockDevice => "block device",
+            SpecialEntryKind::CharacterDevice => "character device",
+            SpecialEntryKind::Fifo => "fifo",
+            SpecialEntryKind::Socket => "socket",
+            SpecialEntryKind::Other => "other",
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::TempDir;
+
+    #[test]
+    fn parses_byte_counts_with_units() {
+        assert_eq!(parse_byte_count("1MiB"), Ok(DEFAULT_HASH_BYTES));
+        assert_eq!(parse_byte_count("2mb"), Ok(2_000_000));
+        assert_eq!(parse_byte_count("4_096"), Ok(4096));
+        assert!(parse_byte_count("0").is_err());
+        assert!(parse_byte_count("12xb").is_err());
+    }
+
+    #[test]
+    fn formats_bytes_with_binary_units() {
+        assert_eq!(format_bytes(1023), "1023 B");
+        assert_eq!(format_bytes(1536), "1.50 KiB");
+        assert_eq!(format_bytes(1_048_575), "1.00 MiB");
+        assert_eq!(format_bytes(1 << 40), "1.00 TiB");
+    }
+
+    #[test]
+    fn reports_possible_duplicates_by_size_and_partial_hash() {
+        let temp = TempDir::new().expect("temp dir");
+        let first = temp.path().join("first.bin");
+        let second = temp.path().join("second.bin");
+        let third = temp.path().join("third.bin");
+
+        fs::write(&first, b"abcdef").expect("write first");
+        fs::write(&second, b"abcxyz").expect("write second");
+        fs::write(&third, b"zzzxxx").expect("write third");
+
+        let report = scan_paths(ScanConfig {
+            paths: vec![temp.path().to_path_buf()],
+            hash_bytes: 3,
+            follow_links: false,
+            verify_full: false,
+        });
+
+        assert_eq!(report.summary.files, 3);
+        assert_eq!(report.possible_duplicates.len(), 1);
+        assert_eq!(report.possible_duplicates[0].paths.len(), 2);
+        assert!(report.possible_duplicates[0].paths.contains(&first));
+        assert!(report.possible_duplicates[0].paths.contains(&second));
+    }
+
+    #[test]
+    fn full_verification_filters_partial_hash_collisions() {
+        let temp = TempDir::new().expect("temp dir");
+        let first = temp.path().join("first.bin");
+        let second = temp.path().join("second.bin");
+
+        fs::write(&first, b"abcdef").expect("write first");
+        fs::write(&second, b"abcxyz").expect("write second");
+
+        let report = scan_paths(ScanConfig {
+            paths: vec![temp.path().to_path_buf()],
+            hash_bytes: 3,
+            follow_links: false,
+            verify_full: true,
+        });
+
+        assert_eq!(report.possible_duplicates.len(), 1);
+        assert!(report.verified_duplicates.is_empty());
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn reports_symlinks_without_following_them() {
+        use std::os::unix::fs::symlink;
+
+        let temp = TempDir::new().expect("temp dir");
+        let target = temp.path().join("target.txt");
+        let link = temp.path().join("target-link.txt");
+        fs::write(&target, b"same content").expect("write target");
+        symlink(&target, &link).expect("create symlink");
+
+        let report = scan_paths(ScanConfig {
+            paths: vec![temp.path().to_path_buf()],
+            hash_bytes: DEFAULT_HASH_BYTES,
+            follow_links: false,
+            verify_full: false,
+        });
+
+        assert_eq!(report.summary.files, 1);
+        assert_eq!(report.symlinks.len(), 1);
+        assert_eq!(report.symlinks[0].path, link);
+        assert_eq!(report.symlinks[0].target_kind, SymlinkTargetKind::File);
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn reports_hard_link_groups() {
+        let temp = TempDir::new().expect("temp dir");
+        let original = temp.path().join("original.txt");
+        let hard_link = temp.path().join("hard-link.txt");
+        fs::write(&original, b"same inode").expect("write original");
+        fs::hard_link(&original, &hard_link).expect("create hard link");
+
+        let report = scan_paths(ScanConfig {
+            paths: vec![temp.path().to_path_buf()],
+            hash_bytes: DEFAULT_HASH_BYTES,
+            follow_links: false,
+            verify_full: false,
+        });
+
+        assert_eq!(report.summary.files, 2);
+        assert_eq!(report.hard_links.len(), 1);
+        assert_eq!(report.hard_links[0].paths.len(), 2);
+        assert!(report.possible_duplicates.is_empty());
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn json_report_handles_non_utf8_paths_lossily() {
+        use std::ffi::OsString;
+        use std::os::unix::ffi::OsStringExt;
+
+        let temp = TempDir::new().expect("temp dir");
+        let invalid_name = OsString::from_vec(vec![b'i', b'n', b'v', 0xff, b'.', b't', b'x', b't']);
+        let invalid_path = temp.path().join(invalid_name);
+        fs::write(&invalid_path, b"content").expect("write invalid utf8 path");
+
+        let report = scan_paths(ScanConfig {
+            paths: vec![invalid_path],
+            hash_bytes: DEFAULT_HASH_BYTES,
+            follow_links: false,
+            verify_full: false,
+        });
+
+        let json = serde_json::to_string(&report).expect("serialize report with lossy path");
+        assert!(json.contains("inv"));
+        assert!(json.contains("�"));
+    }
+
+    #[test]
+    fn human_report_mentions_main_sections() {
+        let report = ScanReport {
+            scanned_paths: vec![PathBuf::from(".")],
+            hash_bytes: DEFAULT_HASH_BYTES,
+            followed_symlinks: false,
+            full_verification: false,
+            summary: ScanSummary {
+                directories: 1,
+                files: 0,
+                total_file_bytes: 0,
+                same_size_candidate_files: 0,
+                possible_duplicate_groups: 0,
+                possible_duplicate_files: 0,
+                verified_duplicate_groups: 0,
+                verified_duplicate_files: 0,
+                symlinks: 0,
+                broken_symlinks: 0,
+                hard_link_groups: 0,
+                special_entries: 0,
+                errors: 0,
+            },
+            possible_duplicates: Vec::new(),
+            verified_duplicates: Vec::new(),
+            symlinks: Vec::new(),
+            hard_links: Vec::new(),
+            special_entries: Vec::new(),
+            errors: Vec::new(),
+        };
+
+        let mut output = Vec::new();
+        write_human_report(&mut output, &report).expect("write report");
+        let output = String::from_utf8(output).expect("utf8 report");
+
+        assert!(output.contains("Possible duplicates"));
+        assert!(output.contains("Symlinks"));
+        assert!(output.contains("Hard links"));
+        assert!(output.contains("Errors"));
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..2a59242
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,98 @@
+use std::io::{self, Write};
+use std::path::PathBuf;
+use std::process::ExitCode;
+
+use anyhow::Context;
+use clap::Parser;
+use disk_checker::{ScanConfig, parse_byte_count, scan_paths, write_human_report};
+
+// Command-line options; the help text is taken from the attributes below.
+#[derive(Debug, Parser)]
+#[command(
+    author,
+    version,
+    about = "Fast folder scanner for sizes, symlinks, hard links, and possible duplicate files",
+    long_about = "disk-checker scans one or more folders, groups files by size, hashes only the first N bytes of same-size candidates, and reports possible duplicates plus symlinks, hard links, special files, and scan errors."
+)]
+struct Cli {
+    /// Folder or file paths to scan. Defaults to the current directory.
+    #[arg(value_name = "PATH")]
+    paths: Vec<PathBuf>,
+
+    /// Number of bytes to hash from each same-size candidate file.
+    /// Accepts plain bytes or units like 512KiB, 1MiB, 2MB.
+    #[arg(long, default_value = "1MiB", value_parser = parse_byte_count)]
+    hash_bytes: u64,
+
+    /// Follow symlinks while scanning. Symlinks are still reported separately.
+    #[arg(long)]
+    follow_links: bool,
+
+    /// Fully hash possible duplicate groups after the fast partial-hash pass.
+    #[arg(long)]
+    verify_full: bool,
+
+    /// Number of worker threads used for hashing. Defaults to Rayon automatic sizing.
+    #[arg(long, value_parser = parse_thread_count)]
+    threads: Option<usize>,
+
+    /// Print machine-readable JSON instead of the human summary.
+    #[arg(long)]
+    json: bool,
+}
+
+/// Parses the `--threads` value, rejecting input that is not a `usize` and
+/// rejecting zero. The `Err` string is the message shown to the user.
+fn parse_thread_count(input: &str) -> Result<usize, String> {
+    let threads = input
+        .parse::<usize>()
+        .map_err(|error| format!("invalid thread count {input:?}: {error}"))?;
+    if threads == 0 {
+        Err("thread count must be greater than zero".to_string())
+    } else {
+        Ok(threads)
+    }
+}
+
+/// Parses the command line, sizes the global Rayon pool when `--threads` is
+/// given, scans the requested paths (the current directory when none are
+/// given) and prints the report to stdout as JSON or as human-readable text.
+/// Exits with status 2 when the report contains errors, otherwise with success.
+fn main() -> anyhow::Result<ExitCode> {
+    let cli = Cli::parse();
+
+    if let Some(threads) = cli.threads {
+        rayon::ThreadPoolBuilder::new()
+            .num_threads(threads)
+            .build_global()
+            .context("failed to initialize hashing thread pool")?;
+    }
+
+    let paths = if cli.paths.is_empty() {
+        vec![PathBuf::from(".")]
+    } else {
+        cli.paths
+    };
+
+    let report = scan_paths(ScanConfig {
+        paths,
+        hash_bytes: cli.hash_bytes,
+        follow_links: cli.follow_links,
+        verify_full: cli.verify_full,
+    });
+
+    let stdout = io::stdout();
+    let mut out = stdout.lock();
+    if cli.json {
+        serde_json::to_writer_pretty(&mut out, &report).context("failed to write JSON report")?;
+        writeln!(out).context("failed to finish JSON report")?;
+    } else {
+        write_human_report(&mut out, &report).context("failed to write report")?;
+    }
+
+    if report.summary.errors > 0 {
+        Ok(ExitCode::from(2))
+    } else {
+        Ok(ExitCode::SUCCESS)
+    }
+}