Compare commits
6 Commits
f63e2f9b33
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| cb29678285 | |||
| ab14a9d891 | |||
| 4dafcac9dc | |||
| 72906ed4f3 | |||
| 89bf0a047b | |||
| 3b4df2785d |
309
Cargo.lock
generated
309
Cargo.lock
generated
@@ -47,7 +47,7 @@ version = "1.1.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
|
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows-sys",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -58,7 +58,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"anstyle",
|
"anstyle",
|
||||||
"once_cell_polyfill",
|
"once_cell_polyfill",
|
||||||
"windows-sys",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -109,6 +109,12 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bumpalo"
|
||||||
|
version = "3.20.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cc"
|
name = "cc"
|
||||||
version = "1.2.63"
|
version = "1.2.63"
|
||||||
@@ -171,6 +177,31 @@ version = "1.0.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
|
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "console"
|
||||||
|
version = "0.15.11"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
|
||||||
|
dependencies = [
|
||||||
|
"encode_unicode",
|
||||||
|
"libc",
|
||||||
|
"once_cell",
|
||||||
|
"unicode-width",
|
||||||
|
"windows-sys 0.59.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "console"
|
||||||
|
version = "0.16.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87"
|
||||||
|
dependencies = [
|
||||||
|
"encode_unicode",
|
||||||
|
"libc",
|
||||||
|
"unicode-width",
|
||||||
|
"windows-sys 0.61.2",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "constant_time_eq"
|
name = "constant_time_eq"
|
||||||
version = "0.4.2"
|
version = "0.4.2"
|
||||||
@@ -211,6 +242,19 @@ version = "0.8.21"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dialoguer"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "658bce805d770f407bc62102fca7c2c64ceef2fbcb2b8bd19d2765ce093980de"
|
||||||
|
dependencies = [
|
||||||
|
"console 0.15.11",
|
||||||
|
"shell-words",
|
||||||
|
"tempfile",
|
||||||
|
"thiserror",
|
||||||
|
"zeroize",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "disk-checker"
|
name = "disk-checker"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@@ -218,7 +262,9 @@ dependencies = [
|
|||||||
"anyhow",
|
"anyhow",
|
||||||
"blake3",
|
"blake3",
|
||||||
"clap",
|
"clap",
|
||||||
|
"dialoguer",
|
||||||
"ignore",
|
"ignore",
|
||||||
|
"indicatif",
|
||||||
"rayon",
|
"rayon",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
@@ -231,6 +277,12 @@ version = "1.16.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
|
checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "encode_unicode"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "equivalent"
|
name = "equivalent"
|
||||||
version = "1.0.2"
|
version = "1.0.2"
|
||||||
@@ -244,7 +296,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
|
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"windows-sys",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -265,6 +317,30 @@ version = "0.1.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-core"
|
||||||
|
version = "0.3.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-task"
|
||||||
|
version = "0.3.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futures-util"
|
||||||
|
version = "0.3.32"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
|
||||||
|
dependencies = [
|
||||||
|
"futures-core",
|
||||||
|
"futures-task",
|
||||||
|
"pin-project-lite",
|
||||||
|
"slab",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "getrandom"
|
name = "getrandom"
|
||||||
version = "0.4.2"
|
version = "0.4.2"
|
||||||
@@ -346,6 +422,19 @@ dependencies = [
|
|||||||
"serde_core",
|
"serde_core",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "indicatif"
|
||||||
|
version = "0.18.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb"
|
||||||
|
dependencies = [
|
||||||
|
"console 0.16.3",
|
||||||
|
"portable-atomic",
|
||||||
|
"unicode-width",
|
||||||
|
"unit-prefix",
|
||||||
|
"web-time",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "is_terminal_polyfill"
|
name = "is_terminal_polyfill"
|
||||||
version = "1.70.2"
|
version = "1.70.2"
|
||||||
@@ -358,6 +447,18 @@ version = "1.0.18"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
|
checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "js-sys"
|
||||||
|
version = "0.3.99"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"futures-util",
|
||||||
|
"once_cell",
|
||||||
|
"wasm-bindgen",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "leb128fmt"
|
name = "leb128fmt"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@@ -400,6 +501,18 @@ version = "1.70.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pin-project-lite"
|
||||||
|
version = "0.2.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "portable-atomic"
|
||||||
|
version = "1.13.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "prettyplease"
|
name = "prettyplease"
|
||||||
version = "0.2.37"
|
version = "0.2.37"
|
||||||
@@ -481,9 +594,15 @@ dependencies = [
|
|||||||
"errno",
|
"errno",
|
||||||
"libc",
|
"libc",
|
||||||
"linux-raw-sys",
|
"linux-raw-sys",
|
||||||
"windows-sys",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustversion"
|
||||||
|
version = "1.0.22"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "same-file"
|
name = "same-file"
|
||||||
version = "1.0.6"
|
version = "1.0.6"
|
||||||
@@ -542,12 +661,24 @@ dependencies = [
|
|||||||
"zmij",
|
"zmij",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "shell-words"
|
||||||
|
version = "1.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "shlex"
|
name = "shlex"
|
||||||
version = "2.0.1"
|
version = "2.0.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba"
|
checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "slab"
|
||||||
|
version = "0.4.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.11.1"
|
version = "0.11.1"
|
||||||
@@ -575,7 +706,27 @@ dependencies = [
|
|||||||
"getrandom",
|
"getrandom",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"rustix",
|
"rustix",
|
||||||
"windows-sys",
|
"windows-sys 0.61.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thiserror"
|
||||||
|
version = "1.0.69"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
|
||||||
|
dependencies = [
|
||||||
|
"thiserror-impl",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "thiserror-impl"
|
||||||
|
version = "1.0.69"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -584,12 +735,24 @@ version = "1.0.24"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-width"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-xid"
|
name = "unicode-xid"
|
||||||
version = "0.2.6"
|
version = "0.2.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unit-prefix"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utf8parse"
|
name = "utf8parse"
|
||||||
version = "0.2.2"
|
version = "0.2.2"
|
||||||
@@ -624,6 +787,51 @@ dependencies = [
|
|||||||
"wit-bindgen 0.51.0",
|
"wit-bindgen 0.51.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-bindgen"
|
||||||
|
version = "0.2.122"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"once_cell",
|
||||||
|
"rustversion",
|
||||||
|
"wasm-bindgen-macro",
|
||||||
|
"wasm-bindgen-shared",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-bindgen-macro"
|
||||||
|
version = "0.2.122"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6"
|
||||||
|
dependencies = [
|
||||||
|
"quote",
|
||||||
|
"wasm-bindgen-macro-support",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-bindgen-macro-support"
|
||||||
|
version = "0.2.122"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e"
|
||||||
|
dependencies = [
|
||||||
|
"bumpalo",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
"wasm-bindgen-shared",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "wasm-bindgen-shared"
|
||||||
|
version = "0.2.122"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wasm-encoder"
|
name = "wasm-encoder"
|
||||||
version = "0.244.0"
|
version = "0.244.0"
|
||||||
@@ -658,13 +866,23 @@ dependencies = [
|
|||||||
"semver",
|
"semver",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "web-time"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
|
||||||
|
dependencies = [
|
||||||
|
"js-sys",
|
||||||
|
"wasm-bindgen",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi-util"
|
name = "winapi-util"
|
||||||
version = "0.1.11"
|
version = "0.1.11"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows-sys",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -673,6 +891,15 @@ version = "0.2.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.59.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-sys"
|
name = "windows-sys"
|
||||||
version = "0.61.2"
|
version = "0.61.2"
|
||||||
@@ -682,6 +909,70 @@ dependencies = [
|
|||||||
"windows-link",
|
"windows-link",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-targets"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||||
|
dependencies = [
|
||||||
|
"windows_aarch64_gnullvm",
|
||||||
|
"windows_aarch64_msvc",
|
||||||
|
"windows_i686_gnu",
|
||||||
|
"windows_i686_gnullvm",
|
||||||
|
"windows_i686_msvc",
|
||||||
|
"windows_x86_64_gnu",
|
||||||
|
"windows_x86_64_gnullvm",
|
||||||
|
"windows_x86_64_msvc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wit-bindgen"
|
name = "wit-bindgen"
|
||||||
version = "0.51.0"
|
version = "0.51.0"
|
||||||
@@ -776,6 +1067,12 @@ dependencies = [
|
|||||||
"wasmparser",
|
"wasmparser",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "zeroize"
|
||||||
|
version = "1.8.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "zmij"
|
name = "zmij"
|
||||||
version = "1.0.21"
|
version = "1.0.21"
|
||||||
|
|||||||
@@ -7,7 +7,9 @@ edition = "2024"
|
|||||||
anyhow = "1"
|
anyhow = "1"
|
||||||
blake3 = "1"
|
blake3 = "1"
|
||||||
clap = { version = "4", features = ["derive"] }
|
clap = { version = "4", features = ["derive"] }
|
||||||
|
dialoguer = "0.11"
|
||||||
ignore = "0.4"
|
ignore = "0.4"
|
||||||
|
indicatif = "0.18"
|
||||||
rayon = "1"
|
rayon = "1"
|
||||||
serde = { version = "1", features = ["derive"] }
|
serde = { version = "1", features = ["derive"] }
|
||||||
serde_json = "1"
|
serde_json = "1"
|
||||||
|
|||||||
36
README.md
36
README.md
@@ -61,16 +61,50 @@ Verify possible duplicates with a full-file hash pass:
|
|||||||
disk-checker ~/Downloads --verify-full
|
disk-checker ~/Downloads --verify-full
|
||||||
```
|
```
|
||||||
|
|
||||||
Limit hashing workers:
|
Review duplicate groups one by one and choose which path to keep:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
disk-checker ~/Downloads --interactive
|
||||||
|
```
|
||||||
|
|
||||||
|
Interactive mode automatically runs full verification on the duplicate candidate groups (and only those) before prompting. It is non-destructive: instead of deleting files immediately, it writes a shell deletion plan based on the choices you reviewed.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
disk-checker ~/Downloads --interactive --delete-plan review-delete.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
Use the fastest triage mode for huge datasets by grouping same-size files without hashing:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
disk-checker /mnt/storage --size-only --min-size 100MiB --threads 32
|
||||||
|
```
|
||||||
|
|
||||||
|
Limit traversal depth:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
disk-checker /mnt/storage --max-depth 3
|
||||||
|
```
|
||||||
|
|
||||||
|
Limit scanning and hashing workers:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
disk-checker ~/Downloads --threads 4
|
disk-checker ~/Downloads --threads 4
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Disable progress output:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
disk-checker ~/Downloads --no-progress
|
||||||
|
```
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
- By default, duplicate results are **possible duplicates**: same file size plus same first `1MiB` BLAKE3 hash.
|
- By default, duplicate results are **possible duplicates**: same file size plus same first `1MiB` BLAKE3 hash.
|
||||||
- This is intentionally fast because it avoids reading whole files unless you pass `--verify-full`.
|
- This is intentionally fast because it avoids reading whole files unless you pass `--verify-full`.
|
||||||
|
- `--size-only` is even faster for triage, but a match only means the files have the same size; use it to narrow the search, not as proof of duplication.
|
||||||
- Symlinks are not followed by default to avoid surprises and cycles.
|
- Symlinks are not followed by default to avoid surprises and cycles.
|
||||||
- Hard link groups are reported separately because they are multiple paths to the same inode, not extra disk copies.
|
- Hard link groups are reported separately because they are multiple paths to the same inode, not extra disk copies.
|
||||||
- Hidden files and gitignored files are included; this is a disk scanner, not a source-code search tool.
|
- Hidden files and gitignored files are included; this is a disk scanner, not a source-code search tool.
|
||||||
|
- Fast mode does **not** read 30TB of file content. It reads metadata plus up to the hash window for same-size candidate files: for example, 30,000 candidate files at `1MiB` is about 30GiB of content reads.
|
||||||
|
- Fully verifying all 30TB in 10 minutes would require roughly 50GB/s sustained reads. `--verify-full` only fully reads candidate groups, but storage throughput is still the hard limit for exact verification.
|
||||||
|
- Progress output is written to stderr and reflects actual work: traversal shows live counts of discovered entries because the total amount of traversal work is not known in advance, while hashing shows determinate byte progress based on actual reads. Progress is disabled automatically for `--json` and can be disabled with `--no-progress`.
|
||||||
|
|||||||
537
src/lib.rs
537
src/lib.rs
@@ -2,8 +2,11 @@ use std::collections::BTreeMap;
|
|||||||
use std::fs::{self, File, Metadata};
|
use std::fs::{self, File, Metadata};
|
||||||
use std::io::{self, BufReader, Read, Write};
|
use std::io::{self, BufReader, Read, Write};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::sync::mpsc;
|
||||||
|
use std::thread;
|
||||||
|
|
||||||
use ignore::WalkBuilder;
|
use ignore::{WalkBuilder, WalkState};
|
||||||
|
use indicatif::{ProgressBar, ProgressStyle};
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use serde::{Serialize, Serializer};
|
use serde::{Serialize, Serializer};
|
||||||
|
|
||||||
@@ -19,6 +22,11 @@ pub struct ScanConfig {
|
|||||||
pub hash_bytes: u64,
|
pub hash_bytes: u64,
|
||||||
pub follow_links: bool,
|
pub follow_links: bool,
|
||||||
pub verify_full: bool,
|
pub verify_full: bool,
|
||||||
|
pub threads: Option<usize>,
|
||||||
|
pub size_only: bool,
|
||||||
|
pub min_size: u64,
|
||||||
|
pub max_depth: Option<usize>,
|
||||||
|
pub progress: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
@@ -26,6 +34,10 @@ pub struct ScanReport {
|
|||||||
#[serde(serialize_with = "serialize_paths")]
|
#[serde(serialize_with = "serialize_paths")]
|
||||||
pub scanned_paths: Vec<PathBuf>,
|
pub scanned_paths: Vec<PathBuf>,
|
||||||
pub hash_bytes: u64,
|
pub hash_bytes: u64,
|
||||||
|
pub worker_threads: usize,
|
||||||
|
pub size_only: bool,
|
||||||
|
pub min_size: u64,
|
||||||
|
pub max_depth: Option<usize>,
|
||||||
pub followed_symlinks: bool,
|
pub followed_symlinks: bool,
|
||||||
pub full_verification: bool,
|
pub full_verification: bool,
|
||||||
pub summary: ScanSummary,
|
pub summary: ScanSummary,
|
||||||
@@ -135,6 +147,25 @@ enum HashOutcome {
|
|||||||
Issue(ScanIssue),
|
Issue(ScanIssue),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Results gathered while walking a single root path.
///
/// `scan_paths` obtains one accumulator per root from `walk_root_parallel`
/// and merges its fields into the scan-wide totals.
#[derive(Debug, Default)]
|
||||||
|
struct ScanAccumulator {
|
||||||
|
/// File entries found under the root.
files: Vec<FileEntry>,
|
||||||
|
/// Symlink descriptions found under the root.
symlinks: Vec<SymlinkInfo>,
|
||||||
|
/// Special entries found under the root.
/// NOTE(review): presumably entries that are neither regular files, directories, nor symlinks — confirm.
special_entries: Vec<SpecialEntry>,
|
||||||
|
/// Issues recorded during the walk; merged into the scan-wide error list.
errors: Vec<ScanIssue>,
|
||||||
|
/// Number of directories seen under the root.
directories: usize,
|
||||||
|
/// Byte total for this root; `scan_paths` merges it with `saturating_add`.
total_file_bytes: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One classified result for a single walked entry.
///
/// The variants line up with the fields of `ScanAccumulator` (files,
/// directory count, symlinks, special entries, issues).
/// NOTE(review): the producer and consumer are outside this view; presumably
/// walker threads emit these and they are folded into a `ScanAccumulator` —
/// confirm against `walk_root_parallel`.
#[derive(Debug, Clone)]
|
||||||
|
enum ScannedEntry {
|
||||||
|
/// A file entry, carried with its `FileEntry` record.
File(FileEntry),
|
||||||
|
/// A directory; carries no payload.
Directory,
|
||||||
|
/// A symlink, carried with its `SymlinkInfo` description.
Symlink(SymlinkInfo),
|
||||||
|
/// A special entry, carried with its `SpecialEntry` record.
Special(SpecialEntry),
|
||||||
|
/// A problem encountered for this entry.
Issue(ScanIssue),
|
||||||
|
}
|
||||||
|
|
||||||
pub fn parse_byte_count(input: &str) -> Result<u64, String> {
|
pub fn parse_byte_count(input: &str) -> Result<u64, String> {
|
||||||
let trimmed = input.trim();
|
let trimmed = input.trim();
|
||||||
if trimmed.is_empty() {
|
if trimmed.is_empty() {
|
||||||
@@ -184,16 +215,19 @@ pub fn parse_byte_count(input: &str) -> Result<u64, String> {
|
|||||||
|
|
||||||
pub fn scan_paths(config: ScanConfig) -> ScanReport {
|
pub fn scan_paths(config: ScanConfig) -> ScanReport {
|
||||||
let hash_bytes = config.hash_bytes.max(1);
|
let hash_bytes = config.hash_bytes.max(1);
|
||||||
|
let worker_threads = worker_threads(config.threads);
|
||||||
|
let min_size = config.min_size;
|
||||||
let mut files = Vec::new();
|
let mut files = Vec::new();
|
||||||
let mut symlinks = Vec::new();
|
let mut symlinks = Vec::new();
|
||||||
let mut special_entries = Vec::new();
|
let mut special_entries = Vec::new();
|
||||||
let mut errors = Vec::new();
|
let mut errors = Vec::new();
|
||||||
let mut directories = 0;
|
let mut directories: usize = 0;
|
||||||
let mut total_file_bytes = 0;
|
let mut total_file_bytes: u64 = 0;
|
||||||
|
|
||||||
for root in &config.paths {
|
for root in &config.paths {
|
||||||
let mut builder = WalkBuilder::new(root);
|
let mut builder = WalkBuilder::new(root);
|
||||||
builder
|
builder
|
||||||
|
.threads(worker_threads)
|
||||||
.follow_links(config.follow_links)
|
.follow_links(config.follow_links)
|
||||||
.hidden(false)
|
.hidden(false)
|
||||||
.ignore(false)
|
.ignore(false)
|
||||||
@@ -201,59 +235,21 @@ pub fn scan_paths(config: ScanConfig) -> ScanReport {
|
|||||||
.git_global(false)
|
.git_global(false)
|
||||||
.git_exclude(false)
|
.git_exclude(false)
|
||||||
.parents(false);
|
.parents(false);
|
||||||
|
if let Some(max_depth) = config.max_depth {
|
||||||
for entry in builder.build() {
|
builder.max_depth(Some(max_depth));
|
||||||
match entry {
|
|
||||||
Ok(entry) => {
|
|
||||||
let path = entry.path().to_path_buf();
|
|
||||||
let metadata = match fs::symlink_metadata(&path) {
|
|
||||||
Ok(metadata) => metadata,
|
|
||||||
Err(error) => {
|
|
||||||
errors.push(issue(path, format!("could not read metadata: {error}")));
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
if metadata.file_type().is_symlink() {
|
|
||||||
symlinks.push(describe_symlink(&path));
|
|
||||||
if !config.follow_links {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
match fs::metadata(&path) {
|
|
||||||
Ok(target_metadata) => {
|
|
||||||
process_non_symlink_entry(
|
|
||||||
path,
|
|
||||||
&target_metadata,
|
|
||||||
&mut files,
|
|
||||||
&mut special_entries,
|
|
||||||
&mut directories,
|
|
||||||
&mut total_file_bytes,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
Err(error) => {
|
|
||||||
errors.push(issue(
|
|
||||||
path,
|
|
||||||
format!("could not follow symlink target: {error}"),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
process_non_symlink_entry(
|
|
||||||
path,
|
|
||||||
&metadata,
|
|
||||||
&mut files,
|
|
||||||
&mut special_entries,
|
|
||||||
&mut directories,
|
|
||||||
&mut total_file_bytes,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Err(error) => {
|
|
||||||
errors.push(issue(PathBuf::from("<walk>"), error.to_string()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let accumulator = walk_root_parallel(
|
||||||
|
&mut builder,
|
||||||
|
config.follow_links,
|
||||||
|
walk_progress(config.progress, root),
|
||||||
|
);
|
||||||
|
files.extend(accumulator.files);
|
||||||
|
symlinks.extend(accumulator.symlinks);
|
||||||
|
special_entries.extend(accumulator.special_entries);
|
||||||
|
errors.extend(accumulator.errors);
|
||||||
|
directories += accumulator.directories;
|
||||||
|
total_file_bytes = total_file_bytes.saturating_add(accumulator.total_file_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
files.sort_by(|left, right| left.path.cmp(&right.path));
|
files.sort_by(|left, right| left.path.cmp(&right.path));
|
||||||
@@ -261,17 +257,43 @@ pub fn scan_paths(config: ScanConfig) -> ScanReport {
|
|||||||
special_entries.sort_by(|left, right| left.path.cmp(&right.path));
|
special_entries.sort_by(|left, right| left.path.cmp(&right.path));
|
||||||
|
|
||||||
let hard_links = find_hard_links(&files);
|
let hard_links = find_hard_links(&files);
|
||||||
let same_size_candidates = same_size_candidates(&files);
|
let same_size_candidates = same_size_candidates(&files, min_size);
|
||||||
let same_size_candidate_files = same_size_candidates.len();
|
let same_size_candidate_files = same_size_candidates.len();
|
||||||
|
|
||||||
let partial_outcomes = hash_files(&same_size_candidates, hash_bytes, false);
|
let possible_duplicates = if config.size_only {
|
||||||
let mut partial_hashes = Vec::new();
|
size_only_duplicate_groups(same_size_candidates.clone())
|
||||||
collect_hash_outcomes(partial_outcomes, &mut partial_hashes, &mut errors);
|
} else {
|
||||||
let possible_duplicates = duplicate_groups(partial_hashes);
|
let partial_outcomes = hash_files(
|
||||||
|
&same_size_candidates,
|
||||||
|
hash_bytes,
|
||||||
|
false,
|
||||||
|
hash_progress(
|
||||||
|
config.progress,
|
||||||
|
&same_size_candidates,
|
||||||
|
hash_bytes,
|
||||||
|
false,
|
||||||
|
"Hashing file prefixes",
|
||||||
|
),
|
||||||
|
);
|
||||||
|
let mut partial_hashes = Vec::new();
|
||||||
|
collect_hash_outcomes(partial_outcomes, &mut partial_hashes, &mut errors);
|
||||||
|
duplicate_groups(partial_hashes)
|
||||||
|
};
|
||||||
|
|
||||||
let verified_duplicates = if config.verify_full {
|
let verified_duplicates = if config.verify_full {
|
||||||
let full_candidates = files_from_duplicate_groups(&possible_duplicates);
|
let full_candidates = files_from_duplicate_groups(&possible_duplicates);
|
||||||
let full_outcomes = hash_files(&full_candidates, hash_bytes, true);
|
let full_outcomes = hash_files(
|
||||||
|
&full_candidates,
|
||||||
|
hash_bytes,
|
||||||
|
true,
|
||||||
|
hash_progress(
|
||||||
|
config.progress,
|
||||||
|
&full_candidates,
|
||||||
|
hash_bytes,
|
||||||
|
true,
|
||||||
|
"Full verification hashing",
|
||||||
|
),
|
||||||
|
);
|
||||||
let mut full_hashes = Vec::new();
|
let mut full_hashes = Vec::new();
|
||||||
collect_hash_outcomes(full_outcomes, &mut full_hashes, &mut errors);
|
collect_hash_outcomes(full_outcomes, &mut full_hashes, &mut errors);
|
||||||
duplicate_groups(full_hashes)
|
duplicate_groups(full_hashes)
|
||||||
@@ -301,6 +323,10 @@ pub fn scan_paths(config: ScanConfig) -> ScanReport {
|
|||||||
ScanReport {
|
ScanReport {
|
||||||
scanned_paths: config.paths,
|
scanned_paths: config.paths,
|
||||||
hash_bytes,
|
hash_bytes,
|
||||||
|
worker_threads,
|
||||||
|
size_only: config.size_only,
|
||||||
|
min_size,
|
||||||
|
max_depth: config.max_depth,
|
||||||
followed_symlinks: config.follow_links,
|
followed_symlinks: config.follow_links,
|
||||||
full_verification: config.verify_full,
|
full_verification: config.verify_full,
|
||||||
summary: ScanSummary {
|
summary: ScanSummary {
|
||||||
@@ -327,30 +353,202 @@ pub fn scan_paths(config: ScanConfig) -> ScanReport {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn process_non_symlink_entry(
|
fn worker_threads(configured_threads: Option<usize>) -> usize {
|
||||||
path: PathBuf,
|
configured_threads.unwrap_or_else(|| {
|
||||||
metadata: &Metadata,
|
thread::available_parallelism()
|
||||||
files: &mut Vec<FileEntry>,
|
.map(usize::from)
|
||||||
special_entries: &mut Vec<SpecialEntry>,
|
.unwrap_or(1)
|
||||||
directories: &mut usize,
|
})
|
||||||
total_file_bytes: &mut u64,
|
}
|
||||||
) {
|
|
||||||
|
fn walk_root_parallel(
|
||||||
|
builder: &mut WalkBuilder,
|
||||||
|
follow_links: bool,
|
||||||
|
progress: Option<ProgressBar>,
|
||||||
|
) -> ScanAccumulator {
|
||||||
|
let (sender, receiver) = mpsc::channel();
|
||||||
|
let collector = thread::spawn(move || {
|
||||||
|
let mut accumulator = ScanAccumulator::default();
|
||||||
|
for scanned_entry in receiver {
|
||||||
|
collect_scanned_entry(scanned_entry, &mut accumulator);
|
||||||
|
update_walk_progress(progress.as_ref(), &accumulator, false);
|
||||||
|
}
|
||||||
|
update_walk_progress(progress.as_ref(), &accumulator, true);
|
||||||
|
accumulator
|
||||||
|
});
|
||||||
|
|
||||||
|
builder.build_parallel().run(|| {
|
||||||
|
let sender = sender.clone();
|
||||||
|
Box::new(move |entry| {
|
||||||
|
for scanned_entry in classify_walk_entry(entry, follow_links) {
|
||||||
|
if sender.send(scanned_entry).is_err() {
|
||||||
|
return WalkState::Quit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
WalkState::Continue
|
||||||
|
})
|
||||||
|
});
|
||||||
|
drop(sender);
|
||||||
|
|
||||||
|
collector
|
||||||
|
.join()
|
||||||
|
.expect("scan result collector thread should not panic")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn walk_progress(enabled: bool, root: &Path) -> Option<ProgressBar> {
|
||||||
|
if !enabled {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let progress = ProgressBar::new_spinner();
|
||||||
|
progress.set_style(
|
||||||
|
ProgressStyle::with_template("{spinner:.green} {msg}")
|
||||||
|
.expect("valid traversal progress template"),
|
||||||
|
);
|
||||||
|
progress.set_message(format!(
|
||||||
|
"Scanning {} — 0 files, 0 dirs, 0 symlinks, 0 errors",
|
||||||
|
root.display()
|
||||||
|
));
|
||||||
|
Some(progress)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Refreshes the traversal spinner from the accumulator's current counts.
///
/// Does nothing when no spinner is present. With `done` set, the spinner is
/// finished with a final summary. Otherwise the message is refreshed only on
/// the first recorded entry and on every multiple of 100 entries.
fn update_walk_progress(progress: Option<&ProgressBar>, accumulator: &ScanAccumulator, done: bool) {
    let Some(bar) = progress else {
        return;
    };

    let files = accumulator.files.len();
    let dirs = accumulator.directories;
    let symlinks = accumulator.symlinks.len();
    let special = accumulator.special_entries.len();
    let errors = accumulator.errors.len();

    if done {
        bar.finish_with_message(format!(
            "Scanned {} files, {} dirs, {} symlinks, {} special entries, {} errors",
            files, dirs, symlinks, special, errors
        ));
        return;
    }

    let seen = accumulator.interactions();
    if seen == 1 || seen.is_multiple_of(100) {
        bar.tick();
        bar.set_message(format!(
            "Scanning — {} files, {} dirs, {} symlinks, {} special entries, {} errors",
            files, dirs, symlinks, special, errors
        ));
    }
}
|
||||||
|
|
||||||
|
fn hash_progress(
|
||||||
|
enabled: bool,
|
||||||
|
files: &[FileEntry],
|
||||||
|
hash_bytes: u64,
|
||||||
|
full_file: bool,
|
||||||
|
message: &'static str,
|
||||||
|
) -> Option<ProgressBar> {
|
||||||
|
if !enabled || files.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let total_bytes = files
|
||||||
|
.iter()
|
||||||
|
.map(|file| {
|
||||||
|
if full_file {
|
||||||
|
file.size
|
||||||
|
} else {
|
||||||
|
file.size.min(hash_bytes)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.sum::<u64>();
|
||||||
|
|
||||||
|
if total_bytes == 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let progress = ProgressBar::new(total_bytes);
|
||||||
|
progress.set_style(
|
||||||
|
ProgressStyle::with_template(
|
||||||
|
"{msg} [{elapsed_precise}] [{wide_bar:.cyan/blue}] {binary_bytes}/{binary_total_bytes} {binary_bytes_per_sec}",
|
||||||
|
)
|
||||||
|
.expect("valid hashing progress template")
|
||||||
|
.progress_chars("=>-"),
|
||||||
|
);
|
||||||
|
progress.set_message(message);
|
||||||
|
Some(progress)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn classify_walk_entry(
|
||||||
|
entry: Result<ignore::DirEntry, ignore::Error>,
|
||||||
|
follow_links: bool,
|
||||||
|
) -> Vec<ScannedEntry> {
|
||||||
|
match entry {
|
||||||
|
Ok(entry) => classify_path(entry.path().to_path_buf(), follow_links),
|
||||||
|
Err(error) => vec![ScannedEntry::Issue(issue(
|
||||||
|
PathBuf::from("<walk>"),
|
||||||
|
error.to_string(),
|
||||||
|
))],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn classify_path(path: PathBuf, follow_links: bool) -> Vec<ScannedEntry> {
|
||||||
|
let metadata = match fs::symlink_metadata(&path) {
|
||||||
|
Ok(metadata) => metadata,
|
||||||
|
Err(error) => {
|
||||||
|
return vec![ScannedEntry::Issue(issue(
|
||||||
|
path,
|
||||||
|
format!("could not read metadata: {error}"),
|
||||||
|
))];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if !metadata.file_type().is_symlink() {
|
||||||
|
return vec![non_symlink_entry(path, &metadata)];
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut entries = vec![ScannedEntry::Symlink(describe_symlink(&path))];
|
||||||
|
if follow_links {
|
||||||
|
match fs::metadata(&path) {
|
||||||
|
Ok(target_metadata) => entries.push(non_symlink_entry(path, &target_metadata)),
|
||||||
|
Err(error) => entries.push(ScannedEntry::Issue(issue(
|
||||||
|
path,
|
||||||
|
format!("could not follow symlink target: {error}"),
|
||||||
|
))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
entries
|
||||||
|
}
|
||||||
|
|
||||||
|
fn non_symlink_entry(path: PathBuf, metadata: &Metadata) -> ScannedEntry {
|
||||||
let file_type = metadata.file_type();
|
let file_type = metadata.file_type();
|
||||||
if file_type.is_file() {
|
if file_type.is_file() {
|
||||||
*total_file_bytes = total_file_bytes.saturating_add(metadata.len());
|
ScannedEntry::File(FileEntry {
|
||||||
files.push(FileEntry {
|
|
||||||
path,
|
path,
|
||||||
size: metadata.len(),
|
size: metadata.len(),
|
||||||
device: metadata.dev(),
|
device: metadata.dev(),
|
||||||
inode: metadata.ino(),
|
inode: metadata.ino(),
|
||||||
});
|
})
|
||||||
} else if file_type.is_dir() {
|
} else if file_type.is_dir() {
|
||||||
*directories += 1;
|
ScannedEntry::Directory
|
||||||
} else {
|
} else {
|
||||||
special_entries.push(SpecialEntry {
|
ScannedEntry::Special(SpecialEntry {
|
||||||
path,
|
path,
|
||||||
kind: special_entry_kind(&file_type),
|
kind: special_entry_kind(&file_type),
|
||||||
});
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn collect_scanned_entry(entry: ScannedEntry, accumulator: &mut ScanAccumulator) {
|
||||||
|
match entry {
|
||||||
|
ScannedEntry::File(file) => {
|
||||||
|
accumulator.total_file_bytes = accumulator.total_file_bytes.saturating_add(file.size);
|
||||||
|
accumulator.files.push(file);
|
||||||
|
}
|
||||||
|
ScannedEntry::Directory => accumulator.directories += 1,
|
||||||
|
ScannedEntry::Symlink(symlink) => accumulator.symlinks.push(symlink),
|
||||||
|
ScannedEntry::Special(special_entry) => accumulator.special_entries.push(special_entry),
|
||||||
|
ScannedEntry::Issue(error) => accumulator.errors.push(error),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -412,10 +610,13 @@ fn find_hard_links(files: &[FileEntry]) -> Vec<HardLinkGroup> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn same_size_candidates(files: &[FileEntry]) -> Vec<FileEntry> {
|
fn same_size_candidates(files: &[FileEntry], min_size: u64) -> Vec<FileEntry> {
|
||||||
let files = unique_file_id_entries(files);
|
let files = unique_file_id_entries(files);
|
||||||
let mut by_size: BTreeMap<u64, Vec<FileEntry>> = BTreeMap::new();
|
let mut by_size: BTreeMap<u64, Vec<FileEntry>> = BTreeMap::new();
|
||||||
for file in files {
|
for file in files {
|
||||||
|
if file.size < min_size {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
by_size.entry(file.size).or_default().push(file);
|
by_size.entry(file.size).or_default().push(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -426,6 +627,25 @@ fn same_size_candidates(files: &[FileEntry]) -> Vec<FileEntry> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn size_only_duplicate_groups(files: Vec<FileEntry>) -> Vec<DuplicateGroup> {
|
||||||
|
let mut by_size: BTreeMap<u64, Vec<PathBuf>> = BTreeMap::new();
|
||||||
|
for file in files {
|
||||||
|
by_size.entry(file.size).or_default().push(file.path);
|
||||||
|
}
|
||||||
|
|
||||||
|
by_size
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|(size, mut paths)| {
|
||||||
|
paths.sort();
|
||||||
|
(paths.len() > 1).then_some(DuplicateGroup {
|
||||||
|
size,
|
||||||
|
hash: "size-only".to_string(),
|
||||||
|
paths,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
fn unique_file_id_entries(files: &[FileEntry]) -> Vec<FileEntry> {
|
fn unique_file_id_entries(files: &[FileEntry]) -> Vec<FileEntry> {
|
||||||
let mut by_file_id: BTreeMap<(u64, u64), &FileEntry> = BTreeMap::new();
|
let mut by_file_id: BTreeMap<(u64, u64), &FileEntry> = BTreeMap::new();
|
||||||
for file in files {
|
for file in files {
|
||||||
@@ -435,14 +655,20 @@ fn unique_file_id_entries(files: &[FileEntry]) -> Vec<FileEntry> {
|
|||||||
by_file_id.into_values().cloned().collect()
|
by_file_id.into_values().cloned().collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn hash_files(files: &[FileEntry], hash_bytes: u64, full_file: bool) -> Vec<HashOutcome> {
|
fn hash_files(
|
||||||
files
|
files: &[FileEntry],
|
||||||
|
hash_bytes: u64,
|
||||||
|
full_file: bool,
|
||||||
|
progress: Option<ProgressBar>,
|
||||||
|
) -> Vec<HashOutcome> {
|
||||||
|
let outcomes = files
|
||||||
.par_iter()
|
.par_iter()
|
||||||
.map(|file| {
|
.map(|file| {
|
||||||
|
let file_progress = progress.clone();
|
||||||
let hash_result = if full_file {
|
let hash_result = if full_file {
|
||||||
hash_full_file(&file.path)
|
hash_full_file(&file.path, file_progress.as_ref())
|
||||||
} else {
|
} else {
|
||||||
hash_file_prefix(&file.path, hash_bytes)
|
hash_file_prefix(&file.path, hash_bytes, file_progress.as_ref())
|
||||||
};
|
};
|
||||||
|
|
||||||
match hash_result {
|
match hash_result {
|
||||||
@@ -457,7 +683,13 @@ fn hash_files(files: &[FileEntry], hash_bytes: u64, full_file: bool) -> Vec<Hash
|
|||||||
)),
|
)),
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.collect()
|
.collect();
|
||||||
|
|
||||||
|
if let Some(progress) = progress {
|
||||||
|
progress.finish_and_clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
outcomes
|
||||||
}
|
}
|
||||||
|
|
||||||
fn collect_hash_outcomes(
|
fn collect_hash_outcomes(
|
||||||
@@ -505,7 +737,11 @@ fn files_from_duplicate_groups(groups: &[DuplicateGroup]) -> Vec<FileEntry> {
|
|||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn hash_file_prefix(path: &Path, hash_bytes: u64) -> io::Result<String> {
|
fn hash_file_prefix(
|
||||||
|
path: &Path,
|
||||||
|
hash_bytes: u64,
|
||||||
|
progress: Option<&ProgressBar>,
|
||||||
|
) -> io::Result<String> {
|
||||||
let file = File::open(path)?;
|
let file = File::open(path)?;
|
||||||
let mut reader = BufReader::new(file);
|
let mut reader = BufReader::new(file);
|
||||||
let mut hasher = blake3::Hasher::new();
|
let mut hasher = blake3::Hasher::new();
|
||||||
@@ -518,6 +754,9 @@ fn hash_file_prefix(path: &Path, hash_bytes: u64) -> io::Result<String> {
|
|||||||
if bytes_read == 0 {
|
if bytes_read == 0 {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if let Some(progress) = progress {
|
||||||
|
progress.inc(bytes_read as u64);
|
||||||
|
}
|
||||||
hasher.update(&buffer[..bytes_read]);
|
hasher.update(&buffer[..bytes_read]);
|
||||||
remaining -= bytes_read as u64;
|
remaining -= bytes_read as u64;
|
||||||
}
|
}
|
||||||
@@ -525,7 +764,7 @@ fn hash_file_prefix(path: &Path, hash_bytes: u64) -> io::Result<String> {
|
|||||||
Ok(hasher.finalize().to_hex().to_string())
|
Ok(hasher.finalize().to_hex().to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn hash_full_file(path: &Path) -> io::Result<String> {
|
fn hash_full_file(path: &Path, progress: Option<&ProgressBar>) -> io::Result<String> {
|
||||||
let file = File::open(path)?;
|
let file = File::open(path)?;
|
||||||
let mut reader = BufReader::new(file);
|
let mut reader = BufReader::new(file);
|
||||||
let mut hasher = blake3::Hasher::new();
|
let mut hasher = blake3::Hasher::new();
|
||||||
@@ -536,6 +775,9 @@ fn hash_full_file(path: &Path) -> io::Result<String> {
|
|||||||
if bytes_read == 0 {
|
if bytes_read == 0 {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if let Some(progress) = progress {
|
||||||
|
progress.inc(bytes_read as u64);
|
||||||
|
}
|
||||||
hasher.update(&buffer[..bytes_read]);
|
hasher.update(&buffer[..bytes_read]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -555,6 +797,29 @@ pub fn write_human_report(mut writer: impl Write, report: &ScanReport) -> io::Re
|
|||||||
join_paths(&report.scanned_paths)
|
join_paths(&report.scanned_paths)
|
||||||
)?;
|
)?;
|
||||||
writeln!(writer, "Hash window: {}", format_bytes(report.hash_bytes))?;
|
writeln!(writer, "Hash window: {}", format_bytes(report.hash_bytes))?;
|
||||||
|
writeln!(writer, "Worker threads: {}", report.worker_threads)?;
|
||||||
|
writeln!(
|
||||||
|
writer,
|
||||||
|
"Duplicate mode: {}",
|
||||||
|
if report.size_only {
|
||||||
|
"size only"
|
||||||
|
} else {
|
||||||
|
"size + partial hash"
|
||||||
|
}
|
||||||
|
)?;
|
||||||
|
writeln!(
|
||||||
|
writer,
|
||||||
|
"Minimum duplicate size: {}",
|
||||||
|
format_bytes(report.min_size)
|
||||||
|
)?;
|
||||||
|
writeln!(
|
||||||
|
writer,
|
||||||
|
"Maximum depth: {}",
|
||||||
|
report
|
||||||
|
.max_depth
|
||||||
|
.map(|depth| depth.to_string())
|
||||||
|
.unwrap_or_else(|| "unlimited".to_string())
|
||||||
|
)?;
|
||||||
writeln!(
|
writeln!(
|
||||||
writer,
|
writer,
|
||||||
"Symlink traversal: {}",
|
"Symlink traversal: {}",
|
||||||
@@ -586,7 +851,7 @@ pub fn write_human_report(mut writer: impl Write, report: &ScanReport) -> io::Re
|
|||||||
)?;
|
)?;
|
||||||
writeln!(
|
writeln!(
|
||||||
writer,
|
writer,
|
||||||
"Same-size files hashed: {}",
|
"Same-size duplicate candidates: {}",
|
||||||
report.summary.same_size_candidate_files
|
report.summary.same_size_candidate_files
|
||||||
)?;
|
)?;
|
||||||
writeln!(
|
writeln!(
|
||||||
@@ -621,7 +886,11 @@ pub fn write_human_report(mut writer: impl Write, report: &ScanReport) -> io::Re
|
|||||||
|
|
||||||
write_duplicate_section(
|
write_duplicate_section(
|
||||||
&mut writer,
|
&mut writer,
|
||||||
"Possible duplicates (same size + partial hash)",
|
if report.size_only {
|
||||||
|
"Possible duplicates (same size only)"
|
||||||
|
} else {
|
||||||
|
"Possible duplicates (same size + partial hash)"
|
||||||
|
},
|
||||||
&report.possible_duplicates,
|
&report.possible_duplicates,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@@ -844,6 +1113,16 @@ impl SpecialEntryKind {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl ScanAccumulator {
|
||||||
|
fn interactions(&self) -> usize {
|
||||||
|
self.files.len()
|
||||||
|
+ self.directories
|
||||||
|
+ self.symlinks.len()
|
||||||
|
+ self.special_entries.len()
|
||||||
|
+ self.errors.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
@@ -874,6 +1153,11 @@ mod tests {
|
|||||||
hash_bytes: 3,
|
hash_bytes: 3,
|
||||||
follow_links: false,
|
follow_links: false,
|
||||||
verify_full: false,
|
verify_full: false,
|
||||||
|
threads: None,
|
||||||
|
size_only: false,
|
||||||
|
min_size: 0,
|
||||||
|
max_depth: None,
|
||||||
|
progress: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
assert_eq!(report.summary.files, 3);
|
assert_eq!(report.summary.files, 3);
|
||||||
@@ -897,12 +1181,76 @@ mod tests {
|
|||||||
hash_bytes: 3,
|
hash_bytes: 3,
|
||||||
follow_links: false,
|
follow_links: false,
|
||||||
verify_full: true,
|
verify_full: true,
|
||||||
|
threads: None,
|
||||||
|
size_only: false,
|
||||||
|
min_size: 0,
|
||||||
|
max_depth: None,
|
||||||
|
progress: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
assert_eq!(report.possible_duplicates.len(), 1);
|
assert_eq!(report.possible_duplicates.len(), 1);
|
||||||
assert!(report.verified_duplicates.is_empty());
|
assert!(report.verified_duplicates.is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn size_only_mode_groups_same_size_without_hashing_prefixes() {
    let temp = TempDir::new().expect("temp dir");
    let root = temp.path();
    let first = root.join("first.bin");
    let second = root.join("second.bin");

    // Equal length, different bytes: only size-only grouping pairs these two.
    fs::write(&first, b"abcdef").expect("write first");
    fs::write(&second, b"uvwxyz").expect("write second");

    let config = ScanConfig {
        paths: vec![root.to_path_buf()],
        hash_bytes: DEFAULT_HASH_BYTES,
        follow_links: false,
        verify_full: false,
        threads: None,
        size_only: true,
        min_size: 0,
        max_depth: None,
        progress: false,
    };
    let report = scan_paths(config);

    assert_eq!(report.possible_duplicates.len(), 1);
    let group = &report.possible_duplicates[0];
    assert_eq!(group.hash, "size-only");
    assert!(group.paths.contains(&first));
    assert!(group.paths.contains(&second));
}
|
||||||
|
|
||||||
|
#[test]
fn min_size_filters_duplicate_candidates_before_hashing() {
    let temp = TempDir::new().expect("temp dir");
    let root = temp.path();
    let small_first = root.join("small-first.bin");
    let small_second = root.join("small-second.bin");
    let large_first = root.join("large-first.bin");
    let large_second = root.join("large-second.bin");

    // Two identical pairs: 3 bytes (below the threshold) and 6 bytes (above it).
    fs::write(&small_first, b"abc").expect("write small first");
    fs::write(&small_second, b"abc").expect("write small second");
    fs::write(&large_first, b"abcdef").expect("write large first");
    fs::write(&large_second, b"abcdef").expect("write large second");

    let config = ScanConfig {
        paths: vec![root.to_path_buf()],
        hash_bytes: DEFAULT_HASH_BYTES,
        follow_links: false,
        verify_full: false,
        threads: None,
        size_only: false,
        min_size: 4,
        max_depth: None,
        progress: false,
    };
    let report = scan_paths(config);

    assert_eq!(report.possible_duplicates.len(), 1);
    let group = &report.possible_duplicates[0];
    assert!(group.paths.contains(&large_first));
    assert!(group.paths.contains(&large_second));
    assert!(!group.paths.contains(&small_first));
    assert!(!group.paths.contains(&small_second));
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
#[test]
|
#[test]
|
||||||
fn reports_symlinks_without_following_them() {
|
fn reports_symlinks_without_following_them() {
|
||||||
@@ -919,6 +1267,11 @@ mod tests {
|
|||||||
hash_bytes: DEFAULT_HASH_BYTES,
|
hash_bytes: DEFAULT_HASH_BYTES,
|
||||||
follow_links: false,
|
follow_links: false,
|
||||||
verify_full: false,
|
verify_full: false,
|
||||||
|
threads: None,
|
||||||
|
size_only: false,
|
||||||
|
min_size: 0,
|
||||||
|
max_depth: None,
|
||||||
|
progress: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
assert_eq!(report.summary.files, 1);
|
assert_eq!(report.summary.files, 1);
|
||||||
@@ -941,6 +1294,11 @@ mod tests {
|
|||||||
hash_bytes: DEFAULT_HASH_BYTES,
|
hash_bytes: DEFAULT_HASH_BYTES,
|
||||||
follow_links: false,
|
follow_links: false,
|
||||||
verify_full: false,
|
verify_full: false,
|
||||||
|
threads: None,
|
||||||
|
size_only: false,
|
||||||
|
min_size: 0,
|
||||||
|
max_depth: None,
|
||||||
|
progress: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
assert_eq!(report.summary.files, 2);
|
assert_eq!(report.summary.files, 2);
|
||||||
@@ -965,6 +1323,11 @@ mod tests {
|
|||||||
hash_bytes: DEFAULT_HASH_BYTES,
|
hash_bytes: DEFAULT_HASH_BYTES,
|
||||||
follow_links: false,
|
follow_links: false,
|
||||||
verify_full: false,
|
verify_full: false,
|
||||||
|
threads: None,
|
||||||
|
size_only: false,
|
||||||
|
min_size: 0,
|
||||||
|
max_depth: None,
|
||||||
|
progress: false,
|
||||||
});
|
});
|
||||||
|
|
||||||
let json = serde_json::to_string(&report).expect("serialize report with lossy path");
|
let json = serde_json::to_string(&report).expect("serialize report with lossy path");
|
||||||
@@ -977,6 +1340,10 @@ mod tests {
|
|||||||
let report = ScanReport {
|
let report = ScanReport {
|
||||||
scanned_paths: vec![PathBuf::from(".")],
|
scanned_paths: vec![PathBuf::from(".")],
|
||||||
hash_bytes: DEFAULT_HASH_BYTES,
|
hash_bytes: DEFAULT_HASH_BYTES,
|
||||||
|
worker_threads: 1,
|
||||||
|
size_only: false,
|
||||||
|
min_size: 0,
|
||||||
|
max_depth: None,
|
||||||
followed_symlinks: false,
|
followed_symlinks: false,
|
||||||
full_verification: false,
|
full_verification: false,
|
||||||
summary: ScanSummary {
|
summary: ScanSummary {
|
||||||
|
|||||||
213
src/main.rs
213
src/main.rs
@@ -1,10 +1,12 @@
|
|||||||
use std::io::{self, Write};
|
use std::fs::OpenOptions;
|
||||||
use std::path::PathBuf;
|
use std::io::{self, IsTerminal, Write};
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
use std::process::ExitCode;
|
use std::process::ExitCode;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::{Context, bail};
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use disk_checker::{ScanConfig, parse_byte_count, scan_paths, write_human_report};
|
use dialoguer::{Confirm, Select, theme::ColorfulTheme};
|
||||||
|
use disk_checker::{DuplicateGroup, ScanConfig, parse_byte_count, scan_paths, write_human_report};
|
||||||
|
|
||||||
#[derive(Debug, Parser)]
|
#[derive(Debug, Parser)]
|
||||||
#[command(
|
#[command(
|
||||||
@@ -31,13 +33,37 @@ struct Cli {
|
|||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
verify_full: bool,
|
verify_full: bool,
|
||||||
|
|
||||||
/// Number of worker threads used for hashing. Defaults to Rayon automatic sizing.
|
/// Group duplicate candidates by size only. Fastest mode for huge triage; less precise.
|
||||||
|
#[arg(long)]
|
||||||
|
size_only: bool,
|
||||||
|
|
||||||
|
/// Ignore duplicate candidates smaller than this size. Accepts units like 100MiB or 1GB.
|
||||||
|
#[arg(long, default_value = "0", value_parser = parse_min_size)]
|
||||||
|
min_size: u64,
|
||||||
|
|
||||||
|
/// Maximum directory depth to scan. Depth 0 means only the provided path itself.
|
||||||
|
#[arg(long, value_parser = parse_nonzero_or_zero_usize)]
|
||||||
|
max_depth: Option<usize>,
|
||||||
|
|
||||||
|
/// Number of worker threads used for scanning and hashing. Defaults to CPU parallelism.
|
||||||
#[arg(long, value_parser = parse_thread_count)]
|
#[arg(long, value_parser = parse_thread_count)]
|
||||||
threads: Option<usize>,
|
threads: Option<usize>,
|
||||||
|
|
||||||
/// Print machine-readable JSON instead of the human summary.
|
/// Print machine-readable JSON instead of the human summary.
|
||||||
#[arg(long)]
|
#[arg(long)]
|
||||||
json: bool,
|
json: bool,
|
||||||
|
|
||||||
|
/// Disable progress output.
|
||||||
|
#[arg(long)]
|
||||||
|
no_progress: bool,
|
||||||
|
|
||||||
|
/// Interactively review duplicate groups and choose which path to keep.
|
||||||
|
#[arg(long)]
|
||||||
|
interactive: bool,
|
||||||
|
|
||||||
|
/// Shell script path for planned deletes when --interactive is used.
|
||||||
|
#[arg(long, default_value = "disk-checker-delete-plan.sh")]
|
||||||
|
delete_plan: PathBuf,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_thread_count(input: &str) -> Result<usize, String> {
|
fn parse_thread_count(input: &str) -> Result<usize, String> {
|
||||||
@@ -51,8 +77,28 @@ fn parse_thread_count(input: &str) -> Result<usize, String> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parses a `--max-depth` argument as a `usize`; zero is accepted.
///
/// # Errors
///
/// Returns a message naming the rejected input when it is not a valid
/// unsigned integer.
fn parse_nonzero_or_zero_usize(input: &str) -> Result<usize, String> {
    match input.parse::<usize>() {
        Ok(depth) => Ok(depth),
        Err(error) => Err(format!("invalid depth {input:?}: {error}")),
    }
}
|
||||||
|
|
||||||
|
fn parse_min_size(input: &str) -> Result<u64, String> {
|
||||||
|
if input.trim() == "0" {
|
||||||
|
Ok(0)
|
||||||
|
} else {
|
||||||
|
parse_byte_count(input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn main() -> anyhow::Result<ExitCode> {
|
fn main() -> anyhow::Result<ExitCode> {
|
||||||
let cli = Cli::parse();
|
let cli = Cli::parse();
|
||||||
|
if cli.interactive && cli.json {
|
||||||
|
bail!(
|
||||||
|
"--interactive cannot be combined with --json because prompts would contaminate JSON output"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let verify_full = cli.verify_full || cli.interactive;
|
||||||
|
|
||||||
if let Some(threads) = cli.threads {
|
if let Some(threads) = cli.threads {
|
||||||
rayon::ThreadPoolBuilder::new()
|
rayon::ThreadPoolBuilder::new()
|
||||||
@@ -71,7 +117,12 @@ fn main() -> anyhow::Result<ExitCode> {
|
|||||||
paths,
|
paths,
|
||||||
hash_bytes: cli.hash_bytes,
|
hash_bytes: cli.hash_bytes,
|
||||||
follow_links: cli.follow_links,
|
follow_links: cli.follow_links,
|
||||||
verify_full: cli.verify_full,
|
verify_full,
|
||||||
|
threads: cli.threads,
|
||||||
|
size_only: cli.size_only,
|
||||||
|
min_size: cli.min_size,
|
||||||
|
max_depth: cli.max_depth,
|
||||||
|
progress: !cli.no_progress && !cli.json && io::stderr().is_terminal(),
|
||||||
});
|
});
|
||||||
|
|
||||||
let stdout = io::stdout();
|
let stdout = io::stdout();
|
||||||
@@ -82,6 +133,11 @@ fn main() -> anyhow::Result<ExitCode> {
|
|||||||
} else {
|
} else {
|
||||||
write_human_report(&mut out, &report).context("failed to write report")?;
|
write_human_report(&mut out, &report).context("failed to write report")?;
|
||||||
}
|
}
|
||||||
|
drop(out);
|
||||||
|
|
||||||
|
if cli.interactive {
|
||||||
|
run_interactive_resolver(&report.verified_duplicates, true, &cli.delete_plan)?;
|
||||||
|
}
|
||||||
|
|
||||||
if report.summary.errors > 0 {
|
if report.summary.errors > 0 {
|
||||||
Ok(ExitCode::from(2))
|
Ok(ExitCode::from(2))
|
||||||
@@ -89,3 +145,148 @@ fn main() -> anyhow::Result<ExitCode> {
|
|||||||
Ok(ExitCode::SUCCESS)
|
Ok(ExitCode::SUCCESS)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn run_interactive_resolver(
|
||||||
|
groups: &[DuplicateGroup],
|
||||||
|
verified: bool,
|
||||||
|
delete_plan: &PathBuf,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
if groups.is_empty() {
|
||||||
|
println!("No duplicate groups to resolve.");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let theme = ColorfulTheme::default();
|
||||||
|
let mut planned_deletes = Vec::new();
|
||||||
|
let mut skipped = 0usize;
|
||||||
|
|
||||||
|
for (group_index, group) in groups.iter().enumerate() {
|
||||||
|
println!();
|
||||||
|
println!(
|
||||||
|
"Duplicate group {}/{} — {} across {} files",
|
||||||
|
group_index + 1,
|
||||||
|
groups.len(),
|
||||||
|
disk_checker::format_bytes(group.size),
|
||||||
|
group.paths.len()
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut choices = group
|
||||||
|
.paths
|
||||||
|
.iter()
|
||||||
|
.map(|path| path.display().to_string())
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
choices.push("Skip this group".to_string());
|
||||||
|
|
||||||
|
let selection = Select::with_theme(&theme)
|
||||||
|
.with_prompt("Choose the version to keep")
|
||||||
|
.items(&choices)
|
||||||
|
.default(0)
|
||||||
|
.interact()
|
||||||
|
.context("interactive selection failed")?;
|
||||||
|
|
||||||
|
if selection == group.paths.len() {
|
||||||
|
skipped += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let keep_path = &group.paths[selection];
|
||||||
|
let delete_paths = group
|
||||||
|
.paths
|
||||||
|
.iter()
|
||||||
|
.filter(|path| *path != keep_path)
|
||||||
|
.cloned()
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
println!("Keeping: {}", keep_path.display());
|
||||||
|
for path in &delete_paths {
|
||||||
|
println!(" remove: {}", path.display());
|
||||||
|
}
|
||||||
|
|
||||||
|
let confirmed = Confirm::with_theme(&theme)
|
||||||
|
.with_prompt("Add these files to the deletion plan?")
|
||||||
|
.default(false)
|
||||||
|
.interact()
|
||||||
|
.context("interactive confirmation failed")?;
|
||||||
|
|
||||||
|
if !confirmed {
|
||||||
|
skipped += 1;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
planned_deletes.extend(delete_paths);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !planned_deletes.is_empty() {
|
||||||
|
write_delete_plan(delete_plan, &planned_deletes, verified)?;
|
||||||
|
println!(
|
||||||
|
"Wrote deletion plan for {} files: {}",
|
||||||
|
planned_deletes.len(),
|
||||||
|
delete_plan.display()
|
||||||
|
);
|
||||||
|
println!("Review it, then run: sh {}", delete_plan.display());
|
||||||
|
}
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"Interactive resolver complete: {} planned, {} groups skipped.",
|
||||||
|
planned_deletes.len(),
|
||||||
|
skipped
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_delete_plan(
|
||||||
|
path: &PathBuf,
|
||||||
|
delete_paths: &[PathBuf],
|
||||||
|
verified: bool,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let mut file = OpenOptions::new()
|
||||||
|
.write(true)
|
||||||
|
.create_new(true)
|
||||||
|
.open(path)
|
||||||
|
.with_context(|| format!("failed to create delete plan {}", path.display()))?;
|
||||||
|
writeln!(file, "#!/bin/sh")?;
|
||||||
|
writeln!(file, "set -eu")?;
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"# Review carefully before running. Generated by disk-checker."
|
||||||
|
)?;
|
||||||
|
if verified {
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"# Source groups were fully verified with --verify-full."
|
||||||
|
)?;
|
||||||
|
} else {
|
||||||
|
writeln!(
|
||||||
|
file,
|
||||||
|
"# WARNING: Source groups were possible duplicates only, not fully verified."
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
for delete_path in delete_paths {
|
||||||
|
writeln!(file, "rm -- {}", shell_quote(delete_path)?)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn shell_quote(path: &Path) -> anyhow::Result<String> {
|
||||||
|
let value = path.to_str().with_context(|| {
|
||||||
|
format!(
|
||||||
|
"delete plan cannot safely encode non-UTF-8 path: {}",
|
||||||
|
path.display()
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
Ok(format!("'{}'", value.replace('\'', "'\\''")))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::shell_quote;
    use std::path::Path;

    /// An embedded single quote must come out as the `'\''` escape sequence.
    #[test]
    fn shell_quote_escapes_single_quotes() {
        let quoted = shell_quote(Path::new("/tmp/it's-here.txt")).expect("quote path");
        assert_eq!(quoted, "'/tmp/it'\\''s-here.txt'");
    }
}
|
||||||
|
|||||||
Reference in New Issue
Block a user