Skip to content

Commit

Permalink
Simplify, fix and improve similar images algorithm (#983)
Browse files Browse the repository at this point in the history
* Random changer

* CD

* A1

* Almost?

* Nein

* Heif

* Tests that needs to be fixed

* Fixed test

* Tests

* Fixed image counting in reference folders

* Lock

* Catch possible more bugs in pdf

* Find ever more bugs
  • Loading branch information
qarmin authored Jun 9, 2023
1 parent 04a91ae commit 55b2744
Show file tree
Hide file tree
Showing 12 changed files with 736 additions and 657 deletions.
630 changes: 254 additions & 376 deletions Cargo.lock

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
## Version 5.2.0 - ?
- Add finding similar audio files by content - [#970](https://github.com/qarmin/czkawka/pull/970)
- Allow to find duplicates by name/size at once - [#956](https://github.com/qarmin/czkawka/pull/956)
- Fixed bug where cache for music tags did not work - [#970](https://github.com/qarmin/czkawka/pull/970)
- Allow to set number of threads from CLI - [#972](https://github.com/qarmin/czkawka/pull/972)
- Fix problem with invalid item sorting in bad extensions mode - [#972](https://github.com/qarmin/czkawka/pull/972)
- Big refactor/cleaning of code - [#956](https://github.com/qarmin/czkawka/pull/956)/[#970](https://github.com/qarmin/czkawka/pull/970)/[#972](https://github.com/qarmin/czkawka/pull/972)
- Use builtin gtk webp loader for previews - [#923](https://github.com/qarmin/czkawka/pull/923)
- Fixed docker build - [#947](https://github.com/qarmin/czkawka/pull/947)
- Restore snap builds broken since GTK 4 port - [#965](https://github.com/qarmin/czkawka/pull/965)
- Instructions on how to build native ARM64 binaries on Mac - [#945](https://github.com/qarmin/czkawka/pull/945)/[#971](https://github.com/qarmin/czkawka/pull/971)

## Version 5.1.0 - 19.02.2023r
- Added sort button - [#894](https://github.com/qarmin/czkawka/pull/894)
- Allow to set number of threads used to scan - [#839](https://github.com/qarmin/czkawka/pull/839)
Expand Down
4 changes: 2 additions & 2 deletions czkawka_cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ homepage = "https://github.com/qarmin/czkawka"
repository = "https://github.com/qarmin/czkawka"

[dependencies]
clap = { version = "4.2", features = ["derive"] }
clap = { version = "4.3", features = ["derive"] }

# For enum types
image_hasher = "1.1"
image_hasher = "1.2"

[dependencies.czkawka_core]
path = "../czkawka_core"
Expand Down
21 changes: 11 additions & 10 deletions czkawka_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ crossbeam-channel = "0.5"
directories-next = "2.0"

# Needed by similar images
image_hasher = "1.1"
image_hasher = "1.2"
bk-tree = "0.5"
image = "0.24"
hamming = "0.1"

# Needed by same music
bitflags = "2.2"
lofty = "0.12"
bitflags = "2.3"
lofty = "0.14"

# Futures - needed by async progress sender
futures = "0.3.28"
Expand All @@ -41,11 +41,11 @@ rusty-chromaprint = "0.1"
symphonia = { version = "0.5", features = ["all"] }

# Hashes for duplicate files
blake3 = "1.3"
blake3 = "1.4"
crc32fast = "1.3"
xxhash-rust = { version = "0.8", features = ["xxh3"] }

tempfile = "3.5"
tempfile = "3.6"

# Video Duplicates
vid_dup_finder_lib = "0.1"
Expand All @@ -59,8 +59,8 @@ serde_json = "1.0"
# Language
i18n-embed = { version = "0.13", features = ["fluent-system", "desktop-requester"] }
i18n-embed-fl = "0.6"
rust-embed = "6.6"
once_cell = "1.17"
rust-embed = "6.7"
once_cell = "1.18"

# Raw image files
rawloader = "0.37"
Expand All @@ -73,11 +73,12 @@ infer = "0.13"
num_cpus = "1.15"

# Heif/Heic
libheif-rs = { version = "0.18.0", optional = true } # Do not upgrade now, since Ubuntu 22.04 not works with newer version
libheif-rs = { version = "=0.18.0", optional = true } # Do not upgrade for now, since Ubuntu 22.04 does not work with newer versions
libheif-sys = { version = "=1.14.2", optional = true } # 1.14.3 breaks compilation on Ubuntu 22.04
anyhow = { version = "1.0" }

state = "0.5"
state = "0.6"

[features]
default = []
heif = ["dep:libheif-rs"]
heif = ["dep:libheif-rs", "dep:libheif-sys"]
47 changes: 32 additions & 15 deletions czkawka_core/src/broken_files.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,23 +372,27 @@ impl BrokenFiles {

let mut file_entry_clone = file_entry.clone();
let result = panic::catch_unwind(|| {
if let Err(e) = FileOptions::cached().parse_options(parser_options).open(&file_entry.path) {
if let PdfError::Io { .. } = e {
return None;
}

let mut error_string = e.to_string();
// Workaround for strange error message https://github.com/qarmin/czkawka/issues/898
if error_string.starts_with("Try at") {
if let Some(start_index) = error_string.find("/pdf-") {
error_string = format!("Decoding error in pdf-rs library - {}", &error_string[start_index..]);
match FileOptions::cached().parse_options(parser_options).open(&file_entry.path) {
Ok(file) => {
for idx in 0..file.num_pages() {
if let Err(e) = file.get_page(idx) {
let err = validate_pdf_error(&mut file_entry, e);
if let PdfError::InvalidPassword = err {
return None;
} else {
break;
}
}
}
}

file_entry.error_string = error_string;
let error = unpack_pdf_error(e);
if let PdfError::InvalidPassword = error {
return None;
Err(e) => {
if let PdfError::Io { .. } = e {
return None;
}
let err = validate_pdf_error(&mut file_entry, e);
if let PdfError::InvalidPassword = err {
return None;
}
}
}
Some(file_entry)
Expand Down Expand Up @@ -708,3 +712,16 @@ fn unpack_pdf_error(e: PdfError) -> PdfError {
e
}
}

/// Records the textual form of a PDF error on `file_entry` and hands the error
/// back in unpacked form.
///
/// The message written to `file_entry.error_string` is `e.to_string()`, except
/// when it starts with `"Try at"` and contains `"/pdf-"`: in that case it is
/// replaced by a short "Decoding error in pdf-rs library" message followed by
/// the original text from `"/pdf-"` onwards.
///
/// Returns whatever `unpack_pdf_error` produces for `e`.
// NOTE(review): `unpack_pdf_error` is only partially visible here; presumably it
// strips wrapper errors to reach the underlying cause — confirm against its definition.
fn validate_pdf_error(file_entry: &mut FileEntry, e: PdfError) -> PdfError {
    let raw_message = e.to_string();

    // Workaround for strange error message https://github.com/qarmin/czkawka/issues/898
    let library_path_start = if raw_message.starts_with("Try at") {
        raw_message.find("/pdf-")
    } else {
        None
    };

    file_entry.error_string = match library_path_start {
        Some(start_index) => format!("Decoding error in pdf-rs library - {}", &raw_message[start_index..]),
        None => raw_message,
    };

    unpack_pdf_error(e)
}
2 changes: 1 addition & 1 deletion czkawka_core/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::common_directory::Directories;
use crate::common_items::ExcludedItems;
use crate::common_traits::ResultEntry;

static NUMBER_OF_THREADS: state::Storage<usize> = state::Storage::new();
static NUMBER_OF_THREADS: state::InitCell<usize> = state::InitCell::new();

pub fn get_number_of_threads() -> usize {
let data = NUMBER_OF_THREADS.get();
Expand Down
2 changes: 1 addition & 1 deletion czkawka_core/src/common_dir_traversal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ where
}
}
if counter > 0 {
// Do not increase counter one by one in threads, because usually it
// Increase the counter in one batch, because adding to an atomic value many times may be slow
atomic_counter.fetch_add(counter, Ordering::Relaxed);
}
(dir_result, warnings, fe_result, set_as_not_empty_folder_list, folder_entries_list)
Expand Down
4 changes: 2 additions & 2 deletions czkawka_core/src/common_directory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ pub struct Directories {
pub excluded_directories: Vec<PathBuf>,
pub included_directories: Vec<PathBuf>,
pub reference_directories: Vec<PathBuf>,
exclude_other_filesystems: Option<bool>,
pub exclude_other_filesystems: Option<bool>,
#[cfg(target_family = "unix")]
included_dev_ids: Vec<u64>,
pub included_dev_ids: Vec<u64>,
}

impl Directories {
Expand Down
Loading

0 comments on commit 55b2744

Please sign in to comment.