From 0b7017b2130e6a2f3fc48a49201154fefa66ed69 Mon Sep 17 00:00:00 2001
From: Unbewohnte
Date: Sun, 16 Jan 2022 15:24:22 +0300
Subject: [PATCH] Do not save scan results file if no entries are being tracked

---
 src/broom.cpp | 25 ++++++++++++++++++-------
 src/broom.hpp |  3 +++
 src/entry.cpp |  2 +-
 src/entry.hpp |  2 +-
 src/main.cpp  | 23 +++++++++++------------
 5 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/src/broom.cpp b/src/broom.cpp
index e23cf4c..70c94a5 100755
--- a/src/broom.cpp
+++ b/src/broom.cpp
@@ -71,12 +71,12 @@ std::vector<entry::Entry> Broom::track(const std::filesystem::path path) {
 // untracks entries with unique file sizes. Returns amount of files
 // that are no longer being tracked
 uintmax_t Broom::untrack_unique_sizes(std::vector<entry::Entry>& tracked_entries) {
-    // key: size, value: amount of occurences
+    // key: size, value: amount of occurrences
     std::map sizes_map;
 
     for (auto entry_iter = tracked_entries.begin(); entry_iter != tracked_entries.end(); entry_iter++) {
         // check if size of this entry is already in the map
-        // if yes --> increment occurences counter
+        // if yes --> increment occurrences counter
         // if not --> add it to the map with a counter of 1
         auto iterator = sizes_map.find(entry_iter->filesize);
         if (iterator == sizes_map.end()) {
@@ -107,21 +107,21 @@ uintmax_t Broom::untrack_unique_sizes(std::vector<entry::Entry>& tracked_entries
 // untracks entries with the same content-pieces. Returns amount of
 // files that are no longer being tracked
 uintmax_t Broom::untrack_unique_contents(std::vector<entry::Entry>& tracked_entries) {
-    // contents, occurences
+    // contents, occurrences
     std::map contents_map;
     std::map::iterator map_iter;
 
     for (entry::Entry& entry : tracked_entries) {
         // the same logic:
         // check if contents of this entry are already in the map
-        // if yes --> increment occurences counter
+        // if yes --> increment occurrences counter
         // if not --> add it to the map with a counter of 1
         map_iter = contents_map.find(entry.pieces);
         if (map_iter == contents_map.end()) {
            // add it to the map
            contents_map.insert({entry.pieces, 1});
         } else {
-           // increment occurences counter
+           // increment occurrences counter
            contents_map[map_iter->first]++;
         }
 
@@ -161,9 +161,9 @@ void Broom::create_scan_results_list(const std::vector<entry::Entry> tracked_ent
     for (const entry::Entry entry : tracked_entries) {
         // log every entry and its group
         if (entry.group == group::EMPTY) {
-            outfile << entry.path << " --- is an empty file" << std::endl;
+            outfile << "[EMPTY] " << entry.path << std::endl;
         } else if (entry.group == group::DUPLICATE) {
-            outfile << entry.path << " --- is a duplicate of another file" << std::endl;
+            outfile << "[DUPLICATE] " << entry.path << std::endl;
         }
     }
 
@@ -207,4 +207,15 @@ uintmax_t Broom::remove_empty_files(std::vector<entry::Entry>& tracked_entries)
     return removed;
 };
 
+// marks every entry without any group as a duplicate
+void Broom::mark_as_duplicates(std::vector<entry::Entry>& tracked_entries) {
+    for (entry::Entry& entry : tracked_entries) {
+        if (entry.group == group::EMPTY) {
+            // do not mess up grouping
+            continue;
+        }
+        entry.group = group::DUPLICATE;
+    }
+};
+
 }
diff --git a/src/broom.hpp b/src/broom.hpp
index c7e8df6..890d61d 100755
--- a/src/broom.hpp
+++ b/src/broom.hpp
@@ -52,6 +52,9 @@ public:
 
     // creates a list of duplicate, empty files into a file
    void create_scan_results_list(const std::vector<entry::Entry> tracked_entries, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt");
+
+    // marks every entry without any group as a duplicate
+    void mark_as_duplicates(std::vector<entry::Entry>& tracked_entries);
 };
 
 }
diff --git a/src/entry.cpp b/src/entry.cpp
index a9bd54f..2b45c6a 100755
--- a/src/entry.cpp
+++ b/src/entry.cpp
@@ -82,7 +82,7 @@ void Entry::get_pieces() {
 
     // make a convenient hex string out of pure bytes
     std::stringstream pieces_hex;
-    for (uint8_t i = 0; i < PIECE_SIZE * 2; i++) {
+    for (uint8_t i = 0; i < PIECE_SIZE * 3; i++) {
         pieces_hex << std::hex << static_cast(pieces_buffer[i]);
     };
 
diff --git a/src/entry.hpp b/src/entry.hpp
index ebe74f8..91bc432 100755
--- a/src/entry.hpp
+++ b/src/entry.hpp
@@ -30,7 +30,7 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
 namespace entry {
 
 // 3 pieces (beginning, middle and end of the file)
-const uint8_t PIECE_SIZE = 75;
+const uint8_t PIECE_SIZE = 85;
 const uint8_t PIECES_AMOUNT = 3;
 
 // A wrapper for every file in filesystem with all necessary information
diff --git a/src/main.cpp b/src/main.cpp
index ed6a984..614163f 100755
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -64,6 +64,7 @@ void print_version() {
     << std::endl;
 };
 
+
 int main(int argc, char* argv[]) {
     bool benchmarking = false;
     bool sweeping = false;
@@ -165,26 +166,24 @@ int main(int argc, char* argv[]) {
             }
         }), tracked_entries.end());
 
+        // untrack unique contents
         untracked = broom.untrack_unique_contents(tracked_entries);
         std::cout << "[INFO] Untracked " << untracked << " files with unique contents" << std::endl;
 
         // mark entries as duplicates
-        for (entry::Entry& entry : tracked_entries) {
-            if (entry.group == group::EMPTY) {
-                // do not mess up grouping
-                continue;
-            }
-            entry.group = group::DUPLICATE;
-        }
+        broom.mark_as_duplicates(tracked_entries);
 
         std::cout << "[INFO] " << tracked_entries.size() << " files left being tracked" << std::endl;
 
-        // now only files with a non-unique size and contents are being tracked
-        // are they REALLY duplicates ?
-        // leave the REAL cleanup for the user, saving these entries in a file
-        broom.create_scan_results_list(tracked_entries);
-        std::cout << "[INFO] Created scan results file" << std::endl;
+        if (tracked_entries.size() > 0) {
+            // now only files with a non-unique size and contents are being tracked
+            // are they REALLY duplicates ?
+            // better to leave the REAL cleanup for the user, saving these entries in a file, than doing a blind and possibly destructive purge
+            broom.create_scan_results_list(tracked_entries);
+            std::cout << "[INFO] Created scan results file" << std::endl;
+        }
+
     } catch(const std::exception& e) {
         std::cerr
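
Note (not part of the patch): the behavioral change in main.cpp boils down to two things: the ad-hoc marking loop is replaced by the new Broom::mark_as_duplicates() helper, and create_scan_results_list() is now only called when at least one entry is still tracked, so an empty scan_results.txt is never written. Below is a minimal standalone C++ sketch of that guarded flow; Entry, Group, mark_as_duplicates() and write_scan_results() here are simplified stand-ins, not the project's real types or API.

// Standalone sketch (assumptions, not the broom code): illustrates the
// "mark remaining entries, then only write results if something is tracked"
// flow introduced by this patch.
#include <filesystem>
#include <fstream>
#include <iostream>
#include <vector>

enum class Group { None, Empty, Duplicate };

struct Entry {
    std::filesystem::path path;
    Group group = Group::None;
};

// stand-in for Broom::mark_as_duplicates(): at this point in the pipeline,
// everything that is not EMPTY is treated as a duplicate
void mark_as_duplicates(std::vector<Entry>& tracked) {
    for (Entry& e : tracked) {
        if (e.group == Group::Empty) {
            continue; // do not mess up grouping
        }
        e.group = Group::Duplicate;
    }
}

// stand-in for Broom::create_scan_results_list()
void write_scan_results(const std::vector<Entry>& tracked,
                        const std::filesystem::path& file = "scan_results.txt") {
    std::ofstream out(file);
    for (const Entry& e : tracked) {
        out << (e.group == Group::Empty ? "[EMPTY] " : "[DUPLICATE] ")
            << e.path.string() << '\n';
    }
}

int main() {
    std::vector<Entry> tracked = {
        {"a.txt", Group::None},
        {"b.txt", Group::Empty},
    };

    mark_as_duplicates(tracked);
    std::cout << "[INFO] " << tracked.size() << " files left being tracked\n";

    // the point of the patch: skip creating the results file entirely
    // when nothing is tracked any more
    if (!tracked.empty()) {
        write_scan_results(tracked);
        std::cout << "[INFO] Created scan results file\n";
    }
    return 0;
}

The real main.cpp uses tracked_entries.size() > 0, which is equivalent to the !tracked.empty() check shown here.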