From 449beb393a6f5f5dee4ab09e96452388ac90c988 Mon Sep 17 00:00:00 2001 From: Unbewohnte Date: Thu, 30 Dec 2021 21:00:30 +0300 Subject: [PATCH] Untracking by size now actually works and works FAST --- src/broom.cpp | 45 ++++++++++++++++++--------------------------- src/broom.hpp | 6 +++--- src/entry.cpp | 8 ++++---- src/entry.hpp | 3 +-- src/main.cpp | 2 +- 5 files changed, 27 insertions(+), 37 deletions(-) diff --git a/src/broom.cpp b/src/broom.cpp index 557200f..8e4ac06 100644 --- a/src/broom.cpp +++ b/src/broom.cpp @@ -40,8 +40,8 @@ void Broom::print_statistics() { << std::endl; }; -// get all entities from path recursively and track them -int Broom::track(std::filesystem::path dir) { +// get all files from path recursively and track them +void Broom::track(const std::filesystem::path dir) { auto t0 = std::chrono::high_resolution_clock::now(); std::filesystem::directory_options options = ( @@ -66,7 +66,6 @@ int Broom::track(std::filesystem::path dir) { << std::chrono::duration_cast(tracking_time - t0).count() << " ms" << std::endl; }; - return 0; }; // removes entries with unique file sizes. Returns amount of files @@ -74,38 +73,34 @@ int Broom::track(std::filesystem::path dir) { uintmax_t Broom::untrack_unique_sizes() { // key: size, value: amount of occurences std::map sizes_map; - std::map::iterator iterator; - for (Entry& entry : m_tracked_entries) { + for (Entry entry : m_tracked_entries) { // check if size of this entry is already in the map // if yes --> increment occurences counter // if not --> add it to the map with a counter of 1 - iterator = sizes_map.find(entry.filesize); + auto iterator = sizes_map.find(entry.filesize); if (iterator == sizes_map.end()) { // there is no such size - sizes_map.insert(std::pair(entry.filesize, 1)); + sizes_map.insert({entry.filesize, 1}); } else { // there is such size - uintmax_t occurences = sizes_map[iterator->first]; - sizes_map[iterator->first] = occurences++; + sizes_map[iterator->first]++; }; }; - // go through the map again, look for uniques and remove entries with - // such filesizes uintmax_t untracked = 0; - for (std::pair size_entry : sizes_map) { - if (size_entry.second > 1) { - // not a unique size. Keep such entries - } else { - // a unique one. Untrack such an entry - std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [size_entry](Entry e) -> bool { - return (e.filesize == size_entry.first); - }); + std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [&untracked, sizes_map](Entry entry) -> bool{ + auto iter = sizes_map.find(entry.filesize); + if (iter->second == 1) { + // unique untracked++; - + return true; }; - }; + + // std::cout << "duplicate fsize: " << iter->first << " occurences: " << iter->second << std::endl; + + return false; + }); return untracked; }; @@ -153,7 +148,7 @@ uintmax_t Broom::untrack_unique_sizes() { // find all duplicates among tracked entries, stop tracking uniques -int Broom::find_duplicates() { +void Broom::find_duplicates() { if (m_benchmarking) { auto t0 = std::chrono::high_resolution_clock::now(); @@ -185,12 +180,8 @@ int Broom::find_duplicates() { std::cout << "Duplicates: " << startsize - global_untracked << std::endl; }; - - - return 0; }; // remove ALL duplicate files -int Broom::sweep_all() { - return 0; +void Broom::sweep_all() { }; diff --git a/src/broom.hpp b/src/broom.hpp index 851bea0..136b3cd 100644 --- a/src/broom.hpp +++ b/src/broom.hpp @@ -52,10 +52,10 @@ public: void print_statistics(); // get all entities from path recursively and track them - int track(std::filesystem::path path); + void track(const std::filesystem::path path); // find all duplicates in the directory - int find_duplicates(); + void find_duplicates(); // removes entries with unique file sizes. Returns amount of files // that are no longer being tracked @@ -66,7 +66,7 @@ public: uintmax_t untrack_unique_contents(); // remove ALL duplicate files - int sweep_all(); + void sweep_all(); }; #endif diff --git a/src/entry.cpp b/src/entry.cpp index c37e286..d90d119 100644 --- a/src/entry.cpp +++ b/src/entry.cpp @@ -20,14 +20,14 @@ along with broom. If not, see . #include "entry.hpp" // A wrapper for every file with all necessary information -Entry::Entry(std::filesystem::path path) { +Entry::Entry(const std::filesystem::path entry_path) { // check for existense and being a directory - if (!std::filesystem::exists(path) || std::filesystem::is_directory(path)) { + if (!std::filesystem::exists(entry_path) || std::filesystem::is_directory(entry_path)) { throw "Does not exist or a directory"; }; - // filename - filename = path.filename(); + // path + path = entry_path; // filesize filesize = std::filesystem::file_size(path); diff --git a/src/entry.hpp b/src/entry.hpp index ffaef81..9c825bf 100644 --- a/src/entry.hpp +++ b/src/entry.hpp @@ -30,13 +30,12 @@ const uint8_t CHECKSUM_SIZE = CHUNK_SIZE * 2; // A wrapper for every file with all necessary information class Entry { public: - std::string filename; std::filesystem::path path; uintmax_t filesize; char checksum[CHECKSUM_SIZE]; - Entry(std::filesystem::path path); + Entry(const std::filesystem::path entry_path); ~Entry(); // Compare this entry`s checksum with the other one. diff --git a/src/main.cpp b/src/main.cpp index d999e60..cf99ae0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -88,7 +88,7 @@ int main(int argc, char* argv[]) { }; }; - // no path was specified + // no path was specified at all or every path was nonexistent if (tracked_path.string() == "") { print_help(); return 1;