diff --git a/src/broom.cpp b/src/broom.cpp
index 611c80d..7ca9edb 100644
--- a/src/broom.cpp
+++ b/src/broom.cpp
@@ -18,6 +18,9 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
 */
 
 #include <iostream>
+#include <map>
+#include <algorithm>
+
 #include "entry.hpp"
 #include "broom.hpp"
 
@@ -32,21 +35,75 @@ void Broom::print_statistics() {
     << std::endl;
 };
 
-// Determines whether entry1 is a duplicate of entry2
-bool Broom::is_duplicate(Entry entry1, Entry entry2) {
-    if (entry1.path == entry2.path) {
-        // well, it`s the same file we`re talking about
-        return false;
-    }
-    else if (entry1.compare_checksums(entry2.checksum)) {
-        return true;
-    }
-
-    return false;
+// removes entries with unique file sizes. Returns amount of files
+// that are no longer being tracked
+uintmax_t Broom::untrack_unique_sizes() {
+    // key: size, value: amount of occurences
+    std::map<uintmax_t, uintmax_t> sizes;
+    std::map<uintmax_t, uintmax_t>::iterator iterator;
+
+    for (Entry& entry : m_tracked_entries) {
+        // check if size of this entry is already in the map
+        // if yes --> increment occurences counter
+        // if not --> add it to the map with a counter of 1
+        iterator = sizes.find(entry.filesize);
+        if (iterator == sizes.end()) {
+            // there is no such size
+            sizes.insert(std::pair<uintmax_t, uintmax_t>(entry.filesize, 1));
+        } else {
+            // there is such size --> count one more occurence in place
+            // (assigning a post-incremented copy would store the OLD value)
+            iterator->second++;
+        };
+    };
+
+    // go through the map again, look for uniques and remove entries with
+    // such filesizes
+    uintmax_t untracked = 0;
+    for (std::pair<uintmax_t, uintmax_t> size_entry : sizes) {
+        if (size_entry.second > 1) {
+            // not a unique size. Keep such entries
+        } else {
+            // a unique one. Untrack such an entry (erase-remove idiom:
+            // remove_if alone only reorders, erase actually drops it)
+            m_tracked_entries.erase(std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [size_entry](const Entry& e) -> bool {
+                return (e.filesize == size_entry.first);
+            }), m_tracked_entries.end());
+            untracked++;
+
+        };
+    };
+
+    return untracked;
 };
 
-// find all duplicates in the directory
-int Broom::find_duplicates(std::filesystem::path directory, Entry entries[], bool recursive = false) {
+// get all entities from path recursively and track them
+int Broom::track(std::filesystem::path dir) {
+    std::filesystem::directory_options options = (
+        std::filesystem::directory_options::follow_directory_symlink |
+        std::filesystem::directory_options::skip_permission_denied
+    );
+
+    for (std::filesystem::directory_entry dir_entry : std::filesystem::recursive_directory_iterator(dir, options)) {
+        if (dir_entry.is_directory()) {
+            continue;
+        };
+
+        Entry entry(dir_entry.path());
+        m_tracked_entries.push_back(entry);
+    };
+
+    return 0;
+};
+
+// find all duplicates among tracked entries
+int Broom::find_duplicates() {
+    size_t startsize = m_tracked_entries.size();
+    std::cout << "Tracking " << startsize << std::endl;
+
+    uintmax_t untracked = untrack_unique_sizes();
+    std::cout << "Untracked " << untracked << " unique sizes" << std::endl;
+
+    std::cout << "Duplicates: " << startsize - untracked << std::endl;
 
     return 0;
 };
diff --git a/src/broom.hpp b/src/broom.hpp
index 342ff22..a9ae923 100644
--- a/src/broom.hpp
+++ b/src/broom.hpp
@@ -21,14 +21,17 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
 #define BROOM_HPP
 
 #include <filesystem>
+#include <vector>
 
 // A class to find and manage duplicate files
 class Broom {
 protected:
-    // how many files has been "sweeped"
+    // how many files has been (would be ?) "sweeped"
     uintmax_t m_sweeped_files;
-    // how many bytes was freed
+    // how many bytes was (would be ?) freed
     uintmax_t m_sweeped_size;
+    // entries that possibly contain duplicates
+    std::vector<Entry> m_tracked_entries;
 
 public:
     Broom();
@@ -37,11 +40,15 @@ public:
     // Print current statistics
     void print_statistics();
 
-    // Determines whether entry1 is a duplicate of entry2
-    bool is_duplicate(Entry entry1, Entry entry2);
+    // get all entities from path recursively and track them
+    int track(std::filesystem::path path);
 
     // find all duplicates in the directory
-    int find_duplicates(std::filesystem::path directory, Entry entries[], bool recursive);
+    int find_duplicates();
+
+    // removes entries with unique file sizes. Returns amount of files
+    // that are no longer being tracked
+    uintmax_t untrack_unique_sizes();
 
     // remove ALL duplicate files
     int sweep_all(Entry entries[]);
diff --git a/src/main.cpp b/src/main.cpp
index 5386773..6e1d8ae 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -18,11 +18,8 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
 */
 
 #include <iostream>
-#include <fstream>
 #include <vector>
-#include <string>
 #include <filesystem>
-#include <cstdint>
 
 #include "entry.hpp"
 #include "broom.hpp"
@@ -95,15 +92,11 @@ int main(int argc, char* argv[]) {
         };
     };
 
-    // printing all directories just for testing
-    for (uint32_t i = 0; i < options.paths.size(); i++) {
-        for (auto& p : std::filesystem::recursive_directory_iterator(options.paths.at(i))) {
-            if (!p.is_directory()) {
-                Entry entry(p);
-                std::cout << p.path() << "Checksum: " << entry.checksum << std::endl;
-            }
-        };
-    };
+    Broom broom;
+
+    std::filesystem::path first_path = options.paths.at(0);
+    broom.track(first_path);
+    broom.find_duplicates();
 
     return 0;
 };