From 21d4610dc6786f7da9089c5287694d2582f89184 Mon Sep 17 00:00:00 2001 From: Unbewohnte Date: Sat, 1 Jan 2022 14:02:52 +0300 Subject: [PATCH] Not tracking entries, but rather tracking paths --- .gitignore | 1 + src/broom.cpp | 24 +++++++++++++----------- src/broom.hpp | 6 +++--- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index e9d6a9a..53767e6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ bin/broom +bin/broom_old build/CMakeFiles/ build/cmake_install.cmake build/CMakeCache.txt diff --git a/src/broom.cpp b/src/broom.cpp index 8e4ac06..3ba3e39 100644 --- a/src/broom.cpp +++ b/src/broom.cpp @@ -54,8 +54,7 @@ void Broom::track(const std::filesystem::path dir) { continue; }; - Entry entry(dir_entry.path()); - m_tracked_entries.push_back(entry); + m_tracked_filepaths.push_back(dir_entry); }; if (m_benchmarking) { @@ -74,14 +73,15 @@ uintmax_t Broom::untrack_unique_sizes() { // key: size, value: amount of occurences std::map sizes_map; - for (Entry entry : m_tracked_entries) { + for (std::filesystem::path filepath : m_tracked_filepaths) { // check if size of this entry is already in the map // if yes --> increment occurences counter // if not --> add it to the map with a counter of 1 - auto iterator = sizes_map.find(entry.filesize); + uintmax_t filesize = std::filesystem::file_size(filepath); + auto iterator = sizes_map.find(filesize); if (iterator == sizes_map.end()) { // there is no such size - sizes_map.insert({entry.filesize, 1}); + sizes_map.insert({filesize, 1}); } else { // there is such size sizes_map[iterator->first]++; @@ -89,8 +89,9 @@ uintmax_t Broom::untrack_unique_sizes() { }; uintmax_t untracked = 0; - std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [&untracked, sizes_map](Entry entry) -> bool{ - auto iter = sizes_map.find(entry.filesize); + m_tracked_filepaths.erase(std::remove_if(m_tracked_filepaths.begin(), m_tracked_filepaths.end(), [&untracked, sizes_map](std::filesystem::path 
filepath) -> bool{ + uintmax_t filesize = std::filesystem::file_size(filepath); + auto iter = sizes_map.find(filesize); if (iter->second == 1) { // unique untracked++; @@ -100,7 +101,8 @@ uintmax_t Broom::untrack_unique_sizes() { // std::cout << "duplicate fsize: " << iter->first << " occurences: " << iter->second << std::endl; return false; - }); + })); + return untracked; }; @@ -112,7 +114,7 @@ uintmax_t Broom::untrack_unique_sizes() { // std::map contents_map; // std::map::iterator iterator; // -// for (Entry& entry : m_tracked_entries) { +// for (Entry& entry : m_tracked_filepaths) { // // the same logic: // // check if contents of this entry is already in the map // // if yes --> increment occurences counter @@ -136,7 +138,7 @@ uintmax_t Broom::untrack_unique_sizes() { // // not a unique size. Keep such entries // } else { // // a unique one. Untrack such an entry -// std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [contents_entry](Entry e) -> bool { +// std::remove_if(m_tracked_filepaths.begin(), m_tracked_filepaths.end(), [contents_entry](Entry e) -> bool { // return (e.compare_checksums(contents_entry.first)); // }); // untracked++; @@ -163,7 +165,7 @@ void Broom::find_duplicates() { << std::endl; } else { - size_t startsize = m_tracked_entries.size(); + size_t startsize = m_tracked_filepaths.size(); std::cout << "Tracking " << startsize << std::endl; uintmax_t global_untracked = 0; diff --git a/src/broom.hpp b/src/broom.hpp index 136b3cd..b723f4b 100644 --- a/src/broom.hpp +++ b/src/broom.hpp @@ -41,8 +41,8 @@ protected: uintmax_t m_sweeped_files; // how many bytes was (would be ?) 
freed uintmax_t m_sweeped_size; - // entries that possibly contain duplicates - std::vector m_tracked_entries; + // paths to tracked files + std::vector m_tracked_filepaths; public: Broom(Options options); @@ -51,7 +51,7 @@ public: // Print current statistics void print_statistics(); - // get all entities from path recursively and track them + // recursively track every file that lies in given path void track(const std::filesystem::path path); // find all duplicates in the directory