From 04f524db46bbd8a0494d9bc95d02aef3608b969f Mon Sep 17 00:00:00 2001 From: Unbewohnte Date: Sat, 15 Jan 2022 14:15:19 +0300 Subject: [PATCH] Does not die on permission denieds; Broom class does not own tracked entries anymore --- COPYING | 0 build/CMakeLists.txt | 8 +++- src/broom.cpp | 103 ++++++++++++++++++------------------------- src/broom.hpp | 39 ++++++---------- src/entry.cpp | 0 src/entry.hpp | 2 +- src/group.hpp | 0 src/main.cpp | 30 ++++++------- 8 files changed, 81 insertions(+), 101 deletions(-) mode change 100644 => 100755 COPYING mode change 100644 => 100755 build/CMakeLists.txt mode change 100644 => 100755 src/broom.cpp mode change 100644 => 100755 src/broom.hpp mode change 100644 => 100755 src/entry.cpp mode change 100644 => 100755 src/entry.hpp mode change 100644 => 100755 src/group.hpp mode change 100644 => 100755 src/main.cpp diff --git a/COPYING b/COPYING old mode 100644 new mode 100755 diff --git a/build/CMakeLists.txt b/build/CMakeLists.txt old mode 100644 new mode 100755 index 66ac265..b36d2aa --- a/build/CMakeLists.txt +++ b/build/CMakeLists.txt @@ -9,8 +9,14 @@ set(CMAKE_FIND_LIBRARY_SUFFIXES ".a") set(BUILD_SHARED_LIBS OFF) set(CMAKE_EXE_LINKER_FLAGS "-static") -set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall -Werror -O2") +find_package(Threads REQUIRED) + +set(CMAKE_THREAD_PREFER_PTHREAD TRUE) +set(THREADS_PREFER_PTHREAD_FLAG TRUE) + +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -Wall -Werror -O2") set(EXECUTABLE_OUTPUT_PATH ../bin) add_executable(broom ../src/main.cpp ../src/entry.cpp ../src/broom.cpp ../src/group.hpp) +target_link_libraries(broom Threads::Threads) diff --git a/src/broom.cpp b/src/broom.cpp old mode 100644 new mode 100755 index 7c21157..3adfc37 --- a/src/broom.cpp +++ b/src/broom.cpp @@ -23,6 +23,7 @@ along with broom. If not, see . #include #include #include +#include #include "entry.hpp" #include "broom.hpp" @@ -32,18 +33,17 @@ namespace broom { Broom::Broom(Options options) { m_benchmarking = options.benchmarking; - m_sweeping = options.sweeping; }; -Broom::~Broom() { - m_tracked_entries.clear(); -}; +Broom::~Broom() {}; // recursively track every file that lies in given path. Throws an invalid_argument // error in case path does not exist -void Broom::track(const std::filesystem::path path) { +std::vector Broom::track(const std::filesystem::path path) { auto t0 = std::chrono::high_resolution_clock::now(); + std::vector tracked_entries; + // check if given path even exists if (!std::filesystem::exists(path)) { throw std::invalid_argument("\"" + path.string() + "\"" + " does not exist !"); @@ -62,12 +62,12 @@ void Broom::track(const std::filesystem::path path) { }; entry::Entry entry(dir_entry.path()); - m_tracked_entries.push_back(entry); + tracked_entries.push_back(entry); } } else if (std::filesystem::is_regular_file(path)) { // just a file entry::Entry entry(path); - m_tracked_entries.push_back(entry); + tracked_entries.push_back(entry); } @@ -80,16 +80,18 @@ void Broom::track(const std::filesystem::path path) { << " ms" << std::endl; } - std::cout << "[INFO] Tracking " << m_tracked_entries.size() << " files" << std::endl; + std::cout << "[INFO] Tracking " << tracked_entries.size() << " files" << std::endl; + + return tracked_entries; }; // removes entries with unique file sizes. Returns amount of files // that are no longer being tracked -uintmax_t Broom::m_untrack_unique_sizes() { +uintmax_t Broom::untrack_unique_sizes(std::vector& tracked_entries) { // key: size, value: amount of occurences std::map sizes_map; - for (auto entry_iter = m_tracked_entries.begin(); entry_iter != m_tracked_entries.end(); entry_iter++) { + for (auto entry_iter = tracked_entries.begin(); entry_iter != tracked_entries.end(); entry_iter++) { // check if size of this entry is already in the map // if yes --> increment occurences counter // if not --> add it to the map with a counter of 1 @@ -104,7 +106,7 @@ uintmax_t Broom::m_untrack_unique_sizes() { } uintmax_t untracked = 0; - m_tracked_entries.erase(std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [&untracked, sizes_map](entry::Entry entry) -> bool{ + tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [&untracked, sizes_map](entry::Entry entry) -> bool{ auto iter = sizes_map.find(entry.filesize); if (iter->second == 1) { // unique @@ -113,54 +115,37 @@ uintmax_t Broom::m_untrack_unique_sizes() { }; return false; - }), m_tracked_entries.end()); + }), tracked_entries.end()); return untracked; }; + // removes entries with the same content-pieces. Returns amount of // files that are no longer being tracked -uintmax_t Broom::m_untrack_unique_contents() { +uintmax_t Broom::untrack_unique_contents(std::vector& tracked_entries) { // contents, occurences std::map contents_map; std::map::iterator map_iter; - for (auto entry_iter = m_tracked_entries.begin(); entry_iter != m_tracked_entries.end();) { + for (entry::Entry& entry : tracked_entries) { // the same logic: // check if contents of this entry are already in the map // if yes --> increment occurences counter // if not --> add it to the map with a counter of 1 - - if (entry_iter->filesize == 0) { - // that`s an empty file. Skip it - entry_iter++; - continue; - } - - try{ - // can get "permission denied" when opening file - entry_iter->get_pieces(); - } catch(const std::ifstream::failure& e) { - // there is nothing we can do. Untrack this entry - entry_iter = m_tracked_entries.erase(entry_iter); - continue; - } - - map_iter = contents_map.find(entry_iter->pieces); + map_iter = contents_map.find(entry.pieces); if (map_iter == contents_map.end()) { // add it to the map - contents_map.insert({entry_iter->pieces, 1}); - // std::cout << "First time seeing this piece: " << entry_iter->pieces << std::endl; + contents_map.insert({entry.pieces, 1}); } else { // increment occurences counter contents_map[map_iter->first]++; } - entry_iter++; }; uintmax_t untracked = 0; - m_tracked_entries.erase(std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [&untracked, contents_map](entry::Entry entry) -> bool { + tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [&untracked, contents_map](entry::Entry entry) -> bool { auto iter = contents_map.find(entry.pieces); if (iter->second == 1) { // unique @@ -169,21 +154,21 @@ uintmax_t Broom::m_untrack_unique_contents() { } else { return false; } - }), m_tracked_entries.end()); + }), tracked_entries.end()); return untracked; }; // finds all duplicates among tracked entries and marks them with appropriate group. -// Returns amount of duplicate files -uintmax_t Broom::m_find_duplicates() { +// Returns amount of duplicate files. +uintmax_t Broom::find_duplicates(std::vector& tracked_entries) { auto t0 = std::chrono::high_resolution_clock::now(); // print how many files are being tracked - uintmax_t global_untracked = m_tracked_entries.size(); + uintmax_t global_untracked = tracked_entries.size(); // untrack by size - uintmax_t untracked_by_size = m_untrack_unique_sizes(); + uintmax_t untracked_by_size = untrack_unique_sizes(tracked_entries); global_untracked += untracked_by_size; std::cout << "[INFO] Untracked " << untracked_by_size << " unique sizes" << std::endl; @@ -196,8 +181,19 @@ uintmax_t Broom::m_find_duplicates() { << " ms" << std::endl; } + + // get pieces for each entry. If error occurs (permission denied) - untrack it + tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [](entry::Entry& entry) -> bool { + try { + entry.get_pieces(); + return false; + } catch(...) { + return true; + } + }), tracked_entries.end()); + // untrack by contents - uintmax_t untracked_by_contents = m_untrack_unique_contents(); + uintmax_t untracked_by_contents = untrack_unique_contents(tracked_entries); global_untracked += untracked_by_contents; auto contents_untrack_time = std::chrono::high_resolution_clock::now(); @@ -211,11 +207,11 @@ uintmax_t Broom::m_find_duplicates() { std::cout << "[INFO] Untracked " << untracked_by_contents << " unique contents" << std::endl; - std::cout << "[INFO] Found " << m_tracked_entries.size() << " possible duplicate files" << std::endl; + std::cout << "[INFO] Found " << tracked_entries.size() << " possible duplicate files" << std::endl; // mark duplicate entries - for (entry::Entry& entry : m_tracked_entries) { + for (entry::Entry& entry : tracked_entries) { if (entry.group == group::EMPTY) { // do not mess up grouping continue; @@ -223,11 +219,11 @@ uintmax_t Broom::m_find_duplicates() { entry.group = group::DUPLICATE; } - return m_tracked_entries.size(); + return tracked_entries.size(); }; // creates a list of duplicate, empty files into a file -void Broom::create_scan_results_list(const std::filesystem::path dir, const std::string filename) { +void Broom::create_scan_results_list(const std::vector tracked_entries, const std::filesystem::path dir, const std::string filename) { if (!std::filesystem::exists(dir)) { // create it then bool created = std::filesystem::create_directories(dir); @@ -242,7 +238,7 @@ void Broom::create_scan_results_list(const std::filesystem::path dir, const std: throw "Could not create a scan results file"; } - for (const entry::Entry entry : m_tracked_entries) { + for (const entry::Entry entry : tracked_entries) { // log every entry and its group if (entry.group == group::EMPTY) { outfile << entry.path << " --- is an empty file" << std::endl; @@ -258,9 +254,9 @@ void Broom::create_scan_results_list(const std::filesystem::path dir, const std: // finds empty files among tracked entries and gives them appropriate group // Returns amount of found empty files -uintmax_t Broom::m_find_empty_files() { +uintmax_t Broom::find_empty_files(std::vector& tracked_entries) { uintmax_t found_empty_files = 0; - for (entry::Entry& entry : m_tracked_entries) { + for (entry::Entry& entry : tracked_entries) { if (entry.filesize == 0) { // empty files can`t be considered as duplicates. assign a group entry.group = group::EMPTY; @@ -273,15 +269,4 @@ uintmax_t Broom::m_find_empty_files() { return found_empty_files; }; -// scans directory for duplicates and empty files -void Broom::scan() { - m_find_empty_files(); - m_find_duplicates(); -}; - -// remove ALL duplicate files -void Broom::sweep() { -}; - - } diff --git a/src/broom.hpp b/src/broom.hpp old mode 100644 new mode 100755 index 1f80d1f..a0c86e5 --- a/src/broom.hpp +++ b/src/broom.hpp @@ -36,44 +36,33 @@ class Broom { protected: // enable/disable benchmarking output bool m_benchmarking; - // TODO(think about how sweeping should work) - bool m_sweeping; - // paths to tracked files - std::vector m_tracked_entries; +public: + Broom(Options options); + ~Broom(); - // finds empty files among tracked entries. + // recursively tracks every file that lies in given path. Throws an invalid_argument + // error in case path does not exist. Returns collected entries + std::vector track(const std::filesystem::path path); + + // finds empty files among tracked entries and marks them with the appropriate group. // Returns amount of found empty files - uintmax_t m_find_empty_files(); + uintmax_t find_empty_files(std::vector& tracked_entries); // removes entries with unique file sizes. Returns amount of files // that are no longer being tracked - uintmax_t m_untrack_unique_sizes(); + uintmax_t untrack_unique_sizes(std::vector& tracked_entries); // removes entries with the same content-pieces. Returns amount of - // files that are no longer being tracked - uintmax_t m_untrack_unique_contents(); + // files that are no longer being tracked. + uintmax_t untrack_unique_contents(std::vector& tracked_entries); // finds all duplicates among tracked entries and marks them with appropriate group // Returns amount of duplicate files - uintmax_t m_find_duplicates(); - -public: - Broom(Options options); - ~Broom(); - - // recursively track every file that lies in given path. Throws an invalid_argument - // error in case path does not exist - void track(const std::filesystem::path path); + uintmax_t find_duplicates(std::vector& tracked_entries); // creates a list of duplicate, empty files into a file - void create_scan_results_list(const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt"); - - // TODO - void sweep(); - - // scans tracked entries for duplicates and empty files - void scan(); + void create_scan_results_list(const std::vector tracked_entries, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt"); }; } diff --git a/src/entry.cpp b/src/entry.cpp old mode 100644 new mode 100755 diff --git a/src/entry.hpp b/src/entry.hpp old mode 100644 new mode 100755 index 9722be5..ebe74f8 --- a/src/entry.hpp +++ b/src/entry.hpp @@ -30,7 +30,7 @@ along with broom. If not, see . namespace entry { // 3 pieces (beginning, middle and end of the file) -const uint8_t PIECE_SIZE = 6; +const uint8_t PIECE_SIZE = 75; const uint8_t PIECES_AMOUNT = 3; // A wrapper for every file in filesystem with all necessary information diff --git a/src/group.hpp b/src/group.hpp old mode 100644 new mode 100755 diff --git a/src/main.cpp b/src/main.cpp old mode 100644 new mode 100755 index 2c4d5bb..a5332cd --- a/src/main.cpp +++ b/src/main.cpp @@ -21,6 +21,7 @@ along with broom. If not, see . #include #include #include +#include #include "entry.hpp" #include "broom.hpp" @@ -98,24 +99,23 @@ int main(int argc, char* argv[]) { broom::Broom broom(options); try { - broom.track(tracked_path); - broom.scan(); - broom.create_scan_results_list(); - } catch(const std::invalid_argument& e) { - std::cerr - << "[ERROR] Invalid argument: " << std::endl - << e.what() << std::endl; - return 1; + std::vector tracked_entries = broom.track(tracked_path); + broom.find_empty_files(tracked_entries); - } catch(const std::filesystem::filesystem_error& e) { - std::cerr - << "[ERROR] FS error: " << std::endl - << e.what() << std::endl; - return 1; + // get contents for each entry first + //auto handle = std::async(std::launch::async, [&tracked_entries]() { + // for (entry::Entry& e : tracked_entries) { + // e.get_pieces(); + // } + //}); + + //broom.untrack_unique_contents(tracked_entries); + broom.find_duplicates(tracked_entries); - } catch(...) { + broom.create_scan_results_list(tracked_entries); + } catch(const std::exception& e) { std::cerr - << "[ERROR] Unexpected exception" << std::endl; + << "[ERROR] " << e.what() << std::endl; return 1; };