From d349618dceb121845e179ac0c6a409057d13c4a2 Mon Sep 17 00:00:00 2001 From: Unbewohnte Date: Sun, 16 Jan 2022 10:21:44 +0300 Subject: [PATCH] Moved printing to main(); Remove empty files on sweeping --- src/broom.cpp | 112 +++++++++++--------------------------------------- src/broom.hpp | 31 +++++--------- src/group.hpp | 20 +++++++++ src/main.cpp | 107 +++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 145 insertions(+), 125 deletions(-) diff --git a/src/broom.cpp b/src/broom.cpp index 3adfc37..e23cf4c 100755 --- a/src/broom.cpp +++ b/src/broom.cpp @@ -31,17 +31,12 @@ along with broom. If not, see . namespace broom { -Broom::Broom(Options options) { - m_benchmarking = options.benchmarking; -}; - +Broom::Broom() {}; Broom::~Broom() {}; // recursively track every file that lies in given path. Throws an invalid_argument // error in case path does not exist std::vector Broom::track(const std::filesystem::path path) { - auto t0 = std::chrono::high_resolution_clock::now(); - std::vector tracked_entries; // check if given path even exists @@ -70,22 +65,10 @@ std::vector Broom::track(const std::filesystem::path path) { tracked_entries.push_back(entry); } - - if (m_benchmarking) { - auto tracking_time = std::chrono::high_resolution_clock::now(); - - std::cout - << "[BENCHMARK] Tracking took " - << std::chrono::duration_cast(tracking_time - t0).count() - << " ms" << std::endl; - } - - std::cout << "[INFO] Tracking " << tracked_entries.size() << " files" << std::endl; - return tracked_entries; }; -// removes entries with unique file sizes. Returns amount of files +// untracks entries with unique file sizes. Returns amount of files // that are no longer being tracked uintmax_t Broom::untrack_unique_sizes(std::vector& tracked_entries) { // key: size, value: amount of occurences @@ -121,7 +104,7 @@ uintmax_t Broom::untrack_unique_sizes(std::vector& tracked_entries }; -// removes entries with the same content-pieces. 
Returns amount of +// untracks entries with unique content-pieces. Returns amount of // files that are no longer being tracked uintmax_t Broom::untrack_unique_contents(std::vector& tracked_entries) { // contents, occurences @@ -159,69 +142,6 @@ uintmax_t Broom::untrack_unique_contents(std::vector& tracked_entr return untracked; }; -// finds all duplicates among tracked entries and marks them with appropriate group. -// Returns amount of duplicate files. -uintmax_t Broom::find_duplicates(std::vector& tracked_entries) { - auto t0 = std::chrono::high_resolution_clock::now(); - - // print how many files are being tracked - uintmax_t global_untracked = tracked_entries.size(); - - // untrack by size - uintmax_t untracked_by_size = untrack_unique_sizes(tracked_entries); - global_untracked += untracked_by_size; - std::cout << "[INFO] Untracked " << untracked_by_size << " unique sizes" << std::endl; - - auto sizes_untrack_time = std::chrono::high_resolution_clock::now(); - - if (m_benchmarking) { - std::cout - << "[BENCHMARK] Untracking by size took " - << std::chrono::duration_cast(sizes_untrack_time - t0).count() - << " ms" << std::endl; - } - - - // get pieces for each entry. If error occurs (permission denied) - untrack it - tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [](entry::Entry& entry) -> bool { - try { - entry.get_pieces(); - return false; - } catch(...) 
{ - return true; - } - }), tracked_entries.end()); - - // untrack by contents - uintmax_t untracked_by_contents = untrack_unique_contents(tracked_entries); - global_untracked += untracked_by_contents; - - auto contents_untrack_time = std::chrono::high_resolution_clock::now(); - - if (m_benchmarking) { - std::cout - << "[BENCHMARK] Untracking by contents took " - << std::chrono::duration_cast(contents_untrack_time - sizes_untrack_time).count() - << " ms" << std::endl; - } - - std::cout << "[INFO] Untracked " << untracked_by_contents << " unique contents" << std::endl; - - std::cout << "[INFO] Found " << tracked_entries.size() << " possible duplicate files" << std::endl; - - // mark duplicate entries - - for (entry::Entry& entry : tracked_entries) { - if (entry.group == group::EMPTY) { - // do not mess up grouping - continue; - } - entry.group = group::DUPLICATE; - } - - return tracked_entries.size(); -}; - // creates a list of duplicate, empty files into a file void Broom::create_scan_results_list(const std::vector tracked_entries, const std::filesystem::path dir, const std::string filename) { if (!std::filesystem::exists(dir)) { @@ -248,8 +168,6 @@ void Broom::create_scan_results_list(const std::vector tracked_ent } outfile.close(); - - std::cout << "[INFO] Created scan results file" << std::endl; }; // finds empty files among tracked entries and gives them appropriate group @@ -264,9 +182,29 @@ uintmax_t Broom::find_empty_files(std::vector& tracked_entries) { } } - std::cout << "[INFO] Found " << found_empty_files << " empty files" << std::endl; - return found_empty_files; }; +// REMOVES grouped empty files and untracks them after deletion. 
Returns the amount of removed empty files +uintmax_t Broom::remove_empty_files(std::vector& tracked_entries) { + uintmax_t removed = 0; + + tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [&removed](entry::Entry& entry) -> bool { + if (entry.group == group::EMPTY) { + try { + entry.remove(); + removed++; + return true; + } catch(...) { + return true; + } + + } + return false; + + }), tracked_entries.end()); + + return removed; +}; + } diff --git a/src/broom.hpp b/src/broom.hpp index a0c86e5..c7e8df6 100755 --- a/src/broom.hpp +++ b/src/broom.hpp @@ -24,42 +24,31 @@ along with broom. If not, see . #include namespace broom { -// Broom`s settings -struct Options { - bool sweeping; - bool benchmarking; -}; - -// A class to find and manage duplicate files +// A class to find and manage duplicate, empty files class Broom { -protected: - // enable/disable benchmarking output - bool m_benchmarking; - public: - Broom(Options options); + Broom(); ~Broom(); // recursively tracks every file that lies in given path. Throws an invalid_argument // error in case path does not exist. Returns collected entries std::vector track(const std::filesystem::path path); - // finds empty files among tracked entries and marks them with the appropriate group. - // Returns amount of found empty files - uintmax_t find_empty_files(std::vector& tracked_entries); - - // removes entries with unique file sizes. Returns amount of files + // untracks entries with unique file sizes. Returns amount of files // that are no longer being tracked uintmax_t untrack_unique_sizes(std::vector& tracked_entries); - // removes entries with the same content-pieces. Returns amount of + // untracks entries with unique content-pieces. Returns amount of // files that are no longer being tracked. 
uintmax_t untrack_unique_contents(std::vector& tracked_entries); - // finds all duplicates among tracked entries and marks them with appropriate group - // Returns amount of duplicate files - uintmax_t find_duplicates(std::vector& tracked_entries); + // finds empty files among tracked entries and marks them with the appropriate group. + // Returns amount of found empty files + uintmax_t find_empty_files(std::vector& tracked_entries); + + // REMOVES grouped empty files and untracks them after deletion. Returns the amount of removed empty files + uintmax_t remove_empty_files(std::vector& tracked_entries); // creates a list of duplicate, empty files into a file void create_scan_results_list(const std::vector tracked_entries, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt"); diff --git a/src/group.hpp b/src/group.hpp index f9a12ee..956169d 100755 --- a/src/group.hpp +++ b/src/group.hpp @@ -1,3 +1,23 @@ +/* +Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz)) + +This file is part of broom. + +broom is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +broom is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with broom. If not, see . +*/ + + #ifndef GROUP_HPP #define GROUP_HPP diff --git a/src/main.cpp b/src/main.cpp index a5332cd..ed6a984 100755 --- a/src/main.cpp +++ b/src/main.cpp @@ -22,12 +22,13 @@ along with broom. If not, see . 
#include #include #include +#include #include "entry.hpp" #include "broom.hpp" // Broom version number -#define VERSION "v0.1.1" +#define VERSION "v0.1.2" void print_help() { std::cout @@ -37,8 +38,8 @@ void print_help() { << "-h | --help -> print this message and exit" << std::endl << std::endl << "[COMMANDS]" << std::endl - << "sweep -> scan for duplicate files and delete (sweep) all of them but the last one" << std::endl - << "scan -> scan for duplicate files and output information in a file" << std::endl + << "sweep -> scan for duplicate files, save results in a file and REMOVE empty files" << std::endl + << "scan -> scan and save results in a file without touching any files [DEFAULT]" << std::endl << std::endl; }; @@ -47,6 +48,15 @@ void print_version() { << "broom " << VERSION << std::endl << "incurable hoarder`s helpful friend" << std::endl << std::endl + << " _" << std::endl + << " //" << std::endl + << " // " << std::endl + << " // " << std::endl + << " // " << std::endl + << " /####/ " << std::endl + << " ////// " << std::endl + << " /////// " << std::endl << std::endl + << "Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))" << std::endl << "This program comes with ABSOLUTELY NO WARRANTY." 
<< std::endl << "This is free software, and you are welcome to redistribute it" << std::endl @@ -55,7 +65,9 @@ void print_version() { }; int main(int argc, char* argv[]) { - broom::Options options; + bool benchmarking = false; + bool sweeping = false; + std::filesystem::path tracked_path; if (argc < 2) { @@ -76,13 +88,13 @@ int main(int argc, char* argv[]) { return 0; } else if (strcmp(argv[i], "-b") == 0 || strcmp(argv[i], "--benchmark") == 0) { - options.benchmarking = true; + benchmarking = true; } else if (strcmp(argv[i], "sweep") == 0) { - options.sweeping = true; + sweeping = true; } else if (strcmp(argv[i], "scan") == 0) { - options.sweeping = false; + sweeping = false; } else { // add path @@ -97,22 +109,83 @@ int main(int argc, char* argv[]) { }; - broom::Broom broom(options); + broom::Broom broom; try { + // auto t0 = std::chrono::high_resolution_clock::now(); + /* + auto tracking_time = std::chrono::high_resolution_clock::now(); + + std::cout + << "[BENCHMARK] Tracking took " + << std::chrono::duration_cast(tracking_time - t0).count() + << " ms" << std::endl; + */ + + std::cout + << " _" << std::endl + << " //" << std::endl + << " // " << std::endl + << " // " << std::endl + << " // " << std::endl + << " /####/ " << std::endl + << " ////// " << std::endl + << " /////// " << std::endl << std::endl; + if (sweeping) { + std::cout << "[Sweeping]" << std::endl << std::endl; + } else { + std::cout << "[Scanning]" << std::endl << std::endl; + } + + // track files in a given directory std::vector tracked_entries = broom.track(tracked_path); - broom.find_empty_files(tracked_entries); + std::cout << "[INFO] Tracking " << tracked_entries.size() << " files" << std::endl; - // get contents for each entry first - //auto handle = std::async(std::launch::async, [&tracked_entries]() { - // for (entry::Entry& e : tracked_entries) { - // e.get_pieces(); - // } - //}); + // find empty files + uintmax_t empty_files = broom.find_empty_files(tracked_entries); + std::cout << 
"[INFO] Found " << empty_files << " empty files" << std::endl; - //broom.untrack_unique_contents(tracked_entries); - broom.find_duplicates(tracked_entries); + // if sweeping - remove empty files right away + if (sweeping) { + uintmax_t removed = broom.remove_empty_files(tracked_entries); + std::cout << "[INFO] Removed " << removed << " empty files" << std::endl; + } + // untrack unique sizes + uintmax_t untracked = broom.untrack_unique_sizes(tracked_entries); + std::cout << "[INFO] Untracked " << untracked << " files with a unique size" << std::endl; + + // get content pieces for each entry + tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [](entry::Entry& entry) -> bool { + // ignore possible "permission denied"s + try { + entry.get_pieces(); + return false; + } catch(...) { + return true; + } + }), tracked_entries.end()); + + // untrack unique contents + untracked = broom.untrack_unique_contents(tracked_entries); + std::cout << "[INFO] Untracked " << untracked << " files with unique contents" << std::endl; + + // mark entries as duplicates + for (entry::Entry& entry : tracked_entries) { + if (entry.group == group::EMPTY) { + // do not mess up grouping + continue; + } + entry.group = group::DUPLICATE; + } + + std::cout << "[INFO] " << tracked_entries.size() << " files left being tracked" << std::endl; + + // now only files with a non-unique size and contents are being tracked + // are they REALLY duplicates ? + // leave the REAL cleanup for the user, saving these entries in a file broom.create_scan_results_list(tracked_entries); + std::cout << "[INFO] Created scan results file" << std::endl; + } catch(const std::exception& e) { std::cerr << "[ERROR] " << e.what() << std::endl;