diff --git a/src/broom.cpp b/src/broom.cpp
index 3adfc37..e23cf4c 100755
--- a/src/broom.cpp
+++ b/src/broom.cpp
@@ -31,17 +31,12 @@ along with broom. If not, see .
namespace broom {
-Broom::Broom(Options options) {
- m_benchmarking = options.benchmarking;
-};
-
+Broom::Broom() {};
Broom::~Broom() {};
// recursively track every file that lies in given path. Throws an invalid_argument
// error in case path does not exist
std::vector Broom::track(const std::filesystem::path path) {
- auto t0 = std::chrono::high_resolution_clock::now();
-
std::vector tracked_entries;
// check if given path even exists
@@ -70,22 +65,10 @@ std::vector Broom::track(const std::filesystem::path path) {
tracked_entries.push_back(entry);
}
-
- if (m_benchmarking) {
- auto tracking_time = std::chrono::high_resolution_clock::now();
-
- std::cout
- << "[BENCHMARK] Tracking took "
- << std::chrono::duration_cast(tracking_time - t0).count()
- << " ms" << std::endl;
- }
-
- std::cout << "[INFO] Tracking " << tracked_entries.size() << " files" << std::endl;
-
return tracked_entries;
};
-// removes entries with unique file sizes. Returns amount of files
+// untracks entries with unique file sizes. Returns amount of files
// that are no longer being tracked
uintmax_t Broom::untrack_unique_sizes(std::vector& tracked_entries) {
// key: size, value: amount of occurences
@@ -121,7 +104,7 @@ uintmax_t Broom::untrack_unique_sizes(std::vector& tracked_entries
};
-// removes entries with the same content-pieces. Returns amount of
+// untracks entries with unique content-pieces. Returns amount of
// files that are no longer being tracked
uintmax_t Broom::untrack_unique_contents(std::vector& tracked_entries) {
// contents, occurences
@@ -159,69 +142,6 @@ uintmax_t Broom::untrack_unique_contents(std::vector& tracked_entr
return untracked;
};
-// finds all duplicates among tracked entries and marks them with appropriate group.
-// Returns amount of duplicate files.
-uintmax_t Broom::find_duplicates(std::vector& tracked_entries) {
- auto t0 = std::chrono::high_resolution_clock::now();
-
- // print how many files are being tracked
- uintmax_t global_untracked = tracked_entries.size();
-
- // untrack by size
- uintmax_t untracked_by_size = untrack_unique_sizes(tracked_entries);
- global_untracked += untracked_by_size;
- std::cout << "[INFO] Untracked " << untracked_by_size << " unique sizes" << std::endl;
-
- auto sizes_untrack_time = std::chrono::high_resolution_clock::now();
-
- if (m_benchmarking) {
- std::cout
- << "[BENCHMARK] Untracking by size took "
- << std::chrono::duration_cast(sizes_untrack_time - t0).count()
- << " ms" << std::endl;
- }
-
-
- // get pieces for each entry. If error occurs (permission denied) - untrack it
- tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [](entry::Entry& entry) -> bool {
- try {
- entry.get_pieces();
- return false;
- } catch(...) {
- return true;
- }
- }), tracked_entries.end());
-
- // untrack by contents
- uintmax_t untracked_by_contents = untrack_unique_contents(tracked_entries);
- global_untracked += untracked_by_contents;
-
- auto contents_untrack_time = std::chrono::high_resolution_clock::now();
-
- if (m_benchmarking) {
- std::cout
- << "[BENCHMARK] Untracking by contents took "
- << std::chrono::duration_cast(contents_untrack_time - sizes_untrack_time).count()
- << " ms" << std::endl;
- }
-
- std::cout << "[INFO] Untracked " << untracked_by_contents << " unique contents" << std::endl;
-
- std::cout << "[INFO] Found " << tracked_entries.size() << " possible duplicate files" << std::endl;
-
- // mark duplicate entries
-
- for (entry::Entry& entry : tracked_entries) {
- if (entry.group == group::EMPTY) {
- // do not mess up grouping
- continue;
- }
- entry.group = group::DUPLICATE;
- }
-
- return tracked_entries.size();
-};
-
// creates a list of duplicate, empty files into a file
void Broom::create_scan_results_list(const std::vector tracked_entries, const std::filesystem::path dir, const std::string filename) {
if (!std::filesystem::exists(dir)) {
@@ -248,8 +168,6 @@ void Broom::create_scan_results_list(const std::vector tracked_ent
}
outfile.close();
-
- std::cout << "[INFO] Created scan results file" << std::endl;
};
// finds empty files among tracked entries and gives them appropriate group
@@ -264,9 +182,29 @@ uintmax_t Broom::find_empty_files(std::vector& tracked_entries) {
}
}
- std::cout << "[INFO] Found " << found_empty_files << " empty files" << std::endl;
-
return found_empty_files;
};
+// REMOVES (entry.remove()) every entry grouped as EMPTY and untracks it. Returns the amount of successfully removed files
+uintmax_t Broom::remove_empty_files(std::vector<entry::Entry>& tracked_entries) {
+    uintmax_t removed = 0;
+
+    tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [&removed](entry::Entry& entry) -> bool {
+        if (entry.group == group::EMPTY) {
+            try {
+                entry.remove();
+                removed++;
+                return true;
+            } catch(...) {
+                // NOTE(review): a failed removal is untracked as well, just not counted - confirm this is intended
+                return true;
+            }
+        }
+        // entries of any other group stay tracked
+        return false;
+    }), tracked_entries.end());
+
+    return removed;
+};
+
}
diff --git a/src/broom.hpp b/src/broom.hpp
index a0c86e5..c7e8df6 100755
--- a/src/broom.hpp
+++ b/src/broom.hpp
@@ -24,42 +24,31 @@ along with broom. If not, see .
#include
namespace broom {
-// Broom`s settings
-struct Options {
- bool sweeping;
- bool benchmarking;
-};
-
-// A class to find and manage duplicate files
+// A class to find and manage duplicate, empty files
class Broom {
-protected:
- // enable/disable benchmarking output
- bool m_benchmarking;
-
public:
- Broom(Options options);
+ Broom();
~Broom();
// recursively tracks every file that lies in given path. Throws an invalid_argument
// error in case path does not exist. Returns collected entries
std::vector track(const std::filesystem::path path);
- // finds empty files among tracked entries and marks them with the appropriate group.
- // Returns amount of found empty files
- uintmax_t find_empty_files(std::vector& tracked_entries);
-
- // removes entries with unique file sizes. Returns amount of files
+ // untracks entries with unique file sizes. Returns amount of files
// that are no longer being tracked
uintmax_t untrack_unique_sizes(std::vector& tracked_entries);
- // removes entries with the same content-pieces. Returns amount of
+ // untracks entries with unique content-pieces. Returns amount of
// files that are no longer being tracked.
uintmax_t untrack_unique_contents(std::vector& tracked_entries);
- // finds all duplicates among tracked entries and marks them with appropriate group
- // Returns amount of duplicate files
- uintmax_t find_duplicates(std::vector& tracked_entries);
+ // finds empty files among tracked entries and marks them with the appropriate group.
+ // Returns amount of found empty files
+ uintmax_t find_empty_files(std::vector<entry::Entry>& tracked_entries);
+
+ // REMOVES grouped empty files and untracks them after deletion. Returns the amount of removed empty files
+ uintmax_t remove_empty_files(std::vector<entry::Entry>& tracked_entries);
// creates a list of duplicate, empty files into a file
void create_scan_results_list(const std::vector tracked_entries, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt");
diff --git a/src/group.hpp b/src/group.hpp
index f9a12ee..956169d 100755
--- a/src/group.hpp
+++ b/src/group.hpp
@@ -1,3 +1,23 @@
+/*
+Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))
+
+This file is part of broom.
+
+broom is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+broom is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with broom. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+
#ifndef GROUP_HPP
#define GROUP_HPP
diff --git a/src/main.cpp b/src/main.cpp
index a5332cd..ed6a984 100755
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -22,12 +22,13 @@ along with broom. If not, see .
#include
#include
#include
+#include
#include "entry.hpp"
#include "broom.hpp"
// Broom version number
-#define VERSION "v0.1.1"
+#define VERSION "v0.1.2"
void print_help() {
std::cout
@@ -37,8 +38,8 @@ void print_help() {
<< "-h | --help -> print this message and exit" << std::endl << std::endl
<< "[COMMANDS]" << std::endl
- << "sweep -> scan for duplicate files and delete (sweep) all of them but the last one" << std::endl
- << "scan -> scan for duplicate files and output information in a file" << std::endl
+ << "sweep -> scan for duplicate files, save results in a file and REMOVE empty files" << std::endl
+ << "scan -> scan and save results in a file without touching any files [DEFAULT]" << std::endl
<< std::endl;
};
@@ -47,6 +48,15 @@ void print_version() {
<< "broom " << VERSION << std::endl
<< "incurable hoarder`s helpful friend" << std::endl << std::endl
+ << " _" << std::endl
+ << " //" << std::endl
+ << " // " << std::endl
+ << " // " << std::endl
+ << " // " << std::endl
+ << " /####/ " << std::endl
+ << " ////// " << std::endl
+ << " /////// " << std::endl << std::endl
+
<< "Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))" << std::endl
<< "This program comes with ABSOLUTELY NO WARRANTY." << std::endl
<< "This is free software, and you are welcome to redistribute it" << std::endl
@@ -55,7 +65,9 @@ void print_version() {
};
int main(int argc, char* argv[]) {
- broom::Options options;
+ bool benchmarking = false;
+ bool sweeping = false;
+
std::filesystem::path tracked_path;
if (argc < 2) {
@@ -76,13 +88,13 @@ int main(int argc, char* argv[]) {
return 0;
}
else if (strcmp(argv[i], "-b") == 0 || strcmp(argv[i], "--benchmark") == 0) {
- options.benchmarking = true;
+ benchmarking = true;
}
else if (strcmp(argv[i], "sweep") == 0) {
- options.sweeping = true;
+ sweeping = true;
}
else if (strcmp(argv[i], "scan") == 0) {
- options.sweeping = false;
+ sweeping = false;
}
else {
// add path
@@ -97,22 +109,83 @@ int main(int argc, char* argv[]) {
};
- broom::Broom broom(options);
+ broom::Broom broom;
try {
+ // auto t0 = std::chrono::high_resolution_clock::now();
+ /*
+ auto tracking_time = std::chrono::high_resolution_clock::now();
+
+ std::cout
+ << "[BENCHMARK] Tracking took "
+ << std::chrono::duration_cast(tracking_time - t0).count()
+ << " ms" << std::endl;
+ */
+
+ std::cout
+ << " _" << std::endl
+ << " //" << std::endl
+ << " // " << std::endl
+ << " // " << std::endl
+ << " // " << std::endl
+ << " /####/ " << std::endl
+ << " ////// " << std::endl
+ << " /////// " << std::endl << std::endl;
+ if (sweeping) {
+ std::cout << "[Sweeping]" << std::endl << std::endl;
+ } else {
+ std::cout << "[Scanning]" << std::endl << std::endl;
+ }
+
+ // track files in a given directory
std::vector tracked_entries = broom.track(tracked_path);
- broom.find_empty_files(tracked_entries);
+ std::cout << "[INFO] Tracking " << tracked_entries.size() << " files" << std::endl;
- // get contents for each entry first
- //auto handle = std::async(std::launch::async, [&tracked_entries]() {
- // for (entry::Entry& e : tracked_entries) {
- // e.get_pieces();
- // }
- //});
+ // find empty files
+ uintmax_t empty_files = broom.find_empty_files(tracked_entries);
+ std::cout << "[INFO] Found " << empty_files << " empty files" << std::endl;
- //broom.untrack_unique_contents(tracked_entries);
- broom.find_duplicates(tracked_entries);
+ // if sweeping - remove empty files right away
+ if (sweeping) {
+ uintmax_t removed = broom.remove_empty_files(tracked_entries);
+ std::cout << "[INFO] Removed " << removed << " empty files" << std::endl;
+ }
+ // untrack unique sizes
+ uintmax_t untracked = broom.untrack_unique_sizes(tracked_entries);
+ std::cout << "[INFO] Untracked " << untracked << " files with a unique size" << std::endl;
+
+ // get content pieces for each entry
+ tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [](entry::Entry& entry) -> bool {
+ // ignore possible "permission denied"s
+ try {
+ entry.get_pieces();
+ return false;
+ } catch(...) {
+ return true;
+ }
+ }), tracked_entries.end());
+
+ // untrack unique contents
+ untracked = broom.untrack_unique_contents(tracked_entries);
+ std::cout << "[INFO] Untracked " << untracked << " files with unique contents" << std::endl;
+
+ // mark entries as duplicates
+ for (entry::Entry& entry : tracked_entries) {
+ if (entry.group == group::EMPTY) {
+ // do not mess up grouping
+ continue;
+ }
+ entry.group = group::DUPLICATE;
+ }
+
+ std::cout << "[INFO] " << tracked_entries.size() << " files left being tracked" << std::endl;
+
+ // now only files with a non-unique size and contents are being tracked
+ // are they REALLY duplicates ?
+ // leave the REAL cleanup for the user, saving these entries in a file
broom.create_scan_results_list(tracked_entries);
+ std::cout << "[INFO] Created scan results file" << std::endl;
+
} catch(const std::exception& e) {
std::cerr
<< "[ERROR] " << e.what() << std::endl;