Browse Source

Moved printing to main(); Remove empty files on sweeping

main
Unbewohnte 3 years ago
parent
commit
d349618dce
  1. 112
      src/broom.cpp
  2. 31
      src/broom.hpp
  3. 20
      src/group.hpp
  4. 107
      src/main.cpp

112
src/broom.cpp

@ -31,17 +31,12 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
namespace broom {
// Builds a Broom from the given options. Only the benchmarking flag is
// consumed here; it controls whether [BENCHMARK] timings are printed.
Broom::Broom(Options options)
    : m_benchmarking(options.benchmarking) {
};
// Default constructor: performs no work of its own (empty body).
Broom::Broom() {};
// Destructor: performs no work of its own (empty body).
Broom::~Broom() {};
// recursively track every file that lies in given path. Throws an invalid_argument
// error in case path does not exist
std::vector<entry::Entry> Broom::track(const std::filesystem::path path) {
auto t0 = std::chrono::high_resolution_clock::now();
std::vector<entry::Entry> tracked_entries;
// check if given path even exists
@ -70,22 +65,10 @@ std::vector<entry::Entry> Broom::track(const std::filesystem::path path) {
tracked_entries.push_back(entry);
}
if (m_benchmarking) {
auto tracking_time = std::chrono::high_resolution_clock::now();
std::cout
<< "[BENCHMARK] Tracking took "
<< std::chrono::duration_cast<std::chrono::milliseconds>(tracking_time - t0).count()
<< " ms" << std::endl;
}
std::cout << "[INFO] Tracking " << tracked_entries.size() << " files" << std::endl;
return tracked_entries;
};
// removes entries with unique file sizes. Returns amount of files
// untracks entries with unique file sizes. Returns amount of files
// that are no longer being tracked
uintmax_t Broom::untrack_unique_sizes(std::vector<entry::Entry>& tracked_entries) {
// key: size, value: amount of occurrences
@ -121,7 +104,7 @@ uintmax_t Broom::untrack_unique_sizes(std::vector<entry::Entry>& tracked_entries
};
// removes entries with the same content-pieces. Returns amount of
// untracks entries with unique content-pieces. Returns amount of
// files that are no longer being tracked
uintmax_t Broom::untrack_unique_contents(std::vector<entry::Entry>& tracked_entries) {
// contents, occurrences
@ -159,69 +142,6 @@ uintmax_t Broom::untrack_unique_contents(std::vector<entry::Entry>& tracked_entr
return untracked;
};
// Finds all duplicates among tracked entries and marks them with the
// DUPLICATE group. Entries already grouped as EMPTY keep their group.
//
// Steps:
//   1. untrack entries whose file size is unique;
//   2. read content pieces for every remaining entry, untracking any entry
//      for which reading throws (e.g. permission denied);
//   3. untrack entries whose contents are unique;
//   4. mark whatever is left as DUPLICATE.
//
// Progress is printed to stdout as [INFO] lines; [BENCHMARK] timings are
// printed as well when m_benchmarking is set.
//
// Returns the number of entries still tracked afterwards, i.e. the amount of
// possible duplicate files (this count includes EMPTY-grouped entries that
// survived the untracking passes).
uintmax_t Broom::find_duplicates(std::vector<entry::Entry>& tracked_entries) {
    const auto t0 = std::chrono::high_resolution_clock::now();

    // untrack by size; take the timestamp right after the pass so that the
    // benchmark does not include the time spent printing
    const uintmax_t untracked_by_size = untrack_unique_sizes(tracked_entries);
    const auto sizes_untrack_time = std::chrono::high_resolution_clock::now();

    std::cout << "[INFO] Untracked " << untracked_by_size << " unique sizes" << std::endl;

    if (m_benchmarking) {
        std::cout
        << "[BENCHMARK] Untracking by size took "
        << std::chrono::duration_cast<std::chrono::milliseconds>(sizes_untrack_time - t0).count()
        << " ms" << std::endl;
    }

    // get pieces for each entry. If error occurs (permission denied) - untrack it
    tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [](entry::Entry& entry) -> bool {
        try {
            entry.get_pieces();
            return false;
        } catch(...) {
            return true;
        }
    }), tracked_entries.end());

    // untrack by contents
    const uintmax_t untracked_by_contents = untrack_unique_contents(tracked_entries);
    const auto contents_untrack_time = std::chrono::high_resolution_clock::now();

    if (m_benchmarking) {
        // NOTE: this interval starts at the end of the size pass, so it also
        // covers reading the content pieces above
        std::cout
        << "[BENCHMARK] Untracking by contents took "
        << std::chrono::duration_cast<std::chrono::milliseconds>(contents_untrack_time - sizes_untrack_time).count()
        << " ms" << std::endl;
    }

    std::cout << "[INFO] Untracked " << untracked_by_contents << " unique contents" << std::endl;
    std::cout << "[INFO] Found " << tracked_entries.size() << " possible duplicate files" << std::endl;

    // mark duplicate entries; EMPTY entries keep their group so the grouping
    // done by find_empty_files is not messed up
    for (entry::Entry& entry : tracked_entries) {
        if (entry.group != group::EMPTY) {
            entry.group = group::DUPLICATE;
        }
    }

    return tracked_entries.size();
};
// writes a list of duplicate and empty files to a file
void Broom::create_scan_results_list(const std::vector<entry::Entry> tracked_entries, const std::filesystem::path dir, const std::string filename) {
if (!std::filesystem::exists(dir)) {
@ -248,8 +168,6 @@ void Broom::create_scan_results_list(const std::vector<entry::Entry> tracked_ent
}
outfile.close();
std::cout << "[INFO] Created scan results file" << std::endl;
};
// finds empty files among tracked entries and gives them appropriate group
@ -264,9 +182,29 @@ uintmax_t Broom::find_empty_files(std::vector<entry::Entry>& tracked_entries) {
}
}
std::cout << "[INFO] Found " << found_empty_files << " empty files" << std::endl;
return found_empty_files;
};
// DELETES every tracked entry grouped as EMPTY from disk and stops tracking it.
// An EMPTY entry whose deletion throws is untracked as well, but it is not
// counted. Entries of any other group are left untouched.
// Returns the amount of empty files that were actually removed.
uintmax_t Broom::remove_empty_files(std::vector<entry::Entry>& tracked_entries) {
    uintmax_t deleted_count = 0;

    // predicate: true for every entry that must no longer be tracked
    const auto drop_if_empty = [&deleted_count](entry::Entry& candidate) -> bool {
        if (candidate.group != group::EMPTY) {
            return false;
        }

        try {
            candidate.remove();
            ++deleted_count;
        } catch(...) {
            // deletion failed: the entry is still dropped, just not counted
        }
        return true;
    };

    const auto first_dropped = std::remove_if(tracked_entries.begin(), tracked_entries.end(), drop_if_empty);
    tracked_entries.erase(first_dropped, tracked_entries.end());

    return deleted_count;
};
}

31
src/broom.hpp

@ -24,42 +24,31 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <vector>
namespace broom {
// Broom's settings.
// Both flags default to false so that a default-constructed Options is fully
// initialized even when the caller only sets the flags it cares about.
struct Options {
    // true when the "sweep" command was requested, false for "scan"
    bool sweeping = false;
    // enable/disable benchmarking output
    bool benchmarking = false;
};
// A class to find and manage duplicate files
// A class to find and manage duplicate, empty files
class Broom {
protected:
// enable/disable benchmarking output
bool m_benchmarking;
public:
Broom(Options options);
Broom();
~Broom();
// recursively tracks every file that lies in given path. Throws an invalid_argument
// error in case path does not exist. Returns collected entries
std::vector<entry::Entry> track(const std::filesystem::path path);
// finds empty files among tracked entries and marks them with the appropriate group.
// Returns amount of found empty files
uintmax_t find_empty_files(std::vector<entry::Entry>& tracked_entries);
// removes entries with unique file sizes. Returns amount of files
// untracks entries with unique file sizes. Returns amount of files
// that are no longer being tracked
uintmax_t untrack_unique_sizes(std::vector<entry::Entry>& tracked_entries);
// removes entries with the same content-pieces. Returns amount of
// untracks entries with unique content-pieces. Returns amount of
// files that are no longer being tracked.
uintmax_t untrack_unique_contents(std::vector<entry::Entry>& tracked_entries);
// finds all duplicates among tracked entries and marks them with appropriate group
// Returns amount of duplicate files
uintmax_t find_duplicates(std::vector<entry::Entry>& tracked_entries);
// finds empty files among tracked entries and marks them with the appropriate group.
// Returns amount of found empty files
uintmax_t find_empty_files(std::vector<entry::Entry>& tracked_entries);
// REMOVES grouped empty files and untracks them after deletion. Returns the amount of removed empty files
uintmax_t remove_empty_files(std::vector<entry::Entry>& tracked_entries);
// creates a list of duplicate, empty files into a file
void create_scan_results_list(const std::vector<entry::Entry> tracked_entries, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt");

20
src/group.hpp

@ -1,3 +1,23 @@
/*
Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))
This file is part of broom.
broom is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
broom is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with broom. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef GROUP_HPP
#define GROUP_HPP

107
src/main.cpp

@ -22,12 +22,13 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <string.h>
#include <vector>
#include <future>
#include <algorithm>
#include "entry.hpp"
#include "broom.hpp"
// Broom version number
#define VERSION "v0.1.1"
#define VERSION "v0.1.2"
void print_help() {
std::cout
@ -37,8 +38,8 @@ void print_help() {
<< "-h | --help -> print this message and exit" << std::endl << std::endl
<< "[COMMANDS]" << std::endl
<< "sweep -> scan for duplicate files and delete (sweep) all of them but the last one" << std::endl
<< "scan -> scan for duplicate files and output information in a file" << std::endl
<< "sweep -> scan for duplicate files, save results in a file and REMOVE empty files" << std::endl
<< "scan -> scan and save results in a file without touching any files [DEFAULT]" << std::endl
<< std::endl;
};
@ -47,6 +48,15 @@ void print_version() {
<< "broom " << VERSION << std::endl
<< "incurable hoarder`s helpful friend" << std::endl << std::endl
<< " _" << std::endl
<< " //" << std::endl
<< " // " << std::endl
<< " // " << std::endl
<< " // " << std::endl
<< " /####/ " << std::endl
<< " ////// " << std::endl
<< " /////// " << std::endl << std::endl
<< "Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))" << std::endl
<< "This program comes with ABSOLUTELY NO WARRANTY." << std::endl
<< "This is free software, and you are welcome to redistribute it" << std::endl
@ -55,7 +65,9 @@ void print_version() {
};
int main(int argc, char* argv[]) {
broom::Options options;
bool benchmarking = false;
bool sweeping = false;
std::filesystem::path tracked_path;
if (argc < 2) {
@ -76,13 +88,13 @@ int main(int argc, char* argv[]) {
return 0;
}
else if (strcmp(argv[i], "-b") == 0 || strcmp(argv[i], "--benchmark") == 0) {
options.benchmarking = true;
benchmarking = true;
}
else if (strcmp(argv[i], "sweep") == 0) {
options.sweeping = true;
sweeping = true;
}
else if (strcmp(argv[i], "scan") == 0) {
options.sweeping = false;
sweeping = false;
}
else {
// add path
@ -97,22 +109,83 @@ int main(int argc, char* argv[]) {
};
broom::Broom broom(options);
broom::Broom broom;
try {
// auto t0 = std::chrono::high_resolution_clock::now();
/*
auto tracking_time = std::chrono::high_resolution_clock::now();
std::cout
<< "[BENCHMARK] Tracking took "
<< std::chrono::duration_cast<std::chrono::milliseconds>(tracking_time - t0).count()
<< " ms" << std::endl;
*/
std::cout
<< " _" << std::endl
<< " //" << std::endl
<< " // " << std::endl
<< " // " << std::endl
<< " // " << std::endl
<< " /####/ " << std::endl
<< " ////// " << std::endl
<< " /////// " << std::endl << std::endl;
if (sweeping) {
std::cout << "[Sweeping]" << std::endl << std::endl;
} else {
std::cout << "[Scanning]" << std::endl << std::endl;
}
// track files in a given directory
std::vector<entry::Entry> tracked_entries = broom.track(tracked_path);
broom.find_empty_files(tracked_entries);
std::cout << "[INFO] Tracking " << tracked_entries.size() << " files" << std::endl;
// get contents for each entry first
//auto handle = std::async(std::launch::async, [&tracked_entries]() {
// for (entry::Entry& e : tracked_entries) {
// e.get_pieces();
// }
//});
// find empty files
uintmax_t empty_files = broom.find_empty_files(tracked_entries);
std::cout << "[INFO] Found " << empty_files << " empty files" << std::endl;
//broom.untrack_unique_contents(tracked_entries);
broom.find_duplicates(tracked_entries);
// if sweeping - remove empty files right away
if (sweeping) {
uintmax_t removed = broom.remove_empty_files(tracked_entries);
std::cout << "[INFO] Removed " << removed << " empty files" << std::endl;
}
// untrack unique sizes
uintmax_t untracked = broom.untrack_unique_sizes(tracked_entries);
std::cout << "[INFO] Untracked " << untracked << " files with a unique size" << std::endl;
// get content pieces for each entry
tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [](entry::Entry& entry) -> bool {
// ignore possible "permission denied"s
try {
entry.get_pieces();
return false;
} catch(...) {
return true;
}
}), tracked_entries.end());
// untrack unique contents
untracked = broom.untrack_unique_contents(tracked_entries);
std::cout << "[INFO] Untracked " << untracked << " files with unique contents" << std::endl;
// mark entries as duplicates
for (entry::Entry& entry : tracked_entries) {
if (entry.group == group::EMPTY) {
// do not mess up grouping
continue;
}
entry.group = group::DUPLICATE;
}
std::cout << "[INFO] " << tracked_entries.size() << " files left being tracked" << std::endl;
// now only files with a non-unique size and contents are being tracked
// are they REALLY duplicates ?
// leave the REAL cleanup for the user, saving these entries in a file
broom.create_scan_results_list(tracked_entries);
std::cout << "[INFO] Created scan results file" << std::endl;
} catch(const std::exception& e) {
std::cerr
<< "[ERROR] " << e.what() << std::endl;

Loading…
Cancel
Save