Compare commits

..

No commits in common. 'main' and 'v0.2.2' have entirely different histories.
main ... v0.2.2

  1. 16
      README.md
  2. 2
      build/CMakeLists.txt
  3. 58
      src/broom.cpp
  4. 7
      src/broom.hpp
  5. 2
      src/entry.cpp
  6. 12
      src/entry.hpp
  7. 33
      src/group.hpp
  8. 145
      src/main.cpp

16
README.md

@ -22,7 +22,7 @@
- clone this repository - clone this repository
`git clone http://unbewohnte.xyz:3000/Unbewohnte/broom` `git clone https://github.com/Unbewohnte/broom`
- proceed to the directory - proceed to the directory
@ -40,7 +40,7 @@ if you're on GNU/Linux - you can run `install.sh` for broom to become system-wid
### "The lazy" way ### "The lazy" way
- proceed [to the releases page](http://unbewohnte.xyz:3000/Unbewohnte/broom/releases) and get yourself a pre-compiled binary - proceed [to the releases page](https://github.com/Unbewohnte/broom/releases) and get yourself a pre-compiled binary
--- ---
@ -53,11 +53,10 @@ broom [FLAGS..] [COMMAND] [DIRECTORY]
- `-v` or `--version` -> print version information and exit - `-v` or `--version` -> print version information and exit
- `-h` or `--help` -> print this message and exit - `-h` or `--help` -> print this message and exit
- `-od` or `--output-directory` -> path to the directory to save results file in
[COMMANDS] [COMMANDS]
- `sweep` -> scan for duplicate files, REMOVE empty files and REPLACE other duplicates with symlinks - `sweep` -> scan for duplicate files, save results in a file and REMOVE empty files
- `scan` -> scan and save results in a file without removing anything [DEFAULT] - `scan` -> scan and save results in a file without removing anything [DEFAULT]
@ -65,10 +64,10 @@ broom [FLAGS..] [COMMAND] [DIRECTORY]
### Examples ### Examples
- `broom scan -od . ~/homework` - `broom scan ~/homework`
- `broom sweep ~/homework` - `broom sweep ~/homework/I/have/a/lot/of/empty/files/here/for/some/reason`
after the scan the results file will be saved in your current working directory, unless you specified it to be somewhere else. Scan results file contains after the scan the results file will be saved in your current working directory, scan results file contains
a list of duplicate files that are grouped together so you can see EXACTLY WHERE each duplicate is in the filesystem. a list of duplicate files that are grouped together so you can see EXACTLY WHERE each duplicate is in the filesystem.
--- ---
@ -80,5 +79,4 @@ GPLv3
## TODO ## TODO
- Make it go `P` A `R` A `L` L `E` L - Make it go `P` A `R` A `L` L `E` L
- ~~Output approximate size that could be freed~~ - Output approximate size that could be freed
- ~~Remove duplicates and create symlinks~~

2
build/CMakeLists.txt

@ -22,5 +22,5 @@ endif()
set(EXECUTABLE_OUTPUT_PATH ../bin) set(EXECUTABLE_OUTPUT_PATH ../bin)
add_executable(broom ../src/main.cpp ../src/entry.cpp ../src/broom.cpp) add_executable(broom ../src/main.cpp ../src/entry.cpp ../src/broom.cpp ../src/group.hpp)
target_link_libraries(broom Threads::Threads) target_link_libraries(broom Threads::Threads)

58
src/broom.cpp

@ -28,6 +28,7 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include "entry.hpp" #include "entry.hpp"
#include "broom.hpp" #include "broom.hpp"
#include "group.hpp"
namespace broom { namespace broom {
@ -51,7 +52,7 @@ std::vector<entry::Entry> Broom::track(const std::filesystem::path path) {
); );
for (auto dir_entry : std::filesystem::recursive_directory_iterator(path, options)) { for (auto dir_entry : std::filesystem::recursive_directory_iterator(path, options)) {
if (!dir_entry.is_regular_file() || std::filesystem::is_symlink(dir_entry.path())) { if (!dir_entry.is_regular_file()) {
// skip everything that we cannot process so easily // skip everything that we cannot process so easily
continue; continue;
}; };
@ -59,7 +60,7 @@ std::vector<entry::Entry> Broom::track(const std::filesystem::path path) {
entry::Entry entry(dir_entry.path()); entry::Entry entry(dir_entry.path());
tracked_entries.push_back(entry); tracked_entries.push_back(entry);
} }
} else if (std::filesystem::is_regular_file(path) && !std::filesystem::is_symlink(path)) { } else if (std::filesystem::is_regular_file(path)) {
// just a file // just a file
entry::Entry entry(path); entry::Entry entry(path);
tracked_entries.push_back(entry); tracked_entries.push_back(entry);
@ -185,7 +186,7 @@ uintmax_t Broom::find_empty_files(std::vector<entry::Entry>& tracked_entries) {
for (entry::Entry& entry : tracked_entries) { for (entry::Entry& entry : tracked_entries) {
if (entry.filesize == 0) { if (entry.filesize == 0) {
// empty files can`t be considered as duplicates. assign a group // empty files can`t be considered as duplicates. assign a group
entry.group = entry::EMPTY; entry.group = group::EMPTY;
found_empty_files++; found_empty_files++;
} }
} }
@ -198,7 +199,7 @@ uintmax_t Broom::remove_empty_files(std::vector<entry::Entry>& tracked_entries)
uintmax_t removed = 0; uintmax_t removed = 0;
tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [&removed](entry::Entry& entry) -> bool { tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [&removed](entry::Entry& entry) -> bool {
if (entry.group == entry::EMPTY) { if (entry.group == group::EMPTY) {
try { try {
entry.remove(); entry.remove();
removed++; removed++;
@ -215,30 +216,14 @@ uintmax_t Broom::remove_empty_files(std::vector<entry::Entry>& tracked_entries)
return removed; return removed;
}; };
// Untracks specified group in tracked entries. Returns an amount of entries untracked
uintmax_t Broom::untrack_group(std::vector<entry::Entry>& tracked_entries, entry::Group group) {
uintmax_t untracked = 0;
tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [&untracked, &group](entry::Entry& entry) -> bool {
if (entry.group == group) {
untracked++;
return true;
} else {
return false;
}
}), tracked_entries.end());
return untracked;
}
// marks every entry without any group as a duplicate // marks every entry without any group as a duplicate
void Broom::mark_as_duplicates(std::vector<entry::Entry>& tracked_entries) { void Broom::mark_as_duplicates(std::vector<entry::Entry>& tracked_entries) {
for (entry::Entry& entry : tracked_entries) { for (entry::Entry& entry : tracked_entries) {
if (entry.group == entry::EMPTY) { if (entry.group == group::EMPTY) {
// do not mess up grouping // do not mess up grouping
continue; continue;
} }
entry.group = entry::DUPLICATE; entry.group = group::DUPLICATE;
} }
}; };
@ -267,33 +252,4 @@ std::map<std::string, std::vector<entry::Entry>> Broom::group_duplicates(std::ve
return duplicate_groups; return duplicate_groups;
}; };
// REMOVES every duplicate file in a group except the first one and creates symlinks pointing to the
// first remaining real file
void Broom::remove_duplicates_make_symlinks(const std::map<std::string, std::vector<entry::Entry>> grouped_duplicates) {
for (const auto& record : grouped_duplicates) {
unsigned int i = 0;
std::filesystem::path original_file_path;
for (const auto& duplicate_entry : record.second) {
if (i == 0) {
// the first duplicate in the group. Save it
original_file_path = duplicate_entry.path;
} else {
// not the first entry; REMOVE it and create a symlink,
// pointing to the real file
std::filesystem::path removed_duplicate_path = duplicate_entry.path;
try {
// remove the entry
duplicate_entry.remove();
// make a symlink
std::filesystem::create_symlink(original_file_path, removed_duplicate_path);
} catch(...) {}
}
// serves only the first iteration. It doesn`t matter if it is not incremented after that
i++;
}
}
};
} }

7
src/broom.hpp

@ -47,9 +47,6 @@ public:
// files that are no longer being tracked. // files that are no longer being tracked.
uintmax_t untrack_unique_contents(std::vector<entry::Entry>& tracked_entries); uintmax_t untrack_unique_contents(std::vector<entry::Entry>& tracked_entries);
// Untracks specified group in tracked entries. Returns an amount of entries untracked
uintmax_t untrack_group(std::vector<entry::Entry>& tracked_entries, entry::Group group);
// finds empty files among tracked entries and marks them with the appropriate group. // finds empty files among tracked entries and marks them with the appropriate group.
// Returns amount of found empty files // Returns amount of found empty files
uintmax_t find_empty_files(std::vector<entry::Entry>& tracked_entries); uintmax_t find_empty_files(std::vector<entry::Entry>& tracked_entries);
@ -64,10 +61,6 @@ public:
// string of pieces. REMOVES EVERYTHING FROM GIVEN TRACKED ENTRIES // string of pieces. REMOVES EVERYTHING FROM GIVEN TRACKED ENTRIES
std::map<std::string, std::vector<entry::Entry>> group_duplicates(std::vector<entry::Entry>& tracked_entries); std::map<std::string, std::vector<entry::Entry>> group_duplicates(std::vector<entry::Entry>& tracked_entries);
// REMOVES every duplicate file in a group except the first one and creates symlinks pointing to the
// first remaining real file
void remove_duplicates_make_symlinks(const std::map<std::string, std::vector<entry::Entry>> grouped_duplicates);
// creates a list of duplicate, empty files and puts it into a file // creates a list of duplicate, empty files and puts it into a file
void create_scan_results_list(const std::map<std::string, std::vector<entry::Entry>> grouped_duplicates, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt"); void create_scan_results_list(const std::map<std::string, std::vector<entry::Entry>> grouped_duplicates, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt");
}; };

2
src/entry.cpp

@ -95,7 +95,7 @@ void Entry::get_pieces() {
}; };
// Remove entry from the disk // Remove entry from the disk
void Entry::remove() const { void Entry::remove() {
std::filesystem::remove(path); std::filesystem::remove(path);
}; };

12
src/entry.hpp

@ -26,14 +26,10 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <iomanip> #include <iomanip>
#include <string> #include <string>
#include "group.hpp"
namespace entry {
enum Group {
DUPLICATE,
EMPTY,
};
namespace entry {
// 3 pieces (beginning, middle and end of the file) // 3 pieces (beginning, middle and end of the file)
const uint8_t PIECE_SIZE = 75; const uint8_t PIECE_SIZE = 75;
const uint8_t PIECES_AMOUNT = 3; const uint8_t PIECES_AMOUNT = 3;
@ -44,7 +40,7 @@ public:
std::filesystem::path path; // set via constructor std::filesystem::path path; // set via constructor
uintmax_t filesize; // set via constructor uintmax_t filesize; // set via constructor
std::string pieces; // 3 hex-represented pieces of file; set only via a method call to not stress the disk std::string pieces; // 3 hex-represented pieces of file; set only via a method call to not stress the disk
Group group; // set externally group::Group group; // set externally
Entry(const std::filesystem::path entry_path); Entry(const std::filesystem::path entry_path);
~Entry(); ~Entry();
@ -55,7 +51,7 @@ public:
void get_pieces(); void get_pieces();
// REMOVE entry from the disk // REMOVE entry from the disk
void remove() const; void remove();
}; };
} }

33
src/group.hpp

@ -0,0 +1,33 @@
/*
Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))
This file is part of broom.
broom is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
broom is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with broom. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef GROUP_HPP
#define GROUP_HPP
namespace group {

// Category tag attached to a tracked entry.
// Unscoped on purpose: enumerators are referred to as group::EMPTY and
// group::DUPLICATE elsewhere in the project.
enum Group {
    DUPLICATE = 0, // entry whose group was not EMPTY when duplicates were marked
    EMPTY = 1      // entry whose filesize is 0
};

} // namespace group
#endif

145
src/main.cpp

@ -1,5 +1,5 @@
/* /*
Copyright (C) 2021 Kasyanov Nikolay Alexeyevich (Unbewohnte (me@unbewohnte.xyz)) Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))
This file is part of broom. This file is part of broom.
@ -28,51 +28,50 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include "broom.hpp" #include "broom.hpp"
// Broom version number // Broom version number
#define VERSION "v0.3.1" #define VERSION "v0.2.2"
void print_help() { void print_help() {
std::cout std::cout
<< "broom [FLAGS..] [COMMAND] [DIRECTORY]\n\n" << "broom [FLAGS..] [COMMAND] [DIRECTORY]" << std::endl << std::endl
<< "[FLAGS]\n" << "[FLAGS]" << std::endl
<< "-v | --version -> print version information and exit\n" << "-v | --version -> print version information and exit" << std::endl
<< "-h | --help -> print this message and exit\n" << "-h | --help -> print this message and exit" << std::endl << std::endl
<< "-ie | --ignore-empty -> do not remove empty files when sweeping\n"
<< "-od | --output-directory -> path to the directory to save results file in when scanning\n\n" << "[COMMANDS]" << std::endl
<< "sweep -> scan for duplicate files, save results in a file and REMOVE empty files" << std::endl
<< "[COMMANDS]\n" << "scan -> scan and save results in a file without removing anything [DEFAULT]" << std::endl << std::endl
<< "sweep -> scan for duplicate files, REMOVE empty files and REPLACE other duplicates with symlinks\n"
<< "scan -> scan and save results in a file without removing anything [DEFAULT]\n\n" << "[DIRECTORY]" << std::endl
<< "path to the directory to be scanned" << std::endl
<< "[DIRECTORY]\n" << std::endl;
<< "path to the directory to be scanned\n\n";
}; };
void print_version() { void print_version() {
std::cout std::cout
<< "broom " << VERSION << "\n" << "broom " << VERSION << std::endl
<< "incurable hoarder`s helpful friend\n\n" << "incurable hoarder`s helpful friend" << std::endl << std::endl
<< " _\n" << " _" << std::endl
<< " //\n" << " //" << std::endl
<< " // \n" << " // " << std::endl
<< " // \n" << " // " << std::endl
<< " // \n" << " // " << std::endl
<< " /####/ \n" << " /####/ " << std::endl
<< " ////// \n" << " ////// " << std::endl
<< " /////// \n\n" << " /////// " << std::endl << std::endl
<< "Copyright (C) 2021 Kasyanov Nikolay Alexeyevich (Unbewohnte (me@unbewohnte.xyz))\n" << "Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))" << std::endl
<< "This program comes with ABSOLUTELY NO WARRANTY.\n" << "This program comes with ABSOLUTELY NO WARRANTY." << std::endl
<< "This is free software, and you are welcome to redistribute it\n" << "This is free software, and you are welcome to redistribute it" << std::endl
<< "under certain conditions\n"; << "under certain conditions" << std::endl
<< std::endl;
}; };
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
std::filesystem::path results_file_dir_path = ".";
std::filesystem::path tracked_path;
bool sweeping = false; bool sweeping = false;
bool ignore_empty = false;
std::filesystem::path tracked_path;
if (argc < 2) { if (argc < 2) {
print_help(); print_help();
@ -91,13 +90,6 @@ int main(int argc, char* argv[]) {
print_version(); print_version();
return 0; return 0;
} }
else if (strcmp(argv[i], "-od") == 0 || strcmp(argv[i], "--output-directory") == 0) {
i++;
results_file_dir_path = std::filesystem::path(argv[i]);
}
else if (strcmp(argv[i], "-ie") == 0 || strcmp(argv[i], "--ignore-empty") == 0) {
ignore_empty = true;
}
else if (strcmp(argv[i], "sweep") == 0) { else if (strcmp(argv[i], "sweep") == 0) {
sweeping = true; sweeping = true;
} }
@ -120,41 +112,37 @@ int main(int argc, char* argv[]) {
broom::Broom broom; broom::Broom broom;
try { try {
std::cout std::cout
<< " _\n" << " _" << std::endl
<< " //\n" << " //" << std::endl
<< " // \n" << " // " << std::endl
<< " // \n" << " // " << std::endl
<< " // \n" << " // " << std::endl
<< " /####/ \n" << " /####/ " << std::endl
<< " ////// \n" << " ////// " << std::endl
<< " /////// \n\n"; << " /////// " << std::endl << std::endl;
if (sweeping) { if (sweeping) {
std::cout << "[Sweeping]\n\n"; std::cout << "[Sweeping]" << std::endl << std::endl;
} else { } else {
std::cout << "[Scanning]\n\n"; std::cout << "[Scanning]" << std::endl << std::endl;
} }
// track files in a given directory // track files in a given directory
std::vector<entry::Entry> tracked_entries = broom.track(tracked_path); std::vector<entry::Entry> tracked_entries = broom.track(tracked_path);
std::cout << "[INFO] Tracking " << tracked_entries.size() << " files\n"; std::cout << "[INFO] Tracking " << tracked_entries.size() << " files" << std::endl;
// find empty files // find empty files
uintmax_t empty_files = broom.find_empty_files(tracked_entries); uintmax_t empty_files = broom.find_empty_files(tracked_entries);
std::cout << "[INFO] Found " << empty_files << " empty files\n"; std::cout << "[INFO] Found " << empty_files << " empty files" << std::endl;
// if sweeping - remove empty files right away // if sweeping - remove empty files right away
if (sweeping && !ignore_empty) { if (sweeping) {
uintmax_t removed = broom.remove_empty_files(tracked_entries); uintmax_t removed = broom.remove_empty_files(tracked_entries);
std::cout << "[INFO] Removed " << removed << " empty files\n"; std::cout << "[INFO] Removed " << removed << " empty files" << std::endl;
} else {
// just untrack them, do not remove
uintmax_t untracked_empty = broom.untrack_group(tracked_entries, entry::Group::EMPTY);
std::cout << "[INFO] Skipped " << untracked_empty << " empty files\n";
} }
// untrack unique sizes // untrack unique sizes
uintmax_t untracked = broom.untrack_unique_sizes(tracked_entries); uintmax_t untracked = broom.untrack_unique_sizes(tracked_entries);
std::cout << "[INFO] Untracked " << untracked << " files with a unique size\n"; std::cout << "[INFO] Untracked " << untracked << " files with a unique size" << std::endl;
// get content pieces for each entry // get content pieces for each entry
tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [](entry::Entry& entry) -> bool { tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [](entry::Entry& entry) -> bool {
@ -170,45 +158,24 @@ int main(int argc, char* argv[]) {
// untrack unique contents // untrack unique contents
untracked = broom.untrack_unique_contents(tracked_entries); untracked = broom.untrack_unique_contents(tracked_entries);
std::cout << "[INFO] Untracked " << untracked << " files with unique contents\n"; std::cout << "[INFO] Untracked " << untracked << " files with unique contents" << std::endl;
// mark entries as duplicates // mark entries as duplicates
broom.mark_as_duplicates(tracked_entries); broom.mark_as_duplicates(tracked_entries);
std::cout << "[INFO] " << tracked_entries.size() << " files left being tracked\n"; std::cout << "[INFO] " << tracked_entries.size() << " files left being tracked" << std::endl;
if (tracked_entries.size() == 0) {
// No duplicates at all !
std::cout << "[INFO] Nothing I can help with ! Congratulations !\n";
return 0;
}
// make duplicate groups from all this mess that tracked_entries right now are
auto grouped_duplicates = broom.group_duplicates(tracked_entries); auto grouped_duplicates = broom.group_duplicates(tracked_entries);
double could_be_freed = 0; // now only files with a non-unique size and contents are being tracked
for (auto& record : grouped_duplicates) { // are they REALLY duplicates ?
could_be_freed += record.second[0].filesize * (record.second.size() - 1); // better to leave the REAL cleanup for the user, saving these entries in a file, than doing a blind and possibly destructive purge
} broom.create_scan_results_list(grouped_duplicates);
std::cout << "[INFO] Created scan results file" << std::endl;
if (!sweeping) {
// output a little information about how much space could be freed if every duplicate
// in the group will be deleted but one
std::cout <<"[INFO] " << could_be_freed / 1024 / 1024 << " MB could be freed\n";
broom.create_scan_results_list(grouped_duplicates, results_file_dir_path);
std::cout << "[INFO] Created scan results file\n";
} else {
// remove duplicates and create symlinks
std::cout << "[INFO] Removing duplicates and creating symlinks...\n";
broom.remove_duplicates_make_symlinks(grouped_duplicates);
std::cout <<"[INFO] Freed approximately " << could_be_freed / 1024 / 1024 << " MB (May be incorrect)\n";
}
} catch(const std::exception& e) { } catch(const std::exception& e) {
std::cerr std::cerr
<< "[ERROR] " << e.what() <<"\n"; << "[ERROR] " << e.what() << std::endl;
return 1; return 1;
}; };

Loading…
Cancel
Save