Browse Source

broom can now remove duplicates and create symlinks!; Updated README; Moved the entry groups enum into the entry.hpp file; Broom seems to have become quite a useful tool!

main v0.3.0
Unbewohnte 2 years ago
parent
commit
302ef7238f
  1. 9
      README.md
  2. 2
      build/CMakeLists.txt
  3. 42
      src/broom.cpp
  4. 4
      src/broom.hpp
  5. 2
      src/entry.cpp
  6. 12
      src/entry.hpp
  7. 33
      src/group.hpp
  8. 35
      src/main.cpp

9
README.md

@ -57,7 +57,7 @@ broom [FLAGS..] [COMMAND] [DIRECTORY]
[COMMANDS]
- `sweep` -> scan for duplicate files, save results in a file and REMOVE empty files
- `sweep` -> scan for duplicate files, REMOVE empty files and REPLACE other duplicates with symlinks
- `scan` -> scan and save results in a file without removing anything [DEFAULT]
@ -66,9 +66,9 @@ broom [FLAGS..] [COMMAND] [DIRECTORY]
### Examples
- `broom scan -od . ~/homework`
- `broom sweep ~/homework/I/have/a/lot/of/empty/files/here/for/some/reason`
- `broom sweep ~/homework`
after the scan the results file will be saved in your current working directory, scan results file contains
After the scan, the results file will be saved in your current working directory, unless you specified it to be somewhere else. The scan results file contains
a list of duplicate files that are grouped together so you can see EXACTLY WHERE each duplicate is in the filesystem.
---
@ -80,4 +80,5 @@ GPLv3
## TODO
- Make it go `P` A `R` A `L` L `E` L
- Output approximate size that could be freed
- ~~Output approximate size that could be freed~~
- ~~Remove duplicates and create symlinks~~

2
build/CMakeLists.txt

@ -22,5 +22,5 @@ endif()
set(EXECUTABLE_OUTPUT_PATH ../bin)
add_executable(broom ../src/main.cpp ../src/entry.cpp ../src/broom.cpp ../src/group.hpp)
add_executable(broom ../src/main.cpp ../src/entry.cpp ../src/broom.cpp)
target_link_libraries(broom Threads::Threads)

42
src/broom.cpp

@ -28,7 +28,6 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include "entry.hpp"
#include "broom.hpp"
#include "group.hpp"
namespace broom {
@ -52,7 +51,7 @@ std::vector<entry::Entry> Broom::track(const std::filesystem::path path) {
);
for (auto dir_entry : std::filesystem::recursive_directory_iterator(path, options)) {
if (!dir_entry.is_regular_file()) {
if (!dir_entry.is_regular_file() || std::filesystem::is_symlink(dir_entry.path())) {
// skip everything that we cannot process so easily
continue;
};
@ -60,7 +59,7 @@ std::vector<entry::Entry> Broom::track(const std::filesystem::path path) {
entry::Entry entry(dir_entry.path());
tracked_entries.push_back(entry);
}
} else if (std::filesystem::is_regular_file(path)) {
} else if (std::filesystem::is_regular_file(path) && !std::filesystem::is_symlink(path)) {
// just a file
entry::Entry entry(path);
tracked_entries.push_back(entry);
@ -186,7 +185,7 @@ uintmax_t Broom::find_empty_files(std::vector<entry::Entry>& tracked_entries) {
for (entry::Entry& entry : tracked_entries) {
if (entry.filesize == 0) {
// empty files can`t be considered as duplicates. assign a group
entry.group = group::EMPTY;
entry.group = entry::EMPTY;
found_empty_files++;
}
}
@ -199,7 +198,7 @@ uintmax_t Broom::remove_empty_files(std::vector<entry::Entry>& tracked_entries)
uintmax_t removed = 0;
tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [&removed](entry::Entry& entry) -> bool {
if (entry.group == group::EMPTY) {
if (entry.group == entry::EMPTY) {
try {
entry.remove();
removed++;
@ -219,11 +218,11 @@ uintmax_t Broom::remove_empty_files(std::vector<entry::Entry>& tracked_entries)
// marks every entry without any group as a duplicate
void Broom::mark_as_duplicates(std::vector<entry::Entry>& tracked_entries) {
for (entry::Entry& entry : tracked_entries) {
if (entry.group == group::EMPTY) {
if (entry.group == entry::EMPTY) {
// do not mess up grouping
continue;
}
entry.group = group::DUPLICATE;
entry.group = entry::DUPLICATE;
}
};
@ -252,4 +251,33 @@ std::map<std::string, std::vector<entry::Entry>> Broom::group_duplicates(std::ve
return duplicate_groups;
};
// REMOVES every duplicate file in a group except the first one and creates symlinks pointing to the
// first remaining real file.
//
// grouped_duplicates maps a group key to the entries of that group. The first entry of every group
// is kept as the real file; every other entry is replaced with a symlink to it.
//
// The replacement is best-effort: a duplicate that cannot be replaced is left untouched on the disk
// and the next one is processed. Filesystem errors are never propagated as exceptions.
void Broom::remove_duplicates_make_symlinks(const std::map<std::string, std::vector<entry::Entry>> grouped_duplicates) {
    for (const auto& record : grouped_duplicates) {
        const std::vector<entry::Entry>& duplicates = record.second;
        if (duplicates.size() < 2) {
            // a group without a second entry has nothing to replace
            continue;
        }

        std::error_code ec;

        // A relative symlink target is resolved against the directory that contains the link,
        // not against the current working directory. Store an absolute target so the link stays
        // valid even if the tracked paths are relative.
        const std::filesystem::path original_file_path = std::filesystem::absolute(duplicates.front().path, ec);
        if (ec) {
            continue;
        }

        for (std::size_t i = 1; i < duplicates.size(); ++i) {
            const std::filesystem::path& duplicate_path = duplicates[i].path;

            // Two paths that resolve to the very same file (a hard link, or one file reached
            // through different paths) free no space when replaced, and removing one of them
            // could destroy the only real copy. Leave those alone.
            const bool same_file = std::filesystem::equivalent(original_file_path, duplicate_path, ec);
            if (ec || same_file) {
                continue;
            }

            // Create the symlink next to the duplicate first and only then move it over the
            // duplicate. If the symlink cannot be created the duplicate is still intact.
            std::filesystem::path temporary_link_path = duplicate_path;
            temporary_link_path += ".broom_symlink_tmp";

            std::filesystem::create_symlink(original_file_path, temporary_link_path, ec);
            if (ec) {
                // nothing was created (or the temporary name is already taken by something
                // that is not ours), so there is nothing to clean up
                continue;
            }

            // rename() replaces the duplicate with the freshly created symlink
            std::filesystem::rename(temporary_link_path, duplicate_path, ec);
            if (ec) {
                // the duplicate is still in place; do not leave the temporary link behind
                std::error_code cleanup_ec;
                std::filesystem::remove(temporary_link_path, cleanup_ec);
            }
        }
    }
}
}

4
src/broom.hpp

@ -61,6 +61,10 @@ public:
// string of pieces. REMOVES EVERYTHING FROM GIVEN TRACKED ENTRIES
std::map<std::string, std::vector<entry::Entry>> group_duplicates(std::vector<entry::Entry>& tracked_entries);
// REMOVES every duplicate file in a group except the first one and creates symlinks pointing to the
// first remaining real file
void remove_duplicates_make_symlinks(const std::map<std::string, std::vector<entry::Entry>> grouped_duplicates);
// creates a list of duplicate, empty files and puts it into a file
void create_scan_results_list(const std::map<std::string, std::vector<entry::Entry>> grouped_duplicates, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt");
};

2
src/entry.cpp

@ -95,7 +95,7 @@ void Entry::get_pieces() {
};
// Remove entry from the disk
void Entry::remove() {
void Entry::remove() const {
std::filesystem::remove(path);
};

12
src/entry.hpp

@ -26,10 +26,14 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <iomanip>
#include <string>
#include "group.hpp"
namespace entry {
// Group that a tracked entry is assigned to (stored in Entry::group)
enum Group {
    // set by Broom::mark_as_duplicates on every entry that is not EMPTY
    DUPLICATE = 0,
    // set by Broom::find_empty_files on entries whose filesize is 0
    EMPTY = 1,
};
// 3 pieces (beginning, middle and end of the file)
const uint8_t PIECE_SIZE = 75;
const uint8_t PIECES_AMOUNT = 3;
@ -40,7 +44,7 @@ public:
std::filesystem::path path; // set via constructor
uintmax_t filesize; // set via constructor
std::string pieces; // 3 hex-represented pieces of file; set only via a method call to not stress the disk
group::Group group; // set externally
Group group; // set externally
Entry(const std::filesystem::path entry_path);
~Entry();
@ -51,7 +55,7 @@ public:
void get_pieces();
// REMOVE entry from the disk
void remove();
void remove() const;
};
}

33
src/group.hpp

@ -1,33 +0,0 @@
/*
Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))
This file is part of broom.
broom is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
broom is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with broom. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef GROUP_HPP
#define GROUP_HPP
// Groups that a tracked entry can be assigned to
namespace group {
enum Group {
// assigned by Broom::mark_as_duplicates to every entry that is not EMPTY
DUPLICATE,
// assigned by Broom::find_empty_files to entries whose filesize is 0
EMPTY,
};
}
#endif

35
src/main.cpp

@ -28,7 +28,7 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include "broom.hpp"
// Broom version number
#define VERSION "v0.2.3"
#define VERSION "v0.3.0"
void print_help() {
std::cout
@ -39,7 +39,7 @@ void print_help() {
<< "-od | --output-directory -> path to the directory to save results file in" << std::endl << std::endl
<< "[COMMANDS]" << std::endl
<< "sweep -> scan for duplicate files, save results in a file and REMOVE empty files" << std::endl
<< "sweep -> scan for duplicate files, REMOVE empty files and REPLACE other duplicates with symlinks" << std::endl
<< "scan -> scan and save results in a file without removing anything [DEFAULT]" << std::endl << std::endl
<< "[DIRECTORY]" << std::endl
@ -170,26 +170,35 @@ int main(int argc, char* argv[]) {
std::cout << "[INFO] " << tracked_entries.size() << " files left being tracked" << std::endl;
auto grouped_duplicates = broom.group_duplicates(tracked_entries);
if (grouped_duplicates.size() == 0) {
if (tracked_entries.size() == 0) {
// No duplicates at all !
std::cout << "[INFO] Nothing I can help with ! Congratulations !" << std::endl;
return 0;
}
// now only files with a non-unique size and contents are being tracked
// are they REALLY duplicates ?
// better to leave the REALL cleanup for the user, saving these entries in a file, than doing a blind and possibly destructive purge
broom.create_scan_results_list(grouped_duplicates, results_file_dir_path);
std::cout << "[INFO] Created scan results file" << std::endl;
// make duplicate groups from all this mess that tracked_entries right now are
auto grouped_duplicates = broom.group_duplicates(tracked_entries);
// output a little information about how much space could be freed if every duplicate
// in the group will be deleted but one
double could_be_freed = 0;
for (auto& record : grouped_duplicates) {
could_be_freed += record.second[0].filesize * (record.second.size() - 1);
}
std::cout <<"[INFO] " << could_be_freed / 1024 / 1024 << " MB could be freed" << std::endl;
if (!sweeping) {
// output a little information about how much space could be freed if every duplicate
// in the group will be deleted but one
std::cout <<"[INFO] " << could_be_freed / 1024 / 1024 << " MB could be freed" << std::endl;
broom.create_scan_results_list(grouped_duplicates, results_file_dir_path);
std::cout << "[INFO] Created scan results file" << std::endl;
} else {
// remove duplicates and create symlinks
std::cout << "[INFO] Removing duplicates and creating symlinks..." << std::endl;
broom.remove_duplicates_make_symlinks(grouped_duplicates);
std::cout <<"[INFO] Freed approximately " << could_be_freed / 1024 / 1024 << " MB (May be incorrect)" << std::endl;
}
} catch(const std::exception& e) {
std::cerr

Loading…
Cancel
Save