diff --git a/README.md b/README.md
index 3e4e51d..6bbbec6 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ broom [FLAGS..] [COMMAND] [DIRECTORY]
[COMMANDS]
-- `sweep` -> scan for duplicate files, save results in a file and REMOVE empty files
+- `sweep` -> scan for duplicate files, REMOVE empty files and REPLACE other duplicates with symlinks
- `scan` -> scan and save results in a file without removing anything [DEFAULT]
@@ -66,9 +66,9 @@ broom [FLAGS..] [COMMAND] [DIRECTORY]
### Examples
- `broom scan -od . ~/homework`
-- `broom sweep ~/homework/I/have/a/lot/of/empty/files/here/for/some/reason`
+- `broom sweep ~/homework`
-after the scan the results file will be saved in your current working directory, scan results file contains
+After the scan, the results file is saved in your current working directory, unless you specified a different directory with `-od`. The scan results file contains
a list of duplicate files that are grouped together so you can see EXACTLY WHERE each duplicate is in the filesystem.
---
@@ -80,4 +80,5 @@ GPLv3
## TODO
- Make it go `P` A `R` A `L` L `E` L
-- Output approximate size that could be freed
\ No newline at end of file
+- ~~Output approximate size that could be freed~~
+- ~~Remove duplicates and create symlinks~~
\ No newline at end of file
diff --git a/build/CMakeLists.txt b/build/CMakeLists.txt
index 52d0e11..914bc32 100755
--- a/build/CMakeLists.txt
+++ b/build/CMakeLists.txt
@@ -22,5 +22,5 @@ endif()
set(EXECUTABLE_OUTPUT_PATH ../bin)
-add_executable(broom ../src/main.cpp ../src/entry.cpp ../src/broom.cpp ../src/group.hpp)
+add_executable(broom ../src/main.cpp ../src/entry.cpp ../src/broom.cpp)
target_link_libraries(broom Threads::Threads)
diff --git a/src/broom.cpp b/src/broom.cpp
index 23a7eb3..dae6f4d 100755
--- a/src/broom.cpp
+++ b/src/broom.cpp
@@ -28,7 +28,6 @@ along with broom. If not, see .
#include "entry.hpp"
#include "broom.hpp"
-#include "group.hpp"
namespace broom {
@@ -52,7 +51,7 @@ std::vector Broom::track(const std::filesystem::path path) {
);
for (auto dir_entry : std::filesystem::recursive_directory_iterator(path, options)) {
- if (!dir_entry.is_regular_file()) {
+ if (!dir_entry.is_regular_file() || std::filesystem::is_symlink(dir_entry.path())) {
// skip everything that we cannot process so easily
continue;
};
@@ -60,7 +59,7 @@ std::vector Broom::track(const std::filesystem::path path) {
entry::Entry entry(dir_entry.path());
tracked_entries.push_back(entry);
}
- } else if (std::filesystem::is_regular_file(path)) {
+ } else if (std::filesystem::is_regular_file(path) && !std::filesystem::is_symlink(path)) {
// just a file
entry::Entry entry(path);
tracked_entries.push_back(entry);
@@ -186,7 +185,7 @@ uintmax_t Broom::find_empty_files(std::vector& tracked_entries) {
for (entry::Entry& entry : tracked_entries) {
if (entry.filesize == 0) {
// empty files can`t be considered as duplicates. assign a group
- entry.group = group::EMPTY;
+ entry.group = entry::EMPTY;
found_empty_files++;
}
}
@@ -199,7 +198,7 @@ uintmax_t Broom::remove_empty_files(std::vector& tracked_entries)
uintmax_t removed = 0;
tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [&removed](entry::Entry& entry) -> bool {
- if (entry.group == group::EMPTY) {
+ if (entry.group == entry::EMPTY) {
try {
entry.remove();
removed++;
@@ -219,11 +218,11 @@ uintmax_t Broom::remove_empty_files(std::vector& tracked_entries)
// marks every entry without any group as a duplicate
void Broom::mark_as_duplicates(std::vector& tracked_entries) {
for (entry::Entry& entry : tracked_entries) {
- if (entry.group == group::EMPTY) {
+ if (entry.group == entry::EMPTY) {
// do not mess up grouping
continue;
}
- entry.group = group::DUPLICATE;
+ entry.group = entry::DUPLICATE;
}
};
@@ -252,4 +251,33 @@ std::map> Broom::group_duplicates(std::ve
return duplicate_groups;
};
+// REMOVES every duplicate file in a group except the first one and creates symlinks pointing to the
+// first remaining real file. The symlink target is made absolute: a relative target would be
+// resolved against the directory of the symlink itself and the link would end up dangling
+void Broom::remove_duplicates_make_symlinks(const std::map> grouped_duplicates) {
+    for (const auto& record : grouped_duplicates) {
+        const auto& duplicates = record.second;
+        if (duplicates.size() < 2) {
+            continue;
+        }
+
+        // the first duplicate in the group is the real file that stays on the disk
+        std::error_code path_error;
+        const std::filesystem::path original_file_path = std::filesystem::absolute(duplicates.front().path, path_error);
+        if (path_error) {
+            // no reliable symlink target; leave the whole group untouched rather than delete blindly
+            continue;
+        }
+
+        for (auto duplicate = duplicates.begin() + 1; duplicate != duplicates.end(); ++duplicate) {
+            try {
+                duplicate->remove();
+                std::filesystem::create_symlink(original_file_path, duplicate->path);
+            } catch(...) {
+                // best effort: a duplicate that cannot be replaced is skipped, the rest is still processed
+            }
+        }
+    }
+};
+
}
diff --git a/src/broom.hpp b/src/broom.hpp
index 89ef2d2..023ed25 100755
--- a/src/broom.hpp
+++ b/src/broom.hpp
@@ -61,6 +61,10 @@ public:
// string of pieces. REMOVES EVERYTHING FROM GIVEN TRACKED ENTRIES
std::map> group_duplicates(std::vector& tracked_entries);
+ // REMOVES every duplicate file in a group except the first one and creates symlinks pointing to the
+ // first remaining real file
+ void remove_duplicates_make_symlinks(const std::map> grouped_duplicates);
+
// creates a list of duplicate, empty files and puts it into a file
void create_scan_results_list(const std::map> grouped_duplicates, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt");
};
diff --git a/src/entry.cpp b/src/entry.cpp
index ad194dd..2e11dfe 100755
--- a/src/entry.cpp
+++ b/src/entry.cpp
@@ -95,7 +95,7 @@ void Entry::get_pieces() {
};
// Remove entry from the disk
-void Entry::remove() {
+void Entry::remove() const { // const: usable through const references; only the file on disk is touched
std::filesystem::remove(path);
};
diff --git a/src/entry.hpp b/src/entry.hpp
index 211475e..489f144 100755
--- a/src/entry.hpp
+++ b/src/entry.hpp
@@ -26,10 +26,14 @@ along with broom. If not, see .
#include
#include
-#include "group.hpp"
-
namespace entry {
+
+enum Group { // category of a tracked entry; stored in Entry::group
+    DUPLICATE, // assigned by Broom::mark_as_duplicates to every entry that is not EMPTY
+    EMPTY, // assigned by Broom::find_empty_files to entries with a filesize of 0
+};
+
// 3 pieces (beginning, middle and end of the file)
const uint8_t PIECE_SIZE = 75;
const uint8_t PIECES_AMOUNT = 3;
@@ -40,7 +44,7 @@ public:
std::filesystem::path path; // set via constructor
uintmax_t filesize; // set via constructor
std::string pieces; // 3 hex-represented pieces of file; set only via a method call to not stress the disk
- group::Group group; // set externally
+ Group group; // set externally
Entry(const std::filesystem::path entry_path);
~Entry();
@@ -51,7 +55,7 @@ public:
void get_pieces();
// REMOVE entry from the disk
- void remove();
+ void remove() const;
};
}
diff --git a/src/group.hpp b/src/group.hpp
deleted file mode 100755
index 956169d..0000000
--- a/src/group.hpp
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
-Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))
-
-This file is part of broom.
-
-broom is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-
-broom is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with broom. If not, see .
-*/
-
-
-#ifndef GROUP_HPP
-#define GROUP_HPP
-
-namespace group {
-
-enum Group {
- DUPLICATE,
- EMPTY,
-};
-
-}
-
-#endif
diff --git a/src/main.cpp b/src/main.cpp
index 466c697..5df9c63 100755
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -28,7 +28,7 @@ along with broom. If not, see .
#include "broom.hpp"
// Broom version number
-#define VERSION "v0.2.3"
+#define VERSION "v0.3.0"
void print_help() {
std::cout
@@ -39,7 +39,7 @@ void print_help() {
<< "-od | --output-directory -> path to the directory to save results file in" << std::endl << std::endl
<< "[COMMANDS]" << std::endl
- << "sweep -> scan for duplicate files, save results in a file and REMOVE empty files" << std::endl
+ << "sweep -> scan for duplicate files, REMOVE empty files and REPLACE other duplicates with symlinks" << std::endl
<< "scan -> scan and save results in a file without removing anything [DEFAULT]" << std::endl << std::endl
<< "[DIRECTORY]" << std::endl
@@ -170,26 +170,35 @@ int main(int argc, char* argv[]) {
std::cout << "[INFO] " << tracked_entries.size() << " files left being tracked" << std::endl;
- auto grouped_duplicates = broom.group_duplicates(tracked_entries);
-
- if (grouped_duplicates.size() == 0) {
+ if (tracked_entries.size() == 0) {
+ // No duplicates at all !
std::cout << "[INFO] Nothing I can help with ! Congratulations !" << std::endl;
return 0;
}
- // now only files with a non-unique size and contents are being tracked
- // are they REALLY duplicates ?
- // better to leave the REALL cleanup for the user, saving these entries in a file, than doing a blind and possibly destructive purge
- broom.create_scan_results_list(grouped_duplicates, results_file_dir_path);
- std::cout << "[INFO] Created scan results file" << std::endl;
+ // make duplicate groups from all this mess that tracked_entries right now are
+ auto grouped_duplicates = broom.group_duplicates(tracked_entries);
- // output a little information about how much space could be freed if every duplicate
- // in the group will be deleted but one
double could_be_freed = 0;
for (auto& record : grouped_duplicates) {
could_be_freed += record.second[0].filesize * (record.second.size() - 1);
}
- std::cout <<"[INFO] " << could_be_freed / 1024 / 1024 << " MB could be freed" << std::endl;
+
+ if (!sweeping) {
+ // output a little information about how much space could be freed if every duplicate
+ // in the group will be deleted but one
+ std::cout <<"[INFO] " << could_be_freed / 1024 / 1024 << " MB could be freed" << std::endl;
+
+ broom.create_scan_results_list(grouped_duplicates, results_file_dir_path);
+ std::cout << "[INFO] Created scan results file" << std::endl;
+
+ } else {
+ // remove duplicates and create symlinks
+ std::cout << "[INFO] Removing duplicates and creating symlinks..." << std::endl;
+ broom.remove_duplicates_make_symlinks(grouped_duplicates);
+
+ std::cout <<"[INFO] Freed approximately " << could_be_freed / 1024 / 1024 << " MB (May be incorrect)" << std::endl;
+ }
} catch(const std::exception& e) {
std::cerr