Browse Source

Moved Broom's options into the other file; added benchmarking flag

main
Unbewohnte 3 years ago
parent
commit
3f8dacd34c
  1. 150
      src/broom.cpp
  2. 22
      src/broom.hpp
  3. 11
      src/entry.cpp
  4. 6
      src/entry.hpp
  5. 32
      src/main.cpp

150
src/broom.cpp

@ -20,11 +20,16 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <iostream> #include <iostream>
#include <algorithm> #include <algorithm>
#include <map> #include <map>
#include <chrono>
#include "entry.hpp" #include "entry.hpp"
#include "broom.hpp" #include "broom.hpp"
Broom::Broom() {}; Broom::Broom(Options options) {
m_benchmarking = options.benchmarking;
m_sweeping = options.sweeping;
};
Broom::~Broom() {}; Broom::~Broom() {};
// Print current statistics // Print current statistics
@ -35,32 +40,61 @@ void Broom::print_statistics() {
<< std::endl; << std::endl;
}; };
// Recursively walks `dir` and starts tracking every entry that is not a
// directory. Directory symlinks are followed and directories that cannot be
// opened because permission is denied are skipped. When benchmarking is
// enabled, the time the walk took is printed in milliseconds.
// Always returns 0.
int Broom::track(std::filesystem::path dir) {
    // steady_clock is monotonic, so the measured interval cannot be skewed by
    // system clock adjustments (high_resolution_clock gives no such guarantee)
    const auto t0 = std::chrono::steady_clock::now();

    const std::filesystem::directory_options options = (
        std::filesystem::directory_options::follow_directory_symlink |
        std::filesystem::directory_options::skip_permission_denied
    );

    // bind by const reference: no directory_entry copy per iteration
    for (const std::filesystem::directory_entry& dir_entry : std::filesystem::recursive_directory_iterator(dir, options)) {
        if (dir_entry.is_directory()) {
            continue;
        };

        // build the Entry directly inside the vector instead of copying a local
        m_tracked_entries.emplace_back(dir_entry.path());
    };

    if (m_benchmarking) {
        const auto tracking_time = std::chrono::steady_clock::now();

        std::cout
        << "Tracking took "
        << std::chrono::duration_cast<std::chrono::milliseconds>(tracking_time - t0).count()
        << " ms" << std::endl;
    };

    return 0;
};
// removes entries with unique file sizes. Returns amount of files // removes entries with unique file sizes. Returns amount of files
// that are no longer being tracked // that are no longer being tracked
uintmax_t Broom::untrack_unique_sizes() { uintmax_t Broom::untrack_unique_sizes() {
// key: size, value: amount of occurences // key: size, value: amount of occurences
std::map<uintmax_t, uintmax_t> sizes; std::map<uintmax_t, uintmax_t> sizes_map;
std::map<uintmax_t, uintmax_t>::iterator iterator; std::map<uintmax_t, uintmax_t>::iterator iterator;
for (Entry& entry : m_tracked_entries) { for (Entry& entry : m_tracked_entries) {
// check if size of this entry is already in the map // check if size of this entry is already in the map
// if yes --> increment occurences counter // if yes --> increment occurences counter
// if not --> add it to the map with a counter of 1 // if not --> add it to the map with a counter of 1
iterator = sizes.find(entry.filesize); iterator = sizes_map.find(entry.filesize);
if (iterator == sizes.end()) { if (iterator == sizes_map.end()) {
// there is no such size // there is no such size
sizes.insert(std::pair<uintmax_t, uintmax_t>(entry.filesize, 1)); sizes_map.insert(std::pair<uintmax_t, uintmax_t>(entry.filesize, 1));
} else { } else {
// there is such size // there is such size
uintmax_t occurences = sizes[iterator->first]; uintmax_t occurences = sizes_map[iterator->first];
sizes[iterator->first] = occurences++; sizes_map[iterator->first] = occurences++;
}; };
}; };
// go through the map again, look for uniques and remove entries with // go through the map again, look for uniques and remove entries with
// such filesizes // such filesizes
uintmax_t untracked = 0; uintmax_t untracked = 0;
for (std::pair<uintmax_t, uintmax_t> size_entry : sizes) { for (std::pair<uintmax_t, uintmax_t> size_entry : sizes_map) {
if (size_entry.second > 1) { if (size_entry.second > 1) {
// not a unique size. Keep such entries // not a unique size. Keep such entries
} else { } else {
@ -76,43 +110,87 @@ uintmax_t Broom::untrack_unique_sizes() {
return untracked; return untracked;
}; };
// get all entities from path recursively and track them // removes entries with unique first and last 20 bytes. Returns amount of
int Broom::track(std::filesystem::path dir) { // files that are no longer being tracked
std::filesystem::directory_options options = ( // uintmax_t Broom::untrack_unique_contents() {
std::filesystem::directory_options::follow_directory_symlink | // // contents, occurences
std::filesystem::directory_options::skip_permission_denied // std::map<char[CHECKSUM_SIZE], uintmax_t> contents_map;
); // std::map<char[CHECKSUM_SIZE], uintmax_t>::iterator iterator;
//
// for (Entry& entry : m_tracked_entries) {
// // the same logic:
// // check if contents of this entry is already in the map
// // if yes --> increment occurences counter
// // if not --> add it to the map with a counter of 1
//
// iterator = contents_map.find(entry.checksum);
//
// if (iterator == contents_map.end()) {
// // add it to the map
// contents_map.insert(std::pair<char[CHECKSUM_SIZE], uintmax_t>(entry.checksum, 1));
// } else {
// // increment occurences counter
// uintmax_t occurences = contents_map[iterator->first];
// contents_map[iterator->first] = occurences++;
// };
// };
//
// uintmax_t untracked = 0;
// for (std::pair<const char[CHECKSUM_SIZE], uintmax_t> contents_entry : contents_map) {
// if (contents_entry.second > 1) {
// // not a unique size. Keep such entries
// } else {
// // a unique one. Untrack such an entry
// std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [contents_entry](Entry e) -> bool {
// return (e.compare_checksums(contents_entry.first));
// });
// untracked++;
// };
// };
//
// return untracked;
// };
// find all duplicates among tracked entries, stop tracking uniques
int Broom::find_duplicates() {
if (m_benchmarking) {
auto t0 = std::chrono::high_resolution_clock::now();
for (std::filesystem::directory_entry dir_entry : std::filesystem::recursive_directory_iterator(dir, options)) { untrack_unique_sizes();
if (dir_entry.is_directory()) {
continue;
};
Entry entry(dir_entry.path()); auto sizes_untrack_time = std::chrono::high_resolution_clock::now();
m_tracked_entries.push_back(entry);
};
return 0; std::cout
}; << "Untracking by size took "
<< std::chrono::duration_cast<std::chrono::milliseconds>(sizes_untrack_time - t0).count()
<< " ms" << std::endl
// find all duplicates among tracked entries << std::endl;
int Broom::find_duplicates() { } else {
size_t startsize = m_tracked_entries.size(); size_t startsize = m_tracked_entries.size();
std::cout << "Tracking " << startsize << std::endl; std::cout << "Tracking " << startsize << std::endl;
uintmax_t untracked = untrack_unique_sizes(); uintmax_t global_untracked = 0;
std::cout << "Untracked " << untracked << " unique sizes" << std::endl;
// uintmax_t untracked_by_contents = untrack_unique_contents();
// global_untracked += untracked_by_contents;
// std::cout << "Untracked " << untracked_by_contents << " unique contents" << std::endl;
uintmax_t untracked_by_size = untrack_unique_sizes();
global_untracked += untracked_by_size;
std::cout << "Untracked " << untracked_by_size << " unique sizes" << std::endl;
std::cout << "Duplicates: " << startsize - global_untracked << std::endl;
};
std::cout << "Duplicates: " << startsize - untracked << std::endl;
return 0;
};
// remove ALL duplicate files
int Broom::sweep_all(Entry entries[]) {
return 0; return 0;
}; };
// remove ALL duplicates but the one with specified index // remove ALL duplicate files
int Broom::sweep_all_but(Entry entries[], uint32_t index = 0) { int Broom::sweep_all() {
return 0; return 0;
}; };

22
src/broom.hpp

@ -23,9 +23,20 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <cstdint> #include <cstdint>
#include <vector> #include <vector>
// Broom's settings.
// Every flag defaults to false, so a default-constructed Options never holds
// an indeterminate value when only some flags are set from the command line.
struct Options {
    // set when the "sweep" command is given
    bool sweeping = false;
    // enable/disable benchmarking output
    bool benchmarking = false;
};
// A class to find and manage duplicate files // A class to find and manage duplicate files
class Broom { class Broom {
protected: protected:
// enable/disable benchmarking output
bool m_benchmarking;
bool m_sweeping;
// how many files has been (would be ?) "sweeped" // how many files has been (would be ?) "sweeped"
uintmax_t m_sweeped_files; uintmax_t m_sweeped_files;
// how many bytes was (would be ?) freed // how many bytes was (would be ?) freed
@ -34,7 +45,7 @@ protected:
std::vector<Entry> m_tracked_entries; std::vector<Entry> m_tracked_entries;
public: public:
Broom(); Broom(Options options);
~Broom(); ~Broom();
// Print current statistics // Print current statistics
@ -50,11 +61,12 @@ public:
// that are no longer being tracked // that are no longer being tracked
uintmax_t untrack_unique_sizes(); uintmax_t untrack_unique_sizes();
// remove ALL duplicate files // removes entries with unique first and last 20 bytes. Returns amount of
int sweep_all(Entry entries[]); // files that are no longer being tracked
uintmax_t untrack_unique_contents();
// remove ALL duplicates but the one with specified index // remove ALL duplicate files
int sweep_all_but(Entry entries[], uint32_t index); int sweep_all();
}; };
#endif #endif

11
src/entry.cpp

@ -51,20 +51,13 @@ Entry::Entry(std::filesystem::path path) {
char end_buf[CHUNK_SIZE]; char end_buf[CHUNK_SIZE];
entry_file.read(end_buf, CHUNK_SIZE); entry_file.read(end_buf, CHUNK_SIZE);
entry_file.seekg(CHUNK_SIZE, std::ios::beg);
char middle_buf[CHUNK_SIZE];
entry_file.read(middle_buf, CHUNK_SIZE);
for (uint8_t i = 0; i < CHECKSUM_SIZE; i++) { for (uint8_t i = 0; i < CHECKSUM_SIZE; i++) {
if (i < CHUNK_SIZE) { if (i < CHUNK_SIZE) {
checksum[i] = start_buf[i]; checksum[i] = start_buf[i];
} }
else if (i > CHUNK_SIZE*2) {
checksum[i] = middle_buf[i-(CHUNK_SIZE*2)];
}
else if (i > CHUNK_SIZE) { else if (i > CHUNK_SIZE) {
checksum[i] = end_buf[i - CHUNK_SIZE]; checksum[i] = end_buf[i - CHUNK_SIZE];
} };
}; };
}; };
@ -75,7 +68,7 @@ Entry::~Entry() {};
// Compare this entry`s checksum with the other one. // Compare this entry`s checksum with the other one.
// If the checksums are the same -> returns true, else -> false // If the checksums are the same -> returns true, else -> false
bool Entry::compare_checksums(char other_checksum[CHECKSUM_SIZE]) { bool Entry::compare_checksums(const char other_checksum[CHECKSUM_SIZE]) {
for (uint8_t i = 0; i < CHECKSUM_SIZE; i++) { for (uint8_t i = 0; i < CHECKSUM_SIZE; i++) {
if (checksum[i] != other_checksum[i]) { if (checksum[i] != other_checksum[i]) {
return false; return false;

6
src/entry.hpp

@ -23,9 +23,9 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
// 3 chunks (beginning, end, middle of the file) // 2 chunks (beginning and end of the file)
const uint8_t CHUNK_SIZE = 24; const uint8_t CHUNK_SIZE = 24;
const uint8_t CHECKSUM_SIZE = CHUNK_SIZE * 3; const uint8_t CHECKSUM_SIZE = CHUNK_SIZE * 2;
// A wrapper for every file with all necessary information // A wrapper for every file with all necessary information
class Entry { class Entry {
@ -41,7 +41,7 @@ public:
// Compare this entry`s checksum with the other one. // Compare this entry`s checksum with the other one.
// If the checksums are the same -> returns true, else -> false // If the checksums are the same -> returns true, else -> false
bool compare_checksums(char other_checksum[CHECKSUM_SIZE]); bool compare_checksums(const char other_checksum[CHECKSUM_SIZE]);
// Remove entry from the disk // Remove entry from the disk
void remove(); void remove();

32
src/main.cpp

@ -27,12 +27,6 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
// Broom version number // Broom version number
#define VERSION "v0.1.0" #define VERSION "v0.1.0"
// Broom`s settings
struct Options {
bool sweeping;
std::vector<std::filesystem::path> paths;
};
void print_help() { void print_help() {
std::cout std::cout
<< "broom [FLAGS..] [COMMAND] [FILES|DIRECTORIES...]" << std::endl << std::endl << "broom [FLAGS..] [COMMAND] [FILES|DIRECTORIES...]" << std::endl << std::endl
@ -58,6 +52,7 @@ void print_version() {
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
Options options; Options options;
std::filesystem::path tracked_path;
if (argc < 2) { if (argc < 2) {
print_help(); print_help();
@ -65,7 +60,7 @@ int main(int argc, char* argv[]) {
}; };
// process command line arguments // process command line arguments
for (unsigned int i = 0; i < argc; i++) { for (unsigned int i = 1; i < argc; i++) {
// flags -> command -> directories&&files // flags -> command -> directories&&files
if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
@ -76,6 +71,9 @@ int main(int argc, char* argv[]) {
print_version(); print_version();
return 0; return 0;
} }
else if (strcmp(argv[i], "-b") == 0 || strcmp(argv[i], "--benchmark") == 0) {
options.benchmarking = true;
}
else if (strcmp(argv[i], "sweep") == 0) { else if (strcmp(argv[i], "sweep") == 0) {
options.sweeping = true; options.sweeping = true;
} }
@ -84,18 +82,22 @@ int main(int argc, char* argv[]) {
} }
else { else {
// add path // add path
if (i == 0) { if (std::filesystem::exists(argv[i])) {
continue; tracked_path = argv[i];
} else { };
options.paths.push_back(argv[i]);
}
}; };
}; };
Broom broom; // no path was specified
if (tracked_path.string() == "") {
print_help();
return 1;
};
Broom broom(options);
std::filesystem::path first_path = options.paths.at(0); broom.track(tracked_path);
broom.track(first_path);
broom.find_duplicates(); broom.find_duplicates();
return 0; return 0;

Loading…
Cancel
Save