Browse Source

Moved Broom's options in the other file; benchmarking flag

main
Unbewohnte 3 years ago
parent
commit
3f8dacd34c
  1. 144
      src/broom.cpp
  2. 22
      src/broom.hpp
  3. 11
      src/entry.cpp
  4. 6
      src/entry.hpp
  5. 32
      src/main.cpp

144
src/broom.cpp

@ -20,11 +20,16 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <iostream>
#include <algorithm>
#include <map>
#include <chrono>
#include "entry.hpp"
#include "broom.hpp"
// Default constructor: both option flags start out disabled so that the
// m_benchmarking checks in track() and find_duplicates() never read an
// indeterminate value.
Broom::Broom() : m_benchmarking(false), m_sweeping(false) {}
// Build a Broom configured from the given Options: the benchmarking and
// sweeping flags are copied into the corresponding members.
Broom::Broom(Options options)
    : m_benchmarking(options.benchmarking),
      m_sweeping(options.sweeping) {}
// No explicit cleanup is performed here.
Broom::~Broom() = default;
// Print current statistics
@ -35,32 +40,61 @@ void Broom::print_statistics() {
<< std::endl;
};
// Recursively walk `dir` and start tracking every non-directory entry found.
// Directory symlinks are followed and directories that cannot be read because
// of missing permissions are skipped. When benchmarking is enabled, the time
// spent tracking is printed to stdout in milliseconds.
// Always returns 0.
int Broom::track(std::filesystem::path dir) {
    const auto t0 = std::chrono::high_resolution_clock::now();

    const std::filesystem::directory_options options = (
        std::filesystem::directory_options::follow_directory_symlink |
        std::filesystem::directory_options::skip_permission_denied
    );

    // iterate by const reference: taking each directory_entry by value would
    // copy its path on every iteration
    for (const std::filesystem::directory_entry& dir_entry :
         std::filesystem::recursive_directory_iterator(dir, options)) {
        if (dir_entry.is_directory()) {
            continue;
        }

        // construct the Entry directly inside the vector instead of building
        // a local one and copying it in
        m_tracked_entries.emplace_back(dir_entry.path());
    }

    if (m_benchmarking) {
        const auto tracking_time = std::chrono::high_resolution_clock::now();

        std::cout
        << "Tracking took "
        << std::chrono::duration_cast<std::chrono::milliseconds>(tracking_time - t0).count()
        << " ms" << std::endl;
    }

    return 0;
}
// removes entries with unique file sizes. Returns amount of files
// that are no longer being tracked
uintmax_t Broom::untrack_unique_sizes() {
// key: size, value: amount of occurences
std::map<uintmax_t, uintmax_t> sizes;
std::map<uintmax_t, uintmax_t> sizes_map;
std::map<uintmax_t, uintmax_t>::iterator iterator;
for (Entry& entry : m_tracked_entries) {
// check if size of this entry is already in the map
// if yes --> increment occurences counter
// if not --> add it to the map with a counter of 1
iterator = sizes.find(entry.filesize);
if (iterator == sizes.end()) {
iterator = sizes_map.find(entry.filesize);
if (iterator == sizes_map.end()) {
// there is no such size
sizes.insert(std::pair<uintmax_t, uintmax_t>(entry.filesize, 1));
sizes_map.insert(std::pair<uintmax_t, uintmax_t>(entry.filesize, 1));
} else {
// there is such size
uintmax_t occurences = sizes[iterator->first];
sizes[iterator->first] = occurences++;
uintmax_t occurences = sizes_map[iterator->first];
sizes_map[iterator->first] = occurences++;
};
};
// go through the map again, look for uniques and remove entries with
// such filesizes
uintmax_t untracked = 0;
for (std::pair<uintmax_t, uintmax_t> size_entry : sizes) {
for (std::pair<uintmax_t, uintmax_t> size_entry : sizes_map) {
if (size_entry.second > 1) {
// not a unique size. Keep such entries
} else {
@ -76,43 +110,87 @@ uintmax_t Broom::untrack_unique_sizes() {
return untracked;
};
// get all entities from path recursively and track them
int Broom::track(std::filesystem::path dir) {
std::filesystem::directory_options options = (
std::filesystem::directory_options::follow_directory_symlink |
std::filesystem::directory_options::skip_permission_denied
);
// removes entries whose checksum (the first and last CHUNK_SIZE bytes of the
// file) is unique. Returns the number of files that are no longer being tracked
// uintmax_t Broom::untrack_unique_contents() {
// // contents, occurences
// std::map<char[CHECKSUM_SIZE], uintmax_t> contents_map;
// std::map<char[CHECKSUM_SIZE], uintmax_t>::iterator iterator;
//
// for (Entry& entry : m_tracked_entries) {
// // the same logic:
// // check if contents of this entry is already in the map
// // if yes --> increment occurences counter
// // if not --> add it to the map with a counter of 1
//
// iterator = contents_map.find(entry.checksum);
//
// if (iterator == contents_map.end()) {
// // add it to the map
// contents_map.insert(std::pair<char[CHECKSUM_SIZE], uintmax_t>(entry.checksum, 1));
// } else {
// // increment occurences counter
// uintmax_t occurences = contents_map[iterator->first];
// contents_map[iterator->first] = occurences++;
// };
// };
//
// uintmax_t untracked = 0;
// for (std::pair<const char[CHECKSUM_SIZE], uintmax_t> contents_entry : contents_map) {
// if (contents_entry.second > 1) {
// // not a unique size. Keep such entries
// } else {
// // a unique one. Untrack such an entry
// std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [contents_entry](Entry e) -> bool {
// return (e.compare_checksums(contents_entry.first));
// });
// untracked++;
// };
// };
//
// return untracked;
// };
// find all duplicates among tracked entries, stop tracking uniques
int Broom::find_duplicates() {
if (m_benchmarking) {
auto t0 = std::chrono::high_resolution_clock::now();
for (std::filesystem::directory_entry dir_entry : std::filesystem::recursive_directory_iterator(dir, options)) {
if (dir_entry.is_directory()) {
continue;
};
untrack_unique_sizes();
Entry entry(dir_entry.path());
m_tracked_entries.push_back(entry);
};
auto sizes_untrack_time = std::chrono::high_resolution_clock::now();
return 0;
};
std::cout
<< "Untracking by size took "
<< std::chrono::duration_cast<std::chrono::milliseconds>(sizes_untrack_time - t0).count()
<< " ms" << std::endl
// find all duplicates among tracked entries
int Broom::find_duplicates() {
<< std::endl;
} else {
size_t startsize = m_tracked_entries.size();
std::cout << "Tracking " << startsize << std::endl;
uintmax_t untracked = untrack_unique_sizes();
std::cout << "Untracked " << untracked << " unique sizes" << std::endl;
uintmax_t global_untracked = 0;
std::cout << "Duplicates: " << startsize - untracked << std::endl;
return 0;
// uintmax_t untracked_by_contents = untrack_unique_contents();
// global_untracked += untracked_by_contents;
// std::cout << "Untracked " << untracked_by_contents << " unique contents" << std::endl;
uintmax_t untracked_by_size = untrack_unique_sizes();
global_untracked += untracked_by_size;
std::cout << "Untracked " << untracked_by_size << " unique sizes" << std::endl;
std::cout << "Duplicates: " << startsize - global_untracked << std::endl;
};
// Remove ALL duplicate files.
// Placeholder for now: nothing is removed and 0 is returned unconditionally.
int Broom::sweep_all(Entry entries[]) {
    return 0;
}
// remove ALL duplicates but the one with specified index
int Broom::sweep_all_but(Entry entries[], uint32_t index = 0) {
// Remove ALL duplicate files.
// Placeholder for now: nothing is removed and 0 is returned unconditionally.
int Broom::sweep_all() {
    return 0;
}

22
src/broom.hpp

@ -23,9 +23,20 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <cstdint>
#include <vector>
// Broom's settings. Every flag defaults to false, so a default-constructed
// Options holds well-defined values even when no command-line argument
// sets them.
struct Options {
    // set when the "sweep" command is given
    bool sweeping = false;
    // enable/disable benchmarking output
    bool benchmarking = false;
};
// A class to find and manage duplicate files
class Broom {
protected:
// enable/disable benchmarking output
bool m_benchmarking;
bool m_sweeping;
// how many files have been (or would be) swept
uintmax_t m_sweeped_files;
// how many bytes were (or would be) freed
@ -34,7 +45,7 @@ protected:
std::vector<Entry> m_tracked_entries;
public:
Broom();
Broom(Options options);
~Broom();
// Print current statistics
@ -50,11 +61,12 @@ public:
// that are no longer being tracked
uintmax_t untrack_unique_sizes();
// remove ALL duplicate files
int sweep_all(Entry entries[]);
// removes entries whose checksum (the first and last CHUNK_SIZE bytes of the
// file) is unique. Returns the number of files that are no longer being tracked
uintmax_t untrack_unique_contents();
// remove ALL duplicates but the one with specified index
int sweep_all_but(Entry entries[], uint32_t index);
// remove ALL duplicate files
int sweep_all();
};
#endif

11
src/entry.cpp

@ -51,20 +51,13 @@ Entry::Entry(std::filesystem::path path) {
char end_buf[CHUNK_SIZE];
entry_file.read(end_buf, CHUNK_SIZE);
entry_file.seekg(CHUNK_SIZE, std::ios::beg);
char middle_buf[CHUNK_SIZE];
entry_file.read(middle_buf, CHUNK_SIZE);
for (uint8_t i = 0; i < CHECKSUM_SIZE; i++) {
if (i < CHUNK_SIZE) {
checksum[i] = start_buf[i];
}
else if (i > CHUNK_SIZE*2) {
checksum[i] = middle_buf[i-(CHUNK_SIZE*2)];
}
else if (i > CHUNK_SIZE) {
checksum[i] = end_buf[i - CHUNK_SIZE];
}
};
};
};
@ -75,7 +68,7 @@ Entry::~Entry() {};
// Compare this entry`s checksum with the other one.
// If the checksums are the same -> returns true, else -> false
bool Entry::compare_checksums(char other_checksum[CHECKSUM_SIZE]) {
bool Entry::compare_checksums(const char other_checksum[CHECKSUM_SIZE]) {
for (uint8_t i = 0; i < CHECKSUM_SIZE; i++) {
if (checksum[i] != other_checksum[i]) {
return false;

6
src/entry.hpp

@ -23,9 +23,9 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <filesystem>
#include <fstream>
// 3 chunks (beginning, end, middle of the file)
// 2 chunks (beginning and end of the file)
const uint8_t CHUNK_SIZE = 24;
const uint8_t CHECKSUM_SIZE = CHUNK_SIZE * 3;
const uint8_t CHECKSUM_SIZE = CHUNK_SIZE * 2;
// A wrapper for every file with all necessary information
class Entry {
@ -41,7 +41,7 @@ public:
// Compare this entry`s checksum with the other one.
// If the checksums are the same -> returns true, else -> false
bool compare_checksums(char other_checksum[CHECKSUM_SIZE]);
bool compare_checksums(const char other_checksum[CHECKSUM_SIZE]);
// Remove entry from the disk
void remove();

32
src/main.cpp

@ -27,12 +27,6 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
// Broom version number
#define VERSION "v0.1.0"
// Broom's settings. `sweeping` defaults to false so that it holds a defined
// value when the "sweep" command is not given.
struct Options {
    // set when the "sweep" command is given
    bool sweeping = false;
    // paths collected from the command line
    std::vector<std::filesystem::path> paths;
};
void print_help() {
std::cout
<< "broom [FLAGS..] [COMMAND] [FILES|DIRECTORIES...]" << std::endl << std::endl
@ -58,6 +52,7 @@ void print_version() {
int main(int argc, char* argv[]) {
Options options;
std::filesystem::path tracked_path;
if (argc < 2) {
print_help();
@ -65,7 +60,7 @@ int main(int argc, char* argv[]) {
};
// process command line arguments
for (unsigned int i = 0; i < argc; i++) {
for (unsigned int i = 1; i < argc; i++) {
// flags -> command -> directories&&files
if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) {
@ -76,6 +71,9 @@ int main(int argc, char* argv[]) {
print_version();
return 0;
}
else if (strcmp(argv[i], "-b") == 0 || strcmp(argv[i], "--benchmark") == 0) {
options.benchmarking = true;
}
else if (strcmp(argv[i], "sweep") == 0) {
options.sweeping = true;
}
@ -84,18 +82,22 @@ int main(int argc, char* argv[]) {
}
else {
// add path
if (i == 0) {
continue;
} else {
options.paths.push_back(argv[i]);
}
if (std::filesystem::exists(argv[i])) {
tracked_path = argv[i];
};
};
};
Broom broom;
// no path was specified
if (tracked_path.string() == "") {
print_help();
return 1;
};
Broom broom(options);
std::filesystem::path first_path = options.paths.at(0);
broom.track(first_path);
broom.track(tracked_path);
broom.find_duplicates();
return 0;

Loading…
Cancel
Save