Browse Source

Tracking slowdown, sorting speedup

main
Unbewohnte 3 years ago
parent
commit
0048bbbf5e
  1. 1
      .gitignore
  2. 43
      src/broom.cpp
  3. 6
      src/broom.hpp
  4. 16
      src/entry.cpp
  5. 10
      src/entry.hpp

1
.gitignore vendored

@ -1,5 +1,6 @@
bin/broom
bin/broom_old
bin/broom_old_old
build/CMakeFiles/
build/cmake_install.cmake
build/CMakeCache.txt

43
src/broom.cpp

@ -19,8 +19,10 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <iostream>
#include <algorithm>
#include <iterator>
#include <map>
#include <chrono>
#include <stdexcept>
#include "entry.hpp"
#include "broom.hpp"
@ -40,6 +42,20 @@ void Broom::print_statistics() {
<< std::endl;
};
// Returns the amount of regular files in path, searching recursively.
// Directories themselves are not counted; subdirectories that cannot be read
// because of missing permissions are skipped instead of aborting the count.
// Throws an invalid_argument error in case path does not exist or is not a directory.
// NOTE(review): broom.hpp declares amount_of_files inside class Broom, while this
// definition is a free function - confirm which one is intended.
uintmax_t amount_of_files(const std::filesystem::path path) {
    if (!std::filesystem::exists(path)) {
        throw std::invalid_argument("\"" + path.string() + "\"" + " does not exist !");
    };

    if (!std::filesystem::is_directory(path)) {
        throw std::invalid_argument("\"" + path.string() + "\"" + " is not a directory");
    };

    // a plain directory_iterator only sees the direct children of path and
    // would count subdirectories as well, so walk the whole tree instead
    const auto options = std::filesystem::directory_options::skip_permission_denied;

    uintmax_t amount = 0;
    for (const auto& dir_entry : std::filesystem::recursive_directory_iterator(path, options)) {
        if (dir_entry.is_regular_file()) {
            amount++;
        };
    };

    return amount;
};
// recursively track every file that lies in given path. Throws an invalid_argument
// error in case path does not exist
void Broom::track(const std::filesystem::path path) {
@ -56,15 +72,17 @@ void Broom::track(const std::filesystem::path path) {
std::filesystem::directory_options::skip_permission_denied
);
for (std::filesystem::directory_entry dir_entry : std::filesystem::recursive_directory_iterator(path, options)) {
for (auto dir_entry : std::filesystem::recursive_directory_iterator(path, options)) {
if (!dir_entry.is_regular_file()) {
continue;
};
m_tracked_filepaths.push_back(dir_entry);
Entry entry(dir_entry.path());
m_tracked_entries.push_back(entry);
};
} else if (std::filesystem::is_regular_file(path)) {
m_tracked_filepaths.push_back(path);
Entry entry(path);
m_tracked_entries.push_back(entry);
};
@ -84,16 +102,16 @@ uintmax_t Broom::untrack_unique_sizes() {
// key: size, value: amount of occurences
std::map<uintmax_t, uintmax_t> sizes_map;
for (std::filesystem::path filepath : m_tracked_filepaths) {
for (Entry& entry : m_tracked_entries) {
// check if size of this entry is already in the map
// if yes --> increment occurences counter
// if not --> add it to the map with a counter of 1
uintmax_t filesize = std::filesystem::file_size(filepath);
entry.get_size();
auto iterator = sizes_map.find(filesize);
auto iterator = sizes_map.find(entry.filesize);
if (iterator == sizes_map.end()) {
// there is no such size
sizes_map.insert({filesize, 1});
sizes_map.insert({entry.filesize, 1});
} else {
// there is such size
sizes_map[iterator->first]++;
@ -101,9 +119,8 @@ uintmax_t Broom::untrack_unique_sizes() {
};
uintmax_t untracked = 0;
m_tracked_filepaths.erase(std::remove_if(m_tracked_filepaths.begin(), m_tracked_filepaths.end(), [&untracked, sizes_map](std::filesystem::path filepath) -> bool{
uintmax_t filesize = std::filesystem::file_size(filepath);
auto iter = sizes_map.find(filesize);
m_tracked_entries.erase(std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [&untracked, sizes_map](Entry entry) -> bool{
auto iter = sizes_map.find(entry.filesize);
if (iter->second == 1) {
// unique
untracked++;
@ -126,7 +143,7 @@ uintmax_t Broom::untrack_unique_sizes() {
// std::map<char[CHECKSUM_SIZE], uintmax_t> contents_map;
// std::map<char[CHECKSUM_SIZE], uintmax_t>::iterator iterator;
//
// for (Entry& entry : m_tracked_filepaths) {
// for (Entry& entry : m_tracked_entries) {
// // the same logic:
// // check if contents of this entry is already in the map
// // if yes --> increment occurences counter
@ -150,7 +167,7 @@ uintmax_t Broom::untrack_unique_sizes() {
// // not a unique size. Keep such entries
// } else {
// // a unique one. Untrack such an entry
// std::remove_if(m_tracked_filepaths.begin(), m_tracked_filepaths.end(), [contents_entry](Entry e) -> bool {
// std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [contents_entry](Entry e) -> bool {
// return (e.compare_checksums(contents_entry.first));
// });
// untracked++;
@ -175,7 +192,7 @@ void Broom::find_duplicates() {
<< std::chrono::duration_cast<std::chrono::milliseconds>(sizes_untrack_time - t0).count()
<< " ms" << std::endl;
} else {
size_t startsize = m_tracked_filepaths.size();
size_t startsize = m_tracked_entries.size();
std::cout << "Tracking " << startsize << std::endl;
uintmax_t global_untracked = 0;

6
src/broom.hpp

@ -42,7 +42,7 @@ protected:
// how many bytes was (would be ?) freed
uintmax_t m_sweeped_size;
// paths to tracked files
std::vector<std::filesystem::path> m_tracked_filepaths;
std::vector<Entry> m_tracked_entries;
public:
Broom(Options options);
@ -51,6 +51,10 @@ public:
// Print current statistics
void print_statistics();
// returns amount of regular files in path, searching recursively.
// Throws an invalid_argument error in case path does not exist or is not a directory
uintmax_t amount_of_files(const std::filesystem::path path);
// recursively track every file that lies in given path. Throws an invalid_argument
// error in case path does not exist
void track(const std::filesystem::path path);

16
src/entry.cpp

@ -21,23 +21,26 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
// Wraps a single file on disk. Construction only remembers the path;
// nothing else about the file is read here.
// Throws a string literal if entry_path does not exist or is a directory.
Entry::Entry(const std::filesystem::path entry_path) {
    // is_directory is only consulted for paths that actually exist
    const bool is_missing = !std::filesystem::exists(entry_path);
    if (is_missing || std::filesystem::is_directory(entry_path)) {
        throw "Does not exist or a directory";
    }

    path = entry_path;
};
// Nothing to release by hand: the destructor body is empty.
Entry::~Entry() = default;
// sets this entry`s filesize
void Entry::get_size() {
// filesize
filesize = std::filesystem::file_size(path);
};
// calculates and sets this entry`s checksum
void Entry::get_checksum() {
// checksum
std::fstream entry_file;
entry_file.open(path);
if (!entry_file.is_open()) {
throw "Could not open file";
throw std::ifstream::failure("Could not open \"" + path.filename().string() + "\"");
}
// TODO(Properly test it)
@ -64,7 +67,6 @@ Entry::Entry(const std::filesystem::path entry_path) {
entry_file.close();
};
Entry::~Entry() {};
// Compare this entry`s checksum with the other one.
// If the checksums are the same -> returns true, else -> false

10
src/entry.hpp

@ -23,7 +23,7 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <filesystem>
#include <fstream>
// 3 chunks (beginning and end of the file)
// 2 chunks (beginning and end of the file)
const uint8_t CHUNK_SIZE = 24;
const uint8_t CHECKSUM_SIZE = CHUNK_SIZE * 2;
@ -38,11 +38,17 @@ public:
Entry(const std::filesystem::path entry_path);
~Entry();
// sets this entry`s filesize
void get_size();
// calculates and sets this entry`s checksum
void get_checksum();
// Compare this entry`s checksum with the other one.
// If the checksums are the same -> returns true, else -> false
bool compare_checksums(const char other_checksum[CHECKSUM_SIZE]);
// Remove entry from the disk
// REMOVE entry from the disk
void remove();
};

Loading…
Cancel
Save