Browse Source

Untracking by size now actually works and works FAST

main
Unbewohnte 3 years ago
parent
commit
449beb393a
  1. 45
      src/broom.cpp
  2. 6
      src/broom.hpp
  3. 8
      src/entry.cpp
  4. 3
      src/entry.hpp
  5. 2
      src/main.cpp

45
src/broom.cpp

@ -40,8 +40,8 @@ void Broom::print_statistics() {
<< std::endl; << std::endl;
}; };
// get all entities from path recursively and track them // get all files from path recursively and track them
int Broom::track(std::filesystem::path dir) { void Broom::track(const std::filesystem::path dir) {
auto t0 = std::chrono::high_resolution_clock::now(); auto t0 = std::chrono::high_resolution_clock::now();
std::filesystem::directory_options options = ( std::filesystem::directory_options options = (
@ -66,7 +66,6 @@ int Broom::track(std::filesystem::path dir) {
<< std::chrono::duration_cast<std::chrono::milliseconds>(tracking_time - t0).count() << std::chrono::duration_cast<std::chrono::milliseconds>(tracking_time - t0).count()
<< " ms" << std::endl; << " ms" << std::endl;
}; };
return 0;
}; };
// removes entries with unique file sizes. Returns amount of files // removes entries with unique file sizes. Returns amount of files
@ -74,38 +73,34 @@ int Broom::track(std::filesystem::path dir) {
uintmax_t Broom::untrack_unique_sizes() { uintmax_t Broom::untrack_unique_sizes() {
// key: size, value: amount of occurrences // key: size, value: amount of occurrences
std::map<uintmax_t, uintmax_t> sizes_map; std::map<uintmax_t, uintmax_t> sizes_map;
std::map<uintmax_t, uintmax_t>::iterator iterator;
for (Entry& entry : m_tracked_entries) { for (Entry entry : m_tracked_entries) {
// check if size of this entry is already in the map // check if size of this entry is already in the map
// if yes --> increment occurrences counter // if yes --> increment occurrences counter
// if not --> add it to the map with a counter of 1 // if not --> add it to the map with a counter of 1
iterator = sizes_map.find(entry.filesize); auto iterator = sizes_map.find(entry.filesize);
if (iterator == sizes_map.end()) { if (iterator == sizes_map.end()) {
// there is no such size // there is no such size
sizes_map.insert(std::pair<uintmax_t, uintmax_t>(entry.filesize, 1)); sizes_map.insert({entry.filesize, 1});
} else { } else {
// there is such size // there is such size
uintmax_t occurences = sizes_map[iterator->first]; sizes_map[iterator->first]++;
sizes_map[iterator->first] = occurences++;
}; };
}; };
// go through the map again, look for uniques and remove entries with
// such filesizes
uintmax_t untracked = 0; uintmax_t untracked = 0;
for (std::pair<uintmax_t, uintmax_t> size_entry : sizes_map) { std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [&untracked, sizes_map](Entry entry) -> bool{
if (size_entry.second > 1) { auto iter = sizes_map.find(entry.filesize);
// not a unique size. Keep such entries if (iter->second == 1) {
} else { // unique
// a unique one. Untrack such an entry
std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [size_entry](Entry e) -> bool {
return (e.filesize == size_entry.first);
});
untracked++; untracked++;
return true;
}; };
};
// std::cout << "duplicate fsize: " << iter->first << " occurences: " << iter->second << std::endl;
return false;
});
return untracked; return untracked;
}; };
@ -153,7 +148,7 @@ uintmax_t Broom::untrack_unique_sizes() {
// find all duplicates among tracked entries, stop tracking uniques // find all duplicates among tracked entries, stop tracking uniques
int Broom::find_duplicates() { void Broom::find_duplicates() {
if (m_benchmarking) { if (m_benchmarking) {
auto t0 = std::chrono::high_resolution_clock::now(); auto t0 = std::chrono::high_resolution_clock::now();
@ -185,12 +180,8 @@ int Broom::find_duplicates() {
std::cout << "Duplicates: " << startsize - global_untracked << std::endl; std::cout << "Duplicates: " << startsize - global_untracked << std::endl;
}; };
return 0;
}; };
// remove ALL duplicate files // remove ALL duplicate files
int Broom::sweep_all() { void Broom::sweep_all() {
return 0;
}; };

6
src/broom.hpp

@ -52,10 +52,10 @@ public:
void print_statistics(); void print_statistics();
// get all entities from path recursively and track them // get all entities from path recursively and track them
int track(std::filesystem::path path); void track(const std::filesystem::path path);
// find all duplicates in the directory // find all duplicates in the directory
int find_duplicates(); void find_duplicates();
// removes entries with unique file sizes. Returns amount of files // removes entries with unique file sizes. Returns amount of files
// that are no longer being tracked // that are no longer being tracked
@ -66,7 +66,7 @@ public:
uintmax_t untrack_unique_contents(); uintmax_t untrack_unique_contents();
// remove ALL duplicate files // remove ALL duplicate files
int sweep_all(); void sweep_all();
}; };
#endif #endif

8
src/entry.cpp

@ -20,14 +20,14 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include "entry.hpp" #include "entry.hpp"
// A wrapper for every file with all necessary information // A wrapper for every file with all necessary information
Entry::Entry(std::filesystem::path path) { Entry::Entry(const std::filesystem::path entry_path) {
// check that the path exists and is not a directory // check that the path exists and is not a directory
if (!std::filesystem::exists(path) || std::filesystem::is_directory(path)) { if (!std::filesystem::exists(entry_path) || std::filesystem::is_directory(entry_path)) {
throw "Does not exist or a directory"; throw "Does not exist or a directory";
}; };
// filename // path
filename = path.filename(); path = entry_path;
// filesize // filesize
filesize = std::filesystem::file_size(path); filesize = std::filesystem::file_size(path);

3
src/entry.hpp

@ -30,13 +30,12 @@ const uint8_t CHECKSUM_SIZE = CHUNK_SIZE * 2;
// A wrapper for every file with all necessary information // A wrapper for every file with all necessary information
class Entry { class Entry {
public: public:
std::string filename;
std::filesystem::path path; std::filesystem::path path;
uintmax_t filesize; uintmax_t filesize;
char checksum[CHECKSUM_SIZE]; char checksum[CHECKSUM_SIZE];
Entry(std::filesystem::path path); Entry(const std::filesystem::path entry_path);
~Entry(); ~Entry();
// Compare this entry`s checksum with the other one. // Compare this entry`s checksum with the other one.

2
src/main.cpp

@ -88,7 +88,7 @@ int main(int argc, char* argv[]) {
}; };
}; };
// no path was specified // no path was specified at all or every path was nonexistent
if (tracked_path.string() == "") { if (tracked_path.string() == "") {
print_help(); print_help();
return 1; return 1;

Loading…
Cancel
Save