Browse Source

Untrack files based on size

main
Unbewohnte 3 years ago
parent
commit
bd18527fe5
  1. 83
      src/broom.cpp
  2. 17
      src/broom.hpp
  3. 17
      src/main.cpp

83
src/broom.cpp

@ -18,6 +18,9 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
*/
#include <iostream>
#include <algorithm>
#include <map>
#include "entry.hpp"
#include "broom.hpp"
@ -32,21 +35,75 @@ void Broom::print_statistics() {
<< std::endl;
};
// Determines whether entry1 is a duplicate of entry2
bool Broom::is_duplicate(Entry entry1, Entry entry2) {
if (entry1.path == entry2.path) {
// well, it`s the same file we`re talking about
return false;
}
else if (entry1.compare_checksums(entry2.checksum)) {
return true;
}
return false;
// removes entries with unique file sizes. Returns amount of files
// that are no longer being tracked
uintmax_t Broom::untrack_unique_sizes() {
// key: size, value: amount of occurences
std::map<uintmax_t, uintmax_t> sizes;
std::map<uintmax_t, uintmax_t>::iterator iterator;
for (Entry& entry : m_tracked_entries) {
// check if size of this entry is already in the map
// if yes --> increment occurences counter
// if not --> add it to the map with a counter of 1
iterator = sizes.find(entry.filesize);
if (iterator == sizes.end()) {
// there is no such size
sizes.insert(std::pair<uintmax_t, uintmax_t>(entry.filesize, 1));
} else {
// there is such size
uintmax_t occurences = sizes[iterator->first];
sizes[iterator->first] = occurences++;
};
};
// go through the map again, look for uniques and remove entries with
// such filesizes
uintmax_t untracked = 0;
for (std::pair<uintmax_t, uintmax_t> size_entry : sizes) {
if (size_entry.second > 1) {
// not a unique size. Keep such entries
} else {
// a unique one. Untrack such an entry
std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [size_entry](Entry e) -> bool {
return (e.filesize == size_entry.first);
});
untracked++;
};
};
return untracked;
};
// find all duplicates in the directory
int Broom::find_duplicates(std::filesystem::path directory, Entry entries[], bool recursive = false) {
// get all entities from path recursively and track them.
// Returns 0 on success (the only outcome for now; iterator errors
// surface as exceptions from recursive_directory_iterator).
int Broom::track(std::filesystem::path dir) {
    // follow symlinked directories, silently skip ones we may not read
    const std::filesystem::directory_options options = (
        std::filesystem::directory_options::follow_directory_symlink |
        std::filesystem::directory_options::skip_permission_denied
    );

    // const& avoids copying each directory_entry on every iteration
    for (const std::filesystem::directory_entry& dir_entry : std::filesystem::recursive_directory_iterator(dir, options)) {
        // directories themselves cannot be duplicates; only track files
        if (dir_entry.is_directory()) {
            continue;
        };
        // construct the Entry in place instead of copying a temporary
        m_tracked_entries.emplace_back(dir_entry.path());
    };

    return 0;
};
// find all duplicates among tracked entries.
// Prints progress statistics and returns 0.
int Broom::find_duplicates() {
    const size_t tracked_before = m_tracked_entries.size();
    std::cout << "Tracking " << tracked_before << std::endl;

    // dropping unique-size entries leaves only potential duplicates
    const uintmax_t removed_by_size = untrack_unique_sizes();
    std::cout << "Untracked " << removed_by_size << " unique sizes" << std::endl;
    std::cout << "Duplicates: " << tracked_before - removed_by_size << std::endl;

    return 0;
};

17
src/broom.hpp

@ -21,14 +21,17 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#define BROOM_HPP
#include <cstdint>
#include <vector>
// A class to find and manage duplicate files
class Broom {
protected:
// how many files has been "sweeped"
// how many files have been (or would be) "swept"
uintmax_t m_sweeped_files;
// how many bytes was freed
// how many bytes were (or would be) freed
uintmax_t m_sweeped_size;
// entries that possibly contain duplicates
std::vector<Entry> m_tracked_entries;
public:
Broom();
@ -37,11 +40,15 @@ public:
// Print current statistics
void print_statistics();
// Determines whether entry1 is a duplicate of entry2
bool is_duplicate(Entry entry1, Entry entry2);
// get all entities from path recursively and track them
int track(std::filesystem::path path);
// find all duplicates in the directory
int find_duplicates(std::filesystem::path directory, Entry entries[], bool recursive);
int find_duplicates();
// removes entries with unique file sizes. Returns amount of files
// that are no longer being tracked
uintmax_t untrack_unique_sizes();
// remove ALL duplicate files
int sweep_all(Entry entries[]);

17
src/main.cpp

@ -18,11 +18,8 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
*/
#include <iostream>
#include <fstream>
#include <string.h>
#include <cstdint>
#include <vector>
#include <filesystem>
#include "entry.hpp"
#include "broom.hpp"
@ -95,15 +92,11 @@ int main(int argc, char* argv[]) {
};
};
// printing all directories just for testing
for (uint32_t i = 0; i < options.paths.size(); i++) {
for (auto& p : std::filesystem::recursive_directory_iterator(options.paths.at(i))) {
if (!p.is_directory()) {
Entry entry(p);
std::cout << p.path() << "Checksum: " << entry.checksum << std::endl;
}
};
};
Broom broom;
std::filesystem::path first_path = options.paths.at(0);
broom.track(first_path);
broom.find_duplicates();
return 0;
};

Loading…
Cancel
Save