Browse Source

Do not save scan results file if no entries are being tracked

main
Unbewohnte 3 years ago
parent
commit
0b7017b213
  1. 25
      src/broom.cpp
  2. 3
      src/broom.hpp
  3. 2
      src/entry.cpp
  4. 2
      src/entry.hpp
  5. 15
      src/main.cpp

25
src/broom.cpp

@ -71,12 +71,12 @@ std::vector<entry::Entry> Broom::track(const std::filesystem::path path) {
// untracks entries with unique file sizes. Returns amount of files // untracks entries with unique file sizes. Returns amount of files
// that are no longer being tracked // that are no longer being tracked
uintmax_t Broom::untrack_unique_sizes(std::vector<entry::Entry>& tracked_entries) { uintmax_t Broom::untrack_unique_sizes(std::vector<entry::Entry>& tracked_entries) {
// key: size, value: amount of occurences // key: size, value: amount of occurrences
std::map<uintmax_t, uintmax_t> sizes_map; std::map<uintmax_t, uintmax_t> sizes_map;
for (auto entry_iter = tracked_entries.begin(); entry_iter != tracked_entries.end(); entry_iter++) { for (auto entry_iter = tracked_entries.begin(); entry_iter != tracked_entries.end(); entry_iter++) {
// check if size of this entry is already in the map // check if size of this entry is already in the map
// if yes --> increment occurences counter // if yes --> increment occurrences counter
// if not --> add it to the map with a counter of 1 // if not --> add it to the map with a counter of 1
auto iterator = sizes_map.find(entry_iter->filesize); auto iterator = sizes_map.find(entry_iter->filesize);
if (iterator == sizes_map.end()) { if (iterator == sizes_map.end()) {
@ -107,21 +107,21 @@ uintmax_t Broom::untrack_unique_sizes(std::vector<entry::Entry>& tracked_entries
// untracks entries with unique content-pieces. Returns amount of // untracks entries with unique content-pieces. Returns amount of
// files that are no longer being tracked // files that are no longer being tracked
uintmax_t Broom::untrack_unique_contents(std::vector<entry::Entry>& tracked_entries) { uintmax_t Broom::untrack_unique_contents(std::vector<entry::Entry>& tracked_entries) {
// contents, occurences // contents, occurrences
std::map<std::string, uintmax_t> contents_map; std::map<std::string, uintmax_t> contents_map;
std::map<std::string, uintmax_t>::iterator map_iter; std::map<std::string, uintmax_t>::iterator map_iter;
for (entry::Entry& entry : tracked_entries) { for (entry::Entry& entry : tracked_entries) {
// the same logic: // the same logic:
// check if contents of this entry are already in the map // check if contents of this entry are already in the map
// if yes --> increment occurences counter // if yes --> increment occurrences counter
// if not --> add it to the map with a counter of 1 // if not --> add it to the map with a counter of 1
map_iter = contents_map.find(entry.pieces); map_iter = contents_map.find(entry.pieces);
if (map_iter == contents_map.end()) { if (map_iter == contents_map.end()) {
// add it to the map // add it to the map
contents_map.insert({entry.pieces, 1}); contents_map.insert({entry.pieces, 1});
} else { } else {
// increment occurences counter // increment occurrences counter
contents_map[map_iter->first]++; contents_map[map_iter->first]++;
} }
@ -161,9 +161,9 @@ void Broom::create_scan_results_list(const std::vector<entry::Entry> tracked_ent
for (const entry::Entry entry : tracked_entries) { for (const entry::Entry entry : tracked_entries) {
// log every entry and its group // log every entry and its group
if (entry.group == group::EMPTY) { if (entry.group == group::EMPTY) {
outfile << entry.path << " --- is an empty file" << std::endl; outfile << "[EMPTY] " << entry.path << std::endl;
} else if (entry.group == group::DUPLICATE) { } else if (entry.group == group::DUPLICATE) {
outfile << entry.path << " --- is a duplicate of another file" << std::endl; outfile << "[DUPLICATE] " << entry.path << std::endl;
} }
} }
@ -207,4 +207,15 @@ uintmax_t Broom::remove_empty_files(std::vector<entry::Entry>& tracked_entries)
return removed; return removed;
}; };
// Assigns group::DUPLICATE to each tracked entry, leaving entries that are
// already in group::EMPTY untouched so their grouping is preserved.
void Broom::mark_as_duplicates(std::vector<entry::Entry>& tracked_entries) {
    for (auto it = tracked_entries.begin(); it != tracked_entries.end(); ++it) {
        // empty files keep their own group
        const bool is_empty_file = (it->group == group::EMPTY);
        if (!is_empty_file) {
            it->group = group::DUPLICATE;
        }
    }
};
} }

3
src/broom.hpp

@ -52,6 +52,9 @@ public:
// creates a list of duplicate, empty files into a file // creates a list of duplicate, empty files into a file
void create_scan_results_list(const std::vector<entry::Entry> tracked_entries, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt"); void create_scan_results_list(const std::vector<entry::Entry> tracked_entries, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt");
// marks every entry that is not in the EMPTY group as a duplicate
void mark_as_duplicates(std::vector<entry::Entry>& tracked_entries);
}; };
} }

2
src/entry.cpp

@ -82,7 +82,7 @@ void Entry::get_pieces() {
// make a convenient hex string out of pure bytes // make a convenient hex string out of pure bytes
std::stringstream pieces_hex; std::stringstream pieces_hex;
for (uint8_t i = 0; i < PIECE_SIZE * 2; i++) { for (uint8_t i = 0; i < PIECE_SIZE * 3; i++) {
pieces_hex << std::hex << static_cast<unsigned>(pieces_buffer[i]); pieces_hex << std::hex << static_cast<unsigned>(pieces_buffer[i]);
}; };

2
src/entry.hpp

@ -30,7 +30,7 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
namespace entry { namespace entry {
// 3 pieces (beginning, middle and end of the file) // 3 pieces (beginning, middle and end of the file)
const uint8_t PIECE_SIZE = 75; const uint8_t PIECE_SIZE = 85;
const uint8_t PIECES_AMOUNT = 3; const uint8_t PIECES_AMOUNT = 3;
// A wrapper for every file in filesystem with all necessary information // A wrapper for every file in filesystem with all necessary information

15
src/main.cpp

@ -64,6 +64,7 @@ void print_version() {
<< std::endl; << std::endl;
}; };
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
bool benchmarking = false; bool benchmarking = false;
bool sweeping = false; bool sweeping = false;
@ -165,26 +166,24 @@ int main(int argc, char* argv[]) {
} }
}), tracked_entries.end()); }), tracked_entries.end());
// untrack unique contents // untrack unique contents
untracked = broom.untrack_unique_contents(tracked_entries); untracked = broom.untrack_unique_contents(tracked_entries);
std::cout << "[INFO] Untracked " << untracked << " files with unique contents" << std::endl; std::cout << "[INFO] Untracked " << untracked << " files with unique contents" << std::endl;
// mark entries as duplicates // mark entries as duplicates
for (entry::Entry& entry : tracked_entries) { broom.mark_as_duplicates(tracked_entries);
if (entry.group == group::EMPTY) {
// do not mess up grouping
continue;
}
entry.group = group::DUPLICATE;
}
std::cout << "[INFO] " << tracked_entries.size() << " files left being tracked" << std::endl; std::cout << "[INFO] " << tracked_entries.size() << " files left being tracked" << std::endl;
if (tracked_entries.size() > 0) {
// now only files with a non-unique size and contents are being tracked // now only files with a non-unique size and contents are being tracked
// are they REALLY duplicates ? // are they REALLY duplicates ?
// leave the REAL cleanup for the user, saving these entries in a file // better to leave the REAL cleanup for the user, saving these entries in a file, than doing a blind and possibly destructive purge
broom.create_scan_results_list(tracked_entries); broom.create_scan_results_list(tracked_entries);
std::cout << "[INFO] Created scan results file" << std::endl; std::cout << "[INFO] Created scan results file" << std::endl;
}
} catch(const std::exception& e) { } catch(const std::exception& e) {
std::cerr std::cerr

Loading…
Cancel
Save