/* Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz)) This file is part of broom. broom is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. broom is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with broom. If not, see . */ #include #include #include #include #include "entry.hpp" #include "broom.hpp" Broom::Broom(Options options) { m_benchmarking = options.benchmarking; m_sweeping = options.sweeping; }; Broom::~Broom() {}; // Print current statistics void Broom::print_statistics() { std::cout << "| sweeped " << m_sweeped_files << " files" << std::endl << "| with a total size of " << m_sweeped_size << " bytes" << std::endl << std::endl; }; // get all files from path recursively and track them void Broom::track(const std::filesystem::path dir) { auto t0 = std::chrono::high_resolution_clock::now(); std::filesystem::directory_options options = ( std::filesystem::directory_options::follow_directory_symlink | std::filesystem::directory_options::skip_permission_denied ); for (std::filesystem::directory_entry dir_entry : std::filesystem::recursive_directory_iterator(dir, options)) { if (dir_entry.is_directory()) { continue; }; Entry entry(dir_entry.path()); m_tracked_entries.push_back(entry); }; if (m_benchmarking) { auto tracking_time = std::chrono::high_resolution_clock::now(); std::cout << "Tracking took " << std::chrono::duration_cast(tracking_time - t0).count() << " ms" << std::endl; }; }; // removes entries with unique file sizes. Returns amount of files // that are no longer being tracked uintmax_t Broom::untrack_unique_sizes() { // key: size, value: amount of occurences std::map sizes_map; for (Entry entry : m_tracked_entries) { // check if size of this entry is already in the map // if yes --> increment occurences counter // if not --> add it to the map with a counter of 1 auto iterator = sizes_map.find(entry.filesize); if (iterator == sizes_map.end()) { // there is no such size sizes_map.insert({entry.filesize, 1}); } else { // there is such size sizes_map[iterator->first]++; }; }; uintmax_t untracked = 0; std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [&untracked, sizes_map](Entry entry) -> bool{ auto iter = sizes_map.find(entry.filesize); if (iter->second == 1) { // unique untracked++; return true; }; // std::cout << "duplicate fsize: " << iter->first << " occurences: " << iter->second << std::endl; return false; }); return untracked; }; // removes entries with unique first and last 20 bytes. Returns amount of // files that are no longer being tracked // uintmax_t Broom::untrack_unique_contents() { // // contents, occurences // std::map contents_map; // std::map::iterator iterator; // // for (Entry& entry : m_tracked_entries) { // // the same logic: // // check if contents of this entry is already in the map // // if yes --> increment occurences counter // // if not --> add it to the map with a counter of 1 // // iterator = contents_map.find(entry.checksum); // // if (iterator == contents_map.end()) { // // add it to the map // contents_map.insert(std::pair(entry.checksum, 1)); // } else { // // increment occurences counter // uintmax_t occurences = contents_map[iterator->first]; // contents_map[iterator->first] = occurences++; // }; // }; // // uintmax_t untracked = 0; // for (std::pair contents_entry : contents_map) { // if (contents_entry.second > 1) { // // not a unique size. Keep such entries // } else { // // a unique one. Untrack such an entry // std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [contents_entry](Entry e) -> bool { // return (e.compare_checksums(contents_entry.first)); // }); // untracked++; // }; // }; // // return untracked; // }; // find all duplicates among tracked entries, stop tracking uniques void Broom::find_duplicates() { if (m_benchmarking) { auto t0 = std::chrono::high_resolution_clock::now(); untrack_unique_sizes(); auto sizes_untrack_time = std::chrono::high_resolution_clock::now(); std::cout << "Untracking by size took " << std::chrono::duration_cast(sizes_untrack_time - t0).count() << " ms" << std::endl << std::endl; } else { size_t startsize = m_tracked_entries.size(); std::cout << "Tracking " << startsize << std::endl; uintmax_t global_untracked = 0; // uintmax_t untracked_by_contents = untrack_unique_contents(); // global_untracked += untracked_by_contents; // std::cout << "Untracked " << untracked_by_contents << " unique contents" << std::endl; uintmax_t untracked_by_size = untrack_unique_sizes(); global_untracked += untracked_by_size; std::cout << "Untracked " << untracked_by_size << " unique sizes" << std::endl; std::cout << "Duplicates: " << startsize - global_untracked << std::endl; }; }; // remove ALL duplicate files void Broom::sweep_all() { };