Browse Source

No longer dies on "permission denied" errors; the Broom class no longer owns the tracked entries

main
Unbewohnte 3 years ago
parent
commit
04f524db46
  1. 0
      COPYING
  2. 8
      build/CMakeLists.txt
  3. 103
      src/broom.cpp
  4. 39
      src/broom.hpp
  5. 0
      src/entry.cpp
  6. 2
      src/entry.hpp
  7. 0
      src/group.hpp
  8. 30
      src/main.cpp

8
build/CMakeLists.txt

@ -9,8 +9,14 @@ set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
set(BUILD_SHARED_LIBS OFF) set(BUILD_SHARED_LIBS OFF)
set(CMAKE_EXE_LINKER_FLAGS "-static") set(CMAKE_EXE_LINKER_FLAGS "-static")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall -Werror -O2") find_package(Threads REQUIRED)
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
set(THREADS_PREFER_PTHREAD_FLAG TRUE)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -Wall -Werror -O2")
set(EXECUTABLE_OUTPUT_PATH ../bin) set(EXECUTABLE_OUTPUT_PATH ../bin)
add_executable(broom ../src/main.cpp ../src/entry.cpp ../src/broom.cpp ../src/group.hpp) add_executable(broom ../src/main.cpp ../src/entry.cpp ../src/broom.cpp ../src/group.hpp)
target_link_libraries(broom Threads::Threads)

103
src/broom.cpp

@ -23,6 +23,7 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <map> #include <map>
#include <chrono> #include <chrono>
#include <stdexcept> #include <stdexcept>
#include <future>
#include "entry.hpp" #include "entry.hpp"
#include "broom.hpp" #include "broom.hpp"
@ -32,18 +33,17 @@ namespace broom {
Broom::Broom(Options options) { Broom::Broom(Options options) {
m_benchmarking = options.benchmarking; m_benchmarking = options.benchmarking;
m_sweeping = options.sweeping;
}; };
Broom::~Broom() { Broom::~Broom() {};
m_tracked_entries.clear();
};
// recursively track every file that lies in given path. Throws an invalid_argument // recursively track every file that lies in given path. Throws an invalid_argument
// error in case path does not exist // error in case path does not exist
void Broom::track(const std::filesystem::path path) { std::vector<entry::Entry> Broom::track(const std::filesystem::path path) {
auto t0 = std::chrono::high_resolution_clock::now(); auto t0 = std::chrono::high_resolution_clock::now();
std::vector<entry::Entry> tracked_entries;
// check if given path even exists // check if given path even exists
if (!std::filesystem::exists(path)) { if (!std::filesystem::exists(path)) {
throw std::invalid_argument("\"" + path.string() + "\"" + " does not exist !"); throw std::invalid_argument("\"" + path.string() + "\"" + " does not exist !");
@ -62,12 +62,12 @@ void Broom::track(const std::filesystem::path path) {
}; };
entry::Entry entry(dir_entry.path()); entry::Entry entry(dir_entry.path());
m_tracked_entries.push_back(entry); tracked_entries.push_back(entry);
} }
} else if (std::filesystem::is_regular_file(path)) { } else if (std::filesystem::is_regular_file(path)) {
// just a file // just a file
entry::Entry entry(path); entry::Entry entry(path);
m_tracked_entries.push_back(entry); tracked_entries.push_back(entry);
} }
@ -80,16 +80,18 @@ void Broom::track(const std::filesystem::path path) {
<< " ms" << std::endl; << " ms" << std::endl;
} }
std::cout << "[INFO] Tracking " << m_tracked_entries.size() << " files" << std::endl; std::cout << "[INFO] Tracking " << tracked_entries.size() << " files" << std::endl;
return tracked_entries;
}; };
// removes entries with unique file sizes. Returns amount of files // removes entries with unique file sizes. Returns amount of files
// that are no longer being tracked // that are no longer being tracked
uintmax_t Broom::m_untrack_unique_sizes() { uintmax_t Broom::untrack_unique_sizes(std::vector<entry::Entry>& tracked_entries) {
// key: size, value: amount of occurrences // key: size, value: amount of occurrences
std::map<uintmax_t, uintmax_t> sizes_map; std::map<uintmax_t, uintmax_t> sizes_map;
for (auto entry_iter = m_tracked_entries.begin(); entry_iter != m_tracked_entries.end(); entry_iter++) { for (auto entry_iter = tracked_entries.begin(); entry_iter != tracked_entries.end(); entry_iter++) {
// check if size of this entry is already in the map // check if size of this entry is already in the map
// if yes --> increment occurrences counter // if yes --> increment occurrences counter
// if not --> add it to the map with a counter of 1 // if not --> add it to the map with a counter of 1
@ -104,7 +106,7 @@ uintmax_t Broom::m_untrack_unique_sizes() {
} }
uintmax_t untracked = 0; uintmax_t untracked = 0;
m_tracked_entries.erase(std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [&untracked, sizes_map](entry::Entry entry) -> bool{ tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [&untracked, sizes_map](entry::Entry entry) -> bool{
auto iter = sizes_map.find(entry.filesize); auto iter = sizes_map.find(entry.filesize);
if (iter->second == 1) { if (iter->second == 1) {
// unique // unique
@ -113,54 +115,37 @@ uintmax_t Broom::m_untrack_unique_sizes() {
}; };
return false; return false;
}), m_tracked_entries.end()); }), tracked_entries.end());
return untracked; return untracked;
}; };
// removes entries with unique content-pieces. Returns amount of // removes entries with unique content-pieces. Returns amount of
// files that are no longer being tracked // files that are no longer being tracked
uintmax_t Broom::m_untrack_unique_contents() { uintmax_t Broom::untrack_unique_contents(std::vector<entry::Entry>& tracked_entries) {
// contents, occurrences // contents, occurrences
std::map<std::string, uintmax_t> contents_map; std::map<std::string, uintmax_t> contents_map;
std::map<std::string, uintmax_t>::iterator map_iter; std::map<std::string, uintmax_t>::iterator map_iter;
for (auto entry_iter = m_tracked_entries.begin(); entry_iter != m_tracked_entries.end();) { for (entry::Entry& entry : tracked_entries) {
// the same logic: // the same logic:
// check if contents of this entry are already in the map // check if contents of this entry are already in the map
// if yes --> increment occurrences counter // if yes --> increment occurrences counter
// if not --> add it to the map with a counter of 1 // if not --> add it to the map with a counter of 1
map_iter = contents_map.find(entry.pieces);
if (entry_iter->filesize == 0) {
// that`s an empty file. Skip it
entry_iter++;
continue;
}
try{
// can get "permission denied" when opening file
entry_iter->get_pieces();
} catch(const std::ifstream::failure& e) {
// there is nothing we can do. Untrack this entry
entry_iter = m_tracked_entries.erase(entry_iter);
continue;
}
map_iter = contents_map.find(entry_iter->pieces);
if (map_iter == contents_map.end()) { if (map_iter == contents_map.end()) {
// add it to the map // add it to the map
contents_map.insert({entry_iter->pieces, 1}); contents_map.insert({entry.pieces, 1});
// std::cout << "First time seeing this piece: " << entry_iter->pieces << std::endl;
} else { } else {
// increment occurrences counter // increment occurrences counter
contents_map[map_iter->first]++; contents_map[map_iter->first]++;
} }
entry_iter++;
}; };
uintmax_t untracked = 0; uintmax_t untracked = 0;
m_tracked_entries.erase(std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [&untracked, contents_map](entry::Entry entry) -> bool { tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [&untracked, contents_map](entry::Entry entry) -> bool {
auto iter = contents_map.find(entry.pieces); auto iter = contents_map.find(entry.pieces);
if (iter->second == 1) { if (iter->second == 1) {
// unique // unique
@ -169,21 +154,21 @@ uintmax_t Broom::m_untrack_unique_contents() {
} else { } else {
return false; return false;
} }
}), m_tracked_entries.end()); }), tracked_entries.end());
return untracked; return untracked;
}; };
// finds all duplicates among tracked entries and marks them with appropriate group. // finds all duplicates among tracked entries and marks them with appropriate group.
// Returns amount of duplicate files // Returns amount of duplicate files.
uintmax_t Broom::m_find_duplicates() { uintmax_t Broom::find_duplicates(std::vector<entry::Entry>& tracked_entries) {
auto t0 = std::chrono::high_resolution_clock::now(); auto t0 = std::chrono::high_resolution_clock::now();
// print how many files are being tracked // print how many files are being tracked
uintmax_t global_untracked = m_tracked_entries.size(); uintmax_t global_untracked = tracked_entries.size();
// untrack by size // untrack by size
uintmax_t untracked_by_size = m_untrack_unique_sizes(); uintmax_t untracked_by_size = untrack_unique_sizes(tracked_entries);
global_untracked += untracked_by_size; global_untracked += untracked_by_size;
std::cout << "[INFO] Untracked " << untracked_by_size << " unique sizes" << std::endl; std::cout << "[INFO] Untracked " << untracked_by_size << " unique sizes" << std::endl;
@ -196,8 +181,19 @@ uintmax_t Broom::m_find_duplicates() {
<< " ms" << std::endl; << " ms" << std::endl;
} }
// get pieces for each entry. If error occurs (permission denied) - untrack it
tracked_entries.erase(std::remove_if(tracked_entries.begin(), tracked_entries.end(), [](entry::Entry& entry) -> bool {
try {
entry.get_pieces();
return false;
} catch(...) {
return true;
}
}), tracked_entries.end());
// untrack by contents // untrack by contents
uintmax_t untracked_by_contents = m_untrack_unique_contents(); uintmax_t untracked_by_contents = untrack_unique_contents(tracked_entries);
global_untracked += untracked_by_contents; global_untracked += untracked_by_contents;
auto contents_untrack_time = std::chrono::high_resolution_clock::now(); auto contents_untrack_time = std::chrono::high_resolution_clock::now();
@ -211,11 +207,11 @@ uintmax_t Broom::m_find_duplicates() {
std::cout << "[INFO] Untracked " << untracked_by_contents << " unique contents" << std::endl; std::cout << "[INFO] Untracked " << untracked_by_contents << " unique contents" << std::endl;
std::cout << "[INFO] Found " << m_tracked_entries.size() << " possible duplicate files" << std::endl; std::cout << "[INFO] Found " << tracked_entries.size() << " possible duplicate files" << std::endl;
// mark duplicate entries // mark duplicate entries
for (entry::Entry& entry : m_tracked_entries) { for (entry::Entry& entry : tracked_entries) {
if (entry.group == group::EMPTY) { if (entry.group == group::EMPTY) {
// do not mess up grouping // do not mess up grouping
continue; continue;
@ -223,11 +219,11 @@ uintmax_t Broom::m_find_duplicates() {
entry.group = group::DUPLICATE; entry.group = group::DUPLICATE;
} }
return m_tracked_entries.size(); return tracked_entries.size();
}; };
// creates a list of duplicate, empty files into a file // creates a list of duplicate, empty files into a file
void Broom::create_scan_results_list(const std::filesystem::path dir, const std::string filename) { void Broom::create_scan_results_list(const std::vector<entry::Entry> tracked_entries, const std::filesystem::path dir, const std::string filename) {
if (!std::filesystem::exists(dir)) { if (!std::filesystem::exists(dir)) {
// create it then // create it then
bool created = std::filesystem::create_directories(dir); bool created = std::filesystem::create_directories(dir);
@ -242,7 +238,7 @@ void Broom::create_scan_results_list(const std::filesystem::path dir, const std:
throw "Could not create a scan results file"; throw "Could not create a scan results file";
} }
for (const entry::Entry entry : m_tracked_entries) { for (const entry::Entry entry : tracked_entries) {
// log every entry and its group // log every entry and its group
if (entry.group == group::EMPTY) { if (entry.group == group::EMPTY) {
outfile << entry.path << " --- is an empty file" << std::endl; outfile << entry.path << " --- is an empty file" << std::endl;
@ -258,9 +254,9 @@ void Broom::create_scan_results_list(const std::filesystem::path dir, const std:
// finds empty files among tracked entries and gives them appropriate group // finds empty files among tracked entries and gives them appropriate group
// Returns amount of found empty files // Returns amount of found empty files
uintmax_t Broom::m_find_empty_files() { uintmax_t Broom::find_empty_files(std::vector<entry::Entry>& tracked_entries) {
uintmax_t found_empty_files = 0; uintmax_t found_empty_files = 0;
for (entry::Entry& entry : m_tracked_entries) { for (entry::Entry& entry : tracked_entries) {
if (entry.filesize == 0) { if (entry.filesize == 0) {
// empty files can't be considered as duplicates. assign a group // empty files can't be considered as duplicates. assign a group
entry.group = group::EMPTY; entry.group = group::EMPTY;
@ -273,15 +269,4 @@ uintmax_t Broom::m_find_empty_files() {
return found_empty_files; return found_empty_files;
}; };
// scans directory for duplicates and empty files
void Broom::scan() {
m_find_empty_files();
m_find_duplicates();
};
// remove ALL duplicate files
void Broom::sweep() {
};
} }

39
src/broom.hpp

@ -36,44 +36,33 @@ class Broom {
protected: protected:
// enable/disable benchmarking output // enable/disable benchmarking output
bool m_benchmarking; bool m_benchmarking;
// TODO(think about how sweeping should work)
bool m_sweeping;
// paths to tracked files public:
std::vector<entry::Entry> m_tracked_entries; Broom(Options options);
~Broom();
// finds empty files among tracked entries. // recursively tracks every file that lies in given path. Throws an invalid_argument
// error in case path does not exist. Returns collected entries
std::vector<entry::Entry> track(const std::filesystem::path path);
// finds empty files among tracked entries and marks them with the appropriate group.
// Returns amount of found empty files // Returns amount of found empty files
uintmax_t m_find_empty_files(); uintmax_t find_empty_files(std::vector<entry::Entry>& tracked_entries);
// removes entries with unique file sizes. Returns amount of files // removes entries with unique file sizes. Returns amount of files
// that are no longer being tracked // that are no longer being tracked
uintmax_t m_untrack_unique_sizes(); uintmax_t untrack_unique_sizes(std::vector<entry::Entry>& tracked_entries);
// removes entries with unique content-pieces. Returns amount of // removes entries with unique content-pieces. Returns amount of
// files that are no longer being tracked // files that are no longer being tracked.
uintmax_t m_untrack_unique_contents(); uintmax_t untrack_unique_contents(std::vector<entry::Entry>& tracked_entries);
// finds all duplicates among tracked entries and marks them with appropriate group // finds all duplicates among tracked entries and marks them with appropriate group
// Returns amount of duplicate files // Returns amount of duplicate files
uintmax_t m_find_duplicates(); uintmax_t find_duplicates(std::vector<entry::Entry>& tracked_entries);
public:
Broom(Options options);
~Broom();
// recursively track every file that lies in given path. Throws an invalid_argument
// error in case path does not exist
void track(const std::filesystem::path path);
// creates a list of duplicate, empty files into a file // creates a list of duplicate, empty files into a file
void create_scan_results_list(const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt"); void create_scan_results_list(const std::vector<entry::Entry> tracked_entries, const std::filesystem::path dir = ".", const std::string filename = "scan_results.txt");
// TODO
void sweep();
// scans tracked entries for duplicates and empty files
void scan();
}; };
} }

0
src/entry.cpp

2
src/entry.hpp

@ -30,7 +30,7 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
namespace entry { namespace entry {
// 3 pieces (beginning, middle and end of the file) // 3 pieces (beginning, middle and end of the file)
const uint8_t PIECE_SIZE = 6; const uint8_t PIECE_SIZE = 75;
const uint8_t PIECES_AMOUNT = 3; const uint8_t PIECES_AMOUNT = 3;
// A wrapper for every file in filesystem with all necessary information // A wrapper for every file in filesystem with all necessary information

0
src/group.hpp

30
src/main.cpp

@ -21,6 +21,7 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <stdexcept> #include <stdexcept>
#include <string.h> #include <string.h>
#include <vector> #include <vector>
#include <future>
#include "entry.hpp" #include "entry.hpp"
#include "broom.hpp" #include "broom.hpp"
@ -98,24 +99,23 @@ int main(int argc, char* argv[]) {
broom::Broom broom(options); broom::Broom broom(options);
try { try {
broom.track(tracked_path); std::vector<entry::Entry> tracked_entries = broom.track(tracked_path);
broom.scan(); broom.find_empty_files(tracked_entries);
broom.create_scan_results_list();
} catch(const std::invalid_argument& e) {
std::cerr
<< "[ERROR] Invalid argument: " << std::endl
<< e.what() << std::endl;
return 1;
} catch(const std::filesystem::filesystem_error& e) { // get contents for each entry first
std::cerr //auto handle = std::async(std::launch::async, [&tracked_entries]() {
<< "[ERROR] FS error: " << std::endl // for (entry::Entry& e : tracked_entries) {
<< e.what() << std::endl; // e.get_pieces();
return 1; // }
//});
//broom.untrack_unique_contents(tracked_entries);
broom.find_duplicates(tracked_entries);
} catch(...) { broom.create_scan_results_list(tracked_entries);
} catch(const std::exception& e) {
std::cerr std::cerr
<< "[ERROR] Unexpected exception" << std::endl; << "[ERROR] " << e.what() << std::endl;
return 1; return 1;
}; };

Loading…
Cancel
Save