From e3a483f4e420293968f37f1359d00fe8343725fc Mon Sep 17 00:00:00 2001 From: Unbewohnte Date: Sat, 8 Jan 2022 12:38:56 +0300 Subject: [PATCH] Check entry contents in the middle and in the end of the file for less false-positives; though it is still not good enough --- src/broom.cpp | 8 +++++--- src/entry.cpp | 17 +++++++++++------ src/entry.hpp | 8 ++++---- src/main.cpp | 20 ++++++++++++++++---- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/src/broom.cpp b/src/broom.cpp index b03fbb5..64933d6 100644 --- a/src/broom.cpp +++ b/src/broom.cpp @@ -49,18 +49,20 @@ void Broom::track(const std::filesystem::path path) { if (std::filesystem::is_directory(path)) { // it`s a directory. Track every regular file recursively std::filesystem::directory_options options = ( - std::filesystem::directory_options::skip_permission_denied + std::filesystem::directory_options::skip_permission_denied ); for (auto dir_entry : std::filesystem::recursive_directory_iterator(path, options)) { if (!dir_entry.is_regular_file()) { + // skip everything that we cannot process so easily continue; }; Entry entry(dir_entry.path()); m_tracked_entries.push_back(entry); - }; + } } else if (std::filesystem::is_regular_file(path)) { + // just a file Entry entry(path); m_tracked_entries.push_back(entry); } @@ -127,7 +129,7 @@ uintmax_t Broom::untrack_unique_contents() { for (auto entry_iter = m_tracked_entries.begin(); entry_iter != m_tracked_entries.end();) { // the same logic: - // check if contents of this entry is already in the map + // check if contents of this entry are already in the map // if yes --> increment occurences counter // if not --> add it to the map with a counter of 1 diff --git a/src/entry.cpp b/src/entry.cpp index 68943fb..034ceb7 100644 --- a/src/entry.cpp +++ b/src/entry.cpp @@ -37,7 +37,8 @@ void Entry::get_size() { filesize = std::filesystem::file_size(path); }; -// reads 2 pieces from the beginning and the end of a file, converts them into + +// reads 2 pieces from the middle and the end of a file, converts them into // a convenient hex-encoded string void Entry::get_pieces() { std::fstream entry_file; @@ -47,18 +48,20 @@ void Entry::get_pieces() { throw std::ifstream::failure("Could not open \"" + path.string() + "\"; reason: " + std::string(std::strerror(errno)) + "\n"); } - // TODO(Properly test it) char pieces_buffer[PIECE_SIZE * 2]; if (filesize <= PIECE_SIZE * 2) { // can`t take whole 2 pieces ! // read the whole file then entry_file.read(pieces_buffer, filesize); } else { - // read CHUNK_SIZE bytes from the beginning of the file - char start_buf[PIECE_SIZE]; - entry_file.read(start_buf, PIECE_SIZE); + uintmax_t middle_of_the_file = (double) filesize / 2.0 - PIECE_SIZE; + + entry_file.seekg(middle_of_the_file, std::ios::beg); + // read CHUNK_SIZE bytes from the middle of the file + char middle_buf[PIECE_SIZE]; + entry_file.read(middle_buf, PIECE_SIZE); for (uint8_t i = 0; i < PIECE_SIZE; i++) { - pieces_buffer[i] = start_buf[i]; + pieces_buffer[i] = middle_buf[i]; }; // jump to the last CHUNK_SIZE bytes of the file and read the as well @@ -78,6 +81,8 @@ void Entry::get_pieces() { }; pieces = pieces_hex.str(); + + std::cout << pieces << std::endl; }; // Remove entry from the disk diff --git a/src/entry.hpp b/src/entry.hpp index d73ac96..4ee69fe 100644 --- a/src/entry.hpp +++ b/src/entry.hpp @@ -25,15 +25,15 @@ along with broom. If not, see . #include #include -// 2 pieces (beginning and end of the file) -const uint8_t PIECE_SIZE = 24; +// 2 pieces (middle and end of the file) +const uint8_t PIECE_SIZE = 16; // A wrapper for every file with all necessary information class Entry { public: std::filesystem::path path; uintmax_t filesize; - std::string pieces; // 2 hex-represented pieces of file (beginning and end) + std::string pieces; // 2 hex-represented pieces of file Entry(const std::filesystem::path entry_path); ~Entry(); @@ -41,7 +41,7 @@ public: // sets this entry`s filesize void get_size(); - // reads 2 pieces from the beginning and the end of a file, converts them into + // reads 2 pieces from the middle and the end of a file, converts them into // a convenient hex-encoded string void get_pieces(); diff --git a/src/main.cpp b/src/main.cpp index d39c846..429b2ec 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -88,21 +88,33 @@ int main(int argc, char* argv[]) { }; // no path was specified at all - if (tracked_path.string() == "") { + if (tracked_path.empty()) { print_help(); return 1; }; Broom broom(options); - try { broom.track(tracked_path); + broom.find_duplicates(); } catch(const std::invalid_argument& e) { - std::cout << e.what() << std::endl; + std::cerr + << "[ERROR] Invalid argument: " << std::endl + << e.what() << std::endl; + return 1; + + } catch(const std::filesystem::filesystem_error& e) { + std::cerr + << "[ERROR] FS error: " << std::endl + << e.what() << std::endl; + return 1; + + } catch(...) { + std::cerr + << "[ERROR] Unexpected exception" << std::endl; return 1; }; - broom.find_duplicates(); return 0; };