Browse Source

Check entry contents in the middle and at the end of the file for fewer false positives, though it is still not good enough

main
Unbewohnte 3 years ago
parent
commit
e3a483f4e4
  1. 6
      src/broom.cpp
  2. 17
      src/entry.cpp
  3. 8
      src/entry.hpp
  4. 20
      src/main.cpp

6
src/broom.cpp

@ -54,13 +54,15 @@ void Broom::track(const std::filesystem::path path) {
for (auto dir_entry : std::filesystem::recursive_directory_iterator(path, options)) { for (auto dir_entry : std::filesystem::recursive_directory_iterator(path, options)) {
if (!dir_entry.is_regular_file()) { if (!dir_entry.is_regular_file()) {
// skip everything that we cannot process so easily
continue; continue;
}; };
Entry entry(dir_entry.path()); Entry entry(dir_entry.path());
m_tracked_entries.push_back(entry); m_tracked_entries.push_back(entry);
}; }
} else if (std::filesystem::is_regular_file(path)) { } else if (std::filesystem::is_regular_file(path)) {
// just a file
Entry entry(path); Entry entry(path);
m_tracked_entries.push_back(entry); m_tracked_entries.push_back(entry);
} }
@ -127,7 +129,7 @@ uintmax_t Broom::untrack_unique_contents() {
for (auto entry_iter = m_tracked_entries.begin(); entry_iter != m_tracked_entries.end();) { for (auto entry_iter = m_tracked_entries.begin(); entry_iter != m_tracked_entries.end();) {
// the same logic: // the same logic:
// check if contents of this entry is already in the map // check if contents of this entry are already in the map
// if yes --> increment occurrences counter // if yes --> increment occurrences counter
// if not --> add it to the map with a counter of 1 // if not --> add it to the map with a counter of 1

17
src/entry.cpp

@ -37,7 +37,8 @@ void Entry::get_size() {
filesize = std::filesystem::file_size(path); filesize = std::filesystem::file_size(path);
}; };
// reads 2 pieces from the beginning and the end of a file, converts them into
// reads 2 pieces from the middle and the end of a file, converts them into
// a convenient hex-encoded string // a convenient hex-encoded string
void Entry::get_pieces() { void Entry::get_pieces() {
std::fstream entry_file; std::fstream entry_file;
@ -47,18 +48,20 @@ void Entry::get_pieces() {
throw std::ifstream::failure("Could not open \"" + path.string() + "\"; reason: " + std::string(std::strerror(errno)) + "\n"); throw std::ifstream::failure("Could not open \"" + path.string() + "\"; reason: " + std::string(std::strerror(errno)) + "\n");
} }
// TODO(Properly test it)
char pieces_buffer[PIECE_SIZE * 2]; char pieces_buffer[PIECE_SIZE * 2];
if (filesize <= PIECE_SIZE * 2) { if (filesize <= PIECE_SIZE * 2) {
// can`t take whole 2 pieces ! // can`t take whole 2 pieces !
// read the whole file then // read the whole file then
entry_file.read(pieces_buffer, filesize); entry_file.read(pieces_buffer, filesize);
} else { } else {
// read CHUNK_SIZE bytes from the beginning of the file uintmax_t middle_of_the_file = (double) filesize / 2.0 - PIECE_SIZE;
char start_buf[PIECE_SIZE];
entry_file.read(start_buf, PIECE_SIZE); entry_file.seekg(middle_of_the_file, std::ios::beg);
// read PIECE_SIZE bytes from the middle of the file
char middle_buf[PIECE_SIZE];
entry_file.read(middle_buf, PIECE_SIZE);
for (uint8_t i = 0; i < PIECE_SIZE; i++) { for (uint8_t i = 0; i < PIECE_SIZE; i++) {
pieces_buffer[i] = start_buf[i]; pieces_buffer[i] = middle_buf[i];
}; };
// jump to the last PIECE_SIZE bytes of the file and read them as well // jump to the last PIECE_SIZE bytes of the file and read them as well
@ -78,6 +81,8 @@ void Entry::get_pieces() {
}; };
pieces = pieces_hex.str(); pieces = pieces_hex.str();
std::cout << pieces << std::endl;
}; };
// Remove entry from the disk // Remove entry from the disk

8
src/entry.hpp

@ -25,15 +25,15 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <sstream> #include <sstream>
#include <iomanip> #include <iomanip>
// 2 pieces (beginning and end of the file) // 2 pieces (middle and end of the file)
const uint8_t PIECE_SIZE = 24; const uint8_t PIECE_SIZE = 16;
// A wrapper for every file with all necessary information // A wrapper for every file with all necessary information
class Entry { class Entry {
public: public:
std::filesystem::path path; std::filesystem::path path;
uintmax_t filesize; uintmax_t filesize;
std::string pieces; // 2 hex-represented pieces of file (beginning and end) std::string pieces; // 2 hex-represented pieces of file
Entry(const std::filesystem::path entry_path); Entry(const std::filesystem::path entry_path);
~Entry(); ~Entry();
@ -41,7 +41,7 @@ public:
// sets this entry`s filesize // sets this entry`s filesize
void get_size(); void get_size();
// reads 2 pieces from the beginning and the end of a file, converts them into // reads 2 pieces from the middle and the end of a file, converts them into
// a convenient hex-encoded string // a convenient hex-encoded string
void get_pieces(); void get_pieces();

20
src/main.cpp

@ -88,21 +88,33 @@ int main(int argc, char* argv[]) {
}; };
// no path was specified at all // no path was specified at all
if (tracked_path.string() == "") { if (tracked_path.empty()) {
print_help(); print_help();
return 1; return 1;
}; };
Broom broom(options); Broom broom(options);
try { try {
broom.track(tracked_path); broom.track(tracked_path);
broom.find_duplicates();
} catch(const std::invalid_argument& e) { } catch(const std::invalid_argument& e) {
std::cout << e.what() << std::endl; std::cerr
<< "[ERROR] Invalid argument: " << std::endl
<< e.what() << std::endl;
return 1;
} catch(const std::filesystem::filesystem_error& e) {
std::cerr
<< "[ERROR] FS error: " << std::endl
<< e.what() << std::endl;
return 1;
} catch(...) {
std::cerr
<< "[ERROR] Unexpected exception" << std::endl;
return 1; return 1;
}; };
broom.find_duplicates();
return 0; return 0;
}; };

Loading…
Cancel
Save