Browse Source

Check entry contents in the middle and at the end of the file for fewer false positives, though it is still not good enough

main
Unbewohnte 3 years ago
parent
commit
e3a483f4e4
  1. 8
      src/broom.cpp
  2. 17
      src/entry.cpp
  3. 8
      src/entry.hpp
  4. 20
      src/main.cpp

8
src/broom.cpp

@ -49,18 +49,20 @@ void Broom::track(const std::filesystem::path path) {
if (std::filesystem::is_directory(path)) {
// it's a directory. Track every regular file recursively
std::filesystem::directory_options options = (
std::filesystem::directory_options::skip_permission_denied
std::filesystem::directory_options::skip_permission_denied
);
for (auto dir_entry : std::filesystem::recursive_directory_iterator(path, options)) {
if (!dir_entry.is_regular_file()) {
// skip everything that we cannot process so easily
continue;
};
Entry entry(dir_entry.path());
m_tracked_entries.push_back(entry);
};
}
} else if (std::filesystem::is_regular_file(path)) {
// just a file
Entry entry(path);
m_tracked_entries.push_back(entry);
}
@ -127,7 +129,7 @@ uintmax_t Broom::untrack_unique_contents() {
for (auto entry_iter = m_tracked_entries.begin(); entry_iter != m_tracked_entries.end();) {
// the same logic:
// check if contents of this entry is already in the map
// check if contents of this entry are already in the map
// if yes --> increment occurrences counter
// if not --> add it to the map with a counter of 1

17
src/entry.cpp

@ -37,7 +37,8 @@ void Entry::get_size() {
filesize = std::filesystem::file_size(path);
};
// reads 2 pieces from the beginning and the end of a file, converts them into
// reads 2 pieces from the middle and the end of a file, converts them into
// a convenient hex-encoded string
void Entry::get_pieces() {
std::fstream entry_file;
@ -47,18 +48,20 @@ void Entry::get_pieces() {
throw std::ifstream::failure("Could not open \"" + path.string() + "\"; reason: " + std::string(std::strerror(errno)) + "\n");
}
// TODO(Properly test it)
char pieces_buffer[PIECE_SIZE * 2];
if (filesize <= PIECE_SIZE * 2) {
// can't take 2 whole pieces!
// read the whole file instead
entry_file.read(pieces_buffer, filesize);
} else {
// read CHUNK_SIZE bytes from the beginning of the file
char start_buf[PIECE_SIZE];
entry_file.read(start_buf, PIECE_SIZE);
uintmax_t middle_of_the_file = (double) filesize / 2.0 - PIECE_SIZE;
entry_file.seekg(middle_of_the_file, std::ios::beg);
// read PIECE_SIZE bytes from the middle of the file
char middle_buf[PIECE_SIZE];
entry_file.read(middle_buf, PIECE_SIZE);
for (uint8_t i = 0; i < PIECE_SIZE; i++) {
pieces_buffer[i] = start_buf[i];
pieces_buffer[i] = middle_buf[i];
};
// jump to the last PIECE_SIZE bytes of the file and read them as well
@ -78,6 +81,8 @@ void Entry::get_pieces() {
};
pieces = pieces_hex.str();
std::cout << pieces << std::endl;
};
// Remove entry from the disk

8
src/entry.hpp

@ -25,15 +25,15 @@ along with broom. If not, see <https://www.gnu.org/licenses/>.
#include <sstream>
#include <iomanip>
// 2 pieces (beginning and end of the file)
const uint8_t PIECE_SIZE = 24;
// 2 pieces (middle and end of the file)
const uint8_t PIECE_SIZE = 16;
// A wrapper for every file with all necessary information
class Entry {
public:
std::filesystem::path path;
uintmax_t filesize;
std::string pieces; // 2 hex-represented pieces of file (beginning and end)
std::string pieces; // 2 hex-represented pieces of file
Entry(const std::filesystem::path entry_path);
~Entry();
@ -41,7 +41,7 @@ public:
// sets this entry's filesize
void get_size();
// reads 2 pieces from the beginning and the end of a file, converts them into
// reads 2 pieces from the middle and the end of a file, converts them into
// a convenient hex-encoded string
void get_pieces();

20
src/main.cpp

@ -88,21 +88,33 @@ int main(int argc, char* argv[]) {
};
// no path was specified at all
if (tracked_path.string() == "") {
if (tracked_path.empty()) {
print_help();
return 1;
};
Broom broom(options);
try {
broom.track(tracked_path);
broom.find_duplicates();
} catch(const std::invalid_argument& e) {
std::cout << e.what() << std::endl;
std::cerr
<< "[ERROR] Invalid argument: " << std::endl
<< e.what() << std::endl;
return 1;
} catch(const std::filesystem::filesystem_error& e) {
std::cerr
<< "[ERROR] FS error: " << std::endl
<< e.what() << std::endl;
return 1;
} catch(...) {
std::cerr
<< "[ERROR] Unexpected exception" << std::endl;
return 1;
};
broom.find_duplicates();
return 0;
};

Loading…
Cancel
Save