From 76d1cb49a666183839d5326af3d3659da8d80c66 Mon Sep 17 00:00:00 2001 From: Unbewohnte Date: Sat, 25 Dec 2021 00:25:11 +0300 Subject: [PATCH] Split in files, removed unnecessary dupfinder class because broom will do all the work --- build/CMakeLists.txt | 2 +- src/broom.cpp | 203 +++++-------------------------------------- src/broom.hpp | 54 ++++++++++++ src/entry.cpp | 89 +++++++++++++++++++ src/entry.hpp | 50 +++++++++++ src/main.cpp | 108 +++++++++++++++++++++++ 6 files changed, 322 insertions(+), 184 deletions(-) create mode 100644 src/broom.hpp create mode 100644 src/entry.cpp create mode 100644 src/entry.hpp create mode 100644 src/main.cpp diff --git a/build/CMakeLists.txt b/build/CMakeLists.txt index 1eef19b..23c0d36 100644 --- a/build/CMakeLists.txt +++ b/build/CMakeLists.txt @@ -13,4 +13,4 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall -Werror -O2") set(EXECUTABLE_OUTPUT_PATH ../bin) -add_executable(broom ../src/broom.cpp) +add_executable(broom ../src/main.cpp) diff --git a/src/broom.cpp b/src/broom.cpp index bb276b2..ca7fb44 100644 --- a/src/broom.cpp +++ b/src/broom.cpp @@ -17,216 +17,53 @@ You should have received a copy of the GNU General Public License along with broom. If not, see . */ -#include -#include -#include -#include -#include -#include +#include "entry.hpp" -// Broom version number -#define VERSION "v0.1.0" - -// Reason why files are considered as duplicates -enum DuplicateBy { - CHECKSUM, - FILENAME, - FILESIZE, -}; - -// 3 chunks (beginning, end, middle of the file) -const uint8_t CHUNK_SIZE = 24; -const uint8_t CHECKSUM_SIZE = CHUNK_SIZE * 3; - -// A wrapper for every file with all necessary information -class Entry { -public: - Entry(std::filesystem::path path) { - // check for existense and being a directory - if (!std::filesystem::exists(path) || std::filesystem::is_directory(path)) { - throw "Does not exist or a directory"; - }; - - // filename - filename = path.filename(); - - // filesize - filesize = std::filesystem::file_size(path); - - // checksum - std::fstream entry_file; - entry_file.open(path); - - if (!entry_file.is_open()) { - throw "Could not open file"; - } - - char start_buf[CHUNK_SIZE]; - entry_file.read(start_buf, CHUNK_SIZE); - - char end_buf[CHUNK_SIZE]; - entry_file.read(end_buf, CHUNK_SIZE); - - char middle_buf[CHUNK_SIZE]; - entry_file.read(middle_buf, CHUNK_SIZE); - - for (uint8_t i = 0; i < CHECKSUM_SIZE; i++) { - if (i < CHUNK_SIZE) { - checksum[i] = start_buf[i]; - } - else if (i > CHUNK_SIZE*2) { - checksum[i] = middle_buf[i-(CHUNK_SIZE*2)]; - } - else if (i > CHUNK_SIZE) { - checksum[i] = end_buf[i - CHUNK_SIZE]; - } - }; - }; - - ~Entry() {}; - - std::string filename; - std::filesystem::path path; - uintmax_t filesize; - char checksum[CHECKSUM_SIZE]; - - // Compare this entry`s checksum with the other one. - // If the checksums are the same -> returns true, else -> false - bool compare_checksums(char other_checksum[CHECKSUM_SIZE]) { - for (uint8_t i = 0; i < CHECKSUM_SIZE; i++) { - if (checksum[i] != other_checksum[i]) { - return false; - }; - }; - return true; - }; - - // Remove entity from the disk - void remove() { - std::filesystem::remove(path); - }; -}; - -// A class to manage duplicate files +// A class to find and manage duplicate files class Broom { protected: + // how many files has been "sweeped" + uintmax_t m_sweeped_files; + // how many bytes was freed + uintmax_t m_sweeped_size; public: Broom() {}; ~Broom() {}; - // remove ALL duplicate files - int sweep_all(Entry entries[]) { - return 0; - }; - - // remove ALL duplicates but the one with specified index - int sweep_all_but(Entry entries[], uint32_t index = 0) { - return 0; + // Print current statistics + void print_statistics() { + std::cout + << "| sweeped " << m_sweeped_files << " files" << std::endl + << "| with a total size of " << m_sweeped_size << " bytes" << std::endl + << std::endl; }; -}; -// A class that wraps in itself ways of locating duplicate files in -// a filesystem -class DupSeeker { -protected: // Determines whether entry1 is a duplicate of entry2 bool is_duplicate(Entry entry1, Entry entry2) { if (entry1.path == entry2.path) { // well, it`s the same file we`re talking about return false; - } else if (entry1.compare_checksums(entry2.checksum)) { - // the same checksums. Definitely a duplicate - return true; - }else if (entry1.filename == entry2.filename) { - // probably a duplicate - return true; - } else if (entry1.filesize == entry2.filesize) { - // probably a duplicate + } + else if (entry1.compare_checksums(entry2.checksum)) { return true; - }; + } return false; }; -public: - DupSeeker() {}; - ~DupSeeker() {}; - // find all duplicates in the directory int find_duplicates(std::filesystem::path directory, Entry entries[], bool recursive = false) { return 0; }; -}; - -// Broom`s settings -struct Options { - bool sweeping; - std::vector paths; -}; - -void print_help() { - std::cout - << "broom [FLAGS..] [COMMAND] [FILES|DIRECTORIES...]" << std::endl << std::endl - << "FLAGS" << std::endl - << "-v | --version -> print version information and exit" << std::endl - << "-h | --help -> print this message and exit" << std::endl << std::endl - << "COMMANDS" << std::endl - << "sweep -> scan for duplicate files and delete (sweep) all of them but the last one" << std::endl - << "scan -> scan for duplicate files and output information in a file" << std::endl - << std::endl; -}; - -void print_version() { - std::cout - << "broom " << VERSION << std::endl - << "a command line utility to locate and manage duplicate files" << std::endl << std::endl - << "Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))" << std::endl - << "This program comes with ABSOLUTELY NO WARRANTY." << std::endl - << "This is free software, and you are welcome to redistribute it" << std::endl - << "under certain conditions" << std::endl - << std::endl; -}; - -int main(int argc, char* argv[]) { - Options options; - if (argc < 2) { - print_help(); + // remove ALL duplicate files + int sweep_all(Entry entries[]) { return 0; }; - // process command line arguments - for (unsigned int i = 0; i < argc; i++) { - // flags -> command -> directories&&files - - if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { - print_help(); - return 0; - } - else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) { - print_version(); - return 0; - } - else if (strcmp(argv[i], "sweep") == 0) { - options.sweeping = true; - } - else if (strcmp(argv[i], "scan") == 0) { - options.sweeping = false; - } - else { - // add path - if (i == 0) { - continue; - } else { - options.paths.push_back(argv[i]); - } - }; - }; - - for (uint32_t i = 0; i < options.paths.size(); i++) { - std::cout << options.paths.at(i) << std::endl; + // remove ALL duplicates but the one with specified index + int sweep_all_but(Entry entries[], uint32_t index = 0) { + return 0; }; - - return 0; }; diff --git a/src/broom.hpp b/src/broom.hpp new file mode 100644 index 0000000..1d72b3b --- /dev/null +++ b/src/broom.hpp @@ -0,0 +1,54 @@ +/* +Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz)) + +This file is part of broom. + +broom is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +broom is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with broom. If not, see . +*/ + +# ifndef BROOM_HPP +# define BROOM_HPP + +#include +#include + +// A class to find and manage duplicate files +class Broom { +protected: + // how many files has been "sweeped" + uintmax_t m_sweeped_files; + // how many bytes was freed + uintmax_t m_sweeped_size; + +public: + Broom() {}; + ~Broom() {}; + + // Print current statistics + void print_statistics(); + + // Determines whether entry1 is a duplicate of entry2 + bool is_duplicate(Entry entry1, Entry entry2); + + // find all duplicates in the directory + int find_duplicates(std::filesystem::path directory, Entry entries[], bool recursive = false); + + // remove ALL duplicate files + int sweep_all(Entry entries[]); + + // remove ALL duplicates but the one with specified index + int sweep_all_but(Entry entries[], uint32_t index = 0); +}; + +# endif diff --git a/src/entry.cpp b/src/entry.cpp new file mode 100644 index 0000000..487cb96 --- /dev/null +++ b/src/entry.cpp @@ -0,0 +1,89 @@ +/* +Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz)) + +This file is part of broom. + +broom is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +broom is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with broom. If not, see . +*/ + +#include "entry.hpp" + +// A wrapper for every file with all necessary information +class Entry { +public: + Entry(std::filesystem::path path) { + // check for existense and being a directory + if (!std::filesystem::exists(path) || std::filesystem::is_directory(path)) { + throw "Does not exist or a directory"; + }; + + // filename + filename = path.filename(); + + // filesize + filesize = std::filesystem::file_size(path); + + // checksum + std::fstream entry_file; + entry_file.open(path); + + if (!entry_file.is_open()) { + throw "Could not open file"; + } + + char start_buf[CHUNK_SIZE]; + entry_file.read(start_buf, CHUNK_SIZE); + + char end_buf[CHUNK_SIZE]; + entry_file.read(end_buf, CHUNK_SIZE); + + char middle_buf[CHUNK_SIZE]; + entry_file.read(middle_buf, CHUNK_SIZE); + + for (uint8_t i = 0; i < CHECKSUM_SIZE; i++) { + if (i < CHUNK_SIZE) { + checksum[i] = start_buf[i]; + } + else if (i > CHUNK_SIZE*2) { + checksum[i] = middle_buf[i-(CHUNK_SIZE*2)]; + } + else if (i > CHUNK_SIZE) { + checksum[i] = end_buf[i - CHUNK_SIZE]; + } + }; + }; + + ~Entry() {}; + + std::string filename; + std::filesystem::path path; + uintmax_t filesize; + char checksum[CHECKSUM_SIZE]; + + // Compare this entry`s checksum with the other one. + // If the checksums are the same -> returns true, else -> false + bool compare_checksums(char other_checksum[CHECKSUM_SIZE]) { + for (uint8_t i = 0; i < CHECKSUM_SIZE; i++) { + if (checksum[i] != other_checksum[i]) { + return false; + }; + }; + return true; + }; + + // Remove entity from the disk + void remove() { + std::filesystem::remove(path); + }; +}; diff --git a/src/entry.hpp b/src/entry.hpp new file mode 100644 index 0000000..1a24581 --- /dev/null +++ b/src/entry.hpp @@ -0,0 +1,50 @@ +/* +Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz)) + +This file is part of broom. + +broom is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +broom is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with broom. If not, see . +*/ + +# ifndef ENTRY_HPP +# define ENTRY_HPP + +#include +#include + +// 3 chunks (beginning, end, middle of the file) +const uint8_t CHUNK_SIZE = 24; +const uint8_t CHECKSUM_SIZE = CHUNK_SIZE * 3; + +// A wrapper for every file with all necessary information +class Entry { +public: + Entry(std::filesystem::path path); + ~Entry(); + + std::string filename; + std::filesystem::path path; + uintmax_t filesize; + char checksum[CHECKSUM_SIZE]; + + // Compare this entry`s checksum with the other one. + // If the checksums are the same -> returns true, else -> false + bool compare_checksums(char other_checksum[CHECKSUM_SIZE]); + + // Remove entity from the disk + void remove(); +}; + + +# endif diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..b49aabd --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,108 @@ +/* +Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz)) + +This file is part of broom. + +broom is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +broom is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with broom. If not, see . +*/ + +#include +#include +#include +#include +#include +#include + +#include "entry.hpp" +#include "broom.hpp" + +// Broom version number +#define VERSION "v0.1.0" + +// Broom`s settings +struct Options { + bool sweeping; + std::vector paths; +}; + +void print_help() { + std::cout + << "broom [FLAGS..] [COMMAND] [FILES|DIRECTORIES...]" << std::endl << std::endl + << "FLAGS" << std::endl + << "-v | --version -> print version information and exit" << std::endl + << "-h | --help -> print this message and exit" << std::endl << std::endl + << "COMMANDS" << std::endl + << "sweep -> scan for duplicate files and delete (sweep) all of them but the last one" << std::endl + << "scan -> scan for duplicate files and output information in a file" << std::endl + << std::endl; +}; + +void print_version() { + std::cout + << "broom " << VERSION << std::endl + << "a command line utility to locate and manage duplicate files" << std::endl << std::endl + << "Copyright (C) 2021 Kasyanov Nikolay Alexeevich (Unbewohnte (me@unbewohnte.xyz))" << std::endl + << "This program comes with ABSOLUTELY NO WARRANTY." << std::endl + << "This is free software, and you are welcome to redistribute it" << std::endl + << "under certain conditions" << std::endl + << std::endl; +}; + +int main(int argc, char* argv[]) { + Options options; + + if (argc < 2) { + print_help(); + return 0; + }; + + // process command line arguments + for (unsigned int i = 0; i < argc; i++) { + // flags -> command -> directories&&files + + if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { + print_help(); + return 0; + } + else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) { + print_version(); + return 0; + } + else if (strcmp(argv[i], "sweep") == 0) { + options.sweeping = true; + } + else if (strcmp(argv[i], "scan") == 0) { + options.sweeping = false; + } + else { + // add path + if (i == 0) { + continue; + } else { + options.paths.push_back(argv[i]); + } + }; + }; + + // printing all directories just for testing + for (uint32_t i = 0; i < options.paths.size(); i++) { + for (auto& p : std::filesystem::recursive_directory_iterator(options.paths.at(i))) { + if (p.is_directory()) { + std::cout << p.path() << std::endl; + } + }; + }; + + return 0; +};