From bd9284f9c50385ac9cf5a069aebf3b470d6e1e2a Mon Sep 17 00:00:00 2001 From: Unbewohnte Date: Fri, 7 Jan 2022 13:37:49 +0300 Subject: [PATCH] Create a duplicates list file in find_duplicates() --- bin/.duplicates_list.txt | 6 ++++++ src/broom.cpp | 43 ++++++++++++++++++++++++++++++++-------- src/broom.hpp | 6 +++++- 3 files changed, 46 insertions(+), 9 deletions(-) create mode 100644 bin/.duplicates_list.txt diff --git a/bin/.duplicates_list.txt b/bin/.duplicates_list.txt new file mode 100644 index 0000000..56c77f3 --- /dev/null +++ b/bin/.duplicates_list.txt @@ -0,0 +1,6 @@ +"/home/unbewohnte/Downloads/e/anki (copy 5).txt" +"/home/unbewohnte/Downloads/e/anki (copy 6).txt" +"/home/unbewohnte/Downloads/e/anki (copy 7).txt" +"/home/unbewohnte/Downloads/e/anki (copy 2).txt" +"/home/unbewohnte/Downloads/e/anki (copy 1).txt" +"/home/unbewohnte/Downloads/e/anki (copy 3).txt" diff --git a/src/broom.cpp b/src/broom.cpp index f9ce01d..b03fbb5 100644 --- a/src/broom.cpp +++ b/src/broom.cpp @@ -44,7 +44,7 @@ void Broom::track(const std::filesystem::path path) { // check if given path even exists if (!std::filesystem::exists(path)) { throw std::invalid_argument("\"" + path.string() + "\"" + " does not exist !"); - }; + } if (std::filesystem::is_directory(path)) { // it`s a directory. Track every regular file recursively @@ -63,7 +63,7 @@ void Broom::track(const std::filesystem::path path) { } else if (std::filesystem::is_regular_file(path)) { Entry entry(path); m_tracked_entries.push_back(entry); - }; + } if (m_benchmarking) { @@ -73,7 +73,7 @@ void Broom::track(const std::filesystem::path path) { << "[BENCHMARK] Tracking took " << std::chrono::duration_cast(tracking_time - t0).count() << " ms" << std::endl; - }; + } }; // removes entries with unique file sizes. 
Returns amount of files @@ -95,8 +95,8 @@ uintmax_t Broom::untrack_unique_sizes() { } else { // there is such size sizes_map[iterator->first]++; - }; - }; + } + } uintmax_t untracked = 0; m_tracked_entries.erase(std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [&untracked, sizes_map](Entry entry) -> bool{ @@ -149,7 +149,7 @@ uintmax_t Broom::untrack_unique_contents() { } else { // increment occurences counter contents_map[map_iter->first]++; - }; + } entry_iter++; }; @@ -190,7 +190,7 @@ void Broom::find_duplicates() { << "[BENCHMARK] Untracking by size took " << std::chrono::duration_cast(sizes_untrack_time - t0).count() << " ms" << std::endl; - }; + } // untrack by contents uintmax_t untracked_by_contents = untrack_unique_contents(); @@ -203,12 +203,39 @@ void Broom::find_duplicates() { << "[BENCHMARK] Untracking by contents took " << std::chrono::duration_cast(contents_untrack_time - sizes_untrack_time).count() << " ms" << std::endl; - }; + } std::cout << "[INFO] Untracked " << untracked_by_contents << " unique contents" << std::endl; std::cout << "[INFO] Duplicates: " << m_tracked_entries.size() << std::endl; + create_duplicates_list(); + + std::cout << "[INFO] Created a duplicates list" << std::endl; +}; + +// saves current list of duplicate file paths into a file in dir +void Broom::create_duplicates_list(const std::filesystem::path dir, const std::string filename) { + if (!std::filesystem::exists(dir)) { + // create it then + bool created = std::filesystem::create_directories(dir); + if (!created) { + throw "Could not create a directory"; + } + } + + // create output file there + std::fstream outfile(dir / filename, std::ios::out); + if (!outfile.is_open()) { + throw "Could not create an output file"; + } + + for (const Entry duplicate_entry : m_tracked_entries) { + // log every duplicate entry + outfile << duplicate_entry.path << std::endl; + } + + outfile.close(); }; // remove ALL duplicate files diff --git a/src/broom.hpp b/src/broom.hpp 
index e315936..b83cccb 100644 --- a/src/broom.hpp +++ b/src/broom.hpp @@ -35,6 +35,7 @@ class Broom { protected: // enable/disable benchmarking output bool m_benchmarking; + // TODO(Unbewohnte): decide how sweeping should work bool m_sweeping; // how many files has been (would be ?) "sweeped" @@ -63,7 +64,10 @@ public: // files that are no longer being tracked uintmax_t untrack_unique_contents(); - // remove ALL duplicate files + // saves current list of duplicate file paths into a file + void create_duplicates_list(const std::filesystem::path dir = ".", const std::string filename = "duplicate_files_list.txt"); + + // TODO(Unbewohnte): reimplement; should remove ALL duplicate files void sweep_all(); };