
Make duplicates list file with duplicates on find_duplicates()

Commit bd9284f9c5 by Unbewohnte, 3 years ago, on branch main
3 changed files:
    bin/.duplicates_list.txt (6)
    src/broom.cpp (43)
    src/broom.hpp (6)

bin/.duplicates_list.txt (+6, new file)

@@ -0,0 +1,6 @@
+"/home/unbewohnte/Downloads/e/anki (copy 5).txt"
+"/home/unbewohnte/Downloads/e/anki (copy 6).txt"
+"/home/unbewohnte/Downloads/e/anki (copy 7).txt"
+"/home/unbewohnte/Downloads/e/anki (copy 2).txt"
+"/home/unbewohnte/Downloads/e/anki (copy 1).txt"
+"/home/unbewohnte/Downloads/e/anki (copy 3).txt"

src/broom.cpp (43 lines changed)

@@ -44,7 +44,7 @@ void Broom::track(const std::filesystem::path path) {
     // check if given path even exists
     if (!std::filesystem::exists(path)) {
        throw std::invalid_argument("\"" + path.string() + "\"" + " does not exist !");
-    };
+    }
 
     if (std::filesystem::is_directory(path)) {
         // it`s a directory. Track every regular file recursively
@@ -63,7 +63,7 @@ void Broom::track(const std::filesystem::path path) {
     } else if (std::filesystem::is_regular_file(path)) {
         Entry entry(path);
         m_tracked_entries.push_back(entry);
-    };
+    }
 
     if (m_benchmarking) {
@@ -73,7 +73,7 @@ void Broom::track(const std::filesystem::path path) {
             << "[BENCHMARK] Tracking took "
             << std::chrono::duration_cast<std::chrono::milliseconds>(tracking_time - t0).count()
             << " ms" << std::endl;
-    };
+    }
 };
 
 // removes entries with unique file sizes. Returns amount of files
@@ -95,8 +95,8 @@ uintmax_t Broom::untrack_unique_sizes() {
         } else {
             // there is such size
             sizes_map[iterator->first]++;
-        };
-    };
+        }
+    }
 
     uintmax_t untracked = 0;
     m_tracked_entries.erase(std::remove_if(m_tracked_entries.begin(), m_tracked_entries.end(), [&untracked, sizes_map](Entry entry) -> bool{
@@ -149,7 +149,7 @@ uintmax_t Broom::untrack_unique_contents() {
         } else {
             // increment occurences counter
             contents_map[map_iter->first]++;
-        };
+        }
 
         entry_iter++;
     };
@@ -190,7 +190,7 @@ void Broom::find_duplicates() {
             << "[BENCHMARK] Untracking by size took "
             << std::chrono::duration_cast<std::chrono::milliseconds>(sizes_untrack_time - t0).count()
             << " ms" << std::endl;
-    };
+    }
 
     // untrack by contents
     uintmax_t untracked_by_contents = untrack_unique_contents();
@@ -203,12 +203,39 @@ void Broom::find_duplicates() {
             << "[BENCHMARK] Untracking by contents took "
             << std::chrono::duration_cast<std::chrono::milliseconds>(contents_untrack_time - sizes_untrack_time).count()
             << " ms" << std::endl;
-    };
+    }
 
     std::cout << "[INFO] Untracked " << untracked_by_contents << " unique contents" << std::endl;
     std::cout << "[INFO] Duplicates: " << m_tracked_entries.size() << std::endl;
+
+    create_duplicates_list();
+    std::cout << "[INFO] Created a duplicates list" << std::endl;
 };
 
+// saves current list of duplicate file paths into a file in dir
+void Broom::create_duplicates_list(const std::filesystem::path dir, const std::string filename) {
+    if (!std::filesystem::exists(dir)) {
+        // create it then
+        bool created = std::filesystem::create_directories(dir);
+        if (!created) {
+            throw "Could not create a directory";
+        }
+    }
+
+    // create output file there
+    std::fstream outfile(dir / filename, std::ios::out);
+    if (!outfile.is_open()) {
+        throw "Could not create an output file";
+    }
+
+    for (const Entry duplicate_entry : m_tracked_entries) {
+        // log every duplicate entry
+        outfile << duplicate_entry.path << std::endl;
+    }
+
+    outfile.close();
+};
+
 // remove ALL duplicate files
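With this change find_duplicates() ends by writing the surviving m_tracked_entries to disk via create_duplicates_list(). A sketch of how a caller might drive the updated flow; the default Broom constructor and the example path are assumptions, since neither appears in this diff:

    // usage_sketch.cpp - hypothetical driver for the updated Broom flow
    #include "broom.hpp"
    #include <iostream>

    int main() {
        Broom broom;  // assumed constructor; the real signature is not in this diff
        try {
            broom.track("/home/user/Downloads/e");  // hypothetical directory to scan
            broom.find_duplicates();  // now also writes duplicate_files_list.txt
        } catch (const std::invalid_argument& e) {
            // track() throws this for nonexistent paths
            std::cerr << e.what() << std::endl;
            return 1;
        } catch (const char* message) {
            // create_duplicates_list() throws raw C-string messages
            std::cerr << message << std::endl;
            return 1;
        }
        return 0;
    }

Note that create_duplicates_list() throws string literals rather than exception objects, so a caller has to catch const char* to handle its failures.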

src/broom.hpp (6 lines changed)

@@ -35,6 +35,7 @@ class Broom {
 protected:
     // enable/disable benchmarking output
     bool m_benchmarking;
+
     // TODO(think about how sweeping should work)
     bool m_sweeping;
     // how many files has been (would be ?) "sweeped"
@@ -63,7 +64,10 @@ public:
     // files that are no longer being tracked
     uintmax_t untrack_unique_contents();
 
     // remove ALL duplicate files
+    // saves current list of duplicate file paths into a file
+    void create_duplicates_list(const std::filesystem::path dir = ".", const std::string filename = "duplicate_files_list.txt");
+
     // TODO
     void sweep_all();
 };
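Both parameters of create_duplicates_list() are defaulted, so callers can redirect the output without touching find_duplicates(). A short sketch under the same assumed constructor as above:

    #include "broom.hpp"

    int main() {
        Broom broom;  // assumed constructor, as in the earlier sketch
        // normally called after find_duplicates() has narrowed m_tracked_entries
        broom.create_duplicates_list();                       // ./duplicate_files_list.txt
        broom.create_duplicates_list("results");              // results/duplicate_files_list.txt
        broom.create_duplicates_list("results", "dups.txt");  // results/dups.txt
        return 0;
    }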
