lib45d
45Drives C++ Library API Documentation
MTDirCrawler.hpp
1 // -*- C++ -*-
2 /*
3  * Copyright (C) 2021 Joshua Boudreau <jboudreau@45drives.com>
4  *
5  * This file is part of lib45d.
6  *
7  * lib45d is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * lib45d is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with lib45d. If not, see <https://www.gnu.org/licenses/>.
19  */
20 
21 #pragma once
22 
23 #include <atomic>
24 #include <condition_variable>
25 #include <deque>
26 #include <functional>
27 #include <mutex>
28 #include <thread>
29 #include <vector>
30 #if __cplusplus >= 201703L
31 # include <filesystem>
32 namespace ffd_internal_fs = std::filesystem;
33 #else
34 # include <boost/filesystem.hpp>
35 namespace ffd_internal_fs = boost::filesystem;
36 #endif
37 
38 namespace ffd {
/**
 * @brief Crawls through a directory tree with multiple worker threads, calling a
 * callback function on each entry that is found.
 *
 * The callback is invoked from the worker threads, possibly from several of them
 * at the same time, so it has to do its own synchronisation.
 *
 * @note Exceptions are not caught inside the workers. One that escapes the
 * callback or the directory iteration leaves a std::thread entry function,
 * which terminates the program.
 */
class MTDirCrawler {
public:
    /**
     * @brief Construct a new MTDirCrawler object.
     */
    MTDirCrawler() : done_(false), queue_(), workers_(), threads_running_(0), mutex_(), cv_() {}
    /**
     * @brief Destroy the MTDirCrawler object.
     *
     * Joins workers that are still running, so forgetting wait() after
     * crawl_async() blocks here until the crawl is finished instead of
     * destroying joinable std::thread objects (which calls std::terminate).
     */
    ~MTDirCrawler() {
        wait();
    }
    /**
     * @brief Kicks off thread workers and waits for them to finish.
     *
     * @param base_path Entry the crawl starts at; the callback is called on it too.
     * @param callback Called once per entry. When it returns true and the entry
     * is a directory, the entries of that directory are crawled as well;
     * returning false prunes the crawl at that entry.
     * @param threads Number of worker threads; values below 1 are treated as 1.
     */
    void crawl(ffd_internal_fs::path base_path,
               std::function<bool(const ffd_internal_fs::directory_entry &)> callback,
               int threads) {
        crawl_async(base_path, callback, threads);
        wait();
    }
    /**
     * @brief Kicks off thread workers. MTDirCrawler::wait() must be called at
     * some point to join threads.
     *
     * @param base_path Entry the crawl starts at; the callback is called on it too.
     * @param callback Called once per entry. When it returns true and the entry
     * is a directory, the entries of that directory are crawled as well;
     * returning false prunes the crawl at that entry.
     * @param threads Number of worker threads; values below 1 are treated as 1.
     */
    void crawl_async(ffd_internal_fs::path base_path,
                     std::function<bool(const ffd_internal_fs::directory_entry &)> callback,
                     int threads) {
        // With no worker the seed would never be consumed and would be picked
        // up by the next crawl, so always run at least one.
        const int worker_count = (threads < 1) ? 1 : threads;
        seed(base_path);
        // Every worker counts as busy until it reaches its first idle check.
        threads_running_ = worker_count;
        try {
            for (int i = 0; i < worker_count; ++i) {
                workers_.emplace_back(&MTDirCrawler::worker, this, callback);
            }
        } catch (...) {
            // Not every worker could be started. The ones that are running
            // would wait forever for the missing ones to go idle, so stop
            // and join them before reporting the failure.
            {
                std::lock_guard<std::mutex> lk(mutex_);
                done_ = true;
                queue_.clear();
            }
            cv_.notify_all();
            wait();
            throw;
        }
    }
    /**
     * @brief Wait for threads to finish. Must be called at some point after
     * MTDirCrawler::crawl_async().
     *
     * Afterwards the crawler is reset and can be used for another crawl.
     */
    void wait(void) {
        for (std::thread &t : workers_) {
            if (t.joinable())
                t.join();
        }
        workers_.clear();
        // All workers have exited, so the shared state can be reset unlocked.
        queue_.clear();
        done_ = false;
    }
private:
    bool done_;                                          ///< Set when workers must exit; guarded by mutex_.
    std::deque<ffd_internal_fs::directory_entry> queue_; ///< Entries waiting to be visited; guarded by mutex_.
    std::vector<std::thread> workers_;                   ///< Worker threads started by crawl_async().
    std::atomic<int> threads_running_;                   ///< Workers that are not idle at the queue.
    std::mutex mutex_;                                   ///< Protects done_ and queue_.
    std::condition_variable cv_;                         ///< Signals new queue entries and completion.
    /**
     * @brief Put the starting entry into the work queue.
     *
     * @param base_path Path the crawl starts at.
     */
    void seed(const ffd_internal_fs::path &base_path) {
        queue_.emplace_back(base_path);
    }
    /**
     * @brief Worker thread body: takes entries from the queue, calls the
     * callback on them and queues the contents of accepted directories.
     *
     * The crawl is complete when the queue is empty and no worker is busy; the
     * worker that notices this sets done_ and wakes up the others.
     *
     * @param callback Callback passed to crawl() / crawl_async().
     */
    void worker(std::function<bool(const ffd_internal_fs::directory_entry &)> callback) {
        ffd_internal_fs::directory_entry node;
        for (;;) {
            {
                std::unique_lock<std::mutex> lk(mutex_);
                if (done_)
                    return;
                // This worker is idle from here until it takes a new entry.
                --threads_running_;
                if (threads_running_ <= 0 && queue_.empty()) {
                    // Nothing queued and nobody left who could queue more.
                    done_ = true;
                    cv_.notify_all();
                    return;
                }
                while (queue_.empty() && !done_) {
                    cv_.wait(lk);
                }
                if (done_)
                    return;
                node = queue_.front();
                queue_.pop_front();
                ++threads_running_;
            }
            if (callback(node) && ffd_internal_fs::is_directory(node)) {
                // Explicit iterator loop: compiles with std::filesystem and
                // with boost::filesystem alike.
                for (ffd_internal_fs::directory_iterator child{ node.path() }, last{};
                     child != last;
                     ++child) {
                    {
                        std::lock_guard<std::mutex> lk(mutex_);
                        queue_.emplace_back(*child);
                    }
                    cv_.notify_one();
                }
            }
        }
    }
};
161 } // namespace ffd
ffd::MTDirCrawler::~MTDirCrawler
~MTDirCrawler()=default
Destroy the MTDirCrawler object.
ffd
45Drives namespace
Definition: Bytes.hpp:27
ffd::MTDirCrawler::crawl
void crawl(ffd_internal_fs::path base_path, std::function< bool(const ffd_internal_fs::directory_entry &)> callback, int threads)
Kicks off thread workers and waits for them to finish.
Definition: MTDirCrawler.hpp:73
ffd::MTDirCrawler::wait
void wait(void)
Wait for threads to finish. Must be called at some point after MTDirCrawler::crawl_async().
Definition: MTDirCrawler.hpp:102
ffd::MTDirCrawler
Crawls through a directory with multiple worker threads, calling a callback function on each directory...
Definition: MTDirCrawler.hpp:44
ffd::MTDirCrawler::MTDirCrawler
MTDirCrawler()
Construct a new MTDirCrawler object.
Definition: MTDirCrawler.hpp:50
ffd::MTDirCrawler::crawl_async
void crawl_async(ffd_internal_fs::path base_path, std::function< bool(const ffd_internal_fs::directory_entry &)> callback, int threads)
Kicks off thread workers. MTDirCrawler::wait() must be called at some point to join threads.
Definition: MTDirCrawler.hpp:88