diff options
Diffstat (limited to 'src/inode2filename/search_directories.cc')
-rw-r--r-- | src/inode2filename/search_directories.cc | 1366 |
1 files changed, 0 insertions, 1366 deletions
diff --git a/src/inode2filename/search_directories.cc b/src/inode2filename/search_directories.cc deleted file mode 100644 index 1d31671..0000000 --- a/src/inode2filename/search_directories.cc +++ /dev/null @@ -1,1366 +0,0 @@ -// Copyright (C) 2018 The Android Open Source Project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "common/debug.h" -#include "inode2filename/search_directories.h" -#include "inode2filename/system_call.h" - -#include <android-base/file.h> -#include <android-base/logging.h> -#include <android-base/scopeguard.h> -#include <android-base/stringprintf.h> -#include <android-base/unique_fd.h> - -#include "rxcpp/rx.hpp" - -#include <iostream> -#include <stdio.h> -#include <fstream> -#include <vector> -#include <optional> - -#include <signal.h> -#include <stdlib.h> -#include <unistd.h> - -#include <sys/types.h> - -#ifdef __ANDROID__ -#include <sys/sysmacros.h> -#endif - -#include <sys/stat.h> -#include <fcntl.h> -#include <poll.h> -#include <dirent.h> - -#include <unordered_map> - -namespace rx = rxcpp; -using android::base::unique_fd; // NOLINT -using android::base::StringPrintf; // NOLINT - -namespace iorap::inode2filename { - -#define DEBUG_INODE_SET 0 - -// A multimap of 'ino_t -> List[Inode]' (where the value Inodes have the same ino_t as the key). -// -// A flat list of Inodes is turned into the above map, then keys can be removed one at a time -// until the InodeSet eventually becomes empty. -struct InodeSet { - - InodeSet() = default; -#if DEBUG_INODE_SET - InodeSet(const InodeSet& other) { - LOG(INFO) << "InodeSet-copyctor"; - set_ = other.set_; - } - - InodeSet(InodeSet&& other) { - LOG(INFO) << "InodeSet-movector"; - set_ = std::move(other.set_); - } - - InodeSet& operator=(const InodeSet& other) { - LOG(INFO) << "InodeSet-opassign-copy"; - set_ = other.set_; - return *this; - } - - InodeSet& operator=(InodeSet&& other) { - LOG(INFO) << "InodeSet-opassign-move"; - set_ = std::move(other.set_); - return *this; - } -#else - InodeSet(InodeSet&& other) = default; - InodeSet& operator=(InodeSet&& other) = default; - // Copying InodeSet can be very expensive, refuse to even allow compiling such code. - InodeSet(const InodeSet& other) = delete; - InodeSet& operator=(const InodeSet& other) = delete; -#endif - - struct ValueRange { - auto/*Iterable<Inode>*/ begin() { - return begin_; - } - - auto/*Iterable<Inode>*/ end() { - return end_; - } - - bool empty() const { - return begin_ == end_; - } - - explicit operator bool() const { - return !empty(); - } - - std::unordered_multimap<ino_t, Inode>::iterator begin_, end_; - - friend std::ostream& operator<<(std::ostream& os, const ValueRange& s); - }; - - // Create an observable that emits the remaining inodes in the map. - // - // Mutation functions must not be called until this observable - // has been finished emitting all values (e.g. with on_completed) since that - // would cause the underlying iterators to go into an undefined state. - auto/*observable<Inode>*/ IterateValues() const { - return rxcpp::observable<>::iterate(set_).map( // XX: should we use identity_immediate here? - [](const std::pair<const ino_t, Inode>& pair) { - return pair.second; - } - ); - // TODO: this would be more efficient as a range-v3 view. - } - - constexpr bool Empty() const { - return set_.empty(); - } - - static InodeSet OfList(const std::vector<Inode>& list) { - InodeSet new_inode_set; - std::unordered_multimap<ino_t, Inode>* map = &new_inode_set.set_; - - for (const Inode& inode : list) { - map->insert({inode.inode, inode}); - } - - return new_inode_set; - } - - // Return an optional list of 'Inode' structs whose 'inode' field matches the 'inode' parameter. - // Returns an empty range if there was nothing found. - ValueRange FindInodeList(ino_t inode) { - auto range = set_.equal_range(inode); - return ValueRange{range.first, range.second}; - } - - // Match all fields of an Inode against a 'struct stat' stat_buf. - // - // The returned Inode (if any) is removed from the InodeSet; it will not be returned by - // FindInodeList in future calls. - std::optional<Inode> FindAndRemoveInodeInList(ValueRange inode_list, - const struct stat& stat_buf) { - LOG(VERBOSE) << "FindAndRemoveInodeInList " << inode_list << ", " - << "stat_buf{st_dev=" << stat_buf.st_dev << ",st_ino=" << stat_buf.st_ino << "}"; - - auto /*iterator*/ found = std::find_if(inode_list.begin(), - inode_list.end(), - [&](const std::pair<ino_t, Inode>& pair) { - const Inode& inode = pair.second; - if (inode.inode != stat_buf.st_ino) { - return false; - } - - dev_t inode_dev = - makedev(static_cast<int>(inode.device_major), static_cast<int>(inode.device_minor)); - - // Inodes could be the same across different devices. - // Also match the device id. - if (inode_dev != stat_buf.st_dev) { - LOG(VERBOSE) << "InodeSet:FindAndRemoveInodeInList matched ino: " << inode.inode - << " but not device" - << ", expected dev: " << stat_buf.st_dev - << ", actual dev: " << inode_dev; - return false; - } - return true; - }); - - if (found != inode_list.end()) { - Inode inode = found->second; - LOG(VERBOSE) << "InodeSet:FindAndRemoveInodeInList *success* inode+device " << inode; - DCHECK(found->second.inode == stat_buf.st_ino); - // Erase the inode from the list. This is important. - set_.erase(found); - return inode; - } - - return std::nullopt; - } - - // Match all fields of an Inode against another Inode. - // - // The returned Inode (if any) is removed from the InodeSet; it will not be returned by - // FindInodeList in future calls. - std::optional<Inode> FindAndRemoveInodeInList(ValueRange inode_list, - const Inode& inode) { - LOG(VERBOSE) << "FindAndRemoveInodeInList " << inode_list << ", " - << inode << "}"; - - auto /*iterator*/ found = std::find_if(inode_list.begin(), - inode_list.end(), - [&](const std::pair<ino_t, Inode>& pair) { - return inode == pair.second; - }); - - if (found != inode_list.end()) { - Inode inode = found->second; - LOG(VERBOSE) << "InodeSet:FindAndRemoveInodeInList *success* inode+device " << inode; - DCHECK_EQ(found->second, inode); - // Erase the inode from the list. This is important. - set_.erase(found); - return inode; - } - - return std::nullopt; - } - - // TODO: equality and string operators for testing/logging. - private: - // Explanation: readdir returns a 'file' -> 'ino_t inode' mapping. - // - // However inodes can be reused on different partitions (but they have a different device number). - // To handle this edge case, and to avoid calling stat whenever the inode definitely doesn't match - // store the inodes into a single-key,multi-value container. - // - // This enables fast scanning of readdir results by matching just the 'inode' portion, - // then calling stat only when the inode portion definitely matches to confirm the device. - - // There are no single-key multi-value containers in standard C++, so pretend - // we have one by writing this simple facade around an unordered set. - // - // We expect that the vector size is usually size=1 (or 2 or 3) since the # of devices - // is fixed by however many partitions there are on the system, AND the same inode # - // would have to be reused across a different file. - std::unordered_multimap<ino_t, Inode> set_; // TODO: Rename to map_. - - friend std::ostream& operator<<(std::ostream& os, const InodeSet& s); -}; - -std::ostream& operator<<(std::ostream& os, const InodeSet& s) { - os << "InodeSet{"; - for (const auto& kv : s.set_) { - // e.g. "123=>(1:2:123)" ... its expected for the 'ino_t' portion to be repeated. - os << "" << kv.first << "=>(" << kv.second << "),"; - } - os << "}"; - return os; -} - -std::ostream& operator<<(std::ostream& os, const InodeSet::ValueRange& v) { - // Don't want to make a const and non const version of ValueRange. - InodeSet::ValueRange& s = const_cast<InodeSet::ValueRange&>(v); - - os << "InodeSet::ValueRange{"; - for (const auto& kv : s) { - // e.g. "123=>(1:2:123)" ... its expected for the 'ino_t' portion to be repeated. - os << "" << kv.first << "=>(" << kv.second << "),"; - } - os << "}"; - return os; -} - -void search_for_inodes_in(std::vector<Inode>& inode_list, const std::string& dirpath); - -enum DirectoryEntryErrorCode { - kInvalid, // not a real error code. to detect bad initialization. - kOpenDir, // opendir failed. - kReadDir, // readdir failed. - kDtUnknown, // d_type was DT_UNKNOWN error. -}; - -struct DirectoryEntryError { - DirectoryEntryErrorCode code; - int err_no; - std::string filename; -}; - -std::ostream& operator<<(std::ostream& os, const DirectoryEntryError& e) { - os << "DirectoryEntryError{" - << static_cast<int>(e.code) << "," << e.err_no << "," << e.filename << "}"; - return os; - // TODO: pretty-print code and err-no -} - -static common::DebugCounter gDebugDirectoryEntryCounter{}; -static constexpr bool kDebugDirectoryEntry = false; - -#define DIRECTORY_ENTRY_MOVE_DCHECK() \ - DCHECK_EQ(other.moved_from_, false) << __PRETTY_FUNCTION__ << "CNT:" << other.debug_counter_; -#define DIRECTORY_ENTRY_TRACE_CTOR() \ - if (kDebugDirectoryEntry) LOG(VERBOSE) << __PRETTY_FUNCTION__ << "@CNT:" << debug_counter_ - -struct DirectoryEntry { - using ResultT = iorap::expected<DirectoryEntry, DirectoryEntryError>; - using ObservableT = rx::observable<ResultT>; - - static constexpr ino_t kInvalidIno = std::numeric_limits<ino_t>::max(); - static constexpr auto kInvalidFileName = ""; - - // Path to file, the prefix is one of the root directories. - std::string filename{kInvalidFileName}; - // Inode number of the file. Not unique across different devices. - ino_t d_ino{kInvalidIno}; - // File type (DT_LNK, DT_REG, DT_DIR, or DT_UNKNOWN) - unsigned char d_type{DT_UNKNOWN}; // Note: not seen outside of sentinel roots. - // TODO: Consider invariant checks for valid combinations of above fields? - - // Debug-only flags. - bool moved_from_{false}; - size_t debug_counter_{0}; - - private: - // TODO: remove default constructor? - // - // SEEMS TO BE USED by std::vector etc. FIX DAT. - DirectoryEntry() noexcept { - debug_counter_ = gDebugDirectoryEntryCounter++; - DIRECTORY_ENTRY_TRACE_CTOR(); - } - public: - DirectoryEntry(std::string filename, ino_t d_ino, unsigned char d_type) noexcept - : filename{std::move(filename)}, - d_ino{d_ino}, - d_type{d_type} { - debug_counter_ = gDebugDirectoryEntryCounter++; - DIRECTORY_ENTRY_TRACE_CTOR(); - } - - DirectoryEntry(const DirectoryEntry& other) noexcept { - // Do not use member-initialization syntax so that this DCHECK can execute first. - DIRECTORY_ENTRY_MOVE_DCHECK(); - - filename = other.filename; - d_ino = other.d_ino; - d_type = other.d_type; - children_paths_ = other.children_paths_; - children_initialized_ = other.children_initialized_; - debug_counter_ = other.debug_counter_; - DIRECTORY_ENTRY_TRACE_CTOR(); - } - - DirectoryEntry& operator=(const DirectoryEntry& other) noexcept { - if (this == &other) { - return *this; - } - - DIRECTORY_ENTRY_MOVE_DCHECK(); - - filename = other.filename; - d_ino = other.d_ino; - d_type = other.d_type; - children_paths_ = other.children_paths_; - children_initialized_ = other.children_initialized_; - debug_counter_ = other.debug_counter_; - DIRECTORY_ENTRY_TRACE_CTOR(); - - return *this; - } - - DirectoryEntry& operator=(DirectoryEntry&& other) noexcept { - if (this == &other) { - return *this; - } - - DIRECTORY_ENTRY_MOVE_DCHECK(); - - filename = std::move(other.filename); - d_ino = other.d_ino; - d_type = other.d_type; - children_paths_ = std::move(other.children_paths_); - children_initialized_ = other.children_initialized_; - debug_counter_ = other.debug_counter_; - DIRECTORY_ENTRY_TRACE_CTOR(); - - return *this; - } - - DirectoryEntry(DirectoryEntry&& other) noexcept { - DIRECTORY_ENTRY_MOVE_DCHECK(); - other.moved_from_ = true; - - filename = std::move(other.filename); - d_ino = other.d_ino; - d_type = other.d_type; - children_paths_ = std::move(other.children_paths_); - children_initialized_ = other.children_initialized_; - debug_counter_ = other.debug_counter_; - DIRECTORY_ENTRY_TRACE_CTOR(); - } - - // Create a sentinel (root of roots) whose children entries are those specified by - // children_paths. - static DirectoryEntry CreateSentinel(std::vector<std::string> children_paths) { - DirectoryEntry e; - e.d_type = DT_DIR; - ++gDebugDirectoryEntryCounter; - - for (std::string& child_path : children_paths) { - // TODO: Should we call Stat on the child path here to reconstitute the ino_t for a root dir? - // Otherwise it can look a little strange (i.e. the root dir itself will never match - // the searched inode). - // - // Probably not too big of a problem in practice. - DirectoryEntry child_entry{std::move(child_path), kInvalidIno, DT_DIR}; - ResultT child_entry_as_result{std::move(child_entry)}; - e.children_paths_.push_back(std::move(child_entry_as_result)); - } - - e.children_initialized_ = true; - - return e; - } - - // Return an observable which emits the direct children only. - // The children entries are now read from disk (with readdir) if they weren't read previously. - std::vector<ResultT> GetChildrenEntries(borrowed<SystemCall*> system_call) const& { - BuildChildrenPaths(system_call); - return children_paths_; - } - - // Return an observable which emits the direct children only. - // The children entries are now read from disk (with readdir) if they weren't read previously. - // Movable overload. - std::vector<ResultT> GetChildrenEntries(borrowed<SystemCall*> system_call) && { - BuildChildrenPaths(system_call); - return std::move(children_paths_); - } - - // Returns a (lazy) observable that emits every single node, in pre-order, - // rooted at this tree. - // - // New entries are only read from disk (with e.g. readdir) when more values are pulled - // from the observable. Only the direct children of any entry are read at any time. - // - // The emission can be stopped prematurely by unsubscribing from the observable. - // This means the maximum amount of 'redundant' IO reads is bounded by the children count - // of all entries emitted thus far minus entries actually emitted. - ObservableT GetSubTreePreOrderEntries(borrowed<SystemCall*> system_call) const; - - private: - // Out-of-line definition to avoid circular type dependency. - void BuildChildrenPaths(borrowed<SystemCall*> system_call) const; - - // We need to lazily initialize children_paths_ only when we try to read them. - // - // Assuming the underlying file system doesn't change (which isn't strictly true), - // the directory children are referentially transparent. - // - // In practice we do not need to distinguish between the file contents changing out - // from under us in this code, so we don't need the more strict requirements. - mutable std::vector<ResultT> children_paths_; - mutable bool children_initialized_{false}; - - friend std::ostream& operator<<(std::ostream& os, const DirectoryEntry& d); -}; - -std::ostream& operator<<(std::ostream& os, const DirectoryEntry& d) { - os << "DirectoryEntry{" << d.filename << ",ino:" << d.d_ino << ",type:" << d.d_type << "}"; - return os; -} - -using DirectoryEntryResult = DirectoryEntry::ResultT; - -// Read all directory entries and return it as a vector. This must be an eager operation, -// as readdir is not re-entrant. -// -// This could be considered as a limitation from the 'observable' perspective since -// one can end up reading unnecessary extra directory entries that are then never consumed. -// -// The following entries are skipped: -// - '.' self -// - ".." parent -// -// All DT types except the following are removed: -// * DT_LNK - symbolic link (empty children) -// * DT_REG - regular file (empty children) -// * DT_DIR - directory (has children) -static std::vector<DirectoryEntryResult> - ReadDirectoryEntriesFromDirectoryPath(std::string dirpath, borrowed<SystemCall*> system_call) { - DIR *dirp; - struct dirent *dp; - - LOG(VERBOSE) << "ReadDirectoryEntriesFromDirectoryPath(" << dirpath << ")"; - - if ((dirp = system_call->opendir(dirpath.c_str())) == nullptr) { - PLOG(ERROR) << "Couldn't open directory: " << dirpath; - return {DirectoryEntryError{kOpenDir, errno, dirpath}}; - } - - // Read all the results up front because readdir is not re-entrant. - std::vector<DirectoryEntryResult> results; - - // Get full path + the directory entry path. - auto child_path = [&] { return dirpath + "/" + dp->d_name; }; - - do { - errno = 0; - if ((dp = system_call->readdir(dirp)) != nullptr) { - if (dp->d_type == DT_DIR) { - if (strcmp(".", dp->d_name) == 0 || strcmp("..", dp->d_name) == 0) { - LOG(VERBOSE) << "Skip self/parent: " << dp->d_name; - continue; - } - - LOG(VERBOSE) << "Find entry " << child_path() - << ", ino: " << dp->d_ino << ", type: " << dp->d_type; - results.push_back(DirectoryEntry{child_path(), - static_cast<ino_t>(dp->d_ino), - dp->d_type}); - } else if (dp->d_type == DT_UNKNOWN) { - // This seems bad if it happens. We should probably do something about this. - LOG(WARNING) << "Found unknown DT entry: " << child_path(); - - results.push_back(DirectoryEntryError{kDtUnknown, /*errno*/0, child_path()}); - } else if (dp->d_type == DT_LNK || dp->d_type == DT_REG) { - // Regular non-directory file entry. - results.push_back(DirectoryEntry{child_path(), - static_cast<ino_t>(dp->d_ino), - dp->d_type}); - } else { - // Block device, character device, socket, etc... - LOG(VERBOSE) << "Skip DT entry of type: " << dp->d_type << " " << child_path(); - } - } else if (errno != 0) { - PLOG(ERROR) << "Error reading directory entry in " << dirpath; - - results.push_back(DirectoryEntryError{kReadDir, errno, dirpath}); - } - } while (dp != nullptr); - - if (system_call->closedir(dirp) < 0) { - PLOG(ERROR) << "Failed to close directory " << dirpath; - } - - return results; -} - -void DirectoryEntry::BuildChildrenPaths(borrowed<SystemCall*> system_call) const { - if (children_initialized_) { - return; - } - - if (d_type == DT_DIR) { - children_paths_ = ReadDirectoryEntriesFromDirectoryPath(filename, system_call); - // TODO: consider using dependency injection here to substitute this function during testing? - } -} - -struct InodeSearchParameters { - std::vector<Inode> inode_list; - std::vector<std::string> root_dirs; -}; - -// [IN] -// observable: expected<Value, Error>, ... -// [OUT] -// observable: Value, ... -// -// Any encountered 'Error' items are dropped after logging. -template <typename T> -auto MapExpectedOrLog(T&& observable, - ::android::base::LogSeverity log_level) { - return observable.filter([log_level](const auto& result) { - if (result) { - return true; - } else { - LOG(log_level) << result.error(); - return false; - } - }).map([](auto&& result) { - return IORAP_FORWARD_LAMBDA(result).value(); - }); -} - -template <typename T> -auto MapExpectedOrLogError(T&& observable) { - return MapExpectedOrLog(std::forward<T>(observable), ::android::base::ERROR); -} - -template <typename T> -auto MapOptionalOrDrop(T&& observable) { - return observable.filter([](const auto& result) { - return result.has_value(); - }).map([](auto&& result) { - return IORAP_FORWARD_LAMBDA(result).value(); - }); - // TODO: static_assert this isn't used with an unexpected. -} - -template <typename T, typename F> -auto VisitValueOrLogError(T&& expected, F&& visit_func, const char* error_prefix = "") { - if (!expected) { - LOG(ERROR) << error_prefix << " " << expected.error(); - } else { - visit_func(std::forward<T>(expected).value()); - } - // TODO: Could be good to make this more monadic by returning an optional. -} - -template <typename TSimple, typename T, typename F> -void TreeTraversalPreOrderObservableImpl(rx::subscriber<TSimple> dest, T&& node, F&& fn) { - LOG(VERBOSE) << "TreeTraversalPreOrderObservableImpl (begin) " << __PRETTY_FUNCTION__; - - if (!dest.is_subscribed()) { - LOG(VERBOSE) << "TreeTraversalPreOrderObservableImpl (unsubscribed)"; - return; - } else { - LOG(VERBOSE) << "TreeTraversalPreOrderObservableImpl (on_next node)"; - - // Copy the node here. This is less bad than it seems since we haven't yet - // calculated its children (except in the root), so its just doing a shallow memcpy (sizeof(T)). - // - // This assumes the children are calculated lazily, otherwise we'd need to have a separate - // NodeBody class which only holds the non-children elements. - - TSimple copy = std::forward<T>(node); - dest.on_next(std::move(copy)); - - if (!node.has_value()) { - return; - } - - // Whenever we call 'on_next' also check if we end up unsubscribing. - // This avoids the expensive call into the children. - if (!dest.is_subscribed()) { - LOG(VERBOSE) << "TreeTraversalPreOrderObservableImpl (post-self unsubscribe)"; - return; - } - - // Eagerly get the childrem, moving them instead of copying them. - auto&& children = fn(std::forward<T>(node)); - for (auto&& child : children) { - TreeTraversalPreOrderObservableImpl(dest, IORAP_FORWARD_LAMBDA(child), fn); - // TODO: double check this is doing the std::move properly for rvalues. - - if (!dest.is_subscribed()) { - LOG(VERBOSE) << "TreeTraversalPreOrderObservableImpl (unsubscribed in children)"; - break; - } - }; - } -} - -// Creates an observable over all the nodes in the tree rooted at node. -// fn is a function that returns the children of that node. -// -// The items are emitted left-to-right pre-order, and stop early if the -// observable is unsubscribed from. -// -// Implementation requirement: -// typeof(node) -> expected<V, E> or optional<V> or similar. -// fn(node) -> iterable<typeof(node)> -// -// preorder(self): -// visit(self) -// for child in fn(self): -// preorder(child) -template <typename T, typename F> -auto/*observable<T>*/ TreeTraversalPreOrderObservable(T&& node, F&& fn) { - LOG(VERBOSE) << "TreeTraversalPreOrderObservable: " << __PRETTY_FUNCTION__; - - using T_simple = std::decay_t<T>; - return rx::observable<>::create<T_simple>( - // Copy node to avoid lifetime issues. - [node=node,fn=std::forward<F>(fn)](rx::subscriber<T_simple> dest) { - LOG(VERBOSE) << "TreeTraversalPreOrderObservable (lambda)"; - TreeTraversalPreOrderObservableImpl<T_simple>(dest, - std::move(node), - std::move(fn)); - dest.on_completed(); - } - ); -} - -DirectoryEntry::ObservableT - DirectoryEntry::GetSubTreePreOrderEntries(borrowed<SystemCall*> system_call) const { - return TreeTraversalPreOrderObservable( - DirectoryEntryResult{*this}, - [system_call=system_call](auto/*DirectoryEntryResult*/&& result) - -> std::vector<DirectoryEntryResult> { - if (!result) { - LOG(VERBOSE) << "GetSubTreePreOrderEntries (no value return)"; - // Cannot have children when it was an error. - return {}; - } - return - IORAP_FORWARD_LAMBDA(result) - .value() - .GetChildrenEntries(system_call); - }); -} - -struct StatError { - int err_no; - std::string path_name; -}; - -std::ostream& operator<<(std::ostream& os, const StatError& e) { - os << "StatError{" << e.err_no << "," << e.path_name - << ": " << strerror(e.err_no) << "}"; - return os; -} - -template <typename U = void> // suppress unused warning. -static iorap::expected<struct stat, StatError> Stat(const std::string& path_name, - borrowed<SystemCall*> system_call) { - struct stat statbuf{}; - - // Call stat(2) in live code. Overridden in test code. - if (system_call->stat(path_name.c_str(), /*out*/&statbuf) == 0) { - return statbuf; - } else { - return iorap::unexpected(StatError{errno, path_name}); - } -} - -using StatResult = iorap::expected<struct stat, StatError>; - -// An inode's corresponding filename on the system. -struct SearchMatch { - Inode inode; - // Relative path joined with a root directory. - // - // Use absolute path root dirs to get back absolute path filenames. - // If relative, this is relative to the current working directory. - std::string filename; -}; - -std::ostream& operator<<(std::ostream& os, const SearchMatch& s) { - os << "SearchMatch{" << s.inode << ", " << s.filename << "}"; - return os; -} - -struct SearchState { - // Emit 'match' Inodes corresponding to the ones here. - InodeSet inode_set; - - // An inode matching one of the ones in inode_set was discovered in the most-recently - // emitted SearchState. - // - // The InodeSet removes any matching 'Inode'. - std::optional<SearchMatch> match; - - SearchState() = default; - SearchState(SearchState&& other) = default; - - // Do not copy this because copying InodeSet is excruciatingly slow. - SearchState(const SearchState& other) = delete; - - // TODO: make sure this doesn't copy [inodes], as that would be unnecessarily expensive. -}; - -std::ostream& operator<<(std::ostream& os, const SearchState& s) { - os << "SearchState{match:"; - // Print the 'match' first. The InodeSet could be very large so it could be truncated in logs. - if (s.match) { - os << s.match.value(); - } else { - os << "(none)"; - } - os << ", inode_set:" << s.inode_set << "}"; - return os; -} - -// TODO: write operator<< etc. - -// Return a lazy observable that will search for all filenames whose inodes -// match the inodes in inode_search_list. -// -// Every unmatched inode will be emitted as an unexpected at the end of the stream. -auto/*[observable<InodeResult>, connectable]*/ SearchDirectoriesForMatchingInodes( - std::vector<std::string> root_dirs, - std::vector<Inode> inode_search_list, - borrowed<SystemCall*> system_call) { - - // Create a (lazy) observable that will emit each DirectoryEntry that is a recursive subchild - // of root_dirs. Emission will be stopped when its unsubscribed from. - // - // This is done by calling readdir(3) lazily. - auto/*obs<DirectoryEntry>*/ find_all_subdir_entries = ([&]() { - DirectoryEntry sentinel = DirectoryEntry::CreateSentinel(std::move(root_dirs)); - auto/*obs<DirectoryEntryResult*/ results = sentinel.GetSubTreePreOrderEntries(system_call); - - // Drop any errors by logging them to logcat. "Unwrap" the expected into the underlying data. - auto/*obs<DirectoryEntry*>*/ expected_drop_errors = MapExpectedOrLogError(std::move(results)); - return expected_drop_errors; - })(); - - // DirectoryEntry is missing the dev_t portion, so we may need to call scan(2) again - // to confirm the dev_t. We skip calling scan(2) when the ino_t does not match. - // InodeSet lets us optimally avoid calling scan(2). - std::shared_ptr<SearchState> initial = std::make_shared<SearchState>(); - initial->inode_set = InodeSet::OfList(inode_search_list); - - auto/*[observable<SearchState>,Connectable]*/ search_state_results = find_all_subdir_entries.scan( - std::move(initial), - [system_call=system_call](std::shared_ptr<SearchState> search_state, - const DirectoryEntry& dir_entry) { - LOG(VERBOSE) << "SearchDirectoriesForMatchingInodes#Scan " - << dir_entry << ", state: " << *search_state; - - search_state->match = std::nullopt; - - InodeSet* inodes = &search_state->inode_set; - - // Find all the possible inodes across different devices. - InodeSet::ValueRange inode_list = inodes->FindInodeList(dir_entry.d_ino); - - // This directory doesn't correspond to any inodes we are searching for. - if (!inode_list) { - return search_state; - } - - StatResult maybe_stat = Stat(dir_entry.filename, system_call); - VisitValueOrLogError(maybe_stat, [&](const struct stat& stat_buf) { - // Try to match the specific inode. Usually this will not result in a match (nullopt). - std::optional<Inode> inode = inodes->FindAndRemoveInodeInList(inode_list, stat_buf); - - if (inode) { - search_state->match = SearchMatch{inode.value(), dir_entry.filename}; - } - }); - - return search_state; - } - // Avoid exhausting a potentially 'infinite' stream of files by terminating as soon - // as we find every single inode we care about. - ).take_while([](std::shared_ptr<SearchState> state) { - // Also emit the last item that caused the search set to go empty. - bool cond = !state->inode_set.Empty() || state->match; - - if (WOULD_LOG(VERBOSE)) { - static int kCounter = 0; - LOG(VERBOSE) << "SearchDirectoriesForMatchingInodes#take_while (" << kCounter++ << - ",is_empty:" - << state->inode_set.Empty() << ", match:" << state->match.has_value(); - } - // Minor O(1) implementation inefficiency: - // (Too minor to fix but it can be strange if looking at the logs or readdir traces). - // - // Note, because we return 'true' after the search set went empty, - // the overall stream graph still pulls from search_state_results exactly once more: - // - // This means that for cond to go to false, we would've read one extra item and then discarded - // it. If that item was the first child of a directory, that means we essentially did - // one redundant pass of doing a readdir. - // In other words if the search set goes to empty while the current item is a directory, - // - // it will definitely readdir on it at least once as we try to get the first child in - // OnTreeTraversal. - // - // This could be fixed with a 'take_until(Predicate)' operator which doesn't discard - // the last item when the condition becomes false. However rxcpp seems to lack this operator, - // whereas RxJava has it. - - if (!cond) { - LOG(VERBOSE) << "SearchDirectoriesForMatchingInodes#take_while " - << "should now terminate for " << *state; - } - - return cond; - }).publish(); - // The publish here is mandatory. The stream is consumed twice (once by matched and once by - // unmatched streams). Without the publish, once all items from 'matched' were consumed it would - // start another instance of 'search_state_results' (i.e. it appears as if the search - // is restarted). - // - // By using 'publish', the search_state_results is effectively shared by both downstream nodes. - // Note that this also requires the subscriber to additionally call #connect on the above stream, - // otherwise no work will happen. - - // Lifetime notes: - // - // The the 'SearchState' is emitted into both below streams simultaneously. - // The 'unmatched_inode_values' only touches the inode_set. - // The 'matched_inode_values' only touches the match. - // Either stream can 'std::move' from those fields because they don't move each other's fields. - auto/*observable<InodeResult>*/ matched_inode_values = search_state_results - .filter([](std::shared_ptr<SearchState> search_state) { - return search_state->match.has_value(); }) - .map([](std::shared_ptr<SearchState> search_state) { - return std::move(search_state->match.value()); }) - // observable<SearchMatch> - .map([](SearchMatch search_match) { - return InodeResult::makeSuccess(search_match.inode, std::move(search_match.filename)); - }); // observable<InodeResult> - - auto/*observable<?>*/ unmatched_inode_values = search_state_results - // The 'last' SearchState is the one that contains all the remaining inodes. - .take_last(1) // observable<SearchState> - .flat_map([](std::shared_ptr<SearchState> search_state) { - LOG(VERBOSE) << "SearchDirectoriesForMatchingInodes#unmatched -- flat_map"; - // Aside: Could've used a move here if the inodes weren't so lightweight already. - return search_state->inode_set.IterateValues(); }) - // observable<Inode> - .map([](const Inode& inode) { - LOG(VERBOSE) << "SearchDirectoriesForMatchingInodes#unmatched -- map"; - return InodeResult::makeFailure(inode, InodeResult::kCouldNotFindFilename); - }); - // observable<InodeResult> - - // The matched and unmatched InodeResults are emitted together. - // Use merge, not concat, because we need both observables to be subscribed to simultaneously. - - auto/*observable<InodeResult*/ all_inode_results = - matched_inode_values.merge(unmatched_inode_values); - - // Now that all mid-stream observables have been connected, turn the Connectable observable - // into a regular observable. - - // The caller has to call 'connect' on the search_state_results after subscribing - // and before any work can actually start. - return std::make_pair(all_inode_results, search_state_results); -} - -rxcpp::observable<InodeResult> SearchDirectories::FindFilenamesFromInodes( - std::vector<std::string> root_directories, - std::vector<Inode> inode_list, - SearchMode mode) const { - DCHECK(mode == SearchMode::kInProcessDirect) << " other modes not implemented yet"; - - auto/*observable[2]*/ [inode_results, connectable] = SearchDirectoriesForMatchingInodes( - std::move(root_directories), - std::move(inode_list), - system_call_); - - return inode_results.ref_count(connectable); -} - -// I think we could avoid this with auto_connect, which rxcpp doesn't seem to have. -// -// I can't figure out any other way to avoid this, or at least to allow connecting -// on the primary observable (instead of a secondary side-observable). -// -// If using the obvious publish+ref_count then the unmerged stream gets no items emitted into it. -// If tried to ref_count later, everything turns into no-op. -// If trying to call connect too early, the subscribe is missed. -template <typename T> -struct RxAnyConnectableFromObservable : public SearchDirectories::RxAnyConnectable { - virtual void connect() override { - observable.connect(); - } - - virtual ~RxAnyConnectableFromObservable() {} - - RxAnyConnectableFromObservable(rxcpp::connectable_observable<T> observable) - : observable(observable) { - } - - rxcpp::connectable_observable<T> observable; -}; - -// Type deduction helper. -template <typename T> -std::unique_ptr<SearchDirectories::RxAnyConnectable> - MakeRxAnyConnectableFromObservable(rxcpp::connectable_observable<T> observable) { - SearchDirectories::RxAnyConnectable* ptr = new RxAnyConnectableFromObservable<T>{observable}; - return std::unique_ptr<SearchDirectories::RxAnyConnectable>{ptr}; -} - -std::pair<rxcpp::observable<InodeResult>, std::unique_ptr<SearchDirectories::RxAnyConnectable>> - SearchDirectories::FindFilenamesFromInodesPair( - std::vector<std::string> root_directories, - std::vector<Inode> inode_list, - SearchMode mode) const { - DCHECK(mode == SearchMode::kInProcessDirect) << " other modes not implemented yet"; - - auto/*observable[2]*/ [inode_results, connectable] = SearchDirectoriesForMatchingInodes( - std::move(root_directories), - std::move(inode_list), - system_call_); - - std::unique_ptr<SearchDirectories::RxAnyConnectable> connectable_ptr = - MakeRxAnyConnectableFromObservable(connectable.as_dynamic()); - - return {inode_results, std::move(connectable_ptr)}; -} - -rxcpp::observable<InodeResult> - SearchDirectories::FindFilenamesFromInodes(std::vector<std::string> root_directories, - rxcpp::observable<Inode> inodes, - SearchMode mode) const { - - // It's inefficient to search for inodes until the full search list is available, - // so first reduce to a vector so we can access all the inodes simultaneously. - return inodes.reduce(std::vector<Inode>{}, - [](std::vector<Inode> vec, Inode inode) { - vec.push_back(inode); - return vec; - }, - [](std::vector<Inode> v){ - return v; // TODO: use an identity function - }) - .flat_map([root_directories=std::move(root_directories), mode, self=*this] - (std::vector<Inode> vec) { - // All borrowed values (e.g. SystemCall) must outlive the observable. - return self.FindFilenamesFromInodes(root_directories, vec, mode); - } - ); -} - -auto/*[observable<InodeResult>]*/ EmitAllInodesFromDirectories( - std::vector<std::string> root_dirs, - borrowed<SystemCall*> system_call) { - - // Create a (lazy) observable that will emit each DirectoryEntry that is a recursive subchild - // of root_dirs. Emission will be stopped when its unsubscribed from. - // - // This is done by calling readdir(3) lazily. - auto/*obs<DirectoryEntry>*/ find_all_subdir_entries = ([&]() { - DirectoryEntry sentinel = DirectoryEntry::CreateSentinel(std::move(root_dirs)); - auto/*obs<DirectoryEntryResult*/ results = sentinel.GetSubTreePreOrderEntries(system_call); - - // Drop any errors by logging them to logcat. "Unwrap" the expected into the underlying data. - auto/*obs<DirectoryEntry*>*/ expected_drop_errors = MapExpectedOrLogError(std::move(results)); - return expected_drop_errors; - })(); - - // Fill in -1 for the dev_t since readdir only returns the ino_t. - // The caller of this function is expected to call stat(2) later on to fill in - // the full data. - return find_all_subdir_entries.map([](DirectoryEntry e) { - return InodeResult::makeSuccess(Inode::FromDeviceAndInode(-1, e.d_ino), std::move(e.filename)); - }); -} - -rxcpp::observable<InodeResult> - SearchDirectories::ListAllFilenames(std::vector<std::string> root_directories) const { - // TODO: refactor implementation into DiskScanDataSource. - return EmitAllInodesFromDirectories(std::move(root_directories), - /*borrowed*/system_call_); -} - -struct FilterState { - // Emit 'match' Inodes corresponding to the ones here. - InodeSet inode_set; - - // An inode matching one of the ones in inode_set was discovered in the most-recently - // emitted SearchState. - // - // The InodeSet removes any matching 'Inode'. - std::optional<InodeResult> match; - - FilterState() = default; - FilterState(FilterState&& other) = default; - - // Copying the InodeSet is expensive, so forbid any copies. - FilterState(const FilterState& other) = delete; -}; - -std::ostream& operator<<(std::ostream& os, const FilterState& s) { - os << "FilterState{match:"; - // Print the 'match' first. The InodeSet could be very large so it could be truncated in logs. - if (s.match) { - os << s.match.value(); - } else { - os << "(none)"; - } - os << ", inode_set:" << s.inode_set << "}"; - return os; -} - -rxcpp::observable<InodeResult> SearchDirectories::FilterFilenamesForSpecificInodes( - rxcpp::observable<InodeResult> all_inodes, - std::vector<Inode> inode_list, - bool missing_device_number, // missing dev_t portion? - bool needs_verification) const { - // TODO: refactor into InodeResolver - - borrowed<SystemCall*> system_call = system_call_; - - // InodeResult may be missing the dev_t portion, so we may need to call scan(2) again - // to confirm the dev_t. We skip calling scan(2) when the ino_t does not match. - // InodeSet lets us optimally avoid calling scan(2). - std::shared_ptr<FilterState> initial = std::make_shared<FilterState>(); - initial->inode_set = InodeSet::OfList(inode_list); - - auto/*[observable<FilterState>,Connectable]*/ filter_state_results = all_inodes.scan( - std::move(initial), - [system_call, missing_device_number] - (std::shared_ptr<FilterState> filter_state, InodeResult inode_result) { - LOG(VERBOSE) << "FilterFilenamesForSpecificInodes#Scan " - << inode_result << ", state: " << *filter_state; - - filter_state->match = std::nullopt; - - InodeSet* inodes = &filter_state->inode_set; - - // Find all the possible (dev_t, ino_t) potential needles given an ino_t in the haystack. - InodeSet::ValueRange inode_list = inodes->FindInodeList(inode_result.inode.inode); - - // This inode result doesn't correspond to any inodes we are searching for. - if (!inode_list) { - // Drop the result and keep going. - return filter_state; - } - - if (missing_device_number) { - // Need to fill in dev_t by calling stat(2). - VisitValueOrLogError(std::move(inode_result.data), [&](std::string filename) { - StatResult maybe_stat = Stat(filename, system_call); - VisitValueOrLogError(maybe_stat, [&](const struct stat& stat_buf) { - // Try to match the specific inode. Usually this will not result in a match (nullopt). - std::optional<Inode> inode = inodes->FindAndRemoveInodeInList(inode_list, stat_buf); - - if (inode) { - filter_state->match = InodeResult::makeSuccess(inode.value(), std::move(filename)); - } - }); - - // Note: stat errors are logged here to make the error closer to the occurrence. - // In theory, we could just return it as an InodeResult but then the error would - // just get logged elsewhere. - }); - } else { - // Trust the dev_t in InodeResult is valid. Later passes can verify it. - - // Try to match the specific inode. Usually this will not result in a match (nullopt). - std::optional<Inode> inode = - inodes->FindAndRemoveInodeInList(inode_list, inode_result.inode); - - if (inode) { - filter_state->match = inode_result; - } - - // Note that the InodeResult doesn't necessarily need to have a valid filename here. - // If the earlier pass returned an error-ed result, this will forward the error code. - } - - return filter_state; - } - // Avoid exhausting a potentially 'infinite' stream of files by terminating as soon - // as we find every single inode we care about. - ).take_while([](std::shared_ptr<FilterState> state) { - // Also emit the last item that caused the search set to go empty. - bool cond = !state->inode_set.Empty() || state->match; - - if (WOULD_LOG(VERBOSE)) { - static int kCounter = 0; - LOG(VERBOSE) << "FilterFilenamesForSpecificInodes#take_while (" << kCounter++ << - ",is_empty:" - << state->inode_set.Empty() << ", match:" << state->match.has_value(); - } - // Minor O(1) implementation inefficiency: - // (Too minor to fix but it can be strange if looking at the logs or readdir traces). - // - // Note, because we return 'true' after the search set went empty, - // the overall stream graph still pulls from filter_state_results exactly once more: - // - // This means that for cond to go to false, we would've read one extra item and then discarded - // it. If that item was the first child of a directory, that means we essentially did - // one redundant pass of doing a readdir. - // In other words if the search set goes to empty while the current item is a directory, - // - // it will definitely readdir on it at least once as we try to get the first child in - // OnTreeTraversal. - // - // This could be fixed with a 'take_until(Predicate)' operator which doesn't discard - // the last item when the condition becomes false. However rxcpp seems to lack this operator, - // whereas RxJava has it. - - if (!cond) { - LOG(VERBOSE) << "FilterFilenamesForSpecificInodes#take_while " - << "should now terminate for " << *state; - } - - return cond; - }).publish(); - // The publish here is mandatory. The stream is consumed twice (once by matched and once by - // unmatched streams). Without the publish, once all items from 'matched' were consumed it would - // start another instance of 'filter_state_results' (i.e. it appears as if the search - // is restarted). - // - // By using 'publish', the filter_state_results is effectively shared by both downstream nodes. - // Note that this also requires the subscriber to additionally call #connect on the above stream, - // otherwise no work will happen. - - // Lifetime notes: - // - // The the 'FilterState' is emitted into both below streams simultaneously. - // The 'unmatched_inode_values' only touches the inode_set. - // The 'matched_inode_values' only touches the match. - // Either stream can 'std::move' from those fields because they don't move each other's fields. - auto/*observable<InodeResult>*/ matched_inode_values = filter_state_results - .filter([](std::shared_ptr<FilterState> filter_state) { - return filter_state->match.has_value(); }) - .map([](std::shared_ptr<FilterState> filter_state) { - return std::move(filter_state->match.value()); }); - // observable<InodeResult> - - auto/*observable<?>*/ unmatched_inode_values = filter_state_results - // The 'last' FilterState is the one that contains all the remaining inodes. - .take_last(1) // observable<FilterState> - .flat_map([](std::shared_ptr<FilterState> filter_state) { - LOG(VERBOSE) << "FilterFilenamesForSpecificInodes#unmatched -- flat_map"; - // Aside: Could've used a move here if the inodes weren't so lightweight already. - return filter_state->inode_set.IterateValues(); }) - // observable<Inode> - .map([](const Inode& inode) { - LOG(VERBOSE) << "FilterFilenamesForSpecificInodes#unmatched -- map"; - return InodeResult::makeFailure(inode, InodeResult::kCouldNotFindFilename); - }); - // observable<InodeResult> - - // The matched and unmatched InodeResults are emitted together. - // Use merge, not concat, because we need both observables to be subscribed to simultaneously. - - auto/*observable<InodeResult*/ all_inode_results = - matched_inode_values.merge(unmatched_inode_values); - - // Verify the inode results by calling stat(2). - // Unverified results are turned into an error. - - auto/*observable<InodeResult>*/ verified_inode_results = - all_inode_results.map([needs_verification, system_call](InodeResult result) { - if (!needs_verification || !result) { - // Skip verification if requested, or if the result didn't have a filename. - return result; - } - - const std::string& filename = result.data.value(); - StatResult maybe_stat = Stat(filename, system_call); - - if (maybe_stat) - { - if (result.inode == Inode::FromDeviceAndInode(maybe_stat->st_dev, maybe_stat->st_ino)) { - return result; - } else { - LOG(WARNING) - << "FilterFilenamesForSpecificInodes#verified fail out-of-date inode: " << result; - return InodeResult::makeFailure(result.inode, InodeResult::kVerificationFailed); - } - } else { - // Forward stat errors directly, as it could be a missing security rule, - // but turn -ENOENT into casual verification errors. - const StatError& err = maybe_stat.error(); - int error_code = err.err_no; - if (err.err_no == ENOENT) { - error_code = InodeResult::kVerificationFailed; - - // TODO: Don't LOG(WARNING) here because this could be very common if we - // access the data much much later after the initial results were read in. - LOG(WARNING) - << "FilterFilenamesForSpecificInodes#verified fail out-of-date filename: " << result; - } else { - LOG(ERROR) - << "FilterFilenamesForSpecificInodes#verified stat(2) failure: " << err; - } - - return InodeResult::makeFailure(result.inode, error_code); - } - }); - - // Now that all mid-stream observables have been connected, turn the Connectable observable - // into a regular observable. - return verified_inode_results.ref_count(filter_state_results); -} - -rxcpp::observable<InodeResult> SearchDirectories::EmitAllFilenames( - rxcpp::observable<InodeResult> all_inodes, - bool missing_device_number, // missing dev_t portion? - bool needs_verification) const { - // TODO: refactor into InodeResolver - - borrowed<SystemCall*> system_call = system_call_; - - // InodeResult may be missing the dev_t portion, so we may need to call scan(2) again - // to confirm the dev_t. - - using EmitAllState = std::optional<InodeResult>; - - auto/*[observable<FilterState>,Connectable]*/ all_inode_results = all_inodes.map( - [system_call, missing_device_number](InodeResult inode_result) { - LOG(VERBOSE) << "EmitAllFilenames#map " - << inode_result; - - // Could fail if the device number is missing _and_ stat(2) fails. - EmitAllState match = std::nullopt; - - if (missing_device_number) { - // Need to fill in dev_t by calling stat(2). - VisitValueOrLogError(std::move(inode_result.data), [&](std::string filename) { - StatResult maybe_stat = Stat(filename, system_call); - VisitValueOrLogError(maybe_stat, [&](const struct stat& stat_buf) { - Inode inode = Inode::FromDeviceAndInode(stat_buf.st_dev, stat_buf.st_ino); - match = InodeResult::makeSuccess(inode, std::move(filename)); - }); - - // Note: stat errors are logged here to make the error closer to the occurrence. - // In theory, we could just return it as an InodeResult but then the error would - // just get logged elsewhere. - }); - } else { - // Trust the dev_t in InodeResult is valid. Later passes can verify it. - match = std::move(inode_result); - - // Note that the InodeResult doesn't necessarily need to have a valid filename here. - // If the earlier pass returned an error-ed result, this will forward the error code. - } - - return match; // implicit move. - } - ); - - auto/*observable<InodeResult>*/ matched_inode_values = all_inode_results - .filter([](const EmitAllState& filter_state) { return filter_state.has_value(); }) - .map([](EmitAllState& filter_state) { return std::move(filter_state.value()); }); - // observable<InodeResult> - - // Verify the inode results by calling stat(2). - // Unverified results are turned into an error. - - auto/*observable<InodeResult>*/ verified_inode_results = - matched_inode_values.map([needs_verification, system_call](InodeResult result) { - if (!needs_verification || !result) { - // Skip verification if requested, or if the result didn't have a filename. - return result; - } - - const std::string& filename = result.data.value(); - StatResult maybe_stat = Stat(filename, system_call); - - if (maybe_stat) - { - if (result.inode == Inode::FromDeviceAndInode(maybe_stat->st_dev, maybe_stat->st_ino)) { - return result; - } else { - LOG(WARNING) - << "EmitAllFilenames#verified fail out-of-date inode: " << result; - return InodeResult::makeFailure(result.inode, InodeResult::kVerificationFailed); - } - } else { - // Forward stat errors directly, as it could be a missing security rule, - // but turn -ENOENT into casual verification errors. - const StatError& err = maybe_stat.error(); - int error_code = err.err_no; - if (err.err_no == ENOENT) { - error_code = InodeResult::kVerificationFailed; - - // TODO: Don't LOG(WARNING) here because this could be very common if we - // access the data much much later after the initial results were read in. - LOG(WARNING) - << "EmitAllFilenames#verified fail out-of-date filename: " << result; - } else { - LOG(ERROR) - << "EmitAllFilenames#verified stat(2) failure: " << err; - } - - return InodeResult::makeFailure(result.inode, error_code); - } - }); - - // TODO: refactor this function some more with the Find(inode_set) equivalent. - - // Now that all mid-stream observables have been connected, turn the Connectable observable - // into a regular observable. - return verified_inode_results; -} - -} // namespace iorap::inode2filename |