Coverage Report

Created: 2025-07-23 08:14

/src/osquery/osquery/tables/system/hash.cpp
Source (all executable lines in this file are uncovered; count 0)
/**
 * Copyright (c) 2014-present, The osquery authors
 *
 * This source code is licensed as defined by the LICENSE file found in the
 * root directory of this source tree.
 *
 * SPDX-License-Identifier: (Apache-2.0 OR GPL-2.0-only)
 */

// clang-format off
#include <sys/types.h>
#include <sys/stat.h>
// clang-format on

#ifndef WIN32
#include <unistd.h>
#endif

#include <set>
#include <thread>

#include <boost/filesystem.hpp>

#include <osquery/core/flags.h>
#include <osquery/filesystem/filesystem.h>
#include <osquery/hashing/hashing.h>
#include <osquery/logger/logger.h>
#include <osquery/core/tables.h>
#include <osquery/sql/dynamic_table_row.h>
#include <osquery/utils/mutex.h>
#include <osquery/utils/info/platform_type.h>
#include <osquery/worker/ipc/platform_table_container_ipc.h>
#include <osquery/worker/logging/glog/glog_logger.h>
#include <osquery/worker/logging/logger.h>

namespace osquery {

FLAG(bool,
     disable_hash_cache,
     false,
     "Cache calculated file hashes, re-calculate only if inode times change");

FLAG(uint32, hash_cache_max, 500, "Size of LRU file hash cache");

HIDDEN_FLAG(uint32,
            hash_delay,
            20,
            "Number of milliseconds to delay after hashing");

DECLARE_uint64(read_max);

namespace tables {

/// Clear this many rows every time cache eviction is triggered.
const size_t kHashCacheEvictSize{5};

/**
 * @brief Implements persistent in-memory caching of files' hashes.
 *
 * This cache has an LRU eviction policy. The hash is recalculated
 * every time the mtime or size of the file changes.
 */
struct FileHashCache {
  /// The file's modification time, changes with a touch.
  time_t file_mtime;

  /// The file's serial or information number (inode).
  ino_t file_inode;

  /// The file's size.
  off_t file_size;

  /// For eviction, the last time this cache item was used.
  time_t cache_access_time;

  /// Cache content, the hashes.
  MultiHashes hashes;

  /// Cache index, the file path.
  std::string path;

  /// Comparison function for organizing the LRU heap.
  static bool greater(const FileHashCache* l, const FileHashCache* r) {
    return l->cache_access_time > r->cache_access_time;
  }

  /**
   * @brief Do-it-all access function.
   *
   * Maintains the cache of hash sums: stats the file at path and, if it has
   * changed or is not present in the cache, recalculates the hashes and
   * caches the result.
   *
   * @param path the path of the file to hash.
   * @param out stores the calculated hashes.
   * @param logger the logger used to report failures.
   *
   * @return true if succeeded, false if something went wrong.
   */
  static bool load(const std::string& path, MultiHashes& out, Logger& logger);
};
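
// Note (illustrative, not in the original source): because `greater` orders
// entries by descending cache_access_time, the std::make_heap/push_heap/
// pop_heap calls in FileHashCache::load below maintain `lru` as a min-heap on
// access time. lru[0] is therefore always the least recently used entry and
// the first candidate for eviction once the cache exceeds
// FLAGS_hash_cache_max.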

#if defined(WIN32)

#define stat _stat
#define strerror_r(e, buf, sz) strerror_s((buf), (sz), (e))

#endif

/**
 * @brief Checks the current stat output against the cached view.
 *
 * If the modified/altered time or the file's inode has changed then the hash
 * should be recalculated.
 */
static inline bool statInvalid(const struct stat& st, const FileHashCache& fh) {
  if (st.st_ino != fh.file_inode || st.st_mtime != fh.file_mtime) {
    // Most plausible case for modification detection.
    return true;
  }

  if (st.st_size != fh.file_size) {
    // Just in case there's tomfoolery.
    return true;
  }
  return false;
}

bool FileHashCache::load(const std::string& path,
                         MultiHashes& out,
                         Logger& logger) {
  // synchronize the access to cache
  static Mutex mx;
  // path => cache entry
  static std::unordered_map<std::string, FileHashCache> cache;
  // minheap on cache_access_time
  static std::vector<FileHashCache*> lru;

  WriteLock guard(mx);

  struct stat st;
  if (stat(path.c_str(), &st) != 0) {
    char buf[0x200] = {0};
    strerror_r(errno, buf, sizeof(buf));
    logger.log(google::GLOG_WARNING, "Cannot stat file: " + path + ": " + buf);
    return false;
  }

  auto entry = cache.find(path);
  if (entry == cache.end()) { // none, load
    if (cache.size() >= FLAGS_hash_cache_max) {
      // too large, evict
      for (size_t i = 0; i < kHashCacheEvictSize; ++i) {
        if (lru.empty()) {
          continue;
        }
        std::string key = lru[0]->path;
        std::pop_heap(lru.begin(), lru.end(), FileHashCache::greater);
        lru.pop_back();
        if (cache.find(key) != cache.end()) {
          cache.erase(key);
        }
      }
    }

    auto hashes = hashMultiFromFile(
        HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, path);
    FileHashCache rec = {st.st_mtime, // .file_mtime
                         st.st_ino, // .file_inode
                         st.st_size, // .file_size
                         time(nullptr), // .cache_access_time
                         std::move(hashes), // .hashes
                         path}; // .path
    cache[path] = std::move(rec);
    lru.push_back(&cache[path]);
    std::push_heap(lru.begin(), lru.end(), FileHashCache::greater);
    out = cache[path].hashes;
  } else if (statInvalid(st, entry->second)) { // changed, update
    auto hashes = hashMultiFromFile(
        HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, path);
    entry->second.cache_access_time = time(nullptr);
    entry->second.file_inode = st.st_ino;
    entry->second.file_mtime = st.st_mtime;
    entry->second.file_size = st.st_size;
    entry->second.hashes = std::move(hashes);
    std::make_heap(lru.begin(), lru.end(), FileHashCache::greater);
    out = entry->second.hashes;
  } else { // ok, got it
    out = entry->second.hashes;
    entry->second.cache_access_time = time(nullptr);
    std::make_heap(lru.begin(), lru.end(), FileHashCache::greater);
  }
  return true;
}
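
// Illustrative only (not part of the original file): a minimal sketch of how a
// caller could use FileHashCache::load. The function name and path literal are
// hypothetical; MultiHashes and Logger are used exactly as in the code above.
[[maybe_unused]] static void exampleCachedHashLookup(Logger& logger) {
  MultiHashes hashes;
  // load() returns false only if the file could not be stat'd; on subsequent
  // calls for an unchanged file the cached digests are returned without
  // re-reading the file.
  if (FileHashCache::load("/etc/hosts", hashes, logger)) {
    logger.log(google::GLOG_INFO,
               "md5=" + hashes.md5 + " sha1=" + hashes.sha1 +
                   " sha256=" + hashes.sha256);
  }
}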

void genHashForFile(const std::string& path,
                    const std::string& dir,
                    QueryContext& context,
                    QueryData& results,
                    Logger& logger) {
  // Must provide the path, filename, directory separate from boost path->string
  // helpers to match any explicit (query-parsed) predicate constraints.
  auto tr = TableRowHolder(new DynamicTableRow());
  MultiHashes hashes;
  if (!FLAGS_disable_hash_cache) {
    FileHashCache::load(path, hashes, logger);
  } else {
    if (context.isCached(path)) {
      // Use the inner-query cache if the global hash cache is disabled.
      // This protects against hashing the same content twice in the same query.
      tr = context.getCache(path);
    } else {
      hashes = hashMultiFromFile(
          HASH_TYPE_MD5 | HASH_TYPE_SHA1 | HASH_TYPE_SHA256, path);
      std::this_thread::sleep_for(std::chrono::milliseconds(FLAGS_hash_delay));
    }
  }

  DynamicTableRow& r = *dynamic_cast<DynamicTableRow*>(tr.get());
  r["path"] = path;
  r["directory"] = dir;
  r["md5"] = std::move(hashes.md5);
  r["sha1"] = std::move(hashes.sha1);
  r["sha256"] = std::move(hashes.sha256);

  if (FLAGS_disable_hash_cache) {
    context.setCache(path, tr);
  }

  r["pid_with_namespace"] = "0";

  results.push_back(static_cast<Row>(r));
}

void expandFSPathConstraints(QueryContext& context,
                             const std::string& path_column_name,
                             std::set<std::string>& paths) {
  context.expandConstraints(
      path_column_name,
      LIKE,
      paths,
      ([&](const std::string& pattern, std::set<std::string>& out) {
        std::vector<std::string> patterns;
        auto status =
            resolveFilePattern(pattern, patterns, GLOB_ALL | GLOB_NO_CANON);
        if (status.ok()) {
          for (const auto& resolved : patterns) {
            out.insert(resolved);
          }
        }
        return status;
      }));
}

QueryData genHashImpl(QueryContext& context, Logger& logger) {
  QueryData results;
  boost::system::error_code ec;

  // The query must provide a predicate with constraints including path or
  // directory. We search for the parsed predicate constraints with the equals
  // operator.
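  //
  // Illustrative examples (not in the original source) of queries that satisfy
  // this requirement; LIKE patterns on path or directory are expanded into
  // concrete paths by expandFSPathConstraints above:
  //   SELECT md5, sha256 FROM hash WHERE path = '/etc/hosts';
  //   SELECT path, sha1 FROM hash WHERE directory = '/etc';
  //   SELECT path, sha256 FROM hash WHERE path LIKE '/etc/%';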
  auto paths = context.constraints["path"].getAll(EQUALS);
  expandFSPathConstraints(context, "path", paths);

  // Iterate through the file paths, adding the hash results
  for (const auto& path_string : paths) {
    boost::filesystem::path path = path_string;
    if (!boost::filesystem::is_regular_file(path, ec)) {
      continue;
    }

    genHashForFile(
        path_string, path.parent_path().string(), context, results, logger);
  }

  // Now loop through constraints using the directory column constraint.
  auto directories = context.constraints["directory"].getAll(EQUALS);
  expandFSPathConstraints(context, "directory", directories);

  // Iterate over the directory paths
  for (const auto& directory_string : directories) {
    boost::filesystem::path directory = directory_string;
    if (!boost::filesystem::is_directory(directory, ec)) {
      continue;
    }

    // Iterate over the directory files and generate a hash for each regular
    // file.
    boost::filesystem::directory_iterator begin(directory), end;
    for (; begin != end; ++begin) {
      if (boost::filesystem::is_regular_file(begin->path(), ec)) {
        genHashForFile(
            begin->path().string(), directory_string, context, results, logger);
      }
    }
  }

  return results;
}
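
// Note (illustrative, not in the original source): hasNamespaceConstraint and
// generateInNamespace come from the table container IPC headers included
// above. When a query constrains pid_with_namespace, table generation is
// delegated to a worker joined to that process's namespace; otherwise genHash
// below runs the implementation in-process with a glog-backed logger.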

QueryData genHash(QueryContext& context) {
  if (hasNamespaceConstraint(context)) {
    return generateInNamespace(context, "hash", genHashImpl);
  } else {
    GLOGLogger logger;
    return genHashImpl(context, logger);
  }
}
} // namespace tables
} // namespace osquery