
Source Code for Module rekall.plugins.response.files

# Rekall Memory Forensics
#
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Authors:
# Michael Cohen <scudette@google.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#

"""This module adds arbitrary file reading to Rekall."""

__author__ = "Michael Cohen <scudette@google.com>"
import fnmatch
import hashlib
import itertools
import platform
import re
import os

from rekall import plugin
from rekall.plugins.response import common
from rekall_lib import utils


BUFFER_SIZE = 10 * 1024 * 1024

class IRFind(common.AbstractIRCommandPlugin):
    """List files recursively from a root path."""
    name = "find"

    __args = [
        dict(name="root", positional=True,
             help="The root directory to start search from.")
    ]

    table_header = [
        dict(name="Perms", type="Permissions", width=16),
        dict(name="Size", align="r", width=10),
        dict(name="Path"),
    ]

    def collect(self):
        for root, dirs, files in os.walk(self.plugin_args.root):
            for d in dirs + files:
                full_path = os.path.join(root, d)
                result = common.FileFactory(full_path, session=self.session)
                if result:
                    yield dict(Perms=result.st_mode,
                               Size=result.st_size,
                               Path=result)

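
# Illustrative sketch (not part of the original module): the generator below
# mirrors what IRFind.collect() yields, but uses only the standard library
# instead of common.FileFactory(), so the walk-and-stat pattern can be tried
# outside a Rekall session.  The function name and result keys are
# hypothetical.
def _sketch_find(root):
    import stat  # local import so the sketch stays self-contained

    for dirpath, dirnames, filenames in os.walk(root):
        for name in dirnames + filenames:
            full_path = os.path.join(dirpath, name)
            try:
                st = os.lstat(full_path)
            except OSError:
                # Unreadable or vanished entries are skipped, much like
                # FileFactory() returning a falsy result above.
                continue

            yield dict(Perms=oct(stat.S_IMODE(st.st_mode)),
                       Size=st.st_size,
                       Path=full_path)
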
class IRStat(common.AbstractIRCommandPlugin):
    name = "stat"

    __args = [
        dict(name="paths", positional=True, type="Array",
             help="Paths to stat."),
    ]

    table_header = [
        dict(name="Perms", type="Permissions", width=16),
        dict(name="Size", align="r", width=10),
        dict(name="Path"),
    ]

    def collect(self):
        for full_path in self.plugin_args.paths:
            result = common.FileFactory(full_path, session=self.session)
            if result:
                yield dict(Perms=result.st_mode, Size=result.st_size,
                           Path=result)

class Hash(object):
    """A class to hold a hash value."""

    def __init__(self, type="md5", value=None):
        self.type = type
        self.value = value

    def __str__(self):
        return "%s:%s" % (self.type, self.value.encode("hex"))

class IRHash(common.AbstractIRCommandPlugin):
    name = "hash"

    __args = [
        dict(name="paths", positional=True, type="Array",
             help="Paths to hash."),
        dict(name="hash", type="ChoiceArray", default=["sha1"],
             choices=["md5", "sha1", "sha256"],
             help="One or more hashes to calculate.")
    ]

    table_header = [
        dict(name="Hashes", width=72),
        dict(name="Path", type="FileInformation"),
    ]

    def calculate_hashes(self, hashes, file_info):
        hashers = dict((name, getattr(hashlib, name)()) for name in hashes)
        fd = file_info.open()
        while 1:
            data = fd.read(BUFFER_SIZE)
            if not data:
                break

            for hasher in hashers.values():
                hasher.update(data)

        for key in list(hashers):
            hashers[key] = hashers[key].hexdigest()

        return hashers

    def collect(self):
        for path in self.plugin_args.paths:
            file_info = common.FileFactory(path)
            if not file_info.st_mode.is_dir():
                yield dict(
                    Hashes=self.calculate_hashes(
                        self.plugin_args.hash, file_info),
                    Path=file_info)

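
# Illustrative sketch (not part of the original module): chunked hashing with
# hashlib, mirroring calculate_hashes() above but reading from a plain
# filesystem path instead of a FileInformation object.  The function name is
# hypothetical.
def _sketch_hash_file(path, algorithms=("md5", "sha1", "sha256"),
                      buffer_size=BUFFER_SIZE):
    hashers = dict((name, hashlib.new(name)) for name in algorithms)
    with open(path, "rb") as fd:
        while True:
            data = fd.read(buffer_size)
            if not data:
                break

            # Feed the same chunk to every hasher so the file is read once.
            for hasher in hashers.values():
                hasher.update(data)

    return dict((name, hasher.hexdigest())
                for name, hasher in hashers.items())

# e.g. _sketch_hash_file("/etc/hosts", algorithms=["sha1"])
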
class Component(object):
    def __init__(self, session, component=None, cache=None):
        self.session = session
        self.component = component
        self.component_cache = cache

    def stat(self, path):
        key = unicode(path)
        try:
            return self.component_cache[key]
        except KeyError:
            stat = common.FileFactory(path)
            self.component_cache.Put(key, stat)

            return stat

    def __eq__(self, other):
        return unicode(self) == unicode(other)

    def __hash__(self):
        return hash(unicode(self))

    def __str__(self):
        return "%s:%s" % (self.__class__.__name__, self.component)

class LiteralComponent(Component):

    def case_insensitive_filesystem(self):
        if platform.system() == "Windows":
            return True

        return False

    def filter(self, path):
        # For case insensitive filesystems we can just try to open the
        # component.
        if self.case_insensitive_filesystem():
            result_pathspec = path.add(self.component)
            stat = self.stat(result_pathspec)
            if stat:
                return [stat.filename]
            else:
                return []

        # Since we must match a case insensitive filename we need to
        # list all the files and find the best match.
        stat = common.FileFactory(path)
        if not stat:
            return []

        children = {}
        for x in stat.list_names():
            children.setdefault(x.lower(), []).append(x)

        return [stat.filename.add(x)
                for x in children.get(self.component.lower(), [])]

class RegexComponent(Component):
    def __init__(self, *args, **kwargs):
        super(RegexComponent, self).__init__(*args, **kwargs)
        self.component_re = re.compile(self.component, re.I)

    def filter(self, path):
        stat = self.stat(path)
        if not stat:
            return

        if stat.st_mode.is_dir() and not stat.st_mode.is_link():
            self.session.report_progress("Searching %s", path)
            for basename in stat.list_names():
                if self.component_re.match(basename):
                    yield stat.filename.add(basename)

class RecursiveComponent(RegexComponent):
    def __init__(self, depth=3, **kwargs):
        super(RecursiveComponent, self).__init__(**kwargs)
        self.depth = depth

    def filter(self, path, depth=0):
        self.session.report_progress("Recursing into %s", path)

        # TODO: Deal with cross devices.
        if depth >= self.depth:
            return

        stat = self.stat(path)
        if not stat:
            return

        # Do not follow symlinks.
        if stat.st_mode.is_dir() and not stat.st_mode.is_link():
            # The top level counts as a hit, so that e.g. /**/*.txt
            # matches /foo.txt as well.
            if depth == 0:
                yield stat.filename

            for basename in stat.list_names():
                if (self.component_re.match(basename) and
                        not stat.st_mode.is_link()):
                    subdir = stat.filename.add(basename)
                    yield subdir

                    for subitem in self.filter(subdir, depth+1):
                        yield subitem

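
# Illustrative sketch (not part of the original module): depth-bounded
# recursive matching with the standard library, mirroring how
# RecursiveComponent.filter() walks directories while refusing to follow
# symlinks.  The function name is hypothetical.
def _sketch_recursive_match(path, component_re, depth=0, max_depth=3):
    if depth >= max_depth:
        return

    if os.path.isdir(path) and not os.path.islink(path):
        # The top level counts as a hit, mirroring the depth == 0 case above.
        if depth == 0:
            yield path

        try:
            names = sorted(os.listdir(path))
        except OSError:
            return

        for basename in names:
            if component_re.match(basename):
                subdir = os.path.join(path, basename)
                yield subdir

                for subitem in _sketch_recursive_match(
                        subdir, component_re, depth + 1, max_depth):
                    yield subitem

# e.g. list(_sketch_recursive_match("/tmp", re.compile(fnmatch.translate("*"))))
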
class IRGlob(common.AbstractIRCommandPlugin):
    """Search for files by filename glob.

    This code is roughly based on the Glob flow in GRR.
    """

    name = "glob"

    __args = [
        dict(name="globs", positional=True, type="ArrayString",
             help="List of globs to return."),
        dict(name="root",
             help="Root directory to glob from."),
        dict(name="case_insensitive", default=True, type="Bool",
             help="Globs will be case insensitive."),
        dict(name="path_sep",
             help="Path separator character (/ or \\)"),
        dict(name="filesystem", choices=list(common.FILE_SPEC_DISPATCHER),
             type="Choices", default="API",
             help="The virtual filesystem implementation to glob in.")
    ]

    table_header = [
        dict(name="path", type="FileInformation"),
    ]

    def column_types(self):
        return dict(path=common.FileInformation(filename="/etc"))

    INTERPOLATED_REGEX = re.compile(r"%%([^%]+?)%%")

    # Grouping pattern: e.g. {test.exe,foo.doc,bar.txt}
    GROUPING_PATTERN = re.compile("({([^}]+,[^}]+)}|%%([^%]+?)%%)")
    RECURSION_REGEX = re.compile(r"\*\*(\d*)")

    # A regex indicating if there are shell globs in this path.
    GLOB_MAGIC_CHECK = re.compile("[*?[]")

    def __init__(self, *args, **kwargs):
        super(IRGlob, self).__init__(*args, **kwargs)
        self.component_cache = utils.FastStore(50)

        # The default path separator is platform dependent.
        if not self.plugin_args.path_sep:
            self.plugin_args.path_sep = (
                "\\" if platform.system() == "Windows" else "/")

        # By default use the root of the filesystem.
        if self.plugin_args.root is None:
            self.plugin_args.root = self.plugin_args.path_sep

    def _interpolate_grouping(self, pattern):
        # Take the pattern and split it into components around grouping
        # patterns. Expand each grouping pattern to a set.

        # e.g. /foo{a,b}/bar -> ["/foo", set(["a", "b"]), "/bar"]
        result = []
        components = []
        offset = 0
        for match in self.GROUPING_PATTERN.finditer(pattern):
            match_str = match.group(0)
            # Alternatives.
            if match_str.startswith("{"):
                components.append([pattern[offset:match.start()]])

                # Expand the attribute into the set of possibilities:
                alternatives = match.group(2).split(",")
                components.append(set(alternatives))
                offset = match.end()

            # KnowledgeBase interpolation.
            elif match_str.startswith("%"):
                components.append([pattern[offset:match.start()]])

                kb = self.session.GetParameter("knowledge_base")
                alternatives = kb.expand(match_str)

                components.append(set(alternatives))
                offset = match.end()

            else:
                raise plugin.PluginError(
                    "Unknown interpolation %s" % match.group(0))

        components.append([pattern[offset:]])
        # Now calculate the cartesian products of all these sets to form all
        # strings.
        for vector in itertools.product(*components):
            result.append(u"".join(vector))

        # These should be all possible patterns.
        # e.g. /fooa/bar , /foob/bar
        return result

    def convert_glob_into_path_components(self, pattern):
        """Converts a glob pattern into a list of pathspec components.

        Wildcards are also converted to regular expressions. The pathspec
        components do not span directories, and are marked as a regex or a
        literal component.

        We also support recursion into directories using the ** notation. For
        example, /home/**2/foo.txt will find all files named foo.txt recursed 2
        directories deep. If the directory depth is omitted, it defaults to 3.

        Example:
          /home/test**/*exe -> [{path: 'home', type: "LITERAL"},
                                {path: 'test.*\\Z(?ms)', type: "RECURSIVE"},
                                {path: '.*exe\\Z(?ms)', type: "REGEX"}]

        Args:
          pattern: A glob expression with wildcards.

        Returns:
          A list of PathSpec instances for each component.

        Raises:
          ValueError: If the glob is invalid.
        """
        pattern_components = common.FileSpec(
            pattern, path_sep=self.plugin_args.path_sep).components()

        components = []
        for path_component in pattern_components:
            if not path_component:
                continue

            # A ** in the path component means recurse into directories that
            # match the pattern.
            m = self.RECURSION_REGEX.search(path_component)
            if m:
                depth = 3

                # Allow the user to override the recursion depth.
                if m.group(1):
                    depth = int(m.group(1))

                path_component = path_component.replace(m.group(0), "*")
                component = RecursiveComponent(
                    session=self.session,
                    component=fnmatch.translate(path_component),
                    cache=self.component_cache,
                    depth=depth)

            elif self.GLOB_MAGIC_CHECK.search(path_component):
                component = RegexComponent(
                    session=self.session,
                    cache=self.component_cache,
                    component=fnmatch.translate(path_component))

            else:
                component = LiteralComponent(
                    session=self.session,
                    cache=self.component_cache,
                    component=path_component)

            components.append(component)

        return components

    def _filter(self, node, path):
        """Path is the pathspec of the path we begin evaluation with."""
        for component, child_node in node.iteritems():
            # Terminal node - yield the result.
            if not child_node:
                for subpath in component.filter(path):
                    yield subpath

            else:
                # Non-terminal node, walk the subnode recursively.
                for matching_path in component.filter(path):
                    for subpath in self._filter(child_node, matching_path):
                        yield subpath

    def make_component_tree(self, globs):
        expanded_globs = []
        for glob in globs:
            expanded_globs.extend(self._interpolate_grouping(glob))

        component_tree = {}
        for glob in expanded_globs:
            node = component_tree
            for component in self.convert_glob_into_path_components(glob):
                node = node.setdefault(component, {})

        return component_tree

    def collect_globs(self, globs):
        component_tree = self.make_component_tree(globs)
        root = common.FileSpec(self.plugin_args.root,
                               path_sep=self.plugin_args.path_sep)
        for path in self._filter(component_tree, root):
            yield common.FileFactory(path, session=self.session)

    def collect(self):
        for x in self.collect_globs(self.plugin_args.globs):
            yield dict(path=x)

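
# Illustrative sketch (not part of the original module): a standalone version
# of the {a,b} alternative expansion performed by
# IRGlob._interpolate_grouping() above, without the %%knowledge_base%%
# lookups.  The function name is hypothetical.
def _sketch_interpolate_grouping(pattern):
    grouping = re.compile(r"\{([^}]+,[^}]+)\}")
    components = []
    offset = 0
    for match in grouping.finditer(pattern):
        # Literal text before the group, then the list of alternatives.
        components.append([pattern[offset:match.start()]])
        components.append(match.group(1).split(","))
        offset = match.end()

    components.append([pattern[offset:]])

    # The cartesian product of the pieces yields every concrete pattern,
    # e.g. "/foo{a,b}/bar" -> ["/fooa/bar", "/foob/bar"].
    return [u"".join(vector) for vector in itertools.product(*components)]
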
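
# Illustrative sketch (not part of the original module): how glob path
# components are classified and merged into the nested dict built by
# IRGlob.convert_glob_into_path_components() and make_component_tree().
# Plain tuples stand in for the Component objects; the function names are
# hypothetical.
def _sketch_classify(path_component):
    m = IRGlob.RECURSION_REGEX.search(path_component)
    if m:
        # "test**2" recurses 2 levels; the depth defaults to 3 if omitted.
        depth = int(m.group(1)) if m.group(1) else 3
        pattern = fnmatch.translate(path_component.replace(m.group(0), "*"))
        return ("RECURSIVE", pattern, depth)

    if IRGlob.GLOB_MAGIC_CHECK.search(path_component):
        return ("REGEX", fnmatch.translate(path_component))

    return ("LITERAL", path_component)


def _sketch_component_tree(globs, path_sep="/"):
    # Globs sharing a prefix share nodes and terminal nodes are empty dicts,
    # exactly as in make_component_tree().
    tree = {}
    for pattern in globs:
        node = tree
        for part in pattern.strip(path_sep).split(path_sep):
            if part:
                node = node.setdefault(_sketch_classify(part), {})
    return tree

# e.g. _sketch_component_tree(["/etc/passwd", "/etc/*.conf"]) produces one
# shared ("LITERAL", "etc") node with two child leaves.
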
class IRDump(IRGlob):
    """Hexdump files from disk."""

    name = "hexdump_file"

    __args = [
        dict(name="start", type="IntParser", default=0,
             help="An offset to hexdump."),

        dict(name="length", type="IntParser", default=100,
             help="Maximum length to dump."),

        dict(name="width", type="IntParser", default=24,
             help="Number of bytes per row"),

        dict(name="rows", type="IntParser", default=4,
             help="Number of rows to dump"),
    ]

    table_header = [
        dict(name="divider", type="Divider"),
        dict(name="FileSpec", hidden=True),
        dict(name="offset", style="address"),
        dict(name="hexdump", width=65),
    ]

    def collect(self):
        for hit in super(IRDump, self).collect():
            path = hit.get("path")
            if path:
                fd = path.open()
                if fd:
                    yield dict(divider=path.filename)

                    to_read = min(
                        self.plugin_args.length,
                        self.plugin_args.width * self.plugin_args.rows)
                    for offset in utils.xrange(
                            self.plugin_args.start,
                            self.plugin_args.start + to_read,
                            self.plugin_args.width):

                        fd.seek(offset)
                        data = fd.read(self.plugin_args.width)
                        if not data:
                            break

                        yield dict(
                            offset=offset,
                            FileSpec=path.filename,
                            hexdump=utils.HexDumpedString(data),
                            nowrap=True,
                            hex_width=self.plugin_args.width)

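
# Illustrative sketch (not part of the original module): reading a byte range
# in fixed-width rows, roughly what IRDump.collect() renders through
# utils.HexDumpedString().  The function name is hypothetical.
def _sketch_hexdump(path, start=0, length=100, width=24):
    import binascii  # local import so the sketch stays self-contained

    with open(path, "rb") as fd:
        fd.seek(start)
        offset = start
        while offset < start + length:
            data = fd.read(min(width, start + length - offset))
            if not data:
                break

            hex_part = binascii.hexlify(data).decode("ascii")
            ascii_part = "".join(chr(c) if 32 <= c < 127 else "."
                                 for c in bytearray(data))
            yield offset, hex_part, ascii_part
            offset += len(data)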