
Source Code for Module rekall.plugins.tools.ewf

# Rekall Memory Forensics
#
# Copyright 2014 Google Inc. All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#

 20  """This file provides read/write support for EWF files. 
 21   
 22  EWF files are generated by Encase/FTK and are a common compressible storage 
 23  format for digital evidence. 
 24   
 25  The below code is based on libewf: 
 26  https://github.com/libyal/libewf 
 27  https://googledrive.com/host/0B3fBvzttpiiSMTdoaVExWWNsRjg/ 
 28   
 29   
 30  NOTE: Since EWFv1 files are unable to represent sparse data they are not 
 31  directly suitable for storing memory images. Therefore in Rekall we generally 
 32  use EWF files as containers for other formats, such as ELF core dumps. 
 33   
 34  NOTE: EWF files produced by the ewfacquire plugin are _NOT_ compatible with 
 35  Encase/FTK and can not be analyzed by those programs. We merely use the EWF 
 36  container as a container providing seekable compression for more traditional 
 37  memory image formats such as ELF. 
 38   
 39  When using the ewfacquire plugin, if the source address space contains a single 
 40  run of data, we generate a single EWF file of this run (e.g. for a disk 
 41  image). If, however, the source address space contains more than one run, we 
 42  automatically create an ELF core dump to contain the sparse runs, and that is 
 43  compressed into the EWF file instead. This is not generally compatible with 
 44  Encase or FTK since they do not understand layered address spaces! For Rekall 
 45  this works because Rekall automatically detects that the EWF file contains an 
 46  ELF core dump and stacks the relevant address spaces. 
 47  """ 
 48   
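# A rough sketch of the layering described above (not part of the original
# source). When the source address space has multiple runs, the acquired
# file nests formats like this:
#
#   sparse physical memory runs
#     -> ELF core dump (one PT_LOAD segment per run)
#       -> 32kb EWF chunks, zlib compressed where beneficial
#         -> EWF segment file on disk (e.g. output.E01)
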
__author__ = "Michael Cohen <scudette@google.com>"
import array
import os
import struct
import zlib

from rekall import obj
from rekall import plugin
from rekall import testlib
from rekall.plugins.addrspaces import elfcore
from rekall.plugins.addrspaces import standard
from rekall.plugins.overlays import basic
from rekall_lib import utils


EWF_TYPES = dict(
    ewf_file_header_v1=[0x0d, {
        'EVF_sig': [0, ['Signature', dict(value="EVF\x09\x0d\x0a\xff\x00")]],

        'fields_start': [8, ['byte']],
        'segment_number': [9, ['unsigned short int']],
        'fields_end': [11, ['unsigned short int']],
        }],

    ewf_file_header_v2=[None, {
        'EVF_sig': [0, ['Signature', dict(value="EVF2\x0d\x0a\x81\x00")]],

        'major_version': [9, ['byte']],
        'minor_version': [10, ['byte']],

        'compression_method': [11, ['Enumeration', dict(
            target="unsigned short int",
            choices=dict(
                NONE=0,
                DEFLATE=1,
                BZIP2=2,
                )
            )]],
        'segment_number': [13, ['unsigned short int']],
        'set_identifier': [15, ['String', dict(length=16)]],
        }],

    ewf_section_descriptor_v1=[76, {
        # This string determines how to process this section.
        'type': [0, ['String', dict(length=16)]],

        # The next section in this file.
        'next': [16, ['Pointer', dict(
            target="ewf_section_descriptor_v1"
            )]],

        'size': [24, ['long long unsigned int']],
        'checksum': [72, ['int']],
        }],

    ewf_volume=[94, {
        'media_type': [0, ['Enumeration', dict(
            choices={
                0: 'removable_disk',
                1: 'fixed_disk',
                2: 'optical_disk',
                3: 'LVF',
                4: 'memory',
                },
            )]],
        'number_of_chunks': [4, ['unsigned int']],
        'sectors_per_chunk': [8, ['unsigned int']],
        'bytes_per_sector': [12, ['unsigned int']],
        'number_of_sectors': [16, ['long long unsigned int']],
        'chs_cylinders': [24, ['unsigned int']],
        'chs_heads': [28, ['unsigned int']],
        'chs_sectors': [32, ['unsigned int']],

        'media_flags': [36, ['Flags', dict(
            maskmap={
                'image': 1,
                'physical': 2,
                'Fastblock Tableau write blocker': 4,
                'Tableau write blocker': 8,
                })]],

        'compression_level': [52, ['Enumeration', dict(
            choices={
                0: 'no compression',
                1: 'fast/good compression',
                2: 'best compression',
                })]],

        'checksum': [90, ['int']],
        }],

    ewf_table_entry=[4, {
        # Is the chunk compressed?
        'compressed': [0, ['BitField', dict(start_bit=31, end_bit=32)]],

        # The offset to the chunk within the file.
        'offset': [0, ['BitField', dict(start_bit=0, end_bit=31)]],
        }],

    ewf_table_header_v1=[lambda x: x.entries[x.number_of_entries].obj_end, {
        'number_of_entries': [0, ['long long unsigned int']],
        'base_offset': [8, ['long long unsigned int']],
        'checksum': [20, ['int']],

        # The table just contains a list of table entries to the start of each
        # chunk.
        'entries': [24, ['Array', dict(
            target='ewf_table_entry',
            count=lambda x: x.number_of_entries
            )]],
        }],
    )


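# Illustrative helper (not part of the original source; the name is
# hypothetical). An ewf_table_entry packs a 31 bit chunk offset and a one
# bit "compressed" flag into a single little endian 32 bit word;
# EWFFile.read_chunk() below inlines this decode for speed.
def _decode_table_entry_sketch(entry):
    """Split a raw 32 bit table entry into (offset, compressed)."""
    offset = entry & 0x7fffffff  # Bits 0-30: offset relative to base_offset.
    compressed = bool(entry & 0x80000000)  # Bit 31: chunk is zlib compressed.
    return offset, compressed

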
class ewf_section_descriptor_v1(obj.Struct):
    def UpdateChecksum(self):
        """Recalculate the checksum field."""
        self.size = self.next.v() - self.obj_offset
        data = self.obj_vm.read(
            self.obj_offset, self.checksum.obj_offset - self.obj_offset)

        self.checksum = zlib.adler32(data)


class ewf_table_header_v1(obj.Struct):
    def UpdateChecksum(self):
        """Recalculate the checksum field."""
        data = self.obj_vm.read(
            self.obj_offset, self.checksum.obj_offset - self.obj_offset)

        self.checksum = zlib.adler32(data)


class ewf_volume(ewf_table_header_v1):
    pass


class EWFProfile(basic.ProfileLLP64, basic.BasicClasses):
    """Basic profile for EWF files."""

    @classmethod
    def Initialize(cls, profile):
        super(EWFProfile, cls).Initialize(profile)

        # Register the vtypes and the struct classes defined above.
        profile.add_types(EWF_TYPES)
        profile.add_classes(
            ewf_section_descriptor_v1=ewf_section_descriptor_v1,
            ewf_table_header_v1=ewf_table_header_v1,
            ewf_volume=ewf_volume,
            )


class EWFFile(object):
    """A helper for parsing an EWF file."""

    def __init__(self, session=None, address_space=None):
        self.session = session

        # This is a cache of tables. We can quickly find the table responsible
        # for a particular chunk.
        self.tables = utils.SortedCollection(key=lambda x: x[0])
        self._chunk_offset = 0
        self.chunk_size = 32 * 1024

        # 32kb * 100 = 3.2mb cache size.
        self.chunk_cache = utils.FastStore(max_size=100)

        self.address_space = address_space
        self.profile = EWFProfile(session=session)
        self.file_header = self.profile.ewf_file_header_v1(
            offset=0, vm=self.address_space)

        # Make sure the file signature is correct.
        if not self.file_header.EVF_sig.is_valid():
            raise RuntimeError("EVF signature does not match.")

        # Now locate all the sections in the file.
        first_section = self.profile.ewf_section_descriptor_v1(
            vm=self.address_space, offset=self.file_header.obj_end)

        for section in first_section.walk_list("next"):
            if section.type == "header2":
                self.handle_header2(section)

            elif section.type == "header":
                self.handle_header(section)

            elif section.type in ["disk", "volume"]:
                self.handle_volume(section)

            elif section.type == "table":
                self.handle_table(section)

        # How many chunks we actually have in this file.
        self.size = self._chunk_offset * self.chunk_size

    def handle_header(self, section):
        """Handle the header section.

        We do not currently do anything with it.
        """
        # The old header contains an ascii encoded description, compressed
        # with zlib.
        data = zlib.decompress(
            section.obj_vm.read(section.obj_end, section.size))

        # We don't do anything with this data right now.

    def handle_header2(self, section):
        """Handle the header2 section.

        We do not currently do anything with it.
        """
        # The header contains a utf16 encoded description, compressed with
        # zlib.
        data = zlib.decompress(
            section.obj_vm.read(section.obj_end, section.size)).decode("utf16")

        # We don't do anything with this data right now.

    def handle_volume(self, section):
        """Handle the volume section.

        We mainly use it to know the chunk size.
        """
        volume_header = self.profile.ewf_volume(
            vm=self.address_space, offset=section.obj_end)

        self.chunk_size = (volume_header.sectors_per_chunk *
                           volume_header.bytes_per_sector)

    def handle_table(self, section):
        """Parse the table and store it in our lookup table."""
        table_header = self.profile.ewf_table_header_v1(
            vm=self.address_space, offset=section.obj_end)

        number_of_entries = table_header.number_of_entries

        # This is an optimization which allows us to avoid small reads for
        # each chunk. We just load the entire table into memory and read it on
        # demand from there.
        table = array.array("I")
        table.fromstring(self.address_space.read(
            table_header.entries.obj_offset,
            4 * table_header.number_of_entries))

        # We assume the last chunk is a full chunk. Feeding zlib.decompress()
        # extra data does not matter so we just read the most we can.
        table.append(table[-1] + self.chunk_size)

        self.tables.insert(
            # First chunk for this table, table header, table entry cache.
            (self._chunk_offset, table_header, table))

        # The next table starts at this chunk.
        self._chunk_offset += number_of_entries

    def read_chunk(self, chunk_id):
        """Read a single chunk from the file."""
        try:
            return self.chunk_cache.Get(chunk_id)
        except KeyError:
            start_chunk, table_header, table = self.tables.find_le(chunk_id)

            # This should be an ewf_table_entry object but the below is
            # faster.
            try:
                table_entry = table[chunk_id - start_chunk]

                offset = table_entry & 0x7fffffff
                next_offset = table[chunk_id - start_chunk + 1] & 0x7fffffff
                compressed_chunk_size = next_offset - offset
            except IndexError:
                return ""

            data = self.address_space.read(
                offset + table_header.base_offset, compressed_chunk_size)

            if table_entry & 0x80000000:
                data = zlib.decompress(data)

            # Cache the chunk for later.
            self.chunk_cache.Put(chunk_id, data)

            return data

    def read_partial(self, offset, length):
        """Read as much as possible from the current offset."""
        # Find the chunk responsible for this offset.
        chunk_id, chunk_offset = divmod(offset, self.chunk_size)
        available_length = min(length, self.chunk_size - chunk_offset)

        # Get the chunk and split it.
        data = self.read_chunk(chunk_id)

        return data[chunk_offset:chunk_offset + available_length]

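    # Worked example (illustrative, not part of the original source): with
    # the default 32kb chunks, a read at offset 100000 maps to
    #   divmod(100000, 32768) == (3, 1696)
    # i.e. chunk 3, starting 1696 bytes into it, and at most
    # 32768 - 1696 == 31072 bytes can be served from that chunk before
    # read() below moves on to chunk 4.
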
    def read(self, offset, length):
        """Read data from the file."""
        # Most read operations are very short and will not need to merge
        # chunks at all. In that case concatenating strings is much faster
        # than storing partial reads into a list and join()ing them.
        result = ''
        available_length = length

        while available_length > 0:
            buf = self.read_partial(offset, available_length)
            if not buf:
                break

            result += buf
            offset += len(buf)
            available_length -= len(buf)

        return result


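# Example usage of EWFFile (a sketch, not part of the original source; it
# assumes `session` and a readable address space `file_as` mapped over an
# existing .E01 file):
#
#   ewf = EWFFile(session=session, address_space=file_as)
#   first_page = ewf.read(0, 0x1000)

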
class EWFFileWriter(object):
    """A writer for EWF files.

    NOTE: The EWF files we produce here are not generally compatible with
    Encase/FTK. We produce EWFv1 files which are unable to store sparse
    images. We place an ELF file inside the EWF container to ensure we can
    efficiently store sparse memory ranges.
    """

    def __init__(self, out_as, session):
        self.out_as = out_as
        self.session = session
        self.profile = EWFProfile(session=self.session)
        self.chunk_size = 32 * 1024
        self.current_offset = 0
        self.chunk_id = 0

        self.last_section = None

        # Start off by writing the file header.
        file_header = self.profile.ewf_file_header_v1(
            offset=0, vm=out_as)
        file_header.EVF_sig = file_header.EVF_sig.signature
        file_header.fields_start = 1
        file_header.segment_number = 1
        file_header.fields_end = 1

        self.current_offset = file_header.obj_end
        self.buffer = ""
        self.table_count = 0

        # Get ready to accept data.
        self.StartNewTable()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, trace):
        self.Close()

    def AddNewSection(self, section):
        if self.last_section:
            self.last_section.next = section
            self.last_section.UpdateChecksum()

        self.last_section = section

    def StartNewTable(self):
        """Writes a sectors section and begins collecting chunks into a new
        table."""
        self.table = []
        self.table_count += 1

        sectors_section = self.profile.ewf_section_descriptor_v1(
            offset=self.current_offset, vm=self.out_as)
        sectors_section.type = "sectors"
        self.AddNewSection(sectors_section)

        self.base_offset = self.current_offset = sectors_section.obj_end

    def write(self, data):
        """Writes the data into the file.

        This method allows the writer to be used as a file-like object.
        """
        self.buffer += data
        buffer_offset = 0
        while len(self.buffer) - buffer_offset >= self.chunk_size:
            data = self.buffer[buffer_offset:buffer_offset + self.chunk_size]
            cdata = zlib.compress(data)
            chunk_offset = self.current_offset - self.base_offset

            if len(cdata) > len(data):
                self.table.append(chunk_offset)
                cdata = data
            else:
                self.table.append(0x80000000 | chunk_offset)

            self.out_as.write(self.current_offset, cdata)
            self.current_offset += len(cdata)
            buffer_offset += self.chunk_size
            self.chunk_id += 1

            # Flush the table when it gets too large. Tables can only store
            # 31 bit offsets and so can only address roughly 2gb. We choose
            # to stay under 1gb: 30000 * 32kb = 0.91gb.
            if len(self.table) > 30000:
                self.session.report_progress(
                    "Flushing EWF Table %s.", self.table_count)
                self.FlushTable()
                self.StartNewTable()

        self.buffer = self.buffer[buffer_offset:]

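    # Capacity check for the 30000 entry flush threshold above
    # (illustrative, not part of the original source): offsets are 31 bits,
    # so one table can address at most 2**31 - 1 == 2147483647 bytes (~2gb).
    # Flushing at 30000 * 32 * 1024 == 983040000 bytes (~0.92gb) of
    # uncompressed data stays comfortably below that limit.
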
    def FlushTable(self):
        """Flush the current table."""
        table_section = self.profile.ewf_section_descriptor_v1(
            offset=self.current_offset, vm=self.out_as)
        table_section.type = "table"
        self.AddNewSection(table_section)

        table_header = self.profile.ewf_table_header_v1(
            offset=table_section.obj_end, vm=self.out_as)

        table_header.number_of_entries = len(self.table)
        table_header.base_offset = self.base_offset
        table_header.UpdateChecksum()

        # Now write the table entries.
        self.out_as.write(
            table_header.entries.obj_offset,
            struct.pack("I" * len(self.table), *self.table))

        self.current_offset = (table_header.entries.obj_offset +
                               4 * len(self.table))

    def Close(self):
        # If there is some data left over, pad it to the length of the chunk
        # so we get to write it.
        if len(self.buffer):
            self.write("\x00" * (self.chunk_size - len(self.buffer)))

        self.FlushTable()

        # Write the volume section.
        volume_section = self.profile.ewf_section_descriptor_v1(
            offset=self.current_offset, vm=self.out_as)
        volume_section.type = "volume"
        self.AddNewSection(volume_section)

        volume_header = self.profile.ewf_volume(
            offset=volume_section.obj_end, vm=self.out_as)

        volume_header.number_of_chunks = self.chunk_id
        volume_header.sectors_per_chunk = self.chunk_size / 512
        volume_header.number_of_sectors = (volume_header.number_of_chunks *
                                           volume_header.sectors_per_chunk)

        volume_header.bytes_per_sector = 512
        volume_header.UpdateChecksum()

        # Write the done section.
        done_section = self.profile.ewf_section_descriptor_v1(
            offset=volume_header.obj_end, vm=self.out_as)
        done_section.type = "done"
        self.AddNewSection(done_section)

        # Last section points to itself.
        self.AddNewSection(done_section)

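# Example usage of EWFFileWriter (a sketch, not part of the original source;
# it assumes `out_as` is a writable address space such as
# standard.WritableFDAddressSpace, as used by EWFAcquire below):
#
#   with EWFFileWriter(out_as, session=session) as writer:
#       writer.write(data)  # May be called repeatedly; flushes 32kb chunks.

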
class EWFAcquire(plugin.PhysicalASMixin, plugin.TypedProfileCommand,
                 plugin.Command):
    """Copy the physical address space to an EWF file."""

    name = "ewfacquire"

    __args = [
        dict(name="destination", positional=True, required=False,
             help="The destination file to create. "
             "If not specified we write output.E01 in the current "
             "directory.")
    ]

    def render(self, renderer):
        if self.plugin_args.destination is None:
            out_fd = renderer.open(filename="output.E01", mode="w+b")
        else:
            directory, filename = os.path.split(self.plugin_args.destination)
            out_fd = renderer.open(filename=filename, directory=directory,
                                   mode="w+b")

        with out_fd:
            runs = list(self.physical_address_space.get_mappings())

            out_address_space = standard.WritableFDAddressSpace(
                fhandle=out_fd, session=self.session)

            with EWFFileWriter(
                    out_address_space, session=self.session) as writer:
                if len(runs) > 1:
                    elfcore.WriteElfFile(
                        self.physical_address_space,
                        writer, session=self.session)

                else:
                    last_address = runs[0].end
                    block_size = 1024 * 1024

                    for offset in xrange(0, last_address, block_size):
                        available_length = min(block_size,
                                               last_address - offset)
                        data = self.physical_address_space.read(
                            offset, available_length)

                        self.session.report_progress(
                            "Writing %sMB", offset / 1024 / 1024)

                        writer.write(data)

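# Typical command line invocation (a sketch; the exact binary name and flags
# depend on how Rekall is installed):
#
#   $ rekal -f /path/to/memory.raw ewfacquire /tmp/memory.E01
#
# The test below drives the same plugin through the test harness.

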
class TestEWFAcquire(testlib.HashChecker):
    PARAMETERS = dict(commandline="ewfacquire %(tempdir)s/output_image.e01")