#!/usr/bin/python
'''
Create a file in Hashkeeper format, which can be imported into Encase.
Structure is based on information provided by Sharren Redmond
(Creating Hashsets Manually).

NOTE: This was quickly written to hash a small subset of data to compare
against multiple evidence files in EnCase. If something is wrong or some
features are desired then let me know.

-=[ dxp ]=-
dxp2532@gmail.com
'''

'''
Released under the 3-clause BSD license:

Copyright (c) 2011, dxp
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.
    * Neither the name of the nor the names of its contributors may be used
      to endorse or promote products derived from this software without
      specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
'''

import os
import sys
import hashlib
from time import strftime, localtime, ctime

# Globals
# Column headers written verbatim as the first record of each output file.
# (The spelling "authenicated_flag" is kept exactly as the script has always
# emitted it; it is runtime data, not a comment.)
HEADER_HSH = '"file_id","hashset_id","file_name","directory","hash","file_size","date_modified","time_modified","time_zone","comments","date_accessed","time_accessed"\n\n'
HEADER_HKE = '"hashset_id","name","vendor","package","version","authenicated_flag","notable_flag","initials","num_of_files","description","date_loaded"\n\n'
# Number of bytes read from a file per hashing step (4 MiB).
BUF_LEN = 4096 * 1024


def Usage():
    '''Print the command-line synopsis and the meaning of each argument.'''
    print("Usage: %s CATEGORY DESC TYPE FILE" % os.path.basename(sys.argv[0]))
    print("\t CATEGORY = unique identifier for the hashset (e.g. \"corporate files\", pr0n, malware)")
    print("\t DESC = short description of hashset's contents (e.g. \"suspected malware\")")
    print("\t TYPE = \"notable\" or \"known\" (\"notable\" = malicious or suspicious, \"known\" = clean files)")
    print("\t FILE = input file with a list of file names to be included into the set (one per line)")


def _reason(exc):
    '''Return a printable reason for an IOError/OSError.'''
    # strerror can be None for errors not raised by the OS layer.
    return exc.strerror or str(exc)


def _md5_of_stream(stream, buf_len=BUF_LEN):
    '''Return the hex MD5 digest of everything readable from *stream*.

    The stream is consumed in chunks of *buf_len* bytes so that large files
    are never held in memory at once. Read errors (IOError) propagate.
    '''
    digest = hashlib.md5()
    while True:
        chunk = stream.read(buf_len)
        if not chunk:
            break
        digest.update(chunk)
    return digest.hexdigest()


def _date_and_time(timestamp):
    '''Convert a unix timestamp to ("MM/DD/YYYY", "HH:MM:SS") in local time.'''
    local = localtime(timestamp)
    return strftime("%m/%d/%Y", local), strftime("%H:%M:%S", local)


def _hsh_row(path, file_id, comment):
    '''Build the .hsh record for *path*, or return None if it is skipped.

    Entries that are not regular files, or that are empty, are skipped
    silently. Stat, open and read failures are reported on stdout and the
    entry is skipped.

    path    -- file to hash
    file_id -- value for the "file_id" column
    comment -- value for the "comments" column
    '''
    # isfile() is already False for paths that do not exist.
    if not os.path.isfile(path):
        return None

    try:
        # Timestamps are captured before the file is opened for hashing.
        modified = os.path.getmtime(path)
        accessed = os.path.getatime(path)
        size = os.path.getsize(path)
    except OSError as exc:
        print("[e]... Failed to stat %s: %s" % (path, _reason(exc)))
        return None

    if size == 0:
        return None

    try:
        # Binary mode: text mode would translate newlines (and stop at
        # Ctrl-Z) on Windows, yielding a wrong digest.
        stream = open(path, "rb")
    except IOError as exc:
        print("[e]... Failed to open %s: %s" % (path, _reason(exc)))
        return None

    try:
        digest = _md5_of_stream(stream)
    except IOError as exc:
        print("[e]... Failed to read %s: %s" % (path, _reason(exc)))
        return None
    finally:
        stream.close()

    mday, mtime = _date_and_time(modified)
    aday, atime = _date_and_time(accessed)

    # NOTE(review): the time zone column is hard-coded to "EDT" although the
    # timestamps are rendered in the machine's local zone -- confirm whether
    # this should follow the local zone instead.
    return (str(file_id) + ',1,"' + os.path.basename(path) + '","'
            + os.path.dirname(path) + '","' + digest + '",' + str(size)
            + ',' + mday + ',' + mtime + ',EDT,"' + comment + '",'
            + aday + ',' + atime + '\n')


def _open_outputs(path_hsh, path_hke):
    '''Create both hashset files and return (hsh, hke) file objects.

    If the second file cannot be created the first one is closed before the
    IOError is re-raised, so no handle is leaked.
    '''
    out_hsh = open(path_hsh, "w+")
    try:
        out_hke = open(path_hke, "w+")
    except IOError:
        out_hsh.close()
        raise
    return out_hsh, out_hke


def main(argv=None):
    '''Create CATEGORY.hsh and CATEGORY.hke from a list of file names.

    argv -- argument vector laid out like sys.argv
            (program, CATEGORY, DESC, TYPE, FILE); defaults to sys.argv.

    Returns the process exit status: 0 on success, 1 on a usage error or
    when the list file or the output files cannot be opened.
    '''
    if argv is None:
        argv = sys.argv

    # Check for correct number of arguments
    if len(argv) != 5:
        Usage()
        return 1

    category, hashset_name, set_type, list_filename = argv[1:5]
    if set_type not in ("notable", "known"):
        Usage()
        return 1
    notable_flag = "1" if set_type == "notable" else "0"

    file_hsh = category + ".hsh"
    file_hke = category + ".hke"

    # Read the list once; its length drives the progress display.
    try:
        with open(list_filename, "r") as fd_list:
            entries = [line.strip() for line in fd_list]
    except IOError as exc:
        print("[e]... Failed to open file: %s" % _reason(exc))
        return 1
    total = len(entries)

    try:
        fd_hsh, fd_hke = _open_outputs(file_hsh, file_hke)
    except IOError as exc:
        print("[e]... Failed to create hashset files (%s and %s): %s"
              % (file_hsh, file_hke, _reason(exc)))
        return 1

    try:
        #
        # Populate hashset files with header
        #
        fd_hsh.write(HEADER_HSH)
        fd_hke.write(HEADER_HKE)
        # NOTE(review): "num_of_files" is written as 0 and DESC/CATEGORY land
        # in the "name"/"description" columns respectively; kept as the
        # script has always produced them -- confirm against the importer.
        fd_hke.write('1,"' + hashset_name + '",,,,1,' + notable_flag
                     + ',,0,"' + category + '",' + '\n')

        print("[i]... Processing %i entries" % (total))

        # Loop through each entry and store results into the .hsh file
        file_id = 0
        shown = 0
        for count, path in enumerate(entries, 1):
            row = _hsh_row(path, file_id + 1, category)
            if row is not None:
                # Only entries that were actually hashed consume an id.
                file_id += 1
                fd_hsh.write(row)

            # show percentage complete (for every entry, skipped or not)
            perc = count * 100 // total
            if perc != shown:
                sys.stdout.write("\r\t %i%%" % (perc))
                sys.stdout.flush()
                shown = perc
    finally:
        fd_hsh.close()
        fd_hke.close()

    print("\n[i]... Done, hashset created: \"%s\" and \"%s\"" % (file_hke, file_hsh))
    return 0


if __name__ == "__main__":
    sys.exit(main())