# Found on a russian zope mailing list, and modified to fix bugs in parsing # the magic file and string making # -- Daniel Berlin import sys, struct, time, re, exceptions, pprint, stat, os, pwd, grp _mew = 0 # _magic='/tmp/magic' # _magic='/usr/share/magic.mime' _magic='/usr/share/magic.mime' mime = 1 _ldate_adjust = lambda x: time.mktime( time.gmtime(x) ) BUFFER_SIZE = 1024 * 128 # 128K should be enough... class MagicError(exceptions.Exception): pass def _handle(fmt='@x',adj=None): return fmt, struct.calcsize(fmt), adj KnownTypes = { # 'byte':_handle('@b'), 'byte':_handle('@B'), 'ubyte':_handle('@B'), 'string':('s',0,None), 'pstring':_handle('p'), # 'short':_handle('@h'), # 'beshort':_handle('>h'), # 'leshort':_handle('H'), 'leshort':_handle('H'), 'uleshort':_handle('l'), 'lelong':_handle('L'), 'ulelong':_handle('l'), 'ledate':_handle('l',_ldate_adjust), 'leldate':_handle('' : mod, value = self.test[0], make_string( self.test[1:] ) else: mod, value = '=', make_string( self.test ) else: if self.test[0] in '=<>&^' : mod, value = self.test[0], a2i(self.test[1:]) elif self.test[0] == 'x': mod = self.test[0] value = 0 else: mod, value = '=', a2i(self.test) return mod, value def read(self,file): mew( 's' ) file.seek( self.offset(file), 0 ) # SEEK_SET mew( 'r' ) try: data = rdata = None # XXX self.size might be 0 here... if self.size == 0 : # this is an ASCIIZ string... size = None if self.test != '>\\0' : # magic's hack for string read... value = self.get_mod_and_value()[1] size = (value=='\0') and None or len(value) rdata = data = read_asciiz( file, size=size ) else: rdata = file.read( self.size ) if not rdata or (len(rdata)!=self.size) : return None data = struct.unpack( self.struct, rdata )[0] # XXX hack?? except: print >>sys.stderr, self print >>sys.stderr, '@%s struct=%s size=%d rdata=%s' % ( self.offset, `self.struct`, self.size,`rdata`) raise mew( 'R' ) if self.cast : data = self.cast( data ) if self.mask : try: if self.nmod == '&' : data &= self.mask elif self.nmod == '+' : data += self.mask elif self.nmod == '-' : data -= self.mask else: raise MagicTestError(self.nmod) except: print >>sys.stderr,'data=%s nmod=%s mask=%s' % ( `data`, `self.nmod`, `self.mask` ) raise return data def check(self,data): mew('.') if self.mtype == 'true' : return '' # not None ! mod, value = self.get_mod_and_value() if self.type[-6:] == 'string' : # "something like\tthis\n" if self.smod : xdata = data if 'b' in self.smod : # all blanks are optional xdata = ''.join( data.split() ) value = ''.join( value.split() ) if 'c' in self.smod : # all blanks are optional xdata = xdata.upper() value = value.upper() # if 'B' in self.smod : # compact blanks ### XXX sorry, i don't understand this :-( # data = ' '.join( data.split() ) # if ' ' not in data : return None else: xdata = data try: if mod == '=' : result = data == value elif mod == '<' : result = data < value elif mod == '>' : result = data > value elif mod == '&' : result = data & value elif mod == '^' : result = (data & (~value)) == 0 elif mod == 'x' : result = 1 else : raise MagicTestError(self.test) if result : zdata, zval = `data`, `value` if self.mtype[-6:]!='string' : try: zdata, zval = hex(data), hex(value) except: zdata, zval = `data`, `value` if 0 : print >>sys.stderr, '%s @%s %s:%s %s %s => %s (%s)' % ( '>'*self.level, self.offset, zdata, self.mtype, `mod`, zval, `result`, self.message ) return result except: print >>sys.stderr,'mtype=%s data=%s mod=%s value=%s' % ( `self.mtype`, `data`, `mod`, `value` ) raise def add(self,mt): if not isinstance(mt,MagicTest) : raise MagicTestError((mt,'incorrect subtest type %s'%(type(mt),))) if mt.level == self.level+1 : self.subtests.append( mt ) elif self.subtests : self.subtests[-1].add( mt ) elif mt.level > self.level+1 : # it's possible to get level 3 just after level 1 !!! :-( level = self.level + 1 while level < mt.level : xmt = MagicTest(None,'true','x','',line=self.line,level=level) self.add( xmt ) level += 1 else: self.add( mt ) # retry... else: raise MagicTestError((mt,'incorrect subtest level %s'%(`mt.level`,))) def last_test(self): return self.subtests[-1] #end class MagicTest class OffsetError(MagicError): pass class Offset: pos_format = {'b':'B','s':'H','l':'I',} pattern0 = re.compile(r''' # mere offset ^ &? # possible ampersand ( 0 # just zero | [1-9]{1,1}[0-9]* # decimal | 0[0-7]+ # octal | 0x[0-9a-f]+ # hex ) $ ''', re.X|re.I ) pattern1 = re.compile(r''' # indirect offset ^\( (?P&?0 # just zero |&?[1-9]{1,1}[0-9]* # decimal |&?0[0-7]* # octal |&?0x[0-9A-F]+ # hex ) (?P \. # this dot might be alone [BSL]? # one of this chars in either case )? (?P [-+]{0,1} )? (?P0 # just zero |[1-9]{1,1}[0-9]* # decimal |0[0-7]* # octal |0x[0-9a-f]+ # hex )? \)$''', re.X|re.I ) def __init__(self,s): self.source = s self.value = None self.relative = 0 self.base = self.type = self.sign = self.offs = None m = Offset.pattern0.match( s ) if m : # just a number if s[0] == '&' : self.relative, self.value = 1, int( s[1:], 0 ) else: self.value = int( s, 0 ) return m = Offset.pattern1.match( s ) if m : # real indirect offset try: self.base = m.group('base') if self.base[0] == '&' : self.relative, self.base = 1, int( self.base[1:], 0 ) else: self.base = int( self.base, 0 ) if m.group('type') : self.type = m.group('type')[1:] self.sign = m.group('sign') if m.group('off') : self.offs = int( m.group('off'), 0 ) if self.sign == '-' : self.offs = 0 - self.offs except: print >>sys.stderr, '$$', m.groupdict() raise return raise OffsetError(`s`) def __call__(self,file=None): if self.value is not None : return self.value pos = file.tell() try: if not self.relative : file.seek( self.offset, 0 ) frmt = Offset.pos_format.get( self.type, 'I' ) size = struct.calcsize( frmt ) data = struct.unpack( frmt, file.read( size ) ) if self.offs : data += self.offs return data finally: file.seek( pos, 0 ) def __str__(self): return self.source def __repr__(self): return 'Offset(%s)' % `self.source` #end class Offset class MagicFileError(MagicError): pass class MagicFile: def __init__(self,filename=_magic): self.file = None self.tests = [] self.total_tests = 0 self.load( filename ) self.ack_tests = None self.nak_tests = None def __del__(self): self.close() def load(self,filename=None): self.open( filename ) self.parse() self.close() def open(self,filename=None): self.close() if filename is not None : self.filename = filename self.file = open( self.filename, 'r', BUFFER_SIZE ) def close(self): if self.file : self.file.close() self.file = None def parse(self): line_no = 0 for line in self.file.xreadlines() : line_no += 1 if not line or line[0]=='#' : continue line = line.lstrip().rstrip('\r\n') if not line or line[0]=='#' : continue try: x = self.parse_line( line ) if x is None : print >>sys.stderr, '#[%04d]#'%line_no, line continue except: print >>sys.stderr, '###[%04d]###'%line_no, line raise self.total_tests += 1 level, offset, mtype, test, message = x new_test = MagicTest(offset,mtype,test,message, line=line_no,level=level) try: if level == 0 : self.tests.append( new_test ) else: self.tests[-1].add( new_test ) except: if 1 : print >>sys.stderr, 'total tests=%s' % ( `self.total_tests`, ) print >>sys.stderr, 'level=%s' % ( `level`, ) print >>sys.stderr, 'tests=%s' % ( pprint.pformat(self.tests), ) raise else: while self.tests[-1].level > 0 : self.tests.pop() def parse_line(self,line): # print >>sys.stderr, 'line=[%s]' % line if (not line) or line[0]=='#' : return None level = 0 offset = mtype = test = message = '' mask = None # get optional level (count leading '>') while line and line[0]=='>' : line, level = line[1:], level+1 # get offset while line and not line[0].isspace() : offset, line = offset+line[0], line[1:] try: offset = Offset(offset) except: print >>sys.stderr, 'line=[%s]' % line raise # skip spaces line = line.lstrip() # get type c = None while line : last_c, c, line = c, line[0], line[1:] if last_c!='\\' and c.isspace() : break # unescaped space - end of field else: mtype += c if last_c == '\\' : c = None # don't fuck my brain with sequential backslashes # skip spaces line = line.lstrip() # get test c = None while line : last_c, c, line = c, line[0], line[1:] if last_c!='\\' and c.isspace() : break # unescaped space - end of field else: test += c if last_c == '\\' : c = None # don't fuck my brain with sequential backslashes # skip spaces line = line.lstrip() # get message message = line if mime and line.find("\t") != -1: message=line[0:line.find("\t")] # # print '>>', level, offset, mtype, test, message return level, offset, mtype, test, message def detect(self,file): self.ack_tests = 0 self.nak_tests = 0 answers = [] for test in self.tests : message = test.run( file ) if message : self.ack_tests += 1 answers.append( message ) else: self.nak_tests += 1 if answers : return '; '.join( answers ) #end class MagicFile def username(uid): try: return pwd.getpwuid( uid )[0] except: return '#%s'%uid def groupname(gid): try: return grp.getgrgid( gid )[0] except: return '#%s'%gid def get_file_type(fname,follow): t = None if not follow : try: st = os.lstat( fname ) # stat that entry, don't follow links! except os.error, why : pass else: if stat.S_ISLNK(st[stat.ST_MODE]) : t = 'symbolic link' try: lnk = os.readlink( fname ) except: t += ' (unreadable)' else: t += ' to '+lnk if t is None : try: st = os.stat( fname ) except os.error, why : return "can't stat `%s' (%s)." % (why.filename,why.strerror) dmaj, dmin = (st.st_rdev>>8)&0x0FF, st.st_rdev&0x0FF if 0 : pass elif stat.S_ISSOCK(st.st_mode) : t = 'socket' elif stat.S_ISLNK (st.st_mode) : t = follow and 'symbolic link' or t elif stat.S_ISREG (st.st_mode) : t = 'file' elif stat.S_ISBLK (st.st_mode) : t = 'block special (%d/%d)'%(dmaj,dmin) elif stat.S_ISDIR (st.st_mode) : t = 'directory' elif stat.S_ISCHR (st.st_mode) : t = 'character special (%d/%d)'%(dmaj,dmin) elif stat.S_ISFIFO(st.st_mode) : t = 'pipe' else: t = '' if st.st_mode & stat.S_ISUID : t = 'setuid(%d=%s) %s'%(st.st_uid,username(st.st_uid),t) if st.st_mode & stat.S_ISGID : t = 'setgid(%d=%s) %s'%(st.st_gid,groupname(st.st_gid),t) if st.st_mode & stat.S_ISVTX : t = 'sticky '+t return t HELP = '''%s [options] [files...] Options: -?, --help -- this help -m, --magic= -- use this magic instead of %s -f, --files= -- read filenames for * -C, --compile -- write "compiled" magic file -b, --brief -- don't prepend filenames to output lines + -c, --check -- check the magic file -i, --mime -- output MIME types * -k, --keep-going -- don't stop st the first match -n, --flush -- flush stdout after each line -v, --verson -- print version and exit * -z, --compressed -- try to look inside compressed files -L, --follow -- follow symlinks -s, --special -- don't skip special files * -- not implemented so far ;-) + -- implemented, but in another way... ''' def main(): import getopt global _magic try: brief = 0 flush = 0 follow= 0 mime = 0 check = 0 special=0 try: opts, args = getopt.getopt( sys.argv[1:], '?m:f:CbciknvzLs', ( 'help', 'magic=', 'names=', 'compile', 'brief', 'check', 'mime', 'keep-going', 'flush', 'version', 'compressed', 'follow', 'special', ) ) except getopt.error, why: print >>sys.stderr, sys.argv[0], why return 1 else: files = None for o,v in opts : if o in ('-?','--help'): print HELP % ( sys.argv[0], _magic, ) return 0 elif o in ('-f','--files='): files = v elif o in ('-m','--magic='): _magic = v[:] elif o in ('-C','--compile'): pass elif o in ('-b','--brief'): brief = 1 elif o in ('-c','--check'): check = 1 elif o in ('-i','--mime'): mime = 1 if os.path.exists( _magic+'.mime' ) : _magic += '.mime' print >>sys.stderr,sys.argv[0]+':',\ "Using regular magic file `%s'" % _magic elif o in ('-k','--keep-going'): pass elif o in ('-n','--flush'): flush = 1 elif o in ('-v','--version'): print 'VERSION' return 0 elif o in ('-z','--compressed'): pass elif o in ('-L','--follow'): follow = 1 elif o in ('-s','--special'): special = 1 else: if files : files = map(lambda x: x.strip(), v.split(',')) if '-' in files and '-' in args : error( 1, 'cannot use STDIN simultaneously for file list and data' ) for file in files : for name in ( (file=='-') and sys.stdin or open(file,'r',BUFFER_SIZE) ).xreadlines(): name = name.strip() if name not in args : args.append( name ) try: if check : print >>sys.stderr, 'Loading magic database...' t0 = time.time() m = MagicFile(_magic) t1 = time.time() if check : print >>sys.stderr, \ m.total_tests, 'tests loaded', \ 'for', '%.2f' % (t1-t0), 'seconds' print >>sys.stderr, len(m.tests), 'tests at top level' return 0 # XXX "shortened" form ;-) mlen = max( map(len, args) )+1 for arg in args : if not brief : print (arg + ':').ljust(mlen), ftype = get_file_type( arg, follow ) if (special and ftype.find('special')>=0) \ or ftype[-4:] == 'file' : t0 = time.time() try: t = m.detect( arg ) except (IOError,os.error), why: t = "can't read `%s' (%s)" % (why.filename,why.strerror) if ftype[-4:] == 'file' : t = ftype[:-4] + t t1 = time.time() print t and t or 'data' if 0 : print \ '#\t%d tests ok, %d tests failed for %.2f seconds'%\ (m.ack_tests, m.nak_tests, t1-t0) else: print mime and 'application/x-not-regular-file' or ftype if flush : sys.stdout.flush() # print >>sys.stderr, 'DONE' except: if check : return 1 raise else: return 0 finally: pass if __name__ == '__main__' : sys.exit( main() ) # vim:ai # EOF #