#!/usr/bin/perl eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if 0; # not running under some shell eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' if 0; # not running under some shell use strict; our $VERSION = '0.15'; $|++; # ########## # # Parse ARGV # # ########## # my %args = (); my %opts = ( # name => [char, expect_arg_bit ] 'help' => ['h'], 'usage' => ['u'], 'version' => ['v'], 'stdin' => [''], 'brief' => ['b'], 'namefile' => ['f', 1], 'orig-name' => ['', 1], 'noalign' => ['N'], 'describe' => ['d'], 'file-compat' => [''], 'output-format' => ['', 1], 'language' => ['l', 1], 'mimetype' => ['i'], 'dereference' => ['L'], 'separator' => ['F',1], 'debug' => ['D'], 'database' => ['', 1], 'all' => ['a'], 'magic-only' => ['M'], ); $args{'file-compat'}++ if $0 =~ m#(^|/)file$#; while ((@ARGV) && ($ARGV[0] =~ /^-/)) { my $opt = shift @ARGV; if ($opt =~ /^--?$/) { $args{stdin}++ if $args{'file-compat'} && $opt eq '-'; last; } elsif ($opt =~ s/^--([\w-]+)(?:=(.*))?/$1/) { if (exists $opts{$opt}) { if ($opts{$opt}[1]) { my $arg = $2 || shift @ARGV; complain('--'.$opt, 2) unless defined $arg; $args{$opt} .= ( $args{$opt} ? ' ' : '' ) . $arg; } else { $args{$opt}++ } } else { complain('--'.$opt) } } elsif ($opt =~ s/^-(?!-)//) { foreach my $o (split //, $opt) { my ($key) = grep { $opts{$_}[0] eq $o } keys %opts; complain($o) unless $key; if ($opts{$key}[1]) { my $arg = shift @ARGV; complain('-'.$o, 2) unless defined $arg; $args{$key} .= ( $args{$key} ? ' ' : '' ).$arg; # join with whitespace } else { $args{$key}++; } } } else { complain($opt) } } if ($args{help} || $args{usage}) { eval 'use Pod::Usage'; die "Could not find perl module Pod::Usage\n" if $@; pod2usage( { -verbose => 1, -exitval => 0, } ); } if ($args{version}) { print "mimetype $VERSION\n\n", << 'EOV'; Copyright (c) 2003 Jaap G Karssenberg. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. EOV exit 0; } complain(undef, 4) unless scalar(@ARGV) || $args{stdin} || $args{namefile}; # ############# # # prepare stuff # # ############# # our %desc; # desc caching hash # --database @File::MimeInfo::DIRS = split /:/, $args{database} if $args{database}; ## Actually use our module ## eval 'use File::MimeInfo::Magic qw/mimetype globs inodetype magic describe/;'; die $@ if $@; *default = \&File::MimeInfo::default; # --debug if ($args{debug}) { $File::MimeInfo::DEBUG++; $File::MimeInfo::Magic::DEBUG++; print '> Data dirs are: ', join( ', ', $args{database} ? ( split /:/, $args{database} ) : ( File::BaseDir::xdg_data_home(), File::BaseDir::xdg_data_dirs() ) ), "\n"; } # --file-compat $args{describe}++ if $args{'file-compat'} && !$args{mimetype}; # --namefile if ($args{namefile}) { open IN, $args{namefile} || die "Couldn't open file: $args{namefile}\n"; unshift @ARGV, map {chomp; $_} (); close IN; } # --language $File::MimeInfo::LANG = $args{language} if $args{language}; # Formatting stuff my $l = 5; # "STDIN" unless ($args{brief} || $args{noalign}) { for (@ARGV) { $l = length($_) if $l < length($_) } } $args{separator} = ':' unless defined $args{separator}; my $format = $args{'output-format'} ? parse_format($args{'output-format'}) : $args{brief} ? sub { $args{describe} ? desc($_[1]) : $_[1] } : $args{noalign} ? sub { ( $_[0], $args{separator}, ' ', $args{describe} ? desc($_[1]) : $_[1] ) } : sub { ( $_[0], $args{separator}, ' 'x($l + 1 - length($_[0])), $args{describe} ? desc($_[1]) : $_[1] ) }; # --dereference if ($args{dereference}) { eval 'use File::Spec'; die "Could not find perl module File::Spec\n" if $@; } # --stdin if ($args{stdin}) { eval 'use IO::Scalar'; die "Could not find perl module IO::Scalar\n" if $@; } # ######## # # do stuff # # ######## # # --stdin if ($args{stdin}) { my $data; read(STDIN, $data, $File::MimeInfo::Magic::max_buffer); my $scalar = new IO::Scalar \$data; print $format->('STDIN', mimetype($scalar)), "\n"; exit; } foreach my $file (@ARGV) { # --dereference my $f = ($args{dereference} && -l $file) ? resolvelink($file) : $file; # --magic-only if ($args{'magic-only'}) { print $format->($file, magic($f) || default($f)), "\n"; } # --all elsif ($args{all}) { for (qw#inodetype globs magic default#) { my $m = eval "$_(\$f)"; print $format->($file, $m), "\n" if $m; } } elsif ($args{'orig-name'}) { my $type = globs($args{'orig-name'}); if ($type ne "") { print $format->($file, $type), "\n"; } else { print $format->($file, mimetype($f)), "\n"; } } else { print $format->($file, mimetype($f)), "\n" } } exit; # ########### # # Subroutines # # ########### # sub complain { # Error messages my $opt = shift; my $m = shift || 1; my $bn = $0; $bn =~ s|^(.*/)*||; if ($m == 1) { print STDERR "$bn: unrecognized option '$opt'" } elsif ($m == 2) { print STDERR "$bn: option '$opt' requires an argument" } elsif ($m == 3) { print STDERR "$bn: $opt: No such file or directory\n" } elsif ($m == 4) { print STDERR "usage: $bn [options] files" } print "\nTry '$bn --help' for more information.\n" unless $m == 3; exit $m; } sub parse_format { # Advanced formatting my $form = shift; my $code = "sub { '$form' }"; # code will get @_ = qw/file type/ $code =~ s/(?splitpath($file); $link = File::Spec->rel2abs($link, $dir); $link = resolvelink($link) if -l $link; # recurs return $link; } sub desc { # Cache desciption my $mt = shift; return undef unless $mt; $desc{$mt} ||= describe($mt) || describe($mt, ''); # second form overrules the language settings to default } __END__ =head1 NAME mimetype - Determine file type =head1 SYNOPSIS mimetype [options] [-] files =head1 DESCRIPTION This script tries to determine the mime type of a file using the Shared MIME-info database. It is intended as a kind of I work-alike, but uses mimetypes instead of descriptions. If one symlinks the I command to I it will behave a little more compatible, see L. Commandline options to specify alternative magic files are not implemented the same because of the conflicting data formats. Also the wording of the descriptions will differ. For naming switches I followed the manpage of file(1) version 4.02 when possible. They seem to differ completely from the spec in the 'utilities' chapter of IEEE Std 1003.1-2001 (POSIX). =head1 OPTIONS =over 4 =item B<-a>, B<--all> Show output of all rules that match the file. TODO: this method now just returns one match for each method (globs, magic, etc.). =item B<-b>, B<--brief> Do not prepend filenames to output lines (brief mode). =item B<--database>=I:I:... Force the program to look in these directories for the shared mime-info database. The directories specified by the basedir specification are ignored. =item B<-d>, B<--describe> Print file descriptions instead of mime types, this is the default when using L. =item B<-D>, B<--debug> Print debug information about how the mimetype was determined. =item B<-f> I, B<--namefile>=I Read the names of the files to be examined from the file 'namefile' (one per line) before the argument list. =item B<--file-compat> Make mimetype behave a little more L compatible. This is turned on automaticly when you call mimetype by a link called 'file'. A single '-' won't be considered a seperator between options and filenames anymore, but becomes identical to L. ( You can still use '--' as seperator, but that is not backward compatible with the original file command. ) Also the default becomes to print descriptions instead of mimetypes. =item B<-F> I, B<--separator>=I Use string as custom separator between the file name and its mimetype or description, defaults to ':' . =item B<-h>, B<--help> =item B<-u>, B<--usage> Print a help message and exits. =item B<-i>, B<--mimetype> Use mime types, opposite to L, this is the default when _not_ using L. =item B<-L>, B<--dereference> Follow symbolic links. =item B<-l> I, B<--language>=I The language attribute specifies a two letter language code, this makes descriptions being outputted in the specified language. =item B<-M>, B<--magic-only> Do not check for extensions, globs or inode type, only look at the content of the file. This is particularly useful if for some reason you don't trust the name or the extension a file has. =item B<-N>, B<--noalign> Do not align output fields. =item B<--output-format> If you want an alternative output format, you can specify a format string containing the following escapes: %f for the filename %d description %m mime type Alignment is not available when using this, you need to post-process the output to do that. =item B<--stdin> Determine type of content from STDIN, less powerfull then normal file checking because it only uses magic typing. This will happen also if the STDIN filehandle is a pipe. To use this option L needs to be installed. =item B<-v>, B<--version> Print the version of the program and exit. =back =head1 ENVIRONMENT =over 4 =item XDG_DATA_HOME =item XDG_DATA_DIRS These variables can list base directories to search for data files. The shared mime-info will be expected in the "mime" sub directory of one of these directories. If these are not set, there will be searched for the following directories: $HOME/.local/share/mime /usr/local/share/mime /usr/share/mime See also the "XDG Base Directory Specification" L =back =head1 FILES The base dir for all data files is determined by two environment variables, see L. =over 4 =item F All other files are compiled from these source files. To re-compile them use B. =item F Compiled information about globs. =item F Compiled information about magic numbers. =item F Descriptions of a mimetype in multiple languages, used for the L switch. =back =head1 DIAGNOSTICS If a file has an empty mimetype or an empty description, most probably the file doesn't exist and the given name doesn't match any globs. An empty description can also mean that there is no description available in the language you specified. The program exits with a non-zero exit value if either the commandline arguments failed, a module it depends on wasn't found or the shared mime-info database wasn't accesable. See L for more details. =head1 TODO The '--all' switch doesn't really show all matches, but only one per mime-typing method. This needs to be implemnted in the modules first. =head1 BUGS No known bugs, please mail the author if you find one. B doesn't provide a switch for looking inside compressed files because it seems to me that this can only be done by un-compressing the file, something that defeats the purpose. On the other hand the option should exist for strict compatibility with file(1). Possibly a subclass should be made for this one day. =head1 AUTHOR Jaap Karrssenberg Epardus@cpan.orgE =head1 COPYRIGHT Copyright (c) 2003,2008 Jaap G Karssenberg. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. =head1 SEE ALSO L, L, L, L