summaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
authorFlorian Pritz <bluewind@xssn.at>2010-04-01 03:04:42 +0200
committerFlorian Pritz <bluewind@xssn.at>2010-04-01 03:04:42 +0200
commit8c514266239c745101a71cdf9a5f1982fc3812ce (patch)
treea5e2bf0c64ffe246888860e32ecdcb0df988c88f /scripts
parentf8fac68fd797e059aa1b84951314fc15983ee493 (diff)
add mimetype script
Signed-off-by: Florian Pritz <bluewind@xssn.at>
Diffstat (limited to 'scripts')
-rwxr-xr-xscripts/mimetype473
1 files changed, 473 insertions, 0 deletions
diff --git a/scripts/mimetype b/scripts/mimetype
new file mode 100755
index 000000000..ec191adc8
--- /dev/null
+++ b/scripts/mimetype
@@ -0,0 +1,473 @@
+#!/usr/bin/perl
+
+eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
+ if 0; # not running under some shell
+
+eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
+ if 0; # not running under some shell
+
+use strict;
+our $VERSION = '0.15';
+$|++;
+
+# ########## #
+# Parse ARGV #
+# ########## #
+my %args = ();
+my %opts = (
+ # name => [char, expect_arg_bit ]
+ 'help' => ['h'],
+ 'usage' => ['u'],
+ 'version' => ['v'],
+ 'stdin' => [''],
+ 'brief' => ['b'],
+ 'namefile' => ['f', 1],
+ 'noalign' => ['N'],
+ 'describe' => ['d'],
+ 'file-compat' => [''],
+ 'output-format' => ['', 1],
+ 'language' => ['l', 1],
+ 'mimetype' => ['i'],
+ 'dereference' => ['L'],
+ 'separator' => ['F',1],
+ 'debug' => ['D'],
+ 'database' => ['', 1],
+ 'all' => ['a'],
+ 'magic-only' => ['M'],
+);
+
+$args{'file-compat'}++ if $0 =~ m#(^|/)file$#;
+
+while ((@ARGV) && ($ARGV[0] =~ /^-/)) {
+ my $opt = shift @ARGV;
+ if ($opt =~ /^--?$/) {
+ $args{stdin}++ if $args{'file-compat'} && $opt eq '-';
+ last;
+ }
+ elsif ($opt =~ s/^--([\w-]+)(?:=(.*))?/$1/) {
+ if (exists $opts{$opt}) {
+ if ($opts{$opt}[1]) {
+ my $arg = $2 || shift @ARGV;
+ complain('--'.$opt, 2) unless defined $arg;
+ $args{$opt} .= ( $args{$opt} ? ' ' : '' ) . $arg;
+ }
+ else { $args{$opt}++ }
+ }
+ else { complain('--'.$opt) }
+ }
+ elsif ($opt =~ s/^-(?!-)//) {
+ foreach my $o (split //, $opt) {
+ my ($key) = grep { $opts{$_}[0] eq $o } keys %opts;
+ complain($o) unless $key;
+
+ if ($opts{$key}[1]) {
+ my $arg = shift @ARGV;
+ complain('-'.$o, 2) unless defined $arg;
+ $args{$key} .= ( $args{$key} ? ' ' : '' ).$arg; # join with whitespace
+ }
+ else { $args{$key}++; }
+ }
+ }
+ else { complain($opt) }
+}
+
+if ($args{help} || $args{usage}) {
+ eval 'use Pod::Usage';
+ die "Could not find perl module Pod::Usage\n" if $@;
+ pod2usage( {
+ -verbose => 1,
+ -exitval => 0,
+ } );
+}
+
+if ($args{version}) {
+ print "mimetype $VERSION\n\n", << 'EOV';
+Copyright (c) 2003 Jaap G Karssenberg. All rights reserved.
+This program is free software; you can redistribute it and/or
+modify it under the same terms as Perl.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+EOV
+ exit 0;
+}
+
+complain(undef, 4) unless scalar(@ARGV) || $args{stdin} || $args{namefile};
+
+# ############# #
+# prepare stuff #
+# ############# #
+
+our %desc; # desc caching hash
+
+# --database
+@File::MimeInfo::DIRS = split /:/, $args{database} if $args{database};
+
+## Actually use our module ##
+eval 'use File::MimeInfo::Magic qw/mimetype globs inodetype magic describe/;';
+die $@ if $@;
+
+*default = \&File::MimeInfo::default;
+
+# --debug
+if ($args{debug}) {
+ $File::MimeInfo::DEBUG++;
+ $File::MimeInfo::Magic::DEBUG++;
+ print '> Data dirs are: ', join( ', ',
+ $args{database}
+ ? ( split /:/, $args{database} )
+ : (
+ File::BaseDir::xdg_data_home(),
+ File::BaseDir::xdg_data_dirs()
+ )
+ ), "\n";
+}
+
+# --file-compat
+$args{describe}++ if $args{'file-compat'} && !$args{mimetype};
+
+# --namefile
+if ($args{namefile}) {
+ open IN, $args{namefile}
+ || die "Couldn't open file: $args{namefile}\n";
+ unshift @ARGV, map {chomp; $_} (<IN>);
+ close IN;
+}
+
+# --language
+$File::MimeInfo::LANG = $args{language} if $args{language};
+
+# Formatting stuff
+my $l = 5; # "STDIN"
+unless ($args{brief} || $args{noalign}) {
+ for (@ARGV) { $l = length($_) if $l < length($_) }
+}
+
+$args{separator} = ':' unless defined $args{separator};
+my $format = $args{'output-format'}
+ ? parse_format($args{'output-format'})
+ : $args{brief}
+ ? sub { $args{describe} ? desc($_[1]) : $_[1] }
+ : $args{noalign}
+ ? sub { ( $_[0], $args{separator}, ' ', $args{describe} ? desc($_[1]) : $_[1] ) }
+ : sub { ( $_[0], $args{separator}, ' 'x($l + 1 - length($_[0])),
+ $args{describe} ? desc($_[1]) : $_[1] ) };
+
+# --dereference
+if ($args{dereference}) {
+ eval 'use File::Spec';
+ die "Could not find perl module File::Spec\n" if $@;
+}
+# --stdin
+if ($args{stdin}) {
+ eval 'use IO::Scalar';
+ die "Could not find perl module IO::Scalar\n" if $@;
+}
+
+# ######## #
+# do stuff #
+# ######## #
+
+# --stdin
+if ($args{stdin}) {
+ my $data;
+ read(STDIN, $data, $File::MimeInfo::Magic::max_buffer);
+ my $scalar = new IO::Scalar \$data;
+ print $format->('STDIN', mimetype($scalar)), "\n";
+ exit;
+}
+
+foreach my $file (@ARGV) {
+ # --dereference
+ my $f = ($args{dereference} && -l $file) ? resolvelink($file) : $file;
+ # --magic-only
+ if ($args{'magic-only'}) {
+ print $format->($file, magic($f) || default($f)), "\n";
+ }
+ # --all
+ elsif ($args{all}) {
+ for (qw#inodetype globs magic default#) {
+ my $m = eval "$_(\$f)";
+ print $format->($file, $m), "\n" if $m;
+ }
+ }
+ else { print $format->($file, mimetype($f)), "\n" }
+}
+
+exit;
+
+# ########### #
+# Subroutines #
+# ########### #
+
+sub complain { # Error messages
+ my $opt = shift;
+ my $m = shift || 1;
+
+ my $bn = $0;
+ $bn =~ s|^(.*/)*||;
+ if ($m == 1) { print STDERR "$bn: unrecognized option '$opt'" }
+ elsif ($m == 2) { print STDERR "$bn: option '$opt' requires an argument" }
+ elsif ($m == 3) { print STDERR "$bn: $opt: No such file or directory\n" }
+ elsif ($m == 4) { print STDERR "usage: $bn [options] files" }
+
+ print "\nTry '$bn --help' for more information.\n" unless $m == 3;
+ exit $m;
+}
+
+sub parse_format { # Advanced formatting
+ my $form = shift;
+ my $code = "sub { '$form' }";
+ # code will get @_ = qw/file type/
+ $code =~ s/(?<!\\)%f/'.\$_[0].'/g;
+ $code =~ s/(?<!\\)%m/'.\$_[1].'/g;
+ $code =~ s/(?<!\\)%d/'.desc(\$_[1]).'/g;
+ return eval $code;
+}
+
+sub resolvelink { # --dereference
+ my $file = shift;
+ my $link = readlink($file) || return $file;
+ my (undef, $dir, undef) = File::Spec->splitpath($file);
+ $link = File::Spec->rel2abs($link, $dir);
+ $link = resolvelink($link) if -l $link; # recurs
+ return $link;
+}
+
+sub desc { # Cache desciption
+ my $mt = shift;
+ return undef unless $mt;
+ $desc{$mt} ||= describe($mt) || describe($mt, ''); # second form overrules the language settings to default
+}
+
+__END__
+
+=head1 NAME
+
+mimetype - Determine file type
+
+=head1 SYNOPSIS
+
+mimetype [options] [-] files
+
+=head1 DESCRIPTION
+
+This script tries to determine the mime type of a file using the
+Shared MIME-info database. It is intended as a kind of I<file(1)> work-alike,
+but uses mimetypes instead of descriptions.
+
+If one symlinks the I<file> command to I<mimetype> it will behave
+a little more compatible, see L</--file-compat>.
+Commandline options to specify alternative magic files are not
+implemented the same because of the conflicting data formats.
+Also the wording of the descriptions will differ.
+
+For naming switches I followed the manpage of file(1) version 4.02
+when possible. They seem to differ completely from the spec in the
+'utilities' chapter of IEEE Std 1003.1-2001 (POSIX).
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-a>, B<--all>
+
+Show output of all rules that match the file.
+
+TODO: this method now just returns one match for each
+method (globs, magic, etc.).
+
+=item B<-b>, B<--brief>
+
+Do not prepend filenames to output lines (brief mode).
+
+=item B<--database>=I<mimedir>:I<mimedir>:...
+
+Force the program to look in these directories for the shared mime-info
+database. The directories specified by the basedir specification
+are ignored.
+
+=item B<-d>, B<--describe>
+
+Print file descriptions instead of mime types, this is the
+default when using L</--file-compat>.
+
+=item B<-D>, B<--debug>
+
+Print debug information about how the mimetype was determined.
+
+=item B<-f> I<namefile>, B<--namefile>=I<namefile>
+
+Read the names of the files to be examined from the file 'namefile'
+(one per line) before the argument list.
+
+=item B<--file-compat>
+
+Make mimetype behave a little more L<file(1)> compatible. This
+is turned on automaticly when you call mimetype by a link
+called 'file'.
+
+A single '-' won't be considered a seperator
+between options and filenames anymore, but becomes identical to L</--stdin>.
+( You can still use '--' as seperator, but
+that is not backward compatible with the original file command. )
+Also the default becomes to print descriptions instead of mimetypes.
+
+=item B<-F> I<string>, B<--separator>=I<string>
+
+Use string as custom separator between the file name and its mimetype
+or description, defaults to ':' .
+
+=item B<-h>, B<--help>
+
+=item B<-u>, B<--usage>
+
+Print a help message and exits.
+
+=item B<-i>, B<--mimetype>
+
+Use mime types, opposite to L</--describe>,
+this is the default when _not_ using L</--file-compat>.
+
+=item B<-L>, B<--dereference>
+
+Follow symbolic links.
+
+=item B<-l> I<code>, B<--language>=I<code>
+
+The language attribute specifies a two letter language code, this makes
+descriptions being outputted in the specified language.
+
+=item B<-M>, B<--magic-only>
+
+Do not check for extensions, globs or inode type, only look at the content
+of the file. This is particularly useful if for some reason you don't trust
+the name or the extension a file has.
+
+=item B<-N>, B<--noalign>
+
+Do not align output fields.
+
+=item B<--output-format>
+
+If you want an alternative output format, you can specify a format string
+containing the following escapes:
+
+ %f for the filename
+ %d description
+ %m mime type
+
+Alignment is not available when using this,
+you need to post-process the output to do that.
+
+=item B<--stdin>
+
+Determine type of content from STDIN, less powerfull then normal file checking
+because it only uses magic typing. This will happen also if the STDIN filehandle
+is a pipe.
+
+To use this option L<IO::Scalar> needs to be installed.
+
+=item B<-v>, B<--version>
+
+Print the version of the program and exit.
+
+=back
+
+=head1 ENVIRONMENT
+
+=over 4
+
+=item XDG_DATA_HOME
+
+=item XDG_DATA_DIRS
+
+These variables can list base directories to search for data files. The shared
+mime-info will be expected in the "mime" sub directory of one of these
+directories. If these are not set, there will be searched for the
+following directories:
+
+ $HOME/.local/share/mime
+ /usr/local/share/mime
+ /usr/share/mime
+
+See also the "XDG Base Directory Specification"
+L<http://freedesktop.org/Standards/basedir-spec>
+
+=back
+
+=head1 FILES
+
+The base dir for all data files is determined by two environment variables,
+see L</ENVIRONMENT>.
+
+=over 4
+
+=item F<BASE/mime/packages/SOURCE.xml>
+
+All other files are compiled from these source files. To re-compile them
+use B<update-mime-database(1)>.
+
+=item F<BASE/mime/globs>
+
+Compiled information about globs.
+
+=item F<BASE/mime/magic>
+
+Compiled information about magic numbers.
+
+=item F<BASE/mime/MEDIA/SUBTYPE.xml>
+
+Descriptions of a mimetype in multiple languages, used for the
+L</--describe> switch.
+
+=back
+
+=head1 DIAGNOSTICS
+
+If a file has an empty mimetype or an empty description,
+most probably the file doesn't exist and the given name
+doesn't match any globs. An empty description can also mean that
+there is no description available in the language you specified.
+
+The program exits with a non-zero exit value if either the commandline
+arguments failed, a module it depends on wasn't found or the shared
+mime-info database wasn't accesable. See L<File::MimeInfo> for more details.
+
+=head1 TODO
+
+The '--all' switch doesn't really show all matches, but only one per
+mime-typing method. This needs to be implemnted in the modules first.
+
+=head1 BUGS
+
+No known bugs, please mail the author if you find one.
+
+B<mimetype> doesn't provide a switch for looking inside compressed files
+because it seems to me that this can only be done by un-compressing the file,
+something that defeats the purpose. On the other hand the option should
+exist for strict compatibility with file(1). Possibly a subclass should be
+made for this one day.
+
+=head1 AUTHOR
+
+Jaap Karrssenberg E<lt>pardus@cpan.orgE<gt>
+
+=head1 COPYRIGHT
+
+Copyright (c) 2003,2008 Jaap G Karssenberg. All rights reserved.
+This program is free software; you can redistribute it and/or
+modify it under the same terms as Perl.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+=head1 SEE ALSO
+
+L<file(1)>,
+L<update-mime-database(1)>,
+L<File::MimeInfo(3)>,
+L<http://freedesktop.org/Software/shared-mime-info>
+