From 8c514266239c745101a71cdf9a5f1982fc3812ce Mon Sep 17 00:00:00 2001 From: Florian Pritz Date: Thu, 1 Apr 2010 03:04:42 +0200 Subject: add mimetype script Signed-off-by: Florian Pritz --- scripts/mimetype | 473 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 473 insertions(+) create mode 100755 scripts/mimetype (limited to 'scripts') diff --git a/scripts/mimetype b/scripts/mimetype new file mode 100755 index 000000000..ec191adc8 --- /dev/null +++ b/scripts/mimetype @@ -0,0 +1,473 @@ +#!/usr/bin/perl + +eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' + if 0; # not running under some shell + +eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' + if 0; # not running under some shell + +use strict; +our $VERSION = '0.15'; +$|++; + +# ########## # +# Parse ARGV # +# ########## # +my %args = (); +my %opts = ( + # name => [char, expect_arg_bit ] + 'help' => ['h'], + 'usage' => ['u'], + 'version' => ['v'], + 'stdin' => [''], + 'brief' => ['b'], + 'namefile' => ['f', 1], + 'noalign' => ['N'], + 'describe' => ['d'], + 'file-compat' => [''], + 'output-format' => ['', 1], + 'language' => ['l', 1], + 'mimetype' => ['i'], + 'dereference' => ['L'], + 'separator' => ['F',1], + 'debug' => ['D'], + 'database' => ['', 1], + 'all' => ['a'], + 'magic-only' => ['M'], +); + +$args{'file-compat'}++ if $0 =~ m#(^|/)file$#; + +while ((@ARGV) && ($ARGV[0] =~ /^-/)) { + my $opt = shift @ARGV; + if ($opt =~ /^--?$/) { + $args{stdin}++ if $args{'file-compat'} && $opt eq '-'; + last; + } + elsif ($opt =~ s/^--([\w-]+)(?:=(.*))?/$1/) { + if (exists $opts{$opt}) { + if ($opts{$opt}[1]) { + my $arg = $2 || shift @ARGV; + complain('--'.$opt, 2) unless defined $arg; + $args{$opt} .= ( $args{$opt} ? ' ' : '' ) . $arg; + } + else { $args{$opt}++ } + } + else { complain('--'.$opt) } + } + elsif ($opt =~ s/^-(?!-)//) { + foreach my $o (split //, $opt) { + my ($key) = grep { $opts{$_}[0] eq $o } keys %opts; + complain($o) unless $key; + + if ($opts{$key}[1]) { + my $arg = shift @ARGV; + complain('-'.$o, 2) unless defined $arg; + $args{$key} .= ( $args{$key} ? ' ' : '' ).$arg; # join with whitespace + } + else { $args{$key}++; } + } + } + else { complain($opt) } +} + +if ($args{help} || $args{usage}) { + eval 'use Pod::Usage'; + die "Could not find perl module Pod::Usage\n" if $@; + pod2usage( { + -verbose => 1, + -exitval => 0, + } ); +} + +if ($args{version}) { + print "mimetype $VERSION\n\n", << 'EOV'; +Copyright (c) 2003 Jaap G Karssenberg. All rights reserved. +This program is free software; you can redistribute it and/or +modify it under the same terms as Perl. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +EOV + exit 0; +} + +complain(undef, 4) unless scalar(@ARGV) || $args{stdin} || $args{namefile}; + +# ############# # +# prepare stuff # +# ############# # + +our %desc; # desc caching hash + +# --database +@File::MimeInfo::DIRS = split /:/, $args{database} if $args{database}; + +## Actually use our module ## +eval 'use File::MimeInfo::Magic qw/mimetype globs inodetype magic describe/;'; +die $@ if $@; + +*default = \&File::MimeInfo::default; + +# --debug +if ($args{debug}) { + $File::MimeInfo::DEBUG++; + $File::MimeInfo::Magic::DEBUG++; + print '> Data dirs are: ', join( ', ', + $args{database} + ? ( split /:/, $args{database} ) + : ( + File::BaseDir::xdg_data_home(), + File::BaseDir::xdg_data_dirs() + ) + ), "\n"; +} + +# --file-compat +$args{describe}++ if $args{'file-compat'} && !$args{mimetype}; + +# --namefile +if ($args{namefile}) { + open IN, $args{namefile} + || die "Couldn't open file: $args{namefile}\n"; + unshift @ARGV, map {chomp; $_} (); + close IN; +} + +# --language +$File::MimeInfo::LANG = $args{language} if $args{language}; + +# Formatting stuff +my $l = 5; # "STDIN" +unless ($args{brief} || $args{noalign}) { + for (@ARGV) { $l = length($_) if $l < length($_) } +} + +$args{separator} = ':' unless defined $args{separator}; +my $format = $args{'output-format'} + ? parse_format($args{'output-format'}) + : $args{brief} + ? sub { $args{describe} ? desc($_[1]) : $_[1] } + : $args{noalign} + ? sub { ( $_[0], $args{separator}, ' ', $args{describe} ? desc($_[1]) : $_[1] ) } + : sub { ( $_[0], $args{separator}, ' 'x($l + 1 - length($_[0])), + $args{describe} ? desc($_[1]) : $_[1] ) }; + +# --dereference +if ($args{dereference}) { + eval 'use File::Spec'; + die "Could not find perl module File::Spec\n" if $@; +} +# --stdin +if ($args{stdin}) { + eval 'use IO::Scalar'; + die "Could not find perl module IO::Scalar\n" if $@; +} + +# ######## # +# do stuff # +# ######## # + +# --stdin +if ($args{stdin}) { + my $data; + read(STDIN, $data, $File::MimeInfo::Magic::max_buffer); + my $scalar = new IO::Scalar \$data; + print $format->('STDIN', mimetype($scalar)), "\n"; + exit; +} + +foreach my $file (@ARGV) { + # --dereference + my $f = ($args{dereference} && -l $file) ? resolvelink($file) : $file; + # --magic-only + if ($args{'magic-only'}) { + print $format->($file, magic($f) || default($f)), "\n"; + } + # --all + elsif ($args{all}) { + for (qw#inodetype globs magic default#) { + my $m = eval "$_(\$f)"; + print $format->($file, $m), "\n" if $m; + } + } + else { print $format->($file, mimetype($f)), "\n" } +} + +exit; + +# ########### # +# Subroutines # +# ########### # + +sub complain { # Error messages + my $opt = shift; + my $m = shift || 1; + + my $bn = $0; + $bn =~ s|^(.*/)*||; + if ($m == 1) { print STDERR "$bn: unrecognized option '$opt'" } + elsif ($m == 2) { print STDERR "$bn: option '$opt' requires an argument" } + elsif ($m == 3) { print STDERR "$bn: $opt: No such file or directory\n" } + elsif ($m == 4) { print STDERR "usage: $bn [options] files" } + + print "\nTry '$bn --help' for more information.\n" unless $m == 3; + exit $m; +} + +sub parse_format { # Advanced formatting + my $form = shift; + my $code = "sub { '$form' }"; + # code will get @_ = qw/file type/ + $code =~ s/(?splitpath($file); + $link = File::Spec->rel2abs($link, $dir); + $link = resolvelink($link) if -l $link; # recurs + return $link; +} + +sub desc { # Cache desciption + my $mt = shift; + return undef unless $mt; + $desc{$mt} ||= describe($mt) || describe($mt, ''); # second form overrules the language settings to default +} + +__END__ + +=head1 NAME + +mimetype - Determine file type + +=head1 SYNOPSIS + +mimetype [options] [-] files + +=head1 DESCRIPTION + +This script tries to determine the mime type of a file using the +Shared MIME-info database. It is intended as a kind of I work-alike, +but uses mimetypes instead of descriptions. + +If one symlinks the I command to I it will behave +a little more compatible, see L. +Commandline options to specify alternative magic files are not +implemented the same because of the conflicting data formats. +Also the wording of the descriptions will differ. + +For naming switches I followed the manpage of file(1) version 4.02 +when possible. They seem to differ completely from the spec in the +'utilities' chapter of IEEE Std 1003.1-2001 (POSIX). + +=head1 OPTIONS + +=over 4 + +=item B<-a>, B<--all> + +Show output of all rules that match the file. + +TODO: this method now just returns one match for each +method (globs, magic, etc.). + +=item B<-b>, B<--brief> + +Do not prepend filenames to output lines (brief mode). + +=item B<--database>=I:I:... + +Force the program to look in these directories for the shared mime-info +database. The directories specified by the basedir specification +are ignored. + +=item B<-d>, B<--describe> + +Print file descriptions instead of mime types, this is the +default when using L. + +=item B<-D>, B<--debug> + +Print debug information about how the mimetype was determined. + +=item B<-f> I, B<--namefile>=I + +Read the names of the files to be examined from the file 'namefile' +(one per line) before the argument list. + +=item B<--file-compat> + +Make mimetype behave a little more L compatible. This +is turned on automaticly when you call mimetype by a link +called 'file'. + +A single '-' won't be considered a seperator +between options and filenames anymore, but becomes identical to L. +( You can still use '--' as seperator, but +that is not backward compatible with the original file command. ) +Also the default becomes to print descriptions instead of mimetypes. + +=item B<-F> I, B<--separator>=I + +Use string as custom separator between the file name and its mimetype +or description, defaults to ':' . + +=item B<-h>, B<--help> + +=item B<-u>, B<--usage> + +Print a help message and exits. + +=item B<-i>, B<--mimetype> + +Use mime types, opposite to L, +this is the default when _not_ using L. + +=item B<-L>, B<--dereference> + +Follow symbolic links. + +=item B<-l> I, B<--language>=I + +The language attribute specifies a two letter language code, this makes +descriptions being outputted in the specified language. + +=item B<-M>, B<--magic-only> + +Do not check for extensions, globs or inode type, only look at the content +of the file. This is particularly useful if for some reason you don't trust +the name or the extension a file has. + +=item B<-N>, B<--noalign> + +Do not align output fields. + +=item B<--output-format> + +If you want an alternative output format, you can specify a format string +containing the following escapes: + + %f for the filename + %d description + %m mime type + +Alignment is not available when using this, +you need to post-process the output to do that. + +=item B<--stdin> + +Determine type of content from STDIN, less powerfull then normal file checking +because it only uses magic typing. This will happen also if the STDIN filehandle +is a pipe. + +To use this option L needs to be installed. + +=item B<-v>, B<--version> + +Print the version of the program and exit. + +=back + +=head1 ENVIRONMENT + +=over 4 + +=item XDG_DATA_HOME + +=item XDG_DATA_DIRS + +These variables can list base directories to search for data files. The shared +mime-info will be expected in the "mime" sub directory of one of these +directories. If these are not set, there will be searched for the +following directories: + + $HOME/.local/share/mime + /usr/local/share/mime + /usr/share/mime + +See also the "XDG Base Directory Specification" +L + +=back + +=head1 FILES + +The base dir for all data files is determined by two environment variables, +see L. + +=over 4 + +=item F + +All other files are compiled from these source files. To re-compile them +use B. + +=item F + +Compiled information about globs. + +=item F + +Compiled information about magic numbers. + +=item F + +Descriptions of a mimetype in multiple languages, used for the +L switch. + +=back + +=head1 DIAGNOSTICS + +If a file has an empty mimetype or an empty description, +most probably the file doesn't exist and the given name +doesn't match any globs. An empty description can also mean that +there is no description available in the language you specified. + +The program exits with a non-zero exit value if either the commandline +arguments failed, a module it depends on wasn't found or the shared +mime-info database wasn't accesable. See L for more details. + +=head1 TODO + +The '--all' switch doesn't really show all matches, but only one per +mime-typing method. This needs to be implemnted in the modules first. + +=head1 BUGS + +No known bugs, please mail the author if you find one. + +B doesn't provide a switch for looking inside compressed files +because it seems to me that this can only be done by un-compressing the file, +something that defeats the purpose. On the other hand the option should +exist for strict compatibility with file(1). Possibly a subclass should be +made for this one day. + +=head1 AUTHOR + +Jaap Karrssenberg Epardus@cpan.orgE + +=head1 COPYRIGHT + +Copyright (c) 2003,2008 Jaap G Karssenberg. All rights reserved. +This program is free software; you can redistribute it and/or +modify it under the same terms as Perl. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +=head1 SEE ALSO + +L, +L, +L, +L + -- cgit v1.2.3-24-g4f1b