From d6d08a78344675340fe5cffbeaab16550b37eceb Mon Sep 17 00:00:00 2001 From: Florian Pritz Date: Thu, 11 Aug 2011 16:13:05 +0200 Subject: rewrite scripts/mimetype to support ascii with color codes Signed-off-by: Florian Pritz --- scripts/mimetype | 506 ++++--------------------------------------------------- 1 file changed, 31 insertions(+), 475 deletions(-) (limited to 'scripts') diff --git a/scripts/mimetype b/scripts/mimetype index 43c69ebc6..1d89d6922 100755 --- a/scripts/mimetype +++ b/scripts/mimetype @@ -1,482 +1,38 @@ -#!/usr/bin/perl - -eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' - if 0; # not running under some shell - -eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' - if 0; # not running under some shell - +#!/usr/bin/perl +use warnings; use strict; -our $VERSION = '0.15'; -$|++; - -# ########## # -# Parse ARGV # -# ########## # -my %args = (); -my %opts = ( - # name => [char, expect_arg_bit ] - 'help' => ['h'], - 'usage' => ['u'], - 'version' => ['v'], - 'stdin' => [''], - 'brief' => ['b'], - 'namefile' => ['f', 1], - 'orig-name' => ['', 1], - 'noalign' => ['N'], - 'describe' => ['d'], - 'file-compat' => [''], - 'output-format' => ['', 1], - 'language' => ['l', 1], - 'mimetype' => ['i'], - 'dereference' => ['L'], - 'separator' => ['F',1], - 'debug' => ['D'], - 'database' => ['', 1], - 'all' => ['a'], - 'magic-only' => ['M'], -); - -$args{'file-compat'}++ if $0 =~ m#(^|/)file$#; - -while ((@ARGV) && ($ARGV[0] =~ /^-/)) { - my $opt = shift @ARGV; - if ($opt =~ /^--?$/) { - $args{stdin}++ if $args{'file-compat'} && $opt eq '-'; - last; - } - elsif ($opt =~ s/^--([\w-]+)(?:=(.*))?/$1/) { - if (exists $opts{$opt}) { - if ($opts{$opt}[1]) { - my $arg = $2 || shift @ARGV; - complain('--'.$opt, 2) unless defined $arg; - $args{$opt} .= ( $args{$opt} ? ' ' : '' ) . $arg; - } - else { $args{$opt}++ } - } - else { complain('--'.$opt) } - } - elsif ($opt =~ s/^-(?!-)//) { - foreach my $o (split //, $opt) { - my ($key) = grep { $opts{$_}[0] eq $o } keys %opts; - complain($o) unless $key; - - if ($opts{$key}[1]) { - my $arg = shift @ARGV; - complain('-'.$o, 2) unless defined $arg; - $args{$key} .= ( $args{$key} ? ' ' : '' ).$arg; # join with whitespace +use File::MimeInfo qw(mimetype globs); + +exit 1 unless @ARGV == 2; + +my $filename = $ARGV[0]; +my $file = $ARGV[1]; +my $type = ""; +my $file_type = ""; + +exit 1 unless -f $file; + +$type = globs($filename); + +# globbing takes priority over all other +if (!$type) { + $type = mimetype($file); + if ($type eq "application/octet-stream") { + # application/octet-stream normally means the detection failed + # use the output of file in this case + $file_type = `file -b --mime-type $file`; + chomp $file_type; + $type = $file_type; + if ($type eq "text/plain") { + # detect ascii with color codes + $file_type = `file -b $file`; + chomp $file_type; + if ($file_type eq "ASCII text, with escape sequences") { + $type = "text/plain-ascii"; } - else { $args{$key}++; } - } - } - else { complain($opt) } -} - -if ($args{help} || $args{usage}) { - eval 'use Pod::Usage'; - die "Could not find perl module Pod::Usage\n" if $@; - pod2usage( { - -verbose => 1, - -exitval => 0, - } ); -} - -if ($args{version}) { - print "mimetype $VERSION\n\n", << 'EOV'; -Copyright (c) 2003 Jaap G Karssenberg. All rights reserved. -This program is free software; you can redistribute it and/or -modify it under the same terms as Perl. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. -EOV - exit 0; -} - -complain(undef, 4) unless scalar(@ARGV) || $args{stdin} || $args{namefile}; - -# ############# # -# prepare stuff # -# ############# # - -our %desc; # desc caching hash - -# --database -@File::MimeInfo::DIRS = split /:/, $args{database} if $args{database}; - -## Actually use our module ## -eval 'use File::MimeInfo::Magic qw/mimetype globs inodetype magic describe/;'; -die $@ if $@; - -*default = \&File::MimeInfo::default; - -# --debug -if ($args{debug}) { - $File::MimeInfo::DEBUG++; - $File::MimeInfo::Magic::DEBUG++; - print '> Data dirs are: ', join( ', ', - $args{database} - ? ( split /:/, $args{database} ) - : ( - File::BaseDir::xdg_data_home(), - File::BaseDir::xdg_data_dirs() - ) - ), "\n"; -} - -# --file-compat -$args{describe}++ if $args{'file-compat'} && !$args{mimetype}; - -# --namefile -if ($args{namefile}) { - open IN, $args{namefile} - || die "Couldn't open file: $args{namefile}\n"; - unshift @ARGV, map {chomp; $_} (); - close IN; -} - -# --language -$File::MimeInfo::LANG = $args{language} if $args{language}; - -# Formatting stuff -my $l = 5; # "STDIN" -unless ($args{brief} || $args{noalign}) { - for (@ARGV) { $l = length($_) if $l < length($_) } -} - -$args{separator} = ':' unless defined $args{separator}; -my $format = $args{'output-format'} - ? parse_format($args{'output-format'}) - : $args{brief} - ? sub { $args{describe} ? desc($_[1]) : $_[1] } - : $args{noalign} - ? sub { ( $_[0], $args{separator}, ' ', $args{describe} ? desc($_[1]) : $_[1] ) } - : sub { ( $_[0], $args{separator}, ' 'x($l + 1 - length($_[0])), - $args{describe} ? desc($_[1]) : $_[1] ) }; - -# --dereference -if ($args{dereference}) { - eval 'use File::Spec'; - die "Could not find perl module File::Spec\n" if $@; -} -# --stdin -if ($args{stdin}) { - eval 'use IO::Scalar'; - die "Could not find perl module IO::Scalar\n" if $@; -} - -# ######## # -# do stuff # -# ######## # - -# --stdin -if ($args{stdin}) { - my $data; - read(STDIN, $data, $File::MimeInfo::Magic::max_buffer); - my $scalar = new IO::Scalar \$data; - print $format->('STDIN', mimetype($scalar)), "\n"; - exit; -} - -foreach my $file (@ARGV) { - # --dereference - my $f = ($args{dereference} && -l $file) ? resolvelink($file) : $file; - # --magic-only - if ($args{'magic-only'}) { - print $format->($file, magic($f) || default($f)), "\n"; - } - # --all - elsif ($args{all}) { - for (qw#inodetype globs magic default#) { - my $m = eval "$_(\$f)"; - print $format->($file, $m), "\n" if $m; } } - elsif ($args{'orig-name'}) { - my $type = globs($args{'orig-name'}); - if ($type ne "") { - print $format->($file, $type), "\n"; - } else { - print $format->($file, mimetype($f)), "\n"; - } - } - else { print $format->($file, mimetype($f)), "\n" } -} - -exit; - -# ########### # -# Subroutines # -# ########### # - -sub complain { # Error messages - my $opt = shift; - my $m = shift || 1; - - my $bn = $0; - $bn =~ s|^(.*/)*||; - if ($m == 1) { print STDERR "$bn: unrecognized option '$opt'" } - elsif ($m == 2) { print STDERR "$bn: option '$opt' requires an argument" } - elsif ($m == 3) { print STDERR "$bn: $opt: No such file or directory\n" } - elsif ($m == 4) { print STDERR "usage: $bn [options] files" } - - print "\nTry '$bn --help' for more information.\n" unless $m == 3; - exit $m; } -sub parse_format { # Advanced formatting - my $form = shift; - my $code = "sub { '$form' }"; - # code will get @_ = qw/file type/ - $code =~ s/(?splitpath($file); - $link = File::Spec->rel2abs($link, $dir); - $link = resolvelink($link) if -l $link; # recurs - return $link; -} - -sub desc { # Cache desciption - my $mt = shift; - return undef unless $mt; - $desc{$mt} ||= describe($mt) || describe($mt, ''); # second form overrules the language settings to default -} - -__END__ - -=head1 NAME - -mimetype - Determine file type - -=head1 SYNOPSIS - -mimetype [options] [-] files - -=head1 DESCRIPTION - -This script tries to determine the mime type of a file using the -Shared MIME-info database. It is intended as a kind of I work-alike, -but uses mimetypes instead of descriptions. - -If one symlinks the I command to I it will behave -a little more compatible, see L. -Commandline options to specify alternative magic files are not -implemented the same because of the conflicting data formats. -Also the wording of the descriptions will differ. - -For naming switches I followed the manpage of file(1) version 4.02 -when possible. They seem to differ completely from the spec in the -'utilities' chapter of IEEE Std 1003.1-2001 (POSIX). - -=head1 OPTIONS - -=over 4 - -=item B<-a>, B<--all> - -Show output of all rules that match the file. - -TODO: this method now just returns one match for each -method (globs, magic, etc.). - -=item B<-b>, B<--brief> - -Do not prepend filenames to output lines (brief mode). - -=item B<--database>=I:I:... - -Force the program to look in these directories for the shared mime-info -database. The directories specified by the basedir specification -are ignored. - -=item B<-d>, B<--describe> - -Print file descriptions instead of mime types, this is the -default when using L. - -=item B<-D>, B<--debug> - -Print debug information about how the mimetype was determined. - -=item B<-f> I, B<--namefile>=I - -Read the names of the files to be examined from the file 'namefile' -(one per line) before the argument list. - -=item B<--file-compat> - -Make mimetype behave a little more L compatible. This -is turned on automaticly when you call mimetype by a link -called 'file'. - -A single '-' won't be considered a seperator -between options and filenames anymore, but becomes identical to L. -( You can still use '--' as seperator, but -that is not backward compatible with the original file command. ) -Also the default becomes to print descriptions instead of mimetypes. - -=item B<-F> I, B<--separator>=I - -Use string as custom separator between the file name and its mimetype -or description, defaults to ':' . - -=item B<-h>, B<--help> - -=item B<-u>, B<--usage> - -Print a help message and exits. - -=item B<-i>, B<--mimetype> - -Use mime types, opposite to L, -this is the default when _not_ using L. - -=item B<-L>, B<--dereference> - -Follow symbolic links. - -=item B<-l> I, B<--language>=I - -The language attribute specifies a two letter language code, this makes -descriptions being outputted in the specified language. - -=item B<-M>, B<--magic-only> - -Do not check for extensions, globs or inode type, only look at the content -of the file. This is particularly useful if for some reason you don't trust -the name or the extension a file has. - -=item B<-N>, B<--noalign> - -Do not align output fields. - -=item B<--output-format> - -If you want an alternative output format, you can specify a format string -containing the following escapes: - - %f for the filename - %d description - %m mime type - -Alignment is not available when using this, -you need to post-process the output to do that. - -=item B<--stdin> - -Determine type of content from STDIN, less powerfull then normal file checking -because it only uses magic typing. This will happen also if the STDIN filehandle -is a pipe. - -To use this option L needs to be installed. - -=item B<-v>, B<--version> - -Print the version of the program and exit. - -=back - -=head1 ENVIRONMENT - -=over 4 - -=item XDG_DATA_HOME - -=item XDG_DATA_DIRS - -These variables can list base directories to search for data files. The shared -mime-info will be expected in the "mime" sub directory of one of these -directories. If these are not set, there will be searched for the -following directories: - - $HOME/.local/share/mime - /usr/local/share/mime - /usr/share/mime - -See also the "XDG Base Directory Specification" -L - -=back - -=head1 FILES - -The base dir for all data files is determined by two environment variables, -see L. - -=over 4 - -=item F - -All other files are compiled from these source files. To re-compile them -use B. - -=item F - -Compiled information about globs. - -=item F - -Compiled information about magic numbers. - -=item F - -Descriptions of a mimetype in multiple languages, used for the -L switch. - -=back - -=head1 DIAGNOSTICS - -If a file has an empty mimetype or an empty description, -most probably the file doesn't exist and the given name -doesn't match any globs. An empty description can also mean that -there is no description available in the language you specified. - -The program exits with a non-zero exit value if either the commandline -arguments failed, a module it depends on wasn't found or the shared -mime-info database wasn't accesable. See L for more details. - -=head1 TODO - -The '--all' switch doesn't really show all matches, but only one per -mime-typing method. This needs to be implemnted in the modules first. - -=head1 BUGS - -No known bugs, please mail the author if you find one. - -B doesn't provide a switch for looking inside compressed files -because it seems to me that this can only be done by un-compressing the file, -something that defeats the purpose. On the other hand the option should -exist for strict compatibility with file(1). Possibly a subclass should be -made for this one day. - -=head1 AUTHOR - -Jaap Karrssenberg Epardus@cpan.orgE - -=head1 COPYRIGHT - -Copyright (c) 2003,2008 Jaap G Karssenberg. All rights reserved. -This program is free software; you can redistribute it and/or -modify it under the same terms as Perl. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -=head1 SEE ALSO - -L, -L, -L, -L +print "$type\n"; -- cgit v1.2.3-24-g4f1b