summaryrefslogtreecommitdiffstats
path: root/scripts/mimetype
blob: ec191adc86693d76f8650b072f4f03863aaf3530 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
#!/usr/bin/perl 

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell

use strict;
our $VERSION = '0.15';
$|++;

# ########## #
# Parse ARGV #
# ########## #
my %args = ();
my %opts = (
	# name	=> [char, expect_arg_bit ]
	'help'		=> ['h'],
	'usage'		=> ['u'],
	'version'	=> ['v'],
	'stdin'		=> [''],
	'brief'		=> ['b'],
	'namefile'	=> ['f', 1],
	'noalign'	=> ['N'],
	'describe'	=> ['d'],
	'file-compat'	=> [''],
	'output-format'	=> ['', 1],
	'language'	=> ['l', 1],
	'mimetype'	=> ['i'],
	'dereference'	=> ['L'],
	'separator'	=> ['F',1],
	'debug'		=> ['D'],
	'database'	=> ['', 1],
	'all'		=> ['a'],
	'magic-only'	=> ['M'],
);

$args{'file-compat'}++ if $0 =~ m#(^|/)file$#;

while ((@ARGV) && ($ARGV[0] =~ /^-/)) {
	my $opt = shift @ARGV;
	if ($opt =~ /^--?$/) {
		$args{stdin}++ if $args{'file-compat'} && $opt eq '-';
		last;
	}
	elsif ($opt =~ s/^--([\w-]+)(?:=(.*))?/$1/) {
		if (exists $opts{$opt}) {
			if ($opts{$opt}[1]) { 
				my $arg = $2 || shift @ARGV;
				complain('--'.$opt, 2) unless defined $arg;
				$args{$opt} .= ( $args{$opt} ? ' ' : '' ) . $arg;
			}
			else { $args{$opt}++ }
		}
		else { complain('--'.$opt) }
	}
	elsif ($opt =~ s/^-(?!-)//) {
		foreach my $o (split //, $opt) {
			my ($key) = grep { $opts{$_}[0] eq $o } keys %opts;
			complain($o) unless $key;

			if ($opts{$key}[1]) { 
				my $arg = shift @ARGV;
				complain('-'.$o, 2) unless defined $arg;
				$args{$key} .= ( $args{$key} ? ' ' : '' ).$arg; # join with whitespace
			}
			else { $args{$key}++; }
		}
	}
	else { complain($opt) }
}

if ($args{help} || $args{usage}) {
	eval 'use Pod::Usage';
	die "Could not find perl module Pod::Usage\n" if $@;
	pod2usage( {
		-verbose => 1,
		-exitval => 0,
	} );
}

if ($args{version}) {
	print "mimetype $VERSION\n\n", << 'EOV';
Copyright (c) 2003 Jaap G Karssenberg. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
EOV
	exit 0;
}

complain(undef, 4) unless scalar(@ARGV) || $args{stdin} || $args{namefile};

# ############# #
# prepare stuff #
# ############# #

our %desc; # desc caching hash

# --database
@File::MimeInfo::DIRS = split /:/, $args{database} if $args{database};

## Actually use our module ##
eval 'use File::MimeInfo::Magic qw/mimetype globs inodetype magic describe/;';
die $@ if $@;

*default = \&File::MimeInfo::default;

# --debug
if ($args{debug}) {
	$File::MimeInfo::DEBUG++;
	$File::MimeInfo::Magic::DEBUG++;
	print	'> Data dirs are: ', join( ', ', 
			$args{database}
				? ( split /:/, $args{database} )
				: (
					File::BaseDir::xdg_data_home(),
					File::BaseDir::xdg_data_dirs()
				)
		), "\n";
}

# --file-compat
$args{describe}++ if $args{'file-compat'} && !$args{mimetype};

# --namefile
if ($args{namefile}) {
	open IN, $args{namefile} 
		|| die "Couldn't open file: $args{namefile}\n";
	unshift @ARGV, map {chomp; $_} (<IN>);
	close IN;
}

# --language
$File::MimeInfo::LANG = $args{language} if $args{language};

# Formatting stuff
my $l = 5; # "STDIN"
unless ($args{brief} || $args{noalign}) {
	for (@ARGV) { $l = length($_) if $l < length($_) }
}

$args{separator} = ':' unless defined $args{separator};
my $format = $args{'output-format'}
	? parse_format($args{'output-format'})
	: $args{brief}
	? sub { $args{describe} ? desc($_[1]) : $_[1] }
	: $args{noalign}
	? sub { ( $_[0], $args{separator}, ' ', $args{describe} ? desc($_[1]) : $_[1] ) }
	: sub { ( $_[0], $args{separator}, ' 'x($l + 1 - length($_[0])),
		$args{describe} ? desc($_[1]) : $_[1] ) };

# --dereference
if ($args{dereference}) {
	eval 'use File::Spec';
	die "Could not find perl module File::Spec\n" if $@;
}
# --stdin
if ($args{stdin}) {
	eval 'use IO::Scalar';
	die "Could not find perl module IO::Scalar\n" if $@;
}

# ######## #
# do stuff #
# ######## #

# --stdin
if ($args{stdin}) {
	my $data;
	read(STDIN, $data, $File::MimeInfo::Magic::max_buffer);
	my $scalar = new IO::Scalar \$data;
	print $format->('STDIN', mimetype($scalar)), "\n";
	exit;
}

foreach my $file (@ARGV) {
	# --dereference
	my $f = ($args{dereference} && -l $file) ? resolvelink($file) : $file;
	# --magic-only
	if ($args{'magic-only'}) { 
		print $format->($file, magic($f) || default($f)), "\n";
	}
	# --all
	elsif ($args{all}) {
		for (qw#inodetype globs magic default#) {
			my $m = eval "$_(\$f)";
			print $format->($file, $m), "\n" if $m;
		}
	}
	else { print $format->($file, mimetype($f)), "\n" }
}

exit;

# ########### #
# Subroutines #
# ########### #

sub complain { # Error messages
	my $opt = shift;
	my $m = shift || 1;

	my $bn = $0;
	$bn =~ s|^(.*/)*||;
	if ($m == 1) { print STDERR "$bn: unrecognized option '$opt'" }
	elsif ($m == 2) { print STDERR "$bn: option '$opt' requires an argument" }
	elsif ($m == 3) { print STDERR "$bn: $opt: No such file or directory\n" }
	elsif ($m == 4) { print STDERR "usage: $bn [options] files" }

	print "\nTry '$bn --help' for more information.\n" unless $m == 3;
	exit $m;
}

sub parse_format { # Advanced formatting
	my $form = shift;
	my $code = "sub { '$form' }";
	# code will get @_ = qw/file type/
	$code =~ s/(?<!\\)%f/'.\$_[0].'/g;
	$code =~ s/(?<!\\)%m/'.\$_[1].'/g;
	$code =~ s/(?<!\\)%d/'.desc(\$_[1]).'/g;
	return eval $code;
}

sub resolvelink { # --dereference
	my $file = shift;
	my $link = readlink($file) || return $file;
	my (undef, $dir, undef) = File::Spec->splitpath($file);
	$link = File::Spec->rel2abs($link, $dir);
	$link = resolvelink($link) if -l $link; # recurs
	return $link;
}

sub desc { # Cache desciption
	my $mt = shift;
	return undef unless $mt;
	$desc{$mt} ||= describe($mt) || describe($mt, ''); # second form overrules the language settings to default
}

__END__

=head1 NAME

mimetype - Determine file type

=head1 SYNOPSIS

mimetype [options] [-] files

=head1 DESCRIPTION

This script tries to determine the mime type of a file using the
Shared MIME-info database. It is intended as a kind of I<file(1)> work-alike, 
but uses mimetypes instead of descriptions.

If one symlinks the I<file> command to I<mimetype> it will behave
a little more compatible, see L</--file-compat>.
Commandline options to specify alternative magic files are not
implemented the same because of the conflicting data formats.
Also the wording of the descriptions will differ.

For naming switches I followed the manpage of file(1) version 4.02
when possible. They seem to differ completely from the spec in the 
'utilities' chapter of IEEE Std 1003.1-2001 (POSIX).

=head1 OPTIONS

=over 4

=item B<-a>, B<--all>

Show output of all rules that match the file.

TODO: this method now just returns one match for each
method (globs, magic, etc.).

=item B<-b>, B<--brief>

Do not prepend filenames to output lines (brief mode).

=item B<--database>=I<mimedir>:I<mimedir>:...

Force the program to look in these directories for the shared mime-info
database. The directories specified by the basedir specification
are ignored.

=item B<-d>, B<--describe>

Print file descriptions instead of mime types, this is the
default when using L</--file-compat>.

=item B<-D>, B<--debug>

Print debug information about how the mimetype was determined.

=item B<-f> I<namefile>, B<--namefile>=I<namefile>

Read the names of the files to be examined from the file 'namefile' 
(one per line) before the argument list.

=item B<--file-compat>

Make mimetype behave a little more L<file(1)> compatible. This
is turned on automaticly when you call mimetype by a link
called 'file'.

A single '-' won't be considered a seperator
between options and filenames anymore, but becomes identical to L</--stdin>.
( You can still use '--' as seperator, but
that is not backward compatible with the original file command. )
Also the default becomes to print descriptions instead of mimetypes.

=item B<-F> I<string>, B<--separator>=I<string>

Use string as custom separator between the file name and its mimetype
or description, defaults to ':' .

=item B<-h>, B<--help>

=item B<-u>, B<--usage>

Print a help message and exits.

=item B<-i>, B<--mimetype>

Use mime types, opposite to L</--describe>,
this is the default when _not_ using L</--file-compat>.

=item B<-L>, B<--dereference>

Follow symbolic links.

=item B<-l> I<code>, B<--language>=I<code>

The language attribute specifies a two letter language code, this makes
descriptions being outputted in the specified language.

=item B<-M>, B<--magic-only>

Do not check for extensions, globs or inode type, only look at the content
of the file. This is particularly useful if for some reason you don't trust
the name or the extension a file has.

=item B<-N>, B<--noalign>

Do not align output fields.

=item B<--output-format>

If you want an alternative output format, you can specify a format string
containing the following escapes:

	%f for the filename
	%d description
	%m mime type

Alignment is not available when using this,
you need to post-process the output to do that.

=item B<--stdin>

Determine type of content from STDIN, less powerfull then normal file checking
because it only uses magic typing. This will happen also if the STDIN filehandle
is a pipe.

To use this option L<IO::Scalar> needs to be installed.

=item B<-v>, B<--version>

Print the version of the program and exit.

=back

=head1 ENVIRONMENT

=over 4

=item XDG_DATA_HOME

=item XDG_DATA_DIRS

These variables can list base directories to search for data files. The shared 
mime-info will be expected in the "mime" sub directory of one of these
directories. If these are not set, there will be searched for the 
following directories:

	$HOME/.local/share/mime
	/usr/local/share/mime
	/usr/share/mime

See also the "XDG Base Directory Specification" 
L<http://freedesktop.org/Standards/basedir-spec>

=back

=head1 FILES

The base dir for all data files is determined by two environment variables,
see L</ENVIRONMENT>.

=over 4

=item F<BASE/mime/packages/SOURCE.xml>

All other files are compiled from these source files. To re-compile them
use B<update-mime-database(1)>.

=item F<BASE/mime/globs>

Compiled information about globs.

=item F<BASE/mime/magic>

Compiled information about magic numbers.

=item F<BASE/mime/MEDIA/SUBTYPE.xml>

Descriptions of a mimetype in multiple languages, used for the
L</--describe> switch.

=back

=head1 DIAGNOSTICS

If a file has an empty mimetype or an empty description,
most probably the file doesn't exist and the given name 
doesn't match any globs. An empty description can also mean that
there is no description available in the language you specified.

The program exits with a non-zero exit value if either the commandline
arguments failed, a module it depends on wasn't found or the shared
mime-info database wasn't accesable. See L<File::MimeInfo> for more details.

=head1 TODO

The '--all' switch doesn't really show all matches, but only one per
mime-typing method. This needs to be implemnted in the modules first.

=head1 BUGS

No known bugs, please mail the author if you find one.

B<mimetype> doesn't provide a switch for looking inside compressed files
because it seems to me that this can only be done by un-compressing the file,
something that defeats the purpose. On the other hand the option should 
exist for strict compatibility with file(1). Possibly a subclass should be
made for this one day.

=head1 AUTHOR

Jaap Karrssenberg E<lt>pardus@cpan.orgE<gt>

=head1 COPYRIGHT

Copyright (c) 2003,2008 Jaap G Karssenberg. All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

=head1 SEE ALSO

L<file(1)>,
L<update-mime-database(1)>,
L<File::MimeInfo(3)>,
L<http://freedesktop.org/Software/shared-mime-info>