From b758be1d37cb49ced8cb17fca3d5a4cb2dd2e03e Mon Sep 17 00:00:00 2001
From: David Lawrence <dkl@mozilla.com>
Date: Mon, 6 Jul 2015 19:21:04 +0000
Subject: Bug 1172968: Move the scripts we want to keep from contrib/* and
 place them in scripts/ directory. Remove contrib from repo

---
 contrib/recode.pl | 331 ------------------------------------------------------
 1 file changed, 331 deletions(-)
 delete mode 100755 contrib/recode.pl

(limited to 'contrib/recode.pl')

diff --git a/contrib/recode.pl b/contrib/recode.pl
deleted file mode 100755
index e74e06c07..000000000
--- a/contrib/recode.pl
+++ /dev/null
@@ -1,331 +0,0 @@
-#!/usr/bin/perl -w
-# -*- Mode: perl; indent-tabs-mode: nil -*-
-#
-# The contents of this file are subject to the Mozilla Public
-# License Version 1.1 (the "License"); you may not use this file
-# except in compliance with the License. You may obtain a copy of
-# the License at http://www.mozilla.org/MPL/
-#
-# Software distributed under the License is distributed on an "AS
-# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
-# implied. See the License for the specific language governing
-# rights and limitations under the License.
-#
-# The Original Code is the Bugzilla Bug Tracking System.
-#
-# The Initial Developer of the Original Code is Everything Solved.
-# Portions created by Everything Solved are Copyright (C) 2006
-# Everything Solved. All Rights Reserved.
-#
-# Contributor(s): Max Kanat-Alexander <mkanat@bugzilla.org>
-
-use strict;
-use lib qw(. lib);
-
-use Bugzilla;
-use Bugzilla::Constants;
-use Bugzilla::Util qw(detect_encoding);
-
-use Digest::MD5 qw(md5_base64);
-use Encode qw(encode decode resolve_alias is_utf8);
-use Getopt::Long;
-use Pod::Usage;
-
-#############
-# Constants #
-#############
-
-use constant IGNORE_ENCODINGS => qw(utf8 ascii iso-8859-1);
-
-use constant MAX_STRING_LEN => 25;
-
-# For certain tables, we can't automatically determine their Primary Key.
-# So, we specify it here as a string.
-use constant SPECIAL_KEYS => {
-    # bugs_activity since 4.4 has a unique primary key added
-    bugs_activity     => 'bug_id,bug_when,fieldid',
-    profile_setting   => 'user_id,setting_name',
-    # profiles_activity since 4.4 has a unique primary key added
-    profiles_activity => 'userid,profiles_when,fieldid',
-    setting_value     => 'name,value',
-    # longdescs didn't used to have a PK, before 2.20.
-    longdescs         => 'bug_id,bug_when',
-    # The 2.16 versions table lacked a PK
-    versions          => 'product_id,value',
-    # These are all for earlier versions of Bugzilla. On a modern
-    # version of Bugzilla, this script will ignore these (thanks to
-    # code further down).
-    components        => 'program,value',
-    products          => 'product',
-};
-
-###############
-# Subroutines #
-###############
-
-# "truncate" is a file operation in perl, so we can't use that name.
-sub trunc {
-    my ($str) = @_;
-    my $truncated = substr($str, 0, MAX_STRING_LEN);
-    if (length($truncated) ne length($str)) {
-        $truncated .= '...';
-    }
-    return $truncated;
-}
-
-sub is_valid_utf8 {
-    my ($str) = @_;
-    Encode::_utf8_on($str);
-    return is_utf8($str, 1);
-}
-
-###############
-# Main Script #
-###############
-
-my %switch;
-GetOptions(\%switch, 'dry-run', 'guess', 'charset=s', 'show-failures',
-                     'overrides=s', 'help|h');
-
-pod2usage({ -verbose => 1 }) if $switch{'help'};
-
-# You have to specify at least one of these switches.
-pod2usage({ -verbose => 0 }) if (!$switch{'charset'} && !$switch{'guess'});
-
-if (exists $switch{'charset'}) {
-    $switch{'charset'} = resolve_alias($switch{'charset'})
-        || die "'$switch{charset}' is not a valid charset.";
-}
-
-if ($switch{'guess'}) {
-    if (!eval { require Encode::Detect::Detector }) {
-        my $root = ROOT_USER;
-        print STDERR <<EOT;
-Using --guess requires that Encode::Detect be installed. To install
-Encode::Detect, run the following command:
-
-  $^X install-module.pl Encode::Detect
-
-EOT
-        exit;
-    }
-}
-
-my %overrides;
-if (exists $switch{'overrides'}) {
-    my $file = new IO::File($switch{'overrides'}, 'r') 
-        || die "$switch{overrides}: $!";
-    my @lines = $file->getlines();
-    $file->close();
-    foreach my $line (@lines) {
-        chomp($line);
-        my ($digest, $encoding) = split(' ', $line);
-        $overrides{$digest} = $encoding;
-    }
-}
-
-my $dbh = Bugzilla->dbh;
-
-if ($dbh->isa('Bugzilla::DB::Mysql')) {
-    # Get the actual current encoding of the DB.
-    my $collation_data = $dbh->selectrow_arrayref(
-        "SHOW VARIABLES LIKE 'character_set_database'");
-    my $db_charset = $collation_data->[1];
-    # Set our connection encoding to *that* encoding, so that MySQL
-    # correctly accepts our changes.
-    $dbh->do("SET NAMES $db_charset");
-    # Make the database give us raw bytes.
-    $dbh->do('SET character_set_results = NULL')
-}
-
-$dbh->begin_work;
-
-foreach my $table ($dbh->bz_table_list_real) {
-    my @columns = $dbh->bz_table_columns($table);
-
-    my $pk = SPECIAL_KEYS->{$table};
-    if ($pk) {
-        # Assure that we're on a version of Bugzilla where those keys
-        # actually exist.
-        foreach my $column (split ',', $pk) {
-            $pk = undef if !$dbh->bz_column_info($table, $column);
-        }
-    }
-
-    # Figure out the primary key.
-    foreach my $column (@columns) {
-        my $def = $dbh->bz_column_info($table, $column);
-        $pk = $column if $def->{PRIMARYKEY};
-    }
-    # If there's no PK, it's defined by a UNIQUE index.
-    if (!$pk) {
-        foreach my $column (@columns) {
-            my $index = $dbh->bz_index_info($table, "${table}_${column}_idx");
-            if ($index && ref($index) eq 'HASH') {
-                $pk = join(',', @{$index->{FIELDS}}) 
-                    if $index->{TYPE} eq 'UNIQUE';
-            }
-        }
-    }
-
-    foreach my $column (@columns) {
-        my $def = $dbh->bz_column_info($table, $column);
-        # If this is a text column, it may need work.
-        if ($def->{TYPE} =~ /text|char/i) {
-            # If there's still no PK, we're upgrading from 2.14 or earlier.
-            # We can't reliably determine the PK (or at least, I don't want to
-            # maintain code to record what the PK was at all points in history).
-            # So instead we just use the field itself.
-            $pk = $column if !$pk;
-
-            print "Converting $table.$column...\n";
-            my $sth = $dbh->prepare("SELECT $column, $pk FROM $table 
-                                      WHERE $column IS NOT NULL
-                                            AND $column != ''");
-
-            my @pk_array = map {"$_ = ?"} split(',', $pk);
-            my $pk_where = join(' AND ', @pk_array);
-            my $update_sth = $dbh->prepare(
-                "UPDATE $table SET $column = ? WHERE $pk_where");
-
-            $sth->execute();
-
-            while (my @result = $sth->fetchrow_array) {
-                my $data = shift @result;
-                # Wide characters cause md5_base64() to die.
-                my $digest_data = utf8::is_utf8($data) 
-                                  ? Encode::encode_utf8($data) : $data;
-                my $digest = md5_base64($digest_data);
-
-                my @primary_keys = reverse split(',', $pk);
-                # We copy the array so that we can pop things from it without
-                # affecting the original.
-                my @pk_data = @result;
-                my $pk_line = join (', ',
-                    map { "$_ = " . pop @pk_data } @primary_keys);
-
-                my $encoding;
-                if ($switch{'guess'}) {
-                    $encoding = detect_encoding($data);
-
-                    # We only show failures if they don't appear to be
-                    # ASCII.
-                    if ($switch{'show-failures'} && !$encoding
-                        && !is_valid_utf8($data)) 
-                    {
-                        my $truncated = trunc($data);
-                        print "Row: [$pk_line]\n",
-                              "Failed to guess: Key: $digest",
-                              " DATA: $truncated\n";
-                    }
-
-                    # If we fail a guess, and the data is valid UTF-8,
-                    # just assume we failed because it's UTF-8.
-                    next if is_valid_utf8($data);
-                }
-
-                # If we couldn't detect the charset (or were instructed
-                # not to try), we fall back to --charset. If there's no 
-                # fallback, we just do nothing.
-                if (!$encoding && $switch{'charset'}) {
-                    $encoding = $switch{'charset'};
-                }
-
-                $encoding = $overrides{$digest} if $overrides{$digest};
-
-                # We only fix it if it's not ASCII or UTF-8 already.
-                if ($encoding && !grep($_ eq $encoding, IGNORE_ENCODINGS)) {
-                    my $decoded = encode('utf8', decode($encoding, $data));
-                    if ($switch{'dry-run'} && $data ne $decoded) {
-                        print "Row:  [$pk_line]\n",
-                              "From: [" . trunc($data) . "] Key: $digest\n",
-                              "To:   [" . trunc($decoded) . "]",
-                              " Encoding : $encoding\n";
-                    }
-                    else {
-                        $update_sth->execute($decoded, @result);
-                    }
-                }
-            } # while (my @result = $sth->fetchrow_array)
-        } # if ($column->{TYPE} =~ /text|char/i)
-    } # foreach my $column (@columns)
-}
-
-$dbh->commit;
-
-__END__
-
-=head1 NAME
-
-recode.pl - Converts a database from one encoding (or multiple encodings) 
-to UTF-8.
-
-=head1 SYNOPSIS
-
- contrib/recode.pl [--guess [--show-failures]] [--charset=iso-8859-2]
-                   [--overrides=file_name]
-
-  --dry-run        Don't modify the database.
-
-  --charset        Primary charset your data is currently in. This can be
-                   optionally omitted if you do --guess.
-
-  --guess          Try to guess the charset of the data.
-
-  --show-failures  If we fail to guess, show where we failed.
-
-  --overrides      Specify a file containing overrides. See --help
-                   for more info.
-
-  --help           Display detailed help.
-
- If you aren't sure what to do, try:
-
-   contrib/recode.pl --guess --charset=cp1252
-
-=head1 OPTIONS
-
-=over
-
-=item --dry-run
-
-Don't modify the database, just print out what the conversions will be.
-
-recode.pl will print out a Key for each item. You can use this in the 
-overrides file, described below.
-
-=item --guess 
-
-If your database is in multiple different encodings, specify this switch 
-and recode.pl will do its best to determine the original charset of the data.
-The detection is usually very reliable.
-
-If recode.pl cannot guess the charset, it will leave the data alone, unless
-you've specified --charset.
-
-=item --charset=charset-name
-
-If you do not specify --guess, then your database is converted
-from this character set into the UTF-8.
-
-If you have specified --guess, recode.pl will use this charset as
-a fallback--when it cannot guess the charset of a particular piece
-of data, it will guess that the data is in this charset and convert
-it from this charset to UTF-8.
-
-charset-name must be a charset that is known to perl's Encode 
-module. To see a list of available charsets, do: 
-
-C<perl -MEncode -e 'print join("\n", Encode-E<gt>encodings(":all"))'>
-
-=item --show-failures
-
-If --guess fails to guess a charset, print out the data it failed on.
-
-=item --overrides=file_name
-
-This is a way of specifying certain encodings to override the encodings of 
---guess. The file is a series of lines. The line should start with the Key 
-from --dry-run, and then a space, and then the encoding you'd like to use.
-
-=back
-- 
cgit v1.2.3-24-g4f1b