# From 1f30fac936a3b0905e736dd86e559e33caf036ac Mon Sep 17 00:00:00 2001
# From: David Lawrence
# Date: Wed, 10 Aug 2011 18:26:03 -0400
# Subject: Initial checkin of bmo/4.0 extensions. Still todo: port changes to
#  core Bugzilla code
# ---
#  extensions/SiteMapIndex/lib/Constants.pm |  47 +++++++
#  extensions/SiteMapIndex/lib/Util.pm      | 205 +++++++++++++++++++++++++++++++
#  2 files changed, 252 insertions(+)
#  create mode 100644 extensions/SiteMapIndex/lib/Constants.pm
#  create mode 100644 extensions/SiteMapIndex/lib/Util.pm
#
# (limited to 'extensions/SiteMapIndex/lib')
#
# diff --git a/extensions/SiteMapIndex/lib/Constants.pm b/extensions/SiteMapIndex/lib/Constants.pm
# new file mode 100644
# index 000000000..fce858121
# --- /dev/null
# +++ b/extensions/SiteMapIndex/lib/Constants.pm
# @@ -0,0 +1,47 @@

# -*- Mode: perl; indent-tabs-mode: nil -*-
#
# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is the Sitemap Bugzilla Extension.
#
# The Initial Developer of the Original Code is Everything Solved, Inc.
# Portions created by the Initial Developer are Copyright (C) 2010 the
# Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Max Kanat-Alexander

package Bugzilla::Extension::SiteMapIndex::Constants;
use strict;
use base qw(Exporter);

# Every constant below is exported by default.
our @EXPORT = qw(SITEMAP_AGE SITEMAP_MAX SITEMAP_DELAY SITEMAP_URL);

use constant {
    # Number of hours a sitemap index and its files are considered valid
    # before they need to be regenerated.
    SITEMAP_AGE => 12,

    # Largest number of entries allowed in a single sitemap file, per the
    # sitemaps.org standard.
    SITEMAP_MAX => 50_000,

    # Only bugs that are at least this many hours old are shown: if somebody
    # files a security bug but forgets to protect it, this gives them time
    # to fix that before the bug is advertised.
    SITEMAP_DELAY => 12,

    # Location of the sitemap page, relative to the Bugzilla base URL.
    SITEMAP_URL => 'page.cgi?id=sitemap/sitemap.xml',
};

1;

# diff --git a/extensions/SiteMapIndex/lib/Util.pm b/extensions/SiteMapIndex/lib/Util.pm
# new file mode 100644
# index 000000000..3c322d8c7
# --- /dev/null
# +++ b/extensions/SiteMapIndex/lib/Util.pm
# @@ -0,0 +1,205 @@

# -*- Mode: perl; indent-tabs-mode: nil -*-
#
# The contents of this file are subject to the Mozilla Public
# License Version 1.1 (the "License"); you may not use this file
# except in compliance with the License. You may obtain a copy of
# the License at http://www.mozilla.org/MPL/
#
# Software distributed under the License is distributed on an "AS
# IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
# implied. See the License for the specific language governing
# rights and limitations under the License.
#
# The Original Code is the Sitemap Bugzilla Extension.
#
# The Initial Developer of the Original Code is Everything Solved, Inc.
# Portions created by the Initial Developer are Copyright (C) 2010 the
# Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Max Kanat-Alexander
#   Dave Lawrence

package Bugzilla::Extension::SiteMapIndex::Util;
use strict;
use warnings;
use base qw(Exporter);
our @EXPORT = qw(
    generate_sitemap
    bug_is_ok_to_index
);

use Bugzilla::Extension::SiteMapIndex::Constants;

use Bugzilla::Util qw(correct_urlbase datetime_from url_quote);
use Bugzilla::Constants qw(bz_locations);

use Scalar::Util qw(blessed);
use IO::File;
use IO::Compress::Gzip qw(gzip $GzipError);

# NOTE(review): DateTime and the Bugzilla class are used below without being
# loaded here; presumably the surrounding Bugzilla code has already loaded
# them by the time this extension runs -- confirm.

# Returns the full path of $file_name inside this extension's directory
# under Bugzilla's data directory.
sub _sitemap_path {
    my ($extension_name, $file_name) = @_;
    return bz_locations->{'datadir'} . "/$extension_name/$file_name";
}

# Returns a DateTime, in Bugzilla's local time zone, that lies SITEMAP_DELAY
# hours in the past. Bugs created after this moment are too young to be
# listed in the sitemap.
sub too_young_date {
    my $cutoff = DateTime->now(time_zone => Bugzilla->local_timezone);
    $cutoff->subtract(hours => SITEMAP_DELAY);
    return $cutoff;
}

# Returns 1 if $bug may be indexed, 0 otherwise.
#
# Anything that is not a Bugzilla::Bug object is always considered OK;
# a Bugzilla::Bug is OK only once its creation timestamp is older than
# too_young_date().
sub bug_is_ok_to_index {
    my ($bug) = @_;
    return 1 unless blessed($bug) && $bug->isa('Bugzilla::Bug');
    my $creation_ts = datetime_from($bug->creation_ts);
    # Both operands are expected to be DateTime objects here, so "lt" relies
    # on DateTime's overloaded comparison rather than on plain strings.
    return ($creation_ts lt too_young_date()) ? 1 : 0;
}

# Returns the XML of the sitemap index for the extension $extension_name.
#
# We put two things in the Sitemap: a list of Browse links for products,
# and links to bugs.
#
# If the index on disk is less than SITEMAP_AGE hours old it is returned
# as-is. Otherwise the gzipped sitemap files and the index are regenerated
# and the new index is returned. Dies if a file cannot be read or written.
sub generate_sitemap {
    my ($extension_name) = @_;

    my $index_file = _sitemap_path($extension_name, 'sitemap_index.xml');
    if (-e $index_file) {
        my $index_mtime = (stat($index_file))[9];
        my $index_hours = int((time() - $index_mtime) / 60 / 60);
        if ($index_hours < SITEMAP_AGE) {
            my $index_fh = IO::File->new($index_file, 'r')
                or die "Could not open current sitemap index: $!";
            my $index_xml = do { local $/; <$index_fh> };
            $index_fh->close()
                or die "Could not close current sitemap index: $!";

            return $index_xml;
        }
    }

    # Set the atime and mtime of the index file to the current time
    # in case another request is made before we finish. If the file does
    # not exist yet this simply fails, which is harmless.
    utime(undef, undef, $index_file);

    # Sitemaps must never contain private data.
    Bugzilla->logout_request();
    my $user     = Bugzilla->user;
    my $products = $user->get_accessible_products;

    # The product links go into the first file, so leave room for them.
    my $num_bugs = SITEMAP_MAX - scalar(@$products);

    # We do this date math outside of the database because databases
    # usually do better with a straight comparison value.
    my $hours_ago = too_young_date();

    # We don't use Bugzilla::Bug objects, because this could be a tremendous
    # amount of data, and we only want a little. Also, we only display
    # bugs that are not in any group. The bugs are fetched in pages of
    # $num_bugs rows; the explicit ORDER BY keeps the pages stable, so that
    # LIMIT/OFFSET neither repeats nor skips a bug between two pages.
    my $dbh = Bugzilla->dbh;
    my $bug_sth = $dbh->prepare(
        'SELECT bugs.bug_id, bugs.delta_ts
           FROM bugs
                LEFT JOIN bug_group_map ON bugs.bug_id = bug_group_map.bug_id
          WHERE bug_group_map.bug_id IS NULL AND bugs.creation_ts < ?
       ORDER BY bugs.bug_id
                ' . $dbh->sql_limit($num_bugs, '?'));

    my @filelist;
    my $filecount = 1;
    my $offset    = 0;

    while (1) {
        $bug_sth->execute($hours_ago, $offset);

        my @bugs;
        while (my ($bug_id, $delta_ts) = $bug_sth->fetchrow_array()) {
            push(@bugs, { bug_id => $bug_id, delta_ts => $delta_ts });
        }

        # An empty page means every matching bug has been written out.
        last if !@bugs;

        # We only need the product links in the first sitemap file
        $products = [] if $filecount > 1;

        push(@filelist,
             _generate_sitemap_file($extension_name, $filecount,
                                    $products, \@bugs));

        $filecount++;
        $offset += $num_bugs;
    }

    # Generate index file
    return _generate_sitemap_index($extension_name, \@filelist);
}

# Builds the sitemap index (sitemaps.org 0.9 schema) that lists every file
# name in $filelist, writes it to sitemap_index.xml in the extension's data
# directory and returns the XML. Dies if the file cannot be written.
sub _generate_sitemap_index {
    my ($extension_name, $filelist) = @_;

    my $dbh = Bugzilla->dbh;
    my $timestamp = $dbh->selectrow_array(
        "SELECT " . $dbh->sql_date_format('NOW()', '%Y-%m-%d'));

    my $index_xml = <<END;
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
END

    my $file_url = correct_urlbase() . "data/$extension_name/";
    foreach my $filename (@$filelist) {
        $index_xml .= "
  <sitemap>
    <loc>" . $file_url . "$filename</loc>
    <lastmod>$timestamp</lastmod>
  </sitemap>
";
    }

    $index_xml .= <<END;
</sitemapindex>
END

    my $index_file = _sitemap_path($extension_name, 'sitemap_index.xml');
    my $index_fh = IO::File->new($index_file, 'w')
        or die "Could not open new sitemap index: $!";
    print {$index_fh} $index_xml
        or die "Could not write new sitemap index: $!";
    $index_fh->close() or die "Could not close new sitemap index: $!";

    return $index_xml;
}

# Builds one sitemap file (sitemaps.org 0.9 schema) holding a Browse link
# for every product in $products and a link for every bug in $bugs, writes
# it gzip-compressed as sitemap$filecount.xml.gz and returns that file name.
# Dies if the compressed file cannot be written.
sub _generate_sitemap_file {
    my ($extension_name, $filecount, $products, $bugs) = @_;

    my $bug_url     = correct_urlbase() . 'show_bug.cgi?id=';
    my $product_url = correct_urlbase() . 'describecomponents.cgi?product=';

    my $sitemap_xml = <<END;
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
END

    foreach my $product (@$products) {
        $sitemap_xml .= "
  <url>
    <loc>" . $product_url . url_quote($product->name) . "</loc>
    <changefreq>daily</changefreq>
    <priority>0.4</priority>
  </url>
";
    }

    foreach my $bug (@$bugs) {
        $sitemap_xml .= "
  <url>
    <loc>" . $bug_url . $bug->{bug_id} . "</loc>
    <lastmod>" . datetime_from($bug->{delta_ts}, 'UTC')->iso8601 . 'Z' . "</lastmod>
  </url>
";
    }

    $sitemap_xml .= <<END;
</urlset>
END

    # Write the compressed sitemap data to a file in the extension's data
    # directory so that it can be accessed by the search engines.
    my $filename = "sitemap$filecount.xml.gz";
    # "or" rather than "||": the latter would bind to the path string,
    # which is always true, and a gzip failure would go unnoticed.
    gzip(\$sitemap_xml => _sitemap_path($extension_name, $filename))
        or die "gzip failed: $GzipError\n";

    return $filename;
}

1;

# -- cgit v1.2.3-24-g4f1b