From 2bd3296eaa42479b1dd0aa5c12a442a86140b02c Mon Sep 17 00:00:00 2001 From: Byron Jones Date: Tue, 14 Jul 2015 14:55:43 +0800 Subject: Bug 1180570 - store attachment size in the database --- Bugzilla/Attachment.pm | 61 +++++++---------------------------------- Bugzilla/Install/DB.pm | 2 +- extensions/Push/t/ReviewBoard.t | 20 ++++++++------ scripts/sanitizeme.pl | 3 +- 4 files changed, 24 insertions(+), 62 deletions(-) diff --git a/Bugzilla/Attachment.pm b/Bugzilla/Attachment.pm index 625d1cccf..95e6b2977 100644 --- a/Bugzilla/Attachment.pm +++ b/Bugzilla/Attachment.pm @@ -89,6 +89,7 @@ use constant DB_COLUMNS => qw( mimetype modification_time submitter_id + attach_size ); use constant REQUIRED_FIELD_MAP => { @@ -359,45 +360,14 @@ sub data { =item C -the length (in characters) of the attachment content +the length (in bytes) of the attachment content =back =cut -# datasize is a property of the data itself, and it's unclear whether we should -# expose it at all, since you can easily derive it from the data itself: in TT, -# attachment.data.size; in Perl, length($attachment->{data}). But perhaps -# it makes sense for performance reasons, since accessing the data forces it -# to get retrieved from the database/filesystem and loaded into memory, -# while datasize avoids loading the attachment into memory, calling SQL's -# LENGTH() function or stat()ing the file instead. I've left it in for now. - sub datasize { - my $self = shift; - return $self->{datasize} if defined $self->{datasize}; - - # If we have already retrieved the data, return its size. - return length($self->{data}) if exists $self->{data}; - - $self->{datasize} = - Bugzilla->dbh->selectrow_array("SELECT LENGTH(thedata) - FROM attach_data - WHERE id = ?", - undef, $self->id) || 0; - - # If there's no attachment data in the database, either the attachment - # is stored in a local file, and so retrieve its size from the file, - # or the attachment has been deleted. - unless ($self->{datasize}) { - if (open(AH, '<', $self->_get_local_filename())) { - binmode AH; - $self->{datasize} = (stat(AH))[7]; - close(AH); - } - } - - return $self->{datasize}; + return $_[0]->{attach_size}; } =over @@ -566,18 +536,18 @@ sub _check_data { my ($invocant, $params) = @_; my $data = $params->{data}; - $params->{filesize} = ref $data ? -s $data : length($data); + $params->{attach_size} = ref $data ? -s $data : length($data); Bugzilla::Hook::process('attachment_process_data', { data => \$data, attributes => $params }); - $params->{filesize} || ThrowUserError('zero_length_file'); + $params->{attach_size} || ThrowUserError('zero_length_file'); # Make sure the attachment does not exceed the maximum permitted size. my $max_size = max(Bugzilla->params->{'maxlocalattachment'} * 1048576, Bugzilla->params->{'maxattachmentsize'} * 1024); - if ($params->{filesize} > $max_size) { - my $vars = { filesize => sprintf("%.0f", $params->{filesize}/1024) }; + if ($params->{attach_size} > $max_size) { + my $vars = { filesize => sprintf("%.0f", $params->{attach_size}/1024) }; ThrowUserError('file_too_large', $vars); } return $data; @@ -703,16 +673,6 @@ sub get_attachments_by_bug { foreach my $attachment (@$attachments) { $attachment->{attacher} = $user_map{$attachment->{submitter_id}}; } - - # Preload datasizes. - my $sizes = - $dbh->selectall_hashref('SELECT attach_id, LENGTH(thedata) AS datasize - FROM attachments LEFT JOIN attach_data ON attach_id = id - WHERE bug_id = ?', - 'attach_id', undef, $bug->id); - - # Force the size of attachments not in the DB to be recalculated. - $_->{datasize} = $sizes->{$_->id}->{datasize} || undef foreach @$attachments; } return $attachments; } @@ -842,13 +802,12 @@ sub create { my $bug = delete $params->{bug}; $params->{bug_id} = $bug->id; my $data = delete $params->{data}; - my $size = delete $params->{filesize}; my $attachment = $class->insert_create_data($params); my $attachid = $attachment->id; # The file is too large to be stored in the DB, so we store it locally. - if ($size > Bugzilla->params->{'maxattachmentsize'} * 1024) { + if ($params->{attach_size} > Bugzilla->params->{'maxattachmentsize'} * 1024) { my $attachdir = bz_locations()->{'attachdir'}; my $hash = ($attachid % 100) + 100; $hash =~ s/.*(\d\d)$/group.$1/; @@ -966,8 +925,8 @@ sub remove_from_db { $dbh->do('DELETE FROM flags WHERE ' . $dbh->sql_in('id', $flag_ids)) if @$flag_ids; $dbh->do('DELETE FROM attach_data WHERE id = ?', undef, $self->id); - $dbh->do('UPDATE attachments SET mimetype = ?, ispatch = ?, isobsolete = ? - WHERE attach_id = ?', undef, ('text/plain', 0, 1, $self->id)); + $dbh->do('UPDATE attachments SET mimetype = ?, ispatch = ?, isobsolete = ?, attach_size = ? + WHERE attach_id = ?', undef, ('text/plain', 0, 1, 0, $self->id)); $dbh->bz_commit_transaction(); # As we don't call SUPER->remove_from_db we need to manually clear diff --git a/Bugzilla/Install/DB.pm b/Bugzilla/Install/DB.pm index 7b3fb4f7f..92015f22f 100644 --- a/Bugzilla/Install/DB.pm +++ b/Bugzilla/Install/DB.pm @@ -730,7 +730,7 @@ sub update_table_definitions { $dbh->bz_add_index('user_api_keys', 'user_api_keys_user_id_app_id_idx', [qw(user_id app_id)]); - # _add_attach_size(); + _add_attach_size(); ################################################################ # New --TABLE-- changes should go *** A B O V E *** this point # diff --git a/extensions/Push/t/ReviewBoard.t b/extensions/Push/t/ReviewBoard.t index f2a508f59..3eb54760c 100644 --- a/extensions/Push/t/ReviewBoard.t +++ b/extensions/Push/t/ReviewBoard.t @@ -95,15 +95,17 @@ my $dbh = Bugzilla->dbh; $dbh->bz_start_transaction; my $timestamp = $dbh->selectrow_array('SELECT LOCALTIMESTAMP(0)'); my $bug = Bugzilla::Bug->new({id => 9000}); -my $attachment = Bugzilla::Attachment->create( - { bug => $bug, - creation_ts => $timestamp, - data => $data, - filesize => length $data, - description => "rblink.txt", - filename => "rblink.txt", - isprivate => 1, ispatch => 0, - mimetype => 'text/x-review-board-request'}); +my $attachment = Bugzilla::Attachment->create({ + bug => $bug, + creation_ts => $timestamp, + data => $data, + attach_size => length($data), + description => "rblink.txt", + filename => "rblink.txt", + isprivate => 1, + ispatch => 0, + mimetype => 'text/x-review-board-request' +}); diag "".$attachment->id; $dbh->bz_commit_transaction; diff --git a/scripts/sanitizeme.pl b/scripts/sanitizeme.pl index af0c167bf..6d4119905 100755 --- a/scripts/sanitizeme.pl +++ b/scripts/sanitizeme.pl @@ -210,8 +210,9 @@ sub delete_sensitive_user_data { sub delete_attachment_data { # Delete unnecessary attachment data. - print "Removing attachment data to preserve disk space...\n"; + print "Removing attachment data...\n"; $dbh->do("UPDATE attach_data SET thedata = ''"); + $dbh->do("UPDATE attachments SET attach_size = 0"); } sub delete_bug_user_last_visit { -- cgit v1.2.3-24-g4f1b