summaryrefslogtreecommitdiffstats
path: root/application/migrations/014_deduplicate_file_storage.php
blob: 96c041c27fec46dacf87507edb06a78d013b6b81 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
<?php
defined('BASEPATH') OR exit('No direct script access allowed');

class Migration_deduplicate_file_storage extends CI_Migration {

	public function up()
	{
		$prefix = $this->db->dbprefix;

		if ($this->db->dbdriver == 'postgre') {
			$this->db->query('
				CREATE TABLE "'.$prefix.'file_storage" (
					"id" serial NOT NULL,
					"filesize" integer NOT NULL,
					"mimetype" varchar(255) NOT NULL,
					"hash" char(32) NOT NULL,
					"date" integer NOT NULL,
					PRIMARY KEY ("id"),
					UNIQUE ("id", "hash")
				);
			');
			$this->db->query('
				ALTER TABLE "'.$prefix.'files"
				ADD "file_storage_id" integer NULL;
				CREATE INDEX "'.$prefix.'files_file_storage_id_idx" ON "'.$prefix.'files" ("file_storage_id");
			');

			$this->db->query('
				INSERT INTO "'.$prefix.'file_storage" (filesize, mimetype, hash, date)
				SELECT filesize, mimetype, hash, date FROM "'.$prefix.'files";
			');

			$this->db->query('
				UPDATE "'.$prefix.'files" f
				SET file_storage_id = fs.id
				FROM "'.$prefix.'file_storage" fs
				WHERE fs.hash = f.hash
			');

			// remove file_storage rows that are not referenced by files.id
			// AND that are duplicates when grouped by hash
			$this->db->query('
				DELETE
				FROM "'.$prefix.'file_storage" fs
				USING "'.$prefix.'file_storage" fs2
				WHERE fs.hash = fs2.hash
				AND fs.id > fs2.id
				AND fs.id NOT IN (
					SELECT file_storage_id
					FROM "'.$prefix.'files" f
				);
			');
		} else {
			$this->db->query('
				CREATE TABLE `'.$prefix.'file_storage` (
					`id` int(11) NOT NULL AUTO_INCREMENT,
					`filesize` int(11) NOT NULL,
					`mimetype` varchar(255) NOT NULL,
					`hash` char(32) NOT NULL,
					`date` int(11) NOT NULL,
					PRIMARY KEY (`id`),
					UNIQUE KEY `data_id` (`id`, `hash`)
				) ENGINE=InnoDB DEFAULT CHARSET=utf8;
			');
			$this->db->query('
				ALTER TABLE `'.$prefix.'files`
				ADD `file_storage_id` INT NOT NULL,
				ADD INDEX (`file_storage_id`);
			');

			$this->db->query('
				INSERT INTO `'.$prefix.'file_storage` (id, filesize, mimetype, hash, date)
				SELECT NULL, filesize, mimetype, hash, date FROM `'.$prefix.'files`;
			');

			$this->db->query('
				UPDATE `'.$prefix.'files` f
				JOIN `'.$prefix.'file_storage` fs ON fs.hash = f.hash
				SET f.file_storage_id = fs.id
			');

			// XXX: This query also exists in migration 15
			$this->db->query('
				DELETE `'.$prefix.'file_storage`
				FROM `'.$prefix.'file_storage`
				LEFT OUTER JOIN `'.$prefix.'files` ON `'.$prefix.'files`.file_storage_id = `'.$prefix.'file_storage`.id
				WHERE `'.$prefix.'file_storage`.id NOT IN (
					SELECT min(x.id)
					FROM (
						SELECT fs.id, fs.hash
						FROM `'.$prefix.'file_storage` fs
					) x
					GROUP BY x.hash
				)
				AND `'.$prefix.'files`.id IS NULL
				');
		}

		$chunk = 500;
		$total = $this->db->count_all("file_storage");

		for ($limit = 0; $limit < $total; $limit += $chunk) {
			$query = $this->db->select('hash, id')
				->from('file_storage')
				->limit($chunk, $limit)
				->get()->result_array();

			foreach ($query as $key => $item) {
				$old = $this->mfile->file($item["hash"]);
				$data_id = $item["hash"].'-'.$item["id"];
				$new = $this->mfile->file($data_id);
				if (file_exists($old)) {
					rename($old, $new);
				} else {
					echo "Warning: no file found for $data_id. Skipping...\n";
				}
			}
		}

		if ($this->db->dbdriver == 'postgre') {
			$this->db->query('
				ALTER TABLE "'.$prefix.'files"
				ADD FOREIGN KEY ("file_storage_id") REFERENCES "'.$prefix.'file_storage"("id") ON DELETE CASCADE ON UPDATE CASCADE;
			');
		} else {
			$this->db->query('
				ALTER TABLE `'.$prefix.'files`
				ADD FOREIGN KEY (`file_storage_id`) REFERENCES `'.$prefix.'file_storage`(`id`) ON DELETE CASCADE ON UPDATE CASCADE;
			');
		}

		$this->dbforge->drop_column("files", "hash");
		$this->dbforge->drop_column("files", "mimetype");
		$this->dbforge->drop_column("files", "filesize");
	}

	public function down()
	{
		throw new \exceptions\ApiException("migration/downgrade-not-supported", "downgrade not supported");
	}
}