summaryrefslogtreecommitdiffstats
path: root/packages/utils.py
blob: 0f47f17021c13beeb4965ad06da5721dfcf1b439 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
from collections import defaultdict
from itertools import chain
from operator import attrgetter, itemgetter
import re

from django.core.serializers.json import DjangoJSONEncoder
from django.db import connection
from django.db.models import Count, Max, F
from django.db.models.query import QuerySet
from django.contrib.auth.models import User

from main.models import Package, PackageFile, Arch, Repo
from main.utils import (database_vendor,
        groupby_preserve_order, PackageStandin)
from .models import (PackageGroup, PackageRelation,
        License, Depend, Conflict, Provision, Replacement,
        SignoffSpecification, Signoff, fake_signoff_spec)


VERSION_RE = re.compile(r'^((\d+):)?(.+)-([^-]+)$')


def parse_version(version):
    match = VERSION_RE.match(version)
    if not match:
        return None, None, 0
    ver = match.group(3)
    rel = match.group(4)
    if match.group(2):
        epoch = int(match.group(2))
    else:
        epoch = 0
    return ver, rel, epoch


def get_group_info(include_arches=None):
    raw_groups = PackageGroup.objects.values_list(
            'name', 'pkg__arch__name').order_by('name').annotate(
            cnt=Count('pkg'), last_update=Max('pkg__last_update'))
    # now for post_processing. we need to seperate things out and add
    # the count in for 'any' to all of the other architectures.
    group_mapping = {}
    for grp in raw_groups:
        arch_groups = group_mapping.setdefault(grp[1], {})
        arch_groups[grp[0]] = {'name': grp[0], 'arch': grp[1],
                'count': grp[2], 'last_update': grp[3]}

    # we want to promote the count of 'any' packages in groups to the
    # other architectures, and also add any 'any'-only groups
    if 'any' in group_mapping:
        any_groups = group_mapping['any']
        del group_mapping['any']
        for arch, arch_groups in group_mapping.iteritems():
            for grp in any_groups.itervalues():
                if grp['name'] in arch_groups:
                    found = arch_groups[grp['name']]
                    found['count'] += grp['count']
                    if grp['last_update'] > found['last_update']:
                        found['last_update'] = grp['last_update']
                else:
                    new_g = grp.copy()
                    # override the arch to not be 'any'
                    new_g['arch'] = arch
                    arch_groups[grp['name']] = new_g

    # now transform it back into a sorted list, including only the specified
    # architectures if we got a list
    groups = []
    for key, val in group_mapping.iteritems():
        if not include_arches or key in include_arches:
            groups.extend(val.itervalues())
    return sorted(groups, key=itemgetter('name', 'arch'))


def get_split_packages_info():
    '''Return info on split packages that do not have an actual package name
    matching the split pkgbase.'''
    pkgnames = Package.objects.values('pkgname')
    split_pkgs = Package.objects.exclude(pkgname=F('pkgbase')).exclude(
            pkgbase__in=pkgnames).values('pkgbase', 'repo', 'arch').annotate(
            last_update=Max('last_update')).order_by().distinct()
    all_arches = Arch.objects.in_bulk({s['arch'] for s in split_pkgs})
    all_repos = Repo.objects.in_bulk({s['repo'] for s in split_pkgs})
    for split in split_pkgs:
        split['arch'] = all_arches[split['arch']]
        split['repo'] = all_repos[split['repo']]
    return split_pkgs


class Difference(object):
    def __init__(self, pkgname, repo, pkg_a, pkg_b):
        self.pkgname = pkgname
        self.repo = repo
        self.pkg_a = pkg_a
        self.pkg_b = pkg_b

    def classes(self):
        '''A list of CSS classes that should be applied to this row in any
        generated HTML. Useful for sorting, filtering, etc. Contains whether
        this difference is in both architectures or the sole architecture it
        belongs to, as well as the repo name.'''
        css_classes = [self.repo.name.lower()]
        if self.pkg_a and self.pkg_b:
            css_classes.append('both')
        elif self.pkg_a:
            css_classes.append(self.pkg_a.arch.name)
        elif self.pkg_b:
            css_classes.append(self.pkg_b.arch.name)
        return ' '.join(css_classes)

    def __key(self):
        return (self.pkgname, hash(self.repo),
                hash(self.pkg_a), hash(self.pkg_b))

    def __eq__(self, other):
        return self.__key() == other.__key()

    def __hash__(self):
        return hash(self.__key())


def get_differences_info(arch_a, arch_b):
    # This is a monster. Join packages against itself, looking for packages in
    # our non-'any' architectures only, and not having a corresponding package
    # entry in the other table (or having one with a different pkgver). We will
    # then go and fetch all of these packages from the database and display
    # them later using normal ORM models.
    sql = """
SELECT p.id, q.id
    FROM packages p
    LEFT JOIN packages q
    ON (
        p.pkgname = q.pkgname
        AND p.repo_id = q.repo_id
        AND p.arch_id != q.arch_id
        AND p.id != q.id
    )
    WHERE p.arch_id IN (%s, %s)
    AND (
        q.id IS NULL
        OR p.pkgver != q.pkgver
        OR p.pkgrel != q.pkgrel
        OR p.epoch != q.epoch
    )
"""
    cursor = connection.cursor()
    cursor.execute(sql, [arch_a.id, arch_b.id])
    results = cursor.fetchall()
    # column A will always have a value, column B might be NULL
    to_fetch = {row[0] for row in results}
    # fetch all of the necessary packages
    pkgs = Package.objects.normal().in_bulk(to_fetch)
    # now build a set containing differences
    differences = set()
    for row in results:
        pkg_a = pkgs.get(row[0])
        pkg_b = pkgs.get(row[1])
        # We want arch_a to always appear first
        # pkg_a should never be None
        if pkg_a.arch == arch_a:
            item = Difference(pkg_a.pkgname, pkg_a.repo, pkg_a, pkg_b)
        else:
            # pkg_b can be None in this case, so be careful
            name = pkg_a.pkgname if pkg_a else pkg_b.pkgname
            repo = pkg_a.repo if pkg_a else pkg_b.repo
            item = Difference(name, repo, pkg_b, pkg_a)
        differences.add(item)

    # now sort our list by repository, package name
    key_func = attrgetter('repo.name', 'pkgname')
    differences = sorted(differences, key=key_func)
    return differences


def multilib_differences():
    # Query for checking multilib out of date-ness
    if database_vendor(Package) == 'sqlite':
        pkgname_sql = """
            CASE WHEN ml.pkgname LIKE %s
                THEN SUBSTR(ml.pkgname, 7)
            WHEN ml.pkgname LIKE %s
                THEN SUBSTR(ml.pkgname, 1, LENGTH(ml.pkgname) - 9)
            ELSE
                ml.pkgname
            END
        """
    else:
        pkgname_sql = """
            CASE WHEN ml.pkgname LIKE %s
                THEN SUBSTRING(ml.pkgname, 7)
            WHEN ml.pkgname LIKE %s
                THEN SUBSTRING(ml.pkgname FROM 1 FOR CHAR_LENGTH(ml.pkgname) - 9)
            ELSE
                ml.pkgname
            END
        """
    sql = """
SELECT ml.id, reg.id
    FROM packages ml
    JOIN packages reg
    ON (
        reg.pkgname = (""" + pkgname_sql + """)
        AND reg.pkgver != ml.pkgver
    )
    JOIN repos r ON reg.repo_id = r.id
    WHERE ml.repo_id = %s
    AND r.testing = %s
    AND r.staging = %s
    AND reg.arch_id = %s
    ORDER BY ml.last_update
    """
    multilib = Repo.objects.get(name__iexact='multilib')
    i686 = Arch.objects.get(name='i686')
    params = ['lib32-%', '%-multilib', multilib.id, False, False, i686.id]

    cursor = connection.cursor()
    cursor.execute(sql, params)
    results = cursor.fetchall()

    # fetch all of the necessary packages
    to_fetch = set(chain.from_iterable(results))
    pkgs = Package.objects.normal().in_bulk(to_fetch)

    return [(pkgs[ml], pkgs[reg]) for ml, reg in results]


def get_wrong_permissions():
    sql = """
SELECT DISTINCT id
    FROM (
        SELECT pr.id, p.repo_id, pr.user_id
        FROM packages p
        JOIN packages_packagerelation pr ON p.pkgbase = pr.pkgbase
        WHERE pr.type = %s
    ) mp
    LEFT JOIN (
        SELECT user_id, repo_id FROM user_profiles_allowed_repos ar
        INNER JOIN user_profiles up ON ar.userprofile_id = up.id
    ) ur
    ON mp.user_id = ur.user_id AND mp.repo_id = ur.repo_id
    WHERE ur.user_id IS NULL;
"""
    cursor = connection.cursor()
    cursor.execute(sql, [PackageRelation.MAINTAINER])
    to_fetch = [row[0] for row in cursor.fetchall()]
    relations = PackageRelation.objects.select_related(
            'user', 'user__userprofile').filter(
            id__in=to_fetch)
    return relations


def attach_maintainers(packages):
    '''Given a queryset or something resembling it of package objects, find all
    the maintainers and attach them to the packages to prevent N+1 query
    cascading.'''
    if isinstance(packages, QuerySet):
        pkgbases = packages.values('pkgbase')
    else:
        packages = list(packages)
        pkgbases = {p.pkgbase for p in packages if p is not None}
    rels = PackageRelation.objects.filter(type=PackageRelation.MAINTAINER,
            pkgbase__in=pkgbases).values_list(
            'pkgbase', 'user_id').order_by().distinct()

    # get all the user objects we will need
    user_ids = {rel[1] for rel in rels}
    users = User.objects.in_bulk(user_ids)

    # now build a pkgbase -> [maintainers...] map
    maintainers = defaultdict(list)
    for rel in rels:
        maintainers[rel[0]].append(users[rel[1]])

    annotated = []
    # and finally, attach the maintainer lists on the original packages
    for package in packages:
        if package is None:
            continue
        package.maintainers = maintainers[package.pkgbase]
        annotated.append(package)

    return annotated


def approved_by_signoffs(signoffs, spec):
    if signoffs:
        good_signoffs = sum(1 for s in signoffs if not s.revoked)
        return good_signoffs >= spec.required
    return False


class PackageSignoffGroup(object):
    '''Encompasses all packages in testing with the same pkgbase.'''
    def __init__(self, packages):
        if len(packages) == 0:
            raise Exception
        self.packages = packages
        self.user = None
        self.target_repo = None
        self.signoffs = set()
        self.default_spec = True

        first = packages[0]
        self.pkgbase = first.pkgbase
        self.arch = first.arch
        self.repo = first.repo
        self.version = ''
        self.last_update = first.last_update
        self.packager = first.packager
        self.maintainers = first.maintainers
        self.specification = fake_signoff_spec(first.arch)

        version = first.full_version
        if all(version == pkg.full_version for pkg in packages):
            self.version = version

    @property
    def package(self):
        '''Try and return a relevant single package object representing this
        group. Start by seeing if there is only one package, then look for the
        matching package by name, finally falling back to a standin package
        object.'''
        if len(self.packages) == 1:
            return self.packages[0]

        same_pkgs = [p for p in self.packages if p.pkgname == p.pkgbase]
        if same_pkgs:
            return same_pkgs[0]

        return PackageStandin(self.packages[0])

    def find_signoffs(self, all_signoffs):
        '''Look through a list of Signoff objects for ones matching this
        particular group and store them on the object.'''
        for s in all_signoffs:
            if s.pkgbase != self.pkgbase:
                continue
            if self.version and not s.full_version == self.version:
                continue
            if s.arch_id == self.arch.id and s.repo_id == self.repo.id:
                self.signoffs.add(s)

    def find_specification(self, specifications):
        for spec in specifications:
            if spec.pkgbase != self.pkgbase:
                continue
            if self.version and not spec.full_version == self.version:
                continue
            if spec.arch_id == self.arch.id and spec.repo_id == self.repo.id:
                self.specification = spec
                self.default_spec = False
                return

    def approved(self):
        return approved_by_signoffs(self.signoffs, self.specification)

    @property
    def completed(self):
        return sum(1 for s in self.signoffs if not s.revoked)

    @property
    def required(self):
        return self.specification.required

    def user_signed_off(self, user=None):
        '''Did a given user signoff on this package? user can be passed as an
        argument, or attached to the group object itself so this can be called
        from a template.'''
        if user is None:
            user = self.user
        return user in (s.user for s in self.signoffs if not s.revoked)

    def __unicode__(self):
        return u'%s-%s (%s): %d' % (
                self.pkgbase, self.version, self.arch, len(self.signoffs))


def signoffs_id_query(model, repos):
    sql = """
SELECT DISTINCT s.id
    FROM %s s
    JOIN packages p ON (
        s.pkgbase = p.pkgbase
        AND s.pkgver = p.pkgver
        AND s.pkgrel = p.pkgrel
        AND s.epoch = p.epoch
        AND s.arch_id = p.arch_id
        AND s.repo_id = p.repo_id
    )
    WHERE p.repo_id IN (%s)
    AND s.repo_id IN (%s)
    """
    cursor = connection.cursor()
    # query pre-process- fill in table name and placeholders for IN
    repo_sql = ','.join(['%s' for _ in repos])
    sql = sql % (model._meta.db_table, repo_sql, repo_sql)
    repo_ids = [r.pk for r in repos]
    # repo_ids are needed twice, so double the array
    cursor.execute(sql, repo_ids * 2)

    results = cursor.fetchall()
    return [row[0] for row in results]


def get_current_signoffs(repos):
    '''Returns a list of signoff objects for the given repos.'''
    to_fetch = signoffs_id_query(Signoff, repos)
    return Signoff.objects.select_related('user').in_bulk(to_fetch).values()


def get_current_specifications(repos):
    '''Returns a list of signoff specification objects for the given repos.'''
    to_fetch = signoffs_id_query(SignoffSpecification, repos)
    return SignoffSpecification.objects.select_related('arch').in_bulk(
            to_fetch).values()


def get_target_repo_map(repos):
    sql = """
SELECT DISTINCT p1.pkgbase, r.name
    FROM packages p1
    JOIN repos r ON p1.repo_id = r.id
    JOIN packages p2 ON p1.pkgbase = p2.pkgbase
    WHERE r.staging = %s
    AND r.testing = %s
    AND p2.repo_id IN (
    """
    sql += ','.join(['%s' for _ in repos])
    sql += ")"

    params = [False, False]
    params.extend(r.pk for r in repos)

    cursor = connection.cursor()
    cursor.execute(sql, params)
    return dict(cursor.fetchall())


def get_signoff_groups(repos=None, user=None):
    if repos is None:
        repos = Repo.objects.filter(testing=True)
    repo_ids = [r.pk for r in repos]

    test_pkgs = Package.objects.select_related(
            'arch', 'repo', 'packager').filter(repo__in=repo_ids)
    packages = test_pkgs.order_by('pkgname')
    packages = attach_maintainers(packages)

    # Filter by user if asked to do so
    if user is not None:
        packages = [p for p in packages if user == p.packager
                or user in p.maintainers]

    # Collect all pkgbase values in testing repos
    pkgtorepo = get_target_repo_map(repos)

    # Collect all possible signoffs and specifications for these packages
    signoffs = get_current_signoffs(repos)
    specs = get_current_specifications(repos)

    same_pkgbase_key = lambda x: (x.repo.name, x.arch.name, x.pkgbase)
    grouped = groupby_preserve_order(packages, same_pkgbase_key)
    signoff_groups = []
    for group in grouped:
        signoff_group = PackageSignoffGroup(group)
        signoff_group.target_repo = pkgtorepo.get(signoff_group.pkgbase,
                "Unknown")
        signoff_group.find_signoffs(signoffs)
        signoff_group.find_specification(specs)
        signoff_groups.append(signoff_group)

    return signoff_groups


class PackageJSONEncoder(DjangoJSONEncoder):
    pkg_attributes = ['pkgname', 'pkgbase', 'repo', 'arch', 'pkgver',
            'pkgrel', 'epoch', 'pkgdesc', 'url', 'filename', 'compressed_size',
            'installed_size', 'build_date', 'last_update', 'flag_date',
            'maintainers', 'packager']
    pkg_list_attributes = ['groups', 'licenses', 'conflicts',
            'provides', 'replaces', 'depends']

    def default(self, obj):
        if hasattr(obj, '__iter__'):
            # mainly for queryset serialization
            return list(obj)
        if isinstance(obj, Package):
            data = {attr: getattr(obj, attr) for attr in self.pkg_attributes}
            for attr in self.pkg_list_attributes:
                data[attr] = getattr(obj, attr).all()
            return data
        if isinstance(obj, PackageFile):
            filename = obj.filename or ''
            return obj.directory + filename
        if isinstance(obj, (Repo, Arch)):
            return obj.name.lower()
        if isinstance(obj, (PackageGroup, License)):
            return obj.name
        if isinstance(obj, (Depend, Conflict, Provision, Replacement)):
            return unicode(obj)
        elif isinstance(obj, User):
            return obj.username
        return super(PackageJSONEncoder, self).default(obj)

# vim: set ts=4 sw=4 et: