diff options
author | Dylan William Hardison <dylan@hardison.net> | 2018-02-13 16:17:20 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-13 16:17:20 +0100 |
commit | 95e5e0588df0057fbefd3a05e6203c7f8e92ab09 (patch) | |
tree | 9e659a6608402a73ebdb49ca06c289af9c349e6e /contrib/secbugstats | |
parent | 2b916fccae0df60b350369c6fc827c1c9ce1030e (diff) | |
download | bugzilla-95e5e0588df0057fbefd3a05e6203c7f8e92ab09.tar.gz bugzilla-95e5e0588df0057fbefd3a05e6203c7f8e92ab09.tar.xz |
Bug 1343248 - Migrate secbugstats scripts to bmo production
Diffstat (limited to 'contrib/secbugstats')
-rw-r--r-- | contrib/secbugstats/README.md | 26 | ||||
-rwxr-xr-x | contrib/secbugstats/bin/bugdata.py | 144 | ||||
-rwxr-xr-x | contrib/secbugstats/bin/buglife.py | 54 | ||||
-rwxr-xr-x | contrib/secbugstats/bin/curlbug.py | 78 | ||||
-rwxr-xr-x | contrib/secbugstats/bin/fillbugtable.py | 41 | ||||
-rwxr-xr-x | contrib/secbugstats/bin/gather.sh | 58 | ||||
-rwxr-xr-x | contrib/secbugstats/bin/morestats.py | 106 | ||||
-rwxr-xr-x | contrib/secbugstats/bin/repairtables.sh | 12 | ||||
-rwxr-xr-x | contrib/secbugstats/bin/sendstats.py | 307 | ||||
-rw-r--r-- | contrib/secbugstats/bin/settings.py | 119 | ||||
-rwxr-xr-x | contrib/secbugstats/bin/teamgraph.py | 151 | ||||
-rwxr-xr-x | contrib/secbugstats/bin/teamstats.py | 69 | ||||
-rwxr-xr-x | contrib/secbugstats/bin/updatechanged.py | 116 | ||||
-rw-r--r-- | contrib/secbugstats/logging.ini | 21 |
14 files changed, 1302 insertions, 0 deletions
diff --git a/contrib/secbugstats/README.md b/contrib/secbugstats/README.md new file mode 100644 index 000000000..29d347489 --- /dev/null +++ b/contrib/secbugstats/README.md @@ -0,0 +1,26 @@ +# Mozilla Security Bug Stats - Collection and Reporting + +## Summary + +A collection of scripts and CGIs that collects information on Mozilla Security +Bugs, creates web-based charts of the data, and generates a weekly summary +report that is emailed to interested parties. + +## Details + +A good starting place to read this code is the gather.sh shell script. This +script is the driver for the remaining collection and processing scripts which +populate the reports and charts. + +- Collection scripts are run once per week as a cron job +- API results +- logging +- Flash charts +- Flot charts + + +## Required Packages: + +* python +* python-mysqldb +* python-matplotlib diff --git a/contrib/secbugstats/bin/bugdata.py b/contrib/secbugstats/bin/bugdata.py new file mode 100755 index 000000000..df526fd2b --- /dev/null +++ b/contrib/secbugstats/bin/bugdata.py @@ -0,0 +1,144 @@ +#!/usr/bin/python +# +# Pull some data on an individual bug, e.g. creation_time, resolution, etc. +# from the Bugzilla API +# Usage: ./bugdata <bug_id> + +import sys, string, time, re, sys, os, MySQLdb +import simplejson as json +from urllib2 import urlopen +from settings import * + +if "--debug" in sys.argv: + DEBUG = True +else: + DEBUG = False + +# set up database connection +db = MySQLdb.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME) +db.autocommit(True) +cur = db.cursor() + +# list of bug fields we care about +my_bug_fields = ["creation_time"] + +# list of history fields we care about +my_history_fields = ["keywords","resolution"] + +def bugurl(bug): + return "https://api-dev.bugzilla.mozilla.org/latest/bug/%d?%s" % (bug, BZ_AUTH) + +def histurl(bug): + return "https://api-dev.bugzilla.mozilla.org/latest/bug/%s/history?%s" % (bug, BZ_AUTH) + +# convert API timestamp a MySQL datetime +# d fmt: '2010-06-22T14:56:08Z' +def convertDate(d): + old = time.strptime(d, "%Y-%m-%dT%H:%M:%SZ") + return time.strftime("%Y-%m-%d %H:%M:%S", old) + +# parse the severity from the keywords text +# e.g. [sg:high][OOPP] +def getSeverity(s): + if DEBUG: print "getSeverity: ", s + for keyword in s: + if re.match("^sec-", keyword): + sevmatch = re.search("(?<=sec-)[\w]+", keyword) + if DEBUG: print "--> "+sevmatch.group(0) + return sevmatch.group(0) + if DEBUG: print "--> <none>" + return "" + +# get the bug number to process +try: + BUGID = int(sys.argv[1]) +except: + print "Usage: "+sys.argv[0]+" <bug_id>" + sys.exit() + +# get fields from bug table +if DEBUG: print "Fetching %s" % (bugurl(BUGID)) +resp = urlopen( bugurl(BUGID) ) +bugobj = json.loads( resp.read() ) +# bugobj.keys() == ['cf_blocking_193', 'cf_blocking_192', 'cf_blocking_191', +# 'attachments', 'classification', 'cc', 'depends_on', +# 'creation_time', 'is_reporter_accessible', 'keywords', +# 'summary', 'id', 'cf_status_192', 'severity', 'platform', +# 'priority', 'cf_status_193', 'cf_status_191', 'version', +# 'ref', 'status', 'product', 'blocks', 'qa_contact', +# 'reporter', 'is_everconfirmed', 'component', 'groups', +# 'target_milestone', 'is_cc_accessible', 'whiteboard', +# 'last_change_time', 'token', 'flags', 'assigned_to', +# 'resolution', 'op_sys', 'cf_blocking_fennec'] +opendate = convertDate(bugobj["creation_time"]) +summary = bugobj["summary"] +# last severity rating in Bugs table (could be blank) +severity = getSeverity(bugobj["keywords"]) + +# get fields from bug history +resp = urlopen( histurl(BUGID) ) +histobj = json.loads( resp.read() ) + +history = histobj["history"] +# last change to Bugs table +sql = "SELECT updated from secbugs_Bugs where bugid=%s;" % (BUGID) +if DEBUG: print sql +cur.execute(sql) +row = cur.fetchone() +if row: + updated = str(row[0]) +else: + updated = opendate + +# date bug was resolved +closedate = "" + +# history is organized in groups of changes +for group in history: + # group.keys() == ['changes', 'changer', 'change_time'] + # store change time + change_time = convertDate(group["change_time"]) + for change in group["changes"]: + # change ex. {'removed': 'unspecified', 'field_name': 'version', + # 'added': 'Trunk'} + # skip changes we don't care about + if change["field_name"] not in my_history_fields: + continue + + # Look for resolution time + # e.g. resolution - old: '', new: 'FIXED' + if change["field_name"] == "resolution": + if len(change["added"]): + closedate = change_time + # bug was reopened + else: + closedate = "" + + # NOTE: for items that will change one of the Bugs fields, + # make sure to check if change_time > secbugs_Bugs.updated and if so + # update that field with the change time. Right now, only + # keywords is doing so... + + # Use most recent sec- keywords marking to determine severity + elif change["field_name"] == "keywords": + # keep track of last update to Bugs table + # e.g. last severity assigned + #if DEBUG: print "change_time: %s, updated: %s" % (str(change_time), updated) + if change_time > updated: + updated = str(change_time) + severity = getSeverity([change["added"]]) + + # default case: log the change to a field we care about + else: + sql = "INSERT INTO secbugs_BugHistory VALUES (%s, '%s', '%s', '%s', '%s');" % (BUGID, change_time, db.escape_string(change["field_name"]), db.escape_string(change["added"]), db.escape_string(change["removed"])) + if DEBUG: print sql + else: cur.execute(sql) + +# check if our bug has a closedate. If not, we may need to set all open bugs' +# "closedate" with the current date so they align right on the chart (maybe not) +if not len(closedate): + closedate = "0000-00-00 00:00:00" + +sql = "INSERT INTO secbugs_Bugs VALUES (%s, '%s', '%s', '%s', '%s', '%s') ON DUPLICATE KEY UPDATE opendate='%s', closedate='%s', severity='%s', summary='%s', updated='%s';" % (BUGID, opendate, closedate, db.escape_string(severity), db.escape_string(summary), updated, opendate, closedate, db.escape_string(severity), db.escape_string(summary), updated) +if DEBUG: print sql +else: cur.execute(sql) diff --git a/contrib/secbugstats/bin/buglife.py b/contrib/secbugstats/bin/buglife.py new file mode 100755 index 000000000..8610652e4 --- /dev/null +++ b/contrib/secbugstats/bin/buglife.py @@ -0,0 +1,54 @@ +#!/usr/bin/python +# Generate a JSON feed of all the security bug lifespans +# The output is used as the data source for charts/buglife + +import sys, MySQLdb, cgi +from string import join, capitalize, split +from time import mktime, strftime +from datetime import datetime +from settings import * + +if "--debug" in sys.argv: DEBUG = True +else: DEBUG = False + +# set up database connection +db = MySQLdb.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME) +db.autocommit(True) +cur = db.cursor() + +# Bugs sample +# bugid opendate closedate severity summary updated +# 286382 2005-03-16 09:41:00 0000-00-00 00:00:00 critical [Windows] Many insecure uses of LoadLibrary (filename without path) 2010-06-01 20:55:22 +# 552002 2010-03-12 18:57:00 0000-00-00 00:00:00 critical Crash in [@ nsDOMEvent::QueryInterface(nsID const&, void**) ] 2010-06-01 20:38:05 + +def cleanCat(cat): + return cat.capitalize() + +# return a UNIX timestamp +# if the date passed in is None then return a timestamp for today rounded +# to the top of the current hour +def formatDate(d): + if d is None: + d = datetime(int(strftime("%Y")), int(strftime("%m")), int(strftime("%d")), + int(strftime("%H")), 0) + return mktime(d.timetuple()) + +# bug severities to include +severities = ["critical", "high", "moderate", "low"] + +# display JSON +print "{" + +for sev in severities: + print " '%s': [" % (cleanCat(sev)) + sql = "SELECT * from secbugs_Bugs WHERE severity='%s' order by opendate;" % (sev) + cur.execute(sql) + row = cur.fetchone() + while row is not None: + # row e.g. (572428L, datetime.datetime(2010, 6, 16, 16, 6), None, 'critical', 'Crash [@ js_CallGCMarker]', datetime.datetime(2010, 6, 22, 9, 28, 59)) + print " { 'bugid': %s, 'opendate': %s, 'closedate': %s, 'summary': '%s' }," % \ + (str(row[0]), formatDate(row[1]), formatDate(row[2]), cgi.escape(db.escape_string(row[4]))) + row = cur.fetchone() + print " ]," + +print "}" diff --git a/contrib/secbugstats/bin/curlbug.py b/contrib/secbugstats/bin/curlbug.py new file mode 100755 index 000000000..ec4ce8d1d --- /dev/null +++ b/contrib/secbugstats/bin/curlbug.py @@ -0,0 +1,78 @@ +#!/usr/bin/python +# Bugzilla API script that queries for the number of open bugs by category, e.g. +# Critical, High, Moderate, Low, as well as some additional tracking categories. +# Saves the JSON results on the filesystem for further processing + +import httplib, urllib, urllib2, cookielib, string, time, re, sys, os, MySQLdb, \ + simplejson +from base64 import b64decode +from settings import * +import logging +logger = logging.getLogger() + +# set up database connection +db = MySQLdb.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME) +db.autocommit(True) +c = db.cursor() + +if "--debug" in sys.argv: + # store the json files in /tmp and don't run SQL + DEBUG = True + JSONLOCATION = "/tmp" +else: + DEBUG = False + +opener = urllib2.build_opener(urllib2.HTTPCookieProcessor()) + +def fetchBugzillaPage(path): + url = "https://api-dev.bugzilla.mozilla.org/latest/bug?%s&%s" % (path, BZ_AUTH) + if DEBUG: print url + return opener.open(url).read() + +# Queries to run: +# Keys are the category of bugs and values are the query params to send to the +# Bugzilla API. +tocheck = {"sg_critical" : "keywords=sec-critical;keywords_type=allwords;bug_status=UNCONFIRMED;bug_status=NEW;bug_status=ASSIGNED;bug_status=REOPENED;classification=Client%20Software;classification=Components", + "sg_high" : "keywords=sec-high;keywords_type=allwords;bug_status=UNCONFIRMED;bug_status=NEW;bug_status=ASSIGNED;bug_status=REOPENED;classification=Client%20Software;classification=Components", + "sg_moderate" : "keywords=sec-moderate;keywords_type=allwords;bug_status=UNCONFIRMED;bug_status=NEW;bug_status=ASSIGNED;bug_status=REOPENED;classification=Client%20Software;classification=Components", + "sg_low" : "keywords=sec-low;keywords_type=allwords;bug_status=UNCONFIRMED;bug_status=NEW;bug_status=ASSIGNED;bug_status=REOPENED;classification=Client%20Software;classification=Components", + "sg_total" : "keywords=sec-critical%20sec-high%20sec-moderate%20sec-low;keywords_type=anywords;bug_status=UNCONFIRMED;bug_status=NEW;bug_status=ASSIGNED;bug_status=REOPENED;classification=Client%20Software;classification=Components", + "sg_unconfirmed" : "bug_status=UNCONFIRMED;field0-0-0=bug_group;type0-0-1=substring;field0-0-1=status_whiteboard;value0-0-2=sec-;classification=Client%20Software;classification=Components;status_whiteboard_type=notregexp;status_whiteboard=sg%3Aneedinfo;field0-0-2=keywords;value0-0-1=[sg%3A;type0-0-0=equals;value0-0-0=core-security;type0-0-2=substring", + "sg_needstriage" : "type0-1-0=notsubstring;field0-1-0=keywords;field0-0-0=bug_group;status_whiteboard_type=notregexp;value0-1-0=sec-;status_whiteboard=\[sg%3A;bug_status=UNCONFIRMED;bug_status=NEW;bug_status=ASSIGNED;bug_status=REOPENED;type0-0-0=equals;value0-0-0=core-security", + "sg_investigate" : "status_whiteboard=[sg%3Ainvestigat;status_whiteboard_type=allwordssubstr;bug_status=UNCONFIRMED;bug_status=NEW;bug_status=ASSIGNED;bug_status=REOPENED", + "sg_vector" : "keywords=sec-vector;keywords_type=allwords;bug_status=UNCONFIRMED;bug_status=NEW;bug_status=ASSIGNED;bug_status=REOPENED;classification=Client%20Software;classification=Components", + "sg_needinfo" : "status_whiteboard=[sg%3Aneedinfo;status_whiteboard_type=allwordssubstr;bug_status=UNCONFIRMED;bug_status=NEW;bug_status=ASSIGNED;bug_status=REOPENED", + "sg_untouched" : "keywords=sec-critical%20sec-high%20sec-moderate%20sec-low;keywords_type=anywords;field0-0-0=days_elapsed;classification=Client%20Software;classification=Components;bug_status=UNCONFIRMED;bug_status=NEW;bug_status=ASSIGNED;bug_status=REOPENED;type0-0-0=greaterthan;value0-0-0=14", + "sg_opened" : "field0-0-0=bug_group;type0-0-1=substring;field0-0-1=status_whiteboard;value0-0-2=sec-;chfield=[Bug%20creation];chfieldfrom=-1w;field0-0-2=keywords;value0-0-1=[sg%3A;type0-0-0=equals;value0-0-0=core-security;type0-0-2=substring;classification=Client%20Software;classification=Components", + "sg_closed" : "type0-1-0=notsubstring;field0-1-0=keywords;field0-0-0=bug_group;type0-0-1=substring;field0-0-1=status_whiteboard;classification=Client%20Software;classification=Components;value0-0-2=sec-;chfield=resolution;chfieldfrom=-1w;value0-1-0=sec-review;bug_status=RESOLVED;bug_status=VERIFIED;bug_status=CLOSED;field0-0-2=keywords;value0-0-1=[sg%3A;type0-0-0=equals;value0-0-0=core-security;type0-0-2=substring", + } + +now = time.localtime() +timestamp_file = time.strftime('%Y%m%d%H%M', now) +timestamp_db = time.strftime('%Y-%m-%d %H:%M', now) + +# Store the results for further processing (e.g. how many bugs per +# Product/Component?) but first save the number of results for the +# high-level stats. +for key, url in tocheck.items(): + print "Fetching", key + # will retry Bugzilla queries if they fail + attempt = 1 + count = None + while count is None: + if attempt > 1: + print "Retrying %s - attempt %d" % (key, attempt) + json = fetchBugzillaPage(url) + # save a copy of the bugzilla query + filename = timestamp_file+"_"+key+".json" + fp = open(JSONLOCATION+"/"+filename, "w") + fp.write(json) + fp.close() + # log the number of hits each query returned + results = simplejson.loads(json) + count = len(results["bugs"]) + attempt += 1 + sql = "INSERT INTO secbugs_Stats(category, count, date) VALUES('%s', %s, '%s');" % \ + (key, count, timestamp_db) + c.execute(sql) + logger.debug("sql: %s", sql) diff --git a/contrib/secbugstats/bin/fillbugtable.py b/contrib/secbugstats/bin/fillbugtable.py new file mode 100755 index 000000000..7f991a12d --- /dev/null +++ b/contrib/secbugstats/bin/fillbugtable.py @@ -0,0 +1,41 @@ +#!/usr/bin/python +# Fill the secbugs_Bugs table +# Should not be run regularly as it runs bugdata.py (API script) on every +# bug_id we have stored in the DB. +import sys, MySQLdb, os +from settings import * + +if "--debug" in sys.argv: + DEBUG = True +else: + DEBUG = False + +# set up database connection +db = MySQLdb.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME) +db.autocommit(True) +cur = db.cursor() + +severities = ["sg_critical","sg_high","sg_moderate","sg_low"] + +for sev in severities: + sql = "SELECT d.bug_list FROM secbugs_Details d WHERE d.sid IN (SELECT s.sid FROM secbugs_Stats s WHERE category='%s' and date > '2008-07-01');" % (sev) + # complete list of bugs for this severity level + complete = [] + # print "#", sql + cur.execute(sql) + row = cur.fetchone() + while row is not None: + # row e.g. ('408736,430127',) + # print "# ", row + bugs = row[0].split(",") + for bug in bugs: + if len(bug): complete.append(bug) + row = cur.fetchone() + + unique = list(set(complete)) + print "Going to fetch data for %d %s bugs..." % (len(unique), sev[3:]) + + for bug in unique: + cmd = "%s/bugdata.py %s" % (SCRIPTS_DIR, bug) + print cmd + if not DEBUG: os.popen(cmd) diff --git a/contrib/secbugstats/bin/gather.sh b/contrib/secbugstats/bin/gather.sh new file mode 100755 index 000000000..678752762 --- /dev/null +++ b/contrib/secbugstats/bin/gather.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# This is the main "driver" script that calls the other collection and +# processing scripts in the correct order. This is currently run as a +# weekly cron job but could be run at any frequency. The output of this +# script should be redirected to a log file for debugging. + +# scripts location (where does this config file live?) +# local settings +eval "$(python contrib/secbugstats/bin/settings.py)" + +mkdir -p $JSON_CUR +mkdir -p $TEAMS_CHART_LOC +mkdir -p $BUGLIFE_CHART_LOC + +# Move last week's data files to the archive +mv $JSON_CUR/* $JSON_OLD/ + +# Fetch the high-level bug data, e.g. num critical, high, moderate, low +echo "[curlbug.py `date +%Y-%m-%d\ %T`]" +$SCRIPTS_DIR/curlbug.py +echo "[end curlbug.py]" + +# Process each JSON file that the preceding script produced to find out +# the product, component, bug numbers, bug ages, etc. for each category +echo "[morestats.py `date +%Y-%m-%d\ %T`]" +for i in `ls $JSON_CUR/*sg_{low,moderate,high,critical,needstriage,unconfirmed,opened,closed,total,untouched,investigate,needinfo,vector}.json`; +do +$SCRIPTS_DIR/morestats.py $i; +done +echo "[end morestats.py]" + +# Update our Bug tables for bugs which we're currently working on. These +# are the set of bugs that showed up in any of the queries for Critical, +# High, Moderate and Low. (We don't need to pull details for bugs that +# haven't changed.) +echo "[updatechanged.py `date +%Y-%m-%d\ %T`]" +$SCRIPTS_DIR/updatechanged.py +echo "[end updatechanged.py]" + +# Popluate the data feed for the Teams chart +echo "[teamstats.py `date +%Y-%m-%d\ %T`]" +$SCRIPTS_DIR/teamstats.py > $TEAMS_CHART_LOC/stats.txt +echo "[end teamstats.py]" + +# Popluate the data feed for the "Bug Lifetimes" chart +echo "[buglife.py `date +%Y-%m-%d\ %T`]" +$SCRIPTS_DIR/buglife.py > $BUGLIFE_CHART_LOC/stats.txt +echo "[end buglife.py]" + +# Draw the PNG chart that we embed in the email report +echo "[teamgraph.py `date +%Y-%m-%d\ %T`]" +$SCRIPTS_DIR/teamgraph.py +echo "[end graph.py]" + +# # Email the report +echo "[sendstats.py `date +%Y-%m-%d\ %T`]" +$SCRIPTS_DIR/sendstats.py +echo "[end sendstats.py]" diff --git a/contrib/secbugstats/bin/morestats.py b/contrib/secbugstats/bin/morestats.py new file mode 100755 index 000000000..a79f68896 --- /dev/null +++ b/contrib/secbugstats/bin/morestats.py @@ -0,0 +1,106 @@ +#!/usr/bin/python +# Process a JSON file yielded by curlbug.py to find out which specific bugs +# are open in each category, what product/component they are open in, and how +# old the various bugs are. +import simplejson, sys, MySQLdb, re +from time import mktime, localtime, strptime +from settings import * + +def median(l): + l.sort() + if len(l) < 1: return 0 + elif len(l)%2 == 1: + return l[len(l)/2] + # even number of elements -> return avg of middle two + else: + return float((l[len(l)/2]+l[(len(l)/2)-1])/2) + +def average(l): + l.sort() + return float(sum(l)/len(l)) + +try: + INPUT_FILE = sys.argv[1] +except: + print "Usage: "+sys.argv[0]+" <input_file>" + sys.exit() + +if "--debug" in sys.argv: DEBUG = True +else: DEBUG = False + +# set up database connection +db = MySQLdb.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME) +db.autocommit(True) +c = db.cursor() + +# foreign key to relate these details to the higher-level stat bucket +# enforce filename format, e.g. 200808241200_sg_critical.json +FILE_NAME = INPUT_FILE.split("/")[-1] +p = re.compile("[0-9]{12}[a-z_]+\.json") +if not re.match(p, FILE_NAME): + print sys.argv[0]+": Unexpected filename format: %s. Exiting." % (FILE_NAME) + sys.exit() + +category = FILE_NAME[13:-5] +date = "%s-%s-%s %s:%s:00" % (FILE_NAME[0:4],FILE_NAME[4:6],FILE_NAME[6:8], + FILE_NAME[8:10],FILE_NAME[10:12]) + +# bail out if we weren't able to determine which stat bucket to associate with +sql = "SELECT sid FROM secbugs_Stats WHERE category='%s' AND date='%s';" % (category,date) +if DEBUG: print sql +c.execute(sql) +row = c.fetchone() +if not row: + print "%s: unable to determine Stat group for category: %s, date: %s. Exiting." % \ + (sys.argv[0], category, date) + sys.exit() +SID = row[0] + +# storage for our stat details, we'll key on (product, component) +details = {} + +json = open(INPUT_FILE, "rb").read() +buglist = simplejson.loads(json) +for bug in buglist["bugs"]: + # reset field values + product = component = bug_id = bug_age = "" + # gather field values from the JSON object + product = bug["product"] + component = bug["component"] + bug_id = bug["id"] + # bug age in days + try: + bug_age = (mktime(localtime())-mktime(strptime(bug["creation_time"], "%Y-%m-%dT%H:%M:%SZ")))/(60*60*24) + except Exception, e: + print "Exception trying to get bug age:", e + bug_age = 0 + # bail if we don't have other values set + if not product or not component or not bug_id: continue + # DEBUG + if DEBUG: + print "Processing bug_id: %s, bug_age: %s" % (bug_id, bug_age) + # create a new list of bugs based on (product, component) and also + # keep a list of bug ages to generate mean and median age + if (product, component) not in details.keys(): + details[(product, component)] = [[bug_id], [bug_age]] + # add to the existing list of bugs and total age of those bugs + else: + details[(product, component)][0].append(bug_id) + details[(product, component)][1].append(bug_age) + # if DEBUG: print " bug ages for group: %s" % (details[(product, component)][1]) + +# store the details we gathered +for pc in details.keys(): + # print pc, len(details[pc]), details[pc] + # see if we are inserting new details or if we are updating existing details + sql = "SELECT did FROM secbugs_Details where product='%s' AND component='%s' AND sid=%s AND date='%s';" % (pc[0], pc[1], SID, date) + c.execute(sql) + row = c.fetchone() + # update row + if row: + sql = "UPDATE secbugs_Details SET sid=%s, product='%s', component='%s', count=%s, bug_list='%s', date='%s', avg_age_days=%s, med_age_days=%s WHERE did=%s;" % (SID, pc[0], pc[1], len(details[pc][0]), ",".join([str(i) for i in details[pc][0]]), date, int(round(average(details[pc][1]))), int(round(median(details[pc][1]))), row[0]) + # insert new row + else: + sql = "INSERT INTO secbugs_Details(sid, product, component, count, bug_list, date, avg_age_days, med_age_days) VALUES(%s, '%s', '%s', %s, '%s', '%s', %s, %s);" % (SID, pc[0], pc[1], len(details[pc][0]), ",".join([str(i) for i in details[pc][0]]), date, int(round(average(details[pc][1]))), int(round(median(details[pc][1])))) + if DEBUG: print sql + else: c.execute(sql) diff --git a/contrib/secbugstats/bin/repairtables.sh b/contrib/secbugstats/bin/repairtables.sh new file mode 100755 index 000000000..147669dda --- /dev/null +++ b/contrib/secbugstats/bin/repairtables.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# You can manually run this script if you start getting MySQL errors about +# a table being corrupt. This will preserve the data in your tables. + +# scripts location (where does this config file live?) +SCRIPTS_DIR="$(dirname "$(readlink /proc/$$/fd/255)")" +source $SCRIPTS_DIR/settings.cfg + +echo "repair table BugHistory;" | mysql -h$DB_HOST $DB_NAME -u$DB_USER -p$DB_PASS +echo "repair table Bugs;" | mysql -h$DB_HOST $DB_NAME -u$DB_USER -p$DB_PASS +echo "repair table Details;" | mysql -h$DB_HOST $DB_NAME -u$DB_USER -p$DB_PASS +echo "repair table Stats;" | mysql -h$DB_HOST $DB_NAME -u$DB_USER -p$DB_PASS diff --git a/contrib/secbugstats/bin/sendstats.py b/contrib/secbugstats/bin/sendstats.py new file mode 100755 index 000000000..32d296533 --- /dev/null +++ b/contrib/secbugstats/bin/sendstats.py @@ -0,0 +1,307 @@ +#!/usr/bin/python +# Email a summary report of the bug statistics which also embeds the PNG chart +# generated by teamgraph.py +import sys, os, smtplib, time, MySQLdb, operator +from datetime import date, timedelta +from time import strftime +from string import join, split +from email.MIMEMultipart import MIMEMultipart +from email.MIMEText import MIMEText +from email.MIMEImage import MIMEImage +from settings import * + +# extra debug output +if "--debug" in sys.argv: DEBUG = True +else: DEBUG = False + +# set up database connection +try: + db = MySQLdb.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME) + db.autocommit(True) + c = db.cursor(MySQLdb.cursors.DictCursor) +except: + print "sendstats.py: can't connect to database\n" + sys.exit() + +# return the most recent date from our data set +def getThisWeekDate(slashed=False): + sql = "select distinct date from secbugs_Details order by date desc limit 1;" + c.execute(sql) + row = c.fetchone() + if slashed: + return row["date"].strftime("%m/%d/%y") + else: + return row["date"].strftime("%Y-%m-%d") + +# return the second-most recent date from our data set +def getLastWeekDate(slashed=False): + sql = "select distinct date from secbugs_Details order by date desc limit 1,1;" + c.execute(sql) + row = c.fetchone() + if slashed: + return row["date"].strftime("%m/%d/%y") + else: + return row["date"].strftime("%Y-%m-%d") + +# format a number so positive values have a leading "+" +def getDelta(n): + if n > 0: + return "+" + str(n) + else: + return n + +# given two lists of bug numbers, return the set that went away in the latter +def getCleared(curBugs, lastBugs): + cur = split(curBugs, ",") + last = split(lastBugs, ",") + cleared = [] + for bug in last: + if len(bug) and bug not in cur: cleared.append(bug) + return cleared + +# given two lists of bug numbers, return the set that showed up in the latter +def getAdded(curBugs, lastBugs): + cur = split(curBugs, ",") + last = split(lastBugs, ",") + added = [] + for bug in cur: + if len(bug) and bug not in last: added.append(bug) + return added + +# simple object to store and sort data from multiple queries +class DataRow(): + def __init__(self): + self.numCritical = 0 + self.critList = "" + self.numHigh = 0 + self.highList = "" + self.numModerate = 0 + self.modList = "" + self.numLow = 0 + self.lowList = "" + self.total = 0 + self.product = "" + self.component = "" + def dump(self): + s = "Critical: "+str(self.numCritical)+", " + s += "High: "+str(self.numHigh)+", " + s += "Moderate: "+str(self.numModerate)+", " + s += "Low: "+str(self.numLow)+", " + return s + +# start building email body +body = "<html>\n<body>\n<head>\n<style type=\"text/css\">.number{text-align:center;}table{margin:0 1em;border:1px solid black;border-collapse:collapse}th{text-align:left;padding:0 .5em}td{padding:0 .5em;border-top:1px solid #000;padding-top:.2em}td.total{border-top:3px double #000}th.header{font-weight:600}th.bright,td.bright{border-right:1px solid #000;font-weight:bold}.small{font-size:70%}.medium{font-size:80%}.gray{background-color:#eee}</style>\n</head>\n" +body += "<body>\n<p>Hello,</p><p>Here are some statistics regarding Mozilla Client Security Bugs for the week of %s:</p>\n<h2>Open Bugs By Severity</h2>\n<table>\n<tr><th>Category</th><th>Current</th><th>Last</th><th>Delta</th></tr>\n" % (getThisWeekDate(True)) + +# keep track of totals for counts +curTotal = 0 +lastTotal = 0 +totCleared = [] +totAdded = [] +i = 0 + +# bug counts +for cat in [("sg_critical", "Critical"), ("sg_high", "High"), ("sg_moderate", "Moderate"), ("sg_low", "Low")]: + # get the stats from this week + sql = "select d.count, d.bug_list from secbugs_Details d, secbugs_Stats s where d.sid=s.sid and s.category='%s' and d.date like '%s%%';" % (cat[0], getThisWeekDate()) + c.execute(sql) + thisWkCount = 0 + thisWkList = "" + row = c.fetchone() + while row != None: + thisWkCount += row["count"] + thisWkList += row["bug_list"] if not len(thisWkList) else ","+row["bug_list"] + row = c.fetchone() + # get the stats from last week + sql = "select d.count, d.bug_list from secbugs_Details d, secbugs_Stats s where d.sid=s.sid and s.category='%s' and d.date like '%s%%';" % (cat[0], getLastWeekDate()) + c.execute(sql) + lastWkCount = 0 + lastWkList = "" + row = c.fetchone() + while row != None: + lastWkCount += row["count"] + lastWkList += row["bug_list"] if not len(lastWkList) else ","+row["bug_list"] + row = c.fetchone() + # find out which bugs were cleared and which were added in the past week + cleared = getCleared(thisWkList, lastWkList) + added = getAdded(thisWkList, lastWkList) + if i%2 == 0: body += "<tr class=\"gray\">\n" + else: body += "<tr>\n" + # current count + if thisWkCount > 0: + curLink = "<a href=\"https://bugzilla.mozilla.org/buglist.cgi?bug_id=%s\">%s</a>" % (thisWkList, thisWkCount) + else: curLink = "0" + # previous count + if lastWkCount > 0: + prevLink = "<a href=\"https://bugzilla.mozilla.org/buglist.cgi?bug_id=%s\">%s</a>" % (lastWkList, lastWkCount) + else: prevLink = "0" + body += " <td>%s</td>\n <td class=\"number\">%s</td>\n <td class=\"number\">%s</td>\n <td class=\"number\">%s</td>\n <td class=\"small\">" % (cat[1], curLink, prevLink, getDelta(thisWkCount-lastWkCount)) + # links to lists of cleared and added bugs + if len(cleared) > 0: + body += "(<a href=\"https://bugzilla.mozilla.org/buglist.cgi?bug_id=%s\">Cleared %s</a>, " % (join(cleared,","), len(cleared)) + else: body += "(Cleared 0, " + if len(added) > 0: + body += "<a href=\"https://bugzilla.mozilla.org/buglist.cgi?bug_id=%s\">Added %s</a>)</td>\n</tr>\n" % (join(added,","), len(added)) + else: body += "Added 0)</td>\n</tr>\n" + curTotal += thisWkCount + lastTotal += lastWkCount + totCleared += cleared + totAdded += added + i += 1 # alternate row color + +body += "<tr class=\"gray\">\n <td class=\"total\"> </td>\n <td class=\"number total\">%s</td>\n <td class=\"number total\">%s</td>\n <td class=\"number total\">%s</td>\n <td class=\"total small\">" % (curTotal, lastTotal, getDelta(curTotal-lastTotal)) +if len(totCleared) > 0: + body += "(<a href=\"https://bugzilla.mozilla.org/buglist.cgi?bug_id=%s\">Cleared %s</a>, " % (join(totCleared,","), len(totCleared)) +else: body += "(Cleared 0, " +if len(totAdded) > 0: + body += "<a href=\"https://bugzilla.mozilla.org/buglist.cgi?bug_id=%s\">Added %s</a>)</td>\n</tr>\n" % (join(totAdded,","), len(totAdded)) +else: body += "Added 0)</td></tr>\n" +body += "</table>\n" + +# show top teams with security bugs +i = 0 +curDate = getThisWeekDate() + +# Keep track of how many bugs of each severity each team has open +teamStats = {} + +# Risk scoring rubric: assigns point value to each severity of bug +weights = { "sg_critical": 5, + "sg_high": 4, + "sg_moderate": 2, + "sg_low": 1} + +# Figure out how many open bugs (and cummulative risk index) each team has +for team in TEAMS: + teamName = team[0] + sql = "SELECT secbugs_Stats.category, secbugs_Details.date, GROUP_CONCAT(secbugs_Details.bug_list) AS bugs FROM secbugs_Details INNER JOIN secbugs_Stats ON secbugs_Details.sid=secbugs_Stats.sid WHERE secbugs_Details.date LIKE '%s%%' AND secbugs_Stats.category IN ('sg_critical','sg_high','sg_moderate','sg_low') AND (%s) GROUP BY category;" % (curDate, team[1]) + c.execute(sql) + rows = c.fetchall() + + # print "** %s **" % team[0] + # Keep track of the list of each category of bugs for each team and the + # weighted risk index + teamStats[teamName] = {"sg_critical": "", "num_sg_critical": 0, + "sg_high": "", "num_sg_high": 0, + "sg_moderate": "", "num_sg_moderate": 0, + "sg_low": "", "num_sg_low": 0, + "score": 0} + for row in rows: + # print "%s: %d" % (row["category"], len(row["bugs"].split(","))) + # Store the list of bugs + teamStats[teamName][row["category"]] = row["bugs"] + count = len(row["bugs"].split(",")) if len(row["bugs"]) else 0 + teamStats[teamName]["num_" + row["category"]] += count + # Add points to score - critical: 5, high: 4, moderate: 2, low: 1 + points = weights[row["category"]] * count + teamStats[teamName]["score"] += points + +# calculate some totals +totalRisk = sum([t[1]["score"] for t in teamStats.items()]) +totalCritical = sum([t[1]["num_sg_critical"] for t in teamStats.items()]) +totalHigh = sum([t[1]["num_sg_high"] for t in teamStats.items()]) +totalModerate = sum([t[1]["num_sg_moderate"] for t in teamStats.items()]) +totalLow = sum([t[1]["num_sg_low"] for t in teamStats.items()]) + +# Sort the list of teams by their risk index +sortedTeams = sorted(teamStats.items(), key = lambda k: k[1]["score"], + reverse = True) + +# "Risk Index By Team" table +body += "<h2>Risk Index By Team</h2>\n<table>\n<tr><th>Rank</th><th>Team</th></th><th>Risk Index</th><th>Critical</th><th>High</th><th>Moderate</th><th>Low</th></tr>\n" +i = 0 + +# Add a table row for each team +for s in sortedTeams: + # ('GFX', {'score': 13, 'sg_critical': '', + # 'sg_low': '563740,566209,455573,322708,351800,103454,563838,304123', + # 'sg_moderate': '', 'sg_high': '575294'}) + if i%2 == 0: body += "<tr class=\"gray\">\n" + else: body += "<tr>\n" + # rank, team, score + body += " <td class=\"number\">%d</td><td>%s</td><td class=\"number\">%d</td>" % \ + (i+1, s[0], s[1]["score"]) + # critical + numCritical = len(s[1]["sg_critical"].split(",")) if len(s[1]["sg_critical"]) else 0 + if numCritical > 0: + body += "<td class=\"number\"><a href=\"https://bugzilla.mozilla.org/buglist.cgi?bug_id=%s\">%s</a></td>" % (s[1]["sg_critical"], numCritical) + else: + body += "<td class=\"number\">0</td>" + # high + numHigh = len(s[1]["sg_high"].split(",")) if len(s[1]["sg_high"]) else 0 + if numHigh > 0: + body += "<td class=\"number\"><a href=\"https://bugzilla.mozilla.org/buglist.cgi?bug_id=%s\">%s</a></td>" % (s[1]["sg_high"], numHigh) + else: + body += "<td class=\"number\">0</td>" + # moderate + numModerate = len(s[1]["sg_moderate"].split(",")) if len(s[1]["sg_moderate"]) else 0 + if numModerate > 0: + body += "<td class=\"number\"><a href=\"https://bugzilla.mozilla.org/buglist.cgi?bug_id=%s\">%s</a></td>" % (s[1]["sg_moderate"], numModerate) + else: + body += "<td class=\"number\">0</td>" + # low + numLow = len(s[1]["sg_low"].split(",")) if len(s[1]["sg_low"]) else 0 + if numLow > 0: + body += "<td class=\"number\"><a href=\"https://bugzilla.mozilla.org/buglist.cgi?bug_id=%s\">%s</a></td>" % (s[1]["sg_low"], numLow) + else: + body += "<td class=\"number\">0</td></tr>\n" + i += 1 + +# add totals to bottom +if i%2 == 0: body += "<tr class=\"gray\">\n" +else: body += "<tr>\n" +body += "<td class=\"total\"> </td><td class=\"total\"> </td><td class=\"total number\">%d</td><td class=\"total number\">%d</td><td class=\"total number\">%d</td><td class=\"total number\">%d</td><td class=\"total number\">%d</td></tr>\n" % (totalRisk, totalCritical, totalHigh, totalModerate, totalLow) + +# finish up Team rank table and scoring rubric +body += "</table>\n<br>\n" +body += """<table class="medium"> +<tr><th rowspan="2" class="bright">Scoring</th><td class="bright">Severity</td><td>Critical</td><td>High</td><td>Moderate</td><td>Low</td></tr> +<tr><td class="bright">Weight</td><td class="number">5</td><td class="number">4</td><td class="number">2</td><td class="number">1</td></tr> +</table> +<br>""" + +# embed chart of historic risk index +body += "<h2>Historic Risk Index by Team</h2><img src=\"cid:image1\"><br>\n" + +# finish up the email message +body += "</body>\n</html>" + +# set up multipart email message +msgRoot = MIMEMultipart("related") +msgRoot["Subject"] = "Weekly Client Security Bug secbugs_Stats Report" +msgRoot["From"] = EMAIL_FROM +msgRoot["To"] = ",".join(EMAIL_TO) +msgRoot.preamble = "This is a multi-part message in MIME format." + +# Plaintext body +msgAlternative = MIMEMultipart("alternative") +msgRoot.attach(msgAlternative) +msgText = MIMEText("Plain text alternative not supported.") +msgAlternative.attach(msgText) + +# HTML body +msgText = MIMEText(body, "html") +msgAlternative.attach(msgText) + +# image attachment +files = os.popen("find %s -name '*.png'" % JSONLOCATION) +img = files.readline().strip() +if img: + fp = open(img, "rb") + msgImage = MIMEImage(fp.read()) + fp.close() + msgImage.add_header("Content-ID", "<image1>") + msgRoot.attach(msgImage) + +# if console is chosen, print only to the console +if "--console" in sys.argv: + print "\n", msgRoot.as_string() +# print out only HTML body +elif "--html" in sys.argv: + print "\n", body +# send out the mail +else: + s = smtplib.SMTP(SMTP_HOST, SMTP_PORT) + # s = smtplib.SMTP_SSL(SMTP_HOST, SMTP_PORT) + # s.login(LDAP_USER, LDAP_PASS) + s.sendmail(EMAIL_FROM, EMAIL_TO, msgRoot.as_string()) diff --git a/contrib/secbugstats/bin/settings.py b/contrib/secbugstats/bin/settings.py new file mode 100644 index 000000000..b9b328629 --- /dev/null +++ b/contrib/secbugstats/bin/settings.py @@ -0,0 +1,119 @@ +# Configuration options for secbugstats tools +# This file needs to live next to all the other collection/processing scripts. +# There are also configuration settings in settings.cfg which I didn't combine +# here because Bash and Python read configs differently. + +import urllib, sys, pipes, os.path +try: + import simplejson as json +except ImportError as error: + if __name__ == '__main__': + msg = """ + echo 'error in %s:' + echo '%s' >&2 + exit 1 + """ + print msg % (__file__, pipes.quote(str(error))) + sys.exit(1) + + +# scripts location (where does this config file live?) +SCRIPTS_DIR = os.path.dirname(os.path.abspath(__file__)) +BUGZILLA_DIR = os.path.realpath(SCRIPTS_DIR + "/../../..") +DATA_DIR = BUGZILLA_DIR + "/data" + +def read_localconfig(): + lc_cmd = "%s/scripts/localconfig-as-json" % (BUGZILLA_DIR) + lc_json = os.popen(pipes.quote(lc_cmd)).read() + return json.loads( lc_json ) + +localconfig = read_localconfig() + +URLBASE = localconfig['urlbase'] +if URLBASE.endswith('/'): + URLBASE = URLBASE[0:-1] + +# database settings +DB_HOST = localconfig['db_host'] +DB_USER = localconfig['db_user'] +DB_PASS = localconfig['db_pass'] +DB_NAME = localconfig['db_name'] + +# LDAP settings +LDAP_USER = "" +LDAP_PASS = "" + +# Email settings +# email address to send the report from +EMAIL_FROM = "secbugstats-noreply@mozilla.com" +# list of email addresses to send the report to +EMAIL_TO = ['security-group@mozilla.org'] +SMTP_HOST = "smtp.mozilla.org" +SMTP_PORT = 25 # 465 + +# Bugzilla account settings +BZ_APIKEY = '' +try: + BZ_APIKEY = os.environ['SECBUGSTATS_APIKEY'] +except KeyError: + pass +BZ_AUTH = urllib.urlencode({'api_key': BZ_APIKEY, 'restriclogin': "true"}) + +# where to store the JSON files that curlbug.py downloads +JSONLOCATION = "%s/secbugstats/json/current" % (DATA_DIR) + +# where to store the most recent curlbug.py output +JSON_CUR = JSONLOCATION +# where to store the old curlbug.py output +JSON_OLD = "%s/secbugstats/json" % (DATA_DIR) + +# teams chart location +TEAMS_CHART_LOC = "%s/secbugstats/teams" % (DATA_DIR) + +# bug lifespan chart location +BUGLIFE_CHART_LOC = "%s/secbugstats/buglife" % (DATA_DIR) + +# Selection criteria for various teams based on bug product and component +TEAMS = [["Layout", + "secbugs_Details.product='Core' AND (secbugs_Details.component LIKE 'layout%' OR secbugs_Details.component LIKE 'print%' OR secbugs_Details.component LIKE 'widget%' OR secbugs_Details.component IN ('CSS Parsing and Computation','Style System (CSS)','SVG','Internationalization','MathML'))"], + ["Media", + "secbugs_Details.product='Core' AND (secbugs_Details.component LIKE 'WebRTC%' OR secbugs_Details.component LIKE 'Audio/Video%' OR secbugs_Details.component='Web Audio')"], + ["JavaScript", + "secbugs_Details.product='Core' AND (secbugs_Details.component LIKE 'javascript%' OR secbugs_Details.component IN ('Nanojit'))"], + ["DOM", + "secbugs_Details.product='Core' AND (secbugs_Details.component LIKE 'DOM%' OR secbugs_Details.component LIKE 'xp toolkit%' OR secbugs_Details.component IN ('Document Navigation','Drag and Drop','Editor','Event Handling','HTML: Form Submission','HTML: Parser','RDF','Security','Security: CAPS','Selection','Serializers','Spelling checker','Web Services','XBL','XForms','XML','XPConnect','XSLT','XUL'))"], + ["GFX", + "secbugs_Details.product='Core' AND (secbugs_Details.component LIKE 'GFX%' OR secbugs_Details.component LIKE 'canvas%' OR secbugs_Details.component LIKE 'Graphics%' OR secbugs_Details.component IN ('Graphics','Image: Painting','ImageLib'))"], + ["Frontend", + "secbugs_Details.product='Firefox' OR secbugs_Details.product='Firefox for Metro' OR secbugs_Details.product='Toolkit' OR (secbugs_Details.product='Core' AND (secbugs_Details.component IN ('Form Manager','History: Global','Identity','Installer: XPInstall Engine','Security: UI','Keyboard: Navigation')))"], + ["Networking", + "secbugs_Details.product='Core' AND secbugs_Details.component like 'Networking%'"], + ["Mail", + "secbugs_Details.product='MailNews Core' OR secbugs_Details.product='Thunderbird' OR (secbugs_Details.product='Core' AND (secbugs_Details.component like 'Mail%'))"], + ["Other", + "secbugs_Details.product='Core' AND (secbugs_Details.component IN ('DMD','File Handling','General','Geolocation','IPC','Java: OJI','jemalloc','js-ctypes','Memory Allocator','mfbt','mozglue','Permission Manager','Preferences: Backend','String','XPCOM','MFBT','Disability Access APIs','Rewriting and Analysis') OR secbugs_Details.component LIKE 'Embedding%' OR secbugs_Details.component LIKE '(HAL)')"], + ["Crypto", + "secbugs_Details.product IN ('JSS','NSS','NSPR') OR (secbugs_Details.product='Core' AND secbugs_Details.component IN ('Security: PSM','Security: S/MIME'))"], + ["Services", + "secbugs_Details.product IN ('Cloud Services','Mozilla Services')"], + ["Plugins", + "secbugs_Details.product IN ('Plugins','External Software Affecting Firefox') OR (secbugs_Details.product='Core' AND secbugs_Details.component='Plug-ins')"], + ["Boot2Gecko", + "secbugs_Details.product='Firefox OS' OR secbugs_Details.product='Boot2Gecko'"], + ["Mobile", + "secbugs_Details.product IN ('Fennec Graveyard','Firefox for Android','Android Background Services','Firefox for iOS','Focus')"]] + +def main(): + this_module = sys.modules[__name__] + + for k in dir(this_module): + v = getattr(this_module, k) + if type(v) == str and not k.startswith("__"): + print "%s=%s" % (k, pipes.quote(v)) + +if __name__ == '__main__': + main() +else: + import logging + from logging.config import fileConfig + fileConfig(SCRIPTS_DIR + '/../logging.ini')
\ No newline at end of file diff --git a/contrib/secbugstats/bin/teamgraph.py b/contrib/secbugstats/bin/teamgraph.py new file mode 100755 index 000000000..0ee3051ce --- /dev/null +++ b/contrib/secbugstats/bin/teamgraph.py @@ -0,0 +1,151 @@ +#!/usr/bin/python +# Create a PNG chart of the historical risk index by team to embed in the +# weekly report +import numpy as NP +import matplotlib, sys, MySQLdb, datetime +matplotlib.use("Agg") +from matplotlib import pyplot as PLT +from matplotlib.patches import Rectangle +from matplotlib.font_manager import FontProperties +from settings import * +import time + +# extra debug output +if "--debug" in sys.argv: DEBUG = True +else: DEBUG = False + +# set up database connection +try: + db = MySQLdb.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME) + db.autocommit(True) + c = db.cursor(MySQLdb.cursors.DictCursor) +except: + print "teamgraph.py: can't connect to database\n" + sys.exit() + +# Keep track of how many bugs of each severity each team has open +teamStats = {} + +# Risk scoring rubric: assigns point value to each severity of bug +weights = { "sg_critical": 5, + "sg_high": 4, + "sg_moderate": 2, + "sg_low": 1} + +# Gather the risk index for each team at each point in time starting in +# September 2009 (fairly arbitrary choice) +sql = "SELECT DISTINCT date from secbugs_Details WHERE date > '2009-09-01' ORDER BY date;" +c.execute(sql) +rows = c.fetchall() +for row in rows: + date = row["date"].strftime("%Y-%m-%d") + for team in TEAMS: + teamName = team[0] + # Create the empty list for each team. The list will hold + # (date, riskScore) tuples + if teamName not in teamStats.keys(): + teamStats[teamName] = [] + sql2 = "SELECT secbugs_Stats.category, secbugs_Details.date, SUM(secbugs_Details.count) AS total FROM secbugs_Details INNER JOIN secbugs_Stats ON secbugs_Details.sid=secbugs_Stats.sid WHERE secbugs_Details.date LIKE '%s%%' AND secbugs_Stats.category IN ('sg_critical','sg_high','sg_moderate','sg_low') AND (%s) GROUP BY date, category;" % (date, team[1]) + # if DEBUG: print sql2 + # | category | date | total | + # | sg_critical | 2011-08-28 12:00:00 | 3 | + # | sg_high | 2011-08-28 12:00:00 | 6 | + # ... + c.execute(sql2) + sevCounts = c.fetchall() + # Calculate the risk index for this date/team combo + riskIndex = 0 + for sev in sevCounts: + riskIndex += weights[sev["category"]] * sev["total"] + teamStats[teamName].append( (date, riskIndex) ) + +# Sort list of team stats by most recent risk index +statList = sorted(teamStats.items(), key = lambda k: k[1][-1][1]) +# [('Frontend', [('2011-08-07', Decimal('110')), ..., ('2011-08-28', Decimal('102'))]), +# ('DOM', [('2011-08-07', Decimal('115')), ..., ('2011-08-28', Decimal('127'))])] + +# # just create some random data +# fnx = lambda : NP.random.randint(3, 10, 10) +# x = NP.arange(0, 10) +# # [0 1 2 3 4 5 6 7 8 9] +# y1 = fnx() +# # [7 5 7 7 4 3 5 8 7 3] +# y2 = fnx() +# y3 = fnx() + +x = [datetime.datetime.strptime(s[0], "%Y-%m-%d") for s in statList[0][1]] +# x = NP.arange(len(statList[0][1])) + +series = tuple([[s[1] for s in stat[1]] for stat in + [team for team in statList]]) +# ([0, 4, 4, 4], [6, 6, 6, 6], [22, 22, 17, 12], [13, 17, 17, 17], +# [28, 28, 29, 24], [24, 29, 29, 29], [30, 29, 29, 30], [45, 49, 49, +# 49], [32, 42, 52, 63], [110, 110, 107, 102], [115, 123, 123, 127]) + + +# y_data = NP.row_stack((y1, y2, y3)) +# [[7 5 7 7 4 3 5 8 7 3] +# [6 5 5 5 9 3 8 9 5 8] +# [3 7 5 4 7 7 3 6 6 4]] +y_data = NP.row_stack(series) + +# this call to 'cumsum' (cumulative sum), passing in your y data, +# is necessary to avoid having to manually order the datasets +y_data_stacked = NP.cumsum(y_data, axis=0) +# [[0 4 4 4] +# [6 10 10 10] +# [28 32 27 22] +# [41 49 44 39] +# [69 77 73 63] +# [93 106 102 92] +# [123 135 131 122] +# [168 184 180 171] +# [200 226 232 234] +# [310 336 339 336] +# [425 459 462 463]] + +fig = PLT.figure() +ax1 = fig.add_subplot(111) +# set y-axis to start at 0 +PLT.ylim(ymin = 0) + +colors = ["#ffe84c", "#7633bd", "#3d853d", "#a23c3c", "#8cacc6", + "#bd9b33", "#9440ed", "#4da74d", "#cb4b4b", "#afd8f8", + "#edc240"] + +# first one manually? okay... +ax1.fill_between(x, 0, y_data_stacked[0,:], facecolor = colors[0]) +# hack for the legend (doesn't work with fill_between) +# http://www.mail-archive.com/matplotlib-users@lists.sourceforge.net/msg10893.html +rects = [] +rects.insert(0, Rectangle((0, 0), 1, 1, color = colors[0])) +labels = [] +labels.insert(0, statList[0][0]) + +# fill in the rest +for i in range(1, len(y_data_stacked)): + ax1.fill_between(x, y_data_stacked[i-1,:], y_data_stacked[i,:], + facecolor = colors[i]) + # legend hack: add the Rectangle patch to the plot + rects.insert(0, Rectangle((0, 0), 1, 1, color = colors[i])) + labels.insert(0, statList[i][0]) + +# reduce the number of ticks on the bottom axis to improve readability +fig.autofmt_xdate(bottom = 0.2, rotation = 45, ha = "right") +ax1.set_ylim(ymin = 0) + +# reduce width by 10% +box = ax1.get_position() +ax1.set_position([box.x0, box.y0, box.width * 0.9, box.height]) +# shrink the font size +fontP = FontProperties() +fontP.set_size("x-small") +ax1.legend(rects, labels, loc='center left', title = "Teams", + bbox_to_anchor = (1, 0.5), fancybox = True, shadow = False, + prop = fontP) + +# PLT.title("Risk Index: " + x[-1].strftime("%Y-%m-%d")) + +# save the image on the filesystem +filename = "teamgraph-%s.png" % time.strftime('%Y%m%d', time.localtime()) +fig.savefig("%s/%s" % (JSONLOCATION, filename)) diff --git a/contrib/secbugstats/bin/teamstats.py b/contrib/secbugstats/bin/teamstats.py new file mode 100755 index 000000000..da5c20d13 --- /dev/null +++ b/contrib/secbugstats/bin/teamstats.py @@ -0,0 +1,69 @@ +#!/usr/bin/python +# aggregate trend data for Eng. teams performance on fixing security bugs +import sys, MySQLdb +from string import join, capitalize, split +from time import mktime +from settings import * + +if "--debug" in sys.argv: DEBUG = True +else: DEBUG = False + +# set up database connection +db = MySQLdb.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME) +db.autocommit(True) +c = db.cursor() + +def getCleared(curBugs, lastBugs): + cur = split(curBugs, ",") + last = split(lastBugs, ",") + cleared = [] + for bug in last: + if len(bug) and bug not in cur: cleared.append(bug) + return cleared + +def getAdded(curBugs, lastBugs): + cur = split(curBugs, ",") + last = split(lastBugs, ",") + added = [] + for bug in cur: + if len(bug) and bug not in last: added.append(bug) + return added + +def cleanCat(cat): + return cat[3:].capitalize() + +# return a UNIX timestamp +def formatDate(d): + return mktime(d.timetuple()) + + +severities = ["sg_critical", "sg_high", "sg_moderate", "sg_low"] + +# print a list of JSON objects +print "{" + +for sev in severities: + print " '%s': [" % (cleanCat(sev)) + for t in TEAMS: + # keep track of how many bugs were opened/closed from the last period + last = { "Critical": "", "High": "", "Moderate": "", "Low": "" } + + # only select from one severity at a time + sql = "SELECT secbugs_Details.date, sum(secbugs_Details.count), GROUP_CONCAT(secbugs_Details.bug_list), secbugs_Stats.category, round(sum(secbugs_Details.avg_age_days*secbugs_Details.count)/(sum(secbugs_Details.count))) as avg_age_days FROM secbugs_Details INNER JOIN secbugs_Stats ON secbugs_Details.sid=secbugs_Stats.sid WHERE (%s) AND secbugs_Stats.category='%s' GROUP BY date, category ORDER BY date, category;" % (t[1], sev) + if DEBUG: print "sql:", sql + c.execute(sql) + row = c.fetchone() + while row is not None: + # how many bugs are new from last period + new = getAdded(row[2], last[cleanCat(row[3])]) + if DEBUG: print "new:", new + # how many bugs are closed since last period + cleared = getCleared(row[2], last[cleanCat(row[3])]) + if DEBUG: print "cleared:", cleared + # emit data for this (team, date, category) + print " { 'date': '%s', 'timestamp': %s, 'team': '%s', 'count': %s, 'new': %s, 'cleared': %s, 'avg_age': %s }," % (str(row[0]), formatDate(row[0]), t[0], row[1], len(new), len(cleared), row[4]) + # remember old values + last[cleanCat(row[3])] = row[2] + row = c.fetchone() + print " ]," +print "}" diff --git a/contrib/secbugstats/bin/updatechanged.py b/contrib/secbugstats/bin/updatechanged.py new file mode 100755 index 000000000..96dae4b70 --- /dev/null +++ b/contrib/secbugstats/bin/updatechanged.py @@ -0,0 +1,116 @@ +#!/usr/bin/python +# +# For each bug that we see in our queries this week, run our bugdata.py +# script to pull relevant fields, e.g. resolution, whiteboard. +# Takes no arguments (generates bug lists by diff'ing last week's results +# with this week's. + +import sys, os, smtplib, time, MySQLdb, operator +from datetime import date, timedelta +from time import strftime +from string import join, split +from settings import * + +# extra debug output +if "--debug" in sys.argv: DEBUG = True +else: DEBUG = False + +# set up database connection +try: + db = MySQLdb.connect(host=DB_HOST, user=DB_USER, passwd=DB_PASS, db=DB_NAME) + db.autocommit(True) + c = db.cursor(MySQLdb.cursors.DictCursor) +except: + print "updatechanged.py: can't connect to database\n" + sys.exit() + +def getThisWeekDate(slashed=False): + sql = "select distinct date from secbugs_Details order by date desc limit 1;" + c.execute(sql) + row = c.fetchone() + if slashed: + return row["date"].strftime("%m/%d/%y") + else: + return row["date"].strftime("%Y-%m-%d") + +def getLastWeekDate(slashed=False): + sql = "select distinct date from secbugs_Details order by date desc limit 1,1;" + c.execute(sql) + row = c.fetchone() + if slashed: + return row["date"].strftime("%m/%d/%y") + else: + return row["date"].strftime("%Y-%m-%d") + +def getDelta(n): + if n > 0: + return "+"+str(n) + else: + return n + +def getCleared(curBugs, lastBugs): + cur = split(curBugs, ",") + last = split(lastBugs, ",") + cleared = [] + for bug in last: + if len(bug) and bug not in cur: cleared.append(bug) + return cleared + +def getAdded(curBugs, lastBugs): + cur = split(curBugs, ",") + last = split(lastBugs, ",") + added = [] + for bug in cur: + if len(bug) and bug not in last: added.append(bug) + return added + +# simple object to store and sort data from multiple queries +class DataRow(): + def __init__(self): + self.numCritical = 0 + self.critList = "" + self.numHigh = 0 + self.highList = "" + self.numModerate = 0 + self.modList = "" + self.numLow = 0 + self.lowList = "" + self.total = 0 + self.product = "" + self.component = "" + def dump(self): + s = "Critical: "+str(self.numCritical)+", " + s += "High: "+str(self.numHigh)+", " + s += "Moderate: "+str(self.numModerate)+", " + s += "Low: "+str(self.numLow)+", " + return s + +# which bugs did we gather stats for this week +for cat in [("sg_critical", "Critical"), ("sg_high", "High"), ("sg_moderate", "Moderate"), ("sg_low", "Low")]: + print cat[1] + # get the stats from this week + sql = "select d.bug_list from secbugs_Details d, secbugs_Stats s where d.sid=s.sid and s.category='%s' and d.date like '%s%%';" % (cat[0], getThisWeekDate()) + c.execute(sql) + thisWkList = "" + row = c.fetchone() + while row != None: + thisWkList += row["bug_list"] if not len(thisWkList) else ","+row["bug_list"] + row = c.fetchone() + # get the stats from last week + sql = "select d.bug_list from secbugs_Details d, secbugs_Stats s where d.sid=s.sid and s.category='%s' and d.date like '%s%%';" % (cat[0], getLastWeekDate()) + c.execute(sql) + lastWkList = "" + row = c.fetchone() + while row != None: + lastWkList += row["bug_list"] if not len(lastWkList) else ","+row["bug_list"] + row = c.fetchone() + # run bugdata on all the bugs in thisWkList and lastWkList + unique = set(thisWkList.split(",")+lastWkList.split(",")) + if DEBUG: + print "thisWk: ", thisWkList + print "lastWk: ", lastWkList + print "unique: ", unique + for bug in unique: + cmd = "%s/bugdata.py %s" % (SCRIPTS_DIR, bug) + print cmd + if not DEBUG: os.popen(cmd) diff --git a/contrib/secbugstats/logging.ini b/contrib/secbugstats/logging.ini new file mode 100644 index 000000000..cecd031de --- /dev/null +++ b/contrib/secbugstats/logging.ini @@ -0,0 +1,21 @@ +[loggers] +keys=root + +[handlers] +keys=stream_handler + +[formatters] +keys=formatter + +[logger_root] +level=DEBUG +handlers=stream_handler + +[handler_stream_handler] +class=StreamHandler +level=DEBUG +formatter=formatter +args=(sys.stderr,) + +[formatter_formatter] +format=%(asctime)s %(name)-12s %(levelname)-8s %(message)s
\ No newline at end of file |