#!/usr/bin/python2.4
# retrieves queue status summary and generates two RSS feeds for {in,out}going packages
# License: GPL v2 or later
# Author: Filippo Giunchedi <filippo@debian.org>
# Version: 0.3

from urllib import urlretrieve
from email.Parser import HeaderParser
import email
import email.Errors
import os.path, cPickle, gzip, sys
import encodings.ascii
import datetime
import PyRSS2Gen

# Output / state file locations and feed size limit.
inrss_filename = "new_in.rss"    # RSS feed of packages entering NEW
outrss_filename = "new_out.rss"  # RSS feed of packages leaving NEW
packages_db = "packages.db"      # pickled state from the previous run
max_entries = 30                 # maximum number of items kept per feed

# Module-wide state: the previously-seen package db plus the two feeds.
# "packages" is replaced by the unpickled db in __main__ when one exists.
data = {
    "packages": {},
    "in": PyRSS2Gen.RSS2(
             title = "Packages entering NEW",
             link = "http://ftp-master.debian.org/new.html",
             description = "Debian packages entering the NEW queue" ),
    "out": PyRSS2Gen.RSS2(
             title = "Packages leaving NEW",
             link = "http://ftp-master.debian.org/new.html",
             description = "Debian packages leaving the NEW queue" ),
}

cur = {}      # packages currently in NEW, parsed from this run's summary
inpkgs = {}   # packages that appeared since the previous run
outpkgs = {}  # packages that disappeared since the previous run

def utf2ascii(src):
    """Return an ASCII-encoded copy of the UTF-8 string `src`.

    Characters with no ASCII equivalent are replaced ('?'); input that
    is not valid UTF-8 yields the marker string "unable_to_decode".
    """
    try:
        return unicode(src, 'utf-8').encode('ascii', 'replace')
    except UnicodeDecodeError:
        return "unable_to_decode"

def parse_new(file=None): # file is a fp to the file to parse
    """Parse the queue-summary stream of RFC822-style header stanzas.

    Fills the module-level `cur` dict with every package currently in
    the NEW queue, and `inpkgs` with the packages not present in the
    previously saved db (data["packages"]).  Returns False when no file
    object is given, True otherwise.
    """
    global cur, inpkgs, data 

    if not file:
        return False
   
    p = HeaderParser()
    while 1:
        # FIXME this parses a new header at each iteration, however this won't
        # work with python2.4 (for some reason yet to be found)
        try:
            # second arg is headersonly=True: each stanza ends at a blank line
            m = p.parse(file, True)
        except email.Errors.HeaderParseError:
            continue

        if len(m) == 0: # EOF
            break
	
        if not m.has_key("Filename"):
            #print m
            continue 

        # only consider entries that live in the NEW queue directory
        if m["Filename"].startswith("/org/ftp.debian.org/queue/new/"):
            # skip stanzas missing any of the fields needed for the feed
            if not ( m.has_key("Source") and m.has_key("Version") and
               m.has_key("Architecture") and m.has_key("Distribution") and
               m.has_key("Maintainer") and m.has_key("Description") and
               m.has_key("Changes") ):
                continue

            pkg = m["Source"]
            ver = m["Version"]
            archs = m["Architecture"]
            dist = m["Distribution"]
            date = m["Date"]
            maint = utf2ascii(m["Maintainer"])
            description = utf2ascii(m["Description"])
            changes = utf2ascii(m["Changes"])

            # record layout: [ver, archs, dist, date, maint, description, changes]
            cur[pkg] = [ver, archs, dist, date, maint, description, changes] 

            if not data["packages"].has_key(pkg):
                # new package, never seen 
                inpkgs[pkg] = cur[pkg]
#            elif cur[pkg][1] != old[pkg][1]:
#                # same package name, different archs (buildds)
#                inpkgs[pkg] = cur[pkg]

    return True

def gen_inrss(file):
    """Append an RSS item for each package that entered NEW, then write
    the "in" feed to `file`.

    Uses the module-level `inpkgs` dict filled by parse_new().  Returns
    False (and writes nothing) when no new package was seen.
    """
    if len(inpkgs) == 0:
        return False

    for p in inpkgs.keys():
        pkg = inpkgs[p]

        try:
            # pkg layout: [ver, archs, dist, date, maint, description, changes]
            description = "<pre>Description: %s\nChanges: %s\n</pre>" % (utf2ascii(pkg[5]), utf2ascii(pkg[6]))
        except Exception: # backward compatibility: old db entries may lack fields
            sys.stderr.write("exception on %s\n" % repr(pkg))
            description = ""

        data["in"].items.append(
            PyRSS2Gen.RSSItem(
                title = "%s %s landed in NEW" % (p, pkg[0]),
                pubDate = datetime.datetime.now(),
                description = description,
                author = pkg[4]
            )
        )

    # close the output file explicitly instead of leaking the handle
    out = open(file, "w+")
    try:
        data["in"].write_xml(out)
    finally:
        out.close()

def gen_outrss(file):
    """Append an RSS item for each package that left NEW, then write
    the "out" feed to `file`.

    A package present in the saved db (data["packages"]) but absent from
    `cur` is considered gone from the queue; it is recorded in `outpkgs`
    and added to the feed.
    """
    global outpkgs

    for p in data["packages"].keys():
        if cur.has_key(p):
            continue

        # this package is removed from NEW
        pkg = data["packages"][p]
        outpkgs[p] = pkg

        try:
            # pkg layout: [ver, archs, dist, date, maint, description, changes]
            description = "<pre>Description: %s\nChanges: %s\n</pre>" % (utf2ascii(pkg[5]), utf2ascii(pkg[6]))
        except Exception: # backward compatibility: old db entries may lack fields
            sys.stderr.write("exception on %s\n" % repr(pkg))
            description = ""

        data["out"].items.append(
            PyRSS2Gen.RSSItem(
                title = "%s %s left NEW" % (p, pkg[0]),
                pubDate = datetime.datetime.now(),
                link = "http://packages.qa.debian.org/" + p,
                description = description,
                author = pkg[4]
            )
        )

    # close the output file explicitly instead of leaking the handle
    out = open(file, "w+")
    try:
        data["out"].write_xml(out)
    finally:
        out.close()
   
def purge_old(feed, max):
    """Purge old RSSItems from `feed`, keeping no more than `max`.

    Items are sorted most-recent-first by pubDate before truncation.
    Returns False when the feed has no items, None otherwise.
    NOTE: the parameter name `max` shadows the builtin; kept for
    interface compatibility.
    """
    if feed.items is None or len(feed.items) == 0:
        return False

    # key=/reverse= (available since Python 2.4) computes each pubDate
    # once instead of invoking a cmp() comparator per comparison, and
    # stays compatible with Python 3, where cmp-based sort was removed.
    feed.items.sort(key=lambda item: item.pubDate, reverse=True)
    feed.items = feed.items[:max]

if __name__ == "__main__":

    # FIXME check for errors
    try:
        tmp = gzip.open(urlretrieve("http://ftp-master.debian.org/~jeroen/queue-summary.gz")[0])
    except Exception:
        sys.stderr.write("problems while obtaining queue-summary.gz\n")
        sys.exit(1)

    # Load the state saved by the previous run (if any) so parse_new()
    # can tell which packages are new; then parse the fresh summary.
    if os.path.exists(packages_db):
        f = open(packages_db)
        data = cPickle.load(f)
        f.close()
   #     print "db loaded: %d entries " % len(data["packages"])
    parse_new(tmp)
    tmp.close()

    gen_outrss(outrss_filename)
    gen_inrss(inrss_filename)

    if len(inpkgs):
        sys.stdout.write("packages in:  %d\n" % len(inpkgs))
        for p in inpkgs.keys():
            sys.stdout.write("    %s\n" % p)

    if len(outpkgs):
        # newline was missing here, garbling the package list that follows
        sys.stdout.write("packages out: %d\n" % len(outpkgs))
        for p in outpkgs.keys():
            sys.stdout.write("    %s\n" % p)

    # keep only the most recent entries in each feed before saving
    purge_old(data["in"], max_entries)
    purge_old(data["out"], max_entries)

    # persist the current queue content and feeds for the next run
    f = open(packages_db, "w+")
    data["packages"] = cur
    cPickle.dump(data, f)
    #print "db saved: %d entries" % len(data["packages"])
    f.close()

# vim:et:ts=4
