
import sys, os, re
import gzip
import dateutil
from dateutil.parser import *

from datetime import timezone
from xml.dom import minidom
import requests

# Mirror trees for releases and their updates.
_MIRROR_ROOT = "http://mirrors.kernel.org/fedora"
RELEASE_URL = _MIRROR_ROOT + "/releases"
UPDATES_URL = _MIRROR_ROOT + "/updates"

# Archive trees; the download helpers switch to these for releases below 34.
_ARCHIVE_ROOT = "https://archives.fedoraproject.org/pub/archive/fedora/linux"
ARCHIVE_URL = _ARCHIVE_ROOT + "/releases"
ARCHIVE_UPDATES = _ARCHIVE_ROOT + "/updates"

# Location of the metadata index inside a repository directory.
REPOMD = "/repodata/repomd.xml"

# Schedule data per Fedora release, keyed by the release number as a string.
# Dates are Unix timestamps in seconds (e.g. 1588075200 is 2020-04-28 12:00 UTC);
# each release's "dev_start" equals the previous release's "fork_date".
# Source of the dates:
# https://docs.fedoraproject.org/en-US/releases/lifecycle/#Release_Dates
#
# TODO: parse the schedule JSON to pull these dates programmatically, see
# https://fedorapeople.org/groups/schedule/f-36/f-36-key-tasks.html
RELEASES = {
	release: {
		"ports": ["x86_64"],
		"release_date": release_date,
		"dev_start": dev_start,
		"fork_date": fork_date,
	}
	for release, release_date, dev_start, fork_date in (
		# release, release_date, dev_start, fork_date
		('32', 1588075200, 1565697600, 1581426000),
		('33', 1603800000, 1581426000, 1597147200),
		('34', 1619524800, 1597147200, 1612875600),
		('35', 1635854400, 1612875600, 1628596800),
		('36', 1652184000, 1628596800, 1644325200),
	)
}


def get_repo_md(release, updates=False):
	"""Download repomd.xml for one Fedora release and save it locally.

	Releases below 34 are fetched from the archive server, all others
	from the regular mirror.

	Args:
		release: Fedora release number as a string, e.g. "36".
		updates: When exactly True, fetch the metadata of the updates
			repository; otherwise fetch the base ("os") repository.

	Returns:
		Name of the file written to the current working directory,
		"repomd-<release>-updates.xml" or "repomd-<release>-base.xml".

	Raises:
		ValueError: if release cannot be converted to an integer.
		requests.HTTPError: if the server answers with an error status.
	"""
	port = "x86_64"

	# Pick the servers into separate local names. Assigning to RELEASE_URL /
	# UPDATES_URL inside this function would turn them into locals and raise
	# UnboundLocalError for every release that does not take the archive path.
	if int(release) < 34:
		release_root, updates_root = ARCHIVE_URL, ARCHIVE_UPDATES
	else:
		release_root, updates_root = RELEASE_URL, UPDATES_URL

	# REPOMD starts with a slash; strip it so the joined URL has no "//".
	repomd_path = REPOMD.lstrip('/')
	if updates is True:
		url = [updates_root, release, "Everything", port, repomd_path]
		filename = "repomd-" + release + "-updates.xml"
	else:
		url = [release_root, release, "Everything", port, "os", repomd_path]
		filename = "repomd-" + release + "-base.xml"

	r = requests.get('/'.join(url), timeout=60)
	# Fail here instead of saving an HTML error page as if it were XML.
	r.raise_for_status()

	# Store the raw bytes so the XML keeps the encoding it declares.
	with open(filename, 'wb') as f:
		f.write(r.content)

	# return filename so we can keep track of all the files.
	return filename


# collect urls from repomd.xml and download relevant data
def parse_repo_md(release, filename):
	"""Download the "other" metadata referenced by a saved repomd.xml.

	The repomd file is searched for the <data type="other"> entry, the
	gzip-compressed file it points to is downloaded and decompressed, and
	the result is written to "pkg_list-<filename>".

	Args:
		release: Fedora release number as a string, e.g. "36".
		filename: Name of a saved repomd file. A name ending in
			"-base.xml" selects the base ("os") repository URL; any
			other name selects the updates repository URL.

	Returns:
		The decompressed metadata as bytes.

	Raises:
		ValueError: if release is not an integer string, or if the repomd
			file has no "other" entry with exactly one <location>.
		requests.HTTPError: if the server answers with an error status.
	"""
	document = minidom.parse(filename)

	# Use separate local names for the chosen servers. Assigning to
	# RELEASE_URL / UPDATES_URL here would make them function-local and
	# raise UnboundLocalError for releases that skip the archive branch.
	if int(release) < 34:
		release_root, updates_root = ARCHIVE_URL, ARCHIVE_UPDATES
	else:
		release_root, updates_root = RELEASE_URL, UPDATES_URL

	# Find the href of the "other" metadata; only an entry with exactly
	# one <location> child is accepted.
	href = ""
	for tag in document.getElementsByTagName('data'):
		if tag.getAttribute("type") != "other":
			continue
		locations = tag.getElementsByTagName("location")
		if len(locations) == 1:
			href = locations[0].getAttribute('href')
	if not href:
		raise ValueError(
			"no unique 'other' data location found in " + filename
		)

	# The repository a repomd file belongs to is encoded in its file name.
	if filename.endswith("-base.xml"):
		url = release_root + "/" + release + "/Everything/x86_64/os/"
	else:
		url = updates_root + "/" + release + "/Everything/x86_64/"

	# get generic data
	r = requests.get(url + href, timeout=60)
	r.raise_for_status()
	data = gzip.decompress(r.content)

	# Write the decompressed bytes as-is; no dependency on the platform's
	# default text encoding.
	with open("pkg_list-" + filename, 'wb') as f:
		f.write(data)
	return data

def parse_pkg_data(repomd):
	"""Build a per-package changelog dataset from a saved package list.

	Parses the XML file "pkg_list-<repomd>" in the current working
	directory and walks its <package> elements. Also prints the longest
	package name that was seen.

	Args:
		repomd: File name suffix; the file read is "pkg_list-" + repomd.

	Returns:
		A dict mapping package name to a dict with the keys "name",
		"arch", "version", "release" and "changelog". "changelog" is a
		list of dicts with the keys "pkg", "new_version", "author",
		"date" and "notes". If several packages share a name, the last
		one in the file wins.
	"""
	package_list = {}
	longest_name = ''  # longest package name seen so far
	document = minidom.parse("pkg_list-" + repomd)

	# roll through the package list
	for node in document.getElementsByTagName("package"):
		version = node.getElementsByTagName("version")[0]
		name = node.attributes['name'].value
		pkg = {
			"name": name,
			"arch": node.attributes['arch'].value,
			"version": version.attributes['ver'].value,
			"release": version.attributes['rel'].value,
			"changelog": [],  # filled in below
		}

		# Strict ">" keeps the first name among equally long ones.
		if len(name) > len(longest_name):
			longest_name = name

		# roll through changelog entries for this pkg
		for change in node.getElementsByTagName("changelog"):
			# The "author" attribute holds both the author and the new
			# package version, so it is split on the ">" that closes the
			# e-mail address (either literal "> " or a still-escaped "&gt;").
			raw_author = change.attributes['author'].value
			separator = "&gt;" if "&gt;" in raw_author else "> "
			parts = raw_author.split(separator)
			if len(parts) < 2:
				parts.append("")  # no version part present

			# An empty <changelog/> has no child node; treat it as empty
			# notes instead of failing on None.
			notes = getattr(change.firstChild, "data", "")

			pkg['changelog'].append({
				"pkg": name,
				"new_version": parts[1],  # version text, not normalized
				"author": parts[0],
				"date": change.attributes['date'].value,
				# replace newlines since they get messy in csv files
				"notes": notes.replace("\n", "&nbsp;"),
			})

		# add to the dataset
		package_list[name] = pkg

	print(" - Longest pkg name:", len(longest_name), "characters")
	print(" -", longest_name)
	return package_list

def parse_update_info(repomd):
	"""Parse "updateinfo-<repomd>" and return the collected package data.

	Placeholder: the <update> elements are looked up but not processed
	yet, so the returned dict is always empty. The file is still parsed,
	so it has to exist and be well-formed XML.

	Args:
		repomd: File name suffix; the file read is "updateinfo-" + repomd.

	Returns:
		An empty dict.
	"""
	collected = {}
	document = minidom.parse("updateinfo-" + repomd)
	for _entry in document.getElementsByTagName("update"):
		# TODO: extract data from each <update> entry.
		pass
	return collected
