#!/usr/bin/python3
import sys
from LinuxChangelogs import Fedora

'''
	
	test-fedora.py
	
	Example usage of the LinuxChangelogs module: pull Fedora repo data
	and save it as a pair of CSV files.
	
	usage: test-fedora.py <release version> </path/to/output/>
	
	Output files:
	DATA_PATH+'/pkg_list-'+VERSION +'-x86_64.csv'
	DATA_PATH+'/updates-'+VERSION +'-x86_64.csv'
'''


# Command line: <release version> </path/to/output/>
# VERSION and DATA_PATH are read by the rest of the script.
if len(sys.argv) >= 3:
	VERSION = sys.argv[1]
	DATA_PATH = sys.argv[2]
else:
	# Without both arguments VERSION / DATA_PATH are never bound, so stop
	# here with a non-zero status instead of failing later with a NameError.
	print("usage: test-fedora.py <release version>", "</path/to/output/>", file=sys.stderr)
	sys.exit(1)

'''
	Getting data is a multistep process
	
	1) download repomd.xml file
	2) download pkg lists using url from repomd.xml
	3) parse the package list
	4) repeat for 'updates' repository

'''

def _load_repo_packages(**repo_md_kwargs):
	'''
	Run the fetch/parse pipeline for one repository and return the result
	of Fedora.parse_pkg_data().

	Extra keyword arguments are forwarded unchanged to Fedora.get_repo_md()
	(e.g. updates=True to select the 'updates' repository).
	'''
	# get repomd.xml
	repomd_file = Fedora.get_repo_md(VERSION, **repo_md_kwargs)
	# parse repomd for urls and download package data
	Fedora.parse_repo_md(VERSION, repomd_file)
	# parse the package data
	return Fedora.parse_pkg_data(repomd_file)


package_list = _load_repo_packages()
print("RELEASE repo:", len(package_list))

# repeat for updates repo...
package_list_updates = _load_repo_packages(updates=True)
print("UPDATES Repo:", len(package_list_updates))

# Changelog-entry counts per package, split by whether the package from the
# updates repo also appears in the release repo.

# packages that only exist in the updates repo
unique = {
	name: {"release":0, "updates":len(info['changelog'])}
	for name, info in package_list_updates.items()
	if name not in package_list
}

# packages present in both repos
shared = {
	name: {"release":len(package_list[name]['changelog']), "updates":len(info['changelog'])}
	for name, info in package_list_updates.items()
	if name in package_list
}

print("pkgs unique to UPDATES repo:", len(unique))
print("pkg entries found in both repos:", len(shared))

def merge_changelog(a, b):
	'''
	Concatenate two changelog lists and drop repeated entries.

	Entries are compared by equality rather than through a set, because
	changelog entries are dicts and dicts are unhashable. The first
	occurrence of each entry is kept, so the result lists the entries of
	`a` first, followed by the entries of `b` that were not already seen.
	Neither input list is modified.
	'''
	merged_list = []
	for entry in a + b:
		if entry in merged_list:
			# already kept an equal entry earlier in the sequence
			continue
		merged_list.append(entry)
	return merged_list
	
def merge_package_lists(a, b):
	'''
	Merge two package mappings into one new dict.

	a, b: mappings of package name -> package data, where the package data
	      carries a 'changelog' list.

	Returns a dict holding every package of `a` (in `a`'s order) followed
	by the packages that only exist in `b`. For a package present in both,
	the data of `a` is used and its 'changelog' is replaced by
	merge_changelog(a's changelog, b's changelog).

	Neither `a` nor `b` is modified: shared packages get a shallow copy of
	their data before the merged changelog is stored, and nothing is
	deleted from `b`.
	'''
	combined = {}
	for name, data in a.items():
		if name in b:
			# copy first so the merged changelog is not written back into `a`
			data = dict(data)
			data['changelog'] = merge_changelog(data['changelog'], b[name]['changelog'])
		combined[name] = data

	# packages that only exist in `b`
	for name, data in b.items():
		if name not in a:
			combined[name] = data
	return combined


# Fedora repo data lives in 2 places: the release repo and the updates repo.
# The updates repo only has pkgs that have been updated since release,
# BUT those pkg changelogs include old entries and duplicate data, so the
# two package lists are merged before anything is written.

# The cutoff is the same for every changelog entry, so look it up once.
dev_start = Fedora.RELEASES[VERSION]['dev_start']

# `with` closes both files even if a row fails part-way through; the explicit
# encoding keeps the output independent of the locale the script runs under.
with open(DATA_PATH+'/pkg_list-'+VERSION +'-x86_64.csv', 'w', encoding='utf-8') as pkg_csv, \
		open(DATA_PATH+'/updates-'+VERSION +'-x86_64.csv', 'w', encoding='utf-8') as updates_csv:
	for x, data in merge_package_lists(package_list,  package_list_updates).items():
		# '\n' is joined as the last field, so each row ends with a separator
		# before the newline; kept as-is so existing consumers see the same rows.
		pkg_csv.write(','.join( [x, data['version'], data['arch'], '\n'] ))
		for update in data["changelog"]:
			# skipping all updates where datestamp predates
			# release of this fedora version.
			if int(update['date']) <= dev_start:
				continue

			# Update notes are free text, not machine readable (mass rebuilds
			# can be recognised by "Mass_Rebuild" in the notes); no change-type
			# column is written.
			# if there are any commas in author
			# replace them with nothing
			line = [update['pkg'],update['new_version'],update['author'].replace(',', ''),update['date'],data['arch'],update['notes'], '\n']
			updates_csv.write(' | '.join(line))

