From Fedora Project Wiki
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program. If not, see
# <http://www.gnu.org/licenses/gpl-3.0.html>.
#
# @author Jean-Baptiste Holcroft <jb.holcroft@gmail.com>

# TODO : add keywords statistics
"""Build translation statistics from a component metadata XML file.

Two CSV reports are written:

* RESULT_FILE: one row per ``<component>``, one column per language found in
  the file; each cell is the share of TRANSLATABLE_FIELDS carrying that
  language.
* RESULT_FILE_LANGAGE: one row per ``<component>`` for the single language
  LANGAGE_DETAILED, flagging which translatable fields exist in that language
  and the ``percentage`` attribute of the matching ``<lang>`` element.
"""

import csv
import xml.etree.ElementTree as ET
from datetime import date

STATISTIC_FILE = 'fedora-23.xml'
LANGAGE_DETAILED = "fr"

# Read the clock once so both file names always carry the same date, even if
# the script is started right before midnight.
_TODAY = date.today()
RESULT_FILE = './output_file-%s-%s-%s.csv' \
    % (_TODAY.year, _TODAY.month, _TODAY.day)
RESULT_FILE_LANGAGE = './output_file_%s-%s-%s-%s.csv' \
    % (LANGAGE_DETAILED, _TODAY.year, _TODAY.month, _TODAY.day)

NS_KEY = "http://www.w3.org/XML/1998/namespace"
NS_MAP = {"xml": NS_KEY}
# Fully qualified name under which ElementTree stores the xml:lang attribute.
LANG_ATTRIBUTE = "{%s}lang" % NS_KEY
TRANSLATABLE_FIELDS = ["name", "summary", "description"]


def collect_languages(root):
    """Return the sorted list of distinct xml:lang values found under *root*.

    Sorting keeps the CSV column order identical from one run to the next;
    iterating a plain set of strings does not guarantee that.
    """
    found = {
        element.get(LANG_ATTRIBUTE)
        for element in root.findall(".//*[@xml:lang]", namespaces=NS_MAP)
    }
    return sorted(found)


def describe_component(component):
    """Return ``[package name, component type, homepage url]`` for *component*.

    A component without a ``<pkgname>`` child gets an empty name instead of
    aborting the whole report. When several homepage URLs are present the
    last one wins.
    """
    pkgname = component.find("pkgname")
    package_name = pkgname.text if pkgname is not None else ""

    package_homepage = ""
    for url in component.findall("url"):
        if url.get("type") == "homepage":
            package_homepage = url.text

    return [package_name, component.get("type"), package_homepage]


def build_global_rows(root, languages):
    """Return the rows (header first) of the all-languages report.

    Each translated occurrence of a field listed in TRANSLATABLE_FIELDS adds
    ``1 / len(TRANSLATABLE_FIELDS)`` to the column of its language. Languages
    that are not listed in *languages* are ignored.
    """
    # Map each language to its column once instead of searching the list for
    # every translated field.
    column_of = {lang: index for index, lang in enumerate(languages)}
    share = 1 / len(TRANSLATABLE_FIELDS)

    rows = [["project", "type", "url"] + list(languages)]
    for component in root.findall("component"):
        language_statistic = [0] * len(languages)
        for translatable_field in TRANSLATABLE_FIELDS:
            for field in component.findall(translatable_field):
                lang = field.get(LANG_ATTRIBUTE)
                if lang is not None and lang in column_of:
                    language_statistic[column_of[lang]] += share
        rows.append(describe_component(component) + language_statistic)
    return rows


def build_language_rows(root, language=LANGAGE_DETAILED):
    """Return the rows (header first) of the single-language report.

    For every component, each translatable field is marked "oui" when a
    translation in *language* exists, and the last column holds the
    ``percentage`` attribute of the ``<lang>`` descendant whose text equals
    *language* (empty when there is none).
    """
    rows = [["project", "type", "url"] + TRANSLATABLE_FIELDS + ["package stats"]]
    for component in root.findall("component"):
        language_statistic = [""] * len(TRANSLATABLE_FIELDS)
        for position, translatable_field in enumerate(TRANSLATABLE_FIELDS):
            for field in component.findall(translatable_field):
                if field.get(LANG_ATTRIBUTE) == language:
                    language_statistic[position] = "oui"

        embedded_statistic = [""]
        for field in component.findall(".//lang"):
            if field.text == language:
                embedded_statistic[0] = field.get("percentage")

        rows.append(
            describe_component(component) + language_statistic + embedded_statistic
        )
    return rows


def write_csv(path, rows):
    """Write *rows* to *path* as comma-separated values, using '|' as quote char."""
    # Explicit UTF-8 so names and URLs outside ASCII do not depend on the locale.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        result_file_csv = csv.writer(csvfile, delimiter=',', quotechar='|',
                                     quoting=csv.QUOTE_MINIMAL)
        result_file_csv.writerows(rows)


def main():
    """Parse STATISTIC_FILE and write both CSV reports."""
    # open global xml file
    root = ET.parse(STATISTIC_FILE).getroot()

    #
    # GLOBAL STATISTICS
    #
    print("Make global statistics")
    languages = collect_languages(root)
    write_csv(RESULT_FILE, build_global_rows(root, languages))

    #
    # search for one language
    #
    print("Make statistics for language %s" % LANGAGE_DETAILED)
    write_csv(RESULT_FILE_LANGAGE, build_language_rows(root, LANGAGE_DETAILED))

    print("Done")


if __name__ == "__main__":
    main()