From Fedora Project Wiki

Revision as of 11:32, 28 November 2015 by Jibecfed (talk | contribs) (script to get stats from AppData & AppStream)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program.  If not, see
# <http://www.gnu.org/licenses/gpl-3.0.html>.
#
# @author Jean-Baptiste Holcroft <jb.holcroft@gmail.com>
# TODO : add keywords statistics

import xml.etree.ElementTree as ET
import csv
from datetime import date

# Input XML file parsed by this script.
STATISTIC_FILE = 'fedora-23.xml'
# Language code that gets its own detailed report.
LANGAGE_DETAILED = "fr"

# Take a single snapshot of the date: calling date.today() once per field
# could mix two different days into one file name if the script starts
# right at midnight.
_today = date.today()
_date_parts = (_today.year, _today.month, _today.day)

# Output paths; the date is embedded as year-month-day without zero padding.
RESULT_FILE = './output_file-%s-%s-%s.csv' % _date_parts
RESULT_FILE_LANGAGE = './output_file_%s-%s-%s-%s.csv' \
    % ((LANGAGE_DETAILED,) + _date_parts)

# Namespace URI behind the "xml" prefix, needed to address xml:lang
# attributes through ElementTree.
NS_KEY = "http://www.w3.org/XML/1998/namespace"
NS_MAP = {"xml": NS_KEY}

# Child elements of a component whose translations are counted.
TRANSLATABLE_FIELDS = ["name", "summary", "description"]
# Filled in later with every xml:lang code found in the input.
languages = []
# NOTE(review): not referenced anywhere else in the visible script.
projects_statistics = {}

# Parse the input XML once; every report below walks this same tree.
tree = ET.parse(STATISTIC_FILE)
root = tree.getroot()

#
# GLOBAL STATISTICS
#
print("Make global statistics")
# Collect every distinct xml:lang value found anywhere in the document.
# The codes are sorted because the iteration order of a set of strings is
# not reproducible between runs; sorting keeps the CSV columns in a stable
# order so that reports from different runs can be compared.
lang_attr = "{%s}lang" % NS_KEY
languages = sorted({
    element.get(lang_attr)
    for element in root.findall(".//*[@xml:lang]", namespaces=NS_MAP)
})
# NOTE(review): this copy is not used anywhere else in the visible script.
component_languages = languages.copy()

# First CSV row: three identity columns followed by one column per language.
output_for_csv = []
header_line = ["project", "type", "url"] + languages
output_for_csv.append(header_line)

# Build one CSV row per <component>: the identity columns followed by, for
# each language, the share of translatable fields available in that language.

# Map each language code to its column once instead of scanning the list
# with languages.index() for every translated field.
language_column = {code: position for position, code in enumerate(languages)}
# Every translated field contributes the same fraction of a full translation.
field_weight = 1 / len(TRANSLATABLE_FIELDS)
lang_attr = "{%s}lang" % NS_KEY

for component in root.findall("component"):
    # findtext() falls back to "" instead of raising AttributeError when a
    # component carries no <pkgname> element.
    package_name = component.findtext("pkgname", default="")
    package_type = component.get("type")
    package_homepage = ""
    language_statistic = [0] * len(languages)

    # get project url (if several homepage entries exist, the last one wins)
    for url in component.findall("url"):
        if url.get("type") == "homepage":
            package_homepage = url.text

    # get project statistics; fields without xml:lang are the untranslated
    # originals and are not counted
    for translatable_field in TRANSLATABLE_FIELDS:
        for field in component.findall(translatable_field):
            lang = field.get(lang_attr)
            if lang is not None:
                language_statistic[language_column[lang]] += field_weight

    package_info = [package_name, package_type, package_homepage]
    csv_line = package_info + language_statistic

    output_for_csv.append(csv_line)

# Write the global report. The encoding is spelled out so that non-ASCII
# values do not depend on the current locale (and cannot fail with
# UnicodeEncodeError under an ASCII-only one).
# NOTE: fields that need quoting are wrapped in '|' rather than '"'.
with open(RESULT_FILE, 'w', newline='', encoding='utf-8') as csvfile:
    result_file_csv = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    result_file_csv.writerows(output_for_csv)

#
# Detailed report restricted to a single language (LANGAGE_DETAILED)
#
print("Make statistics for language %s" % LANGAGE_DETAILED)
# Header row: identity columns, one column per translatable field, then the
# translation percentage the component declares for that language.
header_line = ["project", "type", "url"]
header_line.extend(TRANSLATABLE_FIELDS)
header_line.append("package stats")
output_for_csv = [header_line]

# Build one CSV row per <component> for the detailed language: a "oui"
# marker for each translatable field that exists in that language, plus the
# percentage found in the component's own <lang> entry, when present.
lang_attr = "{%s}lang" % NS_KEY

for component in root.findall("component"):
    # findtext() falls back to "" instead of raising AttributeError when a
    # component carries no <pkgname> element.
    package_name = component.findtext("pkgname", default="")
    package_type = component.get("type")
    package_homepage = ""
    language_statistic = [""] * len(TRANSLATABLE_FIELDS)
    embedded_statistic = [""]

    # get project url (if several homepage entries exist, the last one wins)
    for url in component.findall("url"):
        if url.get("type") == "homepage":
            package_homepage = url.text

    # enumerate() hands us the column directly, avoiding a list search
    # through TRANSLATABLE_FIELDS for every matching element
    for position, translatable_field in enumerate(TRANSLATABLE_FIELDS):
        for field in component.findall(translatable_field):
            lang = field.get(lang_attr)
            if lang == LANGAGE_DETAILED:
                language_statistic[position] = "oui"

    # <lang> elements anywhere below the component whose text is the
    # detailed language supply the "percentage" attribute for the last column
    for field in component.findall(".//lang"):
        if field.text == LANGAGE_DETAILED:
            percent = field.get("percentage")
            embedded_statistic[0] = percent

    package_info = [package_name, package_type, package_homepage]
    csv_line = package_info + language_statistic + embedded_statistic
    output_for_csv.append(csv_line)

# Write the per-language report. The encoding is spelled out so that
# non-ASCII values do not depend on the current locale (and cannot fail with
# UnicodeEncodeError under an ASCII-only one).
# NOTE: fields that need quoting are wrapped in '|' rather than '"'.
with open(RESULT_FILE_LANGAGE, 'w', newline='', encoding='utf-8') as csvfile:
    result_file_csv = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    result_file_csv.writerows(output_for_csv)

print("Done")