Source code for findnationality

#!/usr/bin/python
# -*- coding: utf-8 -*-
import os, sys
import requests
from bs4 import BeautifulSoup

# Finds the nationality of each judoka fighting in the
# 1. Oesterreichischen Judo Bundesliga from https://www.judoinside.com
class Nationality:
    """Collects the nationality of each judoka listed in the yearly
    ``kaempfer<year>.txt`` files of the 1. Oesterreichischen Judo Bundesliga.

    A nationality is taken from a hand-maintained override file when the
    name is listed there, otherwise it is scraped from the search page of
    https://www.judoinside.com.
    """

    # Search endpoint; the judoka name is sent as the ``q`` query parameter.
    SEARCH_URL = "https://www.judoinside.com/site/search"
    # Seconds to wait for judoinside.com before requests raises a timeout.
    REQUEST_TIMEOUT = 30
    # Placeholder written when no nationality could be determined.
    MISSING_NATION = "MissingCo"

    def __init__(self):
        """ creates a Nationality object (it carries no state) """
        pass

    @staticmethod
    def _unescape(text):
        r"""Resolve backslash escapes (e.g. ``\xc3\xa4``) in an override entry.

        On Python 2 this is the original ``string-escape`` decoding of a
        byte string. Python 3 has no such codec for ``str``, so the escapes
        are resolved via ``unicode_escape``; if the result turns out to be
        UTF-8 bytes spelled as ``\xNN`` escapes it is re-read as UTF-8,
        otherwise it is returned as decoded.
        """
        if isinstance(text, bytes):  # Python 2 ``str``
            return text.decode('string-escape')
        # backslashreplace keeps characters outside latin-1 intact: they are
        # turned into \uXXXX escapes that unicode_escape decodes right back.
        resolved = text.encode('latin-1', 'backslashreplace').decode('unicode_escape')
        try:
            return resolved.encode('latin-1').decode('utf-8')
        except (UnicodeEncodeError, UnicodeDecodeError):
            return resolved

    @staticmethod
    def _readLines(path):
        """Return the lines of the text file at *path* without line endings."""
        with open(path) as source:
            return source.read().splitlines()

    def _loadOverrides(self, path):
        """Read the ``name; nationality`` override file into a dict."""
        nameCountry = dict()
        for line in self._readLines(path):
            nameNation = line.split("; ")
            if len(nameNation) < 2:
                # blank or malformed line: nothing to map, skip it
                continue
            nameCountry[self._unescape(nameNation[0])] = self._unescape(nameNation[1])
        return nameCountry

    def _lookupOnline(self, name):
        """Search judoinside.com for *name*.

        Returns the text of the first ``<li>`` inside the element with id
        ``judokaUserDatas`` with the ``Country:`` label markup stripped, or
        ``None`` when the result page has no such element or list item.
        """
        # TODO (from the original code): the lookup "does not work correctly"
        # for some names - not verified which ones.
        # Passing the name via ``params`` lets requests URL-encode it, so
        # reserved characters in a name cannot corrupt the query string.
        response = requests.get(self.SEARCH_URL, params={"q": name},
                                timeout=self.REQUEST_TIMEOUT)
        soupFightsOverview = BeautifulSoup(response.content, 'html.parser')
        judoka = soupFightsOverview.find_all(id="judokaUserDatas")
        if not judoka:
            return None
        judokaDetails = judoka[0].find_all("li")
        if not judokaDetails:
            return None
        nationName = str(judokaDetails[0])
        return nationName.replace("<li><span>Country:</span>", "").replace("</li>", "")

    def runFindNationality(self, startYear=2011, endYear=2017,
                           placeToStoreResults='../results/',
                           placeOfFalseClassification='../falseClassification/'):
        """
        finds the nationality of each judoka, fighting in the
        1. Oesterreichischen Judo Bundesliga from https://www.judoinside.com
        and stores the data collected in a .txt file

        For every year in ``range(startYear, endYear)`` (``endYear`` is
        exclusive) the names are read from
        ``<placeToStoreResults>/<year>/kaempfer<year>.txt`` and the result is
        written to ``<placeToStoreResults>/<year>/nationalities<year>.txt``
        as ``name; nationality`` lines below a header line.

        :param startYear: first year to process
        :param endYear: year at which processing stops (not processed itself)
        :param placeToStoreResults: directory holding the per-year folders
        :param placeOfFalseClassification: directory holding the
            ``falseClassification<year>.txt`` override files
        :raises IOError: if an input file is missing; the inputs are read
            before the result file is opened, so an existing result file is
            not truncated in that case
        """
        for year in range(startYear, endYear):
            print("current year")
            print(year)
            yearDirectory = os.path.join(placeToStoreResults, str(year))
            names = self._readLines(
                os.path.join(yearDirectory, "kaempfer" + str(year) + ".txt"))
            nameCountry = self._loadOverrides(
                os.path.join(placeOfFalseClassification,
                             "falseClassification" + str(year) + ".txt"))
            nameNations = os.path.join(yearDirectory,
                                       'nationalities' + str(year) + '.txt')
            # ATTENTION: overwrites file
            with open(nameNations, 'w') as fileBegegnungen:
                fileBegegnungen.write('name; nationality\n')
                for name in names:
                    if name in nameCountry:
                        # hand-maintained entries win; no web request needed
                        print("Name from List " + str(name) + "; " + str(nameCountry.get(name)))
                        line = str(name) + "; " + str(nameCountry.get(name))
                    else:
                        nation = self._lookupOnline(name)
                        if nation is None:
                            nation = self.MISSING_NATION
                        line = name + "; " + nation
                        print(line)
                    fileBegegnungen.write(line + "\n")
if __name__ == "__main__":
    # Run the lookup with its default settings when executed as a script.
    nationalityFighter = Nationality()
    nationalityFighter.runFindNationality()