import json
import re

try:
    import requests
    from bs4 import BeautifulSoup  # MIT
except ImportError:
    # The crawler dependencies are only needed by update_db(); lookups
    # against an already cached word list keep working without them.
    requests = None
    BeautifulSoup = None


class ModuleGenderneutral:
    """Look up gender-neutral alternatives for German words.

    The word list is crawled from geschicktgendern.de and cached in a
    UTF-16 encoded JSON file. The in-memory structure is::

        {normalized_lowercase_key: {term_as_displayed_on_site: alternatives}}
    """

    SOURCE_URL = "https://geschicktgendern.de/"
    # Seconds to wait for the website; without a timeout requests.get()
    # may block forever.
    REQUEST_TIMEOUT = 30
    # Placeholder text the site uses for terms without an alternative yet.
    _NO_ALTERNATIVE_MARKER = "noch kein passender Begriff gefunden;"

    def __init__(self, filename, log):
        """Load the cached word list, crawling it when the cache is empty.

        Args:
            filename: Path of the JSON cache file (UTF-16 encoded).
            log: Logger-like object providing ``debug`` and ``error``.
        """
        self.tag = "%20s - " % "mGenderneutral"
        self.log = log
        self.filename = filename
        self.genderneutral_dict = self.load_db()
        if not self.genderneutral_dict:
            self.genderneutral_dict = self.update_db()
        self.log.debug(f"{self.tag}Modul Genderneutral geladen")

    def load_db(self):
        """Read the cached word list from ``self.filename``.

        Returns:
            The cached dict, or an empty dict when the file is missing,
            unreadable or does not contain a JSON object.
        """
        try:
            with open(self.filename, 'r', encoding='utf16') as f:
                loaded = json.load(f)
        except FileNotFoundError:
            # Expected on the very first start: nothing has been cached yet.
            self.log.debug(f"{self.tag}No cached genderneutral dict at {self.filename}")
            return {}
        except Exception as ex:
            self.log.error(f"{self.tag}Failed loading genderneutral dict {self.filename}: {ex}")
            return {}
        if not isinstance(loaded, dict):
            # search_in_db() indexes the result like a mapping.
            self.log.error(
                f"{self.tag}Failed loading genderneutral dict {self.filename}: "
                f"expected a JSON object, got {type(loaded).__name__}"
            )
            return {}
        return loaded

    @staticmethod
    def _normalize_key(key):
        """Reduce a lower-cased table cell to the bare lookup word.

        A leading bracketed prefix such as ``(der) `` or ``[die] `` is
        dropped when the cell does not start with a letter, and everything
        from the first opening parenthesis onwards is cut off.
        """
        if re.search(r"^[a-zA-ZöäüÖÄÜß]", key) is None:
            # Treat all bracket kinds alike, then drop the leading "(...)".
            key = key.translate(str.maketrans("[{]}", "(())"))
            parts = key.split(')', 1)
            if len(parts) >= 2:
                key = parts[1].strip()
        return key.split('(')[0].strip()

    def _parse_table(self, html):
        """Extract ``{key: {displayed_term: alternatives}}`` from the page HTML."""
        soup = BeautifulSoup(html, features="html.parser")
        entries = {}
        # Text of the most recently seen right-hand cell. The loop visits
        # every <td>, so this is how the alternatives cell of the previous
        # term is recognised and skipped.
        value = ""
        for td in soup.find_all('td'):
            key_original = td.text.strip()
            key = key_original.lower()
            if key == value.lower():
                continue
            if key[0:3] == "...":
                continue
            key = key.replace("\n", "")
            sibling = td.find_next_sibling("td")
            if sibling is None:
                # Cell without a following cell: it cannot be a term.
                value = "NONE"
                continue
            value = sibling.text.strip()
            # Special cases that are ignored: no alternatives at all, or the
            # site's "nothing found yet" placeholder.
            if not value or self._NO_ALTERNATIVE_MARKER in value:
                continue
            key = self._normalize_key(key)
            if not key:
                # An empty key can never match a searched word.
                continue
            # The table is embedded twice in the page source; writing into
            # the same inner dict makes the duplicates collapse.
            entries.setdefault(key, {})[key_original] = value
        return entries

    def update_db(self):
        """Crawl the word list from the website and refresh the cache file.

        Returns:
            The freshly crawled dict. When crawling fails or yields nothing,
            the cache file is left untouched and its content (possibly an
            empty dict) is returned instead.
        """
        genderneutral_dict = {}
        if requests is None or BeautifulSoup is None:
            self.log.error(
                f"{self.tag}Failed crawling genderneutral dict: "
                f"the packages 'requests' and 'beautifulsoup4' are required"
            )
        else:
            try:
                response = requests.get(self.SOURCE_URL, timeout=self.REQUEST_TIMEOUT)
                response.raise_for_status()
                genderneutral_dict = self._parse_table(response.text)
            except Exception as ex:
                # A partially parsed table is discarded rather than written
                # over a complete cache.
                self.log.error(f"{self.tag}Failed crawling genderneutral dict: {ex}")
                genderneutral_dict = {}

        if not genderneutral_dict:
            return self.load_db()

        try:
            with open(self.filename, 'w', encoding='utf16') as f:
                json.dump(genderneutral_dict, f)
        except Exception as ex:
            # Saving is best effort; the crawled data is still usable.
            self.log.error(f"{self.tag}Failed saving fresh genderneutral dict : {ex}")
        return genderneutral_dict

    def search_in_db(self, badstring):
        """Check every word of ``badstring`` against the word list.

        Args:
            badstring: Word or sentence to check. Periods and commas are
                ignored; words are separated by any whitespace.

        Returns:
            A user-facing message: the alternatives for every word found,
            a confirmation when no word was found, a quote when the input
            contains no words at all, or an error text when the check
            itself failed.
        """
        try:
            cleaned = badstring.replace('.', '').replace(',', '')
            # split() without an argument drops the empty tokens produced by
            # leading, trailing or repeated whitespace.
            words = cleaned.split()
            lines = []
            for word in words:
                word = word.lower()
                if word not in self.genderneutral_dict:
                    continue
                lines.append(f'Potentiell "{word}" gefunden. Alternativen:\n')
                for key, alternatives in self.genderneutral_dict[word].items():
                    lines.append(f' {key}: {alternatives}\n')
            if lines:
                return ''.join(lines)
            if not words:
                return '"Wer nichts sagt, macht jedenfalls nichts falsch" \nZitat von https://www.lawblog.de/archives/2019/07/24/wer-nichts-sagt-macht-jedenfalls-nichts-falsch/ \n(29.09.2021 abgerufen)'
            subject = 'Dieser Satz' if len(words) > 1 else 'Dieses Wort'
            return subject + ' ist nach aktueller Version von geschicktgendern.de genderneutral.'
        except Exception as ex:
            # Boundary towards the user: never let a lookup failure escape.
            self.log.error(f"{self.tag}Fucked up checking for bad words: {ex}")
            return "Ausnahmefehler: Der Text konnte leider nicht auf Genderneutralität überprüft werden.\nBei Fragen wende dich an .request !"