Add an opt-in "follow redirections" mode to Wikstraktor.fetch (CLI flag -r).

Wikstraktor.fetch only recurses into the collected redirects when
follow_redirections is true (default: False). En_en_straktor.fetch lower-cases
the graphy before delegating to the parent; its follow_redirections parameter
defaults to False like the parent's, so the existing one-argument call
ensk.fetch("test") in parsers/en_en.py keeps working.

NOTE(review): line breaks and tab indentation were reconstructed from a
whitespace-collapsed copy of this patch -- confirm against the repository
before applying (or apply with --ignore-whitespace).

diff --git a/parsers/en_en.py b/parsers/en_en.py
index 6353a0279ecca7f82c77c2645e3175be4e33d61f..960944a0d658afcf5be6942a2f1154ac978810c3 100644
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -101,6 +101,10 @@ class En_en_straktor(Wikstraktor):
 			pos = self.constants['POS'][parsedwikitext]
 		return pos
 
+	#lowercases the graphy, then delegates to the parent's fetch
+	def fetch(self, graphy, follow_redirections=False):
+		return super().fetch(graphy.lower(), follow_redirections)
+
 if __name__ == "__main__":
 	ensk = En_en_straktor()
 	print(ensk.fetch("test"), "entries added")
diff --git a/wikstraktor.py b/wikstraktor.py
index 7e1871d74df5ded168cfb6ca2e02707ffae1faff..35dfe17c83d3e27c88bb4a558b80ee3c63363717 100755
--- a/wikstraktor.py
+++ b/wikstraktor.py
@@ -563,7 +563,7 @@ class Wikstraktor:
 
 	#retrieves the content of a page and processes it (adding the entries to the list of entries)
 	#returns the number of entries added
-	def fetch(self, graphy):
+	def fetch(self, graphy, follow_redirections=False):
 		nb_entries_added = 0
 		page = self.pwb.Page(self.site, graphy)
 		to_parse = []
@@ -580,10 +580,10 @@ class Wikstraktor:
 				nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections)
 		else:
 			self.log.add_log("Wikstraktor.fetch", f"“{graphy}” page not found")
-		if len(self.redirects) > 0:
+		if len(self.redirects) > 0 and follow_redirections:
 			for e,p in self.redirects.items():
 				if not p:
-					nb_entries_added += self.fetch(self.process_redirect(e))
+					nb_entries_added += self.fetch(self.process_redirect(e), follow_redirections)
 		return nb_entries_added
 
 	def parse(self, entry, v_id, sections):
@@ -770,7 +770,7 @@ if __name__ == "__main__":
 	\033[1m\033[32mex :\033[0m
 	‣\033[0m\033[32m./wikstraktor.py -m blue\033[0m
 	‣\033[0m\033[32m./wikstraktor.py -m blue -f blue.json -A -C\033[0m
-	‣\033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -A -C\033[0m""")
+	‣\033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -ACr\033[0m""")
 	parser.add_argument("-l", "--language", help="la langue du mot", type=str, default = "en")
 	parser.add_argument("-w", "--wiki_language", help="la langue du wiki", type=str, default = "en")
 	parser.add_argument("-m", "--mot", help="le mot à chercher", type=str, default=None)
@@ -778,11 +778,13 @@ if __name__ == "__main__":
 	parser.add_argument("-A", "--force_ascii", help="json avec que des caractères ascii", action="store_true")
 	parser.add_argument("-C", "--compact", help="json sans indentation", action="store_true")
 	parser.add_argument("-n", "--no_id", help="json sans id", action="store_true")
+	parser.add_argument("-r", "--follow_redirections", help="pour suivre les redirections (ex: did → do)", action="store_true")
+
 	args = parser.parse_args()
 	if args.mot != None:
 		w = Wikstraktor.get_instance(args.wiki_language, args.language)
 		resp = None
-		if w.fetch(args.mot) > 0:
+		if w.fetch(args.mot, args.follow_redirections) > 0:
 			resp = w.export(not args.no_id, args.force_ascii, args.compact)
 		if args.destination_file != None:
 			f = open(args.destination_file, "w")