Skip to content
Snippets Groups Projects
Commit b236cdb0 authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

Following redirections → optional, en_en graphy in lower case

parent 948508cc
No related branches found
No related tags found
No related merge requests found
...@@ -101,6 +101,9 @@ class En_en_straktor(Wikstraktor): ...@@ -101,6 +101,9 @@ class En_en_straktor(Wikstraktor):
pos = self.constants['POS'][parsedwikitext] pos = self.constants['POS'][parsedwikitext]
return pos return pos
#lower-cases the graphy before delegating to the parent class's fetch
#follow_redirections defaults to False, like the parent signature, so that
#one-argument calls such as fetch("test") keep working
#returns whatever the parent fetch returns (the number of entries added)
def fetch(self, graphy, follow_redirections=False):
    return super().fetch(graphy.lower(), follow_redirections)
if __name__ == "__main__": if __name__ == "__main__":
ensk = En_en_straktor() ensk = En_en_straktor()
print(ensk.fetch("test"), "entries added") print(ensk.fetch("test"), "entries added")
...@@ -563,7 +563,7 @@ class Wikstraktor: ...@@ -563,7 +563,7 @@ class Wikstraktor:
#retrieves the content of a page and processes it (adding the entries to the list of entries) #retrieves the content of a page and processes it (adding the entries to the list of entries)
#returns the number of entries added #returns the number of entries added
def fetch(self, graphy): def fetch(self, graphy, follow_redirections=False):
nb_entries_added = 0 nb_entries_added = 0
page = self.pwb.Page(self.site, graphy) page = self.pwb.Page(self.site, graphy)
to_parse = [] to_parse = []
...@@ -580,10 +580,10 @@ class Wikstraktor: ...@@ -580,10 +580,10 @@ class Wikstraktor:
nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections) nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections)
else: else:
self.log.add_log("Wikstraktor.fetch", f"{graphy}” page not found") self.log.add_log("Wikstraktor.fetch", f"{graphy}” page not found")
if len(self.redirects) > 0: if len(self.redirects) > 0 and follow_redirections:
for e,p in self.redirects.items(): for e,p in self.redirects.items():
if not p: if not p:
nb_entries_added += self.fetch(self.process_redirect(e)) nb_entries_added += self.fetch(self.process_redirect(e), follow_redirections)
return nb_entries_added return nb_entries_added
def parse(self, entry, v_id, sections): def parse(self, entry, v_id, sections):
...@@ -770,7 +770,7 @@ if __name__ == "__main__": ...@@ -770,7 +770,7 @@ if __name__ == "__main__":
\033[1m\033[32mex :\033[0m \033[1m\033[32mex :\033[0m
\033[0m\033[32m./wikstraktor.py -m blue\033[0m \033[0m\033[32m./wikstraktor.py -m blue\033[0m
\033[0m\033[32m./wikstraktor.py -m blue -f blue.json -A -C\033[0m \033[0m\033[32m./wikstraktor.py -m blue -f blue.json -A -C\033[0m
\033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -A -C\033[0m""") \033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -ACr\033[0m""")
parser.add_argument("-l", "--language", help="la langue du mot", type=str, default = "en") parser.add_argument("-l", "--language", help="la langue du mot", type=str, default = "en")
parser.add_argument("-w", "--wiki_language", help="la langue du wiki", type=str, default = "en") parser.add_argument("-w", "--wiki_language", help="la langue du wiki", type=str, default = "en")
parser.add_argument("-m", "--mot", help="le mot à chercher", type=str, default=None) parser.add_argument("-m", "--mot", help="le mot à chercher", type=str, default=None)
...@@ -778,11 +778,13 @@ if __name__ == "__main__": ...@@ -778,11 +778,13 @@ if __name__ == "__main__":
parser.add_argument("-A", "--force_ascii", help="json avec que des caractères ascii", action="store_true") parser.add_argument("-A", "--force_ascii", help="json avec que des caractères ascii", action="store_true")
parser.add_argument("-C", "--compact", help="json sans indentation", action="store_true") parser.add_argument("-C", "--compact", help="json sans indentation", action="store_true")
parser.add_argument("-n", "--no_id", help="json sans id", action="store_true") parser.add_argument("-n", "--no_id", help="json sans id", action="store_true")
parser.add_argument("-r", "--follow_redirections", help="pour suivre les redirections (ex: did → do)", action="store_true")
args = parser.parse_args() args = parser.parse_args()
if args.mot != None: if args.mot != None:
w = Wikstraktor.get_instance(args.wiki_language, args.language) w = Wikstraktor.get_instance(args.wiki_language, args.language)
resp = None resp = None
if w.fetch(args.mot) > 0: if w.fetch(args.mot, args.follow_redirections) > 0:
resp = w.export(not args.no_id, args.force_ascii, args.compact) resp = w.export(not args.no_id, args.force_ascii, args.compact)
if args.destination_file != None: if args.destination_file != None:
f = open(args.destination_file, "w") f = open(args.destination_file, "w")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment