diff --git a/src/load_templates.py b/src/load_templates.py index f326de74a389da09fdc1d416bc88582fd470139e..db6271e23d062bf6b374315e31b43bf845f29d06 100644 --- a/src/load_templates.py +++ b/src/load_templates.py @@ -6,35 +6,34 @@ import re import threading import time +from typing import List DUMPS_DIR = "dumps" -def start_progress_indicator(): - time_elapsed_indicator.stop = False +def start_progress_indicator(is_done: List[bool], msg: str = ""): + is_done[0] = False + + def time_elapsed_indicator(): + start_time = time.time() + while not is_done[0]: + elapsed_time = time.time() - start_time + print(f"\r{msg} Time elapsed: {elapsed_time:.0f} seconds", end="") + time.sleep(1) + print(f"{msg} Time elapsed: {elapsed_time:.0f} seconds") # type: ignore + indicator_thread = threading.Thread(target=time_elapsed_indicator) indicator_thread.start() return indicator_thread -def stop_progress_indicator(indicator_thread): - time_elapsed_indicator.stop = True +def stop_progress_indicator(indicator_thread, is_done: List[bool]): + is_done[0] = True indicator_thread.join() -def time_elapsed_indicator(): - start_time = time.time() - while not time_elapsed_indicator.stop: - elapsed_time = time.time() - start_time - print(f"\rTime elapsed: {elapsed_time:.2f} seconds", end="") - time.sleep(1) - print( - "\rTime elapsed: {elapsed_time:.2f} seconds".format(elapsed_time=elapsed_time) # type: ignore - ) - - def get_most_recent_file(directory, lang_code): pattern = re.compile( r"" + lang_code + r"wiktionary-(\d+)-pages-articles-multistream.xml.bz2" @@ -53,20 +52,23 @@ def get_most_recent_file(directory, lang_code): def load_templates(wiktlang: str): - print(f"Loading templates for {wiktlang}...") + dump_file = get_most_recent_file(DUMPS_DIR, wiktlang) + + if not dump_file: + raise ValueError(f"Could not find dump file for {wiktlang}.") + + print(f"Loading templates from {dump_file}...") - indicator_thread = start_progress_indicator() + is_done = [False] + indicator_thread = start_progress_indicator( + is_done, msg=f"Loading templates for {wiktlang}..." + ) wxr = get_wiktextract_context(wiktlang) wxr.wtp.db_conn.execute("DELETE FROM pages") wxr.wtp.db_conn.commit() - dump_file = get_most_recent_file(DUMPS_DIR, wiktlang) - - if not dump_file: - raise ValueError(f"Could not find dump file for {wiktlang}.") - parse_wiktionary( wxr, dump_file, @@ -80,6 +82,6 @@ def load_templates(wiktlang: str): ) wxr.wtp.db_conn.commit() - stop_progress_indicator(indicator_thread) + stop_progress_indicator(indicator_thread, is_done) - print("Done loading templates.") + print(f"Done loading templates for {wiktlang}.")