import os
import re
import threading
import time
from typing import List

from wiktextract.wiktionary import parse_wiktionary

from wiktextract_context import get_wiktextract_context

# Directory that load_templates() searches for Wiktionary dump files.
# It is a relative path, so it is resolved against the current working
# directory of the process.
DUMPS_DIR = "dumps"

# Namespace names whose ids are looked up in the wiktextract context's
# NAMESPACE_DATA and passed to parse_wiktionary() as ``namespace_ids``.
RECOGNIZED_NAMESPACE_NAMES = [
    "Main",
    "Category",
    "Appendix",
    "Project",
    "Thesaurus",
    "Module",
    "Template",
    "Reconstruction",
]


def start_progress_indicator(is_done: List[bool], msg: str = ""):
    """Start a background thread that prints a running "time elapsed" line.

    The line is redrawn in place (via a carriage return) whenever the number
    of whole seconds elapsed changes. Once ``is_done[0]`` becomes true, a
    final line terminated by a newline is printed and the thread exits.

    Args:
        is_done: Single-element list used as a shared stop flag. It is reset
            to ``False`` here; set ``is_done[0] = True`` (for example through
            ``stop_progress_indicator``) to end the indicator.
        msg: Text printed in front of the elapsed time.

    Returns:
        The started ``threading.Thread``.
    """
    is_done[0] = False

    def time_elapsed_indicator():
        # monotonic() is immune to system clock adjustments, unlike time().
        start_time = time.monotonic()
        last_shown = None
        while not is_done[0]:
            whole_seconds = int(time.monotonic() - start_time)
            # Redraw only when the displayed value changes, so polling the
            # flag often does not flood the output.
            if whole_seconds != last_shown:
                print(f"\r{msg} Time elapsed: {whole_seconds} seconds", end="")
                last_shown = whole_seconds
            # Short sleep keeps the stop request responsive instead of
            # making the caller wait up to a full second in join().
            time.sleep(0.1)
        # Recompute after the loop: the value is always defined (even if the
        # flag was set before the first iteration) and is not stale.
        whole_seconds = int(time.monotonic() - start_time)
        # Leading "\r" overwrites the in-place line rather than appending a
        # second copy of the message after it.
        print(f"\r{msg} Time elapsed: {whole_seconds} seconds")

    # Daemon thread: if the caller fails before setting the flag, the
    # indicator must not keep the interpreter alive forever.
    indicator_thread = threading.Thread(
        target=time_elapsed_indicator, daemon=True
    )
    indicator_thread.start()

    return indicator_thread


def stop_progress_indicator(indicator_thread, is_done: List[bool]):
    """Signal the progress indicator to finish and wait for its thread.

    Args:
        indicator_thread: Thread object to wait on; only ``join()`` is called
            on it.
        is_done: Single-element list whose first item is set to ``True`` so
            that a loop polling it can exit.
    """
    # Raise the flag before joining; otherwise the join would wait on a
    # thread that has not been told to stop.
    is_done[0] = True
    indicator_thread.join()


def get_most_recent_file(directory, lang_code):
    """Return the path of the newest multistream dump for ``lang_code``.

    Files in ``directory`` are considered when their whole name has the form
    ``<lang_code>wiktionary-<digits>-pages-articles-multistream.xml.bz2``.
    The digit group is compared numerically and the largest one wins.

    Args:
        directory: Directory to scan (not recursive).
        lang_code: Prefix in front of ``wiktionary`` in the file name; it is
            matched literally.

    Returns:
        ``os.path.join(directory, name)`` for the selected file, or ``None``
        when no file name matches.
    """
    # Escape the language code and the literal dots so neither is treated as
    # regex syntax.
    pattern = re.compile(
        re.escape(lang_code)
        + r"wiktionary-(\d+)-pages-articles-multistream\.xml\.bz2"
    )

    candidates = []
    for name in os.listdir(directory):
        # fullmatch() rejects names with trailing text such as
        # "...xml.bz2.part" that a prefix-only match would accept.
        found = pattern.fullmatch(name)
        if found:
            candidates.append((int(found.group(1)), name))

    if not candidates:
        return None

    # Tuples compare on the numeric stamp first, then on the name.
    _, newest_name = max(candidates)
    return os.path.join(directory, newest_name)


def load_templates(wiktlang: str):
    """Run phase 1 of wiktextract over the newest dump for ``wiktlang``.

    Finds the most recent dump file in ``DUMPS_DIR``, empties the ``pages``
    table of the wiktextract context's database, then calls
    ``parse_wiktionary`` with ``phase1_only=True`` restricted to the ids of
    ``RECOGNIZED_NAMESPACE_NAMES``. A progress indicator is shown while this
    runs.

    Args:
        wiktlang: Language code used both to locate the dump file and to
            build the wiktextract context.

    Raises:
        ValueError: If no dump file for ``wiktlang`` is found.
    """
    dump_file = get_most_recent_file(DUMPS_DIR, wiktlang)

    if not dump_file:
        raise ValueError(f"Could not find dump file for {wiktlang}.")

    print(f"Loading templates from {dump_file}...")

    is_done = [False]
    indicator_thread = start_progress_indicator(
        is_done, msg=f"Loading templates for {wiktlang}..."
    )

    # Everything below can raise; the outer finally guarantees the indicator
    # thread is stopped so a failure cannot leave it printing forever.
    try:
        wxr = get_wiktextract_context(wiktlang)

        # The inner finally releases the database connection on failure too.
        try:
            # Start from an empty pages table before re-reading the dump.
            wxr.wtp.db_conn.execute("DELETE FROM pages")
            wxr.wtp.db_conn.commit()

            # NOTE(review): a name missing from NAMESPACE_DATA contributes
            # None to this set — confirm that is acceptable to the parser.
            namespace_ids = {
                wxr.wtp.NAMESPACE_DATA.get(name, {}).get("id")
                for name in RECOGNIZED_NAMESPACE_NAMES
            }
            parse_wiktionary(
                wxr,
                dump_file,
                num_processes=1,
                phase1_only=True,
                namespace_ids=namespace_ids,
                out_f=None,  # type: ignore
            )
            wxr.wtp.db_conn.commit()
        finally:
            wxr.wtp.close_db_conn()
    finally:
        stop_progress_indicator(indicator_thread, is_done)

    print(f"Done loading templates for {wiktlang}.")


if __name__ == "__main__":
    # Imported here so that ``config`` is only needed when this file is run
    # as a script.
    import config

    # Load templates for each configured language, one after another.
    for lang in config.supported_wiktlangs:
        load_templates(lang)
