import os
import re

maya_root = "bible_data/jwBibleMaya"
spanish_root = "bible_data/jwBibleSpanish"


def _index_chapters(root, suffix, redo_marker=None):
    """Map chapter ids to file paths for the chapter files under *root*.

    *root* is expected to contain one sub-directory per book; entries of
    *root* that are not directories are skipped.  Inside each book, a file is
    a chapter file when its name ends with *suffix* and contains ``"_ch"``.
    The chapter id is the file name without the trailing *suffix* and, when
    *redo_marker* is given, without that marker
    (e.g. ``"1-corintios_ch10_REDO.txt"`` -> ``"1-corintios_ch10"``).

    When a marked and an unmarked file yield the same chapter id, the marked
    one is kept.  NOTE(review): this assumes a "_REDO" file supersedes the
    original chapter file -- confirm against how the data set is produced.

    Raises OSError (e.g. FileNotFoundError) if *root* cannot be listed.
    """
    chapters = {}
    redo_ids = set()
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # result reproducible whenever two files map to the same chapter id.
    for book in sorted(os.listdir(root)):
        book_path = os.path.join(root, book)
        if not os.path.isdir(book_path):
            continue
        for fname in sorted(os.listdir(book_path)):
            if not fname.endswith(suffix) or "_ch" not in fname:
                continue
            # Strip only the trailing suffix, not every occurrence of it.
            chapter_id = fname[:-len(suffix)]
            is_redo = bool(redo_marker) and redo_marker in chapter_id
            if is_redo:
                chapter_id = chapter_id.replace(redo_marker, "")
            elif chapter_id in redo_ids:
                # A redone version of this chapter was already indexed.
                continue
            chapters[chapter_id] = os.path.join(book_path, fname)
            if is_redo:
                redo_ids.add(chapter_id)
    return chapters


def _count_verses(path):
    """Return the number of non-blank lines in the UTF-8 text file *path*."""
    with open(path, encoding="utf-8") as f:
        return sum(1 for line in f if line.strip())


def compare_chapters(maya_dir, spanish_dir):
    """Compare per-chapter verse counts between the two directory trees.

    Maya chapter files end in ``.txt`` (optionally carrying ``_REDO``);
    Spanish chapter files end in ``_es.txt``.  A verse is counted as one
    non-blank line.

    Returns a tuple ``(total_maya, total_spanish, mismatches, only_maya,
    only_spanish)`` where

    * the totals cover only chapters present on both sides,
    * ``mismatches`` is a list of ``(chapter_id, maya_count, spanish_count)``
      in sorted chapter-id order for chapters whose counts differ,
    * ``only_maya`` / ``only_spanish`` are sorted lists of chapter ids found
      on one side only.
    """
    maya_chapters = _index_chapters(maya_dir, ".txt", redo_marker="_REDO")
    spanish_chapters = _index_chapters(spanish_dir, "_es.txt")

    total_maya = 0
    total_spanish = 0
    mismatches = []
    for chapter_id in sorted(maya_chapters.keys() & spanish_chapters.keys()):
        len_maya = _count_verses(maya_chapters[chapter_id])
        len_spanish = _count_verses(spanish_chapters[chapter_id])
        total_maya += len_maya
        total_spanish += len_spanish
        if len_maya != len_spanish:
            mismatches.append((chapter_id, len_maya, len_spanish))

    only_maya = sorted(maya_chapters.keys() - spanish_chapters.keys())
    only_spanish = sorted(spanish_chapters.keys() - maya_chapters.keys())
    return total_maya, total_spanish, mismatches, only_maya, only_spanish


def main():
    """Compare the configured Maya and Spanish roots and print the report."""
    (
        total_maya_verses,
        total_spanish_verses,
        mismatches,
        unmatched_maya,
        unmatched_spanish,
    ) = compare_chapters(maya_root, spanish_root)

    # The mismatches are listed once here and once more after the totals;
    # both listings are kept so the printed report stays unchanged.
    for chapter_id, len_maya, len_spanish in mismatches:
        print(f"{chapter_id}: {len_maya} Maya vs {len_spanish} Spanish")

    # Report summary
    print("\n Totals:")
    print(f"Total Maya verses:    {total_maya_verses}")
    print(f"Total Spanish verses: {total_spanish_verses}")

    if total_maya_verses == total_spanish_verses:
        print("Total verse counts match!")
    else:
        print("Total verse counts do NOT match")

    # Report mismatches
    if mismatches:
        print("\nMismatched chapters:")
        for chapter_id, len_m, len_s in mismatches:
            print(f"{chapter_id}: {len_m} Maya vs {len_s} Spanish")

    # Report unmatched chapters
    if unmatched_maya or unmatched_spanish:
        print("\nUnmatched files:")
        for ch in unmatched_maya:
            print(f"Only in Maya: {ch}")
        for ch in unmatched_spanish:
            print(f"Only in Spanish: {ch}")


if __name__ == "__main__":
    main()

