def extract_verses(soup):
    """Return the non-empty verse texts found in a parsed chapter page.

    Two page layouts are recognised, tried in this order:

    1. ``<span class="verse">`` elements. If at least one exists, only these
       are used. Every ``<sup class="verseNum">`` inside a span is decomposed
       first so the verse number is not part of the text; note that this
       modifies ``soup`` itself.
    2. Otherwise, the first ``<p>`` inside each ``<li class="L2">``.

    Text is taken with ``get_text(" ", strip=True)``; empty results are
    dropped. Returns a list of strings, possibly empty.
    """
    span_nodes = soup.find_all("span", class_="verse")

    if span_nodes:
        collected = []
        for span in span_nodes:
            # Drop the verse-number markers before reading the text.
            for marker in span.find_all("sup", class_="verseNum"):
                marker.decompose()
            text = span.get_text(" ", strip=True)
            if text:
                collected.append(text)
        # Verse spans were present, so the list-item layout is never consulted,
        # even when every span turned out to be empty.
        return collected

    # No verse spans: read the paragraph of each list item instead.
    paragraphs = (item.find("p") for item in soup.find_all("li", class_="L2"))
    texts = (para.get_text(" ", strip=True) for para in paragraphs if para)
    return [text for text in texts if text]


import requests
from bs4 import BeautifulSoup
import os

def scrape_bible_chapters(
    book_slug, book_display_name, num_chapters, lang_code="es", timeout=30
):
    """Download each chapter of one book and save its verses to text files.

    For chapters 1..num_chapters the chapter page is fetched, parsed with
    ``extract_verses`` and written, one verse per line, to
    ``<slug>_verses_<lang_code>/<slug>_ch<N>_<lang_code>.txt`` where ``<slug>``
    is ``book_slug`` lowercased. The output directory is created if missing.

    Args:
        book_slug: Book identifier used in the URL path and in output names.
        book_display_name: Name shown in progress messages only.
        num_chapters: Number of chapters to fetch, starting at chapter 1.
        lang_code: Language segment of the URL; also used in the directory
            and file names.
        timeout: Seconds to wait on each HTTP request before giving up.

    A chapter whose request fails (network error, timeout, or a status other
    than 200) is reported and skipped; the remaining chapters are still
    processed. Nothing is returned.
    """
    # NOTE(review): the path segments after the language code are Spanish;
    # other lang_code values presumably need different segments - confirm.
    base_url = f"https://www.jw.org/{lang_code}/biblioteca/biblia/nwt/libros/{book_slug}/"
    slug = book_slug.lower()
    output_dir = f"{slug}_verses_{lang_code}"
    os.makedirs(output_dir, exist_ok=True)

    for chapter in range(1, num_chapters + 1):
        chapter_url = f"{base_url}{chapter}/"
        print(f"Scraping {book_display_name} Chapter {chapter}...")

        # Without a timeout a stalled connection would block the run forever,
        # and an unhandled network error would abort every remaining chapter.
        try:
            response = requests.get(chapter_url, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Failed to fetch chapter {chapter}: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch chapter {chapter}: {response.status_code}")
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        verses = extract_verses(soup)

        # The language suffix follows lang_code instead of a fixed "es", so
        # files for other languages are no longer mislabeled.
        filename = os.path.join(output_dir, f"{slug}_ch{chapter}_{lang_code}.txt")
        with open(filename, "w", encoding="utf-8") as f:
            f.writelines(f"{verse}\n" for verse in verses)

        print(f"Saved {len(verses)} verses to {filename}")

# Books to scrape, in processing order, as (book slug, number of chapters).
# The slug is passed to scrape_bible_chapters as both the URL slug and the
# display name.
bible_books = [
    ("2-corintios", 13),
    ("Gálatas", 6),
    ("Efesios", 6),
    ("Filipenses", 4),
    ("Colosenses", 4),
    ("1-tesalonicenses", 5),
    ("2-tesalonicenses", 3),
    ("1-timoteo", 6),
    ("2-timoteo", 4),
    ("Tito", 3),
    ("Filemón", 1),
    ("Hebreos", 13),
    ("Santiago", 5),
    ("1-pedro", 5),
    ("2-pedro", 3),
    ("1-juan", 5),
    ("2-juan", 1),
    ("3-juan", 1),
    ("Judas", 1),
    ("Apocalipsis", 22),
]

if __name__ == "__main__":
    # Scrape every listed book in order; the slug doubles as the display name.
    for slug, total_chapters in bible_books:
        scrape_bible_chapters(slug, slug, total_chapters)
