import requests
from bs4 import BeautifulSoup
import json


def scrape_forum(url):
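    """Scrape author, date and body of every post on one forum thread page.

    Returns a list of dicts with the keys: author, date, date_iso, content, tags.
    """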
    # Bound the request time and raise on HTTP errors instead of parsing an error page
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, "html.parser")

    # "postleft" blocks hold the author/date metadata; "postmsg" blocks hold the message bodies
    postlefts = soup.find_all("div", class_="postleft")
    postmsgs = soup.find_all("div", class_="postmsg")

    results = []

    # Map French month names to two-digit month numbers for building ISO dates
    mois_map = {
        "janvier": "01",
        "février": "02",
        "mars": "03",
        "avril": "04",
        "mai": "05",
        "juin": "06",
        "juillet": "07",
        "août": "08",
        "septembre": "09",
        "octobre": "10",
        "novembre": "11",
        "décembre": "12"
    }

    # Metadata and message blocks appear in the same order, so zip pairs them post by post
    for left, msg in zip(postlefts, postmsgs):
        # The author is the first <strong> tag in the metadata block
        auteur_tag = left.find("strong")
        auteur = auteur_tag.get_text(strip=True) if auteur_tag else "Unknown"

        # The post date is taken from the last <dt> tag of the metadata block
        dt_tags = left.find_all("dt")
        date_str = dt_tags[-1].get_text(strip=True) if dt_tags else "Unknown date"


        # Dates look like "<day> <French month> <year> ..."; convert them to ISO YYYY-MM-DD
        try:
            parts = date_str.split()

            jour = parts[0]
            mois = mois_map.get(parts[1].lower(), "01")
            annee = parts[2]

            date_iso = f"{annee}-{mois}-{jour.zfill(2)}"
        except IndexError:
            # Unexpected or missing date: leave the ISO field empty
            date_iso = ""


        # Keep line breaks between the paragraphs of the post body
        texte = msg.get_text(separator="\n", strip=True)

        results.append({
            "author": auteur,
            "date": date_str,
            "date_iso": date_iso,
            "content": texte,
            "tags": ["alcool","nalméfène","nalmefene"]
        })

    return results


def append_to_html(posts, output_file="scrappharma.html", source_url=""):
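    """Append scraped posts to an HTML archive, creating a minimal page if the file does not exist yet.

    source_url is accepted by the caller but is not currently written into the page.
    """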
    try:
        with open(output_file, "r", encoding="utf-8") as f:
            soup = BeautifulSoup(f.read(), "html.parser")
    except FileNotFoundError:
        soup = BeautifulSoup("""
        <html>
        <head>
            <meta charset="UTF-8">
            <title>Scrap Pharma</title>
            <style>
                .post {
                    border: 1px solid #ccc;
                    padding: 10px;
                    margin-bottom: 15px;
                    max-width: 800px;
                }
                pre {
                    white-space: pre-wrap;
                    word-wrap: break-word;
                }
            </style>
        </head>
        <body>
            <h1>Archive des posts</h1>
            <div id="posts-container"></div>
        </body>
        </html>
        """, "html.parser")

    # Reuse the existing posts container, or create one if the page lacks it
    container = soup.find("div", id="posts-container")

    if container is None:
        container = soup.new_tag("div", id="posts-container")
        soup.body.append(container)

    # Render each post as author, date, then the raw text inside a <pre> block
    for post in posts:
        post_div = soup.new_tag("div", **{"class": "post"})

        author = soup.new_tag("div", **{"class": "pseudo"})
        author.string = f" {post['author']}"

        date = soup.new_tag("div", **{"class": "date"})
        date.string = f" {post['date']}"

        content = soup.new_tag("pre")
        content.string = post["content"]

        post_div.append(author)
        post_div.append(date)
        post_div.append(content)

        container.append(post_div)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(soup.prettify())


def export_to_json(posts, output_file="scrappharma.json"):
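    """Append scraped posts to a JSON archive, merging them with any previously saved entries."""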
    try:
        with open(output_file, "r", encoding="utf-8") as f:
            existing_posts = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        existing_posts = []

    # Append the new posts after the ones already archived (re-running the script may create duplicates)
    all_posts = existing_posts + posts

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_posts, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    url = "https://www.psychoactif.org/forum/t16038-p1-Selincro-avez-vous-reussi-baisser-vos-consommations-alcool.html"

    posts = scrape_forum(url)

    append_to_html(posts, source_url=url)
    export_to_json(posts)

    print(f"{len(posts)} posts ajoutés dans scrappharma.html et scrappharma.json")