# Mirror of https://github.com/Jieyab89/OSINT-Cheat-sheet.git
# Last synced: 2025-12-12 07:40:57 -08:00
import json
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Source URLs: two raw markdown files plus the GitHub wiki index page.
urls = {
    "readme": "https://raw.githubusercontent.com/Jieyab89/OSINT-Cheat-sheet/refs/heads/main/README.md",
    "wiki": "https://github.com/Jieyab89/OSINT-Cheat-sheet/wiki",
    "articles": "https://raw.githubusercontent.com/Jieyab89/OSINT-Cheat-sheet/main/awesome-article.md"
}

# Output file one directory above the *current working directory*.
# NOTE(review): this is CWD-relative, not script-relative — confirm intended.
json_file = os.path.join(os.pardir, "osint_data.json")

# Load previously saved data so new entries are merged rather than
# overwriting history. A missing or corrupt file degrades gracefully
# to an empty dataset instead of aborting the sync.
if os.path.exists(json_file):
    with open(json_file, "r", encoding="utf-8") as f:
        try:
            existing_data = json.load(f)
        except json.JSONDecodeError:
            existing_data = []
else:
    existing_data = []

# Index existing categories by name for O(1) lookups during the merge.
existing_categories = {cat["category"]: cat for cat in existing_data}
# Freshly scraped categories accumulate here before being merged.
new_categories = {}
|
def parse_markdown_md(url):
    """Fetch a markdown document and parse it into categories of links.

    A top-level heading line ("# Title") starts a new category; list
    entries of the form "- [name](url)" become items under the most
    recent heading. Link items seen before any heading are ignored.

    Args:
        url: URL of a raw markdown file.

    Returns:
        Dict mapping category name -> {"category": name, "items": [...]},
        where each item is {"name": ..., "url": ...}. Returns {} when the
        fetch fails for any reason.
    """
    try:
        # A timeout keeps the scraper from hanging forever on a dead host;
        # network errors degrade to an empty result like a bad status does.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        print(f"[-] Failed to fetch: {url}")
        return {}
    if response.status_code != 200:
        print(f"[-] Failed to fetch: {url}")
        return {}

    parsed = {}
    current_cat = None

    for line in response.text.split("\n"):
        line = line.strip()

        if line.startswith("# "):
            # New category: everything after "# ".
            current_cat = line[2:].strip()
            parsed[current_cat] = {"category": current_cat, "items": []}

        elif line.startswith("- [") and "](" in line:
            # Markdown link list item: "- [name](url)".
            parts = line.split("[", 1)[1].split("](")
            name = parts[0].strip()
            link = parts[1].split(")")[0].strip()
            if current_cat:
                parsed[current_cat]["items"].append({"name": name, "url": link})

    return parsed
|
|
|
|
def parse_github_wiki(url):
    """Scrape the GitHub wiki index page and collect links to wiki pages.

    Args:
        url: URL of the repository's wiki landing page.

    Returns:
        A single-category dict keyed "Jieyab89 Wiki Pages" whose items are
        {"name": page title, "url": absolute page URL}; {} when the fetch
        fails or no page links are found.
    """
    wiki_items = []
    try:
        # A timeout keeps the scraper from hanging forever on a dead host;
        # network errors degrade to an empty result like a bad status does.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        print(f"[-] Failed to fetch: {url}")
        return {}
    if response.status_code != 200:
        print(f"[-] Failed to fetch: {url}")
        return {}

    soup = BeautifulSoup(response.text, "html.parser")
    # NOTE(review): ".wiki-pages-box" matches GitHub's current wiki sidebar
    # markup and will silently yield nothing if GitHub changes its layout.
    for link in soup.select(".wiki-pages-box a"):
        title = link.text.strip()
        # urljoin resolves both relative hrefs ("/user/repo/wiki/Page") and
        # absolute ones, unlike plain string concatenation.
        page_url = urljoin("https://github.com", link["href"])
        wiki_items.append({"name": title, "url": page_url})

    if wiki_items:
        return {"Jieyab89 Wiki Pages": {"category": "Jieyab89 Wiki Pages", "items": wiki_items}}
    return {}
|
|
|
|
# Scrape every source; on category-name collisions the later source wins.
new_categories.update(parse_markdown_md(urls["readme"]))
new_categories.update(parse_markdown_md(urls["articles"]))
new_categories.update(parse_github_wiki(urls["wiki"]))

# Merge the scraped data into the existing dataset, de-duplicating items
# by name within each category.
for category, new_data in new_categories.items():
    if category in existing_categories:
        existing_items = {item["name"] for item in existing_categories[category]["items"]}
        for new_item in new_data["items"]:
            if new_item["name"] not in existing_items:
                existing_categories[category]["items"].append(new_item)
                # Record the appended name so a duplicate within the
                # freshly scraped items is not appended a second time.
                existing_items.add(new_item["name"])
    else:
        # Brand-new category: take the scraped data wholesale.
        existing_categories[category] = new_data

# Persist the merged dataset as a JSON array of category objects.
with open(json_file, "w", encoding="utf-8") as f:
    json.dump(list(existing_categories.values()), f, indent=4, ensure_ascii=False)

print(f"[+] Data updated: {json_file}")
|