# Mirror of https://github.com/Jieyab89/OSINT-Cheat-sheet.git
# Last synced: 2025-12-12 07:40:57 -08:00
import json
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Source URLs: two raw markdown files plus the GitHub wiki index page.
urls = {
    "readme": "https://raw.githubusercontent.com/Jieyab89/OSINT-Cheat-sheet/refs/heads/main/README.md",
    "wiki": "https://github.com/Jieyab89/OSINT-Cheat-sheet/wiki",
    "articles": "https://raw.githubusercontent.com/Jieyab89/OSINT-Cheat-sheet/main/awesome-article.md"
}

# Output file one directory above the *current working directory*.
# NOTE(review): this is CWD-relative, not script-relative — confirm intended.
json_file = os.path.join(os.pardir, "osint_data.json")

# Load previously saved data so new entries are merged rather than
# overwriting history. A missing or corrupt file degrades gracefully
# to an empty dataset instead of aborting the sync.
if os.path.exists(json_file):
    with open(json_file, "r", encoding="utf-8") as f:
        try:
            existing_data = json.load(f)
        except json.JSONDecodeError:
            existing_data = []
else:
    existing_data = []

# Index existing categories by name for O(1) lookups during the merge.
existing_categories = {cat["category"]: cat for cat in existing_data}
# Freshly scraped categories accumulate here before being merged.
new_categories = {}
|
def parse_markdown_md(url):
    """Fetch a markdown document and parse it into categories of links.

    A top-level heading line ("# Title") starts a new category; list
    entries of the form "- [name](url)" become items under the most
    recent heading. Link items seen before any heading are ignored.

    Args:
        url: URL of a raw markdown file.

    Returns:
        Dict mapping category name -> {"category": name, "items": [...]},
        where each item is {"name": ..., "url": ...}. Returns {} when the
        fetch fails for any reason.
    """
    try:
        # A timeout keeps the scraper from hanging forever on a dead host;
        # network errors degrade to an empty result like a bad status does.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        print(f"[-] Failed to fetch: {url}")
        return {}
    if response.status_code != 200:
        print(f"[-] Failed to fetch: {url}")
        return {}

    parsed = {}
    current_cat = None

    for line in response.text.split("\n"):
        line = line.strip()

        if line.startswith("# "):
            # New category: everything after "# ".
            current_cat = line[2:].strip()
            parsed[current_cat] = {"category": current_cat, "items": []}

        elif line.startswith("- [") and "](" in line:
            # Markdown link list item: "- [name](url)".
            parts = line.split("[", 1)[1].split("](")
            name = parts[0].strip()
            link = parts[1].split(")")[0].strip()
            if current_cat:
                parsed[current_cat]["items"].append({"name": name, "url": link})

    return parsed
|
|
|
|
def parse_github_wiki(url):
    """Scrape the GitHub wiki index page and collect links to wiki pages.

    Args:
        url: URL of the repository's wiki landing page.

    Returns:
        A single-category dict keyed "Jieyab89 Wiki Pages" whose items are
        {"name": page title, "url": absolute page URL}; {} when the fetch
        fails or no page links are found.
    """
    wiki_items = []
    try:
        # A timeout keeps the scraper from hanging forever on a dead host;
        # network errors degrade to an empty result like a bad status does.
        response = requests.get(url, timeout=30)
    except requests.RequestException:
        print(f"[-] Failed to fetch: {url}")
        return {}
    if response.status_code != 200:
        print(f"[-] Failed to fetch: {url}")
        return {}

    soup = BeautifulSoup(response.text, "html.parser")
    # NOTE(review): ".wiki-pages-box" matches GitHub's current wiki sidebar
    # markup and will silently yield nothing if GitHub changes its layout.
    for link in soup.select(".wiki-pages-box a"):
        title = link.text.strip()
        # urljoin resolves both relative hrefs ("/user/repo/wiki/Page") and
        # absolute ones, unlike plain string concatenation.
        page_url = urljoin("https://github.com", link["href"])
        wiki_items.append({"name": title, "url": page_url})

    if wiki_items:
        return {"Jieyab89 Wiki Pages": {"category": "Jieyab89 Wiki Pages", "items": wiki_items}}
    return {}
|
|
|
|
# Scrape every source; on category-name collisions the later source wins.
new_categories.update(parse_markdown_md(urls["readme"]))
new_categories.update(parse_markdown_md(urls["articles"]))
new_categories.update(parse_github_wiki(urls["wiki"]))

# Merge the scraped data into the existing dataset, de-duplicating items
# by name within each category.
for category, new_data in new_categories.items():
    if category in existing_categories:
        existing_items = {item["name"] for item in existing_categories[category]["items"]}
        for new_item in new_data["items"]:
            if new_item["name"] not in existing_items:
                existing_categories[category]["items"].append(new_item)
                # Record the appended name so a duplicate within the
                # freshly scraped items is not appended a second time.
                existing_items.add(new_item["name"])
    else:
        # Brand-new category: take the scraped data wholesale.
        existing_categories[category] = new_data

# Persist the merged dataset as a JSON array of category objects.
with open(json_file, "w", encoding="utf-8") as f:
    json.dump(list(existing_categories.values()), f, indent=4, ensure_ascii=False)

print(f"[+] Data updated: {json_file}")
|