If none of the steps finds a legal PDF, the tool politely suggests the next best options (e.g., request a copy from your teacher, use your school library's inter-library loan, or buy a printed copy).

Tip: You can run the script on a laptop, a Raspberry Pi, or even inside a Jupyter notebook. No special libraries beyond requests and beautifulsoup4 are required.

#!/usr/bin/env python3
"""
Legal-PDF Finder for Romanian school textbooks.

Works for: "Manual de psihologie clasa a X-a" (Editura Aramis)
"""

import sys
import time
import urllib.parse

import requests
from bs4 import BeautifulSoup

# ----------------------------------------------------------------------
# 1️⃣  CONFIGURATION
# ----------------------------------------------------------------------
TITLE = "Manual de psihologie clasa a X-a"
PUBLISHER_URL = "https://www.editura-aramis.ro/search?q={}"
WORLD_CAT_URL = "https://www.worldcat.org/search?q={}"
GOOGLE_SEARCH = "https://www.google.com/search?q={}"

HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0 Safari/537.36"
    )
}
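# ---- optional extra (not in the original script) ---------------------
# A minimal sketch: let the user override TITLE from the command line so
# the same finder works for any textbook. The file name `finder.py` below
# is hypothetical; inside a Jupyter notebook just edit TITLE above instead.
#     python3 finder.py "Manual de istorie clasa a X-a"
if len(sys.argv) > 1:
    TITLE = " ".join(sys.argv[1:])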
# ----------------------------------------------------------------------
# 2️⃣  HELPERS
# ----------------------------------------------------------------------
def safe_get(url):
    """Simple wrapper that retries once on failure."""
    for attempt in range(2):  # the first try plus one retry
        try:
            r = requests.get(url, headers=HEADERS, timeout=12)
            r.raise_for_status()
            return r
        except Exception as e:
            print(f"⚠️ Request failed ({url}): {e}", file=sys.stderr)
            time.sleep(1)  # brief pause before the retry
    return None


def check_publisher():
    """Look for an official e-book / PDF on Editura Aramis."""
    query = urllib.parse.quote_plus(TITLE)
    url = PUBLISHER_URL.format(query)
    r = safe_get(url)
    if not r:
        return None

    soup = BeautifulSoup(r.text, "html.parser")
    # The exact HTML structure may change – adjust the selector if needed.
    for a in soup.select("a"):
        href = a.get("href", "")
        txt = a.get_text(strip=True).lower()
        if "pdf" in txt or "ebook" in txt or "download" in txt:
            full = urllib.parse.urljoin(url, href)
            return {"source": "Editura Aramis", "link": full, "type": "official"}
    return None
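# ---- optional extra (not in the original script) ---------------------
# A stricter variant of the publisher check: only accept links whose href
# itself ends in .pdf, which cuts down on false positives from buttons that
# merely say "ebook". A sketch under the same assumptions as check_publisher:
def check_publisher_strict():
    query = urllib.parse.quote_plus(TITLE)
    url = PUBLISHER_URL.format(query)
    r = safe_get(url)
    if not r:
        return None
    soup = BeautifulSoup(r.text, "html.parser")
    for a in soup.select("a[href]"):
        href = a["href"]
        if href.lower().endswith(".pdf"):
            full = urllib.parse.urljoin(url, href)
            return {"source": "Editura Aramis", "link": full, "type": "official"}
    return None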
def check_worldcat():
    """Search WorldCat for a library that holds a digital copy."""
    query = urllib.parse.quote_plus(TITLE + " pdf")
    url = WORLD_CAT_URL.format(query)
    r = safe_get(url)
    if not r:
        return None

    soup = BeautifulSoup(r.text, "html.parser")
    # Look for a line that says "Full text available"
    for div in soup.select("div.resultItem"):
        if "full text" in div.text.lower():
            a = div.select_one("a")
            if a and a.get("href"):  # guard against result rows without a link
                link = urllib.parse.urljoin(url, a["href"])
                return {"source": "WorldCat", "link": link, "type": "library loan"}
    return None
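# ---- optional extra (not in the original script) ---------------------
# The check functions return either a single dict or a list of dicts. A tiny
# helper that normalises both shapes to a list would let you simplify the
# printing loop at the bottom; this is a sketch, not part of the original:
def as_result_list(res):
    if res is None:
        return []
    return res if isinstance(res, list) else [res]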
def google_safe_search():
    """Google limited to trusted domains; we only scrape the first page."""
    query = urllib.parse.quote_plus(
        f'"{TITLE}" filetype:pdf site:.edu OR site:.gov OR site:.org'
    )
    url = GOOGLE_SEARCH.format(query)
    r = safe_get(url)
    if not r:
        return None

    soup = BeautifulSoup(r.text, "html.parser")
    for g in soup.select("div.g"):
        a = g.select_one("a")
        if not a:
            continue
        link = a["href"]
        # Google wraps URLs like /url?q=<real URL>&...; unwrap that
        if link.startswith("/url?q="):
            qs = urllib.parse.parse_qs(urllib.parse.urlparse(link).query)
            link = qs.get("q", [link])[0]
        # Quick sanity check – must end with .pdf
        if link.lower().endswith(".pdf"):
            return {"source": "Google (trusted domains)", "link": link, "type": "free"}
    return None


def check_commercial():
    """Look for a paid e-book version on major Romanian retailers."""
    retailers = {
        "eMAG": f"https://www.emag.ro/search/{urllib.parse.quote_plus(TITLE)}",
        "Carturesti": f"https://www.carte-romanesti.ro/cautare?search={urllib.parse.quote_plus(TITLE)}",
    }
    results = []
    for name, url in retailers.items():
        r = safe_get(url)
        if not r:
            continue
        if "pdf" in r.text.lower() or "ebook" in r.text.lower():
            results.append({"source": name, "link": url, "type": "purchase"})
    return results if results else None
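# ---- optional extra (not in the original script) ---------------------
# Before showing a "free" hit to the user you may want to confirm the URL
# really serves a PDF. A minimal sketch using a HEAD request and the
# Content-Type header (servers sometimes mislabel files, so treat the
# answer as a hint, not a guarantee):
def looks_like_pdf(url):
    try:
        h = requests.head(url, headers=HEADERS, timeout=12, allow_redirects=True)
        return h.headers.get("Content-Type", "").lower().startswith("application/pdf")
    except Exception:
        return False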
# ----------------------------------------------------------------------
# 3️⃣  MAIN
# ----------------------------------------------------------------------
steps = [
    ("Official publisher (Editura Aramis)", check_publisher),
    ("Library catalogue (WorldCat)", check_worldcat),
    ("Free PDFs on trusted domains (Google)", google_safe_search),
    ("Authorised retailers", check_commercial),
]

found_any = False
for label, func in steps:
    print(f"⏳ {label}…")
    res = func()
    time.sleep(0.7)  # polite delay before the next request
    if not res:
        print("   ✗ No legal PDF found in this step.\n")
        continue

    found_any = True
    # `res` may be a list (retailers) or a dict (single result)
    if isinstance(res, list):
        for r in res:
            print(f"   ✓ {r['source']}: {r['link']} [{r['type']}]")
    else:
        print(f"   ✓ {res['source']}: {res['link']} [{res['type']}]")
    print()

if not found_any:
    print(
        "🚫 No openly available PDF could be located.\n"
        "What you can do next:\n"
        " • Ask your teacher for a class copy (many schools have a digital licence).\n"
        " • Request the title through your school or public library's inter-library loan.\n"
        " • Purchase the official printed edition or an authorised e-book from the publisher.\n"
        " • Check the Romanian Ministry of Education portal – sometimes textbooks are released for free during exam years.\n"
    )
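To try it out, install the two dependencies and run the script (the file name finder.py is just an example):

    pip install requests beautifulsoup4
    python3 finder.py

Illustrative output only – what a run might look like; the actual results depend entirely on what the sites return at the time:

    ⏳ Official publisher (Editura Aramis)…
       ✗ No legal PDF found in this step.

    ⏳ Library catalogue (WorldCat)…
       ✓ WorldCat: https://www.worldcat.org/... [library loan]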