71 lines
1.9 KiB
Python
71 lines
1.9 KiB
Python
#
|
|
# books.py
|
|
# Generates a .sql file with INSERT statements for books fetched from Project Gutenberg
|
|
# https://gutendex.com/
|
|
#
|
|
|
|
import http.client
|
|
import json
|
|
import time
|
|
from urllib.parse import urlparse
|
|
|
|
# Base endpoint of the Gutendex book listing; pages are requested via ?page=N.
API_URL = "https://gutendex.com/books/"

# Target number of books handed to collect() by run().
BOOKS_COUNT = 1_000

# Pause between consecutive page requests, in seconds (passed to time.sleep).
FETCH_DELAY = 1

# Path of the SQL file the script writes.
OUTPUT = "./books.sql"
|
|
|
|
def fetch(resource):
    """Perform a GET request on *resource* and return the decoded JSON body.

    Args:
        resource: Absolute ``http://`` or ``https://`` URL to request.

    Returns:
        The parsed JSON document, or an empty dict when the server answers
        with a status outside the 2xx range.

    Raises:
        json.JSONDecodeError: If a 2xx response body is not valid JSON.
        OSError: If the connection cannot be established or breaks.
    """
    url = urlparse(resource)
    if url.scheme == "https":
        client = http.client.HTTPSConnection
    else:
        client = http.client.HTTPConnection

    # Build the request target: an empty path must become "/", and the "?"
    # separator is only added when there actually is a query string.
    target = url.path or "/"
    if url.query:
        target = f"{target}?{url.query}"

    conn = client(url.netloc)
    try:
        conn.request("GET", target)
        res = conn.getresponse()

        if res.status < 200 or res.status > 299:
            return {}

        return json.loads(res.read())
    finally:
        # Always release the socket, including on error paths.
        conn.close()
|
|
|
|
def _sql_quote(value):
    """Return *value* wrapped in double quotes as a SQL string literal.

    Backslashes are escaped before double quotes so that the backslash
    added in front of a quote is not itself doubled afterwards.
    NOTE(review): backslash escaping assumes a MySQL-style dialect, which the
    double-quoted literals and NOW() in the statement suggest - confirm.
    """
    escaped = value.replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def _book_values(book):
    """Build the parenthesised VALUES tuple for one book record."""
    summaries = book["summaries"]
    imgs = [link for mime, link in book["formats"].items() if mime.startswith("image/")]

    isbn = ""  # no ISBN is read from the record; the column is left blank
    titulo = book["title"]
    autor = ",".join(a["name"] for a in book["authors"])
    genero = ",".join(book["subjects"])
    descricao = summaries[0] if summaries else ""
    foto = imgs[0] if imgs else ""
    keywords = ",".join(book["bookshelves"])

    text_columns = (isbn, titulo, autor, genero, descricao, foto, keywords)
    quoted = ", ".join(_sql_quote(column) for column in text_columns)
    return f"({quoted}, true, NOW(), NOW())"


def write_inserts(file, page):
    """Fetch one page of books and write it to *file* as a single INSERT.

    Args:
        file: Writable text file object receiving the SQL statement.
        page: Page number requested from the API (``?page=<page>``).

    Returns:
        The number of rows written. When the page has no books nothing is
        written and 0 is returned.

    Raises:
        KeyError: If the response has no "results" key.
    """
    data = fetch(f"{API_URL}?page={page}")
    # fetch() returns {} for a non-2xx response, so this lookup raises
    # KeyError and stops the run on a failed request.
    books = data["results"]

    # "INSERT ... VALUES ;" is not valid SQL, so skip empty pages entirely.
    if not books:
        return 0

    values = ",\n".join(_book_values(book) for book in books)
    insert = f"INSERT INTO Livro (Isbn, Titulo, Autor, Genero, Descricao, Foto, Keywords, Ativo, CriadoEm, AtualizadoEm) VALUES {values};\n"

    file.write(insert)

    return len(books)
|
|
|
|
def collect(n, file, _page=1):
    """Write INSERT statements to *file* until at least *n* books are covered.

    Pages are requested one after another starting at *_page*. Each page is
    written whole, so the total number of rows may exceed *n*.

    Args:
        n: Number of books still wanted; nothing happens when it is <= 0.
        file: Writable text file object passed through to write_inserts().
        _page: Page number to start from.
    """
    remaining = n
    page = _page

    while remaining > 0:
        written = write_inserts(file, page)

        # A page that yields no rows would never reduce `remaining`;
        # stop instead of requesting pages forever.
        if written <= 0:
            break

        remaining -= written
        page += 1

        # Throttle only when another request will actually follow.
        if remaining > 0:
            time.sleep(FETCH_DELAY)
|
|
|
|
def run():
    """Create OUTPUT and fill it with INSERT statements for BOOKS_COUNT books."""
    # Explicit UTF-8: without it open() uses the platform locale encoding,
    # which can fail on non-ASCII characters in the book data.
    with open(OUTPUT, "w", encoding="utf-8") as file:
        collect(BOOKS_COUNT, file)


# Run the scrape only when executed as a script, not on import.
if __name__ == "__main__":
    run()