#
# books.py
# Generates a .sql file with INSERT statements for books fetched from
# Project Gutenberg through the Gutendex API.
# https://gutendex.com/
#

import http.client
import json
import time
from urllib.parse import urlparse

API_URL = "https://gutendex.com/books/"
BOOKS_COUNT = 1000
FETCH_DELAY = 1  # seconds to wait between consecutive page requests
FETCH_TIMEOUT = 30  # seconds before a stalled HTTP request is abandoned
OUTPUT = "./books.sql"

INSERT_COLUMNS = (
    "Isbn, Titulo, Autor, Genero, Descricao, Foto, Keywords, "
    "Ativo, CriadoEm, AtualizadoEm"
)


def fetch(resource):
    """Perform a GET request on *resource* and return the decoded JSON body.

    Args:
        resource: Absolute http:// or https:// URL.

    Returns:
        The parsed JSON document, or an empty dict when the server answers
        with a status outside the 2xx range.

    Raises:
        OSError / http.client.HTTPException: on network failures.
        json.JSONDecodeError: when a 2xx body is not valid JSON.
    """
    url = urlparse(resource)
    if url.scheme == "https":
        client = http.client.HTTPSConnection
    else:
        client = http.client.HTTPConnection

    # Only append "?" when there really is a query string.
    target = url.path or "/"
    if url.query:
        target = f"{target}?{url.query}"

    conn = client(url.netloc, timeout=FETCH_TIMEOUT)
    try:
        conn.request("GET", target)
        res = conn.getresponse()
        if not 200 <= res.status <= 299:
            return {}
        return json.loads(res.read())
    finally:
        # Always release the socket, including on error paths.
        conn.close()


def escape_sql(value):
    """Escape *value* for use inside a double-quoted SQL string literal.

    Backslashes are escaped before quotes: otherwise a value containing (or
    ending in) a backslash would swallow the escaping of the following quote
    and corrupt the statement. ``None`` is rendered as an empty string.
    """
    text = "" if value is None else str(value)
    return text.replace("\\", "\\\\").replace('"', '\\"')


def book_row(book):
    """Return the parenthesised VALUES tuple for one book dict.

    Reads the keys ``title``, ``authors`` (list of dicts with ``name``),
    ``subjects``, ``summaries``, ``formats`` and ``bookshelves``; missing or
    null collections are treated as empty.
    """
    summaries = book.get("summaries") or []
    formats = book.get("formats") or {}
    images = [
        link for mime, link in formats.items() if mime.startswith("image/")
    ]

    isbn = ""  # the API response is not used to fill this column
    titulo = book.get("title")
    autor = ",".join(a["name"] for a in book.get("authors") or [])
    genero = ",".join(book.get("subjects") or [])
    descricao = summaries[0] if summaries else ""
    foto = images[0] if images else ""
    keywords = ",".join(book.get("bookshelves") or [])

    fields = (isbn, titulo, autor, genero, descricao, foto, keywords)
    quoted = ", ".join(f'"{escape_sql(field)}"' for field in fields)
    return f"({quoted}, true, NOW(), NOW())"


def build_insert(books):
    """Return one multi-row INSERT statement for *books*.

    Returns an empty string when *books* is empty, because
    ``INSERT ... VALUES ;`` is not valid SQL.
    """
    if not books:
        return ""
    values = ",\n".join(book_row(book) for book in books)
    return f"INSERT INTO Livro ({INSERT_COLUMNS}) VALUES {values};\n"


def write_inserts(file, page, limit=None):
    """Fetch one page of books and write its INSERT statement to *file*.

    Args:
        file: Writable text file object.
        page: Page number requested from the API.
        limit: Optional maximum number of rows to write from this page;
            ``None`` writes the whole page.

    Returns:
        The number of rows written (0 when the request failed or the page
        had no results; nothing is written in that case).
    """
    data = fetch(f"{API_URL}?page={page}")
    # fetch() returns {} on a non-2xx status, so "results" may be absent.
    books = data.get("results") or []
    if limit is not None:
        books = books[:max(limit, 0)]
    if not books:
        return 0
    file.write(build_insert(books))
    return len(books)


def collect(n, file, _page=1):
    """Write INSERTs for up to *n* books to *file*, starting at page *_page*.

    Stops early when a page yields no rows, so an exhausted catalogue or a
    failed request cannot cause an endless loop.
    """
    remaining = n
    page = _page
    while remaining > 0:
        written = write_inserts(file, page, limit=remaining)
        if written == 0:
            break
        remaining -= written
        page += 1
        if remaining > 0:
            # Be polite to the API: pause only when another request follows.
            time.sleep(FETCH_DELAY)


def run():
    """Generate OUTPUT with INSERT statements for BOOKS_COUNT books."""
    # Explicit UTF-8: titles and summaries contain non-ASCII characters.
    with open(OUTPUT, "w", encoding="utf-8") as file:
        collect(BOOKS_COUNT, file)


if __name__ == "__main__":
    run()