bueze-menu/app.py
2024-12-03 11:02:03 +01:00

84 lines
2.4 KiB
Python

import hashlib
import io
from pathlib import Path
from typing import Tuple
import pytesseract
import requests
from PIL import Image
from bs4 import BeautifulSoup
from flask import Flask, render_template, make_response
# Flask application object; the view functions below are registered on it.
app = Flask(__name__)
# Web page that is scraped to locate the current menu image.
url = 'https://bueze.de/unser-mittagstisch/'
# File storing the MD5 hex digest of the most recently processed menu image.
image_hash_file = Path('.menu_image_hash')
# File storing the OCR text extracted from the most recently processed image.
menu_text = Path("./menu.txt")
def find_image_url():
    """
    Scrape the menu page and return the URL of the menu image.

    The image is the first ``<img>`` found inside the first
    ``<figure class="wp-block-image">`` element of the page at ``url``.

    :return: value of the image's ``src`` attribute
    :raises requests.RequestException: if the page cannot be fetched
        (connection error, timeout, or non-2xx status)
    :raises LookupError: if the page contains no matching image
    """
    # Without a timeout a stalled upstream server would block the caller forever.
    with requests.get(url, timeout=10) as response:
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

    # soup.find() returns None when nothing matches, so check every step
    # instead of failing with an AttributeError on None.
    figure = soup.find('figure', class_='wp-block-image')
    image = figure.find('img') if figure is not None else None
    src = image.get('src') if image is not None else None
    if not src:
        raise LookupError(f'No menu image found on {url}')
    return src
def get_image_bytes(image_url):
    """
    Download the image at ``image_url`` and return its content.

    :param image_url: URL of the image to download
    :return: the decoded response body as ``bytes``
    :raises requests.RequestException: if the download fails
        (connection error, timeout, or non-2xx status)
    """
    # Without a timeout a stalled server would block the caller forever.
    with requests.get(image_url, timeout=10) as response:
        response.raise_for_status()
        # ``content`` undoes any Content-Encoding (gzip/deflate) applied by
        # the server; the raw stream would return the still-compressed bytes.
        return response.content
def is_same_hash(img_bytes):
    """
    Tell whether ``img_bytes`` matches the digest stored in ``image_hash_file``.

    :param img_bytes: image content to compare
    :return: ``True`` if the hash file exists and its content equals the MD5
        hex digest of ``img_bytes``; ``False`` otherwise
    """
    # No stored digest means there is nothing to compare against.
    if not image_hash_file.exists():
        return False
    stored_digest = image_hash_file.read_text()
    current_digest = hashlib.md5(img_bytes).hexdigest()
    return stored_digest == current_digest
def write_html(text):
    """
    Store ``text`` in the ``menu_text`` file, replacing any previous content.

    :param text: the string to write
    """
    with menu_text.open(mode='w') as handle:
        handle.write(text)
def get_menu() -> Tuple[Path, str]:
    """
    Get the menu text file and the image URL, refreshing the text if needed.

    The menu image is downloaded on every call. If its hash matches the
    stored one and the text file already exists, the existing text is reused.
    Otherwise the image is run through OCR (German language data) and the
    result is written to the text file.

    :return: menu_text (path of the text file), image_url
    """
    image_url = find_image_url()
    image_bytes = get_image_bytes(image_url)

    # Unchanged image and text already on disk: nothing to do.
    if is_same_hash(image_bytes) and menu_text.exists():
        print('No new image')
        return menu_text, image_url

    # Close the decoded image once the OCR is done.
    with Image.open(io.BytesIO(image_bytes)) as image:
        text = pytesseract.image_to_string(image, lang='deu')
    print('New image found')
    write_html(text)
    # Record the hash only after the text was written successfully. If OCR or
    # the write fails, the next call retries instead of treating a stale
    # text file as up to date.
    image_hash_file.write_text(hashlib.md5(image_bytes).hexdigest())
    return menu_text, image_url
@app.route('/')
def root():
    """
    This view function returns the menu text and image as a web page.

    Renders ``base.html`` with the menu text (``text``), the same text split
    into lines (``lines``) and the image URL (``img``).
    """
    text, image = get_menu()
    # Read the file once so that ``lines`` and ``text`` always come from the
    # same snapshot, even if the file is rewritten in the meantime.
    content = text.read_text()
    html = render_template('base.html',
                           lines=content.splitlines(),
                           img=image,
                           text=content)
    return make_response(html, 200)
@app.route('/plain')
def plain():
    """
    Serve the current menu text as a ``text/plain`` response.
    """
    menu_path, _image_url = get_menu()
    body = menu_path.read_text()
    plain_response = make_response(body, 200)
    plain_response.mimetype = 'text/plain'
    return plain_response