commit 9f58e25f10061584170c317d6e982385bea7c37b Author: Marc Koch Date: Tue Dec 3 11:02:03 2024 +0100 🎉 initial commit diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..e105d51 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,7 @@ +__pycache__/ +venv/ +.git/ +*.pyc + +.menu_image_hash +menu.txt \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..379d058 --- /dev/null +++ b/.gitignore @@ -0,0 +1,152 @@ +# Created by https://www.toptal.com/developers/gitignore/api/flask +# Edit at https://www.toptal.com/developers/gitignore?templates=flask + +### Flask ### +instance/* +!instance/.gitignore +.webassets-cache +.env + +### Flask.Python Stack ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
+#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm files +.idea/ + +# Generated files +.menu_image_hash +menu.txt + + + diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/bueze-menu.iml b/.idea/bueze-menu.iml new file mode 100644 index 0000000..73abe8b --- /dev/null +++ b/.idea/bueze-menu.iml @@ -0,0 +1,14 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..223075d --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,69 @@ + + + + \ No newline at end of file diff --git a/.idea/material_theme_project_new.xml b/.idea/material_theme_project_new.xml new file mode 100644 index 0000000..8fc3396 --- /dev/null +++ b/.idea/material_theme_project_new.xml @@ -0,0 +1,10 @@ + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..903a1b2 --- /dev/null +++ b/.idea/misc.xml @@ 
-0,0 +1,10 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..4d7c35e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.11-slim-bookworm
+
+ENV PYTHONDONTWRITEBYTECODE 1
+ENV PYTHONUNBUFFERED 1
+
+WORKDIR /app
+
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt update && apt install -y tesseract-ocr tesseract-ocr-deu && rm -rf /var/lib/apt/lists/*
+
+COPY . .
+
+EXPOSE 5000
+
+CMD ["flask", "--app", "app", "run", "--host=0.0.0.0"]
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d1d9b30
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# bueze-menu
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..ff59a47
--- /dev/null
+++ b/app.py
@@ -0,0 +1,126 @@
+import hashlib
+import io
+from pathlib import Path
+from typing import Tuple
+from urllib.parse import urljoin
+
+import pytesseract
+import requests
+from PIL import Image
+from bs4 import BeautifulSoup
+from flask import Flask, render_template, make_response
+
+app = Flask(__name__)
+
+url = 'https://bueze.de/unser-mittagstisch/'
+image_hash_file = Path('.menu_image_hash')
+menu_text = Path("./menu.txt")
+
+# Seconds to wait for the upstream site; without a timeout, requests can block
+# a worker indefinitely on a stalled connection.
+REQUEST_TIMEOUT = 10
+
+def find_image_url():
+    """
+    Find the URL of the menu image on the lunch menu page.
+    :return: absolute URL of the image in the first ``wp-block-image`` figure
+    :raises requests.HTTPError: if the page responds with an error status
+    :raises LookupError: if the page contains no such image
+    """
+    with requests.get(url, timeout=REQUEST_TIMEOUT) as response:
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+    figure = soup.find('figure', class_='wp-block-image')
+    image = figure.find('img') if figure is not None else None
+    if image is None or not image.get('src'):
+        raise LookupError(f'No menu image found on {url}')
+    # The src attribute may be relative, so resolve it against the page URL.
+    return urljoin(url, image['src'])
+
+def get_image_bytes(image_url):
+    """
+    Download the menu image.
+    :param image_url: URL of the image to download
+    :return: the image content as bytes
+    :raises requests.HTTPError: if the request returns an error status
+    """
+    with requests.get(image_url, timeout=REQUEST_TIMEOUT) as response:
+        response.raise_for_status()
+        # Unlike response.raw.read(), response.content undoes any transfer
+        # compression (gzip/deflate), so the hash and OCR see real image bytes.
+        return response.content
+
+def _image_hash(img_bytes):
+    """
+    Return the hex digest used to detect a changed menu image.
+    MD5 is only a change detector here, not a security measure.
+    """
+    return hashlib.md5(img_bytes).hexdigest()
+
+def is_same_hash(img_bytes):
+    """
+    Check whether the image matches the one processed previously.
+    :param img_bytes: content of the current menu image
+    :return: True if the stored hash equals the hash of img_bytes,
+             False if it differs or no hash has been stored yet
+    """
+    if not image_hash_file.exists():
+        return False
+    # Tolerate a trailing newline, e.g. after the file was edited by hand.
+    return image_hash_file.read_text().strip() == _image_hash(img_bytes)
+
+def write_html(text):
+    """
+    Write the extracted menu text to the menu text file.
+    :param text: menu text (plain text despite the function name)
+    """
+    # The menu is German; fix the encoding so umlauts survive any locale.
+    menu_text.write_text(text, encoding='utf-8')
+
+def get_menu() -> Tuple[Path, str]:
+    """
+    Get the menu text and image URL.
+    :return: menu_text, image_url
+    """
+    image_url = find_image_url()
+    image_bytes = get_image_bytes(image_url)
+
+    # Check if the image has changed
+    if is_same_hash(image_bytes) and menu_text.exists():
+        print('No new image')
+
+    # If the image has changed, extract the text and remember the new hash
+    else:
+        print('New image found')
+        image = Image.open(io.BytesIO(image_bytes))
+        text = pytesseract.image_to_string(image, lang='deu')
+        write_html(text)
+        # Store the hash last: if OCR or the write fails, the next request
+        # retries instead of serving the stale text of the previous image.
+        image_hash_file.write_text(_image_hash(image_bytes))
+
+    return menu_text, image_url
+
+@app.route('/')
+def root():
+    """
+    This view function returns the menu text and image as a web page.
+    """
+    menu_file, image = get_menu()
+    # Read the file once and derive both template variables from it.
+    text = menu_file.read_text(encoding='utf-8')
+    html = render_template('base.html',
+                           lines=text.splitlines(),
+                           img=image,
+                           text=text)
+    return make_response(html, 200)
+
+@app.route('/plain')
+def plain():
+    """
+    This view function returns the menu text as plain text.
+    """
+    menu_file, _ = get_menu()
+    response = make_response(menu_file.read_text(encoding='utf-8'), 200)
+    response.mimetype = 'text/plain'
+    return response
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..49ae3ce
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,18 @@
+beautifulsoup4==4.12.3
+blinker==1.9.0
+certifi==2024.8.30
+charset-normalizer==3.4.0
+click==8.1.7
+Flask==3.1.0
+idna==3.10
+itsdangerous==2.2.0
+Jinja2==3.1.4
+Markdown==3.7
+MarkupSafe==3.0.2
+packaging==24.2
+pillow==11.0.0
+pytesseract==0.3.13
+requests==2.32.3
+soupsieve==2.6
+urllib3==2.2.3
+Werkzeug==3.1.3
diff --git a/static/style.css b/static/style.css
new file mode 100644
index 0000000..1b01cd4
--- /dev/null
+++ b/static/style.css
@@ -0,0 +1,36 @@
+body {
+    background-color: #f0f0f0;
+    font-family: Arial, sans-serif;
+}
+
+.container {
+    width: 80%;
+    margin: 6em auto;
+    display: flex;
+    justify-content: space-between;
+    flex-wrap: wrap;
+}
+
+.menu {
+    width: 60%;
+    flex: 1;
+    margin-right: 10em;
+    text-align: center;
+}
+
+@media (max-width: 1500px) {
+    .container {
+        flex-direction: column;
+        align-items: center;
+    }
+
+    .menu {
+        margin-right: 0;
+        margin-bottom: 3em;
+    }
+
+    img {
+        max-width: 100%;
+        margin-top: 3em;
+    }
+}
\ No newline at end of file
diff --git a/templates/base.html b/templates/base.html
new file mode 100644
index 0000000..de3f22b
--- /dev/null
+++ b/templates/base.html
@@ -0,0 +1,25 @@
+
+
+
+
+ BüzE Menü
+
+
+
+
+ + {{ text }} +
+ + \ No newline at end of file