From a0706c95c352087b67d452051ed916368b2910fe Mon Sep 17 00:00:00 2001 From: "friedemann.blume" Date: Fri, 19 Jul 2024 12:44:15 +0200 Subject: [PATCH] init --- .DS_Store | Bin 0 -> 6148 bytes .gitignore | 1 + Dockerfile | 20 ++++++++++++++++++++ Readme.md | 7 +++++++ mbox_to_markdown.py | 31 +++++++++++++++++++++++++++++++ requirements.txt | 2 ++ 6 files changed, 61 insertions(+) create mode 100644 .DS_Store create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 Readme.md create mode 100644 mbox_to_markdown.py create mode 100644 requirements.txt diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..72b17ccdcc496bb3dfa8620afb2f553d09a20278 GIT binary patch literal 6148 zcmeHK-AV#M6rNGb5ed8~>LwV7t`gD<6^URodx2K+qQcx(bZOmLbj=G9_O@QAH|P<1 zp3eLzirHmKoC9aRGv{Y^=G$dvhY&(*BezIMju65?B_=Y^{2($ebwv`!vkOS&7|wVY zI=*nA|FM#dj%{Q>zPl7@kPhjSaPj>)q3;c&Lg9-hlBu2DG)uEAd$fCEY)9>AG_17; z_p;Xz!gpqN+qv^P<5q6}S_Dzs3pxW;;JF=0d1!ipE5lE zN}iW0(>$M4OQk$7m#fohh8>)oU);7{{C*(bG=~&EHzj)pXYdZjd^vu%Xg`a7I14Vu z`l4PppDK&bj+>XKpI6Iq_t$vXTR--zJqUaebYZ;NJQt>bCNh8wAOo9cz-&$|yZNc$ z43Gh2V9OYg`vV7+Xc>$(s;vVWbp-%Sfm;dK@-883xIxQctPvt0RF?wkQfjUkRF{LF zn>@>4tWlQ}YAzqtJeiss3RO>s@wrMT)Y6D8GJp&$GmzBXoIL+;*5Ch^i?D|bAOru3 z0h+4Y^%^Y6oUJR1lV>dleFBvt^J0zj5-`+J47u_sJ_1z&el86_%V4Y#JRtZZplHAb J8TeBMz5(?YVb1^n literal 0 HcmV?d00001 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..968e59d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +mbox-files/* diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..27c86cf --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +# Use the official Python image from the Docker Hub +FROM python:3.11-slim + +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 + +# Create and set the working directory +WORKDIR /app + +# Copy the Python script and any other necessary files into the container +COPY mbox_to_markdown.py /app/ +COPY requirements.txt /app/ + 
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Define the command to run the Python script
+CMD ["python", "mbox_to_markdown.py"]
+
diff --git a/Readme.md b/Readme.md
new file mode 100644
index 0000000..ac800ca
--- /dev/null
+++ b/Readme.md
@@ -0,0 +1,7 @@
+## Mbox to Markdown converter
+
+docker build -t mbox-to-markdown .
+
+docker run --rm -v ./mbox-files:/mnt/input -v /path/to/output/directory:/mnt/output mbox-to-markdown python mbox_to_markdown.py /mnt/input/yourfile.mbox /mnt/output/
+
+
diff --git a/mbox_to_markdown.py b/mbox_to_markdown.py
new file mode 100644
index 0000000..3db2446
--- /dev/null
+++ b/mbox_to_markdown.py
@@ -0,0 +1,87 @@
+"""Convert every message in an mbox file into its own Markdown file.
+
+Usage:
+    python mbox_to_markdown.py [MBOX_FILE [OUTPUT_DIR]]
+"""
+import mailbox
+import os
+import sys
+
+from markdownify import markdownify
+
+# Configuration: taken from the command line when given (the Readme runs
+# `python mbox_to_markdown.py MBOX_FILE OUTPUT_DIR`); otherwise the
+# placeholder defaults below are used.
+mbox_file = sys.argv[1] if len(sys.argv) > 1 else 'path/to/your/file.mbox'
+output_dir = sys.argv[2] if len(sys.argv) > 2 else 'path/to/output/directory'
+
+# Ensure output directory exists
+os.makedirs(output_dir, exist_ok=True)
+
+
+def _decode_part(part):
+    """Return the decoded text of a single, non-multipart message part."""
+    payload = part.get_payload(decode=True)
+    if payload is None:
+        return ''
+    charset = part.get_content_charset() or 'utf-8'
+    try:
+        return payload.decode(charset, errors='ignore')
+    except LookupError:
+        # The message declares a charset Python does not know.
+        return payload.decode('utf-8', errors='ignore')
+
+
+def _extract_body(email):
+    """Return the text body of *email*, preferring HTML over plain text.
+
+    get_payload(decode=True) returns None for a multipart message, so its
+    parts are walked instead; parts marked as attachments are skipped.
+    """
+    if not email.is_multipart():
+        return _decode_part(email)
+
+    html = plain = None
+    for part in email.walk():
+        if part.is_multipart():
+            continue
+        if part.get_content_disposition() == 'attachment':
+            continue
+        content_type = part.get_content_type()
+        if content_type == 'text/html' and html is None:
+            html = _decode_part(part)
+        elif content_type == 'text/plain' and plain is None:
+            plain = _decode_part(part)
+    return html or plain or ''
+
+
+def save_email_as_markdown(email, index):
+    """Write *email* to ``email_{index}.md`` in the output directory.
+
+    The file holds the subject as a heading, the date in italics and the
+    message body converted to Markdown.
+    """
+    subject = email.get('subject', 'No Subject')
+    date = email.get('date', 'No Date')
+    body_markdown = markdownify(_extract_body(email))
+
+    # Create a Markdown file for each email
+    filename = os.path.join(output_dir, f'email_{index}.md')
+    with open(filename, 'w', encoding='utf-8') as file:
+        file.write(f'# {subject}\n')
+        file.write(f'*Date: {date}*\n\n')
+        file.write(body_markdown)
+
+
+def convert_mbox_to_markdown(mbox_file):
+    """Convert each message in the mbox at *mbox_file* to a Markdown file."""
+    # create=False: a mistyped path raises instead of creating an empty mbox.
+    mbox = mailbox.mbox(mbox_file, create=False)
+    try:
+        for i, email in enumerate(mbox):
+            save_email_as_markdown(email, i)
+    finally:
+        mbox.close()
+
+
+convert_mbox_to_markdown(mbox_file)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3fafbc9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+# mailbox ships with the Python standard library; it is not installed from PyPI.
+markdownify