commit a0706c95c352087b67d452051ed916368b2910fe
Author: friedemann.blume
Date:   Fri Jul 19 12:44:15 2024 +0200

    init

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..72b17cc
Binary files /dev/null and b/.DS_Store differ
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..968e59d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+mbox-files/*
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..27c86cf
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,20 @@
+# Use the official Python image from the Docker Hub
+FROM python:3.11-slim
+
+# Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+# Create and set the working directory
+WORKDIR /app
+
+# Install dependencies first so this layer stays cached when only the script changes
+COPY requirements.txt /app/
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the Python script into the container
+COPY mbox_to_markdown.py /app/
+
+# Define the command to run the Python script
+CMD ["python", "mbox_to_markdown.py"]
+
diff --git a/Readme.md b/Readme.md
new file mode 100644
index 0000000..ac800ca
--- /dev/null
+++ b/Readme.md
@@ -0,0 +1,7 @@
+## Mbox to Markdown converter
+
+docker build -t mbox-to-markdown .
+
+docker run --rm -v ./mbox-files:/mnt/input -v /path/to/output/directory:/mnt/output mbox-to-markdown python mbox_to_markdown.py /mnt/input/yourfile.mbox /mnt/output/
+
+
diff --git a/mbox_to_markdown.py b/mbox_to_markdown.py
new file mode 100644
index 0000000..3db2446
--- /dev/null
+++ b/mbox_to_markdown.py
@@ -0,0 +1,103 @@
+"""Convert every message of an mbox file into its own Markdown file.
+
+Usage: python mbox_to_markdown.py [MBOX_FILE [OUTPUT_DIR]]
+"""
+import mailbox
+import os
+import sys
+from email.errors import HeaderParseError
+from email.header import decode_header, make_header
+
+from markdownify import markdownify
+
+# Defaults used when no command-line arguments are given
+mbox_file = 'path/to/your/file.mbox'
+output_dir = 'path/to/output/directory'
+
+
+def _header_text(email, name, default):
+    """Return header *name* as readable single-line text, or *default*."""
+    raw = email.get(name)
+    if raw is None:
+        return default
+    try:
+        # Subjects are often RFC 2047 encoded-words (=?utf-8?...?=)
+        text = str(make_header(decode_header(str(raw))))
+    except (HeaderParseError, LookupError, UnicodeError):
+        text = str(raw)
+    # Folded headers keep their line breaks; a heading must be one line
+    return ' '.join(text.split())
+
+
+def _body_text(email):
+    """Return the best text body of *email* ('' if it has none).
+
+    get_payload(decode=True) returns None for multipart containers, so
+    walk the leaf parts and prefer HTML over plain text.
+    """
+    plain = ''
+    for part in email.walk():
+        if part.is_multipart() or part.get_filename():
+            continue
+        kind = part.get_content_type()
+        if kind not in ('text/html', 'text/plain'):
+            continue
+        payload = part.get_payload(decode=True)
+        if payload is None:
+            continue
+        try:
+            text = payload.decode(part.get_content_charset() or 'utf-8',
+                                  errors='ignore')
+        except LookupError:
+            # Unknown charset label: fall back to UTF-8
+            text = payload.decode('utf-8', errors='ignore')
+        if kind == 'text/html':
+            return text
+        plain = plain or text
+    return plain
+
+
+def save_email_as_markdown(email, index, target_dir=None):
+    """Write *email* as email_<index>.md (target_dir defaults to output_dir)."""
+    target_dir = output_dir if target_dir is None else target_dir
+    subject = _header_text(email, 'subject', 'No Subject')
+    date = _header_text(email, 'date', 'No Date')
+    body_markdown = markdownify(_body_text(email))
+
+    # Create a Markdown file for each email
+    filename = os.path.join(target_dir, f'email_{index}.md')
+    with open(filename, 'w', encoding='utf-8') as file:
+        file.write(f'# {subject}\n')
+        file.write(f'*Date: {date}*\n\n')
+        file.write(body_markdown)
+
+
+def convert_mbox_to_markdown(mbox_file, target_dir=None):
+    """Convert all messages in *mbox_file*; return how many were written."""
+    target_dir = output_dir if target_dir is None else target_dir
+    os.makedirs(target_dir, exist_ok=True)
+    # create=False: a wrong path must fail instead of creating an empty mbox
+    mbox = mailbox.mbox(mbox_file, create=False)
+    try:
+        count = 0
+        for index, email in enumerate(mbox):
+            save_email_as_markdown(email, index, target_dir)
+            count = index + 1
+        return count
+    finally:
+        mbox.close()
+
+
+def main(argv=None):
+    """Run the conversion with optional MBOX_FILE and OUTPUT_DIR arguments."""
+    args = sys.argv[1:] if argv is None else list(argv)
+    source = args[0] if args else mbox_file
+    target = args[1] if len(args) > 1 else output_dir
+    if not os.path.isfile(source):
+        sys.exit(f'mbox file not found: {source}')
+    count = convert_mbox_to_markdown(source, target)
+    print(f'Converted {count} message(s) to {target}')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3fafbc9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+# mailbox ships with the Python standard library; only markdownify comes from PyPI
+markdownify