Created a script to update the CSV imports that are missing the date

This commit is contained in:
Jonas Linter
2025-11-20 11:14:07 +01:00
parent 8308be3e49
commit ce1fd140c9
7 changed files with 318968 additions and 1158 deletions

View File

@@ -30,6 +30,7 @@ class Lead:
anrede: Optional[str] = None # Salutation
land: Optional[str] = None # Country
privacy: Optional[bool] = None
received_date: Optional[str] = None
def parse_mbox_file(filepath: str) -> List[Lead]:
@@ -64,12 +65,48 @@ def parse_mbox_file(filepath: str) -> List[Lead]:
# Extract lead data from body
lead = parse_email_body(body)
# Extract received date from headers
try:
lead.received_date = extract_received_date(headers)
except ValueError as e:
print(f"WARNING: {e}")
raise
if lead.name or lead.mail: # Only add if we have some data
leads.append(lead)
return leads
def extract_received_date(headers: str) -> Optional[str]:
    """
    Extract the Date header from email headers and convert to ISO format.

    The first line whose field name is ``Date`` (matched case-insensitively,
    since header field names are case-insensitive) is used. Whitespace
    between the colon and the value is optional.

    Args:
        headers: Email headers section

    Returns:
        ISO format date string from the Date header, or None if not found

    Raises:
        ValueError: If Date header cannot be parsed to ISO format
    """
    from email.utils import parsedate_to_datetime

    field_prefix = 'date:'
    prefix_len = len(field_prefix)

    for line in headers.split('\n'):
        if line[:prefix_len].lower() != field_prefix:
            continue

        # Slice right after the colon and let strip() remove any optional
        # whitespace (and a trailing '\r'); slicing one character further
        # would eat the first character of the value when no space follows
        # the colon, e.g. "Date:12 Nov 2025 ..." would be read as "2 Nov".
        date_value = line[prefix_len:].strip()
        try:
            # Parse the RFC 2822 date format and convert to ISO format
            return parsedate_to_datetime(date_value).isoformat()
        except (TypeError, ValueError) as e:
            # Raise exception so parsing failures are caught and reported;
            # chain the original error to keep the full traceback.
            raise ValueError(
                f"Failed to parse date '{date_value}': {e}"
            ) from e

    return None
def parse_email_body(body: str) -> Lead:
"""
Parse the body of an email to extract lead information.
@@ -172,7 +209,8 @@ def export_to_csv(leads: List[Lead], output_file: str) -> None:
'device',
'anrede',
'land',
'privacy'
'privacy',
'received_date'
]
with open(output_file, 'w', newline='', encoding='utf-8') as f: