Created a script to update the CSV imports that don't have the received date
This commit is contained in:
@@ -30,6 +30,7 @@ class Lead:
|
||||
anrede: Optional[str] = None # Salutation
|
||||
land: Optional[str] = None # Country
|
||||
privacy: Optional[bool] = None
|
||||
received_date: Optional[str] = None
|
||||
|
||||
|
||||
def parse_mbox_file(filepath: str) -> List[Lead]:
|
||||
@@ -64,12 +65,48 @@ def parse_mbox_file(filepath: str) -> List[Lead]:
|
||||
# Extract lead data from body
|
||||
lead = parse_email_body(body)
|
||||
|
||||
# Extract received date from headers
|
||||
try:
|
||||
lead.received_date = extract_received_date(headers)
|
||||
except ValueError as e:
|
||||
print(f"WARNING: {e}")
|
||||
raise
|
||||
|
||||
if lead.name or lead.mail: # Only add if we have some data
|
||||
leads.append(lead)
|
||||
|
||||
return leads
|
||||
|
||||
|
||||
def extract_received_date(headers: str) -> Optional[str]:
    """
    Extract the Date header from email headers and convert to ISO format.

    The first line whose field name is ``Date`` is used; the field name is
    matched case-insensitively, and whitespace after the colon is optional.

    Args:
        headers: Email headers section

    Returns:
        ISO format date string from the Date header, or None if not found

    Raises:
        ValueError: If Date header cannot be parsed to ISO format
    """
    from email.utils import parsedate_to_datetime

    field_prefix = 'date:'
    prefix_len = len(field_prefix)

    for line in headers.split('\n'):
        # Header field names are case-insensitive ("Date:", "date:", "DATE:").
        if line[:prefix_len].lower() != field_prefix:
            continue

        # Slice right after the colon and let strip() remove any optional
        # whitespace (and a trailing '\r'). Slicing one character further
        # would eat the first character of the value when the header is
        # written without a space, e.g. "Date:12 Nov ..." -> "2 Nov ...".
        date_value = line[prefix_len:].strip()
        try:
            # Parse the RFC 2822 date format and convert to ISO format
            return parsedate_to_datetime(date_value).isoformat()
        except (TypeError, ValueError) as e:
            # Older Python versions raise TypeError for unparseable dates,
            # newer ones ValueError; normalise both to ValueError so parsing
            # failures are caught and reported by the caller.
            raise ValueError(f"Failed to parse date '{date_value}': {e}") from e

    return None
|
||||
|
||||
|
||||
def parse_email_body(body: str) -> Lead:
|
||||
"""
|
||||
Parse the body of an email to extract lead information.
|
||||
@@ -172,7 +209,8 @@ def export_to_csv(leads: List[Lead], output_file: str) -> None:
|
||||
'device',
|
||||
'anrede',
|
||||
'land',
|
||||
'privacy'
|
||||
'privacy',
|
||||
'received_date'
|
||||
]
|
||||
|
||||
with open(output_file, 'w', newline='', encoding='utf-8') as f:
|
||||
|
||||
Reference in New Issue
Block a user