You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

215 lines
6.0 KiB
Python

from __future__ import annotations
from dataclasses import dataclass
from email.utils import parseaddr
import logging
from typing import Any
# Header names passed as ``metadataHeaders`` when a message is fetched in
# ``format="metadata"``.
METADATA_HEADERS = ["From", "Subject", "List-Unsubscribe", "Precedence"]

# Lower-case fragments; a subject containing any of them (substring match on
# the lower-cased subject) is treated as advertising.
AD_SUBJECT_KEYWORDS = {
    "coupon",
    "deal",
    "discount",
    "limited time",
    "newsletter",
    "offer",
    "promo",
    "sale",
    "save",
    "special offer",
}

# Lower-case fragments; a ``From`` header containing any of them (substring
# match on the lower-cased header) is treated as advertising.
AD_SENDER_HINTS = {
    "deals",
    "marketing",
    "newsletter",
    "no-reply",
    "noreply",
    "offers",
    "promotions",
}

# Module logger used to report per-message routing failures.
logger = logging.getLogger("personal-agent.gmail")
@dataclass(frozen=True)
class ScanResult:
    """Immutable tally of what happened to the messages in one scan."""

    # Number of messages returned by the listing query.
    scanned: int
    # Messages routed with the "linkedin" outcome.
    linkedin: int
    # Messages routed with the "advertising" outcome.
    advertising: int
    # Messages that matched neither classifier.
    skipped: int
    # Messages whose routing did not produce one of the outcomes above.
    failed: int
class GmailTriageAgent:
    """Label and archive LinkedIn and advertising mail found by a Gmail query.

    Each message returned by the query is fetched in metadata form and
    classified from its ``From``/``Subject``/``List-Unsubscribe``/
    ``Precedence`` headers and its label ids.  Messages classified as
    LinkedIn or advertising receive the matching label and lose ``INBOX``;
    every successfully handled message also receives ``AgentProcessed``.

    Args:
        gmail_service: Client object exposing ``users().labels()`` and
            ``users().messages()`` request builders whose results have an
            ``execute()`` method (presumably a googleapiclient Gmail
            resource -- confirm against the caller).
        query: Gmail search query used to select the messages to scan.
    """

    # User labels the agent needs; created on demand by ``ensure_labels``.
    _REQUIRED_LABELS = ("LinkedIn", "Advertising", "AgentProcessed")
    # Largest page size requested from ``users.messages.list`` in one call.
    _MAX_PAGE_SIZE = 500

    def __init__(self, gmail_service: Any, query: str) -> None:
        self.gmail_service = gmail_service
        self.query = query

    def ensure_labels(self) -> dict[str, str]:
        """Return a name -> id map of the mailbox labels, creating missing ones.

        The three required labels are guaranteed to be present in the result
        under their canonical spelling.  An existing label that differs from
        a required name only by letter case is reused instead of attempting
        to create a second label whose name would conflict with it.

        Returns:
            Mapping of every listed label name (plus the required names) to
            its label id.
        """
        labels_response = (
            self.gmail_service.users().labels().list(userId="me").execute()
        )
        labels = labels_response.get("labels", [])
        label_by_name = {label["name"]: label["id"] for label in labels}
        # Secondary index so "linkedin" satisfies the "LinkedIn" requirement.
        id_by_folded_name = {
            name.casefold(): label_id for name, label_id in label_by_name.items()
        }

        for required_name in self._REQUIRED_LABELS:
            if required_name in label_by_name:
                continue
            existing_id = id_by_folded_name.get(required_name.casefold())
            if existing_id is not None:
                label_by_name[required_name] = existing_id
                continue
            created = (
                self.gmail_service.users()
                .labels()
                .create(
                    userId="me",
                    body={
                        "name": required_name,
                        "labelListVisibility": "labelShow",
                        "messageListVisibility": "show",
                    },
                )
                .execute()
            )
            label_by_name[required_name] = created["id"]
        return label_by_name

    def scan_and_route_messages(self, max_results: int = 100) -> ScanResult:
        """Route up to ``max_results`` messages matching the query.

        Args:
            max_results: Upper bound on the number of messages processed.
                Result pages are followed until this many ids are collected
                or the listing is exhausted.

        Returns:
            A ``ScanResult`` with the number of messages scanned and the
            count of each routing outcome.
        """
        label_by_name = self.ensure_labels()
        message_ids = self._list_message_ids(max_results)

        counts = {"linkedin": 0, "advertising": 0, "skipped": 0, "failed": 0}
        for message_id in message_ids:
            outcome = self._route_message(message_id, label_by_name)
            # Any unrecognised outcome is tallied as a failure.
            counts[outcome if outcome in counts else "failed"] += 1

        return ScanResult(scanned=len(message_ids), **counts)

    def _list_message_ids(self, max_results: int) -> list[str]:
        """Collect at most ``max_results`` message ids, following result pages."""
        message_ids: list[str] = []
        page_token: str | None = None
        while len(message_ids) < max_results:
            request_kwargs: dict[str, Any] = {
                "userId": "me",
                "q": self.query,
                "maxResults": min(
                    max_results - len(message_ids), self._MAX_PAGE_SIZE
                ),
            }
            # Only send pageToken on follow-up requests so the first call is
            # identical to a plain, unpaginated listing.
            if page_token:
                request_kwargs["pageToken"] = page_token
            response = (
                self.gmail_service.users()
                .messages()
                .list(**request_kwargs)
                .execute()
            )
            page = response.get("messages", [])
            message_ids.extend(message["id"] for message in page)
            page_token = response.get("nextPageToken")
            # Stop when the listing is exhausted; an empty page also ends the
            # loop so a misbehaving token can never spin forever.
            if not page_token or not page:
                break
        return message_ids[:max_results]

    def _route_message(self, message_id: str, label_by_name: dict[str, str]) -> str:
        """Classify one message and apply the matching label changes.

        Args:
            message_id: Id of the message to fetch and modify.
            label_by_name: Label name -> id map containing at least
                ``LinkedIn``, ``Advertising`` and ``AgentProcessed``.

        Returns:
            ``"linkedin"``, ``"advertising"`` or ``"skipped"`` on success, or
            ``"failed"`` if anything raised (the exception is logged).
        """
        # Deliberately broad: one bad message is logged and counted as a
        # failure instead of aborting the rest of the scan.
        try:
            message = (
                self.gmail_service.users()
                .messages()
                .get(
                    userId="me",
                    id=message_id,
                    format="metadata",
                    metadataHeaders=METADATA_HEADERS,
                )
                .execute()
            )
            # Header names are lower-cased so lookups are case-insensitive.
            headers = {
                h["name"].lower(): h["value"]
                for h in message.get("payload", {}).get("headers", [])
            }
            label_ids = set(message.get("labelIds", []))
            sender = headers.get("from", "")
            subject = headers.get("subject", "")

            should_linkedin = self._is_linkedin_email(sender=sender, subject=subject)
            should_advertising = self._is_advertising_email(
                sender=sender,
                subject=subject,
                list_unsubscribe=headers.get("list-unsubscribe", ""),
                precedence=headers.get("precedence", ""),
                message_label_ids=label_ids,
            )

            add_labels = [label_by_name["AgentProcessed"]]
            remove_labels: list[str] = []
            # LinkedIn takes precedence when both classifiers match.
            if should_linkedin:
                add_labels.insert(0, label_by_name["LinkedIn"])
                remove_labels.append("INBOX")
                outcome = "linkedin"
            elif should_advertising:
                add_labels.insert(0, label_by_name["Advertising"])
                remove_labels.append("INBOX")
                outcome = "advertising"
            else:
                outcome = "skipped"

            (
                self.gmail_service.users()
                .messages()
                .modify(
                    userId="me",
                    id=message_id,
                    body={
                        "addLabelIds": add_labels,
                        "removeLabelIds": remove_labels,
                    },
                )
                .execute()
            )
            return outcome
        except Exception:
            logger.exception("Failed to route message %s", message_id)
            return "failed"

    def _is_linkedin_email(self, sender: str, subject: str) -> bool:
        """Return True when the sender or subject points at LinkedIn.

        A case-insensitive ``"linkedin"`` substring in either the ``From``
        header or the subject matches, as does a parsed sender address ending
        in ``@linkedin.com``.

        NOTE(review): the substring test also matches third-party mail that
        merely mentions LinkedIn -- confirm that is intended.
        """
        sender_lower = sender.lower()
        subject_lower = subject.lower()
        if "linkedin" in sender_lower or "linkedin" in subject_lower:
            return True
        parsed_address = parseaddr(sender)[1].lower()
        return parsed_address.endswith("@linkedin.com")

    def _is_advertising_email(
        self,
        sender: str,
        subject: str,
        list_unsubscribe: str,
        precedence: str,
        message_label_ids: set[str],
    ) -> bool:
        """Return True when any advertising signal is present.

        Signals, checked in order: the ``CATEGORY_PROMOTIONS`` label, a
        non-blank ``List-Unsubscribe`` header, a ``Precedence`` of ``bulk``,
        ``list`` or ``junk``, an advertising keyword in the subject, or an
        advertising hint in the sender.

        Args:
            sender: Raw ``From`` header value.
            subject: Raw ``Subject`` header value.
            list_unsubscribe: Raw ``List-Unsubscribe`` header value, or "".
            precedence: Raw ``Precedence`` header value, or "".
            message_label_ids: Label ids currently on the message.
        """
        if "CATEGORY_PROMOTIONS" in message_label_ids:
            return True
        if list_unsubscribe.strip():
            return True
        # Strip before comparing so surrounding whitespace in the header
        # value does not hide a bulk/list/junk precedence.
        if precedence.strip().lower() in {"bulk", "list", "junk"}:
            return True

        subject_lower = subject.lower()
        if any(keyword in subject_lower for keyword in AD_SUBJECT_KEYWORDS):
            return True
        sender_lower = sender.lower()
        return any(hint in sender_lower for hint in AD_SENDER_HINTS)