From 8522f0b7f6c86ba26e525211e5d18a9eb8c64f1f Mon Sep 17 00:00:00 2001
From: Mic
Date: Sun, 12 Apr 2026 22:10:00 +0200
Subject: [PATCH] improved multipart html body decoding

getMailBody() becomes async and accepts an optional messageId. When a
text part carries no inline body (undefined or empty string) and both
messageId and part.partName are available, the part is fetched through
browser.messages.getAttachmentFile() and decoded as strict UTF-8, with
windows-1252 as the fallback when the bytes are not valid UTF-8.

extractTextParts() now also collects a text/* part that has sub-parts,
instead of only leaf parts.

Both processEmails() call sites await getMailBody() and pass the message
id; without the id the attachment-file fallback can never be reached.

---
 js/mzta-utils.js   | 44 +++++++++++++++++++++++++++++---------------
 mzta-background.js |  4 ++--
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/js/mzta-utils.js b/js/mzta-utils.js
index ab207bd5..e86927a1 100644
--- a/js/mzta-utils.js
+++ b/js/mzta-utils.js
@@ -145,38 +145,52 @@ export async function getMailSubject(tab){
 }
 
 function extractTextParts(fullMessage) {
-  const textParts = [];
-
+  const textParts = []
   function walkParts(parts) {
     for (const part of parts) {
       if (part.parts && part.parts.length > 0) {
-        // Recursively walk through sub-parts
-        walkParts(part.parts);
-      } else {
-        // Check if contentType starts with "text/"
-        if (part.contentType && part.contentType.startsWith("text/")) {
-          textParts.push(part);
-        }
+        walkParts(part.parts)
+      }
+      // console.log(">>>>>>>>>>>> extractTextParts: part.contentType: " + part.contentType + ", part.decryptionStatus: " + part.decryptionStatus + ", part.body: " + part.body);
+      if (part.contentType && part.contentType.startsWith('text/')) {
+        textParts.push(part)
       }
     }
   }
-
   if (fullMessage.parts && fullMessage.parts.length > 0) {
-    walkParts(fullMessage.parts);
+    walkParts(fullMessage.parts)
   }
+  return textParts
+}
 
-  return textParts;
+function smartDecode(buf) {
+  try {
+    return new TextDecoder('utf-8', { fatal: true }).decode(buf);
+  } catch (e) {
+    return new TextDecoder('windows-1252').decode(buf);
+  }
 }
 
-export function getMailBody(fullMessage){
+export async function getMailBody(fullMessage, messageId) {
   const textParts = extractTextParts(fullMessage);
   let text = "";
   let html = "";
+  // console.log(">>>>>>>>>>>>>> getMailBody: textParts: " + JSON.stringify(textParts));
+  // console.log(">>>>>>>>>>>>>> getMailBody: fullMessage: " + JSON.stringify(fullMessage));
   for (const part of textParts) {
+    let body = part.body;
+    if ((body === undefined || body === "") && messageId && part.partName) {
+      const file = await browser.messages.getAttachmentFile(messageId, part.partName);
+      const buf = await file.arrayBuffer();
+      //const buf = new TextDecoder('utf-8').decode(buf);
+      body = smartDecode(buf);
+    }
     if (part.contentType === "text/plain") {
-      text += part.body;
+      // console.log(">>>>>>>>>>>>>> getMailBody: part.body (TEXT): " + body);
+      text += body ?? "";
     } else if (part.contentType === "text/html") {
-      html += part.body;
+      // console.log(">>>>>>>>>>>>>> getMailBody: part.body (HTML): " + (body ? body.substring(0, 80) : body));
+      html += body ?? "";
     }
   }
   if(html === "") {
diff --git a/mzta-background.js b/mzta-background.js
index 7c901ac3..a6d4e840 100644
--- a/mzta-background.js
+++ b/mzta-background.js
@@ -1113,7 +1113,7 @@ async function processEmails(args) {
 
         if (addTagsAuto || spamFilter) {
             curr_fullMessage = await browser.messages.getFull(message.id);
-            msg_text = getMailBody(curr_fullMessage);
+            msg_text = await getMailBody(curr_fullMessage, message.id);
             taLog.log("Starting from the HTML body if present and converting to plain text...");
             body_text = htmlBodyToPlainText(msg_text.html);
             if( body_text.length == 0 ){
@@ -1283,7 +1283,7 @@ async function processEmails(args) {
 
         // extract body of current message as text
         const curr_message_full = await browser.messages.getFull(curr_message.id);
-        const curr_body_full_html = getMailBody(curr_message_full);
+        const curr_body_full_html = await getMailBody(curr_message_full, curr_message.id);
         let curr_body_full_text = htmlBodyToPlainText(curr_body_full_html.html);
         if( curr_body_full_text.length === 0) {
             taLog.log("No HTML found in the message body, using plain text...");