From 8522f0b7f6c86ba26e525211e5d18a9eb8c64f1f Mon Sep 17 00:00:00 2001
From: Mic <m@micz.it>
Date: Sun, 12 Apr 2026 22:10:00 +0200
Subject: [PATCH] improved multipart html body decoding

---
 js/mzta-utils.js   | 44 +++++++++++++++++++++++++++++---------------
 mzta-background.js |  4 ++--
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/js/mzta-utils.js b/js/mzta-utils.js
index ab207bd5..e86927a1 100644
--- a/js/mzta-utils.js
+++ b/js/mzta-utils.js
@@ -145,38 +145,52 @@ export async function getMailSubject(tab){
 }
 
 function extractTextParts(fullMessage) {
-  const textParts = [];
-
+  const textParts = []; // text/* parts collected by walkParts, in visit order
   function walkParts(parts) {
     for (const part of parts) {
       if (part.parts && part.parts.length > 0) {
-        // Recursively walk through sub-parts
-        walkParts(part.parts);
-      } else {
-        // Check if contentType starts with "text/"
-        if (part.contentType && part.contentType.startsWith("text/")) {
-          textParts.push(part);
-        }
+        walkParts(part.parts); // descend first, so nested parts are collected before their container
+      }
+      // Checked for every part, including those that have sub-parts (previously only leaf parts were tested).
+      if (part.contentType && part.contentType.startsWith("text/")) {
+        textParts.push(part);
       }
     }
   }
-
   if (fullMessage.parts && fullMessage.parts.length > 0) {
-    walkParts(fullMessage.parts);
+    walkParts(fullMessage.parts);
   }
+  return textParts;
+}
 
-  return textParts;
+function smartDecode(buf) { // Decodes a byte buffer to a string: strict UTF-8 first, windows-1252 as fallback.
+  try {
+    return new TextDecoder('utf-8', { fatal: true }).decode(buf); // fatal: invalid UTF-8 throws instead of being replaced
+  } catch (e) {
+    return new TextDecoder('windows-1252').decode(buf); // reached on any error thrown by the strict decode above
+  }
 }
   
-export function getMailBody(fullMessage){
+export async function getMailBody(fullMessage, messageId) {
   const textParts = extractTextParts(fullMessage);
   let text = "";
   let html = "";
+  // Collects the bodies of text/plain parts into `text` and of text/html parts into `html`.
+  // When part.body is undefined/empty and both messageId and part.partName are set, the part is fetched and decoded instead.
   for (const part of textParts) {
+    let body = part.body;
+    if ((body === undefined || body === "") && messageId && part.partName) {
+      const file = await browser.messages.getAttachmentFile(messageId, part.partName);
+      const buf = await file.arrayBuffer();
+      // NOTE(review): a rejection from getAttachmentFile()/arrayBuffer() is not caught and rejects getMailBody - confirm intended.
+      body = smartDecode(buf);
+    }
     if (part.contentType === "text/plain") {
-      text += part.body;
+      // `?? ""` keeps a still-missing body from being appended as the text "undefined".
+      text += body ?? "";
     } else if (part.contentType === "text/html") {
-      html += part.body;
+      // Same guard as the text/plain branch.
+      html += body ?? "";
     }
   }
   if(html === "") {
diff --git a/mzta-background.js b/mzta-background.js
index 7c901ac3..a6d4e840 100644
--- a/mzta-background.js
+++ b/mzta-background.js
@@ -1113,7 +1113,7 @@ async function processEmails(args) {
     
             if (addTagsAuto || spamFilter) {
                 curr_fullMessage = await browser.messages.getFull(message.id);
-                msg_text = getMailBody(curr_fullMessage);
+                msg_text = await getMailBody(curr_fullMessage, message.id); // message.id lets getMailBody fetch text parts whose body is missing
                 taLog.log("Starting from the HTML body if present and converting to plain text...");
                 body_text = htmlBodyToPlainText(msg_text.html);
                 if( body_text.length == 0 ){
@@ -1283,7 +1283,7 @@ async function processEmails(args) {
           
             // extract body of current message as text
             const curr_message_full = await browser.messages.getFull(curr_message.id);
-            const curr_body_full_html = getMailBody(curr_message_full);
+            const curr_body_full_html = await getMailBody(curr_message_full, curr_message.id); // id lets getMailBody fetch text parts whose body is missing
             let curr_body_full_text = htmlBodyToPlainText(curr_body_full_html.html);
             if( curr_body_full_text.length === 0) {
                 taLog.log("No HTML found in the message body, using plain text...");