diff --git a/sonar-project.properties b/sonar-project.properties
index c8d7c80249..0822294f0f 100644
--- a/sonar-project.properties
+++ b/sonar-project.properties
@@ -27,7 +27,8 @@ sonar.links.ci=https://github.com/apache/nutch/actions
 sonar.sources=src/java,src/plugin
 sonar.tests=src/test,src/plugin
 sonar.test.inclusions=**/src/test/**/*.java,**/Test*.java,**/*IT.java
-sonar.exclusions=**/build.xml,**/build-ivy.xml,**/build-plugin.xml,**/ivy.xml,**/plugin.xml
+# Exclude build/config files and plugin resource directories (no Java code in conf, data, sample)
+sonar.exclusions=**/build.xml,**/build-ivy.xml,**/build-plugin.xml,**/ivy.xml,**/plugin.xml,**/src/plugin/**/conf/**,**/src/plugin/**/data/**,**/src/plugin/**/sample/**
 sonar.source.encoding=UTF-8
 sonar.java.source=17
 
diff --git a/src/java/org/apache/nutch/parse/ParseOutputFormat.java b/src/java/org/apache/nutch/parse/ParseOutputFormat.java
index 295c5e853d..4774d3e20b 100644
--- a/src/java/org/apache/nutch/parse/ParseOutputFormat.java
+++ b/src/java/org/apache/nutch/parse/ParseOutputFormat.java
@@ -57,6 +57,7 @@
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map.Entry;
 
@@ -73,7 +74,23 @@ public class ParseOutputFormat extends OutputFormat<Text, Parse> {
     NUMBER_FORMAT.setMinimumIntegerDigits(5);
     NUMBER_FORMAT.setGroupingUsed(false);
   }
-  
+
+  /**
+   * Turns the comma-separated db.parsemeta.to.crawldb config value into keys.
+   * The value is split on literal commas and each piece is trimmed; blank
+   * pieces are dropped. No pattern with repeated quantifiers is involved, so
+   * there is no regex backtracking (ReDoS) risk.
+   * @param value config value (may be null or empty)
+   * @return array of trimmed, non-empty metadata keys (never null)
+   */
+  static String[] getParseMetaToCrawlDBKeys(String value) {
+    String[] segments = (value == null) ? new String[0] : value.split(",");
+    return Arrays.stream(segments)
+        .map(String::trim)
+        .filter(key -> key.length() > 0)
+        .toArray(String[]::new);
+  }
+
   private static class SimpleEntry implements Entry<Text, CrawlDatum> {
     private Text key;
     private CrawlDatum value;
@@ -177,8 +194,8 @@ public RecordWriter<Text, Parse> getRecordWriter(TaskAttemptContext context)
     Path data = new Path(new Path(out, ParseData.DIR_NAME), name);
     Path crawl = new Path(new Path(out, CrawlDatum.PARSE_DIR_NAME), name);
 
-    final String[] parseMDtoCrawlDB = conf.get("db.parsemeta.to.crawldb", "")
-        .split(" *, *");
+    final String[] parseMDtoCrawlDB = getParseMetaToCrawlDBKeys(
+        conf.get("db.parsemeta.to.crawldb", ""));
 
     // textOut Options
     final MapFile.Writer textOut;
diff --git a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
index 73d42d0dc6..428eb0b709 100644
--- a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
+++ b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
@@ -24,8 +24,6 @@
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 import java.net.URL;
 import java.net.MalformedURLException;
 import java.nio.charset.StandardCharsets;
@@ -64,15 +62,10 @@ public class HtmlParser implements Parser {
   // NUTCH-2042 (cf. TIKA-357): increased to 8 kB
   private static final int CHUNK_SIZE = 8192;
 
-  // NUTCH-1006 Meta equiv with single quotes not accepted
-  private static Pattern metaPattern = Pattern.compile(
-      "<meta\\s+([^>]*http-equiv=(\"|')?content-type(\"|')?[^>]*)>",
-      Pattern.CASE_INSENSITIVE);
-  private static Pattern charsetPattern = Pattern.compile(
-      "charset=\\s*([a-z][_\\-0-9a-z]*)", Pattern.CASE_INSENSITIVE);
-  private static Pattern charsetPatternHTML5 = Pattern.compile(
-      "<meta\\s+charset\\s*=\\s*[\"']?([a-z][_\\-0-9a-z]*)[^>]*>",
-      Pattern.CASE_INSENSITIVE);
+  private static final String META_TAG_START = "<meta";
+  private static final String CHARSET_EQ = "charset=";
+  private static final String HTTP_EQUIV = "http-equiv";
+  private static final String CONTENT_TYPE = "content-type";
 
   private String parserImpl;
 
@@ -93,6 +86,82 @@ public class HtmlParser implements Parser {
    *          <code>byte[]</code> representation of an html file
    */
 
+  /**
+   * Extracts the charset name after the first "charset=" in {@code s} at or after
+   * {@code fromIndex}, using a linear scan (no regex) to avoid ReDoS. The key is
+   * matched case-insensitively; spaces/tabs after '=' and one quote are skipped.
+   * @return name starting with [A-Za-z], then [A-Za-z0-9_-]; null if none found
+   */
+  private static String extractCharsetValue(String s, int fromIndex) {
+    final int keyLen = CHARSET_EQ.length();
+    int idx = -1;
+    for (int i = Math.max(fromIndex, 0); idx < 0 && i + keyLen <= s.length(); i++) {
+      if (s.regionMatches(true, i, CHARSET_EQ, 0, keyLen)) {
+        idx = i;
+      }
+    }
+    if (idx < 0) {
+      return null;
+    }
+    int start = idx + keyLen;
+    while (start < s.length() && (s.charAt(start) == ' ' || s.charAt(start) == '\t')) {
+      start++;
+    }
+    if (start < s.length() && (s.charAt(start) == '"' || s.charAt(start) == '\'')) {
+      start++;
+    }
+    int end = start;
+    while (end < s.length()) {
+      char c = s.charAt(end);
+      boolean letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+      // digits, '_' and '-' may continue a name but must not start it
+      boolean tail = end > start && ((c >= '0' && c <= '9') || c == '_' || c == '-');
+      if (!letter && !tail) {
+        break;
+      }
+      end++;
+    }
+    return end > start ? s.substring(start, end) : null;
+  }
+
+  /**
+   * Finds charset from HTML string using linear scans only (no backtracking regex).
+   * Meta tags are visited in document order; the first one that yields a charset
+   * wins, whether it is the http-equiv Content-Type form or the HTML5
+   * {@code <meta charset>} form. Package-private for unit testing.
+   * @param str HTML text to inspect
+   * @return the charset name as written in the tag, or null if none is found
+   */
+  static String extractCharsetFromMeta(String str) {
+    final int nameLen = META_TAG_START.length();
+    int pos = 0;
+    // the loop bound keeps charAt(pos + nameLen) in range
+    while (pos + nameLen < str.length()) {
+      // "<meta" (any case) must be followed by whitespace, otherwise it is a
+      // different element such as <metadata>
+      if (!str.regionMatches(true, pos, META_TAG_START, 0, nameLen)
+          || !Character.isWhitespace(str.charAt(pos + nameLen))) {
+        pos++;
+        continue;
+      }
+      int tagEnd = str.indexOf('>', pos);
+      if (tagEnd < 0) {
+        break;
+      }
+      String tag = str.substring(pos, tagEnd);
+      // Locale.ROOT: lower-casing must not follow the default locale's rules
+      String tagLower = tag.toLowerCase(java.util.Locale.ROOT);
+      boolean html4Form = tagLower.contains(HTTP_EQUIV) && tagLower.contains(CONTENT_TYPE);
+      boolean html5Form = tagLower.contains(CHARSET_EQ);
+      String charset = (html4Form || html5Form) ? extractCharsetValue(tag, 0) : null;
+      if (charset != null) {
+        return charset;
+      }
+      pos = tagEnd + 1;
+    }
+    return null;
+  }
+
   private static String sniffCharacterEncoding(byte[] content) {
     int length = content.length < CHUNK_SIZE ? content.length : CHUNK_SIZE;
 
@@ -102,20 +171,7 @@ private static String sniffCharacterEncoding(byte[] content) {
     // {U+0041, U+0082, U+00B7}.
     String str = new String(content, 0, length, StandardCharsets.US_ASCII);
 
-    Matcher metaMatcher = metaPattern.matcher(str);
-    String encoding = null;
-    if (metaMatcher.find()) {
-      Matcher charsetMatcher = charsetPattern.matcher(metaMatcher.group(1));
-      if (charsetMatcher.find())
-        encoding = charsetMatcher.group(1);
-    }
-    if (encoding == null) {
-      // check for HTML5 meta charset
-      metaMatcher = charsetPatternHTML5.matcher(str);
-      if (metaMatcher.find()) {
-        encoding = metaMatcher.group(1);
-      }
-    }
+    String encoding = extractCharsetFromMeta(str);
     if (encoding == null) {
       // check for BOM
       if (content.length >= 3 && content[0] == (byte) 0xEF
diff --git a/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java b/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
index 47beff5a0f..5b66870250 100644
--- a/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
+++ b/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
@@ -33,7 +33,7 @@
 
 import static org.junit.jupiter.api.Assertions.*;
 
-public class TestHtmlParser {
+class TestHtmlParser {
 
   private static final Logger LOG = LoggerFactory
       .getLogger(MethodHandles.lookup().lookupClass());
@@ -105,7 +105,7 @@ protected Parse parse(byte[] contentBytes) {
   }
 
   @Test
-  public void testEncodingDetection() {
+  void testEncodingDetection() {
     for (String[] testPage : encodingTestPages) {
       String name = testPage[0];
       Charset charset = Charset.forName(testPage[1]);
@@ -131,7 +131,7 @@ public void testEncodingDetection() {
   }
 
   @Test
-  public void testResolveBaseUrl() {
+  void testResolveBaseUrl() {
     byte[] contentBytes = resolveBaseUrlTestContent
         .getBytes(StandardCharsets.UTF_8);
     // parse using http://example.com/ as "fetch" URL
@@ -143,4 +143,30 @@ public void testResolveBaseUrl() {
         outlinks[0].getToUrl());
   }
 
+  /** Tests charset extraction from meta tags (ReDoS-safe parsing). */
+  @Test
+  void testExtractCharsetFromMeta() {
+    assertNull(HtmlParser.extractCharsetFromMeta(""));
+    assertNull(HtmlParser.extractCharsetFromMeta("<html><head></head></html>"));
+
+    assertEquals("utf-8", HtmlParser.extractCharsetFromMeta(
+        "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />"));
+    assertEquals("utf-8", HtmlParser.extractCharsetFromMeta(
+        "<meta http-equiv='Content-Type' content='text/html; charset=utf-8' />"));
+    assertEquals("ISO-8859-1", HtmlParser.extractCharsetFromMeta(
+        "<meta http-equiv=Content-Type content=\"text/html; charset=ISO-8859-1\">"));
+
+    assertEquals("utf-8", HtmlParser.extractCharsetFromMeta(
+        "<meta charset=\"utf-8\">"));
+    assertEquals("utf-8", HtmlParser.extractCharsetFromMeta(
+        "<meta charset='utf-8'>"));
+    assertEquals("utf-8", HtmlParser.extractCharsetFromMeta(
+        "<meta charset=utf-8>"));
+
+    // The first meta tag carrying a charset wins when both forms appear
+    String both = "<meta http-equiv=\"Content-Type\" content=\"charset=windows-1252\">"
+        + "<meta charset=\"utf-8\">";
+    assertEquals("windows-1252", HtmlParser.extractCharsetFromMeta(both));
+  }
+
 }
diff --git a/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java b/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
index 194ef915e8..664d0c30e7 100644
--- a/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
+++ b/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
@@ -26,8 +26,6 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 import org.apache.nutch.parse.HTMLMetaTags;
 import org.apache.nutch.parse.HtmlParseFilter;
@@ -188,13 +186,72 @@ public ParseResult getParse(Content c) {
     return ParseResult.createParseResult(c.getUrl(), new ParseImpl(script, pd));
   }
 
-  private static final Pattern STRING_PATTERN = Pattern.compile(
-      "(\\\\*(?:\"|\'))([^\\s\"\']+?)(?:\\1)",
-      Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
-  // A simple pattern. This allows also invalid URL characters.
-  private static final Pattern URI_PATTERN = Pattern.compile(
-      "(^|\\s*?)/?\\S+?[/\\.]\\S+($|\\s*)",
-      Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);
+  /**
+   * Extracts content of quoted strings (single or double) from JavaScript.
+   * Backslash escapes the next character. The scan is linear: once a quote type
+   * has been followed to the end of the text without closing, later opening
+   * quotes of that type are skipped, because they cannot close either.
+   * Package-private for unit testing.
+   */
+  static List<String> extractQuotedStrings(String plainText) {
+    List<String> result = new ArrayList<>();
+    final int len = plainText.length();
+    String unclosed = ""; // quote characters already known to have no closing match
+    int i = 0;
+    while (i < len) {
+      char q = plainText.charAt(i);
+      if ((q != '"' && q != '\'') || unclosed.indexOf(q) >= 0) {
+        i++;
+        continue;
+      }
+      StringBuilder content = new StringBuilder();
+      int j = i + 1;
+      boolean closed = false;
+      while (j < len && !closed) {
+        char c = plainText.charAt(j);
+        if (c == '\\') {
+          if (j + 1 < len) {
+            content.append(plainText.charAt(j + 1));
+          }
+          j += 2;
+        } else if (c == q) {
+          closed = true;
+        } else {
+          content.append(c);
+          j++;
+        }
+      }
+      if (closed) {
+        String s = content.toString().trim();
+        if (s.length() > 0) {
+          result.add(s);
+        }
+        i = j + 1;
+      } else {
+        unclosed += q;
+        i++;
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Checks if the trimmed string looks like a URI/path: contains '.' or '/' and
+   * no internal whitespace of any kind (incl. line breaks). Linear, ReDoS-safe.
+   */
+  static boolean looksLikeUri(String s) {
+    if (s == null) {
+      return false;
+    }
+    boolean hasSeparator = false;
+    for (char c : s.trim().toCharArray()) {
+      if (Character.isWhitespace(c)) {
+        return false;
+      }
+      hasSeparator |= c == '.' || c == '/';
+    }
+    return hasSeparator;
+  }
 
   // Alternative pattern, which limits valid url characters.
   // private static final String URI_PATTERN =
@@ -216,14 +273,10 @@ private Outlink[] getJSLinks(String plainText, String anchor, String base) {
 
     try {
 
-      Matcher matcher = STRING_PATTERN.matcher(plainText);
-
-      String url;
+      List<String> quotedStrings = extractQuotedStrings(plainText);
 
-      while (matcher.find()) {
-        url = matcher.group(2);
-        Matcher matcherUri = URI_PATTERN.matcher(url);
-        if (!matcherUri.matches()) {
+      for (String url : quotedStrings) {
+        if (!looksLikeUri(url)) {
           continue;
         }
         if (url.startsWith("www.")) {
diff --git a/src/plugin/parse-js/src/test/org/apache/nutch/parse/js/TestJSParseFilter.java b/src/plugin/parse-js/src/test/org/apache/nutch/parse/js/TestJSParseFilter.java
index fb22a438e1..1a1cbb6a50 100644
--- a/src/plugin/parse-js/src/test/org/apache/nutch/parse/js/TestJSParseFilter.java
+++ b/src/plugin/parse-js/src/test/org/apache/nutch/parse/js/TestJSParseFilter.java
@@ -17,11 +17,13 @@
 package org.apache.nutch.parse.js;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.File;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
+import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
 
@@ -51,7 +53,7 @@
  * temporarily disabled)</li>
  * </ol>
  */
-public class TestJSParseFilter {
+class TestJSParseFilter {
 
   private static final Logger LOG = LoggerFactory
       .getLogger(MethodHandles.lookup().lookupClass());
@@ -67,7 +69,7 @@ public class TestJSParseFilter {
   private Configuration conf;
 
   @BeforeEach
-  public void setUp() {
+  void setUp() {
     conf = NutchConfiguration.create();
     conf.set("file.content.limit", "-1");
     conf.set("plugin.includes", "protocol-file|parse-(html|js)");
@@ -88,8 +90,43 @@ public Outlink[] getOutlinks(String sampleFile)
     return parse.getData().getOutlinks();
   }
 
+  /** Tests quoted string extraction (ReDoS-safe, no regex backtracking). */
   @Test
-  public void testJavaScriptOutlinkExtraction()
+  void testExtractQuotedStrings() {
+    List<String> empty = JSParseFilter.extractQuotedStrings("no quotes here");
+    assertTrue(empty.isEmpty());
+
+    List<String> one = JSParseFilter.extractQuotedStrings("var x = \"http://example.com/\"");
+    assertEquals(1, one.size());
+    assertEquals("http://example.com/", one.get(0));
+
+    List<String> two = JSParseFilter.extractQuotedStrings("a=\"foo\" b='bar'");
+    assertEquals(2, two.size());
+    assertEquals("foo", two.get(0));
+    assertEquals("bar", two.get(1));
+
+    List<String> escaped = JSParseFilter.extractQuotedStrings("\"say \\\"hi\\\"\"");
+    assertEquals(1, escaped.size());
+    assertEquals("say \"hi\"", escaped.get(0));
+  }
+
+  /** Tests URI shape check (ReDoS-safe). */
+  @Test
+  void testLooksLikeUri() {
+    assertFalse(JSParseFilter.looksLikeUri(null));
+    assertFalse(JSParseFilter.looksLikeUri(""));
+    assertFalse(JSParseFilter.looksLikeUri("  "));
+    assertFalse(JSParseFilter.looksLikeUri("no-dot-or-slash"));
+    assertFalse(JSParseFilter.looksLikeUri("has space in it.com"));
+
+    assertTrue(JSParseFilter.looksLikeUri("http://example.com/"));
+    assertTrue(JSParseFilter.looksLikeUri("example.com/path"));
+    assertTrue(JSParseFilter.looksLikeUri("/relative/path"));
+    assertTrue(JSParseFilter.looksLikeUri("  https://foo.bar  "));
+  }
+
+  @Test
+  void testJavaScriptOutlinkExtraction()
       throws ProtocolException, ParseException, IOException {
     String[] filenames = new File(sampleDir).list();
     for (int i = 0; i < filenames.length; i++) {
diff --git a/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/UrlValidator.java b/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/UrlValidator.java
index 14fed8a772..6ec4568de7 100644
--- a/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/UrlValidator.java
+++ b/src/plugin/urlfilter-validator/src/java/org/apache/nutch/urlfilter/validator/UrlValidator.java
@@ -16,6 +16,8 @@
  */
 package org.apache.nutch.urlfilter.validator;
 
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -57,57 +59,24 @@ public class UrlValidator implements URLFilter {
 
   private static final String ALPHA_CHARS = "a-zA-Z";
 
-  private static final String ALPHA_NUMERIC_CHARS = ALPHA_CHARS + "\\d";
-
   private static final String SPECIAL_CHARS = ";/@&=,.?:+$";
 
   private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]";
 
   private static final String SCHEME_CHARS = ALPHA_CHARS;
 
-  // Drop numeric, and "+-." for now
-  private static final String AUTHORITY_CHARS = ALPHA_NUMERIC_CHARS + "\\-\\.";
-
   private static final String ATOM = VALID_CHARS + '+';
 
-  /**
-   * This expression derived/taken from the BNF for URI (RFC2396).
-   */
-  private static final Pattern URL_PATTERN = Pattern
-      .compile("^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)"
-          + "(\\?([^#]*))?(#(.*))?");
-
-  /**
-   * Schema/Protocol (ie. http:, ftp:, file:, etc).
-   */
-  private static final int PARSE_URL_SCHEME = 2;
-
-  /**
-   * Includes hostname/ip and port number.
-   */
-  private static final int PARSE_URL_AUTHORITY = 4;
-
-  private static final int PARSE_URL_PATH = 5;
-
-  private static final int PARSE_URL_QUERY = 7;
-
   /**
    * Protocol (ie. http:, ftp:,https:).
    */
   private static final Pattern SCHEME_PATTERN = Pattern.compile("^["
       + SCHEME_CHARS + "]+");
 
-  private static final Pattern AUTHORITY_PATTERN = Pattern.compile("^(["
-      + AUTHORITY_CHARS + "]*)(:\\d*)?(.*)?");
-
-  private static final int PARSE_AUTHORITY_HOST_IP = 1;
-
-  private static final int PARSE_AUTHORITY_PORT = 2;
-
-  /**
-   * Should always be empty.
-   */
-  private static final int PARSE_AUTHORITY_EXTRA = 3;
+  /** Index for host/IP in parseAuthority result. */
+  private static final int PARSE_AUTHORITY_HOST_IP = 0;
+  /** Index for port string (e.g. ":80") in parseAuthority result, or null. */
+  private static final int PARSE_AUTHORITY_PORT = 1;
 
   private static final Pattern PATH_PATTERN = Pattern
       .compile("^(/[-\\w:@&?=+,.!/~*'%$_;\\(\\)]*)?$");
@@ -157,38 +126,72 @@ public void setConf(Configuration conf) {
    *          value is considered invalid.
    * @return true if the url is valid.
    */
+  /**
+   * Splits an authority of the form "host" or "host:port" at the last colon
+   * using a linear scan (no regex, avoids ReDoS). The suffix is treated as a
+   * port only if it is a non-empty run of ASCII digits; otherwise the whole
+   * input is returned as the host. Package-private for unit testing.
+   *
+   * @param authority raw authority, may be null or empty
+   * @return String[2]: { hostOrIp, portOrNull } where port is e.g. ":80" or null
+   */
+  static String[] parseAuthority(String authority) {
+    if (authority == null || authority.isEmpty()) {
+      return new String[] { "", null };
+    }
+    int lastColon = authority.lastIndexOf(':');
+    int portLength = authority.length() - lastColon - 1;
+    boolean isPort = lastColon >= 0 && portLength > 0;
+    for (int i = lastColon + 1; isPort && i < authority.length(); i++) {
+      char c = authority.charAt(i);
+      isPort = c >= '0' && c <= '9';
+    }
+    if (!isPort) {
+      return new String[] { authority, null };
+    }
+    return new String[] { authority.substring(0, lastColon),
+        authority.substring(lastColon) };
+  }
+
   private boolean isValid(String value) {
     if (value == null) {
       return false;
     }
 
-    Matcher matchUrlPat = URL_PATTERN.matcher(value);
     if (!LEGAL_ASCII_PATTERN.matcher(value).matches()) {
       return false;
     }
 
-    // Check the whole url address structure
-    if (!matchUrlPat.matches()) {
-      return false;
-    }
-
-    if (!isValidScheme(matchUrlPat.group(PARSE_URL_SCHEME))) {
+    String scheme;
+    String authority;
+    String path;
+    String query;
+    try {
+      URI uri = new URI(value);
+      scheme = uri.getScheme();
+      authority = uri.getRawAuthority();
+      // Raw (undecoded) path, like the raw query: PATH_PATTERN checks '%'-escapes
+      // as written, whereas getPath() would decode e.g. "%20" into a space.
+      String rawPath = uri.getRawPath();
+      path = rawPath == null ? "" : rawPath;
+      query = uri.getRawQuery();
+    } catch (URISyntaxException e) {
       return false;
     }
 
-    if (!isValidAuthority(matchUrlPat.group(PARSE_URL_AUTHORITY))) {
+    if (!isValidScheme(scheme)) {
       return false;
     }
 
-    if (!isValidPath(matchUrlPat.group(PARSE_URL_PATH))) {
+    if (!isValidAuthority(authority)) {
       return false;
     }
 
-    if (!isValidQuery(matchUrlPat.group(PARSE_URL_QUERY))) {
+    if (!isValidPath(path)) {
       return false;
     }
 
-    return true;
+    return isValidQuery(query);
   }
 
   /**
@@ -223,15 +226,13 @@ private boolean isValidAuthority(String authority) {
       return false;
     }
 
-    Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority);
-    if (!authorityMatcher.matches()) {
-      return false;
-    }
+    String[] parsed = parseAuthority(authority);
+    String hostIP = parsed[PARSE_AUTHORITY_HOST_IP];
+    String port = parsed[PARSE_AUTHORITY_PORT];
 
     boolean ipV4Address = false;
     boolean hostname = false;
     // check if authority is IP address or hostname
-    String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP);
     Matcher matchIPV4Pat = IP_V4_DOMAIN_PATTERN.matcher(hostIP);
     ipV4Address = matchIPV4Pat.matches();
 
@@ -299,29 +300,13 @@ private boolean isValidAuthority(String authority) {
       return false;
     }
 
-    String port = authorityMatcher.group(PARSE_AUTHORITY_PORT);
     if (port != null) {
       if (!PORT_PATTERN.matcher(port).matches()) {
         return false;
       }
     }
 
-    String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA);
-    return isBlankOrNull(extra);
-  }
-
-  /**
-   * <p>
-   * Checks if the field isn't null and length of the field is greater than zero
-   * not including whitespace.
-   * </p>
-   * 
-   * @param value
-   *          The value validation is being performed on.
-   * @return true if blank or null.
-   */
-  private boolean isBlankOrNull(String value) {
-    return ((value == null) || (value.trim().length() == 0));
+    return true;
   }
 
   /**
diff --git a/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java b/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
index d815486de6..6f1760bbcb 100644
--- a/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
+++ b/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
@@ -18,6 +18,7 @@
 
 import org.junit.jupiter.api.Test;
 
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertNull;
 
@@ -30,7 +31,7 @@
  * 
  */
 
-public class TestUrlValidator {
+class TestUrlValidator {
 
   /**
    * Test method for
@@ -38,7 +39,7 @@ public class TestUrlValidator {
    * .
    */
   @Test
-  public void testFilter() {
+  void testFilter() {
     UrlValidator url_validator = new UrlValidator();
     assertNotNull(url_validator);
 
@@ -73,4 +74,22 @@ public void testFilter() {
         "Valid url: ftp://alfa.bravo.pi/foo/bar/plan.pdf");
 
   }
+
+  /** Tests authority parsing (ReDoS-safe, no regex backtracking). */
+  @Test
+  void testParseAuthority() {
+    assertArrayEquals(new String[] { "", null }, UrlValidator.parseAuthority(null));
+    assertArrayEquals(new String[] { "", null }, UrlValidator.parseAuthority(""));
+
+    assertArrayEquals(new String[] { "example.com", null },
+        UrlValidator.parseAuthority("example.com"));
+    assertArrayEquals(new String[] { "example.com", ":80" },
+        UrlValidator.parseAuthority("example.com:80"));
+    assertArrayEquals(new String[] { "192.168.1.1", ":8080" },
+        UrlValidator.parseAuthority("192.168.1.1:8080"));
+
+    // Port part non-numeric: entire string is host
+    assertArrayEquals(new String[] { "host:port", null },
+        UrlValidator.parseAuthority("host:port"));
+  }
 }
diff --git a/src/test/org/apache/nutch/parse/TestParseOutputFormat.java b/src/test/org/apache/nutch/parse/TestParseOutputFormat.java
new file mode 100644
index 0000000000..6b5cf1a744
--- /dev/null
+++ b/src/test/org/apache/nutch/parse/TestParseOutputFormat.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.parse;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+/** Unit tests for ParseOutputFormat. */
+class TestParseOutputFormat {
+
+  @Test
+  void testGetParseMetaToCrawlDBKeysEmpty() {
+    assertArrayEquals(new String[0], ParseOutputFormat.getParseMetaToCrawlDBKeys(""));
+    assertArrayEquals(new String[0], ParseOutputFormat.getParseMetaToCrawlDBKeys(null));
+  }
+
+  @Test
+  void testGetParseMetaToCrawlDBKeysSingle() {
+    assertArrayEquals(new String[] { "lang" },
+        ParseOutputFormat.getParseMetaToCrawlDBKeys("lang"));
+    assertArrayEquals(new String[] { "lang" },
+        ParseOutputFormat.getParseMetaToCrawlDBKeys("  lang  "));
+  }
+
+  @Test
+  void testGetParseMetaToCrawlDBKeysCommaSeparated() {
+    assertArrayEquals(new String[] { "a", "b" },
+        ParseOutputFormat.getParseMetaToCrawlDBKeys("a,b"));
+    assertArrayEquals(new String[] { "a", "b", "c" },
+        ParseOutputFormat.getParseMetaToCrawlDBKeys("a,b,c"));
+  }
+
+  @Test
+  void testGetParseMetaToCrawlDBKeysTrimSpacesAroundCommas() {
+    assertArrayEquals(new String[] { "a", "b" },
+        ParseOutputFormat.getParseMetaToCrawlDBKeys(" a , b "));
+    assertArrayEquals(new String[] { "lang", "Content-Type" },
+        ParseOutputFormat.getParseMetaToCrawlDBKeys(" lang , Content-Type "));
+  }
+
+  @Test
+  void testGetParseMetaToCrawlDBKeysEmptySegmentsFiltered() {
+    assertArrayEquals(new String[] { "a", "b" },
+        ParseOutputFormat.getParseMetaToCrawlDBKeys("a,,b"));
+    assertArrayEquals(new String[] { "a" },
+        ParseOutputFormat.getParseMetaToCrawlDBKeys("a,,,"));
+    assertArrayEquals(new String[0],
+        ParseOutputFormat.getParseMetaToCrawlDBKeys(",  ,  ,"));
+  }
+
+  @Test
+  void testGetParseMetaToCrawlDBKeysNeverNull() {
+    assertNotNull(ParseOutputFormat.getParseMetaToCrawlDBKeys(null));
+    assertNotNull(ParseOutputFormat.getParseMetaToCrawlDBKeys(""));
+  }
+}