diff --git a/mediapipe/model_maker/python/core/utils/file_util.py b/mediapipe/model_maker/python/core/utils/file_util.py
index 71b5a0a7b8..dc43ebe4c3 100644
--- a/mediapipe/model_maker/python/core/utils/file_util.py
+++ b/mediapipe/model_maker/python/core/utils/file_util.py
@@ -66,6 +66,9 @@ def get_path(self) -> str:
     )
     if not absolute_path.exists():
       print(f'Downloading {self.url} to {absolute_path}')
+      # Enforce HTTPS to prevent man-in-the-middle attacks
+      if not self.url.startswith('https://'):
+        raise ValueError(f'URL must use HTTPS protocol for security: {self.url}')
       r = requests.get(self.url, allow_redirects=True)
       if self.is_folder:
         # Use tempf to store the downloaded .tar.gz file
diff --git a/mediapipe/model_maker/python/text/text_classifier/dataset.py b/mediapipe/model_maker/python/text/text_classifier/dataset.py
index a68d92d0d3..b9cdc7452f 100644
--- a/mediapipe/model_maker/python/text/text_classifier/dataset.py
+++ b/mediapipe/model_maker/python/text/text_classifier/dataset.py
@@ -105,7 +105,7 @@ def from_csv(
     if cache_dir is None:
       cache_dir = tempfile.mkdtemp()
     # calculate hash for cache based off of files
-    hasher = hashlib.md5()
+    hasher = hashlib.sha256()
     hasher.update(os.path.basename(filename).encode("utf-8"))
     with tf.io.gfile.GFile(filename, "r") as f:
       reader = csv.DictReader(
diff --git a/mediapipe/model_maker/python/vision/object_detector/dataset_util.py b/mediapipe/model_maker/python/vision/object_detector/dataset_util.py
index fbb821b3b9..86276a6569 100644
--- a/mediapipe/model_maker/python/vision/object_detector/dataset_util.py
+++ b/mediapipe/model_maker/python/vision/object_detector/dataset_util.py
@@ -103,7 +103,7 @@ def get_cache_files_coco(
   Returns:
     An object of CacheFiles class.
   """
-  hasher = hashlib.md5()
+  hasher = hashlib.sha256()
   # Update with dataset folder name
   hasher.update(_get_dir_basename(data_dir).encode('utf-8'))
   # Update with image filenames
@@ -140,7 +140,7 @@ def get_cache_files_pascal_voc(
   Returns:
     An object of CacheFiles class.
   """
-  hasher = hashlib.md5()
+  hasher = hashlib.sha256()
   hasher.update(_get_dir_basename(data_dir).encode('utf-8'))
   annotation_files = tf.io.gfile.glob(
       os.path.join(data_dir, 'Annotations') + r'/*.xml'