diff --git a/docs/supported_inventory_types.md b/docs/supported_inventory_types.md
index c45e5645a..29d19943b 100644
--- a/docs/supported_inventory_types.md
+++ b/docs/supported_inventory_types.md
@@ -177,6 +177,7 @@ See the docs on [how to add a new Extractor](/docs/new_extractor.md).
| Type | Details | Extractor Plugin |
|-------|---------------------------------------------------|-------------------|
| vmdk | Supports Ext4, ExFAT, FAT32, and NTFS filesystems | `embeddedfs/vmdk` |
+| ova | Unpacks the directories and regular files of .ova (tar) archives | `embeddedfs/ova` |
## Detectors
diff --git a/extractor/filesystem/embeddedfs/ova/ova.go b/extractor/filesystem/embeddedfs/ova/ova.go
new file mode 100644
index 000000000..4272b4ab6
--- /dev/null
+++ b/extractor/filesystem/embeddedfs/ova/ova.go
@@ -0,0 +1,161 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package ova provides an extractor for extracting software inventories from OVA archives
+package ova
+
+import (
+ "archive/tar"
+ "context"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+ "sync"
+
+ "github.com/google/osv-scalibr/artifact/image/symlink"
+ "github.com/google/osv-scalibr/extractor/filesystem"
+ "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common"
+ scalibrfs "github.com/google/osv-scalibr/fs"
+ "github.com/google/osv-scalibr/inventory"
+ "github.com/google/osv-scalibr/plugin"
+)
+
+// Name is the identifier under which the ova extractor is registered.
+const Name = "embeddedfs/ova"
+
+// Extractor is a field-less type that New hands out as a filesystem.Extractor
+// for .ova archives.
+type Extractor struct{}
+
+// New creates the ova extractor.
+func New() filesystem.Extractor {
+	return new(Extractor)
+}
+
+// Name reports the extractor's identifier, i.e. the package-level Name constant.
+func (*Extractor) Name() string { return Name }
+
+// Version reports the extractor's version, which is currently 0.
+func (*Extractor) Version() int { return 0 }
+
+// Requirements returns a pointer to a zero-valued plugin.Capabilities.
+func (*Extractor) Requirements() *plugin.Capabilities {
+	return new(plugin.Capabilities)
+}
+
+// FileRequired reports whether the path of the given file ends in ".ova".
+// The path is lower-cased first, so the comparison ignores case.
+func (e *Extractor) FileRequired(api filesystem.FileAPI) bool {
+	lowered := strings.ToLower(api.Path())
+	return strings.HasSuffix(lowered, ".ova")
+}
+
+// Extract unpacks the .ova file, which is read as a tar archive, into a new
+// temporary directory and returns an Inventory holding a single EmbeddedFS
+// entry whose GetEmbeddedFS function exposes that directory as a filesystem.
+//
+// Only directory and regular-file entries are written; every other entry type
+// (symlinks, etc.) is skipped. An error is returned when input.Reader is nil,
+// when ctx is done, when the archive cannot be read or contains a rejected
+// entry name, or when writing to disk fails. On any extraction error the
+// temporary directory is removed again before returning.
+func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
+	// Check whether input.Reader is nil or not.
+	// This check is crucial because tar.NewReader doesn't validate the input,
+	// it simply wraps it around tar.Reader.
+	if input.Reader == nil {
+		return inventory.Inventory{}, errors.New("input.Reader is nil")
+	}
+
+	// Create a temporary directory for extracted files
+	tempDir, err := os.MkdirTemp("", "scalibr-ova-")
+	if err != nil {
+		return inventory.Inventory{}, fmt.Errorf("failed to create temporary directory: %w", err)
+	}
+
+	if err := extractTar(ctx, tar.NewReader(input.Reader), input.Path, tempDir); err != nil {
+		// Nothing will reference tempDir after a failure, so delete the partially
+		// extracted tree instead of leaking it. Removal is best effort: the
+		// extraction error is the one worth reporting.
+		_ = os.RemoveAll(tempDir)
+		return inventory.Inventory{}, err
+	}
+
+	// NOTE(review): every call builds a fresh reference counter over the same
+	// tempDir; confirm against common.EmbeddedDirFS that releasing one instance
+	// cannot remove the directory while another instance is still in use.
+	getEmbeddedFS := func(ctx context.Context) (scalibrfs.FS, error) {
+		var refCount int32 = 1
+		var refMu sync.Mutex
+		return &common.EmbeddedDirFS{
+			FS:       scalibrfs.DirFS(tempDir),
+			File:     nil,
+			TmpPaths: []string{tempDir},
+			RefCount: &refCount,
+			RefMu:    &refMu,
+		}, nil
+	}
+
+	var inv inventory.Inventory
+	inv.EmbeddedFSs = append(inv.EmbeddedFSs, &inventory.EmbeddedFS{
+		Path:          input.Path,
+		GetEmbeddedFS: getEmbeddedFS,
+	})
+	return inv, nil
+}
+
+// extractTar writes the directory and regular-file entries of tr below destDir
+// and stops at the first error. archivePath is only used in error messages.
+func extractTar(ctx context.Context, tr *tar.Reader, archivePath, destDir string) error {
+	for {
+		// Archives can be large, so honor cancellation between entries.
+		if err := ctx.Err(); err != nil {
+			return fmt.Errorf("extracting %s: %w", archivePath, err)
+		}
+
+		hdr, err := tr.Next()
+		if errors.Is(err, io.EOF) {
+			return nil
+		}
+		if err != nil {
+			return fmt.Errorf("failed to read tar header: %w", err)
+		}
+
+		// Refuse entry names that must not be joined onto destDir. The second
+		// check presumably catches names resolving outside the root; see the
+		// symlink package for its exact rules.
+		if hdr.Name == ".." || symlink.TargetOutsideRoot("/", hdr.Name) {
+			return fmt.Errorf("%s contains invalid entries", archivePath)
+		}
+
+		target := filepath.Join(destDir, hdr.Name)
+		switch hdr.Typeflag {
+		case tar.TypeDir:
+			if err := os.MkdirAll(target, 0755); err != nil {
+				return fmt.Errorf("failed to create directory %s: %w", target, err)
+			}
+		case tar.TypeReg:
+			if err := writeRegularFile(target, tr); err != nil {
+				return err
+			}
+		default:
+			// Skip other types (symlinks, etc.) for now
+		}
+	}
+}
+
+// writeRegularFile creates target, including missing parent directories, and
+// fills it with everything that can be read from src.
+func writeRegularFile(target string, src io.Reader) error {
+	dir := filepath.Dir(target)
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		return fmt.Errorf("failed to create directory %s: %w", dir, err)
+	}
+	outFile, err := os.Create(target)
+	if err != nil {
+		return fmt.Errorf("failed to create file %s: %w", target, err)
+	}
+	if _, err := io.Copy(outFile, src); err != nil {
+		outFile.Close()
+		return fmt.Errorf("failed to copy file %s: %w", target, err)
+	}
+	// Close can report a write error that io.Copy did not see, so check it.
+	if err := outFile.Close(); err != nil {
+		return fmt.Errorf("failed to close file %s: %w", target, err)
+	}
+	return nil
+}
diff --git a/extractor/filesystem/embeddedfs/ova/ova_test.go b/extractor/filesystem/embeddedfs/ova/ova_test.go
new file mode 100644
index 000000000..4ac886044
--- /dev/null
+++ b/extractor/filesystem/embeddedfs/ova/ova_test.go
@@ -0,0 +1,237 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ova_test
+
+import (
+ "archive/tar"
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+
+ "github.com/google/osv-scalibr/extractor/filesystem"
+ "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/ova"
+ "github.com/google/osv-scalibr/extractor/filesystem/simplefileapi"
+)
+
+// TestFileRequired checks that FileRequired accepts paths ending in ".ova" in
+// any letter case and rejects the other sample paths.
+func TestFileRequired(t *testing.T) {
+	cases := []struct {
+		path string
+		want bool
+	}{
+		{path: "testdata/valid.ova", want: true},
+		{path: "testdata/VALID.OVA", want: true},
+		{path: "testdata/invalid.ova", want: true},
+		{path: "testdata/document.txt", want: false},
+		{path: "testdata/noextension", want: false},
+	}
+
+	e := ova.New()
+	for _, tc := range cases {
+		t.Run(tc.path, func(t *testing.T) {
+			got := e.FileRequired(simplefileapi.New(tc.path, nil))
+			if got != tc.want {
+				t.Errorf("FileRequired(%q) = %v, want %v", tc.path, got, tc.want)
+			}
+		})
+	}
+}
+
+func TestExtractValidOVA(t *testing.T) {
+ extractor := ova.New()
+ path := filepath.FromSlash("testdata/valid.ova")
+ info, err := os.Stat(path)
+ if err != nil {
+ t.Fatalf("os.Stat(%q) failed: %v", path, err)
+ }
+
+ f, err := os.Open(path)
+ if err != nil {
+ t.Fatalf("os.Open(%q) failed: %v", path, err)
+ }
+ defer f.Close()
+
+ input := &filesystem.ScanInput{
+ Path: path,
+ Root: ".",
+ Info: info,
+ Reader: f,
+ FS: nil,
+ }
+
+ ctx := t.Context()
+ inv, err := extractor.Extract(ctx, input)
+ if err != nil {
+ t.Fatalf("Extract(%q) failed: %v", path, err)
+ }
+
+ if len(inv.EmbeddedFSs) == 0 {
+ t.Fatal("Extract returned nothing")
+ }
+
+ for i, embeddedFS := range inv.EmbeddedFSs {
+ t.Run(fmt.Sprintf("OVAImage_%d", i), func(t *testing.T) {
+ if !strings.HasPrefix(embeddedFS.Path, path) {
+ t.Errorf("EmbeddedFS.Path = %q, want prefix %q", embeddedFS.Path, path)
+ }
+
+ fs, err := embeddedFS.GetEmbeddedFS(ctx)
+ if err != nil {
+ t.Errorf("GetEmbeddedFS() failed: %v", err)
+ }
+
+ entries, err := fs.ReadDir("/")
+ if err != nil {
+ t.Fatalf("fs.ReadDir(/) failed: %v", err)
+ }
+ t.Logf("ReadDir(/) returned %d entries", len(entries))
+
+ info, err := fs.Stat("/")
+ if err != nil {
+ t.Fatalf("fs.Stat(/) failed: %v", err)
+ }
+ if !info.IsDir() {
+ t.Errorf("fs.Stat(/) IsDir() = %v, want true", info.IsDir())
+ }
+
+ found := false
+ for _, entry := range entries {
+ name := entry.Name()
+ if strings.HasSuffix(name, ".ovf") {
+ found = true
+ filePath := name
+ f, err := fs.Open(filePath)
+ if err != nil {
+ t.Fatalf("fs.Open(%q) failed: %v", filePath, err)
+ }
+ defer f.Close()
+
+ buf := make([]byte, 5)
+ n, err := f.Read(buf)
+ if err != nil && !errors.Is(err, io.EOF) {
+ t.Errorf("f.Read(%q) failed: %v", filePath, err)
+ }
+ t.Logf("Read %d bytes from %s\n", n, name)
+
+ // The buffer must start with " valid.ovf <<'EOF'
+
+
+
+
+ Virtual disk information
+
+
+ Example placeholder virtual machine
+ ExampleVM
+
+
+EOF
+
+# Create the OVA (which is just a tar archive of OVF and any disks)
+tar -cvf valid.ova valid.ovf > /dev/null
+rm valid.ovf
+
+echo "Yuvraj Saxena " > invalid.ova
diff --git a/extractor/filesystem/embeddedfs/ova/testdata/invalid.ova b/extractor/filesystem/embeddedfs/ova/testdata/invalid.ova
new file mode 100644
index 000000000..49f6b894b
--- /dev/null
+++ b/extractor/filesystem/embeddedfs/ova/testdata/invalid.ova
@@ -0,0 +1 @@
+Yuvraj Saxena
diff --git a/extractor/filesystem/embeddedfs/ova/testdata/valid.ova b/extractor/filesystem/embeddedfs/ova/testdata/valid.ova
new file mode 100644
index 000000000..fb6f7a046
Binary files /dev/null and b/extractor/filesystem/embeddedfs/ova/testdata/valid.ova differ
diff --git a/extractor/filesystem/list/list.go b/extractor/filesystem/list/list.go
index 5387de5ef..60a6627b3 100644
--- a/extractor/filesystem/list/list.go
+++ b/extractor/filesystem/list/list.go
@@ -26,6 +26,7 @@ import (
"github.com/google/osv-scalibr/extractor/filesystem/containers/dockercomposeimage"
"github.com/google/osv-scalibr/extractor/filesystem/containers/k8simage"
"github.com/google/osv-scalibr/extractor/filesystem/containers/podman"
+ "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/ova"
"github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/vmdk"
"github.com/google/osv-scalibr/extractor/filesystem/language/cpp/conanlock"
"github.com/google/osv-scalibr/extractor/filesystem/language/dart/pubspec"
@@ -334,6 +335,7 @@ var (
// EmbeddedFS extractors.
EmbeddedFS = InitMap{
vmdk.Name: {vmdk.NewDefault},
+ ova.Name: {ova.New},
}
// Collections of extractors.