diff --git a/docs/supported_inventory_types.md b/docs/supported_inventory_types.md index c45e5645a..29d19943b 100644 --- a/docs/supported_inventory_types.md +++ b/docs/supported_inventory_types.md @@ -177,6 +177,7 @@ See the docs on [how to add a new Extractor](/docs/new_extractor.md). | Type | Details | Extractor Plugin | |-------|---------------------------------------------------|-------------------| | vmdk | Supports Ext4, ExFAT, FAT32, and NTFS filesystems | `embeddedfs/vmdk` | +| ova | Extracts .ova files | `embeddedfs/ova` | ## Detectors diff --git a/extractor/filesystem/embeddedfs/ova/ova.go b/extractor/filesystem/embeddedfs/ova/ova.go new file mode 100644 index 000000000..4272b4ab6 --- /dev/null +++ b/extractor/filesystem/embeddedfs/ova/ova.go @@ -0,0 +1,161 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package ova provides an extractor for extracting software inventories from OVA archives +package ova + +import ( + "archive/tar" + "context" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/google/osv-scalibr/artifact/image/symlink" + "github.com/google/osv-scalibr/extractor/filesystem" + "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/common" + scalibrfs "github.com/google/osv-scalibr/fs" + "github.com/google/osv-scalibr/inventory" + "github.com/google/osv-scalibr/plugin" +) + +const ( + // Name is the unique identifier for the ova extractor. 
+	Name = "embeddedfs/ova"
+)
+
+// Extractor implements the filesystem.Extractor interface for ova.
+type Extractor struct{}
+
+// New returns a new ova extractor.
+func New() filesystem.Extractor {
+	return &Extractor{}
+}
+
+// Name returns the name of the extractor.
+func (e *Extractor) Name() string {
+	return Name
+}
+
+// Version returns the version of the extractor.
+func (e *Extractor) Version() int {
+	return 0
+}
+
+// Requirements returns the requirements for the extractor.
+func (e *Extractor) Requirements() *plugin.Capabilities {
+	return &plugin.Capabilities{}
+}
+
+// FileRequired checks if the file is a .ova file based on its extension.
+func (e *Extractor) FileRequired(api filesystem.FileAPI) bool {
+	path := api.Path()
+	return strings.HasSuffix(strings.ToLower(path), ".ova")
+}
+
+// Extract unpacks the tar-formatted .ova archive read from input.Reader into a
+// fresh temporary directory and returns an Inventory with a single EmbeddedFS
+// entry whose GetEmbeddedFS function exposes that directory as a filesystem.
+//
+// Only directories and regular files are written to disk; every other tar
+// entry type is skipped. If anything fails, the temporary directory is removed
+// and an empty Inventory is returned together with the error.
+func (e *Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
+	// tar.NewReader only wraps its argument without validating it, so a nil
+	// reader has to be rejected here.
+	if input.Reader == nil {
+		return inventory.Inventory{}, errors.New("input.Reader is nil")
+	}
+
+	// Create a temporary directory for extracted files.
+	tempDir, err := os.MkdirTemp("", "scalibr-ova-")
+	if err != nil {
+		return inventory.Inventory{}, fmt.Errorf("failed to create temporary directory: %w", err)
+	}
+
+	if err := extractArchive(ctx, tar.NewReader(input.Reader), tempDir, input.Path); err != nil {
+		// Nothing will ever reference the partially extracted tree, so remove it
+		// instead of leaking it. Cleanup is best effort: the extraction error is
+		// the one the caller needs to see.
+		_ = os.RemoveAll(tempDir)
+		return inventory.Inventory{}, err
+	}
+
+	// NOTE(review): every call allocates a new reference counter for the same
+	// tempDir - confirm GetEmbeddedFS is invoked at most once per entry.
+	getEmbeddedFS := func(ctx context.Context) (scalibrfs.FS, error) {
+		var refCount int32 = 1
+		var refMu sync.Mutex
+		return &common.EmbeddedDirFS{
+			FS:       scalibrfs.DirFS(tempDir),
+			File:     nil,
+			TmpPaths: []string{tempDir},
+			RefCount: &refCount,
+			RefMu:    &refMu,
+		}, nil
+	}
+
+	var inv inventory.Inventory
+	inv.EmbeddedFSs = append(inv.EmbeddedFSs, &inventory.EmbeddedFS{
+		Path:          input.Path,
+		GetEmbeddedFS: getEmbeddedFS,
+	})
+	return inv, nil
+}
+
+// extractArchive writes the directories and regular files found in tr below
+// destDir. It returns on the first failure, so no further entries are written
+// once something went wrong. archivePath is only used in error messages.
+func extractArchive(ctx context.Context, tr *tar.Reader, destDir, archivePath string) error {
+	for {
+		// Archives can be large; stop between entries once the scan is cancelled.
+		if err := ctx.Err(); err != nil {
+			return fmt.Errorf("extracting %s: %w", archivePath, err)
+		}
+
+		hdr, err := tr.Next()
+		if errors.Is(err, io.EOF) {
+			return nil
+		}
+		if err != nil {
+			return fmt.Errorf("failed to read tar header: %w", err)
+		}
+
+		// Refuse entries whose name would resolve outside the extraction root.
+		if hdr.Name == ".." || symlink.TargetOutsideRoot("/", hdr.Name) {
+			return fmt.Errorf("%s contains invalid entries", archivePath)
+		}
+
+		target := filepath.Join(destDir, hdr.Name)
+		switch hdr.Typeflag {
+		case tar.TypeDir:
+			if err := os.MkdirAll(target, 0755); err != nil {
+				return fmt.Errorf("failed to create directory %s: %w", target, err)
+			}
+		case tar.TypeReg:
+			if err := writeRegularFile(target, tr); err != nil {
+				return err
+			}
+		default:
+			// Skip other types (symlinks, etc.) for now.
+		}
+	}
+}
+
+// writeRegularFile creates target, including any missing parent directories,
+// and fills it with everything read from src.
+func writeRegularFile(target string, src io.Reader) error {
+	dir := filepath.Dir(target)
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		return fmt.Errorf("failed to create directory %s: %w", dir, err)
+	}
+
+	outFile, err := os.Create(target)
+	if err != nil {
+		return fmt.Errorf("failed to create file %s: %w", target, err)
+	}
+	if _, err := io.Copy(outFile, src); err != nil {
+		// The copy error is the interesting one; a close failure adds nothing.
+		_ = outFile.Close()
+		return fmt.Errorf("failed to copy file %s: %w", target, err)
+	}
+	// A failed close can mean buffered data never reached the disk.
+	if err := outFile.Close(); err != nil {
+		return fmt.Errorf("failed to close file %s: %w", target, err)
+	}
+	return nil
+}
diff --git a/extractor/filesystem/embeddedfs/ova/ova_test.go b/extractor/filesystem/embeddedfs/ova/ova_test.go
new file mode 100644
index 000000000..4ac886044
--- /dev/null
+++ b/extractor/filesystem/embeddedfs/ova/ova_test.go
@@ -0,0 +1,237 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ova_test
+
+import (
+	"archive/tar"
+	"bytes"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/google/osv-scalibr/extractor/filesystem"
+	"github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/ova"
+	"github.com/google/osv-scalibr/extractor/filesystem/simplefileapi"
+)
+
+// TestFileRequired verifies that FileRequired accepts paths ending in ".ova"
+// regardless of letter case and rejects every other path.
+func TestFileRequired(t *testing.T) {
+	cases := []struct {
+		path string
+		want bool
+	}{
+		{path: "testdata/valid.ova", want: true},
+		{path: "testdata/VALID.OVA", want: true},
+		// Selection looks only at the extension, so file contents do not matter.
+		{path: "testdata/invalid.ova", want: true},
+		{path: "testdata/document.txt", want: false},
+		{path: "testdata/noextension", want: false},
+	}
+
+	e := ova.New()
+	for _, tc := range cases {
+		t.Run(tc.path, func(t *testing.T) {
+			got := e.FileRequired(simplefileapi.New(tc.path, nil))
+			if got == tc.want {
+				return
+			}
+			t.Errorf("FileRequired(%q) = %v, want %v", tc.path, got, tc.want)
+		})
+	}
+}
+
+func TestExtractValidOVA(t *testing.T) {
+	extractor := ova.New()
+	path := filepath.FromSlash("testdata/valid.ova")
+	info, err := os.Stat(path)
+	if err != nil {
+		t.Fatalf("os.Stat(%q) failed: %v", path, err)
+	}
+
+	f, err := os.Open(path)
+	if err != nil {
+		t.Fatalf("os.Open(%q) failed: %v", path, err)
+	}
+	defer f.Close()
+
+	input := &filesystem.ScanInput{
+		Path:   path,
+		Root:   ".",
+		Info:   info,
+		Reader: f,
+		FS:     nil,
+	}
+
+	ctx := t.Context()
+	inv, err := extractor.Extract(ctx, input)
+	if err != nil {
+		t.Fatalf("Extract(%q) failed: %v", path, err)
+	}
+
+	if len(inv.EmbeddedFSs) == 0 {
+		t.Fatal("Extract returned nothing")
+	}
+
+	for i, embeddedFS := range inv.EmbeddedFSs {
t.Run(fmt.Sprintf("OVAImage_%d", i), func(t *testing.T) { + if !strings.HasPrefix(embeddedFS.Path, path) { + t.Errorf("EmbeddedFS.Path = %q, want prefix %q", embeddedFS.Path, path) + } + + fs, err := embeddedFS.GetEmbeddedFS(ctx) + if err != nil { + t.Errorf("GetEmbeddedFS() failed: %v", err) + } + + entries, err := fs.ReadDir("/") + if err != nil { + t.Fatalf("fs.ReadDir(/) failed: %v", err) + } + t.Logf("ReadDir(/) returned %d entries", len(entries)) + + info, err := fs.Stat("/") + if err != nil { + t.Fatalf("fs.Stat(/) failed: %v", err) + } + if !info.IsDir() { + t.Errorf("fs.Stat(/) IsDir() = %v, want true", info.IsDir()) + } + + found := false + for _, entry := range entries { + name := entry.Name() + if strings.HasSuffix(name, ".ovf") { + found = true + filePath := name + f, err := fs.Open(filePath) + if err != nil { + t.Fatalf("fs.Open(%q) failed: %v", filePath, err) + } + defer f.Close() + + buf := make([]byte, 5) + n, err := f.Read(buf) + if err != nil && !errors.Is(err, io.EOF) { + t.Errorf("f.Read(%q) failed: %v", filePath, err) + } + t.Logf("Read %d bytes from %s\n", n, name) + + // The buffer must start with " valid.ovf <<'EOF' + + + + + Virtual disk information + + + Example placeholder virtual machine + ExampleVM + + +EOF + +# Create the OVA (which is just a tar archive of OVF and any disks) +tar -cvf valid.ova valid.ovf > /dev/null +rm valid.ovf + +echo "Yuvraj Saxena " > invalid.ova diff --git a/extractor/filesystem/embeddedfs/ova/testdata/invalid.ova b/extractor/filesystem/embeddedfs/ova/testdata/invalid.ova new file mode 100644 index 000000000..49f6b894b --- /dev/null +++ b/extractor/filesystem/embeddedfs/ova/testdata/invalid.ova @@ -0,0 +1 @@ +Yuvraj Saxena diff --git a/extractor/filesystem/embeddedfs/ova/testdata/valid.ova b/extractor/filesystem/embeddedfs/ova/testdata/valid.ova new file mode 100644 index 000000000..fb6f7a046 Binary files /dev/null and b/extractor/filesystem/embeddedfs/ova/testdata/valid.ova differ diff --git 
a/extractor/filesystem/list/list.go b/extractor/filesystem/list/list.go index 5387de5ef..60a6627b3 100644 --- a/extractor/filesystem/list/list.go +++ b/extractor/filesystem/list/list.go @@ -26,6 +26,7 @@ import ( "github.com/google/osv-scalibr/extractor/filesystem/containers/dockercomposeimage" "github.com/google/osv-scalibr/extractor/filesystem/containers/k8simage" "github.com/google/osv-scalibr/extractor/filesystem/containers/podman" + "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/ova" "github.com/google/osv-scalibr/extractor/filesystem/embeddedfs/vmdk" "github.com/google/osv-scalibr/extractor/filesystem/language/cpp/conanlock" "github.com/google/osv-scalibr/extractor/filesystem/language/dart/pubspec" @@ -334,6 +335,7 @@ var ( // EmbeddedFS extractors. EmbeddedFS = InitMap{ vmdk.Name: {vmdk.NewDefault}, + ova.Name: {ova.New}, } // Collections of extractors.