Skip to content
This repository was archived by the owner on Mar 27, 2024. It is now read-only.

Adding size sorting for package and file differs/analyzers #36

Merged
merged 9 commits into from
Aug 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .container-diff-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ while IFS=$' \n\r' read -r flag differ image1 image2 file; do
echo "container-diff" "$differ" "differ failed"
exit 1
fi
done < tests/differ_runs.txt
done < tests/test_differ_runs.txt

while IFS=$' \n\r' read -r flag analyzer image file; do
go run main.go $image $flag -j > $file
if [[ $? -ne 0 ]]; then
echo "container-diff" "$analyzer" "analyzer failed"
exit 1
fi
done < tests/analyzer_runs.txt
done < tests/test_analyzer_runs.txt

success=0
while IFS=$' \n\r' read -r type analyzer actual expected; do
Expand All @@ -22,7 +22,7 @@ while IFS=$' \n\r' read -r type analyzer actual expected; do
echo "container-diff" "$analyzer" "$type" "output is not as expected"
success=1
fi
done < tests/diff_comparisons.txt
done < tests/test_run_comparisons.txt
if [[ "$success" -ne 0 ]]; then
exit 1
fi
Expand Down
70 changes: 42 additions & 28 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ All of the analyzer flags with their long versions can be seen below:

| Differ | Short flag | Long Flag |
| ------------------------- |:----------:| ----------:|
| File System diff | -f | --file |
| File system diff | -f | --file |
| History | -d | --history |
| npm installed packages | -n | --node |
| pip installed packages | -p | --pip |
Expand All @@ -89,6 +89,11 @@ To use the docker client instead of shelling out to your local docker daemon, ad

```container-diff <img1> <img2> -e```

To order files and packages by size (in descending order) when performing file system or package analyses/diffs, add a `-o` or `--order` flag.

```container-diff <img1> <img2> -o```


## Analysis Result Format

The JSONs for analysis results are in the following format:
Expand All @@ -105,31 +110,34 @@ The possible structures of the `Analysis` field are detailed below.

The history analyzer outputs a list of strings representing descriptions of how an image layer was created.

### Filesystem Analysis
### File System Analysis

The filesystem analyzer outputs a list of strings representing filesystem contents.
The file system analyzer outputs a list of strings representing file system contents.

### Package Analysis

Package analyzers such as pip, apt, and node inspect the packages installed within the image provided. All package analyses leverage the PackageInfo struct, which contains the version and size for a given package instance, as detailed below:
Package analyzers such as pip, apt, and node inspect the packages installed within the image provided. All package analyses leverage the PackageOutput struct, which contains the version and size for a given package instance (and a potential installation path for a specific instance of a package where multiple versions are allowed to be installed), as detailed below:
```
type PackageInfo struct {
type PackageOutput struct {
Name string
Path string
Version string
Size string
Size int64
}
```

#### Single Version Package Analysis

Single version package analyzers (apt) have the following output structure: `map[string]PackageInfo`
Single version package analyzers (apt) have the following output structure: `[]PackageOutput`

Here, the `Path` field is omitted because there is only one instance of each package.

In this mapping scheme, each package name is mapped to its PackageInfo as described above.

#### Multi Version Package Analysis

Multi version package analyzers (pip, node) have the following output structure: `map[string]map[string]PackageInfo`
Multi version package analyzers (pip, node) have the following output structure: `[]PackageOutput`

In this mapping scheme, each package name corresponds to another map where the filesystem path to each unique instance of the package (i.e. unique version and/or size info) is mapped to that package instance's PackageInfo.
Here, the `Path` field is included because there may be more than one instance of each package; the path pinpoints where each instance is installed, in case additional investigation into that package instance is desired.


## Diff Result Format
Expand All @@ -156,9 +164,9 @@ type HistDiff struct {
}
```

### Filesystem Diff
### File System Diff

The filesystem differ has the following json output structure:
The file system differ has the following json output structure:

```
type DirDiff struct {
Expand All @@ -170,35 +178,41 @@ type DirDiff struct {

### Package Diffs

Package differs such as pip, apt, and node inspect the packages contained within the images provided. All packages differs currently leverage the PackageInfo struct which contains the version and size for a given package instance.
Package differs such as pip, apt, and node inspect the packages contained within the images provided. All package differs currently leverage the PackageInfo struct, which contains the version and size for a given package instance, as detailed below:
```
type PackageInfo struct {
Version string
Size string
}
```

#### Single Version Package Diffs

Single version differs (apt) have the following json output structure:

```
type PackageDiff struct {
Packages1 map[string]PackageInfo
Packages2 map[string]PackageInfo
Packages1 []PackageOutput
Packages2 []PackageOutput
InfoDiff []Info
}
```

Packages1 and Packages2 map package names to PackageInfo structs which contain the version and size of the package. InfoDiff contains a list of Info structs, each of which contains the package name (which occurred in both images but had a difference in size or version), and the PackageInfo struct for each package instance.
Packages1 and Packages2 detail which packages exist uniquely in Image1 and Image2, respectively, with package name, version and size info. InfoDiff contains a list of Info structs, each of which contains the package name (which occurred in both images but had a difference in size or version), and the PackageInfo struct for each package instance.

#### Multi Version Package Diffs

The multi version differs (pip, node) support processing images which may have multiple versions of the same package. Below is the json output structure:

```
type MultiVersionPackageDiff struct {
Packages1 map[string]map[string]PackageInfo
Packages2 map[string]map[string]PackageInfo
Packages1 []PackageOutput
Packages2 []PackageOutput
InfoDiff []MultiVersionInfo
}
```

Packages1 and Packages2 map package name to path where the package was found to PackageInfo struct (version and size of that package instance). InfoDiff here is exanded to allow for multiple versions to be associated with a single package.
Packages1 and Packages2 detail which packages exist uniquely in Image1 and Image2, respectively, with package name, installation path, version and size info. InfoDiff here is expanded to allow for multiple versions to be associated with a single package. In this case, a package of the same name is considered to differ between two images when there exist one or more instances of it installed in one image but not the other (i.e. have a unique version and/or size).

```
type MultiVersionInfo struct {
Expand Down Expand Up @@ -228,7 +242,7 @@ Packages found only in gcr.io/google-appengine/python:2017-06-29-190410: None

Version differences:
PACKAGE IMAGE1 (gcr.io/google-appengine/python:2017-07-21-123058) IMAGE2 (gcr.io/google-appengine/python:2017-06-29-190410)
-libgcrypt20 1.6.3-2 deb8u4, 998B 1.6.3-2 deb8u3, 1002B
-libgcrypt20 1.6.3-2 deb8u4, 998K 1.6.3-2 deb8u3, 1002K

-----NodeDiffer-----

Expand Down Expand Up @@ -281,12 +295,12 @@ def main():

if len(diff['Packages1']) > 0:
for package in diff['Packages1']:
Size = diff['Packages1'][package]['Size']
Size = package['Size']
img1packages.append((str(package), int(str(Size))))

if len(diff['Packages2']) > 0:
for package in diff['Packages2']:
Size = diff['Packages2'][package]['Size']
Size = package['Size']
img2packages.append((str(package), int(str(Size))))

img1packages = reversed(sorted(img1packages, key=lambda x: x[1]))
Expand Down Expand Up @@ -340,21 +354,21 @@ In order to quickly make your own analyzer, follow these steps:
- No: Implement `getPackages` to collect all versions of all packages within an image in a `map[string]PackageInfo`. Use `GetMapDiff` to diff map objects. See [aptDiff.go](https://github.com/GoogleCloudPlatform/container-diff/blob/master/differs/aptDiff.go#L29).
- No: Look to [History](https://github.com/GoogleCloudPlatform/container-diff/blob/ReadMe/differs/historyDiff.go) and [File System](https://github.com/GoogleCloudPlatform/container-diff/blob/ReadMe/differs/fileDiff.go) differs as models for diffing.

3. Write your analyzer driver in the `differs` directory, such that you have a struct for your analyzer type and two method for that differ: `Analyze` for single image analysis and `Diff` for comparison between two images:
3. Write your analyzer driver in the `differs` directory, such that you have a struct for your analyzer type and two methods for that analyzer: `Analyze` for single image analysis and `Diff` for comparison between two images:

```
type YourAnalyzer struct {}

func (a YourAnalyzer) Analyze(image utils.Image) (utils.AnalyzeResult, error) {...}
func (a YourAnalyzer) Diff(image1, image2 utils.Image) (utils.DiffResult, error) {...}
func (a YourAnalyzer) Analyze(image utils.Image) (utils.Result, error) {...}
func (a YourAnalyzer) Diff(image1, image2 utils.Image) (utils.Result, error) {...}
```
The image arguments passed to your analyzer contain the path to the unpacked tar representation of the image, as well as certain configuration information (e.g. environment variables upon image creation and image history).

If using existing package differ tools, you should create the appropriate structs to analyze or diff. Otherwise, create your own analyzer which should yield information to fill an AnalyzeResult or DiffResult in the next step.
If using existing package tools, you should create the appropriate structs (e.g. `SingleVersionPackageAnalyzeResult` or `SingleVersionPackageDiffResult`) to analyze or diff. Otherwise, create your own structs which should yield information to fill an AnalyzeResult or DiffResult as the return type for Analyze() and Diff(), respectively, and should implement the `Result` interface, as in the next step.

4. Create a result struct following either the AnalyzeResult or DiffResult interface by implementing the following two methods.
4. Create a struct following the `Result` interface by implementing the following two methods.
```
GetStruct() DiffResult
OutputStruct() interface{}
OutputText(diffType string) error
```

Expand Down
81 changes: 32 additions & 49 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,31 +116,8 @@ func diffImages(image1Arg, image2Arg string, diffArgs []string) error {

req := differs.DiffRequest{image1, image2, diffTypes}
if diffs, err := req.GetDiff(); err == nil {
// Outputs diff results in alphabetical order by differ name
sortedTypes := []string{}
for name := range diffs {
sortedTypes = append(sortedTypes, name)
}
sort.Strings(sortedTypes)
glog.Info("Retrieving diffs")
diffResults := []utils.DiffResult{}
for _, diffType := range sortedTypes {
diff := diffs[diffType]
if json {
diffResults = append(diffResults, diff.GetStruct())
} else {
err = diff.OutputText(diffType)
if err != nil {
glog.Error(err)
}
}
}
if json {
err = utils.JSONify(diffResults)
if err != nil {
glog.Error(err)
}
}
outputResults(diffs)
if !save {
cleanupImage(image1)
cleanupImage(image2)
Expand Down Expand Up @@ -175,31 +152,8 @@ func analyzeImage(imageArg string, analyzerArgs []string) error {

req := differs.SingleRequest{image, analyzeTypes}
if analyses, err := req.GetAnalysis(); err == nil {
// Outputs analysis results in alphabetical order by differ name
sortedTypes := []string{}
for name := range analyses {
sortedTypes = append(sortedTypes, name)
}
sort.Strings(sortedTypes)
glog.Info("Retrieving diffs")
analyzeResults := []utils.AnalyzeResult{}
for _, analyzeType := range sortedTypes {
analysis := analyses[analyzeType]
if json {
analyzeResults = append(analyzeResults, analysis.GetStruct())
} else {
err = analysis.OutputText(analyzeType)
if err != nil {
glog.Error(err)
}
}
}
if json {
err = utils.JSONify(analyzeResults)
if err != nil {
glog.Error(err)
}
}
glog.Info("Retrieving analyses")
outputResults(analyses)
if !save {
cleanupImage(image)
} else {
Expand All @@ -215,6 +169,34 @@ func analyzeImage(imageArg string, analyzerArgs []string) error {
return nil
}

// outputResults emits every result in resultMap, visiting analyzers in
// alphabetical order of their names so the output order is deterministic.
//
// When the package-level json flag is set, each result's OutputStruct value
// is collected into a slice and the slice is passed to utils.JSONify in one
// call. Otherwise each result is written individually via OutputText.
// Errors from either path are logged with glog and do not stop processing.
func outputResults(resultMap map[string]utils.Result) {
	// Map iteration order is random, so gather the keys and sort them.
	names := []string{}
	for name := range resultMap {
		names = append(names, name)
	}
	sort.Strings(names)

	structs := make([]interface{}, len(resultMap))
	for idx, name := range names {
		entry := resultMap[name]
		if !json {
			// Text mode: print each result as soon as it is visited.
			if err := entry.OutputText(name); err != nil {
				glog.Error(err)
			}
			continue
		}
		// JSON mode: defer output until all structs are gathered.
		structs[idx] = entry.OutputStruct()
	}

	if !json {
		return
	}
	if err := utils.JSONify(structs); err != nil {
		glog.Error(err)
	}
}

func cleanupImage(image utils.Image) {
if !reflect.DeepEqual(image, (utils.Image{})) {
glog.Infof("Removing image filesystem directory %s from system", image.FSPath)
Expand Down Expand Up @@ -313,4 +295,5 @@ func init() {
RootCmd.Flags().BoolVarP(&file, "file", "f", false, "Set this flag to use the file differ.")
RootCmd.Flags().BoolVarP(&history, "history", "d", false, "Set this flag to use the dockerfile history differ.")
RootCmd.Flags().BoolVarP(&save, "save", "s", false, "Set this flag to save rather than remove the final image filesystems on exit.")
RootCmd.Flags().BoolVarP(&utils.SortSize, "order", "o", false, "Set this flag to sort any file/package results by descending size. Otherwise, they will be sorted by name.")
}
4 changes: 2 additions & 2 deletions differs/aptDiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ type AptAnalyzer struct {
}

// AptDiff compares the packages installed by apt-get.
func (a AptAnalyzer) Diff(image1, image2 utils.Image) (utils.DiffResult, error) {
func (a AptAnalyzer) Diff(image1, image2 utils.Image) (utils.Result, error) {
diff, err := singleVersionDiff(image1, image2, a)
return diff, err
}

func (a AptAnalyzer) Analyze(image utils.Image) (utils.AnalyzeResult, error) {
func (a AptAnalyzer) Analyze(image utils.Image) (utils.Result, error) {
analysis, err := singleVersionAnalysis(image, a)
return analysis, err
}
Expand Down
12 changes: 6 additions & 6 deletions differs/differs.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ type SingleRequest struct {
}

type Analyzer interface {
Diff(image1, image2 utils.Image) (utils.DiffResult, error)
Analyze(image utils.Image) (utils.AnalyzeResult, error)
Diff(image1, image2 utils.Image) (utils.Result, error)
Analyze(image utils.Image) (utils.Result, error)
}

var analyzers = map[string]Analyzer{
Expand All @@ -33,12 +33,12 @@ var analyzers = map[string]Analyzer{
"node": NodeAnalyzer{},
}

func (req DiffRequest) GetDiff() (map[string]utils.DiffResult, error) {
func (req DiffRequest) GetDiff() (map[string]utils.Result, error) {
img1 := req.Image1
img2 := req.Image2
diffs := req.DiffTypes

results := map[string]utils.DiffResult{}
results := map[string]utils.Result{}
for _, differ := range diffs {
differName := reflect.TypeOf(differ).Name()
if diff, err := differ.Diff(img1, img2); err == nil {
Expand All @@ -58,11 +58,11 @@ func (req DiffRequest) GetDiff() (map[string]utils.DiffResult, error) {
return results, err
}

func (req SingleRequest) GetAnalysis() (map[string]utils.AnalyzeResult, error) {
func (req SingleRequest) GetAnalysis() (map[string]utils.Result, error) {
img := req.Image
analyses := req.AnalyzeTypes

results := map[string]utils.AnalyzeResult{}
results := map[string]utils.Result{}
for _, analyzer := range analyses {
analyzeName := reflect.TypeOf(analyzer).Name()
if analysis, err := analyzer.Analyze(img); err == nil {
Expand Down
4 changes: 2 additions & 2 deletions differs/fileDiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ type FileAnalyzer struct {
}

// FileDiff diffs two packages and compares their contents
func (a FileAnalyzer) Diff(image1, image2 utils.Image) (utils.DiffResult, error) {
func (a FileAnalyzer) Diff(image1, image2 utils.Image) (utils.Result, error) {
diff, err := diffImageFiles(image1, image2)
return &utils.DirDiffResult{
Image1: image1.Source,
Expand All @@ -18,7 +18,7 @@ func (a FileAnalyzer) Diff(image1, image2 utils.Image) (utils.DiffResult, error)
}, err
}

func (a FileAnalyzer) Analyze(image utils.Image) (utils.AnalyzeResult, error) {
func (a FileAnalyzer) Analyze(image utils.Image) (utils.Result, error) {
var result utils.FileAnalyzeResult

imgDir, err := utils.GetDirectory(image.FSPath, true)
Expand Down
Loading