Skip to content

Commit

Permalink
Upgrade tag filtering (#62)
Browse files Browse the repository at this point in the history
This introduces new tag filtering options. The existing behavior for tag filtering only applies to the first element in the tag list, which users have expressed as undesirable. This preserves (but deprecates) this original method and introduces two new methods for tag matching: -tag-filter-any and -tag-filter-all. Both accept a regular expression: "any" matches if one or more tags match the regular expression, whereas "all" matches only if every tag matches the regular expression.
  • Loading branch information
sethvargo authored Nov 30, 2021
1 parent e5cca7b commit ab0c2d0
Show file tree
Hide file tree
Showing 6 changed files with 495 additions and 38 deletions.
33 changes: 25 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# GCR Cleaner

GCR Cleaner deletes untagged images in Google Cloud [Container
GCR Cleaner deletes stale images in Google Cloud [Container
Registry][container-registry] or Google Cloud [Artifact
Registry][artifact-registry]. This can help reduce costs and keep your container
images list in order.
Expand Down Expand Up @@ -169,16 +169,19 @@ The payload is expected to be JSON with the following fields:
the duration will not be deleted. If unspecified, the default is no grace
period (all untagged image refs are deleted).

- `allow_tagged` - If set to true, will check all images including tagged.
If unspecified, the default will only delete untagged images.

- `keep` - If an integer is provided, it will always keep that minimum number
of images. Note that it will not consider images inside the `grace` duration.

- `tag_filter` - Used for tags regexp definition to define pattern to clean,
requires `allow_tagged` must be true. For example: use `-tag-filter "^dev.+$"`
to limit cleaning only on the tags with beginning with is `dev`. The default
is no filtering. The regular expression is parsed according to the [Go regexp package syntax](https://golang.org/pkg/regexp/syntax/).
- `tag_filter_any` - If specified, any image with at **least one tag** that
matches this given regular expression will be deleted. The image will be
deleted even if it has other tags that do not match the given regular
expression. The regular expressions are parsed according to the [Go regexp
package][go-re].

- `tag_filter_all` - If specified, any image where **all tags** match this given
regular expression will be deleted. The image will not be deleted if it has
other tags that do not match the given regular expression. The regular
expressions are parsed according to the [Go regexp package][go-re].

- `dry_run` - If set to true, will not delete anything and outputs what would
have been deleted.
Expand All @@ -194,6 +197,19 @@ The payload is expected to be JSON with the following fields:
filtering, meaning GCR Cleaner must download a manifest of all repositories
to which you have access and then do client-side filtering.

- `tag_filter` (_Deprecated_) - This option is deprecated and only exists to
maintain backwards compatibility with some existing broken behavior. You
should not use it. If specified, any image where **the first tag** matches
this given regular expression will be deleted. The image will not be deleted
if only tags other than the first match the regular expression. The regular expressions are parsed
according to the [Go regexp package][go-re].

- `allow_tagged` (_Deprecated_) - This option is deprecated and has no effect.
By default, GCR Cleaner will not delete tagged images. To delete tagged
images, specify `tag_filter_any` or `tag_filter_all`. Specifying either of
these will enable deletion by tag.


## Running locally

In addition to the server, you can also run GCR Cleaner locally for one-off tasks using `cmd/gcr-cleaner-cli`:
Expand Down Expand Up @@ -246,3 +262,4 @@ This library is licensed under Apache 2.0. Full license text is available in
[container-registry]: https://cloud.google.com/container-registry
[gcr-cleaner-godoc]: https://godoc.org/github.com/sethvargo/gcr-cleaner/pkg/gcrcleaner
[gcrgc.sh]: https://gist.github.com/ahmetb/7ce6d741bd5baa194a3fac6b1fec8bb7
[go-re]: https://golang.org/pkg/regexp/syntax/
49 changes: 36 additions & 13 deletions cmd/gcr-cleaner-cli/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"fmt"
"os"
"os/signal"
"regexp"
"runtime"
"sort"
"strings"
Expand All @@ -40,13 +39,18 @@ var (

reposMap = make(map[string]struct{}, 4)

tokenPtr = flag.String("token", os.Getenv("GCRCLEANER_TOKEN"), "Authentication token")
recursivePtr = flag.Bool("recursive", false, "Clean all sub-repositories under the -repo root")
gracePtr = flag.Duration("grace", 0, "Grace period")
allowTaggedPtr = flag.Bool("allow-tagged", false, "Delete tagged images")
keepPtr = flag.Int("keep", 0, "Minimum to keep")
tagFilterPtr = flag.String("tag-filter", "", "Tags pattern to clean")
dryRunPtr = flag.Bool("dry-run", false, "Do a noop on delete api call")
tokenPtr = flag.String("token", os.Getenv("GCRCLEANER_TOKEN"), "Authentication token")
recursivePtr = flag.Bool("recursive", false, "Clean all sub-repositories under the -repo root")
gracePtr = flag.Duration("grace", 0, "Grace period")
tagFilterAny = flag.String("tag-filter-any", "", "With -allow-tagged, delete images where any tag matches this regular expression")
tagFilterAll = flag.String("tag-filter-all", "", "With -allow-tagged, delete images where all tags match this regular expression")
keepPtr = flag.Int("keep", 0, "Minimum to keep")
dryRunPtr = flag.Bool("dry-run", false, "Do a noop on delete api call")

// tagFilterFirstPtr (-tag-filter) and allowTaggedPtr (-allow-tagged) are deprecated
// TODO(sethvargo): remove before 1.0.0
allowTaggedPtr = flag.Bool("allow-tagged", false, "DEPRECATED: Delete tagged images")
tagFilterFirstPtr = flag.String("tag-filter", "", "DEPRECATED: Tags pattern to clean")
)

func main() {
Expand All @@ -63,6 +67,22 @@ func main() {
return nil
})

flag.Usage = func() {
w := flag.CommandLine.Output()
fmt.Fprintf(w, "Usage of %s:\n\n", os.Args[0])
fmt.Fprintf(w, " Deletes untagged or stale images from a Docker registry.\n\n")
fmt.Fprintf(w, "Options:\n\n")

flag.VisitAll(func(f *flag.Flag) {
if strings.HasPrefix(f.Usage, "DEPRECATED") {
return
}

fmt.Fprintf(w, " -%v\n", f.Name)
fmt.Fprintf(w, " %s\n\n", f.Usage)
})
}

flag.Parse()

if err := realMain(ctx); err != nil {
Expand All @@ -88,13 +108,16 @@ func realMain(ctx context.Context) error {
}
sort.Strings(repos)

if !*allowTaggedPtr && *tagFilterPtr != "" {
return fmt.Errorf("-allow-tagged must be true when -tag-filter is declared")
if *allowTaggedPtr {
fmt.Fprintf(stderr, "DEPRECATION: -allow-tagged is deprecated, specifying any tags will enable deleting of tagged images\n")
}
if *tagFilterFirstPtr != "" {
fmt.Fprintf(stderr, "DEPRECATION: -tag-filter is deprecated, use -tag-filter-any or -tag-filter-all instead\n")
}

tagFilterRegexp, err := regexp.Compile(*tagFilterPtr)
tagFilter, err := gcrcleaner.BuildTagFilter(*tagFilterFirstPtr, *tagFilterAny, *tagFilterAll)
if err != nil {
return fmt.Errorf("failed to parse -tag-filter: %w", err)
return fmt.Errorf("failed to parse tag filter: %w", err)
}

// Try to find the "best" authentication.
Expand Down Expand Up @@ -147,7 +170,7 @@ func realMain(ctx context.Context) error {
var result *multierror.Error
for i, repo := range repos {
fmt.Fprintf(stdout, "%s\n", repo)
deleted, err := cleaner.Clean(repo, since, *allowTaggedPtr, *keepPtr, tagFilterRegexp, *dryRunPtr)
deleted, err := cleaner.Clean(repo, since, *keepPtr, tagFilter, *dryRunPtr)
if err != nil {
result = multierror.Append(result, err)
}
Expand Down
31 changes: 24 additions & 7 deletions pkg/gcrcleaner/cleaner.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"context"
"errors"
"fmt"
"regexp"
"sort"
"strings"
"sync"
Expand Down Expand Up @@ -49,7 +48,7 @@ func NewCleaner(auther gcrauthn.Authenticator, c int) (*Cleaner, error) {

// Clean deletes old images from GCR that are (un)tagged and older than "since"
// and higher than the "keep" amount.
func (c *Cleaner) Clean(repo string, since time.Time, allowTagged bool, keep int, tagFilterRegexp *regexp.Regexp, dryRun bool) ([]string, error) {
func (c *Cleaner) Clean(repo string, since time.Time, keep int, tagFilter TagFilter, dryRun bool) ([]string, error) {
gcrrepo, err := gcrname.NewRepository(repo)
if err != nil {
return nil, fmt.Errorf("failed to get repo %s: %w", repo, err)
Expand Down Expand Up @@ -79,7 +78,7 @@ func (c *Cleaner) Clean(repo string, since time.Time, allowTagged bool, keep int
})

for _, m := range manifests {
if c.shouldDelete(m.Info, since, allowTagged, tagFilterRegexp) {
if c.shouldDelete(m.Info, since, tagFilter) {
// Keep a certain amount of images
if keepCount < keep {
keepCount++
Expand Down Expand Up @@ -169,10 +168,28 @@ func (c *Cleaner) deleteOne(ref gcrname.Reference) error {
return nil
}

// shouldDelete returns true if the manifest has no tags or allows deletion of tagged images
// and is before the requested time.
func (c *Cleaner) shouldDelete(m gcrgoogle.ManifestInfo, since time.Time, allowTag bool, tagFilterRegexp *regexp.Regexp) bool {
return (len(m.Tags) == 0 || (allowTag && tagFilterRegexp.MatchString(m.Tags[0]))) && m.Uploaded.UTC().Before(since)
// shouldDelete reports whether the manifest is a deletion candidate. A manifest
// qualifies when it was not uploaded after "since" and it either carries no
// tags at all or its tags satisfy the given tag filter.
func (c *Cleaner) shouldDelete(m gcrgoogle.ManifestInfo, since time.Time, tagFilter TagFilter) bool {
	// Anything uploaded strictly after the cutoff is never deleted. The
	// comparison is strict, so a manifest uploaded exactly at "since" remains a
	// candidate.
	if uploaded := m.Uploaded.UTC(); uploaded.After(since) {
		return false
	}

	// Untagged manifests are always candidates. The filter is only consulted
	// for tagged manifests, so it never sees an empty tag list from here.
	return len(m.Tags) == 0 || tagFilter.Matches(m.Tags)
}

func (c *Cleaner) ListChildRepositories(ctx context.Context, rootRepository string) ([]string, error) {
Expand Down
125 changes: 125 additions & 0 deletions pkg/gcrcleaner/filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
// Copyright 2021 The GCR Cleaner Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package gcrcleaner

import (
"fmt"
"regexp"
)

// TagFilter is an interface which defines whether a given set of tags matches
// the filter. Implementations in this file wrap a compiled regular expression
// and differ only in which of the tags must match it.
type TagFilter interface {
// Matches reports whether the given list of tags satisfies the filter.
Matches(tags []string) bool
}

// BuildTagFilter compiles at most one of the given regular expressions into a
// TagFilter. An empty string means the corresponding filter type was not
// requested, and the three inputs are mutually exclusive:
//
//   - first yields a *TagFilterFirst,
//   - any yields a *TagFilterAny,
//   - all yields a *TagFilterAll.
//
// It returns an error when more than one input is non-empty or when the
// selected expression fails to compile. When every input is empty it returns a
// *TagFilterNull.
func BuildTagFilter(first, any, all string) (TagFilter, error) {
	// Tally the non-empty inputs; the filter types cannot be combined.
	provided := 0
	for _, expr := range []string{first, any, all} {
		if expr != "" {
			provided++
		}
	}
	if provided > 1 {
		return nil, fmt.Errorf("only one tag filter type may be specified")
	}

	if first != "" {
		compiled, err := regexp.Compile(first)
		if err != nil {
			return nil, fmt.Errorf("failed to compile tag_filter regular expression %q: %w", first, err)
		}
		return &TagFilterFirst{compiled}, nil
	}

	if any != "" {
		compiled, err := regexp.Compile(any)
		if err != nil {
			return nil, fmt.Errorf("failed to compile tag_filter_any regular expression %q: %w", any, err)
		}
		return &TagFilterAny{compiled}, nil
	}

	if all != "" {
		compiled, err := regexp.Compile(all)
		if err != nil {
			return nil, fmt.Errorf("failed to compile tag_filter_all regular expression %q: %w", all, err)
		}
		return &TagFilterAll{compiled}, nil
	}

	// Nothing was requested: fall back to the null filter, which reports false
	// for every tag list. This preserves the "allow_tagged" behavior.
	return &TagFilterNull{}, nil
}

// Compile-time assertion that *TagFilterNull implements TagFilter.
var _ TagFilter = (*TagFilterNull)(nil)

// TagFilterNull is a filter that rejects every tag list.
type TagFilterNull struct{}

// Matches ignores its argument and always reports false.
func (f *TagFilterNull) Matches(_ []string) bool { return false }

// Compile-time assertion that *TagFilterFirst implements TagFilter.
var _ TagFilter = (*TagFilterFirst)(nil)

// TagFilterFirst inspects only the first tag in the list; every other tag is
// ignored.
type TagFilterFirst struct {
	re *regexp.Regexp
}

// Matches reports whether the first element of tags matches the regular
// expression. It reports false when tags is empty or when no expression has
// been set.
func (f *TagFilterFirst) Matches(tags []string) bool {
	return f.re != nil && len(tags) > 0 && f.re.MatchString(tags[0])
}

// Compile-time assertion that *TagFilterAny implements TagFilter.
var _ TagFilter = (*TagFilterAny)(nil)

// TagFilterAny considers the whole tag list and is satisfied as soon as a
// single tag matches the regular expression.
type TagFilterAny struct {
	re *regexp.Regexp
}

// Matches reports whether at least one element of tags matches the regular
// expression. It reports false when tags is empty, when nothing matches, or
// when no expression has been set.
func (f *TagFilterAny) Matches(tags []string) bool {
	found := false
	if f.re != nil {
		// Stop scanning at the first hit.
		for i := 0; i < len(tags) && !found; i++ {
			found = f.re.MatchString(tags[i])
		}
	}
	return found
}

// Compile-time assertion that *TagFilterAll implements TagFilter.
var _ TagFilter = (*TagFilterAll)(nil)

// TagFilterAll considers the whole tag list and is satisfied only when every
// tag matches the regular expression.
type TagFilterAll struct {
	re *regexp.Regexp
}

// Matches reports whether every element of tags matches the regular
// expression. It reports false as soon as one tag does not match, and also when
// no expression has been set. With an expression set, an empty tag list yields
// true because there is no tag that fails to match.
func (f *TagFilterAll) Matches(tags []string) bool {
	ok := f.re != nil
	// Stop scanning at the first tag that does not match.
	for i := 0; ok && i < len(tags); i++ {
		ok = f.re.MatchString(tags[i])
	}
	return ok
}
Loading

0 comments on commit ab0c2d0

Please sign in to comment.