Skip to content

Commit 0be63f2

Browse files
authored
Merge pull request #170 from sfdc-pcg/refactor_and_test
Add GCS Getter
2 parents 9363991 + 6adf0de commit 0be63f2

File tree

9 files changed

+609
-6
lines changed

9 files changed

+609
-6
lines changed

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ can be augmented at runtime by implementing the `Getter` interface.
7171
* Mercurial
7272
* HTTP
7373
* Amazon S3
74+
* Google Cloud Storage (GCS)
7475

7576
In addition to the above protocols, go-getter has what are called "detectors."
7677
These take a URL and attempt to automatically choose the best protocol for
@@ -334,3 +335,14 @@ Some examples for these addressing schemes:
334335
- bucket.s3-eu-west-1.amazonaws.com/foo/bar
335336
- "s3::http://127.0.0.1:9000/test-bucket/hello.txt?aws_access_key_id=KEYID&aws_access_key_secret=SECRETKEY&region=us-east-2"
336337

338+
### GCS (`gcs`)
339+
340+
#### GCS Authentication
341+
342+
In order to access GCS, authentication credentials must be provided. More information can be found [here](https://cloud.google.com/docs/authentication/getting-started).
343+
344+
#### GCS Bucket Examples
345+
346+
- gcs::https://www.googleapis.com/storage/v1/bucket
347+
- gcs::https://www.googleapis.com/storage/v1/bucket/foo.zip
348+
- www.googleapis.com/storage/v1/bucket/foo

detect.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ func init() {
2626
new(GitDetector),
2727
new(BitBucketDetector),
2828
new(S3Detector),
29+
new(GCSDetector),
2930
new(FileDetector),
3031
}
3132
}

detect_gcs.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package getter
2+
3+
import (
4+
"fmt"
5+
"net/url"
6+
"strings"
7+
)
8+
9+
// GCSDetector implements Detector to detect GCS URLs and turn
10+
// them into URLs that the GCSGetter can understand.
11+
// It has no fields, so the zero value is ready to use.
type GCSDetector struct{}
12+
13+
func (d *GCSDetector) Detect(src, _ string) (string, bool, error) {
14+
if len(src) == 0 {
15+
return "", false, nil
16+
}
17+
18+
if strings.Contains(src, "googleapis.com/") {
19+
return d.detectHTTP(src)
20+
}
21+
22+
return "", false, nil
23+
}
24+
25+
func (d *GCSDetector) detectHTTP(src string) (string, bool, error) {
26+
27+
parts := strings.Split(src, "/")
28+
if len(parts) < 5 {
29+
return "", false, fmt.Errorf(
30+
"URL is not a valid GCS URL")
31+
}
32+
version := parts[2]
33+
bucket := parts[3]
34+
object := strings.Join(parts[4:], "/")
35+
36+
url, err := url.Parse(fmt.Sprintf("https://www.googleapis.com/storage/%s/%s/%s",
37+
version, bucket, object))
38+
if err != nil {
39+
return "", false, fmt.Errorf("error parsing GCS URL: %s", err)
40+
}
41+
42+
return "gcs::" + url.String(), true, nil
43+
}

detect_gcs_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package getter
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestGCSDetector(t *testing.T) {
8+
cases := []struct {
9+
Input string
10+
Output string
11+
}{
12+
{
13+
"www.googleapis.com/storage/v1/bucket/foo",
14+
"gcs::https://www.googleapis.com/storage/v1/bucket/foo",
15+
},
16+
{
17+
"www.googleapis.com/storage/v1/bucket/foo/bar",
18+
"gcs::https://www.googleapis.com/storage/v1/bucket/foo/bar",
19+
},
20+
{
21+
"www.googleapis.com/storage/v1/foo/bar.baz",
22+
"gcs::https://www.googleapis.com/storage/v1/foo/bar.baz",
23+
},
24+
}
25+
26+
pwd := "/pwd"
27+
f := new(GCSDetector)
28+
for i, tc := range cases {
29+
output, ok, err := f.Detect(tc.Input, pwd)
30+
if err != nil {
31+
t.Fatalf("err: %s", err)
32+
}
33+
if !ok {
34+
t.Fatal("not ok")
35+
}
36+
37+
if output != tc.Output {
38+
t.Fatalf("%d: bad: %#v", i, output)
39+
}
40+
}
41+
}

get.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ func init() {
6767
Getters = map[string]Getter{
6868
"file": new(FileGetter),
6969
"git": new(GitGetter),
70+
"gcs": new(GCSGetter),
7071
"hg": new(HgGetter),
7172
"s3": new(S3Getter),
7273
"http": httpGetter,

get_gcs.go

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
package getter
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/url"
7+
"os"
8+
"path/filepath"
9+
"strings"
10+
11+
"cloud.google.com/go/storage"
12+
"google.golang.org/api/iterator"
13+
)
14+
15+
// GCSGetter is a Getter implementation that will download a module from
16+
// a GCS bucket.
17+
type GCSGetter struct {
18+
// getter is embedded from elsewhere in the package; presumably it
// supplies the Context method the GCSGetter methods call — TODO confirm.
getter
19+
}
20+
21+
func (g *GCSGetter) ClientMode(u *url.URL) (ClientMode, error) {
22+
ctx := g.Context()
23+
24+
// Parse URL
25+
bucket, object, err := g.parseURL(u)
26+
if err != nil {
27+
return 0, err
28+
}
29+
30+
client, err := storage.NewClient(ctx)
31+
if err != nil {
32+
return 0, err
33+
}
34+
iter := client.Bucket(bucket).Objects(ctx, &storage.Query{Prefix: object})
35+
for {
36+
obj, err := iter.Next()
37+
if err != nil && err != iterator.Done {
38+
return 0, err
39+
}
40+
41+
if err == iterator.Done {
42+
break
43+
}
44+
if strings.HasSuffix(obj.Name, "/") {
45+
// A directory matched the prefix search, so this must be a directory
46+
return ClientModeDir, nil
47+
} else if obj.Name != object {
48+
// A file matched the prefix search and doesn't have the same name
49+
// as the query, so this must be a directory
50+
return ClientModeDir, nil
51+
}
52+
}
53+
// There are no directories or subdirectories, and if a match was returned,
54+
// it was exactly equal to the prefix search. So return File mode
55+
return ClientModeFile, nil
56+
}
57+
58+
func (g *GCSGetter) Get(dst string, u *url.URL) error {
59+
ctx := g.Context()
60+
61+
// Parse URL
62+
bucket, object, err := g.parseURL(u)
63+
if err != nil {
64+
return err
65+
}
66+
67+
// Remove destination if it already exists
68+
_, err = os.Stat(dst)
69+
if err != nil && !os.IsNotExist(err) {
70+
return err
71+
}
72+
if err == nil {
73+
// Remove the destination
74+
if err := os.RemoveAll(dst); err != nil {
75+
return err
76+
}
77+
}
78+
79+
// Create all the parent directories
80+
if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
81+
return err
82+
}
83+
84+
client, err := storage.NewClient(ctx)
85+
if err != nil {
86+
return err
87+
}
88+
89+
// Iterate through all matching objects.
90+
iter := client.Bucket(bucket).Objects(ctx, &storage.Query{Prefix: object})
91+
for {
92+
obj, err := iter.Next()
93+
if err != nil && err != iterator.Done {
94+
return err
95+
}
96+
if err == iterator.Done {
97+
break
98+
}
99+
100+
if !strings.HasSuffix(obj.Name, "/") {
101+
// Get the object destination path
102+
objDst, err := filepath.Rel(object, obj.Name)
103+
if err != nil {
104+
return err
105+
}
106+
objDst = filepath.Join(dst, objDst)
107+
// Download the matching object.
108+
err = g.getObject(ctx, client, objDst, bucket, obj.Name)
109+
if err != nil {
110+
return err
111+
}
112+
}
113+
}
114+
return nil
115+
}
116+
117+
func (g *GCSGetter) GetFile(dst string, u *url.URL) error {
118+
ctx := g.Context()
119+
120+
// Parse URL
121+
bucket, object, err := g.parseURL(u)
122+
if err != nil {
123+
return err
124+
}
125+
126+
client, err := storage.NewClient(ctx)
127+
if err != nil {
128+
return err
129+
}
130+
return g.getObject(ctx, client, dst, bucket, object)
131+
}
132+
133+
func (g *GCSGetter) getObject(ctx context.Context, client *storage.Client, dst, bucket, object string) error {
134+
rc, err := client.Bucket(bucket).Object(object).NewReader(ctx)
135+
if err != nil {
136+
return err
137+
}
138+
defer rc.Close()
139+
140+
// Create all the parent directories
141+
if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
142+
return err
143+
}
144+
145+
f, err := os.Create(dst)
146+
if err != nil {
147+
return err
148+
}
149+
defer f.Close()
150+
151+
_, err = Copy(ctx, f, rc)
152+
return err
153+
}
154+
155+
func (g *GCSGetter) parseURL(u *url.URL) (bucket, path string, err error) {
156+
if strings.Contains(u.Host, "googleapis.com") {
157+
hostParts := strings.Split(u.Host, ".")
158+
if len(hostParts) != 3 {
159+
err = fmt.Errorf("URL is not a valid GCS URL")
160+
return
161+
}
162+
163+
pathParts := strings.SplitN(u.Path, "/", 5)
164+
if len(pathParts) != 5 {
165+
err = fmt.Errorf("URL is not a valid GCS URL")
166+
return
167+
}
168+
bucket = pathParts[3]
169+
path = pathParts[4]
170+
}
171+
return
172+
}

0 commit comments

Comments
 (0)