17/12/2021 - AWS, GO
The default AWS S3 list API has a hard limit of 1,000 objects per request when listing a bucket, but the V2 API can be paginated to overcome this limit. See the example below, where we fetch only the files whose names match a certain structure: yyyy-mm-dd-hh-mm-ss.json. You can drop the match and fetch everything if you wish.
// keyRegex matches object keys of the form yyyy-mm-dd-hh-mm-ss.json: a
// four-digit group followed by five two-digit groups, dash-separated, with the
// pattern anchored at both ends so the whole key must match.
var keyRegex = regexp.MustCompile(`^\d{4}-\d{2}-\d{2}-\d{2}-\d{2}-\d{2}\.json$`)
// S3 wraps an S3 SDK client; its methods issue their requests through it.
type S3 struct {
// client is the SDK client used for all bucket operations.
client *s3.S3
}
// FetchSomeObjects lists the objects in bucket and returns one
// domain.SomeObject for every key that matches keyRegex
// (yyyy-mm-dd-hh-mm-ss.json).
//
// The listing is paginated through ListObjectsV2PagesWithContext and stops
// after at most 100 pages; keys on later pages are not returned. If the
// listing call fails, its error is returned unchanged together with a nil
// slice.
func (s S3) FetchSomeObjects(ctx context.Context, bucket string) ([]*domain.SomeObject, error) {
	// maxPages caps how many result pages are walked before the listing stops.
	const maxPages = 100

	var (
		pageNum int
		objects []*domain.SomeObject
	)

	err := s.client.ListObjectsV2PagesWithContext(ctx,
		// Bucket is a *string in the SDK input struct, so pass the address of
		// the parameter rather than the string value itself.
		&s3.ListObjectsV2Input{Bucket: &bucket},
		func(page *s3.ListObjectsV2Output, isLastPage bool) bool {
			pageNum++
			for _, object := range page.Contents {
				// Key is a pointer; skip entries without one instead of
				// dereferencing nil.
				if object == nil || object.Key == nil {
					continue
				}
				if keyRegex.MatchString(*object.Key) {
					objects = append(objects, &domain.SomeObject{Key: *object.Key})
				}
			}
			// Returning false tells the paginator to stop requesting pages.
			return pageNum < maxPages
		},
	)
	if err != nil {
		return nil, err
	}
	return objects, nil
}