Unverified Commit 12f16c8c authored by He Weiwei, committed by GitHub
Browse files

feat(scan): support to scan image index (#11001)


Signed-off-by: He Weiwei <hweiwei@vmware.com>
parent 6a25e6b2
......@@ -10,6 +10,7 @@ schemes:
basePath: /api/v2.0
produces:
- application/json
- text/plain
consumes:
- application/json
securityDefinitions:
......@@ -333,7 +334,7 @@ paths:
summary: Scan the artifact
description: Scan the specified artifact
tags:
- artifact
- scan
operationId: scanArtifact
parameters:
- $ref: '#/parameters/requestId'
......@@ -351,6 +352,38 @@ paths:
$ref: '#/responses/404'
'500':
$ref: '#/responses/500'
# Scan report log endpoint: GET returns the log of one scan report as plain text.
# The report is addressed by project name, repository name, artifact reference and report id.
/projects/{project_name}/repositories/{repository_name}/artifacts/{reference}/scan/{report_id}/log:
get:
summary: Get the log of the scan report
description: Get the log of the scan report
tags:
- scan
operationId: getReportLog
produces:
- text/plain
parameters:
- $ref: '#/parameters/requestId'
- $ref: '#/parameters/projectName'
- $ref: '#/parameters/repositoryName'
- $ref: '#/parameters/reference'
- name: report_id
type: string
in: path
required: true
description: The report id to get the log
responses:
'200':
description: Successfully get scan log file
schema:
type: string
'401':
$ref: '#/responses/401'
'403':
$ref: '#/responses/403'
'404':
$ref: '#/responses/404'
'500':
$ref: '#/responses/500'
/projects/{project_name}/repositories/{repository_name}/artifacts/{reference}/tags:
post:
summary: Create tag
......@@ -989,6 +1022,10 @@ definitions:
format: date-time
description: 'The end time of the scan process that generating report'
example: '2006-01-02T15:04:05'
complete_percent:
type: integer
description: 'The complete percent of the scanning which value is between 0 and 100'
example: 100
VulnerabilitySummary:
type: object
description: |
......
......@@ -22,7 +22,6 @@ import (
"github.com/goharbor/harbor/src/common/models"
"github.com/goharbor/harbor/src/common/utils/log"
"github.com/goharbor/harbor/src/pkg/q"
v1 "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
"github.com/pkg/errors"
)
......@@ -36,18 +35,9 @@ func HandleCheckIn(ctx context.Context, checkIn string) {
batchSize := 50
for repo := range fetchRepositories(ctx, batchSize) {
for artifact := range fetchArtifacts(ctx, repo.RepositoryID, batchSize) {
for _, tag := range artifact.Tags {
art := &v1.Artifact{
NamespaceID: artifact.ProjectID,
Repository: repo.Name,
Tag: tag.Name,
Digest: artifact.Digest,
MimeType: artifact.ManifestMediaType,
}
if err := DefaultController.Scan(art, WithRequester(checkIn)); err != nil {
// Just logged
log.Error(errors.Wrap(err, "handle check in"))
}
if err := DefaultController.Scan(ctx, artifact, WithRequester(checkIn)); err != nil {
// Just logged
log.Error(errors.Wrap(err, "handle check in"))
}
}
}
......@@ -67,7 +57,7 @@ func fetchArtifacts(ctx context.Context, repositoryID int64, chunkSize int) <-ch
}
for {
artifacts, err := artifact.Ctl.List(ctx, query, &artifact.Option{WithTag: true})
artifacts, err := artifact.Ctl.List(ctx, query, nil)
if err != nil {
log.Errorf("[scan all]: list artifacts failed, error: %v", err)
return
......
......@@ -15,9 +15,12 @@
package scan
import (
"context"
"encoding/base64"
"fmt"
"sync"
ar "github.com/goharbor/harbor/src/api/artifact"
sc "github.com/goharbor/harbor/src/api/scanner"
cj "github.com/goharbor/harbor/src/common/job"
jm "github.com/goharbor/harbor/src/common/job/models"
......@@ -62,6 +65,8 @@ type jcGetter func() cj.Client
type basicController struct {
// Manage the scan report records
manager report.Manager
// Artifact controller
ar ar.Controller
// Scanner controller
sc sc.Controller
// Robot account controller
......@@ -79,6 +84,8 @@ func NewController() Controller {
return &basicController{
// New report manager
manager: report.NewManager(),
// Refer to the default artifact controller
ar: ar.Ctl,
// Refer to the default scanner controller
sc: sc.DefaultController,
// Refer to the default robot account controller
......@@ -110,26 +117,57 @@ func NewController() Controller {
}
}
// collectScanningArtifacts walks the tree rooted at artifact and gathers the
// artifacts that take part in a scan with the registration r.
//
// For every visited artifact:
//   - if the scanner has the capability for it, it is collected, the whole
//     tree is marked scannable, and its children are not walked;
//   - if the scanner lacks the capability and the artifact has children
//     (e.g. an image index), it is not collected and the walk goes on into
//     its children;
//   - if the scanner lacks the capability and the artifact has no children,
//     it is still collected.
//
// It returns the collected artifacts, whether at least one visited artifact is
// supported by the scanner, and any error reported by the artifact walk.
func (bc *basicController) collectScanningArtifacts(ctx context.Context, r *scanner.Registration, artifact *ar.Artifact) ([]*ar.Artifact, bool, error) {
	var (
		collected []*ar.Artifact
		supported bool
	)

	visit := func(a *ar.Artifact) error {
		if HasCapability(r, a) {
			// Supported directly by the scanner: keep it and do not walk its children.
			collected = append(collected, a)
			supported = true
			return ar.ErrSkip
		}

		if a.HasChildren() {
			// Unsupported parent: leave it out and keep walking its children.
			return nil
		}

		// Unsupported artifact without children: still part of the result.
		collected = append(collected, a)
		return nil
	}

	if err := bc.ar.Walk(ctx, artifact, visit, nil); err != nil {
		return nil, false, err
	}

	return collected, supported, nil
}
// Scan ...
func (bc *basicController) Scan(artifact *v1.Artifact, options ...Option) error {
func (bc *basicController) Scan(ctx context.Context, artifact *ar.Artifact, options ...Option) error {
if artifact == nil {
return errors.New("nil artifact to scan")
}
// Parse options
ops, err := parseOptions(options...)
if err != nil {
return errors.Wrap(err, "scan controller: scan")
}
r, err := bc.sc.GetRegistrationByProject(artifact.NamespaceID)
r, err := bc.sc.GetRegistrationByProject(artifact.ProjectID)
if err != nil {
return errors.Wrap(err, "scan controller: scan")
}
// In case it does not exist
if r == nil {
return errs.WithCode(errs.PreconditionFailed, errs.Errorf("no available scanner for project: %d", artifact.NamespaceID))
return errs.WithCode(errs.PreconditionFailed, errs.Errorf("no available scanner for project: %d", artifact.ProjectID))
}
// Check if it is disabled
......@@ -137,91 +175,114 @@ func (bc *basicController) Scan(artifact *v1.Artifact, options ...Option) error
return errs.WithCode(errs.PreconditionFailed, errs.Errorf("scanner %s is disabled", r.Name))
}
// Check the health of the registration by ping.
// The metadata of the scanner adapter is also returned.
meta, err := bc.sc.Ping(r)
artifacts, scannable, err := bc.collectScanningArtifacts(ctx, r, artifact)
if err != nil {
return errors.Wrap(err, "scan controller: scan")
return err
}
// Generate a UUID as track ID which groups the report records generated
// by the specified registration for the digest with given mime type.
trackID, err := bc.uuid()
if err != nil {
return errors.Wrap(err, "scan controller: scan")
if !scannable {
return errors.Errorf("the configured scanner %s does not support scanning artifact with mime type %s", r.Name, artifact.ManifestMediaType)
}
type Param struct {
Artifact *ar.Artifact
TrackID string
ProducesMimes []string
}
producesMimes := make([]string, 0)
matched := false
statusConflict := false
for _, ca := range meta.Capabilities {
for _, cm := range ca.ConsumesMimeTypes {
if cm == artifact.MimeType {
matched = true
break
params := []*Param{}
var errs []error
for _, art := range artifacts {
trackID, producesMimes, err := bc.makeReportPlaceholder(ctx, r, art, options...)
if err != nil {
if ierror.IsConflictErr(err) {
errs = append(errs, err)
} else {
return err
}
}
if matched {
for _, pm := range ca.ProducesMimeTypes {
// Create report placeholder first
reportPlaceholder := &scan.Report{
Digest: artifact.Digest,
RegistrationUUID: r.UUID,
Status: job.PendingStatus.String(),
StatusCode: job.PendingStatus.Code(),
TrackID: trackID,
MimeType: pm,
}
// Set requester if it is specified
if len(ops.Requester) > 0 {
reportPlaceholder.Requester = ops.Requester
} else {
// Use the trackID as the requester
reportPlaceholder.Requester = trackID
}
_, e := bc.manager.Create(reportPlaceholder)
if e != nil {
// Check if it is a status conflict error with common error format.
// Common error returned if and only if status conflicts.
if !statusConflict {
statusConflict = errs.AsError(e, errs.Conflict)
}
// Recorded by error wrap and logged at the same time.
if err == nil {
err = e
} else {
err = errors.Wrap(e, err.Error())
}
logger.Error(errors.Wrap(e, "scan controller: scan"))
continue
}
if len(producesMimes) > 0 {
params = append(params, &Param{Artifact: art, TrackID: trackID, ProducesMimes: producesMimes})
}
}
producesMimes = append(producesMimes, pm)
}
// all report placeholder conflicted
if len(errs) == len(artifacts) {
return errs[0]
}
break
errs = errs[:0]
for _, param := range params {
if err := bc.scanArtifact(ctx, r, param.Artifact, param.TrackID, param.ProducesMimes); err != nil {
log.Warningf("scan artifact %s@%s failed, error: %v", artifact.RepositoryName, artifact.Digest, err)
errs = append(errs, err)
}
}
// Scanner does not support scanning the given artifact.
if !matched {
return errors.Errorf("the configured scanner %s does not support scanning artifact with mime type %s", r.Name, artifact.MimeType)
// all scanning of the artifacts failed
if len(errs) == len(params) {
return fmt.Errorf("scan artifact %s@%s failed", artifact.RepositoryName, artifact.Digest)
}
// If all the record are created failed.
if len(producesMimes) == 0 {
// Return the last error
if statusConflict {
return errs.WithCode(errs.Conflict, errs.Wrap(err, "scan controller: scan"))
return nil
}
// makeReportPlaceholder creates the report records that track a scan of art
// with the registration r.
//
// A fresh track ID is generated first. The requester stored on every record is
// the one given through options, or the track ID when none is specified.
//
// When HasCapability reports that r supports art, one record in pending status
// is created for each mime type returned by r.GetProducesMimeTypes for the
// artifact's manifest media type; the track ID and those mime types are
// returned. Creation stops at the first failure and that error is returned
// as-is (not wrapped), so the caller can inspect its kind.
//
// When r does not support art, or no mime type is produced, a single record
// with mime type v1.MimeTypeNativeReport in error status is created, and an
// empty track ID with nil mime types is returned together with the result of
// that creation.
func (bc *basicController) makeReportPlaceholder(ctx context.Context, r *scanner.Registration, art *ar.Artifact, options ...Option) (string, []string, error) {
	trackID, err := bc.uuid()
	if err != nil {
		return "", nil, errors.Wrap(err, "scan controller: scan")
	}

	// Parse options
	ops, err := parseOptions(options...)
	if err != nil {
		return "", nil, errors.Wrap(err, "scan controller: scan")
	}

	// Use the specified requester, falling back to the track ID.
	requester := ops.Requester
	if len(requester) == 0 {
		requester = trackID
	}

	// create persists one report record for art with the given mime type and status.
	// It must reach bc.manager.Create: returning before that call would report
	// success without any record having been stored.
	create := func(mimeType string, status job.Status) error {
		_, e := bc.manager.Create(&scan.Report{
			Digest:           art.Digest,
			RegistrationUUID: r.UUID,
			Status:           status.String(),
			StatusCode:       status.Code(),
			TrackID:          trackID,
			MimeType:         mimeType,
			Requester:        requester,
		})
		return e
	}

	if HasCapability(r, art) {
		var producesMimes []string

		for _, pm := range r.GetProducesMimeTypes(art.ManifestMediaType) {
			if err = create(pm, job.PendingStatus); err != nil {
				return "", nil, err
			}

			producesMimes = append(producesMimes, pm)
		}

		if len(producesMimes) > 0 {
			return trackID, producesMimes, nil
		}
	}

	// Nothing can be scanned for this artifact: record it in error status.
	err = create(v1.MimeTypeNativeReport, job.ErrorStatus)

	return "", nil, err
}
func (bc *basicController) scanArtifact(ctx context.Context, r *scanner.Registration, artifact *ar.Artifact, trackID string, producesMimes []string) error {
jobID, err := bc.launchScanJob(trackID, artifact, r, producesMimes)
if err != nil {
// Update the status to the concrete error
......@@ -243,7 +304,7 @@ func (bc *basicController) Scan(artifact *v1.Artifact, options ...Option) error
}
// GetReport ...
func (bc *basicController) GetReport(artifact *v1.Artifact, mimeTypes []string) ([]*scan.Report, error) {
func (bc *basicController) GetReport(ctx context.Context, artifact *ar.Artifact, mimeTypes []string) ([]*scan.Report, error) {
if artifact == nil {
return nil, errors.New("no way to get report for nil artifact")
}
......@@ -256,26 +317,67 @@ func (bc *basicController) GetReport(artifact *v1.Artifact, mimeTypes []string)
}
// Get current scanner settings
r, err := bc.sc.GetRegistrationByProject(artifact.NamespaceID)
r, err := bc.sc.GetRegistrationByProject(artifact.ProjectID)
if err != nil {
return nil, errors.Wrap(err, "scan controller: get report")
}
if r == nil {
return nil, ierror.NotFoundError(nil).WithMessage("no scanner registration configured for project: %d", artifact.NamespaceID)
return nil, ierror.NotFoundError(nil).WithMessage("no scanner registration configured for project: %d", artifact.ProjectID)
}
artifacts, scannable, err := bc.collectScanningArtifacts(ctx, r, artifact)
if err != nil {
return nil, err
}
if !scannable {
return nil, ierror.NotFoundError(nil).WithMessage("report not found for %s@%s", artifact.RepositoryName, artifact.Digest)
}
groupReports := make([][]*scan.Report, len(artifacts))
var wg sync.WaitGroup
for i, a := range artifacts {
wg.Add(1)
go func(i int, a *ar.Artifact) {
defer wg.Done()
reports, err := bc.manager.GetBy(a.Digest, r.UUID, mimes)
if err != nil {
log.Warningf("get reports of %s@%s failed, error: %v", a.RepositoryName, a.Digest, err)
return
}
groupReports[i] = reports
}(i, a)
}
wg.Wait()
var reports []*scan.Report
for _, group := range groupReports {
if len(group) != 0 {
reports = append(reports, group...)
} else {
// NOTE: An empty group means no report exists yet for this artifact.
// For an OCI image, this happens when the artifact has not been scanned.
// For an OCI image index, this happens when the index itself has not been scanned,
// even though some of its child artifacts may have been scanned, so return an empty report.
return nil, nil
}
}
return bc.manager.GetBy(artifact.Digest, r.UUID, mimes)
return reports, nil
}
// GetSummary ...
func (bc *basicController) GetSummary(artifact *v1.Artifact, mimeTypes []string, options ...report.Option) (map[string]interface{}, error) {
func (bc *basicController) GetSummary(ctx context.Context, artifact *ar.Artifact, mimeTypes []string, options ...report.Option) (map[string]interface{}, error) {
if artifact == nil {
return nil, errors.New("no way to get report summaries for nil artifact")
}
// Get reports first
rps, err := bc.GetReport(artifact, mimeTypes)
rps, err := bc.GetReport(ctx, artifact, mimeTypes)
if err != nil {
return nil, err
}
......@@ -287,7 +389,16 @@ func (bc *basicController) GetSummary(artifact *v1.Artifact, mimeTypes []string,
return nil, err
}
summaries[rp.MimeType] = sum
if s, ok := summaries[rp.MimeType]; ok {
r, err := report.MergeSummary(rp.MimeType, s, sum)
if err != nil {
return nil, err
}
summaries[rp.MimeType] = r
} else {
summaries[rp.MimeType] = sum
}
}
return summaries, nil
......@@ -423,7 +534,7 @@ func (bc *basicController) makeRobotAccount(projectID int64, repository string)
}
// launchScanJob launches a job to run scan
func (bc *basicController) launchScanJob(trackID string, artifact *v1.Artifact, registration *scanner.Registration, mimes []string) (jobID string, err error) {
func (bc *basicController) launchScanJob(trackID string, artifact *ar.Artifact, registration *scanner.Registration, mimes []string) (jobID string, err error) {
var ck string
if registration.UseInternalAddr {
ck = configCoreInternalAddr
......@@ -436,7 +547,7 @@ func (bc *basicController) launchScanJob(trackID string, artifact *v1.Artifact,
return "", errors.Wrap(err, "scan controller: launch scan job")
}
robot, err := bc.makeRobotAccount(artifact.NamespaceID, artifact.Repository)
robot, err := bc.makeRobotAccount(artifact.ProjectID, artifact.RepositoryName)
if err != nil {
return "", errors.Wrap(err, "scan controller: launch scan job")
}
......@@ -450,7 +561,12 @@ func (bc *basicController) launchScanJob(trackID string, artifact *v1.Artifact,
URL: registryAddr,
Authorization: authorization,
},
Artifact: artifact,
Artifact: &v1.Artifact{
NamespaceID: artifact.ProjectID,
Repository: artifact.RepositoryName,
Digest: artifact.Digest,
MimeType: artifact.ManifestMediaType,
},
}
rJSON, err := registration.ToJSON()
......
......@@ -15,12 +15,14 @@
package scan
import (
"context"
"encoding/base64"
"encoding/json"
"fmt"
"testing"
"time"
"github.com/goharbor/harbor/src/api/artifact"
"github.com/goharbor/harbor/src/common"
cj "github.com/goharbor/harbor/src/common/job"
cjm "github.com/goharbor/harbor/src/common/job/models"
......@@ -30,12 +32,14 @@ import (
"github.com/goharbor/harbor/src/pkg/q"
"github.com/goharbor/harbor/src/pkg/robot/model"
sca "github.com/goharbor/harbor/src/pkg/scan"
"github.com/goharbor/harbor/src/pkg/scan/all"
"github.com/goharbor/harbor/src/pkg/scan/dao/scan"
"github.com/goharbor/harbor/src/pkg/scan/dao/scanner"
v1 "github.com/goharbor/harbor/src/pkg/scan/rest/v1"
"github.com/goharbor/harbor/src/pkg/scan/vuln"
artifacttesting "github.com/goharbor/harbor/src/testing/api/artifact"
scannertesting "github.com/goharbor/harbor/src/testing/api/scanner"
mocktesting "github.com/goharbor/harbor/src/testing/mock"
reporttesting "github.com/goharbor/harbor/src/testing/pkg/scan/report"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
......@@ -47,9 +51,11 @@ type ControllerTestSuite struct {
suite.Suite
registration *scanner.Registration
artifact *v1.Artifact
artifact *artifact.Artifact
rawReport string
c Controller
ar artifact.Controller
c Controller
}
// TestController is the entry point of ControllerTestSuite.
......@@ -59,21 +65,11 @@ func TestController(t *testing.T) {
// SetupSuite ...
func (suite *ControllerTestSuite) SetupSuite() {
suite.registration = &scanner.Registration{
ID: 1,
UUID: "uuid001",
Name: "Test-scan-controller",
URL: "http://testing.com:3128",
IsDefault: true,
}
suite.artifact = &v1.Artifact{
NamespaceID: 1,
Repository: "scan",
Tag: "golang",
Digest: "digest-code",
MimeType: v1.MimeTypeDockerArtifact,
}
suite.artifact = &artifact.Artifact{}
suite.artifact.ProjectID = 1
suite.artifact.RepositoryName = "library/photon"
suite.artifact.Digest = "digest-code"
suite.artifact.ManifestMediaType = v1.MimeTypeDockerArtifact
m := &v1.ScannerAdapterMetadata{
Scanner: &v1.Scanner{
......@@ -95,11 +91,20 @@ func (suite *ControllerTestSuite) SetupSuite() {
},
}
suite.registration = &scanner.Registration{
ID: 1,
UUID: "uuid001",
Name: "Test-scan-controller",
URL: "http://testing.com:3128",
IsDefault: true,
Metadata: m,
}
sc := &scannertesting.Controller{}
sc.On("GetRegistrationByProject", suite.artifact.NamespaceID).Return(suite.registration, nil)
sc.On("GetRegistrationByProject", suite.artifact.ProjectID).Return(suite.registration, nil)
sc.On("Ping", suite.registration).Return(m, nil)
mgr := &MockReportManager{}
mgr := &reporttesting.Manager{}
mgr.On("Create", &scan.Report{
Digest: "digest-code",
RegistrationUUID: "uuid001",
......