Signed-off-by: Stephan Renatus <srenatus@chef.io>
This commit is contained in:
Stephan Renatus 2018-09-05 08:46:38 +02:00
parent 14b89029c9
commit 9f10e5d020
3 changed files with 2 additions and 756 deletions

6
glide.lock generated
View file

@ -1,5 +1,5 @@
hash: dda54feb39d5947ad20e9d583ccad9be02343dac0f52752944b7cd39668af7a4 hash: c41e5b1fbd68f04f14b6e9b2ebab8bf18b857b3dc3a5754e4adf7c1884981de7
updated: 2017-12-20T16:05:04.690191111+01:00 updated: 2018-09-05T08:40:39.486766558+02:00
imports: imports:
- name: github.com/beevik/etree - name: github.com/beevik/etree
version: 4cd0dd976db869f817248477718071a28e978df0 version: 4cd0dd976db869f817248477718071a28e978df0
@ -24,8 +24,6 @@ imports:
- pkg/transport - pkg/transport
- name: github.com/coreos/go-oidc - name: github.com/coreos/go-oidc
version: be73733bb8cc830d0205609b95d125215f8e9c70 version: be73733bb8cc830d0205609b95d125215f8e9c70
- name: github.com/coreos/license-bill-of-materials
version: d70207c33a3c79a1c0479b208f8b7ab6215144c7
- name: github.com/felixge/httpsnoop - name: github.com/felixge/httpsnoop
version: eadd4fad6aac69ae62379194fe0219f3dbc80fd3 version: eadd4fad6aac69ae62379194fe0219f3dbc80fd3
- name: github.com/ghodss/yaml - name: github.com/ghodss/yaml

View file

@ -1,19 +0,0 @@
Copyright (c) 2015 Patrick Mézard
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View file

@ -1,733 +0,0 @@
package main
import (
"bufio"
"bytes"
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"path/filepath"
"regexp"
"sort"
"strconv"
"strings"
"github.com/coreos/license-bill-of-materials/assets"
)
// Template holds pre-constructed license template info.
// parseTemplate fills Title and Nickname from the "title:" and "nickname:"
// lines of the template header, and Words from the text following the header.
type Template struct {
// Title is the trimmed text following "title:" in the template header.
Title string
// Nickname is the trimmed text following "nickname:" in the template header.
Nickname string
// Words maps each distinct word of the template text to the index of its
// first occurrence, as computed by makeWordSet.
Words map[string]int
}
// parseTemplate parses a license template made of a header delimited by two
// "---" lines followed by the license text. "title:" and "nickname:" header
// lines populate the corresponding Template fields; every line after the
// closing "---" is collected and turned into the template word set.
//
// The returned error is the scanner error, if any; the template is returned
// in both cases.
func parseTemplate(content string) (*Template, error) {
	// Parsing phases, in the order they are entered.
	const (
		beforeHeader = iota
		inHeader
		inBody
	)

	tmpl := &Template{}
	body := []byte{}
	phase := beforeHeader

	sc := bufio.NewScanner(strings.NewReader(content))
	for sc.Scan() {
		trimmed := strings.TrimSpace(sc.Text())
		switch phase {
		case beforeHeader:
			if trimmed == "---" {
				phase = inHeader
			}
		case inHeader:
			switch {
			case trimmed == "---":
				phase = inBody
			case strings.HasPrefix(trimmed, "title:"):
				tmpl.Title = strings.TrimSpace(trimmed[len("title:"):])
			case strings.HasPrefix(trimmed, "nickname:"):
				tmpl.Nickname = strings.TrimSpace(trimmed[len("nickname:"):])
			}
		case inBody:
			// Keep the untrimmed line so the body text is preserved as is.
			body = append(body, sc.Bytes()...)
			body = append(body, '\n')
		}
	}

	tmpl.Words = makeWordSet(body)
	return tmpl, sc.Err()
}
// loadTemplates parses every embedded asset into a Template. It stops at the
// first asset that fails to parse and returns that error.
func loadTemplates() ([]*Template, error) {
	parsed := make([]*Template, 0, len(assets.Assets))
	for _, asset := range assets.Assets {
		tmpl, err := parseTemplate(asset.Content)
		if err != nil {
			return nil, err
		}
		parsed = append(parsed, tmpl)
	}
	return parsed, nil
}
// reWords matches one word: a run of word characters and apostrophes.
var reWords = regexp.MustCompile(`[\w']+`)

// reCopyright matches, case-insensitively, a "Copyright <year>" notice (with
// an optional copyright sign or "(c)") together with the leading whitespace
// and the rest of the line. "[year]" is accepted in place of a 4-digit year.
var reCopyright = regexp.MustCompile(
	`(?i)\s*Copyright (?:©|\(c\)|\xC2\xA9)?\s*(?:\d{4}|\[year\]).*`)
// cleanLicenseData normalizes license text for comparison: it lowercases the
// data and then removes everything matched by reCopyright.
func cleanLicenseData(data []byte) []byte {
	lowered := bytes.ToLower(data)
	return reCopyright.ReplaceAll(lowered, nil)
}
func makeWordSet(data []byte) map[string]int {
words := map[string]int{}
data = cleanLicenseData(data)
matches := reWords.FindAll(data, -1)
for i, m := range matches {
s := string(m)
if _, ok := words[s]; !ok {
// Non-matching words are likely in the license header, to mention
// copyrights and authors. Try to preserve the initial sequences,
// to display them later.
words[s] = i
}
}
return words
}
// Word holds a word and its position in a license.
type Word struct {
	Text string
	Pos  int
}

// sortedWords orders words by ascending position; it implements
// sort.Interface.
type sortedWords []Word

// Len returns the number of words.
func (ws sortedWords) Len() int { return len(ws) }

// Swap exchanges the words at indexes a and b.
func (ws sortedWords) Swap(a, b int) {
	tmp := ws[a]
	ws[a] = ws[b]
	ws[b] = tmp
}

// Less reports whether the word at index a appears before the one at index b.
func (ws sortedWords) Less(a, b int) bool {
	return ws[a].Pos < ws[b].Pos
}
// MatchResult represents a matched template and matching metrics
type MatchResult struct {
// Template is the best-scoring template. matchTemplates leaves it nil when
// no template scored above its initial best score of -1.
Template *Template
// Score is the similarity computed by matchTemplates: twice the number of
// common words divided by the sum of both word-set sizes.
Score float64
// ExtraWords lists the words found in the license but not in the template,
// ordered by their position in the license.
ExtraWords []string
// MissingWords lists the words found in the template but not in the license,
// ordered by their position in the template.
MissingWords []string
}
// sortAndReturnWords sorts words in place by position and returns their
// texts in that order. The result is never nil.
func sortAndReturnWords(words []Word) []string {
	sort.Sort(sortedWords(words))
	texts := make([]string, 0, len(words))
	for i := range words {
		texts = append(texts, words[i].Text)
	}
	return texts
}
// matchTemplates compares the supplied license text against every template
// and returns the best match. The score is 2*common/(|license|+|template|)
// computed over the two word sets. The result also carries the words present
// only in the license (ExtraWords) and only in the template (MissingWords),
// each ordered by position.
//
// When no template scores above -1 (for instance when templates is empty)
// the result has a nil Template and a Score of -1.
func matchTemplates(license []byte, templates []*Template) MatchResult {
	var winner *Template
	topScore := float64(-1)
	winnerExtra := []Word{}
	winnerMissing := []Word{}

	licenseWords := makeWordSet(license)
	for _, tmpl := range templates {
		onlyInLicense := []Word{}
		onlyInTemplate := []Word{}
		shared := 0

		for word, pos := range licenseWords {
			if _, found := tmpl.Words[word]; found {
				shared++
				continue
			}
			onlyInLicense = append(onlyInLicense, Word{Text: word, Pos: pos})
		}
		for word, pos := range tmpl.Words {
			if _, found := licenseWords[word]; found {
				continue
			}
			onlyInTemplate = append(onlyInTemplate, Word{Text: word, Pos: pos})
		}

		score := 2 * float64(shared) / (float64(len(licenseWords)) + float64(len(tmpl.Words)))
		if score > topScore {
			topScore = score
			winner = tmpl
			winnerMissing = onlyInTemplate
			winnerExtra = onlyInLicense
		}
	}

	return MatchResult{
		Template:     winner,
		Score:        topScore,
		ExtraWords:   sortAndReturnWords(winnerExtra),
		MissingWords: sortAndReturnWords(winnerMissing),
	}
}
// fixEnv builds an environment for child processes in which GOPATH is set to
// gopath: the GOPATH entry comes first, followed by every entry of the
// current process environment that does not define GOPATH. It returns nil
// when gopath is empty.
func fixEnv(gopath string) []string {
	if gopath == "" {
		return nil
	}
	const key = "GOPATH="
	env := []string{key + gopath}
	for _, kv := range os.Environ() {
		if strings.HasPrefix(kv, key) {
			continue
		}
		env = append(env, kv)
	}
	return env
}
// MissingError is returned when "go list" output shows that a package could
// not be found or has no buildable Go source files. Err holds that output.
type MissingError struct {
	Err string
}

// Error implements the error interface by returning the stored message.
func (e *MissingError) Error() string {
	msg := e.Err
	return msg
}
// expandPackages takes a list of packages or package expressions and invokes
// "go list" to expand them to package names. In particular, it handles
// things like "..." and ".".
//
// A *MissingError is returned when the command fails and its output mentions
// a package that cannot be found or has no buildable Go source files; any
// other failure is reported with the full command line and its output.
func expandPackages(gopath string, pkgs []string) ([]string, error) {
	argv := append([]string{"list"}, pkgs...)
	goList := exec.Command("go", argv...)
	goList.Env = fixEnv(gopath)

	raw, err := goList.CombinedOutput()
	if err != nil {
		text := string(raw)
		missing := strings.Contains(text, "cannot find package") ||
			strings.Contains(text, "no buildable Go source files")
		if missing {
			return nil, &MissingError{Err: text}
		}
		return nil, fmt.Errorf("'go %s' failed with:\n%s",
			strings.Join(argv, " "), text)
	}

	// One package per line; blank lines are dropped.
	names := []string{}
	for _, line := range strings.Split(string(raw), "\n") {
		if name := strings.TrimSpace(line); name != "" {
			names = append(names, name)
		}
	}
	return names, nil
}
// listPackagesAndDeps expands pkgs with expandPackages, asks "go list" for
// the dependencies of the expanded packages, and returns the sorted,
// de-duplicated union of those dependencies and the expanded packages.
//
// Errors follow the same convention as expandPackages: a *MissingError when
// the output reports a missing or unbuildable package, a generic error
// otherwise.
func listPackagesAndDeps(gopath string, pkgs []string) ([]string, error) {
	expanded, err := expandPackages(gopath, pkgs)
	if err != nil {
		return nil, err
	}

	// Dependencies are printed separated by "|" so they can be split below.
	argv := append([]string{"list", "-f", "{{range .Deps}}{{.}}|{{end}}"}, expanded...)
	goList := exec.Command("go", argv...)
	goList.Env = fixEnv(gopath)
	raw, err := goList.CombinedOutput()
	if err != nil {
		text := string(raw)
		missing := strings.Contains(text, "cannot find package") ||
			strings.Contains(text, "no buildable Go source files")
		if missing {
			return nil, &MissingError{Err: text}
		}
		return nil, fmt.Errorf("'go %s' failed with:\n%s",
			strings.Join(argv, " "), text)
	}

	deps := []string{}
	seen := map[string]bool{}
	addOnce := func(name string) {
		if seen[name] {
			return
		}
		seen[name] = true
		deps = append(deps, name)
	}
	for _, field := range strings.Split(string(raw), "|") {
		if dep := strings.TrimSpace(field); dep != "" {
			addOnce(dep)
		}
	}
	for _, pkg := range expanded {
		addOnce(pkg)
	}

	sort.Strings(deps)
	return deps, nil
}
// listStandardPackages returns the packages matched by the "std" and "cmd"
// patterns, as expanded by expandPackages.
func listStandardPackages(gopath string) ([]string, error) {
	patterns := []string{"std", "cmd"}
	return expandPackages(gopath, patterns)
}
// PkgError reports on missing packages.
// It is the type of PkgInfo.Error, which getPackagesInfo fills by decoding
// the JSON printed by "go list -e -json".
type PkgError struct {
// Err is the error text; listPackagesWithLicenses copies it into
// GoPackage.Err.
Err string
}
// PkgInfo holds identifying package info.
// Values are decoded by getPackagesInfo from the JSON printed by
// "go list -e -json".
type PkgInfo struct {
// Name is the package name; getPackagesInfo replaces an empty Name with
// ImportPath when Error is set.
Name string
// Dir is decoded from the JSON but not read elsewhere in this file.
Dir string
// Root is joined with "src" and a path derived from ImportPath to locate
// license files on disk.
Root string
// ImportPath is the package import path; it must equal the path that was
// requested from "go list".
ImportPath string
// Error is non-nil when "go list" reported an error for the package.
Error *PkgError
}
// getPackagesInfo runs "go list -e -json" on pkgs and decodes one PkgInfo per
// requested package, in the order the packages were supplied.
//
// Only the command's standard output is decoded: standard error is captured
// separately so that diagnostics printed by the go tool cannot corrupt the
// JSON stream. When the command fails, the returned error contains both
// streams. An error is also returned when a document cannot be decoded or
// when the decoded import path differs from the requested one.
//
// Packages reported with an error and no name get their import path as name.
func getPackagesInfo(gopath string, pkgs []string) ([]*PkgInfo, error) {
	args := []string{"list", "-e", "-json"}
	// TODO: split the list for platforms which do not support massive argument
	// lists.
	args = append(args, pkgs...)
	cmd := exec.Command("go", args...)
	cmd.Env = fixEnv(gopath)

	var stderr bytes.Buffer
	cmd.Stderr = &stderr
	out, err := cmd.Output()
	if err != nil {
		return nil, fmt.Errorf("go %s failed with:\n%s%s",
			strings.Join(args, " "), stderr.String(), string(out))
	}

	infos := make([]*PkgInfo, 0, len(pkgs))
	decoder := json.NewDecoder(bytes.NewReader(out))
	for _, pkg := range pkgs {
		info := &PkgInfo{}
		if derr := decoder.Decode(info); derr != nil {
			// Keep the decoder error: it tells apart truncated output from
			// malformed JSON.
			return nil, fmt.Errorf("could not retrieve package information for %s: %v", pkg, derr)
		}
		if pkg != info.ImportPath {
			return nil, fmt.Errorf("package information mismatch: asked for %s, got %s",
				pkg, info.ImportPath)
		}
		if info.Error != nil && info.Name == "" {
			info.Name = info.ImportPath
		}
		infos = append(infos, info)
	}
	return infos, nil
}
// reLicense matches, case-insensitively, whole file names of the form
// "license", "licence", "unlicense", "copying" or "copyright", each with an
// optional single extension. The trailing empty alternative also lets the
// empty string match, with both capture groups empty.
var reLicense = regexp.MustCompile(`(?i)^(?:` +
	`((?:un)?licen[sc]e(?:\.[^.]+)?)|` +
	`(copy(?:ing|right)(?:\.[^.]+)?)|` +
	`)$`)

// scoreLicenseName returns 1 when the supplied file name looks like a
// license file according to reLicense, and 0 otherwise.
func scoreLicenseName(name string) int8 {
	groups := reLicense.FindStringSubmatch(name)
	if groups == nil {
		return 0
	}
	// A match with both groups empty is the empty alternative, not a license.
	if groups[1] == "" && groups[2] == "" {
		return 0
	}
	return 1
}
// findLicenses looks for license files in the directory of the package
// import path, then in each parent directory in turn, stopping at the first
// directory containing at least one candidate or once the path is reduced to
// ".". Directories are resolved under <Root>/src.
//
// It returns the candidate paths (relative to <Root>/src) found in that
// directory, or a slice containing one empty string if none were found or a
// directory could not be read; in the latter case the error is returned too.
func findLicenses(info *PkgInfo) ([]string, error) {
	for dir := info.ImportPath; dir != "."; dir = filepath.Dir(dir) {
		entries, err := ioutil.ReadDir(filepath.Join(info.Root, "src", dir))
		if err != nil {
			return []string{""}, err
		}

		found := make([]string, 0)
		for _, entry := range entries {
			// Only regular files whose name looks like a license qualify.
			if entry.Mode().IsRegular() && scoreLicenseName(entry.Name()) == 1 {
				found = append(found, filepath.Join(dir, entry.Name()))
			}
		}
		if len(found) != 0 {
			return found, nil
		}
	}
	return []string{""}, nil
}
// GoPackage represents a top-level package, ex. colors/blue
type GoPackage struct {
// PackageName is the package import path; groupPackagesByLicense replaces it
// with the longest common prefix of the packages sharing a license file.
PackageName string
// RawLicenses holds one entry per license file candidate; an entry with an
// empty Path means no license file was found.
RawLicenses []*RawLicense
// Err is the error reported for the package, empty when there is none.
Err string
}
// RawLicense holds template-matched file data
type RawLicense struct {
// Path is the license file path as returned by findLicenses, or "" when no
// license file was found.
Path string
// Score, Template, ExtraWords and MissingWords are copied from the
// MatchResult computed for the file; they keep their zero values when Path
// is empty.
Score float64
Template *Template
ExtraWords []string
MissingWords []string
}
// listPackagesWithLicenses resolves pkgs and their dependencies, skips the
// standard packages, and returns one GoPackage per remaining package with
// the license files found for it matched against the known templates.
//
// Packages for which "go list" reported an error are returned with Err set
// and a single RawLicense with an empty path. A *MissingError from the
// dependency listing is returned unchanged.
func listPackagesWithLicenses(gopath string, pkgs []string) ([]GoPackage, error) {
	templates, err := loadTemplates()
	if err != nil {
		return nil, err
	}

	deps, err := listPackagesAndDeps(gopath, pkgs)
	if err != nil {
		if _, missing := err.(*MissingError); missing {
			return nil, err
		}
		return nil, fmt.Errorf("could not list %s dependencies: %s",
			strings.Join(pkgs, " "), err)
	}

	stdPkgs, err := listStandardPackages(gopath)
	if err != nil {
		return nil, fmt.Errorf("could not list standard packages: %s", err)
	}
	isStd := make(map[string]bool, len(stdPkgs))
	for _, name := range stdPkgs {
		isStd[name] = true
	}

	infos, err := getPackagesInfo(gopath, deps)
	if err != nil {
		return nil, err
	}

	// Cache matched licenses by path. Useful for package with a lot of
	// subpackages like bleve.
	cache := map[string]MatchResult{}
	result := []GoPackage{}
	for _, info := range infos {
		switch {
		case info.Error != nil:
			result = append(result, GoPackage{
				PackageName: info.Name,
				Err:         info.Error.Err,
				RawLicenses: []*RawLicense{{Path: ""}},
			})
			continue
		case isStd[info.ImportPath]:
			continue
		}

		licensePaths, err := findLicenses(info)
		if err != nil {
			return nil, err
		}

		raws := []*RawLicense{}
		for _, rel := range licensePaths {
			raw := &RawLicense{Path: rel}
			raws = append(raws, raw)
			if rel == "" {
				// No license file: keep the zero-valued entry.
				continue
			}
			abs := filepath.Join(info.Root, "src", rel)
			match, cached := cache[abs]
			if !cached {
				data, err := ioutil.ReadFile(abs)
				if err != nil {
					return nil, err
				}
				match = matchTemplates(data, templates)
				cache[abs] = match
			}
			raw.Score = match.Score
			raw.Template = match.Template
			raw.ExtraWords = match.ExtraWords
			raw.MissingWords = match.MissingWords
		}

		result = append(result, GoPackage{
			PackageName: info.ImportPath,
			RawLicenses: raws,
		})
	}
	return result, nil
}
// longestCommonPrefix returns the longest sequence of leading import path
// components shared by every supplied package, joined with "/". It returns
// an empty string when nothing is shared or when gPackages is empty.
func longestCommonPrefix(gPackages []GoPackage) string {
	type trieNode struct {
		label string
		kids  map[string]*trieNode
		hits  int // number of packages whose path goes through this node
	}

	// Build a prefix tree. Not super efficient, but easy to do.
	total := len(gPackages)
	root := &trieNode{kids: map[string]*trieNode{}, hits: total}
	for _, pkg := range gPackages {
		cur := root
		for _, segment := range strings.Split(pkg.PackageName, "/") {
			next, ok := cur.kids[segment]
			if !ok {
				next = &trieNode{label: segment, kids: map[string]*trieNode{}}
				cur.kids[segment] = next
			}
			next.hits++
			cur = next
		}
	}

	// Follow the chain of only children from the root, keeping the
	// components every package goes through.
	var shared []string
	for cur := root; len(cur.kids) == 1; {
		for _, only := range cur.kids {
			if only.hits == total {
				// Handle case where there are subpackages:
				// prometheus/procfs
				// prometheus/procfs/xfs
				shared = append(shared, only.label)
			}
			cur = only
		}
	}
	return strings.Join(shared, "/")
}
// groupPackagesByLicense groups the input packages by license path. Packages
// sharing a license file are replaced by a single entry: a copy of the first
// of them, renamed to their longest common import path prefix. Entries with
// an empty license path, or without any license entry, are kept unchanged.
//
// An error is returned when packages share a license file but have no common
// import path prefix.
func groupPackagesByLicense(gPackages []GoPackage) ([]GoPackage, error) {
	byLicense := map[string][]GoPackage{}
	for _, pkg := range gPackages {
		for _, raw := range pkg.RawLicenses {
			if raw.Path != "" {
				byLicense[raw.Path] = append(byLicense[raw.Path], pkg)
			}
		}
	}

	// Collapse every group of two or more packages into one representative.
	for licensePath, sharers := range byLicense {
		if len(sharers) < 2 {
			continue
		}
		common := longestCommonPrefix(sharers)
		if common == "" {
			return nil, fmt.Errorf(
				"packages share the same license but not common prefix: %v", sharers)
		}
		merged := sharers[0]
		merged.PackageName = common
		byLicense[licensePath] = []GoPackage{merged}
	}

	kept := []GoPackage{}
	// Ensures only one package with multiple licenses is appended to the list of
	// kept packages
	emitted := make(map[string]bool)
	for _, pkg := range gPackages {
		if len(pkg.RawLicenses) == 0 {
			kept = append(kept, pkg)
			continue
		}
		for _, raw := range pkg.RawLicenses {
			if raw.Path == "" {
				kept = append(kept, pkg)
				continue
			}
			group, pending := byLicense[raw.Path]
			if !pending {
				continue
			}
			representative := group[0]
			if emitted[representative.PackageName] {
				continue
			}
			kept = append(kept, representative)
			delete(byLicense, raw.Path)
			emitted[representative.PackageName] = true
		}
	}
	return kept, nil
}
// projectAndLicenses is one entry of the JSON output and of the override
// file: a project with either its licenses or an error message.
type projectAndLicenses struct {
// Project is the project name, with any leading path up to "/vendor/"
// removed by removeVendor for detected packages.
Project string `json:"project"`
// Licenses lists the licenses of the project; omitted from JSON when empty.
Licenses []license `json:"licenses,omitempty"`
// Error explains why no license is reported; omitted from JSON when empty.
Error string `json:"error,omitempty"`
}
// license is one license attributed to a project in the JSON output.
type license struct {
// Type is the license name: a matched template title, or the type given in
// the override file.
Type string `json:"type,omitempty"`
// Confidence is the template match score, or 1.0 for overridden licenses.
Confidence float64 `json:"confidence,omitempty"`
}
// licensesToProjectAndLicenses splits packages into those with at least one
// detected license (c) and those without (e).
//
// A package goes to e with its own error text when Err is set, or with
// "No license detected" when none of its raw licenses matched a template.
// Otherwise it goes to c with one license per raw license whose matched
// template has a non-empty title. Raw licenses without a template are
// skipped rather than dereferenced, so a package mixing matched and
// unmatched license files no longer panics.
//
// Project names have any leading path up to "/vendor/" removed.
func licensesToProjectAndLicenses(gPackages []GoPackage) (c []projectAndLicenses, e []projectAndLicenses) {
	for _, gp := range gPackages {
		project := removeVendor(gp.PackageName)
		if gp.Err != "" {
			e = append(e, projectAndLicenses{
				Project: project,
				Error:   gp.Err,
			})
			continue
		}

		matched := 0
		for _, rl := range gp.RawLicenses {
			if rl.Template != nil {
				matched++
			}
		}
		if matched == 0 {
			e = append(e, projectAndLicenses{
				Project: project,
				Error:   "No license detected",
			})
			continue
		}

		ls := []license{}
		for _, rl := range gp.RawLicenses {
			if rl.Template == nil || rl.Template.Title == "" {
				continue
			}
			ls = append(ls, license{
				Type:       rl.Template.Title,
				Confidence: rl.Score,
			})
		}
		c = append(c, projectAndLicenses{
			Project:  project,
			Licenses: ls,
		})
	}
	return c, e
}
// removeVendor returns the part of s following the first "/vendor/"
// occurrence, or s unchanged when it contains none.
func removeVendor(s string) string {
	const marker = "/vendor/"
	if at := strings.Index(s, marker); at >= 0 {
		return s[at+len(marker):]
	}
	return s
}
// truncateFloat reduces f to three decimal places by formatting it with
// "%.3f" and parsing the result back. It panics if the formatted text cannot
// be parsed.
func truncateFloat(f float64) float64 {
	reduced, err := strconv.ParseFloat(fmt.Sprintf("%.3f", f), 64)
	if err != nil {
		panic("unexpected parse float error")
	}
	return reduced
}
// pkgsToLicenses detects the licenses of pkgs and their dependencies and
// applies the overrides, a JSON array of {"project", "licenses"} entries.
//
// It returns pls, the projects with known licenses, and ne, the projects for
// which detection failed and no override exists; both are sorted by project
// name. For an overridden project the detected licenses are replaced by the
// override's license types with a confidence of 1.0. Overrides naming
// projects without a detected license are added to pls as well.
//
// Each project gets its own license slice: the override licenses of one
// project never leak into the entries built after it.
//
// Any failure (invalid overrides, package listing, grouping) terminates the
// process through log.Fatal.
func pkgsToLicenses(pkgs []string, overrides string) (pls []projectAndLicenses, ne []projectAndLicenses) {
	// fplm maps a project to the license types forced by the override file.
	fplm := make(map[string][]string)
	if err := json.Unmarshal([]byte(overrides), &pls); err != nil {
		log.Fatal(err)
	}
	for _, pl := range pls {
		for _, l := range pl.Licenses {
			fplm[pl.Project] = append(fplm[pl.Project], l.Type)
		}
	}

	licenses, err := listPackagesWithLicenses("", pkgs)
	if err != nil {
		log.Fatal(err)
	}
	if licenses, err = groupPackagesByLicense(licenses); err != nil {
		log.Fatal(err)
	}
	c, e := licensesToProjectAndLicenses(licenses)

	// forced builds a fresh license list, at full confidence, from override
	// license types.
	forced := func(types []string) []license {
		ls := make([]license, 0, len(types))
		for _, t := range types {
			ls = append(ls, license{
				Type:       t,
				Confidence: 1.0,
			})
		}
		return ls
	}

	// detected licenses
	pls = nil
	for _, pl := range c {
		if fl, ok := fplm[pl.Project]; ok {
			pl = projectAndLicenses{
				Project:  pl.Project,
				Licenses: forced(fl),
			}
			// Consumed: the remaining entries are the overrides for projects
			// without a detected license.
			delete(fplm, pl.Project)
		}
		pls = append(pls, pl)
	}

	// force add undetected licenses given by overrides
	for proj, fl := range fplm {
		pls = append(pls, projectAndLicenses{
			Project:  proj,
			Licenses: forced(fl),
		})
	}

	// missing / error license
	for _, pl := range e {
		if _, ok := fplm[pl.Project]; !ok {
			ne = append(ne, pl)
		}
	}

	sort.Slice(pls, func(i, j int) bool { return pls[i].Project < pls[j].Project })
	sort.Slice(ne, func(i, j int) bool { return ne[i].Project < ne[j].Project })
	return pls, ne
}
// main prints, as indented JSON, the licenses of the packages given as
// arguments. The optional -override-file flag names a JSON file whose
// entries override detected licenses. When some projects end up without a
// license, they are printed as a second JSON document after an empty line
// and the process exits with status 1.
func main() {
	overridePath := flag.String("override-file", "", "a file to overwrite licenses")
	flag.Parse()
	if flag.NArg() < 1 {
		log.Fatal("expect at least one package argument")
	}

	// Without an override file, use an empty JSON array.
	overrides := "[]"
	if *overridePath != "" {
		raw, err := ioutil.ReadFile(*overridePath)
		if err != nil {
			log.Fatal(err)
		}
		overrides = string(raw)
	}

	detected, failed := pkgsToLicenses(flag.Args(), overrides)

	report, err := json.MarshalIndent(detected, "", " ")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(report))

	if len(failed) == 0 {
		return
	}
	fmt.Println("")
	failures, err := json.MarshalIndent(failed, "", " ")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(failures))
	os.Exit(1)
}