Tools for analyzing use of built-in functions.

This commit is contained in:
M. J. Fromberger
2022-02-18 05:30:00 -08:00
parent 488e1d4bd2
commit b8197ffcdd
6 changed files with 497 additions and 2 deletions

5
go.mod
View File

@@ -32,16 +32,18 @@ require (
golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e
golang.org/x/net v0.0.0-20220617184016-355a448f1bc9
golang.org/x/sync v0.0.0-20220513210516-0976fa681c29
golang.org/x/tools v0.1.12-0.20220628192153-7743d1d949f1
google.golang.org/grpc v1.48.0
pgregory.net/rapid v0.4.8
)
require github.com/google/go-cmp v0.5.8
require (
github.com/bufbuild/buf v1.4.0
github.com/creachadair/atomicfile v0.2.6
github.com/creachadair/taskgroup v0.3.2
github.com/golangci/golangci-lint v1.47.2
github.com/google/go-cmp v0.5.8
github.com/vektra/mockery/v2 v2.14.0
gotest.tools v2.2.0+incompatible
)
@@ -226,7 +228,6 @@ require (
golang.org/x/sys v0.0.0-20220702020025-31831981b65f // indirect
golang.org/x/term v0.0.0-20220526004731-065cf7ba2467 // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/tools v0.1.12-0.20220628192153-7743d1d949f1 // indirect
google.golang.org/genproto v0.0.0-20220519153652-3a47de7e79bd // indirect
google.golang.org/protobuf v1.28.0 // indirect
gopkg.in/ini.v1 v1.66.6 // indirect

View File

@@ -0,0 +1,128 @@
// Package bicall locates calls of built-in functions in Go source.
package bicall
import (
"fmt"
"go/ast"
"go/parser"
"go/token"
"io"
"strings"
)
// isBuiltin is the set of names of the predeclared built-in functions that
// this package recognizes. Names that are absent from the map report false.
// See https://golang.org/ref/spec#Built-in_functions
var isBuiltin = func() map[string]bool {
	names := []string{
		"append", "cap", "close", "complex", "copy",
		"delete", "imag", "len", "make", "new",
		"panic", "print", "println", "real", "recover",
	}
	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()
// Call represents a call to a built-in function in a source program.
// Parse constructs one Call for each matching call expression it finds and
// passes it to the callback.
type Call struct {
Name string // the name of the built-in function, as written at the call site
Call *ast.CallExpr // the call expression in the AST
Site token.Position // the location of the start of the call expression
Path []ast.Node // the AST path to the call, outermost node first, ending with the call itself
Comments []string // text of the comments associated with the call, or failing that with its innermost enclosing statement
}
// flattenComments extracts the text of each of the given comment groups, with
// the comment markers removed and with leading and trailing whitespace
// trimmed. The result has one entry per group, in the same order as cgs, and
// is nil when cgs is empty.
func flattenComments(cgs []*ast.CommentGroup) []string {
	if len(cgs) == 0 {
		return nil
	}
	out := make([]string, 0, len(cgs))
	for _, cg := range cgs {
		// CommentGroup.Text removes the comment markers but at most one
		// leading space, and it terminates the text with a newline. Trim
		// whatever remains so the result honors the documented contract.
		out = append(out, strings.TrimSpace(cg.Text()))
	}
	return out
}
// Parse parses the contents of r as a Go source file, and calls f for each
// call expression targeting a built-in function that occurs in the resulting
// AST. Location information about each call is attributed to the specified
// filename.
//
// Built-in functions are recognized purely by name: a call whose function is
// a plain identifier listed in isBuiltin is reported, even if the program has
// declared its own function with that name.
//
// The Path of each reported Call is a copy of the traversal state, so f may
// retain it after returning.
//
// If the source cannot be parsed, Parse returns an error wrapping the parser's
// error. If f reports an error, traversal stops and that error is reported to
// the caller of Parse.
func Parse(r io.Reader, filename string, f func(Call) error) error {
	fset := token.NewFileSet()
	file, err := parser.ParseFile(fset, filename, r, parser.ParseComments)
	if err != nil {
		return fmt.Errorf("parsing %q: %w", filename, err)
	}
	cmap := ast.NewCommentMap(fset, file, file.Comments)

	// path is the stack of nodes from the file root down to the node that is
	// currently being visited. It is pushed on entry to a node and popped when
	// the walker signals the end of that node's children with a nil node.
	var path []ast.Node

	// Find the comments associated with node. If the node does not have its own
	// comments, scan upward for a statement containing the node.
	commentsFor := func(node ast.Node) []string {
		if cgs := cmap[node]; cgs != nil {
			return flattenComments(cgs)
		}
		// Start at len(path)-2 to skip node itself, which is on top of the stack.
		for i := len(path) - 2; i >= 0; i-- {
			if _, ok := path[i].(ast.Stmt); !ok {
				continue
			}
			// Only the innermost enclosing statement is consulted; comments on
			// statements further out are not attributed to this call.
			if cgs := cmap[path[i]]; cgs != nil {
				return flattenComments(cgs)
			}
			break
		}
		return nil
	}

	v := &visitor{
		visit: func(node ast.Node) error {
			if node == nil {
				path = path[:len(path)-1]
				return nil
			}
			path = append(path, node)

			call, ok := node.(*ast.CallExpr)
			if !ok {
				return nil
			}
			id, ok := call.Fun.(*ast.Ident)
			if !ok || !isBuiltin[id.Name] {
				return nil
			}
			return f(Call{
				Name: id.Name,
				Call: call,
				Site: fset.Position(call.Pos()),
				// Copy the stack: its backing array is overwritten as the
				// traversal continues, which would otherwise corrupt the Path
				// of any Call the callback keeps.
				Path:     append([]ast.Node(nil), path...),
				Comments: commentsFor(node),
			})
		},
	}
	ast.Walk(v, file)
	return v.err
}
// visitor adapts an error-returning callback to the ast.Visitor interface.
// The first error reported by the callback is recorded in err, after which the
// callback is not invoked again.
type visitor struct {
	err   error
	visit func(ast.Node) error
}

// Visit implements ast.Visitor. It forwards node to the callback unless an
// error has already been recorded. It returns v to continue the traversal, or
// nil once an error has been recorded.
func (v *visitor) Visit(node ast.Node) ast.Visitor {
	if v.err != nil {
		return nil
	}
	if v.err = v.visit(node); v.err != nil {
		return nil
	}
	return v
}

View File

@@ -0,0 +1,101 @@
package bicall_test
import (
"log"
"sort"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/tendermint/tendermint/tools/panic/bicall"
)
// TestParse checks that Parse reports exactly the built-in call sites marked
// in testInput, and that each site carries its marker comment.
func TestParse(t *testing.T) {
	want := mustFindNeedles(testInput)
	sortByLocation(want)

	var got []needle
	record := func(c bicall.Call) error {
		got = append(got, needle{Name: c.Name, Line: c.Site.Line, Col: c.Site.Column - 1})
		t.Logf("Found call site for %q at %v", c.Name, c.Site)

		// Verify that the indicator comment shows up attributed to the site.
		if tag := "@" + c.Name; len(c.Comments) != 1 || c.Comments[0] != tag {
			t.Errorf("Wrong comment at %v: got %+q, want [%q]", c.Site, c.Comments, tag)
		}
		return nil
	}
	if err := bicall.Parse(strings.NewReader(testInput), "testinput.go", record); err != nil {
		t.Fatalf("Parse unexpectedly failed: %v", err)
	}

	sortByLocation(got)
	if diff := cmp.Diff(want, got); diff != "" {
		t.Errorf("Call site mismatch: (-want, +got)\n%s", diff)
	}
}
// sortByLocation reorders ns in place by ascending line, breaking ties by
// ascending column. The particular order carries no meaning; it only has to be
// consistent so that expected and actual results can be compared.
func sortByLocation(ns []needle) {
	sort.Slice(ns, func(i, j int) bool {
		a, b := ns[i], ns[j]
		if a.Line != b.Line {
			return a.Line < b.Line
		}
		return a.Col < b.Col
	})
}
// testInput is the Go source text scanned by TestParse.
//
// To add call sites to the test, include a trailing line comment having the
// form "//@name", where "name" is a built-in function name.
// The first offset of that name on the line prior to the comment will become
// an expected call site for that function, so the name must not occur earlier
// on the line than the call it marks.
const testInput = `
package testinput
func main() {
defer func() {
x := recover() //@recover
if x != nil {
println("whoa") //@println
}
}()
ip := new(int) //@new
*ip = 3 + copy([]byte{}, "") //@copy
panic(fmt.Sprintf("ip=%p", ip)) //@panic
}
`
// A needle is a name at a location in the source, that is expected to be
// located in a scan of the input for built-in calls.
// Line is a 1-based line number and Col is a 0-based byte offset within that
// line, as computed by mustFindNeedles.
// N.B. Fields are exported to allow comparison by the cmp package.
type needle struct {
Name string
Line, Col int
}
// mustFindNeedles scans src line by line for "//@name" markers and returns one
// needle per marked line. The needle is located at the first occurrence of the
// name in the text preceding the marker. It panics if the name does not occur
// there.
func mustFindNeedles(src string) []needle {
	const marker = "//@"

	var needles []needle
	for i, raw := range strings.Split(src, "\n") {
		at := strings.Index(raw, marker)
		if at < 0 {
			continue // no needle on this line
		}
		code := raw[:at]
		name := strings.TrimSpace(raw[at+len(marker):])

		col := strings.Index(code, name)
		if col < 0 {
			log.Panicf("No match for %q on line %d of test input", name, i+1)
		}
		needles = append(needles, needle{Name: name, Line: i + 1, Col: col})
	}
	return needles
}

View File

@@ -0,0 +1,115 @@
package callgraph
import (
"fmt"
"go/ast"
"go/types"
"sort"
"golang.org/x/tools/go/loader"
)
// Triple describes a single reference reported by Graph.Process: the entity
// containing the reference, the function referred to, and where the reference
// occurs in the source.
type Triple struct {
Caller Entry
Target Entry
Site Location
}
// Entry identifies one end of a Triple by package and name.
type Entry struct {
Package string // canonical import path ("" for a built-in target)
Name string // name relative to the package; for a caller outside any function declaration, Process reports the source file name
}
// Location is a position in a source file, as reported in a Triple.
type Location struct {
Path string // file name of the source file
Offset int // 0-based
Line, Col int // 1-based line number; 0-based byte column within the line
}
// Graph collects the packages to analyze and reports the function references
// found in them. Construct one with New, register packages with Import or
// ImportWithTests, and then call Process.
type Graph struct {
cfg *loader.Config
}
// New returns a Graph with no packages registered. Its loader is configured
// to type-check function bodies only for packages whose import path is a key
// of the configuration's ImportPkgs map.
func New() *Graph {
	cfg := &loader.Config{}
	importedOnly := func(importPath string) bool {
		_, imported := cfg.ImportPkgs[importPath]
		return imported
	}
	cfg.TypeCheckFuncBodies = importedOnly
	return &Graph{cfg: cfg}
}
// Import registers the package with the given import path with the
// underlying loader configuration.
func (g *Graph) Import(ipath string) {
	g.cfg.Import(ipath)
}
// ImportWithTests registers the package with the given import path with the
// underlying loader configuration, using the loader's ImportWithTests.
func (g *Graph) ImportWithTests(ipath string) {
	g.cfg.ImportWithTests(ipath)
}
// Process loads the registered packages and calls f once for every identifier
// in their source files that refers to an object of function type or to a
// built-in function.
//
// For each report, Caller names the enclosing top-level function declaration
// (or the source file name when the identifier is outside any function
// declaration), Target names the object referred to, and Site is the position
// of the identifier. Target.Package is empty for built-ins and for other
// objects that do not belong to a package, such as the Error method of the
// predeclared error type.
//
// Packages are visited in order of import path. Process returns an error only
// if the program cannot be loaded; that error wraps the loader's error.
func (g *Graph) Process(f func(*Triple)) error {
	pgm, err := g.cfg.Load()
	if err != nil {
		return fmt.Errorf("loading program: %w", err)
	}

	// Map iteration order is random; sort so that reports are deterministic.
	pkgs := make([]*loader.PackageInfo, 0, len(pgm.Imported))
	for _, pkg := range pgm.Imported {
		pkgs = append(pkgs, pkg)
	}
	sort.Slice(pkgs, func(i, j int) bool {
		return pkgs[i].Pkg.Path() < pkgs[j].Pkg.Path()
	})

	for _, pkg := range pkgs {
		for _, file := range pkg.Files {
			fname := g.cfg.Fset.Position(file.Pos()).Filename

			// nodes is the stack of AST nodes enclosing the current one.
			var nodes []ast.Node

			// parent reports the name of the innermost enclosing function
			// declaration, or the file name if there is none.
			parent := func() string {
				for i := len(nodes) - 1; i >= 0; i-- {
					if decl, ok := nodes[i].(*ast.FuncDecl); ok {
						return decl.Name.Name
					}
				}
				return fname
			}

			ast.Walk(visitFunc(func(node ast.Node) {
				if node == nil {
					// End of the children of the node on top of the stack.
					nodes = nodes[:len(nodes)-1]
					return
				}
				nodes = append(nodes, node)

				id, ok := node.(*ast.Ident)
				if !ok {
					return
				}
				ref := pkg.Info.Uses[id]
				if ref == nil {
					return // no referent
				}

				var refPath string
				if _, ok := ref.Type().(*types.Signature); ok {
					// OK, function. Objects in the universe scope have no
					// package, so Pkg must be checked before it is used.
					if p := ref.Pkg(); p != nil {
						refPath = p.Path()
					}
				} else if _, ok := ref.(*types.Builtin); !ok {
					return // not a function call or reference
				}

				pos := g.cfg.Fset.Position(id.Pos())
				f(&Triple{
					Caller: Entry{Package: pkg.Pkg.Path(), Name: parent()},
					Target: Entry{Package: refPath, Name: ref.Name()},
					Site: Location{
						Path:   pos.Filename,
						Offset: pos.Offset,
						Line:   pos.Line,
						Col:    pos.Column - 1,
					},
				})
			}), file)
		}
	}
	return nil
}
// visitFunc adapts a plain function to the ast.Visitor interface.
type visitFunc func(ast.Node)

// Visit implements ast.Visitor by calling v with n. It always returns v, so a
// walk descends into every node and v is also called with nil after the
// children of each node have been visited.
func (v visitFunc) Visit(n ast.Node) ast.Visitor {
	v(n)
	return v
}

View File

@@ -0,0 +1,19 @@
package callgraph_test
import (
"testing"
"github.com/tendermint/tendermint/tools/panic/callgraph"
)
// TestStub runs Process over the consensus package and its tests, logging the
// site of every reference whose target is named "panic". It fails only if
// Process reports an error.
func TestStub(t *testing.T) {
	g := callgraph.New()
	g.ImportWithTests("github.com/tendermint/tendermint/internal/consensus")

	logPanics := func(cg *callgraph.Triple) {
		if cg.Target.Name != "panic" {
			return
		}
		t.Logf("Panic call at %v", cg.Site)
	}
	if err := g.Process(logPanics); err != nil {
		t.Fatal(err)
	}
}

View File

@@ -0,0 +1,131 @@
// Program findbuiltin locates calls to built-in functions in Go source
// code, and writes a machine-readable log of where those calls occur.
package main
import (
"bufio"
"encoding/json"
"flag"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"
"github.com/tendermint/tendermint/tools/panic/bicall"
)
// Command-line settings. They are bound to flags in init and receive their
// values when main calls flag.Parse.
var (
matchNames []string // function names to match (empty matches every built-in)
doPipe bool // read paths from stdin
doSkipMissing bool // do not fail for missing files
)
// init registers the command-line flags and installs a usage message that
// describes both invocation forms followed by the flag defaults.
func init() {
flag.Var(stringList{&matchNames}, "match", `Comma-separated function names to select ("" for all)`)
flag.BoolVar(&doPipe, "pipe", false, "Read paths from stdin, one per line")
flag.BoolVar(&doSkipMissing, "skip-missing", false, "Ignore input paths that are not found")
// The usage text uses the explicit argument index %[1]s so that the single
// program-name argument can be substituted in both synopsis lines.
flag.Usage = func() {
fmt.Fprintf(os.Stderr, `Usage: %[1]s [options] path... : process the named source files
%[1]s [options] -pipe : process paths from stdin
Scan the specified Go source files for calls to built-in functions, and print a
log of those calls to stdout.
With -pipe, the program reads paths from stdin, one path per line.
In this case, paths given on the command-line are consumed first.
Options:
`, filepath.Base(os.Args[0]))
flag.PrintDefaults()
}
}
// main processes each path given on the command line and then, if -pipe is
// set, each path read from stdin (one per line). It exits with a fatal log
// message if the input cannot be scanned; failures on individual files are
// handled by mustProcessFile.
func main() {
	flag.Parse()

	// The default input consists of the command-line arguments, one per line.
	args := strings.Join(flag.Args(), "\n")
	inputs := []io.Reader{strings.NewReader(args)}

	// If -pipe is given, also concatenate the contents of stdin.
	if doPipe {
		// The joined arguments do not end with a newline. Without a separator
		// the last argument would be fused with the first line of stdin and
		// read as a single bogus path.
		if args != "" && !strings.HasSuffix(args, "\n") {
			inputs = append(inputs, strings.NewReader("\n"))
		}
		inputs = append(inputs, os.Stdin)
	}

	lines := bufio.NewScanner(io.MultiReader(inputs...))
	for lines.Scan() {
		mustProcessFile(lines.Text())
	}
	if err := lines.Err(); err != nil {
		log.Fatalf("Error scanning: %v", err)
	}
}
// mustProcessFile scans the Go source file at path for calls to built-in
// functions and prints one JSON object per selected call to stdout.
//
// A file that does not exist is logged and skipped when -skip-missing is set.
// Any other failure to open, parse, or encode terminates the program.
func mustProcessFile(path string) {
	f, err := os.Open(path)
	if err != nil {
		if doSkipMissing && os.IsNotExist(err) {
			log.Printf("File not found: %q [skipped]", path)
			return
		}
		log.Fatal(err)
	}
	defer f.Close()

	// record is the shape of one line of output.
	type record struct {
		Name string   `json:"name"`
		Path string   `json:"path"`
		Line int      `json:"line"`
		Col  int      `json:"col"`
		Com  []string `json:"comments"`
	}

	emit := func(c bicall.Call) error {
		if !wantFunction(c.Name) {
			return nil
		}
		bits, err := json.Marshal(record{
			Name: c.Name,
			Path: c.Site.Filename,
			Line: c.Site.Line,
			Col:  c.Site.Column - 1, // report a 0-based column
			Com:  c.Comments,
		})
		if err != nil {
			log.Fatalf("Marshaling output: %v", err)
		}
		fmt.Println(string(bits))
		return nil
	}
	if err := bicall.Parse(f, path, emit); err != nil {
		log.Fatalf("Parsing %q failed: %v", path, err)
	}
}
// wantFunction reports whether calls to the named function should be printed.
// Every name is wanted when matchNames is empty; otherwise the name must equal
// one of its entries.
func wantFunction(name string) bool {
	wanted := len(matchNames) == 0
	for i := 0; !wanted && i < len(matchNames); i++ {
		wanted = matchNames[i] == name
	}
	return wanted
}
// stringList is a flag value that stores a comma-separated list of strings
// into the slice that v points to.
type stringList struct{ v *[]string }

// Set replaces the target slice with the comma-separated fields of v. An
// empty v sets the target to nil. It always returns nil.
func (s stringList) Set(v string) error {
	if v == "" {
		*s.v = nil
		return nil
	}
	*s.v = strings.Split(v, ",")
	return nil
}

// String returns the current list joined with commas. It returns "" when the
// receiver has no target slice.
func (s stringList) String() string {
	if s.v != nil {
		return strings.Join(*s.v, ",")
	}
	return ""
}