From b8197ffcdddf1915c85f9d73718f0c953d470139 Mon Sep 17 00:00:00 2001 From: "M. J. Fromberger" Date: Fri, 18 Feb 2022 05:30:00 -0800 Subject: [PATCH] Tools for analyzing use of built-in functions. --- go.mod | 5 +- tools/panic/bicall/bicall.go | 128 ++++++++++++++++++++ tools/panic/bicall/bicall_test.go | 101 ++++++++++++++++ tools/panic/callgraph/callgraph.go | 115 ++++++++++++++++++ tools/panic/callgraph/callgraph_test.go | 19 +++ tools/panic/cmd/findbuiltin/findbuiltin.go | 131 +++++++++++++++++++++ 6 files changed, 497 insertions(+), 2 deletions(-) create mode 100644 tools/panic/bicall/bicall.go create mode 100644 tools/panic/bicall/bicall_test.go create mode 100644 tools/panic/callgraph/callgraph.go create mode 100644 tools/panic/callgraph/callgraph_test.go create mode 100644 tools/panic/cmd/findbuiltin/findbuiltin.go diff --git a/go.mod b/go.mod index f238b242e..776d59b0d 100644 --- a/go.mod +++ b/go.mod @@ -32,16 +32,18 @@ require ( golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e golang.org/x/net v0.0.0-20220617184016-355a448f1bc9 golang.org/x/sync v0.0.0-20220513210516-0976fa681c29 + golang.org/x/tools v0.1.12-0.20220628192153-7743d1d949f1 google.golang.org/grpc v1.48.0 pgregory.net/rapid v0.4.8 ) +require github.com/google/go-cmp v0.5.8 + require ( github.com/bufbuild/buf v1.4.0 github.com/creachadair/atomicfile v0.2.6 github.com/creachadair/taskgroup v0.3.2 github.com/golangci/golangci-lint v1.47.2 - github.com/google/go-cmp v0.5.8 github.com/vektra/mockery/v2 v2.14.0 gotest.tools v2.2.0+incompatible ) @@ -226,7 +228,6 @@ require ( golang.org/x/sys v0.0.0-20220702020025-31831981b65f // indirect golang.org/x/term v0.0.0-20220526004731-065cf7ba2467 // indirect golang.org/x/text v0.3.7 // indirect - golang.org/x/tools v0.1.12-0.20220628192153-7743d1d949f1 // indirect google.golang.org/genproto v0.0.0-20220519153652-3a47de7e79bd // indirect google.golang.org/protobuf v1.28.0 // indirect gopkg.in/ini.v1 v1.66.6 // indirect diff --git 
// isBuiltin is the set of identifier names that Parse reports as built-in
// functions. Matching is by name only; no type information is consulted, so a
// program that declares its own function with one of these names is reported
// as well.
//
// See https://golang.org/ref/spec#Built-in_functions
var isBuiltin = map[string]bool{
	"append":  true,
	"cap":     true,
	"close":   true,
	"complex": true,
	"copy":    true,
	"delete":  true,
	"imag":    true,
	"len":     true,
	"make":    true,
	"new":     true,
	"panic":   true,
	"print":   true,
	"println": true,
	"real":    true,
	"recover": true,
}

// Call represents a call to a built-in function in a source program.
type Call struct {
	Name     string         // the name of the built-in function
	Call     *ast.CallExpr  // the call expression in the AST
	Site     token.Position // the location of the call
	Path     []ast.Node     // the AST path from the file root down to and including the call
	Comments []string       // comments attributed to the call site by the parser
}

// flattenComments extracts the text of the given comment groups and removes
// the trailing newline from each. It returns nil when cgs is empty.
func flattenComments(cgs []*ast.CommentGroup) (out []string) {
	for _, cg := range cgs {
		out = append(out, strings.TrimSuffix(cg.Text(), "\n"))
	}
	return out
}

// Parse parses the contents of r as a Go source file, and calls f for each
// call expression targeting a built-in function that occurs in the resulting
// AST. Location information about each call is attributed to the specified
// filename.
//
// Each Call passed to f carries its own copy of the AST path, so f may retain
// the value after it returns.
//
// If f reports an error, traversal stops and that error is reported to the
// caller of Parse. A failure to parse the input is reported as a wrapped error.
func Parse(r io.Reader, filename string, f func(Call) error) error {
	fset := token.NewFileSet()
	file, err := parser.ParseFile(fset, filename, r, parser.ParseComments)
	if err != nil {
		return fmt.Errorf("parsing %q: %w", filename, err)
	}
	cmap := ast.NewCommentMap(fset, file, file.Comments)

	// path is the stack of nodes from the file root to the node currently
	// being visited. It is pushed on entry to a node and popped when the
	// walker signals the end of that node's children with a nil node.
	var path []ast.Node

	// Find the comments associated with node. If the node does not have its own
	// comments, scan upward for a statement containing the node. Only the
	// innermost enclosing statement is consulted.
	commentsFor := func(node ast.Node) []string {
		if cgs := cmap[node]; cgs != nil {
			return flattenComments(cgs)
		}
		for i := len(path) - 2; i >= 0; i-- {
			if _, ok := path[i].(ast.Stmt); !ok {
				continue
			}
			if cgs := cmap[path[i]]; cgs != nil {
				return flattenComments(cgs)
			}
			break
		}
		return nil
	}

	v := &visitor{
		visit: func(node ast.Node) error {
			if node == nil {
				path = path[:len(path)-1]
				return nil
			}
			path = append(path, node)

			call, ok := node.(*ast.CallExpr)
			if !ok {
				return nil
			}
			id, ok := call.Fun.(*ast.Ident)
			if !ok || !isBuiltin[id.Name] {
				return nil
			}
			return f(Call{
				Name: id.Name,
				Call: call,
				Site: fset.Position(call.Pos()),
				// Copy the stack: its backing array is reused as the walk
				// continues, which would otherwise rewrite a retained Path.
				Path:     append([]ast.Node(nil), path...),
				Comments: commentsFor(node),
			})
		},
	}
	ast.Walk(v, file)
	return v.err
}

// visitor adapts a callback to the ast.Visitor interface, remembering the
// first error the callback reports.
type visitor struct {
	err   error
	visit func(ast.Node) error
}

// Visit invokes the callback for node unless an error has already been
// recorded. Once an error is recorded it returns nil so that ast.Walk does
// not descend any further.
func (v *visitor) Visit(node ast.Node) ast.Visitor {
	if v.err == nil {
		v.err = v.visit(node)
	}
	if v.err != nil {
		return nil
	}
	return v
}
var got []needle + err := bicall.Parse(strings.NewReader(testInput), "testinput.go", func(c bicall.Call) error { + got = append(got, needle{ + Name: c.Name, + Line: c.Site.Line, + Col: c.Site.Column - 1, + }) + t.Logf("Found call site for %q at %v", c.Name, c.Site) + + // Verify that the indicator comment shows up attributed to the site. + tag := "@" + c.Name + if len(c.Comments) != 1 || c.Comments[0] != tag { + t.Errorf("Wrong comment at %v: got %+q, want [%q]", c.Site, c.Comments, tag) + } + return nil + }) + if err != nil { + t.Fatalf("Parse unexpectedly failed: %v", err) + } + sortByLocation(got) + + if diff := cmp.Diff(want, got); diff != "" { + t.Errorf("Call site mismatch: (-want, +got)\n%s", diff) + } +} + +// sortByLocation permutes ns in-place to be ordered by line and column. +// The specific ordering rule is not important; we just need a consistent order +// for comparison of test results. +func sortByLocation(ns []needle) { + sort.Slice(ns, func(i, j int) bool { + if ns[i].Line == ns[j].Line { + return ns[i].Col < ns[j].Col + } + return ns[i].Line < ns[j].Line + }) +} + +// To add call sites to the test, include a trailing line comment having the +// form "//@name", where "name" is a built-in function name. +// The first offset of that name on the line prior to the comment will become +// an expected call site for that function. +const testInput = ` +package testinput +func main() { + defer func() { + x := recover() //@recover + if x != nil { + println("whoa") //@println + } + }() + ip := new(int) //@new + *ip = 3 + copy([]byte{}, "") //@copy + panic(fmt.Sprintf("ip=%p", ip)) //@panic +} +` + +// A needle is a name at a location in the source, that is expected to be +// located in a scan of the input for built-in calls. +// N.B. Fields are exported to allow comparison by the cmp package. 
+type needle struct { + Name string + Line, Col int +} + +func mustFindNeedles(src string) []needle { + var needles []needle + for i, raw := range strings.Split(src, "\n") { + tag := strings.SplitN(raw, "//@", 2) + if len(tag) == 1 { + continue // no needle on this line + } + name := strings.TrimSpace(tag[1]) + col := strings.Index(tag[0], name) + if col < 0 { + log.Panicf("No match for %q on line %d of test input", name, i+1) + } + needles = append(needles, needle{ + Name: name, + Line: i + 1, + Col: col, + }) + } + return needles +} diff --git a/tools/panic/callgraph/callgraph.go b/tools/panic/callgraph/callgraph.go new file mode 100644 index 000000000..c0b9a3f89 --- /dev/null +++ b/tools/panic/callgraph/callgraph.go @@ -0,0 +1,115 @@ +package callgraph + +import ( + "fmt" + "go/ast" + "go/types" + "sort" + + "golang.org/x/tools/go/loader" +) + +type Triple struct { + Caller Entry + Target Entry + Site Location +} + +type Entry struct { + Package string // canonical import path + Name string // name relative to the package ("" for calls at file scope) +} + +type Location struct { + Path string + Offset int // 0-based + Line, Col int // 1-based line, 0-based byte offset +} + +type Graph struct { + cfg *loader.Config +} + +func New() *Graph { + cfg := new(loader.Config) + cfg.TypeCheckFuncBodies = func(ip string) bool { + _, ok := cfg.ImportPkgs[ip] + return ok + } + return &Graph{cfg: cfg} +} + +func (g *Graph) Import(ipath string) { g.cfg.Import(ipath) } + +func (g *Graph) ImportWithTests(ipath string) { g.cfg.ImportWithTests(ipath) } + +func (g *Graph) Process(f func(*Triple)) error { + pgm, err := g.cfg.Load() + if err != nil { + return fmt.Errorf("loading program: %v", err) + } + var pkgs []*loader.PackageInfo + for _, pkg := range pgm.Imported { + pkgs = append(pkgs, pkg) + } + sort.Slice(pkgs, func(i, j int) bool { + return pkgs[i].Pkg.Path() < pkgs[j].Pkg.Path() + }) + + for _, pkg := range pkgs { + for _, file := range pkg.Files { + fname := 
g.cfg.Fset.Position(file.Pos()).Filename + + var nodes []ast.Node + parent := func() string { + for i := len(nodes) - 1; i >= 0; i-- { + switch t := nodes[i].(type) { + case *ast.FuncDecl: + return t.Name.Name + } + } + return fname + } + + ast.Walk(visitFunc(func(node ast.Node) { + if node == nil { + nodes = nodes[:len(nodes)-1] + return + } + nodes = append(nodes, node) + + switch t := node.(type) { + case *ast.Ident: + ref := pkg.Info.Uses[t] + if ref == nil { + return // no referent + } + var refPath string + if _, ok := ref.Type().(*types.Signature); ok { + refPath = ref.Pkg().Path() // OK, function + } else if _, ok := ref.(*types.Builtin); ok { + // OK, builtin + } else { + return // not a function call or reference + } + pos := g.cfg.Fset.Position(t.Pos()) + f(&Triple{ + Caller: Entry{Package: pkg.Pkg.Path(), Name: parent()}, + Target: Entry{Package: refPath, Name: ref.Name()}, + Site: Location{ + Path: pos.Filename, + Offset: pos.Offset, + Line: pos.Line, + Col: pos.Column - 1, + }, + }) + } + }), file) + } + } + return nil +} + +type visitFunc func(ast.Node) + +func (v visitFunc) Visit(n ast.Node) ast.Visitor { v(n); return v } diff --git a/tools/panic/callgraph/callgraph_test.go b/tools/panic/callgraph/callgraph_test.go new file mode 100644 index 000000000..46d966b3f --- /dev/null +++ b/tools/panic/callgraph/callgraph_test.go @@ -0,0 +1,19 @@ +package callgraph_test + +import ( + "testing" + + "github.com/tendermint/tendermint/tools/panic/callgraph" +) + +func TestStub(t *testing.T) { + g := callgraph.New() + g.ImportWithTests("github.com/tendermint/tendermint/internal/consensus") + if err := g.Process(func(cg *callgraph.Triple) { + if cg.Target.Name == "panic" { + t.Logf("Panic call at %v", cg.Site) + } + }); err != nil { + t.Fatal(err) + } +} diff --git a/tools/panic/cmd/findbuiltin/findbuiltin.go b/tools/panic/cmd/findbuiltin/findbuiltin.go new file mode 100644 index 000000000..f950baeb9 --- /dev/null +++ b/tools/panic/cmd/findbuiltin/findbuiltin.go 
@@ -0,0 +1,131 @@ +// Program findbuiltin locates calls to built-in functions in Go source +// code, and writes a machine-readable log of where those calls occur. +package main + +import ( + "bufio" + "encoding/json" + "flag" + "fmt" + "io" + "log" + "os" + "path/filepath" + "strings" + + "github.com/tendermint/tendermint/tools/panic/bicall" +) + +var ( + matchNames []string // function names to match + doPipe bool // read paths from stdin + doSkipMissing bool // do not fail for missing files +) + +func init() { + flag.Var(stringList{&matchNames}, "match", `Comma-separated function names to select ("" for all)`) + flag.BoolVar(&doPipe, "pipe", false, "Read paths from stdin, one per line") + flag.BoolVar(&doSkipMissing, "skip-missing", false, "Ignore input paths that are not found") + + flag.Usage = func() { + fmt.Fprintf(os.Stderr, `Usage: %[1]s [options] path... : process the named source files + %[1]s [options] -pipe : process paths from stdin + +Scan the specified Go source files for calls to built-in functions, and print a +log of those calls to stdout. + +With -pipe, the program reads paths from stdin, one path per line. +In this case, paths given on the command-line are consumed first. + +Options: +`, filepath.Base(os.Args[0])) + flag.PrintDefaults() + } +} + +func main() { + flag.Parse() + + // The default input consists of the command-line arguments. + // If -pipe is given, also concatenate the contents of stdin. 
+ var in io.Reader = strings.NewReader(strings.Join(flag.Args(), "\n")) + if doPipe { + in = io.MultiReader(in, os.Stdin) + } + + lines := bufio.NewScanner(in) + for lines.Scan() { + mustProcessFile(lines.Text()) + } + if err := lines.Err(); err != nil { + log.Fatalf("Error scanning: %v", err) + } +} + +func mustProcessFile(path string) { + f, err := os.Open(path) + if os.IsNotExist(err) && doSkipMissing { + log.Printf("File not found: %q [skipped]", path) + return + } else if err != nil { + log.Fatal(err) + } + defer f.Close() + + if err := bicall.Parse(f, path, func(c bicall.Call) error { + if !wantFunction(c.Name) { + return nil + } + bits, err := json.Marshal(struct { + Name string `json:"name"` + Path string `json:"path"` + Line int `json:"line"` + Col int `json:"col"` + Com []string `json:"comments"` + }{ + Name: c.Name, + Path: c.Site.Filename, + Line: c.Site.Line, + Col: c.Site.Column - 1, + Com: c.Comments, + }) + if err != nil { + log.Fatalf("Marshaling output: %v", err) + } + fmt.Println(string(bits)) + return nil + }); err != nil { + log.Fatalf("Parsing %q failed: %v", path, err) + } +} + +func wantFunction(name string) bool { + if len(matchNames) == 0 { + return true + } + for _, want := range matchNames { + if want == name { + return true + } + } + return false +} + +type stringList struct{ v *[]string } + +func (s stringList) Set(v string) error { + ss := strings.Split(v, ",") + if len(ss) == 1 && ss[0] == "" { + *s.v = nil + } else { + *s.v = ss + } + return nil +} + +func (s stringList) String() string { + if s.v == nil { + return "" + } + return strings.Join(*s.v, ",") +}