Skip to content

Commit

Permalink
Fast import scanning (#195)
Browse files Browse the repository at this point in the history
Adds an experimental new package to do fast scanning of imports. This
elides all of the parsing and processing work and does only enough
lexical analysis to identify import statements and collect all
referenced files.

Co-authored-by: Josh Humphries <[email protected]>
  • Loading branch information
bufdev and jhump authored Nov 8, 2023
1 parent 0897124 commit 146b831
Show file tree
Hide file tree
Showing 3 changed files with 608 additions and 0 deletions.
90 changes: 90 additions & 0 deletions internal/benchmarks/benchmark_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ import (
"sort"
"strings"
"sync"
"sync/atomic"
"testing"
"time"

"github.com/jhump/protoreflect/desc"
"github.com/jhump/protoreflect/desc/protoparse"
"github.com/stretchr/testify/require"
"golang.org/x/sync/errgroup"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/descriptorpb"

Expand All @@ -46,6 +48,7 @@ import (
"github.com/bufbuild/protocompile/internal/protoc"
"github.com/bufbuild/protocompile/linker"
"github.com/bufbuild/protocompile/parser"
"github.com/bufbuild/protocompile/parser/imports"
"github.com/bufbuild/protocompile/protoutil"
"github.com/bufbuild/protocompile/reporter"
)
Expand Down Expand Up @@ -353,6 +356,93 @@ func benchmarkGoogleapisProtoparse(b *testing.B, factory func() *protoparse.Pars
}
}

func BenchmarkGoogleapisScanImports(b *testing.B) {
par := runtime.GOMAXPROCS(-1)
cpus := runtime.NumCPU()
if par > cpus {
par = cpus
}
type entry struct {
filename string
imports []string
}
for i := 0; i < b.N; i++ {
workCh := make(chan string, par)
resultsCh := make(chan entry, par)
grp, ctx := errgroup.WithContext(context.Background())
// producer
grp.Go(func() error {
defer close(workCh)
for _, name := range googleapisSources {
select {
case workCh <- filepath.Join(googleapisDir, name):
case <-ctx.Done():
return ctx.Err()
}
}
return nil
})
var numProcs atomic.Int32
numProcs.Store(int32(par))
for i := 0; i < par; i++ {
// consumers/processors
grp.Go(func() error {
defer func() {
if numProcs.Add(-1) == 0 {
// last one to leave closes the channel
close(resultsCh)
}
}()
for {
var filename string
select {
case name, ok := <-workCh:
if !ok {
return nil
}
filename = name
case <-ctx.Done():
return ctx.Err()
}
r, err := os.Open(filename)
var imps []string
if err != nil {
return err
}
imps, err = imports.ScanForImports(r)
_ = r.Close()
if err != nil {
return err
}
select {
case resultsCh <- entry{filename: filename, imports: imps}:
case <-ctx.Done():
return ctx.Err()
}
}
})
}
results := make(map[string][]string, len(googleapisSources))
grp.Go(func() error {
// accumulator
for {
select {
case entry, ok := <-resultsCh:
if !ok {
return nil
}
results[entry.filename] = entry.imports
case <-ctx.Done():
return ctx.Err()
}
}
})

err := grp.Wait()
require.NoError(b, err)
}
}

// BenchmarkGoogleapisProtoc delegates to benchmarkGoogleapisProtoc, passing
// "--include_source_info" as the additional argument.
func BenchmarkGoogleapisProtoc(b *testing.B) {
	const sourceInfoFlag = "--include_source_info"
	benchmarkGoogleapisProtoc(b, sourceInfoFlag)
}
Expand Down
73 changes: 73 additions & 0 deletions parser/imports/fast_imports.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Copyright 2020-2023 Buf Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package imports

import (
"io"
"strings"
)

// closeSymbol maps each opening delimiter token type (parenthesis, brace,
// bracket, and angle bracket) to the token type of the closing delimiter that
// matches it. ScanForImports uses it to record which closing token it expects
// next while tracking nesting depth.
var closeSymbol = map[tokenType]tokenType{
openParenToken: closeParenToken,
openBraceToken: closeBraceToken,
openBracketToken: closeBracketToken,
openAngleToken: closeAngleToken,
}

// ScanForImports scans the given reader, which should contain Protobuf source, and
// returns the set of imports declared in the file. It returns an error if there is
// an I/O error reading from r. In the event of such an error, it will still return
// a slice of imports that contains as many imports as were found before the I/O
// error occurred.
//
// The scan is purely lexical; the source is not parsed or validated. An import
// is recognized as the identifier "import" appearing outside of any
// parentheses, braces, brackets, or angle brackets, optionally followed by a
// "public" or "weak" modifier, and then one or more string literals. Adjacent
// string literals are concatenated into a single import path. The import ends
// at the first token that is not a string literal, or at the end of the input.
func ScanForImports(r io.Reader) ([]string, error) {
	var imports []string
	// contextStack holds the closing token expected for each delimiter that
	// is currently open; it is empty when the scanner is at the top level.
	var contextStack []tokenType
	// currentImport is non-nil only while inside an import statement. It
	// accumulates the string literals that make up the import path.
	var currentImport []string
	lexer := newLexer(r)
	for {
		token, text, err := lexer.Lex()
		if err != nil {
			// An import still being accumulated is not reported since its
			// path may be incomplete.
			return imports, err
		}
		if token == eofToken {
			// Input ended right after the path (no terminating token), so
			// report the pending import instead of silently dropping it.
			if len(currentImport) > 0 {
				imports = append(imports, strings.Join(currentImport, ""))
			}
			return imports, nil
		}

		if currentImport != nil {
			switch {
			case token == stringToken:
				currentImport = append(currentImport, text.(string))
			case token == identifierToken && len(currentImport) == 0 &&
				(text == "public" || text == "weak"):
				// Modifier between "import" and the path: stay inside the
				// import statement so the path that follows is collected.
				continue
			default:
				// Any other token ends the import statement.
				if len(currentImport) > 0 {
					imports = append(imports, strings.Join(currentImport, ""))
				}
				currentImport = nil
			}
		}

		switch token {
		case openParenToken, openBraceToken, openBracketToken, openAngleToken:
			contextStack = append(contextStack, closeSymbol[token])
		case closeParenToken, closeBraceToken, closeBracketToken, closeAngleToken:
			// Only pop on a matching closer; mismatched closers are ignored
			// so that malformed input cannot underflow the stack.
			if len(contextStack) > 0 && contextStack[len(contextStack)-1] == token {
				contextStack = contextStack[:len(contextStack)-1]
			}
		case identifierToken:
			// "import" is only a keyword at the top level; inside a nested
			// context it is an ordinary identifier (e.g. a field name).
			if text == "import" && len(contextStack) == 0 {
				currentImport = []string{}
			}
		}
	}
}
Loading

0 comments on commit 146b831

Please sign in to comment.