blob: f59ae62da9f43ff0c01fbf6c11a3495bc6fe1637 [file] [log] [blame]
package gps
import (
"fmt"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"github.com/Masterminds/semver"
)
// sanitizer turns a URL-shaped input into a friendlier filepath component by
// rewriting every ':', '/' and '+' to '-'.
//
// TODO(sdboyer) this is awful. Right?
var sanitizer = strings.NewReplacer(
	":", "-",
	"/", "-",
	"+", "-",
)
// A SourceManager retrieves, manages, and interrogates source repositories.
// It exists first and foremost to serve a Solver, though it is useful on its
// own as well.
//
// SourceMgr, the implementation shipped with gps, is meant to be generic
// enough for any purpose, and layers some additional semantics on top of the
// methods declared here.
type SourceManager interface {
	// SourceExists reports whether a repository exists for id, either
	// upstream or in the SourceManager's central repository cache.
	SourceExists(id ProjectIdentifier) (bool, error)

	// SyncSourceFor attempts to bring all local information about the source
	// behind id fully up to date.
	SyncSourceFor(id ProjectIdentifier) error

	// ListVersions returns the versions available for the repository behind
	// id.
	ListVersions(id ProjectIdentifier) ([]Version, error)

	// RevisionPresentIn reports whether the Revision r is present in the
	// repository behind id.
	RevisionPresentIn(id ProjectIdentifier, r Revision) (bool, error)

	// ListPackages parses the tree of Go packages at or below the root of id,
	// as of version v.
	ListPackages(id ProjectIdentifier, v Version) (PackageTree, error)

	// GetManifestAndLock returns manifest and lock information for id at
	// version v.
	//
	// gps currently requires that projects be rooted at their repository
	// root, so id's ProjectRoot must also be a repository root.
	GetManifestAndLock(id ProjectIdentifier, v Version) (Manifest, Lock, error)

	// ExportProject writes the tree of id, as of version v, into the
	// directory to.
	ExportProject(id ProjectIdentifier, v Version, to string) error

	// AnalyzerInfo reports the name and version of the logic used to service
	// GetManifestAndLock().
	AnalyzerInfo() (name string, version *semver.Version)

	// DeduceProjectRoot takes an import path and deduces the corresponding
	// project/source root.
	DeduceProjectRoot(ip string) (ProjectRoot, error)
}
// A ProjectAnalyzer is responsible for analyzing a given path for Manifest and
// Lock information. Tools relying on gps must implement one.
//
// NewSourceManager refuses a nil ProjectAnalyzer, and SourceMgr.AnalyzerInfo
// forwards to the analyzer's Info method.
type ProjectAnalyzer interface {
// DeriveManifestAndLock performs analysis of the filesystem tree rooted at
// path, with the root import path importRoot, to determine the project's
// constraints, as indicated by a Manifest and Lock.
DeriveManifestAndLock(path string, importRoot ProjectRoot) (Manifest, Lock, error)
// Info reports the name and version of this ProjectAnalyzer.
Info() (name string, version *semver.Version)
}
// SourceMgr is the default SourceManager for gps.
//
// There's no (planned) reason why it would need to be reimplemented by other
// tools; control via dependency injection is intended to be sufficient.
//
// Instances are created with NewSourceManager, which also takes the on-disk
// cache lock; call Release to give that lock back.
type SourceMgr struct {
// cachedir is the root of the on-disk cache. NewSourceManager creates a
// "sources" directory and the "sm.lock" lock file beneath it.
cachedir string
// lf is the handle to the sm.lock file opened by NewSourceManager; Release
// closes it.
lf *os.File
// srcs holds sources that have already been set up, keyed by the string they
// were requested under and, when one was reported, by the source's ident.
srcs map[string]source
// srcmut guards srcs.
srcmut sync.RWMutex
// an is the injected ProjectAnalyzer.
an ProjectAnalyzer
// dxt is the trie built by pathDeducerTrie(). Presumably consulted when
// deducing a source from a path - confirm against deduceFromPath.
dxt deducerTrie
// rootxt caches import path -> ProjectRoot results so DeduceProjectRoot can
// answer repeat queries without deducing again.
rootxt prTrie
}
// Compile-time assertion that *SourceMgr satisfies SourceManager.
var _ SourceManager = (*SourceMgr)(nil)
// NewSourceManager builds an instance of gps's built-in SourceManager. It is
// given a ProjectAnalyzer, used to extract manifest and lock information from
// source trees, and a cache directory, under which local instances of upstream
// repositories are stored.
//
// A nil analyzer is rejected. The "sources" subdirectory of cachedir is
// created if needed, and an "sm.lock" file is created in cachedir to act as a
// global cache lock; if that file already exists or cannot be created, a
// CouldNotCreateLockError is returned. Call Release on the result to drop the
// lock again.
//
// The returned SourceManager aggressively caches information wherever
// possible. Tools that need to do preliminary upstream repository analysis
// before a solve run should create the SourceManager as early as possible and
// use it for that work, so the solver benefits from the warmed caches.
//
// gps's SourceManager is intended to be threadsafe (if it's not, please file a
// bug!). It should be safe to reuse across concurrent solving runs, even on
// unrelated projects.
func NewSourceManager(an ProjectAnalyzer, cachedir string) (*SourceMgr, error) {
	if an == nil {
		return nil, fmt.Errorf("a ProjectAnalyzer must be provided to the SourceManager")
	}

	if mkErr := os.MkdirAll(filepath.Join(cachedir, "sources"), 0777); mkErr != nil {
		return nil, mkErr
	}

	lockPath := filepath.Join(cachedir, "sm.lock")

	// A successful stat means the lock file is already there.
	if _, statErr := os.Stat(lockPath); statErr == nil {
		return nil, CouldNotCreateLockError{
			Path: lockPath,
			Err:  fmt.Errorf("cache lock file %s exists - another process crashed or is still running?", lockPath),
		}
	}

	// is 0600 sane for this purpose?
	lockFile, openErr := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL, 0600)
	if openErr != nil {
		return nil, CouldNotCreateLockError{
			Path: lockPath,
			Err:  fmt.Errorf("err on attempting to create global cache lock: %s", openErr),
		}
	}

	sm := &SourceMgr{
		cachedir: cachedir,
		lf:       lockFile,
		srcs:     make(map[string]source),
		an:       an,
		dxt:      pathDeducerTrie(),
		rootxt:   newProjectRootTrie(),
	}
	return sm, nil
}
// CouldNotCreateLockError is returned by NewSourceManager when the global
// cache lock file already exists or cannot be created.
type CouldNotCreateLockError struct {
	// Path is the location of the lock file.
	Path string
	// Err describes why the lock could not be taken.
	Err error
}

// Error implements the error interface. It returns the message of the wrapped
// Err. When Err is nil it returns a generic message naming Path instead of
// dereferencing a nil error.
func (e CouldNotCreateLockError) Error() string {
	if e.Err == nil {
		return fmt.Sprintf("could not create lock file %s", e.Path)
	}
	return e.Err.Error()
}
// Release lets go of any locks held by the SourceManager: it closes the lock
// file handle and then removes the "sm.lock" file from the cache directory.
//
// Release has no return value, so errors from closing or removing the file
// are dropped.
func (sm *SourceMgr) Release() {
	lockPath := filepath.Join(sm.cachedir, "sm.lock")

	_ = sm.lf.Close()
	_ = os.Remove(lockPath)
}
// AnalyzerInfo returns the name and version that the injected ProjectAnalyzer
// reports about itself.
func (sm *SourceMgr) AnalyzerInfo() (name string, version *semver.Version) {
	name, version = sm.an.Info()
	return name, version
}
// GetManifestAndLock returns manifest and lock information for the project
// identified by id, at version v. gps currently requires that projects be
// rooted at their repository root, so id's ProjectRoot must also be a
// repository root.
//
// The work of producing the manifest and lock is delegated to the injected
// ProjectAnalyzer's DeriveManifestAndLock() method.
//
// If no source can be obtained for id, that error is returned along with nil
// Manifest and Lock values.
func (sm *SourceMgr) GetManifestAndLock(id ProjectIdentifier, v Version) (Manifest, Lock, error) {
	s, lookupErr := sm.getSourceFor(id)
	if lookupErr != nil {
		return nil, nil, lookupErr
	}

	return s.getManifestAndLock(id.ProjectRoot, v)
}
// ListPackages parses the tree of Go packages at and below the ProjectRoot of
// id, as of version v.
//
// If no source can be obtained for id, that error is returned with an empty
// PackageTree.
func (sm *SourceMgr) ListPackages(id ProjectIdentifier, v Version) (PackageTree, error) {
	s, lookupErr := sm.getSourceFor(id)
	if lookupErr != nil {
		var none PackageTree
		return none, lookupErr
	}

	return s.listPackages(id.ProjectRoot, v)
}
// ListVersions returns the versions available for the repository identified
// by id.
//
// The result is not sorted. It may come back in the order the underlying VCS
// reports version information, but no guarantee is made; callers should
// either not care about order or sort the result themselves.
//
// The list is always retrieved from upstream on the first call, and later
// calls return a cached copy of that first result. If upstream is not
// accessible (network outage, access issues, or the resource actually went
// away), an error is returned.
func (sm *SourceMgr) ListVersions(id ProjectIdentifier) ([]Version, error) {
	s, lookupErr := sm.getSourceFor(id)
	if lookupErr != nil {
		// TODO(sdboyer) More-er proper-er errors
		return nil, lookupErr
	}

	return s.listVersions()
}
// RevisionPresentIn reports whether the Revision r is present in the
// repository identified by id.
func (sm *SourceMgr) RevisionPresentIn(id ProjectIdentifier, r Revision) (bool, error) {
	s, lookupErr := sm.getSourceFor(id)
	if lookupErr != nil {
		// TODO(sdboyer) More-er proper-er errors
		return false, lookupErr
	}

	return s.revisionPresentIn(r)
}
// SourceExists reports whether a repository exists for id, either in the
// cache or upstream. The cache is consulted first; upstream is only checked
// when the cache check comes back negative.
func (sm *SourceMgr) SourceExists(id ProjectIdentifier) (bool, error) {
	s, lookupErr := sm.getSourceFor(id)
	if lookupErr != nil {
		return false, lookupErr
	}

	if s.checkExistence(existsInCache) {
		return true, nil
	}
	return s.checkExistence(existsUpstream), nil
}
// SyncSourceFor ensures that all local caches and information about the
// source identified by id are up to date with any network-accessible
// information.
//
// The primary use case for this is prefetching.
func (sm *SourceMgr) SyncSourceFor(id ProjectIdentifier) error {
	s, lookupErr := sm.getSourceFor(id)
	if lookupErr != nil {
		return lookupErr
	}

	return s.syncLocal()
}
// ExportProject writes the tree of id's ProjectRoot, as of version v, into
// the directory named by to.
func (sm *SourceMgr) ExportProject(id ProjectIdentifier, v Version, to string) error {
	s, lookupErr := sm.getSourceFor(id)
	if lookupErr != nil {
		return lookupErr
	}

	return s.exportVersionTo(v, to)
}
// DeduceProjectRoot takes an import path and deduces the corresponding
// project/source root.
//
// Previously deduced roots are consulted first. A cached entry is only used
// when it is equal to ip, or when it is a prefix of ip that ends on a path
// segment boundary (the byte following the prefix in ip is '/'). Anything
// else falls through to a full deduction.
//
// Note that some import paths may require network activity to correctly
// determine the root of the path, such as, but not limited to, vanity import
// paths. (A special exception is written for gopkg.in to minimize network
// activity, as its behavior is well-structured)
func (sm *SourceMgr) DeduceProjectRoot(ip string) (ProjectRoot, error) {
	if prefix, root, has := sm.rootxt.LongestPrefix(ip); has {
		if prefix == ip {
			return root, nil
		}

		// NOTE(review): this assumes LongestPrefix matches on raw string
		// prefixes - confirm against prTrie. Under that assumption a cached
		// "github.com/foo/bar" also matches "github.com/foo/barbaz", which
		// is a different project, so the match is only trusted when the
		// remainder of ip starts a new path segment.
		if strings.HasPrefix(ip, prefix) && ip[len(prefix)] == '/' {
			// The non-matching tail of the import path could still be
			// malformed. Validate just that part.
			//
			// TODO(sdboyer) commented until i find a proper description of how
			// to validate an import path
			//if !pathvld.MatchString(strings.TrimPrefix(ip, prefix+"/")) {
			//return "", fmt.Errorf("%q is not a valid import path", ip)
			//}

			// Record the exact path so later lookups for it are exact
			// matches and skip this branch.
			sm.rootxt.Insert(ip, root)
			return root, nil
		}
	}

	rootf, _, err := sm.deducePathAndProcess(ip)
	if err != nil {
		return "", err
	}

	r, err := rootf()
	return ProjectRoot(r), err
}
// getSourceFor returns the source for id. The srcs map is checked first, under
// a read lock, using id's net name as the key. On a miss, the net name is run
// through deducePathAndProcess and the resulting source future is triggered.
func (sm *SourceMgr) getSourceFor(id ProjectIdentifier) (source, error) {
	name := id.netName()

	sm.srcmut.RLock()
	cached, found := sm.srcs[name]
	sm.srcmut.RUnlock()
	if found {
		return cached, nil
	}

	_, srcf, err := sm.deducePathAndProcess(name)
	if err != nil {
		return nil, err
	}

	// The ident is of no interest here, and the future produced by
	// deducePathAndProcess dedupes against what is already in sm.srcs.
	s, _, err := srcf()
	return s, err
}
// deducePathAndProcess runs deduction on path and returns two deferred
// futures: one yielding the project root as a string, and one yielding the
// source together with its ident. An error is returned only when
// sm.deduceFromPath fails; in that case both futures are nil.
//
// Neither future does its work until it is first called, so the caller
// decides when to trigger it. The work behind each future runs at most once;
// every call blocks until that work has finished and then returns the same
// stored result. On success the root future records its result in sm.rootxt,
// and the source future reconciles its result with sm.srcs.
func (sm *SourceMgr) deducePathAndProcess(path string) (stringFuture, sourceFuture, error) {
	df, err := sm.deduceFromPath(path)
	if err != nil {
		return nil, nil, err
	}

	var rstart, sstart int32
	rc, sc := make(chan struct{}, 1), make(chan struct{}, 1)

	// Results live outside the closures so that every invocation of a future
	// observes what the single worker goroutine produced. With per-invocation
	// named results, only the call that started the worker would see them and
	// any other call would return zero values. The worker writes these before
	// closing the channel and callers read them only after receiving from it.
	var (
		root    string
		rootErr error
	)

	// Rewrap in a deferred future, so the caller can decide when to trigger it
	rootf := func() (string, error) {
		// CAS because a bad interleaving here would panic on double-closing rc
		if atomic.CompareAndSwapInt32(&rstart, 0, 1) {
			go func() {
				defer close(rc)
				root, rootErr = df.root()
				if rootErr != nil {
					// Don't cache errs. This doesn't really hurt the solver, and is
					// beneficial for other use cases because it means we don't have to
					// expose any kind of controls for clearing caches.
					return
				}

				tpr := ProjectRoot(root)
				sm.rootxt.Insert(root, tpr)
				// It's not harmful if the netname was a URL rather than an
				// import path
				if root != path {
					// Insert the result into the rootxt twice - once at the
					// root itself, so as to catch siblings/relatives, and again
					// at the exact provided import path (assuming they were
					// different), so that on subsequent calls, exact matches
					// are answered straight from the trie.
					sm.rootxt.Insert(path, tpr)
				}
			}()
		}

		<-rc
		return root, rootErr
	}

	// Now, handle the source
	fut := df.psf(sm.cachedir, sm.an)

	var (
		src    source
		ident  string
		srcErr error
	)

	// Rewrap in a deferred future, so the caller can decide when to trigger it
	srcf := func() (source, string, error) {
		// CAS because a bad interleaving here would panic on double-closing sc
		if atomic.CompareAndSwapInt32(&sstart, 0, 1) {
			go func() {
				defer close(sc)
				src, ident, srcErr = fut()
				if srcErr != nil {
					// Don't cache errs. This doesn't really hurt the solver, and is
					// beneficial for other use cases because it means we don't have
					// to expose any kind of controls for clearing caches.
					return
				}

				sm.srcmut.Lock()
				defer sm.srcmut.Unlock()

				// Check to make sure a source hasn't shown up in the meantime, or that
				// there wasn't already one at the ident.
				var hasi, hasp bool
				var srci, srcp source
				if ident != "" {
					srci, hasi = sm.srcs[ident]
				}
				srcp, hasp = sm.srcs[path]

				switch {
				case !hasi && !hasp:
					// Neither the ident nor the input path has an entry for
					// this src: the simple case. Write them both in.
					sm.srcs[path] = src
					if ident != path && ident != "" {
						sm.srcs[ident] = src
					}

				case hasi && !hasp:
					// Already present for ident but not for path: copy ident's
					// src to path. This covers cases like a gopkg.in path
					// referring back onto a github repository, where something
					// else already explicitly looked up that same gh repo.
					sm.srcs[path] = srci
					src = srci

				case hasp && !hasi:
					// Already present for path but not for ident: do NOT copy
					// path's src to ident, but use the returned one instead.
					// Really, this case shouldn't occur at all...? But the
					// crucial thing is that the path-based one has already
					// discovered what actual ident of source they want to use,
					// and changing that arbitrarily would have undefined
					// effects.
					if ident != "" {
						sm.srcs[ident] = src
					}

				default:
					// Both are present: assume we're good, and use the path one.
					//
					// TODO(sdboyer) compare these (somehow? reflect? pointer?) and if they're not the
					// same object, panic
					src = srcp
				}
			}()
		}

		<-sc
		return src, ident, srcErr
	}

	return rootf, srcf, nil
}