Skip to content

Commit

Permalink
Pauldorsch/reconcile dependency graph logic (#1183)
Browse files Browse the repository at this point in the history
* reconcile dependency graph logic

* handle null / empty conditional vars

* remove files

* current pip detector case insensitive metadata file match

* some cleanup

* fix tests

* test with reverted reqs

* Revert "test with reverted reqs"

This reverts commit 293a4b5.

* disable parallelism for all but pip report

* whitespace

* pr feedback, fix ignore packages, bump versions
  • Loading branch information
pauld-msft authored Jun 21, 2024
1 parent 2284e06 commit c20c3b0
Show file tree
Hide file tree
Showing 13 changed files with 185 additions and 22 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
namespace Microsoft.ComponentDetection.Contracts;
namespace Microsoft.ComponentDetection.Contracts;

using System;
using System.Collections.Generic;
Expand Down Expand Up @@ -65,6 +65,8 @@ public abstract class FileComponentDetector : IComponentDetector

protected IObservable<IComponentStream> ComponentStreams { get; private set; }

protected virtual bool EnableParallelism { get; set; }

/// <inheritdoc />
public async virtual Task<IndividualDetectorScanResult> ExecuteDetectorAsync(ScanRequest request, CancellationToken cancellationToken = default)
{
Expand Down Expand Up @@ -113,7 +115,7 @@ private async Task<IndividualDetectorScanResult> ProcessAsync(IObservable<Proces
new ExecutionDataflowBlockOptions
{
// MaxDegreeOfParallelism is the lower of the processor count and the max threads arg that the customer passed in
MaxDegreeOfParallelism = Math.Min(Environment.ProcessorCount, maxThreads),
MaxDegreeOfParallelism = this.EnableParallelism ? Math.Min(Environment.ProcessorCount, maxThreads) : 1,
});

var preprocessedObserbable = await this.OnPrepareDetectionAsync(processRequests, detectorArgs);
Expand Down
3 changes: 2 additions & 1 deletion src/Microsoft.ComponentDetection.Contracts/ScanRequest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ public ScanRequest(DirectoryInfo sourceDirectory, ExcludeDirectoryPredicate dire
public IComponentRecorder ComponentRecorder { get; private set; }

/// <summary>
/// Gets the maximum number of threads to use in parallel for executing the detection.
/// Gets the maximum number of threads to use in parallel for executing the detection, assuming parallelism is
/// enabled for the detector.
/// </summary>
public int MaxThreads { get; private set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,10 @@ public interface IPythonCommandService
Task<IList<(string PackageString, GitComponent Component)>> ParseFileAsync(string path, string pythonPath = null);

Task<string> GetPythonVersionAsync(string pythonPath = null);

/// <summary>
/// Gets the os type using: https://docs.python.org/3/library/sys.html#sys.platform .
/// </summary>
/// <returns>OS type where the python script runs.</returns>
Task<string> GetOsTypeAsync(string pythonPath = null);
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
namespace Microsoft.ComponentDetection.Detectors.Pip;

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
Expand All @@ -24,7 +25,7 @@ public class PipDependencySpecification
/// <summary>
/// These are packages that we don't want to evaluate in our graph as they are generally python builtins.
/// </summary>
private static readonly HashSet<string> PackagesToIgnore = new HashSet<string>
public static readonly HashSet<string> PackagesToIgnore = new HashSet<string>
{
"-markerlib",
"pip",
Expand Down Expand Up @@ -154,7 +155,7 @@ public bool PackageConditionsMet(Dictionary<string, string> pythonEnvironmentVar
var conditionalVar = conditionalMatch.Groups[2].Value;
var conditionalOperator = conditionalMatch.Groups[3].Value;
var conditionalValue = conditionalMatch.Groups[4].Value;
if (!pythonEnvironmentVariables.ContainsKey(conditionalVar))
if (!pythonEnvironmentVariables.ContainsKey(conditionalVar) || string.IsNullOrEmpty(pythonEnvironmentVariables[conditionalVar]))
{
continue; // If the variable isn't in the environment, we can't evaluate it.
}
Expand All @@ -175,7 +176,7 @@ public bool PackageConditionsMet(Dictionary<string, string> pythonEnvironmentVar
else if (string.Equals(conditionalVar, "sys_platform", System.StringComparison.OrdinalIgnoreCase))
{
// if the platform is not windows or linux (empty string in env var), allow the package to be added. Otherwise, ensure it matches the python condition
conditionMet = string.IsNullOrEmpty(pythonEnvironmentVariables[conditionalVar]) || string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, System.StringComparison.OrdinalIgnoreCase);
conditionMet = string.Equals(pythonEnvironmentVariables[conditionalVar], conditionalValue, System.StringComparison.OrdinalIgnoreCase);
}
else
{
Expand All @@ -195,4 +196,17 @@ public bool PackageConditionsMet(Dictionary<string, string> pythonEnvironmentVar

return conditionsMet;
}

/// <summary>
/// Common method that can be used to determine whether this package is a valid parent
/// package of another package. Note that this logic is not perfect, it does not
/// respect all of the environment identifiers, nor does it correctly handle extras (it ignores
/// them).
/// </summary>
/// <param name="pythonEnvironmentVariables">List of environment variables used to evaluate the environmant conditions, such as OS this is executing on.</param>
/// <returns>Whether or not this package is valid as a parent package.</returns>
public bool IsValidParentPackage(Dictionary<string, string> pythonEnvironmentVariables) =>
!this.PackageIsUnsafe()
&& this.PackageConditionsMet(pythonEnvironmentVariables)
&& !this.ConditionalDependencySpecifiers.Any(s => s.Contains("extra ==", StringComparison.OrdinalIgnoreCase));
}
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ public async Task<IList<PipDependencySpecification>> FetchPackageDependenciesAsy

var package = new ZipArchive(await response.Content.ReadAsStreamAsync());

var entry = package.GetEntry($"{name.Replace('-', '_')}-{version}.dist-info/METADATA");
var entryName = $"{name.Replace('-', '_')}-{version}.dist-info/METADATA";

// first try case insensitive dicitonary lookup O(1), then attempt case-insensitive match O(entries)
var entry = package.GetEntry(entryName)
?? package.Entries.FirstOrDefault(x => string.Equals(x.FullName, entryName, StringComparison.OrdinalIgnoreCase));

// If there is no metadata file, the package doesn't have any declared dependencies
if (entry == null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
using System.Collections.Generic;
using System.Linq;
using System.Reactive.Linq;
using System.Runtime.InteropServices;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Contracts;
Expand Down Expand Up @@ -39,7 +38,7 @@ public PipComponentDetector(

public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };

public override int Version { get; } = 9;
public override int Version { get; } = 10;

protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
{
Expand All @@ -55,11 +54,7 @@ protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsy
var pythonVersion = await this.pythonCommandService.GetPythonVersionAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("python_version", pythonVersion);

var pythonPlatformString = RuntimeInformation.IsOSPlatform(OSPlatform.Windows)
? "win32"
: RuntimeInformation.IsOSPlatform(OSPlatform.Linux)
? "linux"
: string.Empty;
var pythonPlatformString = await this.pythonCommandService.GetOsTypeAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("sys_platform", pythonPlatformString);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,24 @@ public class PipReportComponentDetector : FileComponentDetector, IExperimentalDe

private readonly IPipCommandService pipCommandService;
private readonly IEnvironmentVariableService envVarService;
private readonly IPythonCommandService pythonCommandService;
private readonly IPythonResolver pythonResolver;

public PipReportComponentDetector(
IComponentStreamEnumerableFactory componentStreamEnumerableFactory,
IObservableDirectoryWalkerFactory walkerFactory,
IPipCommandService pipCommandService,
IEnvironmentVariableService envVarService,
IPythonCommandService pythonCommandService,
IPythonResolver pythonResolver,
ILogger<PipReportComponentDetector> logger)
{
this.ComponentStreamEnumerableFactory = componentStreamEnumerableFactory;
this.Scanner = walkerFactory;
this.pipCommandService = pipCommandService;
this.envVarService = envVarService;
this.pythonCommandService = pythonCommandService;
this.pythonResolver = pythonResolver;
this.Logger = logger;
}

Expand All @@ -53,7 +59,9 @@ public PipReportComponentDetector(

public override IEnumerable<ComponentType> SupportedComponentTypes { get; } = new[] { ComponentType.Pip };

public override int Version { get; } = 2;
public override int Version { get; } = 3;

protected override bool EnableParallelism { get; set; } = true;

protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsync(IObservable<ProcessRequest> processRequests, IDictionary<string, string> detectorArgs)
{
Expand All @@ -75,6 +83,22 @@ protected override async Task<IObservable<ProcessRequest>> OnPrepareDetectionAsy
return Enumerable.Empty<ProcessRequest>().ToObservable();
}

this.CurrentScanRequest.DetectorArgs.TryGetValue("Pip.PythonExePath", out var pythonExePath);
if (!await this.pythonCommandService.PythonExistsAsync(pythonExePath))
{
this.Logger.LogInformation($"No python found on system. Python detection will not run.");

return Enumerable.Empty<ProcessRequest>().ToObservable();
}
else
{
var pythonVersion = await this.pythonCommandService.GetPythonVersionAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("python_version", pythonVersion);

var pythonPlatformString = await this.pythonCommandService.GetOsTypeAsync(pythonExePath);
this.pythonResolver.SetPythonEnvironmentVariable("sys_platform", pythonPlatformString);
}

return processRequests;
}

Expand Down Expand Up @@ -169,11 +193,16 @@ private Dictionary<string, PipReportGraphNode> BuildGraphFromInstallationReport(
// graph ourselves using the requires_dist field.
var dependenciesByPkg = new Dictionary<string, List<PipDependencySpecification>>(StringComparer.OrdinalIgnoreCase);
var nodeReferences = new Dictionary<string, PipReportGraphNode>(StringComparer.OrdinalIgnoreCase);
var pythonEnvVars = this.pythonResolver.GetPythonEnvironmentVariables();

foreach (var package in report.InstallItems)
{
// Normalize the package name to ensure consistency between the package name and the graph nodes.
var normalizedPkgName = PipReportUtilities.NormalizePackageNameFormat(package.Metadata.Name);
if (PipDependencySpecification.PackagesToIgnore.Contains(normalizedPkgName))
{
continue;
}

var node = new PipReportGraphNode(
new PipComponent(
Expand All @@ -200,7 +229,7 @@ private Dictionary<string, PipReportGraphNode> BuildGraphFromInstallationReport(
// futures; python_version <= \"2.7\"
// sphinx (!=1.8.0,!=3.1.0,!=3.1.1,>=1.6.5) ; extra == 'docs'
var dependencySpec = new PipDependencySpecification($"Requires-Dist: {dependency}", requiresDist: true);
if (dependencySpec.PackageIsUnsafe())
if (!dependencySpec.IsValidParentPackage(pythonEnvVars))
{
continue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,11 @@ public async Task<string> GetPythonVersionAsync(string pythonPath)
var match = version.Match(versionResult.StdOut);
return match.Success ? match.Groups[1].Value : null;
}

public async Task<string> GetOsTypeAsync(string pythonPath)
{
var pythonCommand = await this.ResolvePythonAsync(pythonPath);
var versionResult = await this.commandLineInvocationService.ExecuteCommandAsync(pythonCommand, new List<string> { "python3", "python2" }, "-c", "\"import sys; print(sys.platform);\"");
return versionResult.ExitCode == 0 && string.IsNullOrEmpty(versionResult.StdErr) ? versionResult.StdOut.Trim() : null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ private async Task<IList<PipGraphNode>> ProcessQueueAsync(ISingleFileComponentRe
var (root, currentNode) = state.ProcessingQueue.Dequeue();

// gather all dependencies for the current node
var dependencies = (await this.FetchPackageDependenciesAsync(state, currentNode)).Where(x => !x.PackageIsUnsafe()).Where(x => x.PackageConditionsMet(this.pythonEnvironmentVariables)).ToList();
var dependencies = (await this.FetchPackageDependenciesAsync(state, currentNode)).Where(x => x.IsValidParentPackage(this.pythonEnvironmentVariables)).ToList();

foreach (var dependencyNode in dependencies)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,26 @@ public void TestPipDependencyRequireDistConditionalDependenciesMet_Linux()

VerifyPipConditionalDependencyParsing(specs, pythonEnvironmentVariables, true);
}

[TestMethod]
public void TestPipDependencyRequireDistConditionalDependenciesMet_Empty()
{
var specs = new List<(string, bool, PipDependencySpecification)>
{
("Requires-Dist: TestPackage (>=2.0.1) ; python_version == \"3.8\" and sys_platform == \"linux\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=2.0.1" }, ConditionalDependencySpecifiers = new List<string> { "python_version == \"3.8\"", "and sys_platform == \"linux\"" } }),
("Requires-Dist: TestPackage (>=4.0.1) ; python_version == \"3.6\" and sys_platform == \"win32\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=4.0.1" }, ConditionalDependencySpecifiers = new List<string> { "python_version == \"3.6\"", "and sys_platform == \"win32\"" } }),
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"linux\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"linux\"" } }),
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"win32\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"win32\"" } }),
("Requires-Dist: TestPackage (>=5.0.1) ; sys_platform == \"asdf\"", true, new PipDependencySpecification { Name = "TestPackage", DependencySpecifiers = new List<string> { ">=5.0.1" }, ConditionalDependencySpecifiers = new List<string> { "sys_platform == \"asdf\"" } }),
};

// test null and empty cases should allow packages through
var pythonEnvironmentVariables = new Dictionary<string, string>
{
{ "python_version", null },
{ "sys_platform", string.Empty },
};

VerifyPipConditionalDependencyParsing(specs, pythonEnvironmentVariables, true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ namespace Microsoft.ComponentDetection.Detectors.Tests;
public class PipReportComponentDetectorTests : BaseDetectorTest<PipReportComponentDetector>
{
private readonly Mock<IPipCommandService> pipCommandService;
private readonly Mock<IPythonCommandService> pythonCommandService;
private readonly Mock<IPythonResolver> pythonResolver;
private readonly Mock<IEnvironmentVariableService> mockEnvVarService;
private readonly Mock<ILogger<PipReportComponentDetector>> mockLogger;

Expand All @@ -35,6 +37,14 @@ public PipReportComponentDetectorTests()
this.pipCommandService = new Mock<IPipCommandService>();
this.DetectorTestUtility.AddServiceMock(this.pipCommandService);

this.pythonCommandService = new Mock<IPythonCommandService>();
this.pythonCommandService.Setup(x => x.PythonExistsAsync(It.IsAny<string>())).ReturnsAsync(true);
this.DetectorTestUtility.AddServiceMock(this.pythonCommandService);

this.pythonResolver = new Mock<IPythonResolver>();
this.pythonResolver.Setup(x => x.GetPythonEnvironmentVariables()).Returns(new Dictionary<string, string>());
this.DetectorTestUtility.AddServiceMock(this.pythonResolver);

this.mockLogger = new Mock<ILogger<PipReportComponentDetector>>();
this.DetectorTestUtility.AddServiceMock(this.mockLogger);

Expand Down Expand Up @@ -434,11 +444,10 @@ public async Task TestPipReportDetector_SingleRoot_ComplexGraph_ComponentRecorde
var jupyterGraph = graphsByLocations[file1];

var jupyterLabDependencies = jupyterGraph.GetDependenciesForComponent(jupyterComponent.Id);
jupyterLabDependencies.Should().HaveCount(15);
jupyterLabDependencies.Should().HaveCount(12);
jupyterLabDependencies.Should().Contain("async-lru 2.0.4 - pip");
jupyterLabDependencies.Should().Contain("jupyter-server 2.14.0 - pip");
jupyterLabDependencies.Should().Contain("traitlets 5.14.3 - pip");
jupyterLabDependencies.Should().Contain("requests 2.32.2 - pip");
jupyterLabDependencies.Should().Contain("jupyter-lsp 2.2.5 - pip");

var bleachComponent = pipComponents.Single(x => ((PipComponent)x.Component).Name.Equals("bleach")).Component as PipComponent;
Expand All @@ -447,10 +456,9 @@ public async Task TestPipReportDetector_SingleRoot_ComplexGraph_ComponentRecorde
bleachComponent.License.Should().Be("Apache Software License");

var bleachDependencies = jupyterGraph.GetDependenciesForComponent(bleachComponent.Id);
bleachDependencies.Should().HaveCount(3);
bleachDependencies.Should().HaveCount(2);
bleachDependencies.Should().Contain("six 1.16.0 - pip");
bleachDependencies.Should().Contain("webencodings 0.5.1 - pip");
bleachDependencies.Should().Contain("tinycss2 1.3.0 - pip");

ComponentRecorderTestUtilities.CheckChild<PipComponent>(
componentRecorder,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile --strip-extras
#
azure-core==1.30.0
# via
# azure-identity
# msrest
azure-devops==7.1.0b4
# via -r requirements.in
azure-identity==1.16.0b1
# via -r requirements.in
certifi==2024.2.2
# via
# msrest
# requests
cffi==1.16.0
# via cryptography
charset-normalizer==3.3.2
# via requests
cryptography==42.0.4
# via
# azure-identity
# msal
# pyjwt
gitdb==4.0.11
# via gitpython
gitpython==3.1.42
# via -r requirements.in
idna==3.6
# via requests
isodate==0.6.1
# via msrest
msal==1.27.0
# via
# azure-identity
# msal-extensions
msal-extensions==1.1.0
# via azure-identity
msrest==0.7.1
# via
# -r requirements.in
# azure-devops
oauthlib==3.2.2
# via requests-oauthlib
packaging==23.2
# via msal-extensions
portalocker==2.8.2
# via msal-extensions
pycparser==2.21
# via cffi
pyjwt==2.8.0
# via
# msal
# pyjwt
requests==2.31.0
# via
# azure-core
# msal
# msrest
# requests-oauthlib
requests-oauthlib==1.3.1
# via msrest
six==1.16.0
# via
# azure-core
# isodate
smmap==5.0.1
# via gitdb
typing-extensions==4.9.0
# via azure-core
urllib3==2.2.1
# via requests
Loading

0 comments on commit c20c3b0

Please sign in to comment.