diff --git a/Cargo.lock b/Cargo.lock
index 0d5beb36c..23211ff7d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -8338,11 +8338,14 @@ dependencies = [
  "reqwest 0.12.9",
  "ring",
  "rstest",
+ "sbom-walker",
  "schemars",
  "sea-orm",
  "sea-query",
  "serde",
  "serde_json",
+ "spdx-expression",
+ "spdx-rs",
  "sqlx",
  "strum 0.26.3",
  "test-context",
@@ -8473,6 +8476,7 @@ dependencies = [
  "serde",
  "serde_json",
  "sha2",
+ "spdx-rs",
  "test-context",
  "test-log",
  "thiserror 1.0.69",
diff --git a/common/Cargo.toml b/common/Cargo.toml
index eb323a323..ce818b675 100644
--- a/common/Cargo.toml
+++ b/common/Cargo.toml
@@ -28,11 +28,14 @@ postgresql_embedded = { workspace = true, features = ["blocking", "tokio"] }
 regex = { workspace = true }
 reqwest = { workspace = true, features = ["native-tls"] }
 ring = { workspace = true }
+sbom-walker = { workspace = true }
 schemars = { workspace = true }
 sea-orm = { workspace = true, features = ["sea-query-binder", "sqlx-postgres", "runtime-tokio-rustls", "macros"] }
 sea-query = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 serde_json = { workspace = true }
+spdx-expression = { workspace = true }
+spdx-rs = { workspace = true }
 sqlx = { workspace = true }
 strum = { workspace = true, features = ["derive"] }
 thiserror = { workspace = true }
diff --git a/common/src/sbom/mod.rs b/common/src/sbom/mod.rs
index 6916d9964..d28c2b31e 100644
--- a/common/src/sbom/mod.rs
+++ b/common/src/sbom/mod.rs
@@ -1,3 +1,5 @@
+pub mod spdx;
+
 use crate::cpe::Cpe;
 use crate::purl::Purl;
 use uuid::Uuid;
diff --git a/common/src/sbom/spdx.rs b/common/src/sbom/spdx.rs
new file mode 100644
index 000000000..5ca5baf47
--- /dev/null
+++ b/common/src/sbom/spdx.rs
@@ -0,0 +1,44 @@
+use sbom_walker::report::ReportSink;
+use serde_json::Value;
+use spdx_rs::models::SPDX;
+
+/// Parse an SPDX document, possibly replacing invalid license expressions.
+///
+/// The JSON is first passed through [`fix_license`] and then deserialized into [`SPDX`].
+///
+/// Returns the parsed document and a flag indicating if license expressions got replaced.
+///
+/// # Errors
+///
+/// Fails with the `serde_json` error if the fixed JSON does not deserialize as [`SPDX`].
+pub fn parse_spdx(report: &dyn ReportSink, json: Value) -> Result<(SPDX, bool), serde_json::Error> {
+    let (json, changed) = fix_license(report, json);
+    Ok((serde_json::from_value(json)?, changed))
+}
+
+/// Check the document for invalid SPDX license expressions and replace them with `NOASSERTION`.
+///
+/// Only the `licenseDeclared` string of each entry in the top-level `packages` array is checked.
+/// Each replacement is logged at debug level and reported to `report` as an error.
+///
+/// Returns the document and a flag indicating if at least one expression got replaced.
+pub fn fix_license(report: &dyn ReportSink, mut json: Value) -> (Value, bool) {
+    let mut changed = false;
+    if let Some(packages) = json["packages"].as_array_mut() {
+        for package in packages {
+            if let Some(declared) = package["licenseDeclared"].as_str() {
+                if let Err(err) = spdx_expression::SpdxExpression::parse(declared) {
+                    package["licenseDeclared"] = "NOASSERTION".into();
+                    changed = true;
+
+                    let message =
+                        format!("Replacing faulty SPDX license expression with NOASSERTION: {err}");
+                    log::debug!("{message}");
+                    report.error(message);
+                }
+            }
+        }
+    }
+
+    (json, changed)
+}
diff --git a/modules/analysis/Cargo.toml b/modules/analysis/Cargo.toml
index 980dd0f1c..2198612e9 100644
--- a/modules/analysis/Cargo.toml
+++ b/modules/analysis/Cargo.toml
@@ -15,12 +15,13 @@ actix-web = { workspace = true }
 anyhow = { workspace = true }
 cpe = { workspace = true }
 log = { workspace = true }
+parking_lot = { workspace = true }
 petgraph = { workspace = true }
-parking_lot= { workspace = true }
 sea-orm = { workspace = true }
 sea-query = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
+spdx-rs = { workspace = true }
 thiserror = { workspace = true }
 tracing = { workspace = true }
 utoipa = { workspace = true, features = ["actix_extras", "uuid"] }
@@ -49,7 +50,3 @@ criterion = { workspace = true, features = ["html_reports", "async_tokio"] }
 csaf = { workspace = true }
 packageurl = { workspace = true }
 zip = { workspace = true }
-
-
-
-
diff --git a/modules/analysis/src/service/test.rs b/modules/analysis/src/service/test.rs
index d6facd859..b6b192dbf 100644
--- a/modules/analysis/src/service/test.rs
+++ b/modules/analysis/src/service/test.rs
@@ -1,10 +1,12 @@
 use super::*;
-
 use std::str::FromStr;
+use std::time::Instant;
 use test_context::test_context;
 use test_log::test;
-use trustify_common::{cpe::Cpe, db::query::Query, model::Paginated, purl::Purl};
-use trustify_test_context::TrustifyContext;
+use trustify_common::{
+    cpe::Cpe, db::query::Query, model::Paginated, purl::Purl, sbom::spdx::fix_license,
+};
+use trustify_test_context::{document, spdx::fix_spdx_rels, TrustifyContext};
 
 #[test_context(TrustifyContext)]
 #[test(tokio::test)]
@@ -408,3 +410,26 @@ async fn test_retrieve_all_sbom_roots_by_name1(ctx: &TrustifyContext) -> Result<
 
     Ok(())
 }
+
+/// Ingest an SPDX SBOM, then log how long loading all graphs takes.
+#[test_context(TrustifyContext)]
+#[test(tokio::test)]
+async fn load_performance(ctx: &TrustifyContext) -> Result<(), anyhow::Error> {
+    let (spdx, _) =
+        document::<serde_json::Value>("openshift-container-storage-4.8.z.json.xz").await?;
+    let (spdx, _) = fix_license(&(), spdx);
+    let spdx = fix_spdx_rels(serde_json::from_value(spdx)?);
+    ctx.ingest_json(&spdx).await?;
+
+    // Use the monotonic clock: `SystemTime::elapsed` can fail when the wall clock is adjusted.
+    let start = Instant::now();
+    let service = AnalysisService::new();
+    service.load_all_graphs(&ctx.db).await?;
+
+    log::info!(
+        "Loading took: {}",
+        humantime::format_duration(start.elapsed())
+    );
+
+    Ok(())
+}
diff --git a/modules/ingestor/src/graph/sbom/spdx.rs b/modules/ingestor/src/graph/sbom/spdx.rs
index a4a9e9d10..e7e4ed382 100644
--- a/modules/ingestor/src/graph/sbom/spdx.rs
+++ b/modules/ingestor/src/graph/sbom/spdx.rs
@@ -12,7 +12,6 @@ use crate::{
 };
 use sbom_walker::report::{check, ReportSink};
 use sea_orm::ConnectionTrait;
-use serde_json::Value;
 use spdx_rs::models::{RelationshipType, SPDX};
 use std::{collections::HashMap, str::FromStr};
 use time::OffsetDateTime;
@@ -294,33 +293,3 @@ impl<'spdx> TryFrom<&'spdx spdx_rs::models::Relationship> for SpdxRelationship<'
             .try_into()
     }
 }
-
-/// Check the document for invalid SPDX license expressions and replace them with `NOASSERTION`.
-pub fn fix_license(report: &dyn ReportSink, mut json: Value) -> (Value, bool) {
-    let mut changed = false;
-    if let Some(packages) = json["packages"].as_array_mut() {
-        for package in packages {
-            if let Some(declared) = package["licenseDeclared"].as_str() {
-                if let Err(err) = spdx_expression::SpdxExpression::parse(declared) {
-                    package["licenseDeclared"] = "NOASSERTION".into();
-                    changed = true;
-
-                    let message =
-                        format!("Replacing faulty SPDX license expression with NOASSERTION: {err}");
-                    log::debug!("{message}");
-                    report.error(message);
-                }
-            }
-        }
-    }
-
-    (json, changed)
-}
-
-/// Parse a SPDX document, possibly replacing invalid license expressions.
-///
-/// Returns the parsed document and a flag indicating if license expressions got replaced.
-pub fn parse_spdx(report: &dyn ReportSink, json: Value) -> Result<(SPDX, bool), serde_json::Error> {
-    let (json, changed) = fix_license(report, json);
-    Ok((serde_json::from_value(json)?, changed))
-}
diff --git a/modules/ingestor/src/service/sbom/spdx.rs b/modules/ingestor/src/service/sbom/spdx.rs
index d767743c0..b6f75003e 100644
--- a/modules/ingestor/src/service/sbom/spdx.rs
+++ b/modules/ingestor/src/service/sbom/spdx.rs
@@ -1,6 +1,6 @@
 use crate::{
     graph::{
-        sbom::spdx::{self, parse_spdx},
+        sbom::spdx::{self},
         Graph,
     },
     model::IngestResult,
@@ -9,7 +9,7 @@ use crate::{
 use sea_orm::TransactionTrait;
 use serde_json::Value;
 use tracing::instrument;
-use trustify_common::{hashing::Digests, id::Id};
+use trustify_common::{hashing::Digests, id::Id, sbom::spdx::parse_spdx};
 use trustify_entity::labels::Labels;
 
 pub struct SpdxLoader<'g> {
diff --git a/test-context/src/lib.rs b/test-context/src/lib.rs
index 596e0133c..7bdef6943 100644
--- a/test-context/src/lib.rs
+++ b/test-context/src/lib.rs
@@ -8,20 +8,21 @@ pub mod spdx;
 use futures::Stream;
 use peak_alloc::PeakAlloc;
 use postgresql_embedded::PostgreSQL;
-use std::env;
-use std::io::{Read, Seek};
-use std::path::{Path, PathBuf};
+use serde::Serialize;
+use std::{
+    env,
+    io::{Read, Seek},
+    path::{Path, PathBuf},
+};
 use test_context::AsyncTestContext;
-use tokio_util::bytes::Bytes;
-use tokio_util::io::{ReaderStream, SyncIoBridge};
+use tokio_util::{bytes::Bytes, io::ReaderStream};
 use tracing::instrument;
-use trustify_common as common;
-use trustify_common::db;
-use trustify_common::decompress::decompress_async;
-use trustify_common::hashing::{Digests, HashingRead};
-use trustify_module_ingestor::graph::Graph;
-use trustify_module_ingestor::model::IngestResult;
-use trustify_module_ingestor::service::{Format, IngestorService};
+use trustify_common::{self as common, db, decompress::decompress_async, hashing::Digests};
+use trustify_module_ingestor::{
+    graph::Graph,
+    model::IngestResult,
+    service::{Format, IngestorService},
+};
 use trustify_module_storage::service::fs::FileSystemBackend;
 
 #[allow(dead_code)]
@@ -103,6 +104,23 @@ impl TrustifyContext {
             .await?)
     }
 
+    /// Ingest a document by ingesting its JSON representation.
+    ///
+    /// The document is serialized with `serde_json` and passed to the ingestor as
+    /// [`Format::Unknown`].
+    ///
+    /// # Errors
+    ///
+    /// Fails if serializing the document or ingesting the resulting bytes fails.
+    pub async fn ingest_json<S: Serialize>(&self, doc: S) -> Result<IngestResult, anyhow::Error> {
+        let bytes = serde_json::to_vec(&doc)?;
+
+        Ok(self
+            .ingestor
+            .ingest(&bytes, Format::Unknown, ("source", "TrustifyContext"), None)
+            .await?)
+    }
+
     pub fn absolute_path(&self, path: impl AsRef<Path>) -> anyhow::Result<PathBuf> {
         absolute(path)
     }
@@ -179,20 +197,22 @@ pub fn document_read(path: &str) -> Result<impl Read + Seek, anyhow::Error> {
     Ok(std::fs::File::open(absolute(path)?)?)
 }
 
+/// Read a document and parse it as JSON.
+///
+/// Returns the parsed value together with the digests of the bytes returned by `document_bytes`.
+///
+/// # Errors
+///
+/// Fails if the document cannot be read, the bytes do not deserialize as `T`, or the blocking
+/// parse task does not complete.
 pub async fn document<T>(path: &str) -> Result<(T, Digests), anyhow::Error>
 where
     T: serde::de::DeserializeOwned + Send + 'static,
 {
-    let file = tokio::fs::File::open(absolute(path)?).await?;
-    let mut reader = HashingRead::new(SyncIoBridge::new(file));
-    let f = || match serde_json::from_reader(&mut reader) {
-        Ok(v) => match reader.finish() {
-            Ok(digests) => Ok((v, digests)),
-            Err(e) => Err(anyhow::Error::new(e)),
-        },
-        Err(e) => Err(anyhow::Error::new(e)),
-    };
-    tokio::task::spawn_blocking(f).await?
+    let data = document_bytes(path).await?;
+    let digests = Digests::digest(&data);
+    let f = move || Ok::<_, anyhow::Error>(serde_json::from_slice::<T>(&data)?);
+
+    Ok((tokio::task::spawn_blocking(f).await??, digests))
 }
 
 #[cfg(test)]