From 5ff94e88c8c6df25d96b8e58b24e314dbb4fb17c Mon Sep 17 00:00:00 2001 From: Laszlo Nagy Date: Sat, 18 May 2024 21:33:27 +1000 Subject: [PATCH] draft rust implementation --- .github/workflows/build_rust.yml | 44 +++ rust/Cargo.toml | 6 + rust/README.md | 37 +++ rust/intercept/Cargo.toml | 35 +++ rust/intercept/src/bin/intercept.rs | 118 ++++++++ rust/intercept/src/bin/wrapper.rs | 28 ++ rust/intercept/src/collector.rs | 101 +++++++ rust/intercept/src/ipc.rs | 105 +++++++ rust/intercept/src/lib.rs | 22 ++ rust/intercept/src/reporter.rs | 64 +++++ rust/semantic/Cargo.toml | 26 ++ rust/semantic/src/compilation.rs | 198 +++++++++++++ rust/semantic/src/configuration.rs | 311 +++++++++++++++++++++ rust/semantic/src/events.rs | 211 ++++++++++++++ rust/semantic/src/execution.rs | 35 +++ rust/semantic/src/filter.rs | 288 +++++++++++++++++++ rust/semantic/src/fixtures.rs | 13 + rust/semantic/src/main.rs | 294 +++++++++++++++++++ rust/semantic/src/tools.rs | 278 ++++++++++++++++++ rust/semantic/src/tools/build.rs | 87 ++++++ rust/semantic/src/tools/configured.rs | 166 +++++++++++ rust/semantic/src/tools/gcc.rs | 219 +++++++++++++++ rust/semantic/src/tools/matchers.rs | 20 ++ rust/semantic/src/tools/matchers/source.rs | 101 +++++++ rust/semantic/src/tools/unix.rs | 189 +++++++++++++ rust/semantic/src/tools/wrapper.rs | 76 +++++ 26 files changed, 3072 insertions(+) create mode 100644 .github/workflows/build_rust.yml create mode 100644 rust/Cargo.toml create mode 100644 rust/README.md create mode 100644 rust/intercept/Cargo.toml create mode 100644 rust/intercept/src/bin/intercept.rs create mode 100644 rust/intercept/src/bin/wrapper.rs create mode 100644 rust/intercept/src/collector.rs create mode 100644 rust/intercept/src/ipc.rs create mode 100644 rust/intercept/src/lib.rs create mode 100644 rust/intercept/src/reporter.rs create mode 100644 rust/semantic/Cargo.toml create mode 100644 rust/semantic/src/compilation.rs create mode 100644 rust/semantic/src/configuration.rs create 
mode 100644 rust/semantic/src/events.rs create mode 100644 rust/semantic/src/execution.rs create mode 100644 rust/semantic/src/filter.rs create mode 100644 rust/semantic/src/fixtures.rs create mode 100644 rust/semantic/src/main.rs create mode 100644 rust/semantic/src/tools.rs create mode 100644 rust/semantic/src/tools/build.rs create mode 100644 rust/semantic/src/tools/configured.rs create mode 100644 rust/semantic/src/tools/gcc.rs create mode 100644 rust/semantic/src/tools/matchers.rs create mode 100644 rust/semantic/src/tools/matchers/source.rs create mode 100644 rust/semantic/src/tools/unix.rs create mode 100644 rust/semantic/src/tools/wrapper.rs diff --git a/.github/workflows/build_rust.yml b/.github/workflows/build_rust.yml new file mode 100644 index 00000000..bcc211a9 --- /dev/null +++ b/.github/workflows/build_rust.yml @@ -0,0 +1,44 @@ +name: rust CI + +on: + push: + pull_request: + +env: + CARGO_TERM_COLOR: always + +jobs: + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: rustup component add clippy && rustup update stable && rustup default stable + - run: cd rust && cargo clippy + compile: + name: Compile + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: rustup update stable && rustup default stable + - run: cd rust && cargo check --verbose + test: + name: Test + strategy: + matrix: + os: + - ubuntu-latest + - windows-latest + - macOS-latest + toolchain: + - stable + - beta + - nightly + runs-on: ${{ matrix.os }} + needs: [compile] + steps: + - uses: actions/checkout@v3 + - run: rustup update ${{ matrix.toolchain }} && rustup default ${{ matrix.toolchain }} + - run: cd rust && cargo build --verbose + - run: cd rust && cargo test --verbose + diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 00000000..c74af6f0 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,6 @@ +[workspace] +members = [ + "semantic", + "intercept" +] +resolver = "2" diff --git a/rust/README.md 
b/rust/README.md new file mode 100644 index 00000000..6980203c --- /dev/null +++ b/rust/README.md @@ -0,0 +1,37 @@ +# What's this? + +This is a Rust rewrite of the current master branch of this project. + +# Why? + +The current master branch is written in C++ and is not very well written. +I want to rewrite it in Rust to make it more maintainable and easier to work with. + +## What's wrong with the current codebase? + +- The idea of disabling exception handling and using Rust-like result values is sound, + but the implementation could be improved. +- The use of CMake as a build tool has caused several issues, + including poor handling of third-party libraries and subprojects. +- Some dependencies are problematic: + - Not all of them are available on all platforms. + - Updating them can be challenging. + +## What are the benefits of rewriting the project in Rust? + +- Easy porting of the project to other platforms, including Windows +- Improved maintainability through the use of third-party libraries + and better development tooling + +# How? + +The `3.x` version will be the last version of the C++ codebase. +The `4.x` version will be the first version of the Rust codebase. + +The `master` branch will be kept as the main release branch. +The Rust codebase will also be developed on the `master` branch, +but it will be kept in a separate directory. + +# When? + +I will work on this project in my free time (as before). diff --git a/rust/intercept/Cargo.toml b/rust/intercept/Cargo.toml new file mode 100644 index 00000000..ee07c022 --- /dev/null +++ b/rust/intercept/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "intercept" +version = "4.0.0" +authors = ["László Nagy "] +description = "Rust crate to intercept the execution of commands." 
+keywords = ["clang", "clang-tooling", "compilation-database"] +repository = "https://github.com/rizsotto/Bear" +homepage = "https://github.com/rizsotto/Bear" +license = "GPL-3" +edition = "2021" + +[dependencies] +anyhow = "1.0" +lazy_static = "1.4" +serde = { version = "1.0", default-features = false, features = ["derive"] } +serde_json = { version = "1.0", default-features = false, features = ["std"] } +log = "0.4" +simple_logger = { version = "4.2", default-features = false, features = ["timestamps"]} +clap = { version = "4.4", default-features = false, features = ["std", "cargo", "help", "usage", "suggestions"] } +crossbeam = "0.8" +crossbeam-channel = "0.5" +rand = "0.8.5" +chrono = "0.4.33" + +[lib] +name = "intercept" +path = "src/lib.rs" + +[[bin]] +name = "intercept" +path = "src/bin/intercept.rs" + +[[bin]] +name = "wrapper" +path = "src/bin/wrapper.rs" diff --git a/rust/intercept/src/bin/intercept.rs b/rust/intercept/src/bin/intercept.rs new file mode 100644 index 00000000..e4e50f9a --- /dev/null +++ b/rust/intercept/src/bin/intercept.rs @@ -0,0 +1,118 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+ */ + +extern crate core; + +use std::io::Write; + +use anyhow::Result; +use clap::{arg, ArgAction, command}; +use crossbeam_channel::bounded; + +use intercept::ipc::{Envelope, Event, ReporterId}; +use intercept::collector::{EventCollector, EventCollectorOnTcp}; + +#[derive(Debug, PartialEq)] +struct Arguments { + command: Vec, + output: String, + config: Option, + verbose: u8, +} + +impl Arguments { + fn parse() -> Result { + let matches = command!() + .args(&[ + arg!( "Build command") + .action(ArgAction::Append) + .value_terminator("--") + .num_args(1..) + .last(true) + .required(true), + arg!(-o --output "Path of the result file") + .default_value("events.json") + .hide_default_value(false), + arg!(-c --config "Path of the config file"), + arg!(-v --verbose ... "Sets the level of verbosity") + .action(ArgAction::Count), + ]) + .get_matches(); + + let result = Arguments { + command: matches.get_many("COMMAND") + .expect("command is required") + .map(String::to_string) + .collect(), + output: matches.get_one::("output") + .expect("output is defaulted") + .clone(), + config: matches.get_one::("config") + .map(String::to_string), + verbose: matches.get_count("verbose"), + }; + + Ok(result) + } +} + +fn run() -> Result { + let arguments = Arguments::parse()?; + + // let collector = EventCollectorOnTcp::new()?; + // let destination = collector.address()?; + // + // std::env::set_var("INTERCEPT_REPORT_DESTINATION", &destination.0); + // std::env::set_var("INTERCEPT_VERBOSE", arguments.verbose.to_string()); + // let mut build = std::process::Command::new(arguments.command[0].clone()) + // .args(&arguments.command[1..]) + // .envs(std::env::vars()) + // .spawn()?; + // + // let (sender, mut receiver) = bounded::(10); + // let collector_loop = std::thread::spawn(move || { + // collector.collect(sender) + // }); + // let writer_loop = std::thread::spawn(move || { + // let mut writer = std::fs::File::create(arguments.output)?; + // loop { + // let envelope = 
receiver.recv()?; + // let _ = envelope.write_into(&mut writer)?; + // writer.flush()?; + // } + // }); + // + // let build_status = build.wait()?; + // collector.stop()?; + // + // collector_loop.join().unwrap()?; + // writer_loop.join().unwrap()?; + // + // Ok(build_status.code().unwrap()) + Ok(0) +} + +fn main() { + let exit_code = run().unwrap_or_else(|error| { + eprintln!("Error: {}", error); + 1 + }); + + std::process::exit(exit_code); +} diff --git a/rust/intercept/src/bin/wrapper.rs b/rust/intercept/src/bin/wrapper.rs new file mode 100644 index 00000000..195aa33f --- /dev/null +++ b/rust/intercept/src/bin/wrapper.rs @@ -0,0 +1,28 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +extern crate core; + +use anyhow::Result; + +use intercept::ipc::{Envelope, Event, ReporterId}; + +fn main() -> Result<()> { + Ok(()) +} diff --git a/rust/intercept/src/collector.rs b/rust/intercept/src/collector.rs new file mode 100644 index 00000000..3f1b4053 --- /dev/null +++ b/rust/intercept/src/collector.rs @@ -0,0 +1,101 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. 
+ + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +use std::net::{TcpListener, TcpStream}; + +use crossbeam::channel::{Receiver, Sender}; +use crossbeam_channel::bounded; + +use super::ipc::{Envelope, SessionLocator}; + +pub trait EventCollector { + fn address(&self) -> Result; + fn collect(&self, destination: Sender) -> Result<(), anyhow::Error>; + fn stop(&self) -> Result<(), anyhow::Error>; +} + +pub struct EventCollectorOnTcp { + control_input: Sender, + control_output: Receiver, + listener: TcpListener, +} + +impl EventCollectorOnTcp { + pub fn new() -> Result { + let (control_input, control_output) = bounded(0); + let listener = TcpListener::bind("127.0.0.1:0")?; + + let result = EventCollectorOnTcp { control_input, control_output, listener }; + + Ok(result) + } + + fn send( + &self, + mut socket: TcpStream, + destination: Sender, + ) -> Result<(), anyhow::Error> { + let envelope = Envelope::read_from(&mut socket)?; + destination.send(envelope)?; + + Ok(()) + } +} + +impl EventCollector for EventCollectorOnTcp { + fn address(&self) -> Result { + let local_addr = self.listener.local_addr()?; + let locator = SessionLocator(local_addr.to_string()); + Ok(locator) + } + + fn collect(&self, destination: Sender) -> Result<(), anyhow::Error> { + loop { + if let Ok(shutdown) = self.control_output.try_recv() { + if shutdown { + break; + } + } + + match self.listener.accept() { + Ok((stream, _)) => { + println!("Got a connection"); + 
// ... (process the connection in a separate thread or task) + self.send(stream, destination.clone())?; + } + Err(ref e) if e.kind() == std::io::ErrorKind::WouldBlock => { + // No new connection available, continue checking for shutdown + continue; + } + Err(e) => { + println!("Error: {}", e); + break; + } + } + } + + println!("Server shutting down"); + Ok(()) + } + + fn stop(&self) -> Result<(), anyhow::Error> { + self.control_input.send(true)?; + Ok(()) + } +} diff --git a/rust/intercept/src/ipc.rs b/rust/intercept/src/ipc.rs new file mode 100644 index 00000000..21964d94 --- /dev/null +++ b/rust/intercept/src/ipc.rs @@ -0,0 +1,105 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::path::PathBuf; + +use chrono::Utc; +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Debug, PartialEq)] +pub struct SessionLocator(pub String); + +// Reporter id is a unique identifier for a reporter. +// +// It is used to identify the process that sends the execution report. +// Because the OS PID is not unique across a single build (PIDs are +// recycled), we need to use a new unique identifier to identify the process. 
+#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] +pub struct ReporterId(pub u64); + +#[derive(Serialize, Deserialize, Debug, PartialEq)] +pub struct ProcessId(pub u32); + +#[derive(Serialize, Deserialize, Debug, PartialEq)] +pub struct Execution { + pub executable: PathBuf, + pub arguments: Vec, + pub working_dir: PathBuf, + pub environment: HashMap, +} + +// Represent a relevant life cycle event of a process. +// +// Currently, it's only the process life cycle events (start, signal, +// terminate), but can be extended later with performance related +// events like monitoring the CPU usage or the memory allocation if +// this information is available. +#[derive(Serialize, Deserialize, Debug, PartialEq)] +pub enum Event { + Started { + pid: ProcessId, + ppid: ProcessId, + execution: Execution, + }, + Terminated { + status: i64 + }, + Signaled { + signal: i32, + }, +} + +#[derive(Serialize, Deserialize, Debug, PartialEq)] +pub struct Envelope { + pub rid: ReporterId, + pub timestamp: u64, + pub event: Event, +} + +impl Envelope { + pub fn new(rid: &ReporterId, event: Event) -> Self { + let timestamp = Utc::now().timestamp_millis() as u64; + Envelope { rid: rid.clone(), timestamp, event } + } + + pub fn read_from(mut reader: impl Read) -> Result { + let mut length_bytes = [0; 4]; + reader.read_exact(&mut length_bytes)?; + let length = u32::from_be_bytes(length_bytes) as usize; + + let mut buffer = vec![0; length]; + reader.read_exact(&mut buffer)?; + let envelope = serde_json::from_slice(buffer.as_ref())?; + + Ok(envelope) + } + + pub fn write_into(&self, mut writer: impl Write) -> Result { + let serialized_envelope = serde_json::to_string(&self)?; + let bytes = serialized_envelope.into_bytes(); + let length = bytes.len() as u32; + + writer.write_all(&length.to_be_bytes())?; + writer.write_all(&bytes)?; + + Ok(length) + } +} diff --git a/rust/intercept/src/lib.rs b/rust/intercept/src/lib.rs new file mode 100644 index 00000000..89a40d4b --- /dev/null +++ 
b/rust/intercept/src/lib.rs @@ -0,0 +1,22 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +pub mod ipc; +pub mod reporter; +pub mod collector; diff --git a/rust/intercept/src/reporter.rs b/rust/intercept/src/reporter.rs new file mode 100644 index 00000000..3f5c167c --- /dev/null +++ b/rust/intercept/src/reporter.rs @@ -0,0 +1,64 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+ */ + +use std::net::TcpStream; + +use rand::random; + +use crate::ipc::{Envelope, Event, ReporterId}; + +impl ReporterId { + pub fn new() -> Self { + let id = random::(); + ReporterId(id) + } +} + +// Represents the remote sink of supervised process events. +// +// Events from a process execution can be sent from many actors (mostly +// supervisor processes). The events are collected in a common place +// in order to reconstruct the final report of a build process. +pub trait Reporter { + fn report(&mut self, event: Event) -> Result<(), anyhow::Error>; +} + +struct TcpReporter { + socket: TcpStream, + destination: String, + reporter_id: ReporterId, +} + +impl TcpReporter { + pub fn new(destination: String) -> Result { + let socket = TcpStream::connect(destination.clone())?; + let reporter_id = ReporterId::new(); + let result = TcpReporter { socket, destination, reporter_id }; + Ok(result) + } +} + +impl Reporter for TcpReporter { + fn report(&mut self, event: Event) -> Result<(), anyhow::Error> { + let envelope = Envelope::new(&self.reporter_id, event); + envelope.write_into(&mut self.socket)?; + + Ok(()) + } +} diff --git a/rust/semantic/Cargo.toml b/rust/semantic/Cargo.toml new file mode 100644 index 00000000..daaa6b6d --- /dev/null +++ b/rust/semantic/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "citnames" +version = "4.0.0" +authors = ["László Nagy "] +description = "Rust crate to detect the semantics of commands." 
+keywords = ["clang", "clang-tooling", "compilation-database"] +repository = "https://github.com/rizsotto/Bear" +homepage = "https://github.com/rizsotto/Bear" +license = "GPL-3" +edition = "2021" + +[dependencies] +anyhow = "1.0" +lazy_static = "1.4" +serde = { version = "1.0", default-features = false, features = ["derive"] } +serde_json = { version = "1.0", default-features = false, features = ["std"] } +path-absolutize = "3.1" +json_compilation_db = "1.0" +log = "0.4" +simple_logger = { version = "4.2", default-features = false, features = ["timestamps"]} +clap = { version = "4.4", default-features = false, features = ["std", "cargo", "help", "usage", "suggestions"] } +nom = "7.1" +regex = "1.9" + +[[bin]] +name = "citnames" \ No newline at end of file diff --git a/rust/semantic/src/compilation.rs b/rust/semantic/src/compilation.rs new file mode 100644 index 00000000..6c53c751 --- /dev/null +++ b/rust/semantic/src/compilation.rs @@ -0,0 +1,198 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+ */ + +use std::path::{Path, PathBuf}; + +use anyhow::{anyhow, Result}; +use json_compilation_db::Entry; +use path_absolutize::Absolutize; + +use crate::tools::{CompilerPass, Semantic}; + +impl TryFrom for Vec { + type Error = anyhow::Error; + + fn try_from(value: Semantic) -> Result { + match value { + Semantic::Compiler { compiler, working_dir, passes } => { + let entries = passes.iter() + .flat_map(|pass| -> Result { + match pass { + CompilerPass::Preprocess => + Err(anyhow!("preprocess pass should not show up in results")), + CompilerPass::Compile { source, output, flags } => + Ok( + Entry { + file: into_abspath(source.clone(), working_dir.as_path())?, + directory: working_dir.clone(), + output: into_abspath_opt(output.clone(), working_dir.as_path())?, + arguments: into_arguments(&compiler, source, output, flags)?, + } + ) + } + }) + .collect(); + + Ok(entries) + } + _ => + Ok(vec![]), + } + } +} + +fn into_arguments( + compiler: &PathBuf, + source: &PathBuf, + output: &Option, + flags: &Vec, +) -> Result, anyhow::Error> { + let mut arguments: Vec = vec![]; + // Assemble the arguments as it would be for a single source file. + arguments.push(into_string(&compiler)?); + for flag in flags { + arguments.push(flag.clone()); + } + if let Some(file) = output { + arguments.push(String::from("-o")); + arguments.push(into_string(file)?) 
+ } + arguments.push(into_string(source)?); + Ok(arguments) +} + +fn into_abspath(path: PathBuf, root: &Path) -> Result { + let candidate = if path.is_absolute() { + path.absolutize() + } else { + path.absolutize_from(root) + }; + candidate.map(|x| x.to_path_buf()) +} + +fn into_abspath_opt(path: Option, root: &Path) -> Result, std::io::Error> { + path.map(|v| into_abspath(v, root)) + .transpose() +} + +fn into_string(path: &Path) -> Result { + path.to_path_buf() + .into_os_string() + .into_string() + .map_err(|_| anyhow!("Path can't be encoded to UTF")) +} + +#[cfg(test)] +mod test { + use crate::vec_of_strings; + + use super::*; + + #[test] + fn test_non_compilations() -> Result<()> { + let empty: Vec = vec![]; + + let result: Vec = Semantic::UnixCommand.try_into()?; + assert_eq!(empty, result); + let result: Vec = Semantic::BuildCommand.try_into()?; + assert_eq!(empty, result); + + let input = Semantic::Compiler { + compiler: PathBuf::from("/usr/bin/cc"), + working_dir: PathBuf::from("/home/user"), + passes: vec![], + }; + let result: Vec = input.try_into()?; + assert_eq!(empty, result); + + Ok(()) + } + + #[test] + fn test_single_source_compilation() -> Result<()> { + let input = Semantic::Compiler { + compiler: PathBuf::from("clang"), + working_dir: PathBuf::from("/home/user"), + passes: vec![ + CompilerPass::Compile { + source: PathBuf::from("source.c"), + output: Some(PathBuf::from("source.o")), + flags: vec_of_strings!["-Wall"], + }, + ], + }; + + let expected = vec![ + Entry { + directory: PathBuf::from("/home/user"), + file: PathBuf::from("/home/user/source.c"), + arguments: vec_of_strings!["clang", "-Wall", "-o", "source.o", "source.c"], + output: Some(PathBuf::from("/home/user/source.o")), + } + ]; + + let result: Vec = input.try_into()?; + + assert_eq!(expected, result); + + Ok(()) + } + + #[test] + fn test_multiple_sources_compilation() -> Result<()> { + let input = Semantic::Compiler { + compiler: PathBuf::from("clang"), + working_dir: 
PathBuf::from("/home/user"), + passes: vec![ + CompilerPass::Preprocess, + CompilerPass::Compile { + source: PathBuf::from("/tmp/source1.c"), + output: None, + flags: vec_of_strings!["-Wall"], + }, + CompilerPass::Compile { + source: PathBuf::from("../source2.c"), + output: None, + flags: vec_of_strings!["-Wall"], + }, + ], + }; + + let expected = vec![ + Entry { + directory: PathBuf::from("/home/user"), + file: PathBuf::from("/tmp/source1.c"), + arguments: vec_of_strings!["clang", "-Wall", "/tmp/source1.c"], + output: None, + }, + Entry { + directory: PathBuf::from("/home/user"), + file: PathBuf::from("/home/source2.c"), + arguments: vec_of_strings!["clang", "-Wall", "../source2.c"], + output: None, + }, + ]; + + let result: Vec = input.try_into()?; + + assert_eq!(expected, result); + + Ok(()) + } +} diff --git a/rust/semantic/src/configuration.rs b/rust/semantic/src/configuration.rs new file mode 100644 index 00000000..d9416c9b --- /dev/null +++ b/rust/semantic/src/configuration.rs @@ -0,0 +1,311 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +use std::path::PathBuf; + +use serde::Deserialize; + +// Represents the application configuration. 
+#[derive(Debug, Default, Deserialize, PartialEq)] +pub struct Configuration { + #[serde(default)] + pub output: Output, + #[serde(default)] + pub compilation: Compilation, +} + +// Represents compiler related configuration. +#[derive(Debug, Default, Deserialize, PartialEq)] +pub struct Compilation { + #[serde(default)] + pub compilers_to_recognize: Vec, + #[serde(default)] + pub compilers_to_exclude: Vec, +} + +// Represents a compiler wrapper that the tool will recognize. +// +// When executable name matches it tries to parse the flags as it would +// be a known compiler, and append the additional flags to the output +// entry if the compiler is recognized. +#[derive(Debug, Deserialize, PartialEq)] +pub struct CompilerToRecognize { + pub executable: PathBuf, + #[serde(default)] + pub flags_to_add: Vec, + #[serde(default)] + pub flags_to_remove: Vec, +} + +// Groups together the output related configurations. +#[derive(Debug, Default, Deserialize, PartialEq)] +pub struct Output { + #[serde(default)] + pub format: Format, + #[serde(default)] + pub content: Content, +} + +// Controls the output format. +// +// The entries in the JSON compilation database can have different forms. +// One format element is how the command is represented: it can be an array +// of strings or a single string (shell escaping to protect white spaces). +// Another format element is if the output field is emitted or not. +#[derive(Debug, Deserialize, PartialEq)] +pub struct Format { + #[serde(default = "enabled")] + pub command_as_array: bool, + #[serde(default = "disabled")] + pub drop_output_field: bool, +} + +impl Default for Format { + fn default() -> Self { + Format { + command_as_array: enabled(), + drop_output_field: disabled(), + } + } +} + +// Controls the content of the output. +// +// This will act as a filter on the output elements. +// These attributes can be read from the configuration file, and can be +// overridden by command line arguments. 
+#[derive(Debug, Deserialize, PartialEq)] +pub struct Content { + #[serde(default = "disabled")] + pub include_only_existing_source: bool, + #[serde(default)] + pub duplicate_filter_fields: DuplicateFilterFields, + #[serde(default)] + pub paths_to_include: Vec, + #[serde(default)] + pub paths_to_exclude: Vec, +} + +impl Default for Content { + fn default() -> Self { + Content { + include_only_existing_source: disabled(), + duplicate_filter_fields: DuplicateFilterFields::default(), + paths_to_include: vec![], + paths_to_exclude: vec![], + } + } +} + +fn disabled() -> bool { + false +} + +fn enabled() -> bool { + true +} + +/// Represents how the duplicate filtering detects duplicate entries. +#[derive(Debug, Default, Deserialize, PartialEq)] +#[serde(try_from = "String")] +pub enum DuplicateFilterFields { + FileOnly, + #[default] + FileAndOutputOnly, + All, +} + +impl TryFrom for DuplicateFilterFields { + type Error = String; + + fn try_from(value: String) -> Result { + match value.as_str() { + "file" => + Ok(DuplicateFilterFields::FileOnly), + "file_output" => + Ok(DuplicateFilterFields::FileAndOutputOnly), + "all" => + Ok(DuplicateFilterFields::All), + _ => + Err(format!(r#"Unknown value "{value}" for duplicate filter"#)), + } + } +} + +#[cfg(test)] +mod test { + use crate::{vec_of_pathbuf, vec_of_strings}; + use super::*; + + #[test] + fn test_full_config() { + let content: &[u8] = br#"{ + "output": { + "format": { + "command_as_array": true, + "drop_output_field": false + }, + "content": { + "include_only_existing_source": false, + "duplicate_filter_fields": "all", + "paths_to_include": ["sources"], + "paths_to_exclude": ["tests"] + } + }, + "compilation": { + "compilers_to_recognize": [ + { + "executable": "/usr/local/bin/clang", + "flags_to_add": ["-Dfoo=bar"], + "flags_to_remove": ["-Wall"] + } + ], + "compilers_to_exclude": [ + "clang" + ] + } + }"#; + + let result = serde_json::from_reader(content).unwrap(); + + let expected = Configuration { + output: 
Output { + format: Format { + command_as_array: true, + drop_output_field: false, + }, + content: Content { + include_only_existing_source: false, + duplicate_filter_fields: DuplicateFilterFields::All, + paths_to_include: vec_of_pathbuf!["sources"], + paths_to_exclude: vec_of_pathbuf!["tests"], + }, + }, + compilation: Compilation { + compilers_to_recognize: vec![ + CompilerToRecognize { + executable: PathBuf::from("/usr/local/bin/clang"), + flags_to_add: vec_of_strings!["-Dfoo=bar"], + flags_to_remove: vec_of_strings!["-Wall"], + } + ], + compilers_to_exclude: vec_of_pathbuf!["clang"], + }, + }; + + assert_eq!(expected, result); + } + + #[test] + fn test_only_output_config() { + let content: &[u8] = br#"{ + "output": { + "format": { + "command_as_array": false + }, + "content": { + "duplicate_filter_fields": "file" + } + } + }"#; + + let result = serde_json::from_reader(content).unwrap(); + + let expected = Configuration { + output: Output { + format: Format { + command_as_array: false, + drop_output_field: false, + }, + content: Content { + include_only_existing_source: false, + duplicate_filter_fields: DuplicateFilterFields::FileOnly, + paths_to_include: vec_of_pathbuf![], + paths_to_exclude: vec_of_pathbuf![], + }, + }, + compilation: Compilation::default(), + }; + + assert_eq!(expected, result); + } + + #[test] + fn test_compilation_only_config() { + let content: &[u8] = br#"{ + "compilation": { + "compilers_to_recognize": [ + { + "executable": "/usr/local/bin/clang" + }, + { + "executable": "/usr/local/bin/clang++" + } + ], + "compilers_to_exclude": [ + "clang", "clang++" + ] + } + }"#; + + let result = serde_json::from_reader(content).unwrap(); + + let expected = Configuration { + output: Output::default(), + compilation: Compilation { + compilers_to_recognize: vec![ + CompilerToRecognize { + executable: PathBuf::from("/usr/local/bin/clang"), + flags_to_add: vec![], + flags_to_remove: vec![], + }, + CompilerToRecognize { + executable: 
/// An unknown duplicate filter value must fail the deserialization, and the
/// error must name the offending value.
#[test]
fn test_failing_config() {
    let content: &[u8] = br#"{
        "output": {
            "format": {
                "command_as_array": false
            },
            "content": {
                "duplicate_filter_fields": "files"
            }
        }
    }"#;

    let result: Result<Configuration, serde_json::Error> = serde_json::from_reader(content);

    assert!(result.is_err());

    // Only the message prefix is checked: the "at line N column M" suffix
    // depends on how the JSON literal above is indented in this source file.
    let message = result.unwrap_err().to_string();
    assert!(
        message.starts_with("Unknown value \"files\" for duplicate filter"),
        "unexpected error message: {message}"
    );
}
+// +// https://docs.rs/serde_json/latest/serde_json/struct.StreamDeserializer.html +pub fn from_reader(reader: impl std::io::Read) -> impl Iterator> { + Deserializer::from_reader(reader) + .into_iter::() + .flat_map(|value| { + match value { + Ok(value) => + into_execution(value).map(Ok), + Err(error) => + Some(Err(error)), + } + }) +} + +fn into_execution(value: Value) -> Option { + value.get("started") + .and_then(|started| started.get("execution")) + .and_then(|execution| execution.as_object()) + .and_then(|map| { + let executable = map.get("executable") + .and_then(Value::as_str) + .map(PathBuf::from); + let arguments = map.get("arguments") + .and_then(Value::as_array) + .map(|vs| vs.iter() + .flat_map(Value::as_str) + .map(str::to_string) + .collect::>() + ); + let working_dir = map.get("working_dir") + .and_then(Value::as_str) + .map(PathBuf::from); + let environment = map.get("environment") + .and_then(Value::as_object) + .map(|m| m.iter() + .map(|kv| (kv.0.clone(), kv.1.as_str().unwrap().to_string())) + .collect::>() + ); + + if executable.is_some() && arguments.is_some() && working_dir.is_some() && environment.is_some() { + Some( + Execution { + executable: executable.unwrap(), + arguments: arguments.unwrap(), + working_dir: working_dir.unwrap(), + environment: environment.unwrap(), + } + ) + } else { + None + } + }) +} + +#[cfg(test)] +mod test { + use std::collections::HashMap; + use std::path::PathBuf; + use crate::vec_of_strings; + + use super::*; + + #[test] + fn test_reading_events() { + let content = [into_single_line(r#" + { + "rid": "17014093296157802240", + "started": { + "execution": { + "executable": "/usr/bin/sh", + "arguments": [ + "sh", + "-c", + "ls" + ], + "working_dir": "/var/home/lnagy/Code/Bear.git", + "environment": { + "COLORTERM": "truecolor", + "EDITOR": "/usr/bin/nano", + "USER": "lnagy", + "HOME": "/var/home/lnagy", + "LANG": "C.UTF-8", + "HOSTNAME": "tepsi", + "MAIL": "/var/spool/mail/lnagy" + } + }, + "pid": 395760, + "ppid": 
395750 + }, + "timestamp": "2023-08-08T12:02:12.760865Z" + } + "#), + into_single_line(r#" + { + "rid": "8533747834426684686", + "started": { + "execution": { + "executable": "/usr/bin/ls", + "arguments": [ + "ls" + ], + "working_dir": "/var/home/lnagy/Code/Bear.git", + "environment": { + "COLORTERM": "truecolor", + "EDITOR": "/usr/bin/nano", + "USER": "lnagy", + "HOME": "/var/home/lnagy", + "LANG": "C.UTF-8", + "HOSTNAME": "tepsi", + "MAIL": "/var/spool/mail/lnagy" + } + }, + "pid": 395764, + "ppid": 395755 + }, + "timestamp": "2023-08-08T12:02:12.771258Z" + } + "#), + into_single_line(r#" + { + "rid": "8533747834426684686", + "terminated": { + "status": "0" + }, + "timestamp": "2023-08-08T12:02:12.772584Z" + } + "#), + into_single_line(r#" + { + "rid": "17014093296157802240", + "terminated": { + "status": "0" + }, + "timestamp": "2023-08-08T12:02:12.773568Z" + } + "#)] + .join("\n"); + + let mut result = from_reader(content.as_bytes()); + + let expected = Execution { + executable: PathBuf::from("/usr/bin/sh"), + arguments: vec_of_strings![ + "sh", + "-c", + "ls" + ], + working_dir: PathBuf::from("/var/home/lnagy/Code/Bear.git"), + environment: HashMap::from([ + ("COLORTERM".to_string(), "truecolor".to_string()), + ("EDITOR".to_string(), "/usr/bin/nano".to_string()), + ("USER".to_string(), "lnagy".to_string()), + ("HOME".to_string(), "/var/home/lnagy".to_string()), + ("LANG".to_string(), "C.UTF-8".to_string()), + ("HOSTNAME".to_string(), "tepsi".to_string()), + ("MAIL".to_string(), "/var/spool/mail/lnagy".to_string()), + ]), + }; + assert_eq!(expected, result.next().unwrap().unwrap()); + + let expected = Execution { + executable: PathBuf::from("/usr/bin/ls"), + arguments: vec_of_strings!["ls"], + working_dir: PathBuf::from("/var/home/lnagy/Code/Bear.git"), + environment: HashMap::from([ + ("COLORTERM".to_string(), "truecolor".to_string()), + ("EDITOR".to_string(), "/usr/bin/nano".to_string()), + ("USER".to_string(), "lnagy".to_string()), + ("HOME".to_string(), 
/// A single command execution, as reported by the interception layer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Execution {
    /// Path of the program that was executed.
    pub executable: PathBuf,
    /// The argument vector; the first element is the program name.
    pub arguments: Vec<String>,
    /// The directory the command was executed in.
    pub working_dir: PathBuf,
    /// The environment variables of the executed command.
    pub environment: HashMap<String, String>,
}
+ + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +use std::collections::hash_map::DefaultHasher; +use std::collections::HashSet; +use std::hash::{Hash, Hasher}; +use std::path::PathBuf; + +use json_compilation_db::Entry; + +use crate::configuration::{Content, DuplicateFilterFields}; + +pub(crate) type EntryPredicate = Box bool>; + +impl From<&Content> for EntryPredicate { + fn from(val: &Content) -> Self { + let source_check = EntryPredicateBuilder::source_check(val.include_only_existing_source); + let paths_to_include = EntryPredicateBuilder::contains(val.paths_to_include.as_slice()); + let paths_to_exclude = EntryPredicateBuilder::contains(val.paths_to_exclude.as_slice()); + let duplicates = EntryPredicateBuilder::duplicates(&val.duplicate_filter_fields); + + (!paths_to_exclude & paths_to_include & source_check & duplicates).build() + } +} + + +struct EntryPredicateBuilder { + predicate_opt: Option, +} + +impl EntryPredicateBuilder { + fn build(self) -> EntryPredicate { + match self.predicate_opt { + Some(predicate) => predicate, + None => Box::new(|_: &Entry| true), + } + } + + fn source_check(include_only_existing_source: bool) -> Self { + if include_only_existing_source { + let predicate: EntryPredicate = Box::new(|entry| { entry.file.is_file() }); + EntryPredicateBuilder { predicate_opt: Some(predicate) } + } else { + EntryPredicateBuilder { predicate_opt: None } + } + } + + fn contains(paths: &[PathBuf]) -> Self { + if 
paths.is_empty() { + EntryPredicateBuilder { predicate_opt: None } + } else { + let paths_copy = paths.to_vec(); + let predicate: EntryPredicate = Box::new(move |entry| { + paths_copy.iter().any(|path| { entry.file.starts_with(path) }) + }); + EntryPredicateBuilder { predicate_opt: Some(predicate) } + } + } + + fn duplicates(config: &DuplicateFilterFields) -> Self { + let hash_function: fn(&Entry) -> u64 = config.into(); + let mut have_seen = HashSet::new(); + + let predicate: EntryPredicate = Box::new(move |entry| { + let hash = hash_function(entry); + if !have_seen.contains(&hash) { + have_seen.insert(hash); + true + } else { + false + } + }); + EntryPredicateBuilder { predicate_opt: Some(predicate) } + } +} + +impl std::ops::BitAnd for EntryPredicateBuilder { + type Output = EntryPredicateBuilder; + + fn bitand(self, rhs: Self) -> Self::Output { + let predicate_opt = match (self.predicate_opt, rhs.predicate_opt) { + (None, None) => + None, + (Some(mut lhs), Some(mut rhs)) => { + let predicate: EntryPredicate = Box::new(move |entry| { + let result = lhs(entry); + if result { + rhs(entry) + } else { + result + } + }); + Some(predicate) + } + (None, some_predicate) => + some_predicate, + (some_predicate, None) => + some_predicate, + }; + EntryPredicateBuilder { predicate_opt } + } +} + +impl std::ops::Not for EntryPredicateBuilder { + type Output = EntryPredicateBuilder; + + fn not(self) -> Self::Output { + let predicate_opt = match self.predicate_opt { + Some(mut original) => { + let predicate: EntryPredicate = Box::new(move |entry| { + let result = original(entry); + !result + }); + Some(predicate) + } + None => + None, + }; + EntryPredicateBuilder { predicate_opt } + } +} + +impl DuplicateFilterFields { + fn hash_source(entry: &Entry) -> u64 { + let mut s = DefaultHasher::default(); + entry.file.hash(&mut s); + s.finish() + } + + fn hash_source_and_output(entry: &Entry) -> u64 { + let mut s = DefaultHasher::default(); + entry.file.hash(&mut s); + 
entry.output.hash(&mut s); + s.finish() + } + + fn hash_all(entry: &Entry) -> u64 { + let mut s = DefaultHasher::default(); + entry.file.hash(&mut s); + entry.directory.hash(&mut s); + entry.arguments.hash(&mut s); + s.finish() + } +} + +impl From<&DuplicateFilterFields> for fn(&Entry) -> u64 { + fn from(val: &DuplicateFilterFields) -> Self { + match val { + DuplicateFilterFields::FileOnly => + DuplicateFilterFields::hash_source, + DuplicateFilterFields::FileAndOutputOnly => + DuplicateFilterFields::hash_source_and_output, + DuplicateFilterFields::All => + DuplicateFilterFields::hash_all, + } + } +} + +#[cfg(test)] +mod test { + use crate::{vec_of_pathbuf, vec_of_strings}; + use super::*; + + #[test] + fn test_duplicate_detection_works() { + let input: Vec = vec![ + Entry { + file: PathBuf::from("/home/user/project/source.c"), + arguments: vec_of_strings!["cc", "-c", "source.c"], + directory: PathBuf::from("/home/user/project"), + output: Some(PathBuf::from("/home/user/project/source.o")), + }, + Entry { + file: PathBuf::from("/home/user/project/source.c"), + arguments: vec_of_strings!["cc", "-c", "-Wall", "source.c"], + directory: PathBuf::from("/home/user/project"), + output: Some(PathBuf::from("/home/user/project/source.o")), + }, + Entry { + file: PathBuf::from("/home/user/project/test.c"), + arguments: vec_of_strings!["cc", "-c", "test.c"], + directory: PathBuf::from("/home/user/project"), + output: Some(PathBuf::from("/home/user/project/test.o")), + }, + ]; + + let expected: Vec = vec![ + Entry { + file: PathBuf::from("/home/user/project/source.c"), + arguments: vec_of_strings!["cc", "-c", "source.c"], + directory: PathBuf::from("/home/user/project"), + output: Some(PathBuf::from("/home/user/project/source.o")), + }, + Entry { + file: PathBuf::from("/home/user/project/test.c"), + arguments: vec_of_strings!["cc", "-c", "test.c"], + directory: PathBuf::from("/home/user/project"), + output: Some(PathBuf::from("/home/user/project/test.o")), + }, + ]; + + let sut: 
EntryPredicate = (&Content::default()).into(); + let result: Vec = input.into_iter().filter(sut).collect(); + assert_eq!(expected, result); + } + + #[test] + fn test_exclude_include_works() { + let configs: Vec = vec![ + Content { + include_only_existing_source: false, + duplicate_filter_fields: DuplicateFilterFields::default(), + paths_to_include: vec_of_pathbuf!["/home/user/project/source"], + paths_to_exclude: vec_of_pathbuf!["/home/user/project/test"], + }, + Content { + include_only_existing_source: false, + duplicate_filter_fields: DuplicateFilterFields::default(), + paths_to_include: vec_of_pathbuf!["/home/user/project/source/"], + paths_to_exclude: vec_of_pathbuf!["/home/user/project/test/"], + }, + Content { + include_only_existing_source: false, + duplicate_filter_fields: DuplicateFilterFields::default(), + paths_to_include: vec_of_pathbuf!["/home/user/project"], + paths_to_exclude: vec_of_pathbuf!["/home/user/project/test"], + }, + Content { + include_only_existing_source: false, + duplicate_filter_fields: DuplicateFilterFields::default(), + paths_to_include: vec_of_pathbuf!["/home/user/project/"], + paths_to_exclude: vec_of_pathbuf!["/home/user/project/test/"], + }, + ]; + + for config in configs { + let input: Vec = vec![ + Entry { + file: PathBuf::from("/home/user/project/source/source.c"), + arguments: vec_of_strings!["cc", "-c", "source.c"], + directory: PathBuf::from("/home/user/project"), + output: Some(PathBuf::from("/home/user/project/source/source.o")), + }, + Entry { + file: PathBuf::from("/home/user/project/source/source.c"), + arguments: vec_of_strings!["cc", "-c", "-Wall", "source.c"], + directory: PathBuf::from("/home/user/project"), + output: Some(PathBuf::from("/home/user/project/source/source.o")), + }, + Entry { + file: PathBuf::from("/home/user/project/test/source.c"), + arguments: vec_of_strings!["cc", "-c", "test.c"], + directory: PathBuf::from("/home/user/project"), + output: Some(PathBuf::from("/home/user/project/test/source.o")), 
+ }, + ]; + + let expected: Vec = vec![ + Entry { + file: PathBuf::from("/home/user/project/source/source.c"), + arguments: vec_of_strings!["cc", "-c", "source.c"], + directory: PathBuf::from("/home/user/project"), + output: Some(PathBuf::from("/home/user/project/source/source.o")), + }, + ]; + + let sut: EntryPredicate = (&config).into(); + let result: Vec = input.into_iter().filter(sut).collect(); + assert_eq!(expected, result); + } + } +} diff --git a/rust/semantic/src/fixtures.rs b/rust/semantic/src/fixtures.rs new file mode 100644 index 00000000..9099fff8 --- /dev/null +++ b/rust/semantic/src/fixtures.rs @@ -0,0 +1,13 @@ + +#[cfg(test)] +mod fixtures { + #[macro_export] + macro_rules! vec_of_strings { + ($($x:expr),*) => (vec![$($x.to_string()),*]); + } + + #[macro_export] + macro_rules! vec_of_pathbuf { + ($($x:expr),*) => (vec![$(PathBuf::from($x)),*]); + } +} diff --git a/rust/semantic/src/main.rs b/rust/semantic/src/main.rs new file mode 100644 index 00000000..e40ab1dd --- /dev/null +++ b/rust/semantic/src/main.rs @@ -0,0 +1,294 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
/// The command line arguments of the program.
#[derive(Debug, PartialEq)]
struct Arguments {
    /// Path of the event file.
    input: String,
    /// Path of the result file.
    output: String,
    /// Path of the config file, if one was given.
    config: Option<String>,
    /// Append the result to an existing output file.
    append: bool,
    /// The level of verbosity (how many times the flag was given).
    verbose: u8,
}
"Sets the level of verbosity") + .action(ArgAction::Count), + ]) + .get_matches(); + + Arguments { + input: matches.get_one::("input") + .expect("input is defaulted") + .clone(), + output: matches.get_one::("output") + .expect("output is defaulted") + .clone(), + config: matches.get_one::("config") + .map(String::to_string), + append: *matches.get_one::("append") + .unwrap_or(&false), + verbose: matches.get_count("verbose"), + } + .validate() + } + + fn validate(self) -> Result { + if self.input == "-" && self.config.as_deref() == Some("-") { + return Err(anyhow!("Both input and config reading the standard input.")); + } + if self.append && self.output == "-" { + return Err(anyhow!("Append can't applied to the standard output.")); + } + + Ok(self) + } +} + +fn prepare_logging(level: u8) -> Result<()> { + let level = match level { + 0 => LevelFilter::Error, + 1 => LevelFilter::Warn, + 2 => LevelFilter::Info, + 3 => LevelFilter::Debug, + _ => LevelFilter::Trace, + }; + let mut logger = SimpleLogger::new() + .with_level(level); + if level <= LevelFilter::Debug { + logger = logger.with_local_timestamps() + } + logger.init()?; + + Ok(()) +} + +fn read_configuration(file: &Option) -> Result { + let configuration = match file.as_deref() { + Some("-") | Some("/dev/stdin") => { + let reader = stdin(); + serde_json::from_reader(reader) + .context("Failed to read configuration from stdin")? + } + Some(file) => { + let reader = OpenOptions::new().read(true).open(file)?; + serde_json::from_reader(reader) + .with_context(|| format!("Failed to read configuration from file: {}", file))? 
+ } + None => + Configuration::default(), + }; + Ok(configuration) +} + +#[derive(Debug, PartialEq)] +struct Application { + input: String, + output: String, + append: bool, + configuration: Configuration, +} + +impl Application { + fn configure(arguments: Arguments) -> Result { + let configuration = read_configuration(&arguments.config)?; + + Ok( + Application { + input: arguments.input, + output: arguments.output, + append: arguments.append, + configuration, + } + ) + } + + fn run(self) -> Result<()> { + let filter: EntryPredicate = (&self.configuration.output.content).into(); + let entries = self.create_entries()? + .inspect(|entry| log::debug!("{:?}", entry)) + .filter(filter); + self.write_entries(entries)?; + + Ok(()) + } + + fn create_entries(&self) -> Result>> { + let tool: Box = (&self.configuration.compilation).into(); + let from_events = entries_from_execution_events(self.input.as_str(), tool)?; + // Based on the append flag, we should read the existing compilation database too. + if self.append { + let from_db = entries_from_compilation_db(Path::new(&self.output))?; + Ok(Box::new(from_events.chain(from_db))) + } else { + Ok(Box::new(from_events)) + } + } + + fn write_entries(&self, entries: impl Iterator) -> Result<(), anyhow::Error> { + match self.output.as_str() { + "-" | "/dev/stdout" => { + let buffer = BufWriter::new(stdout()); + json_compilation_db::write(buffer, entries)? + } + output => { + let temp = format!("{}.tmp", output); + // Create scope for the file, so it will be closed when the scope is over. 
+ { + let file = File::create(&temp) + .with_context(|| format!("Failed to create file: {}", temp))?; + let buffer = BufWriter::new(file); + json_compilation_db::write(buffer, entries)?; + } + std::fs::rename(&temp, output) + .with_context(|| format!("Failed to rename file from '{}' to '{}'.", temp, output))?; + } + }; + + Ok(()) + } +} + +fn entries_from_execution_events(source: &str, tool: Box) -> Result> { + let reader: BufReader> = match source { + "-" | "/dev/stdin" => + BufReader::new(Box::new(stdin())), + _ => { + let file = OpenOptions::new().read(true).open(source) + .with_context(|| format!("Failed to open file: {}", source))?; + BufReader::new(Box::new(file)) + } + }; + let entries = events::from_reader(reader) + .flat_map(failed_execution_read_logged) + .flat_map(move |execution| execution_into_semantic(tool.as_ref(), execution)) + .flat_map(semantic_into_entries); + + Ok(entries) +} + +fn failed_execution_read_logged(candidate: Result) -> Option { + match candidate { + Ok(execution) => Some(execution), + Err(error) => { + log::error!("Failed to read entry: {}", error); + None + } + } +} + +fn execution_into_semantic(tool: &dyn Tool, execution: Execution) -> Option { + match tool.recognize(&execution) { + RecognitionResult::Recognized(Ok(Semantic::UnixCommand)) => { + log::debug!("execution recognized as unix command: {:?}", execution); + None + } + RecognitionResult::Recognized(Ok(Semantic::BuildCommand)) => { + log::debug!("execution recognized as build command: {:?}", execution); + None + } + RecognitionResult::Recognized(Ok(semantic)) => { + log::debug!("execution recognized as compiler call, {:?} : {:?}", semantic, execution); + Some(semantic) + } + RecognitionResult::Recognized(Err(reason)) => { + log::debug!("execution recognized with failure, {:?} : {:?}", reason, execution); + None + } + RecognitionResult::NotRecognized => { + log::debug!("execution not recognized: {:?}", execution); + None + } + } +} + +fn semantic_into_entries(semantic: 
Semantic) -> Vec { + let entries: Result, anyhow::Error> = semantic.try_into(); + entries.unwrap_or_else(|error| { + log::debug!("compiler call failed to convert to compilation db entry: {}", error); + vec![] + }) +} + +fn entries_from_compilation_db(source: &Path) -> Result> { + let file = OpenOptions::new().read(true).open(source) + .with_context(|| format!("Failed to open file: {:?}", source))?; + let buffer = BufReader::new(file); + let entries = json_compilation_db::read(buffer) + .flat_map(failed_entry_read_logged); + + Ok(entries) +} + +fn failed_entry_read_logged(candidate: Result) -> Option { + match candidate { + Ok(entry) => Some(entry), + Err(error) => { + log::error!("Failed to read entry: {}", error); + None + } + } +} diff --git a/rust/semantic/src/tools.rs b/rust/semantic/src/tools.rs new file mode 100644 index 00000000..2e4af2ad --- /dev/null +++ b/rust/semantic/src/tools.rs @@ -0,0 +1,278 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+ */ + +use std::path::PathBuf; + +use crate::configuration::Compilation; +use crate::execution::Execution; +use crate::tools::build::Build; +use crate::tools::configured::Configured; +use crate::tools::RecognitionResult::{NotRecognized, Recognized}; +use crate::tools::unix::Unix; +use crate::tools::wrapper::Wrapper; + +mod configured; +mod wrapper; +mod matchers; +mod unix; +mod build; +mod gcc; + +/// This abstraction is representing a tool which is known by us. +pub(crate) trait Tool: Send { + /// A tool has a potential to recognize a command execution and identify + /// the semantic of that command. + fn recognize(&self, _: &Execution) -> RecognitionResult; +} + +#[derive(Debug, PartialEq)] +pub(crate) enum RecognitionResult { + Recognized(Result), + NotRecognized, +} + +/// Represents an executed command semantic. +#[derive(Debug, PartialEq)] +pub(crate) enum Semantic { + UnixCommand, + BuildCommand, + Compiler { + compiler: PathBuf, + working_dir: PathBuf, + passes: Vec, + }, +} + +/// Represents a compiler call. +#[derive(Debug, PartialEq)] +pub(crate) enum CompilerPass { + Preprocess, + Compile { + source: PathBuf, + output: Option, + flags: Vec, + }, +} + + +struct Any { + tools: Vec>, +} + +impl Any { + fn new(tools: Vec>) -> Box { + Box::new(Any { tools }) + } +} + +impl Tool for Any { + /// Any of the tool recognize the semantic, will be returned as result. + fn recognize(&self, x: &Execution) -> RecognitionResult { + for tool in &self.tools { + match tool.recognize(x) { + Recognized(result) => + return Recognized(result), + _ => continue, + } + } + NotRecognized + } +} + + +struct ExcludeOr { + excludes: Vec, + or: Box, +} + +impl ExcludeOr { + fn new(excludes: &[PathBuf], tools: Vec>) -> Box { + Box::new( + ExcludeOr { + // exclude the executables are explicitly mentioned in the config file. 
+ excludes: Vec::from(excludes), + or: Any::new(tools), + } + ) + } +} + +impl Tool for ExcludeOr { + /// Check if the executable is on the exclude list, return as not recognized. + /// Otherwise delegate the recognition to the tool given. + fn recognize(&self, x: &Execution) -> RecognitionResult { + for exclude in &self.excludes { + if &x.executable == exclude { + return NotRecognized; + } + } + self.or.recognize(x) + } +} + +impl From<&Compilation> for Box { + fn from(value: &Compilation) -> Self { + // Build the list of known compilers we will recognize by default. + let mut tools = vec![ + Unix::new(), + Build::new(), + Wrapper::new(), + ]; + + // The hinted tools should be the first to recognize. + if !value.compilers_to_recognize.is_empty() { + let configured = Configured::from(&value.compilers_to_recognize); + tools.insert(0, configured) + } + // Excluded compiler check should be done before anything. + if value.compilers_to_exclude.is_empty() { + Any::new(tools) + } else { + ExcludeOr::new(&value.compilers_to_exclude, tools) + } + } +} + +#[cfg(test)] +mod test { + use std::collections::HashMap; + use std::path::PathBuf; + + use crate::vec_of_pathbuf; + + use super::*; + + #[test] + fn test_any_when_no_match() { + let sut = Any { + tools: vec![ + Box::new(MockTool::NotRecognize), + Box::new(MockTool::NotRecognize), + Box::new(MockTool::NotRecognize), + ] + }; + + let input = any_execution(); + + match sut.recognize(&input) { + NotRecognized => assert!(true), + _ => assert!(false), + } + } + + #[test] + fn test_any_when_match() { + let sut = Any { + tools: vec![ + Box::new(MockTool::NotRecognize), + Box::new(MockTool::Recognize), + Box::new(MockTool::NotRecognize), + ] + }; + + let input = any_execution(); + + match sut.recognize(&input) { + Recognized(Ok(_)) => assert!(true), + _ => assert!(false) + } + } + + #[test] + fn test_any_when_match_fails() { + let sut = Any { + tools: vec![ + Box::new(MockTool::NotRecognize), + Box::new(MockTool::RecognizeFailed), 
/// An executable on the exclude list is not recognized, even if the wrapped
/// tool would recognize it.
#[test]
fn test_exclude_when_match() {
    let sut = ExcludeOr {
        excludes: vec_of_pathbuf!["/usr/bin/something"],
        or: Box::new(MockTool::Recognize),
    };

    let input = Execution {
        executable: PathBuf::from("/usr/bin/something"),
        arguments: vec![],
        working_dir: PathBuf::new(),
        environment: HashMap::new(),
    };

    // Comparing the values reports the actual result on failure.
    assert_eq!(NotRecognized, sut.recognize(&input));
}
+ + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +use std::collections::HashSet; +use std::path::Path; + +use lazy_static::lazy_static; + +use crate::execution::Execution; +use crate::tools::{RecognitionResult, Semantic, Tool}; + +pub(crate) struct Build {} + +impl Build { + pub(crate) fn new() -> Box { + Box::new(Build {}) + } +} + +impl Tool for Build { + fn recognize(&self, execution: &Execution) -> RecognitionResult { + let executable = execution.executable.as_path(); + if BUILD_TOOLS.contains(executable) { + RecognitionResult::Recognized(Ok(Semantic::BuildCommand)) + } else { + RecognitionResult::NotRecognized + } + } +} + +lazy_static! { + static ref BUILD_TOOLS: HashSet<&'static Path> = { + let files_paths = [ + "/usr/bin/make", + "/usr/bin/gmake", + ] + .iter() + .map(Path::new); + + HashSet::from_iter(files_paths) + }; +} + +#[cfg(test)] +mod test { + use std::collections::HashMap; + use std::path::PathBuf; + + use crate::vec_of_strings; + + use super::*; + + #[test] + fn test_make_is_recognized() { + let input = Execution { + executable: PathBuf::from("/usr/bin/make"), + arguments: vec_of_strings!["make", "-C", "/home/user/build"], + working_dir: PathBuf::from("/home/user"), + environment: HashMap::new(), + }; + + assert_eq!( + RecognitionResult::Recognized(Ok(Semantic::BuildCommand)), + SUT.recognize(&input) + ) + } + + lazy_static! 
{ + static ref SUT: Build = Build {}; + } +} \ No newline at end of file diff --git a/rust/semantic/src/tools/configured.rs b/rust/semantic/src/tools/configured.rs new file mode 100644 index 00000000..269fb519 --- /dev/null +++ b/rust/semantic/src/tools/configured.rs @@ -0,0 +1,166 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +use std::path::PathBuf; +use std::vec; + +use crate::configuration::CompilerToRecognize; +use crate::execution::Execution; +use crate::tools::{CompilerPass, Semantic}; +use crate::tools::{Any, RecognitionResult, Tool}; +use crate::tools::matchers::source::looks_like_a_source_file; +use crate::tools::RecognitionResult::{NotRecognized, Recognized}; + +pub(crate) struct Configured { + pub executable: PathBuf, + pub flags_to_add: Vec, + pub flags_to_remove: Vec, +} + +impl Configured { + pub(crate) fn new(config: &CompilerToRecognize) -> Box { + Box::new( + Configured { + executable: config.executable.clone(), + flags_to_add: config.flags_to_add.clone(), + flags_to_remove: config.flags_to_remove.clone(), + } + ) + } + + pub(crate) fn from(configs: &[CompilerToRecognize]) -> Box { + Any::new(configs.iter().map(Configured::new).collect()) + } +} + +impl Tool for Configured { + /// Recognizes an execution whose executable equals the configured one; sources are collected, configured flags are removed and added.
+ fn recognize(&self, x: &Execution) -> RecognitionResult { + if x.executable == self.executable { + let mut flags = vec![]; + let mut sources = vec![]; + + // find sources and filter out requested flags. + for argument in x.arguments.iter().skip(1) { + if self.flags_to_remove.contains(argument) { + continue; + } else if looks_like_a_source_file(argument.as_str()) { + sources.push(PathBuf::from(argument)); + } else { + flags.push(argument.clone()); + } + } + // extend flags with requested flags. + for flag in &self.flags_to_add { + flags.push(flag.clone()); + } + + if sources.is_empty() { + Recognized(Err(String::from("source file is not found"))) + } else { + Recognized( + Ok( + Semantic::Compiler { + compiler: x.executable.clone(), + working_dir: x.working_dir.clone(), + passes: sources.iter() + .map(|source| { + CompilerPass::Compile { + source: source.clone(), + output: None, + flags: flags.clone(), + } + }) + .collect(), + } + ) + ) + } + } else { + NotRecognized + } + } +} + +#[cfg(test)] +mod test { + use std::collections::HashMap; + + use lazy_static::lazy_static; + + use crate::vec_of_strings; + + use super::*; + + #[test] + fn test_matching() { + let input = Execution { + executable: PathBuf::from("/usr/bin/something"), + arguments: vec_of_strings!["something", "-Dthis=that", "-I.", "source.c", "-o", "source.c.o"], + working_dir: PathBuf::from("/home/user"), + environment: HashMap::new(), + }; + + let expected = Semantic::Compiler { + compiler: PathBuf::from("/usr/bin/something"), + working_dir: PathBuf::from("/home/user"), + passes: vec![ + CompilerPass::Compile { + flags: vec_of_strings!["-Dthis=that", "-o", "source.c.o", "-Wall"], + source: PathBuf::from("source.c"), + output: None, + } + ], + }; + + assert_eq!(Recognized(Ok(expected)), SUT.recognize(&input)); + } + + #[test] + fn test_matching_without_sources() { + let input = Execution { + executable: PathBuf::from("/usr/bin/something"), + arguments: vec_of_strings!["something", "--help"], + 
working_dir: PathBuf::from("/home/user"), + environment: HashMap::new(), + }; + + assert_eq!(Recognized(Err(String::from("source file is not found"))), SUT.recognize(&input)); + } + + #[test] + fn test_not_matching() { + let input = Execution { + executable: PathBuf::from("/usr/bin/cc"), + arguments: vec_of_strings!["cc", "-Dthis=that", "-I.", "source.c", "-o", "source.c.o"], + working_dir: PathBuf::from("/home/user"), + environment: HashMap::new(), + }; + + assert_eq!(NotRecognized, SUT.recognize(&input)); + } + + lazy_static! { + static ref SUT: Configured = Configured { + executable: PathBuf::from("/usr/bin/something"), + flags_to_remove: vec_of_strings!["-I."], + flags_to_add: vec_of_strings!["-Wall"], + }; + } +} diff --git a/rust/semantic/src/tools/gcc.rs b/rust/semantic/src/tools/gcc.rs new file mode 100644 index 00000000..0c0e9dd2 --- /dev/null +++ b/rust/semantic/src/tools/gcc.rs @@ -0,0 +1,219 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+ */ + +use nom::branch::alt; +use nom::multi::many1; +use nom::sequence::preceded; + +use crate::execution::Execution; +use crate::tools::{RecognitionResult, Semantic, Tool}; +use crate::tools::gcc::internal::Argument; + +pub(crate) struct Gcc {} + +impl Gcc { + pub(crate) fn new() -> Box { + Box::new(Gcc {}) + } +} + +impl Tool for Gcc { + fn recognize(&self, execution: &Execution) -> RecognitionResult { + let mut parser = preceded( + internal::compiler, + many1(alt((internal::flag, internal::source))), + ); + + match parser(execution.arguments.as_slice()) { + Ok(result) => { + // todo: append flags from environment + let flags = result.1; + let passes = Argument::passes(flags.as_slice()); + + RecognitionResult::Recognized( + Ok( + Semantic::Compiler { + compiler: execution.executable.clone(), + working_dir: execution.working_dir.clone(), + passes, + } + ) + ) + } + Err(error) => { + log::debug!("Gcc failed to parse it: {error}."); + RecognitionResult::NotRecognized + } + } + } +} + +mod internal { + use std::path::PathBuf; + use lazy_static::lazy_static; + use nom::{error::ErrorKind, IResult}; + use regex::Regex; + + use crate::tools::CompilerPass; + use crate::tools::matchers::source::looks_like_a_source_file; + + #[derive(Debug, PartialEq)] + enum Language { + C, + Cpp, + ObjectiveC, + ObjectiveCpp, + Ada, + Fortran, + Go, + D, + Assembler, + Other, + } + + #[derive(Debug, PartialEq)] + enum Pass { + Preprocessor, + Compiler, + Linker, + } + + #[derive(Debug, PartialEq)] + enum Meaning { + Compiler, + ControlKindOfOutput { stop_before: Option }, + ControlLanguage(Language), + ControlPass(Pass), + Diagnostic, + Debug, + Optimize, + Instrumentation, + DirectorySearch(Option), + Developer, + Input(Pass), + Output, + } + + /// A compiler flag can span a varying number of arguments, but carries a single meaning.
+ pub(crate) struct Argument<'a> { + arguments: &'a [String], + meaning: Meaning, + } + + impl<'a> Argument<'a> { + pub(crate) fn passes(flags: &[Argument]) -> Vec { + let mut pass: Pass = Pass::Linker; + let mut inputs: Vec = vec![]; + let mut output: Option = None; + let mut args: Vec = vec![]; + + for flag in flags { + match flag.meaning { + Meaning::ControlKindOfOutput { stop_before: Some(Pass::Compiler) } => { + pass = Pass::Preprocessor; + args.extend(flag.arguments.into_iter().map(String::to_owned)); + } + Meaning::ControlKindOfOutput { stop_before: Some(Pass::Linker) } => { + pass = Pass::Compiler; + args.extend(flag.arguments.into_iter().map(String::to_owned)); + } + Meaning::ControlKindOfOutput { .. } | + Meaning::ControlLanguage(_) | + Meaning::ControlPass(Pass::Preprocessor) | + Meaning::ControlPass(Pass::Compiler) | + Meaning::Diagnostic | + Meaning::Debug | + Meaning::Optimize | + Meaning::Instrumentation | + Meaning::DirectorySearch(None) => { + args.extend(flag.arguments.into_iter().map(String::to_owned)); + } + Meaning::Input(_) => { + assert_eq!(flag.arguments.len(), 1); + inputs.push(flag.arguments[0].clone()) + } + Meaning::Output => { + assert_eq!(flag.arguments.len(), 1); + output = Some(flag.arguments[0].clone()) + } + _ => {} + } + } + + match pass { + Pass::Preprocessor if inputs.is_empty() => { + vec![] + } + Pass::Preprocessor => { + vec![CompilerPass::Preprocess] + } + Pass::Compiler | + Pass::Linker => { + inputs.into_iter() + .map(|source| { + CompilerPass::Compile { + source: PathBuf::from(source), + output: output.as_ref().map(PathBuf::from), + flags: args.clone(), + } + }) + .collect() + } + } + } + } + + pub(crate) fn compiler(i: &[String]) -> IResult<&[String], Argument> { + let candidate = &i[0]; + if COMPILER_REGEX.is_match(candidate) { + const MEANING: Meaning = Meaning::Compiler; + Ok((&i[1..], Argument { arguments: &i[..0], meaning: MEANING })) + } else { + // Declare it as a non-recoverable error, so argument processing will 
stop after this. + Err(nom::Err::Failure(nom::error::Error::new(i, ErrorKind::Tag))) + } + } + + pub(crate) fn source(i: &[String]) -> IResult<&[String], Argument> { + let candidate = &i[0]; + if looks_like_a_source_file(candidate.as_str()) { + const MEANING: Meaning = Meaning::Input(Pass::Preprocessor); + Ok((&i[1..], Argument { arguments: &i[..1], meaning: MEANING })) // keep the matched source argument: `passes` reads `arguments[0]` of every input + } else { + Err(nom::Err::Error(nom::error::Error::new(i, ErrorKind::Tag))) + } + } + + pub(crate) fn flag(i: &[String]) -> IResult<&[String], Argument> { + todo!() + } + + lazy_static! { + // - cc + // - c++ + // - cxx + // - CC + // - mcc, gcc, m++, g++, gfortran, fortran + // - with prefixes like: arm-none-eabi- + // - with postfixes like: -7.0 or 6.4.0 + static ref COMPILER_REGEX: Regex = Regex::new( + r"(^(cc|c\+\+|cxx|CC|(([^-]*-)*([mg](cc|\+\+)|[g]?fortran)(-?\d+(\.\d+){0,2})?))$)" + ).unwrap(); + } +} diff --git a/rust/semantic/src/tools/matchers.rs b/rust/semantic/src/tools/matchers.rs new file mode 100644 index 00000000..28ab5731 --- /dev/null +++ b/rust/semantic/src/tools/matchers.rs @@ -0,0 +1,20 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see .
+ */ + +pub(crate) mod source; \ No newline at end of file diff --git a/rust/semantic/src/tools/matchers/source.rs b/rust/semantic/src/tools/matchers/source.rs new file mode 100644 index 00000000..4e6dd684 --- /dev/null +++ b/rust/semantic/src/tools/matchers/source.rs @@ -0,0 +1,101 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +use std::collections::HashSet; +use lazy_static::lazy_static; + +#[cfg(target_family = "unix")] +pub fn looks_like_a_source_file(argument: &str) -> bool { + // not a command line flag + if argument.starts_with('-') { + return false; + } + if let Some((_, extension)) = argument.rsplit_once('.') { + return EXTENSIONS.contains(extension); + } + false +} + +#[cfg(target_family = "windows")] +pub fn looks_like_a_source_file(argument: &str) -> bool { + // not a command line flag + if argument.starts_with('/') { + return false; + } + if let Some((_, extension)) = argument.rsplit_once('.') { + return EXTENSIONS.contains(extension); + } + false +} + +lazy_static! 
{ + static ref EXTENSIONS: HashSet<&'static str> = { + HashSet::from([ + // header files + "h", "hh", "H", "hp", "hxx", "hpp", "HPP", "h++", "tcc", + // C + "c", "C", + // C++ + "cc", "CC", "c++", "C++", "cxx", "cpp", "cp", + // CUDA + "cu", + // ObjectiveC + "m", "mi", "mm", "M", "mii", + // Preprocessed + "i", "ii", + // Assembly + "s", "S", "sx", "asm", + // Fortran + "f", "for", "ftn", + "F", "FOR", "fpp", "FPP", "FTN", + "f90", "f95", "f03", "f08", + "F90", "F95", "F03", "F08", + // go + "go", + // brig + "brig", + // D + "d", "di", "dd", + // Ada (ads: unit specification, adb: unit body) + "ads", "adb" + ]) + }; +} + +#[cfg(test)] +mod test { + use crate::tools::matchers::source::looks_like_a_source_file; + + #[test] + fn test_filenames() { + assert!(looks_like_a_source_file("source.c")); + assert!(looks_like_a_source_file("source.cpp")); + assert!(looks_like_a_source_file("source.cxx")); + assert!(looks_like_a_source_file("source.cc")); + + assert!(looks_like_a_source_file("source.h")); + assert!(looks_like_a_source_file("source.hpp")); + + assert!(!looks_like_a_source_file("gcc")); + assert!(!looks_like_a_source_file("clang")); + assert!(!looks_like_a_source_file("-o")); + assert!(!looks_like_a_source_file("-Wall")); + assert!(!looks_like_a_source_file("/o")); + } +} \ No newline at end of file diff --git a/rust/semantic/src/tools/unix.rs b/rust/semantic/src/tools/unix.rs new file mode 100644 index 00000000..443f863e --- /dev/null +++ b/rust/semantic/src/tools/unix.rs @@ -0,0 +1,189 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version.
+ + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +use std::collections::HashSet; +use std::path::Path; + +use lazy_static::lazy_static; + +use crate::execution::Execution; +use crate::tools::{RecognitionResult, Semantic, Tool}; + +pub(crate) struct Unix {} + +impl Unix { + pub(crate) fn new() -> Box { + Box::new(Unix {}) + } +} + +impl Tool for Unix { + fn recognize(&self, execution: &Execution) -> RecognitionResult { + let executable = execution.executable.as_path(); + if COREUTILS_FILES.contains(executable) { + RecognitionResult::Recognized(Ok(Semantic::UnixCommand)) + } else { + RecognitionResult::NotRecognized + } + } +} + +lazy_static! { + static ref COREUTILS_FILES: HashSet<&'static Path> = { + let files_paths = [ + "/usr/bin/[", + "/usr/bin/arch", + "/usr/bin/b2sum", + "/usr/bin/base32", + "/usr/bin/base64", + "/usr/bin/basename", + "/usr/bin/basenc", + "/usr/bin/cat", + "/usr/bin/chcon", + "/usr/bin/chgrp", + "/usr/bin/chmod", + "/usr/bin/chown", + "/usr/bin/cksum", + "/usr/bin/comm", + "/usr/bin/cp", + "/usr/bin/csplit", + "/usr/bin/cut", + "/usr/bin/date", + "/usr/bin/dd", + "/usr/bin/df", + "/usr/bin/dir", + "/usr/bin/dircolors", + "/usr/bin/dirname", + "/usr/bin/du", + "/usr/bin/echo", + "/usr/bin/env", + "/usr/bin/expand", + "/usr/bin/expr", + "/usr/bin/factor", + "/usr/bin/false", + "/usr/bin/fmt", + "/usr/bin/fold", + "/usr/bin/groups", + "/usr/bin/head", + "/usr/bin/hostid", + "/usr/bin/id", + "/usr/bin/install", + "/usr/bin/join", + "/usr/bin/link", + "/usr/bin/ln", + "/usr/bin/logname", + "/usr/bin/ls", + "/usr/bin/md5sum", + "/usr/bin/mkdir", + "/usr/bin/mkfifo", + "/usr/bin/mknod", + "/usr/bin/mktemp", + "/usr/bin/mv", + "/usr/bin/nice", + 
"/usr/bin/nl", + "/usr/bin/nohup", + "/usr/bin/nproc", + "/usr/bin/numfmt", + "/usr/bin/od", + "/usr/bin/paste", + "/usr/bin/pathchk", + "/usr/bin/pinky", + "/usr/bin/pr", + "/usr/bin/printenv", + "/usr/bin/printf", + "/usr/bin/ptx", + "/usr/bin/pwd", + "/usr/bin/readlink", + "/usr/bin/realpath", + "/usr/bin/rm", + "/usr/bin/rmdir", + "/usr/bin/runcon", + "/usr/bin/seq", + "/usr/bin/sha1sum", + "/usr/bin/sha224sum", + "/usr/bin/sha256sum", + "/usr/bin/sha384sum", + "/usr/bin/sha512sum", + "/usr/bin/shred", + "/usr/bin/shuf", + "/usr/bin/sleep", + "/usr/bin/sort", + "/usr/bin/split", + "/usr/bin/stat", + "/usr/bin/stdbuf", + "/usr/bin/stty", + "/usr/bin/sum", + "/usr/bin/sync", + "/usr/bin/tac", + "/usr/bin/tail", + "/usr/bin/tee", + "/usr/bin/test", + "/usr/bin/timeout", + "/usr/bin/touch", + "/usr/bin/tr", + "/usr/bin/true", + "/usr/bin/truncate", + "/usr/bin/tsort", + "/usr/bin/tty", + "/usr/bin/uname", + "/usr/bin/unexpand", + "/usr/bin/uniq", + "/usr/bin/unlink", + "/usr/bin/users", + "/usr/bin/vdir", + "/usr/bin/wc", + "/usr/bin/who", + "/usr/bin/whoami", + "/usr/bin/yes", + ] + .iter() + .map(Path::new); + + HashSet::from_iter(files_paths) + }; +} + +#[cfg(test)] +mod test { + use std::collections::HashMap; + use std::path::PathBuf; + + use crate::vec_of_strings; + + use super::*; + + #[test] + fn test_unix_tools_are_recognized() { + let input = Execution { + executable: PathBuf::from("/usr/bin/ls"), + arguments: vec_of_strings!["ls", "/home/user/build"], + working_dir: PathBuf::from("/home/user"), + environment: HashMap::new(), + }; + + assert_eq!( + RecognitionResult::Recognized(Ok(Semantic::UnixCommand)), + SUT.recognize(&input) + ) + } + + lazy_static! 
{ + static ref SUT: Unix = Unix {}; + } +} \ No newline at end of file diff --git a/rust/semantic/src/tools/wrapper.rs b/rust/semantic/src/tools/wrapper.rs new file mode 100644 index 00000000..7c23739d --- /dev/null +++ b/rust/semantic/src/tools/wrapper.rs @@ -0,0 +1,76 @@ +/* Copyright (C) 2012-2024 by László Nagy + This file is part of Bear. + + Bear is a tool to generate compilation database for clang tooling. + + Bear is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bear is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + */ + +use std::path::PathBuf; + +use crate::execution::Execution; +use crate::tools::{CompilerPass, RecognitionResult, Semantic, Tool}; +use crate::tools::matchers::source::looks_like_a_source_file; +use crate::tools::RecognitionResult::{NotRecognized, Recognized}; + +pub(crate) struct Wrapper {} + +impl Wrapper { + pub(crate) fn new() -> Box { + Box::new(Wrapper {}) + } +} + +impl Tool for Wrapper { + // fixme: this is just a quick and dirty implementation. + fn recognize(&self, x: &Execution) -> RecognitionResult { + if x.executable == PathBuf::from("/usr/bin/g++") { + let mut flags = vec![]; + let mut sources = vec![]; + + // find sources and filter out requested flags. 
+ for argument in x.arguments.iter().skip(1) { + if looks_like_a_source_file(argument.as_str()) { + sources.push(PathBuf::from(argument)); + } else { + flags.push(argument.clone()); + } + } + + if sources.is_empty() { + Recognized(Err(String::from("source file is not found"))) + } else { + Recognized( + Ok( + Semantic::Compiler { + compiler: x.executable.clone(), + working_dir: x.working_dir.clone(), + passes: sources.iter() + .map(|source| { + CompilerPass::Compile { + source: source.clone(), + output: None, + flags: flags.clone(), + } + }) + .collect(), + } + ) + ) + } + } else { + NotRecognized + } + } +}