From b9031e8f12812927979d5036ced11a1257e72b66 Mon Sep 17 00:00:00 2001 From: Grzegorz Lukasik Date: Wed, 4 Dec 2024 21:55:29 +0100 Subject: [PATCH] fix: convert html to markdown in docs Documentation for builtin starlark methods contains HTML tags, while LSP clients handle markdown much easier. Used htmd crate to convert HTML tags to markdown, and additionally change links from relative to pointing to bazel website. Fixes https://github.com/cameron-martin/bazel-lsp/issues/65 --- BUILD | 2 + Cargo.lock | 163 ++++++++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 2 + src/bazel.rs | 98 +++++++++++++++++++++++------ src/builtin.rs | 60 +++++++++++++++++- 5 files changed, 302 insertions(+), 23 deletions(-) diff --git a/BUILD b/BUILD index 6010218..5f8a510 100644 --- a/BUILD +++ b/BUILD @@ -16,7 +16,9 @@ rust_binary( "@crates//:prost", "@crates//:either", "@crates//:hex", + "@crates//:htmd", "@crates//:lsp-types", + "@crates//:once_cell", "@crates//:ring", "@crates//:serde_json", "@crates//:starlark", diff --git a/Cargo.lock b/Cargo.lock index 06843b8..9212785 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -262,8 +262,10 @@ dependencies = [ "clap", "either", "hex", + "htmd", "itertools 0.12.1", "lsp-types", + "once_cell", "prost 0.12.4", "prost-types 0.12.4", "protoc-gen-prost", @@ -637,6 +639,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures-channel" version = "0.3.30" @@ -770,6 +782,30 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "htmd" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad1642def6e8e4dc182941f35454f7d2af917787f91f3f5133300030b41006d0" +dependencies = [ + "html5ever", + "markup5ever_rcdom", +] + +[[package]] +name = "html5ever" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn 2.0.59", +] + [[package]] name = "http" version = "0.2.12" @@ -1049,12 +1085,44 @@ dependencies = [ "url", ] +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + [[package]] name = "maplit" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" +[[package]] +name = "markup5ever" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" +dependencies = [ + "log", + "phf", + "phf_codegen", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "markup5ever_rcdom" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18" +dependencies = [ + "html5ever", + "markup5ever", + "tendril", + "xml5ever", +] + [[package]] name = "matchit" version = "0.7.3" @@ -1175,9 +1243,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "parking_lot" @@ -1224,6 +1292,45 @@ dependencies = [ "indexmap 2.2.6", ] +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator 0.11.2", + "phf_shared 0.11.2", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared 0.11.2", + "rand", +] + [[package]] name = "phf_shared" version = "0.10.0" @@ -1233,6 +1340,15 @@ dependencies = [ "siphasher", ] +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + [[package]] name = "pin-project" version = "1.1.5" @@ -1867,8 +1983,21 @@ dependencies = [ "new_debug_unreachable", "once_cell", "parking_lot", - "phf_shared", + "phf_shared 0.10.0", "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", + "proc-macro2", + "quote", ] [[package]] @@ -1923,6 +2052,17 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "term" version = "0.7.0" @@ -2203,6 +2343,12 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8parse" version = "0.2.1" @@ -2403,6 +2549,17 @@ version = "0.52.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" +[[package]] +name = "xml5ever" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bbb26405d8e919bc1547a5aa9abc95cbfa438f04844f5fdd9dc7596b748bf69" +dependencies = [ + "log", + "mac", + "markup5ever", +] + [[package]] name = "zerocopy" version = "0.7.32" diff --git a/Cargo.toml b/Cargo.toml index 7629f81..8c69bfa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,8 +11,10 @@ cc = "1.1.13" clap = { version = "4.4.18", features = ["derive"] } either = "1.9.0" hex = "0.4.3" +htmd = "0.1.6" itertools = "0.12.0" lsp-types = "0.94.1" +once_cell = "1.20.2" prost = "0.12.3" prost-types = "0.12.3" protoc-gen-prost = "0.2.3" diff --git a/src/bazel.rs b/src/bazel.rs index 4b6f913..2268e90 100644 --- a/src/bazel.rs +++ b/src/bazel.rs @@ -1103,28 +1103,85 @@ mod tests { Ok(()) } - #[test] - fn test_environment_function_arguments() -> anyhow::Result<()> { - let fixture = TestFixture::new("simple")?; - let context = fixture.context()?; + fn get_function_doc(file_path: &str, function_name: &str) -> DocFunction { + let fixture = TestFixture::new("simple").unwrap(); + let context = fixture.context().unwrap(); - let module = context.get_environment(&LspUrl::File(PathBuf::from("/foo/bar/BUILD"))); + let module = context.get_environment(&LspUrl::File(PathBuf::from(file_path))); - fn doc_function<'a>(module: &'a DocModule, func_name: &str) -> &'a DocFunction { - let (_, glob_member) = module - .members - .iter() - .find(|(member, _)| *member == func_name) - .unwrap(); + let (_, function) = module + .members + .iter() + .find(|(member, _)| *member == function_name) + .unwrap(); - match glob_member { - DocItem::Member(DocMember::Function(f)) => f, - _ => panic!(), - } + match function { + DocItem::Member(DocMember::Function(f)) => f.clone(), + _ => panic!(), } + } + + #[test] + fn test_function_doc_code_block() -> anyhow::Result<()> { + let doc = get_function_doc("/foo/bar/sample.bzl", "hasattr"); assert_eq!( - doc_function(&module, "glob").params.pos_or_named, + doc.docs.clone().unwrap().summary, + "Returns True if the object `x` has an attribute or method of the given `name`, otherwise False. Example: \n```python\nhasattr(ctx.attr, \"myattr\")\n```" + ); + + Ok(()) + } + + #[test] + fn test_function_doc_links() -> anyhow::Result<()> { + // select doc contains both page relative links (#) and absolute links (/). + let select_doc = get_function_doc("sample.bzl", "select"); + assert_eq!( + select_doc.docs.clone().unwrap().summary, + "`select()` is the helper function that makes a rule attribute configurable. See [build encyclopedia](https://bazel.build/reference/be/functions#select) for details." + ); + + assert_eq!( + select_doc.params.pos_or_named, + vec![ + DocParam { + name: "x".into(), + default_value: None, + docs: Some(DocString { + summary: "A dict that maps configuration conditions to values. Each key is a [Label](https://bazel.build/rules/lib/builtins/Label.html) or a label string that identifies a config\\_setting or constraint\\_value instance. See the [documentation on macros](https://bazel.build/rules/macros#label-resolution) for when to use a Label instead of a string.".into(), + details: None, + }), + typ: Ty::any(), + }, + DocParam { + name: "no_match_error".into(), + default_value: Some("''".into()), + docs: Some(DocString { + summary: "Optional custom error to report if no condition matches.".into(), + details: None, + }), + typ: Ty::any(), + }, + ] + ); + + // aspect contains absolute link. + let aspect_doc = get_function_doc("sample.bzl", "aspect"); + assert_eq!( + aspect_doc.docs.clone().unwrap().summary, + "Creates a new aspect. The result of this function must be stored in a global value. Please see the [introduction to Aspects](https://bazel.build/rules/aspects) for more details." + ); + + Ok(()) + } + + #[test] + fn test_function_doc_params() -> anyhow::Result<()> { + let glob_doc = get_function_doc("/foo/bar/BUILD", "glob"); + + assert_eq!( + glob_doc.params.pos_or_named, vec![ DocParam { name: "include".into(), @@ -1166,8 +1223,13 @@ mod tests { ] ); + Ok(()) + } + + #[test] + fn test_function_doc_args_kwargs() -> anyhow::Result<()> { assert_eq!( - doc_function(&module, "max").params.args, + get_function_doc("BUILD", "max").params.args, Some(DocParam { name: "args".into(), default_value: None, @@ -1180,7 +1242,7 @@ mod tests { ); assert_eq!( - doc_function(&module, "dict").params.kwargs, + get_function_doc("BUILD", "dict").params.kwargs, Some(DocParam { name: "kwargs".into(), default_value: None, diff --git a/src/builtin.rs b/src/builtin.rs index bcb04b3..bf7c8da 100644 --- a/src/builtin.rs +++ b/src/builtin.rs @@ -1,5 +1,7 @@ pub use build_proto::blaze_query::*; pub use builtin_proto::builtin::*; +use htmd::{Element, HtmlToMarkdown}; +use once_cell::sync::Lazy; use starlark::{ docs::{DocFunction, DocMember, DocParam, DocParams, DocProperty, DocString}, typing::Ty, @@ -46,6 +48,45 @@ pub static MISSING_GLOBALS: &'static [&'static str] = &[ "distribs", ]; +static HTML_CONVERTER: Lazy = Lazy::new(|| { + HtmlToMarkdown::builder() + .add_handler(vec!["pre"], |element: Element| { + for attr in element.attrs { + if &attr.name.local == "class" && attr.value.to_string() == "language-python" { + return Some(format!("\n```python\n{}\n```\n", element.content)); + } + } + Some(element.content.to_string()) + }) + .add_handler(vec!["a"], |element: Element| { + for attr in element.attrs { + if &attr.name.local == "href" { + // For local links, just remove link altogether. + if attr.value.starts_with("#") { + return Some(element.content.to_string()); + } + + // For relative links, guess the page it points to. + let link = if attr.value.starts_with("/") { + format!("https://bazel.build{}", attr.value.to_string()) + } else if attr.value.starts_with("../") { + format!( + "https://bazel.build/rules/lib/{}", + attr.value.strip_prefix("../").unwrap() + ) + } else { + // For absolute links, just use the link. + attr.value.to_string() + }; + + return Some(format!("[{}]({})", element.content.to_string(), link)); + } + } + Some(element.content.to_string()) + }) + .build() +}); + pub fn build_language_to_doc_members<'a>( build_language: &'a BuildLanguage, ) -> impl Iterator + 'a { @@ -98,7 +139,7 @@ pub fn builtins_to_doc_members<'a>( } fn value_to_doc_member(value: &Value) -> DocMember { - let docs = create_docstring(&value.doc); + let docs = create_docstring_for_possible_html(&value.doc); if let Some(callable) = &value.callable { let mut params = DocParams { @@ -117,7 +158,7 @@ fn value_to_doc_member(value: &Value) -> DocMember { let doc_param = DocParam { name, - docs: create_docstring(¶m.doc), + docs: create_docstring_for_possible_html(¶m.doc), typ: Ty::any(), default_value: if param.is_mandatory { None @@ -163,3 +204,18 @@ fn create_docstring(summary: &str) -> Option { }) } } + +fn create_docstring_for_possible_html(html: &str) -> Option { + // Some documentation is using markdown, use simple heuristic to check whether need to convert + // from html to markdown. + let markdown = if str::contains(html, "<") { + match HTML_CONVERTER.convert(html).ok() { + Some(markdown) => markdown, + None => html.to_string(), + } + } else { + html.to_string() + }; + + create_docstring(&markdown) +}