Skip to content

Commit

Permalink
Add ada-url dependency, initial impl of jsg::Url
Browse files Browse the repository at this point in the history
This will serve as the new underlying implementation of the
url-standard.{h|c++} class while also supporting handling of
module import specifiers as URLs.
  • Loading branch information
jasnell committed Oct 5, 2023
1 parent e51f3be commit 3d9e6cb
Show file tree
Hide file tree
Showing 9 changed files with 458 additions and 0 deletions.
1 change: 1 addition & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ build:windows --per_file_copt='external/ssl/src/crypto/poly1305/poly1305_vec\.c@
build:windows --per_file_copt='external/v8/src/objects/literal-objects\.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' --host_per_file_copt='external/v8/src/objects/literal-objects.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include'
build:windows --per_file_copt='external/v8/src/runtime/runtime-object\.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' --host_per_file_copt='external/v8/src/runtime/runtime-object.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include'
build:windows --per_file_copt='external/v8/src/objects/swiss-name-dictionary\.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' --host_per_file_copt='external/v8/src/objects/swiss-name-dictionary.cc@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include'
build:windows --per_file_copt='external/ada-url/ada\.cpp@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include' --host_per_file_copt='external/ada-url/ada\.cpp@-imsvcC:\\Program Files\\LLVM\\lib\\clang\\16.0.6\\include'

# enable clang coverage: https://clang.llvm.org/docs/SourceBasedCodeCoverage.html
build:clang-coverage --copt="-fprofile-instr-generate" --linkopt="-fprofile-instr-generate"
Expand Down
10 changes: 10 additions & 0 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,16 @@ http_archive(
urls = ["https://github.com/google/brotli/tarball/ec107cf015139c791f79afac0f96c3a2c45e157f"],
)

# Fetches the ada-url v2.6.10 "singleheader" release zip and builds it with the
# BUILD file at build/BUILD.ada-url. The sha256 pins the exact archive contents.
http_archive(
name = "ada-url",
build_file = "//:build/BUILD.ada-url",
sha256 = "850f5dbe0aa606a1c2f0aaa7feec3c5da6b1e09fb5e5dab9b5554469c7795ef4",
type = "zip",
url = "https://github.com/ada-url/ada/releases/download/v2.6.10/singleheader.zip",
# No patches are applied at present.
# NOTE(review): patch_args presumably only takes effect once patches is non-empty -- confirm.
patches = [],
patch_args = ["-p1"],
)

# ========================================================================================
# Dawn
#
Expand Down
12 changes: 12 additions & 0 deletions build/BUILD.ada-url
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@

# Builds the ada-url single-header distribution (ada.cpp plus the C++ and C API
# headers) as one publicly visible library.
cc_library(
name = "ada-url",
hdrs = ["ada.h", "ada_c.h"],
srcs = ["ada.cpp"],
visibility = ["//visibility:public"],
include_prefix = ".",
# -w silences compiler warnings for this third-party source.
copts = ["-w"],
# NOTE(review): ADA_SSE2=1 is defined unconditionally here; confirm this is
# intended for every target architecture this library is built for.
defines = [
"ADA_SSE2=1"
]
)
2 changes: 2 additions & 0 deletions compile_flags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
-nostdinc
-Ibazel-bin/external/com_googlesource_chromium_base_trace_event_common/_virtual_includes/trace_event_common
-Ibazel-bin/external/dawn/include
-Ibazel-bin/external/ada-url/_virtual_includes/ada-url/
-Ibazel-bin/external/com_cloudflare_lol_html/_virtual_includes/lolhtml
-Iexternal/com_google_benchmark/include/
-Iexternal/dawn/include
-Iexternal/ada-url/
-Isrc
-isystem/usr/include
-isystem/usr/include/x86_64-linux-gnu
Expand Down
12 changes: 12 additions & 0 deletions src/workerd/jsg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ wd_cc_library(
":exception",
":modules_capnp",
":observer",
":url",
"//src/workerd/util",
"//src/workerd/util:sentry",
"//src/workerd/util:thread-scopes",
Expand All @@ -34,6 +35,17 @@ wd_cc_library(
],
)

# jsg::Url: the url.h / url.c++ wrapper, built on top of kj and the ada-url
# library.
wd_cc_library(
name = "url",
srcs = ["url.c++"],
hdrs = ["url.h"],
visibility = ["//visibility:public"],
deps = [
"@capnp-cpp//src/kj",
"@ada-url",
],
)

wd_cc_library(
name = "exception",
srcs = ["exception.c++"],
Expand Down
1 change: 1 addition & 0 deletions src/workerd/jsg/jsg.h
Original file line number Diff line number Diff line change
Expand Up @@ -2333,3 +2333,4 @@ inline v8::Local<v8::Context> JsContext<T>::getHandle(Lock& js) {
#include "function.h"
#include "iterator.h"
#include "jsvalue.h"
#include "url.h"
62 changes: 62 additions & 0 deletions src/workerd/jsg/url-test.c++
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) 2017-2022 Cloudflare, Inc.
// Licensed under the Apache 2.0 license found in the LICENSE file or at:
// https://opensource.org/licenses/Apache-2.0

#include "jsg-test.h"
#include "url.h"
#include <kj/table.h>

namespace workerd::jsg::test {
namespace {

// Exercises the core jsg::Url surface on a simple special-scheme URL: parsing,
// every component getter, clone/equality, move assignment, relative
// resolution, stringification, use as a hash key, and the IDN helpers.
KJ_TEST("Basics") {
Url theUrl = nullptr;
KJ_IF_SOME(url, Url::tryParse("http://example.org:81"_kj)) {
// The href gains a trailing "/" path while the origin does not.
KJ_ASSERT(url.getOrigin() == "http://example.org:81"_kj);
KJ_ASSERT(url.getHref() == "http://example.org:81/"_kj);
KJ_ASSERT(url.getProtocol() == "http:"_kj);
KJ_ASSERT(url.getHostname() == "example.org"_kj);
KJ_ASSERT(url.getHost() == "example.org:81"_kj);
KJ_ASSERT(url.getPort() == "81"_kj);
KJ_ASSERT(url.getPathname() == "/"_kj);
KJ_ASSERT(url.getSchemeType() == Url::SchemeType::HTTP);
KJ_ASSERT(url.getHostType() == Url::HostType::DEFAULT);
KJ_ASSERT(url.getUsername() == ""_kj);
KJ_ASSERT(url.getPassword() == ""_kj);
KJ_ASSERT(url.getHash() == ""_kj);
KJ_ASSERT(url.getSearch() == ""_kj);

// A clone compares equal to its source; afterwards the source is moved into
// theUrl, so `url` must not be used below this point.
theUrl = url.clone();
KJ_ASSERT(theUrl == url);
theUrl = kj::mv(url);

// Relative input is resolved against theUrl.
auto res = KJ_ASSERT_NONNULL(theUrl.resolve("abc"_kj));
KJ_ASSERT(res.getHref() == "http://example.org:81/abc"_kj);

// jsg::Urls support KJ_STRINGIFY
KJ_ASSERT(kj::str(res) == "http://example.org:81/abc");

// jsg::Urls are suitable to be used as keys in a hashset, hashmap
kj::HashSet<Url> urls;
urls.insert(res.clone());
KJ_ASSERT(urls.contains(res));

kj::HashMap<Url, int> urlmap;
urlmap.insert(res.clone(), 1);
KJ_ASSERT(KJ_ASSERT_NONNULL(urlmap.find(res)) == 1);
} else {
KJ_FAIL_ASSERT("url could not be parsed");
}

// IDN round trip between the Unicode and punycode forms of a hostname.
KJ_ASSERT(Url::idnToAscii("täst.de"_kj) == "xn--tst-qla.de"_kj);
KJ_ASSERT(Url::idnToUnicode("xn--tst-qla.de"_kj) == "täst.de"_kj);
}

// A URL with a non-special scheme ("abc:") still parses, but its origin is
// reported as the string "null".
KJ_TEST("Non-special URL") {
auto url = KJ_ASSERT_NONNULL(Url::tryParse("abc://123"_kj));
KJ_ASSERT(url.getOrigin() == "null"_kj);
KJ_ASSERT(url.getProtocol() == "abc:"_kj);
}

} // namespace
} // namespace workerd::jsg::test
252 changes: 252 additions & 0 deletions src/workerd/jsg/url.c++
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
#include "url.h"
#include <kj/hash.h>

extern "C" {
#include <ada_c.h>
}

#include <kj/debug.h>

namespace workerd::jsg {

namespace {
// Disposer that lets a kj::Own<void> hold an ada_url handle: disposing the
// Own passes the handle to ada_free().
class AdaUrlDisposer : public kj::Disposer {
public:
  // Single shared instance used for every wrapped handle.
  static const AdaUrlDisposer INSTANCE;

protected:
  void disposeImpl(void* pointer) const override { ada_free(pointer); }
};
const AdaUrlDisposer AdaUrlDisposer::INSTANCE;

// ArrayDisposer that lets a kj::Array<const char> own the buffer of an
// ada_owned_string. On disposal the (pointer, length) pair is rebuilt and
// handed back to ada_free_owned_string().
class AdaOwnedStringDisposer : public kj::ArrayDisposer {
public:
  // Single shared instance used for every ada-owned string.
  static const AdaOwnedStringDisposer INSTANCE;

protected:
  // Marked `override` (as AdaUrlDisposer::disposeImpl is) so the compiler
  // rejects any drift from the kj::ArrayDisposer signature instead of silently
  // declaring an unrelated function that is never called.
  void disposeImpl(void* firstElement, size_t elementSize, size_t elementCount,
                   size_t capacity, void (*destroyElement)(void*)) const override {
    // Only the start pointer and the element count are needed to rebuild the
    // ada_owned_string; the remaining arguments are not used.
    ada_owned_string data = {
      static_cast<const char*>(firstElement),
      elementCount };
    ada_free_owned_string(data);
  }
};
const AdaOwnedStringDisposer AdaOwnedStringDisposer::INSTANCE;

// Takes ownership of a raw ada_url handle. The handle is released through
// AdaUrlDisposer when the returned Own is destroyed.
kj::Own<void> wrap(ada_url url) {
  kj::Own<void> owned(url, AdaUrlDisposer::INSTANCE);
  return owned;
}

// Borrows the raw ada_url handle held by `inner` without transferring
// ownership. Constness is cast away because ada_url is a plain mutable handle
// type. In debug builds, asserts that the handle is non-null.
ada_url get(const kj::Own<void>& inner) {
  ada_url value = const_cast<ada_url>(inner.get());
  KJ_DASSERT(value != nullptr);
  return value;
}

} // namespace

// Adopts an already-wrapped ada_url handle.
Url::Url(kj::Own<void> inner)
    : inner(kj::mv(inner)) {}

// Move constructor: takes over the handle held by `other`.
Url::Url(Url&& other)
    : inner(kj::mv(other.inner)) {}

// Move assignment: replaces this Url's handle with the one held by `other`.
Url& Url::operator=(Url&& other) {
  this->inner = kj::mv(other.inner);
  return *this;
}

// Two Urls are equal when their serialized hrefs are identical.
bool Url::operator==(const Url& other) const {
  auto mine = getHref();
  auto theirs = other.getHref();
  return mine == theirs;
}

// Compares this Url with `other`.
//
// With EquivalenceOption::DEFAULT this is the same as operator== (full href
// comparison). With any other option the hash/fragment is left out of the
// comparison by checking every remaining component individually.
bool Url::equal(const Url& other, EquivalenceOption option) const {
  if (option != EquivalenceOption::DEFAULT) {
    // Fragments are being ignored, so compare component by component.
    return other.getProtocol() == getProtocol()
        && other.getHost() == getHost()
        && other.getUsername() == getUsername()
        && other.getPassword() == getPassword()
        && other.getPathname() == getPathname()
        && other.getSearch() == getSearch();
  }

  return *this == other;
}

// Reports whether `input` can be parsed as a URL, optionally relative to
// `base`, without constructing a Url.
bool Url::canParse(kj::ArrayPtr<const char> input,
                   kj::Maybe<kj::ArrayPtr<const char>> base) {
  const char* inputData = input.begin();
  size_t inputSize = input.size();

  KJ_IF_SOME(baseInput, base) {
    return ada_can_parse_with_base(inputData, inputSize,
                                   baseInput.begin(), baseInput.size());
  }

  // No base supplied: the input has to stand on its own.
  return ada_can_parse(inputData, inputSize);
}

// Parses `input` as a URL, optionally resolving it against `base`.
//
// Returns the parsed Url, or kj::none when ada reports the result as invalid.
kj::Maybe<Url> Url::tryParse(kj::ArrayPtr<const char> input,
                             kj::Maybe<kj::ArrayPtr<const char>> base) {
  ada_url result = nullptr;
  KJ_IF_SOME(b, base) {
    result = ada_parse_with_base(input.begin(), input.size(),
                                 b.begin(), b.size());
  } else {
    result = ada_parse(input.begin(), input.size());
  }

  // Take ownership before checking validity. The parse result is heap
  // allocated even when parsing fails, so returning early without releasing it
  // (as this function previously did) leaked one result per failed parse.
  kj::Own<void> owned = wrap(result);
  if (!ada_is_valid(result)) return kj::none;
  return Url(kj::mv(owned));
}

// Parses `input` using this Url's href as the base. Returns kj::none if the
// combination cannot be parsed.
kj::Maybe<Url> Url::resolve(kj::ArrayPtr<const char> input) {
  auto base = getHref();
  return tryParse(input, base);
}

// Returns the serialized URL as a non-owning view.
// NOTE(review): the view presumably stays valid only until this Url is
// modified or destroyed -- confirm against the ada C API.
kj::ArrayPtr<const char> Url::getHref() const {
  const ada_string view = ada_get_href(get(inner));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the username component as a non-owning view.
// NOTE(review): the view presumably stays valid only until this Url is
// modified or destroyed -- confirm against the ada C API.
kj::ArrayPtr<const char> Url::getUsername() const {
  const ada_string view = ada_get_username(get(inner));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the password component as a non-owning view.
// NOTE(review): the view presumably stays valid only until this Url is
// modified or destroyed -- confirm against the ada C API.
kj::ArrayPtr<const char> Url::getPassword() const {
  const ada_string view = ada_get_password(get(inner));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the port component, as text, in a non-owning view.
// NOTE(review): the view presumably stays valid only until this Url is
// modified or destroyed -- confirm against the ada C API.
kj::ArrayPtr<const char> Url::getPort() const {
  const ada_string view = ada_get_port(get(inner));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the hash (fragment) component as a non-owning view.
// NOTE(review): the view presumably stays valid only until this Url is
// modified or destroyed -- confirm against the ada C API.
kj::ArrayPtr<const char> Url::getHash() const {
  const ada_string view = ada_get_hash(get(inner));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the host component as a non-owning view.
// NOTE(review): the view presumably stays valid only until this Url is
// modified or destroyed -- confirm against the ada C API.
kj::ArrayPtr<const char> Url::getHost() const {
  const ada_string view = ada_get_host(get(inner));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the hostname component as a non-owning view.
// NOTE(review): the view presumably stays valid only until this Url is
// modified or destroyed -- confirm against the ada C API.
kj::ArrayPtr<const char> Url::getHostname() const {
  const ada_string view = ada_get_hostname(get(inner));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the pathname component as a non-owning view.
// NOTE(review): the view presumably stays valid only until this Url is
// modified or destroyed -- confirm against the ada C API.
kj::ArrayPtr<const char> Url::getPathname() const {
  const ada_string view = ada_get_pathname(get(inner));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the search (query) component as a non-owning view.
// NOTE(review): the view presumably stays valid only until this Url is
// modified or destroyed -- confirm against the ada C API.
kj::ArrayPtr<const char> Url::getSearch() const {
  const ada_string view = ada_get_search(get(inner));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the protocol component as a non-owning view.
// NOTE(review): the view presumably stays valid only until this Url is
// modified or destroyed -- confirm against the ada C API.
kj::ArrayPtr<const char> Url::getProtocol() const {
  const ada_string view = ada_get_protocol(get(inner));
  return kj::ArrayPtr<const char>(view.data, view.length);
}

// Returns the origin as an owned array. Unlike the other getters, ada hands
// back an ada_owned_string here; the returned Array releases it through
// AdaOwnedStringDisposer.
kj::Array<const char> Url::getOrigin() const {
  const ada_owned_string origin = ada_get_origin(get(inner));
  return kj::Array<const char>(origin.data, origin.length,
                               AdaOwnedStringDisposer::INSTANCE);
}

// Replaces the whole URL with `value`. Returns ada_set_href's result.
bool Url::setHref(kj::ArrayPtr<const char> value) {
  ada_url self = get(inner);
  return ada_set_href(self, value.begin(), value.size());
}

// Sets the host component. Returns ada_set_host's result.
bool Url::setHost(kj::ArrayPtr<const char> value) {
  ada_url self = get(inner);
  return ada_set_host(self, value.begin(), value.size());
}

// Sets the hostname component. Returns ada_set_hostname's result.
bool Url::setHostname(kj::ArrayPtr<const char> value) {
  ada_url self = get(inner);
  return ada_set_hostname(self, value.begin(), value.size());
}

// Sets the protocol component. Returns ada_set_protocol's result.
bool Url::setProtocol(kj::ArrayPtr<const char> value) {
  ada_url self = get(inner);
  return ada_set_protocol(self, value.begin(), value.size());
}

// Sets the username component. Returns ada_set_username's result.
bool Url::setUsername(kj::ArrayPtr<const char> value) {
  ada_url self = get(inner);
  return ada_set_username(self, value.begin(), value.size());
}

// Sets the password component. Returns ada_set_password's result.
bool Url::setPassword(kj::ArrayPtr<const char> value) {
  ada_url self = get(inner);
  return ada_set_password(self, value.begin(), value.size());
}

// Sets the port to `value`, or clears it when `value` is kj::none.
// Returns ada_set_port's result when setting; clearing always returns true.
bool Url::setPort(kj::Maybe<kj::ArrayPtr<const char>> value) {
  KJ_IF_SOME(port, value) {
    return ada_set_port(get(inner), port.begin(), port.size());
  }

  // No value supplied: remove the port.
  ada_clear_port(get(inner));
  return true;
}

// Sets the pathname component. Returns ada_set_pathname's result.
bool Url::setPathname(kj::ArrayPtr<const char> value) {
  ada_url self = get(inner);
  return ada_set_pathname(self, value.begin(), value.size());
}

// Sets the search (query) component to `value`, or clears it when `value` is
// kj::none.
void Url::setSearch(kj::Maybe<kj::ArrayPtr<const char>> value) {
  KJ_IF_SOME(search, value) {
    ada_set_search(get(inner), search.begin(), search.size());
  } else {
    ada_clear_search(get(inner));
  }
}

// Sets the hash (fragment) component to `value`, or clears it when `value` is
// kj::none.
void Url::setHash(kj::Maybe<kj::ArrayPtr<const char>> value) {
  KJ_IF_SOME(hash, value) {
    ada_set_hash(get(inner), hash.begin(), hash.size());
  } else {
    ada_clear_hash(get(inner));
  }
}

// Returns ada's numeric scheme type converted to Url::SchemeType.
// Throws (via KJ_REQUIRE) if the value is greater than SchemeType::FILE,
// which this check treats as the largest valid enumerator.
Url::SchemeType Url::getSchemeType() const {
  const uint8_t value = ada_get_scheme_type(get(inner));
  KJ_REQUIRE(value <= static_cast<uint8_t>(SchemeType::FILE));
  return static_cast<SchemeType>(value);
}

// Returns ada's numeric host type converted to Url::HostType.
// Throws (via KJ_REQUIRE) if the value is greater than HostType::IPV6,
// which this check treats as the largest valid enumerator.
Url::HostType Url::getHostType() const {
  const uint8_t value = ada_get_host_type(get(inner));
  KJ_REQUIRE(value <= static_cast<uint8_t>(HostType::IPV6));
  return static_cast<HostType>(value);
}

// Returns an independent copy of this Url. With
// EquivalenceOption::IGNORE_FRAGMENTS the copy has its hash cleared.
Url Url::clone(EquivalenceOption option) {
  ada_url duplicate = ada_copy(get(inner));
  kj::Own<void> owned(duplicate, AdaUrlDisposer::INSTANCE);

  if (option == EquivalenceOption::IGNORE_FRAGMENTS) {
    ada_clear_hash(duplicate);
  }

  return Url(kj::mv(owned));
}

// Converts `value` with ada_idna_to_unicode. The returned Array owns the
// buffer ada allocated and releases it through AdaOwnedStringDisposer.
kj::Array<const char> Url::idnToUnicode(kj::ArrayPtr<const char> value) {
  const ada_owned_string converted =
      ada_idna_to_unicode(value.begin(), value.size());
  return kj::Array<const char>(converted.data, converted.length,
                               AdaOwnedStringDisposer::INSTANCE);
}

// Converts `value` with ada_idna_to_ascii. The returned Array owns the buffer
// ada allocated and releases it through AdaOwnedStringDisposer.
kj::Array<const char> Url::idnToAscii(kj::ArrayPtr<const char> value) {
  const ada_owned_string converted =
      ada_idna_to_ascii(value.begin(), value.size());
  return kj::Array<const char>(converted.data, converted.length,
                               AdaOwnedStringDisposer::INSTANCE);
}

// Hashes the serialized href, the same value operator== compares, so equal
// Urls produce equal hash codes.
kj::uint Url::hashCode() const {
  auto href = getHref();
  return kj::hashCode(href);
}

} // namespace workerd::jsg
Loading

0 comments on commit 3d9e6cb

Please sign in to comment.