Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Address SOSO SHACL Feedback & Cleanup #78

Open
wants to merge 7 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions deploy/docker/web/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
FROM ghcr.io/dataoneorg/slinky:0.3.0

RUN apt-get update -y

WORKDIR /web
ADD requirements.txt requirements.txt
ADD . .
RUN python3 -m pip install -r requirements.txt
ADD --chown=slinky:slinky . .
RUN python3 -m pip install -r requirements.txt --user

EXPOSE 8080

Expand Down
1 change: 1 addition & 0 deletions deploy/docker/web/static/css/yasgui.min.css

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions deploy/docker/web/static/resources/yasgui.min.js

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions deploy/docker/web/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
<head>
<meta charset="utf-8">
<title>Slinky the DataONE Graph</title>
<link href="https://api.test.dataone.org/slinky/static/css/base.css" rel="stylesheet">
<link href="https://api.test.dataone.org/slinky/static/css/index.css" rel="stylesheet">
<link href="https://unpkg.com/@triply/yasgui/build/yasgui.min.css" rel="stylesheet" type="text/css" />
<script src="https://unpkg.com/@triply/yasgui/build/yasgui.min.js"></script>
<link href="/static/css/base.css" rel="stylesheet" type="text/css">
<link href="/static/css/index.css" rel="stylesheet" type="text/css">
<link href="/static/css/yasgui.min.css" rel="stylesheet" type="text/css" />
<script src="/static/resources/yasgui.min.js"></script>
</head>

<body>
Expand Down
69 changes: 68 additions & 1 deletion scripts/dump-virtuoso.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,81 @@

set -e

# Name of the running Virtuoso container.
# `grep` exits non-zero when nothing matches; under `set -e` that would abort
# the script on this assignment, before the message below could be printed.
# `|| true` defers the decision to the explicit emptiness check.
CONTAINER=$(docker ps --format "{{.Names}}" | grep virtuoso || true)
# Location where Virtuoso puts the nq files
DUMP_DIR="/opt/virtuoso-opensource/database/dumps/"

if [ -z "$CONTAINER" ]; then
  # Diagnostics go to stderr; exit non-zero so callers can detect the failure.
  echo "Didn't find a running virtuoso container. Exiting." >&2
  exit 1
fi

docker exec -i "$CONTAINER" isql-v <<EOF
# Virtuoso expects a folder named 'dumps'. Use -p so that re-running the
# script does not abort (under `set -e`) when the folder already exists.
docker exec -i "$CONTAINER" mkdir -p "$DUMP_DIR"

# Add the dump_nquads method.
# Feeds a CREATE PROCEDURE statement to isql inside the container. The
# procedure selects every quad whose graph is not virtrdf: and appends it to
# an output buffer (ses). Rows that raise SQLSTATE '22023' are skipped.
# Once the buffer length reaches file_length_limit it is written to
# '<dir>/output<6-digit index>.nq'; when comp is non-zero the file is gzipped
# and the uncompressed copy deleted. The index is then incremented and a fresh
# buffer started. Anything left in the buffer after the loop is written out
# the same way.
docker exec -i "$CONTAINER" isql <<EOF
CREATE PROCEDURE dump_nquads
( IN dir VARCHAR := 'dumps'
, IN start_from INT := 1
, IN file_length_limit INTEGER := 100000000
, IN comp INT := 1
)
{
DECLARE inx, ses_len INT
; DECLARE file_name VARCHAR
; DECLARE env, ses ANY
;

inx := start_from;
SET isolation = 'uncommitted';
env := vector (0,0,0);
ses := string_output (10000000);
FOR (SELECT * FROM (sparql define input:storage "" SELECT ?s ?p ?o ?g { GRAPH ?g { ?s ?p ?o } . FILTER ( ?g != virtrdf: ) } ) AS sub OPTION (loop)) DO
{
DECLARE EXIT HANDLER FOR SQLSTATE '22023'
{
GOTO next;
};
http_nquad (env, "s", "p", "o", "g", ses);
ses_len := LENGTH (ses);
IF (ses_len >= file_length_limit)
{
file_name := sprintf ('%s/output%06d.nq', dir, inx);
string_to_file (file_name, ses, -2);
IF (comp)
{
gz_compress_file (file_name, file_name||'.gz');
file_delete (file_name);
}
inx := inx + 1;
env := vector (0,0,0);
ses := string_output (10000000);
}
next:;
}
IF (length (ses))
{
file_name := sprintf ('%s/output%06d.nq', dir, inx);
string_to_file (file_name, ses, -2);
IF (comp)
{
gz_compress_file (file_name, file_name||'.gz');
file_delete (file_name);
}
inx := inx + 1;
env := vector (0,0,0);
}
};
exit;
EOF

# Create the dumps.
# Arguments follow the dump_nquads signature (dir, start_from,
# file_length_limit, comp): write into 'dumps', start file numbering at 1,
# roll over to a new file once the buffer length reaches 10000000, and gzip
# each file.
docker exec -i "$CONTAINER" isql <<EOF
dump_nquads ('dumps', 1, 10000000, 1);
exit;
EOF

# Copy the dumps out of the container (from $DUMP_DIR) to ./data on the host
docker cp "$CONTAINER":"$DUMP_DIR" ./data
25 changes: 14 additions & 11 deletions slinky/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,25 @@ RUN apt-get update && \
git \
librdf0-dev \
libtool \
python3 \
python3-pip \
swig
swig \
&& rm -rf /var/lib/apt/lists/*

# Install the Redland bindings
WORKDIR /slinky
RUN wget "http://download.librdf.org/source/redland-bindings-1.0.17.1.tar.gz"
RUN tar xzvf redland-bindings-1.0.17.1.tar.gz
WORKDIR /slinky/redland-bindings-1.0.17.1
RUN sh autogen.sh
RUN ./configure --with-python=python3
RUN make install
RUN rm -r /slinky/redland-bindings-1.0.17.1
RUN sh autogen.sh && ./configure --with-python=python3 && make install
# Remove redland artifacts
WORKDIR /slinky
RUN rm -r redland-bindings-1.0.17.1 && rm redland-bindings-1.0.17.1.tar.gz

# Install the slinky python package
ADD . .
RUN pip3 install .
# Create the slinky user
RUN useradd -ms /bin/bash slinky
USER slinky
ENV PATH="/home/slinky/.local/bin:${PATH}"
WORKDIR /home/slinky

WORKDIR /
# Copy the slinky source and install
ADD --chown=slinky:slinky . .
RUN python -m pip install -U pip && pip install . --user
2 changes: 1 addition & 1 deletion slinky/slinky/processors/eml/eml_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def process(self):
self.model.append(
RDF.Statement(
dataset_subject,
RDF.Node(RDF.Uri(NS_SCHEMA.keyword)),
RDF.Node(RDF.Uri(NS_SCHEMA.keywords)),
RDF.Node(keyword.text.strip()),
)
)
Expand Down
2 changes: 1 addition & 1 deletion slinky/slinky/processors/iso/iso_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def process(self):
self.model.append(
RDF.Statement(
dataset_subject,
RDF.Node(RDF.Uri(NS_SCHEMA.keyword)),
RDF.Node(RDF.Uri(NS_SCHEMA.keywords)),
RDF.Node(keyword.text.strip()),
)
)
Expand Down
2 changes: 1 addition & 1 deletion slinky/slinky/processors/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ def process_identifiers(self):
self.model.append(
RDF.Statement(
dataset_subject,
RDF.Node(RDF.Uri("http://www.w3.org/2002/07/owl#sameAs")),
RDF.Node(RDF.Uri(NS_SCHEMA.sameAs)),
doi_node,
)
)
Expand Down
9 changes: 0 additions & 9 deletions slinky/tests/extras/virtuoso_init/Dockerfile

This file was deleted.

9 changes: 0 additions & 9 deletions slinky/tests/extras/virtuoso_init/README.md

This file was deleted.

13 changes: 0 additions & 13 deletions slinky/tests/extras/virtuoso_init/entrypoint.sh

This file was deleted.

2 changes: 1 addition & 1 deletion slinky/tests/test_eml201_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_processor_extracts_top_metadata(client, model):
# Create the keywordSet node
statement = RDF.Statement(
RDF.Node(RDF.Uri(node_id)),
RDF.Node(RDF.Uri(NS_SCHEMA.keyword)),
RDF.Node(RDF.Uri(NS_SCHEMA.keywords)),
RDF.Node(keyword),
)
assert model_has_statement(processor.model, statement)
2 changes: 1 addition & 1 deletion slinky/tests/test_eml211_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def test_processor_extracts_top_metadata(client, model):
# Create the keywordSet node
statement = RDF.Statement(
RDF.Node(RDF.Uri(node_id)),
RDF.Node(RDF.Uri(NS_SCHEMA.keyword)),
RDF.Node(RDF.Uri(NS_SCHEMA.keywords)),
RDF.Node(keyword),
)
assert model_has_statement(processor.model, statement)
2 changes: 1 addition & 1 deletion slinky/tests/test_eml_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def test_processor_extracts_top_metadata(client, model):
# Create the keywordSet node
statement = RDF.Statement(
RDF.Node(RDF.Uri(node_id)),
RDF.Node(RDF.Uri(NS_SCHEMA.keyword)),
RDF.Node(RDF.Uri(NS_SCHEMA.keywords)),
RDF.Node(keyword),
)
assert model_has_statement(processor.model, statement)
Expand Down