Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update SOLR to 6.6 and enable solr-spatial-field search #41

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ services:
ports:
- "8983:8983"
volumes:
- solr_data:/opt/solr/server/solr/ckan/data/index
- solr_data:/opt/solr/server/solr/ckan/data/

redis:
container_name: redis
Expand Down
45 changes: 22 additions & 23 deletions solr/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,29 +1,28 @@
FROM solr:6.2-alpine
MAINTAINER Open Knowledge
FROM solr:6.6-slim

# Enviroment
ENV SOLR_CORE ckan
ENV CKAN_VERSION dev-v2.7
MAINTAINER Open Knowledge International <[email protected]>

# User
USER root
# Environment
ARG SOLR_CORE=ckan
ENV SOLR_HOME=/opt/solr/server/solr

# Create Directories
RUN mkdir -p /opt/solr/server/solr/$SOLR_CORE/conf
RUN mkdir -p /opt/solr/server/solr/$SOLR_CORE/data
# Install JTS Topology Suite
ADD --chown=solr:solr https://repo1.maven.org/maven2/com/vividsolutions/jts-core/1.14.0/jts-core-1.14.0.jar \
/opt/solr/server/solr-webapp/webapp/WEB-INF/lib

# Adding Files
ADD solrconfig.xml \
https://raw.githubusercontent.com/ckan/ckan/$CKAN_VERSION/ckan/config/solr/schema.xml \
https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.0.0/solr/server/solr/configsets/basic_configs/conf/currency.xml \
https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.0.0/solr/server/solr/configsets/basic_configs/conf/synonyms.txt \
https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.0.0/solr/server/solr/configsets/basic_configs/conf/stopwords.txt \
https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.0.0/solr/server/solr/configsets/basic_configs/conf/protwords.txt \
https://raw.githubusercontent.com/apache/lucene-solr/releases/lucene-solr/6.0.0/solr/server/solr/configsets/data_driven_schema_configs/conf/elevate.xml \
/opt/solr/server/solr/$SOLR_CORE/conf/
WORKDIR $SOLR_HOME
# Create core and deafult config
RUN precreate-core $SOLR_CORE $SOLR_HOME/configsets/basic_configs && \
rm $SOLR_CORE/conf/managed-schema && \
mkdir $SOLR_CORE/data

# Create Core.properties
RUN echo name=$SOLR_CORE > /opt/solr/server/solr/$SOLR_CORE/core.properties
# Add CKAN custom config
ADD solrconfig.xml \
schema.xml \
$SOLR_CORE/conf/

# Give ownership to Solr
USER root
RUN chown -R $SOLR_USER:$SOLR_USER $SOLR_CORE

# Giving ownership to Solr
RUN chown -R $SOLR_USER:$SOLR_USER /opt/solr/server/solr/$SOLR_CORE
USER $SOLR_USER:$SOLR_USER
204 changes: 204 additions & 0 deletions solr/schema.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->

<!--
NB Please copy changes to this file into the multilingual schema:
ckanext/multilingual/solr/schema.xml
-->

<!-- We update the version when there is a backward-incompatible change to this
schema. In this case the version should be set to the next CKAN version number.
(x.y but not x.y.z since it needs to be a float) -->
<schema name="ckan" version="2.8">

<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
<fieldtype name="binary" class="solr.BinaryField"/>
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>

<fieldType name="tdates" class="solr.TrieDateField" precisionStep="7" positionIncrementGap="0" multiValued="true"/>
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
<fieldType name="tints" class="solr.TrieIntField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tfloats" class="solr.TrieFloatField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tlongs" class="solr.TrieLongField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>
<fieldType name="tdoubles" class="solr.TrieDoubleField" precisionStep="8" positionIncrementGap="0" multiValued="true"/>

<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.FlattenGraphFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>


<!-- A general unstemmed text field - good if one does not know the language of the field -->
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.FlattenGraphFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

<!-- Field Type for using ckanext-spatial solr-spatial-field backend -->
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
spatialContextFactory="JTS"
format="WKT"
autoIndex="true"
distErrPct="0.05"
maxDistErr="0.0001"
distanceUnits="degrees" />

</types>

<fields>
<field name="index_id" type="string" indexed="true" stored="true" required="true" />
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="site_id" type="string" indexed="true" stored="true" required="true" />
<field name="title" type="text" indexed="true" stored="true" />
<field name="entity_type" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="dataset_type" type="string" indexed="true" stored="true" />
<field name="state" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="name" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="revision_id" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="version" type="string" indexed="true" stored="true" />
<field name="url" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="ckan_url" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="download_url" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="notes" type="text" indexed="true" stored="true"/>
<field name="author" type="textgen" indexed="true" stored="true" />
<field name="author_email" type="textgen" indexed="true" stored="true" />
<field name="maintainer" type="textgen" indexed="true" stored="true" />
<field name="maintainer_email" type="textgen" indexed="true" stored="true" />
<field name="license" type="string" indexed="true" stored="true" />
<field name="license_id" type="string" indexed="true" stored="true" />
<field name="ratings_count" type="int" indexed="true" stored="false" />
<field name="ratings_average" type="float" indexed="true" stored="false" />
<field name="tags" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="groups" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="organization" type="string" indexed="true" stored="true" multiValued="false"/>

<!-- field for spatial geometry -->
<field name="spatial_geom" type="location_rpt" indexed="true" stored="true" multiValued="true" />

<field name="capacity" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="permission_labels" type="string" indexed="true" stored="false" multiValued="true"/>

<field name="res_name" type="textgen" indexed="true" stored="true" multiValued="true" />
<field name="res_description" type="textgen" indexed="true" stored="true" multiValued="true"/>
<field name="res_format" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="res_url" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="res_type" type="string" indexed="true" stored="true" multiValued="true"/>

<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema -->
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="urls" type="text" indexed="true" stored="false" multiValued="true"/>

<field name="depends_on" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="dependency_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="derives_from" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="has_derivation" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="links_to" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="linked_from" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="child_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="parent_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="views_total" type="int" indexed="true" stored="false"/>
<field name="views_recent" type="int" indexed="true" stored="false"/>
<field name="resources_accessed_total" type="int" indexed="true" stored="false"/>
<field name="resources_accessed_recent" type="int" indexed="true" stored="false"/>

<field name="metadata_created" type="date" indexed="true" stored="true" multiValued="false"/>
<field name="metadata_modified" type="date" indexed="true" stored="true" multiValued="false"/>

<field name="indexed_ts" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>

<!-- Copy the title field into titleString, and treat as a string
(rather than text type). This allows us to sort on the titleString -->
<field name="title_string" type="string" indexed="true" stored="false" />

<field name="data_dict" type="string" indexed="false" stored="true" />
<field name="validated_data_dict" type="string" indexed="false" stored="true" />

<field name="_version_" type="string" indexed="true" stored="true"/>

<dynamicField name="*_date" type="date" indexed="true" stored="true" multiValued="false"/>

<dynamicField name="extras_*" type="text" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="res_extras_*" type="text" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="vocab_*" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*" type="string" indexed="true" stored="false"/>
</fields>

<uniqueKey>index_id</uniqueKey>

<!-- Now set in solrconfig.xml -->
<!-- <defaultSearchField>text</defaultSearchField> -->
<!-- <solrQueryParser defaultOperator="AND"/> -->

<copyField source="url" dest="urls"/>
<copyField source="ckan_url" dest="urls"/>
<copyField source="download_url" dest="urls"/>
<copyField source="res_url" dest="urls"/>
<copyField source="extras_*" dest="text"/>
<copyField source="res_extras_*" dest="text"/>
<copyField source="vocab_*" dest="text"/>
<copyField source="urls" dest="text"/>
<copyField source="name" dest="text"/>
<copyField source="title" dest="text"/>
<copyField source="text" dest="text"/>
<copyField source="license" dest="text"/>
<copyField source="notes" dest="text"/>
<copyField source="tags" dest="text"/>
<copyField source="groups" dest="text"/>
<copyField source="organization" dest="text"/>
<copyField source="res_name" dest="text"/>
<copyField source="res_description" dest="text"/>
<copyField source="maintainer" dest="text"/>
<copyField source="author" dest="text"/>

</schema>
8 changes: 7 additions & 1 deletion solr/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<!--solrconfig.xml documentation [https://wiki.apache.org/solr/SolrConfigXml]-->
<config>

<luceneMatchVersion>6.0.0</luceneMatchVersion>
<luceneMatchVersion>6.6.6</luceneMatchVersion>

<lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" />
<lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" />
Expand Down Expand Up @@ -80,6 +80,8 @@
<lst name="defaults">
<str name="echoParams">explicit</str>
<int name="rows">10</int>
<str name="df">text</str>
<str name="q.op">AND</str>
</lst>

</requestHandler>
Expand All @@ -90,6 +92,8 @@
<str name="echoParams">explicit</str>
<str name="wt">json</str>
<str name="indent">true</str>
<str name="df">text</str>
<str name="q.op">AND</str>
</lst>

</requestHandler>
Expand All @@ -110,13 +114,15 @@

</initParams>

<!-- This enabled schemaless mode
<initParams path="/update/**">

<lst name="defaults">
<str name="update.chain">add-unknown-fields-to-the-schema</str>
</lst>

</initParams>
-->

<requestHandler name="/update/extract" startup="lazy" class="solr.extraction.ExtractingRequestHandler">

Expand Down