| Both sides previous revision
Previous revision
Next revision
|
Previous revision
|
stepbystep:baseservices [2021/03/29 14:13] giancarlo [SOLR] |
stepbystep:baseservices [2021/07/15 16:23] (current) giancarlo [SOLR] |
| </code> | </code> |
| Browse here to check: https://dbopen.ba.cnr.it/cantaloupe/ | Browse here to check: https://dbopen.ba.cnr.it/cantaloupe/ |
| | \\ |
| | \\ |
| | [[#cantaupdate5|Update Cantaloupe to 5.0 now that this version has been released.]] |
| | <wrap #cantaupdate5></wrap> |
| | <code bash> |
| | wget https://github.com/cantaloupe-project/cantaloupe/releases/download/v5.0/cantaloupe-5.0.zip |
| | unzip cantaloupe-5.0.zip |
| | sudo mv cantaloupe-5.0 /opt/ |
| | cd /opt/cantaloupe-5.0 |
| | cp ../cantaloupe-5.0-SNAPSHOT/cantaloupe.properties ./ |
| | cp delegates.rb.sample delegates.rb |
| | sudo cp /etc/systemd/system/cantaloupe.service ./ |
| | sudo nano -w cantaloupe.service |
| | </code> |
| | <code conf> |
| | [Unit] |
| | Description=Cantaloupe Image Server 5.0 |
| | |
| | [Service] |
| | Type=simple |
| | User=cantaloupe |
| | ExecStart=/usr/bin/java -Dcantaloupe.config=/opt/cantaloupe-5.0/cantaloupe.properties -Xmx6g -jar /opt/cantaloupe-5.0/cantaloupe-5.0.jar |
| | ExecStop=/usr/bin/killall -9 cantaloupe-5.0.jar |
| | Restart=on-failure |
| | |
| | [Install] |
| | WantedBy=multi-user.target |
| | </code> |
| | <code bash> |
| | sudo chown -R cantaloupe:cantaloupe /opt/cantaloupe-5.0 |
| | sudo systemctl stop cantaloupe |
| | sudo cp cantaloupe.service /etc/systemd/system/ |
| | sudo systemctl daemon-reload |
| | sudo systemctl enable cantaloupe |
| | sudo systemctl start cantaloupe |
| | </code> |
| ===== SOLR ===== | ===== SOLR ===== |
| Edit system settings for Solr | Edit system settings for Solr |
| <WRAP center round alert 60%> | <WRAP center round alert 60%> |
| There is an issue with the plugin compiled from HEAD; reverting to 0.5.0 makes everything work well. | There is an issue with the plugin compiled from HEAD; reverting to 0.5.0 makes everything work well. |
| | </WRAP> |
| | <WRAP center round tip 60%> |
| | The issue has been solved, so there is no longer any need to revert. |
| </WRAP> | </WRAP> |
| |
| sudo mv target/solr-ocrhighlighting-0.6.0-SNAPSHOT.jar /opt/solr/contrib/archipelago/lib/ | sudo mv target/solr-ocrhighlighting-0.6.0-SNAPSHOT.jar /opt/solr/contrib/archipelago/lib/ |
| </code> | </code> |
| | We need the latest ocrhighlighting build from the main branch (0.7.0-SNAPSHOT) |
| | <code bash> |
| | $ wget https://github.com/dbmdz/solr-ocrhighlighting/archive/main.zip |
| | $ unzip main.zip |
| | $ cd solr-ocrhighlighting-main/ |
| | $ JAVA_HOME=/usr/lib/jvm/java-14-openjdk-amd64 mvn package |
| | $ cd .. |
| | $ sudo mv /opt/solr/contrib/archipelago/lib/solr-ocrhighlighting-0.6.0-SNAPSHOT.jar ./ |
| | $ sudo mv solr-ocrhighlighting-main/target/solr-ocrhighlighting-0.7.0-SNAPSHOT.jar /opt/solr/contrib/archipelago/lib/ |
| | $ rm -R solr-ocrhighlighting-main |
| | |
| | Reload Solr |
| | </code> |
| | Update to the ocrhighlighting 0.7.0 release |
| | <code bash> |
| | $ wget https://github.com/dbmdz/solr-ocrhighlighting/archive/refs/tags/0.7.0.zip |
| | $ unzip 0.7.0.zip |
| | $ cd solr-ocrhighlighting-0.7.0 |
| | $ JAVA_HOME=/usr/lib/jvm/java-14-openjdk-amd64 mvn package |
| | $ cd .. |
| | $ sudo mv /opt/solr/contrib/archipelago/lib/solr-ocrhighlighting-0.7.0-SNAPSHOT.jar ./ |
| | $ sudo mv solr-ocrhighlighting-0.7.0/target/solr-ocrhighlighting-0.7.0.jar /opt/solr/contrib/archipelago/lib/ |
| | $ rm -R solr-ocrhighlighting-0.7.0 |
| | |
| | Reload Solr |
| | </code> |
| | |
| Create archipelago core | Create archipelago core |
| <code bash> | <code bash> |
| $ sudo systemctl start solr | $ sudo systemctl start solr |
| </code> | </code> |
| Edit Solr filters for Italian-language text | [[#solrita|Edit Solr filters for Italian-language text]] |
| | <wrap #solrita></wrap> |
| <code bash> | <code bash> |
| $ sudo -u solr nano -w /srv/solr/data/archipelago/conf/schema_extra_types.xml | $ sudo -u solr nano -w /srv/solr/data/archipelago/conf/schema_extra_types.xml |
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/> | <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/> |
| <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/> | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/> |
| | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords_und.txt" /> |
| <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> | <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> |
| <filter class="solr.LengthFilterFactory" min="2" max="100"/> | <filter class="solr.LengthFilterFactory" min="3" max="100"/> |
| <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |
| </analyzer> | </analyzer> |
| <analyzer type="query"> | <analyzer type="query"> |
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/> | <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/> |
| <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/> | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/> |
| | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords_und.txt" /> |
| <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> | <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> |
| <filter class="solr.LengthFilterFactory" min="2" max="100"/> | <filter class="solr.LengthFilterFactory" min="3" max="100"/> |
| <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> | |
| </analyzer> | </analyzer> |
| </fieldType> | </fieldType> |
| | |
| |
| <fieldType name="text_ocr_stored" class="solr.TextField" storeOffsetsWithPositions="true" termVectors="true"> | <fieldType name="text_ocr_stored" class="solr.TextField" storeOffsetsWithPositions="true" termVectors="true"> |
| <analyzer type="index"> | <analyzer type="index"> |
| <charFilter class="de.digitalcollections.solrocr.lucene.filters.OcrCharFilterFactory"/> | <charFilter class="de.digitalcollections.solrocr.lucene.filters.OcrCharFilterFactory"/> |
| | <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/> |
| <tokenizer class="solr.StandardTokenizerFactory"/> | <tokenizer class="solr.StandardTokenizerFactory"/> |
| <filter class="solr.LowerCaseFilterFactory"/> | <filter class="solr.LowerCaseFilterFactory"/> |
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/> | <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/> |
| <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/> | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/> |
| | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords_und.txt" /> |
| <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> | <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> |
| <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false" /> | <filter class="solr.LengthFilterFactory" min="3" max="100"/> |
| </analyzer> | </analyzer> |
| <analyzer type="query"> | <analyzer type="query"> |
| | <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/> |
| <tokenizer class="solr.StandardTokenizerFactory"/> | <tokenizer class="solr.StandardTokenizerFactory"/> |
| <filter class="solr.LowerCaseFilterFactory"/> | <filter class="solr.LowerCaseFilterFactory"/> |
| <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/> | <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/> |
| <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/> | <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/> |
| | <filter class="solr.KeywordMarkerFilterFactory" protected="protwords_und.txt" /> |
| <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> | <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> |
| <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="false" /> | <filter class="solr.LengthFilterFactory" min="3" max="100"/> |
| </analyzer> | </analyzer> |
| </fieldType> | </fieldType> |
| | </code> |
| | [[#solrsugg|Edit the Solr schema so that the Suggester splits on words without punctuation]] |
| | <wrap #solrsugg></wrap> |
| | <code bash> |
| | $ sudo -u solr nano -w /srv/solr/data/archipelago/conf/schema.xml |
| | </code> |
| | <code xml> |
| | <fieldType name="text_ws" class="solr.TextField" omitNorms="true" positionIncrementGap="100"> |
| | <analyzer> |
| | <!-- <tokenizer class="solr.WhitespaceTokenizerFactory"/> --> |
| | <tokenizer class="solr.WhitespaceTokenizerFactory"/> |
| | <filter class="solr.LowerCaseFilterFactory"/> |
| | </analyzer> |
| | </fieldType> |
| </code> | </code> |