stepbystep:baseservices

Base services

Install Nginx from package

apt install nginx
systemctl status nginx
 
nginx -v
  nginx version: nginx/1.18.0 (Ubuntu)

Install PHP-FPM

apt install php-fpm php-mysql php-gd php-apcu php-gmp php-curl php-intl php-mbstring php-xmlrpc php-xml php-cli php-zip
 
cd /etc/php/7.4/fpm/
cp php.ini php.ini.ORI
nano -w php.ini
date.timezone = Europe/Rome
memory_limit = 256M
post_max_size = 512M
upload_max_filesize = 512M
max_execution_time = 600
cgi.fix_pathinfo=0
systemctl restart php7.4-fpm
systemctl status php7.4-fpm
 
php --version
  PHP 7.4.3 (cli) (built: Oct  6 2020 15:47:56) ( NTS )
  Copyright (c) The PHP Group
  Zend Engine v3.4.0, Copyright (c) Zend Technologies
      with Zend OPcache v7.4.3, Copyright (c), by Zend Technologies

Add Nginx virtual host for Drupal9 (also see Archipelago deployment)

mkdir /var/www/html/web
cd /etc/nginx/sites-available/
nano -w drupal9
server {
    server_name dbopen.ba.cnr.it;
    client_max_body_size 512M; ## Match with PHP from FPM container
    root /var/www/html/web; ## <-- Your only path reference.
 
    fastcgi_send_timeout 120s;
    fastcgi_read_timeout 120s;
    fastcgi_pass_request_headers on;
 
    fastcgi_buffers 16 16k;
    fastcgi_buffer_size 32k;
 
    location = /favicon.ico {
        log_not_found off;
        access_log off;
    }
 
    location = /robots.txt {
        allow all;
        log_not_found off;
        access_log off;
    }
 
    # Very rarely should these ever be accessed outside of your lan
    location ~* \.(txt|log)$ {
        deny all;
    }
 
    location ~ \..*/.*\.php$ {
        return 403;
    }
 
    location ~ ^/sites/.*/private/ {
        return 403;
    }
 
    # Allow "Well-Known URIs" as per RFC 5785
    location ~* ^/.well-known/ {
        allow all;
    }
 
    # Block access to "hidden" files and directories whose names begin with a
    # period. This includes directories used by version control systems such
    # as Subversion or Git to store control files.
    location ~ (^|/)\. {
        return 403;
    }
 
    location / {
        try_files $uri /index.php?$query_string; # For Drupal >= 7
    }
 
    location @rewrite {
        rewrite ^/(.*)$ /index.php?q=$1;
    }
 
    # Don't allow direct access to PHP files in the vendor directory.
    location ~ /vendor/.*\.php$ {
        deny all;
        return 404;
    }
 
    # In Drupal 8, we must also match new paths where the '.php' appears in
    # the middle, such as update.php/selection. The rule we use is strict,
    # and only allows this pattern with the update.php front controller.
    # This allows legacy path aliases in the form of
    # blog/index.php/legacy-path to continue to route to Drupal nodes. If
    # you do not have any paths like that, then you might prefer to use a
    # laxer rule, such as:
    #   location ~ \.php(/|$) {
    # The laxer rule will continue to work if Drupal uses this new URL
    # pattern with front controllers other than update.php in a future
    # release.
    location ~ '\.php$|^/update.php' {
        fastcgi_split_path_info ^(.+?\.php)(|/.*)$;
        include fastcgi_params;
        # Block httpoxy attacks. See https://httpoxy.org/.
        fastcgi_param HTTP_PROXY "";
        fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name;
        fastcgi_param PATH_INFO $fastcgi_path_info;
        fastcgi_param PHP_VALUE "upload_max_filesize=512M \n post_max_size=512M";
        proxy_read_timeout 900s;
        fastcgi_intercept_errors on;
        fastcgi_pass unix:/var/run/php/php7.4-fpm.sock;
    }
 
    # Fighting with Styles? This little gem is amazing.
    location ~ ^/sites/.*/files/styles/ { # For Drupal >= 7
        try_files $uri @rewrite;
    }
 
    # Handle private files through Drupal.
    location ~ ^/system/files/ { # For Drupal >= 7
        try_files $uri /index.php?$query_string;
    }
}
ln -s /etc/nginx/sites-available/drupal9 /etc/nginx/sites-enabled/
nginx -t
systemctl restart nginx

Add certificate and force HTTPS.

apt install certbot python3-certbot-nginx
certbot --nginx
cd /etc/nginx/sites-enabled/
rm default

Enable http2 on Nginx.

nano -w /etc/nginx/sites-available/drupal9
  listen 443 ssl http2;
 
systemctl restart nginx

Install MariaDB database server.

apt install mariadb-server mariadb-client
 
systemctl status mysql
● mariadb.service - MariaDB 10.3.25 database server
 
mysql_secure_installation
  Set a root password? [Y/n] Y
  Remove anonymous users? [Y/n] Y
  Disallow root login remotely? [Y/n] Y
  Remove test database and access to it? [Y/n] Y
  Reload privilege tables now? [Y/n] Y

Create database and user for Drupal.

sudo mysql -u root -p
 
CREATE DATABASE drupal_db;
CREATE USER 'drupal_user'@'localhost' IDENTIFIED BY 'some_strong_password'; 
GRANT ALL PRIVILEGES ON drupal_db.* TO 'drupal_user'@'localhost' WITH GRANT OPTION;
flush privileges;
quit;

Install Composer globally (see the official Composer site)

php -r "copy('https://getcomposer.org/installer', 'composer-setup.php');"
php -r "if (hash_file('sha384', 'composer-setup.php') === '756890a4488ce9024fc62c56153228907f1545c228516cbf63f885e036d37e9a59d27d63f46af1d4d07ee0f76181c7d3') { echo 'Installer verified'; } else { echo 'Installer corrupt'; unlink('composer-setup.php'); } echo PHP_EOL;"
 
php composer-setup.php --filename=composer --install-dir=/usr/local/bin
All settings correct for using Composer
Downloading...
Composer (version 2.0.11) successfully installed to: /usr/local/bin/composer
Use it: php /usr/local/bin/composer
 
rm composer-setup.php
 
sudo -u www-data composer --version
Composer version 2.0.11 2021-02-24 14:57:23

Download and install MinIO

wget https://dl.minio.io/server/minio/release/linux-amd64/minio
chmod +x minio
sudo mv minio /usr/local/bin/
 
$ minio --version
minio version RELEASE.2021-03-01T04-20-55Z

Prepare user/group for daemon

sudo groupadd --system minio
sudo useradd -s /sbin/nologin --system -g minio minio
sudo usermod -L minio
sudo chage -E0 minio

Create Minio default settings

sudo nano -w /etc/default/minio
 
# Volume to be used for Minio server.
MINIO_VOLUMES="/srv/repo"
# Use if you want to run Minio on a custom port.
MINIO_OPTS="-C /etc/minio --address localhost:9000"
# Access Key of the server.
MINIO_ACCESS_KEY=minioaccess
# Secret key of the server.
MINIO_SECRET_KEY=miniosecret

Set folder and permissions

sudo chown -R minio:minio /srv/repo
sudo mkdir /etc/minio
sudo chown minio:minio /etc/minio

Set Minio service

sudo nano -w /etc/systemd/system/minio.service
 
[Unit]
Description=Minio
Documentation=https://docs.minio.io
Wants=network-online.target
After=network-online.target
AssertFileIsExecutable=/usr/local/bin/minio
 
[Service]
WorkingDirectory=/srv/repo
User=minio
Group=minio
 
EnvironmentFile=-/etc/default/minio
ExecStartPre=/bin/bash -c "if [ -z \"${MINIO_VOLUMES}\" ]; then echo \"Variable MINIO_VOLUMES not set in /etc/default/minio\"; exit 1; fi"
 
ExecStart=/usr/local/bin/minio server $MINIO_OPTS $MINIO_VOLUMES
 
# Let systemd restart this service always
Restart=always
 
# Specifies the maximum file descriptor number that can be opened by this process
LimitNOFILE=65536
 
# Specifies the maximum number of threads this process can create
TasksMax=infinity
 
# Disable timeout logic and wait until process is stopped
TimeoutStopSec=infinity
SendSIGKILL=no
 
[Install]
WantedBy=multi-user.target

Enable and run service

sudo systemctl daemon-reload
sudo systemctl enable minio
sudo systemctl start minio
 
sudo systemctl status minio

Add reverse proxy for Minio to Nginx.

sudo nano -w /etc/nginx/sites-enabled/drupal9
 
 # Proxy requests to MinIO server running on port 9000
 location /minio/ {
   proxy_set_header X-Real-IP $remote_addr;
   proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
   proxy_set_header X-Forwarded-Proto $scheme;
   proxy_set_header Host $http_host;
 
   proxy_connect_timeout 300;
   # Default is HTTP/1, keepalive is only enabled in HTTP/1.1
   proxy_http_version 1.1;
   proxy_set_header Connection "";
   chunked_transfer_encoding off;
 
   proxy_pass http://localhost:9000;
 }

Check by browsing to https://dbopen.ba.cnr.it/minio

We need Java.

apt install openjdk-14-jdk
 
java -version
openjdk version "14.0.2" 2020-07-14
OpenJDK Runtime Environment (build 14.0.2+12-Ubuntu-120.04)
OpenJDK 64-Bit Server VM (build 14.0.2+12-Ubuntu-120.04, mixed mode, sharing)

Compile Cantaloupe 5.0-SNAPSHOT from develop branch.

apt install maven   # needed only if you have to compile Cantaloupe
 
$ wget https://github.com/cantaloupe-project/cantaloupe/archive/develop.zip
$ unzip develop.zip
$ cd cantaloupe-develop
 
$ mvn clean package -DskipTests 
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time:  07:47 min
[INFO] Finished at: 2021-03-04T13:10:32Z
[INFO] ------------------------------------------------------------------------

Install Cantaloupe

$ mv cantaloupe-develop/target/cantaloupe-5.0-SNAPSHOT.zip ./
$ unzip cantaloupe-5.0-SNAPSHOT.zip
$ sudo mv cantaloupe-5.0-SNAPSHOT /opt/
 
$ sudo mkdir /srv/canta/cache /srv/canta/log /srv/canta/home /srv/canta/tmp
$ sudo useradd -d /srv/canta/home -s /bin/false cantaloupe
$ sudo chown -R cantaloupe:cantaloupe /opt/cantaloupe-5.0-SNAPSHOT /srv/canta
 
$ sudo cp /opt/cantaloupe-5.0-SNAPSHOT/deps/Linux-x86-64/lib/libkdu_* /usr/lib/

Configure

cd /opt/cantaloupe-5.0-SNAPSHOT
sudo cp delegates.rb.sample delegates.rb   # apparently needed to work around a probable bug
sudo cp cantaloupe.properties.sample cantaloupe.properties
sudo nano -w cantaloupe.properties

Settings that differ from the .sample file

temp_pathname = /srv/canta/tmp
max_pixels = 400000000
delegate_script.enabled = true
endpoint.admin.enabled = true
endpoint.admin.secret = admin_secret
source.static = S3Source
FilesystemSource.BasicLookupStrategy.path_prefix = /srv/canta/tmp/
S3Source.endpoint =http://127.0.0.1:9000
S3Source.access_key_id =access_key
S3Source.secret_key =secret_key
S3Source.BasicLookupStrategy.bucket.name =archipelago
S3Source.BasicLookupStrategy.path_prefix =/
processor.selection_strategy = ManualSelectionStrategy
processor.fallback_retrieval_strategy = CacheStrategy
processor.pdf.scratch_file_enabled = true
processor.pdf.max_memory_bytes = 500MB
cache.server.derivative.enabled = true
cache.server.derivative =FilesystemCache
cache.server.purge_missing = true
cache.server.worker.enabled = true
FilesystemCache.pathname = /srv/canta/cache
HeapCache.persist.filesystem.pathname = /srv/canta/heap.cache
log.application.level = trace
log.application.ConsoleAppender.enabled = false
log.application.FileAppender.pathname = /srv/canta/log/application.log
log.application.RollingFileAppender.enabled = true
log.application.RollingFileAppender.pathname = /srv/canta/log/application.log
log.application.RollingFileAppender.TimeBasedRollingPolicy.filename_pattern = /srv/canta/log/application-%d{yyyy-MM-dd}.log
log.error.FileAppender.pathname = /srv/canta/log/error.log
log.error.RollingFileAppender.enabled = true
log.error.RollingFileAppender.pathname = /srv/canta/log/error.log
log.error.RollingFileAppender.TimeBasedRollingPolicy.filename_pattern = /srv/canta/log/error-%d{yyyy-MM-dd}.log
log.access.FileAppender.pathname = /srv/canta/log/access.log
log.access.RollingFileAppender.pathname = /srv/canta/log/access.log
log.access.RollingFileAppender.TimeBasedRollingPolicy.filename_pattern = /srv/canta/log/access-%d{yyyy-MM-dd}.log

Set Cantaloupe as a service

sudo nano -w /etc/systemd/system/cantaloupe.service
[Unit]
Description=Cantaloupe Image Server 5.0 SNAPSHOT
 
[Service]
Type=simple
User=cantaloupe
ExecStart=/usr/bin/java -Dcantaloupe.config=/opt/cantaloupe-5.0-SNAPSHOT/cantaloupe.properties -Xmx6g -jar /opt/cantaloupe-5.0-SNAPSHOT/cantaloupe-5.0-SNAPSHOT.jar
ExecStop=/usr/bin/killall -9 cantaloupe-5.0-SNAPSHOT.jar
Restart=on-failure
 
[Install]
WantedBy=multi-user.target
sudo systemctl daemon-reload
sudo systemctl enable cantaloupe

Enable Nginx as reverse proxy for Cantaloupe

 # Proxy requests to the Cantaloupe server running on port 8182
        location /cantaloupe/ {
                  proxy_set_header X-Forwarded-Proto $scheme;
                  proxy_set_header X-Forwarded-Host $host;
                  proxy_set_header X-Forwarded-Port $server_port;
                  proxy_set_header X-Forwarded-Path /cantaloupe/;
                  proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
                  if ($request_uri ~* "/cantaloupe/(.*)") {
                      proxy_pass http://127.0.0.1:8182/$1;
                  }
        }

Start Cantaloupe service

sudo systemctl start cantaloupe

Check by browsing to https://dbopen.ba.cnr.it/cantaloupe/

Update Cantaloupe to 5.0, now that this version has been released.

wget https://github.com/cantaloupe-project/cantaloupe/releases/download/v5.0/cantaloupe-5.0.zip
unzip cantaloupe-5.0.zip
sudo mv cantaloupe-5.0 /opt/
cd /opt/cantaloupe-5.0
cp ../cantaloupe-5.0-SNAPSHOT/cantaloupe.properties ./
cp delegates.rb.sample delegates.rb
sudo cp /etc/systemd/system/cantaloupe.service ./
sudo nano -w cantaloupe.service
[Unit]
Description=Cantaloupe Image Server 5.0
 
[Service]
Type=simple
User=cantaloupe
ExecStart=/usr/bin/java -Dcantaloupe.config=/opt/cantaloupe-5.0/cantaloupe.properties -Xmx6g -jar /opt/cantaloupe-5.0/cantaloupe-5.0.jar
ExecStop=/usr/bin/killall -9 cantaloupe-5.0.jar
Restart=on-failure
 
[Install]
WantedBy=multi-user.target
sudo chown -R cantaloupe:cantaloupe /opt/cantaloupe-5.0
sudo systemctl stop cantaloupe
sudo cp cantaloupe.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable cantaloupe
sudo systemctl start cantaloupe

Edit system settings for Solr

# sysctl vm.max_map_count
  vm.max_map_count = 65530
# nano -w /etc/sysctl.d/10-solr.conf
  # map_count should be around 1 per 128 KB of system memory
  # we have 16GB system memory so
  vm.max_map_count=131060
# service procps restart
# sysctl vm.max_map_count
vm.max_map_count = 131060
 
$ ulimit -n
  1024
$ ulimit -u
  63709
$ sudo nano -w /etc/systemd/system.conf
  DefaultLimitNOFILE=65535:524288
  DefaultLimitNPROC=65535
$ sudo nano -w /etc/systemd/user.conf
  DefaultLimitNOFILE=65535:524288
  DefaultLimitNPROC=65535
$ sudo nano -w /etc/security/limits.conf
  *       hard    nofile  65535
  *       soft    nofile  65535
  *       hard    nproc   65535
  *       soft    nproc   65535
reboot
$ ulimit -n
  65535
$ ulimit -u
  65535

Install Solr standalone

wget https://downloads.apache.org/lucene/solr/8.8.1/solr-8.8.1.tgz
tar xzf solr-8.8.1.tgz solr-8.8.1/bin/install_solr_service.sh --strip-components=2
sudo bash ./install_solr_service.sh solr-8.8.1.tgz -i /opt -d /srv/solr -u solr -s solr -p 8983
id: ‘solr’: no such user
Creating new user: solr
Warning: The home dir /srv/solr you specified already exists.
Adding system user `solr' (UID 113) ...
Adding new group `solr' (GID 119) ...
Adding new user `solr' (UID 113) with group `solr' ...
The home directory `/srv/solr' already exists.  Not copying from `/etc/skel'.
adduser: Warning: The home directory `/srv/solr' does not belong to the user you are currently creating.
Extracting solr-8.8.1.tgz to /opt
Installing symlink /opt/solr -> /opt/solr-8.8.1 ...
Installing /etc/init.d/solr script ...
Installing /etc/default/solr.in.sh ...
Service solr installed.
Customize Solr startup configuration in /etc/default/solr.in.sh
● solr.service - LSB: Controls Apache Solr as a Service
     Loaded: loaded (/etc/init.d/solr; generated)
     Active: active (exited) since Fri 2021-03-05 10:06:51 UTC; 5s ago
       Docs: man:systemd-sysv-generator(8)
    Process: 2780 ExecStart=/etc/init.d/solr start (code=exited, status=0/SUCCESS)
Mar 05 10:06:43 dbopen systemd[1]: Starting LSB: Controls Apache Solr as a Service...
Mar 05 10:06:44 dbopen su[2784]: (to solr) root on none
Mar 05 10:06:44 dbopen su[2784]: pam_unix(su-l:session): session opened for user solr by (uid=0)
Mar 05 10:06:50 dbopen solr[2864]: Started Solr server on port 8983 (pid=2856). Happy searching!
Mar 05 10:06:51 dbopen systemd[1]: Started LSB: Controls Apache Solr as a Service.

Some basic settings (memory and IP allow/deny)

$ sudo systemctl stop solr
$ sudo nano -w /etc/default/solr.in.sh
SOLR_PID_DIR="/srv/solr"
SOLR_HOME="/srv/solr/data"
LOG4J_PROPS="/srv/solr/log4j2.xml"
SOLR_LOGS_DIR="/srv/solr/logs"
SOLR_PORT="8983"
 
SOLR_HEAP="4g"
SOLR_IP_WHITELIST="127.0.0.1"

Compile solr-ocrhighlighting from HEAD branch

There was an issue with the plugin compiled from HEAD; reverting to 0.5.0 made everything work.

That issue has since been solved, so reverting is no longer needed.

$ wget https://github.com/dbmdz/solr-ocrhighlighting/archive/main.zip
$ unzip main.zip
$ cd solr-ocrhighlighting-main/
$ JAVA_HOME=/usr/lib/jvm/java-14-openjdk-amd64 mvn package
[INFO] Building jar: /home/user/solr-ocrhighlighting-main/target/solr-ocrhighlighting-0.6.0-SNAPSHOT-javadoc.jar
[INFO]
[INFO] --- maven-source-plugin:3.2.1:jar-no-fork (attach-sources) @ solr-ocrhighlighting ---
[INFO] Building jar: /home/user/solr-ocrhighlighting-main/target/solr-ocrhighlighting-0.6.0-SNAPSHOT-sources.jar
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time:  45.214 s
[INFO] Finished at: 2021-03-05T11:15:18Z
[INFO] ------------------------------------------------------------------------
sudo mkdir /opt/solr/contrib/archipelago
sudo mkdir /opt/solr/contrib/archipelago/lib
sudo mv target/solr-ocrhighlighting-0.6.0-SNAPSHOT.jar /opt/solr/contrib/archipelago/lib/

We need the latest ocrhighlighting from the main branch (0.7.0-SNAPSHOT)

$ wget https://github.com/dbmdz/solr-ocrhighlighting/archive/main.zip
$ unzip main.zip
$ cd solr-ocrhighlighting-main/
$ JAVA_HOME=/usr/lib/jvm/java-14-openjdk-amd64 mvn package
$ cd ..
$ sudo mv /opt/solr/contrib/archipelago/lib/solr-ocrhighlighting-0.6.0-SNAPSHOT.jar ./
$ sudo mv solr-ocrhighlighting-main/target/solr-ocrhighlighting-0.7.0-SNAPSHOT.jar /opt/solr/contrib/archipelago/lib/
$ rm -R solr-ocrhighlighting-main
 
Reload Solr

Update to ocrhighlighting 0.7.0 release

$ wget https://github.com/dbmdz/solr-ocrhighlighting/archive/refs/tags/0.7.0.zip
$ unzip 0.7.0.zip
$ cd solr-ocrhighlighting-0.7.0
$ JAVA_HOME=/usr/lib/jvm/java-14-openjdk-amd64 mvn package
$ cd ..
$ sudo mv /opt/solr/contrib/archipelago/lib/solr-ocrhighlighting-0.7.0-SNAPSHOT.jar ./
$ sudo mv solr-ocrhighlighting-0.7.0/target/solr-ocrhighlighting-0.7.0.jar /opt/solr/contrib/archipelago/lib/
$ rm -R solr-ocrhighlighting-0.7.0
 
Reload Solr

Create archipelago core

$ cd /opt/solr
$ sudo -u solr bin/solr create_core -c archipelago -V
Copying configuration to new core instance directory:
/srv/solr/data/archipelago
Creating new core 'archipelago' using command:
http://localhost:8983/solr/admin/cores?action=CREATE&name=archipelago&instanceDir=archipelago
{
  "responseHeader":{
    "status":0,
    "QTime":2174},
  "core":"archipelago"}

Replace core conf with Archipelago Solr conf

$ sudo systemctl stop solr
$ wget https://github.com/esmero/archipelago-deployment/archive/1.0.0-RC2D9.zip
$ unzip 1.0.0-RC2D9.zip
$ sudo -u solr mv /srv/solr/data/archipelago/conf/solrconfig.xml /srv/solr/data/archipelago/conf/solrconfig.xml.ORI
$ sudo -u solr cp archipelago-deployment-1.0.0-RC2D9/persistent/solrconfig/conf/* /srv/solr/data/archipelago/conf/
$ sudo systemctl start solr

Edit Solr filters for Italian-language text

$ sudo -u solr nano -w /srv/solr/data/archipelago/conf/schema_extra_types.xml
<fieldType name="text_und" class="solr.TextField" positionIncrementGap="100">
  <analyzer type="index">
    <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords_und.txt" />
    <filter class="solr.SnowballPorterFilterFactory" language="Italian"/>
    <filter class="solr.LengthFilterFactory" min="3" max="100"/>
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords_und.txt" />
    <filter class="solr.SnowballPorterFilterFactory" language="Italian"/>
    <filter class="solr.LengthFilterFactory" min="3" max="100"/>
  </analyzer>
</fieldType>
 
 
<fieldType name="text_ocr_stored" class="solr.TextField" storeOffsetsWithPositions="true" termVectors="true">
  <analyzer type="index">
    <charFilter class="de.digitalcollections.solrocr.lucene.filters.OcrCharFilterFactory"/>
    <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords_und.txt" />
    <filter class="solr.SnowballPorterFilterFactory" language="Italian"/>
    <filter class="solr.LengthFilterFactory" min="3" max="100"/>
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.MappingCharFilterFactory" mapping="accents_und.txt"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ElisionFilterFactory" articles="lang/contractions_it.txt"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords_und.txt" />
    <filter class="solr.SnowballPorterFilterFactory" language="Italian"/>
    <filter class="solr.LengthFilterFactory" min="3" max="100"/>
  </analyzer>
</fieldType>

Edit the Solr schema so that the Suggester splits on words without punctuation

$ sudo -u solr nano -w /srv/solr/data/archipelago/conf/schema.xml
    <fieldType name="text_ws" class="solr.TextField" omitNorms="true" positionIncrementGap="100">
      <analyzer>
<!--        <tokenizer class="solr.WhitespaceTokenizerFactory"/> -->
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldType>
  • stepbystep/baseservices.txt
  • Last modified: 2021/07/15 16:23
  • by giancarlo