docker pull docker.elastic.co/elasticsearch/elasticsearch:8.7.0
docker network create elastic
docker run –name es01 –net elastic -p 9200:9200 -it docker.elastic.co/elasticsearch/elasticsearch:8.7.0curl -s -k -u elastic:CHANGEME https://localhost:9200 | jq{
  "name": "697bf734a5d5",
  "cluster_name": "docker-cluster",
  "cluster_uuid": "cMISiT__RSWkoKDYql1g4g",
  "version": {
    "number": "8.7.0",
    "build_flavor": "default",
    "build_type": "docker",
    "build_hash": "09520b59b6bc1057340b55750186466ea715e30e",
    "build_date": "2023-03-27T16:31:09.816451435Z",
    "build_snapshot": false,
    "lucene_version": "9.5.0",
    "minimum_wire_compatibility_version": "7.17.0",
    "minimum_index_compatibility_version": "7.0.0"
  },
  "tagline": "You Know, for Search"
}
So, we want to have a dataset already available. Let’s take the sample dataset I’m often using while demoing Elasticsearch: the person dataset. I created [a generator](https://github.com/dadoonet/injector) to create some fake data.
First, let’s download the injector:
# Download the data generator and produce the sample person dataset.
wget https://repo1.maven.org/maven2/fr/pilato/elasticsearch/injector/injector/8.7/injector-8.7.jar
mkdir data
java -jar injector-8.7.jar --console --silent > data/persons.json
# Peek at the first two generated documents.
head -2 data/persons.json
{"name":"Charlene Mickael","dateofbirth":"2000-11-01","gender":"female","children":3,"marketing":{"cars":1236,"shoes":null,"toys":null,"fashion":null,"music":null,"garden":null,"electronic":null,"hifi":1775,"food":null},"address":{"country":"Italy","zipcode":"80100","city":"Ischia","countrycode":"IT","location":{"lon":13.935138341699972,"lat":40.71842684204817}}}
{"name":"Kim Hania","dateofbirth":"1998-05-18","gender":"male","children":4,"marketing":{"cars":null,"shoes":null,"toys":132,"fashion":null,"music":null,"garden":null,"electronic":null,"hifi":null,"food":null},"address":{"country":"Germany","zipcode":"9998","city":"Berlin","countrycode":"DE","location":{"lon":13.164834451298645,"lat":52.604673827377155}}}

#!/bin/bash
# Split the generated dataset into 10000-line chunks and wrap each line in a
# bulk 'index' action so it can be sent to the Elasticsearch _bulk API.
ELASTIC_PASSWORD=CHANGEME

mkdir -p tmp
echo "Split the source in 10000 items"
split -d -l10000 ../data/persons.json tmp/part

BULK_REQUEST_FILE="tmp/bulk_request.ndjson"
FILES="tmp/part*"   # intentionally unquoted in the loop so the glob expands
for f in $FILES
do
  # -f: the request file does not exist yet on the first iteration
  rm -f "$BULK_REQUEST_FILE"
  echo "Preparing $f file..."
  # -r keeps backslashes in the JSON intact; printf (not 'echo -e') avoids
  # corrupting documents that contain escape sequences.
  while read -r p; do
    printf '%s\n' '{"index":{}}' >> "$BULK_REQUEST_FILE"
    printf '%s\n' "$p" >> "$BULK_REQUEST_FILE"
done < "$f"
# NOTE(review): the curl call to the _bulk endpoint and the outer for-loop's
# closing 'done' appear to have been lost in the copy/paste — restore them
# from the original article before running this script.

# logstash.conf pipeline:
input {
  file {
    path => "/usr/share/logstash/persons/persons.json"
mode => “read”
codec => json { }
exit_after_read => true
}
}
filter {
mutate {
remove_field => [ “log”, “@timestamp”, “event”, “@version” ]
}
}
output {
elasticsearch {
hosts => “${ELASTICSEARCH_URL}”
index => “person”
user => “elastic”
password => “${ELASTIC_PASSWORD}”
ssl_certificate_verification => false
}
}

docker run --rm -it --name ls01 --net elastic \
  -v $(pwd)/../data/:/usr/share/logstash/persons/:ro \
  -v $(pwd)/pipeline/:/usr/share/logstash/pipeline/:ro \
  -e XPACK_MONITORING_ENABLED=false \
  -e ELASTICSEARCH_URL="https://es01:9200" \
  -e ELASTIC_PASSWORD="CHANGEME" \
  docker.elastic.co/logstash/logstash:8.7.0

# .env
ELASTIC_PASSWORD=CHANGEME
STACK_VERSION=8.7.0
ES_PORT=9200

# docker-compose.yml
version: "2.2"
services:
  es01:
    image: docker.elastic.co/elasticsearch/elasticsearch:${STACK_VERSION}
    ports:
      - ${ES_PORT}:9200
    environment:
      - node.name=es01
      - cluster.initial_master_nodes=es01
      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
      - bootstrap.memory_lock=true
    ulimits:
      memlock:
        soft: -1
        hard: -1
    healthcheck:
      # The node is considered up once security answers (401 body contains
      # 'missing authentication credentials').
      test:
        [
          "CMD-SHELL",
          "curl -s -k https://localhost:9200 | grep -q 'missing authentication credentials'",
        ]
      interval: 10s
      timeout: 10s
      retries: 120
  logstash:
    depends_on:
      es01:
        condition: service_healthy
    image: docker.elastic.co/logstash/logstash:${STACK_VERSION}
    volumes:
      - type: bind
        source: ../data
        target: /usr/share/logstash/persons
        read_only: true
      - type: bind
        source: pipeline
        target: /usr/share/logstash/pipeline
        read_only: true
    environment:
      - ELASTICSEARCH_URL=https://es01:9200
      - ELASTIC_PASSWORD=${ELASTIC_PASSWORD}
      - XPACK_MONITORING_ENABLED=false

docker compose up

with-compose-logstash-1 | [2023-04-21T15:17:55,335][INFO ][logstash.runner ] Logstash shut down.
with-compose-logstash-1 exited with code 0

# We force merge the segments first
POST /person/_forcemerge?max_num_segments=1

# Snapshot the data
PUT /_snapshot/sample/persons
{
  "indices": "person",
  "include_global_state": false
}

POST /_snapshot/sample/persons/_restore

docker run --name es01 --net elastic -p 9200:9200 -it docker.elastic.co/elasticsearch/elasticsearch:8.7.0

docker run --name es01 --net elastic -p 9200:9200 -it -v persons-data:/usr/share/elasticsearch/data -v persons-config:/usr/share/elasticsearch/config docker.elastic.co/elasticsearch/elasticsearch:8.7.0

docker volume inspect persons-data persons-config
[
  {
    "CreatedAt": "2023-05-09T10:20:14Z",
    "Driver": "local",
    "Labels": null,
    "Mountpoint": "/var/lib/docker/volumes/persons-data/_data",
    "Name": "persons-data",
    "Options": null,
    "Scope": "local"
  },
  {
    "CreatedAt": "2023-05-09T10:19:51Z",
    "Driver": "local",
    "Labels": null,
    "Mountpoint": "/var/lib/docker/volumes/persons-config/_data",
    "Name": "persons-config",
    "Options": null,
    "Scope": "local"
  }
]

docker run --rm -it -v /tmp/volume-backup:/backup -v /var/lib/docker:/docker alpine:edge tar cfz /backup/persons.tgz /docker/volumes/persons-config /docker/volumes/persons-data

docker volume create persons-config
docker volume create persons-data
docker run --rm -it -v /tmp/volume-backup:/backup -v /var/lib/docker:/docker alpine:edge tar xfz /backup/persons.tgz -C /

docker run --name es01 --net elastic -p 9200:9200 -it -v persons-data:/usr/share/elasticsearch/data -v persons-config:/usr/share/elasticsearch/config docker.elastic.co/elasticsearch/elasticsearch:8.7.0

POST /api/v1/deployments?validate_only=false
{
  "resources": {
    "elasticsearch": [
      {
        "region": "gcp-europe-west1",
        "plan": {
          "cluster_topology": [
            {
              "zone_count": 2,
              "elasticsearch": {
                "node_attributes": {
                  "data": "hot"
                }
              },
              "instance_configuration_id": "gcp.es.datahot.n2.68x10x45",
              "node_roles": [
                "master",
                "ingest",
                "transform",
                "data_hot",
                "remote_cluster_client",
                "data_content"
              ],
              "id": "hot_content",
              "size": {
                "resource": "memory",
                "value": 8192
              }
            }
          ],
          "elasticsearch": {
            "version": "8.7.1"
          },
          "deployment_template": {
            "id": "gcp-storage-optimized-v5"
          },
          "transient": {
            "restore_snapshot": {
              "snapshot_name": "__latest_success__",
              "source_cluster_id": "CLUSTER_ID"
            }
          }
        },
        "ref_id": "main-elasticsearch"
      }
    ],
    "kibana": [
      {
        "elasticsearch_cluster_ref_id": "main-elasticsearch",
        "region": "gcp-europe-west1",
        "plan": {
          "cluster_topology": [
            {
              "instance_configuration_id": "gcp.kibana.n2.68x32x45",
              "zone_count": 1,
              "size": {
                "resource": "memory",
                "value": 1024
              }
            }
          ],
          "kibana": {
            "version": "8.7.1"
          }
        },
        "ref_id": "main-kibana"
      }
    ]
  },
  "settings": {
    "autoscaling_enabled": false
  },
  "name": "persons",
  "metadata": {
    "system_owned": false
  }
}

POST /api/v1/deployments/DEPLOYMENT_ID/_shutdown
Source link
Leave a Reply