OpenSearch 3.x adds neural/vector search capabilities that were previously only available in specialised vector databases. Combined with Ollama running locally for embedding generation, this lets you add semantic product search to Magento 2 without external API dependencies. This article shows the complete implementation: generating embeddings, storing k-NN vectors, and running hybrid search that combines text and semantic scoring.
Vector search concepts
| Concept | Meaning | E-commerce use case |
|---|---|---|
| Embedding | Dense numeric vector representing text meaning | Represent product description as 768 numbers |
| k-NN search | Find k vectors nearest to a query vector | “Find products semantically similar to query” |
| Hybrid search | Combine text score + vector score | Exact keyword match + semantic relevance |
| Ollama | Local LLM server for embeddings | Generate embeddings without OpenAI API costs |
Setup – OpenSearch 3.x with neural plugin + Ollama
# .ddev/docker-compose.opensearch3.yaml
# Adds two services to the DDEV project: an OpenSearch 3.0.0 node and an Ollama server.
version: '3.6'
services:
  opensearch:
    image: opensearchproject/opensearch:3.0.0
    environment:
      # Run as a one-node cluster (no discovery of other nodes).
      - discovery.type=single-node
      # The security plugin is switched off, so the node accepts unauthenticated requests.
      # NOTE(review): acceptable for a local sandbox only; do not reuse this setting elsewhere.
      - DISABLE_SECURITY_PLUGIN=true
      # NOTE(review): confirm this setting key is recognised by the neural-search plugin in
      # the OpenSearch version you run; remove it if the node rejects it on startup.
      - plugins.neural_search.enabled=true
      # Fixed 1 GB JVM heap (initial and maximum).
      - "OPENSEARCH_JAVA_OPTS=-Xms1g -Xmx1g"
    ports:
      # Container port only; the host port is assigned dynamically.
      - "9200"
    labels:
      com.ddev.site-name: ${DDEV_SITENAME}
  ollama:
    image: ollama/ollama:latest
    ports:
      # Container port only; the host port is assigned dynamically.
      - "11434"
    volumes:
      # Named volume mounted at /root/.ollama so its contents survive container re-creation.
      - ollama_data:/root/.ollama
    labels:
      com.ddev.site-name: ${DDEV_SITENAME}
volumes:
  ollama_data:
# Pull the embedding model inside the `ollama` service container
ddev exec -s ollama ollama pull nomic-embed-text
# nomic-embed-text: 274MB, 768-dimensional embeddings
# Verify that the embed endpoint answers.
# NOTE(review): the hostname `ollama` is the compose service name, so this presumably
# has to be run from inside a project container (e.g. after `ddev ssh`) — confirm.
curl http://ollama:11434/api/embed -d '{"model":"nomic-embed-text","input":"red shoes"}'
# Returns: {"embeddings":[[0.023, -0.145, ...]]} (768 floats)
Create k-NN index in OpenSearch
<?php
declare(strict_types=1);
/**
 * Creates the OpenSearch index that holds product documents together with
 * their k-NN vector field.
 */
class OpenSearchVectorIndexManager
{
    /**
     * @param \GuzzleHttp\Client $client  HTTP client used for the index request.
     * @param string             $baseUrl Base URL of the OpenSearch node (no trailing slash).
     */
    public function __construct(
        private \GuzzleHttp\Client $client,
        private string $baseUrl = 'http://opensearch:9200'
    ) {}

    /**
     * Issues the PUT request that creates the index with k-NN enabled and
     * the product field mapping.
     *
     * @param string $indexName Name of the index to create.
     */
    public function createProductIndex(string $indexName = 'magento_products_vector'): void
    {
        $endpoint = $this->baseUrl . '/' . $indexName;
        $definition = [
            'settings' => $this->indexSettings(),
            'mappings' => ['properties' => $this->fieldMappings()],
        ];

        $this->client->put($endpoint, ['json' => $definition]);
    }

    /**
     * Index-level settings: turns k-NN on and sets the ef_search parameter.
     *
     * @return array<string, mixed>
     */
    private function indexSettings(): array
    {
        return [
            'index' => [
                'knn' => true,
                'knn.algo_param' => ['ef_search' => 100],
            ],
        ];
    }

    /**
     * Field definitions for the product documents, including the vector field.
     *
     * @return array<string, array<string, mixed>>
     */
    private function fieldMappings(): array
    {
        // Vector field for semantic search
        $vectorField = [
            'type' => 'knn_vector',
            'dimension' => 768, // nomic-embed-text output size
            'method' => [
                'name' => 'hnsw',
                'engine' => 'lucene',
                'parameters' => ['ef_construction' => 128, 'm' => 16],
            ],
        ];

        return [
            'product_id' => ['type' => 'integer'],
            'sku' => ['type' => 'keyword'],
            'name' => ['type' => 'text', 'analyzer' => 'standard'],
            'description' => ['type' => 'text'],
            'price' => ['type' => 'float'],
            'category_ids' => ['type' => 'integer'],
            'status' => ['type' => 'integer'],
            'name_vector' => $vectorField,
        ];
    }
}
Embedding service – Ollama integration
<?php
declare(strict_types=1);
/**
 * Generates text embeddings by calling the Ollama HTTP API.
 */
class OllamaEmbeddingService
{
    /**
     * @param \GuzzleHttp\Client $client    HTTP client used for the embed requests.
     * @param string             $ollamaUrl Base URL of the Ollama server (no trailing slash).
     * @param string             $model     Name of the embedding model to request.
     */
    public function __construct(
        private \GuzzleHttp\Client $client,
        private string $ollamaUrl = 'http://ollama:11434',
        private string $model = 'nomic-embed-text'
    ) {}

    /**
     * Requests the embedding of a single text.
     *
     * @param string $text Text to embed.
     *
     * @return float[] The first embedding contained in the response.
     *
     * @throws \JsonException    When the response body is not valid JSON.
     * @throws \RuntimeException When the response contains no embedding.
     */
    public function embed(string $text): array
    {
        $response = $this->client->post("{$this->ollamaUrl}/api/embed", [
            'json' => ['model' => $this->model, 'input' => $text],
            'timeout' => 30,
        ]);

        // getBody() returns a stream object. With strict_types enabled PHP does not
        // convert it to string implicitly, so json_decode() would raise a TypeError
        // without the explicit cast.
        $payload = (string) $response->getBody();
        $data = json_decode($payload, true, 512, JSON_THROW_ON_ERROR);

        $embedding = is_array($data) ? ($data['embeddings'][0] ?? null) : null;
        if (!is_array($embedding) || $embedding === []) {
            // An empty vector cannot be indexed or searched; fail here instead of
            // handing a useless value to the caller.
            throw new \RuntimeException(
                "Ollama returned no embedding for model '{$this->model}'."
            );
        }

        return $embedding;
    }

    /**
     * Embeds several texts, preserving the keys of the input array.
     *
     * @param array<int|string, string> $texts Texts keyed by an identifier (e.g. product id).
     *
     * @return array<int|string, float[]> Embeddings keyed like the input.
     *
     * @throws \JsonException    When a response body is not valid JSON.
     * @throws \RuntimeException When a response contains no embedding.
     */
    public function embedBatch(array $texts): array
    {
        // One request per text, executed sequentially.
        // NOTE(review): the embed endpoint is understood to accept a list of inputs as
        // well; sending the batch in one request may be faster — confirm before changing.
        $results = [];
        foreach ($texts as $id => $text) {
            $results[$id] = $this->embed($text);
        }

        return $results;
    }
}
Index products with vectors
<?php
declare(strict_types=1);
/**
 * Reads enabled products from the Magento catalogue, embeds their text and
 * writes them to the OpenSearch vector index through the bulk API.
 */
class ProductVectorIndexer
{
    /** Maximum number of characters of product text sent to the embedding model. */
    private const MAX_EMBED_CHARS = 500;

    /**
     * @param \Magento\Catalog\Model\ResourceModel\Product\CollectionFactory $collectionFactory Creates product collections.
     * @param OllamaEmbeddingService $embedder         Produces the vector for each product.
     * @param \GuzzleHttp\Client     $openSearchClient HTTP client used for the bulk requests.
     * @param string                 $indexName        Target index name.
     * @param string                 $osUrl            Base URL of the OpenSearch node (no trailing slash).
     */
    public function __construct(
        private \Magento\Catalog\Model\ResourceModel\Product\CollectionFactory $collectionFactory,
        private OllamaEmbeddingService $embedder,
        private \GuzzleHttp\Client $openSearchClient,
        private string $indexName = 'magento_products_vector',
        private string $osUrl = 'http://opensearch:9200'
    ) {}

    /**
     * Indexes every product with status 1, one page of $batchSize products at a time.
     * Prints a progress line after each page.
     *
     * @param int $batchSize Number of products per page / bulk request.
     *
     * @throws \JsonException    When a document cannot be encoded or a bulk response is not valid JSON.
     * @throws \RuntimeException When OpenSearch reports a failed item in a bulk response.
     */
    public function indexAll(int $batchSize = 50): void
    {
        $page = 1;
        do {
            $collection = $this->collectionFactory->create();
            $collection->addAttributeToSelect(['sku', 'name', 'description', 'price', 'status']);
            $collection->addAttributeToFilter('status', 1);
            // A fixed sort order keeps page boundaries stable between the separate queries.
            $collection->setOrder('entity_id', 'ASC');
            $collection->setPageSize($batchSize);
            $collection->setCurPage($page);

            // Bound the loop by the collection's own page count instead of by "the page
            // was full". With a catalogue size that is an exact multiple of the batch size
            // the old condition requested a page past the end.
            // NOTE(review): Magento collections are understood to clamp an out-of-range
            // page to the last page, which made that case loop forever — confirm.
            $lastPage = (int) $collection->getLastPageNumber();

            $collection->load();
            $items = $collection->getItems();
            if ($items === []) {
                break;
            }

            $this->indexBatch($items);
            // Report before incrementing so the number matches the page just written.
            echo "Indexed page {$page}\n";
            $page++;
        } while ($page <= $lastPage);
    }

    /**
     * Embeds the given products and sends them to OpenSearch in one bulk request.
     *
     * @param array $products Product models of one page.
     *
     * @throws \JsonException    When a document cannot be encoded or the response is not valid JSON.
     * @throws \RuntimeException When OpenSearch reports a failed item.
     */
    private function indexBatch(array $products): void
    {
        if ($products === []) {
            // An empty bulk body is not a valid request.
            return;
        }

        // Fail loudly on encoding problems; a `false` from json_encode() would otherwise
        // be concatenated as an empty line and corrupt the NDJSON body.
        $jsonFlags = JSON_THROW_ON_ERROR | JSON_INVALID_UTF8_SUBSTITUTE;

        $body = '';
        foreach ($products as $product) {
            $name = (string) $product->getName();
            $description = strip_tags((string) ($product->getDescription() ?? ''));

            // Limit text length. mb_substr() counts characters, so a multibyte character
            // is never cut in half (substr() counts bytes).
            $text = mb_substr($name . '. ' . $description, 0, self::MAX_EMBED_CHARS);
            $vector = $this->embedder->embed($text);

            $meta = json_encode(
                ['index' => ['_index' => $this->indexName, '_id' => $product->getId()]],
                $jsonFlags
            );
            $doc = json_encode([
                'product_id' => (int) $product->getId(),
                'sku' => $product->getSku(),
                'name' => $product->getName(),
                'description' => $description,
                'price' => (float) $product->getPrice(),
                'status' => (int) $product->getStatus(),
                'name_vector' => $vector,
            ], $jsonFlags);

            $body .= $meta . "\n" . $doc . "\n";
        }

        $response = $this->openSearchClient->post("{$this->osUrl}/_bulk", [
            'headers' => ['Content-Type' => 'application/x-ndjson'],
            'body' => $body,
        ]);

        $this->assertBulkSucceeded((string) $response->getBody());
    }

    /**
     * Throws when a bulk response flags errors, quoting the first failed item.
     * A bulk request can succeed at HTTP level while individual items fail.
     *
     * @param string $responseBody Raw JSON body of the bulk response.
     *
     * @throws \JsonException    When the body is not valid JSON.
     * @throws \RuntimeException When the response reports at least one failed item.
     */
    private function assertBulkSucceeded(string $responseBody): void
    {
        $result = json_decode($responseBody, true, 512, JSON_THROW_ON_ERROR);
        if (!is_array($result) || ($result['errors'] ?? false) !== true) {
            return;
        }

        foreach ($result['items'] ?? [] as $item) {
            $error = $item['index']['error'] ?? null;
            if ($error === null) {
                continue;
            }
            $reason = is_array($error)
                ? (string) ($error['reason'] ?? json_encode($error))
                : (string) $error;
            $id = (string) ($item['index']['_id'] ?? '?');

            throw new \RuntimeException("Bulk indexing failed for document {$id}: {$reason}");
        }

        throw new \RuntimeException('Bulk indexing reported errors.');
    }
}
Hybrid search – text + semantic
<?php
declare(strict_types=1);

/**
 * Product search that combines a BM25 text query with a k-NN vector query
 * in one OpenSearch hybrid query.
 */
class HybridProductSearch
{
    /** Name of the search pipeline applied to every hybrid search request. */
    private const SEARCH_PIPELINE = 'nlp-search-pipeline';

    /**
     * @param OllamaEmbeddingService $embedder  Produces the query vector.
     * @param \GuzzleHttp\Client     $client    HTTP client used for the OpenSearch requests.
     * @param string                 $osUrl     Base URL of the OpenSearch node (no trailing slash).
     * @param string                 $indexName Index to search.
     */
    public function __construct(
        private OllamaEmbeddingService $embedder,
        private \GuzzleHttp\Client $client,
        private string $osUrl = 'http://opensearch:9200',
        private string $indexName = 'magento_products_vector'
    ) {}

    /**
     * Creates (or replaces) the search pipeline that search() refers to.
     * It normalises the scores of the two sub-queries and combines them with a
     * weighted arithmetic mean. Run this once before the first search.
     *
     * @param float[] $weights Two weights in sub-query order: [text, vector].
     *
     * @throws \InvalidArgumentException When $weights does not contain exactly two values.
     */
    public function createSearchPipeline(array $weights = [0.3, 0.7]): void
    {
        if (count($weights) !== 2) {
            throw new \InvalidArgumentException('Exactly two weights are required: [text, vector].');
        }

        $this->client->put("{$this->osUrl}/_search/pipeline/" . self::SEARCH_PIPELINE, [
            'json' => [
                'description' => 'Score normalisation for hybrid product search',
                'phase_results_processors' => [
                    [
                        'normalization-processor' => [
                            'normalization' => ['technique' => 'min_max'],
                            'combination' => [
                                'technique' => 'arithmetic_mean',
                                'parameters' => ['weights' => array_values($weights)],
                            ],
                        ],
                    ],
                ],
            ],
        ]);
    }

    /**
     * Runs the hybrid query and returns the stored documents of the hits.
     *
     * @param string $query Search phrase entered by the user.
     * @param int    $size  Maximum number of results.
     *
     * @return array<int, array<string, mixed>> The `_source` of each hit, in result order.
     *
     * @throws \JsonException When the response body is not valid JSON.
     */
    public function search(string $query, int $size = 10): array
    {
        // Nothing to search for: skip the embedding and search requests entirely.
        if (trim($query) === '' || $size < 1) {
            return [];
        }

        $queryVector = $this->embedder->embed($query);

        // Hybrid query: BM25 text score + k-NN vector score
        $hybridQuery = [
            'hybrid' => [
                'queries' => [
                    // Text search (BM25)
                    [
                        'multi_match' => [
                            'query' => $query,
                            'fields' => ['name^3', 'description'],
                        ],
                    ],
                    // Semantic search (k-NN)
                    [
                        'knn' => [
                            'name_vector' => [
                                'vector' => $queryVector,
                                'k' => $size * 2,
                            ],
                        ],
                    ],
                ],
            ],
        ];

        $response = $this->client->post("{$this->osUrl}/{$this->indexName}/_search", [
            // A named pipeline is referenced through the URL query string
            // (?search_pipeline=...), not as a string inside the request body.
            'query' => ['search_pipeline' => self::SEARCH_PIPELINE],
            'json' => [
                'size' => $size,
                'query' => $hybridQuery,
            ],
        ]);

        // The body is a stream object; strict_types requires the explicit string cast.
        $data = json_decode((string) $response->getBody(), true, 512, JSON_THROW_ON_ERROR);

        return array_map(
            static fn (array $hit): array => $hit['_source'] ?? [],
            $data['hits']['hits'] ?? []
        );
    }
}
// Usage
// $embedder and $client are not defined in this snippet: create an
// OllamaEmbeddingService and a \GuzzleHttp\Client first (the constructor's
// first two parameter types). The URL and index name fall back to their defaults.
$search = new HybridProductSearch($embedder, $client);
// With no second argument, search() uses its default size of 10.
$results = $search->search('comfortable running shoes for wide feet');
// $results is an array holding the `_source` document of each hit.
// Returns products semantically related to the query,
// even if the exact words don't appear in the product description
Summary
OpenSearch 3.x + Ollama enables semantic product search without cloud API costs or external vector database infrastructure. The embedding model runs locally; the vector index lives in the same OpenSearch instance already used for Magento search. Hybrid search combines the precision of BM25 keyword matching with the semantic understanding of k-NN vector search. The practical result: customers who search “comfortable running shoes for wide feet” find relevant products even when that exact phrase does not appear in the catalogue.
