Iterator and Generator are two related but distinct mechanisms in PHP. Iterator is a GoF pattern – exposed in PHP as the built-in \Iterator interface – that gives sequential access to a collection without exposing its internal structure. Generator is a PHP language feature: any function containing yield returns a \Generator object, which itself implements \Iterator and produces its values one at a time instead of storing them all in memory at once. I show both, benchmark the memory difference, and apply them to large-scale Magento product processing.
Iterator pattern – GoF implementation
<?php
declare(strict_types=1);
// A custom Iterator - traverses a product collection page by page
/**
 * Iterates over the products returned by the repository, fetching one page
 * of `$pageSize` items at a time so that only the current page is held by
 * this object.
 *
 * Keys are the zero-based position of the product across all pages.
 *
 * NOTE(review): SearchCriteriaBuilder::create() is called for every page; any
 * filters the caller configured on the shared builder are presumably consumed
 * by the first create() call — confirm against the builder implementation.
 */
class PaginatedProductIterator implements \Iterator
{
    /** @var list<\Magento\Catalog\Api\Data\ProductInterface> Items of the page currently loaded. */
    private array $currentPage = [];
    /** Position inside $currentPage. */
    private int $currentIndex = 0;
    /** One-based number of the page currently loaded. */
    private int $pageNumber = 1;
    /** True once iteration has run past the final product. */
    private bool $finished = false;
    /** True when the page currently loaded is known to be the final one. */
    private bool $lastPageLoaded = false;

    public function __construct(
        private \Magento\Catalog\Api\ProductRepositoryInterface $productRepository,
        private \Magento\Framework\Api\SearchCriteriaBuilder $searchCriteriaBuilder,
        private int $pageSize = 100
    ) {}

    /**
     * Restarts iteration from the first page (triggers a repository query).
     */
    public function rewind(): void
    {
        $this->pageNumber = 1;
        $this->currentIndex = 0;
        $this->finished = false;
        $this->lastPageLoaded = false;
        $this->loadPage();
    }

    /**
     * Returns the product at the current position.
     * Only call this while valid() returns true.
     */
    public function current(): \Magento\Catalog\Api\Data\ProductInterface
    {
        return $this->currentPage[$this->currentIndex];
    }

    /**
     * Returns the zero-based position of the current product across all pages.
     */
    public function key(): int
    {
        return ($this->pageNumber - 1) * $this->pageSize + $this->currentIndex;
    }

    /**
     * Advances to the next product, loading the following page when the
     * current one is exhausted.
     */
    public function next(): void
    {
        $this->currentIndex++;
        if ($this->currentIndex < count($this->currentPage)) {
            return;
        }

        // Do not request a page beyond the last one: paginated collections may
        // clamp an out-of-range page number to the last page and return the
        // same items again, which would make this iterator loop forever.
        if ($this->lastPageLoaded) {
            $this->finished = true;
            $this->currentPage = [];
            $this->currentIndex = 0;
            return;
        }

        $this->pageNumber++;
        $this->currentIndex = 0;
        $this->loadPage();
    }

    /**
     * Tells whether current() can be called.
     */
    public function valid(): bool
    {
        return !$this->finished && isset($this->currentPage[$this->currentIndex]);
    }

    /**
     * Fetches page $pageNumber from the repository and records whether it is
     * the final page.
     */
    private function loadPage(): void
    {
        $searchCriteria = $this->searchCriteriaBuilder
            ->setPageSize($this->pageSize)
            ->setCurrentPage($this->pageNumber)
            ->create();
        $result = $this->productRepository->getList($searchCriteria);
        // Re-index so positions inside the page are 0..n-1 regardless of how
        // the repository keys its items.
        $items = array_values($result->getItems());

        if ($items === []) {
            $this->finished = true;
            $this->lastPageLoaded = true;
            $this->currentPage = [];
            return;
        }

        $this->currentPage = $items;

        // The page is the last one when it is shorter than a full page, or
        // when everything reported by the search result has been seen.
        $seenSoFar = ($this->pageNumber - 1) * $this->pageSize + count($items);
        $this->lastPageLoaded = count($items) < $this->pageSize
            || $seenSoFar >= (int) $result->getTotalCount();
    }
}
// Usage: walk the whole catalogue while the iterator holds a single page at a time
$pageSize = 100;
$iterator = new PaginatedProductIterator($productRepository, $searchCriteriaBuilder, $pageSize);
foreach ($iterator as $product) {
    // The iterator keeps at most one page ($pageSize products) loaded
    $this->processProduct($product);
}
IteratorAggregate – simpler approach
<?php
declare(strict_types=1);
// IteratorAggregate - simpler: just return an Iterator or Generator
/**
 * Traversable view over enabled products read from the flat product table.
 *
 * Rows are fetched in batches of `$batchSize` using keyset pagination
 * (`entity_id > last seen id`) and yielded one by one as
 * `entity_id => row`.
 */
class ProductExportCollection implements \IteratorAggregate
{
    public function __construct(
        private \Magento\Framework\App\ResourceConnection $resourceConnection,
        private int $batchSize = 500
    ) {}

    /**
     * Returns a fresh generator; each foreach over this object starts again
     * from the lowest entity_id.
     */
    public function getIterator(): \Traversable
    {
        return $this->generateProducts();
    }

    /**
     * Yields product rows batch by batch.
     *
     * @return \Generator<int|string, array<string, mixed>>
     */
    private function generateProducts(): \Generator
    {
        $connection = $this->resourceConnection->getConnection();
        // Resolve the physical table name so installations that use a table
        // prefix query the right table.
        $table = $this->resourceConnection->getTableName('catalog_product_flat_1');
        $lastId = 0;

        while (true) {
            $select = $connection->select()
                ->from($table)
                ->where('entity_id > ?', $lastId)
                ->where('status = ?', 1)
                ->order('entity_id ASC')
                ->limit($this->batchSize);
            $rows = $connection->fetchAll($select);

            if ($rows === []) {
                break;
            }

            foreach ($rows as $row) {
                yield $row['entity_id'] => $row; // key => value
            }

            // Continue after the highest id seen in this batch.
            $lastId = (int) $rows[array_key_last($rows)]['entity_id'];

            // The previous batch is released when $rows is reassigned above;
            // this call additionally collects any reference cycles created
            // while the caller processed the batch.
            gc_collect_cycles();
        }
    }
}
// Usage: foreach obtains the generator through getIterator() automatically
$batchSize = 500;
$collection = new ProductExportCollection($resourceConnection, $batchSize);
foreach ($collection as $productId => $productData) {
    // $productId is the yielded key (entity_id), $productData the full row
    $this->exportProduct($productId, $productData);
}
Generator – lazy evaluation with yield
<?php
declare(strict_types=1);
// Generator function - produces values on demand
/**
 * Endless Fibonacci sequence: 0, 1, 1, 2, 3, 5, ...
 *
 * The generator never finishes on its own; the caller decides how many
 * terms to consume.
 */
function generateFibonacci(): \Generator
{
    $current = 0;
    $upcoming = 1;
    for (;;) {
        yield $current;
        $sum = $current + $upcoming;
        $current = $upcoming;
        $upcoming = $sum;
    }
}
// The sequence is infinite, so take exactly as many terms as needed
$fib = generateFibonacci();
$remaining = 10;
while ($remaining-- > 0) {
    echo $fib->current() . ' ';
    $fib->next();
}
// Prints: 0 1 1 2 3 5 8 13 21 34
// Generator with send() - bidirectional communication
/**
 * Streams a CSV file row by row as associative arrays keyed by the header row.
 *
 * The value passed to Generator::send() is received as the result of the
 * `yield` expression, which lets the consumer talk back to the generator.
 * Blank lines in the file are skipped. The file handle is closed when the
 * generator finishes or is destroyed before reaching the end of the file.
 *
 * @param string $filename Path of the CSV file; its first row holds the column names.
 * @return \Generator<int, array<string, string|null>>
 * @throws \RuntimeException When the file cannot be opened.
 * @throws \ValueError When a row has a different number of fields than the header.
 */
function csvRowProcessor(string $filename): \Generator
{
    $file = fopen($filename, 'r');
    if ($file === false) {
        throw new \RuntimeException("Unable to open CSV file: {$filename}");
    }

    try {
        $headers = fgetcsv($file); // first row = column names
        if ($headers === false) {
            return; // empty file: nothing to yield
        }

        while (($row = fgetcsv($file)) !== false) {
            if ($row === [null]) {
                continue; // fgetcsv() reports a blank line as [null]
            }

            $data = array_combine($headers, $row);
            $feedback = yield $data; // send data out, receive feedback
            if ($feedback === 'skip') {
                // Caller told us to skip this row. Nothing else happens for a
                // row after the yield, so this simply moves on to the next one.
                continue;
            }
        }
    } finally {
        // Also runs when the consumer abandons the generator early.
        fclose($file);
    }
}
$processor = csvRowProcessor('/tmp/products.csv');
// Drive the generator by hand instead of with foreach: send() already resumes
// the generator and stops at the NEXT row, so combining it with foreach's own
// implicit next() would silently drop the row that follows every skipped one.
while ($processor->valid()) {
    $row = $processor->current();
    if (empty($row['sku'])) {
        $processor->send('skip'); // send instruction back; generator is now on the next row
        continue;
    }
    $this->importRow($row);
    $processor->next();
}
Memory benchmark
<?php
$count = 100_000;
// Eager approach - every record is built up front and kept in one array
$start = memory_get_usage();
$data = [];
$i = 0;
while ($i < $count) {
    $data[] = ['id' => $i, 'value' => str_repeat('x', 100)];
    $i++;
}
// Growth of allocated memory caused by holding all records at once
$arrayMemory = memory_get_usage() - $start;
// Generator approach - produces one item at a time
/**
 * Lazily produces $count records of the form ['id' => n, 'value' => 100 x 'x'],
 * building each record only when the consumer asks for it.
 */
function generateData(int $count): \Generator {
    $id = 0;
    while ($id < $count) {
        yield ['id' => $id, 'value' => str_repeat('x', 100)];
        $id++;
    }
}
// Cost of merely creating the generator: no record has been built yet
$start = memory_get_usage();
$gen = generateData($count);
$genMemoryStart = memory_get_usage() - $start;

// Cost while iterating: sample the growth on every step and keep the maximum.
// The same metric (memory_get_usage()) is used for baseline and samples;
// memory_get_peak_usage() is not usable here because the process-wide peak
// already includes the array built earlier in this script.
$start = memory_get_usage();
$genMemoryPeak = 0;
foreach ($gen as $item) {
    /* process */
    $genMemoryPeak = max($genMemoryPeak, memory_get_usage() - $start);
}

echo 'Array (all in memory): ' . number_format($arrayMemory / 1024 / 1024, 1) . " MB\n"; // tens of MB
echo 'Generator (creation): ' . number_format($genMemoryStart / 1024, 1) . " KB\n"; // well under 1 KB
echo 'Generator (lazy): ' . number_format($genMemoryPeak / 1024, 1) . " KB\n"; // roughly the size of one record
// The generator's footprint stays about the size of a single item, no matter how many items it produces
Pipeline of generators
<?php
// Chain generators into a processing pipeline - each step is lazy
/**
 * Pipeline source: lazily reads a CSV file and yields each data row as an
 * associative array keyed by the header row.
 *
 * Blank lines are skipped. The file handle is closed when the generator
 * finishes or is destroyed before reaching the end of the file.
 *
 * @param string $file Path of the CSV file; its first row holds the column names.
 * @return \Generator<int, array<string, string|null>>
 * @throws \RuntimeException When the file cannot be opened.
 * @throws \ValueError When a row has a different number of fields than the header.
 */
function readCsv(string $file): \Generator
{
    $handle = fopen($file, 'r');
    if ($handle === false) {
        throw new \RuntimeException("Unable to open CSV file: {$file}");
    }

    try {
        $headers = fgetcsv($handle);
        if ($headers === false) {
            return; // empty file: nothing to yield
        }

        while (($row = fgetcsv($handle)) !== false) {
            if ($row === [null]) {
                continue; // fgetcsv() reports a blank line as [null]
            }
            yield array_combine($headers, $row);
        }
    } finally {
        // Also runs when a downstream stage stops consuming early.
        fclose($handle);
    }
}
/**
 * Pipeline stage: passes through only the rows whose 'status' is exactly
 * the string 'active'. Rows without a 'status' key are dropped.
 *
 * Accepts any iterable (array, Iterator, Generator), so it can be fed from
 * another pipeline stage or from an in-memory list.
 *
 * @param iterable<array<string, mixed>> $source Rows to filter.
 * @return \Generator<int, array<string, mixed>>
 */
function filterActive(iterable $source): \Generator
{
    foreach ($source as $row) {
        // ?? null avoids an "undefined array key" warning on incomplete rows.
        if (($row['status'] ?? null) === 'active') {
            yield $row;
        }
    }
}
/**
 * Pipeline stage: yields each row with its 'sku' trimmed and upper-cased.
 * All other fields are passed through unchanged; a row without a 'sku'
 * gets an empty-string 'sku'.
 *
 * Accepts any iterable (array, Iterator, Generator).
 *
 * @param iterable<array<string, mixed>> $source Rows to normalise.
 * @return \Generator<int, array<string, mixed>>
 */
function normalise(iterable $source): \Generator
{
    foreach ($source as $row) {
        // Cast so a missing or non-string SKU cannot raise a warning/TypeError.
        $sku = strtoupper(trim((string) ($row['sku'] ?? '')));
        yield [...$row, 'sku' => $sku];
    }
}
// Build the pipeline stage by stage: read -> filter -> normalise.
// Nothing is read until the foreach pulls the first row, and rows flow
// through the stages one at a time.
$rows = readCsv('/tmp/products.csv');
$activeRows = filterActive($rows);
$pipeline = normalise($activeRows);
foreach ($pipeline as $product) {
    $this->saveProduct($product);
}
Summary
Iterator and Generator solve the same problem – lazy sequential access – at different levels of abstraction. Iterator is explicit and fully controllable, which makes it the better choice for complex traversal logic. Generator is concise and well suited to linear pipelines. The memory advantage is dramatic: in the benchmark above, holding 100,000 records in an array takes roughly 85 MB, while streaming them through a generator stays under 1 MB. In Magento 2 this pattern is essential for CLI import/export commands that process large catalogues.
