Laravel 13 与 PostgreSQL pgvector 完全指南
摘要
PostgreSQL 的 pgvector 扩展为 Laravel 13 的向量搜索提供了强大的数据库支持。本文将深入讲解 Laravel 13 与 pgvector 的集成,包括:
- pgvector 扩展安装与配置
- Laravel 模型与向量字段
- 向量索引与性能优化
- 相似性搜索查询
- 与 Laravel AI SDK 配合
- 实战案例:构建推荐系统
本文适合希望构建向量搜索功能的 Laravel 开发者。
1. pgvector 概述
1.1 什么是 pgvector
pgvector 是 PostgreSQL 的开源扩展,提供向量存储和相似性搜索功能:
- 存储高维向量
- 执行相似性搜索
- 支持多种距离度量
- 与 PostgreSQL 事务兼容
1.2 支持的距离度量
| 度量 | 描述 | 使用场景 |
|---|---|---|
| L2 距离 | 欧几里得距离 | 图像相似性 |
| 内积 | 点积 | 推荐系统 |
| 余弦距离 | 角度相似性 | 文本语义 |
2. 安装与配置
2.1 安装 pgvector
1 2 3 4 5 6 7 8 9 10 11
# Pick the ONE variant that matches your platform.

# macOS (Homebrew)
brew install pgvector

# Debian/Ubuntu with PostgreSQL 16 (package from the PostgreSQL APT repository)
sudo apt install postgresql-16-pgvector

# Docker: official pgvector image. Port 5432 is published so the Laravel
# connection settings (DB_HOST=127.0.0.1, DB_PORT=5432) can reach the container.
docker run -d \
  --name postgres-pgvector \
  -e POSTGRES_PASSWORD=secret \
  -p 5432:5432 \
  pgvector/pgvector:pg16
|
2.2 启用扩展
1
| CREATE EXTENSION IF NOT EXISTS vector; -- enable pgvector; run once in each database that will store vectors
|
2.3 Laravel 配置
1 2 3 4 5 6 7 8 9 10 11 12 13 14
// PostgreSQL connection entry (in a stock Laravel app this sits under
// 'connections' in config/database.php). pgvector needs no extra keys here.
'pgsql' => [
    'driver' => 'pgsql',

    // A full DATABASE_URL, when set, is read alongside the discrete settings below.
    'url' => env('DATABASE_URL'),
    'host' => env('DB_HOST', '127.0.0.1'),
    'port' => env('DB_PORT', '5432'),
    'database' => env('DB_DATABASE', 'forge'),
    'username' => env('DB_USERNAME', 'forge'),
    'password' => env('DB_PASSWORD', ''),

    'charset' => 'utf8',
    'prefix' => '',
    'search_path' => 'public',
    'sslmode' => 'prefer',
],
|
3. 数据库迁移
3.1 创建向量表
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| <?php
use Illuminate\Database\Migrations\Migration; use Illuminate\Database\Schema\Blueprint; use Illuminate\Support\Facades\Schema; use Illuminate\Support\Facades\DB;
return new class extends Migration
{
    /**
     * Create the `documents` table with a 1536-dimension embedding column
     * and an approximate-nearest-neighbour index for cosine distance.
     */
    public function up(): void
    {
        // pgvector must be enabled before a `vector` column can be created.
        DB::statement('CREATE EXTENSION IF NOT EXISTS vector');

        Schema::create('documents', function (Blueprint $table): void {
            $table->id();
            $table->string('title');
            $table->text('content');
            $table->vector('embedding', 1536);
            $table->timestamps();
        });

        // HNSW instead of IVFFlat: the table is empty at migration time, and
        // IVFFlat derives its lists from existing rows, so building it now
        // would give poor recall. HNSW has no training step.
        DB::statement(
            'CREATE INDEX documents_embedding_idx ON documents '
            . 'USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64)'
        );
    }

    /**
     * Drop the `documents` table (its index is dropped with it).
     *
     * The `vector` extension is deliberately left installed: other tables
     * (for example a `products.embedding` column) may depend on it, and
     * DROP EXTENSION would then fail or, with CASCADE, destroy their columns.
     */
    public function down(): void
    {
        Schema::dropIfExists('documents');
    }
};
|
3.2 添加向量列
1 2 3
// Add a nullable 1536-dimension embedding column to the existing `products` table.
Schema::table('products', static fn (Blueprint $table) => $table
    ->vector('embedding', 1536)
    ->nullable());
|
4. 模型定义
4.1 基本模型
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| <?php
namespace App\Models;
use Illuminate\Database\Eloquent\Model;
/**
 * A stored document together with its embedding vector.
 */
class Document extends Model
{
    /**
     * Attributes that may be mass assigned.
     *
     * @var array<int, string>
     */
    protected $fillable = [
        'title',
        'content',
        'embedding',
    ];

    /**
     * Attribute casts applied when reading and writing the model.
     *
     * @var array<string, string>
     */
    protected $casts = ['embedding' => 'vector'];
}
|
4.2 自动生成嵌入
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| <?php
namespace App\Models;
use Illuminate\Database\Eloquent\Model; use Laravel\Ai\Embeddings;
/**
 * A stored document whose embedding is generated from its content
 * when the model is created or its content changes.
 */
class Document extends Model
{
    /**
     * Attributes that may be mass assigned.
     *
     * @var array<int, string>
     */
    protected $fillable = [
        'title',
        'content',
        'embedding',
    ];

    /**
     * Attribute casts applied when reading and writing the model.
     *
     * @var array<string, string>
     */
    protected $casts = ['embedding' => 'vector'];

    /**
     * Register the model events that keep `embedding` in step with `content`.
     */
    protected static function booted(): void
    {
        // On insert: only fill in an embedding the caller did not supply.
        static::creating(function (Document $document) {
            if (! empty($document->embedding)) {
                return;
            }

            $document->embedding = self::embeddingFor($document->content);
        });

        // On update: regenerate only when the content itself was modified.
        static::updating(function (Document $document) {
            if (! $document->isDirty('content')) {
                return;
            }

            $document->embedding = self::embeddingFor($document->content);
        });
    }

    /**
     * Generate the embedding vector for the given text.
     *
     * NOTE(review): confirm `Embeddings::from()` / `->vector` against the
     * installed Laravel AI SDK version before relying on this call.
     */
    private static function embeddingFor($text)
    {
        return Embeddings::from($text)->generate()->vector;
    }
}
|
5. 向量查询
5.1 相似性搜索
1 2 3 4 5 6 7 8 9 10
| use App\Models\Document; use Laravel\Ai\Embeddings;
// Turn the search text into an embedding.
// NOTE(review): confirm `Embeddings::from()` / `->vector` against the installed Laravel AI SDK version.
$query = 'search query';
$embeddingResponse = Embeddings::from($query)->generate();
$queryEmbedding = $embeddingResponse->vector;

// Fetch at most 10 documents matched against that embedding.
$documents = Document::whereVectorSimilarTo('embedding', $queryEmbedding)
    ->take(10)
    ->get();
|
5.2 使用原始 SQL
1 2 3 4 5 6
// A PHP array cannot be bound to a single "?" placeholder, so serialise the
// embedding into pgvector's text form first, e.g. "[0.1,0.2,0.3]".
$queryVector = json_encode(array_values($queryEmbedding), JSON_THROW_ON_ERROR);

// `<=>` is pgvector's cosine-distance operator: smaller means more similar.
$documents = DB::table('documents')
    ->select('*')
    ->selectRaw('embedding <=> ?::vector AS distance', [$queryVector])
    ->orderBy('distance')
    ->limit(10)
    ->get();
|
5.3 余弦相似度
1 2 3 4 5 6
// A PHP array cannot be bound to a single "?" placeholder, so serialise the
// embedding into pgvector's text form first, e.g. "[0.1,0.2,0.3]".
$queryVector = json_encode(array_values($queryEmbedding), JSON_THROW_ON_ERROR);

// Cosine similarity = 1 - cosine distance. The ordering is expressed as
// ascending distance (same row order as descending similarity) because
// pgvector only uses its ANN indexes for "ORDER BY <distance> ASC LIMIT n".
$documents = DB::table('documents')
    ->select('*')
    ->selectRaw('1 - (embedding <=> ?::vector) AS similarity', [$queryVector])
    ->orderByRaw('embedding <=> ?::vector', [$queryVector])
    ->limit(10)
    ->get();
|
6. 向量索引
6.1 IVFFlat 索引
1 2 3
-- IVFFlat index for cosine distance.
-- Build it after the table holds data; the pgvector docs suggest starting
-- with lists = rows / 1000 for tables of up to about 1M rows.
CREATE INDEX documents_embedding_idx
    ON documents
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 100);
|
6.2 HNSW 索引
1 2 3
-- HNSW index for cosine distance.
-- m = 16 and ef_construction = 64 are pgvector's documented defaults.
CREATE INDEX documents_embedding_hnsw_idx
    ON documents
    USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);
|
6.3 索引选择
| 索引类型 | 构建速度 | 查询速度 | 内存使用 |
|---|---|---|---|
| IVFFlat | 快 | 中等 | 低 |
| HNSW | 慢 | 快 | 高 |
7. 实战案例:推荐系统
7.1 产品推荐
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
| <?php
namespace App\Services;
use App\Models\Product;
use App\Models\Purchase;
use Illuminate\Database\Eloquent\Collection;
use Laravel\Ai\Embeddings;
/**
 * Product recommendations based on embedding similarity.
 */
class RecommendationService
{
    /**
     * Products whose embedding is similar to the given product's, excluding the product itself.
     *
     * Returns an empty collection when the product has no embedding yet.
     */
    public function getSimilarProducts(Product $product, int $limit = 10): Collection
    {
        if (empty($product->embedding)) {
            return new Collection();
        }

        return Product::query()
            ->where('id', '!=', $product->id)
            ->whereVectorSimilarTo('embedding', $product->embedding)
            ->limit($limit)
            ->get();
    }

    /**
     * Products similar to the average embedding of everything the user has purchased.
     *
     * Returns an empty collection when the user has no purchases with an embedding.
     */
    public function getPersonalizedRecommendations(int $userId, int $limit = 10): Collection
    {
        $userEmbedding = $this->getUserEmbedding($userId);

        // No purchase history means no query vector to search with.
        if ($userEmbedding === []) {
            return new Collection();
        }

        return Product::query()
            ->whereVectorSimilarTo('embedding', $userEmbedding)
            ->limit($limit)
            ->get();
    }

    /**
     * Build a per-user vector by averaging the embeddings of purchased products.
     *
     * @return array<int, float> Empty when no usable embedding exists.
     */
    private function getUserEmbedding(int $userId): array
    {
        $purchases = Purchase::where('user_id', $userId)
            ->with('product')
            ->get();

        // The related product may be missing (null relation); `?->` yields null,
        // which averageEmbeddings() skips.
        $embeddings = $purchases->map(fn ($purchase) => $purchase->product?->embedding);

        return $this->averageEmbeddings($embeddings);
    }

    /**
     * Component-wise mean of the given embeddings.
     *
     * Null or empty embeddings are skipped. The dimension is taken from the
     * first usable embedding.
     *
     * @param  iterable<mixed>  $embeddings
     * @return array<int, float> Empty when there is nothing to average.
     */
    private function averageEmbeddings(iterable $embeddings): array
    {
        $totals = [];
        $count = 0;

        foreach ($embeddings as $embedding) {
            if (empty($embedding)) {
                continue;
            }

            if ($totals === []) {
                $totals = array_fill(0, count($embedding), 0.0);
            }

            foreach ($totals as $i => $sum) {
                $totals[$i] = $sum + $embedding[$i];
            }

            $count++;
        }

        // Guard against division by zero when no embedding was usable.
        if ($count === 0) {
            return [];
        }

        return array_map(static fn (float $sum): float => $sum / $count, $totals);
    }
}
|
7.2 内容推荐
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
| <?php
namespace App\Services;
use App\Models\Article; use Laravel\Ai\Embeddings;
/**
 * Related-article lookups based on embedding similarity.
 */
class ContentRecommendationService
{
    /**
     * Articles in the same category whose embedding is close to the given article's.
     *
     * The 0.7 threshold is passed as `minSimilarity`, so filtering happens
     * inside the vector clause. The previous form filtered with HAVING on a
     * select alias, which PostgreSQL rejects, and relied on a
     * `withSimilarityScore()` call that is not a documented builder method.
     *
     * NOTE(review): per the Laravel docs the clause orders by relevance itself;
     * if callers need the score on each row, add `selectVectorDistance()`.
     *
     * Returns an empty collection when the article has no embedding yet.
     */
    public function getRelatedArticles(Article $article, int $limit = 5)
    {
        if (empty($article->embedding)) {
            return $article->newCollection();
        }

        return Article::query()
            ->where('id', '!=', $article->id)
            ->where('category_id', $article->category_id)
            ->whereVectorSimilarTo('embedding', $article->embedding, minSimilarity: 0.7)
            ->limit($limit)
            ->get();
    }
}
|
8. 性能优化
8.1 批量插入
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
| use Laravel\Ai\Embeddings;
/**
 * Bulk-imports documents, generating all embeddings in one batch call.
 */
class DocumentImportService
{
    /**
     * Insert the given documents with freshly generated embeddings.
     *
     * @param  array<array-key, array{title: string, content: string}>  $documents
     */
    public function import(array $documents): void
    {
        if ($documents === []) {
            return;
        }

        // Re-index so that position N in $texts, $embeddings and $documents
        // always refers to the same document, whatever keys the caller used.
        $documents = array_values($documents);
        $texts = array_map(static fn (array $doc) => $doc['content'], $documents);

        // NOTE(review): confirm `Embeddings::batch()` / `->vector` against the
        // installed Laravel AI SDK version.
        $embeddings = Embeddings::batch($texts)->generate();

        // One timestamp for the whole batch.
        $timestamp = now();

        $rows = [];
        foreach ($documents as $index => $doc) {
            $rows[] = [
                'title' => $doc['title'],
                'content' => $doc['content'],
                // insert() bypasses Eloquent casts, so the vector has to be
                // serialised by hand; a raw PHP array cannot be bound.
                'embedding' => $this->toVectorLiteral($embeddings[$index]->vector),
                'created_at' => $timestamp,
                'updated_at' => $timestamp,
            ];
        }

        Document::insert($rows);
    }

    /**
     * Serialise a list of numbers into pgvector's text form, e.g. "[0.5,-1.25,3]".
     *
     * @param  array<array-key, int|float>  $vector
     */
    private function toVectorLiteral(array $vector): string
    {
        return json_encode(array_values($vector), JSON_THROW_ON_ERROR);
    }
}
|
8.2 分区表
1 2
-- Partition holding rows from 2026-01-01 (inclusive) to 2027-01-01 (exclusive).
-- Prerequisite: `documents` must itself have been created as a partitioned
-- table (PARTITION BY RANGE on the column these bounds refer to); a regular
-- table created with a plain Schema::create() cannot have partitions attached.
CREATE TABLE documents_2026
    PARTITION OF documents
    FOR VALUES FROM ('2026-01-01') TO ('2027-01-01');
|
8.3 查询优化
1 2 3 4 5 6 7
// Restrict by ordinary columns, apply the vector similarity clause,
// and keep at most 10 rows.
$documents = Document::where('category_id', $categoryId)
    ->where('status', 'published')
    ->whereVectorSimilarTo('embedding', $queryEmbedding)
    ->take(10)
    ->get();
|
9. 总结
Laravel 13 与 pgvector 的集成为构建向量搜索应用提供了强大支持:
- 原生支持:Laravel 13 内置向量查询方法
- 高性能:pgvector 提供高效的相似性搜索
- 灵活索引:支持 IVFFlat 和 HNSW 索引
- 与 AI SDK 配合:无缝生成嵌入
通过本指南,您已经掌握了 pgvector 的核心用法,可以开始构建向量搜索应用了。
参考资料