PostgreSQL向量数据库支持:修订间差异
跳转到导航
跳转到搜索
| 第84行: | 第84行: | ||
=Amazon RDS for PostgreSQL - AWS pgvector= | =Amazon RDS for PostgreSQL - AWS pgvector= | ||
== tf create pg== | |||
<pre> | |||
cat main.tf | |||
provider "aws" { | |||
region = "ap-east-1" | |||
} | |||
# 1. 创建 VPC | |||
resource "aws_vpc" "rds_vpc" { | |||
cidr_block = "10.0.0.0/16" | |||
enable_dns_support = true | |||
enable_dns_hostnames = true | |||
tags = { | |||
Name = "rds-vpc" | |||
} | |||
} | |||
# 2. 创建两个子网(RDS 至少需要2个子网) | |||
resource "aws_subnet" "rds_subnet_1" { | |||
vpc_id = aws_vpc.rds_vpc.id | |||
cidr_block = "10.0.1.0/24" | |||
availability_zone = "ap-east-1a" | |||
map_public_ip_on_launch = true | |||
tags = { | |||
Name = "rds-subnet-1" | |||
} | |||
} | |||
resource "aws_subnet" "rds_subnet_2" { | |||
vpc_id = aws_vpc.rds_vpc.id | |||
cidr_block = "10.0.2.0/24" | |||
availability_zone = "ap-east-1b" | |||
map_public_ip_on_launch = true | |||
tags = { | |||
Name = "rds-subnet-2" | |||
} | |||
} | |||
# 3. 创建 Internet Gateway(让数据库可公网访问) | |||
resource "aws_internet_gateway" "rds_igw" { | |||
vpc_id = aws_vpc.rds_vpc.id | |||
tags = { | |||
Name = "rds-igw" | |||
} | |||
} | |||
# 4. 创建 Route Table 并关联子网 | |||
resource "aws_route_table" "rds_rt" { | |||
vpc_id = aws_vpc.rds_vpc.id | |||
route { | |||
cidr_block = "0.0.0.0/0" | |||
gateway_id = aws_internet_gateway.rds_igw.id | |||
} | |||
tags = { | |||
Name = "rds-route-table" | |||
} | |||
} | |||
resource "aws_route_table_association" "rds_rta_1" { | |||
subnet_id = aws_subnet.rds_subnet_1.id | |||
route_table_id = aws_route_table.rds_rt.id | |||
} | |||
resource "aws_route_table_association" "rds_rta_2" { | |||
subnet_id = aws_subnet.rds_subnet_2.id | |||
route_table_id = aws_route_table.rds_rt.id | |||
} | |||
# 5. 安全组,允许本地访问 PostgreSQL (5432) | |||
resource "aws_security_group" "rds_sg" { | |||
name = "rds-postgres-sg" | |||
description = "Allow PostgreSQL inbound traffic" | |||
vpc_id = aws_vpc.rds_vpc.id | |||
ingress { | |||
description = "PostgreSQL from my IP" | |||
from_port = 5432 | |||
to_port = 5432 | |||
protocol = "tcp" | |||
cidr_blocks = ["107.172.217.42/32"] # ⚠️ 改成你本机公网IP | |||
} | |||
egress { | |||
from_port = 0 | |||
to_port = 0 | |||
protocol = "-1" | |||
cidr_blocks = ["0.0.0.0/0"] | |||
} | |||
tags = { | |||
Name = "rds-sg" | |||
} | |||
} | |||
# 6. RDS Subnet Group | |||
resource "aws_db_subnet_group" "rds_subnet_group" { | |||
name = "rds-subnet-group" | |||
subnet_ids = [aws_subnet.rds_subnet_1.id, aws_subnet.rds_subnet_2.id] | |||
tags = { | |||
Name = "rds-subnet-group" | |||
} | |||
} | |||
# 7. 创建 RDS PostgreSQL 13.18 | |||
resource "aws_db_instance" "postgres" { | |||
identifier = "my-postgres-db" | |||
allocated_storage = 20 | |||
engine = "postgres" | |||
engine_version = "13.18" | |||
instance_class = "db.t3.micro" | |||
username = "myadmin" | |||
password = "YourPassword1234" # ⚠️ 建议改用 SSM/Secrets Manager | |||
db_subnet_group_name = aws_db_subnet_group.rds_subnet_group.name | |||
vpc_security_group_ids = [aws_security_group.rds_sg.id] | |||
publicly_accessible = true | |||
skip_final_snapshot = true | |||
tags = { | |||
Name = "rds-postgres-13-18" | |||
} | |||
} | |||
#################### | |||
# 输出信息 | |||
#################### | |||
output "rds_endpoint" { | |||
value = aws_db_instance.postgres.endpoint | |||
} | |||
root@racknerd-0955f20:~/tfpg# | |||
</> | |||
== check== | |||
<pre> | |||
f20:~/tfpg# psql -h my-postgres-db.chysey0o0hmr.ap-east-1.rds.amazonaws.com -U myadmin -d postgres | |||
Password for user myadmin: | |||
psql (13.22 (Ubuntu 13.22-1.pgdg22.04+1), server 13.18) | |||
SSL connection (protocol: TLSv1.2, cipher: ECDHE-RSA-AES256-GCM-SHA384, bits: 256, compression: off) | |||
Type "help" for help. | |||
postgres=> CREATE EXTENSION IF NOT EXISTS vector; | |||
CREATE EXTENSION | |||
postgres=> CREATE TABLE items ( | |||
id bigserial PRIMARY KEY, | |||
embedding vector(3) -- 三维向量 | |||
); | |||
CREATE TABLE | |||
postgres=> INSERT INTO items (embedding) VALUES ('[1,2,3]'), ('[4,5,6]'); | |||
INSERT 0 2 | |||
postgres=> -- 欧氏距离最近 | |||
SELECT * FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 1; | |||
id | embedding | |||
----+----------- | |||
1 | [1,2,3] | |||
(1 row) | |||
postgres=> | |||
</pre> | |||
[[category:devops]] | [[category:devops]] | ||
2025年8月20日 (三) 13:39的版本
起因 AI so hit
LLM version 13.18
PostgreSQL
install
https://www.postgresql.org/download/linux/debian/
PostgreSQL 内置支持
从 PostgreSQL 16 开始,内置了对向量操作的支持:
新增 vector数据类型
支持基本的向量运算
pgvector 扩展
pgvector 是 PostgreSQL 最流行的向量搜索扩展,它允许:
on u24.04
Ubuntu 24.04
sudo apt install -y curl gnupg lsb-release
2002 curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo gpg --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg
2003 echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list
2004 sudo apt update
2006 psgl --version
2007 sudo apt install -y postgresql-13 postgresql-client-13 postgresql-server-dev-13
2010 psql --version
2011 sudo systemctl start postgresql
2012 sudo apt install -y git make gcc
2015 git clone https://github.com/pgvector/pgvector.git
2016 cd pgvector/
2018 git checkout v0.7.0
2020 make
2022 make install
2023 sudo -i -u postgres
2024 sudo systemctl start postgresql
2025 sudo -i -u postgres
postgres@racknerd-0955f20:~$ sudo systemctl start postgresql
[sudo] password for postgres:
sudo: a password is required
postgres@racknerd-0955f20:~$
logout
root@racknerd-0955f20:~/pgvector# sudo systemctl start postgresql
root@racknerd-0955f20:~/pgvector# sudo -i -u postgres
postgres@racknerd-0955f20:~$ psql
psql (13.22 (Ubuntu 13.22-1.pgdg22.04+1))
Type "help" for help.
postgres=# CREATE DATABASE testdb;
ERROR: database "testdb" already exists
postgres=# \c testdb
You are now connected to database "testdb" as user "postgres".
testdb=# CREATE EXTENSION vector;
CREATE EXTENSION
testdb=# CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3));
CREATE TABLE
testdb=# INSERT INTO items (embedding) VALUES ('[1,2,3]');
INSERT 0 1
testdb=# SELECT * FROM items;
id | embedding
----+-----------
1 | [1,2,3]
(1 row)
testdb=#
Amazon RDS for PostgreSQL - AWS pgvector
tf create pg
cat main.tf
provider "aws" {
region = "ap-east-1"
}
# 1. 创建 VPC
resource "aws_vpc" "rds_vpc" {
cidr_block = "10.0.0.0/16"
enable_dns_support = true
enable_dns_hostnames = true
tags = {
Name = "rds-vpc"
}
}
# 2. 创建两个子网(RDS 至少需要2个子网)
resource "aws_subnet" "rds_subnet_1" {
vpc_id = aws_vpc.rds_vpc.id
cidr_block = "10.0.1.0/24"
availability_zone = "ap-east-1a"
map_public_ip_on_launch = true
tags = {
Name = "rds-subnet-1"
}
}
resource "aws_subnet" "rds_subnet_2" {
vpc_id = aws_vpc.rds_vpc.id
cidr_block = "10.0.2.0/24"
availability_zone = "ap-east-1b"
map_public_ip_on_launch = true
tags = {
Name = "rds-subnet-2"
}
}
# 3. 创建 Internet Gateway(让数据库可公网访问)
resource "aws_internet_gateway" "rds_igw" {
vpc_id = aws_vpc.rds_vpc.id
tags = {
Name = "rds-igw"
}
}
# 4. 创建 Route Table 并关联子网
resource "aws_route_table" "rds_rt" {
vpc_id = aws_vpc.rds_vpc.id
route {
cidr_block = "0.0.0.0/0"
gateway_id = aws_internet_gateway.rds_igw.id
}
tags = {
Name = "rds-route-table"
}
}
resource "aws_route_table_association" "rds_rta_1" {
subnet_id = aws_subnet.rds_subnet_1.id
route_table_id = aws_route_table.rds_rt.id
}
resource "aws_route_table_association" "rds_rta_2" {
subnet_id = aws_subnet.rds_subnet_2.id
route_table_id = aws_route_table.rds_rt.id
}
# 5. 安全组,允许本地访问 PostgreSQL (5432)
resource "aws_security_group" "rds_sg" {
name = "rds-postgres-sg"
description = "Allow PostgreSQL inbound traffic"
vpc_id = aws_vpc.rds_vpc.id
ingress {
description = "PostgreSQL from my IP"
from_port = 5432
to_port = 5432
protocol = "tcp"
cidr_blocks = ["107.172.217.42/32"] # ⚠️ 改成你本机公网IP
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = {
Name = "rds-sg"
}
}
# 6. RDS Subnet Group
resource "aws_db_subnet_group" "rds_subnet_group" {
name = "rds-subnet-group"
subnet_ids = [aws_subnet.rds_subnet_1.id, aws_subnet.rds_subnet_2.id]
tags = {
Name = "rds-subnet-group"
}
}
# 7. 创建 RDS PostgreSQL 13.18
resource "aws_db_instance" "postgres" {
identifier = "my-postgres-db"
allocated_storage = 20
engine = "postgres"
engine_version = "13.18"
instance_class = "db.t3.micro"
username = "myadmin"
password = "YourPassword1234" # ⚠️ 建议改用 SSM/Secrets Manager
db_subnet_group_name = aws_db_subnet_group.rds_subnet_group.name
vpc_security_group_ids = [aws_security_group.rds_sg.id]
publicly_accessible = true
skip_final_snapshot = true
tags = {
Name = "rds-postgres-13-18"
}
}
####################
# 输出信息
####################
output "rds_endpoint" {
value = aws_db_instance.postgres.endpoint
}
root@racknerd-0955f20:~/tfpg#
</>
== check==
<pre>
f20:~/tfpg# psql -h my-postgres-db.chysey0o0hmr.ap-east-1.rds.amazonaws.com -U myadmin -d postgres
Password for user myadmin:
psql (13.22 (Ubuntu 13.22-1.pgdg22.04+1), server 13.18)
SSL connection (protocol: TLSv1.2, cipher: ECDHE-RSA-AES256-GCM-SHA384, bits: 256, compression: off)
Type "help" for help.
postgres=> CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION
postgres=> CREATE TABLE items (
id bigserial PRIMARY KEY,
embedding vector(3) -- 三维向量
);
CREATE TABLE
postgres=> INSERT INTO items (embedding) VALUES ('[1,2,3]'), ('[4,5,6]');
INSERT 0 2
postgres=> -- 欧氏距离最近
SELECT * FROM items ORDER BY embedding <-> '[3,1,2]' LIMIT 1;
id | embedding
----+-----------
1 | [1,2,3]
(1 row)
postgres=>