src/lib/embedding.ts
Annotation non disponible
Lance npm run annotate (nécessite ANTHROPIC_API_KEY dans .env.local) pour générer une annotation française par Claude Haiku 4.5.
5 exports
embedImage · embeddingToPgVector · cosineSimilarity · ImageEmbedding · EmbeddingResult
Code source · typescript
// V2 Search par image foundation — CLIP embeddings via HuggingFace Inference API.
// L'embedding 512-dim sert au cosine similarity dans pgvector pour recherche
// produit visuelle 1000× plus rapide que la V1 (Gemini per-query).
import { assertImageUrlSafe } from "@/lib/snap-to-list";
// Bearer token for the HuggingFace Inference API, read once at module load from
// the HUGGINGFACE_TOKEN environment variable. May be undefined; embedImage
// throws in that case.
const HF_TOKEN = process.env.HUGGINGFACE_TOKEN;
// Model identifier interpolated into the inference endpoint URL.
const HF_MODEL = "openai/clip-vit-base-patch32";
// Expected embedding length; embedImage rejects responses of any other length.
const EMBED_DIM = 512;
// A single image embedding as a plain array of numbers. The length is not
// enforced by the type; embedImage checks it against EMBED_DIM at runtime.
export type ImageEmbedding = number[]; // length 512
// Result returned by embedImage.
export type EmbeddingResult = {
// The embedding vector returned by the API.
embedding: ImageEmbedding;
// Label identifying the embedding model/version that produced the vector.
version: string;
// Elapsed wall-clock milliseconds, measured from just before the HTTP request
// until the result object is built.
durationMs: number;
};
/**
 * Generates the CLIP embedding of an image via the HuggingFace Inference API.
 *
 * The image URL is first passed to `assertImageUrlSafe` (presumably an
 * SSRF/unsafe-URL guard; confirm in `@/lib/snap-to-list`), then POSTed as the
 * `inputs` field to the model endpoint. The response may be either a bare
 * array or an object carrying the array under `feature` or `embeddings`.
 *
 * @param imageUrl URL of the image to embed.
 * @returns The embedding, a version label and the elapsed time in milliseconds.
 * @throws Error if `HUGGINGFACE_TOKEN` is not configured, if the API answers
 *   with a non-2xx status, if the response body has an unexpected shape, if the
 *   vector length differs from `EMBED_DIM`, or if any element is not a finite
 *   number.
 */
export async function embedImage(imageUrl: string): Promise<EmbeddingResult> {
  assertImageUrlSafe(imageUrl);
  if (!HF_TOKEN) throw new Error("HUGGINGFACE_TOKEN non configuré");

  const t0 = Date.now();
  const res = await fetch(`https://api-inference.huggingface.co/models/${HF_MODEL}`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${HF_TOKEN}`,
      "Content-Type": "application/json",
    },
    // NOTE(review): wait_for_model presumably makes the API block while the
    // model loads instead of failing immediately; confirm against HF docs.
    body: JSON.stringify({ inputs: imageUrl, options: { wait_for_model: true } }),
  });
  if (!res.ok) {
    // Best effort: include a truncated body for diagnosis, tolerate read failures.
    const txt = await res.text().catch(() => "");
    throw new Error(`HF embed failed ${res.status}: ${txt.slice(0, 200)}`);
  }

  // Treat the payload as untrusted: narrow from `unknown` instead of casting.
  const data: unknown = await res.json();
  let candidate: unknown;
  if (Array.isArray(data)) {
    candidate = data;
  } else if (typeof data === "object" && data !== null) {
    // The null check matters: a JSON `null` body would otherwise crash with a
    // TypeError on property access instead of the shape error below.
    const record = data as { feature?: unknown; embeddings?: unknown };
    if (Array.isArray(record.feature)) candidate = record.feature;
    else if (Array.isArray(record.embeddings)) candidate = record.embeddings;
  }
  if (!Array.isArray(candidate)) throw new Error("HF response shape inattendue");

  if (candidate.length !== EMBED_DIM) {
    throw new Error(`Embedding dim ${candidate.length} != ${EMBED_DIM}`);
  }
  // Reject nested arrays, strings, null, NaN or Infinity before they can be
  // serialized into a vector literal.
  const allFinite = candidate.every(
    (value: unknown): value is number => typeof value === "number" && Number.isFinite(value),
  );
  if (!allFinite) throw new Error("HF embedding contient des valeurs non finies");
  const embedding: number[] = candidate;

  return {
    embedding,
    version: "clip-vit-b-32-v1",
    durationMs: Date.now() - t0,
  };
}
/**
 * Serializes an embedding as a bracketed, comma-separated list, e.g.
 * `[0.1,0.2,0.3]`, the text form intended for a Postgres vector literal.
 *
 * @param emb Embedding to serialize.
 * @returns The components joined by commas (no spaces) and wrapped in `[` `]`.
 */
export function embeddingToPgVector(emb: ImageEmbedding): string {
  const components = emb.join(",");
  return "[" + components + "]";
}
/**
 * Cosine similarity between two vectors (handy for debugging).
 *
 * @param a First vector.
 * @param b Second vector; must have the same length as `a`.
 * @returns The dot product divided by the product of the Euclidean norms.
 *   Returns 0 when either vector has zero norm (including two empty vectors),
 *   where the similarity is undefined, rather than propagating NaN from 0/0.
 * @throws Error("dim mismatch") if the vectors differ in length.
 */
export function cosineSimilarity(a: ImageEmbedding, b: ImageEmbedding): number {
  if (a.length !== b.length) throw new Error("dim mismatch");
  let dot = 0;
  let na = 0;
  let nb = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    na += a[i] * a[i];
    nb += b[i] * b[i];
  }
  const denom = Math.sqrt(na) * Math.sqrt(nb);
  // Guard the division: a zero-norm vector would otherwise yield NaN.
  if (denom === 0) return 0;
  return dot / denom;
}
// V2 Search par image foundation — CLIP embeddings via HuggingFace Inference API.
// L'embedding 512-dim sert au cosine similarity dans pgvector pour recherche
// produit visuelle 1000× plus rapide que la V1 (Gemini per-query).
import { assertImageUrlSafe } from "@/lib/snap-to-list";
// Bearer token for the HuggingFace Inference API, read once at module load from
// the HUGGINGFACE_TOKEN environment variable. May be undefined; embedImage
// throws in that case.
const HF_TOKEN = process.env.HUGGINGFACE_TOKEN;
// Model identifier interpolated into the inference endpoint URL.
const HF_MODEL = "openai/clip-vit-base-patch32";
// Expected embedding length; embedImage rejects responses of any other length.
const EMBED_DIM = 512;
// A single image embedding as a plain array of numbers. The length is not
// enforced by the type; embedImage checks it against EMBED_DIM at runtime.
export type ImageEmbedding = number[]; // length 512
// Result returned by embedImage.
export type EmbeddingResult = {
// The embedding vector returned by the API.
embedding: ImageEmbedding;
// Label identifying the embedding model/version that produced the vector.
version: string;
// Elapsed wall-clock milliseconds, measured from just before the HTTP request
// until the result object is built.
durationMs: number;
};
/**
 * Generates the CLIP embedding of an image via the HuggingFace Inference API.
 *
 * The image URL is first passed to `assertImageUrlSafe` (presumably an
 * SSRF/unsafe-URL guard; confirm in `@/lib/snap-to-list`), then POSTed as the
 * `inputs` field to the model endpoint. The response may be either a bare
 * array or an object carrying the array under `feature` or `embeddings`.
 *
 * @param imageUrl URL of the image to embed.
 * @returns The embedding, a version label and the elapsed time in milliseconds.
 * @throws Error if `HUGGINGFACE_TOKEN` is not configured, if the API answers
 *   with a non-2xx status, if the response body has an unexpected shape, if the
 *   vector length differs from `EMBED_DIM`, or if any element is not a finite
 *   number.
 */
export async function embedImage(imageUrl: string): Promise<EmbeddingResult> {
  assertImageUrlSafe(imageUrl);
  if (!HF_TOKEN) throw new Error("HUGGINGFACE_TOKEN non configuré");

  const t0 = Date.now();
  const res = await fetch(`https://api-inference.huggingface.co/models/${HF_MODEL}`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${HF_TOKEN}`,
      "Content-Type": "application/json",
    },
    // NOTE(review): wait_for_model presumably makes the API block while the
    // model loads instead of failing immediately; confirm against HF docs.
    body: JSON.stringify({ inputs: imageUrl, options: { wait_for_model: true } }),
  });
  if (!res.ok) {
    // Best effort: include a truncated body for diagnosis, tolerate read failures.
    const txt = await res.text().catch(() => "");
    throw new Error(`HF embed failed ${res.status}: ${txt.slice(0, 200)}`);
  }

  // Treat the payload as untrusted: narrow from `unknown` instead of casting.
  const data: unknown = await res.json();
  let candidate: unknown;
  if (Array.isArray(data)) {
    candidate = data;
  } else if (typeof data === "object" && data !== null) {
    // The null check matters: a JSON `null` body would otherwise crash with a
    // TypeError on property access instead of the shape error below.
    const record = data as { feature?: unknown; embeddings?: unknown };
    if (Array.isArray(record.feature)) candidate = record.feature;
    else if (Array.isArray(record.embeddings)) candidate = record.embeddings;
  }
  if (!Array.isArray(candidate)) throw new Error("HF response shape inattendue");

  if (candidate.length !== EMBED_DIM) {
    throw new Error(`Embedding dim ${candidate.length} != ${EMBED_DIM}`);
  }
  // Reject nested arrays, strings, null, NaN or Infinity before they can be
  // serialized into a vector literal.
  const allFinite = candidate.every(
    (value: unknown): value is number => typeof value === "number" && Number.isFinite(value),
  );
  if (!allFinite) throw new Error("HF embedding contient des valeurs non finies");
  const embedding: number[] = candidate;

  return {
    embedding,
    version: "clip-vit-b-32-v1",
    durationMs: Date.now() - t0,
  };
}
/**
 * Serializes an embedding as a bracketed, comma-separated list, e.g.
 * `[0.1,0.2,0.3]`, the text form intended for a Postgres vector literal.
 *
 * @param emb Embedding to serialize.
 * @returns The components joined by commas (no spaces) and wrapped in `[` `]`.
 */
export function embeddingToPgVector(emb: ImageEmbedding): string {
  const components = emb.join(",");
  return "[" + components + "]";
}
/**
 * Cosine similarity between two vectors (handy for debugging).
 *
 * @param a First vector.
 * @param b Second vector; must have the same length as `a`.
 * @returns The dot product divided by the product of the Euclidean norms.
 *   Returns 0 when either vector has zero norm (including two empty vectors),
 *   where the similarity is undefined, rather than propagating NaN from 0/0.
 * @throws Error("dim mismatch") if the vectors differ in length.
 */
export function cosineSimilarity(a: ImageEmbedding, b: ImageEmbedding): number {
  if (a.length !== b.length) throw new Error("dim mismatch");
  let dot = 0;
  let na = 0;
  let nb = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    na += a[i] * a[i];
    nb += b[i] * b[i];
  }
  const denom = Math.sqrt(na) * Math.sqrt(nb);
  // Guard the division: a zero-norm vector would otherwise yield NaN.
  if (denom === 0) return 0;
  return dot / denom;
}