src/lib/embedding.ts

function·app·2.6 KB · 80 lignes· Voir l'itinéraire
Annotation non disponible

Lance npm run annotate (nécessite ANTHROPIC_API_KEY dans .env.local) pour générer une annotation française par Claude Haiku 4.5.

5 exports

embedImage, embeddingToPgVector, cosineSimilarity, ImageEmbedding, EmbeddingResult

Code source· typescript

// V2 Search par image foundation — CLIP embeddings via HuggingFace Inference API.
// L'embedding 512-dim sert au cosine similarity dans pgvector pour recherche
// produit visuelle 1000× plus rapide que la V1 (Gemini per-query).

import { assertImageUrlSafe } from "@/lib/snap-to-list";

// Hugging Face API token read from the environment; may be undefined, in which case embedImage throws.
const HF_TOKEN = process.env.HUGGINGFACE_TOKEN;
// Model identifier interpolated into the Hugging Face inference endpoint URL.
const HF_MODEL = "openai/clip-vit-base-patch32";
// Expected embedding length; embedImage rejects responses of any other size.
const EMBED_DIM = 512;

/** Image embedding vector; embedImage only returns vectors of EMBED_DIM (512) entries. */
export type ImageEmbedding = number[]; // length 512

/** Outcome of a single embedding request. */
export type EmbeddingResult = {
  /** The embedding vector extracted from the API response. */
  embedding: ImageEmbedding;
  /** Version tag for the embedding scheme (embedImage sets "clip-vit-b-32-v1"). */
  version: string;
  /** Elapsed time in milliseconds, measured from just before the request until the result is built. */
  durationMs: number;
};

/**
 * Pulls the embedding array out of a Hugging Face response payload.
 *
 * Accepts either a bare array or an object carrying the array under
 * `feature` or `embeddings` (checked in that order).
 *
 * @throws Error when the payload matches none of those shapes (including `null`).
 */
function extractEmbeddingArray(payload: unknown): unknown[] {
  if (Array.isArray(payload)) return payload;
  if (typeof payload === "object" && payload !== null) {
    const { feature, embeddings } = payload as { feature?: unknown; embeddings?: unknown };
    if (Array.isArray(feature)) return feature;
    if (Array.isArray(embeddings)) return embeddings;
  }
  throw new Error("HF response shape inattendue");
}

/**
 * Generates the CLIP embedding (EMBED_DIM entries) of an image through the
 * Hugging Face Inference API.
 *
 * @param imageUrl URL of the image to embed. It is first passed to
 *   `assertImageUrlSafe` (presumably throws on an unsafe URL — confirm against
 *   its definition).
 * @returns The embedding, its version tag and the elapsed time in milliseconds.
 * @throws Error when HUGGINGFACE_TOKEN is not configured, when the API answers
 *   with a non-2xx status, when the response has an unexpected shape or
 *   length, or when the vector contains anything other than finite numbers.
 */
export async function embedImage(imageUrl: string): Promise<EmbeddingResult> {
  assertImageUrlSafe(imageUrl);
  if (!HF_TOKEN) throw new Error("HUGGINGFACE_TOKEN non configuré");

  const t0 = Date.now();
  const res = await fetch(`https://api-inference.huggingface.co/models/${HF_MODEL}`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${HF_TOKEN}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ inputs: imageUrl, options: { wait_for_model: true } }),
  });

  if (!res.ok) {
    // Best effort: the body is only used to enrich the error message.
    const txt = await res.text().catch(() => "");
    throw new Error(`HF embed failed ${res.status}: ${txt.slice(0, 200)}`);
  }

  // Treat the JSON as untrusted and narrow it explicitly instead of casting.
  const payload: unknown = await res.json();
  const values = extractEmbeddingArray(payload);

  if (values.length !== EMBED_DIM) {
    throw new Error(`Embedding dim ${values.length} != ${EMBED_DIM}`);
  }

  // Reject nested arrays, strings, nulls, NaN and Infinity before the vector
  // can be serialized and stored.
  if (!values.every((v): v is number => typeof v === "number" && Number.isFinite(v))) {
    throw new Error("HF embedding contains non-numeric values");
  }

  return {
    embedding: values,
    version: "clip-vit-b-32-v1",
    durationMs: Date.now() - t0,
  };
}

/**
 * Serializes an embedding as a Postgres vector literal, e.g. "[0.1,0.2,...]".
 *
 * @param emb Embedding to serialize.
 * @returns The components joined by commas and wrapped in square brackets.
 */
export function embeddingToPgVector(emb: ImageEmbedding): string {
  const components = emb.join(",");
  return "[" + components + "]";
}

/**
 * Cosine similarity between two vectors (handy for debugging).
 *
 * @param a First vector.
 * @param b Second vector; must have the same length as `a`.
 * @returns The dot product divided by the product of the two Euclidean norms,
 *   or 0 when that product is zero (empty or all-zero vector), since the
 *   similarity is undefined there and would otherwise be NaN.
 * @throws Error("dim mismatch") when the lengths differ.
 */
export function cosineSimilarity(a: ImageEmbedding, b: ImageEmbedding): number {
  if (a.length !== b.length) throw new Error("dim mismatch");

  let dot = 0;
  let normSqA = 0;
  let normSqB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normSqA += a[i] * a[i];
    normSqB += b[i] * b[i];
  }

  // Take the square roots separately so the product of squared norms cannot overflow.
  const denom = Math.sqrt(normSqA) * Math.sqrt(normSqB);
  // Avoid 0/0 = NaN silently propagating into callers.
  if (denom === 0) return 0;
  return dot / denom;
}