Code Examples

Copy-paste-ready examples for common vector search patterns. Each example uses the veep Python SDK.

RAG: Retrieval-Augmented Generation

LLM Integration
Embed documents with OpenAI, store them in Vector Panda, and retrieve relevant context for LLM prompts. This is the most common vector search use case.
rag_example.py

from openai import OpenAI
from veep import Client

openai = OpenAI()
vp = Client("your-api-key")

# Embed and store documents
documents = [
    "Vector databases store embeddings...",
    "RAG improves LLM accuracy...",
]
for i, doc in enumerate(documents):
    embedding = openai.embeddings.create(
        model="text-embedding-3-small",
        input=doc,
    ).data[0].embedding
    vp.add(vectors=[embedding], ids=[str(i)], metadata=[{"text": doc}])

# Query: find relevant context for a question
question = "How do vector databases work?"
q_embedding = openai.embeddings.create(
    model="text-embedding-3-small",
    input=question,
).data[0].embedding
results = vp.search(vector=q_embedding, top_k=3)
context = "\n".join(r.metadata["text"] for r in results)

# Pass the retrieved context to the LLM
response = openai.chat.completions.create(
    model="gpt-4",
    messages=[{"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"}],
)
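Embedding one short string per document works for a demo, but real documents are usually split into overlapping chunks before embedding so each vector covers a focused passage. A minimal chunking helper (plain Python; the chunk sizes are illustrative assumptions, not SDK defaults):

```python
def chunk_text(text: str, chunk_size: int = 500, overlap: int = 50) -> list[str]:
    """Split text into overlapping character-based chunks."""
    if chunk_size <= overlap:
        raise ValueError("chunk_size must be larger than overlap")
    chunks = []
    start = 0
    while start < len(text):
        chunks.append(text[start:start + chunk_size])
        start += chunk_size - overlap
    return chunks

# Each chunk would then be embedded and stored as its own vector,
# with metadata pointing back to the source document.
chunks = chunk_text("A" * 1200, chunk_size=500, overlap=50)
print(len(chunks))  # 3 chunks cover the 1200-character document
```

The overlap keeps a sentence that straddles a chunk boundary retrievable from at least one chunk.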

Semantic Image Search

Computer Vision
Embed images with CLIP, store the vectors, and find visually similar images by searching with either an image or a text description.
image_search.py

from transformers import CLIPModel, CLIPProcessor
from PIL import Image
from veep import Client

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
vp = Client("your-api-key")

# Index images (image_paths: a list of image file paths)
for path in image_paths:
    image = Image.open(path)
    inputs = processor(images=image, return_tensors="pt")
    embedding = model.get_image_features(**inputs)[0].detach().numpy()
    vp.add(vectors=[embedding], ids=[path], metadata=[{"path": path}])

# Search by text: "a photo of a sunset over the ocean"
text_inputs = processor(text=["a photo of a sunset over the ocean"], return_tensors="pt")
query_vec = model.get_text_features(**text_inputs)[0].detach().numpy()
results = vp.search(vector=query_vec, top_k=10)
for r in results:
    print(f"{r.metadata['path']} score={r.score:.4f}")
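If your collection uses cosine similarity, it is common to L2-normalize CLIP vectors before indexing and querying so dot-product and cosine scores agree. A small pure-Python helper (a sketch; whether veep normalizes vectors for you is an assumption to check against the metric you configured):

```python
import math

def l2_normalize(vec: list[float]) -> list[float]:
    """Scale a vector to unit length; the zero vector is returned unchanged."""
    norm = math.sqrt(sum(x * x for x in vec))
    if norm == 0.0:
        return list(vec)
    return [x / norm for x in vec]

v = l2_normalize([3.0, 4.0])
print(v)  # [0.6, 0.8] — unit length
```

You would apply this to both the image embeddings at index time and the text query vector at search time.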

Product Recommendations

E-commerce
Store product feature vectors and find similar products. Use metadata filtering to constrain results by category, price range, or availability.
recommendations.py

from veep import Client

vp = Client("your-api-key")

# Index products with metadata
vp.add(
    vectors=product_embeddings,
    ids=product_ids,
    metadata=[
        {"name": "Running Shoe X", "category": "footwear", "price": 89.99},
        {"name": "Trail Runner Pro", "category": "footwear", "price": 129.99},
        # ...
    ],
)

# Find products similar to the one a user is viewing
current_product_vector = get_product_embedding("product-123")
similar = vp.search(
    vector=current_product_vector,
    top_k=6,
    filter={"category": "footwear"},
)
for product in similar:
    print(f"{product.metadata['name']} ${product.metadata['price']}")
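The filter above matches a single category. To also constrain by price range or availability, you would combine conditions in one filter dict; the sketch below assumes Mongo-style range operators (`$gte`/`$lte`), which you should verify against veep's actual filter syntax:

```python
# Hypothetical combined filter: in-stock footwear priced $50-$150.
# The operator names are an assumption, not confirmed veep syntax.
price_filter = {
    "category": "footwear",
    "in_stock": True,
    "price": {"$gte": 50.0, "$lte": 150.0},
}

# The filter would then be passed to search, e.g.:
# similar = vp.search(vector=current_product_vector, top_k=6, filter=price_filter)
print(sorted(price_filter))  # ['category', 'in_stock', 'price']
```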

Duplicate Detection

Data Quality
Find near-duplicate documents, support tickets, or records by searching for vectors with high similarity scores. Useful for deduplication pipelines.
dedup.py

from veep import Client
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")
vp = Client("your-api-key")

# Index all documents
embeddings = model.encode(documents)
vp.add(vectors=embeddings, ids=doc_ids)

# Find duplicates: search each doc against the collection
SIMILARITY_THRESHOLD = 0.95
duplicates = []
for i, emb in enumerate(embeddings):
    results = vp.search(vector=emb, top_k=5)
    for r in results:
        # Skip the document itself; the id comparison records each
        # unordered pair only once instead of twice (A,B and B,A)
        if r.id > doc_ids[i] and r.score > SIMILARITY_THRESHOLD:
            duplicates.append((doc_ids[i], r.id, r.score))

print(f"Found {len(duplicates)} duplicate pairs")
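A 0.95 cutoff assumes cosine-style scores; when tuning the threshold it helps to compute similarity for a few known document pairs directly. A pure-Python cosine helper (illustrative only; in practice you would rely on the scores veep returns):

```python
import math

def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Cosine of the angle between two non-zero vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

print(cosine_similarity([1.0, 0.0], [1.0, 0.0]))  # 1.0 — identical direction
print(cosine_similarity([1.0, 0.0], [0.0, 1.0]))  # 0.0 — orthogonal
```

Embed a handful of pairs you know are duplicates (and a few you know are not), check their scores, and set the threshold between the two clusters.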

Ready to build?

Get started with 250K free vectors. No credit card required.

Quickstart Guide