LLM Access Needed
LangStruct works with any LLM provider — use free-tier services, paid APIs, or even run models locally. Choose what works best for you:
Google Gemini:

```bash
export GOOGLE_API_KEY="your-key"
```

OpenAI:

```bash
export OPENAI_API_KEY="sk-your-key"
```

Anthropic:

```bash
export ANTHROPIC_API_KEY="sk-ant-key"
```

Local (Ollama): no API key needed; models run on your own machine.

Then install LangStruct:

```bash
pip install "langstruct[examples]"
```
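Before running the examples, a quick sanity check avoids a confusing first failure. A minimal sketch using only the environment variables exported above (skip it if you are running a local Ollama model):

```python
# Sketch: confirm at least one provider key is visible to Python.
# Not needed when using a local Ollama model.
import os

keys = ("GOOGLE_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY")
if not any(os.getenv(k) for k in keys):
    raise SystemExit("No LLM API key found; set one of: " + ", ".join(keys))
```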
```python
from langstruct import LangStruct

# Define schema by example
extractor = LangStruct(example={
    "company": "Apple Inc.",
    "revenue": 125.3,
    "quarter": "Q3 2024",
})

# Extract from text
text = "Apple reported $125.3B revenue in Q3 2024..."
result = extractor.extract(text)

print(result.entities)
# {'company': 'Apple Inc.', 'revenue': 125.3, 'quarter': 'Q3 2024'}

print(result.sources)  # Character-level source tracking
# {'company': [CharSpan(0, 5, 'Apple')], ...}

# Boost accuracy with refinement
refined_result = extractor.extract(text, refine=True)
print(f"Confidence: {refined_result.confidence:.1%}")  # Higher confidence
```
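Refinement costs extra LLM calls, so a common pattern is to pay for it only when the first pass looks weak. A minimal sketch using only the calls shown above; the 0.8 cutoff is an assumed threshold, not a library default:

```python
# Sketch: refine only low-confidence extractions.
# 0.8 is an arbitrary threshold chosen for illustration.
result = extractor.extract(text)
if result.confidence < 0.8:
    result = extractor.extract(text, refine=True)
print(result.entities, result.confidence)
```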
The same extractor can also parse natural-language queries into semantic search terms and structured filters:

```python
from langstruct import LangStruct

# Same instance for both extraction and parsing
ls = LangStruct(example={
    "company": "Apple Inc.",
    "revenue": 125.3,
    "quarter": "Q3 2024",
})

# Parse a natural-language query
query = "Q3 2024 tech companies over $100B discussing AI"
result = ls.query(query)

print(result.semantic_terms)
# ['tech companies', 'AI', 'artificial intelligence']

print(result.structured_filters)
# {'quarter': 'Q3 2024', 'revenue': {'$gte': 100.0}}
```
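The structured filters are plain dicts, so they are usable even without a vector database. A minimal sketch that applies them to already-extracted rows; it handles only equality and the `$gte` operator seen above, and the `matches` helper is our own, not part of LangStruct:

```python
# Sketch: apply parsed filters to plain dicts (equality and '$gte' only).
def matches(entities, filters):
    for key, cond in filters.items():
        value = entities.get(key)
        if isinstance(cond, dict):
            if "$gte" in cond and (value is None or value < cond["$gte"]):
                return False
        elif value != cond:
            return False
    return True

rows = [
    {"company": "Apple Inc.", "revenue": 125.3, "quarter": "Q3 2024"},
    {"company": "Smallco", "revenue": 2.1, "quarter": "Q3 2024"},
]
print([r["company"] for r in rows if matches(r, result.structured_filters)])
# ['Apple Inc.']
```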
Together, extraction and query parsing slot directly into a RAG pipeline:

```python
import uuid

from chromadb import Client
from langstruct import LangStruct

# 1. Single instance for both operations
ls = LangStruct(example={
    "company": "Apple",
    "revenue": 100.0,
    "quarter": "Q3",
})
vector_db = Client().create_collection("docs")

# 2. Index documents with extracted metadata
def index_document(text):
    metadata = ls.extract(text).entities
    vector_db.add(documents=[text], metadatas=[metadata], ids=[str(uuid.uuid4())])

# 3. Query with natural language
def search(query):
    parsed = ls.query(query)
    return vector_db.query(
        query_texts=parsed.semantic_terms,
        where=parsed.structured_filters,
        n_results=5,
    )

# Usage
index_document("Apple reported $125B in Q3 2024...")
results = search("Q3 tech companies over $100B")
# Returns only Apple, not other Q3 mentions
```
```python
from langstruct import LangStruct

# Better type inference from multiple examples
examples = [
    {"name": "Alice", "age": 25, "skills": ["Python"]},
    {"name": "Bob", "age": 35, "skills": ["JavaScript", "React"]},
]

extractor = LangStruct(examples=examples)
# Infers: name=str, age=int, skills=List[str]
```
For stricter control, define the schema with Pydantic:

```python
from typing import List, Optional

from pydantic import BaseModel, Field

from langstruct import LangStruct

class CompanySchema(BaseModel):
    name: str
    revenue: float = Field(gt=0, description="Revenue in billions")
    employees: Optional[int] = None
    products: List[str] = []

extractor = LangStruct(schema=CompanySchema)
```
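The schema's types and constraints carry through to the output. A short usage sketch; the sample sentence and the printed result are illustrative, not captured library output:

```python
# Sketch: extraction with the Pydantic-backed extractor returns typed fields.
text = "Acme Corp posted $4.2B revenue with 12,000 employees."
result = extractor.extract(text)
print(result.entities)
# e.g. {'name': 'Acme Corp', 'revenue': 4.2, 'employees': 12000, 'products': []}
```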
```python
# Default: auto-detects available models
extractor = LangStruct(example=schema)

# Specific model
extractor = LangStruct(
    example=schema,
    model="gpt-5-mini",  # example: a recent OpenAI model
)

# Local with Ollama
extractor = LangStruct(
    example=schema,
    model="ollama/llama3.2",
)
```
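When the same code runs in several environments, the model string can come from configuration instead of being hard-coded. A small sketch; the `LANGSTRUCT_MODEL` variable name is our invention, not a library convention:

```python
import os

# Sketch: pick the model from the environment.
# "LANGSTRUCT_MODEL" is a hypothetical variable name chosen for illustration.
model = os.getenv("LANGSTRUCT_MODEL")
if model:
    extractor = LangStruct(example=schema, model=model)
else:
    extractor = LangStruct(example=schema)  # fall back to auto-detection
```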
```python
# Process multiple documents efficiently
documents = [doc1, doc2, doc3, ...]
results = extractor.extract(documents, max_workers=8, show_progress=True)

for result in results:
    print(f"Confidence: {result.confidence:.1%}")
    print(f"Entities: {result.entities}")

# Batch processing with refinement for higher accuracy
results = extractor.extract(
    documents,
    refine=True,
    max_workers=5,
    rate_limit=60,      # calls per minute
    retry_failed=True,  # raise on failures (False to skip with warnings)
)
# Note: 2-5x higher cost but significantly better accuracy
```
```python
result = extractor.extract(text)

# See exactly where each value came from
for field, spans in result.sources.items():
    for span in spans:
        print(f"{field}: '{text[span.start:span.end]}' at {span.start}-{span.end}")
```
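The character offsets also make it easy to render grounded output. A small sketch that brackets each span in the original text; it assumes spans do not overlap, which the library does not guarantee:

```python
# Sketch: bracket every grounded span in the original text.
# Editing from the end keeps earlier character offsets valid.
# Assumes spans do not overlap.
all_spans = [(s, field) for field, spans in result.sources.items() for s in spans]
marked = text
for span, field in sorted(all_spans, key=lambda p: p[0].start, reverse=True):
    marked = (marked[:span.start]
              + f"[{marked[span.start:span.end]}|{field}]"
              + marked[span.end:])
print(marked)
# e.g. "[Apple|company] reported $[125.3|revenue]B revenue in [Q3 2024|quarter]..."
```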
```python
# Save an extractor (preserves all state)
extractor.save("./my_extractor")

# Load anywhere (API keys must be available)
loaded_extractor = LangStruct.load("./my_extractor")

# Works exactly like the original
result = loaded_extractor.extract("New text")
```
```python
# Save an individual result
result.save_json("output.json")

# Export batch results
extractor.export_batch(results, "output.csv")  # csv/json/excel/parquet

# JSONL round-trip
extractor.save_annotated_documents(results, "extractions.jsonl")
loaded = extractor.load_annotated_documents("extractions.jsonl")
extractor.visualize(loaded, "results.html")
```
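Because the export formats are standard, downstream analysis needs nothing extra. A trivial sketch, assuming the CSV columns mirror the extracted fields:

```python
# Sketch: load the exported batch into pandas for analysis.
import pandas as pd

df = pd.read_csv("output.csv")
print(df.head())
```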
Tune `max_workers` for parallel throughput; respect quotas with `rate_limit`.