Agentic chunking is an intelligent method of splitting documents into smaller chunks by using an LLM to determine natural breakpoints in the text. Rather than splitting text at fixed character counts, it analyzes the content to find semantically meaningful boundaries like paragraph breaks and topic transitions.
Usage
from bitca.agent import Agent
from bitca.document.chunking.agentic import AgenticChunking
from bitca.knowledge.pdf import PDFUrlKnowledgeBase
from bitca.vectordb.pgvector import PgVector
# Connection string handed to the PgVector store below.
db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"

# Assemble the knowledge base from its parts: the PDF to ingest, the vector
# table to store it in, and the chunking strategy (agentic chunking uses an
# LLM to pick breakpoints instead of splitting at fixed character counts).
recipe_urls = ["https://bitca-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf"]
vector_store = PgVector(db_url=db_url, table_name="recipes_agentic_chunking")
chunker = AgenticChunking()
knowledge_base = PDFUrlKnowledgeBase(
    urls=recipe_urls,
    vector_db=vector_store,
    chunking_strategy=chunker,
)

# Loading is only needed once; comment this call out after the first run.
knowledge_base.load(recreate=False)

# Agent that is allowed to search the knowledge base when answering.
agent = Agent(knowledge_base=knowledge_base, search_knowledge=True)

question = "How to make Thai curry?"
agent.print_response(question, markdown=True)