import asyncio

import chromadb
from llama_index.core import Document, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.vector_stores.chroma import ChromaVectorStore

# Load the source documents from disk.
reader = SimpleDirectoryReader(input_dir=r"C:\Users\so7\AppData\Local\Programs\Python\Python313\RAG")
documents = reader.load_data()

# One embedding model shared by ingestion and querying, so both sides
# embed text identically.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# In-memory pipeline used for a quick smoke test: split into sentence
# chunks, then embed each chunk.
demo_pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_overlap=0),
        embed_model,
    ]
)

# Persistent Chroma collection that backs the vector store.
db = chromadb.PersistentClient(path="./pl_db")
chroma_collection = db.get_or_create_collection("ppgpl")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# Ingestion pipeline that writes its output nodes straight into Chroma.
pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=25, chunk_overlap=0),
        embed_model,
    ],
    vector_store=vector_store,
)


async def main():
    # arun() is a coroutine, so it must be awaited inside an async function.
    # Smoke test: run the in-memory pipeline on the built-in example document.
    nodes = await demo_pipeline.arun(documents=[Document.example()])
    print(nodes)

    # Ingest the documents loaded from disk into the Chroma collection;
    # without this step the vector store stays empty and queries return nothing.
    await pipeline.arun(documents=documents)

    # Build an index over the populated vector store and query it.
    index = VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
    llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
    query_engine = index.as_query_engine(
        llm=llm,
        response_mode="tree_summarize",
    )
    response = query_engine.query("What is the meaning of life?")
    print(response)
    # The meaning of life is 42


if __name__ == "__main__":
    asyncio.run(main())
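
# Optional sanity check (a minimal sketch, not part of the pipeline above;
# the helper name and the similarity_top_k value are illustrative choices).
# Collection.count() is a standard chromadb call, and as_retriever() /
# retrieve() are standard LlamaIndex APIs. Call this inside main() once the
# index has been built, e.g.:
#     inspect_store(index, chroma_collection, "What is the meaning of life?")
def inspect_store(index, chroma_collection, query, top_k=2):
    # Number of embedded chunks persisted in the Chroma collection.
    print(f"chunks in collection: {chroma_collection.count()}")
    # Retrieve the raw nodes (with similarity scores) that the query
    # engine would pass to the LLM for tree_summarize.
    retriever = index.as_retriever(similarity_top_k=top_k)
    for scored in retriever.retrieve(query):
        print(scored.score, scored.node.get_content()[:80])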