From 1648ace1e6f2d6ebe9938a509527fe2755833dbc Mon Sep 17 00:00:00 2001
From: tiennm99
Date: Tue, 11 Mar 2025 19:21:44 +0700
Subject: [PATCH] feat: ask ATNVC data

---
Notes (review):
    Line breaks and indentation were reconstructed from a whitespace-collapsed
    copy of this patch. Hunk line counts agree with the @@ headers and the
    diffstat, but the leading whitespace on context/removed lines is assumed
    (4-space steps) -- TODO confirm against main.py, or apply with
    `git apply --ignore-whitespace`.
    `import os` is added below, but no line added by this patch references it.

 data/.gitignore |  2 ++
 main.py         | 32 +++++++++++++++++---------------
 2 files changed, 19 insertions(+), 15 deletions(-)
 create mode 100644 data/.gitignore

diff --git a/data/.gitignore b/data/.gitignore
new file mode 100644
index 0000000..c96a04f
--- /dev/null
+++ b/data/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
\ No newline at end of file
diff --git a/main.py b/main.py
index 56f3b43..82d5c74 100644
--- a/main.py
+++ b/main.py
@@ -4,29 +4,32 @@ from haystack import Pipeline, Document
 from haystack.components.retrievers.in_memory import InMemoryBM25Retriever
 from haystack.components.builders.prompt_builder import PromptBuilder
 from haystack.document_stores.in_memory import InMemoryDocumentStore
+import os
+import glob
 
-# Write documents to InMemoryDocumentStore
 document_store = InMemoryDocumentStore()
-document_store.write_documents([
-    Document(content="My name is Jean and I live in Paris."),
-    Document(content="My name is Mark and I live in Berlin."),
-    Document(content="My name is Giorgio and I live in Rome.")
-])
 
-# Build a RAG pipeline
+documents = []
+for txt_file in glob.glob("data/*.txt"):
+    with open(txt_file, 'r', encoding='utf-8') as f:
+        content = f.read()
+        documents.append(Document(content=content))
+
+document_store.write_documents(documents)
+
 prompt_template = """
-Given these documents, answer the question.
-Documents:
+Dựa trên những tài liệu sau, trả lời câu hỏi.
+Tài liệu:
 {% for doc in documents %}
     {{ doc.content }}
 {% endfor %}
-Question: {{question}}
-Answer:
+Câu hỏi: {{question}}
+Trả lời:
 """
 
 retriever = InMemoryBM25Retriever(document_store=document_store)
 prompt_builder = PromptBuilder(template=prompt_template)
-llm = OllamaGenerator(url = "http://localhost:11434", model="llama3.2:1b")
+llm = OllamaGenerator(url = "http://localhost:11434", model="qwen2.5")
 
 rag_pipeline = Pipeline()
 rag_pipeline.add_component("retriever", retriever)
@@ -35,8 +38,7 @@ rag_pipeline.add_component("llm", llm)
 rag_pipeline.connect("retriever", "prompt_builder.documents")
 rag_pipeline.connect("prompt_builder", "llm")
 
-# Ask a question
-question = "Who lives in Paris?"
+question = "Khan là ai?"
 results = rag_pipeline.run(
     {
         "retriever": {"query": question},
@@ -44,4 +46,4 @@ results = rag_pipeline.run(
     }
 )
 
-print(results["llm"]["replies"])
\ No newline at end of file
+print(results["llm"]["replies"])