From d9566baa66a0b2e13b8032a4a74e35421e9d26ea Mon Sep 17 00:00:00 2001
From: Gereon Elvers
Date: Tue, 9 Jul 2024 14:37:58 +0200
Subject: [PATCH] Try fix file upload

---
 app.py | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/app.py b/app.py
index 7dd22b2..90c13cd 100644
--- a/app.py
+++ b/app.py
@@ -9,6 +9,7 @@
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_community.chat_message_histories import ChatMessageHistory
 from langchain_openai import ChatOpenAI
+import tempfile
 
 index_name = "langchain-demo"
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
@@ -16,23 +17,18 @@
 welcome_message = """PDF Chat Demo"""
 
 
-
 def process_file(file: AskFileResponse):
-    import tempfile
     if file.type == "text/plain":
         Loader = TextLoader
     elif file.type == "application/pdf":
         Loader = PyPDFLoader
 
-    with tempfile.NamedTemporaryFile(delete=False) as tempfile:
-        tempfile.write(file.content)
-        loader = Loader(tempfile.name)
-        documents = loader.load()
-        docs = text_splitter.split_documents(documents)
-        for i, doc in enumerate(docs):
-            doc.metadata["source"] = f"source_{i}"
-        return docs
-
+    loader = Loader(file.path)
+    documents = loader.load()
+    docs = text_splitter.split_documents(documents)
+    for i, doc in enumerate(docs):
+        doc.metadata["source"] = f"source_{i}"
+    return docs
 
 def get_docsearch(file: AskFileResponse):
     docs = process_file(file)
@@ -40,7 +36,6 @@ def get_docsearch(file: AskFileResponse):
     docsearch = Chroma.from_documents(docs, embeddings)
     return docsearch
 
-
 @cl.on_chat_start
 async def start():
     files = None
@@ -57,7 +52,6 @@
     msg = cl.Message(content=f"Processing `{file.name}`...")
     await msg.send()
 
-    # No async implementation in the Pinecone client, fallback to sync
     docsearch = await cl.make_async(get_docsearch)(file)
 
     message_history = ChatMessageHistory()
@@ -83,7 +77,6 @@ async def start():
 
     cl.user_session.set("chain", chain)
 
-
 @cl.on_message
 async def main(message: cl.Message):
     chain = cl.user_session.get("chain")  # type: ConversationalRetrievalChain
@@ -108,4 +101,4 @@
     else:
         answer += "\nNo sources found"
 
-    await cl.Message(content=answer, elements=text_elements).send()
\ No newline at end of file
+    await cl.Message(content=answer, elements=text_elements).send()
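
Note (reviewer addendum, not part of the commit): the fix works because newer Chainlit releases spool each upload to a temporary file and expose it as AskFileResponse.path, while the old file.content bytes attribute that the deleted NamedTemporaryFile block relied on is no longer available; passing file.path straight to the LangChain loader is the supported route. Two follow-ups worth considering: the module-level `import tempfile` added here is left unused once the NamedTemporaryFile block is gone, and when an upload is neither text/plain nor application/pdf, `Loader` is never bound, so `Loader(file.path)` raises UnboundLocalError. A minimal sketch of process_file covering both points follows; the dict dispatch and the ValueError message are illustrative choices, not code from this repository:

    from chainlit.types import AskFileResponse
    from langchain_community.document_loaders import PyPDFLoader, TextLoader
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

    # Map MIME types to loaders so unsupported uploads fail loudly
    # instead of leaving `Loader` unbound.
    LOADERS = {
        "text/plain": TextLoader,
        "application/pdf": PyPDFLoader,
    }

    def process_file(file: AskFileResponse):
        Loader = LOADERS.get(file.type)
        if Loader is None:
            raise ValueError(f"Unsupported file type: {file.type}")
        # Chainlit has already written the upload to disk, so the loader
        # can read file.path directly; no NamedTemporaryFile needed.
        documents = Loader(file.path).load()
        docs = text_splitter.split_documents(documents)
        for i, doc in enumerate(docs):
            doc.metadata["source"] = f"source_{i}"
        return docs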