Skip to content

Avinash-2112/AI_Enterprise_Systems

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

4 Commits
 
 
 
 

Repository files navigation

import streamlit as st from PyPDF2 import PdfReader

from streamlit_extras.add_vertical_space import add_vertical_space

from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.embeddings import HuggingFaceInstructEmbeddings from langchain.vectorstores import FAISS from langchain.memory import ConversationBufferMemory from langchain.chains import ConversationalRetrievalChain from langchain.chains import RetrievalQA from langchain.llms import HuggingFaceHub from dotenv import load_dotenv from HtmlTemplate import css, bot_template, user_template from InstructorEmbedding import INSTRUCTOR

def get_pdf_text(docs): text = "" for pdf in docs: Pdf_reader = PdfReader(pdf) for page in Pdf_reader.pages: text += page.extract_text() return text

def get_text_chunks(text): text_splitter = RecursiveCharacterTextSplitter( chunk_size=1000, chunk_overlap=200 ) chunks = text_splitter.split_text(text=text) return chunks

def get_vectorstore(text_chunks): # embeddings = SpacyEmbeddings(model_name="en_core_web_sm") embeddings = HuggingFaceInstructEmbeddings(model_name = "hkunlp/instructor-xl", model_kwargs ={"device": "cpu"}) # Wrap text chunks in objects with 'page_content' attribute vectorstore = FAISS.from_texts(text_chunks, embedding=embeddings) return vectorstore

def get_conversation_chain(vectorstore): llms = HuggingFaceHub(repo_id = "google/flan-t5-xxl", model_kwargs ={"temperature": 0.3, "max_length":1000}, huggingfacehub_api_token="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") # llms = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-xxl") memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) conversational_chain = RetrievalQA.from_chain_type( chain_type="stuff", llm=llms, retriever= vectorstore.as_retriever(search_kwars={"k": 3}), memory = memory )

return conversational_chain

def handle_userimput(user_question): response = st.session_state.conversation({'query': user_question}) st.session_state.chat_history = response['chat_history']

for i, mesaage in enumerate (st.session_state.chat_history):
    if i % 2 == 0:
        st. write(user_template.replace("{{MSG}}", mesaage.content), unsafe_allow_html=True)
    else:
        st. write(bot_template.replace("{{MSG}}", mesaage.content), unsafe_allow_html=True)

def main(): load_dotenv() st.set_page_config(page_title="Chat with Docs", page_icon=":books:") st.write(css, unsafe_allow_html=True)

if "conversation" not in st.session_state:
    st.session_state.conversation = None

if "chat_history" not in st.session_state:
    st.session_state.chat_history = None

st.header("Chat with Docs :books:")
user_question = st.text_input("Ask question about your document")
if user_question:
    handle_userimput(user_question)


with st.sidebar:
    st.subheader("Your documents")
    docs =st.file_uploader("Upload your PDs here and click on 'process'", accept_multiple_files=True)
    if st.button("Process"):
        with st.spinner("Processing"):
            
            #get pdf text
            raw_text = get_pdf_text(docs)
            # st.write(raw_text)

            #get the text chunks
            text_chunks = get_text_chunks(raw_text)

            #create vector store
            vectorstore = get_vectorstore(text_chunks)

            #create conversation chain
            st.session_state.conversation = get_conversation_chain(vectorstore)

if name == 'main': main()

css = '''

<style> .chat-message { padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex; /* Enable flexbox layout */ } /* Add margins to align the boxes themselves */ .chat-message.user { background-color: #2b313e; margin-right: auto; /* Pushes the box to the left */ } .chat-message.bot { background-color: #475063; margin-left: auto; /* Pushes the box to the right */ } .chat-message .avatar { width: 20%; } .chat-message .avatar img { max-width: 78px; max-height: 78px; border-radius: 50%; object-fit: cover; } .chat-message .message { width: 80%; padding: 0 1.5rem; color: #fff; } </style>

'''

bot_template = '''

{{MSG}}
'''

user_template = '''

{{MSG}}
'''

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages