from langchain_community.document_loaders import PyPDFLoader
#from langchain_community.llms import OpenAI
from langchain_openai import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Load the PDF and split it into overlapping chunks for the LLM.
loader = PyPDFLoader("contract.pdf")
# Use load() rather than load_and_split(): the latter already runs a text
# splitter (LangChain's default one when none is passed), so the text would
# be split twice and the final chunk boundaries would depend on that hidden
# first pass. Splitting exactly once keeps chunking fully determined by the
# settings below.
pages = loader.load()

# 1000-character chunks with a 200-character overlap so that text cut at a
# chunk boundary still appears intact in one of the neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = splitter.split_documents(pages)

# Read the extraction prompt from disk. The encoding is given explicitly so
# the template is decoded the same way on every platform instead of relying
# on the locale default (assumes the prompt file is saved as UTF-8).
with open("prompts/extract_prompt.txt", "r", encoding="utf-8") as f:
    prompt_template = f.read()

# The template is declared with a single input variable, {document}, which is
# filled with one chunk of the PDF per call.
prompt = PromptTemplate(input_variables=["document"], template=prompt_template)
# temperature=0 requests the least random completions from the model.
llm = OpenAI(temperature=0)
chain = LLMChain(llm=llm, prompt=prompt)

# Run the extraction chain on the first 3 chunks only and print each result.
for chunk_number, chunk in enumerate(docs[:3], start=1):
    print(f"\n--- Chunk {chunk_number} ---")
    # Chain.run() is deprecated in favour of invoke(); invoke() returns a dict
    # of the chain's inputs and outputs, so pick out the generated text via
    # the chain's own output key.
    outputs = chain.invoke({"document": chunk.page_content})
    result = outputs[chain.output_key]
    print(result)

