import csv
import random
import time
from datetime import datetime

import pandas as pd
import requests
from scrapy import Selector
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By


# Read username/password pairs from a CSV file with "username" and "password" columns.
def read_credentials_from_csv(file_path):
    credentials = []
    with open(file_path, "r") as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            credentials.append((row["username"], row["password"]))
    return credentials


# Read usernames and passwords from CSV
credentials_file = "credentials.csv"
users_credentials = read_credentials_from_csv(credentials_file)

# Loop through each set of credentials
for username, password in users_credentials:
    chromium_driver_path = "./chromedriver"
    # chrome_options = webdriver.ChromeOptions()
    # chrome_options.headless = True
    # Selenium 4 style; older versions passed executable_path= directly.
    driver = webdriver.Chrome(service=Service(chromium_driver_path))
    driver.get("https://www.dataforthai.com/login")

    # Find the username and password input fields and enter the credentials
    # (replace "username"/"password" with the actual name attributes if they differ).
    username_input = driver.find_element(By.NAME, 'username')
    password_input = driver.find_element(By.NAME, 'password')
    username_input.send_keys(username)
    password_input.send_keys(password)

    # Submit the login form (replace the XPath with the correct one for the login button).
    login_button = driver.find_element(By.XPATH, '//*[@id="btn-login"]')
    login_button.click()

    # After login, read the "PHPSESSID" session cookie so it can be reused with requests.
    phpsessid_cookie = driver.get_cookie("PHPSESSID")
    if phpsessid_cookie:
        phpsessid_value = phpsessid_cookie["value"]
        print(f"Value of 'PHPSESSID' cookie: {phpsessid_value}")
    else:
        print("PHPSESSID cookie not found.")

    time.sleep(1)
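    # Hypothetical guard (an addition, not in the original script): if login did not
    # yield a PHPSESSID cookie, the scraping below cannot authenticate, and
    # phpsessid_value would be undefined when building the cookies dict. Close the
    # browser and move on to the next credential pair instead.
    if not phpsessid_cookie:
        driver.quit()
        continue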
    # Scraping part
    try:
        df = pd.read_csv('./300_400.csv', dtype={'tax': str, 'tsic': str})

        # File used to persist the index of the last processed row between runs.
        index_file = './last_index_fix.txt'

        # Load the last index value if the file exists
        try:
            with open(index_file, 'r') as file:
                last_index = int(file.read().strip())
        except FileNotFoundError:
            last_index = None

        # Stop marker: 15 rows past the last processed index (None on a fresh run).
        break_parameter = last_index + 15 if last_index is not None else None

        # Payload and cookies for an authenticated session on dataforthai.com
        payload = {}
        cookies = {"PHPSESSID": phpsessid_value}

        # Walk the rows, skipping everything at or before the last saved index.
        for i, (_, row) in enumerate(df.iterrows()):
            if last_index is not None and i <= last_index:
                continue
            if row['name_en'] == '-':
                tax = row['tax']
                # tsic_code = row['tsic']
                # Prepend '0' because rows after 131072 lost the leading zero of the tax ID.
                url = f'https://dataforthai.com/company/0{tax}/'
                proxies = {
                    "http": "http://sbtmgbme-rotate:tujc456rgvgf@p.webshare.io:80/",
                    "https": "http://sbtmgbme-rotate:tujc456rgvgf@p.webshare.io:80/",
                }

                req = requests.get(url, cookies=cookies, proxies=proxies)
                selector = Selector(text=req.text)

                # Skip companies whose page shows the Thai "page not found" message.
                check_url = selector.xpath('//*[@id="main"]/div/div/h3/text()').get()
                if check_url == "ขออภัยค่ะ ไม่พบหน้าที่คุณต้องการ":  # "Sorry, the page you requested was not found."
                    continue

                created_at = datetime.now().isoformat()
                df.at[i, 'name_en'] = selector.xpath('//*/div[@id="main"]//h2/text()').get().strip()
                container_body = selector.xpath('//*/div[@id="main"]/div[3]/table[1]/tr/td')[0]
                df.at[i, 'business'] = container_body.xpath('//*/table[2]/tr[1]/td[2]/text()[1]').get().strip()
                df.at[i, 'status'] = container_body.xpath('//*/table[3]/tr/td[2]/text()').get().strip()
                df.at[i, 'date_register_thai'] = container_body.xpath('//*/table[4]/tr/td[2]/text()').get().strip()
                df.at[i, 'capital'] = container_body.xpath('//*/table[5]/tr/td[2]/text()').get().strip()
                address_element = container_body.xpath('//*/table[6]/tr/td[2]/a/text()')
                df.at[i, 'address'] = address_element.get().strip() if address_element else ""
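                # Note on the selectors above: table[2]..table[6] index the page's
                # tables by position, so any layout change on dataforthai.com will
                # silently shift these fields to the wrong columns. The committee
                # tables below (table[8]..table[11]) rely on the same positional
                # assumption.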
                # Collect committee names from the four positional committee tables.
                list_committee = [committee.strip() for committee in container_body.xpath('//*/tr/td/table[8]/tr/td/text()').getall()]
                list_committee_other = [committee.strip() for committee in container_body.xpath('//*/tr/td/table[9]/tr/td/text()').getall()]
                list_committee_other2 = [committee.strip() for committee in container_body.xpath('//*/tr/td/table[10]/tr/td/text()').getall()]
                list_committee_other3 = [committee.strip() for committee in container_body.xpath('//*/tr/td/table[11]/tr/td/text()').getall()]
                df.at[i, 'list_committee'] = list_committee + list_committee_other + list_committee_other2 + list_committee_other3

                df.to_csv('./300_400.csv', encoding='utf-8', index=False)
                print(f'Process for row {i} is done.')

                # Save the current index value to the file so a restart resumes here.
                with open(index_file, 'w') as file:
                    file.write(str(i))

                # Random delay between requests to avoid hammering the site.
                sleep_time = random.uniform(1, 3)
                time.sleep(sleep_time)

    except Exception as e:
        # Handle the error, report it, and move on to the next account.
        print(f"An error occurred: {e}")
        print("Got an error in the loop. Changing account.")
        time.sleep(1)
        continue
    finally:
        # Always close this credential's browser, even when an error forces a skip.
        driver.quit()

print("All IDs in the credentials file have been used. Exiting.")
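# A possible hardening step (sketch only, not part of the original flow): rotating
# proxies through p.webshare.io can fail transiently, and a single failed GET
# currently aborts the whole account via the broad except above. A small retry
# wrapper like the hypothetical fetch_with_retry below could absorb those blips.
#
# def fetch_with_retry(url, cookies, proxies, attempts=3):
#     for attempt in range(attempts):
#         try:
#             return requests.get(url, cookies=cookies, proxies=proxies, timeout=30)
#         except requests.RequestException:
#             time.sleep(2 ** attempt)  # simple exponential backoff
#     return None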