Веб-парсинг данных с веб-сайта с использованием Selenium в Python

ApplicationDetails.txt:

URL: https://www.abc.com
applicationNo : 123456

Класс Application:

from dataclasses import dataclass


@dataclass
class Application(object):
    """Record that the decoded JSON data is bound to.

    The field names are identical to the keys of the dictionary built by
    ``ScrapData`` and stored in ``ApplicationData.json``, which lets
    ``DeserializeJson`` create an instance with ``Application(**data)``.
    """

    Status: str
    ApplicationType: str
    StatusDate: str
    Location: str
    LocationDate: str
    # Annotated as int, but dataclasses neither validate nor convert values:
    # whatever the caller passes in is stored unchanged.
    ConfirmationNumber: int
    FirstNamedApplicant: str
    EntityStatus: str

Код:

from selenium import webdriver
import json
from selenium.webdriver.common.by import By
import logging
import traceback
from Application import Application  # importing Application Class


def PrintObj(applicationObject):
    """Print the fields of an object bound to the Application class.

    Args:
        applicationObject: Object exposing the attributes ``Status``,
            ``ApplicationType``, ``StatusDate``, ``Location``,
            ``LocationDate``, ``ConfirmationNumber``,
            ``FirstNamedApplicant`` and ``EntityStatus``.

    Returns:
        None. Output goes to stdout. Errors are printed and logged, never
        raised: a missing attribute (for example when ``None`` is passed in)
        stops the listing at that field.
    """
    fieldNames = (
        'Status',
        'ApplicationType',
        'StatusDate',
        'Location',
        'LocationDate',
        'ConfirmationNumber',
        'FirstNamedApplicant',
        'EntityStatus',
    )
    try:
        print('     ***************** After Deserialize *****************')
        for fieldName in fieldNames:
            # getattr raises AttributeError for a missing field, which is
            # reported by the dedicated handler below.
            print('%s: %s' % (fieldName, getattr(applicationObject, fieldName)))
        print('---------------------------------------------------')
    except AttributeError as ex:
        print(ex)
        logging.error(ex, exc_info=True)
    except Exception:
        # Exception instead of a bare except, so that KeyboardInterrupt and
        # SystemExit are not swallowed.
        print('ERROR Occurred in PrintObj Method')
        logging.error('ERROR Occurred in PrintObj Method', exc_info=True)


def _strip_label(line, label):
    """Return the value of a ``label : value`` line without label, colon and surrounding whitespace."""
    text = line.strip()
    if text.startswith(label):
        text = text[len(label):].lstrip()
        # Accept both "label : value" and "label: value".
        if text.startswith(':'):
            text = text[1:]
    return text.strip()


def ApplicationDetail(path, mode):
    """Read the website URL and the application number from a text file.

    The first line of the file holds the URL (``URL : <value>``) and the
    second line the application number (``applicationNo : <value>``).
    Whitespace around the colon is optional.

    Args:
        path: Path of the details file.
        mode: Mode passed to ``open``; must be a text read mode such as 'r'.

    Returns:
        A ``(webSiteURL, application_no)`` tuple of strings, or None when
        the file could not be read. The error is printed and logged.
    """
    try:
        logging.info('Opening ApplicationDetail File')
        # The with-statement closes the file; no explicit close() is needed.
        with open(path, mode) as file:
            # Only the label and the surrounding whitespace/newline are
            # removed, so letters inside the value stay intact.
            webSiteURL = _strip_label(file.readline(), 'URL')
            application_no = _strip_label(file.readline(), 'applicationNo')
        logging.info('closed ApplicationDetail File')
        return webSiteURL, application_no
    except FileNotFoundError as ex:
        print(ex)
        logging.critical(ex, exc_info=True)
    except Exception:
        print('Something ERROR Occurred in ApplicationDetail Method')
        logging.critical('Something ERROR Occurred in ApplicationDetail Method', exc_info=True)
    return None


def JsonStringSerialize(recordDictionary):
    """Serialize ``recordDictionary`` to JSON and write it to ApplicationData.json.

    The file is created in (or overwritten in) the current working
    directory. Serialization happens before the file is opened, so an
    unserializable value leaves an existing file untouched.

    Args:
        recordDictionary: JSON-serializable object, normally the dictionary
            returned by ``ScrapData``.

    Returns:
        None. Failures are printed and logged, never raised.
    """
    try:
        jsonString = json.dumps(recordDictionary, indent=4)  # serializing recordDictionary
        logging.info('Serialzing Done')
        # The with-statement closes the file; no explicit close() is needed.
        with open('ApplicationData.json', 'w') as f_Out:
            logging.info('Writing in .json File')
            f_Out.write(jsonString)
            logging.info('Writing completed in .json File')
    except (TypeError, ValueError):
        # json.dumps raises TypeError for unsupported types and ValueError
        # for circular references. json.encoder.JSONEncoder is a class, not
        # an exception, and cannot be used in an except clause.
        print('Cannot Serializable')
        logging.error('Cannot Serializable', exc_info=True)
    except Exception:
        print('Serialization Failed')
        logging.error('Serialization Failed', exc_info=True)


def DeserializeJson():
    """Load ApplicationData.json and bind its content to the Application class.

    The file is read from the current working directory. Its top-level JSON
    value is unpacked as keyword arguments for ``Application``.

    Returns:
        An ``Application`` instance, or None when the file is missing, is
        not valid JSON, or its content does not fit the ``Application``
        constructor. Failures are printed and logged, never raised.
    """
    try:
        with open('ApplicationData.json', 'r') as f_In:
            jsonData = json.load(f_In)  # deserializing JSON data
        logging.info('Deserialzing Done')
        return Application(**jsonData)  # binding to the Application class
    except json.decoder.JSONDecodeError:
        print('Cannot Deserializable')
        logging.error('Cannot Deserializable', exc_info=True)
    except Exception:
        # Covers a missing file and content that does not match the
        # Application fields, among others.
        print('Deserialization Failed')
        logging.error('Deserialization Failed', exc_info=True)
    return None


def ScrapData(URL, applicationNo, xpath, elementID, xpath2,
              driverPath=r'C:\Users\XYZ\WebDriver\IEDriverServer.exe'):
    """Scrape the application details from the given URL with Internet Explorer.

    The function opens ``URL``, types ``applicationNo`` into the element
    matched by ``xpath``, clicks the element with id ``elementID`` and
    collects the text of every element matched by ``xpath2``.

    Args:
        URL: Address of the page to open.
        applicationNo: Application number typed into the search field.
        xpath: XPath of the input element that receives ``applicationNo``.
        elementID: id of the element that is clicked to submit the search.
        xpath2: XPath matching the cells whose text is collected; the first
            eight matches are used, in field order.
        driverPath: Location of the IEDriverServer executable. The default
            is the original hard-coded path with its backslashes restored.

    Returns:
        A dictionary with the keys ``Status``, ``ApplicationType``,
        ``StatusDate``, ``Location``, ``LocationDate``,
        ``ConfirmationNumber``, ``FirstNamedApplicant`` and
        ``EntityStatus``, or None when anything fails (including fewer than
        eight matched cells). Failures are printed and logged, never raised.
    """
    fieldNames = (
        'Status',
        'ApplicationType',
        'StatusDate',
        'Location',
        'LocationDate',
        'ConfirmationNumber',
        'FirstNamedApplicant',
        'EntityStatus',
    )
    # Bound before the try block so that ``finally`` can tell whether a
    # browser was actually started.
    webBrowser = None
    try:
        # NOTE(review): recent Selenium releases expect a Service object
        # instead of a positional driver path - confirm against the
        # installed Selenium version.
        webBrowser = webdriver.Ie(driverPath)
        webBrowser.get(URL)
        webBrowser.implicitly_wait(15)
        # find_element(By.…) replaces the find_element_by_* helpers, which
        # current Selenium releases no longer provide.
        webBrowser.find_element(By.XPATH, xpath).send_keys(applicationNo)
        webBrowser.find_element(By.ID, elementID).click()
        logging.info('Scraping Started')
        applicationData = [td.text for td in webBrowser.find_elements(By.XPATH, xpath2)]
        logging.info('Scraping Completed')
        # Indexing (rather than zip) keeps the IndexError when fewer than
        # eight cells were found, so an incomplete record is never returned.
        return {name: applicationData[index] for index, name in enumerate(fieldNames)}
    except Exception:
        print('Something Error Occurred in ScrapData Method')
        logging.critical('Something Error Occurred in ScrapData Method', exc_info=True)
        return None
    finally:
        if webBrowser is not None:
            # quit() ends the session and closes the browser.
            webBrowser.quit()


if __name__ == '__main__':
    # Script entry point: read the input file, scrape the page, store the
    # result as JSON and optionally print the stored record.
    try:
        logging.basicConfig(format="%(levelname)s - %(asctime)s - %(message)s", datefmt="%Y-%m-%d %I:%M:%S %p",
                            filename="Log_File.log", level=logging.DEBUG)
        logging.info('Task Started')
        # ApplicationDetail returns None on failure; unpacking then raises
        # TypeError, which is handled below.
        webSiteURL, applicationNo = ApplicationDetail('ApplicationDetails.txt', 'r')
        scrapedRecord = ScrapData(webSiteURL, applicationNo, '//input[@id="id"]', "Submit", '//td')
        JsonStringSerialize(scrapedRecord)
        if input('      U Want to Deserialize y || n\n') == 'y':
            PrintObj(DeserializeJson())
    except (TypeError, NameError, ModuleNotFoundError) as ex:
        print(ex)
        logging.critical(ex, exc_info=True)
    except Exception:
        print('Something Error Occurred in Main Method')
        # exc_info=True keeps the traceback in the log file.
        logging.critical('Something Error Occurred in Main Method', exc_info=True)
    finally:
        logging.info('Task Completed')

Приведённый выше проект извлекает (парсит) данные с веб-сайта с помощью Selenium в Python. Буду благодарен за любые предложения и замечания по этому коду.

0

Добавить комментарий

Ваш адрес email не будет опубликован. Обязательные поля помечены *