ApplicationDetails.txt:
URL : https://www.abc.com
applicationNo : 123456
Класс Application (файл Application.py):
from dataclasses import dataclass
@dataclass
class Application:
    """Plain record that a deserialized JSON object is bound to.

    The attribute names are also the keyword names accepted by the
    generated ``__init__``, so a mapping with exactly these keys can be
    expanded into an instance with ``Application(**mapping)``.
    """

    Status: str
    ApplicationType: str
    StatusDate: str
    Location: str
    LocationDate: str
    # NOTE(review): annotated as int, but dataclasses do not enforce
    # annotations -- the attribute holds whatever value the caller passes.
    # Confirm whether an explicit conversion is wanted.
    ConfirmationNumber: int
    FirstNamedApplicant: str
    EntityStatus: str
Код:
from selenium import webdriver
import json
from selenium.webdriver.common.by import By
import logging
import traceback
from Application import Application # importing Application Class
def PrintObj(applicationObject):
    """Print the fields of an ``Application``-like object to stdout.

    Each field is written as ``Name: value`` between a header line and a
    footer line. The function never raises for ordinary errors: a missing
    attribute (for example when ``None`` is passed) is printed and logged,
    and any other ``Exception`` is reported with a generic message.

    Args:
        applicationObject: Object exposing the eight attributes listed in
            ``fieldNames`` below.
    """
    # Printed in this fixed order.
    fieldNames = (
        'Status',
        'ApplicationType',
        'StatusDate',
        'Location',
        'LocationDate',
        'ConfirmationNumber',
        'FirstNamedApplicant',
        'EntityStatus',
    )
    try:
        print(' ***************** After Deserialize *****************')
        for fieldName in fieldNames:
            print('%s: %s' % (fieldName, getattr(applicationObject, fieldName)))
        print('---------------------------------------------------')
    except AttributeError as ex:
        print(ex)
        logging.error(ex, exc_info=True)
    except Exception:
        # Not a bare ``except:`` -- KeyboardInterrupt/SystemExit must propagate.
        print('ERROR Occurred in PrintObj Method')
        logging.error('ERROR Occurred in PrintObj Method', exc_info=True)
def _labelledValue(line, label):
    """Return the value of a ``label : value`` line, without label or surrounding whitespace."""
    text = line.strip()
    if text.startswith(label):
        remainder = text[len(label):].lstrip()
        if remainder.startswith(':'):
            return remainder[1:].strip()
    # No recognizable label: hand back the stripped line unchanged.
    return text


def ApplicationDetail(path, mode):
    """Read the website URL and the application number from a text file.

    The first line is expected to look like ``URL : <url>`` and the second
    like ``applicationNo : <number>``; spacing around the colon is optional.

    Args:
        path: Location of the details file.
        mode: Mode passed to ``open``; must be a text read mode such as ``'r'``.

    Returns:
        A ``(webSiteURL, application_no)`` tuple of strings, or ``None`` when
        the file could not be read (the problem is printed and logged).
    """
    try:
        logging.info('Opening ApplicationDetail File')
        with open(path, mode) as file:
            # Only the label and surrounding whitespace are removed; the old
            # ``.replace('n', '')`` deleted every letter "n" from the values.
            webSiteURL = _labelledValue(file.readline(), 'URL')
            application_no = _labelledValue(file.readline(), 'applicationNo')
        # The ``with`` block has already closed the file at this point.
        logging.info('closed ApplicationDetail File')
        return webSiteURL, application_no
    except FileNotFoundError as ex:
        print(ex)
        logging.critical(ex, exc_info=True)
    except Exception:
        print('Something ERROR Occurred in ApplicationDetail Method')
        logging.critical('Something ERROR Occurred in ApplicationDetail Method', exc_info=True)
    return None
def JsonStringSerialize(recordDictionary, filePath='ApplicationData.json'):
    """Serialize ``recordDictionary`` to JSON and write it to a file.

    The JSON text is produced with ``indent=4`` before the file is opened,
    so an unserializable record does not create or truncate the file.
    Failures are printed and logged rather than raised.

    Args:
        recordDictionary: JSON-serializable object to store.
        filePath: Destination file; defaults to ``'ApplicationData.json'``.
    """
    try:
        jsonString = json.dumps(recordDictionary, indent=4)
        logging.info('Serialzing Done')
        with open(filePath, 'w') as f_Out:
            logging.info('Writing in .json File')
            f_Out.write(jsonString)
            logging.info('Writing completed in .json File')
    except (TypeError, ValueError):
        # json.dumps raises TypeError for unsupported types and ValueError
        # for circular references. JSONEncoder is not an exception class
        # and cannot be used in an ``except`` clause.
        print('Cannot Serializable')
        logging.error('Cannot Serializable', exc_info=True)
    except Exception:
        print('Serialization Failed')
        logging.error('Serialization Failed', exc_info=True)
def DeserializeJson(filePath='ApplicationData.json'):
    """Load a JSON object from a file and bind it to ``Application``.

    Args:
        filePath: File to read; defaults to ``'ApplicationData.json'``.

    Returns:
        An ``Application`` built from the JSON object's key/value pairs, or
        ``None`` when the file is missing, is not valid JSON, does not hold
        a JSON object, or its keys do not match the class (the failure is
        printed and logged).
    """
    try:
        with open(filePath, 'r') as f_In:
            record = json.load(f_In)
        logging.info('Deserialzing Done')
        if not isinstance(record, dict):
            # ``**`` expansion needs a mapping; a payload such as ``null``
            # is rejected here with a clear reason.
            raise TypeError('expected a JSON object, got %s' % type(record).__name__)
        return Application(**record)
    except json.decoder.JSONDecodeError:
        print('Cannot Deserializable')
        logging.error('Cannot Deserializable', exc_info=True)
    except Exception:
        print('Deserialization Failed')
        logging.error('Deserialization Failed', exc_info=True)
    return None
def ScrapData(URL, applicationNo, xpath, elementID, xpath2,
              driverPath=r'C:\Users\XYZ\WebDriver\IEDriverServer.exe'):
    """Submit an application number on a web page and collect the result cells.

    Opens ``URL`` in Internet Explorer, types ``applicationNo`` into the
    element matched by ``xpath``, clicks the element with id ``elementID``
    and reads the text of every element matched by ``xpath2``.

    Args:
        URL: Page to open.
        applicationNo: Text typed into the input element.
        xpath: XPath of the input element.
        elementID: ``id`` of the element to click.
        xpath2: XPath matching the result cells, in field order.
        driverPath: Path of the IEDriverServer executable.

    Returns:
        A dict mapping the eight field names to the first eight cell texts,
        or ``None`` if anything fails (the failure is printed and logged).
    """
    fieldNames = (
        'Status',
        'ApplicationType',
        'StatusDate',
        'Location',
        'LocationDate',
        'ConfirmationNumber',
        'FirstNamedApplicant',
        'EntityStatus',
    )
    # Stays None if the driver fails to start, so ``finally`` can tell.
    webBrowser = None
    try:
        # NOTE(review): passing the driver path positionally matches the
        # Selenium 3 signature; newer releases expect a Service object --
        # confirm against the installed version.
        webBrowser = webdriver.Ie(driverPath)
        webBrowser.get(URL)
        webBrowser.implicitly_wait(15)
        webBrowser.find_element(By.XPATH, xpath).send_keys(applicationNo)
        webBrowser.find_element(By.ID, elementID).click()
        logging.info('Scraping Started')
        applicationData = [td.text for td in webBrowser.find_elements(By.XPATH, xpath2)]
        logging.info('Scraping Completed')
        if len(applicationData) < len(fieldNames):
            # zip() would silently drop the missing fields; fail instead.
            raise ValueError('expected at least %d cells, found %d'
                             % (len(fieldNames), len(applicationData)))
        return dict(zip(fieldNames, applicationData))
    except Exception:
        print('Something Error Occurred in ScrapData Method')
        logging.critical('Something Error Occurred in ScrapData Method', exc_info=True)
        return None
    finally:
        if webBrowser is not None:
            # quit() is the public way to end the session.
            webBrowser.quit()
if __name__ == '__main__':
    # Script entry point: read the input file, scrape the page, store the
    # record as JSON and optionally print it back after deserializing.
    try:
        logging.basicConfig(format="%(levelname)s - %(asctime)s - %(message)s", datefmt="%Y-%m-%d %I:%M:%S %p",
                            filename="Log_File.log", level=logging.DEBUG)
        logging.info('Task Started')
        details = ApplicationDetail('ApplicationDetails.txt', 'r')
        if details is None:
            # Checked explicitly instead of relying on a TypeError from
            # unpacking None.
            print('Application details could not be read')
            logging.critical('Application details could not be read')
        else:
            webSiteURL, applicationNo = details
            record = ScrapData(webSiteURL, applicationNo, '//input[@id="id"]', "Submit", '//td')
            if record is None:
                # Nothing was scraped, so nothing is written to the JSON file.
                print('No data scraped')
                logging.critical('No data scraped')
            else:
                JsonStringSerialize(record)
                if input(' U Want to Deserialize y || n\n') == 'y':
                    PrintObj(DeserializeJson())
    except (TypeError, NameError, ModuleNotFoundError) as ex:
        print(ex)
        logging.critical(ex, exc_info=True)
    except Exception:
        print('Something Error Occurred in Main Method')
        # exc_info=True keeps the traceback in the log file.
        logging.critical('Something Error Occurred in Main Method', exc_info=True)
    finally:
        logging.info('Task Completed')
Приведённый выше проект собирает данные с веб-сайта (веб-скрапинг) с помощью Selenium на Python. Буду благодарен за любые предложения и замечания по этому коду.