I wrote the following code to achieve the following:
- Regularly scrape all live matches on the betting site oddsportal.com
- Put the odds data into a dataframe
- Evaluate the dataframe for the two odds providers (Asianodds, Pinnacle) and compare the actual data against predefined patterns
- Send a Telegram message if a pattern is identified (a sketch of the underlying Bot API call follows this list)
- Save the scraped links to a JSON file so they are not scraped again
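The notification step boils down to a single Bot API call. A minimal sketch, reusing the base_url / bot_token / chat_id placeholders from the script below; unlike the string formatting in getodds, letting requests build the query via params= URL-encodes the message text (emojis included) and avoids embedding literal quotes:

```
import requests

base_url = "https://api.telegram.org/bot"
bot_token = "xxxx"   # placeholder, as in the script below
chat_id = "-xxxx"    # placeholder, as in the script below

def send_telegram(text):
    # requests URL-encodes chat_id and text via params=
    url = f"{base_url}{bot_token}/sendMessage"
    return requests.get(url, params={"chat_id": chat_id, "text": text}, timeout=10)
```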
My code still has the following problems, which I hope this review can help with:
- Performance: scraping and analysis currently take 1-2 minutes per game. How can I make this faster / more efficient? (One idea, parsing the page source in bulk, is sketched after this list.)
- Sometimes, when many matches are live at once, the script cannot scrape them all before the cron job launches the next run, and the two instances conflict. How can I check whether a Selenium instance is already running and wait for it to finish? (A lock-file sketch also follows.)
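On the performance point: most of the time per match goes into hundreds of individual find_element_by_xpath calls, each of which is a round trip to chromedriver. BeautifulSoup is imported below but never used; one option is to fetch driver.page_source once per odds page and parse the whole table locally. A minimal sketch, assuming the #odds-data-table container that the script's XPaths already target (the row/cell selectors are guesses and would need checking against the real markup):

```
from bs4 import BeautifulSoup

def parse_odds_rows(driver):
    # one round trip to the browser instead of one per XPath lookup
    soup = BeautifulSoup(driver.page_source, "html.parser")
    table = soup.select_one("#odds-data-table")  # same container the XPaths use
    if table is None:
        return []
    rows = []
    for tr in table.select("tbody tr"):
        cells = [td.get_text(strip=True) for td in tr.find_all("td")]
        if cells:
            rows.append(cells)
    return rows
```

On the overlapping cron runs: rather than asking Selenium whether another instance exists, the usual pattern is to take a lock file at startup. A minimal sketch, assuming a POSIX system (the shebang suggests macOS) and a hypothetical lock path:

```
import fcntl

def acquire_single_instance_lock(path="/tmp/oddsportal.lock"):  # hypothetical path
    lock_file = open(path, "w")
    # LOCK_EX blocks until the previous run releases the lock;
    # OR it with fcntl.LOCK_NB to exit immediately instead of waiting
    fcntl.flock(lock_file, fcntl.LOCK_EX)
    return lock_file  # keep this object alive for the whole run

lock = acquire_single_instance_lock()
```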
```
#!/Library/Frameworks/Python.framework/Versions/3.8/bin/python3
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from multiprocessing import Process
#from DbManager import DatabaseManager
import json
import re
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import datetime
import time
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
import requests
import cProfile
o_u_types= [1.50,1.75,2.00,2.25,2.50,2.75,3.00,3.25,3.50]
raw_time=str(datetime.datetime.now())
current_date= raw_time[0:10]
bookmakers=['Asianodds','Pinnacle']
countries=['England','Japan','France','Germany', 'India', 'Chile', 'Italy','Turkey', 'Czech Republic', 'Spain', 'Colombia','Poland','Belgium','Romania','Paraguay',
'Portugal', 'Netherlands','Cyprus','Mexico','Brazil','Uruguay','Serbia','Slovenia','Slovakia','Sweden', 'Norway','USA','Estonia']
limited_league_countries=['England','Germany','Italy','Spain']
leagues=['National League','Championship','League One','2. Bundesliga','3. Liga','Regionalliga West','Regionalliga Sudwest','Serie A','Serie B','LaLiga','LaLiga2']
base_url="https://api.telegram.org/bot"
bot_token='xxxx'
chat_id='-xxxx'
TYPE_ODDS = 'OPENING'  # 'OPENING' collects opening odds; any other value makes the program collect closing odds
link='https://www.oddsportal.com/inplay-odds/live-now/soccer/'
class Oddsportal:
def ReadScrapedLinks(self):
#with open("Modules/Config/scraped.json") as file:
with open("Config/scraped.json") as file:
data = json.load(file)
return data["scraped"]
def SaveScrapedMatch(self, link):
# with open("Modules/Config/scraped.json") as oldfile:
with open("Config/scraped.json") as oldfile:
data = json.load(oldfile)
data["scraped"].append(link)
# with open("Modules/Config/scraped.json", "w+") as newfile:
with open("Config/scraped.json", "w+") as newfile:
json.dump(data, newfile, indent=4)
    def filter_list(self, links):
        # keep only links that have not been scraped yet
        scraped_links = self.ReadScrapedLinks()
        self.filtered_links = []
        for link in links:
            if link in scraped_links:
                continue
            self.filtered_links.append(link)
def FindByCSSAndAttribute(self,mobject, css, attribute):
try:
return mobject.find_element_by_css_selector(css).get_attribute(attribute)
except:
return False
def WaitForObjects(self,type, string):
return WebDriverWait(self.driver, 5).until(EC.presence_of_all_elements_located((type, string)))
    def fi(self, a):
        # return the element's text, or False if the element is missing
        try:
            return self.driver.find_element_by_xpath(a).text
        except:
            return False
    def ffi(self, a):
        # return the text if the element exists, else None (single lookup)
        text = self.fi(a)
        if text is not False:
            return text
    def fffi(self, a):
        if TYPE_ODDS == 'OPENING':
            try:
                # get_opening_odd is not defined in this file
                return get_opening_odd(a)
            except:
                return self.ffi(a)
        return self.ffi(a)
    def fi2(self, a):
        # click the element; report False if the click failed
        try:
            self.driver.find_element_by_xpath(a).click()
            return True
        except:
            return False
    def ffi2(self, a):
        # fi2 already performs the click, so do not click a second time
        if self.fi2(a):
            return True
        return None
def __init__(self):
mobile_emulation = {
"deviceMetrics": {"width": 360, "height": 640, "pixelRatio": 3.0},
"userAgent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"}
chrome_options = Options()
chrome_options.add_experimental_option(
"mobileEmulation", mobile_emulation)
# chrome_options.add_experimental_option("debuggerAddress", "127.0.0.1:9222")
#Initialize chrome driver
        self.driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options)
executor_url = self.driver.command_executor._url
session_id = self.driver.session_id
self.driver.get(executor_url)
print (session_id)
print (executor_url)
res = requests.get(executor_url)
print(res)
def matchcollector(self, link):
self.driver.get(link)
live_matches=WebDriverWait(self.driver, 10).until(
EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.minutes-anim')))
print(f'There are currently {len(live_matches)} matches live.')
#Collect all matches
all_matches = self.WaitForObjects(By.CLASS_NAME, "name.table-participant")
self.all_links=[]
for match_link in all_matches:
#Get match link
link = self.FindByCSSAndAttribute(match_link, 'a', 'href')
try:
                in_play_addendum = link.split("/")[-2]
#Remove in play addendum
modified_link=link.replace(in_play_addendum,"")
self.all_links.append(modified_link)
except:
continue
scraped_links=self.ReadScrapedLinks()
self.filtered_links = []
#Remove already scraped links
for link in self.all_links:
if link in scraped_links:
continue
self.filtered_links.append(link)
print(f'Of all matches, {len(self.filtered_links)} have not yet been checked.')
def openmatch(self,match):
try:
self.driver.get(match)
time.sleep(1)
self.driver.maximize_window()
except:
return False
def getodds(self):
master_df= pd.DataFrame()
for link in self.filtered_links:
self.openmatch(link)
country= self.ffi('//*[@id="breadcrumb"]/a[3]')
if country in countries:
league= self.ffi('//*[@id="breadcrumb"]/a[4]')
                if country in limited_league_countries and league not in leagues:
                    continue
match = self.ffi('//*[@id="col-content"]/h1')
game_message_string=f'Checking {match}'
game_method= '/sendMessage?chat_id={}&text="{}"'.format(chat_id,game_message_string)
game_telegram_url= base_url + bot_token + game_method
requests.get(game_telegram_url)
final_score = self.ffi('//*[@id="event-status"]')
date = self.ffi('//*[@id="col-content"]/p[1]') # Date and time
game_df= pd.DataFrame()
for i in o_u_types:
url_appendix="#over-under;2;{};0".format(i)
o_u_type= i
o_u_match_url= str(link)+str(url_appendix)
print(o_u_match_url)
self.loadoddpage(o_u_match_url)
for x in range(1,28):
L=[]
                    for j in range(1, 15):  # scan the first 14 bookmaker rows
                        Book = self.ffi('//*[@id="odds-data-table"]/div[{}]/table/tbody/tr[{}]/td[1]/div/a[2]'.format(x, j))  # bookmaker name
                        if Book in bookmakers:
                            # Asianodds and Pinnacle are read identically in live mode
                            Over = self.fffi('//*[@id="odds-data-table"]/div[{}]/table/tbody/tr[{}]/td[3]/div'.format(x, j))  # over odd
                            Under = self.fffi('//*[@id="odds-data-table"]/div[{}]/table/tbody/tr[{}]/td[4]/div'.format(x, j))  # under odd
                            # only odds coming from the <a> variant are recorded;
                            # rows where the <div> variant matched are skipped
                            if Over is None:
                                Over = self.fffi('//*[@id="odds-data-table"]/div[{}]/table/tbody/tr[{}]/td[3]/a'.format(x, j))
                            else:
                                continue
                            if Under is None:
                                Under = self.fffi('//*[@id="odds-data-table"]/div[{}]/table/tbody/tr[{}]/td[4]/a'.format(x, j))
                            else:
                                continue
print(match, country, league, Book,Over,Under, date, final_score, link, '/ 500 ')
L = L + [(match, country, league, Book ,Over,Under, date, final_score, link)]
data_df = pd.DataFrame(L)
try:
data_df.columns = ['TeamsRaw', 'Country', 'League', 'Bookmaker', 'Over', 'Under', 'DateRaw' ,'ScoreRaw','Link']
except:
print('Function crashed, probable reason : no games scraped (empty season)')
##################### FINALLY WE CLEAN THE DATA AND SAVE IT ##########################
'''Now we simply need to split team names, transform date, split score'''
#Filter out Bookmakers
# (a) Split team names
data_df["Home_id"] = [re.split(' - ',y)[0] for y in data_df["TeamsRaw"]]
data_df["Away_id"] = [re.split(' - ',y)[1] for y in data_df["TeamsRaw"]]
# (b) Transform date
data_df["Date"] = [re.split(', ',y)[1] for y in data_df["DateRaw"]]
data_df["Over_{}".format(i)]=Over
data_df["Under_{}".format(i)]=Under
master_df=pd.concat([master_df,data_df])
game_df=pd.concat([game_df,data_df])
else:
print('Match not in a relevant country. Blacklisting it.')
self.SaveScrapedMatch(link)
continue
try:
#Setup Logic Operators
game_df.drop_duplicates(keep='first',inplace=True)
game_df = game_df.groupby(['TeamsRaw','Bookmaker'], as_index=False).first()
if len(game_df.index)==2:
for i in o_u_types:
try:
                            # compare numerically: the scraped odds are strings
                            asian_over = float(game_df.at[0, f"Over_{i}"])
                            asian_under = float(game_df.at[0, f"Under_{i}"])
                            pin_over = float(game_df.at[1, f"Over_{i}"])
                            pin_under = float(game_df.at[1, f"Under_{i}"])
if asian_over > pin_over:
game_df[f"overdominant_{i}"] = "AsianDominant"
elif asian_over < pin_over:
game_df[f"overdominant_{i}"] = "PinDominant"
else:
game_df[f"overdominant_{i}"] = "Parity"
if asian_under > pin_under:
game_df[f"underdominant_{i}"] = "AsianDominant"
elif asian_under < pin_under:
game_df[f"underdominant_{i}"] = "PinDominant"
else:
game_df[f"underdominant_{i}"] = "Parity"
except:
game_df[f"overdominant_{i}"] ='n/a'
game_df[f"underdominant_{i}"] = "n/a"
continue
check_row= game_df.drop([1])
check_row_match= check_row.TeamsRaw.values
check_row_country= check_row.Country.values
check_row_league= check_row.League.values
print(check_row)
king_m5= check_row[(check_row['underdominant_2.25']=='AsianDominant') & (check_row['underdominant_2.5']=='AsianDominant')
& ((check_row['underdominant_1.5']=='Parity') | (check_row['underdominant_1.5']=='n/a'))
& ((check_row['underdominant_1.75']=='Parity') | (check_row['underdominant_1.75']=='n/a'))
& ((check_row['underdominant_2.0']=='Parity') | (check_row['underdominant_2.0']=='n/a'))
& ((check_row['underdominant_2.75']=='Parity') | (check_row['underdominant_2.75']=='n/a'))
& ((check_row['underdominant_3.0']=='Parity') | (check_row['underdominant_3.0']=='n/a'))
& ((check_row['underdominant_3.25']=='Parity') | (check_row['underdominant_3.25']=='n/a'))
& ((check_row['underdominant_3.5']=='Parity') | (check_row['underdominant_3.5']=='n/a'))
& ((check_row['overdominant_1.5']=='Parity') | (check_row['overdominant_1.5']=='n/a'))
& ((check_row['overdominant_1.75']=='Parity') | (check_row['overdominant_1.75']=='n/a'))
& ((check_row['overdominant_2.0']=='Parity') | (check_row['overdominant_2.0']=='n/a'))
& ((check_row['overdominant_2.25']=='Parity') | (check_row['overdominant_2.25']=='n/a'))
& ((check_row['overdominant_2.5']=='Parity') | (check_row['overdominant_2.5']=='n/a'))
& ((check_row['overdominant_2.75']=='Parity') | (check_row['overdominant_2.75']=='n/a'))
& ((check_row['overdominant_3.0']=='Parity') | (check_row['overdominant_3.0']=='n/a'))
& ((check_row['overdominant_3.25']=='Parity') | (check_row['overdominant_3.25']=='n/a'))
& ((check_row['overdominant_3.5']=='Parity') | (check_row['overdominant_3.5']=='n/a'))
]
if not king_m5.empty:
print(f'King M5 pattern found in {check_row_match}')
message_string=f' M5-U1,5 ⚠️👑 👑 in {check_row_country}, {check_row_league}, {check_row_match}'
method= '/sendMessage?chat_id={}&text="{}"'.format(chat_id,message_string)
telegram_url= base_url + bot_token + method
print(telegram_url)
requests.get(telegram_url)
else:
print (f'Match {check_row_match} does not contain a king M5 pattern.')
else:
print('Match does not contain both bookmakers. Blacklisting it.')
self.SaveScrapedMatch(link)
continue
self.SaveScrapedMatch(link)
except:
self.SaveScrapedMatch(link)
continue
def main():
    op = Oddsportal()
    op.matchcollector(link)
    # op.checklink()  # Oddsportal defines no checklink method; calling it would raise AttributeError
    op.getodds()
if __name__== "__main__":
#p1 = Process(target=main)
#p1.start()
cProfile.run('main()', filename="report.txt", sort=-1)
```