Взять информацию с веб-страницы

После моего предыдущего обзора кода я внёс некоторые улучшения. Полученные знания помогли мне стать лучшим программистом, но теперь я снова прошу пересмотреть код, так как думаю, что его можно сделать ещё лучше.

Целью этого кода является мониторинг: скрипт проверяет определённый сайт каждые 30–120 секунд. Если произошли изменения, он проходит через несколько операторов if, как вы можете видеть, и затем отправляет уведомление в мой Discord о внесённых изменениях.

Вот что я создал:

monitoring.py

#!/usr/bin/python3
# -*- coding: utf-8 -*-

import concurrent.futures
import random
import time
from datetime import datetime, timedelta
from typing import Any, Dict, List

import pendulum
from loguru import logger

from scrape_values import Product

# Store identifier and the product page being monitored.
store: str = "shelta"
link: str = "https://shelta.se/sneakers/nike-air-zoom-type-whiteblack-cj2033-103"

# -------------------------------------------------------------------------
# Utils
# -------------------------------------------------------------------------
# Maps a size label to the time it was last reported; read and refreshed
# by spam_filter() to suppress duplicate restock notifications.
_size_filter: Dict[str, datetime] = {}


def monitor_stock() -> None:
    """
    Poll the product page forever and report restocks / timer changes.

    Fetches the page, compares the countdown timer and the available
    sizes against the previously seen payload, and submits a Discord
    notification task whenever either has changed.  Sleeps 30-120
    seconds between requests.
    """
    payload = Product.from_page(url=link).payload

    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        while True:
            # Request fresh product information.
            new_payload = Product.from_page(url=link).payload

            # Sleep until shortly before the release date, if one is set.
            release_date_sleeper(new_payload)

            changed = False

            if countdown_timer_comparision(payload, new_payload):
                # Notify in the background so polling is not delayed.
                executor.submit(send_notification, new_payload, "Timer change!")
                payload["displayCountDownTimer"] = new_payload["displayCountDownTimer"]
                changed = True

            if sizes_comparision(payload, new_payload):
                executor.submit(send_notification, new_payload, "Restock!")
                changed = True

            if not changed:
                logger.info("No changes made")

            # Always remember the latest sizes and always back off between
            # requests.  The original only slept on the no-change branch,
            # so it re-requested immediately after a detected sizes change
            # and logged "No changes made" even when the timer had changed.
            payload["sizes"] = new_payload["sizes"]
            time.sleep(random.randint(30, 120))


def release_date_sleeper(payload) -> None:
    """
    Sleep until 10 seconds before the product's release date, if any.

    Saves requests while nothing can change on the page.  Does nothing
    when no release date is present or the date has already passed.

    :param payload: latest product payload; may carry a pendulum
        datetime under ``releaseDate`` — TODO confirm the producer
        actually sets this key.
    """
    release_date = payload.get("releaseDate")
    if not release_date:
        return

    wake_up_at = release_date.subtract(seconds=10)
    # BUG FIX: the original used ``if not delta.seconds`` — it slept only
    # when the remaining ``seconds`` component was exactly 0, i.e. it
    # effectively never waited.  ``.seconds`` is also just one component
    # of the delta; total_seconds() is the real remaining time.
    remaining = (wake_up_at - pendulum.now()).total_seconds()
    if remaining > 0:
        logger.info(f'Release date enabled | Will sleep to -> {wake_up_at.to_datetime_string()}')
        time.sleep(remaining)


def countdown_timer_comparision(payload, new_payload) -> bool:
    """
    Report whether the countdown timer on the website has changed.

    :param payload: previously seen payload
    :param new_payload: payload from the latest request
    :return: True when the latest payload carries a timer value that
        differs from the previous one, False otherwise.
    """
    new_timer = new_payload.get("displayCountDownTimer")
    # .get() on the old payload too: the first payload may not carry the
    # key at all (Product.payload does not set it), which would KeyError
    # with the original bracket access.
    if new_timer and payload.get("displayCountDownTimer") != new_timer:
        logger.info(f'Detected new timer change -> Name: {new_payload["name"]} | Display Time: {new_timer}')
        return True
    return False


def sizes_comparision(payload, new_payload) -> bool:
    """
    Report whether the available sizes on the website have changed.

    :param payload: previously seen payload
    :param new_payload: payload from the latest request
    :return: True when the sizes differ and at least one size passes
        the spam filter, False otherwise (the original returned an
        implicit None instead of False).
    """
    if payload["sizes"] == new_payload["sizes"]:
        return False
    # Suppress re-notifications for sizes seen within the cooldown window.
    if not spam_filter(new_payload["delay"], new_payload["sizes"]):
        return False
    logger.info(f'Detected restock -> Name: {new_payload["name"]} | Sizes: {new_payload["sizes"]}')
    return True


def send_notification(payload, status) -> None:
    """
    Tag the payload and forward it to Discord.

    :param payload: payload of the product; mutated in place with the
        ``status`` and ``keyword`` fields the webhook expects.
    :param status: kind of event being reported (e.g. "Restock!").
    """
    payload["status"] = status
    payload["keyword"] = True
    # FIXME: call discord_notification(payload)


def spam_filter(delay: int, requests: List[str]) -> List[str]:
    """
    Drop requests that were already reported within the cooldown period.

    :param delay: cooldown in seconds
    :param requests: candidate size requests (may contain duplicates)
    :return: de-duplicated requests allowed through; their timestamps
        are recorded in ``_size_filter`` so repeats within ``delay``
        seconds are filtered out on later calls.
    """
    # One timestamp for the whole call keeps the comparison and the
    # bookkeeping consistent (the original called datetime.now() per item).
    now = datetime.now()
    cooldown = timedelta(seconds=delay)
    # dict.fromkeys() de-duplicates while preserving input order; the
    # original list(set(...)) produced an arbitrary order.
    filtered = [
        r for r in dict.fromkeys(requests)
        if r not in _size_filter or now - _size_filter[r] >= cooldown
    ]
    # Refresh timestamps for the requests we are actually letting through.
    for r in filtered:
        _size_filter[r] = now
    return filtered


if __name__ == "__main__":
    # Entry point: runs the monitoring loop forever (blocks this thread).
    monitor_stock()

scrape_values.py

import json
import re
from dataclasses import dataclass
from typing import List, Optional

import requests
from bs4 import BeautifulSoup


@dataclass
class Product:
    """Product information scraped from a shelta.se product page."""

    name: Optional[str] = None
    price: Optional[str] = None
    image: Optional[str] = None
    # None until from_page() fills it in; [] when no buyable sizes exist.
    sizes: Optional[List[str]] = None

    @staticmethod
    def get_sizes(doc: "BeautifulSoup") -> List[str]:
        """
        Extract the buyable size labels from the embedded JetshopData JSON.

        :param doc: parsed product page
        :return: list of size labels; empty when no JetshopData script
            is found on the page.
        """
        # NOTE(review): the regex literal in the original was
        # syntactically broken (an unterminated string with stray
        # quotes); this is the intended pattern, capturing the JSON
        # object assigned to JetshopData.
        pat = re.compile(
            r'^<script>var JetshopData='
            r'({.*})'
            r';</script>$',
        )
        for script in doc.find_all('script'):
            match = pat.match(str(script))
            if match is not None:
                break
        else:
            # No matching <script> tag on the page.
            return []

        data = json.loads(match[1])
        return [
            variation
            for attribute in data['ProductInfo']['Attributes']['Variations']
            if attribute.get('IsBuyable')
            for variation in attribute['Variation']
        ]

    @classmethod
    def from_page(cls, url: str) -> Optional['Product']:
        """
        Scrape a product page into a Product.

        :param url: product page URL
        :return: a populated Product, or None when the HTTP response is
            not OK — callers must handle the None case.
        """
        with requests.get(url) as response:
            if not response.ok:
                return None
            doc = BeautifulSoup(response.text, 'html.parser')

        name = doc.select_one('h1.product-page-header')
        price = doc.select_one('span.price')
        image = doc.select_one('meta[property="og:image"]')

        # `x and x.text` keeps missing elements as None instead of raising.
        return cls(
            name=name and name.text.strip(),
            price=price and price.text.strip(),
            image=image and image['content'],
            sizes=cls.get_sizes(doc),
        )

    @property
    def payload(self) -> dict:
        """
        Dict representation used by the monitor / notification code.

        NOTE(review): monitoring.py also reads 'releaseDate', 'delay'
        and 'displayCountDownTimer' keys that this payload never
        provides — confirm where those are supposed to come from.
        """
        return {
            "name": self.name or "Not found",
            "price": self.price or "Not found",
            "image": self.image or "Not found",
            "sizes": self.sizes,
        }

Меня беспокоит, что я мог сделать это неправильно: возможно, я разбил код на несколько функций, которые на самом деле не нужны? Я не уверен и надеюсь получить полезные отзывы! С нетерпением жду ответов.

0

Добавить комментарий

Ваш адрес email не будет опубликован. Обязательные поля помечены *