From 0654232db455eacc62004b79c0a45137277080e9 Mon Sep 17 00:00:00 2001 From: VitaminB16 Date: Sat, 9 Mar 2024 19:06:25 +0000 Subject: [PATCH] format: jobspy/scrapers/utils --- src/jobspy/scrapers/utils.py | 43 ++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/src/jobspy/scrapers/utils.py b/src/jobspy/scrapers/utils.py index 0843594..844cf8b 100644 --- a/src/jobspy/scrapers/utils.py +++ b/src/jobspy/scrapers/utils.py @@ -1,11 +1,10 @@ from __future__ import annotations -import logging import re - -import numpy as np +import logging import requests import tls_client +import numpy as np from markdownify import markdownify as md from requests.adapters import HTTPAdapter, Retry @@ -16,7 +15,8 @@ logger.propagate = False if not logger.handlers: logger.setLevel(logging.INFO) console_handler = logging.StreamHandler() - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + formatter = logging.Formatter(format) console_handler.setFormatter(formatter) logger.addHandler(console_handler) @@ -35,7 +35,12 @@ def extract_emails_from_text(text: str) -> list[str] | None: return email_regex.findall(text) -def create_session(proxy: dict | None = None, is_tls: bool = True, has_retry: bool = False, delay: int = 1) -> requests.Session: +def create_session( + proxy: dict | None = None, + is_tls: bool = True, + has_retry: bool = False, + delay: int = 1, +) -> requests.Session: """ Creates a requests session with optional tls, proxy, and retry settings. :return: A session object @@ -49,15 +54,17 @@ def create_session(proxy: dict | None = None, is_tls: bool = True, has_retry: bo if proxy: session.proxies.update(proxy) if has_retry: - retries = Retry(total=3, - connect=3, - status=3, - status_forcelist=[500, 502, 503, 504, 429], - backoff_factor=delay) + retries = Retry( + total=3, + connect=3, + status=3, + status_forcelist=[500, 502, 503, 504, 429], + backoff_factor=delay, + ) adapter = HTTPAdapter(max_retries=retries) - session.mount('http://', adapter) - session.mount('https://', adapter) + session.mount("http://", adapter) + session.mount("https://", adapter) return session @@ -75,17 +82,15 @@ def get_enum_from_job_type(job_type_str: str) -> JobType | None: def currency_parser(cur_str): # Remove any non-numerical characters # except for ',' '.' or '-' (e.g. EUR) - cur_str = re.sub("[^-0-9.,]", '', cur_str) + cur_str = re.sub("[^-0-9.,]", "", cur_str) # Remove any 000s separators (either , or .) - cur_str = re.sub("[.,]", '', cur_str[:-3]) + cur_str[-3:] + cur_str = re.sub("[.,]", "", cur_str[:-3]) + cur_str[-3:] - if '.' in list(cur_str[-3:]): + if "." in list(cur_str[-3:]): num = float(cur_str) - elif ',' in list(cur_str[-3:]): - num = float(cur_str.replace(',', '.')) + elif "," in list(cur_str[-3:]): + num = float(cur_str.replace(",", ".")) else: num = float(cur_str) return np.round(num, 2) - -