mirror of https://github.com/Bunsly/JobSpy
fix(glassdoor): add retry adapter (#77)
parent
33d442bf1e
commit
a5916edcdd
|
@ -26,7 +26,7 @@ class GlassdoorScraper(Scraper):
|
|||
"""
|
||||
Initializes GlassdoorScraper with the Glassdoor job search url
|
||||
"""
|
||||
site = Site(Site.ZIP_RECRUITER)
|
||||
site = Site(Site.GLASSDOOR)
|
||||
super().__init__(site, proxy=proxy)
|
||||
|
||||
self.url = None
|
||||
|
@ -49,7 +49,7 @@ class GlassdoorScraper(Scraper):
|
|||
payload = self.add_payload(
|
||||
scraper_input, location_id, location_type, page_num, cursor
|
||||
)
|
||||
session = create_session(self.proxy, is_tls=False)
|
||||
session = create_session(self.proxy, is_tls=False, has_retry=True)
|
||||
response = session.post(
|
||||
f"{self.url}/graph", headers=self.headers(), timeout=10, data=payload
|
||||
)
|
||||
|
@ -171,7 +171,7 @@ class GlassdoorScraper(Scraper):
|
|||
if not location or is_remote:
|
||||
return "11047", "STATE" # remote options
|
||||
url = f"{self.url}/findPopularLocationAjax.htm?maxLocationsToReturn=10&term={location}"
|
||||
session = create_session(self.proxy)
|
||||
session = create_session(self.proxy, has_retry=True)
|
||||
response = session.get(url)
|
||||
if response.status_code != 200:
|
||||
raise GlassdoorException(
|
||||
|
@ -194,7 +194,7 @@ class GlassdoorScraper(Scraper):
|
|||
location_type: str,
|
||||
page_num: int,
|
||||
cursor: str | None = None,
|
||||
) -> dict[str, str | Any]:
|
||||
) -> str:
|
||||
payload = {
|
||||
"operationName": "JobSearchResultsQuery",
|
||||
"variables": {
|
||||
|
|
|
@ -1,8 +1,10 @@
|
|||
import re
|
||||
import numpy as np
|
||||
|
||||
import requests
|
||||
import tls_client
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter, Retry
|
||||
|
||||
from ..jobs import JobType
|
||||
|
||||
|
||||
|
@ -27,11 +29,11 @@ def extract_emails_from_text(text: str) -> list[str] | None:
|
|||
return email_regex.findall(text)
|
||||
|
||||
|
||||
def create_session(proxy: dict | None = None, is_tls: bool = True):
|
||||
def create_session(proxy: dict | None = None, is_tls: bool = True, has_retry: bool = False):
|
||||
"""
|
||||
Creates a tls client session
|
||||
Creates a requests session with optional tls, proxy, and retry settings.
|
||||
|
||||
:return: A session object with or without proxies.
|
||||
:return: A session object
|
||||
"""
|
||||
if is_tls:
|
||||
session = tls_client.Session(
|
||||
|
@ -44,6 +46,16 @@ def create_session(proxy: dict | None = None, is_tls: bool = True):
|
|||
session.allow_redirects = True
|
||||
if proxy:
|
||||
session.proxies.update(proxy)
|
||||
if has_retry:
|
||||
retries = Retry(total=3,
|
||||
connect=3,
|
||||
status=3,
|
||||
status_forcelist=[500, 502, 503, 504, 429],
|
||||
backoff_factor=1)
|
||||
adapter = HTTPAdapter(max_retries=retries)
|
||||
|
||||
session.mount('http://', adapter)
|
||||
session.mount('https://', adapter)
|
||||
|
||||
return session
|
||||
|
||||
|
|
Loading…
Reference in New Issue