mirror of https://github.com/Bunsly/JobSpy
fix(glassdoor): add retry adapter (#77)
parent
33d442bf1e
commit
a5916edcdd
|
@ -26,7 +26,7 @@ class GlassdoorScraper(Scraper):
|
||||||
"""
|
"""
|
||||||
Initializes GlassdoorScraper with the Glassdoor job search url
|
Initializes GlassdoorScraper with the Glassdoor job search url
|
||||||
"""
|
"""
|
||||||
site = Site(Site.ZIP_RECRUITER)
|
site = Site(Site.GLASSDOOR)
|
||||||
super().__init__(site, proxy=proxy)
|
super().__init__(site, proxy=proxy)
|
||||||
|
|
||||||
self.url = None
|
self.url = None
|
||||||
|
@ -49,7 +49,7 @@ class GlassdoorScraper(Scraper):
|
||||||
payload = self.add_payload(
|
payload = self.add_payload(
|
||||||
scraper_input, location_id, location_type, page_num, cursor
|
scraper_input, location_id, location_type, page_num, cursor
|
||||||
)
|
)
|
||||||
session = create_session(self.proxy, is_tls=False)
|
session = create_session(self.proxy, is_tls=False, has_retry=True)
|
||||||
response = session.post(
|
response = session.post(
|
||||||
f"{self.url}/graph", headers=self.headers(), timeout=10, data=payload
|
f"{self.url}/graph", headers=self.headers(), timeout=10, data=payload
|
||||||
)
|
)
|
||||||
|
@ -171,7 +171,7 @@ class GlassdoorScraper(Scraper):
|
||||||
if not location or is_remote:
|
if not location or is_remote:
|
||||||
return "11047", "STATE" # remote options
|
return "11047", "STATE" # remote options
|
||||||
url = f"{self.url}/findPopularLocationAjax.htm?maxLocationsToReturn=10&term={location}"
|
url = f"{self.url}/findPopularLocationAjax.htm?maxLocationsToReturn=10&term={location}"
|
||||||
session = create_session(self.proxy)
|
session = create_session(self.proxy, has_retry=True)
|
||||||
response = session.get(url)
|
response = session.get(url)
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
raise GlassdoorException(
|
raise GlassdoorException(
|
||||||
|
@ -194,7 +194,7 @@ class GlassdoorScraper(Scraper):
|
||||||
location_type: str,
|
location_type: str,
|
||||||
page_num: int,
|
page_num: int,
|
||||||
cursor: str | None = None,
|
cursor: str | None = None,
|
||||||
) -> dict[str, str | Any]:
|
) -> str:
|
||||||
payload = {
|
payload = {
|
||||||
"operationName": "JobSearchResultsQuery",
|
"operationName": "JobSearchResultsQuery",
|
||||||
"variables": {
|
"variables": {
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
import re
|
import re
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
import requests
|
|
||||||
import tls_client
|
import tls_client
|
||||||
|
import requests
|
||||||
|
from requests.adapters import HTTPAdapter, Retry
|
||||||
|
|
||||||
from ..jobs import JobType
|
from ..jobs import JobType
|
||||||
|
|
||||||
|
|
||||||
|
@ -27,11 +29,11 @@ def extract_emails_from_text(text: str) -> list[str] | None:
|
||||||
return email_regex.findall(text)
|
return email_regex.findall(text)
|
||||||
|
|
||||||
|
|
||||||
def create_session(proxy: dict | None = None, is_tls: bool = True):
|
def create_session(proxy: dict | None = None, is_tls: bool = True, has_retry: bool = False):
|
||||||
"""
|
"""
|
||||||
Creates a tls client session
|
Creates a requests session with optional tls, proxy, and retry settings.
|
||||||
|
|
||||||
:return: A session object with or without proxies.
|
:return: A session object
|
||||||
"""
|
"""
|
||||||
if is_tls:
|
if is_tls:
|
||||||
session = tls_client.Session(
|
session = tls_client.Session(
|
||||||
|
@ -44,6 +46,16 @@ def create_session(proxy: dict | None = None, is_tls: bool = True):
|
||||||
session.allow_redirects = True
|
session.allow_redirects = True
|
||||||
if proxy:
|
if proxy:
|
||||||
session.proxies.update(proxy)
|
session.proxies.update(proxy)
|
||||||
|
if has_retry:
|
||||||
|
retries = Retry(total=3,
|
||||||
|
connect=3,
|
||||||
|
status=3,
|
||||||
|
status_forcelist=[500, 502, 503, 504, 429],
|
||||||
|
backoff_factor=1)
|
||||||
|
adapter = HTTPAdapter(max_retries=retries)
|
||||||
|
|
||||||
|
session.mount('http://', adapter)
|
||||||
|
session.mount('https://', adapter)
|
||||||
|
|
||||||
return session
|
return session
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue