enh: proxies (#157)

* enh: proxies

* enh: proxies
This commit is contained in:
Cullen Watson
2024-05-25 14:04:09 -05:00
committed by GitHub
parent cd29f79796
commit 5cb7ffe5fd
12 changed files with 149 additions and 354 deletions

View File

@@ -34,12 +34,12 @@ from ...jobs import (
class GlassdoorScraper(Scraper):
def __init__(self, proxy: Optional[str] = None):
def __init__(self, proxies: list[str] | str | None = None):
"""
Initializes GlassdoorScraper with the Glassdoor job search url
"""
site = Site(Site.GLASSDOOR)
super().__init__(site, proxy=proxy)
super().__init__(site, proxies=proxies)
self.base_url = None
self.country = None
@@ -59,7 +59,7 @@ class GlassdoorScraper(Scraper):
self.scraper_input.results_wanted = min(900, scraper_input.results_wanted)
self.base_url = self.scraper_input.country.get_glassdoor_url()
self.session = create_session(self.proxy, is_tls=True, has_retry=True)
self.session = create_session(proxies=self.proxies, is_tls=True, has_retry=True)
token = self._get_csrf_token()
self.headers["gd-csrf-token"] = token if token else self.fallback_token
@@ -245,7 +245,6 @@ class GlassdoorScraper(Scraper):
if not location or is_remote:
return "11047", "STATE" # remote options
url = f"{self.base_url}/findPopularLocationAjax.htm?maxLocationsToReturn=10&term={location}"
session = create_session(self.proxy, has_retry=True)
res = self.session.get(url, headers=self.headers)
if res.status_code != 200:
if res.status_code == 429: