issue#270: fix Glassdoor 403 response by rotating user-agent and updating headers (#274)

This commit is contained in:
Lê Trọng Tài
2025-07-28 21:55:05 +07:00
committed by GitHub
parent 94d413bad1
commit 9aae02453d
9 changed files with 14 additions and 10 deletions

View File

@@ -34,13 +34,13 @@ log = create_logger("Glassdoor")
class Glassdoor(Scraper):
def __init__(
self, proxies: list[str] | str | None = None, ca_cert: str | None = None
self, proxies: list[str] | str | None = None, ca_cert: str | None = None, user_agent: str | None = None
):
"""
Initializes GlassdoorScraper with the Glassdoor job search url
"""
site = Site(Site.GLASSDOOR)
super().__init__(site, proxies=proxies, ca_cert=ca_cert)
super().__init__(site, proxies=proxies, ca_cert=ca_cert, user_agent=user_agent)
self.base_url = None
self.country = None
@@ -65,6 +65,8 @@ class Glassdoor(Scraper):
)
token = self._get_csrf_token()
headers["gd-csrf-token"] = token if token else fallback_token
if self.user_agent:
headers["user-agent"] = self.user_agent
self.session.headers.update(headers)
location_id, location_type = self._get_location(