issue#270: fix Glassdoor 403 response by rotating user-agent and updating headers (#274)

This commit is contained in:
Lê Trọng Tài
2025-07-28 21:55:05 +07:00
committed by GitHub
parent 94d413bad1
commit 9aae02453d
9 changed files with 14 additions and 10 deletions

View File

@@ -45,6 +45,7 @@ def scrape_jobs(
hours_old: int = None,
enforce_annual_salary: bool = False,
verbose: int = 0,
user_agent: str = None,
**kwargs,
) -> pd.DataFrame:
"""
@@ -98,7 +99,7 @@ def scrape_jobs(
def scrape_site(site: Site) -> Tuple[str, JobResponse]:
scraper_class = SCRAPER_MAPPING[site]
scraper = scraper_class(proxies=proxies, ca_cert=ca_cert)
scraper = scraper_class(proxies=proxies, ca_cert=ca_cert, user_agent=user_agent)
scraped_data: JobResponse = scraper.scrape(scraper_input)
cap_name = site.value.capitalize()
site_name = "ZipRecruiter" if cap_name == "Zip_recruiter" else cap_name