[fix] glassdoor duplicates

pull/67/head v1.1.25
Cullen Watson 2023-10-30 20:29:34 -05:00
parent d37f86e1b9
commit 2b7fea40a5
2 changed files with 4 additions and 1 deletions

View File

@@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "python-jobspy" name = "python-jobspy"
version = "1.1.24" version = "1.1.25"
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter" description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"] authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
homepage = "https://github.com/Bunsly/JobSpy" homepage = "https://github.com/Bunsly/JobSpy"

View File

@@ -78,6 +78,9 @@ class GlassdoorScraper(Scraper):
job_url = res_json["data"]["jobListings"]["jobListingSeoLinks"][ job_url = res_json["data"]["jobListings"]["jobListingSeoLinks"][
"linkItems" "linkItems"
][i]["url"] ][i]["url"]
if job_url in self.seen_urls:
continue
self.seen_urls.add(job_url)
job = job["jobview"] job = job["jobview"]
title = job["job"]["jobTitleText"] title = job["job"]["jobTitleText"]
company_name = job["header"]["employerNameFromSearch"] company_name = job["header"]["employerNameFromSearch"]