Mirror of https://github.com/Bunsly/JobSpy.git (last synced 2026-03-05 03:54:31 -08:00)
[fix] Glassdoor: skip duplicate job listings (job URLs already seen)
This commit is contained in:
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "python-jobspy"
-version = "1.1.24"
+version = "1.1.25"
 description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
 authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
 homepage = "https://github.com/Bunsly/JobSpy"
@@ -78,6 +78,9 @@ class GlassdoorScraper(Scraper):
 job_url = res_json["data"]["jobListings"]["jobListingSeoLinks"][
     "linkItems"
 ][i]["url"]
+if job_url in self.seen_urls:
+    continue
+self.seen_urls.add(job_url)
 job = job["jobview"]
 title = job["job"]["jobTitleText"]
 company_name = job["header"]["employerNameFromSearch"]
Reference in New Issue
Block a user