From 2b7fea40a59ad7b289df1fa19e02a0d65709ed0a Mon Sep 17 00:00:00 2001 From: Cullen Watson Date: Mon, 30 Oct 2023 20:29:34 -0500 Subject: [PATCH] [fix] glassdoor duplicates --- pyproject.toml | 2 +- src/jobspy/scrapers/glassdoor/__init__.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ebd4c1d..e5c4a14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "python-jobspy" -version = "1.1.24" +version = "1.1.25" description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/Bunsly/JobSpy" diff --git a/src/jobspy/scrapers/glassdoor/__init__.py b/src/jobspy/scrapers/glassdoor/__init__.py index 9d939cf..394df41 100644 --- a/src/jobspy/scrapers/glassdoor/__init__.py +++ b/src/jobspy/scrapers/glassdoor/__init__.py @@ -78,6 +78,9 @@ class GlassdoorScraper(Scraper): job_url = res_json["data"]["jobListings"]["jobListingSeoLinks"][ "linkItems" ][i]["url"] + if job_url in self.seen_urls: + continue + self.seen_urls.add(job_url) job = job["jobview"] title = job["job"]["jobTitleText"] company_name = job["header"]["employerNameFromSearch"]