From 6439f7143322e2fc7f34336c08ab3959252222b9 Mon Sep 17 00:00:00 2001 From: Cullen Date: Tue, 28 May 2024 15:39:24 -0500 Subject: [PATCH] chore: version --- README.md | 2 +- pyproject.toml | 2 +- src/jobspy/scrapers/linkedin/__init__.py | 11 ++++------- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 6347c71..6543aea 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ jobs = scrape_jobs( country_indeed='USA', # only needed for indeed / glassdoor # linkedin_fetch_description=True # get full description and direct job url for linkedin (slower) - # proxies=["Efb5EA8OIk0BQb:wifi;us;@proxy.soax.com:9000", "localhost"], + # proxies=["208.195.175.46:65095", "208.195.175.45:65095", "localhost"], ) print(f"Found {len(jobs)} jobs") diff --git a/pyproject.toml b/pyproject.toml index f94ae2e..ed98ca6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "python-jobspy" -version = "1.1.54" +version = "1.1.55" description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/Bunsly/JobSpy" diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/jobspy/scrapers/linkedin/__init__.py index f3bda94..03ec37f 100644 --- a/src/jobspy/scrapers/linkedin/__init__.py +++ b/src/jobspy/scrapers/linkedin/__init__.py @@ -13,7 +13,6 @@ import regex as re from typing import Optional from datetime import datetime -from threading import Lock from bs4.element import Tag from bs4 import BeautifulSoup from urllib.parse import urlparse, urlunparse, unquote @@ -71,8 +70,7 @@ class LinkedInScraper(Scraper): self.scraper_input = scraper_input job_list: list[JobPost] = [] seen_urls = set() - url_lock = Lock() - page = scraper_input.offset // 25 * 25 if scraper_input.offset else 0 + page = scraper_input.offset // 10 * 10 if scraper_input.offset else 0 request_count = 0 seconds_old = ( scraper_input.hours_old * 3600 if scraper_input.hours_old else None @@ -142,10 +140,9 @@ class LinkedInScraper(Scraper): job_id = href.split("-")[-1] job_url = f"{self.base_url}/jobs/view/{job_id}" - with url_lock: - if job_url in seen_urls: - continue - seen_urls.add(job_url) + if job_url in seen_urls: + continue + seen_urls.add(job_url) try: fetch_desc = scraper_input.linkedin_fetch_description job_post = self._process_job(job_card, job_url, fetch_desc)