mirror of https://github.com/Bunsly/JobSpy
chore: version
parent
7f6271b2e0
commit
6439f71433
|
@ -41,7 +41,7 @@ jobs = scrape_jobs(
|
||||||
country_indeed='USA', # only needed for indeed / glassdoor
|
country_indeed='USA', # only needed for indeed / glassdoor
|
||||||
|
|
||||||
# linkedin_fetch_description=True # get full description and direct job url for linkedin (slower)
|
# linkedin_fetch_description=True # get full description and direct job url for linkedin (slower)
|
||||||
# proxies=["Efb5EA8OIk0BQb:wifi;us;@proxy.soax.com:9000", "localhost"],
|
# proxies=["208.195.175.46:65095", "208.195.175.45:65095", "localhost"],
|
||||||
|
|
||||||
)
|
)
|
||||||
print(f"Found {len(jobs)} jobs")
|
print(f"Found {len(jobs)} jobs")
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "python-jobspy"
|
name = "python-jobspy"
|
||||||
version = "1.1.54"
|
version = "1.1.55"
|
||||||
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
|
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
|
||||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||||
homepage = "https://github.com/Bunsly/JobSpy"
|
homepage = "https://github.com/Bunsly/JobSpy"
|
||||||
|
|
|
@ -13,7 +13,6 @@ import regex as re
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from threading import Lock
|
|
||||||
from bs4.element import Tag
|
from bs4.element import Tag
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from urllib.parse import urlparse, urlunparse, unquote
|
from urllib.parse import urlparse, urlunparse, unquote
|
||||||
|
@ -71,8 +70,7 @@ class LinkedInScraper(Scraper):
|
||||||
self.scraper_input = scraper_input
|
self.scraper_input = scraper_input
|
||||||
job_list: list[JobPost] = []
|
job_list: list[JobPost] = []
|
||||||
seen_urls = set()
|
seen_urls = set()
|
||||||
url_lock = Lock()
|
page = scraper_input.offset // 10 * 10 if scraper_input.offset else 0
|
||||||
page = scraper_input.offset // 25 * 25 if scraper_input.offset else 0
|
|
||||||
request_count = 0
|
request_count = 0
|
||||||
seconds_old = (
|
seconds_old = (
|
||||||
scraper_input.hours_old * 3600 if scraper_input.hours_old else None
|
scraper_input.hours_old * 3600 if scraper_input.hours_old else None
|
||||||
|
@ -142,10 +140,9 @@ class LinkedInScraper(Scraper):
|
||||||
job_id = href.split("-")[-1]
|
job_id = href.split("-")[-1]
|
||||||
job_url = f"{self.base_url}/jobs/view/{job_id}"
|
job_url = f"{self.base_url}/jobs/view/{job_id}"
|
||||||
|
|
||||||
with url_lock:
|
if job_url in seen_urls:
|
||||||
if job_url in seen_urls:
|
continue
|
||||||
continue
|
seen_urls.add(job_url)
|
||||||
seen_urls.add(job_url)
|
|
||||||
try:
|
try:
|
||||||
fetch_desc = scraper_input.linkedin_fetch_description
|
fetch_desc = scraper_input.linkedin_fetch_description
|
||||||
job_post = self._process_job(job_card, job_url, fetch_desc)
|
job_post = self._process_job(job_card, job_url, fetch_desc)
|
||||||
|
|
Loading…
Reference in New Issue