mirror of https://github.com/Bunsly/JobSpy
parent
cc9e7866b7
commit
81f70ff8a5
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "python-jobspy"
|
||||
version = "1.1.26"
|
||||
version = "1.1.27"
|
||||
description = "Job scraper for LinkedIn, Indeed, Glassdoor & ZipRecruiter"
|
||||
authors = ["Zachary Hampton <zachary@bunsly.com>", "Cullen Watson <cullen@bunsly.com>"]
|
||||
homepage = "https://github.com/Bunsly/JobSpy"
|
||||
|
|
|
@ -16,9 +16,9 @@ from threading import Lock
|
|||
from urllib.parse import urlparse, urlunparse
|
||||
|
||||
from .. import Scraper, ScraperInput, Site
|
||||
from ..utils import count_urgent_words, extract_emails_from_text, get_enum_from_job_type
|
||||
from ..utils import count_urgent_words, extract_emails_from_text, get_enum_from_job_type, currency_parser
|
||||
from ..exceptions import LinkedInException
|
||||
from ...jobs import JobPost, Location, JobResponse, JobType, Country
|
||||
from ...jobs import JobPost, Location, JobResponse, JobType, Country, Compensation
|
||||
|
||||
|
||||
class LinkedInScraper(Scraper):
|
||||
|
@ -135,6 +135,22 @@ class LinkedInScraper(Scraper):
|
|||
return JobResponse(jobs=job_list)
|
||||
|
||||
def process_job(self, job_card: Tag, job_url: str) -> Optional[JobPost]:
|
||||
salary_tag = job_card.find('span', class_='job-search-card__salary-info')
|
||||
|
||||
compensation = None
|
||||
if salary_tag:
|
||||
salary_text = salary_tag.get_text(separator=' ').strip()
|
||||
salary_values = [currency_parser(value) for value in salary_text.split('-')]
|
||||
salary_min = salary_values[0]
|
||||
salary_max = salary_values[1]
|
||||
currency = salary_text[0] if salary_text[0] != '$' else 'USD'
|
||||
|
||||
compensation = Compensation(
|
||||
min_amount=int(salary_min),
|
||||
max_amount=int(salary_max),
|
||||
currency=currency,
|
||||
)
|
||||
|
||||
title_tag = job_card.find("span", class_="sr-only")
|
||||
title = title_tag.get_text(strip=True) if title_tag else "N/A"
|
||||
|
||||
|
@ -177,6 +193,7 @@ class LinkedInScraper(Scraper):
|
|||
date_posted=date_posted,
|
||||
job_url=job_url,
|
||||
job_type=job_type,
|
||||
compensation=compensation,
|
||||
benefits=benefits,
|
||||
emails=extract_emails_from_text(description) if description else None,
|
||||
num_urgent_words=count_urgent_words(description) if description else None,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import re
|
||||
import numpy as np
|
||||
|
||||
import requests
|
||||
import tls_client
|
||||
|
@ -62,3 +63,19 @@ def get_enum_from_job_type(job_type_str: str) -> JobType | None:
|
|||
if job_type_str in job_type.value:
|
||||
res = job_type
|
||||
return res
|
||||
|
||||
def currency_parser(cur_str):
    """Parse a human-formatted money string into a number.

    Both the "1,234.56" and the "1.234,56" conventions are understood: a
    ``.`` or ``,`` found within the last three characters is taken to be the
    decimal mark, while any ``.`` or ``,`` earlier in the string is taken to
    be a thousands separator and dropped.

    Args:
        cur_str: Text containing a single amount, e.g. ``"$100,000"`` or
            ``"1.234,56 EUR"``.

    Returns:
        The amount rounded to two decimals, as produced by ``np.round``.

    Raises:
        ValueError: If no valid number is left once everything except
            digits, ``.``, ``,`` and ``-`` has been stripped.
    """
    # Keep only digits, the two possible separators and '-'; this drops
    # currency symbols, currency codes (e.g. EUR) and whitespace.
    cleaned = re.sub(r"[^-0-9.,]", "", cur_str)

    # Only the last three characters may hold a decimal mark, so every
    # separator before them is a thousands separator and can be removed.
    head, tail = cleaned[:-3], cleaned[-3:]
    # float() only understands '.', so a decimal comma in the tail is
    # converted; a tail without separators is left untouched.
    normalized = re.sub(r"[.,]", "", head) + tail.replace(",", ".")

    try:
        num = float(normalized)
    except ValueError as err:
        raise ValueError(
            f"could not parse a currency amount from {cur_str!r}"
        ) from err

    return np.round(num, 2)
|
||||
|
|
Loading…
Reference in New Issue