Compare commits

..

6 Commits

Author SHA1 Message Date
prudvisorra-aifa
051981689f Update util.py (#256) 2025-03-17 11:51:19 -05:00
Cullen Watson
903b7e6f1b fix(linkedin):is remote 2025-03-06 13:38:28 -06:00
Cullen Watson
6782b9884e fix:workflow 2025-03-01 14:49:31 -06:00
Cullen Watson
94c74d60f2 enh:workflow manual run 2025-03-01 14:47:24 -06:00
Cullen Watson
5463e5a664 chore:version 2025-03-01 14:38:25 -06:00
arkhy
ed139e7e6b added missing EU countries and languages (#250)
Co-authored-by: Kate Arkhangelskaya <ekar559e@tu-dresden.de>
2025-03-01 14:30:08 -06:00
7 changed files with 38 additions and 11 deletions

View File

@@ -1,5 +1,9 @@
name: Publish JobSpy to PyPi name: Publish JobSpy to PyPi
on: push on:
push:
branches:
- main
workflow_dispatch:
jobs: jobs:
build-n-publish: build-n-publish:
@@ -27,7 +31,7 @@ jobs:
build build
- name: Publish distribution 📦 to PyPI - name: Publish distribution 📦 to PyPI
if: startsWith(github.ref, 'refs/tags') if: startsWith(github.ref, 'refs/tags') || github.event_name == 'workflow_dispatch'
uses: pypa/gh-action-pypi-publish@release/v1 uses: pypa/gh-action-pypi-publish@release/v1
with: with:
password: ${{ secrets.PYPI_API_TOKEN }} password: ${{ secrets.PYPI_API_TOKEN }}

View File

@@ -220,6 +220,7 @@ JobPost
│ ├── country │ ├── country
│ ├── city │ ├── city
│ ├── state │ ├── state
├── is_remote
├── description ├── description
├── job_type: fulltime, parttime, internship, contract ├── job_type: fulltime, parttime, internship, contract
├── job_function ├── job_function
@@ -229,8 +230,7 @@ JobPost
│ ├── currency │ ├── currency
│ └── salary_source: direct_data, description (parsed from posting) │ └── salary_source: direct_data, description (parsed from posting)
├── date_posted ├── date_posted
├── emails └── emails
└── is_remote
Linkedin specific Linkedin specific
└── job_level └── job_level

View File

@@ -20,7 +20,7 @@ def get_job_type(attributes: list) -> list[JobType]:
def get_compensation(compensation: dict) -> Compensation | None: def get_compensation(compensation: dict) -> Compensation | None:
""" """
Parses the job to get compensation Parses the job to get compensation
:param sssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrompensation: :param compensation:
:return: compensation object :return: compensation object
""" """
if not compensation["baseSalary"] and not compensation["estimated"]: if not compensation["baseSalary"] and not compensation["estimated"]:

View File

@@ -14,10 +14,11 @@ from bs4.element import Tag
from jobspy.exception import LinkedInException from jobspy.exception import LinkedInException
from jobspy.linkedin.constant import headers from jobspy.linkedin.constant import headers
from jobspy.linkedin.util import ( from jobspy.linkedin.util import (
is_job_remote,
job_type_code, job_type_code,
parse_job_type, parse_job_type,
parse_job_level, parse_job_level,
parse_company_industry, parse_company_industry
) )
from jobspy.model import ( from jobspy.model import (
JobPost, JobPost,
@@ -173,7 +174,7 @@ class LinkedIn(Scraper):
) -> Optional[JobPost]: ) -> Optional[JobPost]:
salary_tag = job_card.find("span", class_="job-search-card__salary-info") salary_tag = job_card.find("span", class_="job-search-card__salary-info")
compensation = None compensation = description = None
if salary_tag: if salary_tag:
salary_text = salary_tag.get_text(separator=" ").strip() salary_text = salary_tag.get_text(separator=" ").strip()
salary_values = [currency_parser(value) for value in salary_text.split("-")] salary_values = [currency_parser(value) for value in salary_text.split("-")]
@@ -217,6 +218,8 @@ class LinkedIn(Scraper):
job_details = {} job_details = {}
if full_descr: if full_descr:
job_details = self._get_job_details(job_id) job_details = self._get_job_details(job_id)
description = job_details.get("description")
is_remote = is_job_remote(title, description, location)
return JobPost( return JobPost(
id=f"li-{job_id}", id=f"li-{job_id}",
@@ -224,6 +227,7 @@ class LinkedIn(Scraper):
company_name=company, company_name=company,
company_url=company_url, company_url=company_url,
location=location, location=location,
is_remote=is_remote,
date_posted=date_posted, date_posted=date_posted,
job_url=f"{self.base_url}/jobs/view/{job_id}", job_url=f"{self.base_url}/jobs/view/{job_id}",
compensation=compensation, compensation=compensation,
@@ -232,7 +236,7 @@ class LinkedIn(Scraper):
company_industry=job_details.get("company_industry"), company_industry=job_details.get("company_industry"),
description=job_details.get("description"), description=job_details.get("description"),
job_url_direct=job_details.get("job_url_direct"), job_url_direct=job_details.get("job_url_direct"),
emails=extract_emails_from_text(job_details.get("description")), emails=extract_emails_from_text(description),
company_logo=job_details.get("company_logo"), company_logo=job_details.get("company_logo"),
job_function=job_details.get("job_function"), job_function=job_details.get("job_function"),
) )

View File

@@ -1,6 +1,6 @@
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from jobspy.model import JobType from jobspy.model import JobType, Location
from jobspy.util import get_enum_from_job_type from jobspy.util import get_enum_from_job_type
@@ -83,3 +83,14 @@ def parse_company_industry(soup_industry: BeautifulSoup) -> str | None:
industry = industry_span.get_text(strip=True) industry = industry_span.get_text(strip=True)
return industry return industry
def is_job_remote(title: str, description: str | None, location: Location) -> bool:
    """
    Checks whether a job posting looks like a remote position

    Runs a case-insensitive substring search for remote-work keywords over
    the title, the description and the location's display string.
    :param title: job title text
    :param description: job description text, or None when it was not fetched
    :param location: object whose display_location() gives the location text
    :return: True if any remote keyword occurs in the combined text
    """
    remote_keywords = ("remote", "work from home", "wfh")
    location_text = location.display_location()
    # The description is None when job details were not requested; substitute
    # an empty string so the literal text "None" is never part of the search.
    full_string = f"{title or ''} {description or ''} {location_text or ''}".lower()
    return any(keyword in full_string for keyword in remote_keywords)

View File

@@ -69,16 +69,20 @@ class Country(Enum):
AUSTRIA = ("austria", "at", "at") AUSTRIA = ("austria", "at", "at")
BAHRAIN = ("bahrain", "bh") BAHRAIN = ("bahrain", "bh")
BELGIUM = ("belgium", "be", "fr:be") BELGIUM = ("belgium", "be", "fr:be")
BULGARIA = ("bulgaria", "bg")
BRAZIL = ("brazil", "br", "com.br") BRAZIL = ("brazil", "br", "com.br")
CANADA = ("canada", "ca", "ca") CANADA = ("canada", "ca", "ca")
CHILE = ("chile", "cl") CHILE = ("chile", "cl")
CHINA = ("china", "cn") CHINA = ("china", "cn")
COLOMBIA = ("colombia", "co") COLOMBIA = ("colombia", "co")
COSTARICA = ("costa rica", "cr") COSTARICA = ("costa rica", "cr")
CROATIA = ("croatia", "hr")
CYPRUS = ("cyprus", "cy")
CZECHREPUBLIC = ("czech republic,czechia", "cz") CZECHREPUBLIC = ("czech republic,czechia", "cz")
DENMARK = ("denmark", "dk") DENMARK = ("denmark", "dk")
ECUADOR = ("ecuador", "ec") ECUADOR = ("ecuador", "ec")
EGYPT = ("egypt", "eg") EGYPT = ("egypt", "eg")
ESTONIA = ("estonia", "ee")
FINLAND = ("finland", "fi") FINLAND = ("finland", "fi")
FRANCE = ("france", "fr", "fr") FRANCE = ("france", "fr", "fr")
GERMANY = ("germany", "de", "de") GERMANY = ("germany", "de", "de")
@@ -92,6 +96,8 @@ class Country(Enum):
ITALY = ("italy", "it", "it") ITALY = ("italy", "it", "it")
JAPAN = ("japan", "jp") JAPAN = ("japan", "jp")
KUWAIT = ("kuwait", "kw") KUWAIT = ("kuwait", "kw")
LATVIA = ("latvia", "lv")
LITHUANIA = ("lithuania", "lt")
LUXEMBOURG = ("luxembourg", "lu") LUXEMBOURG = ("luxembourg", "lu")
MALAYSIA = ("malaysia", "malaysia:my", "com") MALAYSIA = ("malaysia", "malaysia:my", "com")
MALTA = ("malta", "malta:mt", "mt") MALTA = ("malta", "malta:mt", "mt")
@@ -112,6 +118,8 @@ class Country(Enum):
ROMANIA = ("romania", "ro") ROMANIA = ("romania", "ro")
SAUDIARABIA = ("saudi arabia", "sa") SAUDIARABIA = ("saudi arabia", "sa")
SINGAPORE = ("singapore", "sg", "sg") SINGAPORE = ("singapore", "sg", "sg")
SLOVAKIA = ("slovakia", "sk")
SLOVENIA = ("slovenia", "sl")
SOUTHAFRICA = ("south africa", "za") SOUTHAFRICA = ("south africa", "za")
SOUTHKOREA = ("south korea", "kr") SOUTHKOREA = ("south korea", "kr")
SPAIN = ("spain", "es", "es") SPAIN = ("spain", "es", "es")

View File

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry] [tool.poetry]
name = "python-jobspy" name = "python-jobspy"
version = "1.1.77" version = "1.1.78"
description = "Job scraper for LinkedIn, Indeed, Glassdoor, ZipRecruiter & Bayt" description = "Job scraper for LinkedIn, Indeed, Glassdoor, ZipRecruiter & Bayt"
authors = ["Cullen Watson <cullen@cullenwatson.com>", "Zachary Hampton <zachary@zacharysproducts.com>"] authors = ["Cullen Watson <cullen@cullenwatson.com>", "Zachary Hampton <zachary@zacharysproducts.com>"]
homepage = "https://github.com/cullenwatson/JobSpy" homepage = "https://github.com/cullenwatson/JobSpy"