mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-06 04:24:30 -08:00
Compare commits
6 Commits
v1.1.77
...
051981689f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
051981689f | ||
|
|
903b7e6f1b | ||
|
|
6782b9884e | ||
|
|
94c74d60f2 | ||
|
|
5463e5a664 | ||
|
|
ed139e7e6b |
10
.github/workflows/publish-to-pypi.yml
vendored
10
.github/workflows/publish-to-pypi.yml
vendored
@@ -1,5 +1,9 @@
|
|||||||
name: Publish JobSpy to PyPi
|
name: Publish JobSpy to PyPi
|
||||||
on: push
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- main
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
build-n-publish:
|
build-n-publish:
|
||||||
@@ -27,7 +31,7 @@ jobs:
|
|||||||
build
|
build
|
||||||
|
|
||||||
- name: Publish distribution 📦 to PyPI
|
- name: Publish distribution 📦 to PyPI
|
||||||
if: startsWith(github.ref, 'refs/tags')
|
if: startsWith(github.ref, 'refs/tags') || github.event_name == 'workflow_dispatch'
|
||||||
uses: pypa/gh-action-pypi-publish@release/v1
|
uses: pypa/gh-action-pypi-publish@release/v1
|
||||||
with:
|
with:
|
||||||
password: ${{ secrets.PYPI_API_TOKEN }}
|
password: ${{ secrets.PYPI_API_TOKEN }}
|
||||||
@@ -220,6 +220,7 @@ JobPost
|
|||||||
│ ├── country
|
│ ├── country
|
||||||
│ ├── city
|
│ ├── city
|
||||||
│ ├── state
|
│ ├── state
|
||||||
|
├── is_remote
|
||||||
├── description
|
├── description
|
||||||
├── job_type: fulltime, parttime, internship, contract
|
├── job_type: fulltime, parttime, internship, contract
|
||||||
├── job_function
|
├── job_function
|
||||||
@@ -229,8 +230,7 @@ JobPost
|
|||||||
│ ├── currency
|
│ ├── currency
|
||||||
│ └── salary_source: direct_data, description (parsed from posting)
|
│ └── salary_source: direct_data, description (parsed from posting)
|
||||||
├── date_posted
|
├── date_posted
|
||||||
├── emails
|
└── emails
|
||||||
└── is_remote
|
|
||||||
|
|
||||||
Linkedin specific
|
Linkedin specific
|
||||||
└── job_level
|
└── job_level
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ def get_job_type(attributes: list) -> list[JobType]:
|
|||||||
def get_compensation(compensation: dict) -> Compensation | None:
|
def get_compensation(compensation: dict) -> Compensation | None:
|
||||||
"""
|
"""
|
||||||
Parses the job to get compensation
|
Parses the job to get compensation
|
||||||
:param sssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrompensation:
|
:param compensation:
|
||||||
:return: compensation object
|
:return: compensation object
|
||||||
"""
|
"""
|
||||||
if not compensation["baseSalary"] and not compensation["estimated"]:
|
if not compensation["baseSalary"] and not compensation["estimated"]:
|
||||||
|
|||||||
@@ -14,10 +14,11 @@ from bs4.element import Tag
|
|||||||
from jobspy.exception import LinkedInException
|
from jobspy.exception import LinkedInException
|
||||||
from jobspy.linkedin.constant import headers
|
from jobspy.linkedin.constant import headers
|
||||||
from jobspy.linkedin.util import (
|
from jobspy.linkedin.util import (
|
||||||
|
is_job_remote,
|
||||||
job_type_code,
|
job_type_code,
|
||||||
parse_job_type,
|
parse_job_type,
|
||||||
parse_job_level,
|
parse_job_level,
|
||||||
parse_company_industry,
|
parse_company_industry
|
||||||
)
|
)
|
||||||
from jobspy.model import (
|
from jobspy.model import (
|
||||||
JobPost,
|
JobPost,
|
||||||
@@ -173,7 +174,7 @@ class LinkedIn(Scraper):
|
|||||||
) -> Optional[JobPost]:
|
) -> Optional[JobPost]:
|
||||||
salary_tag = job_card.find("span", class_="job-search-card__salary-info")
|
salary_tag = job_card.find("span", class_="job-search-card__salary-info")
|
||||||
|
|
||||||
compensation = None
|
compensation = description = None
|
||||||
if salary_tag:
|
if salary_tag:
|
||||||
salary_text = salary_tag.get_text(separator=" ").strip()
|
salary_text = salary_tag.get_text(separator=" ").strip()
|
||||||
salary_values = [currency_parser(value) for value in salary_text.split("-")]
|
salary_values = [currency_parser(value) for value in salary_text.split("-")]
|
||||||
@@ -217,6 +218,8 @@ class LinkedIn(Scraper):
|
|||||||
job_details = {}
|
job_details = {}
|
||||||
if full_descr:
|
if full_descr:
|
||||||
job_details = self._get_job_details(job_id)
|
job_details = self._get_job_details(job_id)
|
||||||
|
description = job_details.get("description")
|
||||||
|
is_remote = is_job_remote(title, description, location)
|
||||||
|
|
||||||
return JobPost(
|
return JobPost(
|
||||||
id=f"li-{job_id}",
|
id=f"li-{job_id}",
|
||||||
@@ -224,6 +227,7 @@ class LinkedIn(Scraper):
|
|||||||
company_name=company,
|
company_name=company,
|
||||||
company_url=company_url,
|
company_url=company_url,
|
||||||
location=location,
|
location=location,
|
||||||
|
is_remote=is_remote,
|
||||||
date_posted=date_posted,
|
date_posted=date_posted,
|
||||||
job_url=f"{self.base_url}/jobs/view/{job_id}",
|
job_url=f"{self.base_url}/jobs/view/{job_id}",
|
||||||
compensation=compensation,
|
compensation=compensation,
|
||||||
@@ -232,7 +236,7 @@ class LinkedIn(Scraper):
|
|||||||
company_industry=job_details.get("company_industry"),
|
company_industry=job_details.get("company_industry"),
|
||||||
description=job_details.get("description"),
|
description=job_details.get("description"),
|
||||||
job_url_direct=job_details.get("job_url_direct"),
|
job_url_direct=job_details.get("job_url_direct"),
|
||||||
emails=extract_emails_from_text(job_details.get("description")),
|
emails=extract_emails_from_text(description),
|
||||||
company_logo=job_details.get("company_logo"),
|
company_logo=job_details.get("company_logo"),
|
||||||
job_function=job_details.get("job_function"),
|
job_function=job_details.get("job_function"),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from jobspy.model import JobType
|
from jobspy.model import JobType, Location
|
||||||
from jobspy.util import get_enum_from_job_type
|
from jobspy.util import get_enum_from_job_type
|
||||||
|
|
||||||
|
|
||||||
@@ -83,3 +83,14 @@ def parse_company_industry(soup_industry: BeautifulSoup) -> str | None:
|
|||||||
industry = industry_span.get_text(strip=True)
|
industry = industry_span.get_text(strip=True)
|
||||||
|
|
||||||
return industry
|
return industry
|
||||||
|
|
||||||
|
|
||||||
|
def is_job_remote(title: dict, description: str, location: Location) -> bool:
|
||||||
|
"""
|
||||||
|
Searches the title, location, and description to check if job is remote
|
||||||
|
"""
|
||||||
|
remote_keywords = ["remote", "work from home", "wfh"]
|
||||||
|
location = location.display_location()
|
||||||
|
full_string = f'{title} {description} {location}'.lower()
|
||||||
|
is_remote = any(keyword in full_string for keyword in remote_keywords)
|
||||||
|
return is_remote
|
||||||
|
|||||||
@@ -69,16 +69,20 @@ class Country(Enum):
|
|||||||
AUSTRIA = ("austria", "at", "at")
|
AUSTRIA = ("austria", "at", "at")
|
||||||
BAHRAIN = ("bahrain", "bh")
|
BAHRAIN = ("bahrain", "bh")
|
||||||
BELGIUM = ("belgium", "be", "fr:be")
|
BELGIUM = ("belgium", "be", "fr:be")
|
||||||
|
BULGARIA = ("bulgaria", "bg")
|
||||||
BRAZIL = ("brazil", "br", "com.br")
|
BRAZIL = ("brazil", "br", "com.br")
|
||||||
CANADA = ("canada", "ca", "ca")
|
CANADA = ("canada", "ca", "ca")
|
||||||
CHILE = ("chile", "cl")
|
CHILE = ("chile", "cl")
|
||||||
CHINA = ("china", "cn")
|
CHINA = ("china", "cn")
|
||||||
COLOMBIA = ("colombia", "co")
|
COLOMBIA = ("colombia", "co")
|
||||||
COSTARICA = ("costa rica", "cr")
|
COSTARICA = ("costa rica", "cr")
|
||||||
|
CROATIA = ("croatia", "hr")
|
||||||
|
CYPRUS = ("cyprus", "cy")
|
||||||
CZECHREPUBLIC = ("czech republic,czechia", "cz")
|
CZECHREPUBLIC = ("czech republic,czechia", "cz")
|
||||||
DENMARK = ("denmark", "dk")
|
DENMARK = ("denmark", "dk")
|
||||||
ECUADOR = ("ecuador", "ec")
|
ECUADOR = ("ecuador", "ec")
|
||||||
EGYPT = ("egypt", "eg")
|
EGYPT = ("egypt", "eg")
|
||||||
|
ESTONIA = ("estonia", "ee")
|
||||||
FINLAND = ("finland", "fi")
|
FINLAND = ("finland", "fi")
|
||||||
FRANCE = ("france", "fr", "fr")
|
FRANCE = ("france", "fr", "fr")
|
||||||
GERMANY = ("germany", "de", "de")
|
GERMANY = ("germany", "de", "de")
|
||||||
@@ -92,6 +96,8 @@ class Country(Enum):
|
|||||||
ITALY = ("italy", "it", "it")
|
ITALY = ("italy", "it", "it")
|
||||||
JAPAN = ("japan", "jp")
|
JAPAN = ("japan", "jp")
|
||||||
KUWAIT = ("kuwait", "kw")
|
KUWAIT = ("kuwait", "kw")
|
||||||
|
LATVIA = ("latvia", "lv")
|
||||||
|
LITHUANIA = ("lithuania", "lt")
|
||||||
LUXEMBOURG = ("luxembourg", "lu")
|
LUXEMBOURG = ("luxembourg", "lu")
|
||||||
MALAYSIA = ("malaysia", "malaysia:my", "com")
|
MALAYSIA = ("malaysia", "malaysia:my", "com")
|
||||||
MALTA = ("malta", "malta:mt", "mt")
|
MALTA = ("malta", "malta:mt", "mt")
|
||||||
@@ -112,6 +118,8 @@ class Country(Enum):
|
|||||||
ROMANIA = ("romania", "ro")
|
ROMANIA = ("romania", "ro")
|
||||||
SAUDIARABIA = ("saudi arabia", "sa")
|
SAUDIARABIA = ("saudi arabia", "sa")
|
||||||
SINGAPORE = ("singapore", "sg", "sg")
|
SINGAPORE = ("singapore", "sg", "sg")
|
||||||
|
SLOVAKIA = ("slovakia", "sk")
|
||||||
|
SLOVENIA = ("slovenia", "sl")
|
||||||
SOUTHAFRICA = ("south africa", "za")
|
SOUTHAFRICA = ("south africa", "za")
|
||||||
SOUTHKOREA = ("south korea", "kr")
|
SOUTHKOREA = ("south korea", "kr")
|
||||||
SPAIN = ("spain", "es", "es")
|
SPAIN = ("spain", "es", "es")
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|||||||
|
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "python-jobspy"
|
name = "python-jobspy"
|
||||||
version = "1.1.77"
|
version = "1.1.78"
|
||||||
description = "Job scraper for LinkedIn, Indeed, Glassdoor, ZipRecruiter & Bayt"
|
description = "Job scraper for LinkedIn, Indeed, Glassdoor, ZipRecruiter & Bayt"
|
||||||
authors = ["Cullen Watson <cullen@cullenwatson.com>", "Zachary Hampton <zachary@zacharysproducts.com>"]
|
authors = ["Cullen Watson <cullen@cullenwatson.com>", "Zachary Hampton <zachary@zacharysproducts.com>"]
|
||||||
homepage = "https://github.com/cullenwatson/JobSpy"
|
homepage = "https://github.com/cullenwatson/JobSpy"
|
||||||
|
|||||||
Reference in New Issue
Block a user