mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-06 04:24:30 -08:00
Compare commits
5 Commits
1.1.78
...
051981689f
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
051981689f | ||
|
|
903b7e6f1b | ||
|
|
6782b9884e | ||
|
|
94c74d60f2 | ||
|
|
5463e5a664 |
10
.github/workflows/publish-to-pypi.yml
vendored
10
.github/workflows/publish-to-pypi.yml
vendored
@@ -1,5 +1,9 @@
|
||||
name: Publish JobSpy to PyPi
|
||||
on: push
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
build-n-publish:
|
||||
@@ -27,7 +31,7 @@ jobs:
|
||||
build
|
||||
|
||||
- name: Publish distribution 📦 to PyPI
|
||||
if: startsWith(github.ref, 'refs/tags')
|
||||
if: startsWith(github.ref, 'refs/tags') || github.event_name == 'workflow_dispatch'
|
||||
uses: pypa/gh-action-pypi-publish@release/v1
|
||||
with:
|
||||
password: ${{ secrets.PYPI_API_TOKEN }}
|
||||
password: ${{ secrets.PYPI_API_TOKEN }}
|
||||
@@ -220,6 +220,7 @@ JobPost
|
||||
│ ├── country
|
||||
│ ├── city
|
||||
│ ├── state
|
||||
├── is_remote
|
||||
├── description
|
||||
├── job_type: fulltime, parttime, internship, contract
|
||||
├── job_function
|
||||
@@ -229,8 +230,7 @@ JobPost
|
||||
│ ├── currency
|
||||
│ └── salary_source: direct_data, description (parsed from posting)
|
||||
├── date_posted
|
||||
├── emails
|
||||
└── is_remote
|
||||
└── emails
|
||||
|
||||
Linkedin specific
|
||||
└── job_level
|
||||
|
||||
@@ -20,7 +20,7 @@ def get_job_type(attributes: list) -> list[JobType]:
|
||||
def get_compensation(compensation: dict) -> Compensation | None:
|
||||
"""
|
||||
Parses the job to get compensation
|
||||
:param sssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrompensation:
|
||||
:param compensation:
|
||||
:return: compensation object
|
||||
"""
|
||||
if not compensation["baseSalary"] and not compensation["estimated"]:
|
||||
|
||||
@@ -14,10 +14,11 @@ from bs4.element import Tag
|
||||
from jobspy.exception import LinkedInException
|
||||
from jobspy.linkedin.constant import headers
|
||||
from jobspy.linkedin.util import (
|
||||
is_job_remote,
|
||||
job_type_code,
|
||||
parse_job_type,
|
||||
parse_job_level,
|
||||
parse_company_industry,
|
||||
parse_company_industry
|
||||
)
|
||||
from jobspy.model import (
|
||||
JobPost,
|
||||
@@ -173,7 +174,7 @@ class LinkedIn(Scraper):
|
||||
) -> Optional[JobPost]:
|
||||
salary_tag = job_card.find("span", class_="job-search-card__salary-info")
|
||||
|
||||
compensation = None
|
||||
compensation = description = None
|
||||
if salary_tag:
|
||||
salary_text = salary_tag.get_text(separator=" ").strip()
|
||||
salary_values = [currency_parser(value) for value in salary_text.split("-")]
|
||||
@@ -217,6 +218,8 @@ class LinkedIn(Scraper):
|
||||
job_details = {}
|
||||
if full_descr:
|
||||
job_details = self._get_job_details(job_id)
|
||||
description = job_details.get("description")
|
||||
is_remote = is_job_remote(title, description, location)
|
||||
|
||||
return JobPost(
|
||||
id=f"li-{job_id}",
|
||||
@@ -224,6 +227,7 @@ class LinkedIn(Scraper):
|
||||
company_name=company,
|
||||
company_url=company_url,
|
||||
location=location,
|
||||
is_remote=is_remote,
|
||||
date_posted=date_posted,
|
||||
job_url=f"{self.base_url}/jobs/view/{job_id}",
|
||||
compensation=compensation,
|
||||
@@ -232,7 +236,7 @@ class LinkedIn(Scraper):
|
||||
company_industry=job_details.get("company_industry"),
|
||||
description=job_details.get("description"),
|
||||
job_url_direct=job_details.get("job_url_direct"),
|
||||
emails=extract_emails_from_text(job_details.get("description")),
|
||||
emails=extract_emails_from_text(description),
|
||||
company_logo=job_details.get("company_logo"),
|
||||
job_function=job_details.get("job_function"),
|
||||
)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from jobspy.model import JobType
|
||||
from jobspy.model import JobType, Location
|
||||
from jobspy.util import get_enum_from_job_type
|
||||
|
||||
|
||||
@@ -83,3 +83,14 @@ def parse_company_industry(soup_industry: BeautifulSoup) -> str | None:
|
||||
industry = industry_span.get_text(strip=True)
|
||||
|
||||
return industry
|
||||
|
||||
|
||||
def is_job_remote(title: dict, description: str, location: Location) -> bool:
|
||||
"""
|
||||
Searches the title, location, and description to check if job is remote
|
||||
"""
|
||||
remote_keywords = ["remote", "work from home", "wfh"]
|
||||
location = location.display_location()
|
||||
full_string = f'{title} {description} {location}'.lower()
|
||||
is_remote = any(keyword in full_string for keyword in remote_keywords)
|
||||
return is_remote
|
||||
|
||||
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
name = "python-jobspy"
|
||||
version = "1.1.77"
|
||||
version = "1.1.78"
|
||||
description = "Job scraper for LinkedIn, Indeed, Glassdoor, ZipRecruiter & Bayt"
|
||||
authors = ["Cullen Watson <cullen@cullenwatson.com>", "Zachary Hampton <zachary@zacharysproducts.com>"]
|
||||
homepage = "https://github.com/cullenwatson/JobSpy"
|
||||
|
||||
Reference in New Issue
Block a user