Compare commits

...

5 Commits

Author SHA1 Message Date
prudvisorra-aifa
051981689f Update util.py (#256) 2025-03-17 11:51:19 -05:00
Cullen Watson
903b7e6f1b fix(linkedin):is remote 2025-03-06 13:38:28 -06:00
Cullen Watson
6782b9884e fix:workflow 2025-03-01 14:49:31 -06:00
Cullen Watson
94c74d60f2 enh:workflow manual run 2025-03-01 14:47:24 -06:00
Cullen Watson
5463e5a664 chore:version 2025-03-01 14:38:25 -06:00
6 changed files with 30 additions and 11 deletions

View File

@@ -1,5 +1,9 @@
name: Publish JobSpy to PyPi
on: push
on:
push:
branches:
- main
workflow_dispatch:
jobs:
build-n-publish:
@@ -27,7 +31,7 @@ jobs:
build
- name: Publish distribution 📦 to PyPI
if: startsWith(github.ref, 'refs/tags')
if: startsWith(github.ref, 'refs/tags') || github.event_name == 'workflow_dispatch'
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.PYPI_API_TOKEN }}
password: ${{ secrets.PYPI_API_TOKEN }}

View File

@@ -220,6 +220,7 @@ JobPost
│ ├── country
│ ├── city
│ ├── state
├── is_remote
├── description
├── job_type: fulltime, parttime, internship, contract
├── job_function
@@ -229,8 +230,7 @@ JobPost
│ ├── currency
│ └── salary_source: direct_data, description (parsed from posting)
├── date_posted
├── emails
└── is_remote
└── emails
Linkedin specific
└── job_level

View File

@@ -20,7 +20,7 @@ def get_job_type(attributes: list) -> list[JobType]:
def get_compensation(compensation: dict) -> Compensation | None:
"""
Parses the job to get compensation
:param sssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrompensation:
:param compensation:
:return: compensation object
"""
if not compensation["baseSalary"] and not compensation["estimated"]:

View File

@@ -14,10 +14,11 @@ from bs4.element import Tag
from jobspy.exception import LinkedInException
from jobspy.linkedin.constant import headers
from jobspy.linkedin.util import (
is_job_remote,
job_type_code,
parse_job_type,
parse_job_level,
parse_company_industry,
parse_company_industry
)
from jobspy.model import (
JobPost,
@@ -173,7 +174,7 @@ class LinkedIn(Scraper):
) -> Optional[JobPost]:
salary_tag = job_card.find("span", class_="job-search-card__salary-info")
compensation = None
compensation = description = None
if salary_tag:
salary_text = salary_tag.get_text(separator=" ").strip()
salary_values = [currency_parser(value) for value in salary_text.split("-")]
@@ -217,6 +218,8 @@ class LinkedIn(Scraper):
job_details = {}
if full_descr:
job_details = self._get_job_details(job_id)
description = job_details.get("description")
is_remote = is_job_remote(title, description, location)
return JobPost(
id=f"li-{job_id}",
@@ -224,6 +227,7 @@ class LinkedIn(Scraper):
company_name=company,
company_url=company_url,
location=location,
is_remote=is_remote,
date_posted=date_posted,
job_url=f"{self.base_url}/jobs/view/{job_id}",
compensation=compensation,
@@ -232,7 +236,7 @@ class LinkedIn(Scraper):
company_industry=job_details.get("company_industry"),
description=job_details.get("description"),
job_url_direct=job_details.get("job_url_direct"),
emails=extract_emails_from_text(job_details.get("description")),
emails=extract_emails_from_text(description),
company_logo=job_details.get("company_logo"),
job_function=job_details.get("job_function"),
)

View File

@@ -1,6 +1,6 @@
from bs4 import BeautifulSoup
from jobspy.model import JobType
from jobspy.model import JobType, Location
from jobspy.util import get_enum_from_job_type
@@ -83,3 +83,14 @@ def parse_company_industry(soup_industry: BeautifulSoup) -> str | None:
industry = industry_span.get_text(strip=True)
return industry
def is_job_remote(title: str, description: str | None, location: Location | None) -> bool:
    """
    Searches the title, location, and description to check if job is remote

    :param title: job title text
    :param description: job description text, or None when no description is available
    :param location: object exposing display_location(), or None when unknown
    :return: True if any remote keyword occurs (case-insensitively) in the combined text
    """
    remote_keywords = ("remote", "work from home", "wfh")
    # Do not rebind the `location` parameter; a missing location must not raise.
    location_text = location.display_location() if location is not None else None
    # Missing parts become empty strings so the literal text "None" is never searched,
    # while the single-space separators between the three parts are kept.
    parts = (title, description, location_text)
    full_string = " ".join("" if part is None else str(part) for part in parts).lower()
    return any(keyword in full_string for keyword in remote_keywords)

View File

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "python-jobspy"
version = "1.1.77"
version = "1.1.78"
description = "Job scraper for LinkedIn, Indeed, Glassdoor, ZipRecruiter & Bayt"
authors = ["Cullen Watson <cullen@cullenwatson.com>", "Zachary Hampton <zachary@zacharysproducts.com>"]
homepage = "https://github.com/cullenwatson/JobSpy"