fix(linkedin):is remote

pull/256/head
Cullen Watson 2025-03-06 13:38:28 -06:00
parent 6782b9884e
commit 903b7e6f1b
3 changed files with 21 additions and 6 deletions

View File

@ -220,6 +220,7 @@ JobPost
│ ├── country
│ ├── city
│ ├── state
├── is_remote
├── description
├── job_type: fulltime, parttime, internship, contract
├── job_function
@ -229,8 +230,7 @@ JobPost
│ ├── currency
│ └── salary_source: direct_data, description (parsed from posting)
├── date_posted
├── emails
└── is_remote
└── emails
Linkedin specific
└── job_level

View File

@ -14,10 +14,11 @@ from bs4.element import Tag
from jobspy.exception import LinkedInException
from jobspy.linkedin.constant import headers
from jobspy.linkedin.util import (
is_job_remote,
job_type_code,
parse_job_type,
parse_job_level,
parse_company_industry,
parse_company_industry
)
from jobspy.model import (
JobPost,
@ -173,7 +174,7 @@ class LinkedIn(Scraper):
) -> Optional[JobPost]:
salary_tag = job_card.find("span", class_="job-search-card__salary-info")
compensation = None
compensation = description = None
if salary_tag:
salary_text = salary_tag.get_text(separator=" ").strip()
salary_values = [currency_parser(value) for value in salary_text.split("-")]
@ -217,6 +218,8 @@ class LinkedIn(Scraper):
job_details = {}
if full_descr:
job_details = self._get_job_details(job_id)
description = job_details.get("description")
is_remote = is_job_remote(title, description, location)
return JobPost(
id=f"li-{job_id}",
@ -224,6 +227,7 @@ class LinkedIn(Scraper):
company_name=company,
company_url=company_url,
location=location,
is_remote=is_remote,
date_posted=date_posted,
job_url=f"{self.base_url}/jobs/view/{job_id}",
compensation=compensation,
@ -232,7 +236,7 @@ class LinkedIn(Scraper):
company_industry=job_details.get("company_industry"),
description=job_details.get("description"),
job_url_direct=job_details.get("job_url_direct"),
emails=extract_emails_from_text(job_details.get("description")),
emails=extract_emails_from_text(description),
company_logo=job_details.get("company_logo"),
job_function=job_details.get("job_function"),
)

View File

@ -1,6 +1,6 @@
from bs4 import BeautifulSoup
from jobspy.model import JobType
from jobspy.model import JobType, Location
from jobspy.util import get_enum_from_job_type
@ -83,3 +83,14 @@ def parse_company_industry(soup_industry: BeautifulSoup) -> str | None:
industry = industry_span.get_text(strip=True)
return industry
def is_job_remote(
    title: str | None, description: str | None, location: Location | None
) -> bool:
    """
    Searches the title, location, and description to check if job is remote

    :param title: job title text
    :param description: job description text; may be None when the full
        description was not fetched
    :param location: job location; the text returned by its
        display_location() is searched. May be None.
    :return: True if any remote keyword occurs (case-insensitive substring
        match) in the title, description, or location text
    """
    remote_keywords = ("remote", "work from home", "wfh")
    location_text = location.display_location() if location is not None else None
    # Skip missing fields so the literal text "None" is never searched and a
    # missing location does not raise AttributeError.
    searchable = " ".join(
        str(part) for part in (title, description, location_text) if part
    ).lower()
    return any(keyword in searchable for keyword in remote_keywords)