From 903b7e6f1b6583592134107f958605c1ebde9987 Mon Sep 17 00:00:00 2001
From: Cullen Watson
Date: Thu, 6 Mar 2025 13:38:28 -0600
Subject: [PATCH] fix(linkedin): is remote

Set JobPost.is_remote for LinkedIn postings by searching the title,
description and location for "remote", "work from home" or "wfh".
The description is only searched when full job details are fetched.
Also list is_remote after location in the README's JobPost schema.
---
 README.md                   |  4 ++--
 jobspy/linkedin/__init__.py | 10 +++++++---
 jobspy/linkedin/util.py     | 13 ++++++++++++-
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index a738da7..72d4821 100644
--- a/README.md
+++ b/README.md
@@ -220,6 +220,7 @@ JobPost
 │   ├── country
 │   ├── city
 │   ├── state
+├── is_remote
 ├── description
 ├── job_type: fulltime, parttime, internship, contract
 ├── job_function
@@ -229,8 +230,7 @@ JobPost
 │   ├── currency
 │   └── salary_source: direct_data, description (parsed from posting)
 ├── date_posted
-├── emails
-└── is_remote
+└── emails
 
 Linkedin specific
 └── job_level
diff --git a/jobspy/linkedin/__init__.py b/jobspy/linkedin/__init__.py
index db20f12..c8bca93 100644
--- a/jobspy/linkedin/__init__.py
+++ b/jobspy/linkedin/__init__.py
@@ -14,10 +14,11 @@ from bs4.element import Tag
 from jobspy.exception import LinkedInException
 from jobspy.linkedin.constant import headers
 from jobspy.linkedin.util import (
+    is_job_remote,
     job_type_code,
     parse_job_type,
     parse_job_level,
-    parse_company_industry,
+    parse_company_industry
 )
 from jobspy.model import (
     JobPost,
@@ -173,7 +174,7 @@ class LinkedIn(Scraper):
     ) -> Optional[JobPost]:
         salary_tag = job_card.find("span", class_="job-search-card__salary-info")
 
-        compensation = None
+        compensation = description = None
         if salary_tag:
             salary_text = salary_tag.get_text(separator=" ").strip()
             salary_values = [currency_parser(value) for value in salary_text.split("-")]
@@ -217,6 +218,8 @@ class LinkedIn(Scraper):
         job_details = {}
         if full_descr:
             job_details = self._get_job_details(job_id)
+            description = job_details.get("description")
+        is_remote = is_job_remote(title, description, location)
 
         return JobPost(
             id=f"li-{job_id}",
@@ -224,6 +227,7 @@ class LinkedIn(Scraper):
             company_name=company,
             company_url=company_url,
             location=location,
+            is_remote=is_remote,
             date_posted=date_posted,
             job_url=f"{self.base_url}/jobs/view/{job_id}",
             compensation=compensation,
@@ -232,7 +236,7 @@ class LinkedIn(Scraper):
             company_industry=job_details.get("company_industry"),
             description=job_details.get("description"),
             job_url_direct=job_details.get("job_url_direct"),
-            emails=extract_emails_from_text(job_details.get("description")),
+            emails=extract_emails_from_text(description),
             company_logo=job_details.get("company_logo"),
             job_function=job_details.get("job_function"),
         )
diff --git a/jobspy/linkedin/util.py b/jobspy/linkedin/util.py
index fe37c48..407b4d2 100644
--- a/jobspy/linkedin/util.py
+++ b/jobspy/linkedin/util.py
@@ -1,6 +1,6 @@
 from bs4 import BeautifulSoup
 
-from jobspy.model import JobType
+from jobspy.model import JobType, Location
 from jobspy.util import get_enum_from_job_type
 
 
@@ -83,3 +83,14 @@ def parse_company_industry(soup_industry: BeautifulSoup) -> str | None:
             industry = industry_span.get_text(strip=True)
 
     return industry
+
+
+def is_job_remote(title: str, description: str | None, location: Location) -> bool:
+    """
+    Return True if the title, description, or location contains a remote keyword
+    """
+    remote_keywords = ["remote", "work from home", "wfh"]
+    location_text = location.display_location()
+    # description is None when the caller did not fetch the full job details
+    full_string = f"{title} {description or ''} {location_text}".lower()
+    return any(keyword in full_string for keyword in remote_keywords)