mirror of https://github.com/Bunsly/JobSpy
fix(linkedin):is remote
parent
6782b9884e
commit
903b7e6f1b
|
@ -220,6 +220,7 @@ JobPost
|
||||||
│ ├── country
|
│ ├── country
|
||||||
│ ├── city
|
│ ├── city
|
||||||
│ ├── state
|
│ ├── state
|
||||||
|
├── is_remote
|
||||||
├── description
|
├── description
|
||||||
├── job_type: fulltime, parttime, internship, contract
|
├── job_type: fulltime, parttime, internship, contract
|
||||||
├── job_function
|
├── job_function
|
||||||
|
@ -229,8 +230,7 @@ JobPost
|
||||||
│ ├── currency
|
│ ├── currency
|
||||||
│ └── salary_source: direct_data, description (parsed from posting)
|
│ └── salary_source: direct_data, description (parsed from posting)
|
||||||
├── date_posted
|
├── date_posted
|
||||||
├── emails
|
└── emails
|
||||||
└── is_remote
|
|
||||||
|
|
||||||
Linkedin specific
|
Linkedin specific
|
||||||
└── job_level
|
└── job_level
|
||||||
|
|
|
@ -14,10 +14,11 @@ from bs4.element import Tag
|
||||||
from jobspy.exception import LinkedInException
|
from jobspy.exception import LinkedInException
|
||||||
from jobspy.linkedin.constant import headers
|
from jobspy.linkedin.constant import headers
|
||||||
from jobspy.linkedin.util import (
|
from jobspy.linkedin.util import (
|
||||||
|
is_job_remote,
|
||||||
job_type_code,
|
job_type_code,
|
||||||
parse_job_type,
|
parse_job_type,
|
||||||
parse_job_level,
|
parse_job_level,
|
||||||
parse_company_industry,
|
parse_company_industry
|
||||||
)
|
)
|
||||||
from jobspy.model import (
|
from jobspy.model import (
|
||||||
JobPost,
|
JobPost,
|
||||||
|
@ -173,7 +174,7 @@ class LinkedIn(Scraper):
|
||||||
) -> Optional[JobPost]:
|
) -> Optional[JobPost]:
|
||||||
salary_tag = job_card.find("span", class_="job-search-card__salary-info")
|
salary_tag = job_card.find("span", class_="job-search-card__salary-info")
|
||||||
|
|
||||||
compensation = None
|
compensation = description = None
|
||||||
if salary_tag:
|
if salary_tag:
|
||||||
salary_text = salary_tag.get_text(separator=" ").strip()
|
salary_text = salary_tag.get_text(separator=" ").strip()
|
||||||
salary_values = [currency_parser(value) for value in salary_text.split("-")]
|
salary_values = [currency_parser(value) for value in salary_text.split("-")]
|
||||||
|
@ -217,6 +218,8 @@ class LinkedIn(Scraper):
|
||||||
job_details = {}
|
job_details = {}
|
||||||
if full_descr:
|
if full_descr:
|
||||||
job_details = self._get_job_details(job_id)
|
job_details = self._get_job_details(job_id)
|
||||||
|
description = job_details.get("description")
|
||||||
|
is_remote = is_job_remote(title, description, location)
|
||||||
|
|
||||||
return JobPost(
|
return JobPost(
|
||||||
id=f"li-{job_id}",
|
id=f"li-{job_id}",
|
||||||
|
@ -224,6 +227,7 @@ class LinkedIn(Scraper):
|
||||||
company_name=company,
|
company_name=company,
|
||||||
company_url=company_url,
|
company_url=company_url,
|
||||||
location=location,
|
location=location,
|
||||||
|
is_remote=is_remote,
|
||||||
date_posted=date_posted,
|
date_posted=date_posted,
|
||||||
job_url=f"{self.base_url}/jobs/view/{job_id}",
|
job_url=f"{self.base_url}/jobs/view/{job_id}",
|
||||||
compensation=compensation,
|
compensation=compensation,
|
||||||
|
@ -232,7 +236,7 @@ class LinkedIn(Scraper):
|
||||||
company_industry=job_details.get("company_industry"),
|
company_industry=job_details.get("company_industry"),
|
||||||
description=job_details.get("description"),
|
description=job_details.get("description"),
|
||||||
job_url_direct=job_details.get("job_url_direct"),
|
job_url_direct=job_details.get("job_url_direct"),
|
||||||
emails=extract_emails_from_text(job_details.get("description")),
|
emails=extract_emails_from_text(description),
|
||||||
company_logo=job_details.get("company_logo"),
|
company_logo=job_details.get("company_logo"),
|
||||||
job_function=job_details.get("job_function"),
|
job_function=job_details.get("job_function"),
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from jobspy.model import JobType
|
from jobspy.model import JobType, Location
|
||||||
from jobspy.util import get_enum_from_job_type
|
from jobspy.util import get_enum_from_job_type
|
||||||
|
|
||||||
|
|
||||||
|
@ -83,3 +83,14 @@ def parse_company_industry(soup_industry: BeautifulSoup) -> str | None:
|
||||||
industry = industry_span.get_text(strip=True)
|
industry = industry_span.get_text(strip=True)
|
||||||
|
|
||||||
return industry
|
return industry
|
||||||
|
|
||||||
|
|
||||||
|
def is_job_remote(title: dict, description: str, location: Location) -> bool:
|
||||||
|
"""
|
||||||
|
Searches the title, location, and description to check if job is remote
|
||||||
|
"""
|
||||||
|
remote_keywords = ["remote", "work from home", "wfh"]
|
||||||
|
location = location.display_location()
|
||||||
|
full_string = f'{title} {description} {location}'.lower()
|
||||||
|
is_remote = any(keyword in full_string for keyword in remote_keywords)
|
||||||
|
return is_remote
|
||||||
|
|
Loading…
Reference in New Issue