mirror of https://github.com/Bunsly/JobSpy
Linkedin job type (#30)
parent
672b152ead
commit
7efece8fe9
|
@ -16,6 +16,7 @@ class JobType(Enum):
|
||||||
NIGHTS = "nights"
|
NIGHTS = "nights"
|
||||||
OTHER = "other"
|
OTHER = "other"
|
||||||
SUMMER = "summer"
|
SUMMER = "summer"
|
||||||
|
VOLUNTEER = "volunteer"
|
||||||
|
|
||||||
|
|
||||||
class Location(BaseModel):
|
class Location(BaseModel):
|
||||||
|
@ -48,7 +49,6 @@ class JobPost(BaseModel):
|
||||||
description: str = None
|
description: str = None
|
||||||
job_type: JobType = None
|
job_type: JobType = None
|
||||||
compensation: Compensation = None
|
compensation: Compensation = None
|
||||||
# Why is 08-28-2023 a validation error for type date? How do I fix this?
|
|
||||||
date_posted: date = None
|
date_posted: date = None
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Optional
|
from typing import Optional, Tuple
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
@ -103,7 +103,7 @@ class LinkedInScraper(Scraper):
|
||||||
datetime_tag = metadata_card.find(
|
datetime_tag = metadata_card.find(
|
||||||
"time", class_="job-search-card__listdate"
|
"time", class_="job-search-card__listdate"
|
||||||
)
|
)
|
||||||
description = LinkedInScraper.get_description(job_url)
|
description, job_type = LinkedInScraper.get_description(job_url)
|
||||||
if datetime_tag:
|
if datetime_tag:
|
||||||
datetime_str = datetime_tag["datetime"]
|
datetime_str = datetime_tag["datetime"]
|
||||||
date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
|
date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
|
||||||
|
@ -117,6 +117,7 @@ class LinkedInScraper(Scraper):
|
||||||
location=location,
|
location=location,
|
||||||
date_posted=date_posted,
|
date_posted=date_posted,
|
||||||
job_url=job_url,
|
job_url=job_url,
|
||||||
|
job_type=job_type,
|
||||||
)
|
)
|
||||||
job_list.append(job_post)
|
job_list.append(job_post)
|
||||||
if (
|
if (
|
||||||
|
@ -149,7 +150,7 @@ class LinkedInScraper(Scraper):
|
||||||
"""
|
"""
|
||||||
response = requests.get(job_page_url, allow_redirects=True)
|
response = requests.get(job_page_url, allow_redirects=True)
|
||||||
if response.status_code not in range(200, 400):
|
if response.status_code not in range(200, 400):
|
||||||
return None
|
return None, None
|
||||||
|
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
soup = BeautifulSoup(response.text, "html.parser")
|
||||||
div_content = soup.find(
|
div_content = soup.find(
|
||||||
|
@ -159,7 +160,36 @@ class LinkedInScraper(Scraper):
|
||||||
text_content = None
|
text_content = None
|
||||||
if div_content:
|
if div_content:
|
||||||
text_content = " ".join(div_content.get_text().split()).strip()
|
text_content = " ".join(div_content.get_text().split()).strip()
|
||||||
return text_content
|
|
||||||
|
def get_job_type(
    soup: BeautifulSoup,
) -> Optional[JobType]:
    """
    Gets the job type from job page
    :param soup: parsed HTML of the job page
    :return: JobType matching the page's "Employment type" criterion, or
        None when the criterion is absent or its value is not a JobType member
    """
    h3_tag = soup.find(
        "h3",
        class_="description__job-criteria-subheader",
        # Guard against a None argument: the filter may be invoked for a
        # tag that has no single string child, and `in None` would raise.
        string=lambda text: bool(text) and "Employment type" in text,
    )
    if not h3_tag:
        return None

    employment_type_span = h3_tag.find_next_sibling(
        "span",
        class_="description__job-criteria-text description__job-criteria-text--criteria",
    )
    if not employment_type_span:
        return None

    # Normalize the label, e.g. "Full-time" -> "fulltime", before the
    # enum lookup by value.
    employment_type = employment_type_span.get_text(strip=True)
    employment_type = employment_type.lower().replace("-", "")

    try:
        return JobType(employment_type)
    except ValueError:
        # Unrecognized label: report "unknown" instead of aborting the scrape.
        return None
|
||||||
|
|
||||||
|
return text_content, get_job_type(soup)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_location(metadata_card: Optional[Tag]) -> Location:
|
def get_location(metadata_card: Optional[Tag]) -> Location:
|
||||||
|
|
Loading…
Reference in New Issue