Linkedin job type (#30)

pull/31/head
Cullen Watson 2023-08-31 14:01:47 -05:00 committed by GitHub
parent 672b152ead
commit 7efece8fe9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 35 additions and 5 deletions

View File

@ -16,6 +16,7 @@ class JobType(Enum):
NIGHTS = "nights" NIGHTS = "nights"
OTHER = "other" OTHER = "other"
SUMMER = "summer" SUMMER = "summer"
VOLUNTEER = "volunteer"
class Location(BaseModel): class Location(BaseModel):
@ -48,7 +49,6 @@ class JobPost(BaseModel):
description: str = None description: str = None
job_type: JobType = None job_type: JobType = None
compensation: Compensation = None compensation: Compensation = None
# why is 08-28-2023 a validation error for type date? how do I fix this?
date_posted: date = None date_posted: date = None

View File

@ -1,4 +1,4 @@
from typing import Optional from typing import Optional, Tuple
from datetime import datetime from datetime import datetime
import requests import requests
@ -103,7 +103,7 @@ class LinkedInScraper(Scraper):
datetime_tag = metadata_card.find( datetime_tag = metadata_card.find(
"time", class_="job-search-card__listdate" "time", class_="job-search-card__listdate"
) )
description = LinkedInScraper.get_description(job_url) description, job_type = LinkedInScraper.get_description(job_url)
if datetime_tag: if datetime_tag:
datetime_str = datetime_tag["datetime"] datetime_str = datetime_tag["datetime"]
date_posted = datetime.strptime(datetime_str, "%Y-%m-%d") date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")
@ -117,6 +117,7 @@ class LinkedInScraper(Scraper):
location=location, location=location,
date_posted=date_posted, date_posted=date_posted,
job_url=job_url, job_url=job_url,
job_type=job_type,
) )
job_list.append(job_post) job_list.append(job_post)
if ( if (
@ -149,7 +150,7 @@ class LinkedInScraper(Scraper):
""" """
response = requests.get(job_page_url, allow_redirects=True) response = requests.get(job_page_url, allow_redirects=True)
if response.status_code not in range(200, 400): if response.status_code not in range(200, 400):
return None return None, None
soup = BeautifulSoup(response.text, "html.parser") soup = BeautifulSoup(response.text, "html.parser")
div_content = soup.find( div_content = soup.find(
@ -159,7 +160,36 @@ class LinkedInScraper(Scraper):
text_content = None text_content = None
if div_content: if div_content:
text_content = " ".join(div_content.get_text().split()).strip() text_content = " ".join(div_content.get_text().split()).strip()
return text_content
def get_job_type(
    soup: BeautifulSoup,
) -> Optional[JobType]:
    """
    Gets the job type from job page
    :param soup: parsed job page to search for the "Employment type" criteria entry
    :return: the matching JobType, or None when the page has no employment
        type entry or its text does not correspond to a JobType value
    """
    h3_tag = soup.find(
        "h3",
        class_="description__job-criteria-subheader",
        # The filter may be handed None (a tag without a single string child),
        # so guard before the substring test to avoid a TypeError.
        string=lambda text: bool(text) and "Employment type" in text,
    )
    if not h3_tag:
        return None

    employment_type_span = h3_tag.find_next_sibling(
        "span",
        class_="description__job-criteria-text description__job-criteria-text--criteria",
    )
    if not employment_type_span:
        return None

    # Normalize the displayed label before the enum lookup,
    # e.g. "Full-time" becomes "fulltime".
    employment_type = employment_type_span.get_text(strip=True)
    employment_type = employment_type.lower().replace("-", "")

    try:
        return JobType(employment_type)
    except ValueError:
        # Label is not a JobType value; job type is optional on a post, so
        # report "unknown" instead of aborting the scrape.
        return None
return text_content, get_job_type(soup)
@staticmethod @staticmethod
def get_location(metadata_card: Optional[Tag]) -> Location: def get_location(metadata_card: Optional[Tag]) -> Location: