JobSpy/src/jobspy/scrapers/linkedin/__init__.py

"""
jobspy.scrapers.linkedin
~~~~~~~~~~~~~~~~~~~

This module contains routines to scrape LinkedIn.
"""
from typing import Optional
from datetime import datetime

import requests
import time
import re
from requests.exceptions import ProxyError
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup
from bs4.element import Tag
from threading import Lock

from .. import Scraper, ScraperInput, Site
from ..utils import count_urgent_words, extract_emails_from_text
from ..exceptions import LinkedInException
from ...jobs import (
    JobPost,
    Location,
    JobResponse,
    JobType,
)


class LinkedInScraper(Scraper):
    """
    Scraper that collects job posts from LinkedIn's guest job-search endpoint
    and the individual job pages it links to.
    """
    # Upper bound on search-request attempts when the server keeps answering HTTP 429
    MAX_RETRIES = 3
    # Seconds to sleep after an HTTP 429 response before the next attempt
    DELAY = 10

    def __init__(self, proxy: Optional[str] = None):
        """
        Sets the LinkedIn base url and default country, then hands the
        LinkedIn site marker and the proxy to the base Scraper.
        :param proxy: optional proxy, forwarded unchanged to the base class
        """
        self.country = "worldwide"
        self.url = "https://www.linkedin.com"
        super().__init__(Site(Site.LINKEDIN), proxy=proxy)

    def scrape(self, scraper_input: ScraperInput) -> JobResponse:
        """
        Scrapes LinkedIn for jobs with scraper_input criteria.

        Search results are requested 25 at a time, starting at
        scraper_input.offset, until results_wanted jobs were collected, a page
        comes back without job cards, or the start index reaches 1000.
        Each new job card is processed on a worker thread (see process_job).
        :param scraper_input: search criteria (search_term, location, distance,
            is_remote, job_type, easy_apply, offset, results_wanted)
        :return: job_response holding at most results_wanted jobs
        :raises LinkedInException: when a search request fails or when
            processing any job card raises
        """
        job_type_codes = {
            JobType.FULL_TIME: "F",
            JobType.PART_TIME: "P",
            JobType.INTERNSHIP: "I",
            JobType.CONTRACT: "C",
            JobType.TEMPORARY: "T",
        }

        # Everything except the pagination index is the same for every page,
        # so build and filter it once.
        base_params = {
            "keywords": scraper_input.search_term,
            "location": scraper_input.location,
            "distance": scraper_input.distance,
            "f_WT": 2 if scraper_input.is_remote else None,
            "f_JT": job_type_codes.get(scraper_input.job_type, "")
            if scraper_input.job_type
            else None,
            "pageNum": 0,
            "f_AL": "true" if scraper_input.easy_apply else None,
        }
        base_params = {k: v for k, v in base_params.items() if v is not None}

        job_list: list[JobPost] = []
        # Only this thread reads or writes seen_urls (workers never touch it),
        # so no lock is needed around it.
        seen_urls: set[str] = set()
        start = scraper_input.offset or 0

        while len(job_list) < scraper_input.results_wanted and start < 1000:
            # The pagination index must be sent under the literal key "start";
            # using the counter itself as the key produced params like "25=25".
            response = self._fetch_search_page({**base_params, "start": start})
            soup = BeautifulSoup(response.text, "html.parser")

            job_cards = soup.find_all("div", class_="base-search-card")
            if not job_cards:
                # An empty page means there is nothing further to page through.
                break

            with ThreadPoolExecutor(max_workers=5) as executor:
                futures = []
                for job_card in job_cards:
                    job_url = self._job_url_from_card(job_card)
                    # Skip cards without a usable link and cards already seen.
                    if job_url is None or job_url in seen_urls:
                        continue
                    seen_urls.add(job_url)
                    futures.append(
                        executor.submit(self.process_job, job_card, job_url)
                    )

                for future in as_completed(futures):
                    try:
                        job_post = future.result()
                    except Exception as e:
                        raise LinkedInException(
                            "Exception occurred while processing jobs"
                        ) from e
                    if job_post:
                        job_list.append(job_post)
            start += 25

        return JobResponse(jobs=job_list[: scraper_input.results_wanted])

    def _fetch_search_page(self, params: dict) -> requests.Response:
        """
        Requests one page of search results, retrying on HTTP 429.
        :param params: query parameters for the search endpoint
        :return: the successful response
        :raises LinkedInException: on a non-429 error status, a proxy error,
            any other request failure, or when MAX_RETRIES attempts all got 429
        """
        for _ in range(self.MAX_RETRIES):
            try:
                response = requests.get(
                    f"{self.url}/jobs-guest/jobs/api/seeMoreJobPostings/search?",
                    params=params,
                    allow_redirects=True,
                    proxies=self.proxy,
                    timeout=10,
                )
                response.raise_for_status()
            except requests.HTTPError as e:
                error_response = getattr(e, "response", None)
                if error_response is None:
                    raise
                if error_response.status_code != 429:
                    raise LinkedInException(
                        f"bad response status code: {error_response.status_code}"
                    ) from e
                # Rate limited: wait, then use up one more attempt.
                time.sleep(self.DELAY)
            except ProxyError as e:
                raise LinkedInException("bad proxy") from e
            except Exception as e:
                raise LinkedInException(str(e)) from e
            else:
                return response

        raise LinkedInException("Max retries reached, failed to get a valid response")

    def _job_url_from_card(self, job_card: Tag) -> Optional[str]:
        """
        Derives the canonical job page url from a search-result card.
        :param job_card: search-result card element
        :return: "<base url>/jobs/view/<job id>", or None when the card has no link
        """
        href_tag = job_card.find("a", class_="base-card__full-link")
        if not href_tag or "href" not in href_tag.attrs:
            return None
        # Drop the query string; the job id is the last "-"-separated token.
        href = href_tag.attrs["href"].split("?")[0]
        job_id = href.split("-")[-1]
        return f"{self.url}/jobs/view/{job_id}"

    def process_job(self, job_card: Tag, job_url: str) -> Optional[JobPost]:
        """
        Assembles a JobPost from a search-result card and its job page.

        Title, company, location, posting date and benefits are read from the
        card; description and job type come from get_job_description.
        :param job_card: search-result card element
        :param job_url: url of the job page
        :return: the assembled JobPost
        """
        title_el = job_card.find("span", class_="sr-only")
        if title_el:
            title = title_el.get_text(strip=True)
        else:
            title = "N/A"

        company = "N/A"
        subtitle_el = job_card.find("h4", class_="base-search-card__subtitle")
        if subtitle_el:
            company_link = subtitle_el.find("a")
            if company_link:
                company = company_link.get_text(strip=True)

        metadata_card = job_card.find("div", class_="base-search-card__metadata")
        location = self.get_location(metadata_card)

        time_el = None
        if metadata_card:
            time_el = metadata_card.find("time", class_="job-search-card__listdate")

        date_posted = None
        if time_el and "datetime" in time_el.attrs:
            try:
                date_posted = datetime.strptime(time_el["datetime"], "%Y-%m-%d")
            except Exception:
                # An unparseable date simply leaves the post undated.
                date_posted = None

        benefits = None
        benefits_el = job_card.find("span", class_="result-benefits__text")
        if benefits_el:
            # Collapse all runs of whitespace into single spaces.
            benefits = " ".join(benefits_el.get_text().split())

        description, job_type = self.get_job_description(job_url)

        # Text-derived fields are only computed when a description was found.
        emails = None
        urgent_word_count = None
        if description:
            emails = extract_emails_from_text(description)
            urgent_word_count = count_urgent_words(description)

        return JobPost(
            title=title,
            description=description,
            company_name=company,
            location=location,
            date_posted=date_posted,
            job_url=job_url,
            job_type=job_type,
            benefits=benefits,
            emails=emails,
            num_urgent_words=urgent_word_count,
        )

    def get_job_description(
        self, job_page_url: str
    ) -> tuple[str | None, list[JobType] | None]:
        """
        Retrieves the job description and employment type by going to the job page url
        :param job_page_url: url of the job page
        :return: (description, job_type). Both are None when the page cannot be
            fetched; each is individually None when it is not found on the page.
        """
        try:
            response = requests.get(job_page_url, timeout=5, proxies=self.proxy)
            response.raise_for_status()
        except Exception:
            # Deliberately best-effort: a job whose page cannot be fetched is
            # still returned, just without description and job type.
            return None, None

        soup = BeautifulSoup(response.text, "html.parser")

        description = None
        div_content = soup.find(
            "div", class_=lambda x: x and "show-more-less-html__markup" in x
        )
        if div_content:
            # Collapse all runs of whitespace into single spaces.
            description = " ".join(div_content.get_text().split())

        # The string filter is also called with None for tags that have no
        # single string child, so it must guard before using "in".
        h3_tag = soup.find(
            "h3",
            class_="description__job-criteria-subheader",
            string=lambda text: text is not None and "Employment type" in text,
        )

        employment_type = None
        if h3_tag:
            employment_type_span = h3_tag.find_next_sibling(
                "span",
                class_="description__job-criteria-text description__job-criteria-text--criteria",
            )
            if employment_type_span:
                # Normalise e.g. "Full-time" to "fulltime" before the enum lookup.
                employment_type = (
                    employment_type_span.get_text(strip=True).lower().replace("-", "")
                )

        # Only look up the enum when an employment type was actually found.
        job_type = (
            LinkedInScraper.get_enum_from_value(employment_type)
            if employment_type
            else None
        )
        return description, job_type

    @staticmethod
    def get_enum_from_value(value_str: str | None) -> list[JobType] | None:
        """
        Finds the first JobType whose value contains value_str.
        :param value_str: normalised employment type text, or None when unknown
        :return: single-element list with the matching JobType, or None when
            value_str is None or nothing matches
        """
        # Answer "no match" explicitly for None rather than relying on how
        # `None in job_type.value` behaves for the enum's value type.
        if value_str is None:
            return None

        for job_type in JobType:
            if value_str in job_type.value:
                return [job_type]
        return None

    def get_location(self, metadata_card: Optional[Tag]) -> Location:
        """
        Builds a Location from the job metadata card.

        City and state are filled in only when the location text has exactly
        the form "<city>, <state>"; otherwise only the country is set.
        :param metadata_card: metadata element of the job card, or None
        :return: location
        """
        country_only = Location(country=self.country)
        if metadata_card is None:
            return country_only

        location_tag = metadata_card.find("span", class_="job-search-card__location")
        if location_tag:
            location_string = location_tag.text.strip()
        else:
            location_string = "N/A"

        pieces = location_string.split(", ")
        if len(pieces) != 2:
            return country_only

        return Location(
            city=pieces[0],
            state=pieces[1],
            country=self.country,
        )
Proxy support (#44) * add proxy support * return as data frame 2023-09-07 09:28:17 -07:00			`"""`
			`jobspy.scrapers.linkedin`
			`~~~~~~~~~~~~~~~~~~~`

			`This module contains routines to scrape LinkedIn.`
			`"""`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`from typing import Optional`
remove duplicates - gsheets (#29) 2023-08-31 08:29:43 -07:00			`from datetime import datetime`
feat: add LinkedIn scraper 2023-07-08 07:34:55 -07:00
			`import requests`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`import time`
[fix] util func 2023-09-28 16:33:14 -07:00			`import re`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`from requests.exceptions import ProxyError`
			`from concurrent.futures import ThreadPoolExecutor, as_completed`
feat: add LinkedIn scraper 2023-07-08 07:34:55 -07:00			`from bs4 import BeautifulSoup`
feat(jobs): remove pages for results_wanted 2023-07-10 20:07:19 -07:00			`from bs4.element import Tag`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`from threading import Lock`
feat: add LinkedIn scraper 2023-07-08 07:34:55 -07:00
Library Migration (#31) 2023-09-03 07:29:25 -07:00			`from .. import Scraper, ScraperInput, Site`
Multiple job types for Indeed, urgent keywords column (#56) * enh(indeed): mult job types * feat(jobs): urgent kws * fix(indeed): use new session obj per request * fix: emails as comma separated in output * fix: put num urgent words in output * chore: readme 2023-10-10 09:23:04 -07:00			`from ..utils import count_urgent_words, extract_emails_from_text`
Proxy support (#44) * add proxy support * return as data frame 2023-09-07 09:28:17 -07:00			`from ..exceptions import LinkedInException`
Validation error (#35) 2023-09-03 18:05:31 -07:00			`from ...jobs import (`
			`JobPost,`
			`Location,`
			`JobResponse,`
			`JobType,`
			`)`
[fix] util func 2023-09-28 16:33:14 -07:00

feat: add LinkedIn scraper 2023-07-08 07:34:55 -07:00			`class LinkedInScraper(Scraper):`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`MAX_RETRIES = 3`
			`DELAY = 10`

Proxy support (#44) * add proxy support * return as data frame 2023-09-07 09:28:17 -07:00			`def __init__(self, proxy: Optional[str] = None):`
feat(jobs): remove pages for results_wanted 2023-07-10 20:07:19 -07:00			`"""`
			`Initializes LinkedInScraper with the LinkedIn job search url`
			`"""`
feat: add LinkedIn scraper 2023-07-08 07:34:55 -07:00			`site = Site(Site.LINKEDIN)`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`self.country = "worldwide"`
Indeed country support (#38) 2023-09-05 10:17:22 -07:00			`self.url = "https://www.linkedin.com"`
Proxy support (#44) * add proxy support * return as data frame 2023-09-07 09:28:17 -07:00			`super().__init__(site, proxy=proxy)`
feat: add LinkedIn scraper 2023-07-08 07:34:55 -07:00
			`def scrape(self, scraper_input: ScraperInput) -> JobResponse:`
feat(jobs): remove pages for results_wanted 2023-07-10 20:07:19 -07:00			`"""`
			`Scrapes LinkedIn for jobs with scraper_input criteria`
			`:param scraper_input:`
			`:return: job_response`
			`"""`
feat: add LinkedIn scraper 2023-07-08 07:34:55 -07:00			`job_list: list[JobPost] = []`
feat(jobs): remove pages for results_wanted 2023-07-10 20:07:19 -07:00			`seen_urls = set()`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`url_lock = Lock()`
			`page = scraper_input.offset // 25 + 25 if scraper_input.offset else 0`
feat(jobs): remove pages for results_wanted 2023-07-10 20:07:19 -07:00
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`def job_type_code(job_type_enum):`
feat(jobs): add site_type param 2023-07-11 06:24:59 -07:00			`mapping = {`
			`JobType.FULL_TIME: "F",`
			`JobType.PART_TIME: "P",`
			`JobType.INTERNSHIP: "I",`
			`JobType.CONTRACT: "C",`
			`JobType.TEMPORARY: "T",`
			`}`

add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`return mapping.get(job_type_enum, "")`

			`while len(job_list) < scraper_input.results_wanted and page < 1000:`
			`params = {`
			`"keywords": scraper_input.search_term,`
			`"location": scraper_input.location,`
			`"distance": scraper_input.distance,`
			`"f_WT": 2 if scraper_input.is_remote else None,`
			`"f_JT": job_type_code(scraper_input.job_type)`
			`if scraper_input.job_type`
			`else None,`
			`"pageNum": 0,`
			`page: page + scraper_input.offset,`
			`"f_AL": "true" if scraper_input.easy_apply else None,`
			`}`

			`params = {k: v for k, v in params.items() if v is not None}`

			`params = {k: v for k, v in params.items() if v is not None}`
			`retries = 0`
			`while retries < self.MAX_RETRIES:`
Proxy support (#44) * add proxy support * return as data frame 2023-09-07 09:28:17 -07:00			`try:`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`response = requests.get(`
			`f"{self.url}/jobs-guest/jobs/api/seeMoreJobPostings/search?",`
Proxy support (#44) * add proxy support * return as data frame 2023-09-07 09:28:17 -07:00			`params=params,`
			`allow_redirects=True,`
			`proxies=self.proxy,`
			`timeout=10,`
			`)`
			`response.raise_for_status()`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00
			`break`
Proxy support (#44) * add proxy support * return as data frame 2023-09-07 09:28:17 -07:00			`except requests.HTTPError as e:`
Multiple job types for Indeed, urgent keywords column (#56) * enh(indeed): mult job types * feat(jobs): urgent kws * fix(indeed): use new session obj per request * fix: emails as comma separated in output * fix: put num urgent words in output * chore: readme 2023-10-10 09:23:04 -07:00			`if hasattr(e, "response") and e.response is not None:`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`if e.response.status_code == 429:`
			`time.sleep(self.DELAY)`
			`retries += 1`
			`continue`
			`else:`
Multiple job types for Indeed, urgent keywords column (#56) * enh(indeed): mult job types * feat(jobs): urgent kws * fix(indeed): use new session obj per request * fix: emails as comma separated in output * fix: put num urgent words in output * chore: readme 2023-10-10 09:23:04 -07:00			`raise LinkedInException(`
			`f"bad response status code: {e.response.status_code}"`
			`)`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`else:`
			`raise`
Proxy support (#44) * add proxy support * return as data frame 2023-09-07 09:28:17 -07:00			`except ProxyError as e:`
			`raise LinkedInException("bad proxy")`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`except Exception as e:`
Proxy support (#44) * add proxy support * return as data frame 2023-09-07 09:28:17 -07:00			`raise LinkedInException(str(e))`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`else:`
			`# Raise an exception if the maximum number of retries is reached`
Multiple job types for Indeed, urgent keywords column (#56) * enh(indeed): mult job types * feat(jobs): urgent kws * fix(indeed): use new session obj per request * fix: emails as comma separated in output * fix: put num urgent words in output * chore: readme 2023-10-10 09:23:04 -07:00			`raise LinkedInException(`
			`"Max retries reached, failed to get a valid response"`
			`)`
feat(jobs): remove pages for results_wanted 2023-07-10 20:07:19 -07:00
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`soup = BeautifulSoup(response.text, "html.parser")`
feat(jobs): remove pages for results_wanted 2023-07-10 20:07:19 -07:00
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`with ThreadPoolExecutor(max_workers=5) as executor:`
			`futures = []`
			`for job_card in soup.find_all("div", class_="base-search-card"):`
			`job_url = None`
			`href_tag = job_card.find("a", class_="base-card__full-link")`
			`if href_tag and "href" in href_tag.attrs:`
			`href = href_tag.attrs["href"].split("?")[0]`
			`job_id = href.split("-")[-1]`
			`job_url = f"{self.url}/jobs/view/{job_id}"`
feat(jobs): remove pages for results_wanted 2023-07-10 20:07:19 -07:00
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`with url_lock:`
			`if job_url in seen_urls:`
			`continue`
			`seen_urls.add(job_url)`

			`futures.append(executor.submit(self.process_job, job_card, job_url))`

			`for future in as_completed(futures):`
			`try:`
			`job_post = future.result()`
			`if job_post:`
			`job_list.append(job_post)`
			`except Exception as e:`
Multiple job types for Indeed, urgent keywords column (#56) * enh(indeed): mult job types * feat(jobs): urgent kws * fix(indeed): use new session obj per request * fix: emails as comma separated in output * fix: put num urgent words in output * chore: readme 2023-10-10 09:23:04 -07:00			`raise LinkedInException(`
			`"Exception occurred while processing jobs"`
			`)`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`page += 25`
feat(jobs): remove pages for results_wanted 2023-07-10 20:07:19 -07:00
			`job_list = job_list[: scraper_input.results_wanted]`
Proxy support (#44) * add proxy support * return as data frame 2023-09-07 09:28:17 -07:00			`return JobResponse(jobs=job_list)`
feat: add LinkedIn scraper 2023-07-08 07:34:55 -07:00
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`def process_job(self, job_card: Tag, job_url: str) -> Optional[JobPost]:`
			`title_tag = job_card.find("span", class_="sr-only")`
			`title = title_tag.get_text(strip=True) if title_tag else "N/A"`

			`company_tag = job_card.find("h4", class_="base-search-card__subtitle")`
			`company_a_tag = company_tag.find("a") if company_tag else None`
			`company = company_a_tag.get_text(strip=True) if company_a_tag else "N/A"`

			`metadata_card = job_card.find("div", class_="base-search-card__metadata")`
			`location = self.get_location(metadata_card)`

Multiple job types for Indeed, urgent keywords column (#56) * enh(indeed): mult job types * feat(jobs): urgent kws * fix(indeed): use new session obj per request * fix: emails as comma separated in output * fix: put num urgent words in output * chore: readme 2023-10-10 09:23:04 -07:00			`datetime_tag = (`
			`metadata_card.find("time", class_="job-search-card__listdate")`
			`if metadata_card`
			`else None`
			`)`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`date_posted = None`
			`if datetime_tag and "datetime" in datetime_tag.attrs:`
			`datetime_str = datetime_tag["datetime"]`
			`try:`
			`date_posted = datetime.strptime(datetime_str, "%Y-%m-%d")`
			`except Exception as e:`
			`date_posted = None`
			`benefits_tag = job_card.find("span", class_="result-benefits__text")`
			`benefits = " ".join(benefits_tag.get_text().split()) if benefits_tag else None`

			`description, job_type = self.get_job_description(job_url)`

			`return JobPost(`
			`title=title,`
			`description=description,`
			`company_name=company,`
			`location=location,`
			`date_posted=date_posted,`
			`job_url=job_url,`
Multiple job types for Indeed, urgent keywords column (#56) * enh(indeed): mult job types * feat(jobs): urgent kws * fix(indeed): use new session obj per request * fix: emails as comma separated in output * fix: put num urgent words in output * chore: readme 2023-10-10 09:23:04 -07:00			`# job_type=[JobType.FULL_TIME],`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`job_type=job_type,`
			`benefits=benefits,`
Multiple job types for Indeed, urgent keywords column (#56) * enh(indeed): mult job types * feat(jobs): urgent kws * fix(indeed): use new session obj per request * fix: emails as comma separated in output * fix: put num urgent words in output * chore: readme 2023-10-10 09:23:04 -07:00			`emails=extract_emails_from_text(description) if description else None,`
			`num_urgent_words=count_urgent_words(description) if description else None,`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`)`

Multiple job types for Indeed, urgent keywords column (#56) * enh(indeed): mult job types * feat(jobs): urgent kws * fix(indeed): use new session obj per request * fix: emails as comma separated in output * fix: put num urgent words in output * chore: readme 2023-10-10 09:23:04 -07:00			`def get_job_description(`
			`self, job_page_url: str`
			`) -> tuple[None, None] \| tuple[str \| None, tuple[str \| None, JobType \| None]]:`
fix(linkedin): fetch full description 2023-08-26 05:07:29 -07:00			`"""`
			`Retrieves job description by going to the job page url`
			`:param job_page_url:`
			`:return: description or None`
			`"""`
Thread sites (#40) 2023-09-06 07:47:11 -07:00			`try:`
Proxy support (#44) * add proxy support * return as data frame 2023-09-07 09:28:17 -07:00			`response = requests.get(job_page_url, timeout=5, proxies=self.proxy)`
			`response.raise_for_status()`
			`except Exception as e:`
Linkedin job type (#30) 2023-08-31 12:01:47 -07:00			`return None, None`
fix(linkedin): fetch full description 2023-08-26 05:07:29 -07:00
			`soup = BeautifulSoup(response.text, "html.parser")`
			`div_content = soup.find(`
			`"div", class_=lambda x: x and "show-more-less-html__markup" in x`
			`)`

add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`description = None`
fix(linkedin): fetch full description 2023-08-26 05:07:29 -07:00			`if div_content:`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`description = " ".join(div_content.get_text().split()).strip()`
Linkedin job type (#30) 2023-08-31 12:01:47 -07:00
			`def get_job_type(`
Multiple job types for Indeed, urgent keywords column (#56) * enh(indeed): mult job types * feat(jobs): urgent kws * fix(indeed): use new session obj per request * fix: emails as comma separated in output * fix: put num urgent words in output * chore: readme 2023-10-10 09:23:04 -07:00			`soup_job_type: BeautifulSoup,`
			`) -> list[JobType] \| None:`
Linkedin job type (#30) 2023-08-31 12:01:47 -07:00			`"""`
			`Gets the job type from job page`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`:param soup_job_type:`
Linkedin job type (#30) 2023-08-31 12:01:47 -07:00			`:return: JobType`
			`"""`
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`h3_tag = soup_job_type.find(`
Linkedin job type (#30) 2023-08-31 12:01:47 -07:00			`"h3",`
			`class_="description__job-criteria-subheader",`
			`string=lambda text: "Employment type" in text,`
			`)`

			`employment_type = None`
			`if h3_tag:`
			`employment_type_span = h3_tag.find_next_sibling(`
			`"span",`
			`class_="description__job-criteria-text description__job-criteria-text--criteria",`
			`)`
			`if employment_type_span:`
			`employment_type = employment_type_span.get_text(strip=True)`
			`employment_type = employment_type.lower()`
			`employment_type = employment_type.replace("-", "")`

Indeed country support (#38) 2023-09-05 10:17:22 -07:00			`return LinkedInScraper.get_enum_from_value(employment_type)`
Linkedin job type (#30) 2023-08-31 12:01:47 -07:00
add offset param & email extraction (#51) * add offset param * [enh]: extract emails 2023-09-28 16:11:28 -07:00			`return description, get_job_type(soup)`
fix(linkedin): fetch full description 2023-08-26 05:07:29 -07:00
feat: add LinkedIn scraper 2023-07-08 07:34:55 -07:00			`@staticmethod`
Indeed country support (#38) 2023-09-05 10:17:22 -07:00			`def get_enum_from_value(value_str):`
			`for job_type in JobType:`
			`if value_str in job_type.value:`
Multiple job types for Indeed, urgent keywords column (#56) * enh(indeed): mult job types * feat(jobs): urgent kws * fix(indeed): use new session obj per request * fix: emails as comma separated in output * fix: put num urgent words in output * chore: readme 2023-10-10 09:23:04 -07:00			`return [job_type]`
Indeed country support (#38) 2023-09-05 10:17:22 -07:00			`return None`

			`def get_location(self, metadata_card: Optional[Tag]) -> Location:`
feat(jobs): remove pages for results_wanted 2023-07-10 20:07:19 -07:00			`"""`
			`Extracts the location data from the job metadata card.`
			`:param metadata_card`
			`:return: location`
			`"""`
Indeed country support (#38) 2023-09-05 10:17:22 -07:00			`location = Location(country=self.country)`
feat: add LinkedIn scraper 2023-07-08 07:34:55 -07:00			`if metadata_card is not None:`
			`location_tag = metadata_card.find(`
			`"span", class_="job-search-card__location"`
			`)`
			`location_string = location_tag.text.strip() if location_tag else "N/A"`
			`parts = location_string.split(", ")`
			`if len(parts) == 2:`
			`city, state = parts`
			`location = Location(`
			`city=city,`
			`state=state,`
Indeed country support (#38) 2023-09-05 10:17:22 -07:00			`country=self.country,`
feat: add LinkedIn scraper 2023-07-08 07:34:55 -07:00			`)`

			`return location`