""" jobspy.scrapers.linkedin ~~~~~~~~~~~~~~~~~~~ This module contains routines to scrape LinkedIn. """ from typing import Optional from datetime import datetime import requests import time from requests.exceptions import ProxyError from concurrent.futures import ThreadPoolExecutor, as_completed from bs4 import BeautifulSoup from bs4.element import Tag from threading import Lock from .. import Scraper, ScraperInput, Site from ..utils import count_urgent_words, extract_emails_from_text, get_enum_from_job_type from ..exceptions import LinkedInException from ...jobs import JobPost, Location, JobResponse, JobType, Country class LinkedInScraper(Scraper): MAX_RETRIES = 3 DELAY = 10 def __init__(self, proxy: Optional[str] = None): """ Initializes LinkedInScraper with the LinkedIn job search url """ site = Site(Site.LINKEDIN) self.country = "worldwide" self.url = "https://www.linkedin.com" super().__init__(site, proxy=proxy) def scrape(self, scraper_input: ScraperInput) -> JobResponse: """ Scrapes LinkedIn for jobs with scraper_input criteria :param scraper_input: :return: job_response """ job_list: list[JobPost] = [] seen_urls = set() url_lock = Lock() page = scraper_input.offset // 25 + 25 if scraper_input.offset else 0 def job_type_code(job_type_enum): mapping = { JobType.FULL_TIME: "F", JobType.PART_TIME: "P", JobType.INTERNSHIP: "I", JobType.CONTRACT: "C", JobType.TEMPORARY: "T", } return mapping.get(job_type_enum, "") while len(job_list) < scraper_input.results_wanted and page < 1000: params = { "keywords": scraper_input.search_term, "location": scraper_input.location, "distance": scraper_input.distance, "f_WT": 2 if scraper_input.is_remote else None, "f_JT": job_type_code(scraper_input.job_type) if scraper_input.job_type else None, "pageNum": 0, page: page + scraper_input.offset, "f_AL": "true" if scraper_input.easy_apply else None, } params = {k: v for k, v in params.items() if v is not None} params = {k: v for k, v in params.items() if v is not None} retries = 0 while retries < self.MAX_RETRIES: try: response = requests.get( f"{self.url}/jobs-guest/jobs/api/seeMoreJobPostings/search?", params=params, allow_redirects=True, proxies=self.proxy, timeout=10, ) response.raise_for_status() break except requests.HTTPError as e: if hasattr(e, "response") and e.response is not None: if e.response.status_code == 429: time.sleep(self.DELAY) retries += 1 continue else: raise LinkedInException( f"bad response status code: {e.response.status_code}" ) else: raise except ProxyError as e: raise LinkedInException("bad proxy") except Exception as e: raise LinkedInException(str(e)) else: # Raise an exception if the maximum number of retries is reached raise LinkedInException( "Max retries reached, failed to get a valid response" ) soup = BeautifulSoup(response.text, "html.parser") with ThreadPoolExecutor(max_workers=5) as executor: futures = [] for job_card in soup.find_all("div", class_="base-search-card"): job_url = None href_tag = job_card.find("a", class_="base-card__full-link") if href_tag and "href" in href_tag.attrs: href = href_tag.attrs["href"].split("?")[0] job_id = href.split("-")[-1] job_url = f"{self.url}/jobs/view/{job_id}" with url_lock: if job_url in seen_urls: continue seen_urls.add(job_url) futures.append(executor.submit(self.process_job, job_card, job_url)) for future in as_completed(futures): try: job_post = future.result() if job_post: job_list.append(job_post) except Exception as e: raise LinkedInException( "Exception occurred while processing jobs" ) page += 25 job_list = job_list[: 
scraper_input.results_wanted] return JobResponse(jobs=job_list) def process_job(self, job_card: Tag, job_url: str) -> Optional[JobPost]: title_tag = job_card.find("span", class_="sr-only") title = title_tag.get_text(strip=True) if title_tag else "N/A" company_tag = job_card.find("h4", class_="base-search-card__subtitle") company_a_tag = company_tag.find("a") if company_tag else None company = company_a_tag.get_text(strip=True) if company_a_tag else "N/A" metadata_card = job_card.find("div", class_="base-search-card__metadata") location = self.get_location(metadata_card) datetime_tag = ( metadata_card.find("time", class_="job-search-card__listdate") if metadata_card else None ) date_posted = None if datetime_tag and "datetime" in datetime_tag.attrs: datetime_str = datetime_tag["datetime"] try: date_posted = datetime.strptime(datetime_str, "%Y-%m-%d") except Exception as e: date_posted = None benefits_tag = job_card.find("span", class_="result-benefits__text") benefits = " ".join(benefits_tag.get_text().split()) if benefits_tag else None description, job_type = self.get_job_description(job_url) return JobPost( title=title, description=description, company_name=company, location=location, date_posted=date_posted, job_url=job_url, job_type=job_type, benefits=benefits, emails=extract_emails_from_text(description) if description else None, num_urgent_words=count_urgent_words(description) if description else None, ) def get_job_description( self, job_page_url: str ) -> tuple[None, None] | tuple[str | None, tuple[str | None, JobType | None]]: """ Retrieves job description by going to the job page url :param job_page_url: :return: description or None """ try: response = requests.get(job_page_url, timeout=5, proxies=self.proxy) response.raise_for_status() except Exception as e: return None, None soup = BeautifulSoup(response.text, "html.parser") div_content = soup.find( "div", class_=lambda x: x and "show-more-less-html__markup" in x ) description = None if div_content: description = " ".join(div_content.get_text().split()).strip() def get_job_type( soup_job_type: BeautifulSoup, ) -> list[JobType] | None: """ Gets the job type from job page :param soup_job_type: :return: JobType """ h3_tag = soup_job_type.find( "h3", class_="description__job-criteria-subheader", string=lambda text: "Employment type" in text, ) employment_type = None if h3_tag: employment_type_span = h3_tag.find_next_sibling( "span", class_="description__job-criteria-text description__job-criteria-text--criteria", ) if employment_type_span: employment_type = employment_type_span.get_text(strip=True) employment_type = employment_type.lower() employment_type = employment_type.replace("-", "") return [get_enum_from_job_type(employment_type)] return description, get_job_type(soup) def get_location(self, metadata_card: Optional[Tag]) -> Location: """ Extracts the location data from the job metadata card. :param metadata_card :return: location """ location = Location(country=Country.from_string(self.country)) if metadata_card is not None: location_tag = metadata_card.find( "span", class_="job-search-card__location" ) location_string = location_tag.text.strip() if location_tag else "N/A" parts = location_string.split(", ") if len(parts) == 2: city, state = parts location = Location( city=city, state=state, country=Country.from_string(self.country), ) return location
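

# ---------------------------------------------------------------------------
# Minimal usage sketch, illustrative only. The keyword arguments below mirror
# the ScraperInput attributes read in scrape() above; `site_type` is an
# assumption about the input model (it is not referenced in this module) and
# may need adjusting. Run as a module (e.g. `python -m jobspy.scrapers.linkedin`)
# so the relative imports resolve.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    scraper = LinkedInScraper()
    sample_input = ScraperInput(
        site_type=[Site.LINKEDIN],  # assumed field name
        search_term="software engineer",
        location="Dallas, TX",
        results_wanted=10,
        offset=0,
    )
    result = scraper.scrape(sample_input)
    for job in result.jobs:
        print(f"{job.title} | {job.company_name} | {job.job_url}")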