[chore] format

2023-10-18 14:22:59 -05:00 · 2023-10-18 14:22:59 -05:00 · 28dba8b16e
parent bb21f47537
commit 28dba8b16e
3 changed files with 31 additions and 16 deletions
--- a/src/jobspy/jobs/__init__.py
+++ b/src/jobspy/jobs/__init__.py
@@ -37,15 +37,16 @@ class JobType(Enum):
        "повназайнятість",
        "toànthờigian",
    )
-    PART_TIME = (
-        "parttime",
-        "teilzeit",
-        "částečnýúvazek",
-        "deltid"
-    )
+    PART_TIME = ("parttime", "teilzeit", "částečnýúvazek", "deltid")
    CONTRACT = ("contract", "contractor")
    TEMPORARY = ("temporary",)
-    INTERNSHIP = ("internship", "prácticas", "ojt(onthejobtraining)", "praktikum", "praktik")
+    INTERNSHIP = (
+        "internship",
+        "prácticas",
+        "ojt(onthejobtraining)",
+        "praktikum",
+        "praktik",
+    )

    PER_DIEM = ("perdiem",)
    NIGHTS = ("nights",)
--- a/src/jobspy/scrapers/indeed/__init__.py
+++ b/src/jobspy/scrapers/indeed/__init__.py
@@ -16,7 +16,12 @@ from bs4.element import Tag
 from concurrent.futures import ThreadPoolExecutor, Future

 from ..exceptions import IndeedException
-from ..utils import count_urgent_words, extract_emails_from_text, create_session, get_enum_from_job_type
+from ..utils import (
+    count_urgent_words,
+    extract_emails_from_text,
+    create_session,
+    get_enum_from_job_type,
+)
 from ...jobs import (
    JobPost,
    Compensation,
@@ -165,8 +170,7 @@ class IndeedScraper(Scraper):
        jobs = jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
        with ThreadPoolExecutor(max_workers=1) as executor:
            job_results: list[Future] = [
-                executor.submit(process_job, job)
-                for job in jobs
+                executor.submit(process_job, job) for job in jobs
            ]

        job_list = [result.result() for result in job_results if result.result()]
@@ -231,14 +235,16 @@ class IndeedScraper(Scraper):
        if response.status_code not in range(200, 400):
            return None

-        soup = BeautifulSoup(response.text, 'html.parser')
-        script_tag = soup.find('script', text=lambda x: x and 'window._initialData' in x)
+        soup = BeautifulSoup(response.text, "html.parser")
+        script_tag = soup.find(
+            "script", text=lambda x: x and "window._initialData" in x
+        )

        if not script_tag:
            return None

        script_code = script_tag.string
-        match = re.search(r'window\._initialData\s*=\s*({.*?})\s*;', script_code, re.S)
+        match = re.search(r"window\._initialData\s*=\s*({.*?})\s*;", script_code, re.S)

        if not match:
            return None
@@ -246,12 +252,18 @@ class IndeedScraper(Scraper):
        json_string = match.group(1)
        data = json.loads(json_string)
        try:
-            job_description = data["jobInfoWrapperModel"]["jobInfoModel"]["sanitizedJobDescription"]
+            job_description = data["jobInfoWrapperModel"]["jobInfoModel"][
+                "sanitizedJobDescription"
+            ]
        except (KeyError, TypeError, IndexError):
            return None

-        soup = BeautifulSoup(job_description, "html.parser")  # No need for StringIO, pass the string directly
-        text_content = " ".join(soup.get_text(separator=" ").split()).strip()  # Clean and normalize whitespaces
+        soup = BeautifulSoup(
+            job_description, "html.parser"
+        )
+        text_content = " ".join(
+            soup.get_text(separator=" ").split()
+        ).strip()

        return text_content

@@ -297,6 +309,7 @@ class IndeedScraper(Scraper):
                ):
                    return tag
            return None
+
        script_tag = find_mosaic_script()

        if script_tag:
--- a/src/jobspy/scrapers/utils.py
+++ b/src/jobspy/scrapers/utils.py
@@ -44,6 +44,7 @@ def create_session(proxy: str | None = None):

    return session

+
 def get_enum_from_job_type(job_type_str: str) -> JobType | None:
    """
    Given a string, returns the corresponding JobType enum member if a match is found.