mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-07 04:54:32 -08:00
enh(linkedin): search by company ids (#99)
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
from ..jobs import Enum, BaseModel, JobType, JobResponse, Country
|
||||
from typing import List, Optional, Any
|
||||
|
||||
|
||||
class Site(Enum):
|
||||
@@ -10,23 +9,24 @@ class Site(Enum):
|
||||
|
||||
|
||||
class ScraperInput(BaseModel):
|
||||
site_type: List[Site]
|
||||
search_term: str
|
||||
site_type: list[Site]
|
||||
search_term: str | None = None
|
||||
|
||||
location: str = None
|
||||
country: Optional[Country] = Country.USA
|
||||
distance: Optional[int] = None
|
||||
location: str | None = None
|
||||
country: Country | None = Country.USA
|
||||
distance: int | None = None
|
||||
is_remote: bool = False
|
||||
job_type: Optional[JobType] = None
|
||||
easy_apply: bool = None # linkedin
|
||||
job_type: JobType | None = None
|
||||
easy_apply: bool | None = None
|
||||
full_description: bool = False
|
||||
offset: int = 0
|
||||
linkedin_company_ids: list[int] | None = None
|
||||
|
||||
results_wanted: int = 15
|
||||
|
||||
|
||||
class Scraper:
|
||||
def __init__(self, site: Site, proxy: Optional[List[str]] = None):
|
||||
def __init__(self, site: Site, proxy: list[str] | None = None):
|
||||
self.site = site
|
||||
self.proxy = (lambda p: {"http": p, "https": p} if p else None)(proxy)
|
||||
|
||||
|
||||
@@ -348,7 +348,7 @@ class IndeedScraper(Scraper):
|
||||
def add_params(scraper_input: ScraperInput, page: int) -> dict[str, str | Any]:
|
||||
params = {
|
||||
"q": scraper_input.search_term,
|
||||
"l": scraper_input.location,
|
||||
"l": scraper_input.location if scraper_input.location else scraper_input.country.value[0].split(',')[-1],
|
||||
"filter": 0,
|
||||
"start": scraper_input.offset + page * 10,
|
||||
"sort": "date"
|
||||
|
||||
@@ -70,7 +70,9 @@ class LinkedInScraper(Scraper):
|
||||
|
||||
return mapping.get(job_type_enum, "")
|
||||
|
||||
while len(job_list) < scraper_input.results_wanted and page < 1000:
|
||||
continue_search = lambda: len(job_list) < scraper_input.results_wanted and page < 1000
|
||||
|
||||
while continue_search():
|
||||
session = create_session(is_tls=False, has_retry=True, delay=5)
|
||||
params = {
|
||||
"keywords": scraper_input.search_term,
|
||||
@@ -83,6 +85,7 @@ class LinkedInScraper(Scraper):
|
||||
"pageNum": 0,
|
||||
"start": page + scraper_input.offset,
|
||||
"f_AL": "true" if scraper_input.easy_apply else None,
|
||||
"f_C": ','.join(map(str, scraper_input.linkedin_company_ids)) if scraper_input.linkedin_company_ids else None
|
||||
}
|
||||
|
||||
params = {k: v for k, v in params.items() if v is not None}
|
||||
@@ -130,8 +133,9 @@ class LinkedInScraper(Scraper):
|
||||
except Exception as e:
|
||||
raise LinkedInException("Exception occurred while processing jobs")
|
||||
|
||||
page += 25
|
||||
time.sleep(random.uniform(LinkedInScraper.DELAY, LinkedInScraper.DELAY + 2))
|
||||
if continue_search():
|
||||
time.sleep(random.uniform(LinkedInScraper.DELAY, LinkedInScraper.DELAY + 2))
|
||||
page += 25
|
||||
|
||||
job_list = job_list[: scraper_input.results_wanted]
|
||||
return JobResponse(jobs=job_list)
|
||||
|
||||
@@ -31,6 +31,7 @@ class ZipRecruiterScraper(Scraper):
|
||||
|
||||
self.jobs_per_page = 20
|
||||
self.seen_urls = set()
|
||||
self.delay = 5
|
||||
|
||||
def find_jobs_in_page(
|
||||
self, scraper_input: ScraperInput, continue_token: str | None = None
|
||||
@@ -59,7 +60,6 @@ class ZipRecruiterScraper(Scraper):
|
||||
raise ZipRecruiterException("bad proxy")
|
||||
raise ZipRecruiterException(str(e))
|
||||
|
||||
time.sleep(5)
|
||||
response_data = response.json()
|
||||
jobs_list = response_data.get("jobs", [])
|
||||
next_continue_token = response_data.get("continue", None)
|
||||
@@ -85,6 +85,9 @@ class ZipRecruiterScraper(Scraper):
|
||||
if len(job_list) >= scraper_input.results_wanted:
|
||||
break
|
||||
|
||||
if page > 1:
|
||||
time.sleep(self.delay)
|
||||
|
||||
jobs_on_page, continue_token = self.find_jobs_in_page(
|
||||
scraper_input, continue_token
|
||||
)
|
||||
@@ -108,7 +111,7 @@ class ZipRecruiterScraper(Scraper):
|
||||
description_soup = BeautifulSoup(job_description_html, "html.parser")
|
||||
description = modify_and_get_description(description_soup)
|
||||
|
||||
company = job["hiring_company"].get("name") if "hiring_company" in job else None
|
||||
company = job.get("hiring_company", {}).get("name")
|
||||
country_value = "usa" if job.get("job_country") == "US" else "canada"
|
||||
country_enum = Country.from_string(country_value)
|
||||
|
||||
@@ -184,6 +187,8 @@ class ZipRecruiterScraper(Scraper):
|
||||
if scraper_input.distance:
|
||||
params["radius"] = scraper_input.distance
|
||||
|
||||
params = {k: v for k, v in params.items() if v is not None}
|
||||
|
||||
return params
|
||||
|
||||
@staticmethod
|
||||
|
||||
Reference in New Issue
Block a user