chore: remove unused func

pull/106/head
Cullen Watson 2024-02-12 10:58:46 -06:00
parent 05419886de
commit 409fe272f3
1 changed files with 7 additions and 31 deletions

View File

@ -49,7 +49,7 @@ class IndeedScraper(Scraper):
def scrape_page( def scrape_page(
self, scraper_input: ScraperInput, page: int self, scraper_input: ScraperInput, page: int
) -> tuple[list[JobPost], int]: ) -> list[JobPost]:
""" """
Scrapes a page of Indeed for jobs with scraper_input criteria Scrapes a page of Indeed for jobs with scraper_input criteria
:param scraper_input: :param scraper_input:
@ -57,7 +57,6 @@ class IndeedScraper(Scraper):
:return: jobs found on page, total number of jobs found for search :return: jobs found on page, total number of jobs found for search
""" """
job_list = [] job_list = []
total_num_jobs = 0
self.country = scraper_input.country self.country = scraper_input.country
domain = self.country.indeed_domain_value domain = self.country.indeed_domain_value
self.url = f"https://{domain}.indeed.com" self.url = f"https://{domain}.indeed.com"
@ -80,12 +79,11 @@ class IndeedScraper(Scraper):
logger.error(f'Indeed: Bad proxy') logger.error(f'Indeed: Bad proxy')
else: else:
logger.error(f'Indeed: {str(e)}') logger.error(f'Indeed: {str(e)}')
return job_list, total_num_jobs return job_list
soup = BeautifulSoup(response.content, "html.parser") soup = BeautifulSoup(response.content, "html.parser")
total_num_jobs = IndeedScraper.total_jobs(soup)
if "did not match any jobs" in response.text: if "did not match any jobs" in response.text:
return job_list, total_num_jobs return job_list
jobs = IndeedScraper.parse_jobs( jobs = IndeedScraper.parse_jobs(
soup soup
@ -147,7 +145,7 @@ class IndeedScraper(Scraper):
job_list = [result.result() for result in job_results if result.result()] job_list = [result.result() for result in job_results if result.result()]
return job_list, total_num_jobs return job_list
def scrape(self, scraper_input: ScraperInput) -> JobResponse: def scrape(self, scraper_input: ScraperInput) -> JobResponse:
""" """
@ -155,7 +153,7 @@ class IndeedScraper(Scraper):
:param scraper_input: :param scraper_input:
:return: job_response :return: job_response
""" """
job_list, total_results = self.scrape_page(scraper_input, 0) job_list = self.scrape_page(scraper_input, 0)
pages_processed = 1 pages_processed = 1
while len(self.seen_urls) < scraper_input.results_wanted: while len(self.seen_urls) < scraper_input.results_wanted:
@ -169,7 +167,7 @@ class IndeedScraper(Scraper):
] ]
for future in futures: for future in futures:
jobs, _ = future.result() jobs = future.result()
if jobs: if jobs:
job_list += jobs job_list += jobs
new_jobs = True new_jobs = True
@ -184,11 +182,7 @@ class IndeedScraper(Scraper):
if len(self.seen_urls) > scraper_input.results_wanted: if len(self.seen_urls) > scraper_input.results_wanted:
job_list = job_list[:scraper_input.results_wanted] job_list = job_list[:scraper_input.results_wanted]
job_response = JobResponse( return JobResponse(jobs=job_list)
jobs=job_list,
total_results=total_results,
)
return job_response
@staticmethod @staticmethod
def get_job_type(job: dict) -> list[JobType] | None: def get_job_type(job: dict) -> list[JobType] | None:
@ -288,24 +282,6 @@ class IndeedScraper(Scraper):
"Could not find any results for the search" "Could not find any results for the search"
) )
@staticmethod
def total_jobs(soup: BeautifulSoup) -> int:
"""
Parses the total jobs for that search from soup object
:param soup:
:return: total_num_jobs
"""
script = soup.find("script", string=lambda t: t and "window._initialData" in t)
pattern = re.compile(r"window._initialData\s*=\s*({.*})\s*;", re.DOTALL)
match = pattern.search(script.string)
total_num_jobs = 0
if match:
json_str = match.group(1)
data = json.loads(json_str)
total_num_jobs = int(data["searchTitleBarModel"]["totalNumResults"])
return total_num_jobs
@staticmethod @staticmethod
def get_headers(): def get_headers():
return { return {