mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-04 11:34:47 -08:00
Compare commits
9 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fda080a373 | ||
|
|
6e7ab6ff74 | ||
|
|
7160d0faed | ||
|
|
6e014cf732 | ||
|
|
6e8576f8a8 | ||
|
|
51888004b7 | ||
|
|
b6d5cd8d79 | ||
|
|
84ed670df3 | ||
|
|
4b16ac7967 |
1
.github/CODEOWNERS
vendored
Normal file
1
.github/CODEOWNERS
vendored
Normal file
@@ -0,0 +1 @@
|
||||
* @cullenwatson
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
## Features
|
||||
|
||||
- Scrapes job postings from **LinkedIn**, **Indeed**, **Glassdoor**, **Google**, **ZipRecruiter**, **Bayt** & **Naukri** concurrently
|
||||
- Scrapes job postings from **LinkedIn**, **Indeed**, **Glassdoor**, **Google**, **ZipRecruiter** & other job boards concurrently
|
||||
- Aggregates the job postings in a dataframe
|
||||
- Proxies support to bypass blocking
|
||||
|
||||
@@ -25,7 +25,7 @@ import csv
|
||||
from jobspy import scrape_jobs
|
||||
|
||||
jobs = scrape_jobs(
|
||||
site_name=["indeed", "linkedin", "zip_recruiter", "glassdoor", "google", "bayt", "naukri"],
|
||||
site_name=["indeed", "linkedin", "zip_recruiter", "google"], # "glassdoor", "bayt", "naukri", "bdjobs"
|
||||
search_term="software engineer",
|
||||
google_search_term="software engineer jobs near San Francisco, CA since yesterday",
|
||||
location="San Francisco, CA",
|
||||
@@ -59,7 +59,7 @@ zip_recruiter Software Developer TEKsystems Phoenix
|
||||
```plaintext
|
||||
Optional
|
||||
├── site_name (list|str):
|
||||
| linkedin, zip_recruiter, indeed, glassdoor, google, bayt
|
||||
| linkedin, zip_recruiter, indeed, glassdoor, google, bayt, naukri, bdjobs
|
||||
| (default is all)
|
||||
│
|
||||
├── search_term (str)
|
||||
@@ -86,6 +86,9 @@ Optional
|
||||
│
|
||||
├── easy_apply (bool):
|
||||
| filters for jobs that are hosted on the job board site (LinkedIn easy apply filter no longer works)
|
||||
|
|
||||
├── user_agent (str):
|
||||
| overrides the default user agent, which may be outdated
|
||||
│
|
||||
├── description_format (str):
|
||||
| markdown, html (Format type of the job descriptions. Default is markdown.)
|
||||
|
||||
@@ -107,6 +107,7 @@ def scrape_jobs(
|
||||
scraped_data: JobResponse = scraper.scrape(scraper_input)
|
||||
cap_name = site.value.capitalize()
|
||||
site_name = "ZipRecruiter" if cap_name == "Zip_recruiter" else cap_name
|
||||
site_name = "LinkedIn" if cap_name == "Linkedin" else cap_name
|
||||
create_logger(site_name).info(f"finished scraping")
|
||||
return site.value, scraped_data
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#__init__.py
|
||||
# __init__.py
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
@@ -12,7 +12,12 @@ from bs4.element import Tag
|
||||
|
||||
from jobspy.exception import BDJobsException
|
||||
from jobspy.bdjobs.constant import headers, search_params
|
||||
from jobspy.bdjobs.util import parse_location, parse_date, find_job_listings, is_job_remote
|
||||
from jobspy.bdjobs.util import (
|
||||
parse_location,
|
||||
parse_date,
|
||||
find_job_listings,
|
||||
is_job_remote,
|
||||
)
|
||||
from jobspy.model import (
|
||||
JobPost,
|
||||
Location,
|
||||
@@ -39,9 +44,9 @@ class BDJobs(Scraper):
|
||||
search_url = "https://jobs.bdjobs.com/jobsearch.asp"
|
||||
delay = 2
|
||||
band_delay = 3
|
||||
|
||||
|
||||
def __init__(
|
||||
self, proxies: list[str] | str | None = None, ca_cert: str | None = None
|
||||
self, proxies: list[str] | str | None = None, ca_cert: str | None = None, user_agent: str | None = None
|
||||
):
|
||||
"""
|
||||
Initializes BDJobsScraper with the BDJobs job search url
|
||||
@@ -58,7 +63,7 @@ class BDJobs(Scraper):
|
||||
self.session.headers.update(headers)
|
||||
self.scraper_input = None
|
||||
self.country = "bangladesh"
|
||||
|
||||
|
||||
def scrape(self, scraper_input: ScraperInput) -> JobResponse:
|
||||
"""
|
||||
Scrapes BDJobs for jobs with scraper_input criteria
|
||||
@@ -70,72 +75,64 @@ class BDJobs(Scraper):
|
||||
seen_ids = set()
|
||||
page = 1
|
||||
request_count = 0
|
||||
|
||||
|
||||
# Set up search parameters
|
||||
params = search_params.copy()
|
||||
params["txtsearch"] = scraper_input.search_term
|
||||
|
||||
|
||||
continue_search = lambda: len(job_list) < scraper_input.results_wanted
|
||||
|
||||
|
||||
while continue_search():
|
||||
request_count += 1
|
||||
log.info(f"search page: {request_count}")
|
||||
|
||||
|
||||
try:
|
||||
# Add page parameter if needed
|
||||
if page > 1:
|
||||
params["pg"] = page
|
||||
|
||||
|
||||
response = self.session.get(
|
||||
self.search_url,
|
||||
params=params,
|
||||
timeout=getattr(scraper_input, 'request_timeout', 60)
|
||||
timeout=getattr(scraper_input, "request_timeout", 60),
|
||||
)
|
||||
|
||||
# DEBUG: Save the received HTML content
|
||||
try:
|
||||
with open("scraper_received_bdjobs.html", "w", encoding="utf-8") as f:
|
||||
f.write(response.text)
|
||||
log.info(f"Saved scraper response to scraper_received_bdjobs.html")
|
||||
except Exception as e_write:
|
||||
log.error(f"Error writing debug HTML file: {e_write}")
|
||||
|
||||
if response.status_code != 200:
|
||||
log.error(f"BDJobs response status code {response.status_code}")
|
||||
break
|
||||
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
job_cards = find_job_listings(soup)
|
||||
|
||||
|
||||
if not job_cards or len(job_cards) == 0:
|
||||
log.info("No more job listings found")
|
||||
break
|
||||
|
||||
|
||||
log.info(f"Found {len(job_cards)} job cards on page {page}")
|
||||
|
||||
|
||||
for job_card in job_cards:
|
||||
try:
|
||||
job_post = self._process_job(job_card)
|
||||
if job_post and job_post.id not in seen_ids:
|
||||
seen_ids.add(job_post.id)
|
||||
job_list.append(job_post)
|
||||
|
||||
|
||||
if not continue_search():
|
||||
break
|
||||
except Exception as e:
|
||||
log.error(f"Error processing job card: {str(e)}")
|
||||
|
||||
|
||||
page += 1
|
||||
# Add delay between requests
|
||||
time.sleep(random.uniform(self.delay, self.delay + self.band_delay))
|
||||
|
||||
|
||||
except Exception as e:
|
||||
log.error(f"Error during scraping: {str(e)}")
|
||||
break
|
||||
|
||||
job_list = job_list[:scraper_input.results_wanted]
|
||||
|
||||
job_list = job_list[: scraper_input.results_wanted]
|
||||
return JobResponse(jobs=job_list)
|
||||
|
||||
|
||||
def _process_job(self, job_card: Tag) -> Optional[JobPost]:
|
||||
"""
|
||||
Processes a job card element into a JobPost object
|
||||
@@ -147,48 +144,88 @@ class BDJobs(Scraper):
|
||||
job_link = job_card.find("a", href=lambda h: h and "jobdetail" in h.lower())
|
||||
if not job_link:
|
||||
return None
|
||||
|
||||
|
||||
job_url = job_link.get("href")
|
||||
if not job_url.startswith("http"):
|
||||
job_url = urljoin(self.base_url, job_url)
|
||||
|
||||
|
||||
# Extract job ID from URL
|
||||
job_id = job_url.split("jobid=")[-1].split("&")[0] if "jobid=" in job_url else f"bdjobs-{hash(job_url)}"
|
||||
|
||||
job_id = (
|
||||
job_url.split("jobid=")[-1].split("&")[0]
|
||||
if "jobid=" in job_url
|
||||
else f"bdjobs-{hash(job_url)}"
|
||||
)
|
||||
|
||||
# Extract title
|
||||
title = job_link.get_text(strip=True)
|
||||
if not title:
|
||||
title_elem = job_card.find(["h2", "h3", "h4", "strong", "div"], class_=lambda c: c and "job-title-text" in c)
|
||||
title_elem = job_card.find(
|
||||
["h2", "h3", "h4", "strong", "div"],
|
||||
class_=lambda c: c and "job-title-text" in c,
|
||||
)
|
||||
title = title_elem.get_text(strip=True) if title_elem else "N/A"
|
||||
|
||||
|
||||
# Extract company name - IMPROVED
|
||||
company_elem = job_card.find(["span", "div"], class_=lambda c: c and "comp-name-text" in (c or "").lower())
|
||||
company_elem = job_card.find(
|
||||
["span", "div"],
|
||||
class_=lambda c: c and "comp-name-text" in (c or "").lower(),
|
||||
)
|
||||
if company_elem:
|
||||
company_name = company_elem.get_text(strip=True)
|
||||
else:
|
||||
# Try alternative selectors
|
||||
company_elem = job_card.find(["span", "div"], class_=lambda c: c and any(term in (c or "").lower() for term in ["company", "org", "comp-name"]))
|
||||
company_name = company_elem.get_text(strip=True) if company_elem else "N/A"
|
||||
|
||||
company_elem = job_card.find(
|
||||
["span", "div"],
|
||||
class_=lambda c: c
|
||||
and any(
|
||||
term in (c or "").lower()
|
||||
for term in ["company", "org", "comp-name"]
|
||||
),
|
||||
)
|
||||
company_name = (
|
||||
company_elem.get_text(strip=True) if company_elem else "N/A"
|
||||
)
|
||||
|
||||
# Extract location
|
||||
location_elem = job_card.find(["span", "div"], class_=lambda c: c and "locon-text-d" in (c or "").lower())
|
||||
location_elem = job_card.find(
|
||||
["span", "div"],
|
||||
class_=lambda c: c and "locon-text-d" in (c or "").lower(),
|
||||
)
|
||||
if not location_elem:
|
||||
location_elem = job_card.find(["span", "div"], class_=lambda c: c and any(term in (c or "").lower() for term in ["location", "area", "locon"]))
|
||||
location_text = location_elem.get_text(strip=True) if location_elem else "Dhaka, Bangladesh"
|
||||
|
||||
location_elem = job_card.find(
|
||||
["span", "div"],
|
||||
class_=lambda c: c
|
||||
and any(
|
||||
term in (c or "").lower()
|
||||
for term in ["location", "area", "locon"]
|
||||
),
|
||||
)
|
||||
location_text = (
|
||||
location_elem.get_text(strip=True)
|
||||
if location_elem
|
||||
else "Dhaka, Bangladesh"
|
||||
)
|
||||
|
||||
# Create Location object
|
||||
location = parse_location(location_text, self.country)
|
||||
|
||||
|
||||
# Extract date posted
|
||||
date_elem = job_card.find(["span", "div"], class_=lambda c: c and any(term in (c or "").lower() for term in ["date", "deadline", "published"]))
|
||||
date_elem = job_card.find(
|
||||
["span", "div"],
|
||||
class_=lambda c: c
|
||||
and any(
|
||||
term in (c or "").lower()
|
||||
for term in ["date", "deadline", "published"]
|
||||
),
|
||||
)
|
||||
date_posted = None
|
||||
if date_elem:
|
||||
date_text = date_elem.get_text(strip=True)
|
||||
date_posted = parse_date(date_text)
|
||||
|
||||
|
||||
# Check if job is remote
|
||||
is_remote = is_job_remote(title, location=location)
|
||||
|
||||
|
||||
# Create job post object
|
||||
job_post = JobPost(
|
||||
id=job_id,
|
||||
@@ -200,17 +237,17 @@ class BDJobs(Scraper):
|
||||
is_remote=is_remote,
|
||||
site=self.site,
|
||||
)
|
||||
|
||||
|
||||
# Always fetch description for BDJobs
|
||||
job_details = self._get_job_details(job_url)
|
||||
job_post.description = job_details.get("description", "")
|
||||
job_post.job_type = job_details.get("job_type", "")
|
||||
|
||||
|
||||
return job_post
|
||||
except Exception as e:
|
||||
log.error(f"Error in _process_job: {str(e)}")
|
||||
return None
|
||||
|
||||
|
||||
def _get_job_details(self, job_url: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Gets detailed job information from the job page
|
||||
@@ -221,59 +258,96 @@ class BDJobs(Scraper):
|
||||
response = self.session.get(job_url, timeout=60)
|
||||
if response.status_code != 200:
|
||||
return {}
|
||||
|
||||
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
|
||||
|
||||
# Find job description - IMPROVED based on correct.py
|
||||
description = ""
|
||||
|
||||
|
||||
# Try to find the job content div first (as in correct.py)
|
||||
job_content_div = soup.find('div', class_='jobcontent')
|
||||
job_content_div = soup.find("div", class_="jobcontent")
|
||||
if job_content_div:
|
||||
# Look for responsibilities section
|
||||
responsibilities_heading = job_content_div.find('h4', id='job_resp') or job_content_div.find(['h4', 'h5'], string=lambda s: s and 'responsibilities' in s.lower())
|
||||
responsibilities_heading = job_content_div.find(
|
||||
"h4", id="job_resp"
|
||||
) or job_content_div.find(
|
||||
["h4", "h5"], string=lambda s: s and "responsibilities" in s.lower()
|
||||
)
|
||||
if responsibilities_heading:
|
||||
responsibilities_elements = []
|
||||
# Find all following elements until the next heading or hr
|
||||
for sibling in responsibilities_heading.find_next_siblings():
|
||||
if sibling.name in ['hr', 'h4', 'h5']:
|
||||
if sibling.name in ["hr", "h4", "h5"]:
|
||||
break
|
||||
if sibling.name == 'ul':
|
||||
responsibilities_elements.extend(li.get_text(separator=' ', strip=True) for li in sibling.find_all('li'))
|
||||
elif sibling.name == 'p':
|
||||
responsibilities_elements.append(sibling.get_text(separator=' ', strip=True))
|
||||
|
||||
description = "\n".join(responsibilities_elements) if responsibilities_elements else ""
|
||||
|
||||
if sibling.name == "ul":
|
||||
responsibilities_elements.extend(
|
||||
li.get_text(separator=" ", strip=True)
|
||||
for li in sibling.find_all("li")
|
||||
)
|
||||
elif sibling.name == "p":
|
||||
responsibilities_elements.append(
|
||||
sibling.get_text(separator=" ", strip=True)
|
||||
)
|
||||
|
||||
description = (
|
||||
"\n".join(responsibilities_elements)
|
||||
if responsibilities_elements
|
||||
else ""
|
||||
)
|
||||
|
||||
# If no description found yet, try the original approach
|
||||
if not description:
|
||||
description_elem = soup.find(["div", "section"], class_=lambda c: c and any(term in (c or "").lower() for term in ["job-description", "details", "requirements"]))
|
||||
description_elem = soup.find(
|
||||
["div", "section"],
|
||||
class_=lambda c: c
|
||||
and any(
|
||||
term in (c or "").lower()
|
||||
for term in ["job-description", "details", "requirements"]
|
||||
),
|
||||
)
|
||||
if description_elem:
|
||||
description_elem = remove_attributes(description_elem)
|
||||
description = description_elem.prettify(formatter="html")
|
||||
if hasattr(self.scraper_input, 'description_format') and self.scraper_input.description_format == DescriptionFormat.MARKDOWN:
|
||||
if (
|
||||
hasattr(self.scraper_input, "description_format")
|
||||
and self.scraper_input.description_format
|
||||
== DescriptionFormat.MARKDOWN
|
||||
):
|
||||
description = markdown_converter(description)
|
||||
|
||||
|
||||
# Extract job type
|
||||
job_type_elem = soup.find(["span", "div"], string=lambda s: s and any(term in (s or "").lower() for term in ["job type", "employment type"]))
|
||||
job_type_elem = soup.find(
|
||||
["span", "div"],
|
||||
string=lambda s: s
|
||||
and any(
|
||||
term in (s or "").lower()
|
||||
for term in ["job type", "employment type"]
|
||||
),
|
||||
)
|
||||
job_type = None
|
||||
if job_type_elem:
|
||||
job_type_text = job_type_elem.find_next(["span", "div"]).get_text(strip=True)
|
||||
job_type_text = job_type_elem.find_next(["span", "div"]).get_text(
|
||||
strip=True
|
||||
)
|
||||
job_type = job_type_text if job_type_text else None
|
||||
|
||||
|
||||
# Extract company industry
|
||||
industry_elem = soup.find(["span", "div"], string=lambda s: s and "industry" in (s or "").lower())
|
||||
industry_elem = soup.find(
|
||||
["span", "div"], string=lambda s: s and "industry" in (s or "").lower()
|
||||
)
|
||||
company_industry = None
|
||||
if industry_elem:
|
||||
industry_text = industry_elem.find_next(["span", "div"]).get_text(strip=True)
|
||||
industry_text = industry_elem.find_next(["span", "div"]).get_text(
|
||||
strip=True
|
||||
)
|
||||
company_industry = industry_text if industry_text else None
|
||||
|
||||
|
||||
return {
|
||||
"description": description,
|
||||
"job_type": job_type,
|
||||
"company_industry": company_industry
|
||||
"company_industry": company_industry,
|
||||
}
|
||||
|
||||
|
||||
except Exception as e:
|
||||
log.error(f"Error getting job details: {str(e)}")
|
||||
return {}
|
||||
return {}
|
||||
|
||||
@@ -35,6 +35,7 @@ from jobspy.util import (
|
||||
extract_emails_from_text,
|
||||
currency_parser,
|
||||
markdown_converter,
|
||||
plain_converter,
|
||||
create_session,
|
||||
remove_attributes,
|
||||
create_logger,
|
||||
@@ -164,7 +165,7 @@ class LinkedIn(Scraper):
|
||||
|
||||
if continue_search():
|
||||
time.sleep(random.uniform(self.delay, self.delay + self.band_delay))
|
||||
start += len(job_list)
|
||||
start += len(job_cards)
|
||||
|
||||
job_list = job_list[: scraper_input.results_wanted]
|
||||
return JobResponse(jobs=job_list)
|
||||
@@ -208,6 +209,10 @@ class LinkedIn(Scraper):
|
||||
if metadata_card
|
||||
else None
|
||||
)
|
||||
if not datetime_tag and metadata_card:
|
||||
datetime_tag = metadata_card.find(
|
||||
"time", class_="job-search-card__listdate--new"
|
||||
)
|
||||
date_posted = None
|
||||
if datetime_tag and "datetime" in datetime_tag.attrs:
|
||||
datetime_str = datetime_tag["datetime"]
|
||||
@@ -267,7 +272,8 @@ class LinkedIn(Scraper):
|
||||
description = div_content.prettify(formatter="html")
|
||||
if self.scraper_input.description_format == DescriptionFormat.MARKDOWN:
|
||||
description = markdown_converter(description)
|
||||
|
||||
elif self.scraper_input.description_format == DescriptionFormat.PLAIN:
|
||||
description = plain_converter(description)
|
||||
h3_tag = soup.find(
|
||||
"h3", text=lambda text: text and "Job function" in text.strip()
|
||||
)
|
||||
|
||||
@@ -234,7 +234,7 @@ class Compensation(BaseModel):
|
||||
class DescriptionFormat(Enum):
|
||||
MARKDOWN = "markdown"
|
||||
HTML = "html"
|
||||
|
||||
PLAIN = "plain"
|
||||
|
||||
class JobPost(BaseModel):
|
||||
id: str | None = None
|
||||
|
||||
@@ -164,12 +164,15 @@ class Naukri(Scraper):
|
||||
date_posted = self._parse_date(job.get("footerPlaceholderLabel"), job.get("createdDate"))
|
||||
|
||||
job_url = f"https://www.naukri.com{job.get('jdURL', f'/job/{job_id}')}"
|
||||
description = job.get("jobDescription") if full_descr else None
|
||||
raw_description = job.get("jobDescription") if full_descr else None
|
||||
|
||||
job_type = parse_job_type(raw_description) if raw_description else None
|
||||
company_industry = parse_company_industry(raw_description) if raw_description else None
|
||||
|
||||
description = raw_description
|
||||
if description and self.scraper_input.description_format == DescriptionFormat.MARKDOWN:
|
||||
description = markdown_converter(description)
|
||||
|
||||
job_type = parse_job_type(description) if description else None
|
||||
company_industry = parse_company_industry(description) if description else None
|
||||
is_remote = is_job_remote(title, description or "", location)
|
||||
company_logo = job.get("logoPathV3") or job.get("logoPath")
|
||||
|
||||
|
||||
@@ -5,10 +5,12 @@ from jobspy.model import JobType, Location
|
||||
from jobspy.util import get_enum_from_job_type
|
||||
|
||||
|
||||
def parse_job_type(soup: BeautifulSoup) -> list[JobType] | None:
|
||||
def parse_job_type(soup: BeautifulSoup |str) -> list[JobType] | None:
|
||||
"""
|
||||
Gets the job type from the job page
|
||||
"""
|
||||
if isinstance(soup, str):
|
||||
soup = BeautifulSoup(soup, "html.parser")
|
||||
job_type_tag = soup.find("span", class_="job-type")
|
||||
if job_type_tag:
|
||||
job_type_str = job_type_tag.get_text(strip=True).lower().replace("-", "")
|
||||
@@ -16,10 +18,12 @@ def parse_job_type(soup: BeautifulSoup) -> list[JobType] | None:
|
||||
return None
|
||||
|
||||
|
||||
def parse_company_industry(soup: BeautifulSoup) -> str | None:
|
||||
def parse_company_industry(soup: BeautifulSoup | str) -> str | None:
|
||||
"""
|
||||
Gets the company industry from the job page
|
||||
"""
|
||||
if isinstance(soup, str):
|
||||
soup = BeautifulSoup(soup, "html.parser")
|
||||
industry_tag = soup.find("span", class_="industry")
|
||||
return industry_tag.get_text(strip=True) if industry_tag else None
|
||||
|
||||
|
||||
@@ -157,6 +157,15 @@ def markdown_converter(description_html: str):
|
||||
markdown = md(description_html)
|
||||
return markdown.strip()
|
||||
|
||||
def plain_converter(decription_html:str):
|
||||
from bs4 import BeautifulSoup
|
||||
if decription_html is None:
|
||||
return None
|
||||
soup = BeautifulSoup(decription_html, "html.parser")
|
||||
text = soup.get_text(separator=" ")
|
||||
text = re.sub(r'\s+',' ',text)
|
||||
return text.strip()
|
||||
|
||||
|
||||
def extract_emails_from_text(text: str) -> list[str] | None:
|
||||
if not text:
|
||||
|
||||
56
poetry.lock
generated
56
poetry.lock
generated
@@ -749,17 +749,6 @@ files = [
|
||||
[package.extras]
|
||||
all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "iniconfig"
|
||||
version = "2.0.0"
|
||||
description = "brain-dead simple config-ini parsing"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
|
||||
{file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ipykernel"
|
||||
version = "6.29.5"
|
||||
@@ -1229,13 +1218,13 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "markdownify"
|
||||
version = "0.13.1"
|
||||
version = "1.1.0"
|
||||
description = "Convert HTML to markdown."
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
{file = "markdownify-0.13.1-py3-none-any.whl", hash = "sha256:1d181d43d20902bcc69d7be85b5316ed174d0dda72ff56e14ae4c95a4a407d22"},
|
||||
{file = "markdownify-0.13.1.tar.gz", hash = "sha256:ab257f9e6bd4075118828a28c9d02f8a4bfeb7421f558834aa79b2dfeb32a098"},
|
||||
{file = "markdownify-1.1.0-py3-none-any.whl", hash = "sha256:32a5a08e9af02c8a6528942224c91b933b4bd2c7d078f9012943776fc313eeef"},
|
||||
{file = "markdownify-1.1.0.tar.gz", hash = "sha256:449c0bbbf1401c5112379619524f33b63490a8fa479456d41de9dc9e37560ebd"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -1710,21 +1699,6 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-a
|
||||
test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"]
|
||||
type = ["mypy (>=1.11.2)"]
|
||||
|
||||
[[package]]
|
||||
name = "pluggy"
|
||||
version = "1.5.0"
|
||||
description = "plugin and hook calling mechanisms for python"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
{file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
|
||||
{file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
dev = ["pre-commit", "tox"]
|
||||
testing = ["pytest", "pytest-benchmark"]
|
||||
|
||||
[[package]]
|
||||
name = "pre-commit"
|
||||
version = "4.0.1"
|
||||
@@ -1975,28 +1949,6 @@ files = [
|
||||
[package.extras]
|
||||
windows-terminal = ["colorama (>=0.4.6)"]
|
||||
|
||||
[[package]]
|
||||
name = "pytest"
|
||||
version = "7.4.4"
|
||||
description = "pytest: simple powerful testing with Python"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
|
||||
{file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
colorama = {version = "*", markers = "sys_platform == \"win32\""}
|
||||
exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
|
||||
iniconfig = "*"
|
||||
packaging = "*"
|
||||
pluggy = ">=0.12,<2.0"
|
||||
tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
|
||||
|
||||
[package.extras]
|
||||
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
|
||||
|
||||
[[package]]
|
||||
name = "python-dateutil"
|
||||
version = "2.9.0.post0"
|
||||
@@ -2869,4 +2821,4 @@ files = [
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "57169347d2ce0ff19c4d3024ce000651bb3a816e36f454618f480741094fb4a7"
|
||||
content-hash = "6260adc8f96f6cf1ba4e2c23f05504c19e67140b9d346aed3d12eea6957b2104"
|
||||
|
||||
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.poetry]
|
||||
name = "python-jobspy"
|
||||
version = "1.1.80"
|
||||
version = "1.1.82"
|
||||
description = "Job scraper for LinkedIn, Indeed, Glassdoor, ZipRecruiter & Bayt"
|
||||
authors = ["Cullen Watson <cullen@cullenwatson.com>", "Zachary Hampton <zachary@zacharysproducts.com>"]
|
||||
homepage = "https://github.com/cullenwatson/JobSpy"
|
||||
@@ -21,10 +21,10 @@ python = "^3.10"
|
||||
requests = "^2.31.0"
|
||||
beautifulsoup4 = "^4.12.2"
|
||||
pandas = "^2.1.0"
|
||||
NUMPY = "1.26.3"
|
||||
numpy = ">=1.26.0"
|
||||
pydantic = "^2.3.0"
|
||||
tls-client = "^1.0.1"
|
||||
markdownify = "^0.13.1"
|
||||
markdownify = "^1.1.0"
|
||||
regex = "^2024.4.28"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
|
||||
Reference in New Issue
Block a user