mirror of
https://github.com/Bunsly/JobSpy.git
synced 2026-03-05 12:04:33 -08:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
628f4dee9c | ||
|
|
2e59ab03e3 | ||
|
|
008ca61e12 | ||
|
|
8fc4c3bf90 | ||
|
|
bff39a2625 | ||
|
|
c676050dc0 | ||
|
|
37976f7ec2 | ||
|
|
9fb2fdd80f |
@@ -95,7 +95,7 @@ Optional
|
|||||||
├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
|
├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
|
||||||
├── easy_apply (bool): filters for jobs that are hosted on LinkedIn
|
├── easy_apply (bool): filters for jobs that are hosted on LinkedIn
|
||||||
├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
|
├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
|
||||||
├── offset (enum): starts the search from an offset (e.g. 25 will start the search from the 25th result)
|
├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
|
||||||
```
|
```
|
||||||
|
|
||||||
### JobPost Schema
|
### JobPost Schema
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "python-jobspy"
|
name = "python-jobspy"
|
||||||
version = "1.1.8"
|
version = "1.1.12"
|
||||||
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
|
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
|
||||||
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
|
||||||
homepage = "https://github.com/cullenwatson/JobSpy"
|
homepage = "https://github.com/cullenwatson/JobSpy"
|
||||||
|
|||||||
@@ -163,6 +163,7 @@ def scrape_jobs(
|
|||||||
"max_amount",
|
"max_amount",
|
||||||
"currency",
|
"currency",
|
||||||
"emails",
|
"emails",
|
||||||
|
"job_url_hyper" if hyperlinks else "job_url",
|
||||||
"description",
|
"description",
|
||||||
]
|
]
|
||||||
jobs_formatted_df = jobs_df[desired_order]
|
jobs_formatted_df = jobs_df[desired_order]
|
||||||
|
|||||||
@@ -27,7 +27,12 @@ from ...jobs import (
|
|||||||
JobType,
|
JobType,
|
||||||
)
|
)
|
||||||
from .. import Scraper, ScraperInput, Site
|
from .. import Scraper, ScraperInput, Site
|
||||||
from ...utils import extract_emails_from_text
|
|
||||||
|
def extract_emails_from_text(text: str) -> Optional[list[str]]:
|
||||||
|
if not text:
|
||||||
|
return None
|
||||||
|
email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
|
||||||
|
return email_regex.findall(text)
|
||||||
|
|
||||||
|
|
||||||
class IndeedScraper(Scraper):
|
class IndeedScraper(Scraper):
|
||||||
@@ -128,8 +133,8 @@ class IndeedScraper(Scraper):
|
|||||||
if interval in CompensationInterval.__members__:
|
if interval in CompensationInterval.__members__:
|
||||||
compensation = Compensation(
|
compensation = Compensation(
|
||||||
interval=CompensationInterval[interval],
|
interval=CompensationInterval[interval],
|
||||||
min_amount=int(extracted_salary.get("max")),
|
min_amount=int(extracted_salary.get("min")),
|
||||||
max_amount=int(extracted_salary.get("min")),
|
max_amount=int(extracted_salary.get("max")),
|
||||||
currency=currency,
|
currency=currency,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ from datetime import datetime
|
|||||||
|
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
|
import re
|
||||||
from requests.exceptions import ProxyError
|
from requests.exceptions import ProxyError
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
@@ -23,7 +24,13 @@ from ...jobs import (
|
|||||||
JobResponse,
|
JobResponse,
|
||||||
JobType,
|
JobType,
|
||||||
)
|
)
|
||||||
from ...utils import extract_emails_from_text
|
|
||||||
|
|
||||||
|
def extract_emails_from_text(text: str) -> Optional[list[str]]:
|
||||||
|
if not text:
|
||||||
|
return None
|
||||||
|
email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
|
||||||
|
return email_regex.findall(text)
|
||||||
|
|
||||||
|
|
||||||
class LinkedInScraper(Scraper):
|
class LinkedInScraper(Scraper):
|
||||||
@@ -256,3 +263,9 @@ class LinkedInScraper(Scraper):
|
|||||||
)
|
)
|
||||||
|
|
||||||
return location
|
return location
|
||||||
|
|
||||||
|
def extract_emails_from_text(text: str) -> Optional[list[str]]:
|
||||||
|
if not text:
|
||||||
|
return None
|
||||||
|
email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
|
||||||
|
return email_regex.findall(text)
|
||||||
@@ -28,7 +28,12 @@ from ...jobs import (
|
|||||||
JobType,
|
JobType,
|
||||||
Country,
|
Country,
|
||||||
)
|
)
|
||||||
from ...utils import extract_emails_from_text
|
|
||||||
|
def extract_emails_from_text(text: str) -> Optional[list[str]]:
|
||||||
|
if not text:
|
||||||
|
return None
|
||||||
|
email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
|
||||||
|
return email_regex.findall(text)
|
||||||
|
|
||||||
|
|
||||||
class ZipRecruiterScraper(Scraper):
|
class ZipRecruiterScraper(Scraper):
|
||||||
|
|||||||
Reference in New Issue
Block a user