diff --git a/pyproject.toml b/pyproject.toml index d6ce411..155b78a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "python-jobspy" -version = "1.1.9" +version = "1.1.10" description = "Job scraper for LinkedIn, Indeed & ZipRecruiter" authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/cullenwatson/JobSpy" diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py index 38f71e8..235147f 100644 --- a/src/jobspy/scrapers/indeed/__init__.py +++ b/src/jobspy/scrapers/indeed/__init__.py @@ -27,7 +27,12 @@ from ...jobs import ( JobType, ) from .. import Scraper, ScraperInput, Site -from ...utils import extract_emails_from_text + +def extract_emails_from_text(text: str) -> Optional[list[str]]: + if not text: + return None + email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}") + return email_regex.findall(text) class IndeedScraper(Scraper): diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/jobspy/scrapers/linkedin/__init__.py index 88c0777..8331d36 100644 --- a/src/jobspy/scrapers/linkedin/__init__.py +++ b/src/jobspy/scrapers/linkedin/__init__.py @@ -9,6 +9,7 @@ from datetime import datetime import requests import time +import re from requests.exceptions import ProxyError from concurrent.futures import ThreadPoolExecutor, as_completed from bs4 import BeautifulSoup @@ -23,7 +24,13 @@ from ...jobs import ( JobResponse, JobType, ) -from ...utils import extract_emails_from_text + + +def extract_emails_from_text(text: str) -> Optional[list[str]]: + if not text: + return None + email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}") + return email_regex.findall(text) class LinkedInScraper(Scraper): @@ -256,3 +263,9 @@ class LinkedInScraper(Scraper): ) return location + +def extract_emails_from_text(text: str) -> Optional[list[str]]: + if not text: + return None + email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}") + return email_regex.findall(text) \ No newline at end of file diff --git a/src/jobspy/scrapers/ziprecruiter/__init__.py b/src/jobspy/scrapers/ziprecruiter/__init__.py index 5ee3a81..c1a0fee 100644 --- a/src/jobspy/scrapers/ziprecruiter/__init__.py +++ b/src/jobspy/scrapers/ziprecruiter/__init__.py @@ -28,7 +28,12 @@ from ...jobs import ( JobType, Country, ) -from ...utils import extract_emails_from_text + +def extract_emails_from_text(text: str) -> Optional[list[str]]: + if not text: + return None + email_regex = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}") + return email_regex.findall(text) class ZipRecruiterScraper(Scraper):