Compare commits

..

6 Commits

Author SHA1 Message Date
Cullen Watson
628f4dee9c [fix] indeed - min & max values swapped (#54) 2023-10-03 09:22:18 -05:00
Cullen Watson
2e59ab03e3 Merge branch 'main' of https://github.com/cullenwatson/JobSpy 2023-09-28 18:53:59 -05:00
Cullen Watson
008ca61e12 [fix] readd hyperlink param 2023-09-28 18:53:21 -05:00
Cullen Watson
8fc4c3bf90 [docs] readme 2023-09-28 18:35:40 -05:00
Cullen Watson
bff39a2625 [fix] util func 2023-09-28 18:33:14 -05:00
Cullen Watson
c676050dc0 [fix] util func 2023-09-28 18:33:02 -05:00
7 changed files with 31 additions and 16 deletions

View File

@@ -95,7 +95,7 @@ Optional
├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
├── easy_apply (bool): filters for jobs that are hosted on LinkedIn
├── country_indeed (enum): filters the country on Indeed (see below for correct spelling)
├── offset (enum): starts the search from an offset (e.g. 25 will start the search from the 25th result)
├── offset (num): starts the search from an offset (e.g. 25 will start the search from the 25th result)
```
### JobPost Schema

View File

@@ -1,6 +1,6 @@
[tool.poetry]
name = "python-jobspy"
version = "1.1.9"
version = "1.1.12"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
homepage = "https://github.com/cullenwatson/JobSpy"

View File

@@ -163,6 +163,7 @@ def scrape_jobs(
"max_amount",
"currency",
"emails",
"job_url_hyper" if hyperlinks else "job_url",
"description",
]
jobs_formatted_df = jobs_df[desired_order]

View File

@@ -27,7 +27,12 @@ from ...jobs import (
JobType,
)
from .. import Scraper, ScraperInput, Site
from ...utils import extract_emails_from_text
def extract_emails_from_text(text: str) -> Optional[list[str]]:
    """Scan *text* for email addresses.

    Returns a list of every match (possibly empty), or ``None`` when
    *text* is empty/falsy so callers can distinguish "no input" from
    "input with no emails".
    """
    if text:
        pattern = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
        return pattern.findall(text)
    return None
class IndeedScraper(Scraper):
@@ -128,8 +133,8 @@ class IndeedScraper(Scraper):
if interval in CompensationInterval.__members__:
compensation = Compensation(
interval=CompensationInterval[interval],
min_amount=int(extracted_salary.get("max")),
max_amount=int(extracted_salary.get("min")),
min_amount=int(extracted_salary.get("min")),
max_amount=int(extracted_salary.get("max")),
currency=currency,
)

View File

@@ -9,6 +9,7 @@ from datetime import datetime
import requests
import time
import re
from requests.exceptions import ProxyError
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup
@@ -23,7 +24,13 @@ from ...jobs import (
JobResponse,
JobType,
)
from ...utils import extract_emails_from_text
def extract_emails_from_text(text: str) -> Optional[list[str]]:
    """Find all email addresses in *text*.

    Empty/falsy input yields ``None``; otherwise every regex match is
    returned (an empty list when nothing matches).
    """
    if not text:
        return None
    matcher = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
    found = matcher.findall(text)
    return found
class LinkedInScraper(Scraper):
@@ -256,3 +263,9 @@ class LinkedInScraper(Scraper):
)
return location
def extract_emails_from_text(text: str) -> Optional[list[str]]:
    """Extract email addresses from *text*.

    Returns ``None`` for empty input; otherwise the list of all
    substrings matching a basic email pattern.
    """
    if text:
        return re.compile(
            r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
        ).findall(text)
    return None

View File

@@ -28,7 +28,12 @@ from ...jobs import (
JobType,
Country,
)
from ...utils import extract_emails_from_text
def extract_emails_from_text(text: str) -> Optional[list[str]]:
    """Return email addresses found in *text*, or ``None`` if empty.

    A result of ``[]`` means the input was non-empty but contained
    no email-shaped substrings.
    """
    if not text:
        return None
    email_pattern = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
    return list(email_pattern.findall(text))
class ZipRecruiterScraper(Scraper):

View File

@@ -1,9 +0,0 @@
import re
from typing import Optional
def extract_emails_from_text(text: str) -> Optional[list[str]]:
    """Collect every email address appearing in *text*.

    Falsy input (empty string, ``None``) maps to ``None``; any other
    string maps to the list of regex matches, possibly empty.
    """
    if not text:
        return None
    regex = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}"
    return re.findall(regex, text)