mirror of https://github.com/Bunsly/JobSpy
filter works
parent
02a838f8ba
commit
c105c31bab
|
@ -3,15 +3,18 @@ import re
|
|||
from jobspy import Site, scrape_jobs
|
||||
from jobspy.db.job_repository import JobRepository
|
||||
from jobspy.jobs import JobPost
|
||||
from jobspy.scrapers.utils import create_logger
|
||||
from jobspy.telegram_bot import TelegramBot
|
||||
|
||||
logger = create_logger("Main")
|
||||
# Case-insensitive patterns; a job whose title matches any of them is dropped.
# Every entry must be followed by a comma: adjacent string literals are
# implicitly concatenated, which previously fused "Frontend" and "data" into
# the single (never-matching) entry "Frontenddata".
filter_by_title: list[str] = [
    "test",
    "qa",
    "Lead",
    "Full Stack",
    "Fullstack",
    "Frontend",
    "data",
    "automation",
    "BI",
    "Principal",
]
|
||||
|
||||
|
||||
def filter_jobs_by_title_name(job: JobPost):
|
||||
def filter_jobs_by_title_name(job):
    """Return True if *job* should be kept, False if its title is excluded.

    Each entry of the module-level ``filter_by_title`` list is used as a
    case-insensitive regular-expression pattern and searched for anywhere in
    ``job.title``. On the first pattern that matches, the rejection is logged
    and False is returned; if no pattern matches, True is returned.
    """
    # Patterns are tried in list order and evaluation stops at the first hit.
    first_hit = next(
        (
            pattern
            for pattern in filter_by_title
            if re.search(pattern, job.title, re.IGNORECASE)
        ),
        None,
    )
    if first_hit is None:
        return True

    logger.info(f"job filtered out by title: {job.id} , {job.title}")
    return False
|
||||
|
@ -20,20 +23,20 @@ def filter_jobs_by_title_name(job: JobPost):
|
|||
async def main():
|
||||
telegramBot = TelegramBot()
|
||||
jobRepository = JobRepository()
|
||||
|
||||
# sites_to_scrap = [Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED, Site.GOOZALI]
|
||||
sites_to_scrap = [Site.GLASSDOOR]
|
||||
for site in sites_to_scrap:
|
||||
jobs = scrape_jobs(
|
||||
site_name=[Site.LINKEDIN, Site.GLASSDOOR, Site.INDEED],
|
||||
# site_name=[Site.GOOZALI],
|
||||
site_name=[site],
|
||||
search_term="software engineer",
|
||||
google_search_term="software engineer jobs near Tel Aviv Israel since yesterday",
|
||||
locations=["Tel Aviv, Israel", "Ramat Gan, Israel",
|
||||
"Central, Israel", "Rehovot ,Israel"],
|
||||
locations=["Ramat Gan, Israel"],
|
||||
results_wanted=200,
|
||||
hours_old=200,
|
||||
country_indeed='israel',
|
||||
)
|
||||
print(f"Found {len(jobs)} jobs")
|
||||
job = filter(filter_jobs_by_title_name, jobs)
|
||||
logger.info(f"Found {len(jobs)} jobs")
|
||||
jobs = list(filter(filter_jobs_by_title_name, jobs))
|
||||
|
||||
newJobs = jobRepository.insertManyIfNotFound(jobs)
|
||||
|
||||
|
|
Loading…
Reference in New Issue