diff --git a/.gitignore b/.gitignore
index b845ce8..e038225 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
/venv/
/ven/
**/__pycache__/
+**/.pytest_cache/
*.pyc
.env
dist
diff --git a/JobSpy_Demo.ipynb b/JobSpy_Demo.ipynb
index 348259c..98b74ad 100644
--- a/JobSpy_Demo.ipynb
+++ b/JobSpy_Demo.ipynb
@@ -1,12 +1,34 @@
{
"cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "00a94b47-f47b-420f-ba7e-714ef219c006",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from jobspy import scrape_jobs\n",
+ "import pandas as pd"
+ ]
+ },
{
"cell_type": "code",
"execution_count": 2,
- "id": "c3f21577-477d-451e-9914-5d67e8a89075",
- "metadata": {
- "scrolled": true
- },
+ "id": "9f773e6c-d9fc-42cc-b0ef-63b739e78435",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd.set_option('display.max_columns', None)\n",
+ "pd.set_option('display.max_rows', None)\n",
+ "pd.set_option('display.width', None)\n",
+ "pd.set_option('display.max_colwidth', 50)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "1253c1f8-9437-492e-9dd3-e7fe51099420",
+ "metadata": {},
"outputs": [
{
"data": {
@@ -46,20 +68,104 @@
"
\n",
" 0 | \n",
" indeed | \n",
- " Firmware Engineer | \n",
- " Advanced Motion Controls | \n",
- " Camarillo | \n",
- " CA | \n",
+ " Mental Health Therapist | \n",
+ " Sandstone Care | \n",
+ " Broomfield | \n",
+ " CO | \n",
" fulltime | \n",
" yearly | \n",
- " 145000 | \n",
- " 110000 | \n",
- " https://www.indeed.com/viewjob?jk=a2e7077fdd3c... | \n",
- " We are looking for an experienced Firmware Eng... | \n",
+ " 68000 | \n",
+ " 57500 | \n",
+ " https://www.indeed.com/viewjob?jk=f5f33d72e030... | \n",
+ " Mental Health Therapist- Broomfield, CO Locati... | \n",
"
\n",
" \n",
" 1 | \n",
" indeed | \n",
+ " .NET Developer | \n",
+ " Noir Consulting | \n",
+ " Irving | \n",
+ " TX | \n",
+ " None | \n",
+ " yearly | \n",
+ " 200000 | \n",
+ " 200000 | \n",
+ " https://www.indeed.com/viewjob?jk=1b22ba65296c... | \n",
+ " .NET Software Engineer, C#, WPF - Irving (Tech... | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " indeed | \n",
+ " Senior Software Engineer | \n",
+ " Johns Hopkins Applied Physics Laboratory (APL) | \n",
+ " Laurel | \n",
+ " MD | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " https://www.indeed.com/viewjob?jk=309eed270a88... | \n",
+ " Description Are you a communications systems d... | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " indeed | \n",
+ " Front End Developer | \n",
+ " Verkada | \n",
+ " San Mateo | \n",
+ " CA | \n",
+ " fulltime | \n",
+ " yearly | \n",
+ " 285000 | \n",
+ " 120000 | \n",
+ " https://www.indeed.com/viewjob?jk=a3ea45daca75... | \n",
+ " Who We Are Verkada is the largest cloud-based ... | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " indeed | \n",
+ " Software Engineer | \n",
+ " Adobe | \n",
+ " San Jose | \n",
+ " CA | \n",
+ " fulltime | \n",
+ " yearly | \n",
+ " 142700 | \n",
+ " 73200 | \n",
+ " https://www.indeed.com/viewjob?jk=0f2dc9901fc7... | \n",
+ " Our Company Changing the world through digital... | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " indeed | \n",
+ " Full Stack Developer | \n",
+ " Comcast | \n",
+ " Philadelphia | \n",
+ " PA | \n",
+ " fulltime | \n",
+ " yearly | \n",
+ " 184663 | \n",
+ " 78789 | \n",
+ " https://www.indeed.com/viewjob?jk=eb5c927221eb... | \n",
+ " Make your mark at Comcast - a Fortune 30 globa... | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " indeed | \n",
+ " Senior Software Engineer | \n",
+ " Smart City Solutions | \n",
+ " | \n",
+ " FL | \n",
+ " fulltime | \n",
+ " yearly | \n",
+ " 100000 | \n",
+ " 85000 | \n",
+ " https://www.indeed.com/viewjob?jk=ba1945f143a1... | \n",
+ " Smart City hiring a full stack software develo... | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " indeed | \n",
" Computer Engineer | \n",
" Honeywell | \n",
" | \n",
@@ -72,116 +178,32 @@
" Join a team recognized for leadership, innovat... | \n",
"
\n",
" \n",
- " 2 | \n",
- " indeed | \n",
- " Software Engineer | \n",
- " Splunk | \n",
- " Remote | \n",
- " None | \n",
- " fulltime | \n",
- " yearly | \n",
- " 159500 | \n",
- " 116000 | \n",
- " https://www.indeed.com/viewjob?jk=155495ca3f46... | \n",
- " A little about us. Splunk is the key to enterp... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " indeed | \n",
- " Development Operations Engineer | \n",
- " Stratacache | \n",
- " Dayton | \n",
- " OH | \n",
- " fulltime | \n",
- " yearly | \n",
- " 90000 | \n",
- " 83573 | \n",
- " https://www.indeed.com/viewjob?jk=77cf3540c06e... | \n",
- " Stratacache, Inc. delivers in-store retail exp... | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " indeed | \n",
- " Computer Engineer | \n",
- " Honeywell | \n",
- " | \n",
- " None | \n",
- " fulltime | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " https://www.indeed.com/viewjob?jk=7fadbb7c936f... | \n",
- " Join a team recognized for leadership, innovat... | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " indeed | \n",
- " Full Stack Developer | \n",
- " Reinventing Geospatial, Inc. (RGi) | \n",
- " Herndon | \n",
- " VA | \n",
- " fulltime | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " https://www.indeed.com/viewjob?jk=11b2b5b0dd44... | \n",
- " Job Highlights As a Full Stack Software Engine... | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " indeed | \n",
- " Software Engineer | \n",
- " Workiva | \n",
- " Remote | \n",
- " None | \n",
- " None | \n",
- " yearly | \n",
- " 134000 | \n",
- " 79000 | \n",
- " https://www.indeed.com/viewjob?jk=ec3ab6eb9253... | \n",
- " Are you ready to embark on an exciting journey... | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " indeed | \n",
- " Senior Software Engineer | \n",
- " SciTec | \n",
- " Boulder | \n",
- " CO | \n",
- " fulltime | \n",
- " yearly | \n",
- " 164000 | \n",
- " 93000 | \n",
- " https://www.indeed.com/viewjob?jk=781e4cf0cf6d... | \n",
- " SciTec has been awarded multiple government co... | \n",
- "
\n",
- " \n",
" 8 | \n",
" indeed | \n",
" Software Engineer | \n",
- " Microsoft | \n",
- " | \n",
+ " Fidelity Investments | \n",
+ " Westlake | \n",
+ " TX | \n",
" None | \n",
- " fulltime | \n",
- " yearly | \n",
- " 182600 | \n",
- " 94300 | \n",
- " https://www.indeed.com/viewjob?jk=21e05b9e9d96... | \n",
- " At Microsoft we are seeking people who have a ... | \n",
+ " None | \n",
+ " None | \n",
+ " None | \n",
+ " https://www.indeed.com/viewjob?jk=b600392166bb... | \n",
+ " Job Description: Software Engineer in Test The... | \n",
"
\n",
" \n",
" 9 | \n",
" indeed | \n",
- " Software Engineer | \n",
- " Avalon Healthcare Solutions | \n",
- " Remote | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " https://www.indeed.com/viewjob?jk=da35b9bb74a0... | \n",
- " Avalon Healthcare Solutions, headquartered in ... | \n",
+ " Fpga Engineer | \n",
+ " R-DEX Systems, Inc. | \n",
+ " Atlanta | \n",
+ " GA | \n",
+ " fulltime | \n",
+ " yearly | \n",
+ " 160000 | \n",
+ " 120000 | \n",
+ " https://www.indeed.com/viewjob?jk=a7e9d356c333... | \n",
+ " Title: Senior DSP/FPGA Firmware Engineer Descr... | \n",
"
\n",
" \n",
" 10 | \n",
@@ -236,7 +258,7 @@
" yearly | \n",
" None | \n",
" None | \n",
- " https://www.linkedin.com/jobs/view/3701775201 | \n",
+ " https://www.linkedin.com/jobs/view/3701770659 | \n",
" Description:By bringing together people that u... | \n",
"
\n",
" \n",
@@ -250,7 +272,7 @@
" yearly | \n",
" None | \n",
" None | \n",
- " https://www.linkedin.com/jobs/view/3701772329 | \n",
+ " https://www.linkedin.com/jobs/view/3701769637 | \n",
" Description:By bringing together people that u... | \n",
"
\n",
" \n",
@@ -264,12 +286,26 @@
" yearly | \n",
" None | \n",
" None | \n",
- " https://www.linkedin.com/jobs/view/3701769637 | \n",
+ " https://www.linkedin.com/jobs/view/3701772329 | \n",
" Description:By bringing together people that u... | \n",
"
\n",
" \n",
" 16 | \n",
" linkedin | \n",
+ " Software Engineer - Early Career | \n",
+ " Lockheed Martin | \n",
+ " Fort Worth | \n",
+ " TX | \n",
+ " fulltime | \n",
+ " yearly | \n",
+ " None | \n",
+ " None | \n",
+ " https://www.linkedin.com/jobs/view/3701775201 | \n",
+ " Description:By bringing together people that u... | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " linkedin | \n",
" Software Engineer | \n",
" SpiderOak | \n",
" Austin | \n",
@@ -282,20 +318,6 @@
" We're only as strong as our weakest link.In th... | \n",
"
\n",
" \n",
- " 17 | \n",
- " linkedin | \n",
- " Software Engineer - Early Career | \n",
- " Lockheed Martin | \n",
- " Fort Worth | \n",
- " TX | \n",
- " fulltime | \n",
- " yearly | \n",
- " None | \n",
- " None | \n",
- " https://www.linkedin.com/jobs/view/3701770659 | \n",
- " Description:By bringing together people that u... | \n",
- "
\n",
- " \n",
" 18 | \n",
" linkedin | \n",
" Full-Stack Software Engineer | \n",
@@ -326,20 +348,6 @@
"
\n",
" 20 | \n",
" zip_recruiter | \n",
- " (USA) Software Engineer III - Prototype Engine... | \n",
- " Walmart | \n",
- " Dallas | \n",
- " TX | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " None | \n",
- " https://click.appcast.io/track/hcgsw4k?cs=ngp&... | \n",
- " We are currently seeking a highly skilled and ... | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " zip_recruiter | \n",
" Software Engineer - New Grad | \n",
" ZipRecruiter | \n",
" Santa Monica | \n",
@@ -348,53 +356,11 @@
" yearly | \n",
" 130000 | \n",
" 150000 | \n",
- " https://www.ziprecruiter.com/jobs/ziprecruiter... | \n",
- " We offer a hybrid work environment. Most US-ba... | \n",
+ " https://www.ziprecruiter.com/c/ZipRecruiter/Jo... | \n",
+ " Demonstrated foundation in software engineerin... | \n",
"
\n",
" \n",
- " 22 | \n",
- " zip_recruiter | \n",
- " Software Developer | \n",
- " Robert Half | \n",
- " Corpus Christi | \n",
- " TX | \n",
- " fulltime | \n",
- " yearly | \n",
- " 105000 | \n",
- " 115000 | \n",
- " https://www.ziprecruiter.com/jobs/robert-half-... | \n",
- " Robert Half has an opening for a Software Deve... | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " zip_recruiter | \n",
- " Software Engineer | \n",
- " Advantage Technical | \n",
- " Ontario | \n",
- " CA | \n",
- " fulltime | \n",
- " yearly | \n",
- " 100000 | \n",
- " 150000 | \n",
- " https://www.ziprecruiter.com/jobs/advantage-te... | \n",
- " New career opportunity available with major Ma... | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " zip_recruiter | \n",
- " Software Developer | \n",
- " Robert Half | \n",
- " Tucson | \n",
- " AZ | \n",
- " temporary | \n",
- " hourly | \n",
- " 47 | \n",
- " 55 | \n",
- " https://www.ziprecruiter.com/jobs/robert-half-... | \n",
- " Robert Half is accepting inquiries for a SQL S... | \n",
- "
\n",
- " \n",
- " 25 | \n",
+ " 21 | \n",
" zip_recruiter | \n",
" Full Stack Software Engineer | \n",
" ZipRecruiter | \n",
@@ -404,25 +370,11 @@
" yearly | \n",
" 105000 | \n",
" 145000 | \n",
- " https://www.ziprecruiter.com/jobs/ziprecruiter... | \n",
- " We offer a hybrid work environment. Most US-ba... | \n",
+ " https://www.ziprecruiter.com/c/ZipRecruiter/Jo... | \n",
+ " Experience in client side development using Re... | \n",
"
\n",
" \n",
- " 26 | \n",
- " zip_recruiter | \n",
- " Software Developer IV | \n",
- " Kforce Inc. | \n",
- " Mountain View | \n",
- " CA | \n",
- " contract | \n",
- " hourly | \n",
- " 55 | \n",
- " 75 | \n",
- " https://www.kforce.com/Jobs/job.aspx?job=1696~... | \n",
- " Kforce has a client that is seeking a Software... | \n",
- "
\n",
- " \n",
- " 27 | \n",
+ " 22 | \n",
" zip_recruiter | \n",
" Software Developer | Onsite | Omaha, NE - Omaha | \n",
" OneStaff Medical | \n",
@@ -432,36 +384,106 @@
" yearly | \n",
" 60000 | \n",
" 110000 | \n",
- " https://www.ziprecruiter.com/jobs/onestaff-med... | \n",
- " Company Description: We are looking for a well... | \n",
+ " https://www.ziprecruiter.com/c/OneStaff-Medica... | \n",
+ " We are looking for a well-rounded Software Dev... | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " zip_recruiter | \n",
+ " Senior Software Engineer, Onsite [Real-time] | \n",
+ " Raytheon | \n",
+ " McKinney | \n",
+ " TX | \n",
+ " fulltime | \n",
+ " yearly | \n",
+ " 116000 | \n",
+ " 153000 | \n",
+ " https://jsv3.recruitics.com/redirect?rx_cid=34... | \n",
+ " By joining the Silent Knight team as a Senior ... | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " zip_recruiter | \n",
+ " Senior Software Engineer - TS/SCI **Minimum $2... | \n",
+ " Raytheon | \n",
+ " Dallas | \n",
+ " TX | \n",
+ " fulltime | \n",
+ " yearly | \n",
+ " 122000 | \n",
+ " 162000 | \n",
+ " https://jsv3.recruitics.com/redirect?rx_cid=34... | \n",
+ " Object Oriented Programming using C++ with Lin... | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " zip_recruiter | \n",
+ " Software Engineer III (full stack, AI/ML, Djan... | \n",
+ " Ayahealthcare | \n",
+ " Remote | \n",
+ " OR | \n",
+ " None | \n",
+ " yearly | \n",
+ " 156000 | \n",
+ " 165000 | \n",
+ " https://click.appcast.io/track/hcbh0qq?cs=ngp&... | \n",
+ " The Software Engineer III will be an integral ... | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " zip_recruiter | \n",
+ " Software Engineer Full Stack | \n",
+ " Generac Power Systems | \n",
+ " Denver | \n",
+ " CO | \n",
+ " fulltime | \n",
+ " yearly | \n",
+ " 90000 | \n",
+ " 115000 | \n",
+ " https://www.ziprecruiter.com/c/Generac-Power-S... | \n",
+ " As a Software Engineer on the Energy Technolog... | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " zip_recruiter | \n",
+ " Embedded Software Engineer (Fort Worth, TX or ... | \n",
+ " Kubota | \n",
+ " Fort Worth | \n",
+ " TX | \n",
+ " fulltime | \n",
+ " yearly | \n",
+ " 122000 | \n",
+ " 167000 | \n",
+ " https://us62e2.dayforcehcm.com/CandidatePortal... | \n",
+ " Work with a cross-functional team to design, t... | \n",
"
\n",
" \n",
" 28 | \n",
" zip_recruiter | \n",
- " Senior Software Engineer | \n",
- " RightStaff, Inc. | \n",
- " Dallas | \n",
+ " Senior Software Engineer (FT) | \n",
+ " National Indoor RV Center | \n",
+ " Lewisville | \n",
" TX | \n",
" fulltime | \n",
" yearly | \n",
- " 120000 | \n",
- " 180000 | \n",
- " https://www.ziprecruiter.com/jobs/rightstaff-i... | \n",
- " Job Description:We are seeking a talented and ... | \n",
+ " 125000 | \n",
+ " 0 | \n",
+ " https://www.ziprecruiter.com/c/National-Indoor... | \n",
+ " As a Senior Software Engineer, you will: * Des... | \n",
"
\n",
" \n",
" 29 | \n",
" zip_recruiter | \n",
- " Software Developer - .Net Core - 12886 | \n",
- " Walker Elliott | \n",
+ " 2024 Next Gen IT Program | Software Engineerin... | \n",
+ " Southern Glazer's Wine & Spirits | \n",
" Dallas | \n",
" TX | \n",
- " fulltime | \n",
+ " None | \n",
" yearly | \n",
- " 105000 | \n",
- " 130000 | \n",
- " https://www.ziprecruiter.com/jobs/walker-ellio... | \n",
- " Our highly successful DFW based client has bee... | \n",
+ " 70000 | \n",
+ " 0 | \n",
+ " https://click.appcast.io/track/hdsbnae?cs=b4&j... | \n",
+ " Finally, through the work assigned, the analys... | \n",
"
\n",
" \n",
"\n",
@@ -469,219 +491,185 @@
],
"text/plain": [
" site title \\\n",
- "0 indeed Firmware Engineer \n",
- "1 indeed Computer Engineer \n",
- "2 indeed Software Engineer \n",
- "3 indeed Development Operations Engineer \n",
- "4 indeed Computer Engineer \n",
+ "0 indeed Mental Health Therapist \n",
+ "1 indeed .NET Developer \n",
+ "2 indeed Senior Software Engineer \n",
+ "3 indeed Front End Developer \n",
+ "4 indeed Software Engineer \n",
"5 indeed Full Stack Developer \n",
- "6 indeed Software Engineer \n",
- "7 indeed Senior Software Engineer \n",
+ "6 indeed Senior Software Engineer \n",
+ "7 indeed Computer Engineer \n",
"8 indeed Software Engineer \n",
- "9 indeed Software Engineer \n",
+ "9 indeed Fpga Engineer \n",
"10 linkedin Software Engineer \n",
"11 linkedin Software Engineer - Early Career \n",
"12 linkedin Software Engineer - Early Career \n",
"13 linkedin Software Engineer - Early Career \n",
"14 linkedin Software Engineer - Early Career \n",
"15 linkedin Software Engineer - Early Career \n",
- "16 linkedin Software Engineer \n",
- "17 linkedin Software Engineer - Early Career \n",
+ "16 linkedin Software Engineer - Early Career \n",
+ "17 linkedin Software Engineer \n",
"18 linkedin Full-Stack Software Engineer \n",
"19 linkedin Software Engineer \n",
- "20 zip_recruiter (USA) Software Engineer III - Prototype Engine... \n",
- "21 zip_recruiter Software Engineer - New Grad \n",
- "22 zip_recruiter Software Developer \n",
- "23 zip_recruiter Software Engineer \n",
- "24 zip_recruiter Software Developer \n",
- "25 zip_recruiter Full Stack Software Engineer \n",
- "26 zip_recruiter Software Developer IV \n",
- "27 zip_recruiter Software Developer | Onsite | Omaha, NE - Omaha \n",
- "28 zip_recruiter Senior Software Engineer \n",
- "29 zip_recruiter Software Developer - .Net Core - 12886 \n",
+ "20 zip_recruiter Software Engineer - New Grad \n",
+ "21 zip_recruiter Full Stack Software Engineer \n",
+ "22 zip_recruiter Software Developer | Onsite | Omaha, NE - Omaha \n",
+ "23 zip_recruiter Senior Software Engineer, Onsite [Real-time] \n",
+ "24 zip_recruiter Senior Software Engineer - TS/SCI **Minimum $2... \n",
+ "25 zip_recruiter Software Engineer III (full stack, AI/ML, Djan... \n",
+ "26 zip_recruiter Software Engineer Full Stack \n",
+ "27 zip_recruiter Embedded Software Engineer (Fort Worth, TX or ... \n",
+ "28 zip_recruiter Senior Software Engineer (FT) \n",
+ "29 zip_recruiter 2024 Next Gen IT Program | Software Engineerin... \n",
"\n",
- " company_name city state job_type \\\n",
- "0 Advanced Motion Controls Camarillo CA fulltime \n",
- "1 Honeywell None fulltime \n",
- "2 Splunk Remote None fulltime \n",
- "3 Stratacache Dayton OH fulltime \n",
- "4 Honeywell None fulltime \n",
- "5 Reinventing Geospatial, Inc. (RGi) Herndon VA fulltime \n",
- "6 Workiva Remote None None \n",
- "7 SciTec Boulder CO fulltime \n",
- "8 Microsoft None fulltime \n",
- "9 Avalon Healthcare Solutions Remote None None \n",
- "10 Fieldguide San Francisco CA fulltime \n",
- "11 Lockheed Martin Sunnyvale CA fulltime \n",
- "12 Lockheed Martin Edwards CA fulltime \n",
- "13 Lockheed Martin Fort Worth TX fulltime \n",
- "14 Lockheed Martin Fort Worth TX fulltime \n",
- "15 Lockheed Martin Fort Worth TX fulltime \n",
- "16 SpiderOak Austin TX fulltime \n",
- "17 Lockheed Martin Fort Worth TX fulltime \n",
- "18 Rain New York NY fulltime \n",
- "19 Nike Portland OR contract \n",
- "20 Walmart Dallas TX None \n",
- "21 ZipRecruiter Santa Monica CA fulltime \n",
- "22 Robert Half Corpus Christi TX fulltime \n",
- "23 Advantage Technical Ontario CA fulltime \n",
- "24 Robert Half Tucson AZ temporary \n",
- "25 ZipRecruiter Phoenix AZ fulltime \n",
- "26 Kforce Inc. Mountain View CA contract \n",
- "27 OneStaff Medical Omaha NE fulltime \n",
- "28 RightStaff, Inc. Dallas TX fulltime \n",
- "29 Walker Elliott Dallas TX fulltime \n",
+ " company_name city state \\\n",
+ "0 Sandstone Care Broomfield CO \n",
+ "1 Noir Consulting Irving TX \n",
+ "2 Johns Hopkins Applied Physics Laboratory (APL) Laurel MD \n",
+ "3 Verkada San Mateo CA \n",
+ "4 Adobe San Jose CA \n",
+ "5 Comcast Philadelphia PA \n",
+ "6 Smart City Solutions FL \n",
+ "7 Honeywell None \n",
+ "8 Fidelity Investments Westlake TX \n",
+ "9 R-DEX Systems, Inc. Atlanta GA \n",
+ "10 Fieldguide San Francisco CA \n",
+ "11 Lockheed Martin Sunnyvale CA \n",
+ "12 Lockheed Martin Edwards CA \n",
+ "13 Lockheed Martin Fort Worth TX \n",
+ "14 Lockheed Martin Fort Worth TX \n",
+ "15 Lockheed Martin Fort Worth TX \n",
+ "16 Lockheed Martin Fort Worth TX \n",
+ "17 SpiderOak Austin TX \n",
+ "18 Rain New York NY \n",
+ "19 Nike Portland OR \n",
+ "20 ZipRecruiter Santa Monica CA \n",
+ "21 ZipRecruiter Phoenix AZ \n",
+ "22 OneStaff Medical Omaha NE \n",
+ "23 Raytheon McKinney TX \n",
+ "24 Raytheon Dallas TX \n",
+ "25 Ayahealthcare Remote OR \n",
+ "26 Generac Power Systems Denver CO \n",
+ "27 Kubota Fort Worth TX \n",
+ "28 National Indoor RV Center Lewisville TX \n",
+ "29 Southern Glazer's Wine & Spirits Dallas TX \n",
"\n",
- " interval min_amount max_amount \\\n",
- "0 yearly 145000 110000 \n",
- "1 None None None \n",
- "2 yearly 159500 116000 \n",
- "3 yearly 90000 83573 \n",
- "4 None None None \n",
- "5 None None None \n",
- "6 yearly 134000 79000 \n",
- "7 yearly 164000 93000 \n",
- "8 yearly 182600 94300 \n",
- "9 None None None \n",
- "10 yearly None None \n",
- "11 yearly None None \n",
- "12 yearly None None \n",
- "13 yearly None None \n",
- "14 yearly None None \n",
- "15 yearly None None \n",
- "16 yearly None None \n",
- "17 yearly None None \n",
- "18 yearly None None \n",
- "19 yearly None None \n",
- "20 None None None \n",
- "21 yearly 130000 150000 \n",
- "22 yearly 105000 115000 \n",
- "23 yearly 100000 150000 \n",
- "24 hourly 47 55 \n",
- "25 yearly 105000 145000 \n",
- "26 hourly 55 75 \n",
- "27 yearly 60000 110000 \n",
- "28 yearly 120000 180000 \n",
- "29 yearly 105000 130000 \n",
+ " job_type interval min_amount max_amount \\\n",
+ "0 fulltime yearly 68000 57500 \n",
+ "1 None yearly 200000 200000 \n",
+ "2 None None None None \n",
+ "3 fulltime yearly 285000 120000 \n",
+ "4 fulltime yearly 142700 73200 \n",
+ "5 fulltime yearly 184663 78789 \n",
+ "6 fulltime yearly 100000 85000 \n",
+ "7 fulltime None None None \n",
+ "8 None None None None \n",
+ "9 fulltime yearly 160000 120000 \n",
+ "10 fulltime yearly None None \n",
+ "11 fulltime yearly None None \n",
+ "12 fulltime yearly None None \n",
+ "13 fulltime yearly None None \n",
+ "14 fulltime yearly None None \n",
+ "15 fulltime yearly None None \n",
+ "16 fulltime yearly None None \n",
+ "17 fulltime yearly None None \n",
+ "18 fulltime yearly None None \n",
+ "19 contract yearly None None \n",
+ "20 fulltime yearly 130000 150000 \n",
+ "21 fulltime yearly 105000 145000 \n",
+ "22 fulltime yearly 60000 110000 \n",
+ "23 fulltime yearly 116000 153000 \n",
+ "24 fulltime yearly 122000 162000 \n",
+ "25 None yearly 156000 165000 \n",
+ "26 fulltime yearly 90000 115000 \n",
+ "27 fulltime yearly 122000 167000 \n",
+ "28 fulltime yearly 125000 0 \n",
+ "29 None yearly 70000 0 \n",
"\n",
" job_url \\\n",
- "0 https://www.indeed.com/viewjob?jk=a2e7077fdd3c... \n",
- "1 https://www.indeed.com/viewjob?jk=5a1da623ee75... \n",
- "2 https://www.indeed.com/viewjob?jk=155495ca3f46... \n",
- "3 https://www.indeed.com/viewjob?jk=77cf3540c06e... \n",
- "4 https://www.indeed.com/viewjob?jk=7fadbb7c936f... \n",
- "5 https://www.indeed.com/viewjob?jk=11b2b5b0dd44... \n",
- "6 https://www.indeed.com/viewjob?jk=ec3ab6eb9253... \n",
- "7 https://www.indeed.com/viewjob?jk=781e4cf0cf6d... \n",
- "8 https://www.indeed.com/viewjob?jk=21e05b9e9d96... \n",
- "9 https://www.indeed.com/viewjob?jk=da35b9bb74a0... \n",
+ "0 https://www.indeed.com/viewjob?jk=f5f33d72e030... \n",
+ "1 https://www.indeed.com/viewjob?jk=1b22ba65296c... \n",
+ "2 https://www.indeed.com/viewjob?jk=309eed270a88... \n",
+ "3 https://www.indeed.com/viewjob?jk=a3ea45daca75... \n",
+ "4 https://www.indeed.com/viewjob?jk=0f2dc9901fc7... \n",
+ "5 https://www.indeed.com/viewjob?jk=eb5c927221eb... \n",
+ "6 https://www.indeed.com/viewjob?jk=ba1945f143a1... \n",
+ "7 https://www.indeed.com/viewjob?jk=5a1da623ee75... \n",
+ "8 https://www.indeed.com/viewjob?jk=b600392166bb... \n",
+ "9 https://www.indeed.com/viewjob?jk=a7e9d356c333... \n",
"10 https://www.linkedin.com/jobs/view/3696158160 \n",
"11 https://www.linkedin.com/jobs/view/3693012711 \n",
"12 https://www.linkedin.com/jobs/view/3700669785 \n",
- "13 https://www.linkedin.com/jobs/view/3701775201 \n",
- "14 https://www.linkedin.com/jobs/view/3701772329 \n",
- "15 https://www.linkedin.com/jobs/view/3701769637 \n",
- "16 https://www.linkedin.com/jobs/view/3707174719 \n",
- "17 https://www.linkedin.com/jobs/view/3701770659 \n",
+ "13 https://www.linkedin.com/jobs/view/3701770659 \n",
+ "14 https://www.linkedin.com/jobs/view/3701769637 \n",
+ "15 https://www.linkedin.com/jobs/view/3701772329 \n",
+ "16 https://www.linkedin.com/jobs/view/3701775201 \n",
+ "17 https://www.linkedin.com/jobs/view/3707174719 \n",
"18 https://www.linkedin.com/jobs/view/3696158877 \n",
"19 https://www.linkedin.com/jobs/view/3693340247 \n",
- "20 https://click.appcast.io/track/hcgsw4k?cs=ngp&... \n",
- "21 https://www.ziprecruiter.com/jobs/ziprecruiter... \n",
- "22 https://www.ziprecruiter.com/jobs/robert-half-... \n",
- "23 https://www.ziprecruiter.com/jobs/advantage-te... \n",
- "24 https://www.ziprecruiter.com/jobs/robert-half-... \n",
- "25 https://www.ziprecruiter.com/jobs/ziprecruiter... \n",
- "26 https://www.kforce.com/Jobs/job.aspx?job=1696~... \n",
- "27 https://www.ziprecruiter.com/jobs/onestaff-med... \n",
- "28 https://www.ziprecruiter.com/jobs/rightstaff-i... \n",
- "29 https://www.ziprecruiter.com/jobs/walker-ellio... \n",
+ "20 https://www.ziprecruiter.com/c/ZipRecruiter/Jo... \n",
+ "21 https://www.ziprecruiter.com/c/ZipRecruiter/Jo... \n",
+ "22 https://www.ziprecruiter.com/c/OneStaff-Medica... \n",
+ "23 https://jsv3.recruitics.com/redirect?rx_cid=34... \n",
+ "24 https://jsv3.recruitics.com/redirect?rx_cid=34... \n",
+ "25 https://click.appcast.io/track/hcbh0qq?cs=ngp&... \n",
+ "26 https://www.ziprecruiter.com/c/Generac-Power-S... \n",
+ "27 https://us62e2.dayforcehcm.com/CandidatePortal... \n",
+ "28 https://www.ziprecruiter.com/c/National-Indoor... \n",
+ "29 https://click.appcast.io/track/hdsbnae?cs=b4&j... \n",
"\n",
" description \n",
- "0 We are looking for an experienced Firmware Eng... \n",
- "1 Join a team recognized for leadership, innovat... \n",
- "2 A little about us. Splunk is the key to enterp... \n",
- "3 Stratacache, Inc. delivers in-store retail exp... \n",
- "4 Join a team recognized for leadership, innovat... \n",
- "5 Job Highlights As a Full Stack Software Engine... \n",
- "6 Are you ready to embark on an exciting journey... \n",
- "7 SciTec has been awarded multiple government co... \n",
- "8 At Microsoft we are seeking people who have a ... \n",
- "9 Avalon Healthcare Solutions, headquartered in ... \n",
+ "0 Mental Health Therapist- Broomfield, CO Locati... \n",
+ "1 .NET Software Engineer, C#, WPF - Irving (Tech... \n",
+ "2 Description Are you a communications systems d... \n",
+ "3 Who We Are Verkada is the largest cloud-based ... \n",
+ "4 Our Company Changing the world through digital... \n",
+ "5 Make your mark at Comcast - a Fortune 30 globa... \n",
+ "6 Smart City hiring a full stack software develo... \n",
+ "7 Join a team recognized for leadership, innovat... \n",
+ "8 Job Description: Software Engineer in Test The... \n",
+ "9 Title: Senior DSP/FPGA Firmware Engineer Descr... \n",
"10 About us:Fieldguide is establishing a new stat... \n",
"11 Description:By bringing together people that u... \n",
"12 Description:By bringing together people that u... \n",
"13 Description:By bringing together people that u... \n",
"14 Description:By bringing together people that u... \n",
"15 Description:By bringing together people that u... \n",
- "16 We're only as strong as our weakest link.In th... \n",
- "17 Description:By bringing together people that u... \n",
+ "16 Description:By bringing together people that u... \n",
+ "17 We're only as strong as our weakest link.In th... \n",
"18 Rain’s mission is to create the fastest and ea... \n",
"19 Work options: FlexibleWe consider remote, on-p... \n",
- "20 We are currently seeking a highly skilled and ... \n",
- "21 We offer a hybrid work environment. Most US-ba... \n",
- "22 Robert Half has an opening for a Software Deve... \n",
- "23 New career opportunity available with major Ma... \n",
- "24 Robert Half is accepting inquiries for a SQL S... \n",
- "25 We offer a hybrid work environment. Most US-ba... \n",
- "26 Kforce has a client that is seeking a Software... \n",
- "27 Company Description: We are looking for a well... \n",
- "28 Job Description:We are seeking a talented and ... \n",
- "29 Our highly successful DFW based client has bee... "
+ "20 Demonstrated foundation in software engineerin... \n",
+ "21 Experience in client side development using Re... \n",
+ "22 We are looking for a well-rounded Software Dev... \n",
+ "23 By joining the Silent Knight team as a Senior ... \n",
+ "24 Object Oriented Programming using C++ with Lin... \n",
+ "25 The Software Engineer III will be an integral ... \n",
+ "26 As a Software Engineer on the Energy Technolog... \n",
+ "27 Work with a cross-functional team to design, t... \n",
+ "28 As a Senior Software Engineer, you will: * Des... \n",
+ "29 Finally, through the work assigned, the analys... "
]
},
+ "execution_count": 5,
"metadata": {},
- "output_type": "display_data"
+ "output_type": "execute_result"
}
],
"source": [
- "from jobspy import scrape_jobs\n",
- "import pandas as pd\n",
- "\n",
- "jobs: pd.DataFrame = scrape_jobs(\n",
+ "scrape_jobs(\n",
" site_name=[\"indeed\", \"linkedin\", \"zip_recruiter\"],\n",
" search_term=\"software engineer\",\n",
" results_wanted=10\n",
- ")\n",
- "\n",
- "if jobs.empty:\n",
- " print(\"No jobs found.\")\n",
- "else:\n",
- " # 1 print\n",
- " pd.set_option('display.max_columns', None)\n",
- " pd.set_option('display.max_rows', None)\n",
- " pd.set_option('display.width', None)\n",
- " pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc\n",
- " print(jobs)\n",
- "\n",
- " # 2 display in Jupyter Notebook\n",
- " display(jobs)\n",
- "\n",
- " # 3 output to csv\n",
- " jobs.to_csv('jobs.csv', index=False)"
+ ")"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "efd667ef-fdf0-452a-b5e5-ce6825755be7",
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "1574dc17-0a42-4655-964f-5c03a6d3deb0",
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "my-poetry-env",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "my-poetry-env"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -693,7 +681,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.11"
+ "version": "3.11.4"
}
},
"nbformat": 4,
diff --git a/README.md b/README.md
index 0ef7f16..146dd8c 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,9 @@
- Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously
- Aggregates the job postings in a Pandas DataFrame
-
+
+![jobspy](https://github.com/cullenwatson/JobSpy/assets/78247585/ec7ef355-05f6-4fd3-8161-a817e31c5c57)
+
### Installation
`pip install python-jobspy`
@@ -26,18 +28,18 @@ jobs: pd.DataFrame = scrape_jobs(
if jobs.empty:
print("No jobs found.")
else:
- # 1 print
+ #1 print
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc
print(jobs)
- # 2 display in Jupyter Notebook
- # display(jobs)
+ #2 display in Jupyter Notebook
+ #display(jobs)
- # 3 output to csv
- # jobs.to_csv('jobs.csv', index=False)
+ #3 output to .csv
+ #jobs.to_csv('jobs.csv', index=False)
```
### Output
@@ -51,8 +53,6 @@ zip_recruiter Software Engineer - New Grad ZipRecruiter Santa Monica
zip_recruiter Software Developer TEKsystems Phoenix AZ fulltime hourly 65 75 https://www.ziprecruiter.com/jobs/teksystems-0... Top Skills' Details• 6 years of Java developme...
```
### Parameters for `scrape_jobs()`
-
-
```plaintext
Required
├── site_type (List[enum]): linkedin, zip_recruiter, indeed
@@ -87,12 +87,23 @@ JobPost
```
-### FAQ
-
-#### Encountering issues with your queries?
-
-Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, please submit an issue.
-
-#### Received a response code 429?
-You have been blocked by the job board site for sending too many requests. ZipRecruiter seems to be the most aggressive at the moment. Consider waiting a few seconds, or try using a VPN. Proxy support coming soon.
+## Frequently Asked Questions
+
+---
+
+**Q: Encountering issues with your queries?**
+**A:** Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, [submit an issue](https://github.com/cullenwatson/JobSpy/issues).
+
+---
+
+**Q: Received a response code 429?**
+**A:** This indicates that you have been blocked by the job board site for sending too many requests. Currently, **ZipRecruiter** is particularly aggressive with blocking. We recommend:
+
+- Waiting a few seconds between requests.
+- Trying a VPN to change your IP address.
+
+**Note:** Proxy support is in development and coming soon!
+
+---
+
diff --git a/pyproject.toml b/pyproject.toml
index 1d89504..112f343 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "python-jobspy"
-version = "1.0.1"
+version = "1.0.2"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton ", "Cullen Watson "]
readme = "README.md"
diff --git a/src/jobspy/core/__init__.py b/src/__init__.py
similarity index 100%
rename from src/jobspy/core/__init__.py
rename to src/__init__.py
diff --git a/src/jobspy/__init__.py b/src/jobspy/__init__.py
index f3bb15f..9355d43 100644
--- a/src/jobspy/__init__.py
+++ b/src/jobspy/__init__.py
@@ -24,15 +24,14 @@ def _map_str_to_site(site_name: str) -> Site:
def scrape_jobs(
- site_name: str | Site | List[Site],
- search_term: str,
-
- location: str = "",
- distance: int = None,
- is_remote: bool = False,
- job_type: JobType = None,
- easy_apply: bool = False, # linkedin
- results_wanted: int = 15
+ site_name: str | Site | List[Site],
+ search_term: str,
+ location: str = "",
+ distance: int = None,
+ is_remote: bool = False,
+ job_type: JobType = None,
+ easy_apply: bool = False, # linkedin
+ results_wanted: int = 15,
) -> pd.DataFrame:
"""
Asynchronously scrapes job data from multiple job sites.
@@ -71,48 +70,59 @@ def scrape_jobs(
for site, job_response in results.items():
for job in job_response.jobs:
data = job.dict()
- data['site'] = site
+ data["site"] = site
# Formatting JobType
- data['job_type'] = data['job_type'].value if data['job_type'] else None
+ data["job_type"] = data["job_type"].value if data["job_type"] else None
# Formatting Location
- location_obj = data.get('location')
+ location_obj = data.get("location")
if location_obj and isinstance(location_obj, dict):
- data['city'] = location_obj.get('city', '')
- data['state'] = location_obj.get('state', '')
- data['country'] = location_obj.get('country', 'USA')
+ data["city"] = location_obj.get("city", "")
+ data["state"] = location_obj.get("state", "")
+ data["country"] = location_obj.get("country", "USA")
else:
- data['city'] = None
- data['state'] = None
- data['country'] = None
+ data["city"] = None
+ data["state"] = None
+ data["country"] = None
# Formatting Compensation
- compensation_obj = data.get('compensation')
+ compensation_obj = data.get("compensation")
if compensation_obj and isinstance(compensation_obj, dict):
- data['interval'] = compensation_obj.get('interval').value if compensation_obj.get('interval') else None
- data['min_amount'] = compensation_obj.get('min_amount')
- data['max_amount'] = compensation_obj.get('max_amount')
- data['currency'] = compensation_obj.get('currency', 'USD')
+ data["interval"] = (
+ compensation_obj.get("interval").value
+ if compensation_obj.get("interval")
+ else None
+ )
+ data["min_amount"] = compensation_obj.get("min_amount")
+ data["max_amount"] = compensation_obj.get("max_amount")
+ data["currency"] = compensation_obj.get("currency", "USD")
else:
- data['interval'] = None
- data['min_amount'] = None
- data['max_amount'] = None
- data['currency'] = None
+ data["interval"] = None
+ data["min_amount"] = None
+ data["max_amount"] = None
+ data["currency"] = None
job_df = pd.DataFrame([data])
dfs.append(job_df)
if dfs:
df = pd.concat(dfs, ignore_index=True)
- desired_order = ['site', 'title', 'company_name', 'city', 'state','job_type',
- 'interval', 'min_amount', 'max_amount', 'job_url', 'description',]
+ desired_order = [
+ "site",
+ "title",
+ "company_name",
+ "city",
+ "state",
+ "job_type",
+ "interval",
+ "min_amount",
+ "max_amount",
+ "job_url",
+ "description",
+ ]
df = df[desired_order]
else:
df = pd.DataFrame()
return df
-
-
-
-
diff --git a/src/jobspy/jobs/__init__.py b/src/jobspy/jobs/__init__.py
index 771e847..4c1bc65 100644
--- a/src/jobspy/jobs/__init__.py
+++ b/src/jobspy/jobs/__init__.py
@@ -19,7 +19,6 @@ class JobType(Enum):
VOLUNTEER = "volunteer"
-
class Location(BaseModel):
country: str = "USA"
city: str = None
@@ -47,10 +46,10 @@ class JobPost(BaseModel):
job_url: str
location: Optional[Location]
- description: str = None
+ description: Optional[str] = None
job_type: Optional[JobType] = None
compensation: Optional[Compensation] = None
- date_posted: date = None
+ date_posted: Optional[date] = None
class JobResponse(BaseModel):
diff --git a/src/jobspy/scrapers/__init__.py b/src/jobspy/scrapers/__init__.py
index 4df004b..b2a8900 100644
--- a/src/jobspy/scrapers/__init__.py
+++ b/src/jobspy/scrapers/__init__.py
@@ -1,5 +1,5 @@
from ..jobs import Enum, BaseModel, JobType, JobResponse
-from typing import List, Dict, Optional, Any
+from typing import List, Optional, Any
class StatusException(Exception):
diff --git a/src/jobspy/scrapers/indeed/__init__.py b/src/jobspy/scrapers/indeed/__init__.py
index 846f4f7..4e20977 100644
--- a/src/jobspy/scrapers/indeed/__init__.py
+++ b/src/jobspy/scrapers/indeed/__init__.py
@@ -1,9 +1,8 @@
import re
-import sys
import math
import json
from datetime import datetime
-from typing import Optional, Tuple, List
+from typing import Optional
import tls_client
import urllib.parse
@@ -11,7 +10,14 @@ from bs4 import BeautifulSoup
from bs4.element import Tag
from concurrent.futures import ThreadPoolExecutor, Future
-from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
+from ...jobs import (
+ JobPost,
+ Compensation,
+ CompensationInterval,
+ Location,
+ JobResponse,
+ JobType,
+)
from .. import Scraper, ScraperInput, Site, StatusException
@@ -61,10 +67,7 @@ class IndeedScraper(Scraper):
params["sc"] = "0kf:" + "".join(sc_values) + ";"
response = session.get(self.url + "/jobs", params=params)
- if (
- response.status_code != 200
- and response.status_code != 307
- ):
+ if response.status_code != 200 and response.status_code != 307:
raise StatusException(response.status_code)
soup = BeautifulSoup(response.content, "html.parser")
@@ -136,8 +139,10 @@ class IndeedScraper(Scraper):
return job_post
with ThreadPoolExecutor(max_workers=10) as executor:
- job_results: list[Future] = [executor.submit(process_job, job) for job in
- jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]]
+ job_results: list[Future] = [
+ executor.submit(process_job, job)
+ for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
+ ]
job_list = [result.result() for result in job_results if result.result()]
diff --git a/src/jobspy/scrapers/linkedin/__init__.py b/src/jobspy/scrapers/linkedin/__init__.py
index b3718ae..e4e8bc9 100644
--- a/src/jobspy/scrapers/linkedin/__init__.py
+++ b/src/jobspy/scrapers/linkedin/__init__.py
@@ -6,7 +6,14 @@ from bs4 import BeautifulSoup
from bs4.element import Tag
from .. import Scraper, ScraperInput, Site
-from ...jobs import JobPost, Location, JobResponse, JobType, Compensation, CompensationInterval
+from ...jobs import (
+ JobPost,
+ Location,
+ JobResponse,
+ JobType,
+ Compensation,
+ CompensationInterval,
+)
class LinkedInScraper(Scraper):
@@ -117,7 +124,9 @@ class LinkedInScraper(Scraper):
date_posted=date_posted,
job_url=job_url,
job_type=job_type,
- compensation=Compensation(interval=CompensationInterval.YEARLY, currency="USD")
+ compensation=Compensation(
+ interval=CompensationInterval.YEARLY, currency="USD"
+ ),
)
job_list.append(job_post)
if (
diff --git a/src/jobspy/scrapers/ziprecruiter/__init__.py b/src/jobspy/scrapers/ziprecruiter/__init__.py
index ad11d6f..eb35686 100644
--- a/src/jobspy/scrapers/ziprecruiter/__init__.py
+++ b/src/jobspy/scrapers/ziprecruiter/__init__.py
@@ -2,7 +2,7 @@ import math
import json
import re
from datetime import datetime
-from typing import Optional, Tuple, List
+from typing import Optional, Tuple
from urllib.parse import urlparse, parse_qs
import tls_client
@@ -11,7 +11,14 @@ from bs4.element import Tag
from concurrent.futures import ThreadPoolExecutor, Future
from .. import Scraper, ScraperInput, Site, StatusException
-from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
+from ...jobs import (
+ JobPost,
+ Compensation,
+ CompensationInterval,
+ Location,
+ JobResponse,
+ JobType,
+)
class ZipRecruiterScraper(Scraper):
@@ -55,7 +62,7 @@ class ZipRecruiterScraper(Scraper):
"search": scraper_input.search_term,
"location": scraper_input.location,
"page": page,
- "form": "jobs-landing"
+ "form": "jobs-landing",
}
if scraper_input.is_remote:
@@ -65,7 +72,9 @@ class ZipRecruiterScraper(Scraper):
params["radius"] = scraper_input.distance
if job_type_value:
- params["refine_by_employment"] = f"employment_type:employment_type:{job_type_value}"
+ params[
+ "refine_by_employment"
+ ] = f"employment_type:employment_type:{job_type_value}"
response = self.session.get(
self.url + "/jobs-search",
@@ -90,11 +99,14 @@ class ZipRecruiterScraper(Scraper):
with ThreadPoolExecutor(max_workers=10) as executor:
if "jobList" in data and data["jobList"]:
jobs_js = data["jobList"]
- job_results = [executor.submit(self.process_job_js, job) for job in jobs_js]
+ job_results = [
+ executor.submit(self.process_job_js, job) for job in jobs_js
+ ]
else:
jobs_html = soup.find_all("div", {"class": "job_content"})
- job_results = [executor.submit(self.process_job_html, job) for job in
- jobs_html]
+ job_results = [
+ executor.submit(self.process_job_html, job) for job in jobs_html
+ ]
job_list = [result.result() for result in job_results if result.result()]
@@ -107,8 +119,9 @@ class ZipRecruiterScraper(Scraper):
:return: job_response
"""
-
- pages_to_process = max(3, math.ceil(scraper_input.results_wanted / self.jobs_per_page))
+ pages_to_process = max(
+ 3, math.ceil(scraper_input.results_wanted / self.jobs_per_page)
+ )
try:
#: get first page to initialize session
@@ -125,7 +138,6 @@ class ZipRecruiterScraper(Scraper):
job_list += jobs
-
except StatusException as e:
return JobResponse(
success=False,
@@ -162,9 +174,7 @@ class ZipRecruiterScraper(Scraper):
title = job.find("h2", {"class": "title"}).text
company = job.find("a", {"class": "company_name"}).text.strip()
- description, updated_job_url = self.get_description(
- job_url
- )
+ description, updated_job_url = self.get_description(job_url)
if updated_job_url is not None:
job_url = updated_job_url
if description is None:
@@ -173,10 +183,7 @@ class ZipRecruiterScraper(Scraper):
job_type_element = job.find("li", {"class": "perk_item perk_type"})
if job_type_element:
job_type_text = (
- job_type_element.text.strip()
- .lower()
- .replace("-", "")
- .replace(" ", "")
+ job_type_element.text.strip().lower().replace("-", "").replace(" ", "")
)
if job_type_text == "contractor":
job_type_text = "contract"
@@ -201,12 +208,16 @@ class ZipRecruiterScraper(Scraper):
def process_job_js(self, job: dict) -> JobPost:
# Map the job data to the expected fields by the Pydantic model
title = job.get("Title")
- description = BeautifulSoup(job.get("Snippet","").strip(), "html.parser").get_text()
+ description = BeautifulSoup(
+ job.get("Snippet", "").strip(), "html.parser"
+ ).get_text()
company = job.get("OrgName")
location = Location(city=job.get("City"), state=job.get("State"))
try:
- job_type = ZipRecruiterScraper.job_type_from_string(job.get("EmploymentType", "").replace("-", "_").lower())
+ job_type = ZipRecruiterScraper.job_type_from_string(
+ job.get("EmploymentType", "").replace("-", "_").lower()
+ )
except ValueError:
# print(f"Skipping job due to unrecognized job type: {job.get('EmploymentType')}")
return None
@@ -215,14 +226,14 @@ class ZipRecruiterScraper(Scraper):
salary_parts = formatted_salary.split(" ")
min_salary_str = salary_parts[0][1:].replace(",", "")
- if '.' in min_salary_str:
+ if "." in min_salary_str:
min_amount = int(float(min_salary_str) * 1000)
else:
min_amount = int(min_salary_str.replace("K", "000"))
if len(salary_parts) >= 3 and salary_parts[2].startswith("$"):
max_salary_str = salary_parts[2][1:].replace(",", "")
- if '.' in max_salary_str:
+ if "." in max_salary_str:
max_amount = int(float(max_salary_str) * 1000)
else:
max_amount = int(max_salary_str.replace("K", "000"))
@@ -232,10 +243,12 @@ class ZipRecruiterScraper(Scraper):
compensation = Compensation(
interval=CompensationInterval.YEARLY,
min_amount=min_amount,
- max_amount=max_amount
+ max_amount=max_amount,
)
save_job_url = job.get("SaveJobURL", "")
- posted_time_match = re.search(r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url)
+ posted_time_match = re.search(
+ r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url
+ )
if posted_time_match:
date_time_str = posted_time_match.group(1)
date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ")
@@ -269,10 +282,7 @@ class ZipRecruiterScraper(Scraper):
return item
raise ValueError(f"Invalid value for JobType: {value}")
- def get_description(
- self,
- job_page_url: str
- ) -> Tuple[Optional[str], Optional[str]]:
+ def get_description(self, job_page_url: str) -> Tuple[Optional[str], Optional[str]]:
"""
Retrieves job description by going to the job page url
:param job_page_url:
diff --git a/src/tests/__init__.py b/src/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/tests/test_indeed.py b/src/tests/test_indeed.py
index 1d4ad52..bd9dfd1 100644
--- a/src/tests/test_indeed.py
+++ b/src/tests/test_indeed.py
@@ -1,4 +1,4 @@
-from jobspy import scrape_jobs
+from ..jobspy import scrape_jobs
def test_indeed():