Validation error (#35)

pull/38/head
Cullen Watson 2023-09-03 20:05:31 -05:00 committed by GitHub
parent 69b47a2053
commit 7ae7ecdee8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 519 additions and 486 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
/venv/ /venv/
/ven/ /ven/
**/__pycache__/ **/__pycache__/
**/.pytest_cache/
*.pyc *.pyc
.env .env
dist dist

View File

@ -1,12 +1,34 @@
{ {
"cells": [ "cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "00a94b47-f47b-420f-ba7e-714ef219c006",
"metadata": {},
"outputs": [],
"source": [
"from jobspy import scrape_jobs\n",
"import pandas as pd"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
"id": "c3f21577-477d-451e-9914-5d67e8a89075", "id": "9f773e6c-d9fc-42cc-b0ef-63b739e78435",
"metadata": { "metadata": {},
"scrolled": true "outputs": [],
}, "source": [
"pd.set_option('display.max_columns', None)\n",
"pd.set_option('display.max_rows', None)\n",
"pd.set_option('display.width', None)\n",
"pd.set_option('display.max_colwidth', 50)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1253c1f8-9437-492e-9dd3-e7fe51099420",
"metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
@ -46,20 +68,104 @@
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td>indeed</td>\n", " <td>indeed</td>\n",
" <td>Firmware Engineer</td>\n", " <td>Mental Health Therapist</td>\n",
" <td>Advanced Motion Controls</td>\n", " <td>Sandstone Care</td>\n",
" <td>Camarillo</td>\n", " <td>Broomfield</td>\n",
" <td>CA</td>\n", " <td>CO</td>\n",
" <td>fulltime</td>\n", " <td>fulltime</td>\n",
" <td>yearly</td>\n", " <td>yearly</td>\n",
" <td>145000</td>\n", " <td>68000</td>\n",
" <td>110000</td>\n", " <td>57500</td>\n",
" <td>https://www.indeed.com/viewjob?jk=a2e7077fdd3c...</td>\n", " <td>https://www.indeed.com/viewjob?jk=f5f33d72e030...</td>\n",
" <td>We are looking for an experienced Firmware Eng...</td>\n", " <td>Mental Health Therapist- Broomfield, CO Locati...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>1</th>\n",
" <td>indeed</td>\n", " <td>indeed</td>\n",
" <td>.NET Developer</td>\n",
" <td>Noir Consulting</td>\n",
" <td>Irving</td>\n",
" <td>TX</td>\n",
" <td>None</td>\n",
" <td>yearly</td>\n",
" <td>200000</td>\n",
" <td>200000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=1b22ba65296c...</td>\n",
" <td>.NET Software Engineer, C#, WPF - Irving (Tech...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>indeed</td>\n",
" <td>Senior Software Engineer</td>\n",
" <td>Johns Hopkins Applied Physics Laboratory (APL)</td>\n",
" <td>Laurel</td>\n",
" <td>MD</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.indeed.com/viewjob?jk=309eed270a88...</td>\n",
" <td>Description Are you a communications systems d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>indeed</td>\n",
" <td>Front End Developer</td>\n",
" <td>Verkada</td>\n",
" <td>San Mateo</td>\n",
" <td>CA</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>285000</td>\n",
" <td>120000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=a3ea45daca75...</td>\n",
" <td>Who We Are Verkada is the largest cloud-based ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>indeed</td>\n",
" <td>Software Engineer</td>\n",
" <td>Adobe</td>\n",
" <td>San Jose</td>\n",
" <td>CA</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>142700</td>\n",
" <td>73200</td>\n",
" <td>https://www.indeed.com/viewjob?jk=0f2dc9901fc7...</td>\n",
" <td>Our Company Changing the world through digital...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>indeed</td>\n",
" <td>Full Stack Developer</td>\n",
" <td>Comcast</td>\n",
" <td>Philadelphia</td>\n",
" <td>PA</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>184663</td>\n",
" <td>78789</td>\n",
" <td>https://www.indeed.com/viewjob?jk=eb5c927221eb...</td>\n",
" <td>Make your mark at Comcast - a Fortune 30 globa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>indeed</td>\n",
" <td>Senior Software Engineer</td>\n",
" <td>Smart City Solutions</td>\n",
" <td></td>\n",
" <td>FL</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>100000</td>\n",
" <td>85000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=ba1945f143a1...</td>\n",
" <td>Smart City hiring a full stack software develo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>indeed</td>\n",
" <td>Computer Engineer</td>\n", " <td>Computer Engineer</td>\n",
" <td>Honeywell</td>\n", " <td>Honeywell</td>\n",
" <td></td>\n", " <td></td>\n",
@ -72,116 +178,32 @@
" <td>Join a team recognized for leadership, innovat...</td>\n", " <td>Join a team recognized for leadership, innovat...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n",
" <td>indeed</td>\n",
" <td>Software Engineer</td>\n",
" <td>Splunk</td>\n",
" <td>Remote</td>\n",
" <td>None</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>159500</td>\n",
" <td>116000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=155495ca3f46...</td>\n",
" <td>A little about us. Splunk is the key to enterp...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>indeed</td>\n",
" <td>Development Operations Engineer</td>\n",
" <td>Stratacache</td>\n",
" <td>Dayton</td>\n",
" <td>OH</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>90000</td>\n",
" <td>83573</td>\n",
" <td>https://www.indeed.com/viewjob?jk=77cf3540c06e...</td>\n",
" <td>Stratacache, Inc. delivers in-store retail exp...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>indeed</td>\n",
" <td>Computer Engineer</td>\n",
" <td>Honeywell</td>\n",
" <td></td>\n",
" <td>None</td>\n",
" <td>fulltime</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.indeed.com/viewjob?jk=7fadbb7c936f...</td>\n",
" <td>Join a team recognized for leadership, innovat...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>indeed</td>\n",
" <td>Full Stack Developer</td>\n",
" <td>Reinventing Geospatial, Inc. (RGi)</td>\n",
" <td>Herndon</td>\n",
" <td>VA</td>\n",
" <td>fulltime</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.indeed.com/viewjob?jk=11b2b5b0dd44...</td>\n",
" <td>Job Highlights As a Full Stack Software Engine...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>indeed</td>\n",
" <td>Software Engineer</td>\n",
" <td>Workiva</td>\n",
" <td>Remote</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>yearly</td>\n",
" <td>134000</td>\n",
" <td>79000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=ec3ab6eb9253...</td>\n",
" <td>Are you ready to embark on an exciting journey...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>indeed</td>\n",
" <td>Senior Software Engineer</td>\n",
" <td>SciTec</td>\n",
" <td>Boulder</td>\n",
" <td>CO</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>164000</td>\n",
" <td>93000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=781e4cf0cf6d...</td>\n",
" <td>SciTec has been awarded multiple government co...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n", " <th>8</th>\n",
" <td>indeed</td>\n", " <td>indeed</td>\n",
" <td>Software Engineer</td>\n", " <td>Software Engineer</td>\n",
" <td>Microsoft</td>\n", " <td>Fidelity Investments</td>\n",
" <td></td>\n", " <td>Westlake</td>\n",
" <td>TX</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>fulltime</td>\n", " <td>None</td>\n",
" <td>yearly</td>\n", " <td>None</td>\n",
" <td>182600</td>\n", " <td>None</td>\n",
" <td>94300</td>\n", " <td>https://www.indeed.com/viewjob?jk=b600392166bb...</td>\n",
" <td>https://www.indeed.com/viewjob?jk=21e05b9e9d96...</td>\n", " <td>Job Description: Software Engineer in Test The...</td>\n",
" <td>At Microsoft we are seeking people who have a ...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>9</th>\n", " <th>9</th>\n",
" <td>indeed</td>\n", " <td>indeed</td>\n",
" <td>Software Engineer</td>\n", " <td>Fpga Engineer</td>\n",
" <td>Avalon Healthcare Solutions</td>\n", " <td>R-DEX Systems, Inc.</td>\n",
" <td>Remote</td>\n", " <td>Atlanta</td>\n",
" <td>None</td>\n", " <td>GA</td>\n",
" <td>None</td>\n", " <td>fulltime</td>\n",
" <td>None</td>\n", " <td>yearly</td>\n",
" <td>None</td>\n", " <td>160000</td>\n",
" <td>None</td>\n", " <td>120000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=da35b9bb74a0...</td>\n", " <td>https://www.indeed.com/viewjob?jk=a7e9d356c333...</td>\n",
" <td>Avalon Healthcare Solutions, headquartered in ...</td>\n", " <td>Title: Senior DSP/FPGA Firmware Engineer Descr...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>10</th>\n", " <th>10</th>\n",
@ -236,7 +258,7 @@
" <td>yearly</td>\n", " <td>yearly</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701775201</td>\n", " <td>https://www.linkedin.com/jobs/view/3701770659</td>\n",
" <td>Description:By bringing together people that u...</td>\n", " <td>Description:By bringing together people that u...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -250,7 +272,7 @@
" <td>yearly</td>\n", " <td>yearly</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701772329</td>\n", " <td>https://www.linkedin.com/jobs/view/3701769637</td>\n",
" <td>Description:By bringing together people that u...</td>\n", " <td>Description:By bringing together people that u...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
@ -264,12 +286,26 @@
" <td>yearly</td>\n", " <td>yearly</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>None</td>\n", " <td>None</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701769637</td>\n", " <td>https://www.linkedin.com/jobs/view/3701772329</td>\n",
" <td>Description:By bringing together people that u...</td>\n", " <td>Description:By bringing together people that u...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>16</th>\n", " <th>16</th>\n",
" <td>linkedin</td>\n", " <td>linkedin</td>\n",
" <td>Software Engineer - Early Career</td>\n",
" <td>Lockheed Martin</td>\n",
" <td>Fort Worth</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701775201</td>\n",
" <td>Description:By bringing together people that u...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>linkedin</td>\n",
" <td>Software Engineer</td>\n", " <td>Software Engineer</td>\n",
" <td>SpiderOak</td>\n", " <td>SpiderOak</td>\n",
" <td>Austin</td>\n", " <td>Austin</td>\n",
@ -282,20 +318,6 @@
" <td>We're only as strong as our weakest link.In th...</td>\n", " <td>We're only as strong as our weakest link.In th...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>17</th>\n",
" <td>linkedin</td>\n",
" <td>Software Engineer - Early Career</td>\n",
" <td>Lockheed Martin</td>\n",
" <td>Fort Worth</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701770659</td>\n",
" <td>Description:By bringing together people that u...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n", " <th>18</th>\n",
" <td>linkedin</td>\n", " <td>linkedin</td>\n",
" <td>Full-Stack Software Engineer</td>\n", " <td>Full-Stack Software Engineer</td>\n",
@ -326,20 +348,6 @@
" <tr>\n", " <tr>\n",
" <th>20</th>\n", " <th>20</th>\n",
" <td>zip_recruiter</td>\n", " <td>zip_recruiter</td>\n",
" <td>(USA) Software Engineer III - Prototype Engine...</td>\n",
" <td>Walmart</td>\n",
" <td>Dallas</td>\n",
" <td>TX</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://click.appcast.io/track/hcgsw4k?cs=ngp&amp;...</td>\n",
" <td>We are currently seeking a highly skilled and ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Engineer - New Grad</td>\n", " <td>Software Engineer - New Grad</td>\n",
" <td>ZipRecruiter</td>\n", " <td>ZipRecruiter</td>\n",
" <td>Santa Monica</td>\n", " <td>Santa Monica</td>\n",
@ -348,53 +356,11 @@
" <td>yearly</td>\n", " <td>yearly</td>\n",
" <td>130000</td>\n", " <td>130000</td>\n",
" <td>150000</td>\n", " <td>150000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/ziprecruiter...</td>\n", " <td>https://www.ziprecruiter.com/c/ZipRecruiter/Jo...</td>\n",
" <td>We offer a hybrid work environment. Most US-ba...</td>\n", " <td>Demonstrated foundation in software engineerin...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>22</th>\n", " <th>21</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Developer</td>\n",
" <td>Robert Half</td>\n",
" <td>Corpus Christi</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>105000</td>\n",
" <td>115000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/robert-half-...</td>\n",
" <td>Robert Half has an opening for a Software Deve...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Engineer</td>\n",
" <td>Advantage Technical</td>\n",
" <td>Ontario</td>\n",
" <td>CA</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>100000</td>\n",
" <td>150000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/advantage-te...</td>\n",
" <td>New career opportunity available with major Ma...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Developer</td>\n",
" <td>Robert Half</td>\n",
" <td>Tucson</td>\n",
" <td>AZ</td>\n",
" <td>temporary</td>\n",
" <td>hourly</td>\n",
" <td>47</td>\n",
" <td>55</td>\n",
" <td>https://www.ziprecruiter.com/jobs/robert-half-...</td>\n",
" <td>Robert Half is accepting inquiries for a SQL S...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>zip_recruiter</td>\n", " <td>zip_recruiter</td>\n",
" <td>Full Stack Software Engineer</td>\n", " <td>Full Stack Software Engineer</td>\n",
" <td>ZipRecruiter</td>\n", " <td>ZipRecruiter</td>\n",
@ -404,25 +370,11 @@
" <td>yearly</td>\n", " <td>yearly</td>\n",
" <td>105000</td>\n", " <td>105000</td>\n",
" <td>145000</td>\n", " <td>145000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/ziprecruiter...</td>\n", " <td>https://www.ziprecruiter.com/c/ZipRecruiter/Jo...</td>\n",
" <td>We offer a hybrid work environment. Most US-ba...</td>\n", " <td>Experience in client side development using Re...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>26</th>\n", " <th>22</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Developer IV</td>\n",
" <td>Kforce Inc.</td>\n",
" <td>Mountain View</td>\n",
" <td>CA</td>\n",
" <td>contract</td>\n",
" <td>hourly</td>\n",
" <td>55</td>\n",
" <td>75</td>\n",
" <td>https://www.kforce.com/Jobs/job.aspx?job=1696~...</td>\n",
" <td>Kforce has a client that is seeking a Software...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>zip_recruiter</td>\n", " <td>zip_recruiter</td>\n",
" <td>Software Developer | Onsite | Omaha, NE - Omaha</td>\n", " <td>Software Developer | Onsite | Omaha, NE - Omaha</td>\n",
" <td>OneStaff Medical</td>\n", " <td>OneStaff Medical</td>\n",
@ -432,36 +384,106 @@
" <td>yearly</td>\n", " <td>yearly</td>\n",
" <td>60000</td>\n", " <td>60000</td>\n",
" <td>110000</td>\n", " <td>110000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/onestaff-med...</td>\n", " <td>https://www.ziprecruiter.com/c/OneStaff-Medica...</td>\n",
" <td>Company Description: We are looking for a well...</td>\n", " <td>We are looking for a well-rounded Software Dev...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Senior Software Engineer, Onsite [Real-time]</td>\n",
" <td>Raytheon</td>\n",
" <td>McKinney</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>116000</td>\n",
" <td>153000</td>\n",
" <td>https://jsv3.recruitics.com/redirect?rx_cid=34...</td>\n",
" <td>By joining the Silent Knight team as a Senior ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Senior Software Engineer - TS/SCI **Minimum $2...</td>\n",
" <td>Raytheon</td>\n",
" <td>Dallas</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>122000</td>\n",
" <td>162000</td>\n",
" <td>https://jsv3.recruitics.com/redirect?rx_cid=34...</td>\n",
" <td>Object Oriented Programming using C++ with Lin...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Engineer III (full stack, AI/ML, Djan...</td>\n",
" <td>Ayahealthcare</td>\n",
" <td>Remote</td>\n",
" <td>OR</td>\n",
" <td>None</td>\n",
" <td>yearly</td>\n",
" <td>156000</td>\n",
" <td>165000</td>\n",
" <td>https://click.appcast.io/track/hcbh0qq?cs=ngp&amp;...</td>\n",
" <td>The Software Engineer III will be an integral ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Engineer Full Stack</td>\n",
" <td>Generac Power Systems</td>\n",
" <td>Denver</td>\n",
" <td>CO</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>90000</td>\n",
" <td>115000</td>\n",
" <td>https://www.ziprecruiter.com/c/Generac-Power-S...</td>\n",
" <td>As a Software Engineer on the Energy Technolog...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Embedded Software Engineer (Fort Worth, TX or ...</td>\n",
" <td>Kubota</td>\n",
" <td>Fort Worth</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>122000</td>\n",
" <td>167000</td>\n",
" <td>https://us62e2.dayforcehcm.com/CandidatePortal...</td>\n",
" <td>Work with a cross-functional team to design, t...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>28</th>\n", " <th>28</th>\n",
" <td>zip_recruiter</td>\n", " <td>zip_recruiter</td>\n",
" <td>Senior Software Engineer</td>\n", " <td>Senior Software Engineer (FT)</td>\n",
" <td>RightStaff, Inc.</td>\n", " <td>National Indoor RV Center</td>\n",
" <td>Dallas</td>\n", " <td>Lewisville</td>\n",
" <td>TX</td>\n", " <td>TX</td>\n",
" <td>fulltime</td>\n", " <td>fulltime</td>\n",
" <td>yearly</td>\n", " <td>yearly</td>\n",
" <td>120000</td>\n", " <td>125000</td>\n",
" <td>180000</td>\n", " <td>0</td>\n",
" <td>https://www.ziprecruiter.com/jobs/rightstaff-i...</td>\n", " <td>https://www.ziprecruiter.com/c/National-Indoor...</td>\n",
" <td>Job Description:We are seeking a talented and ...</td>\n", " <td>As a Senior Software Engineer, you will: * Des...</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>29</th>\n", " <th>29</th>\n",
" <td>zip_recruiter</td>\n", " <td>zip_recruiter</td>\n",
" <td>Software Developer - .Net Core - 12886</td>\n", " <td>2024 Next Gen IT Program | Software Engineerin...</td>\n",
" <td>Walker Elliott</td>\n", " <td>Southern Glazer's Wine &amp; Spirits</td>\n",
" <td>Dallas</td>\n", " <td>Dallas</td>\n",
" <td>TX</td>\n", " <td>TX</td>\n",
" <td>fulltime</td>\n", " <td>None</td>\n",
" <td>yearly</td>\n", " <td>yearly</td>\n",
" <td>105000</td>\n", " <td>70000</td>\n",
" <td>130000</td>\n", " <td>0</td>\n",
" <td>https://www.ziprecruiter.com/jobs/walker-ellio...</td>\n", " <td>https://click.appcast.io/track/hdsbnae?cs=b4&amp;j...</td>\n",
" <td>Our highly successful DFW based client has bee...</td>\n", " <td>Finally, through the work assigned, the analys...</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
@ -469,219 +491,185 @@
], ],
"text/plain": [ "text/plain": [
" site title \\\n", " site title \\\n",
"0 indeed Firmware Engineer \n", "0 indeed Mental Health Therapist \n",
"1 indeed Computer Engineer \n", "1 indeed .NET Developer \n",
"2 indeed Software Engineer \n", "2 indeed Senior Software Engineer \n",
"3 indeed Development Operations Engineer \n", "3 indeed Front End Developer \n",
"4 indeed Computer Engineer \n", "4 indeed Software Engineer \n",
"5 indeed Full Stack Developer \n", "5 indeed Full Stack Developer \n",
"6 indeed Software Engineer \n", "6 indeed Senior Software Engineer \n",
"7 indeed Senior Software Engineer \n", "7 indeed Computer Engineer \n",
"8 indeed Software Engineer \n", "8 indeed Software Engineer \n",
"9 indeed Software Engineer \n", "9 indeed Fpga Engineer \n",
"10 linkedin Software Engineer \n", "10 linkedin Software Engineer \n",
"11 linkedin Software Engineer - Early Career \n", "11 linkedin Software Engineer - Early Career \n",
"12 linkedin Software Engineer - Early Career \n", "12 linkedin Software Engineer - Early Career \n",
"13 linkedin Software Engineer - Early Career \n", "13 linkedin Software Engineer - Early Career \n",
"14 linkedin Software Engineer - Early Career \n", "14 linkedin Software Engineer - Early Career \n",
"15 linkedin Software Engineer - Early Career \n", "15 linkedin Software Engineer - Early Career \n",
"16 linkedin Software Engineer \n", "16 linkedin Software Engineer - Early Career \n",
"17 linkedin Software Engineer - Early Career \n", "17 linkedin Software Engineer \n",
"18 linkedin Full-Stack Software Engineer \n", "18 linkedin Full-Stack Software Engineer \n",
"19 linkedin Software Engineer \n", "19 linkedin Software Engineer \n",
"20 zip_recruiter (USA) Software Engineer III - Prototype Engine... \n", "20 zip_recruiter Software Engineer - New Grad \n",
"21 zip_recruiter Software Engineer - New Grad \n", "21 zip_recruiter Full Stack Software Engineer \n",
"22 zip_recruiter Software Developer \n", "22 zip_recruiter Software Developer | Onsite | Omaha, NE - Omaha \n",
"23 zip_recruiter Software Engineer \n", "23 zip_recruiter Senior Software Engineer, Onsite [Real-time] \n",
"24 zip_recruiter Software Developer \n", "24 zip_recruiter Senior Software Engineer - TS/SCI **Minimum $2... \n",
"25 zip_recruiter Full Stack Software Engineer \n", "25 zip_recruiter Software Engineer III (full stack, AI/ML, Djan... \n",
"26 zip_recruiter Software Developer IV \n", "26 zip_recruiter Software Engineer Full Stack \n",
"27 zip_recruiter Software Developer | Onsite | Omaha, NE - Omaha \n", "27 zip_recruiter Embedded Software Engineer (Fort Worth, TX or ... \n",
"28 zip_recruiter Senior Software Engineer \n", "28 zip_recruiter Senior Software Engineer (FT) \n",
"29 zip_recruiter Software Developer - .Net Core - 12886 \n", "29 zip_recruiter 2024 Next Gen IT Program | Software Engineerin... \n",
"\n", "\n",
" company_name city state job_type \\\n", " company_name city state \\\n",
"0 Advanced Motion Controls Camarillo CA fulltime \n", "0 Sandstone Care Broomfield CO \n",
"1 Honeywell None fulltime \n", "1 Noir Consulting Irving TX \n",
"2 Splunk Remote None fulltime \n", "2 Johns Hopkins Applied Physics Laboratory (APL) Laurel MD \n",
"3 Stratacache Dayton OH fulltime \n", "3 Verkada San Mateo CA \n",
"4 Honeywell None fulltime \n", "4 Adobe San Jose CA \n",
"5 Reinventing Geospatial, Inc. (RGi) Herndon VA fulltime \n", "5 Comcast Philadelphia PA \n",
"6 Workiva Remote None None \n", "6 Smart City Solutions FL \n",
"7 SciTec Boulder CO fulltime \n", "7 Honeywell None \n",
"8 Microsoft None fulltime \n", "8 Fidelity Investments Westlake TX \n",
"9 Avalon Healthcare Solutions Remote None None \n", "9 R-DEX Systems, Inc. Atlanta GA \n",
"10 Fieldguide San Francisco CA fulltime \n", "10 Fieldguide San Francisco CA \n",
"11 Lockheed Martin Sunnyvale CA fulltime \n", "11 Lockheed Martin Sunnyvale CA \n",
"12 Lockheed Martin Edwards CA fulltime \n", "12 Lockheed Martin Edwards CA \n",
"13 Lockheed Martin Fort Worth TX fulltime \n", "13 Lockheed Martin Fort Worth TX \n",
"14 Lockheed Martin Fort Worth TX fulltime \n", "14 Lockheed Martin Fort Worth TX \n",
"15 Lockheed Martin Fort Worth TX fulltime \n", "15 Lockheed Martin Fort Worth TX \n",
"16 SpiderOak Austin TX fulltime \n", "16 Lockheed Martin Fort Worth TX \n",
"17 Lockheed Martin Fort Worth TX fulltime \n", "17 SpiderOak Austin TX \n",
"18 Rain New York NY fulltime \n", "18 Rain New York NY \n",
"19 Nike Portland OR contract \n", "19 Nike Portland OR \n",
"20 Walmart Dallas TX None \n", "20 ZipRecruiter Santa Monica CA \n",
"21 ZipRecruiter Santa Monica CA fulltime \n", "21 ZipRecruiter Phoenix AZ \n",
"22 Robert Half Corpus Christi TX fulltime \n", "22 OneStaff Medical Omaha NE \n",
"23 Advantage Technical Ontario CA fulltime \n", "23 Raytheon McKinney TX \n",
"24 Robert Half Tucson AZ temporary \n", "24 Raytheon Dallas TX \n",
"25 ZipRecruiter Phoenix AZ fulltime \n", "25 Ayahealthcare Remote OR \n",
"26 Kforce Inc. Mountain View CA contract \n", "26 Generac Power Systems Denver CO \n",
"27 OneStaff Medical Omaha NE fulltime \n", "27 Kubota Fort Worth TX \n",
"28 RightStaff, Inc. Dallas TX fulltime \n", "28 National Indoor RV Center Lewisville TX \n",
"29 Walker Elliott Dallas TX fulltime \n", "29 Southern Glazer's Wine & Spirits Dallas TX \n",
"\n", "\n",
" interval min_amount max_amount \\\n", " job_type interval min_amount max_amount \\\n",
"0 yearly 145000 110000 \n", "0 fulltime yearly 68000 57500 \n",
"1 None None None \n", "1 None yearly 200000 200000 \n",
"2 yearly 159500 116000 \n", "2 None None None None \n",
"3 yearly 90000 83573 \n", "3 fulltime yearly 285000 120000 \n",
"4 None None None \n", "4 fulltime yearly 142700 73200 \n",
"5 None None None \n", "5 fulltime yearly 184663 78789 \n",
"6 yearly 134000 79000 \n", "6 fulltime yearly 100000 85000 \n",
"7 yearly 164000 93000 \n", "7 fulltime None None None \n",
"8 yearly 182600 94300 \n", "8 None None None None \n",
"9 None None None \n", "9 fulltime yearly 160000 120000 \n",
"10 yearly None None \n", "10 fulltime yearly None None \n",
"11 yearly None None \n", "11 fulltime yearly None None \n",
"12 yearly None None \n", "12 fulltime yearly None None \n",
"13 yearly None None \n", "13 fulltime yearly None None \n",
"14 yearly None None \n", "14 fulltime yearly None None \n",
"15 yearly None None \n", "15 fulltime yearly None None \n",
"16 yearly None None \n", "16 fulltime yearly None None \n",
"17 yearly None None \n", "17 fulltime yearly None None \n",
"18 yearly None None \n", "18 fulltime yearly None None \n",
"19 yearly None None \n", "19 contract yearly None None \n",
"20 None None None \n", "20 fulltime yearly 130000 150000 \n",
"21 yearly 130000 150000 \n", "21 fulltime yearly 105000 145000 \n",
"22 yearly 105000 115000 \n", "22 fulltime yearly 60000 110000 \n",
"23 yearly 100000 150000 \n", "23 fulltime yearly 116000 153000 \n",
"24 hourly 47 55 \n", "24 fulltime yearly 122000 162000 \n",
"25 yearly 105000 145000 \n", "25 None yearly 156000 165000 \n",
"26 hourly 55 75 \n", "26 fulltime yearly 90000 115000 \n",
"27 yearly 60000 110000 \n", "27 fulltime yearly 122000 167000 \n",
"28 yearly 120000 180000 \n", "28 fulltime yearly 125000 0 \n",
"29 yearly 105000 130000 \n", "29 None yearly 70000 0 \n",
"\n", "\n",
" job_url \\\n", " job_url \\\n",
"0 https://www.indeed.com/viewjob?jk=a2e7077fdd3c... \n", "0 https://www.indeed.com/viewjob?jk=f5f33d72e030... \n",
"1 https://www.indeed.com/viewjob?jk=5a1da623ee75... \n", "1 https://www.indeed.com/viewjob?jk=1b22ba65296c... \n",
"2 https://www.indeed.com/viewjob?jk=155495ca3f46... \n", "2 https://www.indeed.com/viewjob?jk=309eed270a88... \n",
"3 https://www.indeed.com/viewjob?jk=77cf3540c06e... \n", "3 https://www.indeed.com/viewjob?jk=a3ea45daca75... \n",
"4 https://www.indeed.com/viewjob?jk=7fadbb7c936f... \n", "4 https://www.indeed.com/viewjob?jk=0f2dc9901fc7... \n",
"5 https://www.indeed.com/viewjob?jk=11b2b5b0dd44... \n", "5 https://www.indeed.com/viewjob?jk=eb5c927221eb... \n",
"6 https://www.indeed.com/viewjob?jk=ec3ab6eb9253... \n", "6 https://www.indeed.com/viewjob?jk=ba1945f143a1... \n",
"7 https://www.indeed.com/viewjob?jk=781e4cf0cf6d... \n", "7 https://www.indeed.com/viewjob?jk=5a1da623ee75... \n",
"8 https://www.indeed.com/viewjob?jk=21e05b9e9d96... \n", "8 https://www.indeed.com/viewjob?jk=b600392166bb... \n",
"9 https://www.indeed.com/viewjob?jk=da35b9bb74a0... \n", "9 https://www.indeed.com/viewjob?jk=a7e9d356c333... \n",
"10 https://www.linkedin.com/jobs/view/3696158160 \n", "10 https://www.linkedin.com/jobs/view/3696158160 \n",
"11 https://www.linkedin.com/jobs/view/3693012711 \n", "11 https://www.linkedin.com/jobs/view/3693012711 \n",
"12 https://www.linkedin.com/jobs/view/3700669785 \n", "12 https://www.linkedin.com/jobs/view/3700669785 \n",
"13 https://www.linkedin.com/jobs/view/3701775201 \n", "13 https://www.linkedin.com/jobs/view/3701770659 \n",
"14 https://www.linkedin.com/jobs/view/3701772329 \n", "14 https://www.linkedin.com/jobs/view/3701769637 \n",
"15 https://www.linkedin.com/jobs/view/3701769637 \n", "15 https://www.linkedin.com/jobs/view/3701772329 \n",
"16 https://www.linkedin.com/jobs/view/3707174719 \n", "16 https://www.linkedin.com/jobs/view/3701775201 \n",
"17 https://www.linkedin.com/jobs/view/3701770659 \n", "17 https://www.linkedin.com/jobs/view/3707174719 \n",
"18 https://www.linkedin.com/jobs/view/3696158877 \n", "18 https://www.linkedin.com/jobs/view/3696158877 \n",
"19 https://www.linkedin.com/jobs/view/3693340247 \n", "19 https://www.linkedin.com/jobs/view/3693340247 \n",
"20 https://click.appcast.io/track/hcgsw4k?cs=ngp&... \n", "20 https://www.ziprecruiter.com/c/ZipRecruiter/Jo... \n",
"21 https://www.ziprecruiter.com/jobs/ziprecruiter... \n", "21 https://www.ziprecruiter.com/c/ZipRecruiter/Jo... \n",
"22 https://www.ziprecruiter.com/jobs/robert-half-... \n", "22 https://www.ziprecruiter.com/c/OneStaff-Medica... \n",
"23 https://www.ziprecruiter.com/jobs/advantage-te... \n", "23 https://jsv3.recruitics.com/redirect?rx_cid=34... \n",
"24 https://www.ziprecruiter.com/jobs/robert-half-... \n", "24 https://jsv3.recruitics.com/redirect?rx_cid=34... \n",
"25 https://www.ziprecruiter.com/jobs/ziprecruiter... \n", "25 https://click.appcast.io/track/hcbh0qq?cs=ngp&... \n",
"26 https://www.kforce.com/Jobs/job.aspx?job=1696~... \n", "26 https://www.ziprecruiter.com/c/Generac-Power-S... \n",
"27 https://www.ziprecruiter.com/jobs/onestaff-med... \n", "27 https://us62e2.dayforcehcm.com/CandidatePortal... \n",
"28 https://www.ziprecruiter.com/jobs/rightstaff-i... \n", "28 https://www.ziprecruiter.com/c/National-Indoor... \n",
"29 https://www.ziprecruiter.com/jobs/walker-ellio... \n", "29 https://click.appcast.io/track/hdsbnae?cs=b4&j... \n",
"\n", "\n",
" description \n", " description \n",
"0 We are looking for an experienced Firmware Eng... \n", "0 Mental Health Therapist- Broomfield, CO Locati... \n",
"1 Join a team recognized for leadership, innovat... \n", "1 .NET Software Engineer, C#, WPF - Irving (Tech... \n",
"2 A little about us. Splunk is the key to enterp... \n", "2 Description Are you a communications systems d... \n",
"3 Stratacache, Inc. delivers in-store retail exp... \n", "3 Who We Are Verkada is the largest cloud-based ... \n",
"4 Join a team recognized for leadership, innovat... \n", "4 Our Company Changing the world through digital... \n",
"5 Job Highlights As a Full Stack Software Engine... \n", "5 Make your mark at Comcast - a Fortune 30 globa... \n",
"6 Are you ready to embark on an exciting journey... \n", "6 Smart City hiring a full stack software develo... \n",
"7 SciTec has been awarded multiple government co... \n", "7 Join a team recognized for leadership, innovat... \n",
"8 At Microsoft we are seeking people who have a ... \n", "8 Job Description: Software Engineer in Test The... \n",
"9 Avalon Healthcare Solutions, headquartered in ... \n", "9 Title: Senior DSP/FPGA Firmware Engineer Descr... \n",
"10 About us:Fieldguide is establishing a new stat... \n", "10 About us:Fieldguide is establishing a new stat... \n",
"11 Description:By bringing together people that u... \n", "11 Description:By bringing together people that u... \n",
"12 Description:By bringing together people that u... \n", "12 Description:By bringing together people that u... \n",
"13 Description:By bringing together people that u... \n", "13 Description:By bringing together people that u... \n",
"14 Description:By bringing together people that u... \n", "14 Description:By bringing together people that u... \n",
"15 Description:By bringing together people that u... \n", "15 Description:By bringing together people that u... \n",
"16 We're only as strong as our weakest link.In th... \n", "16 Description:By bringing together people that u... \n",
"17 Description:By bringing together people that u... \n", "17 We're only as strong as our weakest link.In th... \n",
"18 Rains mission is to create the fastest and ea... \n", "18 Rains mission is to create the fastest and ea... \n",
"19 Work options: FlexibleWe consider remote, on-p... \n", "19 Work options: FlexibleWe consider remote, on-p... \n",
"20 We are currently seeking a highly skilled and ... \n", "20 Demonstrated foundation in software engineerin... \n",
"21 We offer a hybrid work environment. Most US-ba... \n", "21 Experience in client side development using Re... \n",
"22 Robert Half has an opening for a Software Deve... \n", "22 We are looking for a well-rounded Software Dev... \n",
"23 New career opportunity available with major Ma... \n", "23 By joining the Silent Knight team as a Senior ... \n",
"24 Robert Half is accepting inquiries for a SQL S... \n", "24 Object Oriented Programming using C++ with Lin... \n",
"25 We offer a hybrid work environment. Most US-ba... \n", "25 The Software Engineer III will be an integral ... \n",
"26 Kforce has a client that is seeking a Software... \n", "26 As a Software Engineer on the Energy Technolog... \n",
"27 Company Description: We are looking for a well... \n", "27 Work with a cross-functional team to design, t... \n",
"28 Job Description:We are seeking a talented and ... \n", "28 As a Senior Software Engineer, you will: * Des... \n",
"29 Our highly successful DFW based client has bee... " "29 Finally, through the work assigned, the analys... "
] ]
}, },
"execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "display_data" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"from jobspy import scrape_jobs\n", "scrape_jobs(\n",
"import pandas as pd\n",
"\n",
"jobs: pd.DataFrame = scrape_jobs(\n",
" site_name=[\"indeed\", \"linkedin\", \"zip_recruiter\"],\n", " site_name=[\"indeed\", \"linkedin\", \"zip_recruiter\"],\n",
" search_term=\"software engineer\",\n", " search_term=\"software engineer\",\n",
" results_wanted=10\n", " results_wanted=10\n",
")\n", ")"
"\n",
"if jobs.empty:\n",
" print(\"No jobs found.\")\n",
"else:\n",
" # 1 print\n",
" pd.set_option('display.max_columns', None)\n",
" pd.set_option('display.max_rows', None)\n",
" pd.set_option('display.width', None)\n",
" pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc\n",
" print(jobs)\n",
"\n",
" # 2 display in Jupyter Notebook\n",
" display(jobs)\n",
"\n",
" # 3 output to csv\n",
" jobs.to_csv('jobs.csv', index=False)"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"id": "efd667ef-fdf0-452a-b5e5-ce6825755be7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1574dc17-0a42-4655-964f-5c03a6d3deb0",
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "my-poetry-env", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "my-poetry-env" "name": "python3"
}, },
"language_info": { "language_info": {
"codemirror_mode": { "codemirror_mode": {
@ -693,7 +681,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.11" "version": "3.11.4"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -5,7 +5,9 @@
- Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously - Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously
- Aggregates the job postings in a Pandas DataFrame - Aggregates the job postings in a Pandas DataFrame
![jobspy](https://github.com/cullenwatson/JobSpy/assets/78247585/ec7ef355-05f6-4fd3-8161-a817e31c5c57)
### Installation ### Installation
`pip install python-jobspy` `pip install python-jobspy`
@ -26,18 +28,18 @@ jobs: pd.DataFrame = scrape_jobs(
if jobs.empty: if jobs.empty:
print("No jobs found.") print("No jobs found.")
else: else:
# 1 print #1 print
pd.set_option('display.max_columns', None) pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None) pd.set_option('display.max_rows', None)
pd.set_option('display.width', None) pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc
print(jobs) print(jobs)
# 2 display in Jupyter Notebook #2 display in Jupyter Notebook
# display(jobs) #display(jobs)
# 3 output to csv #3 output to .csv
# jobs.to_csv('jobs.csv', index=False) #jobs.to_csv('jobs.csv', index=False)
``` ```
### Output ### Output
@ -51,8 +53,6 @@ zip_recruiter Software Engineer - New Grad ZipRecruiter Santa Monica
zip_recruiter Software Developer TEKsystems Phoenix AZ fulltime hourly 65 75 https://www.ziprecruiter.com/jobs/teksystems-0... Top Skills' Details• 6 years of Java developme... zip_recruiter Software Developer TEKsystems Phoenix AZ fulltime hourly 65 75 https://www.ziprecruiter.com/jobs/teksystems-0... Top Skills' Details• 6 years of Java developme...
``` ```
### Parameters for `scrape_jobs()` ### Parameters for `scrape_jobs()`
```plaintext ```plaintext
Required Required
├── site_name (List[enum]): linkedin, zip_recruiter, indeed ├── site_name (List[enum]): linkedin, zip_recruiter, indeed
@ -87,12 +87,23 @@ JobPost
``` ```
### FAQ ## Frequently Asked Questions
#### Encountering issues with your queries? ---
Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, please submit an issue. **Q: Encountering issues with your queries?**
**A:** Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, [submit an issue](https://github.com/cullenwatson/JobSpy/issues).
#### Received a response code 429?
You have been blocked by the job board site for sending too many requests. ZipRecruiter seems to be the most aggressive at the moment. Consider waiting a few seconds, or try using a VPN. Proxy support coming soon. ---
**Q: Received a response code 429?**
**A:** This indicates that you have been blocked by the job board site for sending too many requests. Currently, **ZipRecruiter** is particularly aggressive with blocking. We recommend:
- Waiting a few seconds between requests.
- Trying a VPN to change your IP address.
**Note:** Proxy support is in development and coming soon!
---

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "python-jobspy" name = "python-jobspy"
version = "1.0.1" version = "1.0.2"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter" description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"] authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
readme = "README.md" readme = "README.md"

View File

@ -24,15 +24,14 @@ def _map_str_to_site(site_name: str) -> Site:
def scrape_jobs( def scrape_jobs(
site_name: str | Site | List[Site], site_name: str | Site | List[Site],
search_term: str, search_term: str,
location: str = "",
location: str = "", distance: int = None,
distance: int = None, is_remote: bool = False,
is_remote: bool = False, job_type: JobType = None,
job_type: JobType = None, easy_apply: bool = False, # linkedin
easy_apply: bool = False, # linkedin results_wanted: int = 15,
results_wanted: int = 15
) -> pd.DataFrame: ) -> pd.DataFrame:
""" """
Asynchronously scrapes job data from multiple job sites. Asynchronously scrapes job data from multiple job sites.
@ -71,48 +70,59 @@ def scrape_jobs(
for site, job_response in results.items(): for site, job_response in results.items():
for job in job_response.jobs: for job in job_response.jobs:
data = job.dict() data = job.dict()
data['site'] = site data["site"] = site
# Formatting JobType # Formatting JobType
data['job_type'] = data['job_type'].value if data['job_type'] else None data["job_type"] = data["job_type"].value if data["job_type"] else None
# Formatting Location # Formatting Location
location_obj = data.get('location') location_obj = data.get("location")
if location_obj and isinstance(location_obj, dict): if location_obj and isinstance(location_obj, dict):
data['city'] = location_obj.get('city', '') data["city"] = location_obj.get("city", "")
data['state'] = location_obj.get('state', '') data["state"] = location_obj.get("state", "")
data['country'] = location_obj.get('country', 'USA') data["country"] = location_obj.get("country", "USA")
else: else:
data['city'] = None data["city"] = None
data['state'] = None data["state"] = None
data['country'] = None data["country"] = None
# Formatting Compensation # Formatting Compensation
compensation_obj = data.get('compensation') compensation_obj = data.get("compensation")
if compensation_obj and isinstance(compensation_obj, dict): if compensation_obj and isinstance(compensation_obj, dict):
data['interval'] = compensation_obj.get('interval').value if compensation_obj.get('interval') else None data["interval"] = (
data['min_amount'] = compensation_obj.get('min_amount') compensation_obj.get("interval").value
data['max_amount'] = compensation_obj.get('max_amount') if compensation_obj.get("interval")
data['currency'] = compensation_obj.get('currency', 'USD') else None
)
data["min_amount"] = compensation_obj.get("min_amount")
data["max_amount"] = compensation_obj.get("max_amount")
data["currency"] = compensation_obj.get("currency", "USD")
else: else:
data['interval'] = None data["interval"] = None
data['min_amount'] = None data["min_amount"] = None
data['max_amount'] = None data["max_amount"] = None
data['currency'] = None data["currency"] = None
job_df = pd.DataFrame([data]) job_df = pd.DataFrame([data])
dfs.append(job_df) dfs.append(job_df)
if dfs: if dfs:
df = pd.concat(dfs, ignore_index=True) df = pd.concat(dfs, ignore_index=True)
desired_order = ['site', 'title', 'company_name', 'city', 'state','job_type', desired_order = [
'interval', 'min_amount', 'max_amount', 'job_url', 'description',] "site",
"title",
"company_name",
"city",
"state",
"job_type",
"interval",
"min_amount",
"max_amount",
"job_url",
"description",
]
df = df[desired_order] df = df[desired_order]
else: else:
df = pd.DataFrame() df = pd.DataFrame()
return df return df

View File

@ -19,7 +19,6 @@ class JobType(Enum):
VOLUNTEER = "volunteer" VOLUNTEER = "volunteer"
class Location(BaseModel): class Location(BaseModel):
country: str = "USA" country: str = "USA"
city: str = None city: str = None
@ -47,10 +46,10 @@ class JobPost(BaseModel):
job_url: str job_url: str
location: Optional[Location] location: Optional[Location]
description: str = None description: Optional[str] = None
job_type: Optional[JobType] = None job_type: Optional[JobType] = None
compensation: Optional[Compensation] = None compensation: Optional[Compensation] = None
date_posted: date = None date_posted: Optional[date] = None
class JobResponse(BaseModel): class JobResponse(BaseModel):

View File

@ -1,5 +1,5 @@
from ..jobs import Enum, BaseModel, JobType, JobResponse from ..jobs import Enum, BaseModel, JobType, JobResponse
from typing import List, Dict, Optional, Any from typing import List, Optional, Any
class StatusException(Exception): class StatusException(Exception):

View File

@ -1,9 +1,8 @@
import re import re
import sys
import math import math
import json import json
from datetime import datetime from datetime import datetime
from typing import Optional, Tuple, List from typing import Optional
import tls_client import tls_client
import urllib.parse import urllib.parse
@ -11,7 +10,14 @@ from bs4 import BeautifulSoup
from bs4.element import Tag from bs4.element import Tag
from concurrent.futures import ThreadPoolExecutor, Future from concurrent.futures import ThreadPoolExecutor, Future
from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType from ...jobs import (
JobPost,
Compensation,
CompensationInterval,
Location,
JobResponse,
JobType,
)
from .. import Scraper, ScraperInput, Site, StatusException from .. import Scraper, ScraperInput, Site, StatusException
@ -61,10 +67,7 @@ class IndeedScraper(Scraper):
params["sc"] = "0kf:" + "".join(sc_values) + ";" params["sc"] = "0kf:" + "".join(sc_values) + ";"
response = session.get(self.url + "/jobs", params=params) response = session.get(self.url + "/jobs", params=params)
if ( if response.status_code != 200 and response.status_code != 307:
response.status_code != 200
and response.status_code != 307
):
raise StatusException(response.status_code) raise StatusException(response.status_code)
soup = BeautifulSoup(response.content, "html.parser") soup = BeautifulSoup(response.content, "html.parser")
@ -136,8 +139,10 @@ class IndeedScraper(Scraper):
return job_post return job_post
with ThreadPoolExecutor(max_workers=10) as executor: with ThreadPoolExecutor(max_workers=10) as executor:
job_results: list[Future] = [executor.submit(process_job, job) for job in job_results: list[Future] = [
jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]] executor.submit(process_job, job)
for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
]
job_list = [result.result() for result in job_results if result.result()] job_list = [result.result() for result in job_results if result.result()]

View File

@ -6,7 +6,14 @@ from bs4 import BeautifulSoup
from bs4.element import Tag from bs4.element import Tag
from .. import Scraper, ScraperInput, Site from .. import Scraper, ScraperInput, Site
from ...jobs import JobPost, Location, JobResponse, JobType, Compensation, CompensationInterval from ...jobs import (
JobPost,
Location,
JobResponse,
JobType,
Compensation,
CompensationInterval,
)
class LinkedInScraper(Scraper): class LinkedInScraper(Scraper):
@ -117,7 +124,9 @@ class LinkedInScraper(Scraper):
date_posted=date_posted, date_posted=date_posted,
job_url=job_url, job_url=job_url,
job_type=job_type, job_type=job_type,
compensation=Compensation(interval=CompensationInterval.YEARLY, currency="USD") compensation=Compensation(
interval=CompensationInterval.YEARLY, currency="USD"
),
) )
job_list.append(job_post) job_list.append(job_post)
if ( if (

View File

@ -2,7 +2,7 @@ import math
import json import json
import re import re
from datetime import datetime from datetime import datetime
from typing import Optional, Tuple, List from typing import Optional, Tuple
from urllib.parse import urlparse, parse_qs from urllib.parse import urlparse, parse_qs
import tls_client import tls_client
@ -11,7 +11,14 @@ from bs4.element import Tag
from concurrent.futures import ThreadPoolExecutor, Future from concurrent.futures import ThreadPoolExecutor, Future
from .. import Scraper, ScraperInput, Site, StatusException from .. import Scraper, ScraperInput, Site, StatusException
from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType from ...jobs import (
JobPost,
Compensation,
CompensationInterval,
Location,
JobResponse,
JobType,
)
class ZipRecruiterScraper(Scraper): class ZipRecruiterScraper(Scraper):
@ -55,7 +62,7 @@ class ZipRecruiterScraper(Scraper):
"search": scraper_input.search_term, "search": scraper_input.search_term,
"location": scraper_input.location, "location": scraper_input.location,
"page": page, "page": page,
"form": "jobs-landing" "form": "jobs-landing",
} }
if scraper_input.is_remote: if scraper_input.is_remote:
@ -65,7 +72,9 @@ class ZipRecruiterScraper(Scraper):
params["radius"] = scraper_input.distance params["radius"] = scraper_input.distance
if job_type_value: if job_type_value:
params["refine_by_employment"] = f"employment_type:employment_type:{job_type_value}" params[
"refine_by_employment"
] = f"employment_type:employment_type:{job_type_value}"
response = self.session.get( response = self.session.get(
self.url + "/jobs-search", self.url + "/jobs-search",
@ -90,11 +99,14 @@ class ZipRecruiterScraper(Scraper):
with ThreadPoolExecutor(max_workers=10) as executor: with ThreadPoolExecutor(max_workers=10) as executor:
if "jobList" in data and data["jobList"]: if "jobList" in data and data["jobList"]:
jobs_js = data["jobList"] jobs_js = data["jobList"]
job_results = [executor.submit(self.process_job_js, job) for job in jobs_js] job_results = [
executor.submit(self.process_job_js, job) for job in jobs_js
]
else: else:
jobs_html = soup.find_all("div", {"class": "job_content"}) jobs_html = soup.find_all("div", {"class": "job_content"})
job_results = [executor.submit(self.process_job_html, job) for job in job_results = [
jobs_html] executor.submit(self.process_job_html, job) for job in jobs_html
]
job_list = [result.result() for result in job_results if result.result()] job_list = [result.result() for result in job_results if result.result()]
@ -107,8 +119,9 @@ class ZipRecruiterScraper(Scraper):
:return: job_response :return: job_response
""" """
pages_to_process = max(
pages_to_process = max(3, math.ceil(scraper_input.results_wanted / self.jobs_per_page)) 3, math.ceil(scraper_input.results_wanted / self.jobs_per_page)
)
try: try:
#: get first page to initialize session #: get first page to initialize session
@ -125,7 +138,6 @@ class ZipRecruiterScraper(Scraper):
job_list += jobs job_list += jobs
except StatusException as e: except StatusException as e:
return JobResponse( return JobResponse(
success=False, success=False,
@ -162,9 +174,7 @@ class ZipRecruiterScraper(Scraper):
title = job.find("h2", {"class": "title"}).text title = job.find("h2", {"class": "title"}).text
company = job.find("a", {"class": "company_name"}).text.strip() company = job.find("a", {"class": "company_name"}).text.strip()
description, updated_job_url = self.get_description( description, updated_job_url = self.get_description(job_url)
job_url
)
if updated_job_url is not None: if updated_job_url is not None:
job_url = updated_job_url job_url = updated_job_url
if description is None: if description is None:
@ -173,10 +183,7 @@ class ZipRecruiterScraper(Scraper):
job_type_element = job.find("li", {"class": "perk_item perk_type"}) job_type_element = job.find("li", {"class": "perk_item perk_type"})
if job_type_element: if job_type_element:
job_type_text = ( job_type_text = (
job_type_element.text.strip() job_type_element.text.strip().lower().replace("-", "").replace(" ", "")
.lower()
.replace("-", "")
.replace(" ", "")
) )
if job_type_text == "contractor": if job_type_text == "contractor":
job_type_text = "contract" job_type_text = "contract"
@ -201,12 +208,16 @@ class ZipRecruiterScraper(Scraper):
def process_job_js(self, job: dict) -> JobPost: def process_job_js(self, job: dict) -> JobPost:
# Map the job data to the expected fields by the Pydantic model # Map the job data to the expected fields by the Pydantic model
title = job.get("Title") title = job.get("Title")
description = BeautifulSoup(job.get("Snippet","").strip(), "html.parser").get_text() description = BeautifulSoup(
job.get("Snippet", "").strip(), "html.parser"
).get_text()
company = job.get("OrgName") company = job.get("OrgName")
location = Location(city=job.get("City"), state=job.get("State")) location = Location(city=job.get("City"), state=job.get("State"))
try: try:
job_type = ZipRecruiterScraper.job_type_from_string(job.get("EmploymentType", "").replace("-", "_").lower()) job_type = ZipRecruiterScraper.job_type_from_string(
job.get("EmploymentType", "").replace("-", "_").lower()
)
except ValueError: except ValueError:
# print(f"Skipping job due to unrecognized job type: {job.get('EmploymentType')}") # print(f"Skipping job due to unrecognized job type: {job.get('EmploymentType')}")
return None return None
@ -215,14 +226,14 @@ class ZipRecruiterScraper(Scraper):
salary_parts = formatted_salary.split(" ") salary_parts = formatted_salary.split(" ")
min_salary_str = salary_parts[0][1:].replace(",", "") min_salary_str = salary_parts[0][1:].replace(",", "")
if '.' in min_salary_str: if "." in min_salary_str:
min_amount = int(float(min_salary_str) * 1000) min_amount = int(float(min_salary_str) * 1000)
else: else:
min_amount = int(min_salary_str.replace("K", "000")) min_amount = int(min_salary_str.replace("K", "000"))
if len(salary_parts) >= 3 and salary_parts[2].startswith("$"): if len(salary_parts) >= 3 and salary_parts[2].startswith("$"):
max_salary_str = salary_parts[2][1:].replace(",", "") max_salary_str = salary_parts[2][1:].replace(",", "")
if '.' in max_salary_str: if "." in max_salary_str:
max_amount = int(float(max_salary_str) * 1000) max_amount = int(float(max_salary_str) * 1000)
else: else:
max_amount = int(max_salary_str.replace("K", "000")) max_amount = int(max_salary_str.replace("K", "000"))
@ -232,10 +243,12 @@ class ZipRecruiterScraper(Scraper):
compensation = Compensation( compensation = Compensation(
interval=CompensationInterval.YEARLY, interval=CompensationInterval.YEARLY,
min_amount=min_amount, min_amount=min_amount,
max_amount=max_amount max_amount=max_amount,
) )
save_job_url = job.get("SaveJobURL", "") save_job_url = job.get("SaveJobURL", "")
posted_time_match = re.search(r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url) posted_time_match = re.search(
r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url
)
if posted_time_match: if posted_time_match:
date_time_str = posted_time_match.group(1) date_time_str = posted_time_match.group(1)
date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ") date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ")
@ -269,10 +282,7 @@ class ZipRecruiterScraper(Scraper):
return item return item
raise ValueError(f"Invalid value for JobType: {value}") raise ValueError(f"Invalid value for JobType: {value}")
def get_description( def get_description(self, job_page_url: str) -> Tuple[Optional[str], Optional[str]]:
self,
job_page_url: str
) -> Tuple[Optional[str], Optional[str]]:
""" """
Retrieves job description by going to the job page url Retrieves job description by going to the job page url
:param job_page_url: :param job_page_url:

0
src/tests/__init__.py Normal file
View File

View File

@ -1,4 +1,4 @@
from jobspy import scrape_jobs from ..jobspy import scrape_jobs
def test_indeed(): def test_indeed():