Validation error (#35)

pull/38/head
Cullen Watson 2023-09-03 20:05:31 -05:00 committed by GitHub
parent 69b47a2053
commit 7ae7ecdee8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 519 additions and 486 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
/venv/
/ven/
**/__pycache__/
**/.pytest_cache/
*.pyc
.env
dist

View File

@ -1,12 +1,34 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "00a94b47-f47b-420f-ba7e-714ef219c006",
"metadata": {},
"outputs": [],
"source": [
"from jobspy import scrape_jobs\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c3f21577-477d-451e-9914-5d67e8a89075",
"metadata": {
"scrolled": true
},
"id": "9f773e6c-d9fc-42cc-b0ef-63b739e78435",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_columns', None)\n",
"pd.set_option('display.max_rows', None)\n",
"pd.set_option('display.width', None)\n",
"pd.set_option('display.max_colwidth', 50)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1253c1f8-9437-492e-9dd3-e7fe51099420",
"metadata": {},
"outputs": [
{
"data": {
@ -46,20 +68,104 @@
" <tr>\n",
" <th>0</th>\n",
" <td>indeed</td>\n",
" <td>Firmware Engineer</td>\n",
" <td>Advanced Motion Controls</td>\n",
" <td>Camarillo</td>\n",
" <td>CA</td>\n",
" <td>Mental Health Therapist</td>\n",
" <td>Sandstone Care</td>\n",
" <td>Broomfield</td>\n",
" <td>CO</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>145000</td>\n",
" <td>110000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=a2e7077fdd3c...</td>\n",
" <td>We are looking for an experienced Firmware Eng...</td>\n",
" <td>68000</td>\n",
" <td>57500</td>\n",
" <td>https://www.indeed.com/viewjob?jk=f5f33d72e030...</td>\n",
" <td>Mental Health Therapist- Broomfield, CO Locati...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>indeed</td>\n",
" <td>.NET Developer</td>\n",
" <td>Noir Consulting</td>\n",
" <td>Irving</td>\n",
" <td>TX</td>\n",
" <td>None</td>\n",
" <td>yearly</td>\n",
" <td>200000</td>\n",
" <td>200000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=1b22ba65296c...</td>\n",
" <td>.NET Software Engineer, C#, WPF - Irving (Tech...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>indeed</td>\n",
" <td>Senior Software Engineer</td>\n",
" <td>Johns Hopkins Applied Physics Laboratory (APL)</td>\n",
" <td>Laurel</td>\n",
" <td>MD</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.indeed.com/viewjob?jk=309eed270a88...</td>\n",
" <td>Description Are you a communications systems d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>indeed</td>\n",
" <td>Front End Developer</td>\n",
" <td>Verkada</td>\n",
" <td>San Mateo</td>\n",
" <td>CA</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>285000</td>\n",
" <td>120000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=a3ea45daca75...</td>\n",
" <td>Who We Are Verkada is the largest cloud-based ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>indeed</td>\n",
" <td>Software Engineer</td>\n",
" <td>Adobe</td>\n",
" <td>San Jose</td>\n",
" <td>CA</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>142700</td>\n",
" <td>73200</td>\n",
" <td>https://www.indeed.com/viewjob?jk=0f2dc9901fc7...</td>\n",
" <td>Our Company Changing the world through digital...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>indeed</td>\n",
" <td>Full Stack Developer</td>\n",
" <td>Comcast</td>\n",
" <td>Philadelphia</td>\n",
" <td>PA</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>184663</td>\n",
" <td>78789</td>\n",
" <td>https://www.indeed.com/viewjob?jk=eb5c927221eb...</td>\n",
" <td>Make your mark at Comcast - a Fortune 30 globa...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>indeed</td>\n",
" <td>Senior Software Engineer</td>\n",
" <td>Smart City Solutions</td>\n",
" <td></td>\n",
" <td>FL</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>100000</td>\n",
" <td>85000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=ba1945f143a1...</td>\n",
" <td>Smart City hiring a full stack software develo...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>indeed</td>\n",
" <td>Computer Engineer</td>\n",
" <td>Honeywell</td>\n",
" <td></td>\n",
@ -72,116 +178,32 @@
" <td>Join a team recognized for leadership, innovat...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>indeed</td>\n",
" <td>Software Engineer</td>\n",
" <td>Splunk</td>\n",
" <td>Remote</td>\n",
" <td>None</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>159500</td>\n",
" <td>116000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=155495ca3f46...</td>\n",
" <td>A little about us. Splunk is the key to enterp...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>indeed</td>\n",
" <td>Development Operations Engineer</td>\n",
" <td>Stratacache</td>\n",
" <td>Dayton</td>\n",
" <td>OH</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>90000</td>\n",
" <td>83573</td>\n",
" <td>https://www.indeed.com/viewjob?jk=77cf3540c06e...</td>\n",
" <td>Stratacache, Inc. delivers in-store retail exp...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>indeed</td>\n",
" <td>Computer Engineer</td>\n",
" <td>Honeywell</td>\n",
" <td></td>\n",
" <td>None</td>\n",
" <td>fulltime</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.indeed.com/viewjob?jk=7fadbb7c936f...</td>\n",
" <td>Join a team recognized for leadership, innovat...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>indeed</td>\n",
" <td>Full Stack Developer</td>\n",
" <td>Reinventing Geospatial, Inc. (RGi)</td>\n",
" <td>Herndon</td>\n",
" <td>VA</td>\n",
" <td>fulltime</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.indeed.com/viewjob?jk=11b2b5b0dd44...</td>\n",
" <td>Job Highlights As a Full Stack Software Engine...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>indeed</td>\n",
" <td>Software Engineer</td>\n",
" <td>Workiva</td>\n",
" <td>Remote</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>yearly</td>\n",
" <td>134000</td>\n",
" <td>79000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=ec3ab6eb9253...</td>\n",
" <td>Are you ready to embark on an exciting journey...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>indeed</td>\n",
" <td>Senior Software Engineer</td>\n",
" <td>SciTec</td>\n",
" <td>Boulder</td>\n",
" <td>CO</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>164000</td>\n",
" <td>93000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=781e4cf0cf6d...</td>\n",
" <td>SciTec has been awarded multiple government co...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>indeed</td>\n",
" <td>Software Engineer</td>\n",
" <td>Microsoft</td>\n",
" <td></td>\n",
" <td>Fidelity Investments</td>\n",
" <td>Westlake</td>\n",
" <td>TX</td>\n",
" <td>None</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>182600</td>\n",
" <td>94300</td>\n",
" <td>https://www.indeed.com/viewjob?jk=21e05b9e9d96...</td>\n",
" <td>At Microsoft we are seeking people who have a ...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.indeed.com/viewjob?jk=b600392166bb...</td>\n",
" <td>Job Description: Software Engineer in Test The...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>indeed</td>\n",
" <td>Software Engineer</td>\n",
" <td>Avalon Healthcare Solutions</td>\n",
" <td>Remote</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.indeed.com/viewjob?jk=da35b9bb74a0...</td>\n",
" <td>Avalon Healthcare Solutions, headquartered in ...</td>\n",
" <td>Fpga Engineer</td>\n",
" <td>R-DEX Systems, Inc.</td>\n",
" <td>Atlanta</td>\n",
" <td>GA</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>160000</td>\n",
" <td>120000</td>\n",
" <td>https://www.indeed.com/viewjob?jk=a7e9d356c333...</td>\n",
" <td>Title: Senior DSP/FPGA Firmware Engineer Descr...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
@ -236,7 +258,7 @@
" <td>yearly</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701775201</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701770659</td>\n",
" <td>Description:By bringing together people that u...</td>\n",
" </tr>\n",
" <tr>\n",
@ -250,7 +272,7 @@
" <td>yearly</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701772329</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701769637</td>\n",
" <td>Description:By bringing together people that u...</td>\n",
" </tr>\n",
" <tr>\n",
@ -264,12 +286,26 @@
" <td>yearly</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701769637</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701772329</td>\n",
" <td>Description:By bringing together people that u...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>linkedin</td>\n",
" <td>Software Engineer - Early Career</td>\n",
" <td>Lockheed Martin</td>\n",
" <td>Fort Worth</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701775201</td>\n",
" <td>Description:By bringing together people that u...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>linkedin</td>\n",
" <td>Software Engineer</td>\n",
" <td>SpiderOak</td>\n",
" <td>Austin</td>\n",
@ -282,20 +318,6 @@
" <td>We're only as strong as our weakest link.In th...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>linkedin</td>\n",
" <td>Software Engineer - Early Career</td>\n",
" <td>Lockheed Martin</td>\n",
" <td>Fort Worth</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://www.linkedin.com/jobs/view/3701770659</td>\n",
" <td>Description:By bringing together people that u...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>linkedin</td>\n",
" <td>Full-Stack Software Engineer</td>\n",
@ -326,20 +348,6 @@
" <tr>\n",
" <th>20</th>\n",
" <td>zip_recruiter</td>\n",
" <td>(USA) Software Engineer III - Prototype Engine...</td>\n",
" <td>Walmart</td>\n",
" <td>Dallas</td>\n",
" <td>TX</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>https://click.appcast.io/track/hcgsw4k?cs=ngp&amp;...</td>\n",
" <td>We are currently seeking a highly skilled and ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Engineer - New Grad</td>\n",
" <td>ZipRecruiter</td>\n",
" <td>Santa Monica</td>\n",
@ -348,53 +356,11 @@
" <td>yearly</td>\n",
" <td>130000</td>\n",
" <td>150000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/ziprecruiter...</td>\n",
" <td>We offer a hybrid work environment. Most US-ba...</td>\n",
" <td>https://www.ziprecruiter.com/c/ZipRecruiter/Jo...</td>\n",
" <td>Demonstrated foundation in software engineerin...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Developer</td>\n",
" <td>Robert Half</td>\n",
" <td>Corpus Christi</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>105000</td>\n",
" <td>115000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/robert-half-...</td>\n",
" <td>Robert Half has an opening for a Software Deve...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Engineer</td>\n",
" <td>Advantage Technical</td>\n",
" <td>Ontario</td>\n",
" <td>CA</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>100000</td>\n",
" <td>150000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/advantage-te...</td>\n",
" <td>New career opportunity available with major Ma...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Developer</td>\n",
" <td>Robert Half</td>\n",
" <td>Tucson</td>\n",
" <td>AZ</td>\n",
" <td>temporary</td>\n",
" <td>hourly</td>\n",
" <td>47</td>\n",
" <td>55</td>\n",
" <td>https://www.ziprecruiter.com/jobs/robert-half-...</td>\n",
" <td>Robert Half is accepting inquiries for a SQL S...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <th>21</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Full Stack Software Engineer</td>\n",
" <td>ZipRecruiter</td>\n",
@ -404,25 +370,11 @@
" <td>yearly</td>\n",
" <td>105000</td>\n",
" <td>145000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/ziprecruiter...</td>\n",
" <td>We offer a hybrid work environment. Most US-ba...</td>\n",
" <td>https://www.ziprecruiter.com/c/ZipRecruiter/Jo...</td>\n",
" <td>Experience in client side development using Re...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Developer IV</td>\n",
" <td>Kforce Inc.</td>\n",
" <td>Mountain View</td>\n",
" <td>CA</td>\n",
" <td>contract</td>\n",
" <td>hourly</td>\n",
" <td>55</td>\n",
" <td>75</td>\n",
" <td>https://www.kforce.com/Jobs/job.aspx?job=1696~...</td>\n",
" <td>Kforce has a client that is seeking a Software...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <th>22</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Developer | Onsite | Omaha, NE - Omaha</td>\n",
" <td>OneStaff Medical</td>\n",
@ -432,36 +384,106 @@
" <td>yearly</td>\n",
" <td>60000</td>\n",
" <td>110000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/onestaff-med...</td>\n",
" <td>Company Description: We are looking for a well...</td>\n",
" <td>https://www.ziprecruiter.com/c/OneStaff-Medica...</td>\n",
" <td>We are looking for a well-rounded Software Dev...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Senior Software Engineer, Onsite [Real-time]</td>\n",
" <td>Raytheon</td>\n",
" <td>McKinney</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>116000</td>\n",
" <td>153000</td>\n",
" <td>https://jsv3.recruitics.com/redirect?rx_cid=34...</td>\n",
" <td>By joining the Silent Knight team as a Senior ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Senior Software Engineer - TS/SCI **Minimum $2...</td>\n",
" <td>Raytheon</td>\n",
" <td>Dallas</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>122000</td>\n",
" <td>162000</td>\n",
" <td>https://jsv3.recruitics.com/redirect?rx_cid=34...</td>\n",
" <td>Object Oriented Programming using C++ with Lin...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Engineer III (full stack, AI/ML, Djan...</td>\n",
" <td>Ayahealthcare</td>\n",
" <td>Remote</td>\n",
" <td>OR</td>\n",
" <td>None</td>\n",
" <td>yearly</td>\n",
" <td>156000</td>\n",
" <td>165000</td>\n",
" <td>https://click.appcast.io/track/hcbh0qq?cs=ngp&amp;...</td>\n",
" <td>The Software Engineer III will be an integral ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Engineer Full Stack</td>\n",
" <td>Generac Power Systems</td>\n",
" <td>Denver</td>\n",
" <td>CO</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>90000</td>\n",
" <td>115000</td>\n",
" <td>https://www.ziprecruiter.com/c/Generac-Power-S...</td>\n",
" <td>As a Software Engineer on the Energy Technolog...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Embedded Software Engineer (Fort Worth, TX or ...</td>\n",
" <td>Kubota</td>\n",
" <td>Fort Worth</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>122000</td>\n",
" <td>167000</td>\n",
" <td>https://us62e2.dayforcehcm.com/CandidatePortal...</td>\n",
" <td>Work with a cross-functional team to design, t...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Senior Software Engineer</td>\n",
" <td>RightStaff, Inc.</td>\n",
" <td>Dallas</td>\n",
" <td>Senior Software Engineer (FT)</td>\n",
" <td>National Indoor RV Center</td>\n",
" <td>Lewisville</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>yearly</td>\n",
" <td>120000</td>\n",
" <td>180000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/rightstaff-i...</td>\n",
" <td>Job Description:We are seeking a talented and ...</td>\n",
" <td>125000</td>\n",
" <td>0</td>\n",
" <td>https://www.ziprecruiter.com/c/National-Indoor...</td>\n",
" <td>As a Senior Software Engineer, you will: * Des...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>zip_recruiter</td>\n",
" <td>Software Developer - .Net Core - 12886</td>\n",
" <td>Walker Elliott</td>\n",
" <td>2024 Next Gen IT Program | Software Engineerin...</td>\n",
" <td>Southern Glazer's Wine &amp; Spirits</td>\n",
" <td>Dallas</td>\n",
" <td>TX</td>\n",
" <td>fulltime</td>\n",
" <td>None</td>\n",
" <td>yearly</td>\n",
" <td>105000</td>\n",
" <td>130000</td>\n",
" <td>https://www.ziprecruiter.com/jobs/walker-ellio...</td>\n",
" <td>Our highly successful DFW based client has bee...</td>\n",
" <td>70000</td>\n",
" <td>0</td>\n",
" <td>https://click.appcast.io/track/hdsbnae?cs=b4&amp;j...</td>\n",
" <td>Finally, through the work assigned, the analys...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@ -469,219 +491,185 @@
],
"text/plain": [
" site title \\\n",
"0 indeed Firmware Engineer \n",
"1 indeed Computer Engineer \n",
"2 indeed Software Engineer \n",
"3 indeed Development Operations Engineer \n",
"4 indeed Computer Engineer \n",
"0 indeed Mental Health Therapist \n",
"1 indeed .NET Developer \n",
"2 indeed Senior Software Engineer \n",
"3 indeed Front End Developer \n",
"4 indeed Software Engineer \n",
"5 indeed Full Stack Developer \n",
"6 indeed Software Engineer \n",
"7 indeed Senior Software Engineer \n",
"6 indeed Senior Software Engineer \n",
"7 indeed Computer Engineer \n",
"8 indeed Software Engineer \n",
"9 indeed Software Engineer \n",
"9 indeed Fpga Engineer \n",
"10 linkedin Software Engineer \n",
"11 linkedin Software Engineer - Early Career \n",
"12 linkedin Software Engineer - Early Career \n",
"13 linkedin Software Engineer - Early Career \n",
"14 linkedin Software Engineer - Early Career \n",
"15 linkedin Software Engineer - Early Career \n",
"16 linkedin Software Engineer \n",
"17 linkedin Software Engineer - Early Career \n",
"16 linkedin Software Engineer - Early Career \n",
"17 linkedin Software Engineer \n",
"18 linkedin Full-Stack Software Engineer \n",
"19 linkedin Software Engineer \n",
"20 zip_recruiter (USA) Software Engineer III - Prototype Engine... \n",
"21 zip_recruiter Software Engineer - New Grad \n",
"22 zip_recruiter Software Developer \n",
"23 zip_recruiter Software Engineer \n",
"24 zip_recruiter Software Developer \n",
"25 zip_recruiter Full Stack Software Engineer \n",
"26 zip_recruiter Software Developer IV \n",
"27 zip_recruiter Software Developer | Onsite | Omaha, NE - Omaha \n",
"28 zip_recruiter Senior Software Engineer \n",
"29 zip_recruiter Software Developer - .Net Core - 12886 \n",
"20 zip_recruiter Software Engineer - New Grad \n",
"21 zip_recruiter Full Stack Software Engineer \n",
"22 zip_recruiter Software Developer | Onsite | Omaha, NE - Omaha \n",
"23 zip_recruiter Senior Software Engineer, Onsite [Real-time] \n",
"24 zip_recruiter Senior Software Engineer - TS/SCI **Minimum $2... \n",
"25 zip_recruiter Software Engineer III (full stack, AI/ML, Djan... \n",
"26 zip_recruiter Software Engineer Full Stack \n",
"27 zip_recruiter Embedded Software Engineer (Fort Worth, TX or ... \n",
"28 zip_recruiter Senior Software Engineer (FT) \n",
"29 zip_recruiter 2024 Next Gen IT Program | Software Engineerin... \n",
"\n",
" company_name city state job_type \\\n",
"0 Advanced Motion Controls Camarillo CA fulltime \n",
"1 Honeywell None fulltime \n",
"2 Splunk Remote None fulltime \n",
"3 Stratacache Dayton OH fulltime \n",
"4 Honeywell None fulltime \n",
"5 Reinventing Geospatial, Inc. (RGi) Herndon VA fulltime \n",
"6 Workiva Remote None None \n",
"7 SciTec Boulder CO fulltime \n",
"8 Microsoft None fulltime \n",
"9 Avalon Healthcare Solutions Remote None None \n",
"10 Fieldguide San Francisco CA fulltime \n",
"11 Lockheed Martin Sunnyvale CA fulltime \n",
"12 Lockheed Martin Edwards CA fulltime \n",
"13 Lockheed Martin Fort Worth TX fulltime \n",
"14 Lockheed Martin Fort Worth TX fulltime \n",
"15 Lockheed Martin Fort Worth TX fulltime \n",
"16 SpiderOak Austin TX fulltime \n",
"17 Lockheed Martin Fort Worth TX fulltime \n",
"18 Rain New York NY fulltime \n",
"19 Nike Portland OR contract \n",
"20 Walmart Dallas TX None \n",
"21 ZipRecruiter Santa Monica CA fulltime \n",
"22 Robert Half Corpus Christi TX fulltime \n",
"23 Advantage Technical Ontario CA fulltime \n",
"24 Robert Half Tucson AZ temporary \n",
"25 ZipRecruiter Phoenix AZ fulltime \n",
"26 Kforce Inc. Mountain View CA contract \n",
"27 OneStaff Medical Omaha NE fulltime \n",
"28 RightStaff, Inc. Dallas TX fulltime \n",
"29 Walker Elliott Dallas TX fulltime \n",
" company_name city state \\\n",
"0 Sandstone Care Broomfield CO \n",
"1 Noir Consulting Irving TX \n",
"2 Johns Hopkins Applied Physics Laboratory (APL) Laurel MD \n",
"3 Verkada San Mateo CA \n",
"4 Adobe San Jose CA \n",
"5 Comcast Philadelphia PA \n",
"6 Smart City Solutions FL \n",
"7 Honeywell None \n",
"8 Fidelity Investments Westlake TX \n",
"9 R-DEX Systems, Inc. Atlanta GA \n",
"10 Fieldguide San Francisco CA \n",
"11 Lockheed Martin Sunnyvale CA \n",
"12 Lockheed Martin Edwards CA \n",
"13 Lockheed Martin Fort Worth TX \n",
"14 Lockheed Martin Fort Worth TX \n",
"15 Lockheed Martin Fort Worth TX \n",
"16 Lockheed Martin Fort Worth TX \n",
"17 SpiderOak Austin TX \n",
"18 Rain New York NY \n",
"19 Nike Portland OR \n",
"20 ZipRecruiter Santa Monica CA \n",
"21 ZipRecruiter Phoenix AZ \n",
"22 OneStaff Medical Omaha NE \n",
"23 Raytheon McKinney TX \n",
"24 Raytheon Dallas TX \n",
"25 Ayahealthcare Remote OR \n",
"26 Generac Power Systems Denver CO \n",
"27 Kubota Fort Worth TX \n",
"28 National Indoor RV Center Lewisville TX \n",
"29 Southern Glazer's Wine & Spirits Dallas TX \n",
"\n",
" interval min_amount max_amount \\\n",
"0 yearly 145000 110000 \n",
"1 None None None \n",
"2 yearly 159500 116000 \n",
"3 yearly 90000 83573 \n",
"4 None None None \n",
"5 None None None \n",
"6 yearly 134000 79000 \n",
"7 yearly 164000 93000 \n",
"8 yearly 182600 94300 \n",
"9 None None None \n",
"10 yearly None None \n",
"11 yearly None None \n",
"12 yearly None None \n",
"13 yearly None None \n",
"14 yearly None None \n",
"15 yearly None None \n",
"16 yearly None None \n",
"17 yearly None None \n",
"18 yearly None None \n",
"19 yearly None None \n",
"20 None None None \n",
"21 yearly 130000 150000 \n",
"22 yearly 105000 115000 \n",
"23 yearly 100000 150000 \n",
"24 hourly 47 55 \n",
"25 yearly 105000 145000 \n",
"26 hourly 55 75 \n",
"27 yearly 60000 110000 \n",
"28 yearly 120000 180000 \n",
"29 yearly 105000 130000 \n",
" job_type interval min_amount max_amount \\\n",
"0 fulltime yearly 68000 57500 \n",
"1 None yearly 200000 200000 \n",
"2 None None None None \n",
"3 fulltime yearly 285000 120000 \n",
"4 fulltime yearly 142700 73200 \n",
"5 fulltime yearly 184663 78789 \n",
"6 fulltime yearly 100000 85000 \n",
"7 fulltime None None None \n",
"8 None None None None \n",
"9 fulltime yearly 160000 120000 \n",
"10 fulltime yearly None None \n",
"11 fulltime yearly None None \n",
"12 fulltime yearly None None \n",
"13 fulltime yearly None None \n",
"14 fulltime yearly None None \n",
"15 fulltime yearly None None \n",
"16 fulltime yearly None None \n",
"17 fulltime yearly None None \n",
"18 fulltime yearly None None \n",
"19 contract yearly None None \n",
"20 fulltime yearly 130000 150000 \n",
"21 fulltime yearly 105000 145000 \n",
"22 fulltime yearly 60000 110000 \n",
"23 fulltime yearly 116000 153000 \n",
"24 fulltime yearly 122000 162000 \n",
"25 None yearly 156000 165000 \n",
"26 fulltime yearly 90000 115000 \n",
"27 fulltime yearly 122000 167000 \n",
"28 fulltime yearly 125000 0 \n",
"29 None yearly 70000 0 \n",
"\n",
" job_url \\\n",
"0 https://www.indeed.com/viewjob?jk=a2e7077fdd3c... \n",
"1 https://www.indeed.com/viewjob?jk=5a1da623ee75... \n",
"2 https://www.indeed.com/viewjob?jk=155495ca3f46... \n",
"3 https://www.indeed.com/viewjob?jk=77cf3540c06e... \n",
"4 https://www.indeed.com/viewjob?jk=7fadbb7c936f... \n",
"5 https://www.indeed.com/viewjob?jk=11b2b5b0dd44... \n",
"6 https://www.indeed.com/viewjob?jk=ec3ab6eb9253... \n",
"7 https://www.indeed.com/viewjob?jk=781e4cf0cf6d... \n",
"8 https://www.indeed.com/viewjob?jk=21e05b9e9d96... \n",
"9 https://www.indeed.com/viewjob?jk=da35b9bb74a0... \n",
"0 https://www.indeed.com/viewjob?jk=f5f33d72e030... \n",
"1 https://www.indeed.com/viewjob?jk=1b22ba65296c... \n",
"2 https://www.indeed.com/viewjob?jk=309eed270a88... \n",
"3 https://www.indeed.com/viewjob?jk=a3ea45daca75... \n",
"4 https://www.indeed.com/viewjob?jk=0f2dc9901fc7... \n",
"5 https://www.indeed.com/viewjob?jk=eb5c927221eb... \n",
"6 https://www.indeed.com/viewjob?jk=ba1945f143a1... \n",
"7 https://www.indeed.com/viewjob?jk=5a1da623ee75... \n",
"8 https://www.indeed.com/viewjob?jk=b600392166bb... \n",
"9 https://www.indeed.com/viewjob?jk=a7e9d356c333... \n",
"10 https://www.linkedin.com/jobs/view/3696158160 \n",
"11 https://www.linkedin.com/jobs/view/3693012711 \n",
"12 https://www.linkedin.com/jobs/view/3700669785 \n",
"13 https://www.linkedin.com/jobs/view/3701775201 \n",
"14 https://www.linkedin.com/jobs/view/3701772329 \n",
"15 https://www.linkedin.com/jobs/view/3701769637 \n",
"16 https://www.linkedin.com/jobs/view/3707174719 \n",
"17 https://www.linkedin.com/jobs/view/3701770659 \n",
"13 https://www.linkedin.com/jobs/view/3701770659 \n",
"14 https://www.linkedin.com/jobs/view/3701769637 \n",
"15 https://www.linkedin.com/jobs/view/3701772329 \n",
"16 https://www.linkedin.com/jobs/view/3701775201 \n",
"17 https://www.linkedin.com/jobs/view/3707174719 \n",
"18 https://www.linkedin.com/jobs/view/3696158877 \n",
"19 https://www.linkedin.com/jobs/view/3693340247 \n",
"20 https://click.appcast.io/track/hcgsw4k?cs=ngp&... \n",
"21 https://www.ziprecruiter.com/jobs/ziprecruiter... \n",
"22 https://www.ziprecruiter.com/jobs/robert-half-... \n",
"23 https://www.ziprecruiter.com/jobs/advantage-te... \n",
"24 https://www.ziprecruiter.com/jobs/robert-half-... \n",
"25 https://www.ziprecruiter.com/jobs/ziprecruiter... \n",
"26 https://www.kforce.com/Jobs/job.aspx?job=1696~... \n",
"27 https://www.ziprecruiter.com/jobs/onestaff-med... \n",
"28 https://www.ziprecruiter.com/jobs/rightstaff-i... \n",
"29 https://www.ziprecruiter.com/jobs/walker-ellio... \n",
"20 https://www.ziprecruiter.com/c/ZipRecruiter/Jo... \n",
"21 https://www.ziprecruiter.com/c/ZipRecruiter/Jo... \n",
"22 https://www.ziprecruiter.com/c/OneStaff-Medica... \n",
"23 https://jsv3.recruitics.com/redirect?rx_cid=34... \n",
"24 https://jsv3.recruitics.com/redirect?rx_cid=34... \n",
"25 https://click.appcast.io/track/hcbh0qq?cs=ngp&... \n",
"26 https://www.ziprecruiter.com/c/Generac-Power-S... \n",
"27 https://us62e2.dayforcehcm.com/CandidatePortal... \n",
"28 https://www.ziprecruiter.com/c/National-Indoor... \n",
"29 https://click.appcast.io/track/hdsbnae?cs=b4&j... \n",
"\n",
" description \n",
"0 We are looking for an experienced Firmware Eng... \n",
"1 Join a team recognized for leadership, innovat... \n",
"2 A little about us. Splunk is the key to enterp... \n",
"3 Stratacache, Inc. delivers in-store retail exp... \n",
"4 Join a team recognized for leadership, innovat... \n",
"5 Job Highlights As a Full Stack Software Engine... \n",
"6 Are you ready to embark on an exciting journey... \n",
"7 SciTec has been awarded multiple government co... \n",
"8 At Microsoft we are seeking people who have a ... \n",
"9 Avalon Healthcare Solutions, headquartered in ... \n",
"0 Mental Health Therapist- Broomfield, CO Locati... \n",
"1 .NET Software Engineer, C#, WPF - Irving (Tech... \n",
"2 Description Are you a communications systems d... \n",
"3 Who We Are Verkada is the largest cloud-based ... \n",
"4 Our Company Changing the world through digital... \n",
"5 Make your mark at Comcast - a Fortune 30 globa... \n",
"6 Smart City hiring a full stack software develo... \n",
"7 Join a team recognized for leadership, innovat... \n",
"8 Job Description: Software Engineer in Test The... \n",
"9 Title: Senior DSP/FPGA Firmware Engineer Descr... \n",
"10 About us:Fieldguide is establishing a new stat... \n",
"11 Description:By bringing together people that u... \n",
"12 Description:By bringing together people that u... \n",
"13 Description:By bringing together people that u... \n",
"14 Description:By bringing together people that u... \n",
"15 Description:By bringing together people that u... \n",
"16 We're only as strong as our weakest link.In th... \n",
"17 Description:By bringing together people that u... \n",
"16 Description:By bringing together people that u... \n",
"17 We're only as strong as our weakest link.In th... \n",
"18 Rains mission is to create the fastest and ea... \n",
"19 Work options: FlexibleWe consider remote, on-p... \n",
"20 We are currently seeking a highly skilled and ... \n",
"21 We offer a hybrid work environment. Most US-ba... \n",
"22 Robert Half has an opening for a Software Deve... \n",
"23 New career opportunity available with major Ma... \n",
"24 Robert Half is accepting inquiries for a SQL S... \n",
"25 We offer a hybrid work environment. Most US-ba... \n",
"26 Kforce has a client that is seeking a Software... \n",
"27 Company Description: We are looking for a well... \n",
"28 Job Description:We are seeking a talented and ... \n",
"29 Our highly successful DFW based client has bee... "
"20 Demonstrated foundation in software engineerin... \n",
"21 Experience in client side development using Re... \n",
"22 We are looking for a well-rounded Software Dev... \n",
"23 By joining the Silent Knight team as a Senior ... \n",
"24 Object Oriented Programming using C++ with Lin... \n",
"25 The Software Engineer III will be an integral ... \n",
"26 As a Software Engineer on the Energy Technolog... \n",
"27 Work with a cross-functional team to design, t... \n",
"28 As a Senior Software Engineer, you will: * Des... \n",
"29 Finally, through the work assigned, the analys... "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "display_data"
"output_type": "execute_result"
}
],
"source": [
"from jobspy import scrape_jobs\n",
"import pandas as pd\n",
"\n",
"jobs: pd.DataFrame = scrape_jobs(\n",
"scrape_jobs(\n",
" site_name=[\"indeed\", \"linkedin\", \"zip_recruiter\"],\n",
" search_term=\"software engineer\",\n",
" results_wanted=10\n",
")\n",
"\n",
"if jobs.empty:\n",
" print(\"No jobs found.\")\n",
"else:\n",
" # 1 print\n",
" pd.set_option('display.max_columns', None)\n",
" pd.set_option('display.max_rows', None)\n",
" pd.set_option('display.width', None)\n",
" pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc\n",
" print(jobs)\n",
"\n",
" # 2 display in Jupyter Notebook\n",
" display(jobs)\n",
"\n",
" # 3 output to csv\n",
" jobs.to_csv('jobs.csv', index=False)"
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "efd667ef-fdf0-452a-b5e5-ce6825755be7",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "1574dc17-0a42-4655-964f-5c03a6d3deb0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "my-poetry-env",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "my-poetry-env"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@ -693,7 +681,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.11.4"
}
},
"nbformat": 4,

View File

@ -5,7 +5,9 @@
- Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously
- Aggregates the job postings in a Pandas DataFrame
![jobspy](https://github.com/cullenwatson/JobSpy/assets/78247585/ec7ef355-05f6-4fd3-8161-a817e31c5c57)
### Installation
`pip install python-jobspy`
@ -26,18 +28,18 @@ jobs: pd.DataFrame = scrape_jobs(
if jobs.empty:
print("No jobs found.")
else:
# 1 print
#1 print
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 50) # set to 0 to see full job url / desc
print(jobs)
# 2 display in Jupyter Notebook
# display(jobs)
#2 display in Jupyter Notebook
#display(jobs)
# 3 output to csv
# jobs.to_csv('jobs.csv', index=False)
#3 output to .csv
#jobs.to_csv('jobs.csv', index=False)
```
### Output
@ -51,8 +53,6 @@ zip_recruiter Software Engineer - New Grad ZipRecruiter Santa Monica
zip_recruiter Software Developer TEKsystems Phoenix AZ fulltime hourly 65 75 https://www.ziprecruiter.com/jobs/teksystems-0... Top Skills' Details• 6 years of Java developme...
```
### Parameters for `scrape_jobs()`
```plaintext
Required
├── site_name (List[enum]): linkedin, zip_recruiter, indeed
@ -87,12 +87,23 @@ JobPost
```
### FAQ
#### Encountering issues with your queries?
Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, please submit an issue.
#### Received a response code 429?
You have been blocked by the job board site for sending too many requests. ZipRecruiter seems to be the most aggressive at the moment. Consider waiting a few seconds, or try using a VPN. Proxy support coming soon.
## Frequently Asked Questions
---
**Q: Encountering issues with your queries?**
**A:** Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, [submit an issue](https://github.com/cullenwatson/JobSpy/issues).
---
**Q: Received a response code 429?**
**A:** This indicates that you have been blocked by the job board site for sending too many requests. Currently, **ZipRecruiter** is particularly aggressive with blocking. We recommend:
- Waiting a few seconds between requests.
- Trying a VPN to change your IP address.
**Note:** Proxy support is in development and coming soon!
---

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "python-jobspy"
version = "1.0.1"
version = "1.0.2"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <zachary@zacharysproducts.com>", "Cullen Watson <cullen@cullen.ai>"]
readme = "README.md"

View File

@ -24,15 +24,14 @@ def _map_str_to_site(site_name: str) -> Site:
def scrape_jobs(
site_name: str | Site | List[Site],
search_term: str,
location: str = "",
distance: int = None,
is_remote: bool = False,
job_type: JobType = None,
easy_apply: bool = False, # linkedin
results_wanted: int = 15
site_name: str | Site | List[Site],
search_term: str,
location: str = "",
distance: int = None,
is_remote: bool = False,
job_type: JobType = None,
easy_apply: bool = False, # linkedin
results_wanted: int = 15,
) -> pd.DataFrame:
"""
Asynchronously scrapes job data from multiple job sites.
@ -71,48 +70,59 @@ def scrape_jobs(
for site, job_response in results.items():
for job in job_response.jobs:
data = job.dict()
data['site'] = site
data["site"] = site
# Formatting JobType
data['job_type'] = data['job_type'].value if data['job_type'] else None
data["job_type"] = data["job_type"].value if data["job_type"] else None
# Formatting Location
location_obj = data.get('location')
location_obj = data.get("location")
if location_obj and isinstance(location_obj, dict):
data['city'] = location_obj.get('city', '')
data['state'] = location_obj.get('state', '')
data['country'] = location_obj.get('country', 'USA')
data["city"] = location_obj.get("city", "")
data["state"] = location_obj.get("state", "")
data["country"] = location_obj.get("country", "USA")
else:
data['city'] = None
data['state'] = None
data['country'] = None
data["city"] = None
data["state"] = None
data["country"] = None
# Formatting Compensation
compensation_obj = data.get('compensation')
compensation_obj = data.get("compensation")
if compensation_obj and isinstance(compensation_obj, dict):
data['interval'] = compensation_obj.get('interval').value if compensation_obj.get('interval') else None
data['min_amount'] = compensation_obj.get('min_amount')
data['max_amount'] = compensation_obj.get('max_amount')
data['currency'] = compensation_obj.get('currency', 'USD')
data["interval"] = (
compensation_obj.get("interval").value
if compensation_obj.get("interval")
else None
)
data["min_amount"] = compensation_obj.get("min_amount")
data["max_amount"] = compensation_obj.get("max_amount")
data["currency"] = compensation_obj.get("currency", "USD")
else:
data['interval'] = None
data['min_amount'] = None
data['max_amount'] = None
data['currency'] = None
data["interval"] = None
data["min_amount"] = None
data["max_amount"] = None
data["currency"] = None
job_df = pd.DataFrame([data])
dfs.append(job_df)
if dfs:
df = pd.concat(dfs, ignore_index=True)
desired_order = ['site', 'title', 'company_name', 'city', 'state','job_type',
'interval', 'min_amount', 'max_amount', 'job_url', 'description',]
desired_order = [
"site",
"title",
"company_name",
"city",
"state",
"job_type",
"interval",
"min_amount",
"max_amount",
"job_url",
"description",
]
df = df[desired_order]
else:
df = pd.DataFrame()
return df

View File

@ -19,7 +19,6 @@ class JobType(Enum):
VOLUNTEER = "volunteer"
class Location(BaseModel):
country: str = "USA"
city: str = None
@ -47,10 +46,10 @@ class JobPost(BaseModel):
job_url: str
location: Optional[Location]
description: str = None
description: Optional[str] = None
job_type: Optional[JobType] = None
compensation: Optional[Compensation] = None
date_posted: date = None
date_posted: Optional[date] = None
class JobResponse(BaseModel):

View File

@ -1,5 +1,5 @@
from ..jobs import Enum, BaseModel, JobType, JobResponse
from typing import List, Dict, Optional, Any
from typing import List, Optional, Any
class StatusException(Exception):

View File

@ -1,9 +1,8 @@
import re
import sys
import math
import json
from datetime import datetime
from typing import Optional, Tuple, List
from typing import Optional
import tls_client
import urllib.parse
@ -11,7 +10,14 @@ from bs4 import BeautifulSoup
from bs4.element import Tag
from concurrent.futures import ThreadPoolExecutor, Future
from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
from ...jobs import (
JobPost,
Compensation,
CompensationInterval,
Location,
JobResponse,
JobType,
)
from .. import Scraper, ScraperInput, Site, StatusException
@ -61,10 +67,7 @@ class IndeedScraper(Scraper):
params["sc"] = "0kf:" + "".join(sc_values) + ";"
response = session.get(self.url + "/jobs", params=params)
if (
response.status_code != 200
and response.status_code != 307
):
if response.status_code != 200 and response.status_code != 307:
raise StatusException(response.status_code)
soup = BeautifulSoup(response.content, "html.parser")
@ -136,8 +139,10 @@ class IndeedScraper(Scraper):
return job_post
with ThreadPoolExecutor(max_workers=10) as executor:
job_results: list[Future] = [executor.submit(process_job, job) for job in
jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]]
job_results: list[Future] = [
executor.submit(process_job, job)
for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
]
job_list = [result.result() for result in job_results if result.result()]

View File

@ -6,7 +6,14 @@ from bs4 import BeautifulSoup
from bs4.element import Tag
from .. import Scraper, ScraperInput, Site
from ...jobs import JobPost, Location, JobResponse, JobType, Compensation, CompensationInterval
from ...jobs import (
JobPost,
Location,
JobResponse,
JobType,
Compensation,
CompensationInterval,
)
class LinkedInScraper(Scraper):
@ -117,7 +124,9 @@ class LinkedInScraper(Scraper):
date_posted=date_posted,
job_url=job_url,
job_type=job_type,
compensation=Compensation(interval=CompensationInterval.YEARLY, currency="USD")
compensation=Compensation(
interval=CompensationInterval.YEARLY, currency="USD"
),
)
job_list.append(job_post)
if (

View File

@ -2,7 +2,7 @@ import math
import json
import re
from datetime import datetime
from typing import Optional, Tuple, List
from typing import Optional, Tuple
from urllib.parse import urlparse, parse_qs
import tls_client
@ -11,7 +11,14 @@ from bs4.element import Tag
from concurrent.futures import ThreadPoolExecutor, Future
from .. import Scraper, ScraperInput, Site, StatusException
from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
from ...jobs import (
JobPost,
Compensation,
CompensationInterval,
Location,
JobResponse,
JobType,
)
class ZipRecruiterScraper(Scraper):
@ -55,7 +62,7 @@ class ZipRecruiterScraper(Scraper):
"search": scraper_input.search_term,
"location": scraper_input.location,
"page": page,
"form": "jobs-landing"
"form": "jobs-landing",
}
if scraper_input.is_remote:
@ -65,7 +72,9 @@ class ZipRecruiterScraper(Scraper):
params["radius"] = scraper_input.distance
if job_type_value:
params["refine_by_employment"] = f"employment_type:employment_type:{job_type_value}"
params[
"refine_by_employment"
] = f"employment_type:employment_type:{job_type_value}"
response = self.session.get(
self.url + "/jobs-search",
@ -90,11 +99,14 @@ class ZipRecruiterScraper(Scraper):
with ThreadPoolExecutor(max_workers=10) as executor:
if "jobList" in data and data["jobList"]:
jobs_js = data["jobList"]
job_results = [executor.submit(self.process_job_js, job) for job in jobs_js]
job_results = [
executor.submit(self.process_job_js, job) for job in jobs_js
]
else:
jobs_html = soup.find_all("div", {"class": "job_content"})
job_results = [executor.submit(self.process_job_html, job) for job in
jobs_html]
job_results = [
executor.submit(self.process_job_html, job) for job in jobs_html
]
job_list = [result.result() for result in job_results if result.result()]
@ -107,8 +119,9 @@ class ZipRecruiterScraper(Scraper):
:return: job_response
"""
pages_to_process = max(3, math.ceil(scraper_input.results_wanted / self.jobs_per_page))
pages_to_process = max(
3, math.ceil(scraper_input.results_wanted / self.jobs_per_page)
)
try:
#: get first page to initialize session
@ -125,7 +138,6 @@ class ZipRecruiterScraper(Scraper):
job_list += jobs
except StatusException as e:
return JobResponse(
success=False,
@ -162,9 +174,7 @@ class ZipRecruiterScraper(Scraper):
title = job.find("h2", {"class": "title"}).text
company = job.find("a", {"class": "company_name"}).text.strip()
description, updated_job_url = self.get_description(
job_url
)
description, updated_job_url = self.get_description(job_url)
if updated_job_url is not None:
job_url = updated_job_url
if description is None:
@ -173,10 +183,7 @@ class ZipRecruiterScraper(Scraper):
job_type_element = job.find("li", {"class": "perk_item perk_type"})
if job_type_element:
job_type_text = (
job_type_element.text.strip()
.lower()
.replace("-", "")
.replace(" ", "")
job_type_element.text.strip().lower().replace("-", "").replace(" ", "")
)
if job_type_text == "contractor":
job_type_text = "contract"
@ -201,12 +208,16 @@ class ZipRecruiterScraper(Scraper):
def process_job_js(self, job: dict) -> JobPost:
# Map the job data to the expected fields by the Pydantic model
title = job.get("Title")
description = BeautifulSoup(job.get("Snippet","").strip(), "html.parser").get_text()
description = BeautifulSoup(
job.get("Snippet", "").strip(), "html.parser"
).get_text()
company = job.get("OrgName")
location = Location(city=job.get("City"), state=job.get("State"))
try:
job_type = ZipRecruiterScraper.job_type_from_string(job.get("EmploymentType", "").replace("-", "_").lower())
job_type = ZipRecruiterScraper.job_type_from_string(
job.get("EmploymentType", "").replace("-", "_").lower()
)
except ValueError:
# print(f"Skipping job due to unrecognized job type: {job.get('EmploymentType')}")
return None
@ -215,14 +226,14 @@ class ZipRecruiterScraper(Scraper):
salary_parts = formatted_salary.split(" ")
min_salary_str = salary_parts[0][1:].replace(",", "")
if '.' in min_salary_str:
if "." in min_salary_str:
min_amount = int(float(min_salary_str) * 1000)
else:
min_amount = int(min_salary_str.replace("K", "000"))
if len(salary_parts) >= 3 and salary_parts[2].startswith("$"):
max_salary_str = salary_parts[2][1:].replace(",", "")
if '.' in max_salary_str:
if "." in max_salary_str:
max_amount = int(float(max_salary_str) * 1000)
else:
max_amount = int(max_salary_str.replace("K", "000"))
@ -232,10 +243,12 @@ class ZipRecruiterScraper(Scraper):
compensation = Compensation(
interval=CompensationInterval.YEARLY,
min_amount=min_amount,
max_amount=max_amount
max_amount=max_amount,
)
save_job_url = job.get("SaveJobURL", "")
posted_time_match = re.search(r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url)
posted_time_match = re.search(
r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url
)
if posted_time_match:
date_time_str = posted_time_match.group(1)
date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ")
@ -269,10 +282,7 @@ class ZipRecruiterScraper(Scraper):
return item
raise ValueError(f"Invalid value for JobType: {value}")
def get_description(
self,
job_page_url: str
) -> Tuple[Optional[str], Optional[str]]:
def get_description(self, job_page_url: str) -> Tuple[Optional[str], Optional[str]]:
"""
Retrieves job description by going to the job page url
:param job_page_url:

0
src/tests/__init__.py Normal file
View File

View File

@ -1,4 +1,4 @@
from jobspy import scrape_jobs
from ..jobspy import scrape_jobs
def test_indeed():