mirror of https://github.com/Bunsly/JobSpy
Library Migration (#31)
parent 7efece8fe9
commit 153ac35248
@@ -1,3 +0,0 @@
-{
-  "experimental": "enabled"
-}
@@ -1,33 +0,0 @@
-name: Build and Push Docker Image
-
-on:
-  push:
-    branches:
-      - main
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v2
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
-
-      - name: Login to GitHub Docker Registry
-        uses: docker/login-action@v1
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Build and Push Image
-        uses: docker/build-push-action@v2
-        with:
-          context: .
-          file: ./Dockerfile
-          push: true
-          tags: ghcr.io/${{ github.repository_owner }}/jobspy:latest
-          platforms: linux/amd64,linux/arm64
@@ -0,0 +1,33 @@
+name: Publish Python 🐍 distributions 📦 to PyPI
+on: push
+
+jobs:
+  build-n-publish:
+    name: Build and publish Python 🐍 distributions 📦 to PyPI
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - name: Install poetry
+        run: >-
+          python3 -m
+          pip install
+          poetry
+          --user
+
+      - name: Build distribution 📦
+        run: >-
+          python3 -m
+          poetry
+          build
+
+      - name: Publish distribution 📦 to PyPI
+        if: startsWith(github.ref, 'refs/tags')
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
@@ -1,89 +0,0 @@
-name: JobSpy API Tests
-
-on: [push, pull_request]
-
-jobs:
-  test_api:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v2
-
-      - name: Set up Python 3.10
-        uses: actions/setup-python@v2
-        with:
-          python-version: '3.10'
-
-      - name: Install dependencies
-        run: pip install -r requirements.txt
-
-      - name: Install jq
-        run: sudo apt-get install jq
-
-      - name: Start JobSpy FastAPI app
-        run: uvicorn main:app --host 0.0.0.0 --port 8000 &
-
-      - name: Wait for server to be up
-        run: |
-          for i in {1..10}; do
-            curl -s http://0.0.0.0:8000/api/v1/jobs && break || sleep 1
-          done
-
-      - name: Check health
-        run: |
-          health_status=$(curl -L -s -o /dev/null -w "%{http_code}" http://0.0.0.0:8000/health)
-
-          if [ "$health_status" != "200" ]; then
-            echo "Error: Health check failed with status code $health_status"
-            exit 1
-          fi
-
-      # not checked currently because GitHub's server IPs get blocked by the job boards
-      # - name: Check HTTP status of POST /api/v1/jobs/
-      #   run: |
-      #     response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
-      #       "site_type": ["indeed", "linkedin"],
-      #       "search_term": "software engineer",
-      #       "location": "austin, tx",
-      #       "distance": 10,
-      #       "job_type": "fulltime",
-      #       "results_wanted": 5
-      #     }' http://0.0.0.0:8000/api/v1/jobs -w "%{http_code}")
-      #
-      #     status_code="${response: -3}"
-      #     echo "Received status code: $status_code"
-      #
-      #     if [ "$status_code" != "200" ]; then
-      #       echo "Error: Expected status code 200, but got $status_code"
-      #       exit 1
-      #     fi
-      #
-      #     echo "${response::-3}" > response.json
-      #     cat response.json
-      #
-      # - name: Check error field in response
-      #   run: |
-      #     global_error=$(jq '.error' response.json)
-      #     indeed_error=$(jq '.indeed.error' response.json)
-      #     linkedin_error=$(jq '.linkedin.error' response.json)
-      #
-      #     if [[ "$indeed_error" != "null" || "$linkedin_error" != "null" ]]; then
-      #       echo "Error found in response:"
-      #       echo "Global Error: $global_error"
-      #       echo "Indeed Error: $indeed_error"
-      #       echo "LinkedIn Error: $linkedin_error"
-      #       exit 1
-      #     fi
-      #
-      # - name: Verify returned_results in response
-      #   run: |
-      #     indeed_results=$(jq '.indeed.returned_results' response.json)
-      #     linkedin_results=$(jq '.linkedin.returned_results' response.json)
-      #
-      #     if [[ $indeed_results -ne 5 || $linkedin_results -ne 5 ]]; then
-      #       echo "Mismatch in results_wanted and returned_results:"
-      #       echo "Indeed: Expected 5, Got $indeed_results"
-      #       echo "LinkedIn: Expected 5, Got $linkedin_results"
-      #       exit 1
-      #     fi
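The wait-and-health-check steps above are plain shell; the same probe can be written in Python for local debugging. A minimal sketch using the third-party `requests` package (an assumption — it is not listed in this diff); the port and endpoints come from the workflow above:

```python
import time

import requests  # third-party HTTP client, assumed available locally

BASE = "http://0.0.0.0:8000"

# Poll until the FastAPI app is up, mirroring the curl retry loop in the workflow.
for _ in range(10):
    try:
        requests.get(f"{BASE}/api/v1/jobs", timeout=2)
        break
    except requests.ConnectionError:
        time.sleep(1)

# Fail loudly if the health check does not return HTTP 200.
resp = requests.get(f"{BASE}/health", allow_redirects=True)
assert resp.status_code == 200, f"Health check failed with status code {resp.status_code}"
```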
@@ -5,4 +5,5 @@
 **/__pycache__/
 *.pyc
 .env
-client_secret.json
+dist
+/.ipynb_checkpoints/
@@ -1,13 +0,0 @@
-{
-  "version": "0.2.0",
-  "configurations": [
-    {
-      "name": "Python: Module",
-      "type": "python",
-      "request": "launch",
-      "module": "uvicorn",
-      "args": ["main:app", "--reload"]
-    }
-
-  ]
-}
Dockerfile (15 lines changed)
@@ -1,15 +0,0 @@
-FROM python:3.10-slim
-
-WORKDIR /app
-
-COPY . /app
-
-RUN apt-get update && \
-    apt-get install -y jq && \
-    pip install --no-cache-dir -r requirements.txt
-
-EXPOSE 8000
-
-ENV PORT=8000
-
-CMD sh -c "uvicorn main:app --host 0.0.0.0 --port $PORT"
@@ -0,0 +1,702 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c3f21577-477d-451e-9914-5d67e8a89075",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "(HTML rendering of the resulting DataFrame — 30 job rows × 11 columns: site, title, company_name, city, state, job_type, interval, min_amount, max_amount, job_url, description — elided here)"
+      ],
+      "text/plain": [
+       "(plain-text rendering of the same 30-row DataFrame, with postings from indeed, linkedin and zip_recruiter — elided here)"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from jobscrape import scrape_jobs\n",
+    "import pandas as pd\n",
+    "\n",
+    "jobs: pd.DataFrame = scrape_jobs(\n",
+    "    site_name=[\"indeed\", \"linkedin\", \"zip_recruiter\"],\n",
+    "    search_term=\"software engineer\",\n",
+    "    results_wanted=10\n",
+    ")\n",
+    "\n",
+    "if jobs.empty:\n",
+    "    print(\"No jobs found.\")\n",
+    "else:\n",
+    "\n",
+    "    #1 print\n",
+    "    pd.set_option('display.max_columns', None)\n",
+    "    pd.set_option('display.max_rows', None)\n",
+    "    pd.set_option('display.width', None)\n",
+    "    pd.set_option('display.max_colwidth', 50)  # set to 0 to see full job url / desc\n",
+    "    print(jobs)\n",
+    "\n",
+    "    #2 display in Jupyter Notebook\n",
+    "    display(jobs)\n",
+    "\n",
+    "    #3 output to csv\n",
+    "    jobs.to_csv('jobs.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "efd667ef-fdf0-452a-b5e5-ce6825755be7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1574dc17-0a42-4655-964f-5c03a6d3deb0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "my-poetry-env",
+   "language": "python",
+   "name": "my-poetry-env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
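Since `scrape_jobs` returns an ordinary Pandas DataFrame, the notebook's result can be sliced with standard Pandas operations. A small sketch building on the columns visible in the output above (`site`, `title`, `company_name`, `min_amount`, `job_url`); the filtering itself is illustrative, not part of the library:

```python
import pandas as pd

from jobscrape import scrape_jobs

jobs: pd.DataFrame = scrape_jobs(
    site_name=["indeed", "linkedin", "zip_recruiter"],
    search_term="software engineer",
    results_wanted=10,
)

# Keep only postings that include salary data, highest minimum first.
with_salary = jobs[jobs["min_amount"].notna()].sort_values("min_amount", ascending=False)

# Per-site posting counts, using the 'site' column shown in the output above.
print(with_salary["site"].value_counts())
print(with_salary[["title", "company_name", "min_amount", "job_url"]].head())
```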
README.md (290 lines changed)
@@ -1,240 +1,100 @@
-# JobSpy AIO Scraper
+# JobSpy
 
+**JobSpy** is a simple, yet comprehensive, job scraping library.
+
 ## Features
 
 - Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously
-- Returns jobs as JSON or CSV with title, location, company, description & other data
-- Imports directly into **Google Sheets**
-- Optional JWT authorization
-
-![jobspy_gsheet](https://github.com/cullenwatson/JobSpy/assets/78247585/9f0a997c-4e33-4167-b04e-31ab1f606edb)
+- Aggregates the job postings in a Pandas DataFrame
+
+### Installation
+`pip install jobscrape`
+
+_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
+
+### Usage
+
+```python
+from jobscrape import scrape_jobs
+import pandas as pd
+
+jobs: pd.DataFrame = scrape_jobs(
+    site_name=["indeed", "linkedin", "zip_recruiter"],
+    search_term="software engineer",
+    results_wanted=10
+)
+
+if jobs.empty:
+    print("No jobs found.")
+else:
+
+    #1 print
+    pd.set_option('display.max_columns', None)
+    pd.set_option('display.max_rows', None)
+    pd.set_option('display.width', None)
+    pd.set_option('display.max_colwidth', 50)  # set to 0 to see full job url / desc
+    print(jobs)
+
+    #2 display in Jupyter Notebook
+    display(jobs)
+
+    #3 output to csv
+    jobs.to_csv('jobs.csv', index=False)
+```
+
+### Output
+```
+site           title                             company_name      city          state  job_type  interval  min_amount  max_amount  job_url                                            description
+indeed         Software Engineer                 AMERICAN SYSTEMS  Arlington     VA     None      yearly    200000      150000      https://www.indeed.com/viewjob?jk=5e409e577046...  THIS POSITION COMES WITH A 10K SIGNING BONUS! ...
+indeed         Senior Software Engineer          TherapyNotes.com  Philadelphia  PA     fulltime  yearly    135000      110000      https://www.indeed.com/viewjob?jk=da39574a40cb...  About Us TherapyNotes is the national leader i...
+linkedin       Software Engineer - Early Career  Lockheed Martin   Sunnyvale     CA     fulltime  yearly    None        None        https://www.linkedin.com/jobs/view/3693012711      Description:By bringing together people that u...
+linkedin       Full-Stack Software Engineer      Rain              New York      NY     fulltime  yearly    None        None        https://www.linkedin.com/jobs/view/3696158877      Rain’s mission is to create the fastest and ea...
+zip_recruiter  Software Engineer - New Grad      ZipRecruiter      Santa Monica  CA     fulltime  yearly    130000      150000      https://www.ziprecruiter.com/jobs/ziprecruiter...  We offer a hybrid work environment. Most US-ba...
+zip_recruiter  Software Developer                TEKsystems        Phoenix       AZ     fulltime  hourly    65          75          https://www.ziprecruiter.com/jobs/teksystems-0...  Top Skills' Details• 6 years of Java developme...
+```
+
+### Parameters for `scrape_jobs()`
+
-### API
-
-POST `/api/v1/jobs/`
-
-### Request Schema
-
 ```plaintext
 Required
 ├── site_type (List[enum]): linkedin, zip_recruiter, indeed
 └── search_term (str)
 Optional
 ├── location (str)
-├── distance (int)
+├── distance (int): in miles
 ├── job_type (enum): fulltime, parttime, internship, contract
 ├── is_remote (bool)
-├── results_wanted (int): per site_type
-├── easy_apply (bool): only for linkedin
+├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
+├── easy_apply (bool): filters for jobs on LinkedIn that have the 'Easy Apply' option
-└── output_format (enum): json, csv, gsheet
-```
-
-### Request Example
-```json
-{
-  "site_type": ["indeed", "linkedin"],
-  "search_term": "software engineer",
-  "location": "austin, tx",
-  "distance": 10,
-  "job_type": "fulltime",
-  "results_wanted": 15,
-  "output_format": "gsheet"
-}
-```
 
 ### Response Schema
 ```plaintext
-site_type (enum):
-JobResponse
-├── success (bool)
-├── error (str)
-├── jobs (List[JobPost])
-│   └── JobPost
-│       ├── title (str)
-│       ├── company_name (str)
-│       ├── job_url (str)
-│       ├── location (object)
-│       │   ├── country (str)
-│       │   ├── city (str)
-│       │   ├── state (str)
-│       ├── description (str)
-│       ├── job_type (enum)
-│       ├── compensation (object)
-│       │   ├── interval (CompensationInterval): yearly, monthly, weekly, daily, hourly
-│       │   ├── min_amount (float)
-│       │   ├── max_amount (float)
-│       │   └── currency (str)
-│       └── date_posted (datetime)
-│
-├── total_results (int)
-└── returned_results (int)
+JobPost
+├── title (str)
+├── company_name (str)
+├── job_url (str)
+├── location (object)
+│   ├── country (str)
+│   ├── city (str)
+│   ├── state (str)
+├── description (str)
+├── job_type (enum)
+├── compensation (object)
+│   ├── interval (CompensationInterval): yearly, monthly, weekly, daily, hourly
+│   ├── min_amount (float)
+│   ├── max_amount (float)
+│   └── currency (str)
+└── date_posted (datetime)
 ```
-
-### Response Example (GOOGLE SHEETS)
-```json
-{
-    "status": "Successfully uploaded to Google Sheets",
-    "error": null,
-    "linkedin": null,
-    "indeed": null,
-    "zip_recruiter": null
-}
-```
-
-### Response Example (JSON)
-```json
-{
-    "indeed": {
-        "success": true,
-        "error": null,
-        "jobs": [
-            {
-                "title": "Software Engineer",
-                "company_name": "INTEL",
-                "job_url": "https://www.indeed.com/jobs/viewjob?jk=a2cfbb98d2002228",
-                "location": {
-                    "country": "USA",
-                    "city": "Austin",
-                    "state": "TX"
-                },
-                "description": "Job Description Designs, develops, tests, and debugs...",
-                "job_type": "fulltime",
-                "compensation": {
-                    "interval": "yearly",
-                    "min_amount": 209760.0,
-                    "max_amount": 139480.0,
-                    "currency": "USD"
-                },
-                "date_posted": "2023-08-18T00:00:00"
-            }, ...
-        ],
-        "total_results": 845,
-        "returned_results": 15
-    },
-    "linkedin": {
-        "success": true,
-        "error": null,
-        "jobs": [
-            {
-                "title": "Software Engineer 1",
-                "company_name": "Public Partnerships | PPL",
-                "job_url": "https://www.linkedin.com/jobs/view/3690013792",
-                "location": {
-                    "country": "USA",
-                    "city": "Austin",
-                    "state": "TX"
-                },
-                "description": "Public Partnerships LLC supports individuals with disabilities...",
-                "job_type": null,
-                "compensation": null,
-                "date_posted": "2023-07-31T00:00:00"
-            }, ...
-        ],
-        "total_results": 2000,
-        "returned_results": 15
-    }
-}
-```
-
-### Response Example (CSV)
-```
-Site, Title, Company Name, Job URL, Country, City, State, Job Type, Compensation Interval, Min Amount, Max Amount, Currency, Date Posted, Description
-indeed, Software Engineer, INTEL, https://www.indeed.com/jobs/viewjob?jk=a2cfbb98d2002228, USA, Austin, TX, fulltime, yearly, 209760.0, 139480.0, USD, 2023-08-18T00:00:00, Job Description Designs...
-linkedin, Software Engineer 1, Public Partnerships | PPL, https://www.linkedin.com/jobs/view/3690013792, USA, Austin, TX, , , , , , 2023-07-31T00:00:00, Public Partnerships LLC supports...
-```
-
-## Installation
-### Docker Setup
-_Requires [Docker Desktop](https://www.docker.com/products/docker-desktop/)_
-
-[JobSpy API Image](https://ghcr.io/cullenwatson/jobspy:latest) is continuously updated and available on GitHub Container Registry.
-
-To pull the Docker image:
-
-```bash
-docker pull ghcr.io/cullenwatson/jobspy:latest
-```
-
-#### Params
-
-By default:
-* Port: `8000`
-* Google sheet name: `JobSpy`
-* Relative path of `client_secret.json` (for Google Sheets, see below to obtain)
-
-To run the image with these default settings, use:
-
-Example (Cmd Prompt - Windows):
-```bash
-docker run -v %cd%/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
-```
-
-Example (Unix):
-```bash
-docker run -v $(pwd)/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
-```
-
-#### Using custom params
-
-Example:
-* Port: `8030`
-* Google sheet name: `CustomName`
-* Absolute path of `client_secret.json`: `C:\config\client_secret.json`
-
-To pass these custom params:
-```bash
-docker run -v C:\config\client_secret.json:/app/client_secret.json -e GSHEET_NAME=CustomName -e PORT=8030 -p 8030:8030 ghcr.io/cullenwatson/jobspy
-```
-
-### Python installation (alternative to Docker)
-_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
-1. Clone this repository: `git clone https://github.com/cullenwatson/jobspy`
-2. Install the dependencies with `pip install -r requirements.txt`
-3. Run the server with `uvicorn main:app --reload`
-
-### Google Sheets Setup
-
-#### Obtaining an Access Key: [Video Guide](https://youtu.be/w533wJuilao?si=5u3m50pRtdhqkg9Z&t=43)
-* Enable the [Google Sheets & Google Drive API](https://console.cloud.google.com/)
-* Create credentials -> service account -> create & continue
-* Select role -> basic: editor -> done
-* Click on the email you just created in the service account list
-* Go to the Keys tab -> add key -> create new key -> JSON -> Create
-
-#### Using the key in the repo
-* Copy the key file into the JobSpy repo as `client_secret.json`
-* Go to [my template sheet](https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing): File -> Make a Copy -> Rename to JobSpy
-* Share the Google sheet with the email located in the field `client_email` in the `client_secret.json` above with editor rights
-* If you changed the name of the sheet:
-  - Python install: add `.env` in the repo and add a `GSHEET_NAME` param with the sheet name as the value, e.g. `GSHEET_NAME=CustomName`
-  - Docker install: use the custom param `-e GSHEET_NAME=CustomName` in `docker run` (see above)
-
-### How to call the API
-
-#### [Postman](https://www.postman.com/downloads/) (preferred):
-To use Postman:
-1. Locate the files in the `/postman/` directory.
-2. Import the Postman collection and environment JSON files.
-
-#### Swagger UI:
-Or you can call the API with the interactive documentation at [localhost:8000/docs](http://localhost:8000/docs).
-
-## FAQ
-
-### I'm having issues with my queries. What should I do?
-
-Try reducing the number of `results_wanted` and/or broadening the filters. If issues still persist, feel free to submit an issue.
-
-### I'm getting response code 429. What should I do?
-You have been blocked by the job board site for sending too many requests. Wait a couple of seconds or use a VPN.
-
-### How to enable auth?
-
-Change `AUTH_REQUIRED` in `/settings.py` to `True`.
-
-The auth uses [supabase](https://supabase.com). Create a project with a `users` table and disable RLS.
-
-<img src="https://github.com/cullenwatson/jobspy/assets/78247585/03af18e1-5386-49ad-a2cf-d34232d9d747" width="500">
-
-Add these three environment variables:
-
-- `SUPABASE_URL`: go to project settings -> API -> Project URL
-- `SUPABASE_KEY`: go to project settings -> API -> service_role secret
-- `JWT_SECRET_KEY`: type `openssl rand -hex 32` in a terminal to create a 32-byte secret key
-
-Use these endpoints to register and get an access token:
-
-![image](https://github.com/cullenwatson/jobspy/assets/78247585/c84c33ec-1fe8-4152-9c8c-6c4334aecfc3)
+
+### FAQ
+
+#### Encountering issues with your queries?
+
+Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, please submit an issue.
+
+#### Received a response code 429?
+This means you've been blocked by the job board site for sending too many requests. Consider waiting a few seconds, or try using a VPN. Proxy support coming soon.
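The three auth variables listed in the old README are consumed through `settings.py` (which `db_utils.py` imports from, as seen further below). A hypothetical sketch of the relevant part of that file — only the variable names are confirmed by the diff; reading them via `os.environ` is an assumption:

```python
# settings.py — hedged sketch; names match the README and the imports below
import os

AUTH_REQUIRED = False  # the README says to flip this to True to enable auth

SUPABASE_URL = os.environ.get("SUPABASE_URL")      # project settings -> API -> Project URL
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")      # project settings -> API -> service_role secret
JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")  # e.g. generated with `openssl rand -hex 32`
```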
@@ -1,9 +0,0 @@
-from fastapi import APIRouter
-from api.auth import router as auth_router
-from .v1 import router as v1_router
-
-router = APIRouter(
-    prefix="/api",
-)
-router.include_router(v1_router)
-router.include_router(auth_router)
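For context, the composed `router` above would have been mounted in `main.py`, the module served by `uvicorn main:app` throughout this repo. `main.py` itself is not part of this diff, so this is a hedged sketch:

```python
# main.py — hypothetical sketch; only the `api.router` composition is confirmed by the diff
from fastapi import FastAPI

from api import router as api_router

app = FastAPI()
app.include_router(api_router)  # serves /api/v1/jobs and /api/auth/*
```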
@@ -1,8 +0,0 @@
-from fastapi import APIRouter
-
-from api.auth.token import router as token_router
-from api.auth.register import router as register_router
-
-router = APIRouter(prefix="/auth", tags=["auth"])
-router.include_router(token_router)
-router.include_router(register_router)
@@ -1,65 +0,0 @@
-from datetime import datetime, timedelta
-
-from jose import jwt, JWTError
-from fastapi import HTTPException, status, Depends
-from fastapi.security import OAuth2PasswordBearer
-
-from api.core.users import TokenData
-from api.auth.db_utils import UserInDB, get_user
-from settings import JWT_SECRET_KEY, ALGORITHM, ACCESS_TOKEN_EXPIRE_MINUTES  # constants used below; missing from the original file, assumed to live in settings.py
-
-oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")
-
-
-def create_access_token(data: dict) -> str:
-    """
-    Creates a JWT token based on the data provided.
-    :param data: the claims to encode into the token
-    :return: encoded_jwt
-    """
-    to_encode = data.copy()
-    expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
-    to_encode.update({"exp": expire})
-    encoded_jwt = jwt.encode(to_encode, JWT_SECRET_KEY, algorithm=ALGORITHM)
-    return encoded_jwt
-
-
-async def get_current_user(token: str = Depends(oauth2_scheme)):
-    """
-    Returns the current user associated with the provided JWT token.
-    :param token: the bearer token extracted by oauth2_scheme
-    :raises HTTPException: If the token is invalid or the user does not exist.
-    :return: The UserInDB instance associated with the token.
-    """
-    credential_exception = HTTPException(
-        status_code=status.HTTP_401_UNAUTHORIZED,
-        detail="Could not validate credentials",
-        headers={"WWW-Authenticate": "Bearer"},
-    )
-    try:
-        payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
-        username: str = payload.get("sub")
-        if username is None:
-            raise credential_exception
-        token_data = TokenData(username=username)
-    except JWTError:
-        raise credential_exception
-
-    current_user = get_user(token_data.username)
-    if current_user is None:
-        raise credential_exception
-    return current_user
-
-
-async def get_active_current_user(current_user: UserInDB = Depends(get_current_user)):
-    """
-    Returns the current user if the user account is active.
-
-    :param current_user: A UserInDB instance representing the current user.
-    :raises HTTPException: If the user account is inactive.
-    :return: The UserInDB instance if the user account is active.
-    """
-    if current_user.disabled:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED, detail="Inactive user."
-        )
-    return current_user
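A compact sketch of the token round trip the two helpers above implement — `create_access_token()` signs a payload carrying a `sub` claim, and `get_current_user()` starts by decoding it. The secret and algorithm values below are placeholders; the real ones come from `settings.py`:

```python
from jose import jwt

# Placeholder values; in the app these come from settings.py.
JWT_SECRET_KEY = "change-me"
ALGORITHM = "HS256"

# What create_access_token() produces: a signed payload with "sub" and "exp" claims.
token = jwt.encode({"sub": "alice", "exp": 9999999999}, JWT_SECRET_KEY, algorithm=ALGORITHM)

# What get_current_user() does first: decode the token and pull the username back out.
payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
assert payload["sub"] == "alice"
```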
@@ -1,89 +0,0 @@
-from typing import Optional, Union
-
-from passlib.context import CryptContext
-from supabase_py import create_client, Client
-from fastapi import HTTPException, status
-
-from api.core.users import UserInDB
-from settings import SUPABASE_URL, SUPABASE_KEY
-
-pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
-if SUPABASE_URL:
-    supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
-
-
-def create_user(user_create: UserInDB):
-    """
-    Creates a new user record in the 'users' table in Supabase.
-
-    :param user_create: The data of the user to be created.
-    :raises HTTPException: If an error occurs while creating the user.
-    :return: The result of the insert operation.
-    """
-    result = supabase.table("users").insert(user_create.dict()).execute()
-    print(f"Insert result: {result}")
-
-    if "error" in result and result["error"]:
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"User could not be created due to {result['error']['message']}",
-        )
-
-    return result
-
-
-def get_user(username: str) -> Optional[UserInDB]:
-    """
-    Retrieves a user from the 'users' table by their username.
-
-    :param username: The username of the user to retrieve.
-    :return: The user data if found, otherwise None.
-    """
-    result = supabase.table("users").select().eq("username", username).execute()
-
-    if "error" in result and result["error"]:
-        print(f"Error: {result['error']['message']}")
-        return None
-    else:
-        if result["data"]:
-            user_data = result["data"][0]
-            return UserInDB(**user_data)
-        else:
-            return None
-
-
-def verify_password(password: str, hashed_password: str) -> bool:
-    """
-    Verifies a password against a hashed password using the bcrypt hashing algorithm.
-
-    :param password: The plaintext password to verify.
-    :param hashed_password: The hashed password to compare against.
-    :return: True if the password matches the hashed password, otherwise False.
-    """
-    return pwd_context.verify(password, hashed_password)
-
-
-def get_password_hash(password: str) -> str:
-    """
-    Hashes a password using the bcrypt hashing algorithm.
-
-    :param password: The plaintext password to hash.
-    :return: The hashed password
-    """
-    return pwd_context.hash(password)
-
-
-def authenticate_user(username: str, password: str) -> Union[UserInDB, bool]:
-    """
-    Authenticates a user based on their username and password.
-
-    :param username: The username of the user to authenticate.
-    :param password: The plaintext password to authenticate.
-    :return: The authenticated user if the username and password are correct, otherwise False.
-    """
-    user = get_user(username)
-    if not user:
-        return False
-    if not verify_password(password, user.hashed_password):
-        return False
-    return user
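The password helpers above are thin wrappers over passlib's `CryptContext`; their round trip can be exercised standalone, with no Supabase connection. A minimal sketch:

```python
from passlib.context import CryptContext

pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

hashed = pwd_context.hash("s3cret")          # what get_password_hash() returns
assert pwd_context.verify("s3cret", hashed)  # what verify_password() checks
assert not pwd_context.verify("wrong", hashed)
```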
@@ -1,33 +0,0 @@
-from fastapi import APIRouter, HTTPException, status
-from api.core.users import UserCreate, UserInDB
-from api.auth.db_utils import get_user, get_password_hash, create_user
-
-router = APIRouter(prefix="/register")
-
-
-@router.post("/", response_model=dict)
-async def register_new_user(user: UserCreate) -> dict:
-    """
-    Creates a new user.
-    :param user: The registration details for the new user.
-    :raises HTTPException: If the username already exists.
-    :return: A dictionary containing a detail key with a success message.
-    """
-    existing_user = get_user(user.username)
-    if existing_user is not None:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Username already exists",
-        )
-
-    hashed_password = get_password_hash(user.password)
-    user_create = UserInDB(
-        username=user.username,
-        email=user.email,
-        full_name=user.full_name,
-        hashed_password=hashed_password,
-        disabled=False,
-    )
-    create_user(user_create)
-
-    return {"detail": "User created successfully"}
@ -1,30 +0,0 @@
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm

from api.core.users import Token
from api.auth.db_utils import authenticate_user
from api.auth.auth_utils import create_access_token

router = APIRouter(prefix="/token")


@router.post("/", response_model=Token)
async def login_for_access_token(
    form_data: OAuth2PasswordRequestForm = Depends(),
) -> Token:
    """
    Authenticates a user and provides an access token.

    :param form_data: OAuth2PasswordRequestForm object containing the user's credentials.
    :raises HTTPException: If the user cannot be authenticated.
    :return: A Token object containing the access token and the token type.
    """
    user = authenticate_user(form_data.username, form_data.password)
    if not user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )

    access_token = create_access_token(data={"sub": user.username})
    return Token(access_token=access_token, token_type="bearer")
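A sketch of how a client might exercise these two auth endpoints, using the requests package from requirements.txt; the host, port, and `/api/v1` mount prefix are assumptions, since only the `/register` and `/token` router prefixes are visible here:

import requests

BASE = "http://localhost:8000/api/v1"  # assumed deployment URL and prefix

# Register, then exchange the credentials for a bearer token
# (form-encoded, as OAuth2PasswordRequestForm expects).
requests.post(f"{BASE}/register/", json={
    "username": "demo",
    "full_name": "Demo User",
    "email": "demo@example.com",
    "password": "hunter22",
})
resp = requests.post(f"{BASE}/token/", data={"username": "demo", "password": "hunter22"})
access_token = resp.json()["access_token"]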
@ -1,7 +0,0 @@
from enum import Enum


class OutputFormat(Enum):
    CSV = "csv"
    JSON = "json"
    GSHEET = "gsheet"
@ -1,133 +0,0 @@
import gspread
from oauth2client.service_account import ServiceAccountCredentials

import csv
from io import StringIO
from datetime import datetime

from ...jobs import *
from ...scrapers import *
from settings import *


class CSVFormatter:
    @staticmethod
    def fetch_job_urls(credentials: Any) -> set:
        """
        Fetches all the job urls from the Google Sheet to prevent duplicates.

        :param credentials: Service account credentials authorized for the sheet.
        :return: Set of job urls already present in the sheet.
        """
        try:
            gc = gspread.authorize(credentials)
            sh = gc.open(GSHEET_NAME)

            worksheet = sh.get_worksheet(0)
            data = worksheet.get_all_values()
            job_urls = set()
            for row in data[1:]:
                job_urls.add(row[3])
            return job_urls
        except Exception as e:
            raise e

    @staticmethod
    def upload_to_google_sheet(csv_data: StringIO):
        """
        Appends rows to the Google Sheet, skipping rows whose job url is already present.

        :param csv_data: In-memory CSV produced by CSVFormatter.format().
        """
        try:
            scope = [
                "https://www.googleapis.com/auth/spreadsheets",
                "https://www.googleapis.com/auth/drive.file",
                "https://www.googleapis.com/auth/drive",
            ]
            credentials = ServiceAccountCredentials.from_json_keyfile_name(
                "client_secret.json", scope
            )
            gc = gspread.authorize(credentials)
            sh = gc.open(GSHEET_NAME)

            worksheet = sh.get_worksheet(0)
            data_string = csv_data.getvalue()
            reader = csv.reader(StringIO(data_string))

            job_urls = CSVFormatter.fetch_job_urls(credentials)

            rows = list(reader)

            for i, row in enumerate(rows):
                if i == 0:
                    continue
                if row[4] in job_urls:
                    continue

                row[6] = format(int(row[6]), ",d") if row[6] else ""
                row[7] = format(int(row[7]), ",d") if row[7] else ""
                worksheet.append_row(row)
        except Exception as e:
            raise e

    @staticmethod
    def generate_filename() -> str:
        """
        Adds a timestamp to the filename header.

        :return: filename
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return f"JobSpy_results_{timestamp}.csv"

    @staticmethod
    def format(jobs: CommonResponse) -> StringIO:
        """
        Transforms the job objects into CSV.

        :param jobs: CommonResponse containing one JobResponse per site.
        :return: In-memory CSV buffer.
        """
        output = StringIO()
        writer = csv.writer(output)

        headers = [
            "Title",
            "Company Name",
            "City",
            "State",
            "Job Type",
            "Pay Cycle",
            "Min Amount",
            "Max Amount",
            "Date Posted",
            "Description",
            "Job URL",
        ]
        writer.writerow(headers)

        for site, job_response in jobs.dict().items():
            if isinstance(job_response, dict) and job_response.get("success"):
                for job in job_response["jobs"]:
                    writer.writerow(
                        [
                            job["title"],
                            job["company_name"],
                            job["location"]["city"],
                            job["location"]["state"],
                            job["job_type"].value if job.get("job_type") else "",
                            job["compensation"]["interval"].value
                            if job["compensation"]
                            else "",
                            job["compensation"]["min_amount"]
                            if job["compensation"]
                            else "",
                            job["compensation"]["max_amount"]
                            if job["compensation"]
                            else "",
                            job.get("date_posted", ""),
                            job["description"],
                            job["job_url"],
                        ]
                    )

        output.seek(0)
        return output
@ -1,28 +0,0 @@
from pydantic import BaseModel


class User(BaseModel):
    username: str
    full_name: str
    email: str
    disabled: bool = False


class UserCreate(BaseModel):
    username: str
    full_name: str
    email: str
    password: str


class UserInDB(User):
    hashed_password: str


class TokenData(BaseModel):
    username: str


class Token(BaseModel):
    access_token: str
    token_type: str
@ -1,11 +0,0 @@
from fastapi import APIRouter, Depends
from .jobs import router as jobs_router
from api.auth.auth_utils import get_active_current_user
from settings import AUTH_REQUIRED

if AUTH_REQUIRED:
    router = APIRouter(prefix="/v1", dependencies=[Depends(get_active_current_user)])
else:
    router = APIRouter(prefix="/v1")

router.include_router(jobs_router)
@ -1,68 +0,0 @@
import io
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from concurrent.futures import ThreadPoolExecutor

from api.core.scrapers.indeed import IndeedScraper
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
from api.core.scrapers.linkedin import LinkedInScraper
from api.core.formatters.csv import CSVFormatter
from api.core.scrapers import (
    ScraperInput,
    Site,
    JobResponse,
    OutputFormat,
    CommonResponse,
)
from typing import List, Dict, Tuple, Union

router = APIRouter(prefix="/jobs", tags=["jobs"])

SCRAPER_MAPPING = {
    Site.LINKEDIN: LinkedInScraper,
    Site.INDEED: IndeedScraper,
    Site.ZIP_RECRUITER: ZipRecruiterScraper,
}


@router.post("/")
async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse:
    """
    Asynchronously scrapes job data from multiple job sites.

    :param scraper_input: Search criteria and the list of sites to scrape.
    :return: scraper_response
    """

    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
        scraper_class = SCRAPER_MAPPING[site]
        scraper = scraper_class()
        scraped_data: JobResponse = scraper.scrape(scraper_input)
        return (site.value, scraped_data)

    with ThreadPoolExecutor(max_workers=3) as executor:
        results = dict(executor.map(scrape_site, scraper_input.site_type))

    scraper_response = CommonResponse(status="JSON response success", **results)

    if scraper_input.output_format == OutputFormat.CSV:
        csv_output = CSVFormatter.format(scraper_response)
        response = StreamingResponse(csv_output, media_type="text/csv")
        response.headers[
            "Content-Disposition"
        ] = f"attachment; filename={CSVFormatter.generate_filename()}"
        return response

    elif scraper_input.output_format == OutputFormat.GSHEET:
        csv_output = CSVFormatter.format(scraper_response)
        try:
            CSVFormatter.upload_to_google_sheet(csv_output)
            return CommonResponse(
                status="Successfully uploaded to Google Sheets", **results
            )
        except Exception as e:
            return CommonResponse(
                status="Failed to upload to Google Sheet", error=repr(e), **results
            )

    else:
        return scraper_response
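For reference, the endpoint removed here accepted a ScraperInput body. A sketch of a request against it, with illustrative values; the deployment URL, the `/api` mount prefix, and the site enum values are assumptions:

import requests

resp = requests.post(
    "http://localhost:8000/api/v1/jobs/",     # assumed deployment URL
    json={
        "site_type": ["indeed", "linkedin"],  # assumed Site enum values
        "search_term": "software engineer",
        "location": "Dallas, TX",
        "results_wanted": 10,
        "output_format": "json",
    },
)
print(resp.json())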
@ -0,0 +1,121 @@
import pandas as pd
from typing import List, Tuple

from .core.jobs import JobType
from .core.scrapers.indeed import IndeedScraper
from .core.scrapers.ziprecruiter import ZipRecruiterScraper
from .core.scrapers.linkedin import LinkedInScraper
from .core.scrapers import (
    ScraperInput,
    Site,
    JobResponse,
    CommonResponse,
)


SCRAPER_MAPPING = {
    Site.LINKEDIN: LinkedInScraper,
    Site.INDEED: IndeedScraper,
    Site.ZIP_RECRUITER: ZipRecruiterScraper,
}


def _map_str_to_site(site_name: str) -> Site:
    return Site[site_name.upper()]


def scrape_jobs(
    site_name: str | Site | List[Site],
    search_term: str,

    location: str = "",
    distance: int = None,
    is_remote: bool = False,
    job_type: JobType = None,
    easy_apply: bool = False,  # linkedin
    results_wanted: int = 15,
) -> pd.DataFrame:
    """
    Scrapes job data from the requested job sites.

    :return: pandas DataFrame containing job data
    """

    if isinstance(site_name, str):
        site_name = _map_str_to_site(site_name)

    site_type = [site_name] if isinstance(site_name, Site) else site_name
    scraper_input = ScraperInput(
        site_type=site_type,
        search_term=search_term,
        location=location,
        distance=distance,
        is_remote=is_remote,
        job_type=job_type,
        easy_apply=easy_apply,
        results_wanted=results_wanted,
    )

    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
        scraper_class = SCRAPER_MAPPING[site]
        scraper = scraper_class()
        scraped_data: JobResponse = scraper.scrape(scraper_input)

        return site.value, scraped_data

    results = {}
    for site in scraper_input.site_type:
        site_value, scraped_data = scrape_site(site)
        results[site_value] = scraped_data

    dfs = []

    for site, job_response in results.items():
        for job in job_response.jobs:
            data = job.dict()
            data["site"] = site

            # Formatting JobType
            data["job_type"] = data["job_type"].value if data["job_type"] else None

            # Formatting Location
            location_obj = data.get("location")
            if location_obj and isinstance(location_obj, dict):
                data["city"] = location_obj.get("city", "")
                data["state"] = location_obj.get("state", "")
                data["country"] = location_obj.get("country", "USA")
            else:
                data["city"] = None
                data["state"] = None
                data["country"] = None

            # Formatting Compensation
            compensation_obj = data.get("compensation")
            if compensation_obj and isinstance(compensation_obj, dict):
                data["interval"] = (
                    compensation_obj.get("interval").value
                    if compensation_obj.get("interval")
                    else None
                )
                data["min_amount"] = compensation_obj.get("min_amount")
                data["max_amount"] = compensation_obj.get("max_amount")
                data["currency"] = compensation_obj.get("currency", "USD")
            else:
                data["interval"] = None
                data["min_amount"] = None
                data["max_amount"] = None
                data["currency"] = None

            job_df = pd.DataFrame([data])
            dfs.append(job_df)

    if dfs:
        df = pd.concat(dfs, ignore_index=True)
        desired_order = [
            "site",
            "title",
            "company_name",
            "city",
            "state",
            "job_type",
            "interval",
            "min_amount",
            "max_amount",
            "job_url",
            "description",
        ]
        df = df[desired_order]
    else:
        df = pd.DataFrame()

    return df
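A usage sketch of the new entry point, mirroring the tests added at the bottom of this diff; the location value is illustrative, and the returned DataFrame columns follow the desired_order list above:

from jobscrape import scrape_jobs

df = scrape_jobs(
    site_name="indeed",
    search_term="software engineer",
    location="Dallas, TX",
    results_wanted=10,
)
print(df[["site", "title", "company_name", "min_amount", "max_amount"]].head())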
@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Optional
 from datetime import date
 from enum import Enum

@ -19,10 +19,11 @@ class JobType(Enum):
     VOLUNTEER = "volunteer"


 class Location(BaseModel):
     country: str = "USA"
     city: str = None
-    state: str = None
+    state: Optional[str] = None


 class CompensationInterval(Enum):
@ -35,8 +36,8 @@ class CompensationInterval(Enum):

 class Compensation(BaseModel):
     interval: CompensationInterval
-    min_amount: int
-    max_amount: int
+    min_amount: int = None
+    max_amount: int = None
     currency: str = "USD"


@ -44,11 +45,11 @@ class JobPost(BaseModel):
     title: str
     company_name: str
     job_url: str
-    location: Location
+    location: Optional[Location]

     description: str = None
-    job_type: JobType = None
-    compensation: Compensation = None
+    job_type: Optional[JobType] = None
+    compensation: Optional[Compensation] = None
     date_posted: date = None


@ -56,7 +57,7 @@ class JobResponse(BaseModel):
     success: bool
     error: str = None

-    total_results: int = None
+    total_results: Optional[int] = None

     jobs: list[JobPost] = []

@ -64,6 +65,11 @@ class JobResponse(BaseModel):

     @validator("returned_results", pre=True, always=True)
     def set_returned_results(cls, v, values):
-        if v is None and values.get("jobs"):
-            return len(values["jobs"])
+        jobs_list = values.get("jobs")
+
+        if v is None:
+            if jobs_list is not None:
+                return len(jobs_list)
+            else:
+                return 0
         return v
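With the loosened fields, a JobPost can now be built without location or compensation data. A quick sketch under the pydantic v1 API used here; the import path is an assumption based on the new package layout:

from jobscrape.core.jobs import JobPost  # assumed path after the migration

job = JobPost(
    title="Backend Engineer",             # illustrative values
    company_name="Acme",
    job_url="https://example.com/job/123",
    location=None,                        # now Optional
    compensation=None,                    # now Optional
)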
@ -1,5 +1,4 @@
-from ..jobs import *
-from ..formatters import OutputFormat
+from ..jobs import Enum, BaseModel, JobType, JobResponse
 from typing import List, Dict, Optional, Any

@ -17,12 +16,11 @@ class Site(Enum):
 class ScraperInput(BaseModel):
     site_type: List[Site]
     search_term: str
-    output_format: OutputFormat = OutputFormat.JSON

     location: str = None
-    distance: int = None
+    distance: Optional[int] = None
     is_remote: bool = False
-    job_type: JobType = None
+    job_type: Optional[JobType] = None
     easy_apply: bool = None  # linkedin

     results_wanted: int = 15
@ -1,22 +1,18 @@
 import re
+import sys
+import math
 import json
-from typing import Optional, Tuple, List
 from datetime import datetime
+from typing import Optional, Tuple, List

 import tls_client
 import urllib.parse
 from bs4 import BeautifulSoup
 from bs4.element import Tag
-from fastapi import status
-
-from api.core.jobs import *
-from api.core.jobs import JobPost
-from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
-
 from concurrent.futures import ThreadPoolExecutor, Future
-import math
-import traceback
-import sys
+
+from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
+from .. import Scraper, ScraperInput, Site, StatusException


 class ParsingException(Exception):
@ -66,8 +62,8 @@ class IndeedScraper(Scraper):
         response = session.get(self.url + "/jobs", params=params)

         if (
-            response.status_code != status.HTTP_200_OK
-            and response.status_code != status.HTTP_307_TEMPORARY_REDIRECT
+            response.status_code != 200
+            and response.status_code != 307
         ):
             raise StatusException(response.status_code)

@ -131,7 +127,6 @@ class IndeedScraper(Scraper):
                 location=Location(
                     city=job.get("jobLocationCity"),
                     state=job.get("jobLocationState"),
-                    postal_code=job.get("jobLocationPostal"),
                 ),
                 job_type=job_type,
                 compensation=compensation,
@ -140,9 +135,11 @@ class IndeedScraper(Scraper):
             )
             return job_post

-        for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]:
-            job_post = process_job(job)
-            job_list.append(job_post)
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            job_results: list[Future] = [executor.submit(process_job, job) for job in
+                                         jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]]
+
+        job_list = [result.result() for result in job_results if result.result()]

         return job_list, total_num_jobs
@ -4,10 +4,9 @@ from datetime import datetime
 import requests
 from bs4 import BeautifulSoup
 from bs4.element import Tag
-from fastapi import status

-from api.core.scrapers import Scraper, ScraperInput, Site
-from api.core.jobs import *
+from .. import Scraper, ScraperInput, Site
+from ...jobs import JobPost, Location, JobResponse, JobType, Compensation, CompensationInterval


 class LinkedInScraper(Scraper):
@ -59,7 +58,7 @@ class LinkedInScraper(Scraper):
             f"{self.url}/jobs/search", params=params, allow_redirects=True
         )

-        if response.status_code != status.HTTP_200_OK:
+        if response.status_code != 200:
             return JobResponse(
                 success=False,
                 error=f"Response returned {response.status_code}",
@ -118,6 +117,7 @@ class LinkedInScraper(Scraper):
                 date_posted=date_posted,
                 job_url=job_url,
                 job_type=job_type,
+                compensation=Compensation(interval=CompensationInterval.YEARLY, currency="USD"),
             )
             job_list.append(job_post)
             if (
@ -185,7 +185,6 @@ class LinkedInScraper(Scraper):
         employment_type = employment_type_span.get_text(strip=True)
         employment_type = employment_type.lower()
         employment_type = employment_type.replace("-", "")
-        print(employment_type)

         return JobType(employment_type)
@ -1,18 +1,17 @@
 import math
 import json
+import re
-from datetime import datetime
+from datetime import datetime, date
 from typing import Optional, Tuple, List
 from urllib.parse import urlparse, parse_qs

 import tls_client
-from fastapi import status
 from bs4 import BeautifulSoup
 from bs4.element import Tag
 from concurrent.futures import ThreadPoolExecutor, Future

-from api.core.jobs import JobPost
-from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
-from api.core.jobs import *
+from .. import Scraper, ScraperInput, Site, StatusException
+from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType


 class ZipRecruiterScraper(Scraper):
@ -26,9 +25,12 @@ class ZipRecruiterScraper(Scraper):

         self.jobs_per_page = 20
         self.seen_urls = set()
+        self.session = tls_client.Session(
+            client_identifier="chrome112", random_tls_extension_order=True
+        )

     def scrape_page(
-        self, scraper_input: ScraperInput, page: int, session: tls_client.Session
+        self, scraper_input: ScraperInput, page: int
     ) -> tuple[list[JobPost], int | None]:
         """
         Scrapes a page of ZipRecruiter for jobs with scraper_input criteria
@ -52,91 +54,47 @@ class ZipRecruiterScraper(Scraper):
         params = {
             "search": scraper_input.search_term,
             "location": scraper_input.location,
-            "radius": scraper_input.distance,
-            "refine_by_location_type": "only_remote"
-            if scraper_input.is_remote
-            else None,
-            "refine_by_employment": f"employment_type:employment_type:{job_type_value}"
-            if job_type_value
-            else None,
             "page": page,
+            "form": "jobs-landing"
         }

+        if scraper_input.is_remote:
+            params["refine_by_location_type"] = "only_remote"
+
+        if scraper_input.distance:
+            params["radius"] = scraper_input.distance
+
+        if job_type_value:
+            params["refine_by_employment"] = f"employment_type:employment_type:{job_type_value}"
+
-        response = session.get(
+        response = self.session.get(
             self.url + "/jobs-search",
             headers=ZipRecruiterScraper.headers(),
             params=params,
         )

-        if response.status_code != status.HTTP_200_OK:
+        if response.status_code != 200:
             raise StatusException(response.status_code)

-        html_string = response.content
+        html_string = response.text
         soup = BeautifulSoup(html_string, "html.parser")

+        script_tag = soup.find("script", {"id": "js_variables"})
+        data = json.loads(script_tag.string)
+
         if page == 1:
-            script_tag = soup.find("script", {"id": "js_variables"})
-            data = json.loads(script_tag.string)
-
             job_count = int(data["totalJobCount"].replace(",", ""))
         else:
             job_count = None

-        job_posts = soup.find_all("div", {"class": "job_content"})
-
-        def process_job(job: Tag) -> Optional[JobPost]:
-            """
-            Parses a job from the job content tag
-            :param job: BeautifulSoup Tag for one job post
-            :return JobPost
-            """
-            job_url = job.find("a", {"class": "job_link"})["href"]
-            if job_url in self.seen_urls:
-                return None
-
-            title = job.find("h2", {"class": "title"}).text
-            company = job.find("a", {"class": "company_name"}).text.strip()
-
-            description, updated_job_url = ZipRecruiterScraper.get_description(
-                job_url, session
-            )
-            if updated_job_url is not None:
-                job_url = updated_job_url
-            if description is None:
-                description = job.find("p", {"class": "job_snippet"}).text.strip()
-
-            job_type_element = job.find("li", {"class": "perk_item perk_type"})
-            if job_type_element:
-                job_type_text = (
-                    job_type_element.text.strip()
-                    .lower()
-                    .replace("-", "")
-                    .replace(" ", "")
-                )
-                if job_type_text == "contractor":
-                    job_type_text = "contract"
-                job_type = JobType(job_type_text)
-            else:
-                job_type = None
-
-            date_posted = ZipRecruiterScraper.get_date_posted(job)
-
-            job_post = JobPost(
-                title=title,
-                description=description,
-                company_name=company,
-                location=ZipRecruiterScraper.get_location(job),
-                job_type=job_type,
-                compensation=ZipRecruiterScraper.get_compensation(job),
-                date_posted=date_posted,
-                job_url=job_url,
-            )
-            return job_post
-
         with ThreadPoolExecutor(max_workers=10) as executor:
-            job_results: list[Future] = [
-                executor.submit(process_job, job) for job in job_posts
-            ]
+            if "jobList" in data and data["jobList"]:
+                jobs_js = data["jobList"]
+                job_results = [executor.submit(self.process_job_js, job) for job in jobs_js]
+            else:
+                jobs_html = soup.find_all("div", {"class": "job_content"})
+                job_results = [executor.submit(self.process_job_html, job) for job in
+                               jobs_html]

         job_list = [result.result() for result in job_results if result.result()]

@ -148,19 +106,17 @@ class ZipRecruiterScraper(Scraper):
         :param scraper_input:
         :return: job_response
         """
-        session = tls_client.Session(
-            client_identifier="chrome112", random_tls_extension_order=True
-        )
-
-        pages_to_process = math.ceil(scraper_input.results_wanted / self.jobs_per_page)
+        pages_to_process = max(3, math.ceil(scraper_input.results_wanted / self.jobs_per_page))

         try:
             #: get first page to initialize session
-            job_list, total_results = self.scrape_page(scraper_input, 1, session)
+            job_list, total_results = self.scrape_page(scraper_input, 1)

             with ThreadPoolExecutor(max_workers=10) as executor:
                 futures: list[Future] = [
-                    executor.submit(self.scrape_page, scraper_input, page, session)
+                    executor.submit(self.scrape_page, scraper_input, page)
                     for page in range(2, pages_to_process + 1)
                 ]

@ -169,6 +125,7 @@ class ZipRecruiterScraper(Scraper):

                 job_list += jobs

         except StatusException as e:
             return JobResponse(
                 success=False,
@ -192,9 +149,129 @@ class ZipRecruiterScraper(Scraper):
         )
         return job_response

+    def process_job_html(self, job: Tag) -> Optional[JobPost]:
+        """
+        Parses a job from the job content tag
+        :param job: BeautifulSoup Tag for one job post
+        :return JobPost
+        """
+        job_url = job.find("a", {"class": "job_link"})["href"]
+        if job_url in self.seen_urls:
+            return None
+
+        title = job.find("h2", {"class": "title"}).text
+        company = job.find("a", {"class": "company_name"}).text.strip()
+
+        description, updated_job_url = self.get_description(job_url)
+        if updated_job_url is not None:
+            job_url = updated_job_url
+        if description is None:
+            description = job.find("p", {"class": "job_snippet"}).text.strip()
+
+        job_type_element = job.find("li", {"class": "perk_item perk_type"})
+        if job_type_element:
+            job_type_text = (
+                job_type_element.text.strip()
+                .lower()
+                .replace("-", "")
+                .replace(" ", "")
+            )
+            if job_type_text == "contractor":
+                job_type_text = "contract"
+            job_type = JobType(job_type_text)
+        else:
+            job_type = None
+
+        date_posted = ZipRecruiterScraper.get_date_posted(job)
+
+        job_post = JobPost(
+            title=title,
+            description=description,
+            company_name=company,
+            location=ZipRecruiterScraper.get_location(job),
+            job_type=job_type,
+            compensation=ZipRecruiterScraper.get_compensation(job),
+            date_posted=date_posted,
+            job_url=job_url,
+        )
+        return job_post
+
+    def process_job_js(self, job: dict) -> Optional[JobPost]:
+        # Map the job data to the fields expected by the Pydantic model
+        title = job.get("Title")
+        description = BeautifulSoup(job.get("Snippet", "").strip(), "html.parser").get_text()
+
+        company = job.get("OrgName")
+        location = Location(city=job.get("City"), state=job.get("State"))
+        try:
+            job_type = ZipRecruiterScraper.job_type_from_string(job.get("EmploymentType", "").replace("-", "_").lower())
+        except ValueError:
+            # Skip the job if its employment type is unrecognized
+            return None
+
+        formatted_salary = job.get("FormattedSalaryShort", "")
+        salary_parts = formatted_salary.split(" ")
+
+        min_salary_str = salary_parts[0][1:].replace(",", "")
+        if "." in min_salary_str:
+            min_amount = int(float(min_salary_str) * 1000)
+        else:
+            min_amount = int(min_salary_str.replace("K", "000"))
+
+        if len(salary_parts) >= 3 and salary_parts[2].startswith("$"):
+            max_salary_str = salary_parts[2][1:].replace(",", "")
+            if "." in max_salary_str:
+                max_amount = int(float(max_salary_str) * 1000)
+            else:
+                max_amount = int(max_salary_str.replace("K", "000"))
+        else:
+            max_amount = 0
+
+        compensation = Compensation(
+            interval=CompensationInterval.YEARLY,
+            min_amount=min_amount,
+            max_amount=max_amount
+        )
+        save_job_url = job.get("SaveJobURL", "")
+        posted_time_match = re.search(r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url)
+        if posted_time_match:
+            date_time_str = posted_time_match.group(1)
+            date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ")
+            date_posted = date_posted_obj.date()
+        else:
+            date_posted = date.today()
+        job_url = job.get("JobURL")
+
+        return JobPost(
+            title=title,
+            description=description,
+            company_name=company,
+            location=location,
+            job_type=job_type,
+            compensation=compensation,
+            date_posted=date_posted,
+            job_url=job_url,
+        )
+
+    @staticmethod
+    def job_type_from_string(value: str) -> Optional[JobType]:
+        if not value:
+            return None
+
+        if value.lower() == "contractor":
+            value = "contract"
+        normalized_value = value.replace("_", "")
+        for item in JobType:
+            if item.value == normalized_value:
+                return item
+        raise ValueError(f"Invalid value for JobType: {value}")

-    @staticmethod
     def get_description(
-        job_page_url: str, session: tls_client.Session
+        self,
+        job_page_url: str
     ) -> Tuple[Optional[str], Optional[str]]:
         """
         Retrieves job description by going to the job page url
@ -202,7 +279,7 @@ class ZipRecruiterScraper(Scraper):
         :param session:
         :return: description or None, response url
         """
-        response = session.get(
+        response = self.session.get(
             job_page_url, headers=ZipRecruiterScraper.headers(), allow_redirects=True
         )
         if response.status_code not in range(200, 400):
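The FormattedSalaryShort parsing in process_job_js above targets strings shaped like "$32K to $40K" or "$32,000 to $48,000". A cleaned-up standalone sketch of that parsing, as a variant that also strips the trailing "K" before converting:

def parse_formatted_salary(formatted_salary: str) -> tuple[int, int]:
    """Parse "$32.5K to $40K" -> (32500, 40000); (min, 0) if no max is given."""
    parts = formatted_salary.split(" ")

    def to_amount(token: str) -> int:
        token = token.lstrip("$").replace(",", "")
        if token.endswith("K"):
            return int(float(token[:-1]) * 1000)
        return int(float(token))

    min_amount = to_amount(parts[0])
    max_amount = (
        to_amount(parts[2]) if len(parts) >= 3 and parts[2].startswith("$") else 0
    )
    return min_amount, max_amount


assert parse_formatted_salary("$32.5K to $40K") == (32500, 40000)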
main.py
@ -1,16 +0,0 @@
from fastapi import FastAPI

from supabase_py import create_client, Client
from api import router as api_router

app = FastAPI(
    title="JobSpy Backend",
    description="Endpoints for job board scrapers: LinkedIn, Indeed, and ZipRecruiter",
    version="1.0.0",
)
app.include_router(api_router)


@app.get("/health", tags=["health"])
async def health_check():
    return {"message": "JobSpy ready to scrape"}
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
@ -1,15 +0,0 @@
{
    "id": "a7ea6d58-8dca-4216-97a9-224dadc1e18f",
    "name": "JobSpy",
    "values": [
        {
            "key": "access_token",
            "value": "",
            "type": "any",
            "enabled": true
        }
    ],
    "_postman_variable_scope": "environment",
    "_postman_exported_at": "2023-07-09T23:51:36.709Z",
    "_postman_exported_using": "Postman/10.15.8"
}
@ -0,0 +1,23 @@
[tool.poetry]
name = "jobscrape"
version = "0.1.0"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com>", "Cullen Watson <cullen@cullen.ai>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10"
requests = "^2.31.0"
tls-client = "^0.2.1"
beautifulsoup4 = "^4.12.2"
pandas = "^2.1.0"
pydantic = "^2.3.0"


[tool.poetry.group.dev.dependencies]
pytest = "^7.4.1"
jupyter = "^1.0.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
@ -1,61 +0,0 @@
anyio==3.7.1
atomicwrites==1.4.1
attrs==23.1.0
bcrypt==4.0.1
beautifulsoup4==4.12.2
cachetools==5.3.1
certifi==2023.5.7
cffi==1.15.1
chardet==4.0.0
charset-normalizer==3.2.0
click==8.1.4
colorama==0.4.6
cryptography==41.0.1
dataclasses==0.6
deprecation==2.1.0
ecdsa==0.18.0
exceptiongroup==1.1.2
fastapi==0.99.1
google-auth==2.22.0
google-auth-oauthlib==1.0.0
gotrue==0.2.0
gspread==5.10.0
h11==0.14.0
httpcore==0.12.3
httplib2==0.22.0
httpx==0.16.1
idna==2.10
iniconfig==2.0.0
oauth2client==4.1.3
oauthlib==3.2.2
packaging==23.1
passlib==1.7.4
pluggy==1.2.0
postgrest-py==0.4.0
py==1.11.0
pyasn1==0.5.0
pyasn1-modules==0.3.0
pycparser==2.21
pydantic==1.10.11
pyparsing==3.1.1
pytest==6.2.5
python-dateutil==2.8.2
python-dotenv==1.0.0
python-jose==3.3.0
python-multipart==0.0.6
realtime-py==0.1.3
requests==2.25.1
requests-oauthlib==1.3.1
rfc3986==1.5.0
rsa==4.9
six==1.16.0
sniffio==1.3.0
soupsieve==2.4.1
starlette==0.27.0
supabase-py==0.0.2
tls-client==0.2.1
toml==0.10.2
typing_extensions==4.7.1
urllib3==1.26.16
uvicorn==0.22.0
websockets==9.1
settings.py
@ -1,14 +0,0 @@
from dotenv import load_dotenv
import os

load_dotenv()
# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing)
GSHEET_NAME = os.environ.get("GSHEET_NAME", "JobSpy")

# optional auth
AUTH_REQUIRED = False
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
ACCESS_TOKEN_EXPIRE_MINUTES = 60
ALGORITHM = "HS256"
@ -0,0 +1,10 @@
from jobscrape import scrape_jobs


def test_indeed():
    result = scrape_jobs(
        site_name="indeed",
        search_term="software engineer",
    )

    assert result is not None
@ -0,0 +1,10 @@
from jobscrape import scrape_jobs


def test_ziprecruiter():
    result = scrape_jobs(
        site_name="zip_recruiter",
        search_term="software engineer",
    )

    assert result is not None