mirror of https://github.com/Bunsly/JobSpy
Library Migration (#31)
parent 7efece8fe9
commit 153ac35248
@@ -1,3 +0,0 @@
{
    "experimental": "enabled"
}
@@ -1,33 +0,0 @@
name: Build and Push Docker Image

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      - name: Login to GitHub Docker Registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and Push Image
        uses: docker/build-push-action@v2
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ghcr.io/${{ github.repository_owner }}/jobspy:latest
          platforms: linux/amd64,linux/arm64
@@ -0,0 +1,33 @@
name: Publish Python 🐍 distributions 📦 to PyPI
on: push

jobs:
  build-n-publish:
    name: Build and publish Python 🐍 distributions 📦 to PyPI
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Install poetry
        run: >-
          python3 -m
          pip install
          poetry
          --user

      - name: Build distribution 📦
        run: >-
          python3 -m
          poetry
          build

      - name: Publish distribution 📦 to PyPI
        if: startsWith(github.ref, 'refs/tags')
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
@@ -1,89 +0,0 @@
name: JobSpy API Tests

on: [push, pull_request]

jobs:
  test_api:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Set up Python 3.10
        uses: actions/setup-python@v2
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Install jq
        run: sudo apt-get install jq

      - name: Start JobSpy FastAPI app
        run: uvicorn main:app --host 0.0.0.0 --port 8000 &

      - name: Wait for server to be up
        run: |
          for i in {1..10}; do
            curl -s http://0.0.0.0:8000/api/v1/jobs && break || sleep 1
          done

      - name: Check health
        run: |
          health_status=$(curl -L -s -o /dev/null -w "%{http_code}" http://0.0.0.0:8000/health)

          if [ "$health_status" != "200" ]; then
            echo "Error: Health check failed with status code $health_status"
            exit 1
          fi

      # not checking currently because of bad ip at Github's servers being blocked
      # - name: Check HTTP status of POST /api/v1/jobs/
      #   run: |
      #     response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
      #       "site_type": ["indeed", "linkedin"],
      #       "search_term": "software engineer",
      #       "location": "austin, tx",
      #       "distance": 10,
      #       "job_type": "fulltime",
      #       "results_wanted": 5
      #     }' http://0.0.0.0:8000/api/v1/jobs -w "%{http_code}")
      #
      #     status_code="${response: -3}"
      #     echo "Received status code: $status_code"
      #
      #     if [ "$status_code" != "200" ]; then
      #       echo "Error: Expected status code 200, but got $status_code"
      #       exit 1
      #     fi
      #
      #     echo "${response::-3}" > response.json
      #     cat response.json
      #
      # - name: Check error field in response
      #   run: |
      #     global_error=$(jq '.error' response.json)
      #     indeed_error=$(jq '.indeed.error' response.json)
      #     linkedin_error=$(jq '.linkedin.error' response.json)
      #
      #     if [[ "$indeed_error" != "null" || "$linkedin_error" != "null" ]]; then
      #       echo "Error found in response:"
      #       echo "Global Error: $global_error"
      #       echo "Indeed Error: $indeed_error"
      #       echo "LinkedIn Error: $linkedin_error"
      #       exit 1
      #     fi
      #
      # - name: Verify returned_results in response
      #   run: |
      #     indeed_results=$(jq '.indeed.returned_results' response.json)
      #     linkedin_results=$(jq '.linkedin.returned_results' response.json)
      #
      #     if [[ $indeed_results -ne 5 || $linkedin_results -ne 5 ]]; then
      #       echo "Mismatch in results_wanted and returned_results:"
      #       echo "Indeed: Expected 5, Got $indeed_results"
      #       echo "LinkedIn: Expected 5, Got $linkedin_results"
      #       exit 1
      #     fi
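For local debugging, the checks this workflow performs with curl and jq can be sketched in Python. This is a minimal illustration, assuming the FastAPI app above is running on localhost:8000 and using the same request shape as the commented-out step; it is not part of the workflow itself.

```python
import requests

# Same payload as the commented-out workflow step above.
payload = {
    "site_type": ["indeed", "linkedin"],
    "search_term": "software engineer",
    "location": "austin, tx",
    "distance": 10,
    "job_type": "fulltime",
    "results_wanted": 5,
}

resp = requests.post("http://localhost:8000/api/v1/jobs", json=payload, timeout=60)
assert resp.status_code == 200, f"Expected 200, got {resp.status_code}"

data = resp.json()
for site in ("indeed", "linkedin"):
    # Mirror the jq checks: no per-site error, and the requested result count.
    assert data[site]["error"] is None, f"{site} error: {data[site]['error']}"
    assert data[site]["returned_results"] == 5
```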
@@ -5,4 +5,5 @@
**/__pycache__/
*.pyc
.env
client_secret.json
dist
/.ipynb_checkpoints/
@@ -1,13 +0,0 @@
{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Module",
            "type": "python",
            "request": "launch",
            "module": "uvicorn",
            "args": ["main:app", "--reload"]
        }
    ]
}
Dockerfile (15)
@@ -1,15 +0,0 @@
FROM python:3.10-slim

WORKDIR /app

COPY . /app

RUN apt-get update && \
    apt-get install -y jq && \
    pip install --no-cache-dir -r requirements.txt

EXPOSE 8000

ENV PORT=8000

CMD sh -c "uvicorn main:app --host 0.0.0.0 --port $PORT"
@@ -0,0 +1,702 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c3f21577-477d-451e-9914-5d67e8a89075",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    … (display_data output elided: the 30-row scraped-jobs DataFrame rendered as an HTML table plus its text/plain twin; columns site, title, company_name, city, state, job_type, interval, min_amount, max_amount, job_url, description; rows 0-9 from indeed, 10-19 from linkedin, 20-29 from zip_recruiter)
   ],
   "source": [
    "from jobscrape import scrape_jobs\n",
    "import pandas as pd\n",
    "\n",
    "jobs: pd.DataFrame = scrape_jobs(\n",
    "    site_name=[\"indeed\", \"linkedin\", \"zip_recruiter\"],\n",
    "    search_term=\"software engineer\",\n",
    "    results_wanted=10\n",
    ")\n",
    "\n",
    "if jobs.empty:\n",
    "    print(\"No jobs found.\")\n",
    "else:\n",
    "\n",
    "    #1 print\n",
    "    pd.set_option('display.max_columns', None)\n",
    "    pd.set_option('display.max_rows', None)\n",
    "    pd.set_option('display.width', None)\n",
    "    pd.set_option('display.max_colwidth', 50)  # set to 0 to see full job url / desc\n",
    "    print(jobs)\n",
    "\n",
    "    #2 display in Jupyter Notebook\n",
    "    display(jobs)\n",
    "\n",
    "    #3 output to csv\n",
    "    jobs.to_csv('jobs.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "efd667ef-fdf0-452a-b5e5-ce6825755be7",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1574dc17-0a42-4655-964f-5c03a6d3deb0",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "my-poetry-env",
   "language": "python",
   "name": "my-poetry-env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
README.md (290)
@@ -1,240 +1,100 @@
# JobSpy AIO Scraper
# JobSpy

**JobSpy** is a simple, yet comprehensive, job scraping library.

## Features

- Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously
- Returns jobs as JSON or CSV with title, location, company, description & other data
- Imports directly into **Google Sheets**
- Optional JWT authorization
- Aggregates the job postings in a Pandas DataFrame

![jobspy_gsheet](https://github.com/cullenwatson/JobSpy/assets/78247585/9f0a997c-4e33-4167-b04e-31ab1f606edb)
### Installation
`pip install jobscrape`

_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_

### Usage

```python
from jobscrape import scrape_jobs
import pandas as pd

jobs: pd.DataFrame = scrape_jobs(
    site_name=["indeed", "linkedin", "zip_recruiter"],
    search_term="software engineer",
    results_wanted=10
)

if jobs.empty:
    print("No jobs found.")
else:

    #1 print
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', 50)  # set to 0 to see full job url / desc
    print(jobs)

    #2 display in Jupyter Notebook
    display(jobs)

    #3 output to csv
    jobs.to_csv('jobs.csv', index=False)
```

### Output
```
site           title                             company_name      city          state  job_type  interval  min_amount  max_amount  job_url                                            description
indeed         Software Engineer                 AMERICAN SYSTEMS  Arlington     VA     None      yearly    200000      150000      https://www.indeed.com/viewjob?jk=5e409e577046...  THIS POSITION COMES WITH A 10K SIGNING BONUS! ...
indeed         Senior Software Engineer          TherapyNotes.com  Philadelphia  PA     fulltime  yearly    135000      110000      https://www.indeed.com/viewjob?jk=da39574a40cb...  About Us TherapyNotes is the national leader i...
linkedin       Software Engineer - Early Career  Lockheed Martin   Sunnyvale     CA     fulltime  yearly    None        None        https://www.linkedin.com/jobs/view/3693012711      Description:By bringing together people that u...
linkedin       Full-Stack Software Engineer      Rain              New York      NY     fulltime  yearly    None        None        https://www.linkedin.com/jobs/view/3696158877      Rain’s mission is to create the fastest and ea...
zip_recruiter  Software Engineer - New Grad      ZipRecruiter      Santa Monica  CA     fulltime  yearly    130000      150000      https://www.ziprecruiter.com/jobs/ziprecruiter...  We offer a hybrid work environment. Most US-ba...
zip_recruiter  Software Developer                TEKsystems        Phoenix       AZ     fulltime  hourly    65          75          https://www.ziprecruiter.com/jobs/teksystems-0...  Top Skills' Details• 6 years of Java developme...
```
### Parameters for `scrape_jobs()`
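The parameter list was not filled in at this point in the README; the names below are inferred from the request schema in the API section that follows and from the usage example above, so treat this as a sketch rather than a definitive signature.

```python
from jobscrape import scrape_jobs

# Hypothetical call exercising the optional filters from the request
# schema below (the API's site_type maps to site_name in the library).
jobs = scrape_jobs(
    site_name=["indeed", "linkedin", "zip_recruiter"],
    search_term="software engineer",
    location="austin, tx",   # assumed: mirrors the API's location field
    distance=10,             # in miles
    job_type="fulltime",     # fulltime, parttime, internship, contract
    is_remote=False,
    results_wanted=15,       # per site
    easy_apply=True,         # LinkedIn only
)
print(jobs.head())
```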
### API

POST `/api/v1/jobs/`
### Request Schema
```plaintext
Required
├── site_type (List[enum]): linkedin, zip_recruiter, indeed
└── search_term (str)
Optional
├── location (str)
├── distance (int): in miles
├── job_type (enum): fulltime, parttime, internship, contract
├── is_remote (bool)
├── results_wanted (int): per site_type
├── easy_apply (bool): only for linkedin
└── output_format (enum): json, csv, gsheet
```
### Request Example
```json
{
  "site_type": ["indeed", "linkedin"],
  "search_term": "software engineer",
  "location": "austin, tx",
  "distance": 10,
  "job_type": "fulltime",
  "results_wanted": 15,
  "output_format": "gsheet"
}
```
### Response Schema
```plaintext
site_type (enum):
JobResponse
├── success (bool)
├── error (str)
├── jobs (List[JobPost])
│   └── JobPost
│       ├── title (str)
│       ├── company_name (str)
│       ├── job_url (str)
│       ├── location (object)
│       │   ├── country (str)
│       │   ├── city (str)
│       │   └── state (str)
│       ├── description (str)
│       ├── job_type (enum)
│       ├── compensation (object)
│       │   ├── interval (CompensationInterval): yearly, monthly, weekly, daily, hourly
│       │   ├── min_amount (float)
│       │   ├── max_amount (float)
│       │   └── currency (str)
│       └── date_posted (datetime)
│
├── total_results (int)
└── returned_results (int)
```
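To make the schema concrete, here is a small hedged sketch of walking one site's `JobResponse` from the parsed JSON body; the field names come straight from the tree above, but the parsing code itself is illustrative, not part of the API.

```python
# resp_json: dict parsed from the POST /api/v1/jobs/ response body.
def summarize_site(resp_json: dict, site: str) -> None:
    job_response = resp_json[site]
    if not job_response["success"]:
        print(f"{site} failed: {job_response['error']}")
        return
    print(f"{site}: {job_response['returned_results']} of "
          f"{job_response['total_results']} results")
    for job in job_response["jobs"]:
        comp = job.get("compensation")
        pay = (f"{comp['min_amount']}-{comp['max_amount']} {comp['currency']} "
               f"({comp['interval']})") if comp else "n/a"
        print(f"  {job['title']} @ {job['company_name']} [{pay}] {job['job_url']}")
```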
### Response Example (GOOGLE SHEETS)
```json
{
    "status": "Successfully uploaded to Google Sheets",
    "error": null,
    "linkedin": null,
    "indeed": null,
    "zip_recruiter": null
}
```
### Response Example (JSON)
```json
{
    "indeed": {
        "success": true,
        "error": null,
        "jobs": [
            {
                "title": "Software Engineer",
                "company_name": "INTEL",
                "job_url": "https://www.indeed.com/jobs/viewjob?jk=a2cfbb98d2002228",
                "location": {
                    "country": "USA",
                    "city": "Austin",
                    "state": "TX"
                },
                "description": "Job Description Designs, develops, tests, and debugs...",
                "job_type": "fulltime",
                "compensation": {
                    "interval": "yearly",
                    "min_amount": 209760.0,
                    "max_amount": 139480.0,
                    "currency": "USD"
                },
                "date_posted": "2023-08-18T00:00:00"
            }, ...
        ],
        "total_results": 845,
        "returned_results": 15
    },
    "linkedin": {
        "success": true,
        "error": null,
        "jobs": [
            {
                "title": "Software Engineer 1",
                "company_name": "Public Partnerships | PPL",
                "job_url": "https://www.linkedin.com/jobs/view/3690013792",
                "location": {
                    "country": "USA",
                    "city": "Austin",
                    "state": "TX"
                },
                "description": "Public Partnerships LLC supports individuals with disabilities...",
                "job_type": null,
                "compensation": null,
                "date_posted": "2023-07-31T00:00:00"
            }, ...
        ],
        "total_results": 2000,
        "returned_results": 15
    }
}
```
### Response Example (CSV)
```
Site, Title, Company Name, Job URL, Country, City, State, Job Type, Compensation Interval, Min Amount, Max Amount, Currency, Date Posted, Description
indeed, Software Engineer, INTEL, https://www.indeed.com/jobs/viewjob?jk=a2cfbb98d2002228, USA, Austin, TX, fulltime, yearly, 209760.0, 139480.0, USD, 2023-08-18T00:00:00, Job Description Designs...
linkedin, Software Engineer 1, Public Partnerships | PPL, https://www.linkedin.com/jobs/view/3690013792, USA, Austin, TX, , , , , , 2023-07-31T00:00:00, Public Partnerships LLC supports...
```
```plaintext
JobPost
├── title (str)
├── company_name (str)
├── job_url (str)
├── location (object)
│   ├── country (str)
│   ├── city (str)
│   └── state (str)
├── description (str)
├── job_type (enum)
├── compensation (object)
│   ├── interval (CompensationInterval): yearly, monthly, weekly, daily, hourly
│   ├── min_amount (float)
│   ├── max_amount (float)
│   └── currency (str)
└── date_posted (datetime)
```
## Installation
### Docker Setup
_Requires [Docker Desktop](https://www.docker.com/products/docker-desktop/)_

[JobSpy API Image](https://ghcr.io/cullenwatson/jobspy:latest) is continuously updated and available on GitHub Container Registry.
### FAQ

To pull the Docker image:

```bash
docker pull ghcr.io/cullenwatson/jobspy:latest
```
#### Encountering issues with your queries?

#### Params
Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, please submit an issue.

By default:
* Port: `8000`
* Google sheet name: `JobSpy`
* Relative path of `client_secret.json` (for Google Sheets, see below to obtain)

#### Received a response code 429?
This means you've been blocked by the job board site for sending too many requests. Consider waiting a few seconds, or try using a VPN. Proxy support coming soon.

To run the image with these default settings, use:

Example (Cmd Prompt - Windows):
```bash
docker run -v %cd%/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
```

Example (Unix):
```bash
docker run -v $(pwd)/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
```

#### Using custom params

Example:
* Port: `8030`
* Google sheet name: `CustomName`
* Absolute path of `client_secret.json`: `C:\config\client_secret.json`

To pass these custom params:
```bash
docker run -v C:\config\client_secret.json:/app/client_secret.json -e GSHEET_NAME=CustomName -e PORT=8030 -p 8030:8030 ghcr.io/cullenwatson/jobspy
```
### Python installation (alternative to Docker)
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
1. Clone this repository `git clone https://github.com/cullenwatson/jobspy`
2. Install the dependencies with `pip install -r requirements.txt`
3. Run the server with `uvicorn main:app --reload`
### Google Sheets Setup

#### Obtaining an Access Key: [Video Guide](https://youtu.be/w533wJuilao?si=5u3m50pRtdhqkg9Z&t=43)
* Enable the [Google Sheets & Google Drive API](https://console.cloud.google.com/)
* Create credentials -> service account -> create & continue
* Select role -> basic: editor -> done
* Click on the email you just created in the service account list
* Go to the Keys tab -> add key -> create new key -> JSON -> Create

#### Using the key in the repo
* Copy the key file into the JobSpy repo as `client_secret.json`
* Go to [my template sheet](https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing): File -> Make a Copy -> Rename to JobSpy
* Share the Google sheet with the email located in the field `client_email` in the `client_secret.json` above, with editor rights
* If you changed the name of the sheet:
  - Python install: add `.env` in the repo and add `GSHEET_NAME` param with the sheet name as the value, e.g. `GSHEET_NAME=CustomName`
  - Docker install: use custom param `-e GSHEET_NAME=CustomName` in `docker run` (see above)
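To sanity-check the key and the sheet sharing, the credentials flow the server uses (see the deleted CSV formatter later in this diff) can be exercised directly. A minimal sketch, assuming `client_secret.json` sits in the working directory and the sheet is named `JobSpy`:

```python
import gspread
from oauth2client.service_account import ServiceAccountCredentials

scope = [
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive.file",
    "https://www.googleapis.com/auth/drive",
]
credentials = ServiceAccountCredentials.from_json_keyfile_name("client_secret.json", scope)
gc = gspread.authorize(credentials)

# Raises SpreadsheetNotFound if the sheet was not shared with client_email.
worksheet = gc.open("JobSpy").get_worksheet(0)
print(worksheet.row_values(1))  # header row
```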

### How to call the API

#### [Postman](https://www.postman.com/downloads/) (preferred):
To use Postman:
1. Locate the files in the `/postman/` directory.
2. Import the Postman collection and environment JSON files.

#### Swagger UI:
Or you can call the API with the interactive documentation at [localhost:8000/docs](http://localhost:8000/docs).

## FAQ

### I'm having issues with my queries. What should I do?

Try reducing the number of `results_wanted` and/or broadening the filters. If issues still persist, feel free to submit an issue.

### I'm getting response code 429. What should I do?
You have been blocked by the job board site for sending too many requests. Wait a couple of seconds or use a VPN.

### How do I enable auth?

Change `AUTH_REQUIRED` in `/settings.py` to `True`.

The auth uses [supabase](https://supabase.com). Create a project with a `users` table and disable RLS.

<img src="https://github.com/cullenwatson/jobspy/assets/78247585/03af18e1-5386-49ad-a2cf-d34232d9d747" width="500">

Add these three environment variables:

- `SUPABASE_URL`: go to project settings -> API -> Project URL
- `SUPABASE_KEY`: go to project settings -> API -> service_role secret
- `JWT_SECRET_KEY`: type `openssl rand -hex 32` in a terminal to create a 32-byte secret key

Use these endpoints to register and get an access token:

![image](https://github.com/cullenwatson/jobspy/assets/78247585/c84c33ec-1fe8-4152-9c8c-6c4334aecfc3)
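For reference, the register and token endpoints shown above can be driven from Python. A hedged sketch whose paths and field names follow the routers defined later in this diff, trailing slashes included:

```python
import requests

base = "http://localhost:8000/api/auth"

# Register a user (UserCreate model: username, full_name, email, password).
requests.post(base + "/register/", json={
    "username": "alice",
    "full_name": "Alice Example",
    "email": "alice@example.com",
    "password": "s3cret",
}).raise_for_status()

# The token endpoint expects an OAuth2 form, not JSON.
resp = requests.post(base + "/token/", data={"username": "alice", "password": "s3cret"})
token = resp.json()["access_token"]

# Authenticated call to the jobs endpoint.
headers = {"Authorization": f"Bearer {token}"}
print(requests.post("http://localhost:8000/api/v1/jobs/",
                    json={"site_type": ["indeed"], "search_term": "python"},
                    headers=headers).status_code)
```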
@@ -1,9 +0,0 @@
from fastapi import APIRouter
from api.auth import router as auth_router
from .v1 import router as v1_router

router = APIRouter(
    prefix="/api",
)
router.include_router(v1_router)
router.include_router(auth_router)
@@ -1,8 +0,0 @@
from fastapi import APIRouter

from api.auth.token import router as token_router
from api.auth.register import router as register_router

router = APIRouter(prefix="/auth", tags=["auth"])
router.include_router(token_router)
router.include_router(register_router)
@@ -1,65 +0,0 @@
from datetime import datetime, timedelta

from jose import jwt, JWTError
from fastapi import HTTPException, status, Depends
from fastapi.security import OAuth2PasswordBearer

from api.core.users import TokenData
from api.auth.db_utils import UserInDB, get_user

# assumed import: these constants are referenced below, and sibling modules
# in this diff import their configuration from settings the same way
from settings import JWT_SECRET_KEY, ALGORITHM, ACCESS_TOKEN_EXPIRE_MINUTES

oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")


def create_access_token(data: dict) -> str:
    """
    Creates a JWT token based on the data provided.
    :param data: claims to encode, e.g. {"sub": username}
    :return: encoded_jwt
    """
    to_encode = data.copy()
    expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    to_encode.update({"exp": expire})
    encoded_jwt = jwt.encode(to_encode, JWT_SECRET_KEY, algorithm=ALGORITHM)
    return encoded_jwt


async def get_current_user(token: str = Depends(oauth2_scheme)):
    """
    Returns the current user associated with the provided JWT token.
    :param token: bearer token extracted by the OAuth2 scheme
    :raises HTTPException: If the token is invalid or the user does not exist.
    :return: The UserInDB instance associated with the token.
    """
    credential_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    try:
        payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
        username: str = payload.get("sub")
        if username is None:
            raise credential_exception
        token_data = TokenData(username=username)
    except JWTError:
        raise credential_exception

    current_user = get_user(token_data.username)
    if current_user is None:
        raise credential_exception
    return current_user


async def get_active_current_user(current_user: UserInDB = Depends(get_current_user)):
    """
    Returns the current user if the user account is active.

    :param current_user: A UserInDB instance representing the current user.
    :raises HTTPException: If the user account is inactive.
    :return: The UserInDB instance if the user account is active.
    """
    if current_user.disabled:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED, detail="Inactive user."
        )
    return current_user
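A quick hedged round-trip of the helper above, useful when tuning `ACCESS_TOKEN_EXPIRE_MINUTES`; `jwt.decode` here is the same python-jose call that `get_current_user` makes.

```python
from jose import jwt

token = create_access_token({"sub": "alice"})
claims = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
assert claims["sub"] == "alice"
print(claims["exp"])  # unix expiry timestamp set by create_access_token
```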
@@ -1,89 +0,0 @@
from typing import Optional, Union

from passlib.context import CryptContext
from supabase_py import create_client, Client
from fastapi import HTTPException, status

from api.core.users import UserInDB
from settings import SUPABASE_URL, SUPABASE_KEY

pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
if SUPABASE_URL:
    supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)


def create_user(user_create: UserInDB):
    """
    Creates a new user record in the 'users' table in Supabase.

    :param user_create: The data of the user to be created.
    :raises HTTPException: If an error occurs while creating the user.
    :return: The result of the insert operation.
    """
    result = supabase.table("users").insert(user_create.dict()).execute()
    print(f"Insert result: {result}")

    if "error" in result and result["error"]:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"User could not be created due to {result['error']['message']}",
        )

    return result


def get_user(username: str) -> Optional[UserInDB]:
    """
    Retrieves a user from the 'users' table by their username.

    :param username: The username of the user to retrieve.
    :return: The user data if found, otherwise None.
    """
    result = supabase.table("users").select().eq("username", username).execute()

    if "error" in result and result["error"]:
        print(f"Error: {result['error']['message']}")
        return None
    else:
        if result["data"]:
            user_data = result["data"][0]
            return UserInDB(**user_data)
        else:
            return None


def verify_password(password: str, hashed_password: str) -> bool:
    """
    Verifies a password against a hashed password using the bcrypt hashing algorithm.

    :param password: The plaintext password to verify.
    :param hashed_password: The hashed password to compare against.
    :return: True if the password matches the hashed password, otherwise False.
    """
    return pwd_context.verify(password, hashed_password)


def get_password_hash(password: str) -> str:
    """
    Hashes a password using the bcrypt hashing algorithm.

    :param password: The plaintext password to hash.
    :return: The hashed password
    """
    return pwd_context.hash(password)


def authenticate_user(username: str, password: str) -> Union[UserInDB, bool]:
    """
    Authenticates a user based on their username and password.

    :param username: The username of the user to authenticate.
    :param password: The plaintext password to authenticate.
    :return: The authenticated user if the username and password are correct, otherwise False.
    """
    user = get_user(username)
    if not user:
        return False
    if not verify_password(password, user.hashed_password):
        return False
    return user
@@ -1,33 +0,0 @@
from fastapi import APIRouter, HTTPException, status
from api.core.users import UserCreate, UserInDB
from api.auth.db_utils import get_user, get_password_hash, create_user

router = APIRouter(prefix="/register")


@router.post("/", response_model=dict)
async def register_new_user(user: UserCreate) -> dict:
    """
    Creates a new user.
    :param user: the registration payload
    :raises HTTPException: If the username already exists.
    :return: A dictionary containing a detail key with a success message.
    """
    existing_user = get_user(user.username)
    if existing_user is not None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Username already exists",
        )

    hashed_password = get_password_hash(user.password)
    user_create = UserInDB(
        username=user.username,
        email=user.email,
        full_name=user.full_name,
        hashed_password=hashed_password,
        disabled=False,
    )
    create_user(user_create)

    return {"detail": "User created successfully"}
@@ -1,30 +0,0 @@
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm

from api.core.users import Token
from api.auth.db_utils import authenticate_user
from api.auth.auth_utils import create_access_token

router = APIRouter(prefix="/token")


@router.post("/", response_model=Token)
async def login_for_access_token(
    form_data: OAuth2PasswordRequestForm = Depends(),
) -> Token:
    """
    Authenticates a user and provides an access token.
    :param form_data: OAuth2PasswordRequestForm object containing the user's credentials.
    :raises HTTPException: If the user cannot be authenticated.
    :return: A Token object containing the access token and the token type.
    """
    user = authenticate_user(form_data.username, form_data.password)
    if not user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )

    access_token = create_access_token(data={"sub": user.username})
    return Token(access_token=access_token, token_type="bearer")
@@ -1,7 +0,0 @@
from enum import Enum


class OutputFormat(Enum):
    CSV = "csv"
    JSON = "json"
    GSHEET = "gsheet"
@@ -1,133 +0,0 @@
import gspread
from oauth2client.service_account import ServiceAccountCredentials

import csv
from io import StringIO
from datetime import datetime

from ...jobs import *
from ...scrapers import *
from settings import *


class CSVFormatter:
    @staticmethod
    def fetch_job_urls(credentials: Any) -> set:
        """
        Fetches all the job urls from the google sheet to prevent duplicates
        :param credentials: service account credentials for gspread
        :return: urls
        """
        try:
            gc = gspread.authorize(credentials)
            sh = gc.open(GSHEET_NAME)

            worksheet = sh.get_worksheet(0)
            data = worksheet.get_all_values()
            job_urls = set()
            for row in data[1:]:
                job_urls.add(row[3])
            return job_urls
        except Exception as e:
            raise e

    @staticmethod
    def upload_to_google_sheet(csv_data: str):
        """
        Appends rows to google sheet
        :param csv_data: StringIO buffer of CSV rows produced by format()
        :return:
        """
        try:
            scope = [
                "https://www.googleapis.com/auth/spreadsheets",
                "https://www.googleapis.com/auth/drive.file",
                "https://www.googleapis.com/auth/drive",
            ]
            credentials = ServiceAccountCredentials.from_json_keyfile_name(
                "client_secret.json", scope
            )
            gc = gspread.authorize(credentials)
            sh = gc.open(GSHEET_NAME)

            worksheet = sh.get_worksheet(0)
            data_string = csv_data.getvalue()
            reader = csv.reader(StringIO(data_string))

            job_urls = CSVFormatter.fetch_job_urls(credentials)

            rows = list(reader)

            for i, row in enumerate(rows):
                if i == 0:
                    continue  # skip the header row
                if row[4] in job_urls:
                    continue  # skip rows already present in the sheet

                # format salary amounts with thousands separators
                row[6] = format(int(row[6]), ",d") if row[6] else ""
                row[7] = format(int(row[7]), ",d") if row[7] else ""
                worksheet.append_row(row)
        except Exception as e:
            raise e

    @staticmethod
    def generate_filename() -> str:
        """
        Adds a timestamp to the filename
        :return: filename
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return f"JobSpy_results_{timestamp}.csv"

    @staticmethod
    def format(jobs: CommonResponse) -> StringIO:
        """
        Transforms the job objects into CSV
        :param jobs:
        :return: csv
        """
        output = StringIO()
        writer = csv.writer(output)

        headers = [
            "Title",
            "Company Name",
            "City",
            "State",
            "Job Type",
            "Pay Cycle",
            "Min Amount",
            "Max Amount",
            "Date Posted",
            "Description",
            "Job URL",
        ]
        writer.writerow(headers)

        for site, job_response in jobs.dict().items():
            if isinstance(job_response, dict) and job_response.get("success"):
                for job in job_response["jobs"]:
                    writer.writerow(
                        [
                            job["title"],
                            job["company_name"],
                            job["location"]["city"],
                            job["location"]["state"],
                            job["job_type"].value if job.get("job_type") else "",
                            job["compensation"]["interval"].value
                            if job["compensation"]
                            else "",
                            job["compensation"]["min_amount"]
                            if job["compensation"]
                            else "",
                            job["compensation"]["max_amount"]
                            if job["compensation"]
                            else "",
                            job.get("date_posted", ""),
                            job["description"],
                            job["job_url"],
                        ]
                    )

        output.seek(0)
        return output
@@ -1,28 +0,0 @@
from pydantic import BaseModel


class User(BaseModel):
    username: str
    full_name: str
    email: str
    disabled: bool = False


class UserCreate(BaseModel):
    username: str
    full_name: str
    email: str
    password: str


class UserInDB(User):
    hashed_password: str


class TokenData(BaseModel):
    username: str


class Token(BaseModel):
    access_token: str
    token_type: str
@@ -1,11 +0,0 @@
from fastapi import APIRouter, Depends
from .jobs import router as jobs_router
from api.auth.auth_utils import get_active_current_user
from settings import AUTH_REQUIRED

if AUTH_REQUIRED:
    router = APIRouter(prefix="/v1", dependencies=[Depends(get_active_current_user)])
else:
    router = APIRouter(prefix="/v1")

router.include_router(jobs_router)
@@ -1,68 +0,0 @@
import io
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from concurrent.futures import ThreadPoolExecutor

from api.core.scrapers.indeed import IndeedScraper
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
from api.core.scrapers.linkedin import LinkedInScraper
from api.core.formatters.csv import CSVFormatter
from api.core.scrapers import (
    ScraperInput,
    Site,
    JobResponse,
    OutputFormat,
    CommonResponse,
)
from typing import List, Dict, Tuple, Union

router = APIRouter(prefix="/jobs", tags=["jobs"])

SCRAPER_MAPPING = {
    Site.LINKEDIN: LinkedInScraper,
    Site.INDEED: IndeedScraper,
    Site.ZIP_RECRUITER: ZipRecruiterScraper,
}


@router.post("/")
async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse:
    """
    Asynchronously scrapes job data from multiple job sites.
    :param scraper_input:
    :return: scraper_response
    """

    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
        scraper_class = SCRAPER_MAPPING[site]
        scraper = scraper_class()
        scraped_data: JobResponse = scraper.scrape(scraper_input)
        return (site.value, scraped_data)

    with ThreadPoolExecutor(max_workers=3) as executor:
        results = dict(executor.map(scrape_site, scraper_input.site_type))
    scraper_response = CommonResponse(status="JSON response success", **results)

    if scraper_input.output_format == OutputFormat.CSV:
        csv_output = CSVFormatter.format(scraper_response)
        response = StreamingResponse(csv_output, media_type="text/csv")
        response.headers[
            "Content-Disposition"
        ] = f"attachment; filename={CSVFormatter.generate_filename()}"
        return response

    elif scraper_input.output_format == OutputFormat.GSHEET:
        csv_output = CSVFormatter.format(scraper_response)
        try:
            CSVFormatter.upload_to_google_sheet(csv_output)
            return CommonResponse(
                status="Successfully uploaded to Google Sheets", **results
            )

        except Exception as e:
            return CommonResponse(
                status="Failed to upload to Google Sheet", error=repr(e), **results
            )

    else:
        return scraper_response
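The CSV branch above combines StreamingResponse with a Content-Disposition header to serve an in-memory buffer as a file download. A minimal standalone sketch of that pattern (route path, app name, and filename are illustrative, not from this repo):

import io

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

demo_app = FastAPI()


@demo_app.get("/csv-demo")  # hypothetical route, for illustration only
async def csv_demo() -> StreamingResponse:
    buffer = io.StringIO("Title,Company Name\nSoftware Engineer,Acme\n")
    response = StreamingResponse(buffer, media_type="text/csv")
    # The attachment disposition is what makes browsers save it as a file
    response.headers["Content-Disposition"] = "attachment; filename=demo.csv"
    return response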
@@ -0,0 +1,121 @@
import pandas as pd
from typing import List, Dict, Tuple, Union

from concurrent.futures import ThreadPoolExecutor

from .core.jobs import JobType
from .core.scrapers.indeed import IndeedScraper
from .core.scrapers.ziprecruiter import ZipRecruiterScraper
from .core.scrapers.linkedin import LinkedInScraper
from .core.scrapers import (
    ScraperInput,
    Site,
    JobResponse,
    CommonResponse,
)


SCRAPER_MAPPING = {
    Site.LINKEDIN: LinkedInScraper,
    Site.INDEED: IndeedScraper,
    Site.ZIP_RECRUITER: ZipRecruiterScraper,
}


def _map_str_to_site(site_name: str) -> Site:
    return Site[site_name.upper()]


def scrape_jobs(
    site_name: str | Site | List[Site],
    search_term: str,
    location: str = "",
    distance: int = None,
    is_remote: bool = False,
    job_type: JobType = None,
    easy_apply: bool = False,  # linkedin
    results_wanted: int = 15,
) -> pd.DataFrame:
    """
    Scrapes job data from the requested job sites.
    :return: pandas dataframe containing job data
    """

    if type(site_name) == str:
        site_name = _map_str_to_site(site_name)

    site_type = [site_name] if type(site_name) == Site else site_name
    scraper_input = ScraperInput(
        site_type=site_type,
        search_term=search_term,
        location=location,
        distance=distance,
        is_remote=is_remote,
        job_type=job_type,
        easy_apply=easy_apply,
        results_wanted=results_wanted,
    )

    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
        scraper_class = SCRAPER_MAPPING[site]
        scraper = scraper_class()
        scraped_data: JobResponse = scraper.scrape(scraper_input)

        return site.value, scraped_data

    results = {}
    for site in scraper_input.site_type:
        site_value, scraped_data = scrape_site(site)
        results[site_value] = scraped_data

    dfs = []

    for site, job_response in results.items():
        for job in job_response.jobs:
            data = job.dict()
            data['site'] = site

            # Formatting JobType
            data['job_type'] = data['job_type'].value if data['job_type'] else None

            # Formatting Location
            location_obj = data.get('location')
            if location_obj and isinstance(location_obj, dict):
                data['city'] = location_obj.get('city', '')
                data['state'] = location_obj.get('state', '')
                data['country'] = location_obj.get('country', 'USA')
            else:
                data['city'] = None
                data['state'] = None
                data['country'] = None

            # Formatting Compensation
            compensation_obj = data.get('compensation')
            if compensation_obj and isinstance(compensation_obj, dict):
                data['interval'] = compensation_obj.get('interval').value if compensation_obj.get('interval') else None
                data['min_amount'] = compensation_obj.get('min_amount')
                data['max_amount'] = compensation_obj.get('max_amount')
                data['currency'] = compensation_obj.get('currency', 'USD')
            else:
                data['interval'] = None
                data['min_amount'] = None
                data['max_amount'] = None
                data['currency'] = None

            job_df = pd.DataFrame([data])
            dfs.append(job_df)

    if dfs:
        df = pd.concat(dfs, ignore_index=True)
        desired_order = ['site', 'title', 'company_name', 'city', 'state', 'job_type',
                         'interval', 'min_amount', 'max_amount', 'job_url', 'description']
        df = df[desired_order]
    else:
        df = pd.DataFrame()

    return df
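A minimal usage sketch for this new entry point, modeled on the tests added at the bottom of this commit (the column selection assumes the desired_order above; actual results depend on the sites being reachable):

from jobscrape import scrape_jobs

jobs_df = scrape_jobs(
    site_name="indeed",
    search_term="software engineer",
    location="austin, tx",
    results_wanted=10,
)
# Columns follow desired_order above: site, title, company_name, city, ...
print(jobs_df[["site", "title", "company_name", "min_amount", "max_amount"]].head())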
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Optional
 from datetime import date
 from enum import Enum

@@ -19,10 +19,11 @@ class JobType(Enum):
     VOLUNTEER = "volunteer"


 class Location(BaseModel):
     country: str = "USA"
     city: str = None
-    state: str = None
+    state: Optional[str] = None


 class CompensationInterval(Enum):

@@ -35,8 +36,8 @@ class CompensationInterval(Enum):

 class Compensation(BaseModel):
     interval: CompensationInterval
-    min_amount: int
-    max_amount: int
+    min_amount: int = None
+    max_amount: int = None
     currency: str = "USD"

@@ -44,11 +45,11 @@ class JobPost(BaseModel):
     title: str
     company_name: str
     job_url: str
-    location: Location
+    location: Optional[Location]

     description: str = None
-    job_type: JobType = None
-    compensation: Compensation = None
+    job_type: Optional[JobType] = None
+    compensation: Optional[Compensation] = None
     date_posted: date = None

@@ -56,7 +57,7 @@ class JobResponse(BaseModel):
     success: bool
     error: str = None

-    total_results: int = None
+    total_results: Optional[int] = None

     jobs: list[JobPost] = []

@@ -64,6 +65,11 @@ class JobResponse(BaseModel):
     @validator("returned_results", pre=True, always=True)
     def set_returned_results(cls, v, values):
-        if v is None and values.get("jobs"):
-            return len(values["jobs"])
+        jobs_list = values.get("jobs")
+
+        if v is None:
+            if jobs_list is not None:
+                return len(jobs_list)
+            else:
+                return 0
         return v
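The net effect of these Optional changes, sketched under pydantic v1 semantics (which the @validator usage above implies); the import path and field values here are assumptions for illustration:

# Hedged import path, inferred from the relative imports in this commit
from jobscrape.core.jobs import JobPost

# A JobPost can now validate without compensation, job_type, or location;
# missing Optional fields default to None under pydantic v1.
job = JobPost(
    title="Software Engineer",  # illustrative values, not scraped data
    company_name="Acme",
    job_url="https://example.com/jobs/1",
)
assert job.compensation is None and job.job_type is None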
@@ -1,5 +1,4 @@
-from ..jobs import *
-from ..formatters import OutputFormat
+from ..jobs import Enum, BaseModel, JobType, JobResponse
 from typing import List, Dict, Optional, Any

@@ -17,12 +16,11 @@ class Site(Enum):
 class ScraperInput(BaseModel):
     site_type: List[Site]
     search_term: str
-    output_format: OutputFormat = OutputFormat.JSON

     location: str = None
-    distance: int = None
+    distance: Optional[int] = None
     is_remote: bool = False
-    job_type: JobType = None
+    job_type: Optional[JobType] = None
     easy_apply: bool = None  # linkedin

     results_wanted: int = 15
@@ -1,22 +1,18 @@
 import re
-import sys
-import math
 import json
-from typing import Optional, Tuple, List
 from datetime import datetime
+from typing import Optional, Tuple, List

 import tls_client
 import urllib.parse
 from bs4 import BeautifulSoup
 from bs4.element import Tag
-from fastapi import status

-from api.core.jobs import *
-from api.core.jobs import JobPost
-from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
+from concurrent.futures import ThreadPoolExecutor, Future
+import math
+import traceback
+import sys

+from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
+from .. import Scraper, ScraperInput, Site, StatusException


 class ParsingException(Exception):

@@ -66,8 +62,8 @@ class IndeedScraper(Scraper):
         response = session.get(self.url + "/jobs", params=params)

         if (
-            response.status_code != status.HTTP_200_OK
-            and response.status_code != status.HTTP_307_TEMPORARY_REDIRECT
+            response.status_code != 200
+            and response.status_code != 307
         ):
             raise StatusException(response.status_code)

@@ -131,7 +127,6 @@ class IndeedScraper(Scraper):
                 location=Location(
                     city=job.get("jobLocationCity"),
                     state=job.get("jobLocationState"),
-                    postal_code=job.get("jobLocationPostal"),
                 ),
                 job_type=job_type,
                 compensation=compensation,

@@ -140,9 +135,11 @@ class IndeedScraper(Scraper):
             )
             return job_post

-        for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]:
-            job_post = process_job(job)
-            job_list.append(job_post)
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            job_results: list[Future] = [
+                executor.submit(process_job, job)
+                for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]
+            ]
+
+            job_list = [result.result() for result in job_results if result.result()]

         return job_list, total_num_jobs
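The same submit-then-drain pattern in isolation (a standalone sketch; Future.result() caches its value, so calling it twice in the filter-and-collect line is safe):

from concurrent.futures import ThreadPoolExecutor, Future


def process(n: int) -> int | None:
    return n * 2 if n % 2 == 0 else None  # None mimics a skipped job


with ThreadPoolExecutor(max_workers=10) as executor:
    futures: list[Future] = [executor.submit(process, n) for n in range(1, 7)]
    # Falsy results (skipped jobs) are filtered out, as in process_job above
    results = [f.result() for f in futures if f.result()]

assert results == [4, 8, 12]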
@@ -4,10 +4,9 @@ from datetime import datetime
 import requests
 from bs4 import BeautifulSoup
 from bs4.element import Tag
-from fastapi import status

-from api.core.scrapers import Scraper, ScraperInput, Site
-from api.core.jobs import *
+from .. import Scraper, ScraperInput, Site
+from ...jobs import JobPost, Location, JobResponse, JobType, Compensation, CompensationInterval


 class LinkedInScraper(Scraper):

@@ -59,7 +58,7 @@ class LinkedInScraper(Scraper):
             f"{self.url}/jobs/search", params=params, allow_redirects=True
         )

-        if response.status_code != status.HTTP_200_OK:
+        if response.status_code != 200:
             return JobResponse(
                 success=False,
                 error=f"Response returned {response.status_code}",

@@ -118,6 +117,7 @@ class LinkedInScraper(Scraper):
                 date_posted=date_posted,
                 job_url=job_url,
                 job_type=job_type,
+                compensation=Compensation(interval=CompensationInterval.YEARLY, currency="USD")
             )
             job_list.append(job_post)
             if (

@@ -185,7 +185,6 @@ class LinkedInScraper(Scraper):
         employment_type = employment_type_span.get_text(strip=True)
         employment_type = employment_type.lower()
         employment_type = employment_type.replace("-", "")
-        print(employment_type)

         return JobType(employment_type)
@@ -1,18 +1,17 @@
 import math
 import json
 import re
 from datetime import datetime
 from typing import Optional, Tuple, List
 from urllib.parse import urlparse, parse_qs

 import tls_client
-from fastapi import status
 from bs4 import BeautifulSoup
 from bs4.element import Tag
+from concurrent.futures import ThreadPoolExecutor, Future

-from api.core.jobs import JobPost
-from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
-from api.core.jobs import *
+from .. import Scraper, ScraperInput, Site, StatusException
+from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType


 class ZipRecruiterScraper(Scraper):

@@ -26,9 +25,12 @@ class ZipRecruiterScraper(Scraper):
         self.jobs_per_page = 20
         self.seen_urls = set()
+        self.session = tls_client.Session(
+            client_identifier="chrome112", random_tls_extension_order=True
+        )

     def scrape_page(
-        self, scraper_input: ScraperInput, page: int, session: tls_client.Session
+        self, scraper_input: ScraperInput, page: int
     ) -> tuple[list[JobPost], int | None]:
         """
         Scrapes a page of ZipRecruiter for jobs with scraper_input criteria

@@ -52,91 +54,47 @@ class ZipRecruiterScraper(Scraper):
         params = {
             "search": scraper_input.search_term,
             "location": scraper_input.location,
-            "radius": scraper_input.distance,
-            "refine_by_location_type": "only_remote"
-            if scraper_input.is_remote
-            else None,
-            "refine_by_employment": f"employment_type:employment_type:{job_type_value}"
-            if job_type_value
-            else None,
             "page": page,
+            "form": "jobs-landing"
         }
+        if scraper_input.is_remote:
+            params["refine_by_location_type"] = "only_remote"

-        response = session.get(
+        if scraper_input.distance:
+            params["radius"] = scraper_input.distance
+
+        if job_type_value:
+            params["refine_by_employment"] = f"employment_type:employment_type:{job_type_value}"
+
+        response = self.session.get(
             self.url + "/jobs-search",
             headers=ZipRecruiterScraper.headers(),
             params=params,
         )

-        if response.status_code != status.HTTP_200_OK:
+        if response.status_code != 200:
             raise StatusException(response.status_code)

-        html_string = response.content
+        html_string = response.text
         soup = BeautifulSoup(html_string, "html.parser")

-        if page == 1:
-            script_tag = soup.find("script", {"id": "js_variables"})
-            data = json.loads(script_tag.string)
+        script_tag = soup.find("script", {"id": "js_variables"})
+        data = json.loads(script_tag.string)

+        if page == 1:
             job_count = int(data["totalJobCount"].replace(",", ""))
         else:
             job_count = None

-        job_posts = soup.find_all("div", {"class": "job_content"})
-
-        def process_job(job: Tag) -> Optional[JobPost]:
-            """
-            Parses a job from the job content tag
-            :param job: BeautifulSoup Tag for one job post
-            :return JobPost
-            """
-            job_url = job.find("a", {"class": "job_link"})["href"]
-            if job_url in self.seen_urls:
-                return None
-
-            title = job.find("h2", {"class": "title"}).text
-            company = job.find("a", {"class": "company_name"}).text.strip()
-
-            description, updated_job_url = ZipRecruiterScraper.get_description(
-                job_url, session
-            )
-            if updated_job_url is not None:
-                job_url = updated_job_url
-            if description is None:
-                description = job.find("p", {"class": "job_snippet"}).text.strip()
-
-            job_type_element = job.find("li", {"class": "perk_item perk_type"})
-            if job_type_element:
-                job_type_text = (
-                    job_type_element.text.strip()
-                    .lower()
-                    .replace("-", "")
-                    .replace(" ", "")
-                )
-                if job_type_text == "contractor":
-                    job_type_text = "contract"
-                job_type = JobType(job_type_text)
-            else:
-                job_type = None
-
-            date_posted = ZipRecruiterScraper.get_date_posted(job)
-
-            job_post = JobPost(
-                title=title,
-                description=description,
-                company_name=company,
-                location=ZipRecruiterScraper.get_location(job),
-                job_type=job_type,
-                compensation=ZipRecruiterScraper.get_compensation(job),
-                date_posted=date_posted,
-                job_url=job_url,
-            )
-            return job_post
-
         with ThreadPoolExecutor(max_workers=10) as executor:
-            job_results: list[Future] = [
-                executor.submit(process_job, job) for job in job_posts
-            ]
+            if "jobList" in data and data["jobList"]:
+                jobs_js = data["jobList"]
+                job_results = [executor.submit(self.process_job_js, job) for job in jobs_js]
+            else:
+                jobs_html = soup.find_all("div", {"class": "job_content"})
+                job_results = [executor.submit(self.process_job_html, job) for job in jobs_html]

         job_list = [result.result() for result in job_results if result.result()]

@@ -148,19 +106,17 @@ class ZipRecruiterScraper(Scraper):
         :param scraper_input:
         :return: job_response
         """
-        session = tls_client.Session(
-            client_identifier="chrome112", random_tls_extension_order=True
-        )

-        pages_to_process = math.ceil(scraper_input.results_wanted / self.jobs_per_page)
+        pages_to_process = max(3, math.ceil(scraper_input.results_wanted / self.jobs_per_page))

         try:
             #: get first page to initialize session
-            job_list, total_results = self.scrape_page(scraper_input, 1, session)
+            job_list, total_results = self.scrape_page(scraper_input, 1)

             with ThreadPoolExecutor(max_workers=10) as executor:
                 futures: list[Future] = [
-                    executor.submit(self.scrape_page, scraper_input, page, session)
+                    executor.submit(self.scrape_page, scraper_input, page)
                     for page in range(2, pages_to_process + 1)
                 ]

@@ -169,6 +125,7 @@ class ZipRecruiterScraper(Scraper):
                 job_list += jobs
+

         except StatusException as e:
             return JobResponse(
                 success=False,

@@ -192,9 +149,129 @@ class ZipRecruiterScraper(Scraper):
             )
         return job_response

+    def process_job_html(self, job: Tag) -> Optional[JobPost]:
+        """
+        Parses a job from the job content tag
+        :param job: BeautifulSoup Tag for one job post
+        :return JobPost
+        """
+        job_url = job.find("a", {"class": "job_link"})["href"]
+        if job_url in self.seen_urls:
+            return None
+
+        title = job.find("h2", {"class": "title"}).text
+        company = job.find("a", {"class": "company_name"}).text.strip()
+
+        description, updated_job_url = self.get_description(job_url)
+        if updated_job_url is not None:
+            job_url = updated_job_url
+        if description is None:
+            description = job.find("p", {"class": "job_snippet"}).text.strip()
+
+        job_type_element = job.find("li", {"class": "perk_item perk_type"})
+        if job_type_element:
+            job_type_text = (
+                job_type_element.text.strip()
+                .lower()
+                .replace("-", "")
+                .replace(" ", "")
+            )
+            if job_type_text == "contractor":
+                job_type_text = "contract"
+            job_type = JobType(job_type_text)
+        else:
+            job_type = None
+
+        date_posted = ZipRecruiterScraper.get_date_posted(job)
+
+        job_post = JobPost(
+            title=title,
+            description=description,
+            company_name=company,
+            location=ZipRecruiterScraper.get_location(job),
+            job_type=job_type,
+            compensation=ZipRecruiterScraper.get_compensation(job),
+            date_posted=date_posted,
+            job_url=job_url,
+        )
+        return job_post
+
+    def process_job_js(self, job: dict) -> JobPost:
+        # Map the job data to the fields expected by the Pydantic model
+        title = job.get("Title")
+        description = BeautifulSoup(job.get("Snippet", "").strip(), "html.parser").get_text()
+
+        company = job.get("OrgName")
+        location = Location(city=job.get("City"), state=job.get("State"))
+        try:
+            job_type = ZipRecruiterScraper.job_type_from_string(job.get("EmploymentType", "").replace("-", "_").lower())
+        except ValueError:
+            # print(f"Skipping job due to unrecognized job type: {job.get('EmploymentType')}")
+            return None
+
+        formatted_salary = job.get("FormattedSalaryShort", "")
+        salary_parts = formatted_salary.split(" ")
+
+        min_salary_str = salary_parts[0][1:].replace(",", "")
+        if "." in min_salary_str:
+            min_amount = int(float(min_salary_str) * 1000)
+        else:
+            min_amount = int(min_salary_str.replace("K", "000"))
+
+        if len(salary_parts) >= 3 and salary_parts[2].startswith("$"):
+            max_salary_str = salary_parts[2][1:].replace(",", "")
+            if "." in max_salary_str:
+                max_amount = int(float(max_salary_str) * 1000)
+            else:
+                max_amount = int(max_salary_str.replace("K", "000"))
+        else:
+            max_amount = 0
+
+        compensation = Compensation(
+            interval=CompensationInterval.YEARLY,
+            min_amount=min_amount,
+            max_amount=max_amount
+        )
+        save_job_url = job.get("SaveJobURL", "")
+        posted_time_match = re.search(r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url)
+        if posted_time_match:
+            date_time_str = posted_time_match.group(1)
+            date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ")
+            date_posted = date_posted_obj.date()
+        else:
+            date_posted = date.today()
+        job_url = job.get("JobURL")
+
+        return JobPost(
+            title=title,
+            description=description,
+            company_name=company,
+            location=location,
+            job_type=job_type,
+            compensation=compensation,
+            date_posted=date_posted,
+            job_url=job_url,
+        )
+
+    @staticmethod
+    def job_type_from_string(value: str) -> Optional[JobType]:
+        if not value:
+            return None
+
+        if value.lower() == "contractor":
+            value = "contract"
+        normalized_value = value.replace("_", "")
+        for item in JobType:
+            if item.value == normalized_value:
+                return item
+        raise ValueError(f"Invalid value for JobType: {value}")
+
     def get_description(
-        job_page_url: str, session: tls_client.Session
+        self,
+        job_page_url: str
     ) -> Tuple[Optional[str], Optional[str]]:
         """
         Retrieves job description by going to the job page url

@@ -202,7 +279,7 @@ class ZipRecruiterScraper(Scraper):
         :param session:
         :return: description or None, response url
         """
-        response = session.get(
+        response = self.session.get(
             job_page_url, headers=ZipRecruiterScraper.headers(), allow_redirects=True
         )
         if response.status_code not in range(200, 400):
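A worked example of the FormattedSalaryShort parsing in process_job_js above; the "$80K - $100K" input shape is an assumption inferred from the code, not from ZipRecruiter documentation:

formatted_salary = "$80K - $100K"  # assumed input shape
salary_parts = formatted_salary.split(" ")  # ["$80K", "-", "$100K"]

min_salary_str = salary_parts[0][1:].replace(",", "")  # "80K"
min_amount = int(min_salary_str.replace("K", "000"))   # 80000

# salary_parts[2] exists and starts with "$", so the max branch runs
max_salary_str = salary_parts[2][1:].replace(",", "")  # "100K"
max_amount = int(max_salary_str.replace("K", "000"))   # 100000

assert (min_amount, max_amount) == (80000, 100000)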
main.py
@@ -1,16 +0,0 @@
from fastapi import FastAPI

from supabase_py import create_client, Client
from api import router as api_router

app = FastAPI(
    title="JobSpy Backend",
    description="Endpoints for job board (LinkedIn, Indeed, and ZipRecruiter) scrapers",
    version="1.0.0",
)
app.include_router(api_router)


@app.get("/health", tags=["health"])
async def health_check():
    return {"message": "JobSpy ready to scrape"}
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
@@ -1,15 +0,0 @@
{
    "id": "a7ea6d58-8dca-4216-97a9-224dadc1e18f",
    "name": "JobSpy",
    "values": [
        {
            "key": "access_token",
            "value": "",
            "type": "any",
            "enabled": true
        }
    ],
    "_postman_variable_scope": "environment",
    "_postman_exported_at": "2023-07-09T23:51:36.709Z",
    "_postman_exported_using": "Postman/10.15.8"
}
@@ -0,0 +1,23 @@
[tool.poetry]
name = "jobscrape"
version = "0.1.0"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com>", "Cullen Watson <cullen@cullen.ai>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10"
requests = "^2.31.0"
tls-client = "^0.2.1"
beautifulsoup4 = "^4.12.2"
pandas = "^2.1.0"
pydantic = "^2.3.0"


[tool.poetry.group.dev.dependencies]
pytest = "^7.4.1"
jupyter = "^1.0.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
@@ -1,61 +0,0 @@
anyio==3.7.1
atomicwrites==1.4.1
attrs==23.1.0
bcrypt==4.0.1
beautifulsoup4==4.12.2
cachetools==5.3.1
certifi==2023.5.7
cffi==1.15.1
chardet==4.0.0
charset-normalizer==3.2.0
click==8.1.4
colorama==0.4.6
cryptography==41.0.1
dataclasses==0.6
deprecation==2.1.0
ecdsa==0.18.0
exceptiongroup==1.1.2
fastapi==0.99.1
google-auth==2.22.0
google-auth-oauthlib==1.0.0
gotrue==0.2.0
gspread==5.10.0
h11==0.14.0
httpcore==0.12.3
httplib2==0.22.0
httpx==0.16.1
idna==2.10
iniconfig==2.0.0
oauth2client==4.1.3
oauthlib==3.2.2
packaging==23.1
passlib==1.7.4
pluggy==1.2.0
postgrest-py==0.4.0
py==1.11.0
pyasn1==0.5.0
pyasn1-modules==0.3.0
pycparser==2.21
pydantic==1.10.11
pyparsing==3.1.1
pytest==6.2.5
python-dateutil==2.8.2
python-dotenv==1.0.0
python-jose==3.3.0
python-multipart==0.0.6
realtime-py==0.1.3
requests==2.25.1
requests-oauthlib==1.3.1
rfc3986==1.5.0
rsa==4.9
six==1.16.0
sniffio==1.3.0
soupsieve==2.4.1
starlette==0.27.0
supabase-py==0.0.2
tls-client==0.2.1
toml==0.10.2
typing_extensions==4.7.1
urllib3==1.26.16
uvicorn==0.22.0
websockets==9.1
settings.py
@@ -1,14 +0,0 @@
from dotenv import load_dotenv
import os

load_dotenv()
# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing)
GSHEET_NAME = os.environ.get("GSHEET_NAME", "JobSpy")

# optional auth
AUTH_REQUIRED = False
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
ACCESS_TOKEN_EXPIRE_MINUTES = 60
ALGORITHM = "HS256"
@@ -0,0 +1,10 @@
from jobscrape import scrape_jobs


def test_indeed():
    result = scrape_jobs(
        site_name="indeed",
        search_term="software engineer",
    )

    assert result is not None
@@ -0,0 +1,10 @@
from jobscrape import scrape_jobs


def test_ziprecruiter():
    result = scrape_jobs(
        site_name="zip_recruiter",
        search_term="software engineer",
    )

    assert result is not None
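For symmetry, a hedged sketch of the equivalent LinkedIn test (not part of this commit; it mirrors the two tests above and assumes "linkedin" maps to Site.LINKEDIN via _map_str_to_site):

from jobscrape import scrape_jobs


def test_linkedin():
    result = scrape_jobs(
        site_name="linkedin",
        search_term="software engineer",
    )

    assert result is not None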