Mirror of https://github.com/Bunsly/JobSpy.git (synced 2026-03-04 19:44:30 -08:00)

Commit: Library Migration (#31)
@@ -1,3 +0,0 @@
{
    "experimental": "enabled"
}
33
.github/workflows/docker-build.yml
vendored
@@ -1,33 +0,0 @@
name: Build and Push Docker Image

on:
  push:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      - name: Login to GitHub Docker Registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.DOCKER_TOKEN }}

      - name: Build and Push Image
        uses: docker/build-push-action@v2
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ghcr.io/${{ github.repository_owner }}/jobspy:latest
          platforms: linux/amd64,linux/arm64
33
.github/workflows/publish-to-pypi.yml
vendored
Normal file
@@ -0,0 +1,33 @@
name: Publish Python 🐍 distributions 📦 to PyPI

on: push

jobs:
  build-n-publish:
    name: Build and publish Python 🐍 distributions 📦 to PyPI
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Install poetry
        run: >-
          python3 -m
          pip install
          poetry
          --user

      - name: Build distribution 📦
        run: >-
          python3 -m
          poetry
          build

      - name: Publish distribution 📦 to PyPI
        if: startsWith(github.ref, 'refs/tags')
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
89
.github/workflows/test.yml
vendored
@@ -1,89 +0,0 @@
name: JobSpy API Tests

on: [push, pull_request]

jobs:
  test_api:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v2

      - name: Set up Python 3.10
        uses: actions/setup-python@v2
        with:
          python-version: '3.10'

      - name: Install dependencies
        run: pip install -r requirements.txt

      - name: Install jq
        run: sudo apt-get install jq

      - name: Start JobSpy FastAPI app
        run: uvicorn main:app --host 0.0.0.0 --port 8000 &

      - name: Wait for server to be up
        run: |
          for i in {1..10}; do
            curl -s http://0.0.0.0:8000/api/v1/jobs && break || sleep 1
          done

      - name: Check health
        run: |
          health_status=$(curl -L -s -o /dev/null -w "%{http_code}" http://0.0.0.0:8000/health)

          if [ "$health_status" != "200" ]; then
            echo "Error: Health check failed with status code $health_status"
            exit 1
          fi

      # not checking currently because of bad ip at Github's servers being blocked
      # - name: Check HTTP status to POST /api/v1/jobs/
      #   run: |
      #     response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
      #       "site_type": ["indeed", "linkedin"],
      #       "search_term": "software engineer",
      #       "location": "austin, tx",
      #       "distance": 10,
      #       "job_type": "fulltime",
      #       "results_wanted": 5
      #     }' http://0.0.0.0:8000/api/v1/jobs -w "%{http_code}")
      #
      #     status_code="${response: -3}"
      #     echo "Received status code: $status_code"
      #
      #     if [ "$status_code" != "200" ]; then
      #       echo "Error: Expected status code 200, but got $status_code"
      #       exit 1
      #     fi
      #
      #     echo "${response::-3}" > response.json
      #     cat response.json
      #
      # - name: Check error field in response
      #   run: |
      #     global_error=$(jq '.error' response.json)
      #     indeed_error=$(jq '.indeed.error' response.json)
      #     linkedin_error=$(jq '.linkedin.error' response.json)
      #
      #     if [[ "$indeed_error" != "null" || "$linkedin_error" != "null" ]]; then
      #       echo "Error found in response:"
      #       echo "Global Error: $global_error"
      #       echo "Indeed Error: $indeed_error"
      #       echo "LinkedIn Error: $linkedin_error"
      #       exit 1
      #     fi
      #
      # - name: Verify returned_results in response
      #   run: |
      #     indeed_results=$(jq '.indeed.returned_results' response.json)
      #     linkedin_results=$(jq '.linkedin.returned_results' response.json)
      #
      #     if [[ $indeed_results -ne 5 || $linkedin_results -ne 5 ]]; then
      #       echo "Mismatch in results_wanted and returned_results:"
      #       echo "Indeed: Expected 5, Got $indeed_results"
      #       echo "LinkedIn: Expected 5, Got $linkedin_results"
      #       exit 1
      #     fi
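The commented-out steps above POST a search payload to the API and then inspect the JSON response with jq. For reference, the same request expressed in Python — a sketch only: the URL, payload fields, and the `indeed.returned_results` key are copied from the workflow above, which assumes a locally running server.

```python
import requests

# mirrors the curl call in the commented-out workflow step above
payload = {
    "site_type": ["indeed", "linkedin"],
    "search_term": "software engineer",
    "location": "austin, tx",
    "distance": 10,
    "job_type": "fulltime",
    "results_wanted": 5,
}
resp = requests.post("http://0.0.0.0:8000/api/v1/jobs", json=payload)
print(resp.status_code)                                        # the workflow expects 200
print(resp.json().get("indeed", {}).get("returned_results"))   # and 5 results per site
```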
3
.gitignore
vendored
@@ -5,4 +5,5 @@
 **/__pycache__/
 *.pyc
 .env
-client_secret.json
+dist
+/.ipynb_checkpoints/
13
.vscode/launch.json
vendored
@@ -1,13 +0,0 @@
{
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Module",
            "type": "python",
            "request": "launch",
            "module": "uvicorn",
            "args": ["main:app","--reload"]
        }

    ]
}
15
Dockerfile
@@ -1,15 +0,0 @@
FROM python:3.10-slim

WORKDIR /app

COPY . /app

RUN apt-get update && \
    apt-get install -y jq && \
    pip install --no-cache-dir -r requirements.txt

EXPOSE 8000

ENV PORT=8000

CMD sh -c "uvicorn main:app --host 0.0.0.0 --port $PORT"
702
JobSpy_Demo.ipynb
Normal file
@@ -0,0 +1,702 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c3f21577-477d-451e-9914-5d67e8a89075",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [ "…30-row jobs DataFrame rendered as an HTML table (columns: site, title, company_name, city, state, job_type, interval, min_amount, max_amount, job_url, description) — omitted here for brevity…" ],
      "text/plain": [ "…plain-text preview of the same 30-row DataFrame — omitted here for brevity…" ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from jobscrape import scrape_jobs\n",
    "import pandas as pd\n",
    "\n",
    "jobs: pd.DataFrame = scrape_jobs(\n",
    "    site_name=[\"indeed\", \"linkedin\", \"zip_recruiter\"],\n",
    "    search_term=\"software engineer\",\n",
    "    results_wanted=10\n",
    ")\n",
    "\n",
    "if jobs.empty:\n",
    "    print(\"No jobs found.\")\n",
    "else:\n",
    "\n",
    "    #1 print\n",
    "    pd.set_option('display.max_columns', None)\n",
    "    pd.set_option('display.max_rows', None)\n",
    "    pd.set_option('display.width', None)\n",
    "    pd.set_option('display.max_colwidth', 50)  # set to 0 to see full job url / desc\n",
    "    print(jobs)\n",
    "\n",
    "    #2 display in Jupyter Notebook\n",
    "    display(jobs)\n",
    "\n",
    "    #3 output to csv\n",
    "    jobs.to_csv('jobs.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "efd667ef-fdf0-452a-b5e5-ce6825755be7",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1574dc17-0a42-4655-964f-5c03a6d3deb0",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "my-poetry-env",
   "language": "python",
   "name": "my-poetry-env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
290
README.md
@@ -1,240 +1,100 @@
-# JobSpy AIO Scraper
+# JobSpy

+**JobSpy** is a simple, yet comprehensive, job scraping library.
 ## Features

 - Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously
-- Returns jobs as JSON or CSV with title, location, company, description & other data
-- Imports directly into **Google Sheets**
-- Optional JWT authorization
+- Aggregates the job postings in a Pandas DataFrame

-
+### Installation
+`pip install jobscrape`

+_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_

+### Usage

+```python
+from jobscrape import scrape_jobs
+import pandas as pd
+
+jobs: pd.DataFrame = scrape_jobs(
+    site_name=["indeed", "linkedin", "zip_recruiter"],
+    search_term="software engineer",
+    results_wanted=10
+)
+
+if jobs.empty:
+    print("No jobs found.")
+else:
+    #1 print
+    pd.set_option('display.max_columns', None)
+    pd.set_option('display.max_rows', None)
+    pd.set_option('display.width', None)
+    pd.set_option('display.max_colwidth', 50)  # set to 0 to see full job url / desc
+    print(jobs)
+
+    #2 display in Jupyter Notebook
+    display(jobs)
+
+    #3 output to csv
+    jobs.to_csv('jobs.csv', index=False)
+```

+### Output
+```
+site title company_name city state job_type interval min_amount max_amount job_url description
+indeed Software Engineer AMERICAN SYSTEMS Arlington VA None yearly 200000 150000 https://www.indeed.com/viewjob?jk=5e409e577046... THIS POSITION COMES WITH A 10K SIGNING BONUS! ...
+indeed Senior Software Engineer TherapyNotes.com Philadelphia PA fulltime yearly 135000 110000 https://www.indeed.com/viewjob?jk=da39574a40cb... About Us TherapyNotes is the national leader i...
+linkedin Software Engineer - Early Career Lockheed Martin Sunnyvale CA fulltime yearly None None https://www.linkedin.com/jobs/view/3693012711 Description:By bringing together people that u...
+linkedin Full-Stack Software Engineer Rain New York NY fulltime yearly None None https://www.linkedin.com/jobs/view/3696158877 Rain’s mission is to create the fastest and ea...
+zip_recruiter Software Engineer - New Grad ZipRecruiter Santa Monica CA fulltime yearly 130000 150000 https://www.ziprecruiter.com/jobs/ziprecruiter... We offer a hybrid work environment. Most US-ba...
+zip_recruiter Software Developer TEKsystems Phoenix AZ fulltime hourly 65 75 https://www.ziprecruiter.com/jobs/teksystems-0... Top Skills' Details• 6 years of Java developme...
+```
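The `jobs` DataFrame shown above can be post-processed with ordinary pandas operations. A minimal sketch, not part of the original README — the column names are taken from the output above; the salary threshold and output filename are illustrative only:

```python
import pandas as pd

# `jobs` is the DataFrame returned by scrape_jobs() in the usage example above.
# Coerce min_amount (which may contain None) to numeric, then slice and export.
min_salary = pd.to_numeric(jobs["min_amount"], errors="coerce")   # None -> NaN
high_paying = jobs.loc[min_salary >= 100_000,
                       ["site", "title", "company_name", "min_amount", "job_url"]]
high_paying.to_csv("high_paying_jobs.csv", index=False)
```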
+### Parameters for `scrape_jobs()`

-### API

-POST `/api/v1/jobs/`
-### Request Schema
 ```plaintext
 Required
 ├── site_type (List[enum]): linkedin, zip_recruiter, indeed
 └── search_term (str)
 Optional
 ├── location (int)
-├── distance (int)
+├── distance (int): in miles
 ├── job_type (enum): fulltime, parttime, internship, contract
 ├── is_remote (bool)
-├── results_wanted (int): per site_type
+├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
-├── easy_apply (bool): only for linkedin
+├── easy_apply (bool): filters for jobs on LinkedIn that have the 'Easy Apply' option
-└── output_format (enum): json, csv, gsheet
-```
-### Request Example
-```json
-"site_type": ["indeed", "linkedin"],
-"search_term": "software engineer",
-"location": "austin, tx",
-"distance": 10,
-"job_type": "fulltime",
-"results_wanted": 15
-"output_format": "gsheet"
 ```
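For illustration (not in the original README), the optional parameters listed above map onto keyword arguments of `scrape_jobs()`. A sketch under assumptions: the keyword names follow the usage example (`site_name`) and the parameter tree above (`location`, `distance`, `job_type`, `is_remote`, `results_wanted`, `easy_apply`); verify the exact signature against the installed version.

```python
from jobscrape import scrape_jobs

jobs = scrape_jobs(
    site_name=["indeed", "linkedin"],   # tree above calls this site_type
    search_term="software engineer",
    location="austin, tx",              # free-text location
    distance=10,                        # miles
    job_type="fulltime",                # fulltime, parttime, internship, contract
    is_remote=False,
    results_wanted=15,                  # per site
    easy_apply=True,                    # LinkedIn only
)
```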

 ### Response Schema
 ```plaintext
-site_type (enum):
-JobResponse
-├── success (bool)
-├── error (str)
-├── jobs (List[JobPost])
-│ └── JobPost
-│ ├── title (str)
-│ ├── company_name (str)
-│ ├── job_url (str)
-│ ├── location (object)
-│ │ ├── country (str)
-│ │ ├── city (str)
-│ │ ├── state (str)
-│ ├── description (str)
-│ ├── job_type (enum)
-│ ├── compensation (object)
-│ │ ├── interval (CompensationInterval): yearly, monthly, weekly, daily, hourly
-│ │ ├── min_amount (float)
-│ │ ├── max_amount (float)
-│ │ └── currency (str)
-│ └── date_posted (datetime)
-│
-├── total_results (int)
-└── returned_results (int)
-```
+JobPost
+├── title (str)
+├── company_name (str)
+├── job_url (str)
+├── location (object)
+│ ├── country (str)
+│ ├── city (str)
+│ ├── state (str)
+├── description (str)
+├── job_type (enum)
+├── compensation (object)
+│ ├── interval (CompensationInterval): yearly, monthly, weekly, daily, hourly
+│ ├── min_amount (float)
+│ ├── max_amount (float)
+│ └── currency (str)
+└── date_posted (datetime)
-### Response Example (GOOGLE SHEETS)
-```json
-{
-    "status": "Successfully uploaded to Google Sheets",
-    "error": null,
-    "linkedin": null,
-    "indeed": null,
-    "zip_recruiter": null
-}
-```
-### Response Example (JSON)
-```json
-{
-    "indeed": {
-        "success": true,
-        "error": null,
-        "jobs": [
-            {
-                "title": "Software Engineer",
-                "company_name": "INTEL",
-                "job_url": "https://www.indeed.com/jobs/viewjob?jk=a2cfbb98d2002228",
-                "location": {
-                    "country": "USA",
-                    "city": "Austin",
-                    "state": "TX",
-                },
-                "description": "Job Description Designs, develops, tests, and debugs..."
-                "job_type": "fulltime",
-                "compensation": {
-                    "interval": "yearly",
-                    "min_amount": 209760.0,
-                    "max_amount": 139480.0,
-                    "currency": "USD"
-                },
-                "date_posted": "2023-08-18T00:00:00"
-            }, ...
-        ],
-        "total_results": 845,
-        "returned_results": 15
-    },
-    "linkedin": {
-        "success": true,
-        "error": null,
-        "jobs": [
-            {
-                "title": "Software Engineer 1",
-                "company_name": "Public Partnerships | PPL",
-                "job_url": "https://www.linkedin.com/jobs/view/3690013792",
-                "location": {
-                    "country": "USA",
-                    "city": "Austin",
-                    "state": "TX",
-                },
-                "description": "Public Partnerships LLC supports individuals with disabilities..."
-                "job_type": null,
-                "compensation": null,
-                "date_posted": "2023-07-31T00:00:00"
-            }, ...
-        ],
-        "total_results": 2000,
-        "returned_results": 15
-    }
-}
-```
-### Response Example (CSV)
-```
-Site, Title, Company Name, Job URL, Country, City, State, Job Type, Compensation Interval, Min Amount, Max Amount, Currency, Date Posted, Description
-indeed, Software Engineer, INTEL, https://www.indeed.com/jobs/viewjob?jk=a2cfbb98d2002228, USA, Austin, TX, fulltime, yearly, 209760.0, 139480.0, USD, 2023-08-18T00:00:00, Job Description Designs...
-linkedin, Software Engineer 1, Public Partnerships | PPL, https://www.linkedin.com/jobs/view/3690013792, USA, Austin, TX, , , , , , 2023-07-31T00:00:00, Public Partnerships LLC supports...
 ```

-## Installation
-### Docker Setup
-_Requires [Docker Desktop](https://www.docker.com/products/docker-desktop/)_

-[JobSpy API Image](https://ghcr.io/cullenwatson/jobspy:latest) is continuously updated and available on GitHub Container Registry.
+### FAQ

-To pull the Docker image:
+#### Encountering issues with your queries?

-```bash
-docker pull ghcr.io/cullenwatson/jobspy:latest
-```

-#### Params
+Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, please submit an issue.

-By default:
+#### Received a response code 429?
-* Port: `8000`
+This means you've been blocked by the job board site for sending too many requests. Consider waiting a few seconds, or try using a VPN. Proxy support coming soon.
-* Google sheet name: `JobSpy`
-* Relative path of `client_secret.json` (for Google Sheets, see below to obtain)

-To run the image with these default settings, use:

-Example (Cmd Prompt - Windows):
-```bash
-docker run -v %cd%/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
-```

-Example (Unix):
-```bash
-docker run -v $(pwd)/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
-```

-#### Using custom params

-Example:
-* Port: `8030`
-* Google sheet name: `CustomName`
-* Absolute path of `client_secret.json`: `C:\config\client_secret.json`

-To pass these custom params:
-```bash
-docker run -v C:\config\client_secret.json:/app/client_secret.json -e GSHEET_NAME=CustomName -e PORT=8030 -p 8030:8030 ghcr.io/cullenwatson/jobspy
-```

-### Python installation (alternative to Docker)
-_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
-1. Clone this repository `git clone https://github.com/cullenwatson/jobspy`
-2. Install the dependencies with `pip install -r requirements.txt`
-4. Run the server with `uvicorn main:app --reload`

-### Google Sheets Setup

-#### Obtaining an Access Key: [Video Guide](https://youtu.be/w533wJuilao?si=5u3m50pRtdhqkg9Z&t=43)
-* Enable the [Google Sheets & Google Drive API](https://console.cloud.google.com/)
-* Create credentials -> service account -> create & continue
-* Select role -> basic: editor -> done
-* Click on the email you just created in the service account list
-* Go to the Keys tab -> add key -> create new key -> JSON -> Create

-#### Using the key in the repo
-* Copy the key file into the JobSpy repo as `client_secret.json`
-* Go to [my template sheet](https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing): File -> Make a Copy -> Rename to JobSpy
-* Share the Google sheet with the email located in the field `client_email` in the `client_secret.json` above with editor rights
-* If you changed the name of the sheet:
-  - Python install: add `.env` in the repo and add `GSHEET_NAME` param with the sheet name as the value, e.g. `GSHEET_NAME=CustomName`
-  - Docker install: use custom param `-e GSHEET_NAME=CustomName` in `docker run` (see above)

-### How to call the API

-#### [Postman](https://www.postman.com/downloads/) (preferred):
-To use Postman:
-1. Locate the files in the `/postman/` directory.
-2. Import the Postman collection and environment JSON files.

-#### Swagger UI:
-Or you can call the API with the interactive documentation at [localhost:8000/docs](http://localhost:8000/docs).

-## FAQ

-### I'm having issues with my queries. What should I do?

-Try reducing the number of `results_wanted` and/or broadening the filters. If issues still persist, feel free to submit an issue.

-### I'm getting response code 429. What should I do?
-You have been blocked by the job board site for sending too many requests. Wait a couple seconds or use a VPN.

-### How to enable auth?

-Change `AUTH_REQUIRED` in `/settings.py` to `True`

-The auth uses [supabase](https://supabase.com). Create a project with a `users` table and disable RLS.

-<img src="https://github.com/cullenwatson/jobspy/assets/78247585/03af18e1-5386-49ad-a2cf-d34232d9d747" width="500">

-Add these three environment variables:

-- `SUPABASE_URL`: go to project settings -> API -> Project URL
-- `SUPABASE_KEY`: go to project settings -> API -> service_role secret
-- `JWT_SECRET_KEY` - type `openssl rand -hex 32` in terminal to create a 32 byte secret key

-Use these endpoints to register and get an access token:

-
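The removed README section above pointed at register/token endpoints via a screenshot. For context, a sketch of how those endpoints were called — the paths and field names are taken from the routers removed later in this diff (`/api/auth/register`, `/api/auth/token`, the `UserCreate` fields, and the `Token` response); the host, port, and credential values are hypothetical.

```python
import requests

BASE = "http://localhost:8000"  # hypothetical local deployment

# POST /api/auth/register — fields match the UserCreate model used in register.py below
requests.post(f"{BASE}/api/auth/register/", json={
    "username": "demo",
    "email": "demo@example.com",
    "full_name": "Demo User",
    "password": "change-me",
})

# POST /api/auth/token — OAuth2 password flow, form-encoded (see token.py below)
resp = requests.post(f"{BASE}/api/auth/token/",
                     data={"username": "demo", "password": "change-me"})
access_token = resp.json()["access_token"]

# the token is then sent as a Bearer header, matching oauth2_scheme in auth_utils.py
headers = {"Authorization": f"Bearer {access_token}"}
```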
@@ -1,9 +0,0 @@
from fastapi import APIRouter
from api.auth import router as auth_router
from .v1 import router as v1_router

router = APIRouter(
    prefix="/api",
)
router.include_router(v1_router)
router.include_router(auth_router)
@@ -1,8 +0,0 @@
from fastapi import APIRouter

from api.auth.token import router as token_router
from api.auth.register import router as register_router

router = APIRouter(prefix="/auth", tags=["auth"])
router.include_router(token_router)
router.include_router(register_router)
@@ -1,65 +0,0 @@
from datetime import datetime, timedelta

from jose import jwt, JWTError
from fastapi import HTTPException, status, Depends
from fastapi.security import OAuth2PasswordBearer

from api.core.users import TokenData
from api.auth.db_utils import UserInDB, get_user

oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")


def create_access_token(data: dict) -> str:
    """
    Creates a JWT token based on the data provided.
    :param data
    :return: encoded_jwt
    """
    to_encode = data.copy()
    expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
    to_encode.update({"exp": expire})
    encoded_jwt = jwt.encode(to_encode, JWT_SECRET_KEY, algorithm=ALGORITHM)
    return encoded_jwt


async def get_current_user(token: str = Depends(oauth2_scheme)):
    """
    Returns the current user associated with the provided JWT token.
    :param token
    :raises HTTPException: If the token is invalid or the user does not exist.
    :return: The UserInDB instance associated with the token.
    """
    credential_exception = HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Could not validate credentials",
        headers={"WWW-Authenticate": "Bearer"},
    )
    try:
        payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
        username: str = payload.get("sub")
        if username is None:
            raise credential_exception
        token_data = TokenData(username=username)
    except JWTError:
        raise credential_exception

    current_user = get_user(token_data.username)
    if current_user is None:
        raise credential_exception
    return current_user


async def get_active_current_user(current_user: UserInDB = Depends(get_current_user)):
    """
    Returns the current user if the user account is active.

    :param current_user: A UserInDB instance representing the current user.
    :raises HTTPException: If the user account is inactive.
    :return: The UserInDB instance if the user account is active.
    """
    if current_user.disabled:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED, detail="Inactive user."
        )
    return current_user
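For context only (not part of this diff): these dependencies were intended to guard routes in the usual FastAPI way. A hedged sketch with a hypothetical route name:

```python
from fastapi import APIRouter, Depends

router = APIRouter()

# hypothetical protected route: FastAPI resolves the Bearer token via oauth2_scheme,
# then get_active_current_user (defined above) rejects invalid tokens and disabled accounts
@router.get("/me")
async def read_own_profile(current_user=Depends(get_active_current_user)):
    return {"username": current_user.username}
```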
@@ -1,89 +0,0 @@
from typing import Optional, Union

from passlib.context import CryptContext
from supabase_py import create_client, Client
from fastapi import HTTPException, status

from api.core.users import UserInDB
from settings import SUPABASE_URL, SUPABASE_KEY

pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
if SUPABASE_URL:
    supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)


def create_user(user_create: UserInDB):
    """
    Creates a new user record in the 'users' table in Supabase.

    :param user_create: The data of the user to be created.
    :raises HTTPException: If an error occurs while creating the user.
    :return: The result of the insert operation.
    """
    result = supabase.table("users").insert(user_create.dict()).execute()
    print(f"Insert result: {result}")

    if "error" in result and result["error"]:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"User could not be created due to {result['error']['message']}",
        )

    return result


def get_user(username: str) -> Optional[UserInDB]:
    """
    Retrieves a user from the 'users' table by their username.

    :param username: The username of the user to retrieve.
    :return: The user data if found, otherwise None.
    """
    result = supabase.table("users").select().eq("username", username).execute()

    if "error" in result and result["error"]:
        print(f"Error: {result['error']['message']}")
        return None
    else:
        if result["data"]:
            user_data = result["data"][0]
            return UserInDB(**user_data)
        else:
            return None


def verify_password(password: str, hashed_password: str) -> bool:
    """
    Verifies a password against a hashed password using the bcrypt hashing algorithm.

    :param password: The plaintext password to verify.
    :param hashed_password: The hashed password to compare against.
    :return: True if the password matches the hashed password, otherwise False.
    """
    return pwd_context.verify(password, hashed_password)


def get_password_hash(password: str) -> str:
    """
    Hashes a password using the bcrypt hashing algorithm.

    :param password: The plaintext password to hash.
    :return: The hashed password
    """
    return pwd_context.hash(password)


def authenticate_user(username: str, password: str) -> Union[UserInDB, bool]:
    """
    Authenticates a user based on their username and password.

    :param username: The username of the user to authenticate.
    :param password: The plaintext password to authenticate.
    :return: The authenticated user if the username and password are correct, otherwise False.
    """
    user = get_user(username)
    if not user:
        return False
    if not verify_password(password, user.hashed_password):
        return False
    return user
@@ -1,33 +0,0 @@
from fastapi import APIRouter, HTTPException, status
from api.core.users import UserCreate, UserInDB
from api.auth.db_utils import get_user, get_password_hash, create_user

router = APIRouter(prefix="/register")


@router.post("/", response_model=dict)
async def register_new_user(user: UserCreate) -> dict:
    """
    Creates new user
    :param user:
    :raises HTTPException: If the username already exists.
    :return: A dictionary containing a detail key with a success message.
    """
    existing_user = get_user(user.username)
    if existing_user is not None:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Username already exists",
        )

    hashed_password = get_password_hash(user.password)
    user_create = UserInDB(
        username=user.username,
        email=user.email,
        full_name=user.full_name,
        hashed_password=hashed_password,
        disabled=False,
    )
    create_user(user_create)

    return {"detail": "User created successfully"}
@@ -1,30 +0,0 @@
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm

from api.core.users import Token
from api.auth.db_utils import authenticate_user
from api.auth.auth_utils import create_access_token

router = APIRouter(prefix="/token")


@router.post("/", response_model=Token)
async def login_for_access_token(
    form_data: OAuth2PasswordRequestForm = Depends(),
) -> Token:
    """
    Authenticates a user and provides an access token.
    :param form_data: OAuth2PasswordRequestForm object containing the user's credentials.
    :raises HTTPException: If the user cannot be authenticated.
    :return: A Token object containing the access token and the token type.
    """
    user = authenticate_user(form_data.username, form_data.password)
    if not user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )

    access_token = create_access_token(data={"sub": user.username})
    return Token(access_token=access_token, token_type="bearer")
@@ -1,7 +0,0 @@
from enum import Enum


class OutputFormat(Enum):
    CSV = "csv"
    JSON = "json"
    GSHEET = "gsheet"
@@ -1,133 +0,0 @@
import gspread
from oauth2client.service_account import ServiceAccountCredentials

import csv
from io import StringIO
from datetime import datetime

from ...jobs import *
from ...scrapers import *
from settings import *


class CSVFormatter:
    @staticmethod
    def fetch_job_urls(credentials: Any) -> set:
        """
        Fetches all the job urls from the google sheet to prevent duplicates
        :param credentials:
        :return: urls
        """
        try:
            gc = gspread.authorize(credentials)
            sh = gc.open(GSHEET_NAME)

            worksheet = sh.get_worksheet(0)
            data = worksheet.get_all_values()
            job_urls = set()
            for row in data[1:]:
                job_urls.add(row[3])
            return job_urls
        except Exception as e:
            raise e

    @staticmethod
    def upload_to_google_sheet(csv_data: str):
        """
        Appends rows to google sheet
        :param csv_data:
        :return:
        """
        try:
            scope = [
                "https://www.googleapis.com/auth/spreadsheets",
                "https://www.googleapis.com/auth/drive.file",
                "https://www.googleapis.com/auth/drive",
            ]
            credentials = ServiceAccountCredentials.from_json_keyfile_name(
                "client_secret.json", scope
            )
            gc = gspread.authorize(credentials)
            sh = gc.open(GSHEET_NAME)

            worksheet = sh.get_worksheet(0)
            data_string = csv_data.getvalue()
            reader = csv.reader(StringIO(data_string))

            job_urls = CSVFormatter.fetch_job_urls(credentials)

            rows = list(reader)

            for i, row in enumerate(rows):
                if i == 0:
                    continue
                if row[4] in job_urls:
                    continue

                row[6] = format(int(row[6]), ",d") if row[6] else ""
                row[7] = format(int(row[7]), ",d") if row[7] else ""
                worksheet.append_row(row)
        except Exception as e:
            raise e

    @staticmethod
    def generate_filename() -> str:
        """
        Adds a timestamp to the filename header
        :return: filename
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return f"JobSpy_results_{timestamp}.csv"

    @staticmethod
    def format(jobs: CommonResponse) -> StringIO:
        """
        Transforms the job objects into csv
        :param jobs:
        :return: csv
        """
        output = StringIO()
        writer = csv.writer(output)

        headers = [
            "Title",
            "Company Name",
            "City",
            "State",
            "Job Type",
            "Pay Cycle",
            "Min Amount",
            "Max Amount",
            "Date Posted",
            "Description",
            "Job URL",
        ]
        writer.writerow(headers)

        for site, job_response in jobs.dict().items():
            if isinstance(job_response, dict) and job_response.get("success"):
                for job in job_response["jobs"]:
                    writer.writerow(
                        [
                            job["title"],
                            job["company_name"],
                            job["location"]["city"],
                            job["location"]["state"],
                            job["job_type"].value if job.get("job_type") else "",
                            job["compensation"]["interval"].value
                            if job["compensation"]
                            else "",
                            job["compensation"]["min_amount"]
                            if job["compensation"]
                            else "",
                            job["compensation"]["max_amount"]
                            if job["compensation"]
                            else "",
                            job.get("date_posted", ""),
                            job["description"],
                            job["job_url"],
                        ]
                    )

        output.seek(0)
        return output
@@ -1,28 +0,0 @@
from pydantic import BaseModel


class User(BaseModel):
    username: str
    full_name: str
    email: str
    disabled: bool = False


class UserCreate(BaseModel):
    username: str
    full_name: str
    email: str
    password: str


class UserInDB(User):
    hashed_password: str


class TokenData(BaseModel):
    username: str


class Token(BaseModel):
    access_token: str
    token_type: str
@@ -1,11 +0,0 @@
from fastapi import APIRouter, Depends
from .jobs import router as jobs_router
from api.auth.auth_utils import get_active_current_user
from settings import AUTH_REQUIRED

if AUTH_REQUIRED:
    router = APIRouter(prefix="/v1", dependencies=[Depends(get_active_current_user)])
else:
    router = APIRouter(prefix="/v1")

router.include_router(jobs_router)
@@ -1,68 +0,0 @@
import io
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from concurrent.futures import ThreadPoolExecutor

from api.core.scrapers.indeed import IndeedScraper
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
from api.core.scrapers.linkedin import LinkedInScraper
from api.core.formatters.csv import CSVFormatter
from api.core.scrapers import (
    ScraperInput,
    Site,
    JobResponse,
    OutputFormat,
    CommonResponse,
)
from typing import List, Dict, Tuple, Union

router = APIRouter(prefix="/jobs", tags=["jobs"])

SCRAPER_MAPPING = {
    Site.LINKEDIN: LinkedInScraper,
    Site.INDEED: IndeedScraper,
    Site.ZIP_RECRUITER: ZipRecruiterScraper,
}


@router.post("/")
async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse:
    """
    Asynchronously scrapes job data from multiple job sites.
    :param scraper_input:
    :return: scraper_response
    """

    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
        scraper_class = SCRAPER_MAPPING[site]
        scraper = scraper_class()
        scraped_data: JobResponse = scraper.scrape(scraper_input)
        return (site.value, scraped_data)

    with ThreadPoolExecutor(max_workers=3) as executor:
        results = dict(executor.map(scrape_site, scraper_input.site_type))
    scraper_response = CommonResponse(status="JSON response success", **results)

    if scraper_input.output_format == OutputFormat.CSV:
        csv_output = CSVFormatter.format(scraper_response)
        response = StreamingResponse(csv_output, media_type="text/csv")
        response.headers[
            "Content-Disposition"
        ] = f"attachment; filename={CSVFormatter.generate_filename()}"
        return response

    elif scraper_input.output_format == OutputFormat.GSHEET:
        csv_output = CSVFormatter.format(scraper_response)
        try:
            CSVFormatter.upload_to_google_sheet(csv_output)
            return CommonResponse(
                status="Successfully uploaded to Google Sheets", **results
            )

        except Exception as e:
            return CommonResponse(
                status="Failed to upload to Google Sheet", error=repr(e), **results
            )

    else:
        return scraper_response
121 jobscrape/__init__.py Normal file
@@ -0,0 +1,121 @@
import pandas as pd
from typing import List, Dict, Tuple, Union

from concurrent.futures import ThreadPoolExecutor

from .core.jobs import JobType
from .core.scrapers.indeed import IndeedScraper
from .core.scrapers.ziprecruiter import ZipRecruiterScraper
from .core.scrapers.linkedin import LinkedInScraper
from .core.scrapers import (
    ScraperInput,
    Site,
    JobResponse,
    CommonResponse,
)


SCRAPER_MAPPING = {
    Site.LINKEDIN: LinkedInScraper,
    Site.INDEED: IndeedScraper,
    Site.ZIP_RECRUITER: ZipRecruiterScraper,
}


def _map_str_to_site(site_name: str) -> Site:
    return Site[site_name.upper()]


def scrape_jobs(
    site_name: str | Site | List[Site],
    search_term: str,

    location: str = "",
    distance: int = None,
    is_remote: bool = False,
    job_type: JobType = None,
    easy_apply: bool = False,  # linkedin
    results_wanted: int = 15
) -> pd.DataFrame:
    """
    Asynchronously scrapes job data from multiple job sites.
    :return: results_wanted: pandas dataframe containing job data
    """

    if type(site_name) == str:
        site_name = _map_str_to_site(site_name)

    site_type = [site_name] if type(site_name) == Site else site_name
    scraper_input = ScraperInput(
        site_type=site_type,
        search_term=search_term,
        location=location,
        distance=distance,
        is_remote=is_remote,
        job_type=job_type,
        easy_apply=easy_apply,
        results_wanted=results_wanted,
    )

    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
        scraper_class = SCRAPER_MAPPING[site]
        scraper = scraper_class()
        scraped_data: JobResponse = scraper.scrape(scraper_input)

        return site.value, scraped_data

    results = {}
    for site in scraper_input.site_type:
        site_value, scraped_data = scrape_site(site)
        results[site_value] = scraped_data

    dfs = []

    for site, job_response in results.items():
        for job in job_response.jobs:
            data = job.dict()
            data['site'] = site

            # Formatting JobType
            data['job_type'] = data['job_type'].value if data['job_type'] else None

            # Formatting Location
            location_obj = data.get('location')
            if location_obj and isinstance(location_obj, dict):
                data['city'] = location_obj.get('city', '')
                data['state'] = location_obj.get('state', '')
                data['country'] = location_obj.get('country', 'USA')
            else:
                data['city'] = None
                data['state'] = None
                data['country'] = None

            # Formatting Compensation
            compensation_obj = data.get('compensation')
            if compensation_obj and isinstance(compensation_obj, dict):
                data['interval'] = compensation_obj.get('interval').value if compensation_obj.get('interval') else None
                data['min_amount'] = compensation_obj.get('min_amount')
                data['max_amount'] = compensation_obj.get('max_amount')
                data['currency'] = compensation_obj.get('currency', 'USD')
            else:
                data['interval'] = None
                data['min_amount'] = None
                data['max_amount'] = None
                data['currency'] = None

            job_df = pd.DataFrame([data])
            dfs.append(job_df)

    if dfs:
        df = pd.concat(dfs, ignore_index=True)
        desired_order = ['site', 'title', 'company_name', 'city', 'state', 'job_type',
                         'interval', 'min_amount', 'max_amount', 'job_url', 'description', ]
        df = df[desired_order]
    else:
        df = pd.DataFrame()

    return df
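For context, the new top-level entry point can be driven directly from Python; a usage sketch based on the signature above and the tests added below (the location value is a made-up example):

    import pandas as pd
    from jobscrape import scrape_jobs

    jobs_df: pd.DataFrame = scrape_jobs(
        site_name="indeed",               # a string, a Site member, or a list of Sites
        search_term="software engineer",
        location="Dallas, TX",            # hypothetical example value
        results_wanted=10,
    )
    print(jobs_df[["site", "title", "company_name", "min_amount", "max_amount"]].head())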
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Optional
 from datetime import date
 from enum import Enum

@@ -19,10 +19,11 @@ class JobType(Enum):
     VOLUNTEER = "volunteer"


 class Location(BaseModel):
     country: str = "USA"
     city: str = None
-    state: str = None
+    state: Optional[str] = None


 class CompensationInterval(Enum):
@@ -35,8 +36,8 @@ class CompensationInterval(Enum):

 class Compensation(BaseModel):
     interval: CompensationInterval
-    min_amount: int
-    max_amount: int
+    min_amount: int = None
+    max_amount: int = None
     currency: str = "USD"

@@ -44,11 +45,11 @@ class JobPost(BaseModel):
     title: str
     company_name: str
     job_url: str
-    location: Location
+    location: Optional[Location]

     description: str = None
-    job_type: JobType = None
-    compensation: Compensation = None
+    job_type: Optional[JobType] = None
+    compensation: Optional[Compensation] = None
     date_posted: date = None

@@ -56,7 +57,7 @@ class JobResponse(BaseModel):
     success: bool
     error: str = None

-    total_results: int = None
+    total_results: Optional[int] = None

     jobs: list[JobPost] = []

@@ -64,6 +65,11 @@ class JobResponse(BaseModel):

     @validator("returned_results", pre=True, always=True)
     def set_returned_results(cls, v, values):
-        if v is None and values.get("jobs"):
-            return len(values["jobs"])
+        jobs_list = values.get("jobs")
+
+        if v is None:
+            if jobs_list is not None:
+                return len(jobs_list)
+            else:
+                return 0
         return v
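The practical effect of these model changes is that a JobPost no longer needs location, job type, or compensation data to validate; a minimal sketch using only the fields shown in the hunks above (values are made up):

    post = JobPost(
        title="Backend Engineer",
        company_name="Acme Corp",                  # hypothetical
        job_url="https://example.com/jobs/123",    # hypothetical
        location=None,        # now Optional[Location]
        job_type=None,        # now Optional[JobType]
        compensation=None,    # now Optional[Compensation]
    )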
@@ -1,5 +1,4 @@
-from ..jobs import *
-from ..formatters import OutputFormat
+from ..jobs import Enum, BaseModel, JobType, JobResponse
 from typing import List, Dict, Optional, Any


@@ -17,12 +16,11 @@ class Site(Enum):
 class ScraperInput(BaseModel):
     site_type: List[Site]
     search_term: str
-    output_format: OutputFormat = OutputFormat.JSON

     location: str = None
-    distance: int = None
+    distance: Optional[int] = None
     is_remote: bool = False
-    job_type: JobType = None
+    job_type: Optional[JobType] = None
     easy_apply: bool = None  # linkedin

     results_wanted: int = 15
@@ -1,22 +1,18 @@
 import re
+import sys
+import math
 import json
-from typing import Optional, Tuple, List
 from datetime import datetime
+from typing import Optional, Tuple, List

 import tls_client
 import urllib.parse
 from bs4 import BeautifulSoup
 from bs4.element import Tag
-from fastapi import status

-from api.core.jobs import *
-from api.core.jobs import JobPost
-from api.core.scrapers import Scraper, ScraperInput, Site, StatusException

 from concurrent.futures import ThreadPoolExecutor, Future
-import math
-import traceback
-import sys
+from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
+from .. import Scraper, ScraperInput, Site, StatusException


 class ParsingException(Exception):
@@ -66,8 +62,8 @@ class IndeedScraper(Scraper):
         response = session.get(self.url + "/jobs", params=params)

         if (
-            response.status_code != status.HTTP_200_OK
-            and response.status_code != status.HTTP_307_TEMPORARY_REDIRECT
+            response.status_code != 200
+            and response.status_code != 307
         ):
             raise StatusException(response.status_code)

@@ -131,7 +127,6 @@ class IndeedScraper(Scraper):
                 location=Location(
                     city=job.get("jobLocationCity"),
                     state=job.get("jobLocationState"),
-                    postal_code=job.get("jobLocationPostal"),
                 ),
                 job_type=job_type,
                 compensation=compensation,
@@ -140,9 +135,11 @@ class IndeedScraper(Scraper):
             )
             return job_post

-        for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]:
-            job_post = process_job(job)
-            job_list.append(job_post)
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            job_results: list[Future] = [executor.submit(process_job, job) for job in
+                                         jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]]
+
+            job_list = [result.result() for result in job_results if result.result()]

         return job_list, total_num_jobs

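The submit-then-collect pattern introduced here is the same one the ZipRecruiter scraper uses below; a stripped-down sketch with placeholder names (process_item and items stand in for process_job and the scraped job cards):

    from concurrent.futures import ThreadPoolExecutor, Future

    def process_item(item: int):            # placeholder for process_job()
        return item if item % 2 else None   # None results are filtered out

    items = range(10)                       # placeholder for the job cards
    with ThreadPoolExecutor(max_workers=10) as executor:
        futures: list[Future] = [executor.submit(process_item, item) for item in items]
        results = [f.result() for f in futures if f.result()]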
@@ -4,10 +4,9 @@ from datetime import datetime
 import requests
 from bs4 import BeautifulSoup
 from bs4.element import Tag
-from fastapi import status

-from api.core.scrapers import Scraper, ScraperInput, Site
-from api.core.jobs import *
+from .. import Scraper, ScraperInput, Site
+from ...jobs import JobPost, Location, JobResponse, JobType, Compensation, CompensationInterval


 class LinkedInScraper(Scraper):
@@ -59,7 +58,7 @@ class LinkedInScraper(Scraper):
             f"{self.url}/jobs/search", params=params, allow_redirects=True
         )

-        if response.status_code != status.HTTP_200_OK:
+        if response.status_code != 200:
             return JobResponse(
                 success=False,
                 error=f"Response returned {response.status_code}",
@@ -118,6 +117,7 @@ class LinkedInScraper(Scraper):
                 date_posted=date_posted,
                 job_url=job_url,
                 job_type=job_type,
+                compensation=Compensation(interval=CompensationInterval.YEARLY, currency="USD")
             )
             job_list.append(job_post)
             if (
@@ -185,7 +185,6 @@ class LinkedInScraper(Scraper):
             employment_type = employment_type_span.get_text(strip=True)
             employment_type = employment_type.lower()
             employment_type = employment_type.replace("-", "")
-            print(employment_type)

             return JobType(employment_type)

@@ -1,18 +1,17 @@
 import math
 import json
+import re
 from datetime import datetime
 from typing import Optional, Tuple, List
 from urllib.parse import urlparse, parse_qs

 import tls_client
-from fastapi import status
 from bs4 import BeautifulSoup
 from bs4.element import Tag
 from concurrent.futures import ThreadPoolExecutor, Future

-from api.core.jobs import JobPost
-from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
-from api.core.jobs import *
+from .. import Scraper, ScraperInput, Site, StatusException
+from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType


 class ZipRecruiterScraper(Scraper):
@@ -26,9 +25,12 @@ class ZipRecruiterScraper(Scraper):

         self.jobs_per_page = 20
         self.seen_urls = set()
+        self.session = tls_client.Session(
+            client_identifier="chrome112", random_tls_extension_order=True
+        )

     def scrape_page(
-        self, scraper_input: ScraperInput, page: int, session: tls_client.Session
+        self, scraper_input: ScraperInput, page: int
     ) -> tuple[list[JobPost], int | None]:
         """
         Scrapes a page of ZipRecruiter for jobs with scraper_input criteria
@@ -52,91 +54,47 @@ class ZipRecruiterScraper(Scraper):
         params = {
             "search": scraper_input.search_term,
             "location": scraper_input.location,
-            "radius": scraper_input.distance,
-            "refine_by_location_type": "only_remote"
-            if scraper_input.is_remote
-            else None,
-            "refine_by_employment": f"employment_type:employment_type:{job_type_value}"
-            if job_type_value
-            else None,
             "page": page,
+            "form": "jobs-landing"
         }

-        response = session.get(
+        if scraper_input.is_remote:
+            params["refine_by_location_type"] = "only_remote"
+
+        if scraper_input.distance:
+            params["radius"] = scraper_input.distance
+
+        if job_type_value:
+            params["refine_by_employment"] = f"employment_type:employment_type:{job_type_value}"
+
+        response = self.session.get(
             self.url + "/jobs-search",
             headers=ZipRecruiterScraper.headers(),
             params=params,
         )

-        if response.status_code != status.HTTP_200_OK:
+        if response.status_code != 200:
             raise StatusException(response.status_code)

-        html_string = response.content
+        html_string = response.text
         soup = BeautifulSoup(html_string, "html.parser")

-        if page == 1:
-            script_tag = soup.find("script", {"id": "js_variables"})
-            data = json.loads(script_tag.string)
+        script_tag = soup.find("script", {"id": "js_variables"})
+        data = json.loads(script_tag.string)

+        if page == 1:
             job_count = int(data["totalJobCount"].replace(",", ""))
         else:
             job_count = None

-        job_posts = soup.find_all("div", {"class": "job_content"})
-
-        def process_job(job: Tag) -> Optional[JobPost]:
-            """
-            Parses a job from the job content tag
-            :param job: BeautifulSoup Tag for one job post
-            :return JobPost
-            """
-            job_url = job.find("a", {"class": "job_link"})["href"]
-            if job_url in self.seen_urls:
-                return None
-
-            title = job.find("h2", {"class": "title"}).text
-            company = job.find("a", {"class": "company_name"}).text.strip()
-
-            description, updated_job_url = ZipRecruiterScraper.get_description(
-                job_url, session
-            )
-            if updated_job_url is not None:
-                job_url = updated_job_url
-            if description is None:
-                description = job.find("p", {"class": "job_snippet"}).text.strip()
-
-            job_type_element = job.find("li", {"class": "perk_item perk_type"})
-            if job_type_element:
-                job_type_text = (
-                    job_type_element.text.strip()
-                    .lower()
-                    .replace("-", "")
-                    .replace(" ", "")
-                )
-                if job_type_text == "contractor":
-                    job_type_text = "contract"
-                job_type = JobType(job_type_text)
-            else:
-                job_type = None
-
-            date_posted = ZipRecruiterScraper.get_date_posted(job)
-
-            job_post = JobPost(
-                title=title,
-                description=description,
-                company_name=company,
-                location=ZipRecruiterScraper.get_location(job),
-                job_type=job_type,
-                compensation=ZipRecruiterScraper.get_compensation(job),
-                date_posted=date_posted,
-                job_url=job_url,
-            )
-            return job_post
-
         with ThreadPoolExecutor(max_workers=10) as executor:
-            job_results: list[Future] = [
-                executor.submit(process_job, job) for job in job_posts
-            ]
+            if "jobList" in data and data["jobList"]:
+                jobs_js = data["jobList"]
+                job_results = [executor.submit(self.process_job_js, job) for job in jobs_js]
+            else:
+                jobs_html = soup.find_all("div", {"class": "job_content"})
+                job_results = [executor.submit(self.process_job_html, job) for job in
+                               jobs_html]

             job_list = [result.result() for result in job_results if result.result()]

@@ -148,19 +106,17 @@ class ZipRecruiterScraper(Scraper):
        :param scraper_input:
        :return: job_response
        """
-        session = tls_client.Session(
-            client_identifier="chrome112", random_tls_extension_order=True
-        )

-        pages_to_process = math.ceil(scraper_input.results_wanted / self.jobs_per_page)
+        pages_to_process = max(3, math.ceil(scraper_input.results_wanted / self.jobs_per_page))

         try:
             #: get first page to initialize session
-            job_list, total_results = self.scrape_page(scraper_input, 1, session)
+            job_list, total_results = self.scrape_page(scraper_input, 1)

             with ThreadPoolExecutor(max_workers=10) as executor:
                 futures: list[Future] = [
-                    executor.submit(self.scrape_page, scraper_input, page, session)
+                    executor.submit(self.scrape_page, scraper_input, page)
                     for page in range(2, pages_to_process + 1)
                 ]

@@ -169,6 +125,7 @@ class ZipRecruiterScraper(Scraper):

                 job_list += jobs

+
         except StatusException as e:
             return JobResponse(
                 success=False,
@@ -192,9 +149,129 @@ class ZipRecruiterScraper(Scraper):
         )
         return job_response

+    def process_job_html(self, job: Tag) -> Optional[JobPost]:
+        """
+        Parses a job from the job content tag
+        :param job: BeautifulSoup Tag for one job post
+        :return JobPost
+        """
+        job_url = job.find("a", {"class": "job_link"})["href"]
+        if job_url in self.seen_urls:
+            return None
+
+        title = job.find("h2", {"class": "title"}).text
+        company = job.find("a", {"class": "company_name"}).text.strip()
+
+        description, updated_job_url = self.get_description(
+            job_url
+        )
+        if updated_job_url is not None:
+            job_url = updated_job_url
+        if description is None:
+            description = job.find("p", {"class": "job_snippet"}).text.strip()
+
+        job_type_element = job.find("li", {"class": "perk_item perk_type"})
+        if job_type_element:
+            job_type_text = (
+                job_type_element.text.strip()
+                .lower()
+                .replace("-", "")
+                .replace(" ", "")
+            )
+            if job_type_text == "contractor":
+                job_type_text = "contract"
+            job_type = JobType(job_type_text)
+        else:
+            job_type = None
+
+        date_posted = ZipRecruiterScraper.get_date_posted(job)
+
+        job_post = JobPost(
+            title=title,
+            description=description,
+            company_name=company,
+            location=ZipRecruiterScraper.get_location(job),
+            job_type=job_type,
+            compensation=ZipRecruiterScraper.get_compensation(job),
+            date_posted=date_posted,
+            job_url=job_url,
+        )
+        return job_post
+
+    def process_job_js(self, job: dict) -> JobPost:
+        # Map the job data to the expected fields by the Pydantic model
+        title = job.get("Title")
+        description = BeautifulSoup(job.get("Snippet", "").strip(), "html.parser").get_text()
+
+        company = job.get("OrgName")
+        location = Location(city=job.get("City"), state=job.get("State"))
+        try:
+            job_type = ZipRecruiterScraper.job_type_from_string(job.get("EmploymentType", "").replace("-", "_").lower())
+        except ValueError:
+            # print(f"Skipping job due to unrecognized job type: {job.get('EmploymentType')}")
+            return None
+
+        formatted_salary = job.get("FormattedSalaryShort", "")
+        salary_parts = formatted_salary.split(" ")
+
+        min_salary_str = salary_parts[0][1:].replace(",", "")
+        if '.' in min_salary_str:
+            min_amount = int(float(min_salary_str) * 1000)
+        else:
+            min_amount = int(min_salary_str.replace("K", "000"))
+
+        if len(salary_parts) >= 3 and salary_parts[2].startswith("$"):
+            max_salary_str = salary_parts[2][1:].replace(",", "")
+            if '.' in max_salary_str:
+                max_amount = int(float(max_salary_str) * 1000)
+            else:
+                max_amount = int(max_salary_str.replace("K", "000"))
+        else:
+            max_amount = 0
+
+        compensation = Compensation(
+            interval=CompensationInterval.YEARLY,
+            min_amount=min_amount,
+            max_amount=max_amount
+        )
+        save_job_url = job.get("SaveJobURL", "")
+        posted_time_match = re.search(r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url)
+        if posted_time_match:
+            date_time_str = posted_time_match.group(1)
+            date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ")
+            date_posted = date_posted_obj.date()
+        else:
+            date_posted = date.today()
+        job_url = job.get("JobURL")
+
+        return JobPost(
+            title=title,
+            description=description,
+            company_name=company,
+            location=location,
+            job_type=job_type,
+            compensation=compensation,
+            date_posted=date_posted,
+            job_url=job_url,
+        )
+        return job_post
+
     @staticmethod
+    def job_type_from_string(value: str) -> Optional[JobType]:
+        if not value:
+            return None
+
+        if value.lower() == "contractor":
+            value = "contract"
+        normalized_value = value.replace("_", "")
+        for item in JobType:
+            if item.value == normalized_value:
+                return item
+        raise ValueError(f"Invalid value for JobType: {value}")
+
     def get_description(
-        job_page_url: str, session: tls_client.Session
+        self,
+        job_page_url: str
     ) -> Tuple[Optional[str], Optional[str]]:
         """
         Retrieves job description by going to the job page url
@@ -202,7 +279,7 @@ class ZipRecruiterScraper(Scraper):
         :param session:
         :return: description or None, response url
         """
-        response = session.get(
+        response = self.session.get(
             job_page_url, headers=ZipRecruiterScraper.headers(), allow_redirects=True
         )
         if response.status_code not in range(200, 400):
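The FormattedSalaryShort handling in process_job_js expects strings shaped like "$32K - $46K"; a standalone restatement of that parsing path, with made-up sample inputs:

    def parse_formatted_salary(formatted_salary: str) -> tuple[int, int]:
        # Mirrors the min/max extraction in process_job_js above.
        salary_parts = formatted_salary.split(" ")

        min_salary_str = salary_parts[0][1:].replace(",", "")
        if "." in min_salary_str:
            min_amount = int(float(min_salary_str) * 1000)
        else:
            min_amount = int(min_salary_str.replace("K", "000"))

        if len(salary_parts) >= 3 and salary_parts[2].startswith("$"):
            max_salary_str = salary_parts[2][1:].replace(",", "")
            if "." in max_salary_str:
                max_amount = int(float(max_salary_str) * 1000)
            else:
                max_amount = int(max_salary_str.replace("K", "000"))
        else:
            max_amount = 0

        return min_amount, max_amount

    assert parse_formatted_salary("$32K - $46K") == (32000, 46000)   # made-up inputs
    assert parse_formatted_salary("$60K") == (60000, 0)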
16 main.py
@@ -1,16 +0,0 @@
from fastapi import FastAPI

from supabase_py import create_client, Client
from api import router as api_router

app = FastAPI(
    title="JobSpy Backend",
    description="Endpoints for job board (LinkedIn, Indeed, and ZipRecruiter) scrapers",
    version="1.0.0",
)
app.include_router(api_router)


@app.get("/health", tags=["health"])
async def health_check():
    return {"message": "JobSpy ready to scrape"}
2435 poetry.lock generated Normal file
File diff suppressed because it is too large
@@ -1,15 +0,0 @@
{
    "id": "a7ea6d58-8dca-4216-97a9-224dadc1e18f",
    "name": "JobSpy",
    "values": [
        {
            "key": "access_token",
            "value": "",
            "type": "any",
            "enabled": true
        }
    ],
    "_postman_variable_scope": "environment",
    "_postman_exported_at": "2023-07-09T23:51:36.709Z",
    "_postman_exported_using": "Postman/10.15.8"
}
23 pyproject.toml Normal file
@@ -0,0 +1,23 @@
[tool.poetry]
name = "jobscrape"
version = "0.1.0"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com>", "Cullen Watson <cullen@cullen.ai>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10"
requests = "^2.31.0"
tls-client = "^0.2.1"
beautifulsoup4 = "^4.12.2"
pandas = "^2.1.0"
pydantic = "^2.3.0"


[tool.poetry.group.dev.dependencies]
pytest = "^7.4.1"
jupyter = "^1.0.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
@@ -1,61 +0,0 @@
anyio==3.7.1
atomicwrites==1.4.1
attrs==23.1.0
bcrypt==4.0.1
beautifulsoup4==4.12.2
cachetools==5.3.1
certifi==2023.5.7
cffi==1.15.1
chardet==4.0.0
charset-normalizer==3.2.0
click==8.1.4
colorama==0.4.6
cryptography==41.0.1
dataclasses==0.6
deprecation==2.1.0
ecdsa==0.18.0
exceptiongroup==1.1.2
fastapi==0.99.1
google-auth==2.22.0
google-auth-oauthlib==1.0.0
gotrue==0.2.0
gspread==5.10.0
h11==0.14.0
httpcore==0.12.3
httplib2==0.22.0
httpx==0.16.1
idna==2.10
iniconfig==2.0.0
oauth2client==4.1.3
oauthlib==3.2.2
packaging==23.1
passlib==1.7.4
pluggy==1.2.0
postgrest-py==0.4.0
py==1.11.0
pyasn1==0.5.0
pyasn1-modules==0.3.0
pycparser==2.21
pydantic==1.10.11
pyparsing==3.1.1
pytest==6.2.5
python-dateutil==2.8.2
python-dotenv==1.0.0
python-jose==3.3.0
python-multipart==0.0.6
realtime-py==0.1.3
requests==2.25.1
requests-oauthlib==1.3.1
rfc3986==1.5.0
rsa==4.9
six==1.16.0
sniffio==1.3.0
soupsieve==2.4.1
starlette==0.27.0
supabase-py==0.0.2
tls-client==0.2.1
toml==0.10.2
typing_extensions==4.7.1
urllib3==1.26.16
uvicorn==0.22.0
websockets==9.1
14 settings.py
@@ -1,14 +0,0 @@
from dotenv import load_dotenv
import os

load_dotenv()
# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing)
GSHEET_NAME = os.environ.get("GSHEET_NAME", "JobSpy")

# optional auth
AUTH_REQUIRED = False
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
ACCESS_TOKEN_EXPIRE_MINUTES = 60
ALGORITHM = "HS256"
10 tests/test_indeed.py Normal file
@@ -0,0 +1,10 @@
from jobscrape import scrape_jobs


def test_indeed():
    result = scrape_jobs(
        site_name="indeed",
        search_term="software engineer",
    )

    assert result is not None
10 tests/test_ziprecruiter.py Normal file
@@ -0,0 +1,10 @@
from jobscrape import scrape_jobs


def test_ziprecruiter():
    result = scrape_jobs(
        site_name="zip_recruiter",
        search_term="software engineer",
    )

    assert result is not None