mirror of https://github.com/Bunsly/JobSpy
Library Migration (#31)
parent 7efece8fe9
commit 153ac35248
@@ -1,3 +0,0 @@
-{
-  "experimental": "enabled"
-}
@@ -1,33 +0,0 @@
-name: Build and Push Docker Image
-
-on:
-  push:
-    branches:
-      - main
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v2
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
-
-      - name: Login to GitHub Docker Registry
-        uses: docker/login-action@v1
-        with:
-          registry: ghcr.io
-          username: ${{ github.actor }}
-          password: ${{ secrets.DOCKER_TOKEN }}
-
-      - name: Build and Push Image
-        uses: docker/build-push-action@v2
-        with:
-          context: .
-          file: ./Dockerfile
-          push: true
-          tags: ghcr.io/${{ github.repository_owner }}/jobspy:latest
-          platforms: linux/amd64,linux/arm64
@@ -0,0 +1,33 @@
+name: Publish Python 🐍 distributions 📦 to PyPI
+on: push
+
+jobs:
+  build-n-publish:
+    name: Build and publish Python 🐍 distributions 📦 to PyPI
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - name: Install poetry
+        run: >-
+          python3 -m
+          pip install
+          poetry
+          --user
+
+      - name: Build distribution 📦
+        run: >-
+          python3 -m
+          poetry
+          build
+
+      - name: Publish distribution 📦 to PyPI
+        if: startsWith(github.ref, 'refs/tags')
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
@@ -1,89 +0,0 @@
-name: JobSpy API Tests
-
-on: [push, pull_request]
-
-jobs:
-  test_api:
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v2
-
-      - name: Set up Python 3.10
-        uses: actions/setup-python@v2
-        with:
-          python-version: '3.10'
-
-      - name: Install dependencies
-        run: pip install -r requirements.txt
-
-      - name: Install jq
-        run: sudo apt-get install jq
-
-      - name: Start JobSpy FastAPI app
-        run: uvicorn main:app --host 0.0.0.0 --port 8000 &
-
-      - name: Wait for server to be up
-        run: |
-          for i in {1..10}; do
-            curl -s http://0.0.0.0:8000/api/v1/jobs && break || sleep 1
-          done
-
-      - name: Check health
-        run: |
-          health_status=$(curl -L -s -o /dev/null -w "%{http_code}" http://0.0.0.0:8000/health)
-
-          if [ "$health_status" != "200" ]; then
-            echo "Error: Health check failed with status code $health_status"
-            exit 1
-          fi
-
-      # not checked currently because GitHub's server IPs get blocked by the job boards
-      # - name: Check HTTP status of POST /api/v1/jobs/
-      #   run: |
-      #     response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
-      #       "site_type": ["indeed", "linkedin"],
-      #       "search_term": "software engineer",
-      #       "location": "austin, tx",
-      #       "distance": 10,
-      #       "job_type": "fulltime",
-      #       "results_wanted": 5
-      #     }' http://0.0.0.0:8000/api/v1/jobs -w "%{http_code}")
-      #
-      #     status_code="${response: -3}"
-      #     echo "Received status code: $status_code"
-      #
-      #     if [ "$status_code" != "200" ]; then
-      #       echo "Error: Expected status code 200, but got $status_code"
-      #       exit 1
-      #     fi
-      #
-      #     echo "${response::-3}" > response.json
-      #     cat response.json
-      #
-      # - name: Check error field in response
-      #   run: |
-      #     global_error=$(jq '.error' response.json)
-      #     indeed_error=$(jq '.indeed.error' response.json)
-      #     linkedin_error=$(jq '.linkedin.error' response.json)
-      #
-      #     if [[ "$indeed_error" != "null" || "$linkedin_error" != "null" ]]; then
-      #       echo "Error found in response:"
-      #       echo "Global Error: $global_error"
-      #       echo "Indeed Error: $indeed_error"
-      #       echo "LinkedIn Error: $linkedin_error"
-      #       exit 1
-      #     fi
-      #
-      # - name: Verify returned_results in response
-      #   run: |
-      #     indeed_results=$(jq '.indeed.returned_results' response.json)
-      #     linkedin_results=$(jq '.linkedin.returned_results' response.json)
-      #
-      #     if [[ $indeed_results -ne 5 || $linkedin_results -ne 5 ]]; then
-      #       echo "Mismatch in results_wanted and returned_results:"
-      #       echo "Indeed: Expected 5, Got $indeed_results"
-      #       echo "LinkedIn: Expected 5, Got $linkedin_results"
-      #       exit 1
-      #     fi
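The wait-and-health-check steps above are plain shell; the same probe can be written in Python for local debugging. A minimal sketch using the third-party `requests` package (an assumption — it is not listed in this diff); the port and endpoints come from the workflow above:

```python
import time

import requests  # third-party HTTP client, assumed available locally

BASE = "http://0.0.0.0:8000"

# Poll until the FastAPI app is up, mirroring the curl retry loop in the workflow.
for _ in range(10):
    try:
        requests.get(f"{BASE}/api/v1/jobs", timeout=2)
        break
    except requests.ConnectionError:
        time.sleep(1)

# Fail loudly if the health check does not return HTTP 200.
resp = requests.get(f"{BASE}/health", allow_redirects=True)
assert resp.status_code == 200, f"Health check failed with status code {resp.status_code}"
```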
@@ -5,4 +5,5 @@
 **/__pycache__/
 *.pyc
 .env
-client_secret.json
+dist
+/.ipynb_checkpoints/
@@ -1,13 +0,0 @@
-{
-  "version": "0.2.0",
-  "configurations": [
-    {
-      "name": "Python: Module",
-      "type": "python",
-      "request": "launch",
-      "module": "uvicorn",
-      "args": ["main:app", "--reload"]
-    }
-
-  ]
-}
Dockerfile (15 lines changed)
@@ -1,15 +0,0 @@
-FROM python:3.10-slim
-
-WORKDIR /app
-
-COPY . /app
-
-RUN apt-get update && \
-    apt-get install -y jq && \
-    pip install --no-cache-dir -r requirements.txt
-
-EXPOSE 8000
-
-ENV PORT=8000
-
-CMD sh -c "uvicorn main:app --host 0.0.0.0 --port $PORT"
@@ -0,0 +1,702 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c3f21577-477d-451e-9914-5d67e8a89075",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "(HTML rendering of the resulting DataFrame — 30 job rows × 11 columns: site, title, company_name, city, state, job_type, interval, min_amount, max_amount, job_url, description — elided here)"
+      ],
+      "text/plain": [
+       "(plain-text rendering of the same 30-row DataFrame, with postings from indeed, linkedin and zip_recruiter — elided here)"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from jobscrape import scrape_jobs\n",
+    "import pandas as pd\n",
+    "\n",
+    "jobs: pd.DataFrame = scrape_jobs(\n",
+    "    site_name=[\"indeed\", \"linkedin\", \"zip_recruiter\"],\n",
+    "    search_term=\"software engineer\",\n",
+    "    results_wanted=10\n",
+    ")\n",
+    "\n",
+    "if jobs.empty:\n",
+    "    print(\"No jobs found.\")\n",
+    "else:\n",
+    "\n",
+    "    #1 print\n",
+    "    pd.set_option('display.max_columns', None)\n",
+    "    pd.set_option('display.max_rows', None)\n",
+    "    pd.set_option('display.width', None)\n",
+    "    pd.set_option('display.max_colwidth', 50)  # set to 0 to see full job url / desc\n",
+    "    print(jobs)\n",
+    "\n",
+    "    #2 display in Jupyter Notebook\n",
+    "    display(jobs)\n",
+    "\n",
+    "    #3 output to csv\n",
+    "    jobs.to_csv('jobs.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "efd667ef-fdf0-452a-b5e5-ce6825755be7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1574dc17-0a42-4655-964f-5c03a6d3deb0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "my-poetry-env",
+   "language": "python",
+   "name": "my-poetry-env"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
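Since `scrape_jobs` returns an ordinary Pandas DataFrame, the notebook's result can be sliced with standard Pandas operations. A small sketch building on the columns visible in the output above (`site`, `title`, `company_name`, `min_amount`, `job_url`); the filtering itself is illustrative, not part of the library:

```python
import pandas as pd

from jobscrape import scrape_jobs

jobs: pd.DataFrame = scrape_jobs(
    site_name=["indeed", "linkedin", "zip_recruiter"],
    search_term="software engineer",
    results_wanted=10,
)

# Keep only postings that include salary data, highest minimum first.
with_salary = jobs[jobs["min_amount"].notna()].sort_values("min_amount", ascending=False)

# Per-site posting counts, using the 'site' column shown in the output above.
print(with_salary["site"].value_counts())
print(with_salary[["title", "company_name", "min_amount", "job_url"]].head())
```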
README.md (290 lines changed)
@@ -1,240 +1,100 @@
-# JobSpy AIO Scraper
+# JobSpy
 
+**JobSpy** is a simple, yet comprehensive, job scraping library.
+
 ## Features
 
 - Scrapes job postings from **LinkedIn**, **Indeed** & **ZipRecruiter** simultaneously
-- Returns jobs as JSON or CSV with title, location, company, description & other data
-- Imports directly into **Google Sheets**
-- Optional JWT authorization
-
-![jobspy_gsheet](https://github.com/cullenwatson/JobSpy/assets/78247585/9f0a997c-4e33-4167-b04e-31ab1f606edb)
+- Aggregates the job postings in a Pandas DataFrame
+
+### Installation
+`pip install jobscrape`
+
+_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
+
+### Usage
+
+```python
+from jobscrape import scrape_jobs
+import pandas as pd
+
+jobs: pd.DataFrame = scrape_jobs(
+    site_name=["indeed", "linkedin", "zip_recruiter"],
+    search_term="software engineer",
+    results_wanted=10
+)
+
+if jobs.empty:
+    print("No jobs found.")
+else:
+
+    #1 print
+    pd.set_option('display.max_columns', None)
+    pd.set_option('display.max_rows', None)
+    pd.set_option('display.width', None)
+    pd.set_option('display.max_colwidth', 50)  # set to 0 to see full job url / desc
+    print(jobs)
+
+    #2 display in Jupyter Notebook
+    display(jobs)
+
+    #3 output to csv
+    jobs.to_csv('jobs.csv', index=False)
+```
+
+### Output
+```
+site           title                             company_name      city          state  job_type  interval  min_amount  max_amount  job_url                                            description
+indeed         Software Engineer                 AMERICAN SYSTEMS  Arlington     VA     None      yearly    200000      150000      https://www.indeed.com/viewjob?jk=5e409e577046...  THIS POSITION COMES WITH A 10K SIGNING BONUS! ...
+indeed         Senior Software Engineer          TherapyNotes.com  Philadelphia  PA     fulltime  yearly    135000      110000      https://www.indeed.com/viewjob?jk=da39574a40cb...  About Us TherapyNotes is the national leader i...
+linkedin       Software Engineer - Early Career  Lockheed Martin   Sunnyvale     CA     fulltime  yearly    None        None        https://www.linkedin.com/jobs/view/3693012711      Description:By bringing together people that u...
+linkedin       Full-Stack Software Engineer      Rain              New York      NY     fulltime  yearly    None        None        https://www.linkedin.com/jobs/view/3696158877      Rain’s mission is to create the fastest and ea...
+zip_recruiter  Software Engineer - New Grad      ZipRecruiter      Santa Monica  CA     fulltime  yearly    130000      150000      https://www.ziprecruiter.com/jobs/ziprecruiter...  We offer a hybrid work environment. Most US-ba...
+zip_recruiter  Software Developer                TEKsystems        Phoenix       AZ     fulltime  hourly    65          75          https://www.ziprecruiter.com/jobs/teksystems-0...  Top Skills' Details• 6 years of Java developme...
+```
+
+### Parameters for `scrape_jobs()`
+
-### API
-
-POST `/api/v1/jobs/`
-
-### Request Schema
-
 ```plaintext
 Required
 ├── site_type (List[enum]): linkedin, zip_recruiter, indeed
 └── search_term (str)
 Optional
 ├── location (str)
-├── distance (int)
+├── distance (int): in miles
 ├── job_type (enum): fulltime, parttime, internship, contract
 ├── is_remote (bool)
-├── results_wanted (int): per site_type
-├── easy_apply (bool): only for linkedin
+├── results_wanted (int): number of job results to retrieve for each site specified in 'site_type'
+├── easy_apply (bool): filters for jobs on LinkedIn that have the 'Easy Apply' option
-└── output_format (enum): json, csv, gsheet
-```
-
-### Request Example
-```json
-{
-  "site_type": ["indeed", "linkedin"],
-  "search_term": "software engineer",
-  "location": "austin, tx",
-  "distance": 10,
-  "job_type": "fulltime",
-  "results_wanted": 15,
-  "output_format": "gsheet"
-}
-```
 
 ### Response Schema
 ```plaintext
-site_type (enum):
-JobResponse
-├── success (bool)
-├── error (str)
-├── jobs (List[JobPost])
-│   └── JobPost
-│       ├── title (str)
-│       ├── company_name (str)
-│       ├── job_url (str)
-│       ├── location (object)
-│       │   ├── country (str)
-│       │   ├── city (str)
-│       │   ├── state (str)
-│       ├── description (str)
-│       ├── job_type (enum)
-│       ├── compensation (object)
-│       │   ├── interval (CompensationInterval): yearly, monthly, weekly, daily, hourly
-│       │   ├── min_amount (float)
-│       │   ├── max_amount (float)
-│       │   └── currency (str)
-│       └── date_posted (datetime)
-│
-├── total_results (int)
-└── returned_results (int)
+JobPost
+├── title (str)
+├── company_name (str)
+├── job_url (str)
+├── location (object)
+│   ├── country (str)
+│   ├── city (str)
+│   ├── state (str)
+├── description (str)
+├── job_type (enum)
+├── compensation (object)
+│   ├── interval (CompensationInterval): yearly, monthly, weekly, daily, hourly
+│   ├── min_amount (float)
+│   ├── max_amount (float)
+│   └── currency (str)
+└── date_posted (datetime)
 ```
-
-### Response Example (GOOGLE SHEETS)
-```json
-{
-    "status": "Successfully uploaded to Google Sheets",
-    "error": null,
-    "linkedin": null,
-    "indeed": null,
-    "zip_recruiter": null
-}
-```
-
-### Response Example (JSON)
-```json
-{
-    "indeed": {
-        "success": true,
-        "error": null,
-        "jobs": [
-            {
-                "title": "Software Engineer",
-                "company_name": "INTEL",
-                "job_url": "https://www.indeed.com/jobs/viewjob?jk=a2cfbb98d2002228",
-                "location": {
-                    "country": "USA",
-                    "city": "Austin",
-                    "state": "TX"
-                },
-                "description": "Job Description Designs, develops, tests, and debugs...",
-                "job_type": "fulltime",
-                "compensation": {
-                    "interval": "yearly",
-                    "min_amount": 209760.0,
-                    "max_amount": 139480.0,
-                    "currency": "USD"
-                },
-                "date_posted": "2023-08-18T00:00:00"
-            }, ...
-        ],
-        "total_results": 845,
-        "returned_results": 15
-    },
-    "linkedin": {
-        "success": true,
-        "error": null,
-        "jobs": [
-            {
-                "title": "Software Engineer 1",
-                "company_name": "Public Partnerships | PPL",
-                "job_url": "https://www.linkedin.com/jobs/view/3690013792",
-                "location": {
-                    "country": "USA",
-                    "city": "Austin",
-                    "state": "TX"
-                },
-                "description": "Public Partnerships LLC supports individuals with disabilities...",
-                "job_type": null,
-                "compensation": null,
-                "date_posted": "2023-07-31T00:00:00"
-            }, ...
-        ],
-        "total_results": 2000,
-        "returned_results": 15
-    }
-}
-```
-
-### Response Example (CSV)
-```
-Site, Title, Company Name, Job URL, Country, City, State, Job Type, Compensation Interval, Min Amount, Max Amount, Currency, Date Posted, Description
-indeed, Software Engineer, INTEL, https://www.indeed.com/jobs/viewjob?jk=a2cfbb98d2002228, USA, Austin, TX, fulltime, yearly, 209760.0, 139480.0, USD, 2023-08-18T00:00:00, Job Description Designs...
-linkedin, Software Engineer 1, Public Partnerships | PPL, https://www.linkedin.com/jobs/view/3690013792, USA, Austin, TX, , , , , , 2023-07-31T00:00:00, Public Partnerships LLC supports...
-```
-
-## Installation
-### Docker Setup
-_Requires [Docker Desktop](https://www.docker.com/products/docker-desktop/)_
-
-[JobSpy API Image](https://ghcr.io/cullenwatson/jobspy:latest) is continuously updated and available on GitHub Container Registry.
-
-To pull the Docker image:
-
-```bash
-docker pull ghcr.io/cullenwatson/jobspy:latest
-```
-
-#### Params
-
-By default:
-* Port: `8000`
-* Google sheet name: `JobSpy`
-* Relative path of `client_secret.json` (for Google Sheets, see below to obtain)
-
-To run the image with these default settings, use:
-
-Example (Cmd Prompt - Windows):
-```bash
-docker run -v %cd%/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
-```
-
-Example (Unix):
-```bash
-docker run -v $(pwd)/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
-```
-
-#### Using custom params
-
-Example:
-* Port: `8030`
-* Google sheet name: `CustomName`
-* Absolute path of `client_secret.json`: `C:\config\client_secret.json`
-
-To pass these custom params:
-```bash
-docker run -v C:\config\client_secret.json:/app/client_secret.json -e GSHEET_NAME=CustomName -e PORT=8030 -p 8030:8030 ghcr.io/cullenwatson/jobspy
-```
-
-### Python installation (alternative to Docker)
-_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
-1. Clone this repository: `git clone https://github.com/cullenwatson/jobspy`
-2. Install the dependencies with `pip install -r requirements.txt`
-3. Run the server with `uvicorn main:app --reload`
-
-### Google Sheets Setup
-
-#### Obtaining an Access Key: [Video Guide](https://youtu.be/w533wJuilao?si=5u3m50pRtdhqkg9Z&t=43)
-* Enable the [Google Sheets & Google Drive API](https://console.cloud.google.com/)
-* Create credentials -> service account -> create & continue
-* Select role -> basic: editor -> done
-* Click on the email you just created in the service account list
-* Go to the Keys tab -> add key -> create new key -> JSON -> Create
-
-#### Using the key in the repo
-* Copy the key file into the JobSpy repo as `client_secret.json`
-* Go to [my template sheet](https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing): File -> Make a Copy -> Rename to JobSpy
-* Share the Google sheet with the email located in the field `client_email` in the `client_secret.json` above with editor rights
-* If you changed the name of the sheet:
-  - Python install: add `.env` in the repo and add a `GSHEET_NAME` param with the sheet name as the value, e.g. `GSHEET_NAME=CustomName`
-  - Docker install: use the custom param `-e GSHEET_NAME=CustomName` in `docker run` (see above)
-
-### How to call the API
-
-#### [Postman](https://www.postman.com/downloads/) (preferred):
-To use Postman:
-1. Locate the files in the `/postman/` directory.
-2. Import the Postman collection and environment JSON files.
-
-#### Swagger UI:
-Or you can call the API with the interactive documentation at [localhost:8000/docs](http://localhost:8000/docs).
-
-## FAQ
-
-### I'm having issues with my queries. What should I do?
-
-Try reducing the number of `results_wanted` and/or broadening the filters. If issues still persist, feel free to submit an issue.
-
-### I'm getting response code 429. What should I do?
-You have been blocked by the job board site for sending too many requests. Wait a couple of seconds or use a VPN.
-
-### How to enable auth?
-
-Change `AUTH_REQUIRED` in `/settings.py` to `True`.
-
-The auth uses [supabase](https://supabase.com). Create a project with a `users` table and disable RLS.
-
-<img src="https://github.com/cullenwatson/jobspy/assets/78247585/03af18e1-5386-49ad-a2cf-d34232d9d747" width="500">
-
-Add these three environment variables:
-
-- `SUPABASE_URL`: go to project settings -> API -> Project URL
-- `SUPABASE_KEY`: go to project settings -> API -> service_role secret
-- `JWT_SECRET_KEY`: type `openssl rand -hex 32` in a terminal to create a 32-byte secret key
-
-Use these endpoints to register and get an access token:
-
-![image](https://github.com/cullenwatson/jobspy/assets/78247585/c84c33ec-1fe8-4152-9c8c-6c4334aecfc3)
+
+### FAQ
+
+#### Encountering issues with your queries?
+
+Try reducing the number of `results_wanted` and/or broadening the filters. If problems persist, please submit an issue.
+
+#### Received a response code 429?
+This means you've been blocked by the job board site for sending too many requests. Consider waiting a few seconds, or try using a VPN. Proxy support coming soon.
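The three auth variables listed in the old README are consumed through `settings.py` (which `db_utils.py` imports from, as seen further below). A hypothetical sketch of the relevant part of that file — only the variable names are confirmed by the diff; reading them via `os.environ` is an assumption:

```python
# settings.py — hedged sketch; names match the README and the imports below
import os

AUTH_REQUIRED = False  # the README says to flip this to True to enable auth

SUPABASE_URL = os.environ.get("SUPABASE_URL")      # project settings -> API -> Project URL
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")      # project settings -> API -> service_role secret
JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")  # e.g. generated with `openssl rand -hex 32`
```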
@@ -1,9 +0,0 @@
-from fastapi import APIRouter
-from api.auth import router as auth_router
-from .v1 import router as v1_router
-
-router = APIRouter(
-    prefix="/api",
-)
-router.include_router(v1_router)
-router.include_router(auth_router)
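For context, the composed `router` above would have been mounted in `main.py`, the module served by `uvicorn main:app` throughout this repo. `main.py` itself is not part of this diff, so this is a hedged sketch:

```python
# main.py — hypothetical sketch; only the `api.router` composition is confirmed by the diff
from fastapi import FastAPI

from api import router as api_router

app = FastAPI()
app.include_router(api_router)  # serves /api/v1/jobs and /api/auth/*
```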
@@ -1,8 +0,0 @@
-from fastapi import APIRouter
-
-from api.auth.token import router as token_router
-from api.auth.register import router as register_router
-
-router = APIRouter(prefix="/auth", tags=["auth"])
-router.include_router(token_router)
-router.include_router(register_router)
@@ -1,65 +0,0 @@
-from datetime import datetime, timedelta
-
-from jose import jwt, JWTError
-from fastapi import HTTPException, status, Depends
-from fastapi.security import OAuth2PasswordBearer
-
-from api.core.users import TokenData
-from api.auth.db_utils import UserInDB, get_user
-from settings import JWT_SECRET_KEY, ALGORITHM, ACCESS_TOKEN_EXPIRE_MINUTES  # constants used below; missing from the original file, assumed to live in settings.py
-
-oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/token")
-
-
-def create_access_token(data: dict) -> str:
-    """
-    Creates a JWT token based on the data provided.
-    :param data: the claims to encode into the token
-    :return: encoded_jwt
-    """
-    to_encode = data.copy()
-    expire = datetime.utcnow() + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
-    to_encode.update({"exp": expire})
-    encoded_jwt = jwt.encode(to_encode, JWT_SECRET_KEY, algorithm=ALGORITHM)
-    return encoded_jwt
-
-
-async def get_current_user(token: str = Depends(oauth2_scheme)):
-    """
-    Returns the current user associated with the provided JWT token.
-    :param token: the bearer token extracted by oauth2_scheme
-    :raises HTTPException: If the token is invalid or the user does not exist.
-    :return: The UserInDB instance associated with the token.
-    """
-    credential_exception = HTTPException(
-        status_code=status.HTTP_401_UNAUTHORIZED,
-        detail="Could not validate credentials",
-        headers={"WWW-Authenticate": "Bearer"},
-    )
-    try:
-        payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
-        username: str = payload.get("sub")
-        if username is None:
-            raise credential_exception
-        token_data = TokenData(username=username)
-    except JWTError:
-        raise credential_exception
-
-    current_user = get_user(token_data.username)
-    if current_user is None:
-        raise credential_exception
-    return current_user
-
-
-async def get_active_current_user(current_user: UserInDB = Depends(get_current_user)):
-    """
-    Returns the current user if the user account is active.
-
-    :param current_user: A UserInDB instance representing the current user.
-    :raises HTTPException: If the user account is inactive.
-    :return: The UserInDB instance if the user account is active.
-    """
-    if current_user.disabled:
-        raise HTTPException(
-            status_code=status.HTTP_401_UNAUTHORIZED, detail="Inactive user."
-        )
-    return current_user
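A compact sketch of the token round trip the two helpers above implement — `create_access_token()` signs a payload carrying a `sub` claim, and `get_current_user()` starts by decoding it. The secret and algorithm values below are placeholders; the real ones come from `settings.py`:

```python
from jose import jwt

# Placeholder values; in the app these come from settings.py.
JWT_SECRET_KEY = "change-me"
ALGORITHM = "HS256"

# What create_access_token() produces: a signed payload with "sub" and "exp" claims.
token = jwt.encode({"sub": "alice", "exp": 9999999999}, JWT_SECRET_KEY, algorithm=ALGORITHM)

# What get_current_user() does first: decode the token and pull the username back out.
payload = jwt.decode(token, JWT_SECRET_KEY, algorithms=[ALGORITHM])
assert payload["sub"] == "alice"
```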
@@ -1,89 +0,0 @@
-from typing import Optional, Union
-
-from passlib.context import CryptContext
-from supabase_py import create_client, Client
-from fastapi import HTTPException, status
-
-from api.core.users import UserInDB
-from settings import SUPABASE_URL, SUPABASE_KEY
-
-pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
-if SUPABASE_URL:
-    supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
-
-
-def create_user(user_create: UserInDB):
-    """
-    Creates a new user record in the 'users' table in Supabase.
-
-    :param user_create: The data of the user to be created.
-    :raises HTTPException: If an error occurs while creating the user.
-    :return: The result of the insert operation.
-    """
-    result = supabase.table("users").insert(user_create.dict()).execute()
-    print(f"Insert result: {result}")
-
-    if "error" in result and result["error"]:
-        raise HTTPException(
-            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
-            detail=f"User could not be created due to {result['error']['message']}",
-        )
-
-    return result
-
-
-def get_user(username: str) -> Optional[UserInDB]:
-    """
-    Retrieves a user from the 'users' table by their username.
-
-    :param username: The username of the user to retrieve.
-    :return: The user data if found, otherwise None.
-    """
-    result = supabase.table("users").select().eq("username", username).execute()
-
-    if "error" in result and result["error"]:
-        print(f"Error: {result['error']['message']}")
-        return None
-    else:
-        if result["data"]:
-            user_data = result["data"][0]
-            return UserInDB(**user_data)
-        else:
-            return None
-
-
-def verify_password(password: str, hashed_password: str) -> bool:
-    """
-    Verifies a password against a hashed password using the bcrypt hashing algorithm.
-
-    :param password: The plaintext password to verify.
-    :param hashed_password: The hashed password to compare against.
-    :return: True if the password matches the hashed password, otherwise False.
-    """
-    return pwd_context.verify(password, hashed_password)
-
-
-def get_password_hash(password: str) -> str:
-    """
-    Hashes a password using the bcrypt hashing algorithm.
-
-    :param password: The plaintext password to hash.
-    :return: The hashed password
-    """
-    return pwd_context.hash(password)
-
-
-def authenticate_user(username: str, password: str) -> Union[UserInDB, bool]:
-    """
-    Authenticates a user based on their username and password.
-
-    :param username: The username of the user to authenticate.
-    :param password: The plaintext password to authenticate.
-    :return: The authenticated user if the username and password are correct, otherwise False.
-    """
-    user = get_user(username)
-    if not user:
-        return False
-    if not verify_password(password, user.hashed_password):
-        return False
-    return user
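The password helpers above are thin wrappers over passlib's `CryptContext`; their round trip can be exercised standalone, with no Supabase connection. A minimal sketch:

```python
from passlib.context import CryptContext

pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

hashed = pwd_context.hash("s3cret")          # what get_password_hash() returns
assert pwd_context.verify("s3cret", hashed)  # what verify_password() checks
assert not pwd_context.verify("wrong", hashed)
```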
@@ -1,33 +0,0 @@
-from fastapi import APIRouter, HTTPException, status
-from api.core.users import UserCreate, UserInDB
-from api.auth.db_utils import get_user, get_password_hash, create_user
-
-router = APIRouter(prefix="/register")
-
-
-@router.post("/", response_model=dict)
-async def register_new_user(user: UserCreate) -> dict:
-    """
-    Creates a new user.
-    :param user: The registration details for the new user.
-    :raises HTTPException: If the username already exists.
-    :return: A dictionary containing a detail key with a success message.
-    """
-    existing_user = get_user(user.username)
-    if existing_user is not None:
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail="Username already exists",
-        )
-
-    hashed_password = get_password_hash(user.password)
-    user_create = UserInDB(
-        username=user.username,
-        email=user.email,
-        full_name=user.full_name,
-        hashed_password=hashed_password,
-        disabled=False,
-    )
-    create_user(user_create)
-
-    return {"detail": "User created successfully"}
@ -1,30 +0,0 @@
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordRequestForm

from api.core.users import Token
from api.auth.db_utils import authenticate_user
from api.auth.auth_utils import create_access_token

router = APIRouter(prefix="/token")


@router.post("/", response_model=Token)
async def login_for_access_token(
    form_data: OAuth2PasswordRequestForm = Depends(),
) -> Token:
    """
    Authenticates a user and provides an access token.

    :param form_data: OAuth2PasswordRequestForm object containing the user's credentials.
    :raises HTTPException: If the user cannot be authenticated.
    :return: A Token object containing the access token and the token type.
    """
    user = authenticate_user(form_data.username, form_data.password)
    if not user:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Incorrect username or password",
            headers={"WWW-Authenticate": "Bearer"},
        )

    access_token = create_access_token(data={"sub": user.username})
    return Token(access_token=access_token, token_type="bearer")
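A sketch of how a client might exercise these two auth endpoints, using the requests package from requirements.txt; the host, port, and `/api/v1` mount prefix are assumptions, since only the `/register` and `/token` router prefixes are visible here:

import requests

BASE = "http://localhost:8000/api/v1"  # assumed deployment URL and prefix

# Register, then exchange the credentials for a bearer token
# (form-encoded, as OAuth2PasswordRequestForm expects).
requests.post(f"{BASE}/register/", json={
    "username": "demo",
    "full_name": "Demo User",
    "email": "demo@example.com",
    "password": "hunter22",
})
resp = requests.post(f"{BASE}/token/", data={"username": "demo", "password": "hunter22"})
access_token = resp.json()["access_token"]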
@ -1,7 +0,0 @@
from enum import Enum


class OutputFormat(Enum):
    CSV = "csv"
    JSON = "json"
    GSHEET = "gsheet"
@ -1,133 +0,0 @@
import gspread
from oauth2client.service_account import ServiceAccountCredentials

import csv
from io import StringIO
from datetime import datetime

from ...jobs import *
from ...scrapers import *
from settings import *


class CSVFormatter:
    @staticmethod
    def fetch_job_urls(credentials: Any) -> set:
        """
        Fetches all the job urls from the Google Sheet to prevent duplicates.

        :param credentials: Service account credentials authorized for the sheet.
        :return: Set of job urls already present in the sheet.
        """
        try:
            gc = gspread.authorize(credentials)
            sh = gc.open(GSHEET_NAME)

            worksheet = sh.get_worksheet(0)
            data = worksheet.get_all_values()
            job_urls = set()
            for row in data[1:]:
                job_urls.add(row[3])
            return job_urls
        except Exception as e:
            raise e

    @staticmethod
    def upload_to_google_sheet(csv_data: StringIO):
        """
        Appends rows to the Google Sheet, skipping rows whose job url is already present.

        :param csv_data: In-memory CSV produced by CSVFormatter.format().
        """
        try:
            scope = [
                "https://www.googleapis.com/auth/spreadsheets",
                "https://www.googleapis.com/auth/drive.file",
                "https://www.googleapis.com/auth/drive",
            ]
            credentials = ServiceAccountCredentials.from_json_keyfile_name(
                "client_secret.json", scope
            )
            gc = gspread.authorize(credentials)
            sh = gc.open(GSHEET_NAME)

            worksheet = sh.get_worksheet(0)
            data_string = csv_data.getvalue()
            reader = csv.reader(StringIO(data_string))

            job_urls = CSVFormatter.fetch_job_urls(credentials)

            rows = list(reader)

            for i, row in enumerate(rows):
                if i == 0:
                    continue
                if row[4] in job_urls:
                    continue

                row[6] = format(int(row[6]), ",d") if row[6] else ""
                row[7] = format(int(row[7]), ",d") if row[7] else ""
                worksheet.append_row(row)
        except Exception as e:
            raise e

    @staticmethod
    def generate_filename() -> str:
        """
        Adds a timestamp to the filename header.

        :return: filename
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        return f"JobSpy_results_{timestamp}.csv"

    @staticmethod
    def format(jobs: CommonResponse) -> StringIO:
        """
        Transforms the job objects into CSV.

        :param jobs: CommonResponse containing one JobResponse per site.
        :return: In-memory CSV buffer.
        """
        output = StringIO()
        writer = csv.writer(output)

        headers = [
            "Title",
            "Company Name",
            "City",
            "State",
            "Job Type",
            "Pay Cycle",
            "Min Amount",
            "Max Amount",
            "Date Posted",
            "Description",
            "Job URL",
        ]
        writer.writerow(headers)

        for site, job_response in jobs.dict().items():
            if isinstance(job_response, dict) and job_response.get("success"):
                for job in job_response["jobs"]:
                    writer.writerow(
                        [
                            job["title"],
                            job["company_name"],
                            job["location"]["city"],
                            job["location"]["state"],
                            job["job_type"].value if job.get("job_type") else "",
                            job["compensation"]["interval"].value
                            if job["compensation"]
                            else "",
                            job["compensation"]["min_amount"]
                            if job["compensation"]
                            else "",
                            job["compensation"]["max_amount"]
                            if job["compensation"]
                            else "",
                            job.get("date_posted", ""),
                            job["description"],
                            job["job_url"],
                        ]
                    )

        output.seek(0)
        return output
@ -1,28 +0,0 @@
from pydantic import BaseModel


class User(BaseModel):
    username: str
    full_name: str
    email: str
    disabled: bool = False


class UserCreate(BaseModel):
    username: str
    full_name: str
    email: str
    password: str


class UserInDB(User):
    hashed_password: str


class TokenData(BaseModel):
    username: str


class Token(BaseModel):
    access_token: str
    token_type: str
@ -1,11 +0,0 @@
from fastapi import APIRouter, Depends
from .jobs import router as jobs_router
from api.auth.auth_utils import get_active_current_user
from settings import AUTH_REQUIRED

if AUTH_REQUIRED:
    router = APIRouter(prefix="/v1", dependencies=[Depends(get_active_current_user)])
else:
    router = APIRouter(prefix="/v1")

router.include_router(jobs_router)
@ -1,68 +0,0 @@
import io
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from concurrent.futures import ThreadPoolExecutor

from api.core.scrapers.indeed import IndeedScraper
from api.core.scrapers.ziprecruiter import ZipRecruiterScraper
from api.core.scrapers.linkedin import LinkedInScraper
from api.core.formatters.csv import CSVFormatter
from api.core.scrapers import (
    ScraperInput,
    Site,
    JobResponse,
    OutputFormat,
    CommonResponse,
)
from typing import List, Dict, Tuple, Union

router = APIRouter(prefix="/jobs", tags=["jobs"])

SCRAPER_MAPPING = {
    Site.LINKEDIN: LinkedInScraper,
    Site.INDEED: IndeedScraper,
    Site.ZIP_RECRUITER: ZipRecruiterScraper,
}


@router.post("/")
async def scrape_jobs(scraper_input: ScraperInput) -> CommonResponse:
    """
    Asynchronously scrapes job data from multiple job sites.

    :param scraper_input: Search criteria and the list of sites to scrape.
    :return: scraper_response
    """

    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
        scraper_class = SCRAPER_MAPPING[site]
        scraper = scraper_class()
        scraped_data: JobResponse = scraper.scrape(scraper_input)
        return (site.value, scraped_data)

    with ThreadPoolExecutor(max_workers=3) as executor:
        results = dict(executor.map(scrape_site, scraper_input.site_type))

    scraper_response = CommonResponse(status="JSON response success", **results)

    if scraper_input.output_format == OutputFormat.CSV:
        csv_output = CSVFormatter.format(scraper_response)
        response = StreamingResponse(csv_output, media_type="text/csv")
        response.headers[
            "Content-Disposition"
        ] = f"attachment; filename={CSVFormatter.generate_filename()}"
        return response

    elif scraper_input.output_format == OutputFormat.GSHEET:
        csv_output = CSVFormatter.format(scraper_response)
        try:
            CSVFormatter.upload_to_google_sheet(csv_output)
            return CommonResponse(
                status="Successfully uploaded to Google Sheets", **results
            )
        except Exception as e:
            return CommonResponse(
                status="Failed to upload to Google Sheet", error=repr(e), **results
            )

    else:
        return scraper_response
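For reference, the endpoint removed here accepted a ScraperInput body. A sketch of a request against it, with illustrative values; the deployment URL, the `/api` mount prefix, and the site enum values are assumptions:

import requests

resp = requests.post(
    "http://localhost:8000/api/v1/jobs/",     # assumed deployment URL
    json={
        "site_type": ["indeed", "linkedin"],  # assumed Site enum values
        "search_term": "software engineer",
        "location": "Dallas, TX",
        "results_wanted": 10,
        "output_format": "json",
    },
)
print(resp.json())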
@ -0,0 +1,121 @@
import pandas as pd
from typing import List, Tuple

from .core.jobs import JobType
from .core.scrapers.indeed import IndeedScraper
from .core.scrapers.ziprecruiter import ZipRecruiterScraper
from .core.scrapers.linkedin import LinkedInScraper
from .core.scrapers import (
    ScraperInput,
    Site,
    JobResponse,
    CommonResponse,
)


SCRAPER_MAPPING = {
    Site.LINKEDIN: LinkedInScraper,
    Site.INDEED: IndeedScraper,
    Site.ZIP_RECRUITER: ZipRecruiterScraper,
}


def _map_str_to_site(site_name: str) -> Site:
    return Site[site_name.upper()]


def scrape_jobs(
    site_name: str | Site | List[Site],
    search_term: str,

    location: str = "",
    distance: int = None,
    is_remote: bool = False,
    job_type: JobType = None,
    easy_apply: bool = False,  # linkedin
    results_wanted: int = 15,
) -> pd.DataFrame:
    """
    Scrapes job data from the requested job sites.

    :return: pandas DataFrame containing job data
    """

    if isinstance(site_name, str):
        site_name = _map_str_to_site(site_name)

    site_type = [site_name] if isinstance(site_name, Site) else site_name
    scraper_input = ScraperInput(
        site_type=site_type,
        search_term=search_term,
        location=location,
        distance=distance,
        is_remote=is_remote,
        job_type=job_type,
        easy_apply=easy_apply,
        results_wanted=results_wanted,
    )

    def scrape_site(site: Site) -> Tuple[str, JobResponse]:
        scraper_class = SCRAPER_MAPPING[site]
        scraper = scraper_class()
        scraped_data: JobResponse = scraper.scrape(scraper_input)

        return site.value, scraped_data

    results = {}
    for site in scraper_input.site_type:
        site_value, scraped_data = scrape_site(site)
        results[site_value] = scraped_data

    dfs = []

    for site, job_response in results.items():
        for job in job_response.jobs:
            data = job.dict()
            data["site"] = site

            # Formatting JobType
            data["job_type"] = data["job_type"].value if data["job_type"] else None

            # Formatting Location
            location_obj = data.get("location")
            if location_obj and isinstance(location_obj, dict):
                data["city"] = location_obj.get("city", "")
                data["state"] = location_obj.get("state", "")
                data["country"] = location_obj.get("country", "USA")
            else:
                data["city"] = None
                data["state"] = None
                data["country"] = None

            # Formatting Compensation
            compensation_obj = data.get("compensation")
            if compensation_obj and isinstance(compensation_obj, dict):
                data["interval"] = (
                    compensation_obj.get("interval").value
                    if compensation_obj.get("interval")
                    else None
                )
                data["min_amount"] = compensation_obj.get("min_amount")
                data["max_amount"] = compensation_obj.get("max_amount")
                data["currency"] = compensation_obj.get("currency", "USD")
            else:
                data["interval"] = None
                data["min_amount"] = None
                data["max_amount"] = None
                data["currency"] = None

            job_df = pd.DataFrame([data])
            dfs.append(job_df)

    if dfs:
        df = pd.concat(dfs, ignore_index=True)
        desired_order = [
            "site",
            "title",
            "company_name",
            "city",
            "state",
            "job_type",
            "interval",
            "min_amount",
            "max_amount",
            "job_url",
            "description",
        ]
        df = df[desired_order]
    else:
        df = pd.DataFrame()

    return df
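A usage sketch of the new entry point, mirroring the tests added at the bottom of this diff; the location value is illustrative, and the returned DataFrame columns follow the desired_order list above:

from jobscrape import scrape_jobs

df = scrape_jobs(
    site_name="indeed",
    search_term="software engineer",
    location="Dallas, TX",
    results_wanted=10,
)
print(df[["site", "title", "company_name", "min_amount", "max_amount"]].head())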
@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Optional
 from datetime import date
 from enum import Enum

@ -19,10 +19,11 @@ class JobType(Enum):
     VOLUNTEER = "volunteer"


 class Location(BaseModel):
     country: str = "USA"
     city: str = None
-    state: str = None
+    state: Optional[str] = None


 class CompensationInterval(Enum):
@ -35,8 +36,8 @@ class CompensationInterval(Enum):

 class Compensation(BaseModel):
     interval: CompensationInterval
-    min_amount: int
-    max_amount: int
+    min_amount: int = None
+    max_amount: int = None
     currency: str = "USD"


@ -44,11 +45,11 @@ class JobPost(BaseModel):
     title: str
     company_name: str
     job_url: str
-    location: Location
+    location: Optional[Location]

     description: str = None
-    job_type: JobType = None
-    compensation: Compensation = None
+    job_type: Optional[JobType] = None
+    compensation: Optional[Compensation] = None
     date_posted: date = None


@ -56,7 +57,7 @@ class JobResponse(BaseModel):
     success: bool
     error: str = None

-    total_results: int = None
+    total_results: Optional[int] = None

     jobs: list[JobPost] = []

@ -64,6 +65,11 @@ class JobResponse(BaseModel):

     @validator("returned_results", pre=True, always=True)
     def set_returned_results(cls, v, values):
-        if v is None and values.get("jobs"):
-            return len(values["jobs"])
+        jobs_list = values.get("jobs")
+
+        if v is None:
+            if jobs_list is not None:
+                return len(jobs_list)
+            else:
+                return 0
         return v
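With the loosened fields, a JobPost can now be built without location or compensation data. A quick sketch under the pydantic v1 API used here; the import path is an assumption based on the new package layout:

from jobscrape.core.jobs import JobPost  # assumed path after the migration

job = JobPost(
    title="Backend Engineer",             # illustrative values
    company_name="Acme",
    job_url="https://example.com/job/123",
    location=None,                        # now Optional
    compensation=None,                    # now Optional
)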
@ -1,5 +1,4 @@
-from ..jobs import *
-from ..formatters import OutputFormat
+from ..jobs import Enum, BaseModel, JobType, JobResponse
 from typing import List, Dict, Optional, Any

@ -17,12 +16,11 @@ class Site(Enum):
 class ScraperInput(BaseModel):
     site_type: List[Site]
     search_term: str
-    output_format: OutputFormat = OutputFormat.JSON

     location: str = None
-    distance: int = None
+    distance: Optional[int] = None
     is_remote: bool = False
-    job_type: JobType = None
+    job_type: Optional[JobType] = None
     easy_apply: bool = None  # linkedin

     results_wanted: int = 15
@ -1,22 +1,18 @@
 import re
+import sys
+import math
 import json
-from typing import Optional, Tuple, List
 from datetime import datetime
+from typing import Optional, Tuple, List

 import tls_client
 import urllib.parse
 from bs4 import BeautifulSoup
 from bs4.element import Tag
-from fastapi import status
-
-from api.core.jobs import *
-from api.core.jobs import JobPost
-from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
-
 from concurrent.futures import ThreadPoolExecutor, Future
-import math
-import traceback
-import sys
+
+from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType
+from .. import Scraper, ScraperInput, Site, StatusException


 class ParsingException(Exception):
@ -66,8 +62,8 @@ class IndeedScraper(Scraper):
         response = session.get(self.url + "/jobs", params=params)

         if (
-            response.status_code != status.HTTP_200_OK
-            and response.status_code != status.HTTP_307_TEMPORARY_REDIRECT
+            response.status_code != 200
+            and response.status_code != 307
         ):
             raise StatusException(response.status_code)

@ -131,7 +127,6 @@ class IndeedScraper(Scraper):
                 location=Location(
                     city=job.get("jobLocationCity"),
                     state=job.get("jobLocationState"),
-                    postal_code=job.get("jobLocationPostal"),
                 ),
                 job_type=job_type,
                 compensation=compensation,
@ -140,9 +135,11 @@ class IndeedScraper(Scraper):
             )
             return job_post

-        for job in jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]:
-            job_post = process_job(job)
-            job_list.append(job_post)
+        with ThreadPoolExecutor(max_workers=10) as executor:
+            job_results: list[Future] = [executor.submit(process_job, job) for job in
+                                         jobs["metaData"]["mosaicProviderJobCardsModel"]["results"]]
+
+        job_list = [result.result() for result in job_results if result.result()]

         return job_list, total_num_jobs
@ -4,10 +4,9 @@ from datetime import datetime
 import requests
 from bs4 import BeautifulSoup
 from bs4.element import Tag
-from fastapi import status

-from api.core.scrapers import Scraper, ScraperInput, Site
-from api.core.jobs import *
+from .. import Scraper, ScraperInput, Site
+from ...jobs import JobPost, Location, JobResponse, JobType, Compensation, CompensationInterval


 class LinkedInScraper(Scraper):
@ -59,7 +58,7 @@ class LinkedInScraper(Scraper):
             f"{self.url}/jobs/search", params=params, allow_redirects=True
         )

-        if response.status_code != status.HTTP_200_OK:
+        if response.status_code != 200:
             return JobResponse(
                 success=False,
                 error=f"Response returned {response.status_code}",
@ -118,6 +117,7 @@ class LinkedInScraper(Scraper):
                 date_posted=date_posted,
                 job_url=job_url,
                 job_type=job_type,
+                compensation=Compensation(interval=CompensationInterval.YEARLY, currency="USD"),
             )
             job_list.append(job_post)
             if (
@ -185,7 +185,6 @@ class LinkedInScraper(Scraper):
         employment_type = employment_type_span.get_text(strip=True)
         employment_type = employment_type.lower()
         employment_type = employment_type.replace("-", "")
-        print(employment_type)

         return JobType(employment_type)
@ -1,18 +1,17 @@
 import math
 import json
+import re
-from datetime import datetime
+from datetime import datetime, date
 from typing import Optional, Tuple, List
 from urllib.parse import urlparse, parse_qs

 import tls_client
-from fastapi import status
 from bs4 import BeautifulSoup
 from bs4.element import Tag
 from concurrent.futures import ThreadPoolExecutor, Future

-from api.core.jobs import JobPost
-from api.core.scrapers import Scraper, ScraperInput, Site, StatusException
-from api.core.jobs import *
+from .. import Scraper, ScraperInput, Site, StatusException
+from ...jobs import JobPost, Compensation, CompensationInterval, Location, JobResponse, JobType


 class ZipRecruiterScraper(Scraper):
@ -26,9 +25,12 @@ class ZipRecruiterScraper(Scraper):

         self.jobs_per_page = 20
         self.seen_urls = set()
+        self.session = tls_client.Session(
+            client_identifier="chrome112", random_tls_extension_order=True
+        )

     def scrape_page(
-        self, scraper_input: ScraperInput, page: int, session: tls_client.Session
+        self, scraper_input: ScraperInput, page: int
     ) -> tuple[list[JobPost], int | None]:
         """
         Scrapes a page of ZipRecruiter for jobs with scraper_input criteria
@ -52,91 +54,47 @@ class ZipRecruiterScraper(Scraper):
         params = {
             "search": scraper_input.search_term,
             "location": scraper_input.location,
-            "radius": scraper_input.distance,
-            "refine_by_location_type": "only_remote"
-            if scraper_input.is_remote
-            else None,
-            "refine_by_employment": f"employment_type:employment_type:{job_type_value}"
-            if job_type_value
-            else None,
             "page": page,
+            "form": "jobs-landing"
         }

+        if scraper_input.is_remote:
+            params["refine_by_location_type"] = "only_remote"
+
+        if scraper_input.distance:
+            params["radius"] = scraper_input.distance
+
+        if job_type_value:
+            params["refine_by_employment"] = f"employment_type:employment_type:{job_type_value}"
+
-        response = session.get(
+        response = self.session.get(
             self.url + "/jobs-search",
             headers=ZipRecruiterScraper.headers(),
             params=params,
         )

-        if response.status_code != status.HTTP_200_OK:
+        if response.status_code != 200:
             raise StatusException(response.status_code)

-        html_string = response.content
+        html_string = response.text
         soup = BeautifulSoup(html_string, "html.parser")

+        script_tag = soup.find("script", {"id": "js_variables"})
+        data = json.loads(script_tag.string)
+
         if page == 1:
-            script_tag = soup.find("script", {"id": "js_variables"})
-            data = json.loads(script_tag.string)
-
             job_count = int(data["totalJobCount"].replace(",", ""))
         else:
             job_count = None

-        job_posts = soup.find_all("div", {"class": "job_content"})
-
-        def process_job(job: Tag) -> Optional[JobPost]:
-            """
-            Parses a job from the job content tag
-            :param job: BeautifulSoup Tag for one job post
-            :return JobPost
-            """
-            job_url = job.find("a", {"class": "job_link"})["href"]
-            if job_url in self.seen_urls:
-                return None
-
-            title = job.find("h2", {"class": "title"}).text
-            company = job.find("a", {"class": "company_name"}).text.strip()
-
-            description, updated_job_url = ZipRecruiterScraper.get_description(
-                job_url, session
-            )
-            if updated_job_url is not None:
-                job_url = updated_job_url
-            if description is None:
-                description = job.find("p", {"class": "job_snippet"}).text.strip()
-
-            job_type_element = job.find("li", {"class": "perk_item perk_type"})
-            if job_type_element:
-                job_type_text = (
-                    job_type_element.text.strip()
-                    .lower()
-                    .replace("-", "")
-                    .replace(" ", "")
-                )
-                if job_type_text == "contractor":
-                    job_type_text = "contract"
-                job_type = JobType(job_type_text)
-            else:
-                job_type = None
-
-            date_posted = ZipRecruiterScraper.get_date_posted(job)
-
-            job_post = JobPost(
-                title=title,
-                description=description,
-                company_name=company,
-                location=ZipRecruiterScraper.get_location(job),
-                job_type=job_type,
-                compensation=ZipRecruiterScraper.get_compensation(job),
-                date_posted=date_posted,
-                job_url=job_url,
-            )
-            return job_post
-
         with ThreadPoolExecutor(max_workers=10) as executor:
-            job_results: list[Future] = [
-                executor.submit(process_job, job) for job in job_posts
-            ]
+            if "jobList" in data and data["jobList"]:
+                jobs_js = data["jobList"]
+                job_results = [executor.submit(self.process_job_js, job) for job in jobs_js]
+            else:
+                jobs_html = soup.find_all("div", {"class": "job_content"})
+                job_results = [executor.submit(self.process_job_html, job) for job in
+                               jobs_html]

         job_list = [result.result() for result in job_results if result.result()]

@ -148,19 +106,17 @@ class ZipRecruiterScraper(Scraper):
         :param scraper_input:
         :return: job_response
         """
-        session = tls_client.Session(
-            client_identifier="chrome112", random_tls_extension_order=True
-        )
-
-        pages_to_process = math.ceil(scraper_input.results_wanted / self.jobs_per_page)
+        pages_to_process = max(3, math.ceil(scraper_input.results_wanted / self.jobs_per_page))

         try:
             #: get first page to initialize session
-            job_list, total_results = self.scrape_page(scraper_input, 1, session)
+            job_list, total_results = self.scrape_page(scraper_input, 1)

             with ThreadPoolExecutor(max_workers=10) as executor:
                 futures: list[Future] = [
-                    executor.submit(self.scrape_page, scraper_input, page, session)
+                    executor.submit(self.scrape_page, scraper_input, page)
                     for page in range(2, pages_to_process + 1)
                 ]

@ -169,6 +125,7 @@ class ZipRecruiterScraper(Scraper):

                 job_list += jobs

         except StatusException as e:
             return JobResponse(
                 success=False,
@ -192,9 +149,129 @@ class ZipRecruiterScraper(Scraper):
         )
         return job_response

+    def process_job_html(self, job: Tag) -> Optional[JobPost]:
+        """
+        Parses a job from the job content tag
+        :param job: BeautifulSoup Tag for one job post
+        :return JobPost
+        """
+        job_url = job.find("a", {"class": "job_link"})["href"]
+        if job_url in self.seen_urls:
+            return None
+
+        title = job.find("h2", {"class": "title"}).text
+        company = job.find("a", {"class": "company_name"}).text.strip()
+
+        description, updated_job_url = self.get_description(job_url)
+        if updated_job_url is not None:
+            job_url = updated_job_url
+        if description is None:
+            description = job.find("p", {"class": "job_snippet"}).text.strip()
+
+        job_type_element = job.find("li", {"class": "perk_item perk_type"})
+        if job_type_element:
+            job_type_text = (
+                job_type_element.text.strip()
+                .lower()
+                .replace("-", "")
+                .replace(" ", "")
+            )
+            if job_type_text == "contractor":
+                job_type_text = "contract"
+            job_type = JobType(job_type_text)
+        else:
+            job_type = None
+
+        date_posted = ZipRecruiterScraper.get_date_posted(job)
+
+        job_post = JobPost(
+            title=title,
+            description=description,
+            company_name=company,
+            location=ZipRecruiterScraper.get_location(job),
+            job_type=job_type,
+            compensation=ZipRecruiterScraper.get_compensation(job),
+            date_posted=date_posted,
+            job_url=job_url,
+        )
+        return job_post
+
+    def process_job_js(self, job: dict) -> Optional[JobPost]:
+        # Map the job data to the fields expected by the Pydantic model
+        title = job.get("Title")
+        description = BeautifulSoup(job.get("Snippet", "").strip(), "html.parser").get_text()
+
+        company = job.get("OrgName")
+        location = Location(city=job.get("City"), state=job.get("State"))
+        try:
+            job_type = ZipRecruiterScraper.job_type_from_string(job.get("EmploymentType", "").replace("-", "_").lower())
+        except ValueError:
+            # Skip the job if its employment type is unrecognized
+            return None
+
+        formatted_salary = job.get("FormattedSalaryShort", "")
+        salary_parts = formatted_salary.split(" ")
+
+        min_salary_str = salary_parts[0][1:].replace(",", "")
+        if "." in min_salary_str:
+            min_amount = int(float(min_salary_str) * 1000)
+        else:
+            min_amount = int(min_salary_str.replace("K", "000"))
+
+        if len(salary_parts) >= 3 and salary_parts[2].startswith("$"):
+            max_salary_str = salary_parts[2][1:].replace(",", "")
+            if "." in max_salary_str:
+                max_amount = int(float(max_salary_str) * 1000)
+            else:
+                max_amount = int(max_salary_str.replace("K", "000"))
+        else:
+            max_amount = 0
+
+        compensation = Compensation(
+            interval=CompensationInterval.YEARLY,
+            min_amount=min_amount,
+            max_amount=max_amount
+        )
+        save_job_url = job.get("SaveJobURL", "")
+        posted_time_match = re.search(r"posted_time=(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z)", save_job_url)
+        if posted_time_match:
+            date_time_str = posted_time_match.group(1)
+            date_posted_obj = datetime.strptime(date_time_str, "%Y-%m-%dT%H:%M:%SZ")
+            date_posted = date_posted_obj.date()
+        else:
+            date_posted = date.today()
+        job_url = job.get("JobURL")
+
+        return JobPost(
+            title=title,
+            description=description,
+            company_name=company,
+            location=location,
+            job_type=job_type,
+            compensation=compensation,
+            date_posted=date_posted,
+            job_url=job_url,
+        )
+
+    @staticmethod
+    def job_type_from_string(value: str) -> Optional[JobType]:
+        if not value:
+            return None
+
+        if value.lower() == "contractor":
+            value = "contract"
+        normalized_value = value.replace("_", "")
+        for item in JobType:
+            if item.value == normalized_value:
+                return item
+        raise ValueError(f"Invalid value for JobType: {value}")

-    @staticmethod
     def get_description(
-        job_page_url: str, session: tls_client.Session
+        self,
+        job_page_url: str
     ) -> Tuple[Optional[str], Optional[str]]:
         """
         Retrieves job description by going to the job page url
@ -202,7 +279,7 @@ class ZipRecruiterScraper(Scraper):
         :param session:
         :return: description or None, response url
         """
-        response = session.get(
+        response = self.session.get(
             job_page_url, headers=ZipRecruiterScraper.headers(), allow_redirects=True
         )
         if response.status_code not in range(200, 400):
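The FormattedSalaryShort parsing in process_job_js above targets strings shaped like "$32K to $40K" or "$32,000 to $48,000". A cleaned-up standalone sketch of that parsing, as a variant that also strips the trailing "K" before converting:

def parse_formatted_salary(formatted_salary: str) -> tuple[int, int]:
    """Parse "$32.5K to $40K" -> (32500, 40000); (min, 0) if no max is given."""
    parts = formatted_salary.split(" ")

    def to_amount(token: str) -> int:
        token = token.lstrip("$").replace(",", "")
        if token.endswith("K"):
            return int(float(token[:-1]) * 1000)
        return int(float(token))

    min_amount = to_amount(parts[0])
    max_amount = (
        to_amount(parts[2]) if len(parts) >= 3 and parts[2].startswith("$") else 0
    )
    return min_amount, max_amount


assert parse_formatted_salary("$32.5K to $40K") == (32500, 40000)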
main.py
@ -1,16 +0,0 @@
from fastapi import FastAPI

from supabase_py import create_client, Client
from api import router as api_router

app = FastAPI(
    title="JobSpy Backend",
    description="Endpoints for job board scrapers: LinkedIn, Indeed, and ZipRecruiter",
    version="1.0.0",
)
app.include_router(api_router)


@app.get("/health", tags=["health"])
async def health_check():
    return {"message": "JobSpy ready to scrape"}
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
@ -1,15 +0,0 @@
{
    "id": "a7ea6d58-8dca-4216-97a9-224dadc1e18f",
    "name": "JobSpy",
    "values": [
        {
            "key": "access_token",
            "value": "",
            "type": "any",
            "enabled": true
        }
    ],
    "_postman_variable_scope": "environment",
    "_postman_exported_at": "2023-07-09T23:51:36.709Z",
    "_postman_exported_using": "Postman/10.15.8"
}
@ -0,0 +1,23 @@
[tool.poetry]
name = "jobscrape"
version = "0.1.0"
description = "Job scraper for LinkedIn, Indeed & ZipRecruiter"
authors = ["Zachary Hampton <69336300+ZacharyHampton@users.noreply.github.com>", "Cullen Watson <cullen@cullen.ai>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10"
requests = "^2.31.0"
tls-client = "^0.2.1"
beautifulsoup4 = "^4.12.2"
pandas = "^2.1.0"
pydantic = "^2.3.0"


[tool.poetry.group.dev.dependencies]
pytest = "^7.4.1"
jupyter = "^1.0.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
@ -1,61 +0,0 @@
anyio==3.7.1
atomicwrites==1.4.1
attrs==23.1.0
bcrypt==4.0.1
beautifulsoup4==4.12.2
cachetools==5.3.1
certifi==2023.5.7
cffi==1.15.1
chardet==4.0.0
charset-normalizer==3.2.0
click==8.1.4
colorama==0.4.6
cryptography==41.0.1
dataclasses==0.6
deprecation==2.1.0
ecdsa==0.18.0
exceptiongroup==1.1.2
fastapi==0.99.1
google-auth==2.22.0
google-auth-oauthlib==1.0.0
gotrue==0.2.0
gspread==5.10.0
h11==0.14.0
httpcore==0.12.3
httplib2==0.22.0
httpx==0.16.1
idna==2.10
iniconfig==2.0.0
oauth2client==4.1.3
oauthlib==3.2.2
packaging==23.1
passlib==1.7.4
pluggy==1.2.0
postgrest-py==0.4.0
py==1.11.0
pyasn1==0.5.0
pyasn1-modules==0.3.0
pycparser==2.21
pydantic==1.10.11
pyparsing==3.1.1
pytest==6.2.5
python-dateutil==2.8.2
python-dotenv==1.0.0
python-jose==3.3.0
python-multipart==0.0.6
realtime-py==0.1.3
requests==2.25.1
requests-oauthlib==1.3.1
rfc3986==1.5.0
rsa==4.9
six==1.16.0
sniffio==1.3.0
soupsieve==2.4.1
starlette==0.27.0
supabase-py==0.0.2
tls-client==0.2.1
toml==0.10.2
typing_extensions==4.7.1
urllib3==1.26.16
uvicorn==0.22.0
websockets==9.1
settings.py
@ -1,14 +0,0 @@
from dotenv import load_dotenv
import os

load_dotenv()
# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing)
GSHEET_NAME = os.environ.get("GSHEET_NAME", "JobSpy")

# optional auth
AUTH_REQUIRED = False
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
JWT_SECRET_KEY = os.environ.get("JWT_SECRET_KEY")
ACCESS_TOKEN_EXPIRE_MINUTES = 60
ALGORITHM = "HS256"
@ -0,0 +1,10 @@
from jobscrape import scrape_jobs


def test_indeed():
    result = scrape_jobs(
        site_name="indeed",
        search_term="software engineer",
    )

    assert result is not None
@ -0,0 +1,10 @@
from jobscrape import scrape_jobs


def test_ziprecruiter():
    result = scrape_jobs(
        site_name="zip_recruiter",
        search_term="software engineer",
    )

    assert result is not None