Docker workflow (#24)

pull/29/head
Cullen Watson 2023-08-28 12:15:13 -05:00 committed by GitHub
parent 9eb14a74a3
commit b9b7754703
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 153 additions and 59 deletions

32
.github/workflows/docker-build.yml vendored Normal file
View File

@ -0,0 +1,32 @@
# Builds the image described by ./Dockerfile and pushes it to
# GitHub Container Registry (ghcr.io) as <owner>/jobspy:latest.
name: Build and Push Docker Image

# Runs only on pushes to the docker_workflow branch.
on:
  push:
    branches:
      - docker_workflow

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      # Authenticates as the user that triggered the run, using the
      # DOCKER_TOKEN repository secret as the registry password.
      - name: Login to GitHub Docker Registry
        uses: docker/login-action@v1
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.DOCKER_TOKEN }}

      # Build context is the repository root; the image is pushed
      # under the repository owner's namespace with the "latest" tag.
      - name: Build and Push Image
        uses: docker/build-push-action@v2
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ghcr.io/${{ github.repository_owner }}/jobspy:latest

View File

@ -39,50 +39,51 @@ jobs:
exit 1
fi
- name: Check HTTP status to POST /api/v1/jobs/
run: |
response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
"site_type": ["indeed", "linkedin"],
"search_term": "software engineer",
"location": "austin, tx",
"distance": 10,
"job_type": "fulltime",
"results_wanted": 5
}' http://0.0.0.0:8000/api/v1/jobs -w "%{http_code}")
status_code="${response: -3}"
echo "Received status code: $status_code"
if [ "$status_code" != "200" ]; then
echo "Error: Expected status code 200, but got $status_code"
exit 1
fi
echo "${response::-3}" > response.json
cat response.json
- name: Check error field in response
run: |
global_error=$(jq '.error' response.json)
indeed_error=$(jq '.indeed.error' response.json)
linkedin_error=$(jq '.linkedin.error' response.json)
if [[ "$indeed_error" != "null" || "$linkedin_error" != "null" ]]; then
echo "Error found in response:"
echo "Global Error: $global_error"
echo "Indeed Error: $indeed_error"
echo "LinkedIn Error: $linkedin_error"
exit 1
fi
- name: Verify returned_results in response
run: |
indeed_results=$(jq '.indeed.returned_results' response.json)
linkedin_results=$(jq '.linkedin.returned_results' response.json)
if [[ $indeed_results -ne 5 || $linkedin_results -ne 5 ]]; then
echo "Mismatch in results_wanted and returned_results:"
echo "Indeed: Expected 5, Got $indeed_results"
echo "LinkedIn: Expected 5, Got $linkedin_results"
exit 1
fi
# Disabled for now: requests from GitHub's server IPs are being blocked, so these checks cannot pass in CI
# - name: Check HTTP status to POST /api/v1/jobs/
# run: |
# response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
# "site_type": ["indeed", "linkedin"],
# "search_term": "software engineer",
# "location": "austin, tx",
# "distance": 10,
# "job_type": "fulltime",
# "results_wanted": 5
# }' http://0.0.0.0:8000/api/v1/jobs -w "%{http_code}")
#
# status_code="${response: -3}"
# echo "Received status code: $status_code"
#
# if [ "$status_code" != "200" ]; then
# echo "Error: Expected status code 200, but got $status_code"
# exit 1
# fi
#
# echo "${response::-3}" > response.json
# cat response.json
#
# - name: Check error field in response
# run: |
# global_error=$(jq '.error' response.json)
# indeed_error=$(jq '.indeed.error' response.json)
# linkedin_error=$(jq '.linkedin.error' response.json)
#
# if [[ "$indeed_error" != "null" || "$linkedin_error" != "null" ]]; then
# echo "Error found in response:"
# echo "Global Error: $global_error"
# echo "Indeed Error: $indeed_error"
# echo "LinkedIn Error: $linkedin_error"
# exit 1
# fi
#
# - name: Verify returned_results in response
# run: |
# indeed_results=$(jq '.indeed.returned_results' response.json)
# linkedin_results=$(jq '.linkedin.returned_results' response.json)
#
# if [[ $indeed_results -ne 5 || $linkedin_results -ne 5 ]]; then
# echo "Mismatch in results_wanted and returned_results:"
# echo "Indeed: Expected 5, Got $indeed_results"
# echo "LinkedIn: Expected 5, Got $linkedin_results"
# exit 1
# fi

15
Dockerfile Normal file
View File

@ -0,0 +1,15 @@
# Image for the JobSpy API: installs the Python requirements and serves
# main:app with uvicorn on $PORT (default 8000).
FROM python:3.10-slim

WORKDIR /app

# Install jq without recommended extras and drop the apt package lists
# in the same layer so they do not bloat the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends jq && \
    rm -rf /var/lib/apt/lists/*

# Copy only the requirements first so the dependency layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application source.
COPY . /app

EXPOSE 8000

# Default port; override at run time with `-e PORT=<port>`.
ENV PORT=8000

# A shell is still needed to expand $PORT at container start; `exec`
# replaces that shell with uvicorn so it receives stop signals directly.
CMD ["sh", "-c", "exec uvicorn main:app --host 0.0.0.0 --port $PORT"]

View File

@ -138,13 +138,51 @@ linkedin, Software Engineer 1, Public Partnerships | PPL, https://www.linkedin.c
```
## Installation
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
1. Clone this repository `git clone https://github.com/cullenwatson/jobspy`
2. Install the dependencies with `pip install -r requirements.txt`
3. Run the server with `uvicorn main:app --reload`
## Usage
### Google Sheets Integration (Optional)
## Docker Image (simple)
_Requires [Docker Desktop](https://www.docker.com/products/docker-desktop/)_
[JobSpy API Image](https://ghcr.io/cullenwatson/jobspy:latest) is continuously updated and available on GitHub Container Registry. You can pull and use the image with:
## Usage Docker
To pull the Docker image:
```bash
docker pull ghcr.io/cullenwatson/jobspy:latest
```
### Params
By default, the container:
* Expects `client_secret.json` in the current directory (only needed when using Google Sheets; see below for how to obtain it)
* Listens on port `8000`
* Places the jobs into a sheet named `JobSpy`
To run the image with these default settings, use:
Example (Windows):
```bash
docker run -v %cd%/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
```
Example (Unix):
```bash
docker run -v $(pwd)/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
```
### Using custom params
For example,
* port `8030`,
* path `C:\config\client_secret.json`
* Google sheet name `JobSheet`
```bash
docker run -v C:\config\client_secret.json:/app/client_secret.json -e GSHEET_NAME=JobSheet -e PORT=8030 -p 8030:8030 ghcr.io/cullenwatson/jobspy
```
### Google Sheets Integration
#### Obtaining an Access Key: [Video Guide](https://youtu.be/w533wJuilao?si=5u3m50pRtdhqkg9Z&t=43)
* Enable the [Google Sheets & Google Drive API](https://console.cloud.google.com/)
@ -161,8 +199,6 @@ _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/)
### How to call the API
#### [Postman](https://www.postman.com/downloads/) (preferred):
To use Postman:
1. Locate the files in the `/postman/` directory.
@ -171,6 +207,12 @@ To use Postman:
#### Swagger UI:
Or you can call the API with the interactive documentation at [localhost:8000/docs](http://localhost:8000/docs).
## Python installation (alternative to Docker)
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
1. Clone this repository `git clone https://github.com/cullenwatson/jobspy`
2. Install the dependencies with `pip install -r requirements.txt`
3. Run the server with `uvicorn main:app --reload`
## FAQ
### I'm having issues with my queries. What should I do?

View File

@ -20,7 +20,7 @@ class CSVFormatter:
"https://www.googleapis.com/auth/drive",
]
credentials = ServiceAccountCredentials.from_json_keyfile_name(
GSHEET_JSON_KEY_PATH, scope
"client_secret.json", scope
)
gc = gspread.authorize(credentials)
sh = gc.open(GSHEET_NAME)
@ -43,6 +43,11 @@ class CSVFormatter:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
return f"JobSpy_results_{timestamp}.csv"
@staticmethod
def generate_filename() -> str:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
return f"JobSpy_results_{timestamp}.csv"
@staticmethod
def format(jobs: CommonResponse) -> StringIO:
"""

View File

@ -2,9 +2,8 @@ from dotenv import load_dotenv
import os
load_dotenv()
# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1HAnn-aPv-BO4QTEzfIWc-5iw50duyMoTgX8o3RsEOWs/edit?usp=sharing)
GSHEET_JSON_KEY_PATH = "client_secret.json"
GSHEET_NAME = "JobSpy"
# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing)
GSHEET_NAME = os.environ.get("GSHEET_NAME", "JobSpy")
# optional auth
AUTH_REQUIRED = False