Docker workflow (#24)

pull/29/head
Cullen Watson 2023-08-28 12:15:13 -05:00 committed by GitHub
parent 9eb14a74a3
commit b9b7754703
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 153 additions and 59 deletions

32
.github/workflows/docker-build.yml vendored Normal file
View File

@ -0,0 +1,32 @@
# Builds the repository's Docker image from ./Dockerfile and pushes it to
# GitHub Container Registry (ghcr.io) under the `latest` tag.
name: Build and Push Docker Image

# NOTE(review): only pushes to the `docker_workflow` branch trigger this build;
# confirm whether the default branch should be listed here as well.
on:
  push:
    branches:
      - docker_workflow

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Authenticates against ghcr.io as the user who triggered the run.
      # DOCKER_TOKEN is a repository secret; presumably a token with
      # package-write permission (confirm in the repository settings).
      - name: Login to GitHub Docker Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.DOCKER_TOKEN }}

      # Build context is the repository root; the image is pushed on success.
      - name: Build and Push Image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./Dockerfile
          push: true
          tags: ghcr.io/${{ github.repository_owner }}/jobspy:latest

View File

@ -39,50 +39,51 @@ jobs:
exit 1 exit 1
fi fi
- name: Check HTTP status to POST /api/v1/jobs/ # not checking currently because of bad ip at Github's servers being blocked
run: | # - name: Check HTTP status to POST /api/v1/jobs/
response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{ # run: |
"site_type": ["indeed", "linkedin"], # response=$(curl -L -s -X 'POST' -H 'Content-Type: application/json' -d '{
"search_term": "software engineer", # "site_type": ["indeed", "linkedin"],
"location": "austin, tx", # "search_term": "software engineer",
"distance": 10, # "location": "austin, tx",
"job_type": "fulltime", # "distance": 10,
"results_wanted": 5 # "job_type": "fulltime",
}' http://0.0.0.0:8000/api/v1/jobs -w "%{http_code}") # "results_wanted": 5
# }' http://0.0.0.0:8000/api/v1/jobs -w "%{http_code}")
status_code="${response: -3}" #
echo "Received status code: $status_code" # status_code="${response: -3}"
# echo "Received status code: $status_code"
if [ "$status_code" != "200" ]; then #
echo "Error: Expected status code 200, but got $status_code" # if [ "$status_code" != "200" ]; then
exit 1 # echo "Error: Expected status code 200, but got $status_code"
fi # exit 1
# fi
echo "${response::-3}" > response.json #
cat response.json # echo "${response::-3}" > response.json
# cat response.json
- name: Check error field in response #
run: | # - name: Check error field in response
global_error=$(jq '.error' response.json) # run: |
indeed_error=$(jq '.indeed.error' response.json) # global_error=$(jq '.error' response.json)
linkedin_error=$(jq '.linkedin.error' response.json) # indeed_error=$(jq '.indeed.error' response.json)
# linkedin_error=$(jq '.linkedin.error' response.json)
if [[ "$indeed_error" != "null" || "$linkedin_error" != "null" ]]; then #
echo "Error found in response:" # if [[ "$indeed_error" != "null" || "$linkedin_error" != "null" ]]; then
echo "Global Error: $global_error" # echo "Error found in response:"
echo "Indeed Error: $indeed_error" # echo "Global Error: $global_error"
echo "LinkedIn Error: $linkedin_error" # echo "Indeed Error: $indeed_error"
exit 1 # echo "LinkedIn Error: $linkedin_error"
fi # exit 1
# fi
- name: Verify returned_results in response #
run: | # - name: Verify returned_results in response
indeed_results=$(jq '.indeed.returned_results' response.json) # run: |
linkedin_results=$(jq '.linkedin.returned_results' response.json) # indeed_results=$(jq '.indeed.returned_results' response.json)
# linkedin_results=$(jq '.linkedin.returned_results' response.json)
if [[ $indeed_results -ne 5 || $linkedin_results -ne 5 ]]; then #
echo "Mismatch in results_wanted and returned_results:" # if [[ $indeed_results -ne 5 || $linkedin_results -ne 5 ]]; then
echo "Indeed: Expected 5, Got $indeed_results" # echo "Mismatch in results_wanted and returned_results:"
echo "LinkedIn: Expected 5, Got $linkedin_results" # echo "Indeed: Expected 5, Got $indeed_results"
exit 1 # echo "LinkedIn: Expected 5, Got $linkedin_results"
fi # exit 1
# fi

15
Dockerfile Normal file
View File

@ -0,0 +1,15 @@
# Image for the JobSpy API: runs `uvicorn main:app` on $PORT (default 8000).
FROM python:3.10-slim

WORKDIR /app

# jq is the only system package needed; skip recommended extras and remove the
# apt package lists afterwards so they do not end up in the image layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends jq && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the rest of the source so this
# layer is reused from cache until requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

COPY . /app

EXPOSE 8000
# Default listening port; override at run time with `-e PORT=<port>`.
ENV PORT=8000

# A shell is still required to expand $PORT at container start; `exec` replaces
# that shell with uvicorn so the server receives stop signals directly.
CMD ["sh", "-c", "exec uvicorn main:app --host 0.0.0.0 --port \"$PORT\""]

View File

@ -138,13 +138,51 @@ linkedin, Software Engineer 1, Public Partnerships | PPL, https://www.linkedin.c
``` ```
## Installation ## Installation
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
1. Clone this repository `git clone https://github.com/cullenwatson/jobspy`
2. Install the dependencies with `pip install -r requirements.txt`
4. Run the server with `uvicorn main:app --reload`
## Usage ## Docker Image (simple)
### Google Sheets Integration (Optional) _Requires [Docker Desktop](https://www.docker.com/products/docker-desktop/)_
[JobSpy API Image](https://ghcr.io/cullenwatson/jobspy:latest) is continuously updated and available on GitHub Container Registry. You can pull and use the image with:
## Usage Docker
To pull the Docker image:
```bash
docker pull ghcr.io/cullenwatson/jobspy:latest
```
### Params
By default,
* `client_secret.json` in current directory (if using Google Sheets, see below to obtain)
* Listens on port `8000`
* Places the jobs into a sheet that is named JobSpy
To run the image with these default settings, use:
Example (Windows):
```bash
docker run -v %cd%/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
```
Example (Unix):
```bash
docker run -v $(pwd)/client_secret.json:/app/client_secret.json -p 8000:8000 ghcr.io/cullenwatson/jobspy
```
### Using custom params
For example,
* port `8030`,
* path `C:\config\client_secret.json`
* Google sheet name `JobSheet`
```bash
docker run -v C:\config\client_secret.json:/app/client_secret.json -e GSHEET_NAME=JobSheet -e PORT=8030 -p 8030:8030 ghcr.io/cullenwatson/jobspy
```
### Google Sheets Integration
#### Obtaining an Access Key: [Video Guide](https://youtu.be/w533wJuilao?si=5u3m50pRtdhqkg9Z&t=43) #### Obtaining an Access Key: [Video Guide](https://youtu.be/w533wJuilao?si=5u3m50pRtdhqkg9Z&t=43)
* Enable the [Google Sheets & Google Drive API](https://console.cloud.google.com/) * Enable the [Google Sheets & Google Drive API](https://console.cloud.google.com/)
@ -161,8 +199,6 @@ _Python version >= [3.10](https://www.python.org/downloads/release/python-3100/)
### How to call the API ### How to call the API
#### [Postman](https://www.postman.com/downloads/) (preferred): #### [Postman](https://www.postman.com/downloads/) (preferred):
To use Postman: To use Postman:
1. Locate the files in the `/postman/` directory. 1. Locate the files in the `/postman/` directory.
@ -171,6 +207,12 @@ To use Postman:
#### Swagger UI: #### Swagger UI:
Or you can call the API with the interactive documentation at [localhost:8000/docs](http://localhost:8000/docs). Or you can call the API with the interactive documentation at [localhost:8000/docs](http://localhost:8000/docs).
## Python installation (alternative to Docker)
_Python version >= [3.10](https://www.python.org/downloads/release/python-3100/) required_
1. Clone this repository `git clone https://github.com/cullenwatson/jobspy`
2. Install the dependencies with `pip install -r requirements.txt`
3. Run the server with `uvicorn main:app --reload`
## FAQ ## FAQ
### I'm having issues with my queries. What should I do? ### I'm having issues with my queries. What should I do?

View File

@ -20,7 +20,7 @@ class CSVFormatter:
"https://www.googleapis.com/auth/drive", "https://www.googleapis.com/auth/drive",
] ]
credentials = ServiceAccountCredentials.from_json_keyfile_name( credentials = ServiceAccountCredentials.from_json_keyfile_name(
GSHEET_JSON_KEY_PATH, scope "client_secret.json", scope
) )
gc = gspread.authorize(credentials) gc = gspread.authorize(credentials)
sh = gc.open(GSHEET_NAME) sh = gc.open(GSHEET_NAME)
@ -43,6 +43,11 @@ class CSVFormatter:
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
return f"JobSpy_results_{timestamp}.csv" return f"JobSpy_results_{timestamp}.csv"
@staticmethod
def generate_filename() -> str:
    """
    Build a timestamped file name for a CSV results export.

    NOTE(review): a definition with the same body appears directly above this
    one in the class; confirm whether this second copy is an unintended
    duplicate.

    :return: name of the form "JobSpy_results_<YYYYmmdd_HHMMSS>.csv", stamped
             with the value of datetime.now() at call time
    """
    now = datetime.now()
    stamp = now.strftime("%Y%m%d_%H%M%S")
    return f"JobSpy_results_{stamp}.csv"
@staticmethod @staticmethod
def format(jobs: CommonResponse) -> StringIO: def format(jobs: CommonResponse) -> StringIO:
""" """

View File

@ -2,9 +2,8 @@ from dotenv import load_dotenv
import os import os
load_dotenv() load_dotenv()
# gsheets (template to copy at https://docs.google.com/spreadsheets/d/1HAnn-aPv-BO4QTEzfIWc-5iw50duyMoTgX8o3RsEOWs/edit?usp=sharing) # gsheets (template to copy at https://docs.google.com/spreadsheets/d/1mOgb-ZGZy_YIhnW9OCqIVvkFwiKFvhMBjNcbakW7BLo/edit?usp=sharing)
GSHEET_JSON_KEY_PATH = "client_secret.json" GSHEET_NAME = os.environ.get("GSHEET_NAME", "JobSpy")
GSHEET_NAME = "JobSpy"
# optional auth # optional auth
AUTH_REQUIRED = False AUTH_REQUIRED = False