Compare commits

...

3 Commits

Author SHA1 Message Date
Piotr Geca 94d413bad1
support for socks5 proxies (#266)
Co-authored-by: Piotr Geca <piotr.geca@npl.co.uk>
2025-04-10 15:53:28 -05:00
Cullen Watson 61205bcc77
chore: version
2025-03-27 21:59:47 -05:00
Nikhil Sasi f1602eca70
Fix date parsing error: prevent negative days by using timedelta (#264)
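Subtracting the "days" value extracted from the label from the current day of the month can produce a zero or negative day number.
datetime.replace() rejects a day outside the valid range for the month and raises ValueError.
Use timedelta for the subtraction so the result rolls back across month boundaries correctly.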

Co-authored-by: NIKHIL S <nikhil_s@nikhilMac.local>
2025-03-27 21:58:42 -05:00
3 changed files with 6 additions and 6 deletions

View File

@ -3,7 +3,7 @@ from __future__ import annotations
import math import math
import random import random
import time import time
from datetime import datetime, date from datetime import datetime, date, timedelta
from typing import Optional from typing import Optional
import regex as re import regex as re
@ -277,7 +277,7 @@ class Naukri(Scraper):
match = re.search(r"(\d+)\s*day", label) match = re.search(r"(\d+)\s*day", label)
if match: if match:
days = int(match.group(1)) days = int(match.group(1))
parsed_date = today.replace(day=today.day - days).date() parsed_date = (today - timedelta(days = days)).date()
log.debug(f"Date parsed: {days} days ago -> {parsed_date}") log.debug(f"Date parsed: {days} days ago -> {parsed_date}")
return parsed_date return parsed_date
elif created_date: elif created_date:

View File

@ -47,11 +47,12 @@ class RotatingProxySession:
"""Utility method to format a proxy string into a dictionary.""" """Utility method to format a proxy string into a dictionary."""
if proxy.startswith("http://") or proxy.startswith("https://"): if proxy.startswith("http://") or proxy.startswith("https://"):
return {"http": proxy, "https": proxy} return {"http": proxy, "https": proxy}
if proxy.startswith("socks5://"):
return {"http": proxy, "https": proxy}
return {"http": f"http://{proxy}", "https": f"http://{proxy}"} return {"http": f"http://{proxy}", "https": f"http://{proxy}"}
class RequestsRotating(RotatingProxySession, requests.Session): class RequestsRotating(RotatingProxySession, requests.Session):
def __init__(self, proxies=None, has_retry=False, delay=1, clear_cookies=False): def __init__(self, proxies=None, has_retry=False, delay=1, clear_cookies=False):
RotatingProxySession.__init__(self, proxies=proxies) RotatingProxySession.__init__(self, proxies=proxies)
requests.Session.__init__(self) requests.Session.__init__(self)
@ -86,7 +87,6 @@ class RequestsRotating(RotatingProxySession, requests.Session):
class TLSRotating(RotatingProxySession, tls_client.Session): class TLSRotating(RotatingProxySession, tls_client.Session):
def __init__(self, proxies=None): def __init__(self, proxies=None):
RotatingProxySession.__init__(self, proxies=proxies) RotatingProxySession.__init__(self, proxies=proxies)
tls_client.Session.__init__(self, random_tls_extension_order=True) tls_client.Session.__init__(self, random_tls_extension_order=True)
@ -344,7 +344,7 @@ desired_order = [
"company_num_employees", "company_num_employees",
"company_revenue", "company_revenue",
"company_description", "company_description",
#naukri-specific fields # naukri-specific fields
"skills", "skills",
"experience_range", "experience_range",
"company_rating", "company_rating",

View File

@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry] [tool.poetry]
name = "python-jobspy" name = "python-jobspy"
version = "1.1.79" version = "1.1.80"
description = "Job scraper for LinkedIn, Indeed, Glassdoor, ZipRecruiter & Bayt" description = "Job scraper for LinkedIn, Indeed, Glassdoor, ZipRecruiter & Bayt"
authors = ["Cullen Watson <cullen@cullenwatson.com>", "Zachary Hampton <zachary@zacharysproducts.com>"] authors = ["Cullen Watson <cullen@cullenwatson.com>", "Zachary Hampton <zachary@zacharysproducts.com>"]
homepage = "https://github.com/cullenwatson/JobSpy" homepage = "https://github.com/cullenwatson/JobSpy"