Compare commits

...

3 Commits

Author SHA1 Message Date
Piotr Geca 94d413bad1
support for socks5 proxies (#266)
Co-authored-by: Piotr Geca <piotr.geca@npl.co.uk>
2025-04-10 15:53:28 -05:00
Cullen Watson 61205bcc77
chore: version
2025-03-27 21:59:47 -05:00
Nikhil Sasi f1602eca70
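Fix date parsing error: prevent negative days by using timedelta (#264)
Subtracting the "days" value extracted from the label from the current day of the month can produce a zero or negative day number.
datetime.replace() rejects a day outside the valid range for the month and raises ValueError.
Use timedelta for the subtraction so the result correctly rolls back across month boundaries.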

Co-authored-by: NIKHIL S <nikhil_s@nikhilMac.local>
2025-03-27 21:58:42 -05:00
3 changed files with 6 additions and 6 deletions

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import math
import random
import time
-from datetime import datetime, date
+from datetime import datetime, date, timedelta
from typing import Optional
import regex as re
@@ -277,7 +277,7 @@ class Naukri(Scraper):
match = re.search(r"(\d+)\s*day", label)
if match:
days = int(match.group(1))
-parsed_date = today.replace(day=today.day - days).date()
+parsed_date = (today - timedelta(days = days)).date()
log.debug(f"Date parsed: {days} days ago -> {parsed_date}")
return parsed_date
elif created_date:

View File

@@ -47,11 +47,12 @@ class RotatingProxySession:
"""Utility method to format a proxy string into a dictionary."""
if proxy.startswith("http://") or proxy.startswith("https://"):
return {"http": proxy, "https": proxy}
if proxy.startswith("socks5://"):
return {"http": proxy, "https": proxy}
return {"http": f"http://{proxy}", "https": f"http://{proxy}"}
class RequestsRotating(RotatingProxySession, requests.Session):
def __init__(self, proxies=None, has_retry=False, delay=1, clear_cookies=False):
RotatingProxySession.__init__(self, proxies=proxies)
requests.Session.__init__(self)
@@ -86,7 +87,6 @@ class RequestsRotating(RotatingProxySession, requests.Session):
class TLSRotating(RotatingProxySession, tls_client.Session):
def __init__(self, proxies=None):
RotatingProxySession.__init__(self, proxies=proxies)
tls_client.Session.__init__(self, random_tls_extension_order=True)
@@ -344,7 +344,7 @@ desired_order = [
"company_num_employees",
"company_revenue",
"company_description",
-#naukri-specific fields
+# naukri-specific fields
"skills",
"experience_range",
"company_rating",

View File

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "python-jobspy"
-version = "1.1.79"
+version = "1.1.80"
description = "Job scraper for LinkedIn, Indeed, Glassdoor, ZipRecruiter & Bayt"
authors = ["Cullen Watson <cullen@cullenwatson.com>", "Zachary Hampton <zachary@zacharysproducts.com>"]
homepage = "https://github.com/cullenwatson/JobSpy"