Integration Guides
Step-by-step guides for integrating PepeProxy with popular tools and frameworks.
Puppeteer
Puppeteer is a Node.js library for controlling headless Chrome or Chromium browsers.
Installation
npm install puppeteer
# or
bun install puppeteer

Basic Setup
import puppeteer from 'puppeteer';
const PROXY_HOST = 'us-01.pepeproxy.com';
const PROXY_PORT = 2333;
const PROXY_USERNAME = 'your_username';
const PROXY_PASSWORD = 'your_password';
(async () => {
// Launch browser with proxy
const browser = await puppeteer.launch({
headless: true,
args: [
`--proxy-server=${PROXY_HOST}:${PROXY_PORT}`,
'--no-sandbox',
'--disable-setuid-sandbox',
],
});
const page = await browser.newPage();
// Authenticate with proxy
await page.authenticate({
username: PROXY_USERNAME,
password: PROXY_PASSWORD,
});
// Navigate to website
await page.goto('https://api.ipify.org?format=json', {
waitUntil: 'networkidle2',
timeout: 60000,
});
// Get page content
const content = await page.content();
console.log('Response:', content);
// Extract specific data
const data = await page.evaluate(() => {
return document.body.textContent;
});
console.log('IP Address:', JSON.parse(data).ip);
await browser.close();
})();

Advanced Configuration
import puppeteer from 'puppeteer';
/**
 * Scrape one URL through an authenticated HTTP proxy using Puppeteer.
 *
 * @param {string} url - Page to visit.
 * @param {{host: string, port: number|string, username: string, password: string}} proxyConfig
 *   Proxy endpoint and credentials.
 * @returns {Promise<{title: string, url: string}>} Page title and final URL
 *   (after any redirects).
 * @throws Re-throws any navigation or evaluation error after logging it;
 *   the browser is always closed via the finally block.
 */
async function scrapeWithProxy(url, proxyConfig) {
const browser = await puppeteer.launch({
headless: 'new',
args: [
`--proxy-server=${proxyConfig.host}:${proxyConfig.port}`,
// NOTE(review): these two flags disable web security / site isolation;
// confirm the target actually requires them before shipping.
'--disable-web-security',
'--disable-features=IsolateOrigins,site-per-process',
],
});
const page = await browser.newPage();
// Use a common desktop viewport so responsive sites render the desktop layout.
await page.setViewport({ width: 1920, height: 1080 });
// Present a mainstream desktop Chrome user agent.
await page.setUserAgent(
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
);
// Answer the proxy's auth challenge; must be set before navigation.
await page.authenticate({
username: proxyConfig.username,
password: proxyConfig.password,
});
// Block heavyweight resources (images, CSS, fonts, media) to reduce
// metered proxy traffic; documents/scripts still load.
await page.setRequestInterception(true);
page.on('request', (request) => {
const resourceType = request.resourceType();
if (['image', 'stylesheet', 'font', 'media'].includes(resourceType)) {
request.abort();
} else {
request.continue();
}
});
// Navigate and extract; cleanup is guaranteed by finally.
try {
await page.goto(url, {
waitUntil: 'networkidle2',
timeout: 60000,
});
// Collect title and the post-redirect URL from the page context.
const data = await page.evaluate(() => {
return {
title: document.title,
url: window.location.href,
};
});
return data;
} catch (error) {
console.error('Error:', error.message);
throw error;
} finally {
// Always release the browser, even when navigation failed.
await browser.close();
}
}
// Usage
const proxyConfig = {
host: 'us-01.pepeproxy.com',
port: 2333,
username: 'your_username',
password: 'your_password',
};
scrapeWithProxy('https://example.com', proxyConfig)
.then((data) => console.log('Success:', data))
.catch((err) => console.error('Failed:', err));

Playwright
Playwright is a modern automation library supporting multiple browsers.
Installation
npm install playwright
# Install browsers
npx playwright install

Basic Setup
import { chromium } from 'playwright';
(async () => {
const browser = await chromium.launch({
proxy: {
server: 'http://us-01.pepeproxy.com:2333',
username: 'your_username',
password: 'your_password',
},
});
const context = await browser.newContext();
const page = await context.newPage();
await page.goto('https://api.ipify.org?format=json');
const content = await page.textContent('body');
console.log('IP:', JSON.parse(content).ip);
await browser.close();
})();

Advanced Configuration
import { chromium } from 'playwright';
/**
 * Scrape one URL through an authenticated HTTP proxy using Playwright.
 *
 * @param {string} url - Page to visit.
 * @returns {Promise<{title: string, heading: string|undefined}>} Page title
 *   and the text of the first <h1>, if present.
 * @throws Re-throws navigation/selector errors after logging; the browser is
 *   always closed via the finally block.
 */
async function scrapeWithPlaywright(url) {
const browser = await chromium.launch({
headless: true,
proxy: {
server: 'http://us-01.pepeproxy.com:2333',
username: 'your_username',
password: 'your_password',
},
});
// Context-level fingerprint settings: viewport, UA, locale, timezone.
const context = await browser.newContext({
viewport: { width: 1920, height: 1080 },
userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
locale: 'en-US',
timezoneId: 'America/New_York',
});
// Block heavyweight resources (images, CSS, fonts, media) to reduce
// metered proxy traffic; documents/scripts still load.
await context.route('**/*', (route) => {
const resourceType = route.request().resourceType();
if (['image', 'stylesheet', 'font', 'media'].includes(resourceType)) {
route.abort();
} else {
route.continue();
}
});
const page = await context.newPage();
try {
await page.goto(url, {
waitUntil: 'networkidle',
timeout: 60000,
});
// Ensure the heading exists before extracting it (10 s budget).
await page.waitForSelector('h1', { timeout: 10000 });
// Collect title and first <h1> text inside the page context.
const data = await page.evaluate(() => {
return {
title: document.title,
heading: document.querySelector('h1')?.textContent,
};
});
console.log('Scraped data:', data);
return data;
} catch (error) {
console.error('Error:', error);
throw error;
} finally {
// Always release the browser, even on failure.
await browser.close();
}
}
scrapeWithPlaywright('https://example.com');

Multiple Browser Support
import { chromium, firefox, webkit } from 'playwright';
async function testMultipleBrowsers() {
const proxy = {
server: 'http://us-01.pepeproxy.com:2333',
username: 'your_username',
password: 'your_password',
};
// Test with Chromium
const chromiumBrowser = await chromium.launch({ proxy });
const chromiumPage = await chromiumBrowser.newPage();
await chromiumPage.goto('https://example.com');
await chromiumBrowser.close();
// Test with Firefox
const firefoxBrowser = await firefox.launch({ proxy });
const firefoxPage = await firefoxBrowser.newPage();
await firefoxPage.goto('https://example.com');
await firefoxBrowser.close();
// Test with WebKit
const webkitBrowser = await webkit.launch({ proxy });
const webkitPage = await webkitBrowser.newPage();
await webkitPage.goto('https://example.com');
await webkitBrowser.close();
}

Selenium
Selenium is a widely-used browser automation framework.
Python Setup
pip install selenium webdriver-manager

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
PROXY = "us-01.pepeproxy.com:2333"
PROXY_USER = "your_username"
PROXY_PASS = "your_password"
# Configure Chrome: route all traffic through the proxy and run headless.
chrome_options = Options()
chrome_options.add_argument(f'--proxy-server={PROXY}')
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
# Optional: Add auth plugin for proxy authentication
# NOTE(review): --proxy-server alone cannot answer the proxy's username/
# password challenge; see the "With Proxy Authentication" extension below.
from selenium.webdriver.common.proxy import Proxy, ProxyType
# NOTE(review): this Proxy object is built but never attached to the driver
# (not passed into chrome_options or webdriver.Chrome) — confirm whether it
# is meant to be wired in or removed.
proxy = Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.http_proxy = PROXY
proxy.ssl_proxy = PROXY
# Initialize driver; webdriver-manager downloads a matching chromedriver.
driver = webdriver.Chrome(
service=Service(ChromeDriverManager().install()),
options=chrome_options
)
try:
# Navigate to website
driver.get('https://api.ipify.org?format=json')
# Get page source
content = driver.page_source
print('Response:', content)
# Extract IP
import json
ip_data = json.loads(driver.find_element('tag name', 'body').text)
print('IP Address:', ip_data['ip'])
finally:
driver.quit()

With Proxy Authentication
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import zipfile
import os
def create_proxy_auth_extension(proxy_host, proxy_port, proxy_user, proxy_pass):
    """Build a small Chrome (Manifest V2) extension, packaged as a zip, that
    pins a fixed HTTP proxy and answers its auth challenge automatically.

    The returned path is suitable for chrome_options.add_extension().
    """
    manifest_src = """
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
}
}
"""
    background_src = """
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "http",
host: "%s",
port: parseInt(%s)
},
bypassList: ["localhost"]
}
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
return {
authCredentials: {
username: "%s",
password: "%s"
}
};
}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{urls: ["<all_urls>"]},
['blocking']
);
""" % (proxy_host, proxy_port, proxy_user, proxy_pass)

    # Package both files into the extension archive.
    plugin_path = 'proxy_auth_plugin.zip'
    with zipfile.ZipFile(plugin_path, 'w') as bundle:
        for arcname, payload in (("manifest.json", manifest_src),
                                 ("background.js", background_src)):
            bundle.writestr(arcname, payload)
    return plugin_path
# Usage
# Proxy endpoint and credentials. Port 2333 matches the HTTP proxy port used
# in every other example in this guide (the previous value, "8080", was
# inconsistent with the rest of the document).
PROXY_HOST = "us-01.pepeproxy.com"
PROXY_PORT = "2333"
PROXY_USER = "your_username"
PROXY_PASS = "your_password"
# Create extension
proxy_extension = create_proxy_auth_extension(
PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS
)
chrome_options = Options()
chrome_options.add_extension(proxy_extension)
chrome_options.add_argument('--headless')
driver = webdriver.Chrome(options=chrome_options)
try:
driver.get('https://api.ipify.org?format=json')
print(driver.page_source)
finally:
driver.quit()
os.remove(proxy_extension)  # Clean up

Scrapy
Scrapy is a Python framework for large-scale web scraping.
Installation
pip install scrapy

Middleware Configuration
Create middlewares.py:
from scrapy import signals
from scrapy.downloadermiddlewares.httpproxy import HttpProxyMiddleware
from scrapy.exceptions import NotConfigured
import base64
class ProxyMiddleware(HttpProxyMiddleware):
# Downloader middleware that routes every request through a single
# authenticated HTTP proxy, configured via the PROXY_URL / PROXY_USER /
# PROXY_PASS settings.
# NOTE(review): __init__ does not call super().__init__(), so attributes the
# base HttpProxyMiddleware normally sets (e.g. auth_encoding) are absent.
# That is harmless while process_request is fully overridden, but confirm
# nothing relies on base-class behavior.
def __init__(self, proxy_url, proxy_user, proxy_pass):
self.proxy_url = proxy_url
self.proxy_user = proxy_user
self.proxy_pass = proxy_pass
@classmethod
def from_crawler(cls, crawler):
# Standard Scrapy factory: read settings from the crawler and fail fast
# (NotConfigured disables the middleware) when no proxy URL is set.
settings = crawler.settings
proxy_url = settings.get('PROXY_URL')
proxy_user = settings.get('PROXY_USER')
proxy_pass = settings.get('PROXY_PASS')
if not proxy_url:
raise NotConfigured('PROXY_URL not configured')
return cls(proxy_url, proxy_user, proxy_pass)
def process_request(self, request, spider):
# Attach the proxy to every outgoing request; add Basic credentials in a
# Proxy-Authorization header only when both user and password are set.
request.meta['proxy'] = self.proxy_url
if self.proxy_user and self.proxy_pass:
auth = f'{self.proxy_user}:{self.proxy_pass}'
encoded = base64.b64encode(auth.encode()).decode()
request.headers['Proxy-Authorization'] = f'Basic {encoded}'

Settings Configuration
In settings.py:
# Enable middleware
DOWNLOADER_MIDDLEWARES = {
'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': None,
'myproject.middlewares.ProxyMiddleware': 350,
}
# Proxy settings
PROXY_URL = 'http://us-01.pepeproxy.com:2333'
PROXY_USER = 'your_username'
PROXY_PASS = 'your_password'
# Concurrent requests
CONCURRENT_REQUESTS = 8
CONCURRENT_REQUESTS_PER_DOMAIN = 2
# Download delay (be respectful)
DOWNLOAD_DELAY = 2
# User agent
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
# Auto throttle
AUTOTHROTTLE_ENABLED = True
AUTOTHROTTLE_START_DELAY = 1
AUTOTHROTTLE_MAX_DELAY = 10
AUTOTHROTTLE_TARGET_CONCURRENCY = 2.0

Spider Example
import scrapy
class ExampleSpider(scrapy.Spider):
# Minimal crawl: yield the <h1> text and URL of each page, then follow
# every hyperlink found on it with this same parse callback.
name = 'example'
start_urls = ['https://example.com']
def parse(self, response):
# Extract the first <h1> (None when the page has no heading).
title = response.css('h1::text').get()
yield {
'title': title,
'url': response.url
}
# Follow links; response.follow resolves relative hrefs automatically.
for link in response.css('a::attr(href)').getall():
yield response.follow(link, callback=self.parse)

Running the Spider
scrapy crawl example -o output.json

cURL
Command-line tool for making HTTP requests.
Basic Usage
curl -x http://username:password@us-01.pepeproxy.com:2333 https://api.ipify.org

With Headers
curl -x http://username:password@us-01.pepeproxy.com:2333 \
  -H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" \
  -H "Accept: text/html,application/xhtml+xml" \
  https://example.com

SOCKS5 Proxy
curl --socks5 username:password@us-01.pepeproxy.com:2333 https://api.ipify.org

Save Response
curl -x http://username:password@us-01.pepeproxy.com:2333 \
  -o output.html \
  https://example.com

Postman
Configure Proxy
- Open Postman Settings (⚙️ icon)
- Go to “Proxy” tab
- Enable “Use custom proxy configuration”
- Set:
- Proxy Type: HTTP
- Proxy Server:
us-01.pepeproxy.com:2333 - Username:
your_username - Password:
your_password
- Save settings
Test Request
Create a new request:
- Method: GET
- URL:
https://api.ipify.org?format=json - Send → Should return proxy IP
Best Practices for All Tools
1. Handle Errors Gracefully
import time
def retry_with_backoff(func, max_retries=3):
# Call `func` (a zero-argument callable) and retry on any Exception,
# waiting 1s, 2s, 4s, ... between attempts (exponential backoff).
# Returns func()'s result; re-raises the final failure unchanged.
for i in range(max_retries):
try:
return func()
except Exception as e:
# Out of attempts: propagate the last error to the caller.
if i == max_retries - 1:
raise
wait_time = 2 ** i
print(f"Error: {e}. Retrying in {wait_time}s...")
time.sleep(wait_time)

2. Rotate User Agents
import random
USER_AGENTS = [
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36'
]
headers = {'User-Agent': random.choice(USER_AGENTS)}

3. Implement Rate Limiting
import time
from datetime import datetime, timedelta
class RateLimiter:
    """Sliding-window rate limiter: allow at most `max_requests` calls per
    `time_window` (a datetime.timedelta), sleeping when the budget is spent.
    """

    def __init__(self, max_requests, time_window):
        self.max_requests = max_requests
        self.time_window = time_window
        # Timestamps of requests made inside the current window, oldest first.
        self.requests = []

    def wait_if_needed(self):
        """Block until a request is allowed, then record its timestamp.

        Bug fix vs. the previous version: after sleeping we re-read the clock
        and prune again. The old code appended the stale pre-sleep timestamp
        and never re-pruned, so `requests` could grow beyond `max_requests`
        and later waits were computed against wrong timestamps.
        """
        now = datetime.now()
        # Drop timestamps that have aged out of the window.
        self.requests = [r for r in self.requests if now - r < self.time_window]
        if len(self.requests) >= self.max_requests:
            # Sleep until the oldest recorded request leaves the window.
            sleep_time = (self.requests[0] + self.time_window - now).total_seconds()
            if sleep_time > 0:
                time.sleep(sleep_time)
            # Re-evaluate after sleeping: the clock moved, so prune again.
            now = datetime.now()
            self.requests = [r for r in self.requests if now - r < self.time_window]
        self.requests.append(now)
# Usage: max 10 requests per minute
limiter = RateLimiter(10, timedelta(minutes=1))
for url in urls:
limiter.wait_if_needed()
response = fetch(url)

4. Monitor Traffic Usage
import sys
def track_traffic(response):
"""Estimate the traffic consumed by one HTTP exchange, in kilobytes.

Assumes a requests-style response object (`.request.body`, `.content`)
-- TODO confirm against the HTTP client in use. The flat 200 bytes added
per direction is a rough allowance for headers. The estimate is printed
to stderr (so it does not pollute scraped output) and returned.
"""
request_size = len(response.request.body or b'') + 200 # headers
response_size = len(response.content) + 200
total_kb = (request_size + response_size) / 1024
print(f"Traffic used: {total_kb:.2f} KB", file=sys.stderr)
return total_kb

Need help with a specific integration? Contact support →