文章有点冗长还请耐心,我打算用wordpress做一个导航网站,我想做一个爬虫程序用来自动抓我网站中需要的网站网址。我没什么python编程的经验,所以我让ChatGPT写了几次程序但是都失败了。所以想询问一下,下面是代码:
# 设置你需要的变量
LOGGER_FILE_NAME = "website_navigation.log"
SEARCH_URL = "htxxps://www.google.com/search"
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
WORDPRESS_URL = "http://118.178.235.228/"
USERNAME = "admin"
PASSWORD = "hjZ5d4zH"
CATEGORY_ID = "3"
SLEEP_DELAY = 5
SEARCH_QUERY = "动漫" # 搜索查询
WEBSITE_TYPE = "视频网站" # 网站类型
# 导入需要的库
import requests
from bs4 import BeautifulSoup
import time
import logging
from concurrent.futures import ThreadPoolExecutor
# 配置日志,包含日志级别、格式和处理方式,这里配置了文件处理器和流处理器
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
handlers=[
logging.FileHandler(LOGGER_FILE_NAME), # 日志写入文件
logging.StreamHandler() # 控制台输出日志
]
)
# 根据类型搜索网站的函数
def search_website_by_type(delay=SLEEP_DELAY):
session = requests.Session() # 创建一个会话
logging.info(f"正在搜索 {SEARCH_QUERY} {WEBSITE_TYPE}") # 输出日志
headers = { # 请求头
'User-Agent': USER_AGENT}
params = { # 请求参数
'q': f'{SEARCH_QUERY} {WEBSITE_TYPE}'}
# 将链接上传到wordpress的函数
def upload_links_to_wordpress(session, links):
try:
response = session.get(SEARCH_URL, headers=headers, params=params) # 发送get请求
if response.status_code == 200: # 当响应状态码为200时
soup = BeautifulSoup(response.text, 'html.parser') # 解析网页
links = [link.get('href') for link in soup.find_all('a', href=True) if link['href'].startswith('http')] # 对解析后的网页提取链接
logging.info(f"找到 {len(links)} 个链接") # 输出日志
time.sleep(SLEEP_DELAY) # 延迟一定时间
return links # 返回链接
else:
logging.error("Error occurred while fetching links") # 当获取链接出错时,输出日志
except Exception as e:
logging.error(f"Exception occurred while fetching links: {str(e)}") # 当出现异常时,输出日志
# 将链接上传到wordpress的函数
def upload_links_to_wordpress( links, category_id, wordpress_url, username, password, delay=SLEEP_DELAY):
session = requests.Session() # 创建一个会话
logging.info(f"正在上传 {len(links)} 个链接到 WordPress") # 输出日志
data = { # 请求数据
"slug": "links-collection",
"title": "Links Collection",
"content": "Check out these useful links:",
"status": "publish",
"categories": category_id,
"password": password
}
headers = { # 请求头
"Authorization": f"Basic {username}:{password}"
}
try:
response = session.post(wordpress_url, headers=headers, data=data) # 发送post请求
if response.status_code == 200: # 当响应状态码为200时
logging.info("Links uploaded successfully to WordPress") # 输出日志
time.sleep(delay) # 延迟一定时间
else:
logging.error("Error occurred while uploading links") # 当上传链接出错时,输出日志
except Exception as e:
logging.error(f"Exception occurred while uploading links: {str(e)}") # 当出现异常时,输出日志
from wordpress_xmlrpc import Client, WordPressPost
from wordpress_xmlrpc.methods.posts import NewPost
# 定义 wordPress 上传函数
def upload_links_to_wordpress( links, CATEGORY_ID, WORDPRESS_URL, USERNAME, PASSWORD):
try:
wp = Client(WORDPRESS_URL, USERNAME, PASSWORD) # 创建wordpress客户端
for link in links:
post = WordPressPost() # 创建一篇新文章
post.title = link # 将链接设置为文章的标题
post.content = link # 将链接设置为文章的内容
post.terms_names = {
'post_tag': ['tag1', 'tag2', 'tag3'],
'category': [CATEGORY_ID] # 将文章分类设置为特定的分类ID
}
post.id = wp.call(NewPost(post)) # 在wordpress上发布新文章
logging.info(f"Uploaded link {link} to WordPress") # 输出上传成功的日志
except Exception as e:
logging.error(f"Exception occurred while uploading links to WordPress: {str(e)}") # 当出现异常时,输出日志
for i in range(5): # 控制搜索5次
links = search_website_by_type( WEBSITE_TYPE)
upload_links_to_wordpress(CATEGORY_ID, WORDPRESS_URL, USERNAME,PASSWORD, links) # 调用上传函数将链接上传到
下面是在不同地方运行之后出现的情况。
这是在Lightly上面运行出现的:
print("Start")
Start
>>> # 设置你需要的变量
... LOGGER_FILE_NAME = "website_navigation.log"
>>> SEARCH_URL = "htxxps://www.google.com/search"
>>> USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
>>> WORDPRESS_URL = "http://118.178.235.228/"
>>> USERNAME = "admin"
>>> PASSWORD = "hjZ5d4zH"
>>> CATEGORY_ID = "3"
>>> SLEEP_DELAY = 5
>>> SEARCH_QUERY = "动漫" # 搜索查询
>>> WEBSITE_TYPE = "视频网站" # 网站类型
>>>
>>> # 导入需要的库
... import requests
>>> from bs4 import BeautifulSoup
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ModuleNotFoundError: No module named 'bs4'
>>> import time
>>> import logging
>>> from concurrent.futures import ThreadPoolExecutor
>>>
>>> # 配置日志,包含日志级别、格式和处理方式,这里配置了文件处理器和流处理器
... logging.basicConfig(
... level=logging.INFO,
... format="%(asctime)s [%(levelname)s] %(message)s",
... handlers=[
... logging.FileHandler(LOGGER_FILE_NAME), # 日志写入文件
... logging.StreamHandler() # 控制台输出日志
... ]
... )
>>>
>>> # 根据类型搜索网站的函数
... def search_website_by_type(delay=SLEEP_DELAY):
... session = requests.Session() # 创建一个会话
... logging.info(f"正在搜索 {SEARCH_QUERY} {WEBSITE_TYPE}") # 输出日志
... headers = { # 请求头
... 'User-Agent': USER_AGENT}
... params = { # 请求参数
... 'q': f'{SEARCH_QUERY} {WEBSITE_TYPE}'}
...
>>> # 将链接上传到wordpress的函数
... def upload_links_to_wordpress(session, links):
... try:
... response = session.get(SEARCH_URL, headers=headers, params=params) # 发送get请求
... if response.status_code == 200: # 当响应状态码为200时
... soup = BeautifulSoup(response.text, 'html.parser') # 解析网页
... links = [link.get('href') for link in soup.find_all('a', href=True) if link['href'].startswith('http')] # 对解析后的网页提取链接
... logging.info(f"找到 {len(links)} 个链接") # 输出日志
... time.sleep(SLEEP_DELAY) # 延迟一定时间
... return links # 返回链接
... else:
... logging.error("Error occurred while fetching links") # 当获取链接出错时,输出日志
... except Exception as e:
... logging.error(f"Exception occurred while fetching links: {str(e)}") # 当出现异常时,输出日志
...
>>> # 将链接上传到wordpress的函数
... def upload_links_to_wordpress( links, category_id, wordpress_url, username, password, delay=SLEEP_DELAY):
... session = requests.Session() # 创建一个会话
... logging.info(f"正在上传 {len(links)} 个链接到 WordPress") # 输出日志
... data = { # 请求数据
... "slug": "links-collection",
... "title": "Links Collection",
... "content": "Check out these useful links:",
... "status": "publish",
... "categories": category_id,
... "password": password
... }
... headers = { # 请求头
... "Authorization": f"Basic {username}:{password}"
... }
... try:
... response = session.post(wordpress_url, headers=headers, data=data) # 发送post请求
... if response.status_code == 200: # 当响应状态码为200时
... logging.info("Links uploaded successfully to WordPress") # 输出日志
... time.sleep(delay) # 延迟一定时间
... else:
... logging.error("Error occurred while uploading links") # 当上传链接出错时,输出日志
... except Exception as e:
... logging.error(f"Exception occurred while uploading links: {str(e)}") # 当出现异常时,输出日志
... from wordpress_xmlrpc import Client, WordPressPost
File "<stdin>", line 25
from wordpress_xmlrpc import Client, WordPressPost
^
SyntaxError: invalid syntax
>>> from wordpress_xmlrpc.methods.posts import NewPost
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
ModuleNotFoundError: No module named 'wordpress_xmlrpc'
>>>
>>> # 定义 wordPress 上传函数
... def upload_links_to_wordpress( links, CATEGORY_ID, WORDPRESS_URL, USERNAME, PASSWORD):
... try:
... wp = Client(WORDPRESS_URL, USERNAME, PASSWORD) # 创建wordpress客户端
... for link in links:
... post = WordPressPost() # 创建一篇新文章
... post.title = link # 将链接设置为文章的标题
... post.content = link # 将链接设置为文章的内容
... post.terms_names = {
... 'post_tag': ['tag1', 'tag2', 'tag3'],
... 'category': [CATEGORY_ID] # 将文章分类设置为特定的分类ID
... }
... post.id = wp.call(NewPost(post)) # 在wordpress上发布新文章
... logging.info(f"Uploaded link {link} to WordPress") # 输出上传成功的日志
... except Exception as e:
... logging.error(f"Exception occurred while uploading links to WordPress: {str(e)}") # 当出现异常时,输出日志
... for i in range(5): # 控制搜索5次
File "<stdin>", line 17
for i in range(5): # 控制搜索5次
^
SyntaxError: invalid syntax
>>> links = search_website_by_type(WEBSITE_TYPE)
File "<stdin>", line 1
links = search_website_by_type(WEBSITE_TYPE)
^
IndentationError: unexpected indent
>>> upload_links_to_wordpress( links, CATEGORY_ID, WORDPRESS_URL, USERNAME, PASSWORD) # 调用上传函数将链接上传到
File "<stdin>", line 1
upload_links_to_wordpress( links, CATEGORY_ID, WORDPRESS_URL, USERNAME, PASSWORD) # 调用上传函数将链接上传到
^
IndentationError: unexpected indent
【以下回答由 GPT 生成】
无法解答该问题。
【相关推荐】