# Download a news page and extract its readable content with readability.
import urllib.request

from bs4 import BeautifulSoup
from readability import Document

url = 'https://example.com/news/article'

# Use a context manager so the HTTP response is closed once the body is read.
with urllib.request.urlopen(url) as response:
    html = response.read()

soup = BeautifulSoup(html, 'html.parser')

# find() returns None when the page has no <div class="article">.
article_content = soup.find('div', {'class': 'article'})

# str(None) would hand the literal text "None" to readability, so fall back
# to the whole parsed page when the article container is missing.
markup_source = article_content if article_content is not None else soup

doc = Document(str(markup_source))
title = doc.title()
summary = doc.summary()
# Document exposes the cleaned markup through the get_clean_html() method;
# it has no `cleaned_html` attribute.
clean_html = doc.get_clean_html()


上一篇:
下一篇:
切换中文