折腾:
【未解决】Python同步带缩进的html到WordPress后html被改变缩进丢失问题
期间,
另外顺带优化了:
(1)Evernote的Note的Content 转 BeautifulSoup的Soup
@staticmethod
def noteContentToSoup(curNote):
    """Parse the content of an Evernote Note into a BeautifulSoup Soup.

    Args:
        curNote (Note): Evernote Note
    Returns:
        Soup
    Raises:
    """
    # The html comes from crifanEvernote.getNoteContentHtml; an earlier inline
    # version removed the leading
    #   <!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
    # line with a regex here -- presumably the helper takes care of that now
    # (TODO confirm against getNoteContentHtml).
    soup = utils.htmlToSoup(crifanEvernote.getNoteContentHtml(curNote))

    # for debug: per the original note the top node is now html, not en-note;
    # log whenever the soup itself is not the "[document]" root
    isDocumentRoot = soup.name == "[document]"
    if not isDocumentRoot:
        logging.info("soup.name=%s", soup.name)

    return soup

# (2) Convert a BeautifulSoup Soup to the Content of an Evernote Note
@staticmethod
def soupToNoteContent(soup):
    """Convert BeautifulSoup Soup to Evernote Note content

    Args:
        soup (Soup): BeautifulSoup Soup
    Returns:
        Evernote Note content html(str)
    Raises:
    """
    # for debug: log whenever the soup itself is not the "[document]" root
    if soup.name != "[document]":
        logging.info("soup.name=%s", soup.name)

    # Setting soup.name = "en-note" did not work (original author's note), so
    # the top-level tag is renamed on the serialized html instead.
    noteContentHtml = utils.soupToHtml(soup)

    # convert <html>...</html> back to <en-note>...</en-note>
    # Raw strings: "\g" is not a valid Python string escape, so the non-raw
    # form triggers an invalid-escape-sequence warning on modern Python.
    noteContentHtml = re.sub(
        r"<html>(?P<contentBody>.+)</html>",
        r"<en-note>\g<contentBody></en-note>",
        noteContentHtml,
        flags=re.S,  # let "." span newlines so the whole body is captured
    )
    noteContentHtml = crifanEvernote.convertToClosedEnMediaTag(noteContentHtml)

    # add first line:
    # <!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
    doctypeLine = '<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">\n'
    return doctypeLine + noteContentHtml

# The method above calls the following helper:
@staticmethod
def convertToClosedEnMediaTag(noteHtml):
    """Process note content html, for special </en-media> will cause error, so need convert:
        <en-media hash="7c54d8d29cccfcfe2b48dd9f952b715b" type="image/png"></en-media>
    to closed en-media tag:
        <en-media hash="7c54d8d29cccfcfe2b48dd9f952b715b" type="image/png" />

    Args:
        noteHtml (str): Note content html
    Returns:
        note content html with closed en-media tag (str)
    Raises:
    """
    # Raw strings: "\s" and "\g" are not valid Python string escapes, so the
    # non-raw form triggers an invalid-escape-sequence warning on modern Python.
    # The named group keeps the opening tag with its attributes (without the
    # closing ">"); the empty-or-whitespace body and </en-media> are dropped.
    noteHtml = re.sub(
        r"(?P<enMedia><en-media\s+[^<>]+)>\s*</en-media>",
        r"\g<enMedia> />",
        noteHtml,
        flags=re.S,
    )
    return noteHtml

# That is all that is needed.
转载请注明:在路上 » 【已解决】用Python实现印象笔记Evernote的Note笔记的content和BeautifulSoup的soup的互相转换