import glob
from lxml import html

# Print the stripped text content of every HTML file matching the pattern.
html_pattern = r"C:\Users\python\Desktop\html\test.html"
for html_path in glob.glob(html_pattern):
    # Binary mode: the raw, undecoded bytes are handed straight to lxml.
    with open(html_path, mode='rb') as source:
        raw_markup = source.read()
    document = html.fromstring(raw_markup)
    print(document.text_content().strip())

import glob
import codecs
from lxml import html


# Print the stripped text of one line (index 7, the eighth line) of every
# HTML file matching the pattern.
html_pattern = r"C:\Users\python\Desktop\html\test.html"
for html_path in glob.glob(html_pattern):
    with open(html_path, mode='rb') as source:
        # Lines are read as bytes; only the selected one is decoded as UTF-8.
        # A file with fewer than eight lines raises IndexError here.
        selected_line = source.readlines()[7].decode('utf_8')
    fragment = html.fromstring(selected_line)
    print(fragment.text_content().strip())

import glob
import codecs
from lxml import html


# Print the stripped text of one line (index 7, the eighth line) of every
# HTML file matching the pattern.
html_pattern = r"C:\Users\python\Desktop\html\test.html"
for html_path in glob.glob(html_pattern):
    with open(html_path, mode='rb') as source:
        # Lines are read as bytes; only the selected one is decoded as UTF-8.
        # A file with fewer than eight lines raises IndexError here.
        selected_line = source.readlines()[7].decode('utf_8')
    fragment = html.fromstring(selected_line)
    print(fragment.text_content().strip())

import glob
import codecs
from lxml import html


# Print the stripped text of one line (index 7, the eighth line) of every
# HTML file matching the pattern.
html_pattern = r"C:\Users\python\Desktop\html\test.html"
for html_path in glob.glob(html_pattern):
    with open(html_path, mode='rb') as source:
        # Lines are read as bytes; only the selected one is decoded as UTF-8.
        # A file with fewer than eight lines raises IndexError here.
        selected_line = source.readlines()[7].decode('utf_8')
    fragment = html.fromstring(selected_line)
    print(fragment.text_content().strip())

【Python 入門】HTMLファイルからデータを取得する(日本語文字化けも解消)

lxmlのインストール

HTMLファイルからデータの取得(read)

HTMLファイルからデータの取得(readlines)文字化け対応

コメント