# Select Git revision
# fb2.py 8.45 KiB
import base64, os, traceback, zipfile
from lxml import etree
from abc import abstractmethod
from book_tools.format.bookfile import BookFile
from book_tools.format.mimetype import Mimetype
from book_tools.format.util import list_zip_file_infos
class FB2StructureException(Exception):
    """Raised when a file cannot be processed as a valid FB2 book.

    The exception message is always ``'fb2 verification failed: '`` followed
    by the text of the ``error`` argument.
    """

    def __init__(self, error):
        """Build the exception from a message or from a wrapped exception.

        Args:
            error: Text describing the failure, or the exception that caused
                it. When it is an exception instance, the traceback of the
                exception currently being handled is written to stderr.
        """
        Exception.__init__(self, 'fb2 verification failed: %s' % error)
        if isinstance(error, Exception):
            # print_exc() writes the traceback itself and returns None, so it
            # must not be wrapped in print(): that emitted a stray "None"
            # line on stdout after every wrapped error.
            traceback.print_exc()
class Namespace(object):
    """XML namespace URIs referenced when querying FB2 documents."""

    # FictionBook schema namespaces, versions 2.0 and 2.1.
    FICTION_BOOK20 = 'http://www.gribuser.ru/xml/fictionbook/2.0'
    FICTION_BOOK21 = 'http://www.gribuser.ru/xml/fictionbook/2.1'

    # XLink namespace; the cover lookup reads the ``href`` attribute from it.
    XLINK = 'http://www.w3.org/1999/xlink'
class FB2Base(BookFile):
    """Shared metadata and cover extraction for FB2 documents.

    Subclasses supply ``__create_tree__``, which returns the parsed XML of
    the book. The constructor parses the document once and records title,
    authors, tags, series, language, document date and description through
    the setters and attributes of the ``BookFile`` base class.

    Every lookup is tried first with the FictionBook namespace and, when that
    finds nothing, again without any namespace.
    """

    def __init__(self, file, original_filename, mimetype):
        """Parse the book and collect its metadata.

        Args:
            file: Passed through to ``BookFile.__init__``.
            original_filename: Passed through to ``BookFile.__init__``.
            mimetype: Passed through to ``BookFile.__init__``.

        Raises:
            FB2StructureException: If parsing or any metadata lookup fails;
                other exception types raised inside the detection phase are
                wrapped in it.
        """
        BookFile.__init__(self, file, original_filename, mimetype)
        # Starts as FictionBook 2.0; __detect_namespaces may switch to 2.1.
        self.__namespaces = {'fb': Namespace.FICTION_BOOK20, 'xlink': Namespace.XLINK}
        try:
            tree = self.__create_tree__()
            self.__detect_namespaces(tree)
            self.__detect_title(tree)
            self.__detect_authors(tree)
            self.__detect_tags(tree)
            self.__detect_series_info(tree)
            self.__detect_language(tree)
            self.__detect_docdate(tree)
            description = self.__detect_description(tree)
            if description:
                self.description = description.strip()
        except FB2StructureException:
            # Already the right type: re-raise untouched.
            raise
        except Exception as error:
            raise FB2StructureException(error)

    @abstractmethod
    def __create_tree__(self):
        """Return the parsed XML document of the book (subclass hook)."""
        return None

    def extract_cover_internal(self, working_dir):
        """Write the declared cover image to ``working_dir``.

        Only the image referenced from ``title-info/coverpage`` is used.

        Args:
            working_dir: Directory in which ``cover.jpeg`` is created.

        Returns:
            ``('cover.jpeg', False)`` on success, ``(None, False)`` if the
            cover could not be located, decoded or written.
        """
        try:
            content = self.__read_cover(self.__create_tree__(), False)
            with open(os.path.join(working_dir, 'cover.jpeg'), 'wb') as cover_file:
                cover_file.write(content)
            return ('cover.jpeg', False)
        except Exception:
            # Best effort: a missing or broken cover is not an error, but
            # KeyboardInterrupt/SystemExit must still propagate.
            return (None, False)

    def extract_cover_memory(self):
        """Return the decoded cover image bytes, or ``None`` on failure.

        Falls back to the first image found in the book body when the
        document declares no cover page image.
        """
        try:
            return self.__read_cover(self.__create_tree__(), True)
        except Exception as err:
            print("exception Extract %s"%err)
            return None

    def __read_cover(self, tree, search_body):
        """Locate the cover image reference and decode its binary payload.

        Args:
            tree: Parsed document as returned by ``__create_tree__``.
            search_body: When true and no cover page image exists, use the
                first image inside the book body instead.

        Returns:
            The base64-decoded content of the referenced ``binary`` element.

        Raises:
            Exception: Lookup or decoding errors (e.g. ``IndexError`` when no
                image or binary matches) propagate to the caller.
        """
        images = tree.xpath(
            '/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image',
            namespaces=self.__namespaces)
        if not images and search_body:
            images = tree.xpath('/fb:FictionBook/fb:body//fb:image', namespaces=self.__namespaces)
        # The href has the form "#<binary id>"; drop the leading character.
        cover_id = images[0].get('{' + Namespace.XLINK + '}href')[1:]
        # The id comes from the book file, so it is passed as an XPath
        # variable rather than spliced into the expression text; quotes in
        # the id can no longer break or alter the query.
        binaries = tree.xpath(
            '/fb:FictionBook/fb:binary[@id=$cover_id]',
            namespaces=self.__namespaces,
            cover_id=cover_id)
        return base64.b64decode(binaries[0].text)

    def __xpath_with_fallback(self, tree, namespaced_path, plain_path):
        """Evaluate ``namespaced_path``; if it is empty, evaluate ``plain_path``.

        ``plain_path`` is evaluated without a namespace map.
        """
        res = tree.xpath(namespaced_path, namespaces=self.__namespaces)
        if not res:
            res = tree.xpath(plain_path)
        return res

    def __detect_namespaces(self, tree):
        """Switch the ``fb`` prefix to FictionBook 2.1 if the root uses it."""
        # Subclasses may hand back either an element tree (etree.parse) or a
        # bare root element (etree.fromstring); only the former has getroot().
        root = tree.getroot() if hasattr(tree, 'getroot') else tree
        if Namespace.FICTION_BOOK21 in root.tag:
            self.__namespaces['fb'] = Namespace.FICTION_BOOK21
        return None

    def __detect_title(self, tree):
        """Pass the text of ``book-title`` to ``__set_title__`` when present."""
        res = self.__xpath_with_fallback(
            tree,
            '/fb:FictionBook/fb:description/fb:title-info/fb:book-title',
            # Fallback matches the elements by local name, whatever their namespace.
            '/*[local-name() = "FictionBook"]/*[local-name() = "description"]/*[local-name() = "title-info"]/*[local-name() = "book-title"]')
        if res:
            self.__set_title__(res[0].text)
        return None

    def __detect_docdate(self, tree):
        """Pass the document date to ``__set_docdate__`` when present.

        The ``value`` attribute of ``document-info/date`` is preferred; the
        element text is used only when no such attribute is found.
        """
        # (xpath, evaluate with namespaces, result is an attribute value)
        candidates = (
            ('/fb:FictionBook/fb:description/fb:document-info/fb:date/@value', True, True),
            ('/FictionBook/description/document-info/date/@value', False, True),
            ('/fb:FictionBook/fb:description/fb:document-info/fb:date', True, False),
            ('/FictionBook/description/document-info/date', False, False),
        )
        for path, namespaced, is_attrib in candidates:
            if namespaced:
                res = tree.xpath(path, namespaces=self.__namespaces)
            else:
                res = tree.xpath(path)
            if res:
                self.__set_docdate__(res[0] if is_attrib else res[0].text)
                break
        return None

    def __detect_authors(self, tree):
        """Register every ``title-info/author`` through ``__add_author__``.

        Each author is added as ``'<first> <middle> <last>'`` (missing parts
        are empty strings) together with the last name.
        """
        res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:author', namespaces=self.__namespaces)
        use_namespaces = bool(res)
        if not use_namespaces:
            res = tree.xpath('/FictionBook/description/title-info/author')

        def subnode_text(node, name):
            # Child lookups must use the same namespace mode as the query
            # that produced the author nodes.
            if use_namespaces:
                subnode = node.find('fb:' + name, namespaces=self.__namespaces)
            else:
                subnode = node.find(name)
            text = subnode.text if subnode is not None else ''
            return text or ''

        for node in res:
            first_name = subnode_text(node, 'first-name')
            middle_name = subnode_text(node, 'middle-name')
            last_name = subnode_text(node, 'last-name')
            self.__add_author__(' '.join([first_name, middle_name, last_name]), last_name)

    def __detect_language(self, tree):
        """Store the text of ``title-info/lang`` in ``language_code``."""
        res = self.__xpath_with_fallback(
            tree,
            '/fb:FictionBook/fb:description/fb:title-info/fb:lang',
            '/FictionBook/description/title-info/lang')
        if res:
            self.language_code = res[0].text

    def __detect_tags(self, tree):
        """Register the text of every ``title-info/genre`` via ``__add_tag__``."""
        res = self.__xpath_with_fallback(
            tree,
            '/fb:FictionBook/fb:description/fb:title-info/fb:genre',
            '/FictionBook/description/title-info/genre')
        for node in res:
            self.__add_tag__(node.text)

    def __detect_series_info(self, tree):
        """Store the first ``title-info/sequence`` in ``series_info``.

        ``series_info`` is set only when the normalised ``name`` attribute is
        non-empty; ``index`` is the normalised ``number`` attribute or
        ``None``.
        """
        res = self.__xpath_with_fallback(
            tree,
            '/fb:FictionBook/fb:description/fb:title-info/fb:sequence',
            '/FictionBook/description/title-info/sequence')
        if res:
            title = BookFile.__normalise_string__(res[0].get('name'))
            index = BookFile.__normalise_string__(res[0].get('number'))
            if title:
                self.series_info = {
                    'title': title,
                    'index': index or None
                }

    def __detect_description(self, tree):
        """Return the text content of ``title-info/annotation``, or ``None``.

        NOTE(review): with ``encoding='utf-8'`` lxml's ``tostring`` yields an
        encoded byte string rather than text; kept as-is because callers may
        rely on that -- confirm.
        """
        res = self.__xpath_with_fallback(
            tree,
            '/fb:FictionBook/fb:description/fb:title-info/fb:annotation',
            '/FictionBook/description/title-info/annotation')
        if res:
            return etree.tostring(res[0], encoding='utf-8', method='text')
        return None
class FB2(FB2Base):
    """FB2 book read directly from an uncompressed file object."""

    def __init__(self, file, original_filename):
        """Initialise the base class with the plain FB2 mimetype."""
        FB2Base.__init__(self, file, original_filename, mimetype=Mimetype.FB2)

    def __create_tree__(self):
        """Rewind ``self.file`` and parse it as XML.

        Raises:
            FB2StructureException: If rewinding or parsing fails.
        """
        try:
            source = self.file
            # The file is parsed more than once (metadata, then cover), so
            # always start from the beginning.
            source.seek(0, 0)
            document = etree.parse(source)
        except Exception as parse_error:
            raise FB2StructureException('the file is not a valid XML (%s)' % (parse_error,))
        return document

    def __exit__(self, kind, value, traceback):
        """Nothing to release for a plain FB2 file."""
        return None
class FB2Zip(FB2Base):
    """FB2 book stored as the only entry of a zip archive."""

    def __init__(self, file, original_filename):
        """Open and validate the archive, then collect the book metadata.

        Args:
            file: Passed to ``zipfile.ZipFile`` and to ``FB2Base.__init__``.
            original_filename: Passed through to ``FB2Base.__init__``.

        Raises:
            FB2StructureException: If the archive fails its integrity test,
                does not contain exactly one listed entry, or the book
                cannot be parsed.

        Exceptions raised by ``zipfile.ZipFile(file)`` itself propagate
        unchanged.
        """
        self.__zip_file = zipfile.ZipFile(file)
        try:
            # testzip() returns the name of the first corrupt member, if any.
            if self.__zip_file.testzip():
                raise FB2StructureException('broken zip archive')
            self.__infos = list_zip_file_infos(self.__zip_file)
            if len(self.__infos) != 1:
                raise FB2StructureException('archive contains %s files' % len(self.__infos))
        except FB2StructureException:
            self.__zip_file.close()
            raise
        except Exception as error:
            self.__zip_file.close()
            raise FB2StructureException(error)
        try:
            FB2Base.__init__(self, file, original_filename, Mimetype.FB2_ZIP)
        except Exception:
            # Do not leak the open archive when metadata extraction fails;
            # the original exception is re-raised untouched.
            self.__zip_file.close()
            raise

    def __create_tree__(self):
        """Parse the archived book and return it as an element tree.

        At most 50 MiB of the entry are read.

        Raises:
            FB2StructureException: If the entry cannot be read or parsed.
        """
        with self.__zip_file.open(self.__infos[0]) as entry:
            try:
                root = etree.fromstring(entry.read(50 * 1024 * 1024))
            except Exception:
                raise FB2StructureException('\'%s\' is not a valid XML' % self.__infos[0].filename)
        # fromstring() yields the root element, but the base class calls
        # getroot() on the result, so hand back the owning tree instead.
        return root.getroottree()

    def __exit__(self, kind, value, traceback):
        """Close the underlying zip archive."""
        self.__zip_file.__exit__(kind, value, traceback)