Skip to content
Snippets Groups Projects
Select Git revision
  • 817f598044f050134eb44361d201f73ec3b30eaf
  • master default protected
  • v0.5
  • dependabot/pip/certifi-2022.12.7
  • dependabot/pip/pillow-9.3.0
  • dependabot/pip/lxml-4.9.1
  • dependabot/pip/django-2.2.28
  • dependabot/pip/cryptography-3.2
  • v0.48-devel
  • v0.47-devel
  • v0.47-devel-slashes
  • v0.46-devel
  • v0.45.1-fixes
  • v0.45-devel
  • v0.44-devel
  • v0.43-devel-reader
  • v0.43-devel
  • v0.42-devel
  • v0.42-devel-epub
  • v0.41-devel
  • v0.47
  • v0.45
  • v0.45-2
  • v0.45-1
  • v0.44
  • v0.43
  • v0.42
  • v0.41
  • v0.40
  • v0.39
  • v0.38
  • v0.37
  • v0.36
  • v0.33
  • v0.23.4
  • v0.23.4-1
36 results

fb2.py

Blame
  • user avatar
    Dmitry Shelepnev authored
    817f5980
    History
    fb2.py 8.45 KiB
    import base64, os, traceback, zipfile
    from lxml import etree
    from abc import abstractmethod
    
    from book_tools.format.bookfile import BookFile
    from book_tools.format.mimetype import Mimetype
    from book_tools.format.util import list_zip_file_infos
    
    class FB2StructureException(Exception):
        """Raised when a file cannot be read as a well-formed FB2 book.

        The message is always prefixed with ``fb2 verification failed: ``.
        """

        def __init__(self, error):
            """Build the exception from a message or from a causing exception.

            Args:
                error: a description string, or the exception that caused the
                    failure.  When it is an exception its traceback is written
                    to stderr for diagnostics.
            """
            Exception.__init__(self, 'fb2 verification failed: %s' % error)
            if isinstance(error, Exception):
                # Print the traceback of the wrapped exception itself.  The
                # previous ``print(traceback.print_exc())`` additionally wrote a
                # stray "None" to stdout, because print_exc() returns None.
                traceback.print_exception(type(error), error, error.__traceback__)
    
    class Namespace(object):
        """XML namespace URIs referenced while querying FB2 documents."""

        # XLink namespace; the cover image reference is read from its ``href`` attribute.
        XLINK = 'http://www.w3.org/1999/xlink'

        # FictionBook schema namespaces, one per supported format revision.
        FICTION_BOOK21 = 'http://www.gribuser.ru/xml/fictionbook/2.1'
        FICTION_BOOK20 = 'http://www.gribuser.ru/xml/fictionbook/2.0'
    
    class FB2Base(BookFile):
        """Shared FictionBook (FB2) metadata reader.

        The constructor obtains a parsed document from ``__create_tree__`` and
        runs a series of detectors over it (title, authors, tags, series,
        language, document date, description).  Subclasses provide
        ``__create_tree__``.

        Most lookups are attempted with the FictionBook namespace first and, if
        nothing matches, again without a namespace.
        """

        def __init__(self, file, original_filename, mimetype):
            """Parse the book and collect its metadata.

            Args:
                file: passed through to ``BookFile.__init__``.
                original_filename: passed through to ``BookFile.__init__``.
                mimetype: passed through to ``BookFile.__init__``.

            Raises:
                FB2StructureException: if building the tree or any detection
                    step fails.
            """
            BookFile.__init__(self, file, original_filename, mimetype)
            # Prefix map for all namespaced XPath queries.  'fb' is switched to
            # FictionBook 2.1 by __detect_namespaces when the root element uses it.
            self.__namespaces = {'fb': Namespace.FICTION_BOOK20, 'xlink': Namespace.XLINK}
            try:
                tree = self.__create_tree__()
                # Must run first: the other detectors query with the map it updates.
                self.__detect_namespaces(tree)
                self.__detect_title(tree)
                self.__detect_authors(tree)
                self.__detect_tags(tree)
                self.__detect_series_info(tree)
                self.__detect_language(tree)
                self.__detect_docdate(tree)
                description = self.__detect_description(tree)
                if description:
                    self.description = description.strip()
            except FB2StructureException:
                # Already the right type; propagate unchanged.
                raise
            except Exception as error:
                raise FB2StructureException(error)

        @abstractmethod
        def __create_tree__(self):
            """Return the parsed XML document of the book (subclass hook).

            NOTE(review): ``@abstractmethod`` is only enforced when the class
            uses an ABC metaclass; whether BookFile does is not visible here.
            """
            return None

        def __read_cover(self, search_body):
            """Return the decoded bytes of the cover image.

            Args:
                search_body: when true and the title-info block declares no
                    cover page, fall back to the first image found in the body.

            Raises:
                Exception: any lookup/decoding failure (no image, missing
                    ``href``, missing binary, invalid base64) propagates to
                    the caller.
            """
            tree = self.__create_tree__()
            res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces)
            if not res and search_body:
                res = tree.xpath('/fb:FictionBook/fb:body//fb:image', namespaces=self.__namespaces)
            # The href has the form "#<binary id>"; drop the leading character.
            cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:]
            # Pass the id as an XPath variable rather than formatting it into
            # the expression, so ids containing quotes cannot break the query.
            res = tree.xpath('/fb:FictionBook/fb:binary[@id=$cover_id]', namespaces=self.__namespaces, cover_id=cover_id)
            return base64.b64decode(res[0].text)

        def extract_cover_internal(self, working_dir):
            """Write the declared cover image to ``<working_dir>/cover.jpeg``.

            Only the cover page declared in title-info is considered.

            Returns:
                ``('cover.jpeg', False)`` on success, ``(None, False)`` if the
                cover could not be located, decoded or written.
            """
            try:
                content = self.__read_cover(False)
                with open(os.path.join(working_dir, 'cover.jpeg'), 'wb') as cover_file:
                    cover_file.write(content)
                return ('cover.jpeg', False)
            except Exception:
                # Best effort: a missing or broken cover is not an error.
                return (None, False)

        def extract_cover_memory(self):
            """Return the cover image bytes, or ``None`` if unavailable.

            Falls back to the first image in the book body when title-info
            declares no cover page.  Failures are reported on stdout.
            """
            try:
                return self.__read_cover(True)
            except Exception as err:
                print("exception Extract %s"%err)
                return None

        def __detect_namespaces(self, tree):
            """Switch the 'fb' prefix to FictionBook 2.1 if the root element uses it."""
            # Accept both an ElementTree (has getroot) and a bare root Element,
            # since subclasses may return either from __create_tree__.
            getroot = getattr(tree, 'getroot', None)
            root = getroot() if getroot is not None else tree
            if Namespace.FICTION_BOOK21 in root.tag:
                self.__namespaces['fb'] = Namespace.FICTION_BOOK21
            return None

        def __detect_title(self, tree):
            """Read ``book-title`` and hand its text to ``__set_title__``."""
            res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title', namespaces=self.__namespaces)
            if not res:
                # local-name() matches the elements whatever namespace they are in.
                res = tree.xpath('/*[local-name() = "FictionBook"]/*[local-name() = "description"]/*[local-name() = "title-info"]/*[local-name() = "book-title"]')
            if res:
                self.__set_title__(res[0].text)

            return None

        def __detect_docdate(self, tree):
            """Read the document date and hand it to ``__set_docdate__``.

            The ``value`` attribute of ``document-info/date`` is preferred; the
            element text is used when no such attribute is found.
            """
            res = tree.xpath('/fb:FictionBook/fb:description/fb:document-info/fb:date/@value', namespaces=self.__namespaces)
            if not res:
                res = tree.xpath('/FictionBook/description/document-info/date/@value')
            # Attribute queries yield strings, element queries yield nodes.
            is_attrib = bool(res)
            if not res:
                res = tree.xpath('/fb:FictionBook/fb:description/fb:document-info/fb:date', namespaces=self.__namespaces)
            if not res:
                res = tree.xpath('/FictionBook/description/document-info/date')
            if res:
                self.__set_docdate__(res[0] if is_attrib else res[0].text)

            return None

        def __detect_authors(self, tree):
            """Register every ``title-info/author`` via ``__add_author__``.

            Each author is passed as "first middle last" (missing parts are
            empty strings) together with the last name.
            """
            res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:author', namespaces=self.__namespaces)
            # Child lookups must use the same namespacing as the query that matched.
            use_namespaces = bool(res)
            if not res:
                res = tree.xpath('/FictionBook/description/title-info/author')

            def subnode_text(node, name):
                if use_namespaces:
                    subnode = node.find('fb:' + name, namespaces=self.__namespaces)
                else:
                    subnode = node.find(name)
                text = subnode.text if subnode is not None else ''
                return text or ''

            for node in res:
                first_name = subnode_text(node, 'first-name')
                middle_name = subnode_text(node, 'middle-name')
                last_name = subnode_text(node, 'last-name')
                self.__add_author__(' '.join([first_name, middle_name, last_name]), last_name)

        def __detect_language(self, tree):
            """Store the text of ``title-info/lang`` in ``language_code``."""
            res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:lang', namespaces=self.__namespaces)
            if not res:
                res = tree.xpath('/FictionBook/description/title-info/lang')
            if res:
                self.language_code = res[0].text

        def __detect_tags(self, tree):
            """Pass the text of every ``title-info/genre`` to ``__add_tag__``."""
            res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:genre', namespaces=self.__namespaces)
            if not res:
                res = tree.xpath('/FictionBook/description/title-info/genre')
            for node in res:
                self.__add_tag__(node.text)

        def __detect_series_info(self, tree):
            """Fill ``series_info`` from the first ``title-info/sequence`` element.

            Nothing is stored unless the normalised ``name`` attribute is
            non-empty; an empty ``number`` is stored as ``None``.
            """
            res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:sequence', namespaces=self.__namespaces)
            if not res:
                res = tree.xpath('/FictionBook/description/title-info/sequence')
            if res:
                title = BookFile.__normalise_string__(res[0].get('name'))
                index = BookFile.__normalise_string__(res[0].get('number'))
                if title:
                    self.series_info = {
                        'title': title,
                        'index': index or None
                    }

        def __detect_description(self, tree):
            """Return the annotation as UTF-8 encoded plain text, or ``None``."""
            res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:annotation', namespaces=self.__namespaces)
            if not res:
                res = tree.xpath('/FictionBook/description/title-info/annotation')
            if res:
                # method='text' drops the markup and keeps only the text content.
                return etree.tostring(res[0], encoding='utf-8', method='text')

            return None
    
    class FB2(FB2Base):
        """FB2 book read directly from an uncompressed file object."""

        def __init__(self, file, original_filename):
            """Initialise the base reader with the plain FB2 mimetype."""
            FB2Base.__init__(self, file, original_filename, Mimetype.FB2)

        def __create_tree__(self):
            """Rewind ``self.file`` and parse it with lxml.

            Raises:
                FB2StructureException: if seeking or parsing fails.
            """
            try:
                # Start from the beginning: the tree may be built more than once.
                self.file.seek(0, 0)
                document = etree.parse(self.file)
            except Exception as err:
                raise FB2StructureException('the file is not a valid XML (%s)'%err)
            return document

        def __exit__(self, kind, value, traceback):
            """Context-manager exit; this class has nothing to release."""
            return None
    
    class FB2Zip(FB2Base):
        """FB2 book stored as the single entry of a zip archive."""

        def __init__(self, file, original_filename):
            """Validate the archive and read the metadata of the contained book.

            Raises:
                FB2StructureException: if the archive fails its integrity test,
                    does not contain exactly one file (as reported by
                    ``list_zip_file_infos``), or the book cannot be parsed.
            """
            self.__zip_file = zipfile.ZipFile(file)
            try:
                # testzip() returns the name of the first corrupt member, if any.
                if self.__zip_file.testzip():
                    raise FB2StructureException('broken zip archive')
                self.__infos = list_zip_file_infos(self.__zip_file)
                if len(self.__infos) != 1:
                    raise FB2StructureException('archive contains %s files' % len(self.__infos))
            except FB2StructureException:
                self.__zip_file.close()
                raise
            except Exception as error:
                self.__zip_file.close()
                raise FB2StructureException(error)

            try:
                FB2Base.__init__(self, file, original_filename, Mimetype.FB2_ZIP)
            except Exception:
                # Do not leak the archive handle when metadata parsing fails;
                # the exception itself is propagated unchanged.
                self.__zip_file.close()
                raise

        def __create_tree__(self):
            """Parse the archived entry and return it as an ElementTree.

            At most 50 MiB of the entry are read.

            Raises:
                FB2StructureException: if the data read is not valid XML.
            """
            with self.__zip_file.open(self.__infos[0]) as entry:
                try:
                    root = etree.fromstring(entry.read(50 * 1024 * 1024))
                except Exception:
                    raise FB2StructureException('\'%s\' is not a valid XML' % self.__infos[0].filename)
            # fromstring() yields the root Element; wrap it in its ElementTree
            # so the result has the same type as etree.parse() and supports
            # getroot(), which the base class calls.
            return root.getroottree()

        def __exit__(self, kind, value, traceback):
            """Context-manager exit; delegates to the underlying zip archive."""
            self.__zip_file.__exit__(kind, value, traceback)