Add __detect_namespaces to FB2 class

a3d85998 · Dmitry Shelepnev · 52886546 · a3d85998 · a3d85998 · a3d85998
Commit a3d85998 authored Feb 26, 2017 by Dmitry Shelepnev
--- a/book_tools/format/__init__.py
+++ b/book_tools/format/__init__.py
@@ -2,6 +2,7 @@
 import os
 import zipfile
 from xml import sax
+from io import BytesIO
 from book_tools.format.mimetype import Mimetype
@@ -42,9 +43,9 @@ class __detector:
        else:
            return Mimetype.OCTET_STREAM
-def detect_mime(file):
+def detect_mime(file, original_filename):
    FB2_ROOT = 'FictionBook'
-    mime = __detector.file(file.name)
+    mime = __detector.file(original_filename)
    try:
        if mime == Mimetype.XML or mime == Mimetype.FB2:
@@ -76,7 +77,8 @@ def detect_mime(file):
 def create_bookfile(file, original_filename):
    if isinstance(file, str):
        file = open(file, 'rb')
-    mimetype = detect_mime(file)
+    file = BytesIO(file.read())
+    mimetype = detect_mime(file,original_filename)
    if mimetype == Mimetype.EPUB:
        return EPub(file, original_filename)
    elif mimetype == Mimetype.FB2:

--- a/book_tools/format/fb2.py
+++ b/book_tools/format/fb2.py
@@ -13,15 +13,17 @@ class FB2StructureException(Exception):
            print(traceback.print_exc())
 class Namespace(object):
-    FICTION_BOOK = 'http://www.gribuser.ru/xml/fictionbook/2.0'
+    FICTION_BOOK20 = 'http://www.gribuser.ru/xml/fictionbook/2.0'
+    FICTION_BOOK21 = 'http://www.gribuser.ru/xml/fictionbook/2.1'
    XLINK = 'http://www.w3.org/1999/xlink'
 class FB2Base(BookFile):
    def __init__(self, file, original_filename, mimetype):
        BookFile.__init__(self, file, original_filename, mimetype)
-        self.__namespaces = {'fb': Namespace.FICTION_BOOK, 'xlink': Namespace.XLINK}
+        self.__namespaces = {'xlink': Namespace.XLINK}
        try:
            tree = self.__create_tree__()
+            self.__detect_namespaces(tree)
            self.__detect_title(tree)
            self.__detect_authors(tree)
            self.__detect_tags(tree)
@@ -45,7 +47,7 @@ class FB2Base(BookFile):
            tree = self.__create_tree__()
            res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces)
            cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:]
-            res = tree.xpath('//fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces)
+            res = tree.xpath('/fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces)
            content = base64.b64decode(res[0].text)
            with open(os.path.join(working_dir, 'cover.jpeg'), 'wb') as cover_file:
                cover_file.write(content)
@@ -58,17 +60,21 @@ class FB2Base(BookFile):
            tree = self.__create_tree__()
            res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces)
            cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:]
-            res = tree.xpath('//fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces)
+            res = tree.xpath('/fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces)
            content = base64.b64decode(res[0].text)
            return content
        except Exception as err:
-            print(err)
+            return None
+    def __detect_namespaces(self, tree):
+        tag = tree.getroot().tag
+        self.__namespaces['fb'] = Namespace.FICTION_BOOK20 if tag.find(Namespace.FICTION_BOOK20)>0 else Namespace.FICTION_BOOK21
        return None
    def __detect_title(self, tree):
        res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title', namespaces=self.__namespaces)
        if len(res) == 0:
-            res = tree.xpath('/FictionBook/description/title-info/book-title')
+            res = tree.xpath('/*[local-name() = "FictionBook"]/*[local-name() = "description"]/*[local-name() = "title-info"]/*[local-name() = "book-title"]')
        if len(res) > 0:
            self.__set_title__(res[0].text)

--- a/book_tools/pymobi/mobi.py
+++ b/book_tools/pymobi/mobi.py
@@ -260,7 +260,7 @@ class BookMobi(object):
            f = open(file, 'rb')
        else:
            f = file
-        self.filename = f.name
        self.f = f
        self.f.seek(0,0)
        # palm database header

--- a/opds_catalog/sopdscan.py
+++ b/opds_catalog/sopdscan.py
@@ -5,13 +5,13 @@ import time
 import datetime
 import logging
 import re
 from book_tools.format import create_bookfile
 from django.db import transaction
 from opds_catalog import fb2parse, opdsdb
 from opds_catalog import inpx_parser
-#from opds_catalog import settings
 import opds_catalog.zipf as zipfile
 from constance import config
@@ -207,9 +207,9 @@ class opdsScanner:
                try:
                    book_data = create_bookfile(file, name)
-                except:
+                except Exception as err:
                    book_data = None
-                    self.logger.warning(rel_path + ' - ' + name + ' Book parse error, skipping...')
+                    self.logger.warning(rel_path + ' - ' + name + ' Book parse error, skipping... (Error: %s)'%err)
                    self.bad_books += 1
                if book_data:

--- a/opds_catalog/tests/test_scan.py
+++ b/opds_catalog/tests/test_scan.py
@@ -65,7 +65,7 @@ class scanTestCase(TestCase):
        self.assertEqual(book.catalog.path, self.test_zip)
        self.assertEqual(book.catalog.cat_name, self.test_zip)
        self.assertEqual(book.catalog.cat_type, 1)
-        self.assertEqual(book.docdate, "130552595662030000")
+        self.assertEqual(book.docdate, "2014-09-15")
        self.assertEqual(book.title, "Любовь в жизни Обломова")
        self.assertEqual(book.avail, 2)
        self.assertEqual(book.authors.count(), 1)
@@ -86,7 +86,7 @@ class scanTestCase(TestCase):
        self.assertEqual(book.path, self.test_zip)
        self.assertEqual(book.cat_type, 1)
        self.assertEqual(book.title, "Драконьи Услуги")
-        self.assertEqual(book.authors.get(full_name="Куприянов Денис").search_full_name, "КУПРИЯНОВ ДЕНИС")
+        self.assertEqual(book.authors.get(full_name="Куприянов Денис Валерьевич").search_full_name, "КУПРИЯНОВ ДЕНИС ВАЛЕРЬЕВИЧ")
    def test_scanall(self):
        """ Тестирование процедуры scanall (извлекает метаданные из книг и помещает в БД) """