Skip to content
Snippets Groups Projects
Commit a3d85998 authored by Dmitry Shelepnev
Browse files

Add __detect_namespaces to FB2 class

parent 52886546
Branches
Tags
No related merge requests found
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
import os import os
import zipfile import zipfile
from xml import sax from xml import sax
from io import BytesIO
from book_tools.format.mimetype import Mimetype from book_tools.format.mimetype import Mimetype
...@@ -42,9 +43,9 @@ class __detector: ...@@ -42,9 +43,9 @@ class __detector:
else: else:
return Mimetype.OCTET_STREAM return Mimetype.OCTET_STREAM
def detect_mime(file): def detect_mime(file, original_filename):
FB2_ROOT = 'FictionBook' FB2_ROOT = 'FictionBook'
mime = __detector.file(file.name) mime = __detector.file(original_filename)
try: try:
if mime == Mimetype.XML or mime == Mimetype.FB2: if mime == Mimetype.XML or mime == Mimetype.FB2:
...@@ -76,7 +77,8 @@ def detect_mime(file): ...@@ -76,7 +77,8 @@ def detect_mime(file):
def create_bookfile(file, original_filename): def create_bookfile(file, original_filename):
if isinstance(file, str): if isinstance(file, str):
file = open(file, 'rb') file = open(file, 'rb')
mimetype = detect_mime(file) file = BytesIO(file.read())
mimetype = detect_mime(file,original_filename)
if mimetype == Mimetype.EPUB: if mimetype == Mimetype.EPUB:
return EPub(file, original_filename) return EPub(file, original_filename)
elif mimetype == Mimetype.FB2: elif mimetype == Mimetype.FB2:
......
...@@ -13,15 +13,17 @@ class FB2StructureException(Exception): ...@@ -13,15 +13,17 @@ class FB2StructureException(Exception):
print(traceback.print_exc()) print(traceback.print_exc())
class Namespace(object): class Namespace(object):
FICTION_BOOK = 'http://www.gribuser.ru/xml/fictionbook/2.0' FICTION_BOOK20 = 'http://www.gribuser.ru/xml/fictionbook/2.0'
FICTION_BOOK21 = 'http://www.gribuser.ru/xml/fictionbook/2.1'
XLINK = 'http://www.w3.org/1999/xlink' XLINK = 'http://www.w3.org/1999/xlink'
class FB2Base(BookFile): class FB2Base(BookFile):
def __init__(self, file, original_filename, mimetype): def __init__(self, file, original_filename, mimetype):
BookFile.__init__(self, file, original_filename, mimetype) BookFile.__init__(self, file, original_filename, mimetype)
self.__namespaces = {'fb': Namespace.FICTION_BOOK, 'xlink': Namespace.XLINK} self.__namespaces = {'xlink': Namespace.XLINK}
try: try:
tree = self.__create_tree__() tree = self.__create_tree__()
self.__detect_namespaces(tree)
self.__detect_title(tree) self.__detect_title(tree)
self.__detect_authors(tree) self.__detect_authors(tree)
self.__detect_tags(tree) self.__detect_tags(tree)
...@@ -45,7 +47,7 @@ class FB2Base(BookFile): ...@@ -45,7 +47,7 @@ class FB2Base(BookFile):
tree = self.__create_tree__() tree = self.__create_tree__()
res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces) res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces)
cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:] cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:]
res = tree.xpath('//fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces) res = tree.xpath('/fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces)
content = base64.b64decode(res[0].text) content = base64.b64decode(res[0].text)
with open(os.path.join(working_dir, 'cover.jpeg'), 'wb') as cover_file: with open(os.path.join(working_dir, 'cover.jpeg'), 'wb') as cover_file:
cover_file.write(content) cover_file.write(content)
...@@ -58,17 +60,21 @@ class FB2Base(BookFile): ...@@ -58,17 +60,21 @@ class FB2Base(BookFile):
tree = self.__create_tree__() tree = self.__create_tree__()
res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces) res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:coverpage/fb:image', namespaces=self.__namespaces)
cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:] cover_id = res[0].get('{' + Namespace.XLINK + '}href')[1:]
res = tree.xpath('//fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces) res = tree.xpath('/fb:binary[@id="%s"]' % cover_id, namespaces=self.__namespaces)
content = base64.b64decode(res[0].text) content = base64.b64decode(res[0].text)
return content return content
except Exception as err: except Exception as err:
print(err) return None
def __detect_namespaces(self, tree):
tag = tree.getroot().tag
self.__namespaces['fb'] = Namespace.FICTION_BOOK20 if tag.find(Namespace.FICTION_BOOK20)>0 else Namespace.FICTION_BOOK21
return None return None
def __detect_title(self, tree): def __detect_title(self, tree):
res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title', namespaces=self.__namespaces) res = tree.xpath('/fb:FictionBook/fb:description/fb:title-info/fb:book-title', namespaces=self.__namespaces)
if len(res) == 0: if len(res) == 0:
res = tree.xpath('/FictionBook/description/title-info/book-title') res = tree.xpath('/*[local-name() = "FictionBook"]/*[local-name() = "description"]/*[local-name() = "title-info"]/*[local-name() = "book-title"]')
if len(res) > 0: if len(res) > 0:
self.__set_title__(res[0].text) self.__set_title__(res[0].text)
......
...@@ -260,7 +260,7 @@ class BookMobi(object): ...@@ -260,7 +260,7 @@ class BookMobi(object):
f = open(file, 'rb') f = open(file, 'rb')
else: else:
f = file f = file
self.filename = f.name
self.f = f self.f = f
self.f.seek(0,0) self.f.seek(0,0)
# palm database header # palm database header
......
...@@ -5,13 +5,13 @@ import time ...@@ -5,13 +5,13 @@ import time
import datetime import datetime
import logging import logging
import re import re
from book_tools.format import create_bookfile from book_tools.format import create_bookfile
from django.db import transaction from django.db import transaction
from opds_catalog import fb2parse, opdsdb from opds_catalog import fb2parse, opdsdb
from opds_catalog import inpx_parser from opds_catalog import inpx_parser
#from opds_catalog import settings
import opds_catalog.zipf as zipfile import opds_catalog.zipf as zipfile
from constance import config from constance import config
...@@ -207,9 +207,9 @@ class opdsScanner: ...@@ -207,9 +207,9 @@ class opdsScanner:
try: try:
book_data = create_bookfile(file, name) book_data = create_bookfile(file, name)
except: except Exception as err:
book_data = None book_data = None
self.logger.warning(rel_path + ' - ' + name + ' Book parse error, skipping...') self.logger.warning(rel_path + ' - ' + name + ' Book parse error, skipping... (Error: %s)'%err)
self.bad_books += 1 self.bad_books += 1
if book_data: if book_data:
......
...@@ -65,7 +65,7 @@ class scanTestCase(TestCase): ...@@ -65,7 +65,7 @@ class scanTestCase(TestCase):
self.assertEqual(book.catalog.path, self.test_zip) self.assertEqual(book.catalog.path, self.test_zip)
self.assertEqual(book.catalog.cat_name, self.test_zip) self.assertEqual(book.catalog.cat_name, self.test_zip)
self.assertEqual(book.catalog.cat_type, 1) self.assertEqual(book.catalog.cat_type, 1)
self.assertEqual(book.docdate, "130552595662030000") self.assertEqual(book.docdate, "2014-09-15")
self.assertEqual(book.title, "Любовь в жизни Обломова") self.assertEqual(book.title, "Любовь в жизни Обломова")
self.assertEqual(book.avail, 2) self.assertEqual(book.avail, 2)
self.assertEqual(book.authors.count(), 1) self.assertEqual(book.authors.count(), 1)
...@@ -86,7 +86,7 @@ class scanTestCase(TestCase): ...@@ -86,7 +86,7 @@ class scanTestCase(TestCase):
self.assertEqual(book.path, self.test_zip) self.assertEqual(book.path, self.test_zip)
self.assertEqual(book.cat_type, 1) self.assertEqual(book.cat_type, 1)
self.assertEqual(book.title, "Драконьи Услуги") self.assertEqual(book.title, "Драконьи Услуги")
self.assertEqual(book.authors.get(full_name="Куприянов Денис").search_full_name, "КУПРИЯНОВ ДЕНИС") self.assertEqual(book.authors.get(full_name="Куприянов Денис Валерьевич").search_full_name, "КУПРИЯНОВ ДЕНИС ВАЛЕРЬЕВИЧ")
def test_scanall(self): def test_scanall(self):
""" Тестирование процедуры scanall (извлекает метаданные из книг и помещает в БД) """ """ Тестирование процедуры scanall (извлекает метаданные из книг и помещает в БД) """
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment