文章

释放odoo原力分布式架构改造

2018-11-12 | 18 minutes read |标签 odoo 架构 |分类 开发 odoo

释放odoo原力 分布式架构改造

背景

在很多公司都觉得odoo系统只适合中小企业,究其原因,大部分都是认为是odoo性能比较差,甚至有同学认为是python性能影响,诚然python性能比不过java这种编译静态型语言,但是性能不至于像odoo表现这么差,在研究其源码后,我发现odoo现有架构天然缺陷。

odoo现有架构

绘图1

odoo本身架构是一个单体运用,session 是通过文件系统存储的,ormcache 是基于运用自身内存的,我们首先分析下这两块代码,本文代码基于odoo10社区版

session部分

class Root(object):
    """Root WSGI application for the OpenERP Web Client.
    """
    def __init__(self):
        self._loaded = False

    @lazy_property
    def session_store(self):
        # Setup http sessions
        path = odoo.tools.config.session_dir
        _logger.debug('HTTP sessions stored in: %s', path)
        return werkzeug.contrib.sessions.FilesystemSessionStore(path, session_class=OpenERPSession)

    @lazy_property
    def nodb_routing_map(self):
        _logger.info("Generating nondb routing")
        return routing_map([''] + odoo.conf.server_wide_modules, True)

    def __call__(self, environ, start_response):
        """ Handle a WSGI request
        """
        if not self._loaded:
            self._loaded = True
            self.load_addons()
        return self.dispatch(environ, start_response)

    def load_addons(self):
        """ Load all addons from addons path containing static files and
        controllers and configure them.  """
        # TODO should we move this to ir.http so that only configured modules are served ?
        statics = {}
        for addons_path in odoo.modules.module.ad_paths:
            for module in sorted(os.listdir(str(addons_path))):
                if module not in addons_module:
                    mod_path = opj(addons_path, module)
                    manifest_path = module_manifest(mod_path)
                    path_static = opj(addons_path, module, 'static')
                    if manifest_path and os.path.isdir(path_static):
                        manifest = ast.literal_eval(open(manifest_path).read())
                        if not manifest.get('installable', True):
                            continue
                        manifest['addons_path'] = addons_path
                        _logger.debug("Loading %s", module)
                        if 'odoo.addons' in sys.modules:
                            m = __import__('odoo.addons.' + module)
                        else:
                            m = None
                        addons_module[module] = m
                        addons_manifest[module] = manifest
                        statics['/%s/static' % module] = path_static

        if statics:
            _logger.info("HTTP Configuring static files")
        app = werkzeug.wsgi.SharedDataMiddleware(self.dispatch, statics, cache_timeout=STATIC_CACHE)
        self.dispatch = DisableCacheMiddleware(app)

    def setup_session(self, httprequest):
        # recover or create session
        session_gc(self.session_store)

        sid = httprequest.args.get('session_id')
        explicit_session = True
        if not sid:
            sid =  httprequest.headers.get("X-Openerp-Session-Id")
        if not sid:
            sid = httprequest.cookies.get('session_id')
            explicit_session = False
        if sid is None:
            httprequest.session = self.session_store.new()
        else:
            httprequest.session = self.session_store.get(sid)
        return explicit_session

    def setup_db(self, httprequest):
        db = httprequest.session.db
        # Check if session.db is legit
        if db:
            if db not in db_filter([db], httprequest=httprequest):
                _logger.warn("Logged into database '%s', but dbfilter "
                             "rejects it; logging session out.", db)
                httprequest.session.logout()
                db = None

        if not db:
            httprequest.session.db = db_monodb(httprequest)

    def setup_lang(self, httprequest):
        if "lang" not in httprequest.session.context:
            alang = httprequest.accept_languages.best or "en-US"
            try:
                code, territory, _, _ = babel.core.parse_locale(alang, sep='-')
                if territory:
                    lang = '%s_%s' % (code, territory)
                else:
                    lang = babel.core.LOCALE_ALIASES[code]
            except (ValueError, KeyError):
                lang = 'en_US'
            httprequest.session.context["lang"] = lang

    def get_request(self, httprequest):
        # deduce type of request
        if httprequest.args.get('jsonp'):
            return JsonRequest(httprequest)
        if httprequest.mimetype in ("application/json", "application/json-rpc"):
            return JsonRequest(httprequest)
        else:
            return HttpRequest(httprequest)

    def get_response(self, httprequest, result, explicit_session):
        if isinstance(result, Response) and result.is_qweb:
            try:
                result.flatten()
            except(Exception), e:
                if request.db:
                    result = request.registry['ir.http']._handle_exception(e)
                else:
                    raise

        if isinstance(result, basestring):
            response = Response(result, mimetype='text/html')
        else:
            response = result

        # save to cache if requested and possible
        if getattr(request, 'cache_save', False) and response.status_code == 200:
            response.freeze()
            r = response.response
            if isinstance(r, list) and len(r) == 1 and isinstance(r[0], str):
                request.registry.cache[request.cache_save] = {
                    'content': r[0],
                    'mimetype': response.headers['Content-Type'],
                    'time': time.time(),
                }

        if httprequest.session.should_save:
            if httprequest.session.rotate:
                self.session_store.delete(httprequest.session)
                httprequest.session.sid = self.session_store.generate_key()
                httprequest.session.modified = True
            self.session_store.save(httprequest.session)
        # We must not set the cookie if the session id was specified using a http header or a GET parameter.
        # There are two reasons to this:
        # - When using one of those two means we consider that we are overriding the cookie, which means creating a new
        #   session on top of an already existing session and we don't want to create a mess with the 'normal' session
        #   (the one using the cookie). That is a special feature of the Session Javascript class.
        # - It could allow session fixation attacks.
        if not explicit_session and hasattr(response, 'set_cookie'):
            response.set_cookie('session_id', httprequest.session.sid, max_age=90 * 24 * 60 * 60)

        return response

    def dispatch(self, environ, start_response):
        """
        Performs the actual WSGI dispatching for the application.
        """
        try:
            httprequest = werkzeug.wrappers.Request(environ)
            httprequest.app = self
            httprequest.parameter_storage_class = werkzeug.datastructures.ImmutableOrderedMultiDict
            threading.current_thread().url = httprequest.url

            explicit_session = self.setup_session(httprequest)
            self.setup_db(httprequest)
            self.setup_lang(httprequest)

            request = self.get_request(httprequest)

            def _dispatch_nodb():
                try:
                    func, arguments = self.nodb_routing_map.bind_to_environ(request.httprequest.environ).match()
                except werkzeug.exceptions.HTTPException, e:
                    return request._handle_exception(e)
                request.set_handler(func, arguments, "none")
                result = request.dispatch()
                return result

            with request:
                db = request.session.db
                if db:
                    try:
                        odoo.registry(db).check_signaling()
                        with odoo.tools.mute_logger('odoo.sql_db'):
                            ir_http = request.registry['ir.http']
                    except (AttributeError, psycopg2.OperationalError, psycopg2.ProgrammingError):
                        # psycopg2 error or attribute error while constructing
                        # the registry. That means either
                        # - the database probably does not exists anymore
                        # - the database is corrupted
                        # - the database version doesnt match the server version
                        # Log the user out and fall back to nodb
                        request.session.logout()
                        # If requesting /web this will loop
                        if request.httprequest.path == '/web':
                            result = werkzeug.utils.redirect('/web/database/selector')
                        else:
                            result = _dispatch_nodb()
                    else:
                        result = ir_http._dispatch()
                        ir_http.pool.signal_caches_change()
                else:
                    result = _dispatch_nodb()

                response = self.get_response(httprequest, result, explicit_session)
            return response(environ, start_response)

        except werkzeug.exceptions.HTTPException, e:
            return e(environ, start_response)

    def get_db_router(self, db):
        if not db:
            return self.nodb_routing_map
        return request.registry['ir.http'].routing_map()

这是odoo后端在接受到请求后,第一个处理对象ROOT全部代码,通过web server 种的hook,将会依次调用setup_session,setup_db,setup_lang等方法获得处理后的http request 对象。这里需要注意是session_store方法,注意他被lazy_property装饰器装饰过,简单可以理解为这个是属性,直接根据self.sesson_store可以获得其值,这个方法主要返回一个session处理器,odoo原生是使用OpenERPSession,是基于werkzeug.contrib.sessions.Session这个python原装包,其代码如下

class OpenERPSession(werkzeug.contrib.sessions.Session):
    def __init__(self, *args, **kwargs):
        self.inited = False
        self.modified = False
        self.rotate = False
        super(OpenERPSession, self).__init__(*args, **kwargs)
        self.inited = True
        self._default_values()
        self.modified = False

    def __getattr__(self, attr):
        return self.get(attr, None)
    def __setattr__(self, k, v):
        if getattr(self, "inited", False):
            try:
                object.__getattribute__(self, k)
            except:
                return self.__setitem__(k, v)
        object.__setattr__(self, k, v)

    def authenticate(self, db, login=None, password=None, uid=None):
        """
        Authenticate the current user with the given db, login and
        password. If successful, store the authentication parameters in the
        current session and request.

        :param uid: If not None, that user id will be used instead the login
                    to authenticate the user.
        """

        if uid is None:
            wsgienv = request.httprequest.environ
            env = dict(
                base_location=request.httprequest.url_root.rstrip('/'),
                HTTP_HOST=wsgienv['HTTP_HOST'],
                REMOTE_ADDR=wsgienv['REMOTE_ADDR'],
            )
            uid = dispatch_rpc('common', 'authenticate', [db, login, password, env])
        else:
            security.check(db, uid, password)
        self.db = db
        self.uid = uid
        self.login = login
        self.password = password
        request.uid = uid
        request.disable_db = False

        if uid: self.get_context()
        return uid

    def check_security(self):
        """
        Check the current authentication parameters to know if those are still
        valid. This method should be called at each request. If the
        authentication fails, a :exc:`SessionExpiredException` is raised.
        """
        if not self.db or not self.uid:
            raise SessionExpiredException("Session expired")
        security.check(self.db, self.uid, self.password)

    def logout(self, keep_db=False):
        for k in self.keys():
            if not (keep_db and k == 'db'):
                del self[k]
        self._default_values()
        self.rotate = True

    def _default_values(self):
        self.setdefault("db", None)
        self.setdefault("uid", None)
        self.setdefault("login", None)
        self.setdefault("password", None)
        self.setdefault("context", {})

    def get_context(self):
        """
        Re-initializes the current user's session context (based on his
        preferences) by calling res.users.get_context() with the old context.

        :returns: the new context
        """
        assert self.uid, "The user needs to be logged-in to initialize his context"
        self.context = request.env['res.users'].context_get() or {}
        self.context['uid'] = self.uid
        self._fix_lang(self.context)
        return self.context

    def _fix_lang(self, context):
        """ OpenERP provides languages which may not make sense and/or may not
        be understood by the web client's libraries.

        Fix those here.

        :param dict context: context to fix
        """
        lang = context.get('lang')

        # inane OpenERP locale
        if lang == 'ar_AR':
            lang = 'ar'

        # lang to lang_REGION (datejs only handles lang_REGION, no bare langs)
        if lang in babel.core.LOCALE_ALIASES:
            lang = babel.core.LOCALE_ALIASES[lang]

        context['lang'] = lang or 'en_US'

    def save_action(self, action):
        """
        This method store an action object in the session and returns an integer
        identifying that action. The method get_action() can be used to get
        back the action.

        :param the_action: The action to save in the session.
        :type the_action: anything
        :return: A key identifying the saved action.
        :rtype: integer
        """
        saved_actions = self.setdefault('saved_actions', {"next": 1, "actions": {}})
        # we don't allow more than 10 stored actions
        if len(saved_actions["actions"]) >= 10:
            del saved_actions["actions"][min(saved_actions["actions"])]
        key = saved_actions["next"]
        saved_actions["actions"][key] = action
        saved_actions["next"] = key + 1
        self.modified = True
        return key

    def get_action(self, key):
        """
        Gets back a previously saved action. This method can return None if the action
        was saved since too much time (this case should be handled in a smart way).

        :param key: The key given by save_action()
        :type key: integer
        :return: The saved action or None.
        :rtype: anything
        """
        saved_actions = self.get('saved_actions', {})
        return saved_actions.get("actions", {}).get(key)

    def save_request_data(self):
        import uuid
        req = request.httprequest
        files = werkzeug.datastructures.MultiDict()
        # NOTE we do not store files in the session itself to avoid loading them in memory.
        #      By storing them in the session store, we ensure every worker (even ones on other
        #      servers) can access them. It also allow stale files to be deleted by `session_gc`.
        for f in req.files.values():
            storename = 'werkzeug_%s_%s.file' % (self.sid, uuid.uuid4().hex)
            path = os.path.join(root.session_store.path, storename)
            with open(path, 'w') as fp:
                f.save(fp)
            files.add(f.name, (storename, f.filename, f.content_type))
        self['serialized_request_data'] = {
            'form': req.form,
            'files': files,
        }

    @contextlib.contextmanager
    def load_request_data(self):
        data = self.pop('serialized_request_data', None)
        files = werkzeug.datastructures.MultiDict()
        try:
            if data:
                # regenerate files filenames with the current session store
                for name, (storename, filename, content_type) in data['files'].iteritems():
                    path = os.path.join(root.session_store.path, storename)
                    files.add(name, (path, filename, content_type))
                yield werkzeug.datastructures.CombinedMultiDict([data['form'], files])
            else:
                yield None
        finally:
            # cleanup files
            for f, _, _ in files.values():
                try:
                    os.unlink(f)
                except IOError:
                    pass

这部分代码是处理session具体方法,比如登陆,退出,等相关处理办法,可以理解为动作类,注意这个类中使用了的@contextlib.contextmanager装饰器 这是为了将构建的http request对象与session处理类保证其上下文一直。

思路通过redis改造session处理办法,从而让odoo具有横向扩张的可能性

Root类中session_store方法替换werkzeug.contrib.sessions.FilesystemSessionStore官方包,并实现其save,delete,get,list等行为

特别提示session_gc这个方法需要在外面处理下,不然他还会去删除存储本地session文件而不是redis缓存,造成找不到文件从而造成500错误。

ormcache改造

ormcache分为两部分,一部分为系统刚启动时候作为模块载入缓存这部分代码主要在modules模块中的registry.py

中,其功能主要保证各个包载入顺序,从而让python模块正确动态载入覆盖相关行为,其缓存内容多是类和方法,不容易对其使用reids缓存,其主要困难是,模型方法中使用 的self.env保存的对象或者方法无法被序列化,这部分如果有高手成功了改造可以联系我(邮箱:songyuchaoemail@163.com),我主要改造部分为模型中存在的model.pool.cache这里面缓存,这部分大部分都是正常类型数据缓存,首先我们先分析odoo自身LRU缓存

class LRU(object):
    """
    Implementation of a length-limited O(1) LRU queue.
    Built for and used by PyPE:
    http://pype.sourceforge.net
    Copyright 2003 Josiah Carlson.
    """

    def __init__(self, count, pairs=[]):
        self._lock = threading.RLock()
        self.count = max(count, 1)
        self.d = {}
        self.first = None
        self.last = None
        for key, value in pairs:
            self[key] = value

    @synchronized()
    def __contains__(self, obj):
        return obj in self.d

    def get(self, obj, val=None):
        try:
            return self[obj]
        except KeyError:
            return val

    @synchronized()
    def __getitem__(self, obj):
        a = self.d[obj].me
        self[a[0]] = a[1]
        return a[1]

    @synchronized()
    def __setitem__(self, obj, val):
        if obj in self.d:
            del self[obj]
        nobj = LRUNode(self.last, (obj, val))
        if self.first is None:
            self.first = nobj
        if self.last:
            self.last.next = nobj
        self.last = nobj
        self.d[obj] = nobj
        if len(self.d) > self.count:
            if self.first == self.last:
                self.first = None
                self.last = None
                return
            a = self.first
            a.next.prev = None
            self.first = a.next
            a.next = None
            del self.d[a.me[0]]
            del a

    @synchronized()
    def __delitem__(self, obj):
        nobj = self.d[obj]
        if nobj.prev:
            nobj.prev.next = nobj.next
        else:
            self.first = nobj.next
        if nobj.next:
            nobj.next.prev = nobj.prev
        else:
            self.last = nobj.prev
        del self.d[obj]

    @synchronized()
    def __iter__(self):
        cur = self.first
        while cur is not None:
            cur2 = cur.next
            yield cur.me[1]
            cur = cur2

    @synchronized()
    def __len__(self):
        return len(self.d)

    @synchronized()
    def iteritems(self):
        cur = self.first
        while cur is not None:
            cur2 = cur.next
            yield cur.me
            cur = cur2

    @synchronized()
    def iterkeys(self):
        return iter(self.d)

    @synchronized()
    def itervalues(self):
        for i, j in self.iteritems():
            yield j

    @synchronized()
    def keys(self):
        return self.d.keys()

    @synchronized()
    def pop(self, key):
        v = self[key]
        del self[key]
        return v

    @synchronized()
    def clear(self):
        self.d = {}
        self.first = None
        self.last = None

其代码本身代码 逻辑并不复杂,但是大家需要注意其大量使用的synchronized装饰器,为什么odoo开启works后,性能蜜汁无效,多与这部分有关,下面是这个装饰器源代码

def synchronized(lock_attr='_lock'):
    def decorator(func):
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            lock = getattr(self, lock_attr)
            try:
                lock.acquire()
                return func(self, *args, **kwargs)
            finally:
                lock.release()
        return wrapper
    return decorator

odoo自身的LRU 为了保证数据一致性,为这个类加入一把锁,在多线程情况下,多个线程抢夺这边锁会造成GIL问题(什么是GIL问题自行goggle)

ormcache改造思路,通过使用reids自身单线程数据一致性(相当于乐观锁)以及其强大性能将odoo从一致性问题造成性能衰减解放出来,并且在分布式部署中共享缓存。

改造代码

class RedisLRU(object):
    def __init__(self,db_name):
        self.redis = RedisPoolConnection()
        self.db_name = "".join(("ormcache:",db_name))

    def __getitem__(self, item):
        if self._check_name(item[1].__name__):
            raise KeyError
        table_name, key = self.key(item)
        val = self.redis.hget(table_name, key)
        if not val:
            raise KeyError
        obj = self.to_obj(val)
        return obj

    def to_obj(self, val):
        loads = dill.loads(zlib.decompress(val))
        return loads

    def key(self, item):
        print item
        if len(item)== 1:
            return self.db_name,item[0]
        item_list = item[1:-1]
        fn_name = item[1].__name__
        if len(item_list)>1:
            return self.db_name, (item[0],fn_name,)+ item_list[1:-1]
        else:
            return self.db_name,(item[0],fn_name,)

    def _check_name(self, method):
        return method in ("_compute_domain")

    def __setitem__(self, key, value):
        if isinstance(value, (str, int, list, dict, set)):
            dumps = self._str_zlib(value)
            db_name, key = self.key(key)
            self.redis.hset(db_name, key, dumps)

    def _str_zlib(self, value):
        dumps = zlib.compress(dill.dumps(value))
        return dumps

    def __delitem__(self, key):
        self.redis.delete(*self.key(key))

    def pop(self, key):
        key = self.key(key)
        val = self.redis.get(key)
        self.redis.delete(key)
        if not val:
            raise KeyError
        return val

    def get(self, key, val=None):
        table_name, key = self.get(key)
        return self.redis.hget(table_name, key, val)

    def get_cache_time(self):
        seconds = self.get_config_value("ormcache_expire") or ONE_WEEK_IN_SECONDS
        return seconds + random.randint(0, 9)

    def get_config_value(self, key, default=None):
        return odoo.tools.config.get(key, default)

    def clear(self):
        self.redis.delete(self.db_name)

    def iteritems(self):
        hgetall = self.redis.hgetall(self.db_name)
        for key, val in hgetall.iteritems():
            yield {key, self.to_obj(val)}

    def iterkeys(self):
        return iter(self.redis.hgetall(self.db_name))

    def itervalues(self):
        for key, v in self.redis.hgetall(self.db_name).iteritems():
            yield v

    def keys(self):
        return self.redis.hgetall(self.db_name).keys()

    def __len__(self):
        self.redis.keys("ormcache*")

静态文件改造

odoo将通过less编译的文件存储于数据库ir_attachment表中,url表示路径,从而造成两方面问题,第一静态文件请求需要经过数据库查询,第二odoo服务器本身还要处理静态文件,

改造思路就是将静态文件直接存储到文件系统中,而不是通过数据库再转义下,并且通过nginx直接代理出去,降低odoo服务器处理静态文件的压力。

源码分析

class IrAttachment(models.Model):
    """Attachments are used to link binary files or url to any openerp document.

    External attachment storage
    ---------------------------

    The computed field ``datas`` is implemented using ``_file_read``,
    ``_file_write`` and ``_file_delete``, which can be overridden to implement
    other storage engines. Such methods should check for other location pseudo
    uri (example: hdfs://hadoopserver).

    The default implementation is the file:dirname location that stores files
    on the local filesystem using name based on their sha1 hash
    """
    _name = 'ir.attachment'
    _order = 'id desc'

    @api.depends('res_model', 'res_id')
    def _compute_res_name(self):
        for attachment in self:
            if attachment.res_model and attachment.res_id:
                record = self.env[attachment.res_model].browse(attachment.res_id)
                attachment.res_name = record.display_name

    @api.model
    def _storage(self):
        return self.env['ir.config_parameter'].sudo().get_param('ir_attachment.location', 'file')

    @api.model
    def _filestore(self):
        return config.filestore(self._cr.dbname)

    @api.model
    def force_storage(self):
        """Force all attachments to be stored in the currently configured storage"""
        if not self.env.user._is_admin():
            raise AccessError(_('Only administrators can execute this action.'))

        # domain to retrieve the attachments to migrate
        domain = {
            'db': [('store_fname', '!=', False)],
            'file': [('db_datas', '!=', False)],
        }[self._storage()]

        for attach in self.search(domain):
            attach.write({'datas': attach.datas})
        return True

    @api.model
    def _full_path(self, path):
        # sanitize path
        path = re.sub('[.]', '', path)
        path = path.strip('/\\')
        return os.path.join(self._filestore(), path)

    @api.model
    def _get_path(self, bin_data, sha):
        # retro compatibility
        fname = sha[:3] + '/' + sha
        full_path = self._full_path(fname)
        if os.path.isfile(full_path):
            return fname, full_path        # keep existing path

        # scatter files across 256 dirs
        # we use '/' in the db (even on windows)
        fname = sha[:2] + '/' + sha
        full_path = self._full_path(fname)
        dirname = os.path.dirname(full_path)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        return fname, full_path

    @api.model
    def _file_read(self, fname, bin_size=False):
        full_path = self._full_path(fname)
        r = ''
        try:
            if bin_size:
                r = human_size(os.path.getsize(full_path))
            else:
                r = open(full_path,'rb').read().encode('base64')
        except (IOError, OSError):
            _logger.info("_read_file reading %s", full_path, exc_info=True)
        return r

    @api.model
    def _file_write(self, value, checksum):
        bin_value = value.decode('base64')
        fname, full_path = self._get_path(bin_value, checksum)
        if not os.path.exists(full_path):
            try:
                with open(full_path, 'wb') as fp:
                    fp.write(bin_value)
                # add fname to checklist, in case the transaction aborts
                self._mark_for_gc(fname)
            except IOError:
                _logger.info("_file_write writing %s", full_path, exc_info=True)
        return fname

    @api.model
    def _file_delete(self, fname):
        # simply add fname to checklist, it will be garbage-collected later
        self._mark_for_gc(fname)

    def _mark_for_gc(self, fname):
        """ Add ``fname`` in a checklist for the filestore garbage collection. """
        # we use a spooldir: add an empty file in the subdirectory 'checklist'
        full_path = os.path.join(self._full_path('checklist'), fname)
        if not os.path.exists(full_path):
            dirname = os.path.dirname(full_path)
            if not os.path.isdir(dirname):
                with tools.ignore(OSError):
                    os.makedirs(dirname)
            open(full_path, 'ab').close()

    @api.model
    def _file_gc(self):
        """ Perform the garbage collection of the filestore. """
        if self._storage() != 'file':
            return

        # Continue in a new transaction. The LOCK statement below must be the
        # first one in the current transaction, otherwise the database snapshot
        # used by it may not contain the most recent changes made to the table
        # ir_attachment! Indeed, if concurrent transactions create attachments,
        # the LOCK statement will wait until those concurrent transactions end.
        # But this transaction will not see the new attachements if it has done
        # other requests before the LOCK (like the method _storage() above).
        cr = self._cr
        cr.commit()

        # prevent all concurrent updates on ir_attachment while collecting!
        cr.execute("LOCK ir_attachment IN SHARE MODE")

        # retrieve the file names from the checklist
        checklist = {}
        for dirpath, _, filenames in os.walk(self._full_path('checklist')):
            dirname = os.path.basename(dirpath)
            for filename in filenames:
                fname = "%s/%s" % (dirname, filename)
                checklist[fname] = os.path.join(dirpath, filename)

        # determine which files to keep among the checklist
        whitelist = set()
        for names in cr.split_for_in_conditions(checklist):
            cr.execute("SELECT store_fname FROM ir_attachment WHERE store_fname IN %s", [names])
            whitelist.update(row[0] for row in cr.fetchall())

        # remove garbage files, and clean up checklist
        removed = 0
        for fname, filepath in checklist.iteritems():
            if fname not in whitelist:
                try:
                    os.unlink(self._full_path(fname))
                    removed += 1
                except (OSError, IOError):
                    _logger.info("_file_gc could not unlink %s", self._full_path(fname), exc_info=True)
            with tools.ignore(OSError):
                os.unlink(filepath)

        # commit to release the lock
        cr.commit()
        _logger.info("filestore gc %d checked, %d removed", len(checklist), removed)

    @api.depends('store_fname', 'db_datas')
    def _compute_datas(self):
        bin_size = self._context.get('bin_size')
        for attach in self:
            if attach.store_fname:
                attach.datas = self._file_read(attach.store_fname, bin_size)
            else:
                attach.datas = attach.db_datas

    def _inverse_datas(self):
        location = self._storage()
        for attach in self:
            # compute the fields that depend on datas
            value = attach.datas
            bin_data = value and value.decode('base64') or ''
            vals = {
                'file_size': len(bin_data),
                'checksum': self._compute_checksum(bin_data),
                'index_content': self._index(bin_data, attach.datas_fname, attach.mimetype),
                'store_fname': False,
                'db_datas': value,
            }
            if value and location != 'db':
                # save it to the filestore
                vals['store_fname'] = self._file_write(value, vals['checksum'])
                vals['db_datas'] = False

            # take current location in filestore to possibly garbage-collect it
            fname = attach.store_fname
            # write as superuser, as user probably does not have write access
            super(IrAttachment, attach.sudo()).write(vals)
            if fname:
                self._file_delete(fname)

    def _compute_checksum(self, bin_data):
        """ compute the checksum for the given datas
            :param bin_data : datas in its binary form
        """
        # an empty file has a checksum too (for caching)
        return hashlib.sha1(bin_data or '').hexdigest()

    def _compute_mimetype(self, values):
        """ compute the mimetype of the given values
            :param values : dict of values to create or write an ir_attachment
            :return mime : string indicating the mimetype, or application/octet-stream by default
        """
        mimetype = None
        if values.get('mimetype'):
            mimetype = values['mimetype']
        if not mimetype and values.get('datas_fname'):
            mimetype = mimetypes.guess_type(values['datas_fname'])[0]
        if not mimetype and values.get('url'):
            mimetype = mimetypes.guess_type(values['url'])[0]
        if values.get('datas') and (not mimetype or mimetype == 'application/octet-stream'):
            mimetype = guess_mimetype(values['datas'].decode('base64'))
        return mimetype or 'application/octet-stream'

    def _check_contents(self, values):
        mimetype = values['mimetype'] = self._compute_mimetype(values)
        xml_like = 'ht' in mimetype or 'xml' in mimetype # hta, html, xhtml, etc.
        force_text = (xml_like and (not self.env.user._is_admin() or
            self.env.context.get('attachments_mime_plainxml')))
        if force_text:
            values['mimetype'] = 'text/plain'
        return values

    @api.model
    def _index(self, bin_data, datas_fname, file_type):
        """ compute the index content of the given filename, or binary data.
            This is a python implementation of the unix command 'strings'.
            :param bin_data : datas in binary form
            :return index_content : string containing all the printable character of the binary data
        """
        index_content = False
        if file_type:
            index_content = file_type.split('/')[0]
            if index_content == 'text': # compute index_content only for text type
                words = re.findall("[^\x00-\x1F\x7F-\xFF]{4,}", bin_data)
                index_content = ustr("\n".join(words))
        return index_content

    name = fields.Char('Attachment Name', required=True)
    datas_fname = fields.Char('File Name')
    description = fields.Text('Description')
    res_name = fields.Char('Resource Name', compute='_compute_res_name', store=True)
    res_model = fields.Char('Resource Model', readonly=True, help="The database object this attachment will be attached to.")
    res_field = fields.Char('Resource Field', readonly=True)
    res_id = fields.Integer('Resource ID', readonly=True, help="The record id this is attached to.")
    create_date = fields.Datetime('Date Created', readonly=True)
    create_uid = fields.Many2one('res.users', string='Owner', readonly=True)
    company_id = fields.Many2one('res.company', string='Company', change_default=True,
                                 default=lambda self: self.env['res.company']._company_default_get('ir.attachment'))
    type = fields.Selection([('url', 'URL'), ('binary', 'File')],
                            string='Type', required=True, default='binary', change_default=True,
                            help="You can either upload a file from your computer or copy/paste an internet link to your file.")
    url = fields.Char('Url', index=True, size=1024)
    public = fields.Boolean('Is public document')

    # the field 'datas' is computed and may use the other fields below
    datas = fields.Binary(string='File Content', compute='_compute_datas', inverse='_inverse_datas')
    db_datas = fields.Binary('Database Data')
    store_fname = fields.Char('Stored Filename')
    file_size = fields.Integer('File Size', readonly=True)
    checksum = fields.Char("Checksum/SHA1", size=40, index=True, readonly=True)
    mimetype = fields.Char('Mime Type', readonly=True)
    index_content = fields.Text('Indexed Content', readonly=True, prefetch=False)

    @api.model_cr_context
    def _auto_init(self):
        res = super(IrAttachment, self)._auto_init()
        self._cr.execute('SELECT indexname FROM pg_indexes WHERE indexname = %s', ('ir_attachment_res_idx',))
        if not self._cr.fetchone():
            self._cr.execute('CREATE INDEX ir_attachment_res_idx ON ir_attachment (res_model, res_id)')
            self._cr.commit()
        return res

    @api.model
    def check(self, mode, values=None):
        """Restricts the access to an ir.attachment, according to referred model
        In the 'document' module, it is overriden to relax this hard rule, since
        more complex ones apply there.
        """
        # collect the records to check (by model)
        model_ids = defaultdict(set)            # {model_name: set(ids)}
        require_employee = False
        if self:
            self._cr.execute('SELECT res_model, res_id, create_uid, public FROM ir_attachment WHERE id IN %s', [tuple(self.ids)])
            for res_model, res_id, create_uid, public in self._cr.fetchall():
                if public and mode == 'read':
                    continue
                if not (res_model and res_id):
                    if create_uid != self._uid:
                        require_employee = True
                    continue
                model_ids[res_model].add(res_id)
        if values and values.get('res_model') and values.get('res_id'):
            model_ids[values['res_model']].add(values['res_id'])

        # check access rights on the records
        for res_model, res_ids in model_ids.iteritems():
            # ignore attachments that are not attached to a resource anymore
            # when checking access rights (resource was deleted but attachment
            # was not)
            if res_model not in self.env:
                require_employee = True
                continue
            records = self.env[res_model].browse(res_ids).exists()
            if len(records) < len(res_ids):
                require_employee = True
            # For related models, check if we can write to the model, as unlinking
            # and creating attachments can be seen as an update to the model
            records.check_access_rights('write' if mode in ('create', 'unlink') else mode)
            records.check_access_rule(mode)

        if require_employee:
            if not (self.env.user._is_admin() or self.env.user.has_group('base.group_user')):
                raise AccessError(_("Sorry, you are not allowed to access this document."))

    @api.model
    def _search(self, args, offset=0, limit=None, order=None, count=False, access_rights_uid=None):
        # add res_field=False in domain if not present; the arg[0] trick below
        # works for domain items and '&'/'|'/'!' operators too
        if not any(arg[0] in ('id', 'res_field') for arg in args):
            args.insert(0, ('res_field', '=', False))

        ids = super(IrAttachment, self)._search(args, offset=offset, limit=limit, order=order,
                                                count=False, access_rights_uid=access_rights_uid)

        if self._uid == SUPERUSER_ID:
            # rules do not apply for the superuser
            return len(ids) if count else ids

        if not ids:
            return 0 if count else []

        # Work with a set, as list.remove() is prohibitive for large lists of documents
        # (takes 20+ seconds on a db with 100k docs during search_count()!)
        orig_ids = ids
        ids = set(ids)

        # For attachments, the permissions of the document they are attached to
        # apply, so we must remove attachments for which the user cannot access
        # the linked document.
        # Use pure SQL rather than read() as it is about 50% faster for large dbs (100k+ docs),
        # and the permissions are checked in super() and below anyway.
        model_attachments = defaultdict(lambda: defaultdict(set))   # {res_model: {res_id: set(ids)}}
        self._cr.execute("""SELECT id, res_model, res_id, public FROM ir_attachment WHERE id IN %s""", [tuple(ids)])
        for row in self._cr.dictfetchall():
            if not row['res_model'] or row['public']:
                continue
            # model_attachments = {res_model: {res_id: set(ids)}}
            model_attachments[row['res_model']][row['res_id']].add(row['id'])

        # To avoid multiple queries for each attachment found, checks are
        # performed in batch as much as possible.
        for res_model, targets in model_attachments.iteritems():
            if res_model not in self.env:
                continue
            if not self.env[res_model].check_access_rights('read', False):
                # remove all corresponding attachment ids
                ids.difference_update(itertools.chain(*targets.itervalues()))
                continue
            # filter ids according to what access rules permit
            target_ids = list(targets)
            allowed = self.env[res_model].with_context(active_test=False).search([('id', 'in', target_ids)])
            for res_id in set(target_ids).difference(allowed.ids):
                ids.difference_update(targets[res_id])

        # sort result according to the original sort ordering
        result = [id for id in orig_ids if id in ids]
        return len(result) if count else list(result)

    @api.multi
    def read(self, fields=None, load='_classic_read'):
        self.check('read')
        return super(IrAttachment, self).read(fields, load=load)

    @api.multi
    def write(self, vals):
        self.check('write', values=vals)
        # remove computed field depending of datas
        for field in ('file_size', 'checksum'):
            vals.pop(field, False)
        if 'mimetype' in vals or 'datas' in vals:
            vals = self._check_contents(vals)
        return super(IrAttachment, self).write(vals)

    @api.multi
    def copy(self, default=None):
        self.check('write')
        return super(IrAttachment, self).copy(default)

    @api.multi
    def unlink(self):
        self.check('unlink')

        # First delete in the database, *then* in the filesystem if the
        # database allowed it. Helps avoid errors when concurrent transactions
        # are deleting the same file, and some of the transactions are
        # rolled back by PostgreSQL (due to concurrent updates detection).
        to_delete = set(attach.store_fname for attach in self if attach.store_fname)
        res = super(IrAttachment, self).unlink()
        for file_path in to_delete:
            self._file_delete(file_path)

        return res

    @api.model
    def create(self, values):
        # remove computed field depending of datas
        for field in ('file_size', 'checksum'):
            values.pop(field, False)
        values = self._check_contents(values)
        self.browse().check('write', values=values)
        return super(IrAttachment, self).create(values)

    @api.model
    def action_get(self):
        return self.env['ir.actions.act_window'].for_xml_id('base', 'action_attachment')

通过修改IrAttachment模型中的_file_write,_file_read,_file_delete,_file_gc 等文件管理方法,改造其行为使用可以直接映射文件地址。

改造后配合云公司解决方案最终架构

图片1