SF.net SVN: gar:[23488] csw/mgar/gar/v2/lib/web/releases_web.py

Wed Apr 23 10:11:41 CEST 2014

Revision: 23488
          http://sourceforge.net/p/gar/code/23488
Author:   wahwah
Date:     2014-04-23 08:11:40 +0000 (Wed, 23 Apr 2014)
Log Message:
-----------
releases-web: Don't allow duplicate uploads

If a file with certain name is in the catalog, don't allow to overwrite it
with new content.

Modified Paths:
--------------
    csw/mgar/gar/v2/lib/web/releases_web.py

Modified: csw/mgar/gar/v2/lib/web/releases_web.py
===================================================================

--- csw/mgar/gar/v2/lib/web/releases_web.py	2014-04-23 08:11:24 UTC (rev 23487)
+++ csw/mgar/gar/v2/lib/web/releases_web.py	2014-04-23 08:11:40 UTC (rev 23488)
@@ -3,16 +3,17 @@
 
 # A webpy application to allow HTTP access to the checkpkg database.
 
-import sys
-import os
-
 import base64
-import cjson
 import datetime
 import hashlib
 import logging
+import os
+import sys
+import tempfile
+
+import cjson
+import lockfile
 import sqlobject
-import tempfile
 import web
 
 from lib.python import checkpkg_lib
@@ -24,7 +25,8 @@
 from lib.python import relational_util
 from lib.web import web_lib
 
-from lib.web import web_lib
+# Read and write files in chunks of this size:
+CHUNK_SIZE = 2 * (1024 ** 2)
 
 urls = (
   r'/', 'Index',
@@ -77,6 +79,24 @@
     return ""
 
 
+def FileMd5(fd):
+  file_hash = hashlib.md5()
+  # Don't read the whole file into memory at once, do it in small chunks.
+  data = fd.read(CHUNK_SIZE)
+  while data:
+    file_hash.update(data)
+    data = fd.read(CHUNK_SIZE)
+  return file_hash.hexdigest()
+
+
+def CopyFile(infd, outfd):
+  infd.seek(0)
+  data = infd.read(CHUNK_SIZE)
+  while data:
+    os.write(outfd, data)
+    data = infd.read(CHUNK_SIZE)
+
+
 class Srv4List(object):
 
   def POST(self):
@@ -90,44 +110,65 @@
     web.header(
         'Content-type',
         'application/x-vnd.opencsw.pkg;type=upload-results')
-    file_hash = hashlib.md5()
-    # Don't read the whole file into memory at once, do it in small chunks.
-    chunk_size = 2 * 1024 * 1024
-    data = x['srv4_file'].file.read(chunk_size)
-    while data:
-      file_hash.update(data)
-      data = x['srv4_file'].file.read(chunk_size)
-    data_md5_sum = file_hash.hexdigest()
+    data_md5_sum = FileMd5(x['srv4_file'].file)
     declared_md5_sum = x['md5_sum']
     basename = x['basename']
     save_attempt = False
-    if declared_md5_sum == data_md5_sum:
+    error = False
+    if declared_md5_sum != data_md5_sum:
+      raise web.conflict()
+    try:
+      srv4 = models.Srv4FileStats.selectBy(md5_sum=data_md5_sum).getOne()
+    except sqlobject.main.SQLObjectNotFound:
+      messages.append("File %s not found in the db." % data_md5_sum)
+      raise web.preconditionfailed()
+
+    if not srv4.use_to_generate_catalogs:
+      messages.append(
+          '%r is not a package meant to be in catalogs '
+          '(e.g. not CSW).' % basename)
+      raise web.preconditionfailed()
+
+    target_path = os.path.join(ALLPKGS_DIR, basename)
+    lock_path = os.path.join('/tmp', '{0}.lock'.format(basename))
+    target_file_lock = lockfile.FileLock(lock_path)
+
+    # Have to get this lock to avoid two processes writing to the same file at
+    # the same time.
+    with target_file_lock:
+      on_disk_md5_sum = data_md5_sum
+      try:
+        with open(target_path, 'r') as fd:
+          on_disk_md5_sum = FileMd5(fd)
+      except IOError:
+        # The file doesn't exist yet. That's OK.
+        pass
+      if data_md5_sum != on_disk_md5_sum:
+        messages.append(
+            '%r already exists in the database with md5 sum %s. '
+            'The uploaded file is %s. We cannot overwrite the old filename '
+            'with new content.' % (basename, on_disk_md5_sum, data_md5_sum))
+        raise web.conflict()
+
       save_attempt = True
-      try:
-        srv4 = models.Srv4FileStats.selectBy(md5_sum=data_md5_sum).getOne()
-        if srv4.use_to_generate_catalogs:
-          # FieldStorage by default unlinks the temporary local file as soon as
-          # it's been opened. Therefore, we have to take care of writing data
-          # to the target location in an atomic way.
-          fd, tmp_filename = tempfile.mkstemp(dir=ALLPKGS_DIR)
-          x['srv4_file'].file.seek(0)
-          data = x['srv4_file'].file.read(chunk_size)
-          while data:
-            os.write(fd, data)
-            data = x['srv4_file'].file.read(chunk_size)
-          os.close(fd)
-          target_path = os.path.join(ALLPKGS_DIR, basename)
-          os.rename(tmp_filename, target_path)
-          # Since mkstemp creates files with mode 0600 by default:
-          os.chmod(target_path, 0644)
-      except sqlobject.main.SQLObjectNotFound:
-        messages.append("File %s not found in the db." % data_md5_sum)
+      # FieldStorage by default unlinks the temporary local file as soon as
+      # it's been opened. Therefore, we have to take care of writing data
+      # to the target location in an atomic way.
+      fd, tmp_filename = tempfile.mkstemp(dir=ALLPKGS_DIR)
+      CopyFile(x['srv4_file'].file, fd)
+      os.close(fd)
+      target_path = os.path.join(ALLPKGS_DIR, basename)
+      os.rename(tmp_filename, target_path)
+      # Since mkstemp creates files with mode 0600 by default:
+      os.chmod(target_path, 0644)
     messages.append({
         "received_md5": data_md5_sum,
         "declared_md5": declared_md5_sum,
         "save_attempt": save_attempt,
     })
-    return cjson.encode(messages)
+    response = cjson.encode(messages)
+    web.header('Content-Length', str(len(response)))
+    return response
 
 
 class Srv4Detail(object):

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.