[Scipy-svn] r3493 - in trunk/scipy/io: . tests
scipy-svn@scip...
scipy-svn@scip...
Fri Nov 2 18:33:52 CDT 2007
Author: chris.burns
Date: 2007-11-02 18:33:49 -0500 (Fri, 02 Nov 2007)
New Revision: 3493
Modified:
trunk/scipy/io/datasource.py
trunk/scipy/io/tests/test_datasource.py
Log:
Add more datasource tests and documentation. Fix some path resolution bugs.
Modified: trunk/scipy/io/datasource.py
===================================================================
--- trunk/scipy/io/datasource.py 2007-11-02 20:09:04 UTC (rev 3492)
+++ trunk/scipy/io/datasource.py 2007-11-02 23:33:49 UTC (rev 3493)
@@ -1,7 +1,7 @@
"""A file interface for handling local and remote data files.
-The goal of datasource is to abstract some of the file system operations when
+The goal of datasource is to abstract some of the file system operations when
dealing with data files so the researcher doesn't have to know all the
-low-level details. Through datasource, a researcher can obtain and use a
+low-level details. Through datasource, a researcher can obtain and use a
file with one function call, regardless of location of the file.
DataSource is meant to augment standard python libraries, not replace them.
@@ -25,7 +25,7 @@
>>> # './www.google.com/index.html'
>>> # opens the file and returns a file object.
>>> fp = ds.open('http://www.google.com/index.html')
- >>>
+ >>>
>>> # Use the file as you normally would
>>> fp.read()
>>> fp.close()
@@ -61,6 +61,31 @@
_file_openers = {".gz":gzip.open, ".bz2":bz2.BZ2File, None:file}
+def open(path, mode='r', destpath=os.curdir):
+ """Open ``path`` with ``mode`` and return the file object.
+
+ If ``path`` is a URL, it will be downloaded, stored in the DataSource
+ directory and opened from there.
+
+ *Parameters*:
+
+ path : {string}
+
+ mode : {string}, optional
+
+ destpath : {string}, optional
+ Destination directory where URLs will be downloaded and stored.
+
+ *Returns*:
+
+ file object
+
+ """
+
+ ds = DataSource(destpath)
+ return ds.open(path, mode)
+
+
class DataSource (object):
"""A generic data source file (file, http, ftp, ...).
@@ -69,27 +94,27 @@
details of downloading the file, allowing you to simply pass in a valid
file path (or URL) and obtain a file object.
- Methods:
+ *Methods*:
- exists : test if the file exists locally or remotely
- abspath : get absolute path of the file in the DataSource directory
- open : open the file
- Example URL DataSource::
+ *Example URL DataSource*::
# Initialize DataSource with a local directory, default is os.curdir.
ds = DataSource('/home/guido')
-
+
# Open remote file.
# File will be downloaded and opened from here:
# /home/guido/site/xyz.txt
ds.open('http://fake.xyz.web/site/xyz.txt')
-
- Example using DataSource for temporary files::
+ *Example using DataSource for temporary files*::
+
# Initialize DataSource with 'None' for the local directory.
ds = DataSource(None)
-
+
# Open local file.
# Opened file exists in a temporary directory like:
# /tmp/tmpUnhcvM/foobar.txt
@@ -99,13 +124,13 @@
*Notes*:
BUG : URLs require a scheme string ('http://') to be used.
www.google.com will fail.
-
+
>>> repos.exists('www.google.com/index.html')
False
>>> repos.exists('http://www.google.com/index.html')
True
-
+
"""
def __init__(self, destpath=os.curdir):
@@ -118,6 +143,7 @@
self._istmpdest = True
def __del__(self):
+ # Remove temp directories
if self._istmpdest:
rmtree(self._destpath)
@@ -161,7 +187,7 @@
def _isurl(self, path):
"""Test if path is a net location. Tests the scheme and netloc."""
-
+
# BUG : URLs require a scheme string ('http://') to be used.
# www.google.com will fail.
# Should we prepend the scheme for those that don't have it and
@@ -214,9 +240,15 @@
"""
# Build list of possible local file paths
- filelist = self._possible_names(self.abspath(path))
- if self._isurl(path):
- # Add list of possible remote urls
+ if not self._isurl(path):
+ # Valid local paths
+ filelist = self._possible_names(path)
+ # Paths in self._destpath
+ filelist += self._possible_names(self.abspath(path))
+ else:
+ # Cached URLs in self._destpath
+ filelist = self._possible_names(self.abspath(path))
+ # Remote URLs
filelist = filelist + self._possible_names(path)
for name in filelist:
@@ -250,6 +282,13 @@
"""
+ # TODO: This should be more robust. Handles case where path includes
+ # the destpath, but not other sub-paths. Failing case:
+ # path = /home/guido/datafile.txt
+ # destpath = /home/alex/
+ # upath = self.abspath(path)
+ # upath == '/home/alex/home/guido/datafile.txt'
+
# handle case where path includes self._destpath
splitpath = path.split(self._destpath, 2)
if len(splitpath) > 1:
@@ -263,9 +302,9 @@
Test if ``path`` exists as (and in this order):
- a local file.
- - a remote URL that have been downloaded and stored locally in the
+ - a remote URL that has been downloaded and stored locally in the
DataSource directory.
- - a remote URL that has not been downloaded, but is valid and
+ - a remote URL that has not been downloaded, but is valid and
accessible.
*Parameters*:
@@ -284,25 +323,30 @@
*Notes*
When ``path`` is a URL, ``exists`` will return True if it's either
- stored locally in the DataSource directory, or is a valid remote
+ stored locally in the DataSource directory, or is a valid remote
URL. DataSource does not discriminate between the two; the file
is accessible if it exists in either location.
"""
+ # Test local path
+ if os.path.exists(path):
+ return True
+
+ # Test cached url
upath = self.abspath(path)
if os.path.exists(upath):
return True
- elif self._isurl(path):
+
+ # Test remote url
+ if self._isurl(path):
try:
netfile = urlopen(path)
- # just validate existence, nothing more.
del(netfile)
return True
except URLError:
return False
- else:
- return False
+ return False
def open(self, path, mode='r'):
"""Open ``path`` with ``mode`` and return the file object.
@@ -323,11 +367,11 @@
"""
- # TODO: There is no support for opening a file for writing which
+ # TODO: There is no support for opening a file for writing which
# doesn't exist yet (creating a file). Should there be?
# TODO: Add a ``subdir`` parameter for specifying the subdirectory
- # used to store URLs in self._destpath.
+ # used to store URLs in self._destpath.
if self._isurl(path) and self._iswritemode(mode):
raise ValueError("URLs are not writeable")
@@ -344,20 +388,21 @@
class Repository (DataSource):
- """A data repository where multiple DataSource's share a base URL/directory.
+ """A data Repository where multiple DataSource's share a base URL/directory.
- Use a Repository when you will be working with multiple files from one
- base URL or directory. Initialize the Respository with the base URL,
- then refer to each file only by it's filename.
+ Repository extends DataSource by prepending a base URL (or directory) to
+ all the files it handles. Use a Repository when you will be working with
+ multiple files from one base URL. Initialize the Repository with the
+ base URL, then refer to each file by its filename only.
- Methods:
+ *Methods*:
- exists : test if the file exists locally or remotely
- abspath : get absolute path of the file in the DataSource directory
- open : open the file
- Toy example::
-
+ *Toy example*::
+
# Analyze all files in the repository.
repos = Repository('/home/user/data/dir/')
for filename in filelist:
@@ -375,6 +420,9 @@
DataSource.__init__(self, destpath=destpath)
self._baseurl = baseurl
+ def __del__(self):
+ DataSource.__del__(self)
+
def _fullpath(self, path):
"""Return complete path for path. Prepends baseurl if necessary."""
splitpath = path.split(self._baseurl, 2)
@@ -385,27 +433,25 @@
return result
def _findfile(self, path):
- """Extend DataSource method to add baseurl to ``path``."""
- #print 'Repository._findfile:', path
+ """Extend DataSource method to prepend baseurl to ``path``."""
return DataSource._findfile(self, self._fullpath(path))
def abspath(self, path):
- """Extend DataSource method to add baseurl to ``path``."""
+ """Extend DataSource method to prepend baseurl to ``path``."""
return DataSource.abspath(self, self._fullpath(path))
- def exists(self, path):
- """Extend DataSource method to add baseurl to ``path``."""
- #print 'Respository.exists:', path
+ def exists(self, path):
+ """Extend DataSource method to prepend baseurl to ``path``."""
return DataSource.exists(self, self._fullpath(path))
def open(self, path, mode='r'):
- """Extend DataSource method to add baseurl to ``path``."""
- #print 'Repository.open:', path
+ """Extend DataSource method to prepend baseurl to ``path``."""
return DataSource.open(self, self._fullpath(path), mode)
def listdir(self):
'''List files in the source Repository.'''
if self._isurl(self._baseurl):
- raise NotImplementedError
+ raise NotImplementedError, \
+ "Directory listing of URLs, not supported yet."
else:
return os.listdir(self._baseurl)
Modified: trunk/scipy/io/tests/test_datasource.py
===================================================================
--- trunk/scipy/io/tests/test_datasource.py 2007-11-02 20:09:04 UTC (rev 3492)
+++ trunk/scipy/io/tests/test_datasource.py 2007-11-02 23:33:49 UTC (rev 3493)
@@ -136,8 +136,14 @@
self.assertEqual(self.ds.exists(invalid_httpurl()), False)
def test_ValidFile(self):
+ # Test valid file in destpath
tmpfile = valid_textfile(self.tmpdir)
assert self.ds.exists(tmpfile)
+ # Test valid local file not in destpath
+ localdir = mkdtemp()
+ tmpfile = valid_textfile(localdir)
+ assert self.ds.exists(tmpfile)
+ rmtree(localdir)
def test_InvalidFile(self):
tmpfile = invalid_textfile(self.tmpdir)
@@ -183,9 +189,11 @@
class TestRespositoryAbspath(NumpyTestCase):
def setUp(self):
- self.repos = datasource.Repository(valid_baseurl(), None)
+ self.tmpdir = mkdtemp()
+ self.repos = datasource.Repository(valid_baseurl(), self.tmpdir)
def tearDown(self):
+ rmtree(self.tmpdir)
del self.repos
def test_ValidHTTP(self):
@@ -196,6 +204,54 @@
self.assertEqual(local_path, filepath)
+class TestRepositoryExists(NumpyTestCase):
+ def setUp(self):
+ self.tmpdir = mkdtemp()
+ self.repos = datasource.Repository(valid_baseurl(), self.tmpdir)
+
+ def tearDown(self):
+ rmtree(self.tmpdir)
+ del self.repos
+
+ def test_ValidFile(self):
+ # Create local temp file
+ tmpfile = valid_textfile(self.tmpdir)
+ assert self.repos.exists(tmpfile)
+
+ def test_InvalidFile(self):
+ tmpfile = invalid_textfile(self.tmpdir)
+ self.assertEqual(self.repos.exists(tmpfile), False)
+
+ def test_RemoveHTTPFile(self):
+ assert self.repos.exists(valid_httpurl())
+
+ def test_CachedHTTPFile(self):
+ localfile = valid_httpurl()
+ # Create a locally cached temp file with a URL-based
+ # directory structure. This is similar to what Repository.open
+ # would do.
+ scheme, netloc, upath, pms, qry, frg = urlparse(localfile)
+ local_path = os.path.join(self.repos._destpath, netloc)
+ os.mkdir(local_path, 0700)
+ tmpfile = valid_textfile(local_path)
+ assert self.repos.exists(tmpfile)
+
+
+class TestOpenFunc(NumpyTestCase):
+ def setUp(self):
+ self.tmpdir = mkdtemp()
+
+ def tearDown(self):
+ rmtree(self.tmpdir)
+
+ def test_DataSourceOpen(self):
+ local_file = valid_textfile(self.tmpdir)
+ # Test case where destpath is passed in
+ assert datasource.open(local_file, destpath=self.tmpdir)
+ # Test case where default destpath is used
+ assert datasource.open(local_file)
+
+
if __name__ == "__main__":
NumpyTest().run()
More information about the Scipy-svn
mailing list