Top

Module gs_helper

# USGS Landsat Imagery Util
#
#
# Author: developmentseed
# Contributor: scisco
#
# License: CC0 1.0 Universal

import os
import subprocess
from zipfile import ZipFile
import tarfile

from dateutil.parser import parse

try:
    from general_helper import check_create_folder, create_paired_list, exit
    import settings
except ImportError:
    import sys
    sys.path.append(os.path
                      .abspath(os.path
                                 .join(os.path.dirname(__file__))))
    from general_helper import check_create_folder, create_paired_list, exit
    import settings



class GsHelper(object):
    """ Search, download and unpack Landsat scenes stored on Google Storage.

    Every location (scene list url, source url, download / zip / unzip
    directories) is read from the settings module when the helper is
    created. Transfers are delegated to the gsutil command line tool.
    """

    def __init__(self):
        """ Copy the configured locations from settings and make sure the
        download directory exists.
        """
        self.scene_file_url = settings.SCENE_FILE_URL
        self.download_dir = settings.DOWNLOAD_DIR
        self.zip_dir = settings.ZIP_DIR
        self.unzip_dir = settings.UNZIP_DIR
        self.scene_file = settings.SCENE_FILE
        self.source_url = settings.SOURCE_URL

        # Make sure download directory exist
        check_create_folder(self.download_dir)

    def search(self, rows_paths, start=None, end=None):
        """ Search in Landsat's scene_list file. The file is stored as .zip on
        Google Storage and includes the gs url of all available images.

        Example of file information on scene_list.zip:
        gs://earthengine-public/landsat/L8/232/096/LT82320962013127LGN01.tar.bz

        Arguments:
            rows_paths - a string of paired values. e.g. '003,003,001,001'
            start - a string containing the start date. e.g. 12/23/2014
            end - a string containing the end date. e.g. 12/24/2014

            The date range is only applied when both start and end are
            given; otherwise every scene of the requested rows/paths is
            returned.

        Return:
            a list containing download urls. e.g.:
            ['gs://earthengine-public/landsat/L8/232/094/LT82320942013127LGN01.tar.bz',
             'gs://earthengine-public/landsat/L8/232/093/LT82320932013127LGN01.tar.bz']
        """

        # Turning rows and paths to paired tuples
        try:
            paired = create_paired_list(rows_paths)
        except ValueError as e:
            exit('Error: %s' % e.args[0])

        self._fetch_gs_scence_list()

        # parse() expects a date string, so omitted dates stay None
        start_date = parse(start) if start else None
        end_date = parse(end) if end else None

        # The context manager closes the scene list even if the search fails
        with open(self.scene_file, 'r') as scene:
            return self._search_scene_list(scene=scene,
                                           query=paired,
                                           start=start_date,
                                           end=end_date)

    def single_download(self, row, path, name, sat_type='L8'):
        """ Download single image from Landsat on Google Storage

        Arguments:
            row - string in this format xxx, e.g. 003
            path - string in this format xxx, e.g. 003
            name - zip file name without .tar.bz e.g. LT81360082013127LGN01
            sat_type - e.g. L7, L8, ...

        Return:
            True when the gsutil call succeeds, False when it raises
            subprocess.CalledProcessError
        """
        archive = '%s.tar.bz' % name
        url = '%s/%s/%s/%s/%s' % (self.source_url,
                                  sat_type,
                                  path,
                                  row,
                                  archive)
        try:
            subprocess.check_call(
                ["gsutil", "cp", "-n", url,
                 "%s/%s" % (self.zip_dir, archive)])
            return True
        except subprocess.CalledProcessError:
            return False

    def batch_download(self, image_list):
        """
        Download batch group of images

        Arguments:
            image_list - a list of google storage urls e.g.
                ['gs://earthengine-public/landsat/L8/136/008/'
                      'LT81360082013127LGN01.tar.bz',
                      'gs://earthengine-public/landsat/L8/136/008/'
                      'LT81360082013127LGN01.tar.bz']

        Return:
            True when every download succeeds, False when a gsutil call
            raises subprocess.CalledProcessError
        """
        try:
            self._download_images(image_list)
            return True
        except subprocess.CalledProcessError:
            return False

    def unzip(self):
        """
        Unzip all files stored at settings.ZIP_DIR and save them in
        settings.UNZIP_DIR

        Return:
            True when the zip directory contained at least one entry,
            False when it was empty
        """
        return self._unzip_images()

    #################
    # Private Methods
    #################

    def _fetch_gs_scence_list(self):
        """ Download and extract the scene list unless the extracted file is
        already present at self.scene_file.
        """

        if not os.path.isfile(self.scene_file):
            archive = '%s.zip' % self.scene_file

            # Download the file
            subprocess.call(
                ["gsutil", "cp", "-n", self.scene_file_url, archive])

            # Unzip the file; the archive is closed even if extraction fails
            with ZipFile(archive, 'r') as scene_zip:
                scene_zip.extractall(path=self.download_dir)

            print("scene_file unziped")

    def _search_scene_list(self, scene, query, start=None, end=None):
        """
        Search scene_list for the provided rows, paths and date range.

        Arguments:
            scene - an open, seekable file object with one gs url per line
            query - a list of paired tuples e.g.[('003', '003'),('003', '004')]
            start - a datetime object
            end - a datetime object

            The date range is inclusive and only applied when both start
            and end are given.

        Return:
            a list of the matching lines with the newline removed
        """

        file_list = []

        # Only build the date window when both ends are known; calling
        # timetuple() on a missing date would fail.
        use_dates = start is not None and end is not None
        if use_dates:
            start_tt = start.timetuple()
            end_tt = end.timetuple()
            lower = (start_tt.tm_year, start_tt.tm_yday)
            upper = (end_tt.tm_year, end_tt.tm_yday)

            print('Searching for images from %s to %s'
                  % (start.strftime('%b %d, %Y'),
                     end.strftime('%b %d, %Y')))

        print('Rows and Paths searched: ')
        print(query)

        scene.seek(0)
        for line in scene:
            file_name = line.split('/')[-1]

            # Characters 3-8 of the file name are compared to the query pairs
            if (file_name[3:6], file_name[6:9]) not in query:
                continue

            if use_dates:
                # Characters 9-12 hold the year, 13-15 the day of the year.
                # Comparing (year, day) tuples covers ranges within one
                # year as well as ranges spanning several years.
                taken = (int(file_name[9:13]), int(file_name[13:16]))
                if not lower <= taken <= upper:
                    continue

            file_list.append(line.replace('\n', ''))

        print("Search completed! %s images found." % len(file_list))
        return file_list

    def _download_images(self, files):
        """ Copy every url of files into self.zip_dir with gsutil.

        Raises subprocess.CalledProcessError when a gsutil call fails.
        """

        check_create_folder(self.zip_dir)

        print("Downloading %s files from Google Storage..." % len(files))

        for url in files:
            image_name = url.split('/')[-1]
            subprocess.check_call(
                ["gsutil", "cp", "-n", url,
                 "%s/%s" % (self.zip_dir, image_name)])

    def _unzip_images(self):
        """ Extract every archive of self.zip_dir into its own folder under
        self.unzip_dir, skipping the ones that already have a folder.

        Return:
            True when the zip directory contained at least one entry,
            False when it was empty
        """
        images = os.listdir(self.zip_dir)
        check_create_folder(self.unzip_dir)

        for image in images:
            # The part before the first dot names the target folder; it is
            # empty for dot-files, which are skipped.
            folder_name = image.split('.')[0]

            if folder_name and self._check_if_not_unzipped(folder_name):
                target = '%s/%s' % (self.unzip_dir, folder_name)

                # Create folder
                check_create_folder(target)

                print("Unzipping %s ...be patient!" % image)
                # Unzip
                with tarfile.open('%s/%s' % (self.zip_dir, image)) as tar:
                    tar.extractall(path=target)

        # Report only after the whole directory has been walked; returning
        # inside the loop would stop after the first archive.
        return bool(images)

    def _check_if_not_unzipped(self, folder_name):
        """ Return False (and say so) when folder_name already exists under
        self.unzip_dir, True otherwise.
        """
        if os.path.exists('%s/%s' % (self.unzip_dir, folder_name)):
            print("%s is already unzipped" % folder_name)
            return False
        return True

Index

Classes

class GsHelper

class GsHelper(object):
    """ Search, download and unpack Landsat scenes stored on Google Storage.

    Every location (scene list url, source url, download / zip / unzip
    directories) is read from the settings module when the helper is
    created. Transfers are delegated to the gsutil command line tool.
    """

    def __init__(self):
        """ Copy the configured locations from settings and make sure the
        download directory exists.
        """
        self.scene_file_url = settings.SCENE_FILE_URL
        self.download_dir = settings.DOWNLOAD_DIR
        self.zip_dir = settings.ZIP_DIR
        self.unzip_dir = settings.UNZIP_DIR
        self.scene_file = settings.SCENE_FILE
        self.source_url = settings.SOURCE_URL

        # Make sure download directory exist
        check_create_folder(self.download_dir)

    def search(self, rows_paths, start=None, end=None):
        """ Search in Landsat's scene_list file. The file is stored as .zip on
        Google Storage and includes the gs url of all available images.

        Example of file information on scene_list.zip:
        gs://earthengine-public/landsat/L8/232/096/LT82320962013127LGN01.tar.bz

        Arguments:
            rows_paths - a string of paired values. e.g. '003,003,001,001'
            start - a string containing the start date. e.g. 12/23/2014
            end - a string containing the end date. e.g. 12/24/2014

            The date range is only applied when both start and end are
            given; otherwise every scene of the requested rows/paths is
            returned.

        Return:
            a list containing download urls. e.g.:
            ['gs://earthengine-public/landsat/L8/232/094/LT82320942013127LGN01.tar.bz',
             'gs://earthengine-public/landsat/L8/232/093/LT82320932013127LGN01.tar.bz']
        """

        # Turning rows and paths to paired tuples
        try:
            paired = create_paired_list(rows_paths)
        except ValueError as e:
            exit('Error: %s' % e.args[0])

        self._fetch_gs_scence_list()

        # parse() expects a date string, so omitted dates stay None
        start_date = parse(start) if start else None
        end_date = parse(end) if end else None

        # The context manager closes the scene list even if the search fails
        with open(self.scene_file, 'r') as scene:
            return self._search_scene_list(scene=scene,
                                           query=paired,
                                           start=start_date,
                                           end=end_date)

    def single_download(self, row, path, name, sat_type='L8'):
        """ Download single image from Landsat on Google Storage

        Arguments:
            row - string in this format xxx, e.g. 003
            path - string in this format xxx, e.g. 003
            name - zip file name without .tar.bz e.g. LT81360082013127LGN01
            sat_type - e.g. L7, L8, ...

        Return:
            True when the gsutil call succeeds, False when it raises
            subprocess.CalledProcessError
        """
        archive = '%s.tar.bz' % name
        url = '%s/%s/%s/%s/%s' % (self.source_url,
                                  sat_type,
                                  path,
                                  row,
                                  archive)
        try:
            subprocess.check_call(
                ["gsutil", "cp", "-n", url,
                 "%s/%s" % (self.zip_dir, archive)])
            return True
        except subprocess.CalledProcessError:
            return False

    def batch_download(self, image_list):
        """
        Download batch group of images

        Arguments:
            image_list - a list of google storage urls e.g.
                ['gs://earthengine-public/landsat/L8/136/008/'
                      'LT81360082013127LGN01.tar.bz',
                      'gs://earthengine-public/landsat/L8/136/008/'
                      'LT81360082013127LGN01.tar.bz']

        Return:
            True when every download succeeds, False when a gsutil call
            raises subprocess.CalledProcessError
        """
        try:
            self._download_images(image_list)
            return True
        except subprocess.CalledProcessError:
            return False

    def unzip(self):
        """
        Unzip all files stored at settings.ZIP_DIR and save them in
        settings.UNZIP_DIR

        Return:
            True when the zip directory contained at least one entry,
            False when it was empty
        """
        return self._unzip_images()

    #################
    # Private Methods
    #################

    def _fetch_gs_scence_list(self):
        """ Download and extract the scene list unless the extracted file is
        already present at self.scene_file.
        """

        if not os.path.isfile(self.scene_file):
            archive = '%s.zip' % self.scene_file

            # Download the file
            subprocess.call(
                ["gsutil", "cp", "-n", self.scene_file_url, archive])

            # Unzip the file; the archive is closed even if extraction fails
            with ZipFile(archive, 'r') as scene_zip:
                scene_zip.extractall(path=self.download_dir)

            print("scene_file unziped")

    def _search_scene_list(self, scene, query, start=None, end=None):
        """
        Search scene_list for the provided rows, paths and date range.

        Arguments:
            scene - an open, seekable file object with one gs url per line
            query - a list of paired tuples e.g.[('003', '003'),('003', '004')]
            start - a datetime object
            end - a datetime object

            The date range is inclusive and only applied when both start
            and end are given.

        Return:
            a list of the matching lines with the newline removed
        """

        file_list = []

        # Only build the date window when both ends are known; calling
        # timetuple() on a missing date would fail.
        use_dates = start is not None and end is not None
        if use_dates:
            start_tt = start.timetuple()
            end_tt = end.timetuple()
            lower = (start_tt.tm_year, start_tt.tm_yday)
            upper = (end_tt.tm_year, end_tt.tm_yday)

            print('Searching for images from %s to %s'
                  % (start.strftime('%b %d, %Y'),
                     end.strftime('%b %d, %Y')))

        print('Rows and Paths searched: ')
        print(query)

        scene.seek(0)
        for line in scene:
            file_name = line.split('/')[-1]

            # Characters 3-8 of the file name are compared to the query pairs
            if (file_name[3:6], file_name[6:9]) not in query:
                continue

            if use_dates:
                # Characters 9-12 hold the year, 13-15 the day of the year.
                # Comparing (year, day) tuples covers ranges within one
                # year as well as ranges spanning several years.
                taken = (int(file_name[9:13]), int(file_name[13:16]))
                if not lower <= taken <= upper:
                    continue

            file_list.append(line.replace('\n', ''))

        print("Search completed! %s images found." % len(file_list))
        return file_list

    def _download_images(self, files):
        """ Copy every url of files into self.zip_dir with gsutil.

        Raises subprocess.CalledProcessError when a gsutil call fails.
        """

        check_create_folder(self.zip_dir)

        print("Downloading %s files from Google Storage..." % len(files))

        for url in files:
            image_name = url.split('/')[-1]
            subprocess.check_call(
                ["gsutil", "cp", "-n", url,
                 "%s/%s" % (self.zip_dir, image_name)])

    def _unzip_images(self):
        """ Extract every archive of self.zip_dir into its own folder under
        self.unzip_dir, skipping the ones that already have a folder.

        Return:
            True when the zip directory contained at least one entry,
            False when it was empty
        """
        images = os.listdir(self.zip_dir)
        check_create_folder(self.unzip_dir)

        for image in images:
            # The part before the first dot names the target folder; it is
            # empty for dot-files, which are skipped.
            folder_name = image.split('.')[0]

            if folder_name and self._check_if_not_unzipped(folder_name):
                target = '%s/%s' % (self.unzip_dir, folder_name)

                # Create folder
                check_create_folder(target)

                print("Unzipping %s ...be patient!" % image)
                # Unzip
                with tarfile.open('%s/%s' % (self.zip_dir, image)) as tar:
                    tar.extractall(path=target)

        # Report only after the whole directory has been walked; returning
        # inside the loop would stop after the first archive.
        return bool(images)

    def _check_if_not_unzipped(self, folder_name):
        """ Return False (and say so) when folder_name already exists under
        self.unzip_dir, True otherwise.
        """
        if os.path.exists('%s/%s' % (self.unzip_dir, folder_name)):
            print("%s is already unzipped" % folder_name)
            return False
        return True

Ancestors (in MRO)

Instance variables

var download_dir

var scene_file

var scene_file_url

var source_url

var unzip_dir

var zip_dir

Methods

def __init__(

self)

def __init__(self):
    """ Copy every configured location from settings onto the instance and
    make sure the download directory exists.
    """
    # Urls
    self.source_url = settings.SOURCE_URL
    self.scene_file_url = settings.SCENE_FILE_URL

    # Scene list file and working directories
    self.scene_file = settings.SCENE_FILE
    self.download_dir = settings.DOWNLOAD_DIR
    self.zip_dir = settings.ZIP_DIR
    self.unzip_dir = settings.UNZIP_DIR

    # The download directory has to be there before anything else happens
    check_create_folder(self.download_dir)

def batch_download(

self, image_list)

Download batch group of images

Arguments: image_list - a list of google storage urls e.g. ['gs://earthengine-public/landsat/L8/136/008/' 'LT81360082013127LGN01.tar.bz', 'gs://earthengine-public/landsat/L8/136/008/' 'LT81360082013127LGN01.tar.bz']

def batch_download(self, image_list):
    """
    Fetch a whole list of images in one go

    Arguments:
        image_list - a list of google storage urls e.g.
            ['gs://earthengine-public/landsat/L8/136/008/'
                  'LT81360082013127LGN01.tar.bz',
                  'gs://earthengine-public/landsat/L8/136/008/'
                  'LT81360082013127LGN01.tar.bz']

    Return:
        True when the download step finishes, False when it raises
        subprocess.CalledProcessError
    """
    try:
        self._download_images(image_list)
    except subprocess.CalledProcessError:
        succeeded = False
    else:
        succeeded = True
    return succeeded

def search(

self, rows_paths, start=None, end=None)

Search in Landsat's scene_list file. The file is stored as .zip on Google Storage and includes the gs url of all available images.

Example of file information on scene_list.zip: gs://earthengine-public/landsat/L8/232/096/LT82320962013127LGN01.tar.bz

Arguments: rows_paths - a string of paired values. e.g. '003,003,001,001' start - a string containing the start date. e.g. 12/23/2014 end - a string containing the end date. e.g. 12/24/2014

Return: a list containing download urls. e.g.: ['gs://earthengine-public/landsat/L8/232/094/LT82320942013127LGN01.tar.bz', 'gs://earthengine-public/landsat/L8/232/093/LT82320932013127LGN01.tar.bz']

def search(self, rows_paths, start=None, end=None):
    """ Search in Landsat's scene_list file. The file is stored as .zip on
    Google Storage and includes the gs url of all available images.
    Example of file information on scene_list.zip:
    gs://earthengine-public/landsat/L8/232/096/LT82320962013127LGN01.tar.bz
    Arguments:
        rows_paths - a string of paired values. e.g. '003,003,001,001'
        start - a string containing the start date. e.g. 12/23/2014
        end - a string containing the end date. e.g. 12/24/2014
        A date that is not supplied is handed to the scene list search
        as None instead of being parsed.
    Return:
        a list containing download urls. e.g.:
        ['gs://earthengine-public/landsat/L8/232/094/LT82320942013127LGN01.tar.bz',
         'gs://earthengine-public/landsat/L8/232/093/LT82320932013127LGN01.tar.bz']
    """
    # Turning rows and paths to paired tuples
    try:
        paired = create_paired_list(rows_paths)
    except ValueError as e:
        exit('Error: %s' % e.args[0])
    self._fetch_gs_scence_list()
    # parse() expects a date string, so omitted dates stay None
    start_date = parse(start) if start else None
    end_date = parse(end) if end else None
    # The context manager closes the scene list even if the search fails
    with open(self.scene_file, 'r') as scene:
        return self._search_scene_list(scene=scene,
                                       query=paired,
                                       start=start_date,
                                       end=end_date)

def single_download(

self, row, path, name, sat_type='L8')

def single_download(self, row, path, name, sat_type='L8'):
    """ Download single image from Landsat on Google Storage
    Arguments:
        row - string in this format xxx, e.g. 003
        path - string in this format xxx, e.g. 003
        name - zip file name without .tar.bz e.g. LT81360082013127LGN01
        sat_type - e.g. L7, L8, ...
    Return:
        True when the gsutil call succeeds, False when it raises
        subprocess.CalledProcessError
    """
    # The docstring has to be the first statement of the function, otherwise
    # it is a discarded string expression and __doc__ stays None.
    archive = '%s.tar.bz' % name
    url = '%s/%s/%s/%s/%s' % (self.source_url,
                              sat_type,
                              path,
                              row,
                              archive)
    try:
        subprocess.check_call(
            ["gsutil", "cp", "-n", url,
             "%s/%s" % (self.zip_dir, archive)])
        return True
    except subprocess.CalledProcessError:
        return False

def unzip(

self)

Unzip all files stored at settings.ZIP_DIR and save them in settings.UNZIP_DIR

def unzip(self):
    """
    Extract every file found in settings.ZIP_DIR and store the result in
    settings.UNZIP_DIR

    Return:
        whatever the private _unzip_images helper returns
    """
    outcome = self._unzip_images()
    return outcome

Documentation generated by pdoc 0.2.4. pdoc is in the public domain with the UNLICENSE.