#! /usr/bin/python
# -*- coding: iso-8859-15 -*-

##
## bilderspur_gallery2_plugin.py
##  - a part of Bilderspur, a tool for converting Google Earth into
##    a photo album
##
##    This module utilizes a Google Sitemap file to locate the processed
##    images at a remote site / web photo album and to generate references
##    into this online album instead of to the local files.
##
## See http://www.ohrner.net/ for latest news and updates, please.
## 
## Copyright (C) 2007  Gunter Ohrner "gunter _(@)_ ohrner.net"
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
##

from bilderspur import BilderspurException


import os
import re
import socket
import urllib
from xml.parsers import expat
from xml.sax import saxutils

## Module-wide switch: while it is true, the functions and methods of
## this module print progress and diagnostic messages.
debug = True


## Plugin API version implemented by this module.
## NOTE(review): not referenced anywhere in this file; presumably read by
## the Bilderspur application when it loads the plugin -- confirm.
BILDERSPUR_PLUGIN_API_VERSION = 1


def init(BILDERSPUR_APP, plugin_args):
	"""Create the plugin object for the given application.

	Sets the process-wide default socket timeout to 20 seconds, so that
	the web lookups done later cannot block forever, and returns a new
	Gallery2BilderspurPlugin.  plugin_args is handed to the plugin
	unchanged as the location of the sitemap file.
	"""
	socket.setdefaulttimeout(20)

	plugin = Gallery2BilderspurPlugin(BILDERSPUR_APP, plugin_args)
	return plugin


class WebImageInfoRecord:
	"""Plain record describing one image as found on its web page.

	Attributes:
	url    -- URL of the image file
	width  -- image width as given by the caller
	height -- image height as given by the caller
	"""

	def __init__(self, url, width, height):
		## The values are stored exactly as passed in, no conversion.
		self.url, self.width, self.height = url, width, height


class Gallery2BilderspurPlugin( object ):
	"""Bilderspur plugin which references images of an online album.

	On construction the album's sitemap is read once to learn the page
	URL of every image.  The URL and the dimensions of the image shown on
	such a page are looked up by downloading the page; the result is
	cached per image name.
	"""

	## Pattern used to extract the actual image URL and its dimensions
	## from the image's Gallery2 page.
	## FIXME: Shortcut URLs...  Only <img> markup carrying the
	## "ImageFrame_image" class is recognised.  Markup which references
	## the image through
	## "main.php?g2_view=core.DownloadItem&amp;g2_itemId=..." and has no
	## id / class / longdesc attributes is not matched.
	image_url_pattern = re.compile(r'<img src="([^"]+)" width="([0-9]+)" height="([0-9]+)" id="[^"]*" class="ImageFrame_image" alt="[^"]*" longdesc="[^"]*"/>', re.S)

	## Recognises image references which already are absolute URLs.
	_absolute_url_pattern = re.compile(r'[a-zA-Z][a-zA-Z0-9+.-]*://')


	def __init__(self, BILDERSPUR_APP, sitemap_file_name):
		"""Read the sitemap and hook the plugin into the application.

		BILDERSPUR_APP    -- application object; its ACTION_* / HOOK_*
		                     constants, registerHookExpression() and
		                     GeneratorResult are used by this class.
		sitemap_file_name -- location of the sitemap, opened with
		                     urllib.urlopen().
		"""
		self.actions = {
			BILDERSPUR_APP.ACTION_THUMBNAIL_GENERATION:
				self.generateThumbnailLink,
			BILDERSPUR_APP.ACTION_IMAGE_DETAIL_GENERATION:
				self.generateDetailHtmlForImage,
		}

		self.BILDERSPUR_APP = BILDERSPUR_APP

		sitemap_file = urllib.urlopen(sitemap_file_name)
		try:
			page_url_list = parseSitemap(sitemap_file)
		finally:
			## release the handle even if the sitemap is malformed
			sitemap_file.close()

		## fetch URL protocol and host/port specification; an empty
		## sitemap leaves us with an empty base instead of a missing
		## attribute
		if page_url_list:
			self.url_base = getUrlBase(page_url_list[0])
		else:
			self.url_base = ''

		if debug:
			print('URL base string: %s' % self.url_base)

		## We will now build a simple index which maps each file's name to
		## the corresponding URL.
		## This way we're able to generate the correct references and
		## hyperlinks into the KML file.
		self.name_pageurl_map = mapName2PageUrl(page_url_list)

		## The following index will map image names to their image file
		## URLs.  As this information cannot be derived from the sitemap
		## file and a web lookup is required, we build it lazily, fetching
		## files only as required.
		self.name_webimageinfo_map = {}

		BILDERSPUR_APP.registerHookExpression(
			BILDERSPUR_APP.HOOK_IMAGE_ANALYSIS_POST, self.registerImage)


	def getSupportedActions(self):
		"""Return the action identifiers this plugin provides handlers for."""
		return self.actions.keys()


	def getActionFunction(self, action_name):
		"""Return the handler for action_name; KeyError if unsupported."""
		return self.actions[action_name]


	def getPageUrlByImgName(self, image_name):
		"""Return the album page URL of image_name; KeyError if the
		image was not listed in the sitemap."""
		return self.name_pageurl_map[image_name]


	def hasWebImageInfoForImgName(self, image_name):
		"""Tell whether the web image details of image_name are cached."""
		return image_name in self.name_webimageinfo_map


	def getWebImageInfoByImgName(self, image_name):
		"""Return the cached details record of image_name; KeyError if
		no details have been computed for it."""
		return self.name_webimageinfo_map[image_name]


	def resolveImageUrl(self, image_ref):
		"""Turn an (already unescaped) image reference taken from an
		album page into the URL to use for the image.

		References which are absolute URLs are returned unchanged, all
		others get the album's URL base prepended.
		"""
		if self._absolute_url_pattern.match(image_ref):
			return image_ref
		return self.url_base + image_ref


	def computeWebImageInfoForImgName(self, image_name):
		"""Download the album page of image_name and cache the URL and
		dimensions of the image shown on it.

		Returns True if the image tag was found on the page and False
		otherwise.  Raises KeyError if image_name has no page URL;
		errors of the download itself are passed on to the caller.
		"""
		url = self.getPageUrlByImgName(image_name)
		if debug:
			print('reading %s: %s...' % (image_name, url))
		page_handle = urllib.urlopen(url)
		try:
			page = unicode(page_handle.read(), 'utf-8')
		finally:
			## do not leak the connection if reading or decoding fails
			page_handle.close()
		if debug:
			print('done, len: %d.' % len(page))

		mo = self.image_url_pattern.search(page)
		if not mo:
			return False

		## web page content is HTML/XML escaped
		image_url = self.resolveImageUrl(saxutils.unescape(mo.group(1)))
		width = mo.group(2)
		height = mo.group(3)
		if debug:
			print('%s %s %s' % (image_url, width, height))
		self.name_webimageinfo_map[image_name] = WebImageInfoRecord(
			image_url, width, height)
		return True


	def registerImage(self, iir):
		"""Make sure the web image details for the image described by
		iir are available.

		Only iir.filename is used; its base name is the image name.
		Nothing is done for images which are already cached.  Raises
		BilderspurException if the details cannot be determined.
		"""
		img_basename = os.path.basename(iir.filename)

		if self.hasWebImageInfoForImgName(img_basename):
			return

		if debug:
			print('Registering image %s...' % img_basename)
		try:
			found = self.computeWebImageInfoForImgName(img_basename)
		except BilderspurException:
			raise
		except Exception:
			## sys.exc_info() is used instead of binding the exception
			## in the except clause as that spelling is understood by
			## every Python version.
			import sys
			e = sys.exc_info()[1]
			raise BilderspurException("A technical error orrcured while "
				+ ('querying details for image "%s": ' % iir.filename)
				+ repr(e))

		if not found:
			raise BilderspurException(
				'No details could be found for image "%s".' % iir.filename)


	def generateDetailHtmlForImage(self, iir, width, height, url):
		"""Generate the HTML details document for an image's placemark.

		The result is a table containing the image comment (if there is
		one), the web image wrapped into a link to its album page, the
		image's file name and its 'EXIF DateTimeOriginal' value (if
		present in iir.tags).  The dimensions are taken from the cached
		web image details; the 'width', 'height' and 'url' arguments
		are not used.
		Raises KeyError if the image has not been registered before.
		Returns a BILDERSPUR_APP.GeneratorResult.
		"""

		img_basename = os.path.basename(iir.filename)

		wii_record = self.getWebImageInfoByImgName(img_basename)

		page_url = self.getPageUrlByImgName(img_basename)
		image_url = wii_record.url

		html_width = u' width="%s"' % wii_record.width
		html_height = u' height="%s"' % wii_record.height

		## The URLs MUST not be HTML-escaped at this place, as GE
		## does not seem to unescape them before use.
		img_body_str = u'<a href="%s"><img%s%s src="%s" /></a>' \
			% (page_url.replace('"', '&quot;'),
				html_width, html_height,
				image_url.replace('"', '&quot;'))

		if 'EXIF DateTimeOriginal' in iir.tags:
			datetimestr = iir.tags['EXIF DateTimeOriginal'].values
		else:
			datetimestr = ''

		if iir.comment is not None:
			commentstr = iir.comment
		else:
			commentstr = u''

		result_str = u'''<table width="100%%">
		<tr><td colspan="2" align="center"><strong>%s</strong></td></tr>
		<tr><td colspan="2" align="center">%s</td></tr>
		<tr><td>%s</td><td align="right">%s</td></tr>
		</table>''' % (commentstr, img_body_str,
			saxutils.escape(img_basename),
			datetimestr)

		return self.BILDERSPUR_APP.GeneratorResult(result_str, [])


	def generateThumbnailLink(self, thumb_file_name, image_list, url):
		"""Generate the thumbnail image 'href' text.

		The text is the web image URL cached under the base name of
		thumb_file_name; 'image_list' and 'url' are not used.
		Raises KeyError if no image of that name has been registered.
		Returns a BILDERSPUR_APP.GeneratorResult.
		"""
		len(image_list) ## shut up pychecker's "image_list is unused" warning...

		thumb_basename = os.path.basename(thumb_file_name)

		wii_record = self.getWebImageInfoByImgName(thumb_basename)

		image_url = wii_record.url

		return self.BILDERSPUR_APP.GeneratorResult(image_url, [])


def parseSitemap(sitemap_file):
	"""Parse the sitemap read from the open file object sitemap_file
	and return the list of URLs collected by TrivialSaxSitemapParser."""
	parser = TrivialSaxSitemapParser(sitemap_file)
	return parser.page_url_list


class TrivialSaxSitemapParser( object ):
	"""Parse all file entries (loc-elements) from the sitemap in an
	extremely simplistic way.
	This class may be interesting as it shows how to avoid global state in
	a SAX parser, taking advantage of Python's dynamic language features.
	The whole parser state is limited to an instance of this class and
	we're not using a single global variable.
	A list of all URLs found will be stored in self.page_url_list as a
	side effect of parsing the file.  White space around a URL is
	removed and loc-elements without any URL text are ignored."""

	def __init__(self, sitemap_file):
		"""Parse the open file object sitemap_file completely.

		Errors raised by the expat parser are passed on to the caller.
		"""
		## True while we are inside a loc-element
		self.collect = False
		## Text of the loc-element currently being read, None outside
		self.url_string = None
		self.page_url_list = []
		p = expat.ParserCreate()

		p.StartElementHandler = self.startElement
		p.EndElementHandler = self.endElement
		p.CharacterDataHandler = self.charData

		p.ParseFile(sitemap_file)


	def startElement(self, name, attrs):
		"""Start collecting character data when a loc-element opens."""
		if name == 'loc':
			self.collect = True
			self.url_string = ''

	def endElement(self, name):
		"""Store the URL collected for the loc-element which closes."""
		if name == 'loc':
			self.collect = False
			## Pretty-printed sitemaps surround the URL with line
			## breaks and indentation which would keep the URL from
			## being recognised later on.
			url = self.url_string.strip()
			if url:
				self.page_url_list.append(url)
			self.url_string = None

	def charData(self, data):
		"""Append a chunk of character data to the current URL; expat
		may deliver the text of one element in several chunks."""
		if self.collect:
			self.url_string += data


def getUrlBase(url):
	"""Return the leading "protocol://host[:port]" part of url, i.e.
	everything up to (not including) the first slash of the path.
	An empty string is returned if url does not start that way."""
	base_match = re.match(r'^([a-zA-Z]+://[^/]+)', url)
	if base_match is None:
		return ''
	return base_match.group(1)


def mapName2PageUrl(url_list):
	"""Build an index which maps image names to album page URLs.

	The image name is the last path component of a page URL without
	its ".html" suffix.  URLs which do not have this form are left out
	(and reported if debugging is enabled).  If several URLs yield the
	same name, the last one in url_list wins.
	"""
	## The protocol is matched in either case, like getUrlBase() does,
	## and the dot of the suffix is matched literally.
	name_pattern = re.compile(r'^[a-zA-Z]+://.+/([^/]+)\.html')

	name_url_map = {}

	for url in url_list:
		mo = name_pattern.match(url)

		if mo:
			## doesn't check if the image already exists, can't distinguish
			## identically named images anyway
			name_url_map[mo.group(1)] = url
		elif debug:
			print('Unparsebare URL: "%s"' % url)

	return name_url_map


#if __name__ == '__main__':
#	initPlugin(None, 'site1host_sitemap.xml')