#! /usr/bin/python
# -*- coding: iso-8859-15 -*-

##
## vibezdb.py
##  - a Python library module to extract data from a Trekstor Vibez's DB
## See http://www.ohrner.net/ for latest news and updates, please.
## 
## $Id$
## $URL$
## 
## Copyright (C) 2006-2007  Gunter Ohrner "gunter _(@)_ ohrner.net"
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 2 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
##

## Values for FileRecord.kind (the kind of file a DB record describes).
KIND_DIR  = 'dir'      ## directory entry
KIND_TAXI = 'taxi'     ## data files
KIND_TUNE = 'tune'     ## songs
KIND_PLS  = 'playlist' ## playlist file
KIND_JPEG = 'jpeg'     ## JPEG Image
KIND_CAM  = 'camera'   ## meaning not yet known (presumably camera related)


## Values for FileRecord.type (the type / codec of a media file).
CODEC_MP3 = 'mp3'
CODEC_VORBIS = 'vorbis'


## When true, the parser prints diagnostic output while importing a DB.
debug = False
#debug = True

def bestr2num(strg):
	'''Decode an unsigned integer from a little endian byte string.

	Despite the historic name (and the former description, which
	claimed big endian), the first byte is the least significant one:
	byte number i contributes its value shifted left by i * 8 bits.

	strg may be a byte string of any length (Python 2 str, Python 3
	bytes or anything else accepted by bytearray()). An empty string
	decodes to 0.

	Returns the decoded non-negative integer.'''
	num = 0
	## bytearray() yields integer byte values on Python 2 and Python 3
	## alike, so no ord() / long() juggling is needed.
	for pos, byte in enumerate(bytearray(strg)):
		num |= byte << (pos * 8)

	return num


class FileRecord:
	'''Details about one file listed in the player DB (e.g. a song).

	All time information is represented as standard unix timestamps
	(integer seconds since the epoch) if not explicitly mentioned
	differently. Every attribute starts out as None and is filled in
	while the DB is parsed.'''
	def __init__(self):
		self.id = None           ## record id, used as key in the DB
		self.flag1 = None        ## unknown
		self.flag2 = None        ## unknown
		self.durationms = None   ## track length in milliseconds
		self.filesize = None     ## size of file in bytes
		self.freq = None         ## frequency of media file in kHz, 0 if N/A
		self.mtime = None        ## last modification time
		self.flag4 = None        ## unknown
		self.flag5 = None        ## unknown
		self.flag6 = None        ## unknown
		self.flag7 = None        ## unknown
		self.ltime = None        ## last listening time (time the song ended!)
		self.lcount = None       ## listen counter
		self.trackno = None      ## number of track within an album
		self.unknowntime1 = None ## unknown time stamp, initially mtime
		self.kind = None         ## string describing kind of file (see KIND_*-constants)
		self.type = None         ## string describing type of media (see CODEC_*-constants)
		self.artist = None
		self.album = None
		self.title = None
		self.bitrate = None      ## bitrate in 1/1000th bits/sec
		self.genre = None
		self.year = None
		self.filename = None
		self.unknowntime2 = None ## unknown time stamp

	def __str__(self):
		'''Return a one line summary of the id, title, size and flags.

		Fields which have not been filled in yet are shown as "None"
		instead of making the numeric formatting raise a TypeError.'''
		def fmt(spec, value):
			## '%x' / '%d' cannot format None, so spell it out instead
			if value is None:
				return 'None'
			return spec % value

		return 'ID: %s, Title: %s, Size: %s, ' \
			'Flag1: %s, Flag2: %s, Flag4: %s, Flag5: %s, ' \
			'Flag6: %s, Flag7: %s' \
			% (fmt('%x', self.id), self.title, fmt('%d', self.filesize),
				fmt('%x', self.flag1), fmt('%x', self.flag2),
				fmt('%x', self.flag4), fmt('%x', self.flag5),
				fmt('%x', self.flag6), fmt('%x', self.flag7))


class VibezDbImportException(Exception):
	'''Error raised when a DB image cannot be imported; the single
	constructor argument is the human readable error text.'''
	def __init__(self, text):
		super(VibezDbImportException, self).__init__(text)


class VibezDb:
	'''Parsed contents of a Trekstor Vibez database image.

	Constructing an instance parses the image and provides:

	  mtime      -- DB modification time (unix timestamp)
	  cnt1, cnt2 -- the two entry counts stored in the DB; they are often
	                identical, but deviations have been observed
	  records    -- list of FileRecord objects, in DB order
	  rec_by_id  -- dictionary mapping record ids to the same objects

	The parser relies on the module level bestr2num() helper and prints
	diagnostics if the module level "debug" flag is set.'''

	## The DB seems to start with this fixed tag.
	_START_TAG = b'\x02\x00\x00\x00'
	## The word after the timestamp block length field seems to have
	## this fixed value.
	_SYNC_TAG = b'\xff\x02\x00\x00'
	## Numeric value of _SYNC_TAG; a key with this value inside a
	## key/value block carries no value word.
	_SYNC_ID = 0x2FF
	## Offset of the two entry counts, seems to be fixed.
	_COUNT_OFS = 0x325

	def __init__(self, db_data):
		'''Parse the DB image db_data.

		db_data is a byte string holding the complete DB, or any object
		with the same slicing, len() and find() behaviour (e.g. a read
		only mmap).

		Raises VibezDbImportException if the image fails one of the
		consistency checks.'''
		if db_data[0:4] != self._START_TAG:
			raise VibezDbImportException('Unexpected start tag: "%s"' % db_data[0:4])

		## Second 32 bit word and last 32 bit word are a DB modification
		## UNIX timestamp
		if db_data[4:8] != db_data[-4:]:
			raise VibezDbImportException('DB frame timestamps do not match: '
				'"%s" vs. "%s"' \
				% (db_data[4:8], db_data[-4:]))

		self.mtime = bestr2num(db_data[4:8])

		## start importing the file metadata
		cur_ofs = self._COUNT_OFS
		## I first thought that the number of DB entries would appear
		## repeated twice as a 32 bit integer value at this offset,
		## but that does not exactly seem to be the case. Both numbers
		## are often identical and seem to represent counts, but I've
		## also seen deviations between both!
		self.cnt1 = bestr2num(db_data[cur_ofs:cur_ofs+4])
		self.cnt2 = bestr2num(db_data[cur_ofs+4:cur_ofs+8])
		if debug:
			print('%s %s' % (self.cnt1, self.cnt2))
			## A deviation is only reported, not treated as an error.
			if self.cnt1 != self.cnt2:
				print('Count deviation: %d vs. %d!' % (self.cnt1, self.cnt2))

		## now a list of "cnt" file IDs follows
		cur_ofs = self.prepareRecords(db_data, cur_ofs+8, self.cnt2)

		## In the following, there are a few lists of values, each list
		## contains one value for each file in the DB. First a run of
		## 32 bit values ...
		for field_name in ('flag1', 'flag2', 'durationms', 'filesize',
				'freq', 'mtime', 'flag4', 'flag5', 'flag6', 'flag7',
				'ltime'):
			cur_ofs = self.updateRecordsWithVal(db_data, cur_ofs, field_name)
		## ... then two lists of 16 bit values.
		for field_name in ('lcount', 'trackno'):
			cur_ofs = self.updateRecordsWithVal(db_data, cur_ofs, field_name, 2)

		## After the Track Number, there seem to be 5 bytes per track
		## for all tracks counted by cnt2. I'm still unsure how they're
		## split ie. if it's really cnt2 * 5 bytes blocks, or rather
		## cnt2 * 1 bytes + cnt2 * 4 bytes (more likely) or even something
		## else.
		## We skip these bytes and land directly on the size specification
		## of the following data block. This block is the first in the DB
		## which features a list of File ID / Value pairs, instead of just
		## pure values.

		## Timestamp block length specification offset
		cur_ofs += self.cnt2 + 4*self.cnt2

		## This 32 bit word specifies the length of the following block
		## (including a (seemingly) constant 32 bit marker in the following
		## word) in bytes.
		block_len = bestr2num(db_data[cur_ofs:cur_ofs+4])
		cur_ofs += 4 ## advance offset over block length field

		if db_data[cur_ofs:cur_ofs+4] != self._SYNC_TAG:
			raise VibezDbImportException('Parser out of sync: '
				'Timestamp block sync marker not found '
				'at offset %d.' % cur_ofs)

		## Sanity check: The real block length should be less than or equal
		## to expected length bytes and - without the sync marker - a
		## multiple of 8, as it carries 2-word-pairs. A violation is only
		## reported in debug mode, it does not abort the import.
		expected_length = self.cnt1 * 4 * 2 + 4
		expected_length2 = self.cnt2 * 4 * 2 + 4

		if debug and (expected_length < block_len or (block_len - 4) % 8 != 0):
			print('Unexpected timestamp block length '
				'encountered: '
				'"%s" vs. "%s" vs. "%s"' \
				% (block_len, expected_length, expected_length2))

		cur_ofs += 4 ## advance offset to start of data area

		## now an area with record id/timestamp value pairs (kvp) follows
		cur_ofs = self.updateRecordsKvp(db_data, cur_ofs, block_len-4,
			'unknowntime1')

		## After this KVP list, there are "cnt2" words of pointers or
		## offsets / lengths, followed by a block of data. ("2c block")
		## The pointers / offsets may specify the location of some track
		## / file data within this block.
		## Not every file seems to have data assigned inside this block
		## (offset word is 0x00000000), in fact the whole block may have
		## the length "0".

		## advance offset over pointer list to 2c block length field
		cur_ofs += self.cnt2 * 4

		block_len = bestr2num(db_data[cur_ofs:cur_ofs+4])

		if debug:
			print('2c block len:  %s' % block_len)

		## advance offset over length field
		cur_ofs += 4

		## Skip "2c block": We cannot currently parse it, but also do
		## not need the information stored inside, whatever it may be.
		cur_ofs += block_len

		for block_type in ('kind', 'type', 'artist', 'album', 'title'):
			cur_ofs = self.updateRecordsWithTag(db_data, cur_ofs, block_type)

		cur_ofs = self.updateRecordsWithVal(db_data, cur_ofs, 'bitrate')

		for block_type in ('genre', 'year', 'filename'):
			cur_ofs = self.updateRecordsWithTag(db_data, cur_ofs, block_type)

		cur_ofs = self.updateRecordsWithVal(db_data, cur_ofs, 'unknowntime2')

		## We stop parsing the DB here, we collected all the information
		## which we need.


	def prepareRecords(self, db_data, offset, file_count):
		'''Create one FileRecord per 32 bit record id stored at offset.

		(Re-)initialises self.records and self.rec_by_id with file_count
		fresh records and returns the offset behind the id list.

		Raises VibezDbImportException if the id list does not fit into
		db_data.'''
		## The count comes straight from the file: refuse values which
		## cannot be right instead of allocating records for them.
		if offset + file_count * 4 > len(db_data):
			raise VibezDbImportException('DB too short to hold %d record ids '
				'at offset %d.' % (file_count, offset))

		self.records = []
		self.rec_by_id = {}

		for i in range(file_count):
			rec = FileRecord()
			rec.id = bestr2num(db_data[offset:offset+4])
			if debug:
				print(rec.id)
			self.records.append(rec)
			self.rec_by_id[rec.id] = rec
			offset += 4

		return offset


	def updateRecordsWithVal(self, db_data, offset, field_name, val_len = 4):
		'''Read one val_len byte number per record, starting at offset,
		and store it in the attribute field_name of the records (in DB
		order). Returns the offset behind the value list.'''
		for rec in self.records:
			value = bestr2num(db_data[offset:offset+val_len])
			setattr(rec, field_name, value)
			if debug:
				print('%s : %r' % (field_name, value))
			offset += val_len

		return offset


	def updateRecordsKvp(self, db_data, offset, data_len, field_name,
			val_len = 4):
		'''Read a block of record id / value pairs.

		The block starts at offset and is data_len bytes long. Each pair
		consists of a 32 bit record id followed by a val_len byte value,
		which is stored in the attribute field_name of the record with
		that id. A key equal to the sync marker value carries no value
		and is skipped.

		Returns the offset behind the consumed data. Raises
		VibezDbImportException if a key does not belong to any record.'''
		while data_len > 0:
			key_ofs = offset
			rec_id = bestr2num(db_data[offset:offset+4])
			offset += 4
			data_len -= 4
			if rec_id == self._SYNC_ID:
				continue

			value = bestr2num(db_data[offset:offset+val_len])
			offset += val_len
			data_len -= val_len

			try:
				rec = self.rec_by_id[rec_id]
			except KeyError:
				## report corrupt data like every other parse error
				raise VibezDbImportException('Unknown record id 0x%x found '
					'at offset %d.' % (rec_id, key_ofs))
			setattr(rec, field_name, value)
			if debug:
				print('%s : %r' % (field_name, value))

		return offset


	def updateRecordsWithTag(self, db_data, offset, field_name):
		'''Read a block of zero terminated strings ("tags").

		The block at offset consists of a 32 bit tag data length, one 32
		bit string offset per record and finally the tag data itself. The
		string of each record is stored in its attribute field_name.

		Returns the offset behind the tag data. Raises
		VibezDbImportException if the end of the last referenced string
		does not match the reported tag data length.'''
		## The first word stores the tag data block length
		tag_data_len = bestr2num(db_data[offset:offset+4])
		if debug:
			print('Processing %s tag block, reported to have %d bytes of data.' \
				% (field_name, tag_data_len))
		offset += 4 ## advance offset over data length field

		## The tag data starts after a record offset list (cnt2 * 4 bytes)
		tag_block_ofs = offset + self.cnt2 * 4
		if debug:
			print('Tag-data start: %r' % (db_data[tag_block_ofs:tag_block_ofs+10],))
		## Several records may refer to the same string, extract it once.
		cache = {}
		## End (incl. terminator) of the last string referenced so far.
		max_block_ofs = tag_block_ofs

		for rec in self.records:
			tag_ofs = bestr2num(db_data[offset:offset+4])
			if tag_ofs not in cache:
				tag = self.getAsciiZString(db_data, tag_block_ofs, tag_ofs)
				cache[tag_ofs] = tag
				max_block_ofs = max(max_block_ofs,
					tag_block_ofs + tag_ofs + len(tag) + 1)
				if debug:
					print('Ofs: %s %s %s' % (tag_block_ofs, tag_ofs, max_block_ofs))
			else:
				tag = cache[tag_ofs]

			setattr(rec, field_name, tag)
			if debug:
				print('%s : %s' % (field_name, tag))
			offset += 4

		if tag_block_ofs + tag_data_len != max_block_ofs:
			raise VibezDbImportException('Calculated stop offset was %d vs. '
				'a measured offset of %d.' \
				% (tag_block_ofs + tag_data_len,
					max_block_ofs))

		return tag_block_ofs + tag_data_len


	def getAsciiZString(self, db_data, block_ofs, string_ofs):
		'''Return the zero terminated string which starts string_ofs
		bytes into the block at block_ofs, without its terminator.

		Raises VibezDbImportException if no terminator follows: find()
		then returns -1, which as a slice end would silently yield a
		wrong string.'''
		ofs = block_ofs + string_ofs
		end_ofs = db_data.find(b'\x00', ofs)
		if end_ofs < 0:
			raise VibezDbImportException('Unterminated string at offset %d.' % ofs)
		return db_data[ofs:end_ofs]


## Small demo / debugging aid: parse the DB file given on the command
## line and list when the tracks of one artist were last listened to.
if __name__ == '__main__':
	import os
	import sys
	import time

	if len(sys.argv) < 2:
		sys.stderr.write('Usage: %s <vibez-db-file>\n' % sys.argv[0])
		sys.exit(1)

	db_path = sys.argv[1]
	db_fd = None
	db_map = None

	try:
		try:
			import mmap
		except ImportError:
			## IronPython and JPython have no mmap
			## The DB is binary data, so do not open it in text mode.
			db_file = open(db_path, 'rb')
			try:
				db_str = db_file.read()
			finally:
				db_file.close()
		else:
			db_fd = os.open(db_path, os.O_RDONLY)
			db_map = mmap.mmap(db_fd, os.fstat(db_fd).st_size,
				access = mmap.ACCESS_READ)
			db_str = db_map

		db = VibezDb(db_str)
	finally:
		## The parsed records no longer need the mapping, release it.
		if db_map is not None:
			db_map.close()
		if db_fd is not None:
			os.close(db_fd)

	print(time.ctime(db.mtime))

	slot_start = None
	for rec in db.records:
		if rec.artist in ( b'Juno Reactor', ):
			slot_end = rec.ltime
			## time between the ends of two consecutive listed tracks
			if slot_start is not None:
				print('Slot: %s' % (slot_end - slot_start))
			slot_start = slot_end
			print('%s %s %s' % (rec.title, rec.durationms/1000.0,
				time.strftime('%Y-%m-%d %H-%M-%S',
					time.localtime(rec.ltime))))

	print('Ende')