'''
Created on May 08, 2013
Edited on November 12 2015

@author: Mengxue CAO
'''

from __future__ import division
from copy import deepcopy
import random
import math

################################
# read data from file
################################
def read_file(filename):
	"""Return the whole content of a text file as one string.

	Parameters:
		filename: path of the file to read.

	Returns:
		The content of the file as a string.
	"""
	# the context manager closes the file even when reading fails
	with open(filename) as f:
		return f.read()

################################
# read trained gmap from file
################################
def read_gmap(filename):
	"""Load a trained gmap from a tab-separated text file.

	The first line of the file is treated as a header and skipped. Every
	other non-blank line must hold at least two tab-separated columns: a
	bracketed, comma-separated list of integers (the position) and a
	bracketed, comma-separated list of floats (the weight vector).

	Parameters:
		filename: path of the gmap file.

	Returns:
		A list of node dictionaries with the keys 'position' (list of
		int), 'weight_vector' (list of float), 'lable' (empty list) and
		'distance' (empty list).
	"""
	# the context manager closes the file even when reading fails
	with open(filename) as f:
		lines = f.read().split('\n')

	gmap = []
	# lines[0] is the header. Blank lines are skipped instead of blindly
	# dropping the last line, so a file that does not end with a newline
	# keeps its last node.
	for line in lines[1:]:
		if not line.strip():
			continue
		columns = line.split('\t')
		node = {}
		# [1:-1] strips the surrounding brackets before splitting by ','
		node['position'] = [int(value) for value in columns[0][1:-1].split(',')]
		node['weight_vector'] = [float(value) for value in columns[1][1:-1].split(',')]
		node['lable'] = []
		node['distance'] = []
		gmap.append(node)

	return gmap


################################
# read full gmap from file
################################
def read_full_gmap(filename):
	"""Load a full gmap (as saved for retraining) from a tab-separated file.

	The first line of the file is treated as a header and skipped. Every
	other non-blank line must hold at least five tab-separated columns.
	Column 0 is a bracketed, comma-separated list of integers (the
	position), column 1 a bracketed, comma-separated list of floats (the
	weight vector) and column 4 an integer boundary flag. The remaining
	columns are not read.

	Parameters:
		filename: path of the full gmap file.

	Returns:
		A list of node dictionaries with the keys 'position',
		'weight_vector', 'is_boundary' (read from the file) and
		'error_value', 'is_grow', 'hits' (all reset to 0).
	"""
	# the context manager closes the file even when reading fails
	with open(filename) as f:
		lines = f.read().split('\n')

	full_gmap = []
	# lines[0] is the header. Blank lines are skipped instead of blindly
	# dropping the last line, so a file that does not end with a newline
	# keeps its last node.
	for line in lines[1:]:
		if not line.strip():
			continue
		columns = line.split('\t')
		node = {}
		# [1:-1] strips the surrounding brackets before splitting by ','
		node['position'] = [int(value) for value in columns[0][1:-1].split(',')]
		node['weight_vector'] = [float(value) for value in columns[1][1:-1].split(',')]
		node['error_value'] = 0
		node['is_boundary'] = int(columns[4])
		node['is_grow'] = 0
		node['hits'] = 0
		full_gmap.append(node)

	return full_gmap


################################
# randomly initialize weight vectors
################################
def initial_weight_vector(n):
	"""Build a randomly initialised weight vector with n components.

	Each component is 0.5 plus 0.1 * random.random() multiplied by a
	random integer drawn from -1, 0 or 1, so it stays within 0.1 of 0.5.

	Parameters:
		n: number of components to generate.

	Returns:
		A list of n floats.
	"""
	return [0.5 + 0.1 * random.random() * random.randint(-1, 1) for _ in range(n)]

################################
# calculate euclidean distance
################################
def calculate_euclidean_distance(l_input, l_node):
	"""Return the Euclidean distance between two vectors.

	Only the first len(l_input) components of l_node are used; an
	IndexError is raised when l_node is shorter than l_input.

	Parameters:
		l_input: sequence of numbers.
		l_node: sequence of numbers, at least as long as l_input.

	Returns:
		The square root of the summed squared component differences.
	"""
	squared_sum = 0
	for idx, value in enumerate(l_input):
		squared_sum += (value - l_node[idx]) ** 2

	return math.sqrt(squared_sum)


def calculate_cosine_distance(l_input, l_node):
	"""Return the cosine distance (1 - cosine similarity) of two vectors.

	Only the first len(l_input) components of l_node are used. A
	ZeroDivisionError is raised when either vector has zero length
	(all components 0), because the denominator is then 0.

	Parameters:
		l_input: sequence of numbers.
		l_node: sequence of numbers, at least as long as l_input.

	Returns:
		1 minus the dot product divided by the product of the norms.
	"""
	dot_product = 0
	input_sq = 0
	node_sq = 0
	for idx, value in enumerate(l_input):
		other = l_node[idx]
		dot_product += value * other	# numerator: dot product
		input_sq += value ** 2	# squared norm of l_input
		node_sq += other ** 2	# squared norm of l_node

	# denominator: product of the two vector norms
	norm_product = math.sqrt(input_sq * node_sq)

	return 1 - dot_product / norm_product

################################
# calculate Growth Threshold
################################
def calculate_GT(D, SF):
	"""Return the growth threshold, computed as -D * ln(SF).

	Parameters:
		D: multiplier of the logarithm (presumably the data dimension).
		SF: value whose natural logarithm is taken (presumably the
			spread factor); must be positive.

	Returns:
		The growth threshold.
	"""
	return -D * math.log(SF)

################################
# calculate Growth Threshold
# for high dimensional data
################################
def calculate_GT_HD(D, SF):
	"""Return the growth threshold for high dimensional data.

	The value is -ln(D) * ln(SF); both arguments must be positive.

	Parameters:
		D: value whose natural logarithm scales the threshold
			(presumably the data dimension).
		SF: value whose natural logarithm is taken (presumably the
			spread factor).

	Returns:
		The growth threshold.
	"""
	return -math.log(D) * math.log(SF)

################################
# calculate NS
################################
def calculate_NS(N):
	"""Return the base-10 logarithm of N, but never less than 1.

	math.log10 is used instead of math.log(N) / math.log(10) because the
	quotient is inexact for powers of ten (1000 gave 2.9999999999999996
	instead of 3.0).

	Parameters:
		N: a positive number.

	Returns:
		1 when log10(N) is below 1, otherwise log10(N).

	Raises:
		ValueError: if N is not positive (raised by math.log10).
	"""
	ns = math.log10(N)
	if ns < 1:
		return 1
	return ns

################################
# calculate Mean Squared Error
################################
def calculate_mse(error, N):
	"""Return error divided by N (the mean of an error total over N items).

	Parameters:
		error: the error total to average.
		N: the divisor; a ZeroDivisionError is raised when it is 0.

	Returns:
		error / N.
	"""
	return error / N

################################
# get index of list by the value
# of dictionary keys
################################
def get_position_index(l_gmap, l_position):
	"""Return the index of the first node whose 'position' equals l_position.

	NOTE: 0 is returned when no node matches, which cannot be told apart
	from a match on the first node.

	Parameters:
		l_gmap: list of node dictionaries holding a 'position' key.
		l_position: the position to look for.

	Returns:
		The index of the first matching node, or 0 if there is none.
	"""
	for idx, node in enumerate(l_gmap):
		if node['position'] == l_position:
			return idx
	return 0

####################################
# find the Winner by calculating
# the distance to every node
####################################
def find_winner(gmap, testing_vector, testing_lable, measure):
	"""Find the node closest to a testing vector and record the hit on it.

	The label and the distance are appended to the 'lable' and 'distance'
	lists of the winning node, so gmap is modified in place. When several
	nodes share the smallest distance, the first of them wins.

	Parameters:
		gmap: list of node dictionaries holding the keys 'weight_vector',
			'lable' and 'distance'.
		testing_vector: the vector to compare with every node.
		testing_lable: the label recorded on the winning node.
		measure: 'euclidean', 'cosine' or 'both'. With 'both' the last 10
			components are compared by cosine distance, the components
			before them by euclidean distance, and the two are added.

	Returns:
		The list [gmap, distance], where distance is the winner's distance.

	Raises:
		ValueError: if measure is not supported or gmap is empty.
	"""
	if measure == 'euclidean':
		measure_distance = [calculate_euclidean_distance(testing_vector, node['weight_vector']) for node in gmap]
	elif measure == 'cosine':
		measure_distance = [calculate_cosine_distance(testing_vector, node['weight_vector']) for node in gmap]
	elif measure == 'both':
		measure_distance = []
		for node in gmap:
			weight_vector = node['weight_vector']
			distance_1 = calculate_euclidean_distance(testing_vector[:-10], weight_vector[:-10])	# euclidean part
			distance_2 = calculate_cosine_distance(testing_vector[-10:], weight_vector[-10:])	# cosine part
			measure_distance.append(distance_1 + distance_2)
	else:
		# previously this fell through to min() on an empty list
		raise ValueError('unsupported measure: ' + repr(measure))

	if not measure_distance:
		raise ValueError('cannot find a winner in an empty gmap')

	distance = min(measure_distance)	# distance of the winner
	winner_index = measure_distance.index(distance)	# first node with that distance

	# record lable and distance on the winner
	gmap[winner_index]['lable'].append(testing_lable)
	gmap[winner_index]['distance'].append(distance)

	return [gmap, distance]

################################
# check number of neighbours
################################
def check_number_of_neighbours(l_taken, l_node):
	"""Count how many entries of l_node also occur in l_taken.

	Parameters:
		l_taken: container that is searched with the 'in' operator.
		l_node: iterable of entries to look up; repeated entries are
			counted every time they appear.

	Returns:
		The number of entries of l_node found in l_taken.
	"""
	return sum(1 for candidate in l_node if candidate in l_taken)


################################
# check whether the node is a 
# boundary node
################################
def check_is_boundary(l_position, l_taken):
	"""Tell whether a position has a free neighbour in the four directions.

	The neighbours [x-1, y], [x+1, y], [x, y-1] and [x, y+1] are built as
	lists and looked up in l_taken.

	Parameters:
		l_position: the position to check; only its first two items
			(x and y) are used.
		l_taken: container of the positions that are already taken.

	Returns:
		0 when all four neighbours are in l_taken, otherwise 1.
	"""
	x, y = l_position[0], l_position[1]
	neighbours = ([x - 1, y], [x + 1, y], [x, y - 1], [x, y + 1])

	if all(spot in l_taken for spot in neighbours):
		return 0
	return 1


################################
# update "is_boundary" for the
# whole gmap
################################
def update_is_boundary(gmap, l_taken):
	"""Refresh the 'is_boundary' flag of every node of the gmap.

	The flag of each node is set to 1 when check_is_boundary() reports a
	true value for the node's position, otherwise to 0. The nodes are
	modified in place.

	Parameters:
		gmap: list of node dictionaries holding a 'position' key.
		l_taken: container of the positions that are already taken.

	Returns:
		The same gmap object.
	"""
	for node in gmap:
		node['is_boundary'] = 1 if check_is_boundary(node['position'], l_taken) else 0

	return gmap

################################
# reorder the gmap by distance
################################
def reorder(gmap):
	"""Sort the recorded hits of every node by ascending distance.

	The 'distance' list of each node is sorted and its 'lable' list is
	rearranged so that every label stays paired with its distance. Hits
	with equal distances keep the order in which they were recorded.

	The distances and labels are sorted together as pairs. The former
	approach made duplicate distances unique by adding a tiny offset and
	then looked labels up by distance value; the offset is lost to float
	rounding for larger distances (e.g. 1000.0), which repeated the first
	label and dropped the others. Distances are now left unmodified.

	Parameters:
		gmap: list of node dictionaries holding 'distance' and 'lable'
			lists of equal length.

	Returns:
		The same gmap object, with each node updated in place.
	"""
	for node in gmap:
		# sorted() is stable, and only the distance is used as key, so
		# ties keep their recording order and labels are never compared
		pairs = sorted(zip(node['distance'], node['lable']), key=lambda pair: pair[0])
		node['distance'] = [pair[0] for pair in pairs]
		node['lable'] = [pair[1] for pair in pairs]
	return gmap

################################
# reorder the gmap by distance
################################
def reorder_gmap(gmap):
	"""Sort the recorded hits of every node by ascending distance.

	For each node with more than one recorded distance, the 'distance'
	list is sorted and the 'lable' list is rearranged so that every label
	stays paired with its distance. Hits with equal distances keep the
	order in which they were recorded. Nodes with fewer than two hits
	are left untouched.

	The distances and labels are sorted together as pairs. The former
	approach made duplicate distances unique by adding a tiny offset and
	then looked labels up by distance value; the offset is lost to float
	rounding for larger distances (e.g. 1000.0), which repeated the first
	label and dropped the others. Distances are now left unmodified.

	Parameters:
		gmap: list of node dictionaries holding 'distance' and 'lable'
			lists of equal length.

	Returns:
		The same gmap object, with each node updated in place.
	"""
	for node in gmap:
		if len(node['distance']) > 1:
			# sorted() is stable, and only the distance is used as key, so
			# ties keep their recording order and labels are never compared
			pairs = sorted(zip(node['distance'], node['lable']), key=lambda pair: pair[0])
			node['distance'] = [pair[0] for pair in pairs]
			node['lable'] = [pair[1] for pair in pairs]
	return gmap
		

################################
# write the gmap into files
################################
def write_gmap(gmap, filename):
	"""Write position, weight vector and hits of every node into a file.

	The file starts with a header line; every node follows as one line of
	three tab-separated columns, each produced by str(). An existing file
	is overwritten.

	Parameters:
		gmap: list of node dictionaries holding the keys 'position',
			'weight_vector' and 'hits'.
		filename: path of the file to write.
	"""
	# the context manager closes the file even when a node is malformed
	with open(filename, 'w') as f:
		f.write('Position\tWeight vector\tHits\n')
		for node in gmap:
			columns = [str(node['position']), str(node['weight_vector']), str(node['hits'])]
			f.write('\t'.join(columns) + '\n')


################################
# write the gmap for retrain into files
################################
def write_gmap_retrain(gmap, filename):
	"""Write the full state of every node into a file for retraining.

	The file starts with a header line; every node follows as one line of
	six tab-separated columns (position, weight vector, error value,
	hits, is boundary, is grow), each produced by str(). An existing file
	is overwritten.

	Parameters:
		gmap: list of node dictionaries holding the keys 'position',
			'weight_vector', 'error_value', 'hits', 'is_boundary' and
			'is_grow'.
		filename: path of the file to write.
	"""
	keys = ('position', 'weight_vector', 'error_value', 'hits', 'is_boundary', 'is_grow')
	# the context manager closes the file even when a node is malformed
	with open(filename, 'w') as f:
		f.write('Position\tWeight vector\tError value\tHits\tis boundary\tis grow\n')
		for node in gmap:
			f.write('\t'.join(str(node[key]) for key in keys) + '\n')


################################
# write the words whose distance <= 0.5 into files
################################
def write_gmap_distance(gmap, filename, threshold=0.5):
	"""Write every recorded label whose distance does not exceed a threshold.

	The file starts with a header line; every selected hit follows as one
	line holding the label and the distance, separated by a tab. An
	existing file is overwritten.

	Parameters:
		gmap: list of node dictionaries holding 'lable' and 'distance'
			lists, where the j-th label belongs to the j-th distance.
		filename: path of the file to write.
		threshold: largest distance (inclusive) that is still written;
			defaults to 0.5.
	"""
	# the context manager closes the file even when a node is malformed
	with open(filename, 'w') as f:
		f.write('Word\tDistance\n')
		for node in gmap:
			for j, distance in enumerate(node['distance']):
				if distance <= threshold:
					f.write(str(node['lable'][j]) + '\t' + str(distance) + '\n')


################################
# write the position and vector
# information into matlab file
################################
def write_gmap_matlab(gmap, matlab_filename):
	"""Write position and weight vector of every node as comma-separated lines.

	Every node becomes one line holding the position values followed by
	the weight vector values. Each value, including the last one, is
	followed by a comma. An existing file is overwritten.

	Parameters:
		gmap: list of node dictionaries holding the keys 'position' and
			'weight_vector'.
		matlab_filename: path of the file to write.
	"""
	# the context manager closes the file even when a node is malformed
	with open(matlab_filename, 'w') as f:
		for node in gmap:
			position = ''.join(str(value) + ',' for value in node['position'])
			weight_vector = ''.join(str(value) + ',' for value in node['weight_vector'])
			f.write(position + weight_vector + '\n')


################################
# write the check result into files
################################
def write_check_gmap(gmap, filename):
	"""Write position, labels and distances of every node into a file.

	The file starts with a header line; every node follows as one line of
	three tab-separated columns, each produced by str(). An existing file
	is overwritten.

	Parameters:
		gmap: list of node dictionaries holding the keys 'position',
			'lable' and 'distance'.
		filename: path of the file to write.
	"""
	# the context manager closes the file even when a node is malformed
	with open(filename, 'w') as f:
		f.write('Position\tLable\tDistance\n')
		for node in gmap:
			columns = [str(node['position']), str(node['lable']), str(node['distance'])]
			f.write('\t'.join(columns) + '\n')


################################
# write the labels recorded on each
# node into matlab file
################################
def write_check_gmap_lable_matlab(gmap, matlab_filename):
	"""Write the labels recorded on every node as comma-separated lines.

	Every node becomes one line. A node without labels is written as
	'NaN,'; otherwise each label is written followed by a comma. An
	existing file is overwritten.

	Parameters:
		gmap: list of node dictionaries holding a 'lable' list.
		matlab_filename: path of the file to write.
	"""
	# the context manager closes the file even when a node is malformed
	with open(matlab_filename, 'w') as f:
		for node in gmap:
			labels = node['lable']
			if not labels:
				f.write('NaN,' + '\n')
			else:
				f.write(''.join(str(label) + ',' for label in labels) + '\n')
	

################################
# write the position and every recorded
# distance of each node into matlab file
################################
def write_check_distance_gmap_matlab(gmap, matlab_filename):
	"""Write every recorded distance of every node next to its position.

	For each recorded distance one line is written, holding the position
	values and the distance, each followed by a comma. A node without
	distances gets a single line with 'NaN,' in place of the distance.
	An existing file is overwritten.

	Parameters:
		gmap: list of node dictionaries holding the keys 'position' and
			'distance'.
		matlab_filename: path of the file to write.
	"""
	# the context manager closes the file even when a node is malformed
	with open(matlab_filename, 'w') as f:
		for node in gmap:
			position = ''.join(str(value) + ',' for value in node['position'])
			distances = node['distance']
			if not distances:
				f.write(position + 'NaN,' + '\n')
			else:
				f.write(''.join(position + str(distance) + ',' + '\n' for distance in distances))


################################
# write the position and mean distance
# of each node into matlab file
################################
def write_check_distance_mean_gmap_matlab(gmap, matlab_filename):
	"""Write the mean recorded distance of every node next to its position.

	Every node becomes one line holding the position values, each
	followed by a comma, and then the mean of its distances. A node
	without distances gets 'NaN,' in place of the mean. Note that the
	mean is written without a trailing comma while 'NaN,' has one; this
	output format is kept as it is. An existing file is overwritten.

	Parameters:
		gmap: list of node dictionaries holding the keys 'position' and
			'distance'.
		matlab_filename: path of the file to write.
	"""
	# the context manager closes the file even when a node is malformed
	with open(matlab_filename, 'w') as f:
		for node in gmap:
			position = ''.join(str(value) + ',' for value in node['position'])
			distances = node['distance']
			if not distances:
				f.write(position + 'NaN,' + '\n')
			else:
				mean_distance = sum(distances) / len(distances)
				f.write(position + str(mean_distance) + '\n')



################################
# write the training tokens into files
################################
def write_data(training_token, filename):
	"""Write every training token as one tab-separated line into a file.

	An existing file is overwritten.

	Parameters:
		training_token: list of tokens, each an iterable of strings that
			are joined by tabs.
		filename: path of the file to write.
	"""
	# the context manager closes the file even when a token is malformed
	with open(filename, 'w') as f:
		for token in training_token:
			f.write('\t'.join(token) + '\n')



################################
# write the check records into files
################################
def write_data_check(records, filename):
	"""Write every check record as one line into a file.

	A line holds record[0], a tab, str(record[2]), two tabs and the
	strings of record[1] joined by tabs. An existing file is overwritten.

	Parameters:
		records: list of records; record[0] is a string, record[1] an
			iterable of strings and record[2] any value.
		filename: path of the file to write.
	"""
	# the context manager closes the file even when a record is malformed
	with open(filename, 'w') as f:
		for record in records:
			f.write(record[0] + '\t' + str(record[2]) + '\t\t' + '\t'.join(record[1]) + '\n')



################################
# write tab-joined records into files
################################
def write_check_identical(records, filename):
	"""Write every record as one tab-separated line into a file.

	An existing file is overwritten.

	Parameters:
		records: list of records, each an iterable of strings that are
			joined by tabs.
		filename: path of the file to write.
	"""
	# the context manager closes the file even when a record is malformed
	with open(filename, 'w') as f:
		for record in records:
			f.write('\t'.join(record) + '\n')


################################
# write item into files
################################
def write_item(l_item, filename):
	"""Write every item of a list on its own line into a file.

	Each item is converted with str(). An existing file is overwritten.

	Parameters:
		l_item: iterable of items to write.
		filename: path of the file to write.
	"""
	# the context manager closes the file even when writing fails
	with open(filename, 'w') as f:
		for item in l_item:
			f.write(str(item) + '\n')


################################
# write items into files
################################
def write_items(l_item_1, l_item_2, filename):
	"""Write two lists side by side, one tab-separated pair per line.

	Line i holds str(l_item_1[i]), a tab and str(l_item_2[i]). Items of
	l_item_2 beyond the length of l_item_1 are ignored. An existing file
	is overwritten.

	Parameters:
		l_item_1: list of items for the first column; its length sets
			the number of lines.
		l_item_2: list of items for the second column; must be at least
			as long as l_item_1.
		filename: path of the file to write.

	Raises:
		IndexError: if l_item_2 is shorter than l_item_1. The check is
			done before the file is opened, so no partial file is left.
	"""
	if len(l_item_2) < len(l_item_1):
		raise IndexError('l_item_2 is shorter than l_item_1')

	# the context manager closes the file even when writing fails
	with open(filename, 'w') as f:
		# zip() stops after len(l_item_1) pairs because of the check above
		for first, second in zip(l_item_1, l_item_2):
			f.write(str(first) + '\t' + str(second) + '\n')

################################
# write items into files
################################
def write_two_items(l_item, filename):
	"""Write every entry of a list as one tab-separated line into a file.

	An existing file is overwritten.

	Parameters:
		l_item: list of entries, each an iterable of strings that are
			joined by tabs.
		filename: path of the file to write.
	"""
	# the context manager closes the file even when an entry is malformed
	with open(filename, 'w') as f:
		for entry in l_item:
			f.write('\t'.join(entry) + '\n')
