# cache.tcl --
#
#       A web cache object.
#
# Copyright (c) 1998-2002 The Regents of the University of California.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# A. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
# B. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
# C. Neither the names of the copyright holders nor the names of its
#    contributors may be used to endorse or promote products derived from this
#    software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#
# A web cache object. It takes care of storing web contents
# on local disk index by the associated urls. It is also
# responsible for fetching the contents directly from the
# origin server if the data is not in the local cache.
#
Class SRMv2_Cache

#
# The web cache constructor. The <i>dir</i> argument
# is the directory to put the cache.
#
SRMv2_Cache public init { netspec {dir ~/.mash/cache/} } {
	$self next

	# srmv2 related variables
	$self instvar session_ source_
	set ab [new AddressBlock $netspec]
	set addr [$ab addr]
	set port [$ab sport]
	set ttl  [$ab ttl]
	delete $ab

	set session_ [srm_create_session $addr $port $ttl]
	set source_ [srm_create_source $session_]
	srm_set_callbacks $session_ srm_recv srm_should_recover srm_read_adu \
		srm_source_update srm_recv_cid

	SRMv2_Cache instvar id2obj_
	set id2obj_($session_) $self
	set id2obj_($source_) $self

	# cache related variables
	$self instvar dir_ index_ index_filename_
	$self create_dir $dir
	set dir_ [glob $dir]

	set index_filename_ [file join $dir_ index.db]
	if {! [catch {set f [open $index_filename_]}] } {
		while 1 {
			set line [gets $f]
			if [eof $f] {
				close $f
				break
			}
			set index_([lindex $line 0]) [lindex $line 1]
		}
	}
}

#
# Called by the cache control after winning the response timer
# war. If the data is in the local cache, get it from there.
# Otherwise, fetch it from the origin server.
#
SRMv2_Cache public send_data { url } {
	$self instvar sockets_ source_ index_ cid_names_ wcp_
	$self tkvar buffer_

	puts "wc: send_data $url"

	if [info exists index_($url)] {
		# allocate a cid for this url
		set p [split [$self name_2_cid $url] ,]
		set source [lindex $p 0]
		set cid [lindex $p 1]
		if { $cid == "" || $source != $source_} {
			set cid [srm_calloc $source_ 0 0xffffffff $url]
			set cid_names_($source_,$cid) $url
		}

		# send the data to the session
		srm_send $source_ $cid 0 $buffer_

		# callback to proxy to hand data to browser
		if [info exists sockets_($url)] {
			$wcp_ done_fetch $url $sockets_($url) $index_($url)
			unset sockets_($url)
		}
	} else {
		# need to fetch data from origin server
		$self fetch $url
	}
}

#
# Called when received data from the cache session. The
# data is stored in the tkvar <i>buffer_</i> to deal with
# binary data.
#
SRMv2_Cache public recv_data { source cid seqno } {
	$self instvar sockets_ wcp_ index_

	set url [$self cid_2_name $source $cid]

	puts "wc: recv_data $url"

	# store the data onto local cache
	$self put $url

	# callback to proxy to hand data to browser
	if [info exists sockets_($url)] {
		$wcp_ done_fetch $url $sockets_($url) $index_($url)
		unset sockets_($url)
	}
}

SRMv2_Cache public recv_cid { source cid parent_cid name } {
	$self instvar cid_names_

	puts "wc: recv_cid $name"
	set cid_names_($source,$cid) $name
}

SRMv2_Cache public should_recover { source cid sseq eseq } {
	$self instvar sockets_

	# recover only if this cache has a pending request from
	# the browser. otherwise, we do  not recover and can always
	# do a downcall if we need the data later.

	set url [$self cid_2_name $source $cid]
	if { $url != "" && [info exists sockets_($url)] } {
		puts "wc: should_recover yes $cid $url"
		return 1
	} else {
		puts "wc: should_recover no $cid $url"
		return 0
	}
}

#
# Fetch the contents of <i>url</i> from the origin server.
# This is non-blocking and calls done_fetch when the
# transactionis finished.
#
SRMv2_Cache private fetch { url } {
	puts "wc: fetching... $url"
	set token [::http::geturl $url -command "$self done_fetch"]
}

#
# Called when start_fetch is finished. This puts the contents
# fetched into the cache by passing the data to the put method.
# Do a callback to cache control to notify the data is here.
#
SRMv2_Cache private done_fetch { token } {
	$self instvar sockets_ wcp_ index_
	$self tkvar buffer_
	upvar #0 $token state

	puts "wc: done_fetch $state(url)"

	set buffer_ $state(body)
	set url $state(url)

	# put the data onto disk
	$self put $url

	# send the data to session
	$self send_data $state(url)
}


#
# Given a cid, this method returns the application level
# name (url) associated with it.
#
SRMv2_Cache public cid_2_name { source cid } {
	$self instvar cid_names_

	if [info exists cid_names_($source,$cid)] {
		set url $cid_names_($source,$cid)
	} else {
		set url [srm_get_container_name $source $cid]
	}
}

#
# Give a name (url), this method returns the (source,cid) pair
# associated with it or an empty if it does not exist.
#
SRMv2_Cache public name_2_cid { url } {
	$self instvar source_ cid_names_

	foreach p [array names cid_names_] {
		if { "$cid_names_($p)" == "$url" } {
			return [split $p ,]
		}
	}
	return ""
}

#
# Returns a filename if the url is local in the cache.
# Otherwise, an empty string is returned.
#
SRMv2_Cache public hit { url } {
	$self instvar index_
	if [info exists index_($url)] {
		return $index_($url)
	} else {
		return ""
	}
}

#
# Called to initiate the loop to access the cache. For example,
# from casting director in mashcast, or full page request from
# infocaster, or from the browser proxy. The data if exists, is
# stored in the tkvar <i>buffer_</i>.
#
SRMv2_Cache public get { url { socket "" } } {
	$self instvar index_ sockets_ wcp_ wcc_
	$self tkvar buffer_

	if { $socket != "" } {
		set sockets_($url) $socket
	}

	if [info exists index_($url)] {
		puts "wc: get from disk"

		# callback to proxy if it is requested from there
		if [info exists sockets_($url)] {
			$wcp_ done_fetch $url $sockets_($url) $index_($url)
			unset sockets_($url)
		}
	} else {
		puts "wc: get from others"

		set m [$self name_2_cid $url]
		if { $m != "" } {
			# if a mapping exists already, this means the
			# data is already in some other cache, so
			# we do a srm repair request on this cid.
			# note: we assume there is always only one adu
			# in any container

			srm_recover [lindex $m 0] [lindex $m 1] 0 0
		} else {
			# the data is not in local disk nor other caches
			# so start a timer loop

			$wcc_ create_get_timer $url
		}
	}
}

#
# Put the contents of url into the cache. The data is stored
# in the tkvar <i>buffer_</i>.
#
SRMv2_Cache public put { url } {
	$self instvar index_ dir_ index_filename_ sockets_ wcp_ wcc_
	$self tkvar buffer_

	if { ![info exists index_($url)] } {
		set update_index_file 1
	}

	set name cache[clock clicks]
	set index_($url) [file join $dir_ $name[file extension $url]]

	set f [open $index_($url) w 0644]

	fconfigure $f -translation binary
	puts -nonewline $f $buffer_
	close $f

	# write the index file
	if [catch {set f [open $index_filename_ a]}] {
		set f [open $index_filename_ w 0644]
	}

	puts $f [list $url $index_($url)]
	close $f

	# tell cache control to cancel timers related for this url
	$wcc_ cancel_all_timers $url
}

#
# Delete everything in the cache.
#
SRMv2_Cache public flush { } {
	$self instvar index_ dir_
	file delete -force -- [glob -nocomplain [file join $dir_ *]]
	catch {unset index_}
}

SRMv2_Cache private create_dir { path } {
	if { ![file isdirectory $path] } {
		set dir ""
		foreach split [file split $path] {
			set dir [file join $dir $split]
			if { ![file exists $dir] } {
				# this command will cause an error
				# if it is not possible to create the dir
				file mkdir $dir
			}
		}
	}
}


#
# SRMv2 tcl callbacks
#
proc srm_recv { src cid seqno data } {
	set cache [SRMv2_Cache set id2obj_($src)]
	$cache tkvar buffer_
	set buffer_ $data
	$cache recv_data $src $cid $seqno
}

proc srm_read_adu { src cid seqno } {
	set cache [SRMv2_Cache set id2obj_($src)]
	set url [$cache cid_2_name $src $cid]
	set filename [$cache hit $url]
	if { $url != "" && $filename != "" } {
		$cache get $url
	}

	$cache tkvar buffer_
	return $buffer_
}

proc srm_should_recover { src cid sseq eseq } {
	set cache [SRMv2_Cache set id2obj_($src)]
	$cache should_recover $src $cid $sseq $eseq
}


proc srm_recv_cid { src cid parent_cid name } {
	set cache [SRMv2_Cache set id2obj_($src)]
	$cache recv_cid $src $cid $parent_cid $name
}

proc srm_source_update { src name } {
	# returns nothing
	SRMv2_Cache set id2obj_($src) \
		[SRMv2_Cache set id2obj_([srm_get_session $src])]
}

