<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' xmlns:georss='http://www.georss.org/georss' xmlns:gd='http://schemas.google.com/g/2005' xmlns:thr='http://purl.org/syndication/thread/1.0'><id>tag:blogger.com,1999:blog-953024975153422094</id><updated>2012-01-30T15:31:25.069-05:00</updated><category term='2009'/><category term='D-Lib Magazine'/><category term='web page title'/><category term='WWW 2010'/><category term='IETF 78'/><category term='publications'/><category term='enterprise research'/><category term='screen scraping'/><category term='lexical signature'/><category term='Microsoft Research Faculty Summit'/><category term='Memento-Datetime'/><category term='Personal Archiving'/><category term='NDIIPP 2011'/><category term='book of the dead'/><category term='api'/><category term='Zurich'/><category term='HTTP'/><category term='WCI'/><category term='trip report'/><category term='travel'/><category term='Hany'/><category term='Memento'/><category term='CNI'/><category term='data portability'/><category term='kdd'/><category term='OAI'/><category term='NDIIPP 2010'/><category term='TR'/><category term='CS 796/896'/><category term='MementoFox'/><category term='404 error'/><category term='NSF/JISC Workshop'/><category term='JCDL 2010'/><category term='Content Negotiation'/><category term='Foo Camp 2010'/><category term='kdd2011'/><category term='facebook'/><category term='HT'/><category term='Hypertext'/><category term='RDF'/><category term='WWW'/><category term='NDSA'/><category term='ODU'/><category term='Web Archiving'/><category term='Add-on'/><category term='Data Mining'/><category term='acm'/><category term='Cross Country'/><category term='Internet Draft'/><category term='Radiation'/><category term='JCDL 2011'/><category term='LDOW 2010'/><category term='Code{4}Lib'/><category 
term='Map'/><category term='records management'/><category term='OAC'/><category term='Firefox'/><category term='Educause Review'/><category term='Object Reuse and Exchange'/><category term='LIBER'/><category term='Japan'/><category term='delicious'/><category term='IETF'/><category term='InDP'/><category term='plotting'/><category term='JCDL 2009'/><category term='NFL'/><category term='ORE'/><category term='JCDL'/><category term='Internet archive'/><category term='dissertation'/><category term='Python'/><category term='Grasshopper'/><category term='NDIIPP 2009'/><category term='Microsoft'/><category term='Data-Driven Science'/><category term='RIBDA'/><category term='LDOW'/><category term='rediscover missing web pages'/><category term='scrapbook'/><category term='HT 2010'/><category term='2011'/><category term='spring 2012'/><category term='Web Site Reconstruction'/><category term='NDIIPP'/><category term='digital preservation'/><category term='Foo Camp'/><category term='Archive Facebook'/><category term='CS 795/895'/><category term='archive'/><category term='Digital Preservation Award'/><category term='Library of Congress'/><category term='agile'/><category term='IIPC'/><category term='Internships'/><category term='Conference'/><category term='Washington DC'/><category term='timemap'/><category term='Hypertext 2010'/><category term='Communications of the ACM'/><category term='Last-Modified'/><category term='Cloud'/><category term='Time Travel'/><category term='DOE'/><category term='Silicon Valley'/><category term='WAC'/><category term='research'/><category term='OAI-ORE'/><category term='Annotations'/><category term='statistical computing'/><category term='IIPC 2011'/><category term='Irony'/><category term='CNI Fall 2009'/><category term='InDP 2009'/><category term='Extension'/><category term='2010'/><category term='Synchronicity'/><category term='Doctoral Consortium'/><category term='Superbowl'/><category term='YouTube'/><category term='PowerPoint'/><category 
term='award'/><category term='Hypertext 2009'/><category term='Google'/><category term='Switzerland'/><category term='Warrick'/><category term='PDA2011'/><category term='tags'/><category term='MITRE'/><category term='PageRank'/><category term='CACM'/><category term='San Francisco'/><category term='IEEE TCDL'/><category term='technical report'/><category term='Roadtrip'/><category term='data set'/><category term='social media'/><category term='cs 495/595'/><category term='CS 751/851'/><category term='Football'/><category term='OAC 2011'/><category term='R'/><category term='Digging Into Data'/><title type='text'>Web Science and Digital Libraries Research Group</title><subtitle type='html'>Research and Teaching Updates from the Web Science and Digital Libraries Research Group at Old Dominion University.</subtitle><link rel='http://schemas.google.com/g/2005#feed' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/posts/default'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default?max-results=100'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/'/><link rel='hub' href='http://pubsubhubbub.appspot.com/'/><author><name>Michael L. 
Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><generator version='7.00' uri='http://www.blogger.com'>Blogger</generator><openSearch:totalResults>91</openSearch:totalResults><openSearch:startIndex>1</openSearch:startIndex><openSearch:itemsPerPage>100</openSearch:itemsPerPage><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-393858016801583142</id><published>2012-01-24T11:36:00.000-05:00</published><updated>2012-01-24T11:36:11.296-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Web Site Reconstruction'/><category scheme='http://www.blogger.com/atom/ns#' term='Warrick'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='digital preservation'/><title type='text'>2012-01-23: Release of Warrick 2.0 Beta</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-rcX3oU_XEpM/Txx3P1RY7iI/AAAAAAAAALY/-DzqJG4gTZg/s1600/logo.gif" imageanchor="1" style="clear: left; float: left; margin-right: 1em; margin-bottom: 1em;"&gt;&lt;img src="http://2.bp.blogspot.com/-rcX3oU_XEpM/Txx3P1RY7iI/AAAAAAAAALY/-DzqJG4gTZg/s320/logo.gif" border="0" height="130" width="80"&gt;&lt;/a&gt;&lt;/div&gt;After a long hiatus, the &lt;a href="http://warrick.cs.odu.edu/"&gt;Warrick&lt;/a&gt; tool has been resurrected with some modifications. Warrick is a free utility for reconstructing (or recovering) a website. 
The &lt;a href="http://frankmccown.blogspot.com/2006/01/reconstructing-websites-with-warrick.html"&gt;original version of Warrick&lt;/a&gt; discovered archived versions of resources by searching the Web Infrastructure (which includes search engine caches and the &lt;a href="http://www.archive.org/"&gt;Internet Archive&lt;/a&gt;) for archived versions of web resources. It would automatically download and organize the best versions of the archived resources and package them into a copy of the deleted site.&lt;br /&gt;&lt;br /&gt;As discussed by Warrick's creator, &lt;a href="http://www.harding.edu/fmccown/"&gt;Frank McCown&lt;/a&gt;, the original version of Warrick &lt;a href="http://frankmccown.blogspot.com/2010/01/warrick-is-back-in-action.html"&gt;was prone to breaking due to frequent changes to search engine APIs and archive URLs&lt;/a&gt;. Warrick 2.0, adapted from Dr. McCown's original code by &lt;a href="http://www.justinfbrunelle.com/"&gt;Justin F. Brunelle&lt;/a&gt;, interfaces with the &lt;a href="http://www.mementoweb.org/"&gt;Memento framework&lt;/a&gt; via the mcurl program (developed by &lt;a href="http://www.cs.odu.edu/%7Eaalsum/"&gt;Ahmed AlSum&lt;/a&gt;). By incorporating Memento timemaps, Warrick no longer has the responsibility of directly searching and communicating with the caches and archives, or learning about new repositories. Instead, Memento handles the interface and communication with the archives, allowing Warrick to remain unaffected by API or URL changes. This makes Warrick more resistant to failures when repositories change, appear, or disappear. Memento allows Warrick to provide additional functionality, such as the ability to recover sites from a specific point in time by utilizing timemaps and the mcurl program.&lt;br /&gt;&lt;br /&gt;Warrick 2.0 has already been helping individuals recover lost web sites. 
Dag Forssell reached out with the following message:&lt;br /&gt;&lt;br /&gt;&lt;blockquote&gt;"I just Googled the idea of restoring a website from the Wayback Machine and discovered your work on Warrick. .... Perhaps you can use my project as one of your guinea pigs.&lt;br /&gt;&lt;br /&gt;I am restoring a book by Professor of Law Hugh Gibbons and found when listing his references that he created a website on the principles of law in 2002, then abandoned it in 2006 or so, when he retired. I have downloaded 143 htm files from the Wayback Machine. The site looks complete. But of course, each file comes with its own folder (css, jpgs and such) and the links all point back to the wayback machine. Cleaning it all up will be a lot of work.&lt;br /&gt;&lt;br /&gt;If it is in the cards for you ... to take this under your wing, I will be overjoyed."&lt;/blockquote&gt;&lt;br /&gt;Dag's site was successfully recovered, and helped us to work out some last remaining bugs before our beta release. Further, Dag mentioned that utilizing Warrick eliminated much of the effort on his part by allowing Warrick to deduplicate resources downloaded from the Internet Archive and arrange the resources in the correct site structure. Further, it allowed him a deeper understanding of how the resources interact within the page. His recovered content will reportedly be available live at &lt;a href="http://www.biologyoflaw.org/"&gt;http://www.biologyoflaw.org/&lt;/a&gt; (the website is not available at the time of this blog posting).&lt;br /&gt;&lt;br /&gt;We are happy to announce the release of the Beta source of the project which can be downloaded from its &lt;a href="http://code.google.com/p/warrick/"&gt;Google Code&lt;/a&gt; site. 
&lt;a href="http://code.google.com/p/warrick/wiki/README"&gt;Installation&lt;/a&gt; and &lt;a href="http://code.google.com/p/warrick/wiki/About_Warrick"&gt;usage&lt;/a&gt; instructions are available from the Google Code site.&lt;br /&gt;&lt;br /&gt;Warrick is run with a series of command line flags, or options. These are largely unchanged from the original Warrick, but some flags are new. For example, the user now has the &lt;font face="courier"&gt;&lt;b&gt;-dr&lt;/b&gt;&lt;/font&gt; and &lt;font face="courier"&gt;&lt;b&gt;-R&lt;/b&gt;&lt;/font&gt; options. The &lt;font face="courier"&gt;&lt;b&gt;-dr&lt;/b&gt;&lt;/font&gt; option allows the user to specify the date at which the site should be recovered. For example:&lt;br /&gt;&lt;br /&gt;&lt;font face="courier"&gt;&lt;b&gt;warrick.pl -dr 2004-02-01 http://www.cs.odu.edu/&lt;/b&gt;&lt;/font&gt;&lt;br /&gt;&lt;br /&gt;will recover the ODU Computer Science homepage as close as possible to February 1st, 2004.&lt;br /&gt;&lt;br /&gt;&lt;font face="courier"&gt;&lt;b&gt;-R&lt;/b&gt;&lt;/font&gt; is the resume flag, which allows a user to resume a suspended reconstruction job from a saved file.&lt;br /&gt;&lt;br /&gt;Let's say we run the following recovery job:&lt;br /&gt;&lt;br /&gt;&lt;font face="courier"&gt;&lt;b&gt;warrick.pl -D MyRecoveryDirectory -k -n 100 http://www.justinfbrunelle.com/&lt;/b&gt;&lt;/font&gt;&lt;br /&gt;&lt;br /&gt;This will recover Justin Brunelle's homepage into the directory &lt;font face="courier"&gt;MyRecoveryDirectory&lt;/font&gt; with the &lt;font face="courier"&gt;&lt;b&gt;-D&lt;/b&gt;&lt;/font&gt; flag, convert all links from absolute to be relative to the local disk with the &lt;font face="courier"&gt;&lt;b&gt;-k&lt;/b&gt;&lt;/font&gt; flag, and stop the recovery after 100 resources are recovered with the &lt;font face="courier"&gt;&lt;b&gt;-n&lt;/b&gt;&lt;/font&gt; flag.&lt;br /&gt;&lt;br /&gt;When Warrick completes the recovery session of 100 files, it saves the recovery state in a 
save file. A user can resume the state by using the &lt;font face="courier"&gt;&lt;b&gt;-R&lt;/b&gt;&lt;/font&gt; flag as follows:&lt;br /&gt;&lt;br /&gt;&lt;font face="courier"&gt;&lt;b&gt;warrick.pl -R MYSAVEFILE.save &lt;/b&gt;&lt;/font&gt;&lt;br /&gt;&lt;br /&gt;This will resume the suspended job stored in &lt;font face="courier"&gt;MYSAVEFILE.save&lt;/font&gt;. This will recover an additional 100 files.&lt;br /&gt;&lt;br /&gt;For a visual example, let's look at one of the aforementioned commands and demonstrate how it can recover a page.&lt;br /&gt;&lt;br /&gt;&lt;font face="courier"&gt;&lt;b&gt;warrick.pl -dr 2004-02-01 http://www.cs.odu.edu/&lt;/b&gt;&lt;/font&gt;&lt;br /&gt;&lt;br /&gt;We can visit the current (as of 2012-01-23) ODU CS website (&lt;a href="http://www.cs.odu.edu/"&gt;http://www.cs.odu.edu/&lt;/a&gt;) to see the following representation:&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-uBW-Ef1-tHw/Tx34WI4a9hI/AAAAAAAAALk/5G1SvK0oX-M/s1600/current_cs_page.png" imageanchor="1" style="clear: left; margin-right: 1em; margin-bottom: 1em;"&gt;&lt;img src="http://2.bp.blogspot.com/-uBW-Ef1-tHw/Tx34WI4a9hI/AAAAAAAAALk/5G1SvK0oX-M/s320/current_cs_page.png" border="0" height="214" width="320"&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;To get an idea of what Warrick will recover, we can observe the ODU CS homepage archived at the Internet Archive on &lt;a href="http://web.archive.org/web/20040206001742/http://www.cs.odu.edu/"&gt;2004-02-06&lt;/a&gt;.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-bu7_oRQt_sA/Tx34tcYThfI/AAAAAAAAALw/c6DTs2SJ3jM/s1600/wayback_cs_page.png" imageanchor="1" style="clear: left; margin-right: 1em; margin-bottom: 1em;"&gt;&lt;img src="http://4.bp.blogspot.com/-bu7_oRQt_sA/Tx34tcYThfI/AAAAAAAAALw/c6DTs2SJ3jM/s320/wayback_cs_page.png" border="0" height="177" 
width="320"&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;After running Warrick, we can view the reconstructed page at my local directory.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/--r0KzlfazCA/Tx35E8_gbyI/AAAAAAAAAL8/Wssfl_UZBXg/s1600/recovered_cs_page.png" imageanchor="1" style="clear: left; margin-right: 1em; margin-bottom: 1em;"&gt;&lt;img src="http://3.bp.blogspot.com/--r0KzlfazCA/Tx35E8_gbyI/AAAAAAAAAL8/Wssfl_UZBXg/s320/recovered_cs_page.png" border="0" height="213" width="320"&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;This resource is nearly identical to the copy at the Internet Archive. The branding at the top of the page has been removed to keep the representation as faithful as possible to the original resource at the time of archiving. However, we can see from the reporting that all 10 of the recovered resources that make up the recovered page came from the Internet Archive.&lt;br /&gt;&lt;font face="courier"&gt;&lt;br /&gt;#############################################&lt;br /&gt;Memento Timegate Accesses: 11&lt;br /&gt;Internet Archive Contributions: 10&lt;br /&gt;Bing Contributions: 0&lt;br /&gt;Google Contributions: 0&lt;br /&gt;WebCitation Contributions: 0&lt;br /&gt;Diigo Contributions: 0&lt;br /&gt;UK Archives Contributions: 0&lt;br /&gt;URIs obtained from lister Queries: 0&lt;br /&gt;####&lt;br /&gt;Total recoveries completed: 10&lt;br /&gt;Number of cache resources used: 0&lt;br /&gt;Number of resources overwritten: 0&lt;br /&gt;Number of avoided overwrites: 0&lt;br /&gt;Total failed recoveries: 1&lt;br /&gt;Images recovered: 8&lt;br /&gt;HTML pages recovered: 1&lt;br /&gt;Other resources recovered: 1&lt;br /&gt;URIs left in the Frontier: 0&lt;br /&gt;#############################################&lt;br /&gt;&lt;/font&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;More examples can be viewed in the README file available in the source archive or at Warrick's &lt;a 
href="http://code.google.com/p/warrick/w/list"&gt;Google Code Wiki&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;Please note that this is only a beta version of the software. Also, it is only runnable via Perl on a Linux command-line. A new version of &lt;a href="http://warrick.cs.odu.edu/"&gt;warrick.cs.odu.edu&lt;/a&gt; is in development and will be released soon. This web interface will allow users to run Warrick from a browser which will allow tech-savvy and non-tech-savvy users alike to benefit from Warrick.&lt;br /&gt;&lt;br /&gt;If you utilize Warrick to recover a web site, we are very interested in learning about your experience; this will help us improve Warrick for future users. Please reach out to us via email by joining the WarrickRecovery Google Group (warrickrecovery@googlegroups.com) to learn how you may help.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;--Justin F. Brunelle&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-393858016801583142?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/393858016801583142/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2012/01/2012-01-23-release-of-warrick-20-beta.html#comment-form' title='3 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/393858016801583142'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/393858016801583142'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2012/01/2012-01-23-release-of-warrick-20-beta.html' title='2012-01-23: Release of Warrick 2.0 Beta'/><author><name>Justin F Brunelle</name><uri>http://www.blogger.com/profile/00580381835470799911</uri><email>noreply@blogger.com</email><gd:image 
rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='24' src='http://3.bp.blogspot.com/_AmWWXD7g2JA/TG0DJ_mEneI/AAAAAAAAAAM/_AvbhphHU8I/S220/2010-05-24+17.06.03.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/-rcX3oU_XEpM/Txx3P1RY7iI/AAAAAAAAALY/-DzqJG4gTZg/s72-c/logo.gif' height='72' width='72'/><thr:total>3</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-5285920999253314908</id><published>2012-01-22T14:18:00.000-05:00</published><updated>2012-01-22T14:18:07.495-05:00</updated><title type='text'>2012-01-221: 2011 NFL Season Conference Championship</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-v5mZJEcJ3Kw/TxxeGQ4rEOI/AAAAAAAAAJU/sWF1YeARlZI/s1600/389px-NFL_playoffs_logo_new.svg.png" imageanchor="1" style="clear:right; float:right; margin-left:1em; margin-bottom:1em"&gt;&lt;img border="0" height="222" width="320" src="http://3.bp.blogspot.com/-v5mZJEcJ3Kw/TxxeGQ4rEOI/AAAAAAAAAJU/sWF1YeARlZI/s320/389px-NFL_playoffs_logo_new.svg.png" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;The NFL Conference championship games are today. Our models have a tendency to reward teams that can pass the ball well as Passing efficiency correlates with wins rather well. Therefore it is no surprise that two out of the three models predict New England will win over Baltimore. However the Neural Network is predicting that it will be a close game and that New England will not cover the spread of 7 points.&lt;br /&gt;&lt;br /&gt;The San Francisco / New York game is going to be a good game to watch. 
Both teams are very close but the Giants have the edge on passing efficiency.&lt;br /&gt;&lt;br /&gt;&lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt; &lt;td&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/td&gt;        &lt;td&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/td&gt;   &lt;td&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/td&gt;            &lt;td&gt;&lt;span title="Discrete"&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;                 &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At NE&lt;/td&gt;     &lt;td&gt;4&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;     &lt;td&gt;NE&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At SF&lt;/td&gt;     &lt;td&gt;1&lt;/td&gt;     &lt;td&gt;NYG&lt;/td&gt;     &lt;td&gt;NYG&lt;/td&gt;     &lt;td&gt;SF&lt;/td&gt;   &lt;/tr&gt;&lt;/tbody&gt; &lt;/table&gt;&lt;br /&gt;&lt;br /&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-5285920999253314908?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/5285920999253314908/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2012/01/2012-01-221-2011-nfl-season-conference.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5285920999253314908'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5285920999253314908'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2012/01/2012-01-221-2011-nfl-season-conference.html' title='2012-01-221: 2011 NFL Season Conference Championship'/><author><name>Greg 
Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-v5mZJEcJ3Kw/TxxeGQ4rEOI/AAAAAAAAAJU/sWF1YeARlZI/s72-c/389px-NFL_playoffs_logo_new.svg.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-7411980486300875626</id><published>2012-01-01T13:27:00.000-05:00</published><updated>2012-01-01T13:27:41.382-05:00</updated><title type='text'>2012-01-01: 2011 NFL Season Week 17</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-V4vbHcG4Nvk/TwCjK85anwI/AAAAAAAAAJE/6mUaQvEQ0zs/s1600/images.jpg" imageanchor="1" style="clear:right; float:right; margin-left:1em; margin-bottom:1em"&gt;&lt;img border="0" height="225" width="225" src="http://4.bp.blogspot.com/-V4vbHcG4Nvk/TwCjK85anwI/AAAAAAAAAJE/6mUaQvEQ0zs/s320/images.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;The last week of the regular season games is here. Week 17 traditionally exhibits greater statistical dispersion than the other weeks. Teams that have locked in playoff spots will be resting the starting players and teams that do not have a chance at the playoffs may be looking for a better draft pick for next year. &lt;br /&gt;&lt;br /&gt;Our algorithms once again have picked Green Bay to win but most likely they will rest Aaron Rodgers and most of the starters and Detroit will win the game. 
Green Bay is an enigma this year, they are 14-1 so far and they have given up more yards  than they have gained over the year which invites some &lt;a href="http://smartfootball.com/stats/the-packers-have-allowed-more-yards-than-theyve-gained-but-what-does-that-mean"&gt;interesting analysis&lt;/a&gt;.&lt;br /&gt;   &lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt; &lt;td&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/td&gt;        &lt;td&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/td&gt;   &lt;td&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/td&gt;            &lt;td&gt;&lt;span title="Discrete"&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;                 &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At PHI&lt;/td&gt;    &lt;td&gt;10&lt;/td&gt;    &lt;td&gt;WAS&lt;/td&gt;    &lt;td&gt;PHI&lt;/td&gt;    &lt;td&gt;PHI&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At ATL&lt;/td&gt;    &lt;td&gt;14&lt;/td&gt;    &lt;td&gt;TB&lt;/td&gt;    &lt;td&gt;ATL&lt;/td&gt;    &lt;td&gt;ATL&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;SF&lt;/td&gt;    &lt;td&gt;5&lt;/td&gt;    &lt;td&gt;At STL&lt;/td&gt;    &lt;td&gt;SF&lt;/td&gt;    &lt;td&gt;SF&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At MIN&lt;/td&gt;    &lt;td&gt;6&lt;/td&gt;    &lt;td&gt;CHI&lt;/td&gt;    &lt;td&gt;CHI&lt;/td&gt;    &lt;td&gt;CHI&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At GB&lt;/td&gt;    &lt;td&gt;8&lt;/td&gt;    &lt;td&gt;DET&lt;/td&gt;    &lt;td&gt;GB&lt;/td&gt;    &lt;td&gt;GB&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At NYG&lt;/td&gt;    &lt;td&gt;4&lt;/td&gt;    &lt;td&gt;DAL&lt;/td&gt;    &lt;td&gt;NYG&lt;/td&gt;    &lt;td&gt;NYG&lt;/td&gt; &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At NO&lt;/td&gt;    &lt;td&gt;4&lt;/td&gt;    &lt;td&gt;CAR&lt;/td&gt;    &lt;td&gt;NO&lt;/td&gt;    &lt;td&gt;NO&lt;/td&gt;  
&lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At HOU&lt;/td&gt;    &lt;td&gt;5&lt;/td&gt;    &lt;td&gt;TEN&lt;/td&gt;    &lt;td&gt;TEN&lt;/td&gt;    &lt;td&gt;TEN&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;BAL&lt;/td&gt;    &lt;td&gt;2&lt;/td&gt;    &lt;td&gt;At CIN&lt;/td&gt;    &lt;td&gt;BAL&lt;/td&gt;    &lt;td&gt;BAL&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;PIT&lt;/td&gt;    &lt;td&gt;7&lt;/td&gt;    &lt;td&gt;CLE&lt;/td&gt;    &lt;td&gt;PIT&lt;/td&gt;    &lt;td&gt;PIT&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At JAX&lt;/td&gt;    &lt;td&gt;3&lt;/td&gt;    &lt;td&gt;IND&lt;/td&gt;    &lt;td&gt;JAX&lt;/td&gt;    &lt;td&gt;JAX&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At MIA&lt;/td&gt;    &lt;td&gt;3&lt;/td&gt;    &lt;td&gt;NYJ&lt;/td&gt;    &lt;td&gt;MIA&lt;/td&gt;    &lt;td&gt;NYJ&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At NE&lt;/td&gt;    &lt;td&gt;11&lt;/td&gt;    &lt;td&gt;BUF&lt;/td&gt;    &lt;td&gt;NE&lt;/td&gt;    &lt;td&gt;NE&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At OAK&lt;/td&gt;    &lt;td&gt;3&lt;/td&gt;    &lt;td&gt;SD&lt;/td&gt;    &lt;td&gt;OAK&lt;/td&gt;    &lt;td&gt;SD&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At DEN&lt;/td&gt;    &lt;td&gt;3&lt;/td&gt;    &lt;td&gt;KC&lt;/td&gt;    &lt;td&gt;DEN&lt;/td&gt;    &lt;td&gt;KC&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At ARI&lt;/td&gt;    &lt;td&gt;3&lt;/td&gt;    &lt;td&gt;SEA&lt;/td&gt;    &lt;td&gt;ARI&lt;/td&gt;    &lt;td&gt;SEA&lt;/td&gt;  &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-7411980486300875626?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/7411980486300875626/comments/default' title='Post Comments'/><link rel='replies' type='text/html' 
href='http://ws-dl.blogspot.com/2012/01/2012-01-01-2011-nfl-season-week-17.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/7411980486300875626'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/7411980486300875626'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2012/01/2012-01-01-2011-nfl-season-week-17.html' title='2012-01-01: 2011 NFL Season Week 17'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-V4vbHcG4Nvk/TwCjK85anwI/AAAAAAAAAJE/6mUaQvEQ0zs/s72-c/images.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-7013582859445797364</id><published>2011-12-17T08:47:00.000-05:00</published><updated>2011-12-17T08:47:07.885-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='NFL'/><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><title type='text'>2011-12-15: 2011 NFL Season Week 15</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-zpDq5v1u9FI/TuyRCMSiJ-I/AAAAAAAAAI0/DcEJMwQdSZo/s1600/3ef58cb078c0ef12a5d7beabb9392913.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="151" src="http://1.bp.blogspot.com/-zpDq5v1u9FI/TuyRCMSiJ-I/AAAAAAAAAI0/DcEJMwQdSZo/s200/3ef58cb078c0ef12a5d7beabb9392913.jpg" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;So far this year all three of the prediction algorithms are 68% correct straight 
up. This is better than the predictions of most of the NFL "experts" such as the guys at &lt;a href="http://espn.go.com/nfl/picks"&gt;ESPN&lt;/a&gt;. Last year we ended up right below 70% correct as well. Breaking the 70% barrier over the season seems to be rather hard to do as seen on the &lt;a href="http://www.thepredictiontracker.com/nflresults.php"&gt;Prediction Tracker&lt;/a&gt;. Looking into the statistics of those games reveals some interesting information. In the majority of those games, the losing team had better box scores but still lost the game. We had thought that incorporating the betting line data this year would have had impact but the accuracy of the straight up predictions is not significantly better than last year.&lt;br /&gt;&lt;br /&gt;The season isn't over yet and anything can happen so here are the predictions for week 15.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt; &lt;td&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/td&gt;        &lt;td&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/td&gt;   &lt;td&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/td&gt;            &lt;td&gt;&lt;span title="Discrete"&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;                 &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;DAL&lt;/td&gt;     &lt;td&gt;7&lt;/td&gt;     &lt;td&gt;at TB&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at NYG&lt;/td&gt;     &lt;td&gt;10&lt;/td&gt;     &lt;td&gt;WAS&lt;/td&gt;     &lt;td&gt;NYG&lt;/td&gt;     &lt;td&gt;NYG&lt;/td&gt;  &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;9&lt;/td&gt;     &lt;td&gt;at KC&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;9&lt;/td&gt;     &lt;td&gt;at MIN&lt;/td&gt;     
&lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at CHI&lt;/td&gt;     &lt;td&gt;3&lt;/td&gt;     &lt;td&gt;SEA&lt;/td&gt;     &lt;td&gt;CHI&lt;/td&gt;     &lt;td&gt;SEA&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at BUF&lt;/td&gt;     &lt;td&gt;2&lt;/td&gt;     &lt;td&gt;MIA&lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at HOU&lt;/td&gt;     &lt;td&gt;4&lt;/td&gt;     &lt;td&gt;CAR&lt;/td&gt;     &lt;td&gt;HOU&lt;/td&gt;     &lt;td&gt;HOU&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;TEN&lt;/td&gt;     &lt;td&gt;4&lt;/td&gt;     &lt;td&gt;at IND&lt;/td&gt;     &lt;td&gt;TEN&lt;/td&gt;     &lt;td&gt;TEN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;CIN&lt;/td&gt;     &lt;td&gt;7&lt;/td&gt;     &lt;td&gt;at STL&lt;/td&gt;     &lt;td&gt;CIN&lt;/td&gt;     &lt;td&gt;CIN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at OAK&lt;/td&gt;     &lt;td&gt;4&lt;/td&gt;     &lt;td&gt;DET&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;NE&lt;/td&gt;     &lt;td&gt;9&lt;/td&gt;     &lt;td&gt;at DEN&lt;/td&gt;     &lt;td&gt;NE&lt;/td&gt;     &lt;td&gt;NE&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at PHI&lt;/td&gt;     &lt;td&gt;3&lt;/td&gt;     &lt;td&gt;NYJ&lt;/td&gt;     &lt;td&gt;PHI&lt;/td&gt;     &lt;td&gt;NYJ&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at ARI&lt;/td&gt;     &lt;td&gt;3&lt;/td&gt;     &lt;td&gt;CLE&lt;/td&gt;     &lt;td&gt;ARI&lt;/td&gt;     &lt;td&gt;ARI&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at SD&lt;/td&gt;     &lt;td&gt;4&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;PIT&lt;/td&gt;     &lt;td&gt;2&lt;/td&gt;     &lt;td&gt;at SF&lt;/td&gt;     &lt;td&gt;PIT&lt;/td&gt;     &lt;td&gt;PIT&lt;/td&gt;   &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;-- Greg 
Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-7013582859445797364?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/7013582859445797364/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-15-2011-nfl-season-week-15.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/7013582859445797364'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/7013582859445797364'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-15-2011-nfl-season-week-15.html' title='2011-12-15: 2011 NFL Season Week 15'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-zpDq5v1u9FI/TuyRCMSiJ-I/AAAAAAAAAI0/DcEJMwQdSZo/s72-c/3ef58cb078c0ef12a5d7beabb9392913.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-813850509109377128</id><published>2011-12-14T12:36:00.007-05:00</published><updated>2011-12-14T14:13:30.074-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='timemap'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='acm'/><category scheme='http://www.blogger.com/atom/ns#' term='Python'/><title type='text'>2011-12-14 Python &amp; Memento Presentation for the ODU 
ACM</title><content type='html'>Earlier this semester, I was invited to present &lt;a href="http://www.python.org/"&gt;Python&lt;/a&gt; at an &lt;a href="http://cs.odu.edu/~acm"&gt;ODU ACM&lt;/a&gt; &lt;a href="http://cs.odu.edu/~acm/index.php?topic=29"&gt;meeting&lt;/a&gt;.  I presented a brief overview of the Python language and followed up with a code walkthrough of the code I use to parse Memento timemaps in my current research.&lt;br /&gt;&lt;br /&gt;Python, of course, has advantages and disadvantages compared to other languages.  Since most ODU undergrads have experience with C++, the presentation presents Python with respect to C++.  Python's advantages include a fast development cycle and an extensive collection of community libraries.  Its primary disadvantage compared to C++ is execution speed.  My experience is that Python is sometimes over 100 times slower.&lt;br /&gt;&lt;br /&gt;Python's basic syntax and semantics are straightforward, so the presentation focused on the Python equivalents of commonly-used C++ constructs and the differences between static (C++) and dynamic (Python) typing.  
Python's implementation of high-level data types (lists, dictionaries, tuples, and sets) and functional code were compared to the complexity of the C++ equivalents.&lt;br /&gt;&lt;br /&gt;&lt;div style="width: 425px; margin-left:auto; margin-right:auto;" id="__ss_10592547"&gt;&lt;strong style="display: block; margin: 12px 0pt 4px;"&gt;&lt;a href="http://www.slideshare.net/ScottAinsworth/odu-acm-python-memento-presentation" title="ODU ACM Python &amp;amp; Memento Presentation"&gt;ODU ACM Python &amp;amp; Memento Presentation&lt;/a&gt;&lt;/strong&gt;&lt;object id="__sse10592547" height="355" width="425"&gt;&lt;param name="movie" value="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=acm-python-111214113316-phpapp02&amp;amp;stripped_title=odu-acm-python-memento-presentation&amp;amp;userName=ScottAinsworth"&gt;&lt;param name="allowFullScreen" value="true"&gt;&lt;param name="allowScriptAccess" value="always"&gt;&lt;param name="wmode" value="transparent"&gt;&lt;embed name="__sse10592547" src="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=acm-python-111214113316-phpapp02&amp;amp;stripped_title=odu-acm-python-memento-presentation&amp;amp;userName=ScottAinsworth" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" wmode="transparent" height="355" width="425"&gt;&lt;/object&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;To bring all the pieces together, I did a code walk through of the &lt;a href="https://webspace.cs.odu.edu/%7Esainswor/uploads/Downloads/timemap.py"&gt;python.py&lt;/a&gt; module I use to parse &lt;a href="http://www.mementoweb.org/"&gt;Memento&lt;/a&gt; timemaps (see the Memento &lt;a href="http://www.mementoweb.org/guide/quick-intro/"&gt;Introduction&lt;/a&gt; and &lt;a href="http://www.mementoweb.org/guide/rfc/ID/"&gt;Internet Draft&lt;/a&gt; for more information).  The module has two classes.  The &lt;strong&gt;TimeMap&lt;/strong&gt; class is a parser and dictionary for timemap data.  
The &lt;strong&gt;TimeMapTokenizer&lt;/strong&gt; class is a tokenizer for link-style timemaps.&lt;br /&gt;&lt;br /&gt;To load a timemap, a new instance of TimeMap is created using the timemap's URI, which is the constructor's only argument.  A TimeMapTokenizer instance returns individual tokens, simplifying the parsing code in the &lt;code&gt;get_next_link&lt;/code&gt; function.  TimeMap implements the &lt;code&gt;__getitem__&lt;/code&gt; function, allowing it to act as a Python dictionary.  TimeMapTokenizer implements the &lt;code&gt;__iter__&lt;/code&gt; and &lt;code&gt;next&lt;/code&gt; functions, which allow the use of Python iteration constructs over the list of tokens.&lt;br /&gt;&lt;br /&gt;— Scott G. Ainsworth&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-813850509109377128?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/813850509109377128/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-14-python-memento-presentation.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/813850509109377128'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/813850509109377128'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-14-python-memento-presentation.html' title='2011-12-14 Python &amp; Memento Presentation for the ODU ACM'/><author><name>Scott G. 
Ainsworth</name><uri>http://www.blogger.com/profile/05860551179796856679</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='http://1.bp.blogspot.com/_QW0GAj2ACVM/SlPyfPe_UsI/AAAAAAAAAAY/_xp0tiXXfQw/S220/Photo+4+touched+up,+diffuse+glow+2.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-1071621795678617925</id><published>2011-12-14T11:28:00.004-05:00</published><updated>2012-01-09T22:09:31.252-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='spring 2012'/><category scheme='http://www.blogger.com/atom/ns#' term='cs 495/595'/><title type='text'>2011-12-14: CS 495/595 Web Server Development for Spring 2012</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-WRJquZsVYY8/TujPHg2UlBI/AAAAAAAAAic/CmkUKuQpHH4/s1600/curl-image.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 142px;" src="http://1.bp.blogspot.com/-WRJquZsVYY8/TujPHg2UlBI/AAAAAAAAAic/CmkUKuQpHH4/s200/curl-image.png" alt="" id="BLOGGER_PHOTO_ID_5686022257539060754" border="0" /&gt;&lt;/a&gt;The only WS-DL related class that will be offered in spring 2012 is CS 495/595 "Web Server Development".  
I had planned to offer CS 751/851 "Introduction to Digital Libraries", but I've taught that the last two springs and it has been a while since I've taught the web server development class (the last offering was actually from &lt;a href="http://www.cs.odu.edu/%7Emklein/"&gt;Martin Klein&lt;/a&gt; in &lt;a href="http://www.cs.odu.edu/%7Emklein/teaching/cs595-s10/"&gt;spring 2010&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;The premise of this course is that the best way to &lt;span style="font-style: italic;"&gt;really&lt;/span&gt; get to know &lt;a href="http://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol"&gt;HTTP&lt;/a&gt; is to build a fully-functional web server from scratch in the language of your choice.  That sounds simple enough, but it becomes quite challenging, in part because if you do a poor job at design at the beginning you have to live with the consequences the entire semester.  On the other hand, do a good job up front and each assignment will just drop into place (hello, &lt;a href="http://en.wikipedia.org/wiki/Software_design"&gt;software design&lt;/a&gt;).  Along the way, you'll also become quite familiar with reading &lt;a href="http://en.wikipedia.org/wiki/Request_for_Comments"&gt;RFCs&lt;/a&gt; and the &lt;a href="http://en.wikipedia.org/wiki/Representational_state_transfer"&gt;REST&lt;/a&gt; architectural model.&lt;br /&gt;&lt;br /&gt;Take a look at &lt;a href="http://www.cs.odu.edu/%7Emln/teaching/"&gt;past offerings&lt;/a&gt; of the class for an idea of what the structure will be.  The CRNs are &lt;a href="https://www.leoonline.odu.edu/plsql/web/bwckschd.p_disp_detail_sched?term_in=201120&amp;amp;crn_in=35757"&gt;35757&lt;/a&gt; (CS 495) and &lt;a href="https://www.leoonline.odu.edu/plsql/web/bwckschd.p_disp_detail_sched?term_in=201120&amp;amp;crn_in=35758"&gt;35758&lt;/a&gt; (CS 595).  The class will be on Tuesdays, 4:20 -- 7:00 pm in r. 
2120.&lt;br /&gt;&lt;br /&gt;--Michael&lt;br /&gt;&lt;br /&gt;2012-01-09 edit: The &lt;a href="http://www.cs.odu.edu/%7Emln/teaching/cs595-s12/"&gt;class homepage&lt;/a&gt; is now available.&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-1071621795678617925?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/1071621795678617925/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-14-cs-495595-web-server.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/1071621795678617925'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/1071621795678617925'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-14-cs-495595-web-server.html' title='2011-12-14: CS 495/595 Web Server Development for Spring 2012'/><author><name>Michael L. 
Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-WRJquZsVYY8/TujPHg2UlBI/AAAAAAAAAic/CmkUKuQpHH4/s72-c/curl-image.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-6521967093114348623</id><published>2011-12-08T04:07:00.004-05:00</published><updated>2011-12-08T04:20:49.982-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Cross Country'/><category scheme='http://www.blogger.com/atom/ns#' term='Cloud'/><category scheme='http://www.blogger.com/atom/ns#' term='Roadtrip'/><category scheme='http://www.blogger.com/atom/ns#' term='Internships'/><category scheme='http://www.blogger.com/atom/ns#' term='Silicon Valley'/><category scheme='http://www.blogger.com/atom/ns#' term='PowerPoint'/><category scheme='http://www.blogger.com/atom/ns#' term='Microsoft'/><title type='text'>2011-12-08: Summer Microsoft Internship</title><content type='html'>It all started in San Francisco airport while waiting to get my luggage on my way to the &lt;a href="http://ws-dl.blogspot.com/2011/03/2011-03-04-personal-digital-archiving.html"&gt;PDA2011&lt;/a&gt; conference. The recruiter from Microsoft called me to inform me that I have been accepted to intern at &lt;a href="http://careers.microsoft.com/careers/en/us/siliconvalley.aspx"&gt;Microsoft Silicon Valley&lt;/a&gt; this summer. I was ecstatic and after a couple of months of bureaucracy and a ton of documents I was ready to leave Norfolk by the end of May. 
Since I haven’t been on an adventure or a trip for a long time, and since I will definitely need a car in California for the three months of the summer, I decided to drive &lt;a href="https://www.facebook.com/profile.php?id=100002533126429"&gt;my car&lt;/a&gt; all across the continent. I have always wanted to make a road trip like that where I can stop in every city or town along the way, check out their attractions and eat from their authentic cuisines.&lt;br /&gt;&lt;br /&gt;At the same time, our colleague and best friend Moustafa Aly managed to secure a job at &lt;a href="http://www.a2z.com/all-locations/san-francisco/"&gt;Amazon’s engineering office in San Francisco&lt;/a&gt;. So when he knew I was going to drive all the way there he told me: “forget the plane, I will join you!”&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-Mez9cQS8fIU/TpxM0iAXvCI/AAAAAAAABPo/Iw5JBD8pBkY/s1600/252026_200916373286055_100001030803006_553630_24480_n.jpg"&gt;&lt;img style="float:right; margin:0 0 10px 10px;cursor:pointer; cursor:hand;width: 200px; height: 150px;" src="http://4.bp.blogspot.com/-Mez9cQS8fIU/TpxM0iAXvCI/AAAAAAAABPo/Iw5JBD8pBkY/s200/252026_200916373286055_100001030803006_553630_24480_n.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5664486896690510882" /&gt;&lt;/a&gt;&lt;br /&gt;We left Norfolk on the 24th, set the odometer of the car to 0 and having in mind since we are information retrieval and social networking people we will make our status updates and check-ins on Facebook our trip’s record keeper. We picked the route, filled up the car and drove. 
From Norfolk, stopping at &lt;a href="http://maps.google.com/maps?q=Richmond,+VA&amp;hl=en&amp;sll=37.0625,-95.677068&amp;sspn=59.597077,135.263672&amp;vpsrc=0&amp;hnear=Richmond,+Virginia&amp;t=m&amp;z=12"&gt;Richmond&lt;/a&gt; and &lt;a href="http://maps.google.com/maps?q=Nashville,+TN&amp;hl=en&amp;sll=37.540725,-77.436048&amp;sspn=0.236838,0.528374&amp;vpsrc=0&amp;hnear=Nashville,+Davidson,+Tennessee&amp;t=m&amp;z=11"&gt;Nashville&lt;/a&gt; we drove through a &lt;a href="http://en.wikipedia.org/wiki/May_21%E2%80%9326,_2011_tornado_outbreak_sequence"&gt;tornado passing Tennessee&lt;/a&gt;, almost ran out of gas in &lt;a href="http://maps.google.com/maps?q=Texas&amp;hl=en&amp;sll=36.16589,-86.784443&amp;sspn=0.482271,1.056747&amp;vpsrc=0&amp;hnear=Texas&amp;t=m&amp;z=6"&gt;Texas&lt;/a&gt; in the middle of no where, changed the &lt;a href="http://www.timetemperature.com/tzus/time_zone.shtml"&gt;clock twice in one day&lt;/a&gt;, eating the best steak I have ever had in Texas and the best burritos on earth in &lt;a href="http://maps.google.com/maps?q=Las+cruces&amp;hl=en&amp;ll=32.319924,-106.763763&amp;spn=0.252416,0.528374&amp;sll=37.540725,-77.436048&amp;sspn=0.236838,0.528374&amp;vpsrc=0&amp;hnear=Las+Cruces,+Dona+Ana,+New+Mexico&amp;t=m&amp;z=12"&gt;Las Cruses&lt;/a&gt;, playing with rockets in &lt;a href="http://en.wikipedia.org/wiki/White_Sands_Missile_Range"&gt;White sands missile range&lt;/a&gt;, passing over the &lt;a href="http://en.wikipedia.org/wiki/Hoover_Dam"&gt;Hoover dam&lt;/a&gt; and the burning the car’s &lt;a href="http://www.autopartswarehouse.com/shop_parts/a-fs-c_compressor/isuzu/rodeo.html"&gt;AC compressor&lt;/a&gt; in the desert of &lt;a href="http://maps.google.com/maps?q=Nevada&amp;hl=en&amp;sll=32.319924,-106.763763&amp;sspn=0.252416,0.528374&amp;vpsrc=0&amp;hnear=Nevada&amp;t=m&amp;z=7"&gt;Nevada&lt;/a&gt; we finally made it to &lt;a 
href="http://maps.google.com/maps?q=Las+Vegas&amp;hl=en&amp;sll=37.0625,-95.677068&amp;sspn=59.597077,135.263672&amp;vpsrc=0&amp;hnear=Las+Vegas,+Clark,+Nevada&amp;t=m&amp;z=11"&gt;Las Vegas&lt;/a&gt; where we wanted to spend an entire day relaxing. Next day we started driving and after 9 more hours we made it to &lt;a href="http://maps.google.com/maps?q=San+Francisco&amp;hl=en&amp;sll=36.114646,-115.172816&amp;sspn=0.482588,1.056747&amp;vpsrc=0&amp;hnear=San+Francisco,+California&amp;t=m&amp;z=12"&gt;San Francisco&lt;/a&gt; finishing &lt;a href="http://maps.google.com/maps?saddr=Norfolk,+VA&amp;daddr=Richmond,+VA+to:Roanoke,+VA+to:Nashville,+TN+to:Little+Rock,+AR+to:Dallas,+TX+to:White+Sands+Missile+Complex,+NM+to:Las+Cruces,+NM+to:Las+Vegas,+NV+to:San+Francisco,+CA&amp;hl=en&amp;ll=34.885931,-99.360352&amp;spn=29.841241,67.631836&amp;sll=36.879621,-99.360352&amp;sspn=29.130954,67.631836&amp;geocode=FVFMMgIdT_hz-ylFyiJTOpe6iTEKHnrOfxCZqw%3BFXXTPAIdcGti-yntyZlXCRGxiTHFPELibT7Yvw%3BFbq1OAIdzTA8-yn9ZqlqTQxNiDGbmLDby76dJA%3BFQLZJwIdRcbT-ik9kOsTMuxkiDGg2umh0Lk_fQ%3BFXEwEgIdxcV_-imbVh-hNKHShzEXW_MNEPUFNA%3BFYuI9AEdfWg7-ilLl0V79xlMhjGPZ0f2pJvsuQ%3BFdot-wEddUip-SHUC8bKzP9TOg%3BFcQp7QEdeuqi-SlpmAZFxxrehjG9Mj_xHdBtYw%3BFdYQJwIdMJoi-SnRffWkgre-gDGjebPV5tXMOg%3BFVJmQAIdKAe0-CkhAGkAbZqFgDH_rXbwZxNQSg&amp;vpsrc=0&amp;mra=ls&amp;t=m&amp;z=5"&gt;3559.6 miles in 5.5 days&lt;/a&gt;.&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-Xl04Su4zm8w/TpxPehASJhI/AAAAAAAABQA/Ag588W6X-g8/s1600/284292_10150342985566729_502786728_9436712_842477_n.jpg"&gt;&lt;img style="float:left; margin:0 0 10px 10px;cursor:pointer; cursor:hand;width: 200px; height: 150px;" src="http://3.bp.blogspot.com/-Xl04Su4zm8w/TpxPehASJhI/AAAAAAAABQA/Ag588W6X-g8/s200/284292_10150342985566729_502786728_9436712_842477_n.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5664489816999470610" /&gt;&lt;/a&gt;&lt;br /&gt;Working at Microsoft Silicon Valley definitely 
has its perks. The location was amazing and the engineers there are really incredible. I joined the &lt;a href="http://www.microsoft.com/en-us/office365/online-software.aspx#fbid=gS9w3De8zua"&gt;Office 365&lt;/a&gt; server-side team for PowerPoint where I shared my office with another intern from &lt;a href="http://berkeley.edu/"&gt;UC Berkeley&lt;/a&gt;. Working with this team I had the most liberty I had in years working for companies. We sat together and set the goals I needed to reach for this internship and they gave me the entire freedom to pick the way I was going to build it, which is more my style in working. I was supposed to start the implementation of a certain fraction of the distribution and investigate two other things but to my surprise they liked what I did with the first task so they decided to modify my internship goals to finish this project completely, reach ship quality and release it in the next version. With this I passed all the phases of software development from meeting with managers, architects and program managers to setting the design to development to finally quality and integration testing. Finally I had to demo my work to the three department managers to see if this could be incorporated in the next shipping release, and to my delight they were fascinated by it and it will be shipped!&lt;br /&gt;&lt;br /&gt;The first day I attended the orientation and they gave us an overview of what we would be doing this summer and how we were going to be evaluated. Our mentors then came and took us and I was introduced to my team, the &lt;a href="http://en.wikipedia.org/wiki/Microsoft_PowerPoint"&gt;PowerPoint&lt;/a&gt; team. Immediately after that I was introduced to the available projects and I chose the one that was most appealing to me. Immediately after that I was granted permissions to access the codebase. Imagine having the source code of both PowerPoint and the server cloud back-end, it felt awesome! 
For the next two weeks I tried to break into the thousands of lines of code and produced a prototype proof of concept that I was on the right track. By the end of the first week I set my internship goals with my mentor but after the fast prototype I produced I was called to a meeting with both the test and the product management team, I was representing the development team. They decided to change my goals completely to actually build the entire feature and its backend support from scratch and have the opportunity to ship it. Knowing the task in hand of rebuilding the PowerPoint backend on the cloud with the appropriate interface to match the latest award-winning rich-client application I had to go back to the basics. I had several one-on-ones with the development team of PowerPoint client-side to understand piece by piece the functionality of each module of the application. The problem with a project like PowerPoint is that it is fairly old and fairly stable with more than 20 years of development and thousands of lines of legacy code. I was completely lost in the beginning but my mentor didn't let me stumble much, I was practically staying in his office the first couple of weeks. We used C++ and C in the backend with JavaScript and C# for the matching interface. This was the trickiest part, the ability to match functionalities between two very different frameworks. At a certain point I found a severe gap in the design document related to the functionality. I talked with my manager and he told me a change like the one you want in the design document needs to be escalated. A couple of hours later I was sitting in a room full of Microsoft's elite developers, testers, PMs and managers, the least of which has 7 years of work experience under his belt,...and me! That is what I loved about Microsoft, even though I was just an intern I owned the project and they appreciated that. I explained my case and it was approved and the design document was changed! 
I was so proud of myself that day.&lt;br /&gt;&lt;br /&gt;The atmosphere within the office was relaxing, cool, upbeat and always challenging. I can fairly say I was spoiled this summer. I was residing in the corporate housing complexes where I got a &lt;a href="http://www.oakwood.com/cms/apartments.html"&gt;spacious studio apartment&lt;/a&gt; fully furnished with maid service that came to clean weekly! Courts, swimming pool and a huge hot tub were all provided for free within the apartment complex. Every other week the recruiters and the PR managers created an event, party or outing for all the interns on campus. We went hiking, bowling, watching movies and they even flew us to Seattle to visit the headquarters for the summer intern event. They paid for the flight tickets, the luxury hotel and even a car rental. &lt;a href="http://en.wikipedia.org/wiki/Steven_Sinofsky"&gt;Steven Sinofsky&lt;/a&gt; gave us a wonderful presentation where he showed us classified sneak peeks of the all-new amazing &lt;a href="http://www.youtube.com/watch?v=p92QfWOw88I"&gt;Windows 8&lt;/a&gt; and I was genuinely impressed. At the company store we got lots of t-shirts, games and gadgets with our employee discount. 
After that they rented the Zoo for us since we were about 1000 interns from all over the country and they got us the “&lt;a href="http://www.davematthewsband.com/"&gt;Dave Matthews&lt;/a&gt;” band and gave each one of us a &lt;a href="http://blog.seattlepi.com/microsoft/2011/08/01/microsoft-interns-get-dave-matthews-concert-xboxes-spoiled/"&gt;brand new xbox360 with Kinect&lt;/a&gt;!&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-hlzfDFNu6LQ/TpxNkBrmtpI/AAAAAAAABP0/tWHAPahlqeA/s1600/263253_10150753944850323_533655322_19776508_2643495_n.jpg"&gt;&lt;img style="float:right; margin:0 10px 10px 0;cursor:pointer; cursor:hand;width: 134px; height: 200px;" src="http://1.bp.blogspot.com/-hlzfDFNu6LQ/TpxNkBrmtpI/AAAAAAAABP0/tWHAPahlqeA/s200/263253_10150753944850323_533655322_19776508_2643495_n.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5664487712647198354" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;It was definitely unique and rewarding to work with all those interns from the top universities all over the country: &lt;a href="http://www.google.com/url?sa=t&amp;rct=j&amp;q=mit&amp;source=web&amp;cd=1&amp;ved=0CD4QFjAA&amp;url=http%3A%2F%2Fweb.mit.edu%2F&amp;ei=7nrgTpGGPIfk0QGZ2uHBBw&amp;usg=AFQjCNFGEpEnwRBMPQvRT7ueDZqPQAU23g"&gt;MIT&lt;/a&gt;, UC Berkeley, &lt;a href="http://www.google.com/url?sa=t&amp;rct=j&amp;q=stanford%20cs&amp;source=web&amp;cd=1&amp;ved=0CCwQFjAA&amp;url=http%3A%2F%2Fcs.stanford.edu%2F&amp;ei=CnvgTty6HKjz0gHmsMjSBw&amp;usg=AFQjCNFPfptXyf0-jyKcLUfh36TI716RMg"&gt;Stanford&lt;/a&gt;, …etc. I asked around and I found that I was the only representative from ODU so I was definitely proud and tried to behave. Me and the other interns became friends and since most of us are residing on the same apartment complex we gathered almost every night and on the weekends we went and discovered the city and the surrounding area. 
Unfortunately I didn’t join them in the &lt;a href="http://en.wikipedia.org/wiki/Yosemite_National_Park"&gt;Yosemite&lt;/a&gt; hiking/camping trip, as I was sick that day. One day we all decided to wear suits and sunglasses all day at work and call it "&lt;a href="http://www.businessinsider.com/the-ultimate-guide-to-learning-brogramming-the-hard-way-2011-9"&gt;Brogramming&lt;/a&gt;" day. Someone took a photo of us and it went &lt;a href="https://www.facebook.com/photo.php?fbid=258405124188873&amp;set=a.215440105152042.69259.215387745157278&amp;type=3"&gt;viral on twitter and facebook&lt;/a&gt;!&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-GlUybb-kDhU/TpxQQjs-aaI/AAAAAAAABQM/cy3tM9yYEbk/s1600/263231_10150748724315323_533655322_19715472_161298_n.jpg"&gt;&lt;img style="float:left; margin:0 10px 10px 0;cursor:pointer; cursor:hand;width: 134px; height: 200px;" src="http://3.bp.blogspot.com/-GlUybb-kDhU/TpxQQjs-aaI/AAAAAAAABQM/cy3tM9yYEbk/s200/263231_10150748724315323_533655322_19715472_161298_n.jpg" border="0" alt="" id="BLOGGER_PHOTO_ID_5664490676717250978" /&gt;&lt;/a&gt;&lt;br /&gt;In conclusion I feel honored and blessed for being able to work at this wonderful fascinating place with all those extremely intelligent colleagues. My manager/team lead told me on my first day one thing that I believe changed everything. He said you were only an intern during the 2-hour orientation session, now consider yourself a full time software engineer and own your work. This definitely helped me to shine, participate, own my work, suggest enhancements, which actually were considered, and we changed the design document. 
Now, I can proudly say that my product is being used currently by millions of users; probably you are using it right now!&lt;br /&gt;&lt;br /&gt;-- Hany SalahEldeen&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-6521967093114348623?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/6521967093114348623/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-08-summer-microsoft-internship.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6521967093114348623'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6521967093114348623'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-08-summer-microsoft-internship.html' title='2011-12-08: Summer Microsoft Internship'/><author><name>Hany SalahEldeen</name><uri>http://www.blogger.com/profile/06304841890215312435</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='http://1.bp.blogspot.com/-U-tAHHzl3Ok/TxNag880ahI/AAAAAAAABgc/-mvRIYmi_3Q/s220/339232_10150962841245323_533655322_21244974_36126378_o.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-Mez9cQS8fIU/TpxM0iAXvCI/AAAAAAAABPo/Iw5JBD8pBkY/s72-c/252026_200916373286055_100001030803006_553630_24480_n.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-5531987442795598239</id><published>2011-12-07T22:26:00.001-05:00</published><updated>2011-12-07T22:27:04.482-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' 
term='NFL'/><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><title type='text'>2011-12-07: 2011 NFL Season Week 14</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-Us6Qza4nWcU/TuAT9FUnj1I/AAAAAAAAAIo/v1ObWU1TS6k/s1600/images.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" src="http://1.bp.blogspot.com/-Us6Qza4nWcU/TuAT9FUnj1I/AAAAAAAAAIo/v1ObWU1TS6k/s1600/images.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;Week 14 of the 2011 NFL season is upon us. Talk of playoff teams and Superbowl probabilities fills the airwaves even more than Christmas music. Sitting in traffic on the drive home from work tonight I was listening to a few on-air personalities discussing Green Bay and New England for the Superbowl. Green Bay has already clinched a playoff berth and many people would say they are headed to the Superbowl this year. The comment that caught my attention was that the defense for both teams was terrible this year and the only reason they were doing well this year is that their offenses were so good that they could "outscore their mistakes".&lt;br /&gt;&lt;br /&gt;This led me to think about the Colts without Peyton Manning this year. For the past 3 or 4 years the Colts with Manning as their quarterback have dominated the sport. It would seem that they built the entire team around Manning. The Colts would run up the score on offense and then the opposing team would be forced to attempt to pass often just to catch up. Then the Colts' defense would focus on the opposing team's quarterback to keep him from making plays. Now this year without Manning the Colts have no game. Are Green Bay and New England in a similar situation? 
&lt;br /&gt;&lt;br /&gt;Contemplating statistics during rush hour traffic is a good way to become a statistic so I did not get much more in depth listening to the show, but after arriving home I ran some SQL queries to check the veracity of the claims made by the radio show.&lt;br /&gt;&lt;br /&gt;Indeed it is true that the defense for both Green Bay and New England  have given up more than the average number of yards this year. They are both almost dead last in defensive performance. Here is a  list of the teams with the average number of yards given up per play on  both passing and rushing plays. &lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;table border="1" cellpadding="0" class="MsoNormalTable" style="mso-cellspacing: 1.5pt; mso-yfti-tbllook: 1184;"&gt;&lt;tbody&gt;&lt;tr style="mso-yfti-firstrow: yes; mso-yfti-irow: 0;"&gt;   &lt;td style="background: silver; padding: .75pt .75pt .75pt .75pt;"&gt;&lt;div class="MsoNormal"&gt;Team&lt;/div&gt;&lt;/td&gt;   &lt;td style="background: silver; padding: .75pt .75pt .75pt .75pt;"&gt;&lt;div class="MsoNormal"&gt;Yards given up per play&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 1;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Atlanta&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;4.3638&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 2;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Pittsburgh&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;4.7886&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 3;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Baltimore&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div 
class="MsoNormal"&gt;4.8224&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 4;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Houston&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;4.9090&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 5;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Cincinnati&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.1182&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 6;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;San   Francisco&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.1645&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 7;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;New   York&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.1786&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 8;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Cleveland&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.2142&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 9;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Jacksonville&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.2292&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 10;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" 
valign="top"&gt;&lt;div class="MsoNormal"&gt;Seattle&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.3491&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 11;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Tennessee&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.3568&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 12;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Washington&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.4359&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 13;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Detroit&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.5313&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 14;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Arizona&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.5506&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 15;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Miami&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.5726&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 16;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Denver&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div 
class="MsoNormal"&gt;5.6300&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 17;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Kansas   City&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.6887&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 18;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Chicago&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.6887&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 19;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Oakland&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.7423&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 20;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Dallas&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.7464&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 21;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;San   Diego&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.7867&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 22;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Minnesota&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.8281&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 23;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" 
valign="top"&gt;&lt;div class="MsoNormal"&gt;St.   Louis&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.8436&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 24;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Indianapolis&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;5.8923&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 25;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Philadelphia&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;6.0145&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 26;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Buffalo&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;6.0374&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 27;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;New   York&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;6.0508&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 28;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;New   Orleans&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;6.0600&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 29;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Carolina&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div 
class="MsoNormal"&gt;6.3130&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 30;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;New   England&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;6.3642&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 31;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Tampa   Bay&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;6.4829&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr style="mso-yfti-irow: 32; mso-yfti-lastrow: yes;"&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;Green   Bay&lt;/div&gt;&lt;/td&gt;   &lt;td style="padding: .75pt .75pt .75pt .75pt;" valign="top"&gt;&lt;div class="MsoNormal"&gt;6.5041&lt;/div&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;I have a feeling that a team with a balanced offense and a good pass defense like Pittsburgh or Baltimore could give New England and/or Green Bay a tough time in the post season but maybe we will cover that next week.&lt;br /&gt;&lt;br /&gt;The predictions for week 14 are:&lt;br /&gt;&lt;br /&gt;&lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt; &lt;td&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/td&gt;        &lt;td&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/td&gt;   &lt;td&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/td&gt;            &lt;td&gt;&lt;span title="Discrete"&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;                 &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at PIT&lt;/td&gt;     &lt;td&gt;9&lt;/td&gt;     &lt;td&gt;CLE&lt;/td&gt;     &lt;td&gt;PIT&lt;/td&gt;     &lt;td&gt;PIT&lt;/td&gt;   
&lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at BAL&lt;/td&gt;     &lt;td&gt;7&lt;/td&gt;     &lt;td&gt;IND&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;    &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;HOU&lt;/td&gt;     &lt;td&gt;5&lt;/td&gt;     &lt;td&gt;at CIN&lt;/td&gt;     &lt;td&gt;CIN&lt;/td&gt;      &lt;td&gt;HOU&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at GB&lt;/td&gt;     &lt;td&gt;15&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;      &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at NYJ&lt;/td&gt;     &lt;td&gt;7&lt;/td&gt;      &lt;td&gt;KC&lt;/td&gt;     &lt;td&gt;NYJ&lt;/td&gt;     &lt;td&gt;NYJ&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at DET&lt;/td&gt;      &lt;td&gt;7&lt;/td&gt;     &lt;td&gt;MIN&lt;/td&gt;     &lt;td&gt;DET&lt;/td&gt;     &lt;td&gt;DET&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;      &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;2&lt;/td&gt;     &lt;td&gt;at TEN&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;TEN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;PHI&lt;/td&gt;     &lt;td&gt;9&lt;/td&gt;     &lt;td&gt;at MIA&lt;/td&gt;     &lt;td&gt;PHI&lt;/td&gt;     &lt;td&gt;PHI&lt;/td&gt;    &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;NE&lt;/td&gt;     &lt;td&gt;15&lt;/td&gt;     &lt;td&gt;at WAS&lt;/td&gt;     &lt;td&gt;NE&lt;/td&gt;      &lt;td&gt;NE&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;ATL&lt;/td&gt;     &lt;td&gt;4&lt;/td&gt;     &lt;td&gt;at CAR&lt;/td&gt;      &lt;td&gt;ATL&lt;/td&gt;     &lt;td&gt;ATL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at JAX&lt;/td&gt;     &lt;td&gt;5&lt;/td&gt;      &lt;td&gt;TB&lt;/td&gt;     &lt;td&gt;TB&lt;/td&gt;     &lt;td&gt;JAX&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;SF&lt;/td&gt;      &lt;td&gt;3&lt;/td&gt;     &lt;td&gt;at ARI&lt;/td&gt;     &lt;td&gt;SF&lt;/td&gt;     &lt;td&gt;SF&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;      &lt;td&gt;DEN&lt;/td&gt;     &lt;td&gt;2&lt;/td&gt;     &lt;td&gt;CHI&lt;/td&gt;     
&lt;td&gt;DEN&lt;/td&gt;     &lt;td&gt;CHI&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at SD&lt;/td&gt;     &lt;td&gt;12&lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;     &lt;td&gt;SD&lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;    &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at DAL&lt;/td&gt;     &lt;td&gt;3&lt;/td&gt;     &lt;td&gt;NYG&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;      &lt;td&gt;DAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at SEA&lt;/td&gt;     &lt;td&gt;4&lt;/td&gt;     &lt;td&gt;STL&lt;/td&gt;     &lt;td&gt;SEA&lt;/td&gt;     &lt;td&gt;SEA&lt;/td&gt;        &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-5531987442795598239?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/5531987442795598239/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-07-2011-nfl-season-week-14.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5531987442795598239'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5531987442795598239'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-07-2011-nfl-season-week-14.html' title='2011-12-07: 2011 NFL Season Week 14'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' 
url='http://1.bp.blogspot.com/-Us6Qza4nWcU/TuAT9FUnj1I/AAAAAAAAAIo/v1ObWU1TS6k/s72-c/images.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-8408085472430744601</id><published>2011-12-01T13:08:00.000-05:00</published><updated>2011-12-01T13:08:37.757-05:00</updated><title type='text'>2011-12-01: 2011 NFL Season Week 13</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-liiUVwRLVIY/Ttew0Y6NkeI/AAAAAAAAAIg/-MFsl80Eli8/s1600/images.jpg" imageanchor="1" style="clear:right; float:right; margin-left:1em; margin-bottom:1em"&gt;&lt;img border="0" height="225" width="225" src="http://4.bp.blogspot.com/-liiUVwRLVIY/Ttew0Y6NkeI/AAAAAAAAAIg/-MFsl80Eli8/s320/images.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;Week 13 of the 2011 NFL season is upon us. This week New England is a 20 point favorite over Indianapolis. 20 points is rather rather significant for a line value. In fact since 2002 there have only been six games with a line value of 20 or greater. Of those six games, New England was the favorite in five of them. In none of the five games did New England cover the spread but they came close to covering the spread in the 2007 game against Miami winning by 21 points with a 22 point line value.   
&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt; &lt;td&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/td&gt;        &lt;td&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/td&gt;   &lt;td&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/td&gt;            &lt;td&gt;&lt;span title="Discrete"&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;                 &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;PHI &lt;/td&gt;    &lt;td&gt;5&lt;/td&gt;    &lt;td&gt;at SEA&lt;/td&gt;    &lt;td&gt;PHI&lt;/td&gt;    &lt;td&gt;SEA&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;TEN&lt;/td&gt;    &lt;td&gt;3&lt;/td&gt;    &lt;td&gt;at BUF&lt;/td&gt;    &lt;td&gt;BUF&lt;/td&gt;    &lt;td&gt;TEN&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at CHI&lt;/td&gt;    &lt;td&gt;4&lt;/td&gt;    &lt;td&gt;KC&lt;/td&gt;    &lt;td&gt;CHI&lt;/td&gt;    &lt;td&gt;CHI&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;MIA&lt;/td&gt;    &lt;td&gt;7&lt;/td&gt;    &lt;td&gt;at OAK&lt;/td&gt;    &lt;td&gt;MIA&lt;/td&gt;    &lt;td&gt;OAK&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at PIT&lt;/td&gt;    &lt;td&gt;6&lt;/td&gt;    &lt;td&gt;CIN&lt;/td&gt;    &lt;td&gt;PIT&lt;/td&gt;    &lt;td&gt;PIT&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;BAL&lt;/td&gt;    &lt;td&gt;1&lt;/td&gt;    &lt;td&gt;at CLE&lt;/td&gt;    &lt;td&gt;BAL&lt;/td&gt;    &lt;td&gt;BAL&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;NYJ&lt;/td&gt;    &lt;td&gt;1&lt;/td&gt;    &lt;td&gt;WAS&lt;/td&gt;    &lt;td&gt;NYJ&lt;/td&gt;    &lt;td&gt;NYJ&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at HOU&lt;/td&gt;    &lt;td&gt;7&lt;/td&gt;    &lt;td&gt;ATL&lt;/td&gt;    &lt;td&gt;ATL&lt;/td&gt;    &lt;td&gt;HOU&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;CAR&lt;/td&gt;    &lt;td&gt;6&lt;/td&gt;    &lt;td&gt;at TB&lt;/td&gt;    &lt;td&gt;TB&lt;/td&gt;    
&lt;td&gt;CAR&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at NO&lt;/td&gt;    &lt;td&gt;7&lt;/td&gt;    &lt;td&gt;DET&lt;/td&gt;    &lt;td&gt;NO&lt;/td&gt;    &lt;td&gt;NO&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At MIN&lt;/td&gt;    &lt;td&gt;6&lt;/td&gt;    &lt;td&gt;DEN&lt;/td&gt;    &lt;td&gt;DEN&lt;/td&gt;    &lt;td&gt;DEN&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at SF&lt;/td&gt;    &lt;td&gt;10&lt;/td&gt;    &lt;td&gt;STL&lt;/td&gt;    &lt;td&gt;SF&lt;/td&gt;    &lt;td&gt;SF&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;DAL&lt;/td&gt;    &lt;td&gt;8&lt;/td&gt;    &lt;td&gt;at ARI&lt;/td&gt;    &lt;td&gt;DAL&lt;/td&gt;    &lt;td&gt;DAL&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;GB&lt;/td&gt;    &lt;td&gt;2&lt;/td&gt;    &lt;td&gt;NYG&lt;/td&gt;    &lt;td&gt;GB&lt;/td&gt;    &lt;td&gt;GB&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;NE&lt;/td&gt;    &lt;td&gt;10&lt;/td&gt;    &lt;td&gt;IND&lt;/td&gt;    &lt;td&gt;NE&lt;/td&gt;    &lt;td&gt;NE&lt;/td&gt; &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;SD&lt;/td&gt;    &lt;td&gt;4&lt;/td&gt;    &lt;td&gt;JAX&lt;/td&gt;    &lt;td&gt;SD&lt;/td&gt;    &lt;td&gt;SD&lt;/td&gt;  &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-8408085472430744601?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/8408085472430744601/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-01-2011-nfl-season-week-13.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/8408085472430744601'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/953024975153422094/posts/default/8408085472430744601'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/12/2011-12-01-2011-nfl-season-week-13.html' title='2011-12-01: 2011 NFL Season Week 13'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-liiUVwRLVIY/Ttew0Y6NkeI/AAAAAAAAAIg/-MFsl80Eli8/s72-c/images.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-6139241573519526647</id><published>2011-11-24T15:31:00.001-05:00</published><updated>2011-11-25T10:17:35.085-05:00</updated><title type='text'>2011-11-24: 2011 NFL Season Week 12</title><content type='html'>Happy Thanksgiving!&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-hSCT7fXuZzk/Ts6kLrnpT4I/AAAAAAAAAIU/RIAhI60JK88/s1600/images.jpg" imageanchor="1" style="clear:right; float:right; margin-left:1em; margin-bottom:1em"&gt;&lt;img border="0" height="225" width="225" src="http://4.bp.blogspot.com/-hSCT7fXuZzk/Ts6kLrnpT4I/AAAAAAAAAIU/RIAhI60JK88/s320/images.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;I apologize for posting these a little late but I have been cooking food for the past three days. When I was not cooking I was reading papers about modifications to Support Vector Machines to get some ideas to improve the accuracy of our predictions. &lt;br /&gt;&lt;a href="http://dl.acm.org/citation.cfm?id=1148205"&gt;Adapting Ranking SVM to Document Retrieval&lt;/a&gt; concentrated on a modification of the hinge loss function when training the model to increase accuracy. 
&lt;a href="http://www.mitpressjournals.org/doi/abs/10.1162/neco.2007.19.5.1155"&gt;Training a Support Vector Machine in the Primal&lt;/a&gt; points out that much literature jumps right to the dual optimization aspect of SVMs and does not pay enough attention to the primal problem. A portion of the paper mentions replacing the hinge loss function with one that is differentiable such as the Huber loss function.&lt;br /&gt;&lt;br /&gt;While experimenting with SVM training I observed an interesting data point. Using NFL statistics from 2002 to 2010, one of the training methods assigned the following weights to the teams.&lt;br /&gt;&lt;table&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 width=64 style='height:15.0pt;width:48pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right width=67 style='width:50pt'&gt;&lt;a name=test&gt;1.1276&lt;/a&gt;&lt;/td&gt;   &lt;td width=148 style='width:111pt'&gt;&lt;a name=test2.&gt;Indianapolis Colts&lt;/a&gt;&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;1.1055&lt;/td&gt;   &lt;td&gt;New England Patriots&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.5704&lt;/td&gt;   &lt;td&gt;Philadelphia Eagles&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.4269&lt;/td&gt;   &lt;td&gt;Pittsburgh Steelers&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.3317&lt;/td&gt;   &lt;td&gt;Tennessee Titans&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.3184&lt;/td&gt;   &lt;td&gt;San Diego 
Chargers&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.2922&lt;/td&gt;   &lt;td&gt;Baltimore Ravens&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.2248&lt;/td&gt;   &lt;td&gt;New Orleans Saints&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.1976&lt;/td&gt;   &lt;td&gt;Green Bay Packers&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.1718&lt;/td&gt;   &lt;td&gt;Jacksonville Jaguars&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.1621&lt;/td&gt;   &lt;td&gt;Tampa Bay Buccaneers&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.0957&lt;/td&gt;   &lt;td&gt;Carolina Panthers&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.0946&lt;/td&gt;   &lt;td&gt;Atlanta Falcons&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.0877&lt;/td&gt;   &lt;td&gt;New York Jets&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;0.0704&lt;/td&gt;   &lt;td&gt;Chicago Bears&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 
align=right&gt;0.0504&lt;/td&gt;   &lt;td&gt;New York Giants&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.0569&lt;/td&gt;   &lt;td&gt;Dallas Cowboys&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.0689&lt;/td&gt;   &lt;td&gt;Buffalo Bills&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.0740&lt;/td&gt;   &lt;td&gt;Miami Dolphins&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.0812&lt;/td&gt;   &lt;td&gt;Houston Texans&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.1085&lt;/td&gt;   &lt;td&gt;Denver Broncos&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.1752&lt;/td&gt;   &lt;td&gt;Cincinnati Bengals&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.2420&lt;/td&gt;   &lt;td&gt;Kansas City Chiefs&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.3457&lt;/td&gt;   &lt;td&gt;Cleveland Browns&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.3631&lt;/td&gt;   &lt;td&gt;Minnesota Vikings&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 
style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.3634&lt;/td&gt;   &lt;td&gt;Seattle Seahawks&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.4150&lt;/td&gt;   &lt;td&gt;Arizona Cardinals&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.5037&lt;/td&gt;   &lt;td&gt;St. Louis Rams&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.5121&lt;/td&gt;   &lt;td&gt;Oakland Raiders&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.5633&lt;/td&gt;   &lt;td&gt;Washington Redskins&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.6925&lt;/td&gt;   &lt;td&gt;San Francisco 49ers&lt;/td&gt;  &lt;/tr&gt;&lt;tr height=20 style='height:15.0pt'&gt;   &lt;td height=20 style='height:15.0pt'&gt;&lt;/td&gt;   &lt;td class=xl65 align=right&gt;-0.7623&lt;/td&gt;   &lt;td&gt;Detroit Lions&lt;/td&gt;  &lt;/tr&gt;&lt;/table&gt;&lt;br /&gt;Well it is time for me to break out the whipping cream and make some homemade whip cream for the pies I baked the other day. 
Here are the picks for this week.&lt;br /&gt;&lt;br /&gt;&lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt; &lt;td&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/td&gt;        &lt;td&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/td&gt;   &lt;td&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/td&gt;            &lt;td&gt;&lt;span title="Discrete"&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;                 &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;6&lt;/td&gt;     &lt;td&gt;at DET&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at DAL&lt;/td&gt;     &lt;td&gt;8&lt;/td&gt;     &lt;td&gt;MIA&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at BAL&lt;/td&gt;     &lt;td&gt;3&lt;/td&gt;     &lt;td&gt;SF&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;  &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at STL&lt;/td&gt;     &lt;td&gt;5&lt;/td&gt;     &lt;td&gt;ARI&lt;/td&gt;     &lt;td&gt;STL&lt;/td&gt;     &lt;td&gt;STL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at NYJ&lt;/td&gt;     &lt;td&gt;1&lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;     &lt;td&gt;NYJ&lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at CIN&lt;/td&gt;     &lt;td&gt;8&lt;/td&gt;     &lt;td&gt;CLE&lt;/td&gt;     &lt;td&gt;CIN&lt;/td&gt;     &lt;td&gt;CIN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;HOU&lt;/td&gt;     &lt;td&gt;4&lt;/td&gt;     &lt;td&gt;at JAX&lt;/td&gt;     &lt;td&gt;HOU&lt;/td&gt;     &lt;td&gt;HOU&lt;/td&gt;  &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;CAR&lt;/td&gt;     &lt;td&gt;7&lt;/td&gt;     &lt;td&gt;at IND&lt;/td&gt;     &lt;td&gt;CAR&lt;/td&gt;     &lt;td&gt;IND&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at TEN&lt;/td&gt;     &lt;td&gt;20&lt;/td&gt;     &lt;td&gt;TB&lt;/td&gt;  
   &lt;td&gt;TEN&lt;/td&gt;     &lt;td&gt;TEN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at ATL&lt;/td&gt;     &lt;td&gt;11&lt;/td&gt;     &lt;td&gt;MIN&lt;/td&gt;     &lt;td&gt;ATL&lt;/td&gt;     &lt;td&gt;ATL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at OAK&lt;/td&gt;     &lt;td&gt;6&lt;/td&gt;     &lt;td&gt;CHI&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;     &lt;td&gt;CHI&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at SEA&lt;/td&gt;     &lt;td&gt;5&lt;/td&gt;     &lt;td&gt;WAS&lt;/td&gt;     &lt;td&gt;SEA&lt;/td&gt;     &lt;td&gt;SEA&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;NE&lt;/td&gt;     &lt;td&gt;6&lt;/td&gt;     &lt;td&gt;at PHI&lt;/td&gt;     &lt;td&gt;NE&lt;/td&gt;     &lt;td&gt;PHI&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at SD&lt;/td&gt;     &lt;td&gt;6&lt;/td&gt;     &lt;td&gt;DEN&lt;/td&gt;     &lt;td&gt;SD&lt;/td&gt;     &lt;td&gt;DEN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;PIT&lt;/td&gt;     &lt;td&gt;11&lt;/td&gt;     &lt;td&gt;at KC&lt;/td&gt;     &lt;td&gt;PIT&lt;/td&gt;     &lt;td&gt;PIT&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at NO&lt;/td&gt;     &lt;td&gt;3&lt;/td&gt;     &lt;td&gt;NYG&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;   &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-6139241573519526647?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/6139241573519526647/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-24-2011-nfl-season-week-12.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6139241573519526647'/><link rel='self' 
type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6139241573519526647'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-24-2011-nfl-season-week-12.html' title='2011-11-24: 2011 NFL Season Week 12'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-hSCT7fXuZzk/Ts6kLrnpT4I/AAAAAAAAAIU/RIAhI60JK88/s72-c/images.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-3597588216530726814</id><published>2011-11-17T16:02:00.000-05:00</published><updated>2011-11-17T16:02:36.263-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><category scheme='http://www.blogger.com/atom/ns#' term='Football'/><title type='text'>2011-11-17: 2011 NFL Season Week 11</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-zHfNwY_ANOI/TsVuvhJMUpI/AAAAAAAAAIE/3h7nfnBF4lI/s1600/NFL-Week-11.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-zHfNwY_ANOI/TsVuvhJMUpI/AAAAAAAAAIE/3h7nfnBF4lI/s1600/NFL-Week-11.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;Thursday Night Football, this week the NY Jets play at Denver. The Jets have a number of players on the injured list this week. Even with those injuries all three of our algorithms picked the Jets to win on Thursday.&lt;br /&gt;&lt;br /&gt;The Jets injury list is not as bad as some of the other teams. Philadelphia's quarterback, Vick has two broken ribs and has not been at practice all week. 
Kansas City's quarterback Matt Cassel underwent hand surgery and will probably be out for the rest of the season. &lt;br /&gt;&lt;br /&gt;A weakness of our algorithms is that they are heavily based on this year's performance to date. A major injury to an important player that may or may not have an impact on game performance is not really taken into account. That is one of the reasons we have incorporated the Line data this year, hoping that the "Collective Intelligence" of the crowd would help to point out teams that may perform differently. &lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt; &lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt; &lt;td&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/td&gt;        &lt;td&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/td&gt;   &lt;td&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/td&gt;            &lt;td&gt;&lt;span title="Discrete"&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;                 &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;/tr&gt;  &lt;tr&gt;    &lt;td&gt;NYJ&lt;/td&gt;    &lt;td&gt;4.5&lt;/td&gt;    &lt;td&gt;at DEN&lt;/td&gt;    &lt;td&gt;NYJ&lt;/td&gt;    &lt;td&gt;NYJ&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at ATL&lt;/td&gt;    &lt;td&gt;4&lt;/td&gt;    &lt;td&gt;TEN&lt;/td&gt;    &lt;td&gt;ATL&lt;/td&gt;    &lt;td&gt;TEN&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;BUF&lt;/td&gt;    &lt;td&gt;3.6&lt;/td&gt;    &lt;td&gt;at MIA&lt;/td&gt;    &lt;td&gt;MIA&lt;/td&gt;    &lt;td&gt;BUF&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at BAL&lt;/td&gt;    &lt;td&gt;5&lt;/td&gt;    &lt;td&gt;CIN&lt;/td&gt;    &lt;td&gt;BAL&lt;/td&gt;    &lt;td&gt;BAL&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;JAX&lt;/td&gt;    &lt;td&gt;7&lt;/td&gt;    &lt;td&gt;at CLE&lt;/td&gt;    &lt;td&gt;JAX&lt;/td&gt;    &lt;td&gt;JAX&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at MIN&lt;/td&gt;    
&lt;td&gt;5&lt;/td&gt;    &lt;td&gt;OAK&lt;/td&gt;    &lt;td&gt;MIN&lt;/td&gt;    &lt;td&gt;OAK&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at DET&lt;/td&gt;    &lt;td&gt;11&lt;/td&gt;    &lt;td&gt;CAR&lt;/td&gt;    &lt;td&gt;DET&lt;/td&gt;    &lt;td&gt;DET&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at GB&lt;/td&gt;    &lt;td&gt;18&lt;/td&gt;    &lt;td&gt;TB&lt;/td&gt;    &lt;td&gt;GB&lt;/td&gt;    &lt;td&gt;GB&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;DAL&lt;/td&gt;    &lt;td&gt;8&lt;/td&gt;    &lt;td&gt;WAS&lt;/td&gt;    &lt;td&gt;DAL&lt;/td&gt;    &lt;td&gt;DAL&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at SF&lt;/td&gt;    &lt;td&gt;8&lt;/td&gt;    &lt;td&gt;ARI&lt;/td&gt;    &lt;td&gt;SF&lt;/td&gt;    &lt;td&gt;SF&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at STL&lt;/td&gt;    &lt;td&gt;2&lt;/td&gt;    &lt;td&gt;SEA&lt;/td&gt;    &lt;td&gt;SEA&lt;/td&gt;    &lt;td&gt;SEA&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at CHI&lt;/td&gt;    &lt;td&gt;5&lt;/td&gt;    &lt;td&gt;SD&lt;/td&gt;    &lt;td&gt;CHI&lt;/td&gt;    &lt;td&gt;CHI&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at NYG&lt;/td&gt;    &lt;td&gt;7&lt;/td&gt;    &lt;td&gt;PHI&lt;/td&gt;    &lt;td&gt;NYG&lt;/td&gt;    &lt;td&gt;PHI&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;at NE&lt;/td&gt;    &lt;td&gt;8&lt;/td&gt;    &lt;td&gt;KC&lt;/td&gt;    &lt;td&gt;NE&lt;/td&gt;    &lt;td&gt;NE&lt;/td&gt;  &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;br /&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-3597588216530726814?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/3597588216530726814/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-17-2011-nfl-season-week-11.html#comment-form' title='0 
Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3597588216530726814'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3597588216530726814'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-17-2011-nfl-season-week-11.html' title='2011-11-17: 2011 NFL Season Week 11'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-zHfNwY_ANOI/TsVuvhJMUpI/AAAAAAAAAIE/3h7nfnBF4lI/s72-c/NFL-Week-11.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-4050163856620979853</id><published>2011-11-10T20:20:00.002-05:00</published><updated>2011-11-10T21:23:40.519-05:00</updated><title type='text'>2011-11-10: 2011 NFL Season Week 10</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://4.bp.blogspot.com/-vmEYO5DaGIM/TrxyBv_9z1I/AAAAAAAAAH8/0iPOV_T72SE/s1600/NFL-Sched-Week-10.jpg" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="160" src="http://4.bp.blogspot.com/-vmEYO5DaGIM/TrxyBv_9z1I/AAAAAAAAAH8/0iPOV_T72SE/s320/NFL-Sched-Week-10.jpg" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;Thursday Night Football is back! The match-up for tonight features the San Diego Chargers at home vs the Oakland Raiders. &lt;br /&gt;&lt;br /&gt;This is a very close matchup according to the stats. San Diego has a offensive pass efficiency of 7.2 and Oakland 6.7. Oakland has a better run game but not by much. 
The defensive ratings are almost exactly the same with San Diego leading by a little bit.&lt;br /&gt;&lt;br /&gt;The SVM and Neural Network both chose San Diego to win but the PageRank algorithm decided Oakland was a better choice.&lt;br /&gt;&lt;br /&gt;&lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt; &lt;td&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/td&gt;        &lt;td&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/td&gt;   &lt;td&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/td&gt;            &lt;td&gt;&lt;span title="Discrete"&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;                 &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At SAN&lt;/td&gt;     &lt;td&gt;8.5&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;     &lt;td&gt;SAN&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;   &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;PIT&lt;/td&gt;    &lt;td&gt;1.5&lt;/td&gt;    &lt;td&gt;At CIN&lt;/td&gt;    &lt;td&gt;PIT&lt;/td&gt;    &lt;td&gt;PIT&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At KC&lt;/td&gt;    &lt;td&gt;5&lt;/td&gt;    &lt;td&gt;DEN&lt;/td&gt;    &lt;td&gt;KC&lt;/td&gt;    &lt;td&gt;KC&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At IND&lt;/td&gt;    &lt;td&gt;4&lt;/td&gt;    &lt;td&gt;JAX&lt;/td&gt;    &lt;td&gt;JAX&lt;/td&gt;    &lt;td&gt;JAX&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At DAL&lt;/td&gt;    &lt;td&gt;3&lt;/td&gt;    &lt;td&gt;BUF&lt;/td&gt;    &lt;td&gt;DAL&lt;/td&gt;    &lt;td&gt;DAL&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;HOU&lt;/td&gt;    &lt;td&gt;7&lt;/td&gt;    &lt;td&gt;At TB&lt;/td&gt;    &lt;td&gt;HOU&lt;/td&gt;    &lt;td&gt;HOU&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At CAR&lt;/td&gt;    &lt;td&gt;6&lt;/td&gt;    &lt;td&gt;TEN&lt;/td&gt;    &lt;td&gt;Car&lt;/td&gt;    &lt;td&gt;TEN&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At MIA&lt;/td&gt;    &lt;td&gt;5&lt;/td&gt;    
&lt;td&gt;WAS&lt;/td&gt;    &lt;td&gt;MIA&lt;/td&gt;    &lt;td&gt;WAS&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At ATL&lt;/td&gt;    &lt;td&gt;6&lt;/td&gt;    &lt;td&gt;NO&lt;/td&gt;    &lt;td&gt;ATL&lt;/td&gt;    &lt;td&gt;NO&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At CHI&lt;/td&gt;    &lt;td&gt;6&lt;/td&gt;    &lt;td&gt;DET&lt;/td&gt;    &lt;td&gt;CHI&lt;/td&gt;    &lt;td&gt;CHI&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At CLE&lt;/td&gt;    &lt;td&gt;4&lt;/td&gt;    &lt;td&gt;STL&lt;/td&gt;    &lt;td&gt;CLE&lt;/td&gt;    &lt;td&gt;STL&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At PHI&lt;/td&gt;    &lt;td&gt;11&lt;/td&gt;    &lt;td&gt;ARI&lt;/td&gt;    &lt;td&gt;PHI&lt;/td&gt;    &lt;td&gt;PHI&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;BAL&lt;/td&gt;    &lt;td&gt;4&lt;/td&gt;    &lt;td&gt;At SEA&lt;/td&gt;    &lt;td&gt;BAL&lt;/td&gt;    &lt;td&gt;BAL&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;NYG&lt;/td&gt;    &lt;td&gt;3&lt;/td&gt;    &lt;td&gt;At SF&lt;/td&gt;    &lt;td&gt;SF&lt;/td&gt;    &lt;td&gt;SF&lt;/td&gt;  &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;NE&lt;/td&gt;    &lt;td&gt;10&lt;/td&gt;    &lt;td&gt;At NYJ&lt;/td&gt;    &lt;td&gt;NE&lt;/td&gt;    &lt;td&gt;NYJ&lt;/td&gt; &lt;/tr&gt; &lt;tr&gt;    &lt;td&gt;At GB&lt;/td&gt;    &lt;td&gt;14&lt;/td&gt;    &lt;td&gt;MIN&lt;/td&gt;    &lt;td&gt;GB&lt;/td&gt;    &lt;td&gt;GB&lt;/td&gt;  &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-4050163856620979853?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/4050163856620979853/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-10-2011-nfl-season-week-10.html#comment-form' title='0 Comments'/><link rel='edit' 
type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/4050163856620979853'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/4050163856620979853'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-10-2011-nfl-season-week-10.html' title='2011-11-10: 2011 NFL Season Week 10'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-vmEYO5DaGIM/TrxyBv_9z1I/AAAAAAAAAH8/0iPOV_T72SE/s72-c/NFL-Sched-Week-10.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-3801948925135563579</id><published>2011-11-10T11:27:00.000-05:00</published><updated>2011-11-10T11:27:55.667-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Warrick'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='digital preservation'/><category scheme='http://www.blogger.com/atom/ns#' term='Archive Facebook'/><title type='text'>2011-11-10: Day in the Life of a Computer Scientist</title><content type='html'>&lt;a href='http://www.odu.edu/'&gt;Old Dominion University&lt;/a&gt; has a &lt;a href='http://www.cs.odu.edu/~brunelle/cs110/'&gt;freshmen computer science course&lt;/a&gt; that focuses on what it means to be a computer scientist. This course discusses career opportunities, current research being performed, and serves to debunk myths and misconceptions about the field of computer science. Such myths include: we never talk to humans, we code our entire lives away, and we are nocturnal. 
I was invited to be a guest lecturer for the class last night. Even though the last myth is sometimes true, I did my best to touch on each of these talking points during the presentation embedded below.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="width:425px" id="__ss_10103063"&gt;&lt;strong style="display:block;margin:12px 0 4px"&gt;&lt;a href="http://www.slideshare.net/jbrunelle008/day-in-the-life-of-a-computer-scientist" title="Day in the Life of a Computer Scientist"&gt;Day in the Life of a Computer Scientist&lt;/a&gt;&lt;/strong&gt;&lt;object id="__sse10103063" width="425" height="355"&gt;&lt;param name="movie" value="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=cs110pres-111110060345-phpapp02&amp;stripped_title=day-in-the-life-of-a-computer-scientist&amp;userName=jbrunelle008" /&gt;&lt;param name="allowFullScreen" value="true"/&gt;&lt;param name="allowScriptAccess" value="always"/&gt;&lt;embed name="__sse10103063" src="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=cs110pres-111110060345-phpapp02&amp;stripped_title=day-in-the-life-of-a-computer-scientist&amp;userName=jbrunelle008" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="355"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;div style="padding:5px 0 12px"&gt;View more &lt;a href="http://www.slideshare.net/"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/jbrunelle008"&gt;Justin Brunelle&lt;/a&gt;.&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;The first topics I spoke about in the presentation generated the majority of questions and discussion. I spoke first about the digital preservation work being performed in the WS-DL group at ODU. 
This, of course, included discussing the &lt;a href='https://addons.mozilla.org/en-US/firefox/addon/archivefacebook/'&gt;ArchiveFacebook&lt;/a&gt;, &lt;a href='http://warrick.cs.odu.edu/'&gt;Warrick&lt;/a&gt;, and &lt;a href='http://www.mementoweb.org/'&gt;Memento&lt;/a&gt; projects at a cursory level. During our discussion, we hit on the &lt;a href='http://blog.dshr.org/2011_09_01_archive.html'&gt;issues of copyright&lt;/a&gt; and &lt;a href='http://en.wikipedia.org/wiki/Web_crawling'&gt;web crawling&lt;/a&gt;, and why, as computer scientists, we find these problems interesting. We briefly talked about revisitation policies and change-rate studies of web pages that are important for search engines and archival methods. (Interested readers should direct their attention to &lt;a href='http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.18.1519&amp;rep=rep1&amp;type=pdf'&gt;Cho and Garcia-Molina's work (1999)&lt;/a&gt; for a canonical study on recrawl and page change rates.) We also discussed why computing theory (not just development) is important in the current research being performed in the academic and industrial communities. &lt;br /&gt;&lt;br /&gt;The remainder of my talk was a description of what I do on a daily basis as a professional computer scientist. I mentioned that I worked at &lt;a href='http://www.mitre.org/'&gt;The MITRE Corporation&lt;/a&gt; as a developer and researcher, and discussed what my job entails. For example, I practice &lt;a href='http://ws-dl.blogspot.com/2011/11/2011-11-05-agile-engineering-odus-acm.html'&gt;Agile engineering&lt;/a&gt;, work with people on a daily basis, and probably spend less than a quarter of my time in actual development. The remainder of my time is spent in testing cycles, working with customers to find direction for products, writing documentation, and other "non-coding" aspects of software development. 
Further, I discussed that MITRE is a unique company in that it is a &lt;a href='http://en.wikipedia.org/wiki/List_of_federally_funded_research_and_development_centers'&gt;Federally Funded Research and Development Center (FFRDC)&lt;/a&gt;, and supports the US government in an advisory role. This point illustrated that there are a variety of opportunities available to computer scientists, and not all of them are at traditional corporations.&lt;br /&gt;&lt;br /&gt;My lecture was meant to illustrate that a professional developer doesn't sit in a dark cubicle all night hammering out code, or go weeks without human interaction. More importantly, this presentation provided examples of work being done in industry and academia, and how the degree the students are earning will benefit them in their careers.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;--Justin F. Brunelle&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-3801948925135563579?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/3801948925135563579/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-10-day-in-life-of-computer.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3801948925135563579'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3801948925135563579'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-10-day-in-life-of-computer.html' title='2011-11-10: Day in the Life of a Computer Scientist'/><author><name>Justin F Brunelle</name><uri>http://www.blogger.com/profile/00580381835470799911</uri><email>noreply@blogger.com</email><gd:image 
rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='24' src='http://3.bp.blogspot.com/_AmWWXD7g2JA/TG0DJ_mEneI/AAAAAAAAAAM/_AvbhphHU8I/S220/2010-05-24+17.06.03.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-6250655037567525389</id><published>2011-11-05T23:21:00.001-04:00</published><updated>2011-11-06T13:20:39.011-05:00</updated><title type='text'>2011-11-4: 2011 NFL Season Week 9</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-pR8eb8NRQVQ/TrX3dI0Br9I/AAAAAAAAAH0/AjH6oVFfIt4/s1600/nfl-week-91-120x120.png" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" src="http://3.bp.blogspot.com/-pR8eb8NRQVQ/TrX3dI0Br9I/AAAAAAAAAH0/AjH6oVFfIt4/s1600/nfl-week-91-120x120.png" /&gt;&lt;/a&gt;&lt;/div&gt;Week 9 is the last of the bye weeks for this year. Two games that have a higher level of chatter this week and should be good games to watch are the New England vs Giants game and the Pittsburgh vs Baltimore game. &lt;br /&gt;&lt;br /&gt;The Patriots with Brady and the Giants with Manning both have potent veteran quarterbacks, and this game will probably hinge on the passing efficiency of both teams. The offensive passing efficiency of both teams is comparable at about 7.8 yards; however, the Giants' pass defense is better with only 5.9 yards given up compared to 7.5 for New England.&lt;br /&gt;&lt;br /&gt;The rhetoric for the Baltimore vs Pittsburgh game has been rather lively. The Baltimore defense is possibly one of the best this year with a pass defense of only 4.8 yards given up, although the offense has not been stellar with a pass efficiency of 5.8 yards. Pittsburgh's pass offense is better, rated at 7.02 yards while their pass defense, while still decent, is not as good as Baltimore's with a rating of 5.1 yards. 
&lt;br /&gt;&lt;br /&gt;&lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt; &lt;td&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/td&gt;        &lt;td&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/td&gt;   &lt;td&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/td&gt;            &lt;td&gt;&lt;span title="Discrete"&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;                 &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;ATL&lt;/td&gt;     &lt;td&gt;0.5&lt;/td&gt;     &lt;td&gt;At IND&lt;/td&gt;     &lt;td&gt;ATL&lt;/td&gt;     &lt;td&gt;ATL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At NO&lt;/td&gt;     &lt;td&gt;12&lt;/td&gt;     &lt;td&gt;TB&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At HOU&lt;/td&gt;     &lt;td&gt;9&lt;/td&gt;     &lt;td&gt;CLE&lt;/td&gt;     &lt;td&gt;HOU&lt;/td&gt;     &lt;td&gt;HOU&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At BUF&lt;/td&gt;     &lt;td&gt;4&lt;/td&gt;     &lt;td&gt;NYJ&lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At KC&lt;/td&gt;     &lt;td&gt;3&lt;/td&gt;     &lt;td&gt;MIA&lt;/td&gt;     &lt;td&gt;KC&lt;/td&gt;     &lt;td&gt;KC&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;SF&lt;/td&gt;     &lt;td&gt;4.5&lt;/td&gt;     &lt;td&gt;At WAS&lt;/td&gt;     &lt;td&gt;SF&lt;/td&gt;     &lt;td&gt;SF&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;  &lt;td&gt;At DAL&lt;/td&gt;     &lt;td&gt;6&lt;/td&gt;     &lt;td&gt;SEA&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At OAK&lt;/td&gt;     &lt;td&gt;5&lt;/td&gt;     &lt;td&gt;DEN&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;  &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At TEN&lt;/td&gt;     &lt;td&gt;3&lt;/td&gt;     &lt;td&gt;CIN&lt;/td&gt;     &lt;td&gt;TEN&lt;/td&gt;     
&lt;td&gt;TEN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At ARI&lt;/td&gt;     &lt;td&gt;7&lt;/td&gt;     &lt;td&gt;STL&lt;/td&gt;     &lt;td&gt;ARI&lt;/td&gt;     &lt;td&gt;STL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At NE&lt;/td&gt;     &lt;td&gt;6&lt;/td&gt;     &lt;td&gt;NYG&lt;/td&gt;     &lt;td&gt;NE&lt;/td&gt;     &lt;td&gt;NYG&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At SD&lt;/td&gt;     &lt;td&gt;2&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;  &lt;td&gt;At PIT&lt;/td&gt;     &lt;td&gt;3&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;     &lt;td&gt;PIT&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At PHI&lt;/td&gt;     &lt;td&gt;8&lt;/td&gt;     &lt;td&gt;CHI&lt;/td&gt;     &lt;td&gt;PHI&lt;/td&gt;     &lt;td&gt;PHI&lt;/td&gt;   &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-6250655037567525389?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/6250655037567525389/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-4-2011-nfl-season-week-9.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6250655037567525389'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6250655037567525389'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-4-2011-nfl-season-week-9.html' title='2011-11-4: 2011 NFL Season Week 9'/><author><name>Greg 
Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-pR8eb8NRQVQ/TrX3dI0Br9I/AAAAAAAAAH0/AjH6oVFfIt4/s72-c/nfl-week-91-120x120.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-4303315738317715581</id><published>2011-11-05T06:59:00.000-04:00</published><updated>2011-11-05T06:59:26.907-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='agile'/><category scheme='http://www.blogger.com/atom/ns#' term='acm'/><category scheme='http://www.blogger.com/atom/ns#' term='MITRE'/><title type='text'>2011-11-05: Agile Engineering - ODU's ACM Meeting</title><content type='html'>I was invited to present an overview on &lt;a href="http://en.wikipedia.org/wiki/Agile_software_development"&gt;Agile Development&lt;/a&gt; to &lt;a href="http://www.odu.edu/"&gt;Old Dominion University&lt;/a&gt;'s &lt;a href="http://www.cs.odu.edu/%7Eacm/"&gt;ACM chapter&lt;/a&gt;. More specifically, I gave an overview of the &lt;a href="http://en.wikipedia.org/wiki/Scrum_%28development%29"&gt;Scrum method&lt;/a&gt;. My work in &lt;a href="http://www.mitre.org/"&gt;MITRE&lt;/a&gt;'s Agile Engineering department has allowed me to practice Agile methodologies in the work force. 
Through this presentation, I shared my experiences with the members of the ACM.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-19kKCCIiKhw/TrUWyxLjOJI/AAAAAAAAAK4/oCPbog6rldo/s1600/ACM%2BFlyer%2B11_3%2BMeeting%2B%25281%2529-page-001.jpg" imageanchor="1" style="margin-left:1em; margin-right:1em"&gt;&lt;img border="0" height="320" width="247" src="http://2.bp.blogspot.com/-19kKCCIiKhw/TrUWyxLjOJI/AAAAAAAAAK4/oCPbog6rldo/s320/ACM%2BFlyer%2B11_3%2BMeeting%2B%25281%2529-page-001.jpg" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="width: 425px;" id="__ss_10032925"&gt;&lt;strong style="display: block; margin: 12px 0pt 4px;"&gt;&lt;a href="http://www.slideshare.net/jbrunelle008/agile-engineering-odu-acm" title="Agile Engineering - ODU ACM" target="_blank"&gt;Agile Engineering - ODU ACM&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/10032925" marginwidth="0" marginheight="0" frameborder="0" height="355" scrolling="no" width="425"&gt;&lt;/iframe&gt; &lt;div style="padding: 5px 0pt 12px;"&gt;View more &lt;a href="http://www.slideshare.net/" target="_blank"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/jbrunelle008" target="_blank"&gt;Justin Brunelle&lt;/a&gt; &lt;/div&gt;&lt;/div&gt;&lt;br /&gt;Agile engineering's main focus is a shift from a linear development model. &lt;a href="http://en.wikipedia.org/wiki/Waterfall_model"&gt;The Waterfall model&lt;/a&gt; is the classic example of a linear process model. Agile focuses on a cyclic and adaptive model. One of the main focuses of Agile is to receive and incorporate user feedback into the development process in order to produce a better product for the user. 
Also, it allows the product owner to garner greater control over a project.&lt;br /&gt;&lt;br /&gt;Each cycle in Agile includes all of the traditional development steps: Requirements, Design, Implementation, Verification, and Assessment/Maintenance. These cycles are sometimes called sprints. At the conclusion of each sprint, a fully releasable product should be available. That is, the end of the sprint produces a product that has been through all of the necessary development steps and can be sold as a subset of the end-goal product. This provides the benefit of having a complete and deliverable product even if funding is cut or production must be halted.&lt;br /&gt;&lt;br /&gt;An Agile development model provides the benefit of Failing Early. This means the development team can encounter and solve errors earlier in the development process and solve them when the costs are lower. An overly simplistic example would be the selection of a database. If MySQL is chosen at the beginning of a project using Agile, the development team would know earlier in the process if it was suitable for the solution. However, in the Waterfall model, it is possible to not understand the requirements until too late in the process to make a cheap switch.&lt;br /&gt;&lt;br /&gt;An ODU WS-DL alumnus (&lt;a href='http://www.carlton-northern.com/'&gt;Carlton Northern&lt;/a&gt;) has been instrumental in releasing a &lt;a href="http://mitre.org/work/tech_papers/2011/11_0401/"&gt;handbook for implementing Agile methods&lt;/a&gt;. This handbook provides guidelines for implementing Agile methods in the government (specifically the DoD) environment.&lt;br /&gt;&lt;br /&gt;These resources should serve as an introduction to Agile methods and the benefits of using this development model.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;-- Justin F. 
Brunelle&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-4303315738317715581?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/4303315738317715581/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-05-agile-engineering-odus-acm.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/4303315738317715581'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/4303315738317715581'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/11/2011-11-05-agile-engineering-odus-acm.html' title='2011-11-05: Agile Engineering - ODU&apos;s ACM Meeting'/><author><name>Justin F Brunelle</name><uri>http://www.blogger.com/profile/00580381835470799911</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='24' src='http://3.bp.blogspot.com/_AmWWXD7g2JA/TG0DJ_mEneI/AAAAAAAAAAM/_AvbhphHU8I/S220/2010-05-24+17.06.03.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/-19kKCCIiKhw/TrUWyxLjOJI/AAAAAAAAAK4/oCPbog6rldo/s72-c/ACM%2BFlyer%2B11_3%2BMeeting%2B%25281%2529-page-001.jpg' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-989019902362083576</id><published>2011-10-29T22:54:00.000-04:00</published><updated>2011-10-29T22:54:27.844-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='NFL'/><category scheme='http://www.blogger.com/atom/ns#' term='Football'/><title type='text'>2011-10-28: 2011 NFL Season Week 8</title><content type='html'>&lt;div class="separator" 
style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-_BeLOsGKCms/Tqx313lguuI/AAAAAAAAAHg/cRm_U3F9FHo/s1600/nfl-week8.png" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="108" src="http://1.bp.blogspot.com/-_BeLOsGKCms/Tqx313lguuI/AAAAAAAAAHg/cRm_U3F9FHo/s200/nfl-week8.png" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;I am back from San Diego and while I ran into some computer problems while I was there, thankfully the results of my trip were much better than the results of last week's predictions.&lt;br /&gt;&lt;br /&gt;Our discrete winner predictor is based on a Sequential Minimal Optimization (SMO) method for training the Support Vector Machine (SVM). In our experiments, the SVM has proven to be one of the best binary classifiers for predicting the winner/loser of NFL games.&lt;br /&gt;&lt;br /&gt;As I mentioned a few weeks ago, this year we have incorporated the betting line data into the classification model as a form of collective intelligence. The betting line data quickly began to dominate the output of the prediction model followed by passing efficiency and turnovers in importance to the outcome. The result of favoring the betting line is that the classifier usually follows the favorite and when there are a number of upsets like last week, then our results are below expectations.&lt;br /&gt;&lt;br /&gt;Indeed many of &lt;a href="http://espn.go.com/nfl/picks/_/week/7"&gt;the experts&lt;/a&gt; did not fare that well either last week. This led me to think about how "the experts" and the hypothetical average NFL fan make their choices. Are the fans influencing the betting line with their bets or is the line influencing the bets of the fans? Some form of endogeneity may be rearing its head and interfering with the model.&lt;br /&gt;&lt;br /&gt;Washington and Buffalo will be playing in Toronto this week. 
&amp;nbsp; &lt;br /&gt;&lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/td&gt;       &lt;td&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/td&gt;            &lt;td&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/td&gt;           &lt;td&gt;&lt;span title="Discrete"&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;                &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;    &lt;/tr&gt;&lt;tr&gt;         &lt;td&gt;At TEN&lt;/td&gt;     &lt;td&gt;3&lt;/td&gt;     &lt;td&gt;IND&lt;/td&gt;  &lt;td&gt;TEN&lt;/td&gt;     &lt;td&gt;TEN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At HOU&lt;/td&gt;     &lt;td&gt;7.2&lt;/td&gt;     &lt;td&gt;JAX&lt;/td&gt;     &lt;td&gt;At HOU&lt;/td&gt;     &lt;td&gt;At HOU&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At CAR&lt;/td&gt;     &lt;td&gt;3.4&lt;/td&gt;     &lt;td&gt;MIN&lt;/td&gt;     &lt;td&gt;CAR&lt;/td&gt;     &lt;td&gt;CAR&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;6.8&lt;/td&gt;     &lt;td&gt;At STL&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;  &lt;td&gt;At BAL&lt;/td&gt;     &lt;td&gt;9.3&lt;/td&gt;     &lt;td&gt;ARI&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At NYG&lt;/td&gt;     &lt;td&gt;8.4&lt;/td&gt;     &lt;td&gt;MIA&lt;/td&gt;     &lt;td&gt;NYG&lt;/td&gt;  &lt;td&gt;NYG&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At BUF&lt;/td&gt;     &lt;td&gt;4.4&lt;/td&gt;     &lt;td&gt;WAS&lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At DEN&lt;/td&gt;     &lt;td&gt;1.7&lt;/td&gt;     &lt;td&gt;DET&lt;/td&gt;     &lt;td&gt;DEN&lt;/td&gt;     &lt;td&gt;DET&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At PIT&lt;/td&gt;     &lt;td&gt;0.8&lt;/td&gt;     &lt;td&gt;NE&lt;/td&gt;     
&lt;td&gt;NE&lt;/td&gt;     &lt;td&gt;NE&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At SF&lt;/td&gt;     &lt;td&gt;5.6&lt;/td&gt;     &lt;td&gt;CLE&lt;/td&gt;     &lt;td&gt;SF&lt;/td&gt;     &lt;td&gt;SF&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;  &lt;td&gt;CIN&lt;/td&gt;     &lt;td&gt;2.7&lt;/td&gt;     &lt;td&gt;At SEA&lt;/td&gt;     &lt;td&gt;CIN&lt;/td&gt;     &lt;td&gt;CIN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At PHI&lt;/td&gt;     &lt;td&gt;4.2&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;     &lt;td&gt;PHI&lt;/td&gt;  &lt;td&gt;DAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;SD&lt;/td&gt;     &lt;td&gt;0.8&lt;/td&gt;     &lt;td&gt;At KC&lt;/td&gt;     &lt;td&gt;SD&lt;/td&gt;     &lt;td&gt;KC&lt;/td&gt; &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-989019902362083576?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/989019902362083576/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/10/2011-10-28-2011-nfl-season-week-8.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/989019902362083576'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/989019902362083576'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/10/2011-10-28-2011-nfl-season-week-8.html' title='2011-10-28: 2011 NFL Season Week 8'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-_BeLOsGKCms/Tqx313lguuI/AAAAAAAAAHg/cRm_U3F9FHo/s72-c/nfl-week8.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-4943524404287677824</id><published>2011-10-23T12:21:00.000-04:00</published><updated>2011-10-23T12:21:29.629-04:00</updated><title type='text'>2011-10-22: 2011 NFL Season Week 7</title><content type='html'>I have been on travel in San Diego all this week and I have had computer issues the entire time. Therefore I am posting these picks at the last minute and do not have much in the way of commentary especially since I was on an airplane during most of the games this past Sunday. I would like to thank my wife for typing in the commands I told her over the phone so that we could run the algorithms in order to get the picks done. I would not have been able to do it without her.&lt;br /&gt;&lt;br /&gt;&lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt;     &lt;td&gt;&lt;span title="Team Spotting Points in a Bet Against the Point Spread."&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;      &lt;td&gt;&lt;span title="Short for Point Spread. 
Number of Points Subtracted from Final Score of Favorite to Determine Winner of a Point Spread Based Wager."&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;           &lt;td&gt;&lt;span title="Team Receiving Points in a Bet With the Point Spread."&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;            &lt;td&gt;&lt;span title="Discrete."&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;               &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;TB&lt;/td&gt;     &lt;td&gt;2.2&lt;/td&gt;     &lt;td&gt;CHI&lt;/td&gt;     &lt;td&gt;TB&lt;/td&gt;     &lt;td&gt;CHI&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;at CAR&lt;/td&gt;     &lt;td&gt;4.9&lt;/td&gt;&lt;td&gt;WAS&lt;/td&gt;     &lt;td&gt;CAR&lt;/td&gt;     &lt;td&gt;WAS&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;SD&lt;/td&gt;     &lt;td&gt;0.3&lt;/td&gt;     &lt;td&gt;At NYJ&lt;/td&gt;     &lt;td&gt;SD&lt;/td&gt;     &lt;td&gt;NYJ&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At CLE&lt;/td&gt;     &lt;td&gt;2.7&lt;/td&gt;     &lt;td&gt;SEA&lt;/td&gt;     &lt;td&gt;CLE&lt;/td&gt;     &lt;td&gt;SEA&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At TEN&lt;/td&gt;     &lt;td&gt;5&lt;/td&gt;     &lt;td&gt;HOU&lt;/td&gt;     &lt;td&gt;TEN&lt;/td&gt;     &lt;td&gt;TEN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At MIA&lt;/td&gt;     &lt;td&gt;1.3&lt;/td&gt;     &lt;td&gt;DEN&lt;/td&gt;     &lt;td&gt;MIA&lt;/td&gt;     &lt;td&gt;DEN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At DET&lt;/td&gt;     &lt;td&gt;3.6&lt;/td&gt;     &lt;td&gt;ATL&lt;/td&gt;     &lt;td&gt;ATL&lt;/td&gt;     &lt;td&gt;DET&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At OAK&lt;/td&gt;     &lt;td&gt;5.5&lt;/td&gt;     &lt;td&gt;KC&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;PIT&lt;/td&gt;     &lt;td&gt;4.9&lt;/td&gt;     &lt;td&gt;At ARI&lt;/td&gt;     
&lt;td&gt;PIT&lt;/td&gt;     &lt;td&gt;PIT&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At DAL&lt;/td&gt;     &lt;td&gt;10.3&lt;/td&gt;     &lt;td&gt;STL&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;1.5&lt;/td&gt;     &lt;td&gt;At MIN&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At NO&lt;/td&gt;     &lt;td&gt;4.5&lt;/td&gt;     &lt;td&gt;IND&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;BAL&lt;/td&gt;     &lt;td&gt;4&lt;/td&gt;     &lt;td&gt;At JAX&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;     &lt;td&gt;JAX&lt;/td&gt;   &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-4943524404287677824?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/4943524404287677824/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/10/2011-10-22-2011-nfl-season-week-7.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/4943524404287677824'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/4943524404287677824'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/10/2011-10-22-2011-nfl-season-week-7.html' title='2011-10-22: 2011 NFL Season Week 7'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' 
src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-6290335335110176774</id><published>2011-10-16T00:15:00.000-04:00</published><updated>2011-10-16T00:15:05.433-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='NFL'/><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><category scheme='http://www.blogger.com/atom/ns#' term='Football'/><title type='text'>2011-10-14: 2011 NFL Season Week 6</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-TicgypH_xb0/TphjB5BdGOI/AAAAAAAAAHM/ic12xmUSKmk/s1600/nfl-week6.png" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="174" src="http://3.bp.blogspot.com/-TicgypH_xb0/TphjB5BdGOI/AAAAAAAAAHM/ic12xmUSKmk/s320/nfl-week6.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;Our neural network predictor was 68% correct straight up this past week but overall our results were not awe inspiring. Two of the games that almost everyone got wrong were the Eagles-Bills and Seahawks-Giants games. In both games the favorite lost and one of the crucial stats was interceptions. Michael Vick of the Eagles threw four interceptions and Eli Manning threw three for the Giants. This is completely out of character for either of the quarterbacks.&lt;br /&gt;&lt;br /&gt;So far this year our Support Vector Machine (SVM) predictor has tracked the favorites very closely. With the addition of the line data this year, the line value has driven the output of the SVM. 
Ignoring the Line values, passing efficiency and turnovers forced by the defense have been two of the most dominant statistics.&lt;br /&gt;&lt;br /&gt;Predictions for week 6:&lt;br /&gt;&lt;br /&gt;&lt;table border="0" cellpadding="3" cellspacing="6" cols="4" style="width: 480px;"&gt;&lt;tbody&gt;&lt;tr&gt;     &lt;td&gt;&lt;span title="Team Spotting Points in a Bet Against the Point Spread."&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;td&gt;&lt;span title="Short for Point Spread. Number of Points Subtracted from Final Score of Favorite to Determine Winner of a Point Spread Based Wager."&gt;&lt;b&gt;Spread&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;td&gt;&lt;span title="Team Receiving Points in a Bet With the Point Spread."&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;td&gt;&lt;span title="Discrete."&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;td&gt;&lt;span title="Pagerank"&gt;&lt;b&gt;Pagerank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At GB&lt;/td&gt;     &lt;td&gt;14.5&lt;/td&gt;     &lt;td&gt;STL&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At PIT&lt;/td&gt;     &lt;td&gt;9.5&lt;/td&gt;     &lt;td&gt;JAX&lt;/td&gt;     &lt;td&gt;PIT&lt;/td&gt;     &lt;td&gt;PIT&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;PHI&lt;/td&gt;     &lt;td&gt;-0.7&lt;/td&gt;     &lt;td&gt;At WAS&lt;/td&gt;     &lt;td&gt;PHI&lt;/td&gt;     &lt;td&gt;WAS&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At DET&lt;/td&gt;     &lt;td&gt;3.4&lt;/td&gt;     &lt;td&gt;SF&lt;/td&gt;     &lt;td&gt;DET&lt;/td&gt;     &lt;td&gt;SF &lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At ATL&lt;/td&gt;     &lt;td&gt;-0.3&lt;/td&gt;     &lt;td&gt;CAR&lt;/td&gt;     &lt;td&gt;ATL&lt;/td&gt;     &lt;td&gt;ATL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At CIN&lt;/td&gt;     &lt;td&gt;3.2&lt;/td&gt;     &lt;td&gt;IND&lt;/td&gt;     &lt;td&gt;CIN&lt;/td&gt; 
    &lt;td&gt;CIN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At NYG&lt;/td&gt;     &lt;td&gt;4.7&lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;     &lt;td&gt;NYG &lt;/td&gt;     &lt;td&gt;BUF&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At BAL&lt;/td&gt;     &lt;td&gt;5.6&lt;/td&gt;     &lt;td&gt;HOU&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;     &lt;td&gt;BAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At OAK&lt;/td&gt;     &lt;td&gt;6.4&lt;/td&gt;     &lt;td&gt;CLE&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At NE&lt;/td&gt;     &lt;td&gt;8.6&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;     &lt;td&gt;NE&lt;/td&gt;     &lt;td&gt;DAL&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;-4.5&lt;/td&gt;     &lt;td&gt;At TB&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At CHI&lt;/td&gt;     &lt;td&gt;-1.7&lt;/td&gt;     &lt;td&gt;MIN&lt;/td&gt;     &lt;td&gt;CHI&lt;/td&gt;     &lt;td&gt;MIN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At NYJ&lt;/td&gt;     &lt;td&gt;3.8&lt;/td&gt;     &lt;td&gt;MIA&lt;/td&gt;     &lt;td&gt;MIA&lt;/td&gt;     &lt;td&gt;NYJ &lt;/td&gt;   &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-6290335335110176774?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/6290335335110176774/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/10/2011-10-14-2011-nfl-season-week-6.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6290335335110176774'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/953024975153422094/posts/default/6290335335110176774'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/10/2011-10-14-2011-nfl-season-week-6.html' title='2011-10-14: 2011 NFL Season Week 6'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-TicgypH_xb0/TphjB5BdGOI/AAAAAAAAAHM/ic12xmUSKmk/s72-c/nfl-week6.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-6309311214713077523</id><published>2011-10-07T10:45:00.001-04:00</published><updated>2011-10-07T10:47:13.642-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='NFL'/><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><category scheme='http://www.blogger.com/atom/ns#' term='Football'/><title type='text'>2011-10-06: Week 5 2011 NFL Season</title><content type='html'>&lt;a href="http://1.bp.blogspot.com/-EX3YawYH0gY/To0UkZyshYI/AAAAAAAAAHE/b4jGBDxe5PA/s1600/nfl-week5.png" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="107" src="http://1.bp.blogspot.com/-EX3YawYH0gY/To0UkZyshYI/AAAAAAAAAHE/b4jGBDxe5PA/s200/nfl-week5.png" width="200" /&gt;&lt;/a&gt;Week 4 performance was rather pleasing. Straight up and against the spread were 80% and 75% correct. Buffalo lost on that last-minute field goal, and why Philadelphia fell apart in the second half and lost a 20-point lead has been the subject of numerous commentators' discussions. 
Hopefully the predictions continue to perform at this level but pessimism indicates that they will regress to the mean.&amp;nbsp; &lt;br /&gt;&lt;br /&gt;Week 5 of the NFL season means the commencement of bye weeks. This week's teams on bye are the Baltimore Ravens, Cleveland Browns, Dallas Cowboys, Miami Dolphins, St. Louis Rams and Washington Redskins.&lt;br /&gt;&lt;br /&gt;For comparison purposes we have included one of the better performing algorithms from the past two years. The &lt;a href="http://infolab.stanford.edu/%7Ebackrub/google.html"&gt;PageRank algorithm &lt;/a&gt; that we modified to indicate strong teams averaged 68% for straight up predictions over the past two years. A more detailed explanation is provided in one of our &lt;a href="http://ws-dl.blogspot.com/2009/12/nfl-playoff-outlook.html"&gt;previous posts&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;The predictions for Week 5:&lt;br /&gt;&lt;br /&gt;&lt;table border="0" cellpadding="2" cellspacing="8" cols="5" style="width: 562px;"&gt;&lt;tbody&gt;&lt;tr&gt;     &lt;td&gt;&lt;span title="Favorite"&gt;&lt;b&gt;Favorite&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;td&gt;&lt;span title="Line"&gt;&lt;b&gt;Line&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;td&gt;&lt;span title="Underdog"&gt;&lt;b&gt;Underdog&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;td&gt;&lt;span title="Discrete"&gt;&lt;b&gt;Discrete&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;     &lt;td&gt;&lt;span title="PageRank"&gt;&lt;b&gt;PageRank&lt;/b&gt;&lt;/span&gt;&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At IND&lt;/td&gt;     &lt;td&gt;6.3&lt;/td&gt;     &lt;td&gt;KC&lt;/td&gt;     &lt;td&gt;IND&lt;/td&gt;     &lt;td&gt;KC&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At MIN&lt;/td&gt;     &lt;td&gt;4.6&lt;/td&gt;     &lt;td&gt;ARI&lt;/td&gt;     &lt;td&gt;MIN&lt;/td&gt;     &lt;td&gt;ARI&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At BUF&lt;/td&gt;     &lt;td&gt;1&lt;/td&gt;     &lt;td&gt;PHI&lt;/td&gt;     &lt;td&gt;PHI&lt;/td&gt;     
&lt;td&gt;BUF&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At HOU&lt;/td&gt;     &lt;td&gt;3.4&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;     &lt;td&gt;HOU&lt;/td&gt;     &lt;td&gt;OAK&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;    &lt;td&gt;At CAR&lt;/td&gt;     &lt;td&gt;0.2&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;     &lt;td&gt;NO&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;CIN&lt;/td&gt;     &lt;td&gt;3.2&lt;/td&gt;     &lt;td&gt;At JAC&lt;/td&gt;     &lt;td&gt;JAC&lt;/td&gt;     &lt;td&gt;JAC&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;    &lt;td&gt;At PIT&lt;/td&gt;     &lt;td&gt;1&lt;/td&gt;     &lt;td&gt;TEN&lt;/td&gt;     &lt;td&gt;PIT&lt;/td&gt;     &lt;td&gt;TEN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At NYG&lt;/td&gt;     &lt;td&gt;9.7&lt;/td&gt;     &lt;td&gt;SEA&lt;/td&gt;     &lt;td&gt;NYG&lt;/td&gt;     &lt;td&gt;NYG&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At SF&lt;/td&gt;     &lt;td&gt;2&lt;/td&gt;     &lt;td&gt;TB&lt;/td&gt;     &lt;td&gt;TB&lt;/td&gt;     &lt;td&gt;SF&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At NE&lt;/td&gt;     &lt;td&gt;11.3&lt;/td&gt;     &lt;td&gt;NYJ&lt;/td&gt;     &lt;td&gt;NE&lt;/td&gt;     &lt;td&gt;NYJ&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;SD&lt;/td&gt;     &lt;td&gt;1.3&lt;/td&gt;     &lt;td&gt;At DEN&lt;/td&gt;     &lt;td&gt;SD&lt;/td&gt;     &lt;td&gt;DEN&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;2.9&lt;/td&gt;     &lt;td&gt;At ATL&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;     &lt;td&gt;GB&lt;/td&gt;  &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;At DET&lt;/td&gt;     &lt;td&gt;5.3&lt;/td&gt;     &lt;td&gt;CHI&lt;/td&gt;     &lt;td&gt;CHI&lt;/td&gt;     &lt;td&gt;DET&lt;/td&gt;   &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-6309311214713077523?l=ws-dl.blogspot.com' alt='' 
/&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/6309311214713077523/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/10/2011-10-06-week-5-2011-nfl-season.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6309311214713077523'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6309311214713077523'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/10/2011-10-06-week-5-2011-nfl-season.html' title='2011-10-06: Week 5 2011 NFL Season'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-EX3YawYH0gY/To0UkZyshYI/AAAAAAAAAHE/b4jGBDxe5PA/s72-c/nfl-week5.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-6785534522393851292</id><published>2011-10-02T12:04:00.001-04:00</published><updated>2011-10-02T15:13:19.475-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><category scheme='http://www.blogger.com/atom/ns#' term='Football'/><title type='text'>2011-10-02: 2011 NFL Season Under Way</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-0Js_rGcRSsM/ToieoQEPzhI/AAAAAAAAAG8/iVyB_4xWF60/s1600/nfl2011.png" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img border="0" height="200" 
src="http://3.bp.blogspot.com/-0Js_rGcRSsM/ToieoQEPzhI/AAAAAAAAAG8/iVyB_4xWF60/s200/nfl2011.png" width="200" /&gt;&lt;/a&gt;&lt;/div&gt;The 2011 NFL season is underway and we are ready to put some of our improved algorithms to the test. Last year we primarily used box score data for our predictions. This resulted in adequate performance but nothing spectacular.&lt;br /&gt;&lt;br /&gt;This year we are increasing the collective intelligence quotient in our algorithm by incorporating betting line data and line movement. The purpose of the betting line is to make the sportsbooks money by splitting the betting population in half. The line will move as a result of betting pressure presented by the betting population. For example, suppose the favorite team is favored by 5 points. Many bettors may feel that the favorite team is not that good and place bets on the underdog. With an unbalanced wager profile the sportsbook has the potential to lose money so they will move the bet line until the incoming bets are equal on each side. This movement is a form of collective intelligence of the betting population.&lt;br /&gt;&lt;br /&gt;Another change this year is that in addition to choosing the winner as a discrete value (winner or loser) we will also predict the line value as a continuous variable. This line value is what we think the line should be. If the favorite team is favored by 5pts and we predict 3pts it may be wise to bet on the underdog. 
However if the favorite team is favored by 2pts and we predict 7pts, the favorite is the better option.&lt;br /&gt;&lt;br /&gt;Without further ado, here is what we are looking at for week 4:&lt;br /&gt;&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;th&gt;Time&lt;/th&gt;&lt;th&gt;Favorite&lt;/th&gt;&lt;th&gt;Line&lt;/th&gt;&lt;th&gt;Underdog&lt;/th&gt;&lt;th&gt;Discrete&lt;/th&gt;&lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 1:00 ET&lt;/td&gt;     &lt;td&gt;At Dallas&lt;/td&gt;     &lt;td&gt;3.1&lt;/td&gt;     &lt;td&gt;Detroit&lt;/td&gt;     &lt;td&gt;Dallas&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 1:00 ET&lt;/td&gt;      &lt;td&gt;New Orleans&lt;/td&gt;     &lt;td&gt;2.5&lt;/td&gt;     &lt;td&gt;At Jacksonville&lt;/td&gt;     &lt;td&gt;New Orleans&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;      &lt;td&gt;10/2 1:00 ET&lt;/td&gt;     &lt;td&gt;At Philadelphia&lt;/td&gt;     &lt;td&gt;8.6&lt;/td&gt;     &lt;td&gt;San Francisco&lt;/td&gt;     &lt;td&gt;Philadelphia&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 1:00 ET&lt;/td&gt;     &lt;td&gt;Washington&lt;/td&gt;     &lt;td&gt;2.2&lt;/td&gt;     &lt;td&gt;At St. 
Louis&lt;/td&gt;     &lt;td&gt;Washington&lt;/td&gt;    &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 1:00 ET&lt;/td&gt;     &lt;td&gt;Tennessee&lt;/td&gt;     &lt;td&gt;4.3&lt;/td&gt;     &lt;td&gt;At Cleveland&lt;/td&gt;      &lt;td&gt;Tennessee&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 1:00 ET&lt;/td&gt;     &lt;td&gt;At Cincinnati&lt;/td&gt;     &lt;td&gt;2.3&lt;/td&gt;      &lt;td&gt;Buffalo&lt;/td&gt;     &lt;td&gt;Buffalo&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 1:00 ET&lt;/td&gt;     &lt;td&gt;Minnesota&lt;/td&gt;      &lt;td&gt;3.7&lt;/td&gt;     &lt;td&gt;At Kansas City&lt;/td&gt;     &lt;td&gt;Kansas City&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 1:00 ET&lt;/td&gt;      &lt;td&gt;Carolina&lt;/td&gt;     &lt;td&gt;0.7&lt;/td&gt;     &lt;td&gt;At Chicago&lt;/td&gt;     &lt;td&gt;Chicago&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;      &lt;td&gt;10/2 1:00 ET&lt;/td&gt;     &lt;td&gt;At Houston&lt;/td&gt;     &lt;td&gt;0.9&lt;/td&gt;     &lt;td&gt;Pittsburgh&lt;/td&gt;     &lt;td&gt;Houston&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 4:05 ET&lt;/td&gt;     &lt;td&gt;Atlanta&lt;/td&gt;     &lt;td&gt;2.0&lt;/td&gt;     &lt;td&gt;At Seattle&lt;/td&gt;     &lt;td&gt;Atlanta&lt;/td&gt;    &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 4:05 ET&lt;/td&gt;     &lt;td&gt;NY Giants&lt;/td&gt;     &lt;td&gt;0.3&lt;/td&gt;     &lt;td&gt;At Arizona&lt;/td&gt;      &lt;td&gt;NY Giants&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 4:15 ET&lt;/td&gt;     &lt;td&gt;At San Diego&lt;/td&gt;     &lt;td&gt;5.2&lt;/td&gt;      &lt;td&gt;Miami&lt;/td&gt;     &lt;td&gt;San Diego&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 4:15 ET&lt;/td&gt;     &lt;td&gt;At Green Bay&lt;/td&gt;      &lt;td&gt;9.5&lt;/td&gt;     &lt;td&gt;Denver&lt;/td&gt;     &lt;td&gt;Green Bay&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;     &lt;td&gt;10/2 4:15 ET&lt;/td&gt;      &lt;td&gt;New England&lt;/td&gt;     &lt;td&gt;4.5&lt;/td&gt;     &lt;td&gt;At Oakland&lt;/td&gt;    
 &lt;td&gt;New England&lt;/td&gt;   &lt;/tr&gt;&lt;tr&gt;      &lt;td&gt;10/2 8:25 ET&lt;/td&gt;     &lt;td&gt;At Baltimore&lt;/td&gt;     &lt;td&gt;6.8&lt;/td&gt;     &lt;td&gt;NY Jets&lt;/td&gt;     &lt;td&gt;Baltimore&lt;/td&gt;        &lt;/tr&gt;&lt;tr&gt;    &lt;td&gt;10/3 8:35 ET&lt;/td&gt;    &lt;td&gt;At Tampa Bay&lt;/td&gt;    &lt;td&gt;1.3&lt;/td&gt;    &lt;td&gt;Indianapolis&lt;/td&gt;    &lt;td&gt;Tampa Bay&lt;/td&gt;      &lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;--Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-6785534522393851292?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/6785534522393851292/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/10/2011-nfl-season-under-way.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6785534522393851292'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6785534522393851292'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/10/2011-nfl-season-under-way.html' title='2011-10-02: 2011 NFL Season Under Way'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-0Js_rGcRSsM/ToieoQEPzhI/AAAAAAAAAG8/iVyB_4xWF60/s72-c/nfl2011.png' height='72' 
width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-9219903284545929456</id><published>2011-09-14T14:11:00.010-04:00</published><updated>2011-09-15T10:37:50.638-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='dissertation'/><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='Synchronicity'/><category scheme='http://www.blogger.com/atom/ns#' term='digital preservation'/><title type='text'>2011-09-14: Dissertation Completed</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-98nWkZgHrmo/TnDu669wBPI/AAAAAAAAAEY/Xl0_IYNeHm0/s1600/odu_seal.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 100px; height: 100px;" src="http://2.bp.blogspot.com/-98nWkZgHrmo/TnDu669wBPI/AAAAAAAAAEY/Xl0_IYNeHm0/s200/odu_seal.png" alt="" id="BLOGGER_PHOTO_ID_5652280228378051826" border="0" /&gt;&lt;/a&gt;I am very happy to write about the successful completion of my dissertation work in the &lt;a href="http://www.cs.odu.edu/"&gt;Computer Science Department&lt;/a&gt; at &lt;a href="http://www.odu.edu/"&gt;Old Dominion University&lt;/a&gt;.&lt;br /&gt;My dissertation is titled "Using the Web Infrastructure for Real Time Recovery of Missing Web Pages" and, as the title suggests, it makes several contributions in the areas of digital data preservation and information retrieval. In brief, the &lt;a href="http://www.cs.odu.edu/%7Emln/pubs/phd/klein-phd-dissertation.pdf"&gt;dissertation&lt;/a&gt; evaluates multiple techniques for a "just-in-time" approach to web page preservation. 
We, for example, investigate the suitability of &lt;a href="http://ws-dl.blogspot.com/2010/07/travel-report-for-hypertext-and-jcdl.html"&gt;lexical signatures and web page titles to rediscover missing content&lt;/a&gt;. These two methods are based on old copies of the pages provided by the &lt;a href="http://mementoweb.org/"&gt;Memento&lt;/a&gt; framework. We also analyze the performance of tags that users have created to annotate pages as well as &lt;a href="http://ws-dl.blogspot.com/2011/07/2011-07-05-jcdl-2011-trip-report.html"&gt;the most salient terms derived from a page's link neighborhood&lt;/a&gt; as methods to find missing pages.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/-cdOGgkBczZQ/TnD0qg4ogiI/AAAAAAAAAEg/Kq6Aq9qWDFE/s1600/shrimp.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 100px; height: 78px;" src="http://1.bp.blogspot.com/-cdOGgkBczZQ/TnD0qg4ogiI/AAAAAAAAAEg/Kq6Aq9qWDFE/s200/shrimp.png" alt="" id="BLOGGER_PHOTO_ID_5652286543569125922" border="0" /&gt;&lt;/a&gt;On the practical side, the dissertation introduces &lt;a href="http://ws-dl.blogspot.com/2011/06/2011-06-10-launching-synchronicity.html"&gt;Synchronicity&lt;/a&gt;, a Firefox add-on that implements all evaluated methods for web page recovery. It catches 404 "Page not Found"  errors when they occur and offers alternatives in real-time, while the user is browsing.  
I concluded writing my thesis in June, defended on July 18th and got the degree officially awarded in August 2011.&lt;br /&gt;&lt;div style="width: 425px;" id="__ss_9244464"&gt; &lt;strong style="display: block; margin: 12px 0pt 4px;"&gt;&lt;a href="http://www.slideshare.net/martinklein0815/dissertation-defense-9244464" title="Dissertation Defense" target="_blank"&gt;Dissertation Defense&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/9244464" marginwidth="0" marginheight="0" frameborder="0" height="355" scrolling="no" width="425"&gt;&lt;/iframe&gt; &lt;div style="padding: 5px 0pt 12px;"&gt; View more &lt;a href="http://www.slideshare.net/" target="_blank"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/martinklein0815" target="_blank"&gt;Martin Klein&lt;/a&gt; &lt;/div&gt; &lt;/div&gt;&lt;br /&gt;&lt;br /&gt;It goes without saying this work would not have been possible without the outstanding support from my dissertation committee. It consisted of the internal members &lt;a href="http://www.cs.odu.edu/%7Emweigle/"&gt;Dr. Michele C. Weigle&lt;/a&gt;, &lt;a href="http://www.cs.odu.edu/%7Eyaohang/"&gt;Dr. Yaohang Li&lt;/a&gt; and &lt;a href="http://www.cs.odu.edu/%7Ezubair/"&gt;Dr. Mohammad Zubair&lt;/a&gt; and the external members &lt;a href="http://public.lanl.gov/herbertv/home/"&gt;Dr. Herbert Van de Sompel&lt;/a&gt; and &lt;a href="http://21sided.o-r-g.org/"&gt;Dr. Robert Sanderson&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;I am deeply grateful to my advisor &lt;a href="http://www.cs.odu.edu/%7Emln/"&gt;Dr. Michael L. Nelson&lt;/a&gt; for his eternal patience and superior guidance and mentoring. He truly is a role model for all aspiring academics (and he took me to a &lt;a href="http://www.hokiesports.com/football/stats/showstats.html?13279"&gt;Hokies football game&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;I am now looking back at six years of taking classes (MS and Ph.D. 
level), passing diagnostic and candidacy exams, conducting countless experiments, publishing over 20 research papers (and writing even more), teaching two classes, giving numerous guest lectures and I can finally give an answer to the ever annoying question: "When are you going to be done?".&lt;br /&gt;&lt;br /&gt;&lt;a href="http://2.bp.blogspot.com/-BpRwu1jfIK8/TnD3x3cYSmI/AAAAAAAAAEw/Eq2Xp2lOBYs/s1600/lanl_logo.gif"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 92px;" src="http://2.bp.blogspot.com/-BpRwu1jfIK8/TnD3x3cYSmI/AAAAAAAAAEw/Eq2Xp2lOBYs/s200/lanl_logo.gif" alt="" id="BLOGGER_PHOTO_ID_5652289968418605666" border="0" /&gt;&lt;/a&gt;As my next step, I am very excited to join the &lt;a href="http://library.lanl.gov/"&gt;Research Library&lt;/a&gt; at the &lt;a href="http://www.lanl.gov/"&gt;Los Alamos National Laboratory&lt;/a&gt; as a Postdoctoral Researcher. I will work with Herbert and Rob on Memento and on making time-based access of web resources more convenient and, of course, will enjoy the green and red chili!&lt;br /&gt;&lt;br /&gt;--&lt;br /&gt;martin&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-9219903284545929456?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/9219903284545929456/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/09/2011-09-14-dissertation-completed.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/9219903284545929456'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/9219903284545929456'/><link rel='alternate' type='text/html' 
href='http://ws-dl.blogspot.com/2011/09/2011-09-14-dissertation-completed.html' title='2011-09-14: Dissertation Completed'/><author><name>martin klein</name><uri>http://www.blogger.com/profile/13289299995516244353</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='http://4.bp.blogspot.com/_cJsqIApA0c0/SkAya34Wh5I/AAAAAAAAAAM/XvoXQjYUpzc/s1600-R/mk.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/-98nWkZgHrmo/TnDu669wBPI/AAAAAAAAAEY/Xl0_IYNeHm0/s72-c/odu_seal.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-110439779704459295</id><published>2011-08-28T19:39:00.008-04:00</published><updated>2011-09-04T22:15:43.826-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='kdd2011'/><category scheme='http://www.blogger.com/atom/ns#' term='trip report'/><category scheme='http://www.blogger.com/atom/ns#' term='travel'/><category scheme='http://www.blogger.com/atom/ns#' term='research'/><category scheme='http://www.blogger.com/atom/ns#' term='social media'/><category scheme='http://www.blogger.com/atom/ns#' term='Data Mining'/><category scheme='http://www.blogger.com/atom/ns#' term='Conference'/><category scheme='http://www.blogger.com/atom/ns#' term='acm'/><category scheme='http://www.blogger.com/atom/ns#' term='kdd'/><title type='text'>2011-08-28: KDD 2011 Trip Report</title><content type='html'>&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;&lt;div&gt;&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;Author:&amp;nbsp;&lt;a href="http://carlton-northern.com/"&gt;Carlton Northern&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The &lt;a href="http://www.kdd.org/kdd2011/"&gt;SIGKDD 2011&lt;/a&gt; conference took place August 21 - 24 at the Hyatt Manchester in San Diego, CA. 
&amp;nbsp;Researchers from all over the world interested in knowledge discovery and data mining were in&amp;nbsp;attendance. &amp;nbsp;This conference in particular has a heavy statistical analysis flavor and many presentations were math intensive.&lt;br /&gt;&lt;br /&gt;I&amp;nbsp;was invited to present my masters project research at the &lt;a href="http://vivo-onto.slis.indiana.edu/MDS2011/"&gt;Mining Data Semantics&lt;/a&gt; (MDS2011) Workshop of KDD. &amp;nbsp;In this paper, we present an approach to find social media profiles of people from an organization. &amp;nbsp;This is possible due to the links created between members of an organization. For instance, co-workers or students will likely friend each other creating hyperlinks between their respective accounts. &amp;nbsp;These links, if public, can be mined and used to disambiguate other profiles that may share the same names as those individuals we are searching for. &amp;nbsp;The following figure shows the amount of profiles found from the ODU Computer Science student body for each respective social media site and the links found between them.&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-U1K4XeQpSZw/TlrL2MkYVsI/AAAAAAAABsU/3U9zJOXwy8E/s1600/Picture1.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="356" src="http://3.bp.blogspot.com/-U1K4XeQpSZw/TlrL2MkYVsI/AAAAAAAABsU/3U9zJOXwy8E/s400/Picture1.png" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;This picture represents the actual students themselves and the links between them. 
&amp;nbsp;Black nodes are undergrads, green nodes are grads, and red nodes are members of the WS-DL research group.&lt;br /&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-JgsUQIRpmUQ/Tlz9NG1wFII/AAAAAAAABsY/AmD7sVK3kNs/s1600/Picture2.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="398" src="http://3.bp.blogspot.com/-JgsUQIRpmUQ/Tlz9NG1wFII/AAAAAAAABsY/AmD7sVK3kNs/s400/Picture2.jpg" width="400" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;These are the slides:&lt;br /&gt;&lt;div id="__ss_8964483" style="width: 425px;"&gt;&lt;strong style="display: block; margin: 12px 0 4px;"&gt;&lt;a href="http://www.slideshare.net/carlton.northern/an-unsupervised-approach-to-discovering-and-disambiguating-social-media-profiles" title="An Unsupervised Approach to Discovering and Disambiguating Social Media Profiles"&gt;An Unsupervised Approach to Discovering and Disambiguating Social Media Profiles&lt;/a&gt;&lt;/strong&gt;&lt;object height="355" id="__sse8964483" width="425"&gt;&lt;param name="movie" value="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=mds2011-110822134348-phpapp02&amp;amp;stripped_title=an-unsupervised-approach-to-discovering-and-disambiguating-social-media-profiles&amp;amp;userName=carlton.northern" /&gt;&lt;param name="allowFullScreen" value="true"/&gt;&lt;param name="allowScriptAccess" value="always"/&gt;&lt;embed name="__sse8964483" src="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=mds2011-110822134348-phpapp02&amp;amp;stripped_title=an-unsupervised-approach-to-discovering-and-disambiguating-social-media-profiles&amp;amp;userName=carlton.northern" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="355"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;br /&gt;&lt;div style="padding: 5px 0 12px;"&gt;View more &lt;a 
href="http://www.slideshare.net/"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/carlton.northern"&gt;carlton.northern&lt;/a&gt;.&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;Here is the paper:&lt;br /&gt;&lt;div id="__ss_8964542" style="width: 477px;"&gt;&lt;strong style="display: block; margin: 12px 0 4px;"&gt;&lt;a href="http://www.slideshare.net/carlton.northern/mds-2011-paper-an-unsupervised-approach-to-discovering-and-disambiguating-social-media-profiles" title="MDS 2011 Paper: An Unsupervised Approach to Discovering and Disambiguating Social Media Profiles"&gt;MDS 2011 Paper: An Unsupervised Approach to Discovering and Disambiguating Social Media Profiles&lt;/a&gt;&lt;/strong&gt;&lt;object height="510" id="__sse8964542" width="477"&gt;&lt;param name="movie" value="http://static.slidesharecdn.com/swf/doc_player.swf?doc=mds2011-110822135429-phpapp01&amp;amp;stripped_title=mds-2011-paper-an-unsupervised-approach-to-discovering-and-disambiguating-social-media-profiles&amp;amp;userName=carlton.northern" /&gt;&lt;param name="allowFullScreen" value="true"/&gt;&lt;param name="allowScriptAccess" value="always"/&gt;&lt;embed name="__sse8964542" src="http://static.slidesharecdn.com/swf/doc_player.swf?doc=mds2011-110822135429-phpapp01&amp;amp;stripped_title=mds-2011-paper-an-unsupervised-approach-to-discovering-and-disambiguating-social-media-profiles&amp;amp;userName=carlton.northern" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="477" height="510"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;br /&gt;&lt;div style="padding: 5px 0 12px;"&gt;View more &lt;a href="http://www.slideshare.net/"&gt;documents&lt;/a&gt; from &lt;a href="http://www.slideshare.net/carlton.northern"&gt;carlton.northern&lt;/a&gt;.&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;I've synopsized some of the interesting presentations&amp;nbsp;from the 
conference:&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;a href="http://www.stanford.edu/~boyd/"&gt;Stephen Boyd&lt;/a&gt; - Stanford University "From Embedded Real-Time to Large-Scale Distributed". &amp;nbsp;Stephen Boyd's talk focused on his current research area of convex&amp;nbsp;optimization. &amp;nbsp;He explained that convex optimization is a mathematical technique in which many complex problems of model fitting, resource allocation, engineering design, etc. can be transformed to a simple convex optimization problem to be solved and then transformed back into the original problem to get the solution. &amp;nbsp;He went on to explain how this can be implemented in real-time embedded systems such as a hard disk drive head seek problem, to large distributed systems such as California's power grid.&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;a href="http://www.amolghoting.net/blog/"&gt;Amol Ghoting&lt;/a&gt; - IBM "NIMBLE: A Toolkit for the Implementation of Parallel Data Mining and Machine Learning Algorithms on MapReduce". &amp;nbsp;Use Hadoop to write a map function and a reduce function where you can map anything to a (key, value) pair. &amp;nbsp;The problem with Hadoop is that it has a two-stage data flow which can be cumbersome for programming. &amp;nbsp;Also, job scheduling and data management is handled by the user. &amp;nbsp;Lastly, code-reuse and portability is diminished. &amp;nbsp;This toolkit tries to make the key features of Hadoop available to developers but without a Hadoop specific implementation. &amp;nbsp;NIMBLE actually decouples algorithm computation from data management, parallel communications and control. 
&amp;nbsp;It does this through using a series of basic datasets and basic tasks that create a DAG. &amp;nbsp;Tasks can spawn other tasks. &amp;nbsp;With this structure in place, simultaneous data and tasks parallelism is achievable.&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;div class="MsoNormal"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;a href="http://www.cbse.ucsc.edu/people/haussler"&gt;David Haussler&lt;/a&gt; – UC Santa Cruz “Cancer Genomics”. &amp;nbsp;&lt;/span&gt;&lt;span style="line-height: 115%;"&gt;DNA sequencing cost has reduced dramatically. &amp;nbsp;DNA sequencing was following Moore’s law but is now reducing cost 10 fold every two years.&amp;nbsp; Can now cheaply sequence entire genomes.&amp;nbsp; Created the Cancer Genome Atlas.&amp;nbsp; 10,000 tumors will be sequenced in the next two years using this Atlas.&amp;nbsp; Cancer genome sequencing will soon be a standard clinical practice.&amp;nbsp; Because each person’s DNA is different, and each tumor resulting from a person’s DNA is different, a huge computational processing problem looms in the near distant future.&lt;/span&gt;&lt;span style="line-height: 115%;"&gt;&amp;nbsp;&lt;/span&gt;&lt;/div&gt;&lt;span style="font-family: inherit; line-height: 115%;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family: inherit; line-height: 115%;"&gt;&lt;a href="http://www.cs.ucsb.edu/~metwally/"&gt;Ahmed Metwally&lt;/a&gt; - Google. "Estimating the number of people behind an IP Address". &amp;nbsp;&lt;/span&gt;Most research assumes that there is 1 person using 1 IP address, but this is not the case. &amp;nbsp;IP's also change size of users, for instance, a hotel with a conference will have many more users possibly using the same IP address than usual. &amp;nbsp;So, how would one estimate the amount of these users in a non-intrusive way? &amp;nbsp;One method is to look at trusted cookie counts. &amp;nbsp;Another method is to look at diverse traffic. 
&amp;nbsp;Google caps traffic volume per IP to stop people from gaming the system using the same IP address. &amp;nbsp;Google knows how many users share an IP address because they are logged in with a username and password to Google's sites. &amp;nbsp;However, some of Google's traffic is from users that don't have a Google account. &amp;nbsp;This research is for those who want to filter users without asking them for any identification, thus preserving their privacy. &amp;nbsp;This method is currently being used at Google for determining click fraud.&lt;br /&gt;&lt;span style="font-family: inherit; line-height: 115%;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family: inherit; line-height: 115%;"&gt;&lt;a href="http://research.google.com/pubs/author38217.html"&gt;D. Scully&lt;/a&gt; - Google "Detecting Adversarial Advertisements in the Wild". &amp;nbsp;&lt;/span&gt;An adversarial advertiser would be an advertiser that uses Google AdWords or AdSense to advertise misleading products like counterfeit goods or scams. &amp;nbsp;Most ads are good, only a small amount are bad. &amp;nbsp;Using in-house trained people to hand build rule based models. &amp;nbsp;Allowing these people to hand-build the rules gave a great incentive and improved morale rather than just having them do repetitive tasks over and over again. &amp;nbsp;Automated methods are being used as well, but this part of the presentation went right over my head.&lt;br /&gt;&lt;span style="font-family: inherit; line-height: 115%;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family: inherit; line-height: 115%;"&gt;&lt;a href="http://datamining.rutgers.edu/CDMBA/default.asp"&gt;Chunyu Luo&lt;/a&gt; - University of Tennessee "Enhanced Investment Decisions in P2P Lending: An Investor Composition Perspective". &amp;nbsp;&lt;/span&gt;In this paper, they are trying to decide which loans are worthwhile to invest, in other words, what makes a good loan? 
&amp;nbsp;Use a bipartite investment network with one side investors and the other investees and the edges between them loans. &amp;nbsp;Each loan can be considered a composition of many investors. &amp;nbsp;The idea is that by looking at the past performance of the other investors of a given loan, you can improve your prediction of the return rate for that loan. &amp;nbsp;Performed experiment from dataset of prosper.com. &amp;nbsp;The composition method far outperformed the average return of investment.&lt;br /&gt;&lt;span style="font-family: inherit; line-height: 115%;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family: inherit; line-height: 115%;"&gt;&lt;a href="http://www.cs.csi.cuny.edu/~imberman/"&gt;Susan Imberman&lt;/a&gt; - College of Staten Island "From Market Baskets to Mole Rats: &amp;nbsp;Using Data Mining Techniques to Analyze RFID Data Describing Laboratory Animal Behavior". &amp;nbsp;&lt;/span&gt;This paper presents the data mining techniques used in analyzing RFID data from a colony of Mole Rats. &amp;nbsp;Much like we use RFID in cars for tolls like EZ Pass, they are using RFID on Mole Rats and when they pass specific points of the colony (a series of pipes and rooms) they collect that sample. &amp;nbsp;They used k-means clustering which showed animal place preference. &amp;nbsp;Used an adjacency matrix to get an idea of which Mole Rats liked to be near one another. &amp;nbsp;This created 3 distinct sub graphs which corresponded well to the different colony structure of Mole Rats, queen workers, large workers and small workers. &amp;nbsp;Next they correlated common transactions made in the grocery store with items in a basket to repeat behavior of Mole Rats.&lt;br /&gt;&lt;br /&gt;After the conference ended on Wednesday, &lt;a href="http://en.wikipedia.org/wiki/Hurricane_Irene_(2011)"&gt;Hurricane Irene&lt;/a&gt; was on track for a direct hit to Hampton Roads. 
&amp;nbsp;My flight was scheduled to arrive in Norfolk Friday night which was cutting it very close to the storm hitting on Saturday. &amp;nbsp;So I decided to extend the trip till Monday and ride out the storm here in sunny San Diego. &amp;nbsp;In total, I managed to miss a &lt;a href="http://en.wikipedia.org/wiki/Hurricane_Irene_(2011)"&gt;hurricane&lt;/a&gt;, a &lt;a href="http://hamptonroads.com/2011/08/tornado-rips-through-sandbridge-damages-homes"&gt;tornado&lt;/a&gt;, an &lt;a href="http://www.nytimes.com/2011/08/24/us/24quake.html?pagewanted=all"&gt;earthquake&lt;/a&gt;, and a &lt;a href="http://hamptonroads.com/2011/08/dismal-swamp-fire-officials-prepare-irene"&gt;swamp fire&lt;/a&gt;. &amp;nbsp;I think I made a good decision...&lt;/div&gt;&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-110439779704459295?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/110439779704459295/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/08/2011-08-24-kdd-2011-trip-report.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/110439779704459295'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/110439779704459295'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/08/2011-08-24-kdd-2011-trip-report.html' title='2011-08-28: KDD 2011 Trip Report'/><author><name>Carlton Northern</name><uri>http://www.blogger.com/profile/07251369322162897601</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='24' height='32' 
src='http://bp3.blogger.com/_2p-nbVtAey0/R9XKOh6q5XI/AAAAAAAAACI/ffhPDOG_gio/S220/600x400_3548673_600x400.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-U1K4XeQpSZw/TlrL2MkYVsI/AAAAAAAABsU/3U9zJOXwy8E/s72-c/Picture1.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-5211599518994202568</id><published>2011-08-28T15:36:00.006-04:00</published><updated>2011-08-30T11:47:34.292-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><category scheme='http://www.blogger.com/atom/ns#' term='CS 795/895'/><title type='text'>2011-08-28: Fall 2011 WS-DL Classes</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-AYjcxZQXX2c/TlqY1x9zs0I/AAAAAAAAAhE/HYuNphZnTsQ/s1600/search-engines-book-cover.jpeg"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 154px; height: 200px;" src="http://3.bp.blogspot.com/-AYjcxZQXX2c/TlqY1x9zs0I/AAAAAAAAAhE/HYuNphZnTsQ/s200/search-engines-book-cover.jpeg" alt="" id="BLOGGER_PHOTO_ID_5645993132575404866" border="0" /&gt;&lt;/a&gt;The Web Science and Digital Libraries Research Group is offering two classes for the fall 2011 semester.  &lt;a href="http://www.cs.odu.edu/%7Emln/teaching/cs895-f11/"&gt;CS 895 Web-Based Information Retrieval&lt;/a&gt; will be offered on Tuesdays, 4:20-7:00 in room 2120 of the ECS building.  This class will use the recent &lt;a href="http://www.search-engines-book.com/"&gt;Croft, Metzler &amp;amp; Strohman book&lt;/a&gt; as the required text, and the &lt;a href="http://nlp.stanford.edu/IR-book/"&gt;Manning, Raghavan, &amp;amp; Schutze book&lt;/a&gt; as the recommended text.  
By choosing the former book as the primary guide for the course, we are intentionally providing a strong engineering component to the class (i.e., a level of coding and development is expected) as opposed to just a theoretical exploration of information retrieval.  &lt;a href="http://www.cs.odu.edu/%7Emln/teaching/cs751-s11/"&gt;CS 751/851 Introduction to Digital Libraries&lt;/a&gt; is not a prerequisite, but it would help to be familiar with the material covered in that class.&lt;br /&gt;&lt;br /&gt;Dr. Weigle will be teaching &lt;a href="http://www.cs.odu.edu/%7Emweigle/CS795-F11/Home"&gt;CS 795/895 Information Visualization&lt;/a&gt; on Thursdays, 9:30-12:15 in room 2120 of the ECS building.  This class is a follow-on to the &lt;a href="http://www.cs.odu.edu/%7Emweigle/CS796-S11/Home"&gt;CS 796/896 Visual Analytics Seminar&lt;/a&gt; from last semester, but that course is not a prerequisite for this semester's course.  Unlike the seminar from last semester, this course will be a "regular" course that will count toward one of the four courses required in the "&lt;a href="https://webspace.cs.odu.edu/%7Egpd/Production/phd/mediawiki/index.php/PhD_Qualifying_Process"&gt;advanced-level course requirement&lt;/a&gt;".&lt;br /&gt;&lt;br /&gt;Students wishing to become active in the WS-DL research group should take one or both of these courses.
&lt;br /&gt;&lt;br /&gt;--Michael&lt;br /&gt;&lt;br /&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-5211599518994202568?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/5211599518994202568/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/08/fall-2011-ws-dl-classes.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5211599518994202568'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5211599518994202568'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/08/fall-2011-ws-dl-classes.html' title='2011-08-28: Fall 2011 WS-DL Classes'/><author><name>Michael L. 
Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-AYjcxZQXX2c/TlqY1x9zs0I/AAAAAAAAAhE/HYuNphZnTsQ/s72-c/search-engines-book-cover.jpeg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-3423037095236952124</id><published>2011-08-02T14:57:00.004-04:00</published><updated>2011-08-02T15:14:34.891-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Web Archiving'/><category scheme='http://www.blogger.com/atom/ns#' term='Internet archive'/><title type='text'>2011-07-26: Universal Access to All Knowledge</title><content type='html'>&lt;span style=";font-family:times new roman;font-size:100%;"  &gt;&lt;a href="http://2.bp.blogspot.com/-CkWBZAwpq6I/TjLgoPE7A9I/AAAAAAAAAAY/aam8OW_m2v0/s1600/iasummary.png" imageanchor="1" style="clear: right; float: right; margin-bottom: 1em; margin-left: 1em;"&gt;&lt;img src="http://2.bp.blogspot.com/-CkWBZAwpq6I/TjLgoPE7A9I/AAAAAAAAAAY/aam8OW_m2v0/s200/iasummary.png" border="0" height="186" width="200" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;span style=";font-family:Calibri;font-size:100%;"  &gt;On July 26, 2011, the Web Science and Digital Library group at Old Dominion University hosted Kris Carpenter Negulescu, Director of the Web Group at the &lt;a href="http://www.archive.org/"&gt;Internet Archive&lt;/a&gt; who gave a talk entitled “Universal Access to All Knowledge”. 
The presentation started with an introduction about what the Internet Archive is, then, she gave us some information about what are the archived materials in Internet Archive for now: &lt;a href="http://www.archive.org/details/texts"&gt;Text&lt;/a&gt; (+2.9M books),  &lt;a href="http://www.archive.org/details/movies"&gt;Moving Images&lt;/a&gt; (+542,500 items),  &lt;a href="http://www.archive.org/details/audio"&gt;Audio&lt;/a&gt; (+950,000 items), Television broadcast (+1M hours), &lt;a href="http://www.archive.org/web/web.php"&gt;Web Pages&lt;/a&gt; (+150 billion pages). Moreover, she gave an overview about some of the special collections such as &lt;/span&gt;&lt;span style=";font-family:times new roman;font-size:100%;"  &gt;&lt;a href="http://www.archive-it.org/k12/"&gt;K-12 students&lt;/a&gt; and &lt;/span&gt;&lt;span style=";font-family:Calibri;font-size:100%;" class="Apple-style-span"  &gt;&lt;a href="http://nasaimages.org/"&gt;NASA images&lt;/a&gt;.&lt;/span&gt;&lt;span style=";font-family:times new roman;font-size:100%;"  &gt;&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;span style=";font-family:Calibri;font-size:100%;"  &gt;After that, Kris explained the common collection strategies that are used by the Internet Archive to crawl the web. Frequently, they are doing a broad survey for the wide range domains such as .com, .net, .org, etc. They also considered the frequency of change for these websites and gave more support to the sites without archiving capabilities. 
&lt;/span&gt;&lt;span style=";font-family:Calibri;font-size:100%;" class="Apple-style-span"  &gt;Internet Archive has a special focus on the exhaustive websites when the web master decided to shutdown the website and would like to take a snapshot for the last time (for example, &lt;a href="http://geocities.yahoo.com/"&gt;geocities&lt;/a&gt;).&lt;/span&gt;&lt;span style=";font-family:Calibri;font-size:100%;" class="Apple-style-span"  &gt; Another strategy is crawling by topics or specific collections based on a feedback from researchers or experts in this topic. In general, the key inputs for the URIs seeds are nominations from various partners (e.g., domain experts, trusted directories, Wikipedia)&lt;/span&gt;&lt;span style=";font-family:times new roman;font-size:100%;"  &gt;.&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;div  style="margin: 0px;font-family:times new roman;"&gt;&lt;span style="font-size:100%;"&gt;Kris explained the methods of access to the web archive. The default method is "Host based" by browsing the website as it was using the &lt;a href="http://www.waybackmachine.org/"&gt;WayBack Machine&lt;/a&gt;. 
&lt;/span&gt;&lt;span style="font-size:100%;"&gt;In addition &lt;/span&gt;&lt;span style="font-size:100%;"&gt;to other novel techniques such as full-text search and metadata/catalog look-ups; building an API's mirroring UI based access is also undergoing.&lt;/span&gt;&lt;/div&gt;&lt;div  style="font-family:times new roman;"&gt;&lt;span class="Apple-style-span"  style="font-size:100%;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style=";font-family:Calibri;font-size:100%;"  &gt;Finally, she gave an overview about some special projects such as: &lt;/span&gt;&lt;span style=";font-family:times new roman;font-size:100%;"  &gt;&lt;br /&gt;&lt;/span&gt;&lt;ul  style="font-family:times new roman;"&gt;&lt;li&gt;&lt;span class="Apple-style-span"  style="font-size:100%;"&gt;Data mining and extraction&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span"  style="font-size:100%;"&gt;Link domain of &lt;a href="https://webarchive.jira.com/wiki/display/search/20th+Century+Find+v2"&gt;20thCF&lt;/a&gt; or of &lt;/span&gt;&lt;span class="Apple-style-span"  style="font-size:100%;"&gt;an entire domain (e.g., .uk) from 1996-2010&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span"  style="font-size:100%;"&gt;Dynamic, on-demand archiving of video and &lt;/span&gt;&lt;span class="Apple-style-span"  style="font-size:100%;"&gt;Wikipedia annotation&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span"  style="font-size:100%;"&gt;Semantic data extraction, metadata services&lt;/span&gt;&lt;/li&gt;&lt;/ul&gt;&lt;span style=";font-family:times new roman;font-size:100%;"  &gt;The colloquium was recorded and is available below.&lt;br /&gt;&lt;br /&gt;&lt;/span&gt;&lt;div class="separator"  style="clear: both; text-align: center;font-family:times new roman;"&gt;&lt;span style="font-size:100%;"&gt;&lt;a href="https://www.cs.odu.edu/vod.php?id=36"&gt;&lt;img src="http://4.bp.blogspot.com/-zhLmVnDuOv0/TjLfq7n7B3I/AAAAAAAAAAU/wsBN8_4123Q/s1600/krisVideo.png" 
border="0" /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/div&gt;&lt;span style=";font-family:times new roman;font-size:100%;"  &gt;&lt;br /&gt;-- Ahmed AlSum&lt;br /&gt;&lt;/span&gt;&lt;div class="separator" style="clear: both; text-align: center; font-family: verdana;"&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-3423037095236952124?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/3423037095236952124/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/08/2011-07-26-universal-access-to-all.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3423037095236952124'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3423037095236952124'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/08/2011-07-26-universal-access-to-all.html' title='2011-07-26: Universal Access to All Knowledge'/><author><name>Ahmed AlSum</name><uri>http://www.blogger.com/profile/14971715056697082883</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/-CkWBZAwpq6I/TjLgoPE7A9I/AAAAAAAAAAY/aam8OW_m2v0/s72-c/iasummary.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-2356812328259415863</id><published>2011-07-28T12:09:00.016-04:00</published><updated>2011-09-09T11:33:44.610-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='404 error'/><category 
scheme='http://www.blogger.com/atom/ns#' term='Web Archiving'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='Irony'/><title type='text'>2011-07-28: Web Video Discussing Preservation Disappears After 24 Hours</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-60NJkJSWdxo/TjGLFUNzcLI/AAAAAAAAAfc/gWM2t3SvhTM/s1600/snap_47.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 134px;" src="http://4.bp.blogspot.com/-60NJkJSWdxo/TjGLFUNzcLI/AAAAAAAAAfc/gWM2t3SvhTM/s200/snap_47.png" alt="" id="BLOGGER_PHOTO_ID_5634437532259414194" border="0" /&gt;&lt;/a&gt;One week ago (July 21, 2011) I was fortunate enough to be invited to speak about Web Archiving on &lt;a href="http://www.ctv.ca/canadaam/"&gt;Canada AM&lt;/a&gt;, sort of like the &lt;a href="http://en.wikipedia.org/wiki/Today_%28NBC_program%29"&gt;Today Show&lt;/a&gt; or &lt;a href="http://en.wikipedia.org/wiki/Good_Morning_America"&gt;Good Morning America&lt;/a&gt; in the US.  
I was asked to appear on the program in part because of the &lt;a href="http://www.washingtonpost.com/local/education/old-dominion-u-professor-is-trying-to-save-internet-history/2011/07/13/gIQAS1EYKI_story.html"&gt;July 17, 2011 article&lt;/a&gt; in the Washington Post, which followed a &lt;a href="http://chronicle.com/blogs/wiredcampus/old-dominion-u-researchers-ask-how-much-of-the-web-is-archived/32068"&gt;July 6, 2011 blog post&lt;/a&gt; for the Chronicle of Higher Education, which was based on a &lt;a href="http://ws-dl.blogspot.com/2011/06/2011-06-23-how-much-of-web-is-archived.html"&gt;June 23, 2011 blog post&lt;/a&gt; about our &lt;a href="http://ws-dl.blogspot.com/2011/07/2011-07-05-jcdl-2011-trip-report.html"&gt;JCDL 2011&lt;/a&gt; paper "&lt;a href="http://www.cs.odu.edu/%7Emweigle/papers/ainsworth-jcdl11.pdf"&gt;How Much of the Web is Archived?&lt;/a&gt;".  In other words,  the process went like this: step 1 - get lucky &amp;amp; step 2 - let &lt;a href="http://en.wikipedia.org/wiki/Preferential_attachment"&gt;preferential attachment&lt;/a&gt; do its thing.&lt;br /&gt;&lt;br /&gt;I was able to do the appearance in Washington DC, while attending the &lt;a href="http://ws-dl.blogspot.com/2011/07/2011-07-25-ndsandiipp-partner-meetup.html"&gt;NDSA/NDIIPP 2011 Partner Meetup&lt;/a&gt;.  The morning of July 21, I took a taxi to an ABC studio in DC, did the interview (about 4 minutes) and took a taxi back to the conference in time to make the morning session.   I had not been on TV before and was both nervous and excited.  
The local and Canadian crew made the entire experience painless and the whole interview was over right as I started to get comfortable.&lt;br /&gt;&lt;br /&gt;Given the short time, I tried to stress two topics: the first is that the ODU/LANL &lt;a href="http://mementoweb.org/"&gt;Memento&lt;/a&gt; project is not a new archive, but rather a way to leverage all existing web archives at once (this is a common misunderstanding we've experienced in the past).  The other point I tried to make was that much of our cultural discourse occurs on the web and we should try to preserve as much of that as possible (including things like &lt;a href="http://icanhascheezburger.com/"&gt;lolcats&lt;/a&gt;) because we (collectively) do a bad job at predicting what will be important in the future.  Shortly after airing, the video segment was available on-line at:&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;http://www.ctv.ca/canadaam/?video=504307&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;As the URI suggests, this is the homepage for Canada AM (http://www.ctv.ca/canadaam/), but with an argument ("?video=504307") specifying which video segment (i.e., each individual story -- not the entire morning's show) to display.  I shared the video URI with colleagues, friends, and family and was enjoying my 4 minutes of fame (I should still have &lt;a href="http://en.wikipedia.org/wiki/15_minutes_of_fame"&gt;11&lt;/a&gt; left in the bank).  I had not made a local copy of the video because their web site obfuscated the actual URI of the streaming video, I had to finish the rest of the conference and drive back to Norfolk, and I thought I would have the time to figure it out after I returned.&lt;br /&gt;&lt;br /&gt;So imagine my surprise on Friday at about lunch time when I reload the URI and do not see the video, but instead a newly redesigned Canada AM web page!  The video of me making the point that we should save web resources lasted approximately 24 hours.   
I don't mean to seem ungrateful for the opportunity Canada AM afforded me, but as a professor I try to see everything as a teaching opportunity, so here it goes...&lt;br /&gt;&lt;br /&gt;Sometime on Friday morning (July 22), the entire web site was redesigned and the old URIs no longer worked (cf. "&lt;a href="http://www.w3.org/Provider/Style/URI.html"&gt;Cool URIs Don't Change&lt;/a&gt;").  The video id was an argument and is now silently ignored, so even worse than a &lt;a href="http://en.wikipedia.org/wiki/HTTP_404"&gt;404&lt;/a&gt; you now get a "&lt;a href="http://en.wikipedia.org/wiki/HTTP_404#Soft_404"&gt;soft 404&lt;/a&gt;":&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;% curl -I http://www.ctv.ca/canadaam/\?video=504307&lt;/span&gt; &lt;span style="font-family:courier new;"&gt;&lt;br /&gt;HTTP/1.1 200 OK&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;Server: Apache/2.2.14 (Ubuntu)&lt;/span&gt; &lt;span style="font-family:courier new;"&gt;&lt;br /&gt;Content-Type: text/html&lt;/span&gt; &lt;span style="font-family:courier new;"&gt;&lt;br /&gt;X-Varnish: 2550613724&lt;/span&gt; &lt;span style="font-family:courier new;"&gt;&lt;br /&gt;Date: Thu, 28 Jul 2011 16:55:48 GMT&lt;/span&gt; &lt;span style="font-family:courier new;"&gt;&lt;br /&gt;Connection: keep-alive&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;The soft 404 means people clicking on the original video link in Facebook, Twitter, email, etc. won't even see an error page -- they see the new site, but without the video or indication that the video is missing.  The new site has a link titled "watch full shows", with the URI:&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;http://www.ctv.ca/canadaAMPlayer/index.html&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;Which is textually described as the "Canada AM Video Archive", but the archive begins on July 22, 2011 -- one day after my appearance!  
The new segments are available at URIs of the form:&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;http://www.ctv.ca/canadaAMPlayer/index.html?video=504933&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;The older videos are not available, not even as an argument to the new URI, which also returns a soft 404 (i.e., the video is not available despite the 200 response):&lt;br /&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;% curl -I http://www.ctv.ca/canadaAMPlayer/index.html\?video=504307&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;HTTP/1.1 200 OK&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;Server: Apache/2.2.14 (Ubuntu)&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;Content-Type: text/html&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;X-Varnish: 2550976182&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;Date: Thu, 28 Jul 2011 17:35:35 GMT&lt;/span&gt;&lt;br /&gt;&lt;span style="font-family:courier new;"&gt;Connection: keep-alive&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;The video ids seem to be continuous (i.e., they did not appear to start over with "1"), so &lt;a href="http://httpd.apache.org/docs/2.0/misc/rewriteguide.html"&gt;URL rewriting&lt;/a&gt; could easily make all the old video URIs continue to work, unless whatever &lt;a href="http://en.wikipedia.org/wiki/Content_management_system"&gt;CMS&lt;/a&gt; that hosted those videos has been retired with no &lt;a href="http://en.wikipedia.org/wiki/Digital_preservation#Migration"&gt;migration path&lt;/a&gt; forward.&lt;br /&gt;&lt;br /&gt;Here are some screen shots of the newly redesigned home page (left) and the video archive page (right) from July 22:&lt;br /&gt;&lt;br /&gt;&lt;div style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-k6GI5hi620g/TjGWcHD6reI/AAAAAAAAAfs/ior8gOItwP4/s1600/canada-am.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 
0pt; cursor: pointer; width: 200px; height: 151px;" src="http://2.bp.blogspot.com/-k6GI5hi620g/TjGWcHD6reI/AAAAAAAAAfs/ior8gOItwP4/s200/canada-am.png" alt="" id="BLOGGER_PHOTO_ID_5634450018493181410" border="0" /&gt;&lt;/a&gt;&lt;a href="http://1.bp.blogspot.com/-9c1Tf-jGXTs/TjGWkfVI6QI/AAAAAAAAAf0/tE1HZidUQ6A/s1600/canada-am-archive.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 151px;" src="http://1.bp.blogspot.com/-9c1Tf-jGXTs/TjGWkfVI6QI/AAAAAAAAAf0/tE1HZidUQ6A/s200/canada-am-archive.png" alt="" id="BLOGGER_PHOTO_ID_5634450162446821634" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;Of course, I did not think to make a screen shot of the original home page, or the page of my video because I thought it would live longer than 24 hours!  I was able to find a recent (December 8, 2010) copy in the &lt;a href="http://web.archive.org/"&gt;Internet Archive's Wayback Machine&lt;/a&gt;:&lt;br /&gt;&lt;br /&gt;&lt;a style="font-family: courier new;" href="http://web.archive.org/web/20101208084455/http://www.ctv.ca/canadaam/"&gt;http://web.archive.org/web/20101208084455/http://www.ctv.ca/canadaam/&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;And I also pushed the two pages above to &lt;a href="http://webcitation.org/"&gt;WebCite&lt;/a&gt;, which nicely contrasts two styles of giving URIs for archived pages (&lt;a href="http://www.mementoweb.org/guide/quick-intro/"&gt;URI-M in Memento parlance&lt;/a&gt;):&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;&lt;a style="font-family: courier new;" href="http://www.webcitation.org/60NizRC0o"&gt;http://www.webcitation.org/60NizRC0o&lt;/a&gt;&lt;br /&gt;&lt;a style="font-family: courier new;" href="http://www.webcitation.org/60Nj60H8D"&gt;http://www.webcitation.org/60Nj60H8D&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The IA's URIs violate the &lt;a href="http://www.w3.org/"&gt;W3C&lt;/a&gt; "good 
practice" of &lt;a href="http://www.w3.org/TR/webarch/#uri-opacity"&gt;URI opacity&lt;/a&gt;, but they sure are handy for humans.  WebCite actually offers both styles of URIs, for example the latter of the two URIs above is equivalent to:&lt;br /&gt;&lt;br /&gt;&lt;a style="font-family: courier new;" href="http://www.webcitation.org/query?url=http%3A%2F%2Fwww.ctv.ca%2FcanadaAMPlayer%2Findex.html&amp;amp;date=2011-07-22"&gt;http://www.webcitation.org/query?url=http%3A%2F%2Fwww.ctv.ca%2FcanadaAMPlayer%2Findex.html&amp;amp;date=2011-07-22&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;But the resulting &lt;a href="http://en.wikipedia.org/wiki/Percent-encoding"&gt;URI encoding&lt;/a&gt;, while technically correct, is not conducive to easy memorizing and exploration by humans.  Different styles of using a URI as an argument to another URI will be explored in a future blog post.&lt;br /&gt;&lt;br /&gt;Fortunately I was given a DVD of the session, from which I was able to rip a copy and upload it to YouTube, provided below with the dual interests of vanity and pedagogy.  I'm not sure about its status with respect to copyright, so it might disappear in the future as well.  It should be covered under &lt;a href="http://en.wikipedia.org/wiki/Fair_use"&gt;fair use&lt;/a&gt;, but I &lt;a href="http://www.chillingeffects.org/"&gt;would not count on it&lt;/a&gt;.  
However, that is also a topic for another blog post...&lt;br /&gt;&lt;br /&gt;&lt;iframe src="http://www.youtube.com/embed/qU3vIC8hxl4" allowfullscreen="" frameborder="0" height="349" width="425"&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;br /&gt;--Michael&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-2356812328259415863?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/2356812328259415863/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/07/2011-07-28-web-video-discussing.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/2356812328259415863'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/2356812328259415863'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/07/2011-07-28-web-video-discussing.html' title='2011-07-28: Web Video Discussing Preservation Disappears After 24 Hours'/><author><name>Michael L. 
Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-60NJkJSWdxo/TjGLFUNzcLI/AAAAAAAAAfc/gWM2t3SvhTM/s72-c/snap_47.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-9214576476028678372</id><published>2011-07-21T19:05:00.190-04:00</published><updated>2011-07-25T22:55:25.846-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='NDIIPP 2011'/><category scheme='http://www.blogger.com/atom/ns#' term='YouTube'/><category scheme='http://www.blogger.com/atom/ns#' term='NDSA'/><category scheme='http://www.blogger.com/atom/ns#' term='archive'/><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='NDIIPP'/><category scheme='http://www.blogger.com/atom/ns#' term='Washington DC'/><category scheme='http://www.blogger.com/atom/ns#' term='digital preservation'/><category scheme='http://www.blogger.com/atom/ns#' term='Conference'/><category scheme='http://www.blogger.com/atom/ns#' term='Archive Facebook'/><category scheme='http://www.blogger.com/atom/ns#' term='Library of Congress'/><title type='text'>2011-07-25: NDSA/NDIIPP Partner Meetup 2011 Trip Report</title><content type='html'>&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-RjCID3Cy5Jc/TinDyR2PBgI/AAAAAAAAAEo/i_RYzyDgY8I/s1600/capitol.png"&gt;&lt;img style="cursor: pointer; text-align: center; margin: auto; width: 250px; border: 1px solid rgb(153, 153, 153); padding: 5px;" 
src="http://4.bp.blogspot.com/-RjCID3Cy5Jc/TinDyR2PBgI/AAAAAAAAAEo/i_RYzyDgY8I/s400/capitol.png" alt="" id="BLOGGER_PHOTO_ID_5632248077555271170" align="right" border="0" /&gt;&lt;/a&gt;The NDSA/NDIIPP (&lt;a href="http://twitter.com/ndiipp"&gt;@ndiipp&lt;/a&gt;) Partner Meetup  took place July 19-21 at the Hyatt Regency Washington on Capitol Hill in Washington, DC. Technical and non-technical joined together to form an aggregated consortium of archivists, librarians, digital media specialists and concerned parties. Three representatives from the &lt;span style=""&gt; &lt;/span&gt;&lt;a href="http://ws-dl.blogspot.com/"&gt;ODU Web Sciences and Digital Libraries group&lt;/a&gt; attended to make archivists aware of tools they had developed to accomplish the common goal of web archiving.&lt;/p&gt;  &lt;p class="MsoNormal"&gt;&lt;b&gt;&lt;span class="Apple-style-span"&gt;WS-DL’s Contributions to the&lt;span style=""&gt; &lt;/span&gt;NDSA/NDIIPP Meetup&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;a href="http://cs.odu.edu/%7Emkelly"&gt;&lt;b&gt;Mat Kelly&lt;/b&gt;&lt;/a&gt; presented the &lt;a href="http://www.mozilla.com/en-US/firefox/new/"&gt;Mozilla Firefox&lt;/a&gt; add-on &lt;a href="http://ws-dl.blogspot.com/2009/09/archivefacebook.html"&gt;Archive Facebook&lt;/a&gt; to a breakout group of presentations specifically targeting web archiving. 
The redesigned and re-architected add-on allows a user to archive the content of his/her Facebook account with the result being truly WYSIWYG versus Facebook’s native offerings of a content dump.&lt;/p&gt;&lt;div style="width: 425px; margin: auto;" id="__ss_8672852"&gt; &lt;strong style="display: block; margin: 12px 0pt 4px;"&gt;&lt;a href="http://www.slideshare.net/matkelly01/ndiippndsa-2011-archive-facebook" title="NDIIPP/NDSA 2011 - Archive Facebook" target="_blank"&gt;NDIIPP/NDSA 2011 - Archive Facebook&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/8672852" marginwidth="0" marginheight="0" frameborder="0" height="355" scrolling="no" width="425"&gt;&lt;/iframe&gt;&lt;/div&gt;&lt;p class="MsoNormal"&gt;&lt;span style="font-weight: bold;"&gt;Vivens Ndatinya&lt;/span&gt; showed the workings of a tool he is currently building with his presentation,&lt;span style=""&gt;  &lt;/span&gt;“Creating Persistent Links to YouTube Music Videos”. The software serves as a medium between a user and YouTube where, if a music video has been deleted or removed, the proxy will search for a comparable or official substitute and seamlessly forward the user to the resource for which he/she was looking.&lt;/p&gt;&lt;div style="width: 425px; margin: auto;"&gt; &lt;strong style="display: block; margin: 12px 0pt 4px;"&gt;&lt;a href="http://www.slideshare.net/matkelly01/ndiippndsa-2011-youtubelinkrestoration" title="NDIIPP/NDSA 2011 - YouTube Link Restoration" target="_blank"&gt;NDIIPP/NDSA 2011 - YouTube Link Restoration&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/8672863" marginwidth="0" marginheight="0" frameborder="0" height="355" scrolling="no" width="425"&gt;&lt;/iframe&gt;&lt;/div&gt;&lt;p class="MsoNormal"&gt;&lt;b&gt;&lt;a href="http://cs.odu.edu/%7Emln"&gt;Michael Nelson&lt;/a&gt;&lt;/b&gt; presented "How Much of the Web is Archived?", which was also presented at &lt;a 
href="http://ws-dl.blogspot.com/2011/07/2011-07-05-jcdl-2011-trip-report.html"&gt;JCDL 2011&lt;/a&gt;. By examining links on &lt;a href="http://www.dmoz.org/"&gt;DMOZ&lt;/a&gt;, &lt;a href="http://delicious.com/"&gt;delicious&lt;/a&gt;, &lt;a href="http://bit.ly/"&gt;bit.ly&lt;/a&gt; and search engines and cross-referencing the links with various archives, they were able to establish the criteria for likelihood of archival rate and conclude the amount of the web that is archived with, "It depends on the source of the URIs".&lt;/p&gt;&lt;div style="width: 425px; margin: auto;"&gt; &lt;strong style="display: block; margin: 12px 0pt 4px;"&gt;&lt;a href="http://www.slideshare.net/aalsum/jcdl-howmuchisarchived" title="How Much of the Web is Archived? JCDL 2011" target="_blank"&gt;How Much of the Web is Archived? JCDL 2011&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/8341312" marginwidth="0" marginheight="0" frameborder="0" height="355" scrolling="no" width="425"&gt;&lt;/iframe&gt;&lt;/div&gt;&lt;br /&gt;&lt;b&gt;&lt;span class="Apple-style-span"&gt;The Speakers&lt;/span&gt;&lt;/b&gt;&lt;br /&gt;&lt;p class="MsoNormal"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 128px; height: 128px; padding: 5px; border: 1px solid rgb(153, 153, 153);" src="http://4.bp.blogspot.com/-AGOiYN-pt0Y/Tim33j_SL6I/AAAAAAAAADo/Cozjn3aJEvk/s320/Martha%2BAnderson.png" alt="" id="BLOGGER_PHOTO_ID_5632234974184877986" align="right" /&gt;&lt;b&gt;Martha Anderson&lt;/b&gt; (&lt;a href="http://twitter.com/MarthaBunton"&gt;@MarthaBunton&lt;/a&gt;), the director of program management for the &lt;a href="http://www.digitalpreservation.gov/"&gt;N&lt;/a&gt;&lt;a href="http://www.digitalpreservation.gov/"&gt;ational Digital Information Infrastructure and Preservation Program&lt;/a&gt; (NDIIPP), exclaimed that “We are growing!” in her introductory presentation, citing the increasing numbers of members in the group and the larger 
breadth of the scope of the members’ specializations. She introduced the theme of the conference "Make It Work" and stated that the conference’s 3 days were broken up by the respective keywords of “Open”, in that all presenters were committed to openness, “Solve”, where all speakers presented studies on creative approaches toward solving their problems and “Connect”, which had a focus on community building and relationships.&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;img src="http://3.bp.blogspot.com/-p28qTJ8uLoQ/Tim1IHzTmtI/AAAAAAAAADg/a_puXK8xfVQ/s320/IMG_3811_v4_reasonably_small.jpg" style="margin: 0pt 0pt 10px 10px; cursor: pointer; width: 128px; height: 128px; padding: 5px; border: 1px solid rgb(153, 153, 153);" alt="" id="BLOGGER_PHOTO_ID_5632231960141339346" align="right" /&gt;&lt;b&gt;Tim O’Reilly &lt;/b&gt;(&lt;a href="http://twitter.com/timoreilly"&gt;@timoreilly&lt;/a&gt;), founder and CEO of &lt;a href="http://oreilly.com/"&gt;O’Reilly Media Inc&lt;/a&gt;., kicked off the list of speakers providing insightful one-liners such as “Forgetting makes room for new things”, “Design more systems that have their own memory” and “We’re engaged in the wholesale destruction of our history”. He listed two of his past failures where his process of archiving could have been improved:&lt;/p&gt;&lt;p class="MsoNormal"&gt;In 1993 he created one of the first websites but neglected to archive it. “Things that turn out to be historic”, he stated, “aren’t deemed to be historical at the time” – a theme that reverberated through many other presentations.&lt;/p&gt;&lt;p class="MsoNormal"&gt;His second past failure of preservation was in 1998 when he attended the inaugural Open Source Summit (link?), where the term “Open Source” was officially born. 
Learning from his 1993 failure, he diligently built an archive and linked to all of the relevant content but neglected to deep link the archiving, which meant all of the information that was coupled with his coverage was no longer available at time of access.&lt;/p&gt;    &lt;p class="MsoNormal"&gt;O’Reilly rhetorically queried the audience, “What kind of tools do we need in the everyday practice of the digital world to encourage presentation?” He stated that we have to consider the widely divergent scenarios if we are to archive effectively. He reiterated that the tools we have should be adapted to assure that it is more likely that archived would survive when things went awry. “What matters?”, Tim stated, again referencing his two failures and answering his own question. He emphasized that our current perspective of what matters is temporally subjective and that we are likely neglecting to archive collections we now consider trivial.&lt;/p&gt;  &lt;p class="MsoNormal"&gt;To close up, Tim emphasized that there should be an exception in copyright for the sake of archiving so that our past will be preserved.&lt;/p&gt;  &lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-Q6oiLxdUW7E/Tim4KJz_LfI/AAAAAAAAADw/bfNsg7xN6FM/s1600/hr_wed-118_reasonably_small.jpg"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 128px; height: 128px; padding: 5px; border: 1px solid rgb(153, 153, 153);" src="http://3.bp.blogspot.com/-Q6oiLxdUW7E/Tim4KJz_LfI/AAAAAAAAADw/bfNsg7xN6FM/s320/hr_wed-118_reasonably_small.jpg" alt="" id="BLOGGER_PHOTO_ID_5632235293575687666" align="right" /&gt;&lt;/a&gt;&lt;b&gt;Yancey Strickler&lt;/b&gt; (&lt;a href="http://twitter.com/ystrickler"&gt;@ystrickler&lt;/a&gt;) came on next to speak about his project &lt;a href="http://www.kickstarter.com/"&gt;Kickstarter&lt;/a&gt;, a funding platform for creative projects. 
Kickstarter works on an all-or-nothing approach of fund-raising where users can offer monetary support for projects they believe worthwhile with no commitment if the project fails to get funded. Yancey spoke of a tipping point in the funding process, where a large majority get funding to and sometimes beyond the threshold after attaining 30% of their goal. Those that donate to the project are forbidden from being rewarded with equity but the fundee usually provides something priceless in return, like a photo for a donator from a project where a girl wished to sail around the world or the ability to be first to purchase a potentially popular iPod accessory that neglected to get traditional backing.&lt;/p&gt;  &lt;p class="MsoNormal"&gt;Kickstarter takes only a very small cut (5% of the raised funds) to remain sustainable but only receives these if the project gets funded. With this, Kickstarter and the projects both grow. “One day”, Yancey said, “we’ll hopefully be a cultural institution”.&lt;/p&gt;  &lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-KBeAnWrYWtU/Tim5B6pYsTI/AAAAAAAAAD4/aR_UnNLEMho/s1600/flickr_turquoise_hands_2_reasonably_small.jpg"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 128px; height: 128px; padding: 5px; border: 1px solid rgb(153, 153, 153);" src="http://3.bp.blogspot.com/-KBeAnWrYWtU/Tim5B6pYsTI/AAAAAAAAAD4/aR_UnNLEMho/s320/flickr_turquoise_hands_2_reasonably_small.jpg" alt="" id="BLOGGER_PHOTO_ID_5632236251577364786" align="right" /&gt;&lt;/a&gt;&lt;b&gt;Michael Edson&lt;/b&gt; (&lt;a href="https://twitter.com/mpedson"&gt;@mpedson&lt;/a&gt;) of the &lt;a href="http://www.si.edu/"&gt;Smithsonian Institution&lt;/a&gt; came on next after a short break with his presentation “Let us go boldly into the present”. 
Michael emphasized that the time to archive is now and that “today is the future that all of the visionaries wrote about.” To do so, he gave five “design patterns” that we should exhibit to assure that the present is archived:&lt;/p&gt;&lt;ol&gt;&lt;li&gt;Extra-terrestrial Space Auditor is a concept best depicted by an extraterrestrial that examines an organization, blind to its current workings, and provokes the organization to do a self-analysis as to whether it is performing as it should in terms of business practices, HR, etc having been potentially skewed in operation by the baggage of the last epoch.&lt;/li&gt;&lt;li&gt;On Ramp and Loading Docks encourages the mindset that successful preservation is not about building infrastructure but rather creating movement.&lt;/li&gt;&lt;li&gt;Edge to Core suggests that the best work is done on the fringes of an institution where subject matter experts exist. “An organization”, Michael said, “should develop a process that brings in and bootstraps these experts so their ideas can scale.”&lt;/li&gt;&lt;li&gt;Self Awareness about organization change patterns states that there are predictable miscommunications and general crankiness in an organization between innovators and management.&lt;/li&gt;&lt;li&gt;Focus on the mission was Michael’s observation that of the 80 to 90 organizations that he had spoken to in the last few years, the ones that were not suffering their pursuit of worth know the outcomes they want in society.&lt;/li&gt;&lt;/ol&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-EEXvzkSu0j8/Tim5sbGaSPI/AAAAAAAAAEA/8RFUxhZ07M0/s1600/19136987.jpg"&gt;&lt;img style="cursor: pointer; width: 128px; height: 128px; padding: 5px; border: 1px solid rgb(153, 153, 153);" src="http://3.bp.blogspot.com/-EEXvzkSu0j8/Tim5sbGaSPI/AAAAAAAAAEA/8RFUxhZ07M0/s320/19136987.jpg" alt="" id="BLOGGER_PHOTO_ID_5632236981843544306" align="right" 
/&gt;&lt;/a&gt;After Michael, &lt;b&gt;Aaron Presnall&lt;/b&gt; (&lt;a href="http://www.jeffersoninst.org/blogs/aaron-presnall"&gt;blog&lt;/a&gt;) of the &lt;a href="http://www.jeffersoninst.org/"&gt;Jefferson Institute&lt;/a&gt; came on to speak about “Tools for informing public decision-making”. A continuing project of his was to assist those at the National Archives of Serbia in archiving their documents.&lt;span style=""&gt;  &lt;/span&gt;Many of these documents are of great interest, as they document the recent struggles and secession of the country and have immediate application (such as implication for war crimes) if preserved. Using the tools available, some unconventional, Aaron assisted those interested in moving from the dissolving stacks of papers to a digital form. He then built a management tool and genericized the tool to allow it to be reused in instances beyond Serbia. He has since been queried by Bosnia, who wishes to do the same as Serbia and because of the generic setup Aaron has created, the information Bosnia has to offer will not be lost.&lt;/p&gt;&lt;p class="MsoNormal"&gt;With Aaron being the last presenter for the day, &lt;a href="http://www.uvasci.org/about-us/abby-smith/"&gt;Abby Rumsey&lt;/a&gt; moderated a panel discussion/Q&amp;amp;A with all of the Day One speakers, first hoping to address Martha's question, "How do we make it work?" She first asked Aaron how to connect demand of archiving with the supply of skill and if there is something that needs to be in-place to make these connections easier. He replied with the need to communicate the success of individual cases to a much broader audience, convey the lessons learned and establish best practices for performing such an archiving session. 
He admitted that it's difficult "to make archiving sexy" but popularized projects such as &lt;a href="http://www.historypin.com/"&gt;History Pin&lt;/a&gt; get people thinking and both energize and popularize the task of archiving.&lt;/p&gt;&lt;p class="MsoNormal"&gt;Tim O'Reilly expounded on Aaron's reply, referencing a collection of railway edition books from the 1880s that were bound by people that found the works both valuable and beautiful. "When some individual finds something that would otherwise be disposable and finds it beautiful and a keepsake", Tim said,  "that's a wonderful impulse for preservation". He continued, "When we allow things to be reused by individuals, it really appeals to value of fair use." He went on to speak about how intellectual property fights against preservation and what we can do to preserve things of value is to give them more freedom.&lt;/p&gt;&lt;p class="MsoNormal"&gt;Abby then questioned Michael Edson about how his approach of Edge-to-Core has had an impact on The Smithsonian. Michael gave the example of how the Smithsonian handled the inception of the world-wide web with no business process in-place. "Because the institution took a decentralized approach to managing content and ideas", he said, "there was no existing infrastructure to make order out of the web. It's been a series of opportunistic efforts to pick the pieces of the low hanging fruit and bring them to the center of the organization to achieve scalability and a greater impact."&lt;/p&gt;&lt;p class="MsoNormal"&gt;Yancey was then asked, "How do you get something where the connections are so profoundly personal into something that really scale to the level we think about with digital preservation?", citing &lt;a href="http://www.wikipedia.org/"&gt;Wikipedia&lt;/a&gt;'s scaling issues. 
Yancey alluded to Wikipedia's moderation challenges in terms of curation with, "What happened if I'm a guy that knows a lot about a topic you're concerned with archiving and I decide to reach out and tell you everything I know and all of the ways to be wrong? What do I get to contribute? Do I have any voice whatsoever?" Aaron replied with, "Exactly, that's a tremendous challenge and whether 80% of time you're right, 20% of time you could be fundamentally, deeply, troublingly wrong."&lt;/p&gt;&lt;p class="MsoNormal"&gt;The Q&amp;amp;A was followed up with a reception accompanied with 30-or-so poster displays. Of particular interest to the WS-DL members was the &lt;a href="https://wiki.umiacs.umd.edu/adapt/index.php/Ace:Main"&gt;&lt;b&gt;Ace Audit Manager and Integrity Management System&lt;/b&gt;&lt;/a&gt;, an integrity auditing system for archives, which would prove useful in both the &lt;i&gt;Memento&lt;/i&gt; and &lt;i&gt;Archive Facebook&lt;/i&gt; projects. This closed out day one.&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-ARHvF23dgiU/TinAhe38xYI/AAAAAAAAAEI/DItyLQ3zS-4/s1600/20110509A279-crop1-295x300.jpg"&gt;&lt;img style="cursor: pointer; width: 128px; padding: 5px; border: 1px solid rgb(153, 153, 153);" src="http://1.bp.blogspot.com/-ARHvF23dgiU/TinAhe38xYI/AAAAAAAAAEI/DItyLQ3zS-4/s320/20110509A279-crop1-295x300.jpg" alt="" id="BLOGGER_PHOTO_ID_5632244490459465090" align="right" /&gt;&lt;/a&gt;Day two started with a presentation from &lt;b&gt;Helen Hockx-Yu&lt;/b&gt; from the &lt;a href="http://www.bl.uk/"&gt;British Library&lt;/a&gt;. "In the UK", she said, "there are two archives - The UK web archive and the UK Government Web Archive." She spoke further that there was pending legislation that would limit the viewing of archives to on-site within the library. 
"Web archiving in the UK", she said "is only 10 years old at the British library - much younger than Internet Archive." One notable part of the collection, to which she said the British Library found accidentally, is the oldest archived website - that of the British Library's website from 1995, which was found stored away on a library's server.&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-PJgqWtNPuvc/TinClT2kT5I/AAAAAAAAAEQ/cdO5CwNxUms/s1600/pioneer_cruse.jpg"&gt;&lt;img style="cursor: pointer; width: 150px; height: 150px; padding: 5px; border: 1px solid rgb(153, 153, 153);" src="http://1.bp.blogspot.com/-PJgqWtNPuvc/TinClT2kT5I/AAAAAAAAAEQ/cdO5CwNxUms/s320/pioneer_cruse.jpg" alt="" id="BLOGGER_PHOTO_ID_5632246755243610002" align="right" /&gt;&lt;/a&gt;&lt;b&gt;Tricia Cruse&lt;/b&gt; of the &lt;a href="http://www.cdlib.org/"&gt;California Digital Library&lt;/a&gt; spoke next about "Curation approaches in a public university system", stating "We're seeing an ever-increasing amount and degree of diversity of content. 
While our budgets were going down, we have had to do more with less." She also spoke of &lt;a href="http://www.cdlib.org/services/uc3/ezid/"&gt;EZID&lt;/a&gt;, a system for users to create unique identifiers for their archived content; &lt;a href="http://merritt.cdlib.org/"&gt;UC3 Merritt&lt;/a&gt;, a place where collaboration for researchers can happen and data can be stored and shared; and Digital Curation for Excel (DCXL), an open source Microsoft Excel add-in that allows working in Excel to be easier for versioning, archiving and applying unique identifiers.&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-zZ9BjLuoaHg/TitYTOuYMHI/AAAAAAAAAFo/gHMdcMzAPiQ/s1600/brighton-jack-cropped.jpg"&gt;&lt;img style="cursor: pointer; width: 128px; border: 1px solid rgb(153, 153, 153); padding: 5px;" src="http://2.bp.blogspot.com/-zZ9BjLuoaHg/TitYTOuYMHI/AAAAAAAAAFo/gHMdcMzAPiQ/s200/brighton-jack-cropped.jpg" alt="" id="BLOGGER_PHOTO_ID_5632692846350315634" align="right" /&gt;&lt;/a&gt;&lt;b&gt;Jack Brighton&lt;/b&gt; of &lt;a href="http://will.illinois.edu/"&gt;WILL&lt;/a&gt;, a radio/television station in Illinois, spoke of "Archiving at Web Speed". 
Jack spoke of his efforts in preserving the station's broadcasts using &lt;a href="http://pbcore.org/"&gt;PBCore&lt;/a&gt; and emphasized the need for the adaptation of the archiving process to make it as painless as possible for those that did not necessarily see the value of the content at the current time.&lt;/p&gt;&lt;br /&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-D_e1xez2ed0/TinGikoq5XI/AAAAAAAAAEw/7bpZbL1rQ5o/s1600/vershbow.png"&gt;&lt;img style="cursor: pointer; width: 128px; height: 128px; padding: 5px; border: 1px solid rgb(153, 153, 153);" src="http://4.bp.blogspot.com/-D_e1xez2ed0/TinGikoq5XI/AAAAAAAAAEw/7bpZbL1rQ5o/s200/vershbow.png" alt="" id="BLOGGER_PHOTO_ID_5632251106255627634" align="right" /&gt;&lt;/a&gt;&lt;b&gt;Ben Vershbow&lt;/b&gt; (&lt;a href="http://twitter.com/subsublibrary"&gt;@subsublibrary&lt;/a&gt;) from the &lt;a href="http://www.nypl.org/"&gt;New York Public Library&lt;/a&gt; finished up the first session of the day with his presentation, "Bringing in the Crowd". He cited a project his group created, "&lt;a href="http://menus.nypl.org/"&gt;What's on the Menu?&lt;/a&gt;", which was a crowd-sourced effort to transcribe old menus. He believes that there is an untapped reservoir of time and through crowd coordination and building datasets, people will be willing to devote their time for free.&lt;/p&gt;&lt;p class="MsoNormal"&gt;Subsequent to Ben's presentation, the crowd broke up into three groups for workshops. The three topics of the workshops were "And the winner is..: How does a community recognize achievement?", "Tales from the crypt: What are the emerging practices of large scale storage" and "Special Interest Session: Web Archiving: Pecha Kucha and discussion of emerging topics in Web archiving". 
Because Vivens and Mat presented at the latter of the three, the WS-DL members attended and participated in the third session.&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-zxiHMOFQuog/TitYo6tjQsI/AAAAAAAAAFw/2fG3buD8dmU/s1600/mackenzie-smith-200x150.jpg"&gt;&lt;img style="cursor: pointer; width: 128px; border: 1px solid rgb(153, 153, 153); padding: 5px;" src="http://4.bp.blogspot.com/-zxiHMOFQuog/TitYo6tjQsI/AAAAAAAAAFw/2fG3buD8dmU/s200/mackenzie-smith-200x150.jpg" alt="" id="BLOGGER_PHOTO_ID_5632693218935259842" align="right" /&gt;&lt;/a&gt;Presentations resumed after the breakout session with the theme of Open Source Tools and Community. The first presentation was by &lt;b&gt;MacKenzie Smith&lt;/b&gt; (&lt;a href="http://www.mit.edu/%7Ekenzie/"&gt;website&lt;/a&gt;) of &lt;a href="http://www.mit.edu/"&gt;MIT&lt;/a&gt; with "&lt;a href="http://www.simile-widgets.org/exhibit3/"&gt;Exhibit3@MIT&lt;/a&gt;: Lessons learned from 10 years of the &lt;a href="http://simile.mit.edu/"&gt;Simile Project&lt;/a&gt; for building library open source software". MacKenzie stated that "Everybody's a curator" and "If we're creating these tools for the public, how can we assure that these tools will flow into the organizations, as many die? When you're doing a project that's open source", he continued, "you need to design for that community from the beginning." MacKenzie went on to say that metrics should be used to assure that you can tell the chance of success of the open source project, you're more likely to have a sustainable project if you have an audience "outside of this room" (i.e. 
outside of the archiving community) and that maintenance of the code has to be done by those that are committed, not just casual developers.&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-zYKx4DVVTfQ/TitZO_lp5xI/AAAAAAAAAF4/D7aHNr0lysA/s1600/sharon.png"&gt;&lt;img style="cursor: pointer; width: 128px; padding: 5px; border: 1px solid rgb(153, 153, 153);" src="http://2.bp.blogspot.com/-zYKx4DVVTfQ/TitZO_lp5xI/AAAAAAAAAF4/D7aHNr0lysA/s200/sharon.png" alt="" id="BLOGGER_PHOTO_ID_5632693873079346962" align="right" /&gt;&lt;/a&gt;&lt;b&gt;Sharon Leon&lt;/b&gt; (&lt;a href="http://chnm.gmu.edu/staff/sharon-leon/"&gt;website&lt;/a&gt;) of &lt;a href="http://www.gmu.edu/"&gt;George Mason University&lt;/a&gt; then presented "&lt;a href="http://omeka.org/"&gt;Omeka&lt;/a&gt;: from digital exhibits to web publishing platform". Omeka is a plug-in based Content Management System (CMS) modeled off of Wordpress that emphasizes extensibility. Sharon repeatedly emphasized the openness of the platform and that her group "specifically fights against Flash for re-use", as wrapping content in a Flash-based application limits access to the content within. 
She also mentioned that in developing a grant-funded open source project, one should not spend all of the funds on the development of the project but rather should put funds toward workshops, outreach and marketing of the product.&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-TkE28vVZZDw/TitVv1gINEI/AAAAAAAAAFg/FHieWXC6iMA/s1600/kimpton_200.jpg"&gt;&lt;img style="cursor: pointer; width: 128px; height: 128px; border: 1px solid rgb(153, 153, 153); padding: 5px;" src="http://1.bp.blogspot.com/-TkE28vVZZDw/TitVv1gINEI/AAAAAAAAAFg/FHieWXC6iMA/s200/kimpton_200.jpg" alt="" id="BLOGGER_PHOTO_ID_5632690039260984386" align="right" /&gt;&lt;/a&gt;&lt;b&gt;Michele Kimpton&lt;/b&gt; (&lt;a href="http://www.digitalpreservation.gov/partners/pioneers/detail_kimpton.html"&gt;website&lt;/a&gt;) spoke of ways to go beyond grant funding once it's exhausted with "Building and sustaining open source communities through the life cycle: &lt;a href="http://www.dspace.org/"&gt;Dspace&lt;/a&gt;, Fedora and &lt;a href="http://www.duraspace.org/duracloud.php"&gt;DuraCloud&lt;/a&gt; case studies". Her group has created &lt;a href="http://expertvoices.nsdl.org/duraspace/2011/07/21/national-digital-stewardship-alliance-ndsa-members-gather-in-washington%E2%80%93part-one/"&gt;a write-up&lt;/a&gt; on the Meetup.&lt;/p&gt;&lt;p class="MsoNormal"&gt;Following Michele was another breakout session of concurrent workshops with each having the topics of "Tools at risk", "I can haz standardz" and "Developing cutting-edge internship programs in digital preservation: What are the essential elements?". The WS-DL group attended "I can haz standardz", which disappointingly was more about the inability of the non-technical in building a tool for data management rather than about the standards themselves. 
As the group were all of technical mind, this was clearly the wrong workshop of the three to attend.&lt;/p&gt;&lt;p class="MsoNormal"&gt;After another short break was a third set of concurrent workshops: "Digital preservation in a box: What are the key resources for digital preservation and education and outreach?", "Slaying the dragons: What is at risk and how do we rescue it?" and "The Challenge challenge: What are ways we can spark digital preservation innovation". The WS-DL group attended the third of the three. There, the attendees were broken into groups with each group being tasked to discuss a single topic in-depth with varying concerns in each group. Unlike the previous workshop, one topic was specifically technical - that of investigating how one assures archive integrity from a host and how to go about performing an audit on the collections stored. The WS-DL group along with Michelle Gallinger (&lt;a href="http://twitter.com/mgallinger"&gt;@mgallinger&lt;/a&gt;), Professor Micah Beck (&lt;a href="http://web.eecs.utk.edu/%7Embeck/"&gt;website&lt;/a&gt;), Mike Smorul (&lt;a href="http://twitter.com/msmorul"&gt;@msmorul&lt;/a&gt;) and a couple others devised the Storage Ping concept, which would require those that host collections to enable a client induced check on the server's collection integrity.&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-n0oSNnpZw10/TitVf_2QFyI/AAAAAAAAAFY/30A-LM9_rBw/s1600/rosenthal2-rev.jpg"&gt;&lt;img style="cursor: pointer; width: 100px; height: 111px; padding: 5px; border: 1px solid rgb(153, 153, 153);" src="http://1.bp.blogspot.com/-n0oSNnpZw10/TitVf_2QFyI/AAAAAAAAAFY/30A-LM9_rBw/s200/rosenthal2-rev.jpg" alt="" id="BLOGGER_PHOTO_ID_5632689767160223522" align="right" /&gt;&lt;/a&gt;Day 3 started out with an introduction by Martha Anderson and then followed with the first presenter, &lt;b&gt;David Rosenthal&lt;/b&gt; (&lt;a 
href="http://lockss.stanford.edu/lockss/David_S.H._Rosenthal"&gt;website&lt;/a&gt;) of &lt;a href="http://stanford.edu/"&gt;Stanford University&lt;/a&gt; on "Cloud Storage for LOCKSS Boxes". LOCKSS (Lots Of Copies Keeps Stuff Safe) boxes are dedicated computers with local storage that communicate with each other and repair any damages of data. David discussed challenges of speed he encountered when developing his system and conveyed a method of assuring integrity of data and assurance of data's existence on a remote server by prepending a nonce. He has recently been working with students at &lt;a href="http://www.cmu.edu/"&gt;Carnegie Mellon University&lt;/a&gt; to develop a crawling process that he described as being "a pretty robust approach to form filling." He also expressed some difficulty he has had in the past with archiving AJAX-based contents but emphasized that his archiving process was different than others', as he does not use &lt;a href="http://crawler.archive.org/"&gt;Heritrix&lt;/a&gt;, the crawler used by &lt;a href="http://www.archive.org/"&gt;The Internet Archive&lt;/a&gt;.&lt;/p&gt;&lt;p class="MsoNormal"&gt;After David, &lt;b&gt;Cal Lee&lt;/b&gt; (&lt;a href="http://www.ils.unc.edu/callee/"&gt;website&lt;/a&gt;) of &lt;a href="http://www.unc.edu/index.htm"&gt;UNC Chapel Hill&lt;/a&gt; analyzed the four NDIIPP State projects:  &lt;/p&gt;&lt;ol&gt;&lt;li&gt;&lt;a href="http://www.digitalpreservation.gov/partners/states_az/states_az.html"&gt;Persistent Digital Archives and Library System&lt;/a&gt; (PeDALS) by Arizona&lt;/li&gt;&lt;li&gt;&lt;a href="http://www.digitalpreservation.gov/partners/states_mn/states_mn.html"&gt;A Model Technological and Social Architecture for the Preservation of State Government Digital Information&lt;/a&gt; by Minnesota Historical Society&lt;/li&gt;&lt;li&gt;Geomap (GIS Data headed by North Carolina for Center Geographic Information and Analysis)&lt;/li&gt;&lt;li&gt;Multi-state Preservation Consortium by Washington State 
Archives&lt;/li&gt;&lt;/ol&gt;&lt;p class="MsoNormal"&gt;The questions he asked about each project included: &lt;/p&gt;&lt;ul&gt;&lt;li&gt;What are the main factors that drove the project in the first place?&lt;/li&gt;&lt;li&gt;What brought these about?&lt;/li&gt;&lt;li&gt;Who was involved and why?&lt;/li&gt;&lt;li&gt;What were the activities they engaged in before this?&lt;/li&gt;&lt;/ul&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-kgSOenWnVWg/TitU4OJTQNI/AAAAAAAAAFQ/Hx9ehlJ2muk/s1600/Bob_Horton_WEB.jpg"&gt;&lt;img style="cursor: pointer; width: 128px; border: 1px solid rgb(153, 153, 153); padding: 5px;" src="http://1.bp.blogspot.com/-kgSOenWnVWg/TitU4OJTQNI/AAAAAAAAAFQ/Hx9ehlJ2muk/s200/Bob_Horton_WEB.jpg" alt="" id="BLOGGER_PHOTO_ID_5632689083803451602" align="right" /&gt;&lt;/a&gt;&lt;p&gt;&lt;/p&gt;&lt;p class="MsoNormal"&gt;Following Cal was &lt;b&gt;Robert Horton&lt;/b&gt; from the Minnesota Historical Society who presented his slide-less report of his NDIIPP-sponsored project.  Cal spoke of a soon-to-be enacted uniform law for the preservation and authentication and access to electronic legislative records. The legislation will define the required usage of digital signatures to sign all legislative content online.&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;b&gt;Peter Krogh&lt;/b&gt; (&lt;a href="http://twitter.com/peterkrogh"&gt;@peterkrogh&lt;/a&gt;) of the American Society of Media Photographers spoke next with, "Extending the reach of &lt;a href="http://www.dpBestflow.org"&gt;www.dpBestflow.org&lt;/a&gt;". Peter had been investigating means of collaboration and methods to get people to archive by conveying the task of archiving in a way that will appeal to the would-be archivist. 
&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-uJ5lnlR57lI/TitNB_qOBEI/AAAAAAAAAFA/o1PV8N5Aw8E/s1600/n565267130_1681489_7665.jpg"&gt;&lt;img style="cursor: pointer; width: 124px; height: 200px; border: 1px solid rgb(153, 153, 153); padding: 5px;" src="http://3.bp.blogspot.com/-uJ5lnlR57lI/TitNB_qOBEI/AAAAAAAAAFA/o1PV8N5Aw8E/s200/n565267130_1681489_7665.jpg" alt="" id="BLOGGER_PHOTO_ID_5632680455620658242" align="right" /&gt;&lt;/a&gt;After a break, summaries of the &lt;a href="http://ws-dl.blogspot.com/2010/12/2010-12-06-memento-wins-2010-digital.html"&gt;2010 DPA finalists&lt;/a&gt; sponsored by the Library of Congress were presented.  WS-DL's own &lt;b&gt;Michael L. Nelson&lt;/b&gt; (&lt;a href="http://cs.odu.edu/%7Emln"&gt;website&lt;/a&gt;) reported on the &lt;a href="http://mementoweb.org/"&gt;Memento&lt;/a&gt; project (joint work with &lt;a style="font-weight: bold;" href="http://twitter.com/hvdsomp"&gt;Herbert Van de Sompel&lt;/a&gt; (who gave the &lt;a href="http://www.dpconline.org/advocacy/awards/dp-award-2010"&gt;original presentation&lt;/a&gt; in London in December 2010) and &lt;a style="font-weight: bold;" href="http://twitter.com/azaroth42"&gt;Robert Sanderson&lt;/a&gt; of &lt;a href="http://library.lanl.gov/"&gt;LANL&lt;/a&gt;) which was referenced multiple times by other presenters throughout the meetup. Dr. Nelson stated that there is currently a disconnect in viewing web archives, as there is no seamless way to go between the past and the present. Memento overcomes "being stuck in the perpetual now" by leveraging content that currently exists in the web archives and provides a bi-directional means to view different versions of a web site on-the-fly. 
Michael stated that Memento does not create web archives but rather puts the notion of time onto the web.&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-A3qJBdyeXCc/TitMlbw6grI/AAAAAAAAAE4/gOp874xw2-Q/s1600/berman.jpg"&gt;&lt;img style="cursor: pointer; width: 120px; height: 160px; border: 1px solid rgb(153, 153, 153); padding: 5px;" src="http://1.bp.blogspot.com/-A3qJBdyeXCc/TitMlbw6grI/AAAAAAAAAE4/gOp874xw2-Q/s200/berman.jpg" alt="" id="BLOGGER_PHOTO_ID_5632679964948726450" align="right" /&gt;&lt;/a&gt;Following Michael's presentation was &lt;b&gt;Fran Berman &lt;/b&gt;(&lt;a href="http://www.digitalpreservation.gov/partners/pioneers/detail_berman.html"&gt;website&lt;/a&gt;)&lt;b&gt; &lt;/b&gt;of Rensselaer Polytechnic Institute with "Economics and Digital Preservation", a final report of the &lt;a href="http://brtf.sdsc.edu/"&gt;Blue Ribbon Task Force&lt;/a&gt; (BRTF), whose mission is to promote sustainable digital preservation and access. Fran spoke of BRTF's investigation of the technical, economical and social problem. "Infrastructure is not free", she said, "and the preservation and access to our data is not free. Because it is not free and because there are so many interesting solutions, you see it as a multivariate problem. " She stated that the Task Force wanted to do a deep dive into the economics of the problem of cost for digital preservation. 
&lt;/p&gt;&lt;p class="MsoNormal"&gt;"Our charge was to do roughly three things", Fran enumerated:&lt;/p&gt;&lt;ol&gt;&lt;li&gt;Assemble a representative group of experts with broad perspective and influence.&lt;/li&gt;&lt;li&gt;Look at the problem space: how can we structure it and understand us in a way that helps us take action.&lt;/li&gt;&lt;li&gt;Come up with actionable recommendations.&lt;/li&gt;&lt;/ol&gt;&lt;p class="MsoNormal"&gt;The BRTF created a report with its recommendations.&lt;/p&gt;&lt;p class="MsoNormal"&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-gi886W2vyO8/TitUhO_fGUI/AAAAAAAAAFI/Lm7k_Gmnt_A/s1600/109642811.jpg"&gt;&lt;img style="cursor: pointer; width: 128px; border: 1px solid rgb(153, 153, 153); padding: 5px;" src="http://1.bp.blogspot.com/-gi886W2vyO8/TitUhO_fGUI/AAAAAAAAAFI/Lm7k_Gmnt_A/s200/109642811.jpg" alt="" id="BLOGGER_PHOTO_ID_5632688688893729090" align="right" /&gt;&lt;/a&gt;The final presentation of the conference was by &lt;b&gt;Kari Kraus &lt;/b&gt;(&lt;a href="http://twitter.com/karikraus"&gt;@karikraus&lt;/a&gt;) of the &lt;a href="http://www.umd.edu/"&gt;University of Maryland&lt;/a&gt; with "Preserving Virtual Worlds" (&lt;a href="http://www.lis.illinois.edu/people/faculty/jmcdonou"&gt;&lt;span style="font-weight: bold;"&gt;Jerry McDonough&lt;/span&gt;&lt;/a&gt; gave the original presentation in London). Kari spoke of her attempts at preserving virtual worlds, repeatedly referencing examples from &lt;a href="http://secondlife.com/"&gt;Second Life&lt;/a&gt;. The project was a multi-institution, multi-disciplinary project by &lt;a href="http://www.illinois.edu/"&gt;University of Illinois at Urbana-Champaign&lt;/a&gt;, Stanford University, &lt;a href="http://www.rit.edu/"&gt;Rochester Institute of Technology&lt;/a&gt; and The University of Maryland that investigated preserving virtual worlds for their aesthetic merit as well as their economic significance. 
"We believe there is tremendous cultural importance to these artifacts.", she said, "We believe video games represent the limit case of what we can do with digital preservation. If we can figure out how to save a classic first-person shooter game like &lt;a href="http://www.idsoftware.com/games/doom/doom-ultimate/"&gt;Doom&lt;/a&gt;, we'll have a better chance of preserving computational simulations of genetic evolution or climate change or the galactic behavior of star systems."&lt;/p&gt;&lt;p class="MsoNormal"&gt;She said that their mission was very practical: they needed to ingest game bits into institutional repositories and provide packaging standards for doing that. Other examples of virtual worlds she mentioned that were investigated were &lt;a href="http://pdp-1.computerhistory.org/pdp-1/?f=theme&amp;amp;s=4&amp;amp;ss=3"&gt;Spacewar&lt;/a&gt;, Adventure (interactive fiction) and Mystery House (interactive fiction) among others.&lt;/p&gt;&lt;b&gt;&lt;span class="Apple-style-span"&gt;In Closing&lt;/span&gt;&lt;/b&gt;&lt;p class="MsoNormal"&gt;Neither of the WS-DL student presenters had presented at a meetup/conference of this caliber before, which made the experience more than worthwhile. Much was learned about the various efforts of the archiving community and WS-DL's projects gained exposure. 
Further, we were made aware of others' efforts and found some resources that we hope to integrate into our research in the near future.&lt;br /&gt;&lt;/p&gt;&lt;br /&gt;— Mat Kelly&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-9214576476028678372?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/9214576476028678372/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/07/2011-07-25-ndsandiipp-partner-meetup.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/9214576476028678372'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/9214576476028678372'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/07/2011-07-25-ndsandiipp-partner-meetup.html' title='2011-07-25: NDSA/NDIIPP Partner Meetup 2011 Trip Report'/><author><name>Mat Kelly</name><uri>http://www.blogger.com/profile/00028636665413750827</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='http://2.bp.blogspot.com/-cEbC3yb_wMU/TiiwV__8X1I/AAAAAAAAAC0/AKj3uvCPnqs/s220/39168_10100480714860381_2004483_70553125_1840790_n%2B%25281%2529.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-RjCID3Cy5Jc/TinDyR2PBgI/AAAAAAAAAEo/i_RYzyDgY8I/s72-c/capitol.png' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-6881160951317640023</id><published>2011-07-21T17:48:00.004-04:00</published><updated>2011-07-22T12:41:13.213-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='LIBER'/><category 
scheme='http://www.blogger.com/atom/ns#' term='ORE'/><category scheme='http://www.blogger.com/atom/ns#' term='OAC'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='RDF'/><title type='text'>2011-07-21: Towards a Machine-Actionable Scholarly Communication System</title><content type='html'>I've told all the members of my research group they should watch this, so I thought I might as well make the same recommendation to the rest of the world... &lt;a href="http://twitter.com/hvdsomp"&gt;Herbert Van de Sompel&lt;/a&gt; presented "Towards a Machine-Actionable Scholarly Communication System" at &lt;a href="http://bibliotecnica.upc.edu/LIBER2011/"&gt;LIBER 2011&lt;/a&gt; in Barcelona, Spain on June 30, 2011.&lt;br /&gt;&lt;br /&gt;You really have to simultaneously watch the video and review the slides to get the full impact of the presentation.  The first part is a succinct review of various projects, but starting at  slide 16 ("nanopublications") things really get interesting.  
Well worth the 40 minute investment.&lt;br /&gt;&lt;br /&gt;&lt;div style="width: 425px;" id="__ss_8494633"&gt; &lt;strong style="display: block; margin: 12px 0pt 4px;"&gt;&lt;a href="http://www.slideshare.net/hvdsomp/towards-a-machineactionable-scholarly-communication-system" title="Towards a Machine-Actionable Scholarly Communication System" target="_blank"&gt;Towards a Machine-Actionable Scholarly Communication System&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/8494633" marginwidth="0" marginheight="0" frameborder="0" height="355" scrolling="no" width="425"&gt;&lt;/iframe&gt; &lt;div style="padding: 5px 0pt 12px;"&gt; View more &lt;a href="http://www.slideshare.net/" target="_blank"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/hvdsomp" target="_blank"&gt;Herbert Van de Sompel&lt;/a&gt; &lt;/div&gt; &lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;embed src="http://upcommons.upc.edu/video/videoplayer/player.swf" type="application/x-shockwave-flash" pluginspage="http://www.macromedia.com/go/getflashplayer" allowfullscreen="true" flashvars="file=http://upcommons.upc.edu/video/bitstream/2099.2/2520/1/sompel.mp4" height="234" width="416"&gt;&lt;/embed&gt;&lt;br /&gt;&lt;br /&gt;--Michael&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-6881160951317640023?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/6881160951317640023/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/07/2011-07-21-towards-machine-actionable.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6881160951317640023'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/953024975153422094/posts/default/6881160951317640023'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/07/2011-07-21-towards-machine-actionable.html' title='2011-07-21: Towards a Machine-Actionable Scholarly Communication System'/><author><name>Michael L. Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-457659956168851332</id><published>2011-07-05T17:23:00.007-04:00</published><updated>2011-07-14T09:20:08.701-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='JCDL'/><category scheme='http://www.blogger.com/atom/ns#' term='Web Archiving'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='Synchronicity'/><category scheme='http://www.blogger.com/atom/ns#' term='JCDL 2011'/><category scheme='http://www.blogger.com/atom/ns#' term='Library of Congress'/><category scheme='http://www.blogger.com/atom/ns#' term='Internet archive'/><title type='text'>2011-07-05: JCDL 2011 Trip Report</title><content type='html'>&lt;div&gt;&lt;strong&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-UH8YmXdTZIo/Th43OiRZBrI/AAAAAAAAACo/jsAWzyko-Fk/s1600/jcdl2011-header-banner.jpg"&gt;&lt;img id="BLOGGER_PHOTO_ID_5628997307117143730" style="display: block; margin: 0px 0pt 0pt; text-align: center; width: 100%; height: 212px;" src="http://4.bp.blogspot.com/-UH8YmXdTZIo/Th43OiRZBrI/AAAAAAAAACo/jsAWzyko-Fk/s320/jcdl2011-header-banner.jpg" alt="" border="0" /&gt;&lt;/a&gt;&lt;/strong&gt;&lt;/div&gt;&lt;p&gt;&lt;a 
href="http://www.jcdl2011.org/"&gt;JCDL 2011&lt;/a&gt; (&lt;a href="https://twitter.com/#%21/search/%23jcdl2011"&gt;#jcdl2011&lt;/a&gt;) was held June 13–16 in Ottawa, Ontario, Canada.  The weather was beautiful and the conference sessions wonderful.  The &lt;a href="http://ws-dl.blogspot.com/"&gt;ODU Web Sciences and Digital Libraries&lt;/a&gt; team was fortunate enough to have six of its members attend, present three short papers, and demonstrate the &lt;a href="https://addons.mozilla.org/en-US/firefox/addon/synchronicity/"&gt;Synchronicity&lt;/a&gt; Firefox extension.&lt;/p&gt;&lt;p&gt;&lt;strong&gt;Our Contributions to JCDL 2011&lt;/strong&gt;&lt;/p&gt;&lt;p&gt;&lt;strong&gt;&lt;/strong&gt;&lt;a href="http://webspace.cs.odu.edu/%7Eaalsum/index.htm"&gt;Ahmed Alsum&lt;/a&gt; presented &lt;a href="http://ws-dl.blogspot.com/2011/06/2011-06-23-how-much-of-web-is-archived.html"&gt;How Much of the Web is Archived?&lt;/a&gt; This paper approximates the amount of the Web that is archived using four URI sources.  From this data, we observe significant variation in archival rate in URIs from different sources.  So, how much of the web is archived? It depends on which &lt;span style="font-style: italic;"&gt;web&lt;/span&gt; you mean. (&lt;a href="http://webspace.cs.odu.edu/%7Esainswor/wiki/uploads/Publications/ainsworth-jcdl11.pdf"&gt;pdf&lt;/a&gt;, &lt;a href="http://www.slideshare.net/aalsum/jcdl-howmuchisarchived"&gt;slides&lt;/a&gt;).&lt;/p&gt;&lt;div style="width: 425px; margin-left: auto; margin-right: auto;"&gt;&lt;strong style="display: block; margin: 12px 0pt 4px; text-align: center;"&gt;&lt;a title="How Much of the Web is Archived? JCDL 2011" href="http://www.slideshare.net/aalsum/jcdl-howmuchisarchived" target="_blank"&gt;How Much of the Web is Archived? 
JCDL 2011&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/8341312?rel=0" marginwidth="0" marginheight="0" frameborder="0" height="355" scrolling="no" width="425"&gt;&lt;/iframe&gt;&lt;/div&gt;&lt;p&gt;&lt;br /&gt;&lt;a href="http://webspace.cs.odu.edu/%7Emklein/"&gt;Martin Klein&lt;/a&gt; presented &lt;a href="http://www.cs.odu.edu/%7Emklein/publications/jcdl2011_sp_mklein.pdf"&gt;Rediscovering Missing Web Pages Using Link Neighborhood Lexical Signatures&lt;/a&gt;, which details a method for discovering missing web pages (the dreaded &lt;a href="http://en.wikipedia.org/wiki/HTTP_404"&gt;404&lt;/a&gt;).  Martin also demonstrated &lt;a href="http://ws-dl.blogspot.com/2011/06/2011-06-10-launching-synchronicity.html"&gt;Synchronicity&lt;/a&gt;, a Firefox extension that uses lexical signatures (and other methods) for automatically rediscovering missing web pages in real time (&lt;a href="http://www.cs.odu.edu/%7Emklein/publications/jcdl2011_sp_mklein.pdf"&gt;pdf&lt;/a&gt;, &lt;a href="http://www.slideshare.net/martinklein0815/rediscovering-missing-web-pages-using-link-neighborhood-lexical-signatures"&gt;slides&lt;/a&gt;).&lt;/p&gt;&lt;div style="width: 425px; margin-left: auto; margin-right: auto;"&gt;&lt;strong style="display: block; margin: 12px 0pt 4px; text-align: center;"&gt;&lt;a title="Rediscovering Missing Web Pages Using Link Neighborhood Lexical Signatures" href="http://www.slideshare.net/martinklein0815/rediscovering-missing-web-pages-using-link-neighborhood-lexical-signatures" target="_blank"&gt;Rediscovering Missing Web Pages&lt;br /&gt;Using Link Neighborhood Lexical Signatures&lt;/a&gt;&lt;/strong&gt;&lt;iframe src="http://www.slideshare.net/slideshow/embed_code/8339796?rel=0" marginwidth="0" marginheight="0" frameborder="0" height="355" scrolling="no" width="425"&gt;&lt;/iframe&gt;&lt;/div&gt;&lt;p&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://webspace.cs.odu.edu/%7Eaalasaad/"&gt;Abdulla Alasaadi&lt;/a&gt; 
presented &lt;a href="http://www.cs.odu.edu/%7Eaalasaad/OAC/Alasaadi-JCDL11.pdf"&gt;Persistent Annotations Deserve New URIs&lt;/a&gt;, which describes a method for creating new persistent URIs for annotations and creating persistent, independent&lt;br /&gt;archived versions of all resources involved in the annotation (&lt;a href="http://www.cs.odu.edu/%7Eaalasaad/OAC/Alasaadi-JCDL11.pdf"&gt;pdf&lt;/a&gt;, &lt;a href="http://www.slideshare.net/alasaadi81/persistent-annotations-deserve-new-uris"&gt;slides&lt;/a&gt;).&lt;/p&gt;&lt;div style="width: 425px; margin-left: auto; margin-right: auto;"&gt;&lt;strong style="display: block; margin: 12px 0pt 4px; text-align: center;"&gt;&lt;a title="Persistent Annotations Deserve New URIs" href="http://www.slideshare.net/alasaadi81/persistent-annotations-deserve-new-uris" target="_blank"&gt;Persistent Annotations Deserve New URIs&lt;/a&gt;&lt;/strong&gt;&lt;iframe src="http://www.slideshare.net/slideshow/embed_code/8406550?rel=0" marginwidth="0" marginheight="0" frameborder="0" height="355" scrolling="no" width="425"&gt;&lt;/iframe&gt;&lt;/div&gt;&lt;p&gt;&lt;strong&gt;The Web Archive Globalization Workshop&lt;/strong&gt;&lt;/p&gt;&lt;p&gt;&lt;strong&gt;&lt;/strong&gt;After the conference, the members of the Web Science and Digital Libraries team attended the &lt;a href="http://cs.harding.edu/wag2011/"&gt;Web Archive Globalization Workshop&lt;/a&gt;. This workshop focused on current initiatives and future possibilities. Eric Hetzner provided insight into the &lt;a href="http://www.cdlib.org/"&gt;California Digital Library&lt;/a&gt;'s web archiving activities. The &lt;a href="http://www.loc.gov/"&gt;Library of Congress&lt;/a&gt;'s &lt;a href="http://www.digitalpreservation.gov/news/2011/20110120_news_taylor.html"&gt;Nicholas Taylor&lt;/a&gt; told us about the &lt;a href="http://www.digitalpreservation.gov/"&gt;Library's&lt;/a&gt; &lt;a href="http://www.digitalpreservation.gov/"&gt;digital preservation&lt;/a&gt; initiatives. 
Brad Tofel of the &lt;a href="http://www.archive.org/"&gt;Internet Archive&lt;/a&gt; gave us the lowdown on the future of web archive formats (&lt;a href="https://webarchive.jira.com/wiki/display/Heritrix/ARC+File+Format"&gt;ARC&lt;/a&gt;, &lt;a href="https://webarchive.jira.com/wiki/display/Heritrix/WARC+%28Web+ARChive%29"&gt;WARC&lt;/a&gt;, &lt;a href="https://webarchive.jira.com/wiki/display/Iresearch/Web+Archive+Transformation+%28WAT%29+Specification%2C+Utilities%2C+and+Usage+Overview"&gt;WAT&lt;/a&gt;) and the &lt;a href="http://wayback.archive.org/web/"&gt;Wayback Machine&lt;/a&gt;. Robert Sanderson with the &lt;a href="http://library.lanl.gov/"&gt;LANL Research Library&lt;/a&gt; provided an overview of current &lt;a href="http://www.mementoweb.org/"&gt;Memento&lt;/a&gt; infrastructure. There was much discussion about current archiving challenges including management of huge volumes of information, copyright considerations, and the challenges of making the archives accessible to researchers and the public. (slides)&lt;/p&gt;&lt;p&gt;The workshop was organized by:&lt;/p&gt;&lt;ul&gt;&lt;li&gt;Frank McCown, Harding University&lt;/li&gt;&lt;li&gt;Hector Garcia-Molina, Stanford University&lt;/li&gt;&lt;li&gt;Michael L. Nelson, Old Dominion University&lt;/li&gt;&lt;li&gt;Andreas Paepcke, Stanford University&lt;/li&gt;&lt;/ul&gt;&lt;p&gt;&lt;strong&gt;Keynotes&lt;/strong&gt;&lt;/p&gt;&lt;p&gt;The opening keynote, "Leaving the Cathedral and Entering the Bazaar: Library and Archives Canada Engages Canada’s Digital Society," was given by &lt;a href="http://nlc-bnc.ca/whats-new/013-395-e.html"&gt;Daniel J. Caron&lt;/a&gt;, the current Librarian and Archivist of Canada.  Mr. Caron discussed the issues and opportunities faced by national libraries as they transition from an analog to digital environment. He compared the situation to the cultural and process differences put forward by Eric S. 
Raymond in &lt;a href="http://en.wikipedia.org/wiki/The_Cathedral_and_the_Bazaar"&gt;The Cathedral and the Bazaar&lt;/a&gt;.  It was an excellent talk and I really got the impression that Mr. Caron understood the transition required and the chaos inherent with a technological change of this magnitude.&lt;/p&gt;&lt;p&gt;Wednesday's open talk was given by IBM's &lt;a href="https://researcher.ibm.com/researcher/view.php?person=us-joan.dimicco"&gt;Joan Morris DiMicco&lt;/a&gt;.  "Data Narratives: Telling Stories with Data" (&lt;a href="http://www.google.com/url?sa=t&amp;amp;source=web&amp;amp;cd=4&amp;amp;ved=0CCwQFjAD&amp;amp;url=http%3A%2F%2Fthevcl.com%2Fpresentations%2FJCDL-keynote-final.pdf&amp;amp;rct=j&amp;amp;q=Data%20Narratives%3A%20Telling%20Stories%20with%20Data%20Keynote%20Speaker%20Joan%20Morris%20DiMicco&amp;amp;ei=PAEdTuXOIuHs0gHLk4n7Bw&amp;amp;usg=AFQjCNEsiXgjsiXMkKXMT9YCmIkEVHmu3g&amp;amp;sig2=7e7HmpxtmAdpczvYdr8uyQ&amp;amp;cad=rja"&gt;slides&lt;/a&gt;) focused on current research at IBM into data visualization as a storytelling medium.  She defined a story as concrete, temporal, purposeful, and emotional.  She gave brief presentations of visualizing legislative text with &lt;a href="http://manybills.researchlabs.ibm.com/"&gt;Many Bills&lt;/a&gt;, &lt;a href="http://www.research.ibm.com/haifa/projects/imt/social/sand_vis.shtml"&gt;SaNDVis&lt;/a&gt; social relationship search, and the impact of visualizations on group behavior with &lt;a href="https://researcher.ibm.com/researcher/view.php?person=us-joan.dimicco"&gt;Second Messenger&lt;/a&gt;.&lt;/p&gt;&lt;p&gt;&lt;a href="http://seos.uvic.ca/people/barnes.htm"&gt;Christopher R. Barnes&lt;/a&gt;, the director of &lt;a href="http://www.neptunecanada.ca/"&gt;NEPTUNE Canada&lt;/a&gt;, described the NEPTUNE Canada cabled ocean observatory using many wonderful illustrations and photographs.  
He then went on to describe the digital library problem he and his team face: the 4+ (and growing) gigabytes of data collected daily by the project.  This data is used by over 8,000 users.  Storage, cataloging, and access are ever growing challenges the digital library and preservation communities can help with.&lt;/p&gt;&lt;p&gt;&lt;strong&gt;Session Highlights&lt;/strong&gt;&lt;br /&gt;&lt;br /&gt;Two or three sessions ran simultaneously during the conference and I was not able to attend all presentations.&lt;br /&gt;&lt;br /&gt;Session 1 presented automated methods to assist human understanding of texts.  There were full papers on improving understanding of historical word sense variation (&lt;a href="http://www.perseus.tufts.edu/publications/bamman-11.pdf"&gt;Measuring Historical Word Sense Variation&lt;/a&gt;) and improving information extraction from PDF books (&lt;a href="http://portal.acm.org/citation.cfm?id=1998079"&gt;Structure Extractions from PDF-based Book Documents&lt;/a&gt;); and a short paper on using syntactic dependency parse trees to learn expected patterns between lexical arguments (&lt;a href="http://portal.acm.org/citation.cfm?id=1998080"&gt;Word Order Matters: Measuring Topic Coherence with Lexical Structure&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;Session 5 explored rediscovery of missing web content, a topic near and dear to us.  This session included two of our short papers and full papers on using patterns to efficiently implement web archiving (&lt;a href="http://www-poleia.lip6.fr/%7Ebensaadm/JCDL2011.pdf"&gt;Archiving the Web Using Page Changes Patterns: A Case Study&lt;/a&gt;) and identifying academic home pages (&lt;a href="http://www.personal.psu.edu/gud111/jcdl11_homepages.pdf"&gt;On Identifying Academic Homepages for Digital Libraries&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;The impact of copyright on access and use was covered in session 7.  
The attitudes of the social-media savvy were explored  (&lt;a href="http://www.csdl.tamu.edu/%7Emarshall/jcdl2011-marshall-shipman-preliminary.pdf"&gt;The Ownership and Reuse of Visual Media&lt;/a&gt;) and the implications of data quality problems in national bibliographies were explored (&lt;a href="http://portal.acm.org/citation.cfm?id=1998109"&gt;Using National Bibliographies for Rights Clearance&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;Session 8 looked at methods to annotate the Web.  Rob Sanderson presented &lt;a href="http://www.shared-canvas.org/"&gt;SharedCanvas&lt;/a&gt; (&lt;a href="http://arxiv.org/pdf/1104.2925"&gt;preprint&lt;/a&gt;, &lt;a href="http://www.slideshare.net/azaroth42/sharedcanvas-a-collaborative-modelfor-medieval-manuscript-layout-dissemination"&gt;slides&lt;/a&gt;). There was also a paper on combining superimposed information with digital libraries (&lt;a href="http://eprints.cs.vt.edu/archive/00001142/01/2011jcdl-superidr-qual-v5-3.pdf"&gt;Use of Subimages in Fish Species Identification: A Qualitative Study&lt;/a&gt;).  Our &lt;a href="http://www.cs.odu.edu/%7Eaalasaad/OAC/Alasaadi-JCDL11.pdf"&gt;Persistent Annotations Deserve New URIs&lt;/a&gt; short paper was also presented in this session.&lt;br /&gt;&lt;br /&gt;Sessions 11 and 12 looked at the needs and abilities of users and improving the digital library experience.  &lt;a href="http://portal.acm.org/citation.cfm?id=1998126"&gt;Understanding Digital Library Adoption: A Use Diffusion Approach&lt;/a&gt; and &lt;a href="http://portal.acm.org/citation.cfm?id=1998127"&gt;In the Bookshop: Examining Popular Search Strategies&lt;/a&gt; studied how users interact with digital libraries. 
Improving recommendations was looked at from several perspectives (&lt;a href="http://portal.acm.org/citation.cfm?id=1998131"&gt;A Social Network-Aware Top-N Recommender System using GPU&lt;/a&gt;, Serendipitous Recommendation for Scholarly Papers Considering Relations Among Researchers, and &lt;a href="http://www.google.com/url?sa=t&amp;amp;source=web&amp;amp;cd=3&amp;amp;ved=0CCQQFjAC&amp;amp;url=http%3A%2F%2Fwww.comp.nus.edu.sg%2F%7Ekanmy%2Fpapers%2Fjcdl56s-ly.pdf&amp;amp;rct=j&amp;amp;q=Product%20Review%20Summarization%20from%20a%20Deeper%20Perspective&amp;amp;ei=L_0cTpTnAqLZ0QGeud3KBw&amp;amp;usg=AFQjCNE7O7VJCj8LhDm03X0oUB9b41EYVg&amp;amp;sig2=pEUR0xW_CFW61nBPc2199Q&amp;amp;cad=rja"&gt;Product Review Summarization from a Deeper Perspective&lt;/a&gt;).&lt;/p&gt;&lt;p&gt;&lt;strong&gt;Other Perspectives on JCDL 2011&lt;/strong&gt;&lt;/p&gt;&lt;ul&gt;&lt;li&gt;&lt;a href="http://heathersdarkroom.blogspot.com/2011/06/jcdl-2011.html"&gt;Heather's Darkroom&lt;/a&gt; has good descriptions of the first two keynotes and the &lt;a href="http://www.csdl.tamu.edu/%7Emarshall/jcdl2011-marshall-shipman-preliminary.pdf"&gt;The Ownership and Reuse of Visual Media&lt;/a&gt; paper.&lt;/li&gt;&lt;li&gt;Kayleigh Ayn Bohémier has a 4-part post (&lt;a href="http://kabohemi.mysite.syr.edu/2011/jcdl-part-1-le-dialecte-quebecois-nest-pas-le-francais-parisien-ne-protestez-pas/"&gt;part 1&lt;/a&gt;) on the conference and the conference experience in Ottawa.&lt;/li&gt;&lt;li&gt;Some of the presentation slides are on the slideshare &lt;a href="http://www.slideshare.net/event/jcdl2011/slideshows"&gt;JCDL 2011 event page&lt;/a&gt;.&lt;/li&gt;&lt;li&gt;The Digital Repositories Workshop &lt;a href="http://blogs.unimelb.edu.au/libraryintelligencer/2011/06/21/slides-of-the-digital-repositories-workshop-at-jcdl/"&gt;slides&lt;/a&gt; are also available.&lt;/li&gt;&lt;/ul&gt;&lt;p&gt;— Scott G. 
Ainsworth&lt;/p&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-457659956168851332?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/457659956168851332/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/07/2011-07-05-jcdl-2011-trip-report.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/457659956168851332'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/457659956168851332'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/07/2011-07-05-jcdl-2011-trip-report.html' title='2011-07-05: JCDL 2011 Trip Report'/><author><name>Scott G. Ainsworth</name><uri>http://www.blogger.com/profile/05860551179796856679</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='http://1.bp.blogspot.com/_QW0GAj2ACVM/SlPyfPe_UsI/AAAAAAAAAAY/_xp0tiXXfQw/S220/Photo+4+touched+up,+diffuse+glow+2.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-UH8YmXdTZIo/Th43OiRZBrI/AAAAAAAAACo/jsAWzyko-Fk/s72-c/jcdl2011-header-banner.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-8654791598965917129</id><published>2011-06-23T10:30:00.001-04:00</published><updated>2011-07-04T18:24:29.238-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Web Archiving'/><category scheme='http://www.blogger.com/atom/ns#' term='JCDL 2011'/><title type='text'>2011-06-23: How Much of the Web is Archived?</title><content type='html'>There are many questions to ask about web 
archiving and digital preservation - why is archiving important? what should be archived? what is currently being archived? how often should pages be archived?&lt;br /&gt;&lt;br /&gt;The short paper &lt;a href="http://www.cs.odu.edu/%7Emweigle/papers/ainsworth-jcdl11.pdf"&gt;"How Much of the Web is Archived?"&lt;/a&gt; (Scott G. Ainsworth, Ahmed AlSum, Hany SalahEldeen, Michele C. Weigle, and Michael L. Nelson), published at JCDL 2011, is our first step at determining to what extent the web is being archived and by which archives.&lt;br /&gt;&lt;br /&gt;To address this question, we sampled URIs from four sources to estimate the percentage of archived URIs and the number and frequency of archived versions.  We chose 1000 URIs from each of the following sources:&lt;ol&gt;&lt;li&gt;&lt;a href="http://www.dmoz.org/"&gt;Open Directory Project (DMOZ)&lt;/a&gt; - sampled from all URIs (July 2000 - Oct 2010)&lt;/li&gt;&lt;li&gt;&lt;a href="http://delicious.com/"&gt;Delicious&lt;/a&gt; - random URIs from the &lt;a href="http://www.delicious.com/recent/"&gt;Recent Bookmarks list&lt;/a&gt;&lt;/li&gt;&lt;li&gt;&lt;a href="http://bitly.com/"&gt;Bitly&lt;/a&gt; - random hash values generated and dereferenced&lt;/li&gt;&lt;li&gt;search engine caches (&lt;a href="http://www.google.com/"&gt;Google&lt;/a&gt;, &lt;a href="http://www.bing.com/"&gt;Bing&lt;/a&gt;, &lt;a href="http://www.yahoo.com/"&gt;Yahoo!&lt;/a&gt;) - random sample of URIs from queries of 5-grams (using &lt;a href="http://googleresearch.blogspot.com/2006/08/all-our-n-gram-are-belong-to-you.html"&gt;Google's N-gram data&lt;/a&gt;)&lt;/li&gt;&lt;/ol&gt;For each of the sample URIs (4000 in all), we used &lt;a href="http://www.mementoweb.org/"&gt;Memento&lt;/a&gt; to discover archived versions, or mementos, of the URI.&lt;br /&gt;&lt;br /&gt;We categorize the archives as &lt;a href="http://www.archive.org/"&gt;Internet Archive&lt;/a&gt; (using the &lt;a 
href="http://classic-web.archive.org/collections/web.html"&gt;classic Wayback Machine&lt;/a&gt;), search engine caches (Google, Bing, and Yahoo!), and other (e.g.,  &lt;a href="http://www.diigo.com/"&gt;Diigo&lt;/a&gt;, &lt;a href="http://www.archive-it.org/"&gt;Archive-It&lt;/a&gt;, &lt;a href="http://www.nationalarchives.gov.uk/"&gt;UK National Archives&lt;/a&gt;, &lt;a href="http://www.webcitation.org/"&gt;WebCite&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;Our first set of graphs (click on each graph for a larger version) shows the mementos discovered for each URI, ordered by the first observation date.  These are separated by sample set, so the y-axis on each graph runs from 0-1000.  Brown dots indicate mementos discovered at the Internet Archive, blue dots indicate those found in search engine caches, and red dots indicate mementos found at other archives.&lt;br /&gt;&lt;br /&gt;&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-nRgvLWKurYg/TgNJjTdBy3I/AAAAAAAAAEU/xNT-U1JLGOY/s1600/mementosScatterDmoz.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 218px; height: 120px;" src="http://4.bp.blogspot.com/-nRgvLWKurYg/TgNJjTdBy3I/AAAAAAAAAEU/xNT-U1JLGOY/s320/mementosScatterDmoz.png" alt="" id="BLOGGER_PHOTO_ID_5621417630754786162" border="0" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;td&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-lFUgFBgQ2Bo/TgNJjR35nDI/AAAAAAAAAEc/DCIsq9T5_Vg/s1600/mementosScatterDelicious.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 218px; height: 120px;" src="http://4.bp.blogspot.com/-lFUgFBgQ2Bo/TgNJjR35nDI/AAAAAAAAAEc/DCIsq9T5_Vg/s320/mementosScatterDelicious.png" alt="" id="BLOGGER_PHOTO_ID_5621417630330625074" border="0" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td 
align="center"&gt;DMOZ&lt;/td&gt;&lt;td align="center"&gt;Delicious&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-vwqdYG610ws/TgNJjh_k3UI/AAAAAAAAAEk/6Yl5RPry9js/s1600/mementosScatterBitly.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 218px; height: 120px;" src="http://4.bp.blogspot.com/-vwqdYG610ws/TgNJjh_k3UI/AAAAAAAAAEk/6Yl5RPry9js/s320/mementosScatterBitly.png" alt="" id="BLOGGER_PHOTO_ID_5621417634657787202" border="0" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;td&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-O1fT3EutFGg/TgNJkKs0LxI/AAAAAAAAAEs/KiqKHxO_2uU/s1600/mementosScatterSE.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 218px; height: 120px;" src="http://2.bp.blogspot.com/-O1fT3EutFGg/TgNJkKs0LxI/AAAAAAAAAEs/KiqKHxO_2uU/s320/mementosScatterSE.png" alt="" id="BLOGGER_PHOTO_ID_5621417645584953106" border="0" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center"&gt;Bitly&lt;/td&gt;&lt;td align="center"&gt;Search engines&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;There are a few interesting observations:&lt;ul&gt;&lt;li&gt;&lt;i&gt;DMOZ URIs are well-represented, especially in the Internet Archive.&lt;/i&gt;  There are two likely reasons for this: DMOZ is the primary source for seed URIs for the Internet Archive and the DMOZ sample contains more old URIs than the other sources.&lt;/li&gt;&lt;br /&gt;&lt;li&gt;&lt;i&gt;Bitly URIs are very poorly represented.&lt;/i&gt;  The majority of Bitly URIs are not found in any archive.  
This is currently under further investigation.&lt;/li&gt;&lt;br /&gt;&lt;li&gt;&lt;i&gt;There is a large gap in mementos found in the Internet Archive, starting in 2008.&lt;/i&gt; We suspect this is because of the use of the classic version of the Wayback Machine.&lt;/li&gt;&lt;/ul&gt;The second set of graphs shows the relationship between the density of mementos for a URI and the URI's age.  The x-axis is the estimated creation date of the URI, and the y-axis is the number of mementos found for this URI.  Large dots indicate that several URIs had similar creation dates and number of mementos. We show density guidelines for 0.5, 1, and 2 mementos created per month.&lt;br /&gt;&lt;br /&gt;&lt;table&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-7ops0UofMss/TgNKkZccgnI/AAAAAAAAAFg/QFvOMZKdEik/s1600/mementosPerUriScatterDmoz.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 218px; height: 120px;" src="http://3.bp.blogspot.com/-7ops0UofMss/TgNKkZccgnI/AAAAAAAAAFg/QFvOMZKdEik/s320/mementosPerUriScatterDmoz.png" alt="" id="BLOGGER_PHOTO_ID_5621418749054452338" border="0" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;td&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-w-ShIdIvqps/TgNKj3wwuqI/AAAAAAAAAFY/0HZdUT-NlDI/s1600/mementosPerUriScatterDelicious.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 218px; height: 120px;" src="http://1.bp.blogspot.com/-w-ShIdIvqps/TgNKj3wwuqI/AAAAAAAAAFY/0HZdUT-NlDI/s320/mementosPerUriScatterDelicious.png" alt="" id="BLOGGER_PHOTO_ID_5621418740012858018" border="0" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center"&gt;DMOZ&lt;/td&gt;&lt;td align="center"&gt;Delicious&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} 
catch(e) {}" href="http://3.bp.blogspot.com/-no5HoZ0Xzx4/TgNKj3EyY9I/AAAAAAAAAFQ/CMqfpW4ub0o/s1600/mementosPerUriScatterBitly.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 218px; height: 120px;" src="http://3.bp.blogspot.com/-no5HoZ0Xzx4/TgNKj3EyY9I/AAAAAAAAAFQ/CMqfpW4ub0o/s320/mementosPerUriScatterBitly.png" alt="" id="BLOGGER_PHOTO_ID_5621418739828417490" border="0" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;td&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-doqUKw8vLUs/TgNKjtUnKJI/AAAAAAAAAFI/8t-LAW2PEA0/s1600/mementosPerUriScatterSE.png"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 218px; height: 120px;" src="http://3.bp.blogspot.com/-doqUKw8vLUs/TgNKjtUnKJI/AAAAAAAAAFI/8t-LAW2PEA0/s320/mementosPerUriScatterSE.png" alt="" id="BLOGGER_PHOTO_ID_5621418737210435730" border="0" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td align="center"&gt;Bitly&lt;/td&gt;&lt;td align="center"&gt;Search engines&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;From these graphs, we make the following observations:&lt;ul&gt;&lt;li&gt;Many of the DMOZ URIs are archived at least once every 2 months.&lt;/li&gt;&lt;li&gt;Older Delicious URIs have many mementos.&lt;/li&gt;&lt;li&gt;A few Bitly URIs have many mementos, but most URIs have 0-10 mementos.&lt;/li&gt;&lt;/ul&gt;So, how much of the web is archived?  
Depends on which "web" you mean.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-PC8lxMc4HlQ/TgIzD3cWIxI/AAAAAAAAACI/NfWm2FBHshg/s1600/how-much-archived.png"&gt;&lt;img style="margin: 0pt 10px 10px 0pt; cursor: pointer; width: 320px; height: 175px;" src="http://2.bp.blogspot.com/-PC8lxMc4HlQ/TgIzD3cWIxI/AAAAAAAAACI/NfWm2FBHshg/s320/how-much-archived.png" alt="" id="BLOGGER_PHOTO_ID_5621111426427265810" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Ahmed presented this work at &lt;a href="http://jcdl2011.org/"&gt;JCDL 2011&lt;/a&gt;.  His presentation slides are below:&lt;br /&gt;&lt;br /&gt;&lt;div style="width: 425px;" id="__ss_8341312"&gt;&lt;strong style="display: block; margin: 5px 0pt 4px;"&gt;&lt;a href="http://www.slideshare.net/aalsum/jcdl-howmuchisarchived" title="How Much of the Web is Archived? JCDL 2011"&gt;How Much of the Web is Archived? JCDL 2011&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/8341312" marginwidth="0" marginheight="0" scrolling="no" width="425" frameborder="0" height="355"&gt;&lt;/iframe&gt;&lt;/div&gt;&lt;br /&gt;This work supported in part by the Library of Congress and NSF &lt;a href="http://www.nsf.gov/awardsearch/showAward.do?AwardNumber=1009392"&gt;IIS 1009392&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;-Michele&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-8654791598965917129?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/8654791598965917129/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/06/2011-06-23-how-much-of-web-is-archived.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' 
href='http://www.blogger.com/feeds/953024975153422094/posts/default/8654791598965917129'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/8654791598965917129'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/06/2011-06-23-how-much-of-web-is-archived.html' title='2011-06-23: How Much of the Web is Archived?'/><author><name>Michele C. Weigle</name><uri>http://www.blogger.com/profile/10870102923048345858</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='27' src='http://4.bp.blogspot.com/_reFG55XOIFE/TKXXim2GQ1I/AAAAAAAAAAM/mg6lAXQvvI8/s1600-R/mweigle.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-nRgvLWKurYg/TgNJjTdBy3I/AAAAAAAAAEU/xNT-U1JLGOY/s72-c/mementosScatterDmoz.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-596681401589053300</id><published>2011-06-23T03:54:00.007-04:00</published><updated>2011-07-04T18:23:30.974-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Annotations'/><category scheme='http://www.blogger.com/atom/ns#' term='ORE'/><category scheme='http://www.blogger.com/atom/ns#' term='OAC'/><category scheme='http://www.blogger.com/atom/ns#' term='Web Archiving'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><title type='text'>2011-06-29: OAC Demo of SVG and Constrained Targets</title><content type='html'>&lt;div dir="ltr" style="text-align: left;" trbidi="on"&gt;&lt;div&gt;Online annotating service is a tool that helps to annotate different resources with different authors and give this annotation a separate URI that can be shared using a Facebook post, blog post, tweet, etc.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;Web annotations can be described as a relation between different resources with different media types like text, 
image, audio, or video.  The web annotation service will be able to provide:&lt;/div&gt;&lt;div&gt;&lt;ul style="text-align: left;"&gt;&lt;li&gt;A unique URI for every annotation.&lt;/li&gt;&lt;li&gt;Persistent annotations.&lt;/li&gt;&lt;li&gt;Annotate specific part of media.&lt;/li&gt;&lt;li&gt;Keep track of the resources.&lt;/li&gt;&lt;li&gt;Present annotation in browser.&lt;/li&gt;&lt;li&gt;Meet the &lt;a href="http://www.openannotation.org/"&gt;OAC&lt;/a&gt; model requirements (&lt;a href="http://www.openannotation.org/spec/alpha3/"&gt;alpha3 release&lt;/a&gt;).&lt;/li&gt;&lt;/ul&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;Open Annotation Model:&lt;/b&gt;&lt;/div&gt;&lt;div&gt;This service will generate annotations that meet the OAC model specification.  In an annotation that contains different resources, the OAC will introduce a new resource that describes the relationships between the resources that make the annotation.&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;Example:&lt;/b&gt;&lt;/div&gt;&lt;div&gt;A user who is interested in wildlife is browsing a page about elephants in Africa, and he was interested in the map that shows where the elephants live exactly.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;table class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;" align="center" cellpadding="0" cellspacing="0"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-1Wq1yewS9v8/TgpVRd83uNI/AAAAAAAAAA8/9RFe2Y3-zUk/s1600/image001.jpg" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img src="http://3.bp.blogspot.com/-1Wq1yewS9v8/TgpVRd83uNI/AAAAAAAAAA8/9RFe2Y3-zUk/s1600/image001.jpg" border="0" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;&lt;a 
href="http://bioweb.uwlax.edu/bio203/s2007/shah_rach/map.JPG"&gt;http://bioweb.uwlax.edu/bio203/s2007/shah_rach/map.JPG&lt;/a&gt;&lt;/span&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;/div&gt;&lt;div&gt;&lt;div style="text-align: left;"&gt;&lt;br /&gt;The user relates this image to another image that shows how people kill the elephants in order to sell their expensive tusks in another website.  Now, the tusks picture annotates the map, and shows the reason behind the decreasing number in elephants in central Africa.&lt;/div&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-zfNKC40ihDs/TgpWEQFsOKI/AAAAAAAAABA/-PHuIloLRWM/s1600/image002.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img src="http://1.bp.blogspot.com/-zfNKC40ihDs/TgpWEQFsOKI/AAAAAAAAABA/-PHuIloLRWM/s320/image002.jpg" border="0" width="320" height="200" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;How does it work?&lt;/b&gt;&lt;/div&gt;&lt;div&gt;The process starts at the client side, where the user creates his annotation using the &lt;a href="http://code.google.com/p/svg-edit/"&gt;SVG-Edit&lt;/a&gt;. SVG-Edit is an open source plugin that has been designed to create SVG graphs in local desks. 
SVG-Edit has been modified to meet our requirement where we can edit the graph online and send the results to our annotation online service.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-CYsUhADTApI/TgpRydeZjNI/AAAAAAAAAA0/f3JdA71ewJI/s1600/image005.jpg" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img src="http://1.bp.blogspot.com/-CYsUhADTApI/TgpRydeZjNI/AAAAAAAAAA0/f3JdA71ewJI/s320/image005.jpg" border="0" width="320" height="285" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;SVG will enable the annotator to annotate specific parts of the image using any shape.  This will solve the problem of the W3C media fragmentation specification that supports rectangular shapes only.&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-VxrtJNxKwh4/TgpWLtt5WuI/AAAAAAAAABE/qziMtSSwSNY/s1600/image006.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img src="http://3.bp.blogspot.com/-VxrtJNxKwh4/TgpWLtt5WuI/AAAAAAAAABE/qziMtSSwSNY/s400/image006.png" border="0" width="400" height="176" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;After creating the annotation using the SVG-Edit, the annotation data will be sent to our online service that does the following:&lt;br /&gt;&lt;ul style="text-align: left;"&gt;&lt;li&gt;Pushes all related resources to the &lt;a href="http://www.webcitation.org/"&gt;WebCite&lt;/a&gt; archive, in this case, each resource will have at least two copies with different URIs, one of them is the archived copy.&lt;/li&gt;&lt;li&gt;The service will generate an RDF file mentioning the relationships between the resources.&lt;/li&gt;&lt;li&gt;With all the different URIs generated of the resources and their archived copies, a resource map will be created for 
every annotation created. The associated resource map will aggregate all the resources that are related to this annotation. The resource map will be referred to by the link-header when the page gets dereferenced.&lt;/li&gt;&lt;li&gt;Since the generated URI of the annotation will be long, another short URI will be generated using the bit.ly URI shortening service. The new short URI will make it easy for the annotation to be shared on tweets or Facebook posts.&lt;/li&gt;&lt;li&gt;At the end of the annotating process, the user will get a simple and short URI that can be easily posted in user’s mail, twitter or Facebook.&lt;/li&gt;&lt;li&gt;When users dereference the URI they get the annotation back. &lt;/li&gt;&lt;/ul&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;table class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;" align="center" cellpadding="0" cellspacing="0"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://2.bp.blogspot.com/-dS7OA9I-Eqc/TgpYPc-3hMI/AAAAAAAAABM/i_T6pCb384c/s1600/image008.png" imageanchor="1" style="margin-left: auto; margin-right: auto;"&gt;&lt;img src="http://2.bp.blogspot.com/-dS7OA9I-Eqc/TgpYPc-3hMI/AAAAAAAAABM/i_T6pCb384c/s400/image008.png" border="0" width="400" height="228" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Pushing the annotation data to the web-service.&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;/div&gt;&lt;div&gt;&lt;table class="tr-caption-container" style="margin-left: auto; margin-right: auto; text-align: center;" align="center" cellpadding="0" cellspacing="0"&gt;&lt;tbody&gt;&lt;tr&gt;&lt;td style="text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/-D9AkbLnti6k/TgpYItw42mI/AAAAAAAAABI/IYA6Je43EHc/s1600/image010.png" imageanchor="1" 
style="margin-left: auto; margin-right: auto;"&gt;&lt;img src="http://3.bp.blogspot.com/-D9AkbLnti6k/TgpYItw42mI/AAAAAAAAABI/IYA6Je43EHc/s400/image010.png" border="0" width="400" height="233" /&gt;&lt;/a&gt;&lt;/td&gt;&lt;/tr&gt;&lt;tr&gt;&lt;td class="tr-caption" style="text-align: center;"&gt;Retrieving the annotation using its URI.&lt;/td&gt;&lt;/tr&gt;&lt;/tbody&gt;&lt;/table&gt;&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;You can check the video (&lt;a href="http://bit.ly/Annotate"&gt;http://bit.ly/Annotate&lt;/a&gt;) to watch a demonstration on how this service works.&lt;/div&gt;&lt;br /&gt;&lt;object width="425" height="349"&gt;&lt;param name="movie" value="http://www.youtube.com/v/eztnz1oOxGc?version=3&amp;amp;hl=en_US&amp;amp;rel=0"&gt;&lt;param name="allowFullScreen" value="true"&gt;&lt;param name="allowscriptaccess" value="always"&gt;&lt;embed src="http://www.youtube.com/v/eztnz1oOxGc?version=3&amp;amp;hl=en_US&amp;amp;rel=0" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="349"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;br /&gt;&lt;br /&gt;&lt;div&gt;For more details, you can refer to the paper "Persistent Annotations Deserve New URIs" which has been published in JCDL 2011, and the slides are below:&lt;/div&gt;&lt;br /&gt;&lt;div id="__ss_8406550" style="width: 425px;"&gt;&lt;strong style="display: block; margin: 12px 0pt 4px;"&gt;&lt;a href="http://www.slideshare.net/alasaadi81/persistent-annotations-deserve-new-uris" title="Persistent Annotations Deserve New URIs"&gt;Persistent Annotations Deserve New URIs&lt;/a&gt;&lt;/strong&gt; &lt;iframe marginheight="0" marginwidth="0" src="http://www.slideshare.net/slideshow/embed_code/8406550" scrolling="no" width="425" frameborder="0" height="355"&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;div style="padding: 5px 0pt 12px;"&gt;&lt;br 
/&gt;&lt;/div&gt;&lt;/div&gt;&lt;div&gt;This service will help you in:&lt;/div&gt;&lt;div&gt;&lt;ul style="text-align: left;"&gt;&lt;li&gt;Minting new URIs for the annotations.&lt;/li&gt;&lt;li&gt;Annotating the media fragments was made possible using the SVG and its media tags.&lt;/li&gt;&lt;li&gt;Using the web archives solves the issue of keeping the annotation persistent over time.&lt;/li&gt;&lt;li&gt;Keep track of all the related resources using ORE Resource Maps.&lt;/li&gt;&lt;/ul&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;Links:&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;ul style="text-align: left;"&gt;&lt;li&gt;&lt;a href="http://cs.odu.edu/%7Eaalasaad/OAC/Alasaadi-JCDL11.pdf"&gt;Persistent Annotations Deserve New URIs&lt;/a&gt;&lt;/li&gt;&lt;li&gt;&lt;a href="http://bit.ly/Annotate"&gt;http://bit.ly/Annotate&lt;/a&gt;&lt;/li&gt;&lt;li&gt;&lt;a href="http://www.slideshare.net/alasaadi81/persistent-annotations-deserve-new-uris"&gt;http://www.slideshare.net/alasaadi81/persistent-annotations-deserve-new-uris&lt;/a&gt;&lt;/li&gt;&lt;/ul&gt;&lt;/div&gt;&lt;div&gt;This work is supported in part by the NSF &lt;a href="http://www.nsf.gov/awardsearch/showAward.do?AwardNumber=1009392"&gt;IIS 1009392&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;-- Abdulla&lt;/div&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-596681401589053300?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/596681401589053300/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/06/2011-06-29-oac-demo-of-svg-and.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/596681401589053300'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/953024975153422094/posts/default/596681401589053300'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/06/2011-06-29-oac-demo-of-svg-and.html' title='2011-06-29: OAC Demo of SVG and Constrained Targets'/><author><name>Alasaadi</name><uri>http://www.blogger.com/profile/09884264678980670081</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-1Wq1yewS9v8/TgpVRd83uNI/AAAAAAAAAA8/9RFe2Y3-zUk/s72-c/image001.jpg' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-4020540139120220245</id><published>2011-06-18T13:16:00.005-04:00</published><updated>2011-06-18T14:49:51.981-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Digging Into Data'/><title type='text'>2011-06-18: Report on the 2011 Digging into Data Challenge Conference</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://criminalintent.org/2011/06/criminal-intent-the-poster-2/"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 144px;" src="http://4.bp.blogspot.com/-3u7oDXUM58c/TfzdzRh81GI/AAAAAAAAAdM/u0H-9AglcfE/s200/digging-into-data-criminal-intent.jpg" alt="" id="BLOGGER_PHOTO_ID_5619610308000404578" border="0" /&gt;&lt;/a&gt;On June 9-10 I attended the &lt;a href="http://www.diggingintodata.org/tabid/184/Default.aspx"&gt;2011 Digging into Data Challenge Conference&lt;/a&gt; in Washington DC, which was a status report of the &lt;a href="http://www.diggingintodata.org/Home/AwardRecipients2009/tabid/175/Default.aspx"&gt;eight projects&lt;/a&gt; selected during the initial 2009 Digging into Data Challenge.&lt;br /&gt;&lt;br /&gt;Unfortunately, 
due to traffic challenges to and from the conference, I was able to catch only one half of the sessions.  &lt;a href="http://chronicle.com/blogs/wiredcampus/author/jhoward"&gt;Jennifer Howard&lt;/a&gt; of the &lt;a href="http://chronicle.com/"&gt;Chronicle of Higher Education&lt;/a&gt; gives a good summary of the sessions (&lt;a href="http://chronicle.com/blogs/wiredcampus/digging-into-data-in-the-humanities-day-one/31674"&gt;day 1&lt;/a&gt; and &lt;a href="http://chronicle.com/blogs/wiredcampus/digging-into-data-day-2-making-tools-and-using-them/31704"&gt;day 2&lt;/a&gt;). &lt;br /&gt;&lt;br /&gt;The highlights of the sessions I attended included the "&lt;a href="http://criminalintent.org/"&gt;Data Mining with Criminal Intent&lt;/a&gt;" project (whose poster is shown above), which includes the use of the &lt;a href="http://voyeurtools.org/"&gt;Voyeur Tools&lt;/a&gt; for text collection summarization on the "&lt;a href="http://www.oldbaileyonline.org/"&gt;Old Bailey&lt;/a&gt;", a corpus of criminal court proceedings in London 1674-1913.  Also interesting was the "&lt;a href="http://www.diggingintodata.org/Home/AwardRecipients2009/DiggingintotheEnlightenment/tabid/177/Default.aspx"&gt;Mapping the Republic of Letters&lt;/a&gt;" project, which is basically social network analysis based on the letter exchanges of prominent scientists and intellectuals during the 18th century.  Also of note was &lt;a href="http://tonyhey.net/"&gt;Tony Hey&lt;/a&gt;'s keynote.  Although his video/slides are not yet available from the conference website, you can get an idea of his presentation by looking at his &lt;a href="http://www.oscon.com/oscon2009/"&gt;OSCON 2009&lt;/a&gt; presentation (&lt;a href="http://www.microsoft.com/presspass/download/exec/Hey/2009/07-23OSCON.ppt"&gt;slides&lt;/a&gt;, &lt;a href="http://www.youtube.com/watch?v=aIM45LCiCRU"&gt;video&lt;/a&gt;), although his Digging Into Data presentation was more recent and expanded.  
Interesting projects that I learned of included: &lt;a href="http://www.digitalnarratives.net/"&gt;Digital Narratives&lt;/a&gt;, &lt;a href="http://nodexl.codeplex.com/"&gt;NodeXL&lt;/a&gt;, and &lt;a href="http://research.microsoft.com/en-us/projects/zentity/"&gt;Zentity&lt;/a&gt;. &lt;br /&gt;&lt;br /&gt;The conference had a new-to-me format about which I'm not entirely sure how I feel.  The project PIs would present the status and highlights of their projects for 45 minutes, and then a respondent not involved with the project would present a rebuttal / evaluation / response / contextualization.  The respondents that I saw were gracious and complimentary, but I heard during the breaks that was not necessarily the case for at least one of the respondents that I missed. &lt;br /&gt;&lt;br /&gt;There is a &lt;a href="http://www.diggingintodata.org/LinkClick.aspx?fileticket=TmrE8XiBAFw%3d&amp;amp;tabid=149"&gt;2011 Digging Into Data Challenge&lt;/a&gt;, although with less than a week between the conference and the due date of June 16 it is not clear to me how much the experiences of the previous participants could be incorporated into the 2011 submissions. 
&lt;br /&gt;&lt;br /&gt;--Michael&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-4020540139120220245?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/4020540139120220245/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/06/2011-06-18-report-on-2011-digging-into.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/4020540139120220245'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/4020540139120220245'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/06/2011-06-18-report-on-2011-digging-into.html' title='2011-06-18: Report on the 2011 Digging into Data Challenge Conference'/><author><name>Michael L. 
Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-3u7oDXUM58c/TfzdzRh81GI/AAAAAAAAAdM/u0H-9AglcfE/s72-c/digging-into-data-criminal-intent.jpg' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-9222599880838846178</id><published>2011-06-17T15:43:00.006-04:00</published><updated>2011-06-18T11:57:29.588-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='404 error'/><category scheme='http://www.blogger.com/atom/ns#' term='data set'/><category scheme='http://www.blogger.com/atom/ns#' term='rediscover missing web pages'/><category scheme='http://www.blogger.com/atom/ns#' term='book of the dead'/><title type='text'>201-06-17: The "Book of the Dead" Corpus</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://en.wikipedia.org/wiki/Necronomicon"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 261px; height: 195px;" src="http://upload.wikimedia.org/wikipedia/commons/0/0c/Necronomicon_prop.jpg" alt="" border="0" /&gt;&lt;/a&gt;We are delighted to introduce the &lt;span style="font-weight: bold;"&gt;"Book of the Dead"&lt;/span&gt;, a corpus of missing web pages. The corpus contains 233 URIs all of which are dead meaning they result in a 404 "Page not Found" response. 
The pages were collected during a crawl conducted by the &lt;a href="http://www.loc.gov/"&gt;Library of Congress&lt;/a&gt; for web pages related to the topics of federal elections and terror between 2004 and 2006.&lt;br /&gt;&lt;br /&gt;We created the corpus to test the performance of our methods to rediscover missing web pages introduced in the paper "&lt;a href="http://cs.odu.edu/%7Emklein/publications/jcdl2010_mklein.pdf"&gt;Evaluating Methods to Rediscover Missing Web Pages from the Web Infrastructure&lt;/a&gt;" published at &lt;a href="http://jcdl.org/archived-conf-sites/jcdl2010/"&gt;JCDL 2010&lt;/a&gt;. In addition we now thankfully have &lt;a href="http://ws-dl.blogspot.com/2011/06/2011-06-10-launching-synchronicity.html"&gt;Synchronicity&lt;/a&gt;, a tool that can help overcome the 404 detriment to everyone's browsing experience in real time.&lt;br /&gt;&lt;br /&gt;To the best of our knowledge the Book of the Dead is the first corpus of this kind. It is publicly available and we are hopeful that fellow researchers can benefit from it by conducting related work. The corpus can be downloaded at: &lt;a style="font-weight: bold;" href="http://bit.ly/Book-of-the-Dead"&gt;http://bit.ly/Book-of-the-Dead&lt;/a&gt;&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;&lt;br /&gt;&lt;/span&gt;And one more thing... not only does the corpus include the missing URIs, it also contains a best guess of what each of the URIs used to be about. We used Amazon's &lt;a href="http://www.mturk.com/"&gt;Mechanical Turk&lt;/a&gt; and asked workers to guess what the content of the missing pages used to be. We only provided the URIs and the general topics elections and terror. The workers were supposed to just analyze the URI and draw their conclusions. Sometimes this can be an easy task, for example the URI:&lt;br /&gt;&lt;br /&gt;http://www.de.lp.org/election2004/morris.html&lt;br /&gt;&lt;br /&gt;is clearly about an election event in 2004. 
Maybe one could know that "lp" stands for Libertarian Party and "de" for Delaware. Now this URI makes real sense and most likely "Morris" was a candidate running for office during the elections.&lt;br /&gt;&lt;br /&gt;All together the Book of the Dead now offers missing URIs and their estimated "aboutness" which makes it a valuable dataset for retrieval and archival research.&lt;br /&gt;--&lt;br /&gt;martin&lt;br /&gt;&lt;span style="font-weight: bold;"&gt;&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-9222599880838846178?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/9222599880838846178/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/06/201-06-17-book-of-dead-corpus.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/9222599880838846178'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/9222599880838846178'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/06/201-06-17-book-of-dead-corpus.html' title='201-06-17: The &quot;Book of the Dead&quot; Corpus'/><author><name>martin klein</name><uri>http://www.blogger.com/profile/13289299995516244353</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='http://4.bp.blogspot.com/_cJsqIApA0c0/SkAya34Wh5I/AAAAAAAAAAM/XvoXQjYUpzc/s1600-R/mk.jpg'/></author><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-2787770377075702148</id><published>2011-06-10T15:14:00.013-04:00</published><updated>2011-06-10T16:23:43.609-04:00</updated><category 
scheme='http://www.blogger.com/atom/ns#' term='404 error'/><category scheme='http://www.blogger.com/atom/ns#' term='rediscover missing web pages'/><category scheme='http://www.blogger.com/atom/ns#' term='Extension'/><category scheme='http://www.blogger.com/atom/ns#' term='Add-on'/><category scheme='http://www.blogger.com/atom/ns#' term='Synchronicity'/><category scheme='http://www.blogger.com/atom/ns#' term='Firefox'/><title type='text'>2011-06-10: Launching Synchronicity - A Firefox Add-on for Rediscovering Missing Web Pages in Real Time</title><content type='html'>&lt;div style="text-align: center;"&gt;&lt;span class="Apple-style-span"  style="color:#0000EE;"&gt;&lt;u&gt;&lt;br /&gt;&lt;/u&gt;&lt;/span&gt;&lt;/div&gt;&lt;a href="http://3.bp.blogspot.com/-tVWmG5vNW-w/TfJt8oJohaI/AAAAAAAAACg/8Yoeka7h6Bk/s1600/shrimp.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img style="float:right; margin:0 0 10px 10px;cursor:pointer; cursor:hand;width: 200px; height: 156px;" src="http://3.bp.blogspot.com/-tVWmG5vNW-w/TfJt8oJohaI/AAAAAAAAACg/8Yoeka7h6Bk/s200/shrimp.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5616672573621831074" /&gt;&lt;/a&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Today we introduce &lt;/span&gt;&lt;a href="https://addons.mozilla.org/en-US/firefox/addon/synchronicity/"&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Synchronicity&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;, a Firefox extension that supports the user in rediscovering missing web pages. 
It triggers on the occurrence of 404 "Page not Found" errors, provides archived copies of the missing page as well as five methods to query search engines for the new location of the page (in case it has moved) or to obtain a good enough replacement page (in case the page is really gone).&lt;/span&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Synchronicity works in real time and helps to overcome the detriment of link rot in the web.&lt;/span&gt;&lt;/div&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span"  style="font-size:large;"&gt;Installation:&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Download the add-on from &lt;/span&gt;&lt;a href="https://addons.mozilla.org/en-US/firefox/addon/synchronicity/"&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;https://addons.mozilla.org/en-US/firefox/addon/synchronicity&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt; and follow the installation instructions. 
After restarting Firefox you will notice Synchronicity's shrimp icon in the right corner of the status bar.&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;span class="Apple-style-span"  style="font-size:medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;b&gt;&lt;span class="Apple-style-span"  style="font-size:large;"&gt;Usage:&lt;/span&gt;&lt;/b&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal; "&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Whenever a 404 "Page not Found" error occurs the little icon will change colors and turn to notify the user that it has caught the error. Just click once on the red icon and the Synchronicity panel will load up.&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Synchronicity utilizes the &lt;/span&gt;&lt;a href="http://www.mementoweb.org/"&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Memento&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt; framework to obtain archived copies of a page. 
On startup &lt;/span&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal; "&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;you are in the &lt;/span&gt;&lt;i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Archived Version &lt;/span&gt;&lt;/i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;tab where two visualizations of all available archived copies are offered.&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal; "&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;The &lt;/span&gt;&lt;i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;TimeGraph &lt;/span&gt;&lt;/i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;is a static image giving an overview of the number of copies available per year. Three drop down boxes enable you to pick a particular copy by date and have it display in the main browser window.&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal; "&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;The &lt;/span&gt;&lt;i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;TimeLine &lt;/span&gt;&lt;/i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;offers a "zoomable" way to explore the copies in dependence of the time they were archived. Each copy is represented by the icon of its hosting archive. You can click on the icon to receive metadata about the copy and see a link that will display the copy. 
You can also filter the copies by their archive.&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal; "&gt;&lt;span class="Apple-style-span"  style="font-size:medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal; "&gt;&lt;span class="Apple-style-span"  style="font-size:medium;"&gt;&lt;a href="http://1.bp.blogspot.com/-9rIWYZ8dNFU/TfJ59oq-uPI/AAAAAAAAACo/WpmP_Em4uJ4/s1600/synchronicity_timeline.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img src="http://1.bp.blogspot.com/-9rIWYZ8dNFU/TfJ59oq-uPI/AAAAAAAAACo/WpmP_Em4uJ4/s320/synchronicity_timeline.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5616685785081100530" style="display: block; margin-top: 0px; margin-right: auto; margin-bottom: 10px; margin-left: auto; text-align: center; cursor: pointer; width: 320px; height: 204px; " /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal; "&gt;&lt;span class="Apple-style-span"  style="font-size:medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal;"&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-weight: normal; "&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Based on these copies Synchronicity provides two content based methods:&lt;/span&gt;&lt;/span&gt;&lt;/b&gt;&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;ol&gt;&lt;li&gt;&lt;span 
class="Apple-style-span"  style="font-size:small;"&gt;the title of the page&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;the keywords (lexical signature) of the page&lt;/span&gt;&lt;/li&gt;&lt;/ol&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;that both can be used as queries against Google, Yahoo! and Bing. The idea is that these queries represent the "aboutness" of the missing page and hence make a good query to discover the page at its new location (URI) or to discover a good enough replacement page that satisfies the user's information need.&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:medium;"&gt;&lt;a href="http://1.bp.blogspot.com/--JIi5rJRtlo/TfJ6lAi84zI/AAAAAAAAACw/fjbC339C8l4/s1600/synchronicity_keywords.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img src="http://1.bp.blogspot.com/--JIi5rJRtlo/TfJ6lAi84zI/AAAAAAAAACw/fjbC339C8l4/s320/synchronicity_keywords.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5616686461504774962" style="display: block; margin-top: 0px; margin-right: auto; margin-bottom: 10px; margin-left: auto; text-align: center; cursor: pointer; width: 320px; height: 204px; " /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Synchronicity can further obtain tags from &lt;/span&gt;&lt;a href="http://www.delicious.com/"&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Delicious&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt; created by users to annotate the page. Even though tags are sparse, if available they can make a well performing search engine query. 
Additionally Synchronicity will extract the most salient keywords from pages that link to the missing page (link neighborhood lexical signature) that again can be used as a query.&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Lastly Synchronicity offers a convenient way to modify the URL that caused the 404 error and try again. The idea is that maybe shortening the path will get you where you want to go.&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;These last three methods can be applied if no archived copy of the missing page can be found.&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Synchronicity provides a straight forward interface but also enables more experienced users to modify all parameters underlying the extraction of titles, keywords, tags and extended keywords. The &lt;/span&gt;&lt;i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;Expert Interface &lt;/span&gt;&lt;/i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;lets you for example show the titles of the last &lt;/span&gt;&lt;i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;n&lt;/span&gt;&lt;/i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt; copies where you specify the value of &lt;/span&gt;&lt;i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;n&lt;/span&gt;&lt;/i&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;. 
It also enables you to &lt;/span&gt;&lt;span class="Apple-style-span"&gt;&lt;span class="Apple-style-span"  style="font-size:small;"&gt;pick a particular copy to extract the keywords from and change many more parameters.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style=" ;font-size:medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style=" ;font-size:medium;"&gt;&lt;a href="http://2.bp.blogspot.com/-XhDY88Dw-jI/TfJ66cqwFGI/AAAAAAAAAC4/u4B0Jp9ddRY/s1600/synchronicity_ei_title.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img src="http://2.bp.blogspot.com/-XhDY88Dw-jI/TfJ66cqwFGI/AAAAAAAAAC4/u4B0Jp9ddRY/s320/synchronicity_ei_title.png" border="0" alt="" id="BLOGGER_PHOTO_ID_5616686829830935650" style="display: block; margin-top: 0px; margin-right: auto; margin-bottom: 10px; margin-left: auto; text-align: center; cursor: pointer; width: 320px; height: 204px; " /&gt;&lt;/a&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style=" ;font-size:medium;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span"  style=" ;font-size:medium;"&gt;&lt;span class="Apple-style-span"  style=" ;font-size:16px;"&gt;&lt;div&gt;&lt;b&gt;&lt;span class="Apple-style-span" style="font-size: large;"&gt;Notes:&lt;/span&gt;&lt;/b&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;Synchronicity is a beta release so do not let it perform open-heart surgery on your mother-in-law!&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;It was developed within the &lt;/span&gt;&lt;a href="http://www.ws-dl.blogspot.com/"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;WS-DL research group&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; in the 
&lt;/span&gt;&lt;a href="http://www.cs.odu.edu/"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;Computer Science Department&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; at &lt;/span&gt;&lt;a href="http://www.odu.edu/"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;Old Dominion University&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; by Moustafa Aly and &lt;/span&gt;&lt;a href="http://www.cs.odu.edu/~mklein/"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;Martin Klein&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt; under supervision of &lt;/span&gt;&lt;a href="http://www.cs.odu.edu/~mln/"&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;Dr. Michael L. Nelson&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;.&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;Please send your feedback, comments and suggestions for improvement to &lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;synchronicity-info@googlegroups.com&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;--&lt;/span&gt;&lt;/div&gt;&lt;div&gt;&lt;span class="Apple-style-span" style="font-size: small;"&gt;martin&lt;/span&gt;&lt;/div&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-2787770377075702148?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' 
type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/2787770377075702148/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/06/2011-06-10-launching-synchronicity.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/2787770377075702148'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/2787770377075702148'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/06/2011-06-10-launching-synchronicity.html' title='2011-06-10: Launching Synchronicity - A Firefox Add-on for Rediscovering Missing Web Pages in Real Time'/><author><name>martin klein</name><uri>http://www.blogger.com/profile/13289299995516244353</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='http://4.bp.blogspot.com/_cJsqIApA0c0/SkAya34Wh5I/AAAAAAAAAAM/XvoXQjYUpzc/s1600-R/mk.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-tVWmG5vNW-w/TfJt8oJohaI/AAAAAAAAACg/8Yoeka7h6Bk/s72-c/shrimp.png' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-3551562008599993577</id><published>2011-05-20T10:46:00.005-04:00</published><updated>2011-05-23T09:22:50.915-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='IIPC'/><category scheme='http://www.blogger.com/atom/ns#' term='IIPC 2011'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><title type='text'>2011-05-20: Report on the 2011 IIPC General Assembly</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-ndlB9L_x3gI/TdZ_m8aneFI/AAAAAAAAAcw/Ygxm7n2MG6s/s1600/iipc.gif"&gt;&lt;img style="float: left; 
margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 61px;" src="http://3.bp.blogspot.com/-ndlB9L_x3gI/TdZ_m8aneFI/AAAAAAAAAcw/Ygxm7n2MG6s/s200/iipc.gif" alt="" id="BLOGGER_PHOTO_ID_5608810692966053970" border="0" /&gt;&lt;/a&gt;I spent the week of May 9--13 at the &lt;a href="http://www.kb.nl/hrd/congressen/iipc2011/"&gt;KB&lt;/a&gt; in The Hague, the Netherlands for the &lt;a href="http://www.netpreserve.org/events/2011GA.php"&gt;2011 IIPC General Assembly&lt;/a&gt;.  Joining me there was &lt;a href="http://www.blogger.com/profile/11515949745829159856"&gt;Rob Sanderson&lt;/a&gt; of LANL.  Rob had attended the &lt;a href="http://www.netpreserve.org/events/singapore.php"&gt;2010 GA&lt;/a&gt; in Singapore, but this was my first IIPC and I learned a great deal.&lt;br /&gt;&lt;br /&gt;The first day was open to the public in a special session entitled "&lt;a href="http://www.netpreserve.org/events/2011GAoutofthebox.php"&gt;Out of the Box: Building and Using Web Archive Collections&lt;/a&gt;", of which I missed most because I was taking a nap after arriving the morning of May 9.  Fortunately, &lt;a href="http://www.blogger.com/profile/13015271073441769932"&gt;Inge Angevaare&lt;/a&gt; prepared a &lt;a href="http://digitaalduurzaam.blogspot.com/2011/05/web-archiving-international-arena-iipc.html"&gt;comprehensive summary of the first day&lt;/a&gt;.  I believe presentations and a video of highlights from the first day will be available from the IIPC site shortly.&lt;br /&gt;&lt;br /&gt;The next three days were spent in the IIPC plenary and working groups.  
Rob gave a high-level Memento status report on Tuesday, and Rob and I gave a more detailed tutorial later in the day:&lt;br /&gt;&lt;br /&gt;&lt;div style="width: 425px;" id="__ss_8066600"&gt; &lt;strong style="display: block; margin: 12px 0pt 4px;"&gt;&lt;a href="http://www.slideshare.net/hvdsomp/memento-updated-technical-details-may-2011" title="Memento: Updated technical details (May 2011)"&gt;Memento: Updated technical details (May 2011)&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/8066600" marginwidth="0" marginheight="0" frameborder="0" height="355" scrolling="no" width="425"&gt;&lt;/iframe&gt; &lt;div style="padding: 5px 0pt 12px;"&gt; View more &lt;a href="http://www.slideshare.net/"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/hvdsomp"&gt;Herbert Van de Sompel&lt;/a&gt; &lt;/div&gt; &lt;/div&gt;&lt;br /&gt;&lt;br /&gt;Wednesday and Thursday were largely spent meeting with the &lt;a href="http://www.netpreserve.org/about/awg.php"&gt;Access Working Group&lt;/a&gt; discussing a pilot project that would, using &lt;a href="http://www.mementoweb.org/"&gt;Memento&lt;/a&gt;, allow harvesting and re-exposing of web page metadata from various IIPC member national libraries to the public.  The goal is to have a large-scale, working demo of using Memento to aggregate the metadata about IIPC members' archives for the 2012 GA (to be held in Washington DC).&lt;br /&gt;&lt;br /&gt;One of the things I learned at the IIPC is that many national libraries are archiving their national top-level domains (e.g., &lt;a href="http://www.bnf.fr/"&gt;BNF&lt;/a&gt; archiving *.fr web sites), but rather restrictive intellectual property laws prevent the libraries from opening their archives off-site (in other words, you have to travel to the BNF to view their *.fr archives).  
I suppose I had been spoiled by the relatively unencumbered approach afforded to the &lt;a href="http://www.archive.org/about/about.php"&gt;Internet Archive&lt;/a&gt;.  Of course, we'd like to see these archives completely opened in the future, but the ability to advertise their contents in a machine-readable manner is a good first step.&lt;br /&gt;&lt;br /&gt;Friday was an excellent hands-on tutorial led by Brad, Aaron, and Vinay from the Internet Archive (&lt;a href="http://www.archive.org/about/bios.php"&gt;bios&lt;/a&gt;) about processing WARC, CDX, and WAT files using &lt;a href="http://hadoop.apache.org/"&gt;Hadoop&lt;/a&gt; and &lt;a href="http://pig.apache.org/"&gt;Pig&lt;/a&gt;.  Vinay provided a page that gathers &lt;a href="http://www.archive.org/%7Evinay/iipc-2011/"&gt;all the appropriate links&lt;/a&gt; into one place (the data files were distributed via thumb drive).&lt;br /&gt;&lt;br /&gt;Rob left on Saturday morning, passing &lt;a href="http://public.lanl.gov/herbertv/"&gt;Herbert&lt;/a&gt; on the train as he arrived Saturday for his 2 month visit to &lt;a href="http://www.dans.knaw.nl/en/content/data-archive"&gt;DANS&lt;/a&gt;.  Herbert and I spent the day catching up while touring The Hague and Delft.&lt;br /&gt;&lt;br /&gt;Inge also blogged about the closing of the conference ("&lt;a href="http://digitaalduurzaam.blogspot.com/2011/05/memento-sparks-optimism-at-closing-of.html"&gt;Memento Sparks Optimism at Closing of IIPC 2011&lt;/a&gt;"), and the Twitter hashtag was "&lt;a href="http://twitter.com/search?q=%23IIPC"&gt;#iipc&lt;/a&gt;".  
I'll update this entry when additional information from IIPC is posted.&lt;br /&gt;&lt;br /&gt;--Michael&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-3551562008599993577?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/3551562008599993577/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/05/2011-05-20-report-on-2011-iipc-general.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3551562008599993577'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3551562008599993577'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/05/2011-05-20-report-on-2011-iipc-general.html' title='2011-05-20: Report on the 2011 IIPC General Assembly'/><author><name>Michael L. 
Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-ndlB9L_x3gI/TdZ_m8aneFI/AAAAAAAAAcw/Ygxm7n2MG6s/s72-c/iipc.gif' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-3863638654693281820</id><published>2011-04-12T18:04:00.020-04:00</published><updated>2011-04-14T11:21:51.543-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='HTTP'/><category scheme='http://www.blogger.com/atom/ns#' term='Web Archiving'/><category scheme='http://www.blogger.com/atom/ns#' term='Time Travel'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='digital preservation'/><category scheme='http://www.blogger.com/atom/ns#' term='Code{4}Lib'/><category scheme='http://www.blogger.com/atom/ns#' term='MementoFox'/><category scheme='http://www.blogger.com/atom/ns#' term='Firefox'/><category scheme='http://www.blogger.com/atom/ns#' term='Content Negotiation'/><title type='text'>2011-04-13: Implementing Time Travel for the Web</title><content type='html'>&lt;div style="font-family:arial;"&gt;Recent trends in digital libraries are towards integration with the &lt;a href="http://www.w3.org/TR/webarch/"&gt;architecture of the World Wide Web&lt;/a&gt;. 
The &lt;a href="http://ws-dl.blogspot.com/2010/12/2010-12-06-memento-wins-2010-digital.html"&gt;award-winning&lt;/a&gt; &lt;a href="http://www.mementoweb.org/"&gt;Memento Project&lt;/a&gt; proposes &lt;a href="https://datatracker.ietf.org/doc/draft-vandesompel-memento/"&gt;extending HTTP&lt;/a&gt; to provide protocol-level access to mementos (archived previous states) of web resources. Using content negotiation and other protocol operations, rather than archive-specific methods, Memento provides the digital library and preservation community with a standardized method to navigate between the original resource and its mementos.&lt;/div&gt;&lt;br /&gt;&lt;div style="text-align: center; font-family: arial;"&gt;&lt;a href="http://3.bp.blogspot.com/-zlRDQMs_bXA/TaYwiPfWLXI/AAAAAAAAABg/NH_jafT6BhI/s1600/figure4.png" onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 320px; height: 242px;" src="http://3.bp.blogspot.com/-zlRDQMs_bXA/TaYwiPfWLXI/AAAAAAAAABg/NH_jafT6BhI/s320/figure4.png" alt="" id="BLOGGER_PHOTO_ID_5595212951886114162" border="0" /&gt;&lt;/a&gt;&lt;span style="font-size:85%;"&gt;Memento Client State Chart&lt;/span&gt;&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;div  style="font-family:arial;"&gt;The ODU Web Sciences and Digital Libraries Research Group has partnered with the &lt;a href="http://library.lanl.gov/"&gt;LANL Research Library&lt;/a&gt; to create Memento and develop prototype Memento-compliant client and server implementations. A variety of Memento clients have been created, tested, and co-evolved along with the Memento protocol. There is now a &lt;a href="https://addons.mozilla.org/en-us/firefox/addon/mementofox/"&gt;FireFox extension&lt;/a&gt;, Internet Explorer browser helper object, and &lt;a href="http://webkit.org/"&gt;WebKit&lt;/a&gt;-based &lt;a href="http://code.google.com/p/memento-browser/"&gt;Android browser&lt;/a&gt;. 
The design and technical solutions identified during the development of these clients will be of interest to those considering implementation of a Memento-based platform, especially on the client side, and the interactions are also important for building conformant server-side systems.&lt;/div&gt;&lt;br /&gt;&lt;div style="text-align: center; font-family: arial;"&gt;&lt;a href="http://4.bp.blogspot.com/-s8MA1CplYa4/TaYw7Q8a9II/AAAAAAAAABo/qMZlemvg-rw/s1600/figure6.png"&gt;&lt;img style="display:block; margin:0px auto 10px; text-align:center;cursor:pointer; cursor:hand;width: 320px; height: 229px;" src="http://4.bp.blogspot.com/-s8MA1CplYa4/TaYw7Q8a9II/AAAAAAAAABo/qMZlemvg-rw/s320/figure6.png" alt="" id="BLOGGER_PHOTO_ID_5595213381773227138" border="0" /&gt;&lt;/a&gt;MementoFox Screenshot&lt;br /&gt;&lt;/div&gt;&lt;br /&gt;&lt;div style="font-family: arial;"&gt;The full article can be found at:&lt;br /&gt;&lt;br /&gt;&lt;/div&gt;&lt;div style="font-family: arial; padding-left: 2.5ex"&gt;Robert Sanderson, Harihar Shankar, Scott Ainsworth, Frank McCown, and Sam Adams. Implementing Time Travel for the Web. code{4}lib Journal, Issue 13, 2011-04-11. &lt;a href="http://journal.code4lib.org/articles/4979"&gt;http://journal.code4lib.org/articles/4979&lt;/a&gt;.&lt;/div&gt;&lt;br /&gt;&lt;div style="font-family: arial;"&gt;-- Scott G. 
Ainsworth &lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-3863638654693281820?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/3863638654693281820/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/04/2011-04-13-implementing-time-travel-for.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3863638654693281820'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3863638654693281820'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/04/2011-04-13-implementing-time-travel-for.html' title='2011-04-13: Implementing Time Travel for the Web'/><author><name>Scott G. 
Ainsworth</name><uri>http://www.blogger.com/profile/05860551179796856679</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='32' src='http://1.bp.blogspot.com/_QW0GAj2ACVM/SlPyfPe_UsI/AAAAAAAAAAY/_xp0tiXXfQw/S220/Photo+4+touched+up,+diffuse+glow+2.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/-zlRDQMs_bXA/TaYwiPfWLXI/AAAAAAAAABg/NH_jafT6BhI/s72-c/figure4.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-7094938814489540606</id><published>2011-04-08T16:06:00.002-04:00</published><updated>2011-04-08T16:08:40.565-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Map'/><category scheme='http://www.blogger.com/atom/ns#' term='Radiation'/><category scheme='http://www.blogger.com/atom/ns#' term='Japan'/><category scheme='http://www.blogger.com/atom/ns#' term='Python'/><category scheme='http://www.blogger.com/atom/ns#' term='R'/><title type='text'>2011-04-08: Radiation Map of Japan</title><content type='html'>The devastation wrought by the 11 March earthquake in Japan,&lt;br /&gt;and the depths of the human misery left in the wake of the massive Tsunami have left many people awestruck. The size of the quake itself was enormous and many people have had a hard time comprehending just how big this earthquake was. Some sites like &lt;a HREF="http://www.japanquakemap.com"&gt;Japan Quake Map&lt;/A&gt; help us to comprehend the magnitude of this event. As a result of the earthquake and tsunami the nuclear reactor at Dai-ichi was severely damaged and has been leaking radiation. The radiation readings have been made available by &lt;a HREF="http://eq.wide.ad.jp/index_en.html"&gt;WIDE&lt;/A&gt; and Japan's &lt;a HREF="http://www.bousai.ne.jp/eng/index.html"&gt;Nuclear Safety Division&lt;/A&gt;. 
&lt;br /&gt;&lt;br /&gt;The idea was to use &lt;a HREF="http://www.r-project.org/"&gt;R&lt;/A&gt; to create an informative map of Japan showing the radiation levels of the different prefectures. &lt;a HREF="http://www.python.org/"&gt;Python&lt;/A&gt; was used to import the data from both of the web sites and insert it into a MySQL database. The format of both of the pages was understandably quite dynamic and resulted in the python script needing to be tweaked quite often. Sometimes it was easier to just copy and paste the data in a spreadsheet and then export as a csv to import into the database.&lt;br /&gt;&lt;br /&gt;For the map, the shapefiles included in the R distribution were not working out so shapefiles for Japan from &lt;a HREF="http://www.fas.harvard.edu/~chgis/japan/datasets.html"&gt;Harvard Asia Studies&lt;/A&gt; were used. These shapefiles combined with the plotPolys() command produced a higher quality map than the standard shapefiles.&lt;br /&gt;&lt;br /&gt;The readings for most prefectures were rather reliable; however, in Fukushima and in Miyagi the readings were sporadic. Miyagi was hardest hit by the tsunami and most of the area was destroyed. It appears that most of the readings were from mobile units and there are gaps in the coverage. If there were no readings available for a given day they were estimated using the surrounding readings both spatially and temporally. In Fukushima, which is the location of the reactor, there were many monitoring sites set up but they seemed to come and go over the course of time. 
For the purposes of this map, the sites located between 20km and 30km from the reactor were averaged together to give a reading for the Fukushima prefecture.&lt;br /&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/-vJZkO8IBRRA/TZ9n4a9ukDI/AAAAAAAAAGQ/jM34PayF1jM/s1600/japan_map_4_April.png" imageanchor="1" style="margin-left:1em; margin-right:1em"&gt;&lt;img border="0" height="320" width="320" src="http://1.bp.blogspot.com/-vJZkO8IBRRA/TZ9n4a9ukDI/AAAAAAAAAGQ/jM34PayF1jM/s320/japan_map_4_April.png" /&gt;&lt;/a&gt;&lt;/div&gt;Using R, the average daily radiation for each of the 47 prefectures was calculated. The maximum and minimum values were used to create a color gradient for the map. Most of the readings were low with only one or two high readings. This did not lend itself well to a smooth color gradient so the log of the values was used to create the color gradient.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.cs.odu.edu/~gszalkow/japan/japan_rad_map_april.R"&gt;R code for Map&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The radiation level values were each assigned a hex color value and then merged into a vector that matched the prefecture names in the shapefile. Nested for loops are usually a bad idea, especially in R; suggestions for a more elegant solution are welcome. 
plotPolys() takes care of making the map and the only thing left was to reverse the logarithm function to get the real values back and add a legend.&lt;br /&gt;&lt;br /&gt;All of the maps from 16 March to 4 April were combined into an avi using:&lt;br /&gt;&lt;blockquote&gt;mencoder mf://*.png -mf fps=1:type=png -ovc lavc -lavcopts vcodec=mpeg4 -oac copy -o output.avi &lt;/blockquote&gt;&lt;iframe title="YouTube video player" width="480" height="390" src="http://www.youtube.com/embed/Zy8Kmu2E_Qw" frameborder="0" allowfullscreen&gt;&lt;/iframe&gt;&lt;br /&gt;&lt;br /&gt;Shapefiles from China Historical GIS Project, "Tokugawa Japan GIS, Demo Version." Feb 2004&lt;br /&gt;&lt;br /&gt;-- Greg Szalkowski&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-7094938814489540606?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/7094938814489540606/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/04/2011-04-08-radiation-map-of-japan.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/7094938814489540606'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/7094938814489540606'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/04/2011-04-08-radiation-map-of-japan.html' title='2011-04-08: Radiation Map of Japan'/><author><name>Greg Szalkowski</name><uri>http://www.blogger.com/profile/00452319871186722387</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' 
url='http://1.bp.blogspot.com/-vJZkO8IBRRA/TZ9n4a9ukDI/AAAAAAAAAGQ/jM34PayF1jM/s72-c/japan_map_4_April.png' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-8424620350353272268</id><published>2011-04-08T06:02:00.000-04:00</published><updated>2011-04-08T06:02:20.032-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='records management'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='enterprise research'/><category scheme='http://www.blogger.com/atom/ns#' term='MITRE'/><title type='text'>2011-04-07: MITRE Records Expo Trip Report</title><content type='html'>I have just returned from &lt;a href='http://www.mitre.org/'&gt;MITRE&lt;/a&gt;'s Records Expo on &lt;a href='http://www.mitre.org/about/locations/va_mclean_mitre2.html'&gt;MITRE's Campus in McLean, VA&lt;/a&gt;. The Records Expo is designed to raise awareness of the &lt;a href='http://www.archives.gov/about/laws/fed-agencies.html'&gt;archival responsibilities of employees&lt;/a&gt; within MITRE, and also inform our sponsors about the archives and records management work we're doing. I was invited to present some of the research being done in digital preservation at ODU and MITRE. (George Despres and I have recently received funding to perform digital preservation research on the digital objects living within the corporate intranet. 
Our research was explained at the Expo.)&lt;br /&gt;&lt;br /&gt;&lt;div style="width:425px" id="__ss_7555134"&gt;&lt;strong style="display:block;margin:12px 0 4px"&gt;&lt;a href="http://www.slideshare.net/jbrunelle008/records-expo" title="Records expo"&gt;Records expo&lt;/a&gt;&lt;/strong&gt; &lt;iframe src="http://www.slideshare.net/slideshow/embed_code/7555134" width="425" height="355" frameborder="0" marginwidth="0" marginheight="0" scrolling="no"&gt;&lt;/iframe&gt; &lt;div style="padding:5px 0 12px"&gt;View more &lt;a href="http://www.slideshare.net/"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/jbrunelle008"&gt;Justin Brunelle&lt;/a&gt; &lt;/div&gt;&lt;/div&gt;&lt;br /&gt;We set up booths in the MITRE 2 building, equipped with big-screen TVs with slide shows about other archival and records management systems being pioneered at MITRE (the slides are For Office Use Only, and cannot be shared in this blog). Several MITRE employees attended and listened to presentations given by the archives team and the records management teams at MITRE, as well as George and me. A former &lt;a href='http://www.lockheedmartin.com/'&gt;Lockheed Martin&lt;/a&gt; employee who had worked on the &lt;a href='http://www.archives.gov/era/rms/'&gt;NARA records management system&lt;/a&gt; was also in attendance.&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;I spoke to all of the attendees about &lt;a href="http://www.mementoweb.org"&gt;Memento&lt;/a&gt; and how it will be used to allow users to browse archives within MITRE's intranet as part of George's and my research. Most of the attendees had some experience with the &lt;a href='http://www.archive.org/web/web.php'&gt;WayBack Machine&lt;/a&gt;, and had a cursory knowledge of web archiving, but most weren't familiar with Memento. 
All were extremely interested in hearing about the research further, and some have already been in touch with me requesting additional information.&lt;br /&gt;&lt;br /&gt;It was helpful and informative to meet with other professionals working in "The Real World" and attempting to solve the same problems being researched in academia. There were also some additional approaches to archival and records management problems, such as using the &lt;a href='http://en.wikipedia.org/wiki/Cloud_computing'&gt;Cloud&lt;/a&gt; as a repository, and archiving &lt;a href='http://en.wikipedia.org/wiki/Enterprise_social_networking'&gt;corporate social networking&lt;/a&gt; content.&lt;br /&gt;&lt;br /&gt;--Justin F. Brunelle&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-8424620350353272268?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/8424620350353272268/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/04/2011-04-07-mitre-records-expo-trip.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/8424620350353272268'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/8424620350353272268'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/04/2011-04-07-mitre-records-expo-trip.html' title='2011-04-07: MITRE Records Expo Trip Report'/><author><name>Justin F Brunelle</name><uri>http://www.blogger.com/profile/00580381835470799911</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='24' 
src='http://3.bp.blogspot.com/_AmWWXD7g2JA/TG0DJ_mEneI/AAAAAAAAAAM/_AvbhphHU8I/S220/2010-05-24+17.06.03.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-3584928420882551586</id><published>2011-03-25T14:32:00.004-04:00</published><updated>2011-03-27T10:31:21.510-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Annotations'/><category scheme='http://www.blogger.com/atom/ns#' term='OAC'/><category scheme='http://www.blogger.com/atom/ns#' term='OAC 2011'/><title type='text'>2011-03-25: OAC Phase II Workshop Trip Report</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/-kLtOpDXlfjs/TYzgBWWUAWI/AAAAAAAAAcI/Z38SJROR_uA/s1600/oac-logo-ds.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 181px; height: 180px;" src="http://4.bp.blogspot.com/-kLtOpDXlfjs/TYzgBWWUAWI/AAAAAAAAAcI/Z38SJROR_uA/s200/oac-logo-ds.png" alt="" id="BLOGGER_PHOTO_ID_5588087551443599714" border="0" /&gt;&lt;/a&gt;I've just finished attending the &lt;a href="http://www.openannotation.org/"&gt;Open Annotation Collaboration&lt;/a&gt; (OAC) &lt;a href="http://www.openannotation.org/phaseIIworkshop.html"&gt;Phase II Workshop&lt;/a&gt; in Chicago, IL (March 24-25, 2011).  The quality of the presentations was very high and I was surprised at how much the OAC community has grown in a relatively short time.  
Although I've served on OAC technical review panels before and my student, Abdulla Alasaadi, has worked on a small prototype (to be presented at &lt;a href="http://www.jcdl2011.org/"&gt;JCDL 2011&lt;/a&gt;) for using SVG instead of the &lt;a href="http://www.w3.org/2008/WebVideo/Fragments/"&gt;W3C Media Fragments&lt;/a&gt; for specifying an annotation target, I haven't been keeping up with the &lt;a href="http://groups.google.com/group/oac-discuss"&gt;OAC community&lt;/a&gt; as closely as I should.&lt;br /&gt;&lt;br /&gt;The Workshop has &lt;a href="http://www.openannotation.org/WorkshopAgenda.html"&gt;all the presentations online&lt;/a&gt;, as well as a &lt;a href="http://www.openannotation.org/wiki/index.php/Main_Page"&gt;wiki&lt;/a&gt; that contains various commentary, use cases, etc. (also, the hash tag is "&lt;a href="http://twitter.com/#search?q=%23oacwkshp"&gt;#oacwkshp&lt;/a&gt;").  Although all of the presentations generated a lot of discussion from the attendees, the presentations that I learned the most from were:&lt;br /&gt;&lt;ul&gt;&lt;li&gt;&lt;a href="http://www.openannotation.org/documents/GerberHunterOAC.pdf"&gt;Annotation Supporting Collaborative Development of Scholarly Editions&lt;/a&gt; (&lt;a href="http://itee.uq.edu.au/%7Ejane/"&gt;Jane Hunter&lt;/a&gt; and &lt;a href="http://itee.uq.edu.au/%7Eagerber/"&gt;Anna Gerber&lt;/a&gt;) -- a detailed description of &lt;a href="http://www.austlit.edu.au/"&gt;AustLit &lt;/a&gt;&lt;br /&gt;&lt;/li&gt;&lt;li&gt;&lt;a href="http://www.openannotation.org/documents/AnnotationRDF_Chicago.pdf"&gt;Annotating the Biomedical Literature through Text Mining&lt;/a&gt; (&lt;a href="http://compbio.ucdenver.edu/Hunter_lab/Verspoor/Home.html"&gt;Karin Verspoor&lt;/a&gt;) -- automatically annotating / extracting triples from the biomedical literature&lt;br /&gt;&lt;/li&gt;&lt;li&gt;&lt;a href="http://www.slideshare.net/paolociccarese/history-and-overview-of-ao-annotation-ontology"&gt;Annotation Ontology and SWAN 
Annotation Tool&lt;/a&gt; (&lt;a href="http://www.paolociccarese.info/"&gt;Paolo Ciccarese&lt;/a&gt;) -- similar to the above (perhaps with a broader spectrum of manual &amp;lt;--&amp;gt; automatic annotation)&lt;br /&gt;&lt;/li&gt;&lt;li&gt;&lt;a href="http://www.openannotation.org/documents/OACDMSTechMarch2011.pdf"&gt;&lt;i&gt;Shared Canvas&lt;/i&gt;: Interoperability for Digitized Medieval MSS Repositories&lt;/a&gt; (&lt;a href="http://twitter.com/bla222"&gt;Ben Albritton&lt;/a&gt; and &lt;a href="http://twitter.com/azaroth42"&gt;Rob Sanderson&lt;/a&gt;) -- I never realized how complicated medieval manuscripts could be...&lt;br /&gt;&lt;/li&gt;&lt;li&gt;&lt;a href="http://www.openannotation.org/documents/YUMA_OAC_Chicago.pdf"&gt;Historic Map Annotations with YUMA&lt;/a&gt; (&lt;a href="http://cs.univie.ac.at/bernhard.haslhofer"&gt;Bernhard Haslhofer&lt;/a&gt;) -- an impressive &lt;a href="http://dme.ait.ac.at/annotation/"&gt;geospatial demo&lt;/a&gt;&lt;br /&gt;&lt;/li&gt;&lt;li&gt;An OAC-Compliant Toolbox (&lt;a href="http://users.drew.edu/sbradsha/"&gt;Shannon Bradshaw&lt;/a&gt;) (slides not available yet) -- a toolkit approach for implementing the "scholarly primitives": &lt;span class="status-body"&gt;&lt;span class="status-content"&gt;&lt;span class="entry-content"&gt;discover, examine, compare, annotate, organize, synthesize, cite.&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;/li&gt;&lt;/ul&gt;Also of special note was the &lt;a href="http://www.openannotation.org/documents/rs_hvds_oac_intro.pdf"&gt;OAC model overview&lt;/a&gt; from &lt;a href="http://twitter.com/hvdsomp"&gt;Herbert&lt;/a&gt; and Rob, but I was already pretty familiar with that.  
Again, I encourage you to look through &lt;a href="http://www.openannotation.org/WorkshopAgenda.html"&gt;all the slides&lt;/a&gt; since each presentation was well received.&lt;br /&gt;&lt;br /&gt;Thanks to &lt;a href="http://www.library.illinois.edu/faculty/TimCole3.htm"&gt;Tim Cole&lt;/a&gt; for organizing such a successful workshop.&lt;br /&gt;&lt;br /&gt;-- Michael&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-3584928420882551586?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/3584928420882551586/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/03/2011-03-25-oac-phase-ii-workshop-trip.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3584928420882551586'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/3584928420882551586'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/03/2011-03-25-oac-phase-ii-workshop-trip.html' title='2011-03-25: OAC Phase II Workshop Trip Report'/><author><name>Michael L. 
Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/-kLtOpDXlfjs/TYzgBWWUAWI/AAAAAAAAAcI/Z38SJROR_uA/s72-c/oac-logo-ds.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-2704470235256519670</id><published>2011-03-21T20:58:00.012-04:00</published><updated>2011-03-22T21:29:56.233-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Grasshopper'/><category scheme='http://www.blogger.com/atom/ns#' term='ODU'/><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><category scheme='http://www.blogger.com/atom/ns#' term='TR'/><title type='text'>2011-03-21: Grasshopper, prepare yourself.  It is time to speak of graphs and digital libraries and other things.</title><content type='html'>Announcing the publication of an Old Dominion Computer Science Department technical report and an homage to David Carradine, Keye Luke and the television series Kung Fu.&lt;br /&gt;&lt;br /&gt;"Grasshopper."&lt;br /&gt;&lt;br /&gt;"Yes, Master Po?"&lt;br /&gt;&lt;br /&gt;"Grasshopper, you have passed many tests of strength, agility and stamina. But that is not enough.  There are other trials you must pass before you are permitted to attempt to lift the fiery brazier.  I will ask you a series of questions.&lt;br /&gt;&lt;br /&gt;“Let us begin.  
What is a graph?"&lt;br /&gt;&lt;br /&gt;"Master; a graph is a mathematical construct made of objects that may or may not be connected to each other."&lt;br /&gt;&lt;br /&gt;"Grasshopper, how does a graph relate to digital libraries and the world where we live?"&lt;br /&gt;&lt;br /&gt;"Master; a graph is composed of nodes (or vertices) that can be connected in a pairwise manner with edges (or arcs).  In the world of Facebook, people take the place of nodes and the connection that is made when one person “friends” another creates an edge.  In the World Wide Web, pages can be nodes and navigational links can be edges.  In digital libraries, a complex digital object, with all its contents and metadata, can be a node and the URIs of other digital objects are its edges.  In our Shaolin temple, you and I can be nodes and your teachings are our edge."&lt;br /&gt;&lt;br /&gt;"Grasshopper, explain what you mean by objects that in a graph may or may not be connected to each other."&lt;br /&gt;&lt;br /&gt;"Master; one can think of the Internet as a graph made up of routers as nodes and cables as edges.  If a cable between two routers is severed then the Internet can still function.  Not as fully as before, but it will still function."&lt;br /&gt;&lt;br /&gt;"Grasshopper, are you saying that a graph that is not connected can still function, albeit at a lower level?"&lt;br /&gt;&lt;br /&gt;"Yes, Master."&lt;br /&gt;&lt;br /&gt;"Grasshopper, I have in one hand a graph and in the other a hira shuriken. I will answer three questions and then you must cause as much damage as you can to the graph."&lt;br /&gt;&lt;br /&gt;"Master; may I see the graph?"&lt;br /&gt;&lt;br /&gt;"No.  That is one question.   I will tell you the name of one node.  It is called 5."&lt;br /&gt;&lt;br /&gt;"Master; to whom is 5 connected?"&lt;br /&gt;&lt;br /&gt;"5 is connected to 4, 6, 8 and 9.  
That is two questions."&lt;br /&gt;&lt;br /&gt;"Master; who is connected to 4, 6, 8 and 9?"&lt;br /&gt;&lt;br /&gt;"4 is connected to 2, 3 and 5. 6 is connected to 5, 7 and 11.  8 is connected to 2, 5 and 7.  9 is connected to 5 and 10. That is three questions.  Now Grasshopper, you must select one node to remove with the shuriken."&lt;br /&gt;&lt;br /&gt;"Master, I choose to attack node 5 because it has the highest vertex betweenness centrality, a 1, while all others are far less than 0.5."&lt;br /&gt;&lt;br /&gt;"Grasshopper, you have chosen wisely. Now, when node 5 is removed, how much damage has been done to the graph you have discovered?"&lt;br /&gt;&lt;br /&gt;"Master, the damage is 0.29 after the first deletion."&lt;br /&gt;&lt;br /&gt;"Grasshopper, here is the total graph.  What will be the damage to the &lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-p9U0488PWRM/TYf296KRcpI/AAAAAAAAABo/RfgvgF10zMQ/s1600/neighborSize-003.png"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 320px; height: 247px;" src="http://2.bp.blogspot.com/-p9U0488PWRM/TYf296KRcpI/AAAAAAAAABo/RfgvgF10zMQ/s320/neighborSize-003.png" alt="" id="BLOGGER_PHOTO_ID_5586705406221185682" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;discovered graph and to the total graph after the 5th deletion if I were to tell you the friends of the friends of the friends of 5?"&lt;br /&gt;&lt;br /&gt;"Master, the damage to the discovered graph would be 0.89 and 0.68 for the total graph."&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/-HhEQ47FH44s/TYf7hvhycFI/AAAAAAAAABw/qpbhdQyGDfE/s1600/damageVertex.png"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 320px; height: 247px;" src="http://2.bp.blogspot.com/-HhEQ47FH44s/TYf7hvhycFI/AAAAAAAAABw/qpbhdQyGDfE/s320/damageVertex.png" alt="" 
id="BLOGGER_PHOTO_ID_5586710419888828498" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;"Grasshopper, you have answered well.  How and where did you learn these things?"&lt;br /&gt;&lt;br /&gt;"Master; I read the technical report: Connectivity Damage to a Graph by the Removal of an Edge or a Vertex.”&lt;br /&gt;&lt;br /&gt;“Grasshopper, tell me more about this report.”&lt;br /&gt;&lt;br /&gt;"Master; it has an abstract that reads:&lt;br /&gt;&lt;br /&gt;“The approach of quantifying the damage inflicted on a graph in Albert, Jeong and Barabási’s (AJB) report “Error and Attack Tolerance of Complex Networks” using the size of the largest connected component and the average size of the remaining components does not capture our intuitive idea of the damage to a graph caused by disconnections. We evaluate an alternative metric based on average inverse path lengths (AIPLs) that better fits our intuition that a graph can still be reasonably functional even when it is disconnected. We compare our metric with AJB’s using a test set of graphs and report the differences. AJB’s report should not be confused with a report by Crucitti et al. with the same name.&lt;br /&gt;&lt;br /&gt;“Based on our analysis of graphs of different sizes and types, and using various numerical and statistical tools; the ratio of the average inverse path lengths of a connected graph of the same size as the sum of the size of the fragments of the disconnected graph can be used as a metric about the damage of a graph by the removal of an edge or a node. This damage is reported in the range (0,1) where 0 means that the removal had no effect on the graph’s capability to perform its functions. A 1 means that the graph is totally dysfunctional. 
We exercise our metric on a Collection of sample graphs that have been subjected to various attack profiles that focus on edge, node or degree betweenness values.&lt;br /&gt;&lt;br /&gt;“We believe that this metric can be used to quantify the damage done to the graph by an attacker, and that it can be used in evaluating the positive effect of adding additional edges to an existing graph.”&lt;br /&gt;&lt;br /&gt;“Grasshopper, where did you find this report?”&lt;br /&gt;&lt;br /&gt;"Master; I found it at: &lt;a href="http://arxiv.org/abs/1103.3075" target="_blank"&gt;http://arxiv.org/abs/1103.3075&lt;/a&gt;"&lt;br /&gt;&lt;br /&gt;"Grasshopper, go and prepare for your next trial."&lt;br /&gt;&lt;br /&gt;"Yes, Master."&lt;br /&gt;&lt;br /&gt;Dr. Michael Nelson played the part of the Master Po.  The part of Grasshopper is poorly played.&lt;br /&gt;&lt;br /&gt;Chuck Cartledge&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-2704470235256519670?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/2704470235256519670/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/03/2011-03-11-grasshopper-prepare-yourself.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/2704470235256519670'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/2704470235256519670'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/03/2011-03-11-grasshopper-prepare-yourself.html' title='2011-03-21: Grasshopper, prepare yourself.  
It is time to speak of graphs and digital libraries and other things.'/><author><name>Chuck Cartledge</name><uri>http://www.blogger.com/profile/16705475580746924597</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/-p9U0488PWRM/TYf296KRcpI/AAAAAAAAABo/RfgvgF10zMQ/s72-c/neighborSize-003.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-2458541503254721280</id><published>2011-03-09T14:28:00.005-05:00</published><updated>2011-03-09T16:35:45.401-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='tags'/><category scheme='http://www.blogger.com/atom/ns#' term='delicious'/><category scheme='http://www.blogger.com/atom/ns#' term='api'/><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><category scheme='http://www.blogger.com/atom/ns#' term='screen scraping'/><title type='text'>2011-03-09: Adventures with the Delicious API</title><content type='html'>I recently conducted an experiment on tags provided from the bookmarking site &lt;span style="font-weight:bold;"&gt;delicious.com&lt;/span&gt;. The goal was to obtain a decent sized sample set of URIs and tags that users have used to annotate the URIs. The website provides a &lt;span style="font-style:italic;"&gt;recent tool&lt;/span&gt; that automatically redirects to a somewhat random URI that was recently annotated by some Delicious user. By parsing the HTTP headers I was able to grab the redirect URI and therefore build a corpus of 5000 unique URIs. The URI for the tool is &lt;a href="http://www.delicious.com/recent/?random=1"&gt; http://www.delicious.com/recent/?random=1&lt;/a&gt;.&lt;br /&gt;As the second step I needed to obtain the corresponding tags for each URI. 
I tried to be a good programmer and used the Delicious API to query for the tags instead of parsing the web interface. In order to use the API (v1) you need an account with Delicious/Yahoo. The request for &lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;https://username:pwd@api.del.icio.us/v1/posts/suggest?url=http://www.google.com/&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;for example returns an XML-formatted response with the top five popular tags:&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;search google search engine engine web&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;The API returns at most the top five tags per URI even though there may be more than five visible through the web interface.&lt;br /&gt;However, I split my URI set into 5 batches and ran five times a thousand queries with the same account and from the same IP address, all within 30 minutes. To my surprise I noticed that roughly 50% of the URIs did not return any tags even though they are indexed by Delicious. My intentions were good but a 50% loss was too much so I turned my attention to screen scraping the HTML page. You need to generate the md5 hash value for each URI  (including http://) and append it to the proper URI. For example for &lt;span style="font-style:italic;"&gt;http://www.google.com&lt;/span&gt; you need to request&lt;br /&gt;&lt;blockquote&gt;&lt;br /&gt;http://www.delicious.com/url/ff90821feeb2b02a33a6f9fc8e5f3fcd&lt;br /&gt;&lt;/blockquote&gt;&lt;br /&gt;By parsing the source with simple regular expressions you can extract at most the top 30 tags and how often users have used each tag for this URI. This path turned out to be fast, reliable and provides better results since you get more than just five tags.&lt;br /&gt;&lt;br /&gt;The discrepancy between the API and the web interface however raised some questions and so I will share some statistics about my data and provide theories trying to explain the observed behavior:&lt;br /&gt;I only collected 4969 unique URIs. 
Apparently the recent tool distinguishes between e.g. google.com and www.google.com and possibly www.google.com/&lt;br /&gt;&lt;br /&gt;The API did not return any tags for 78 URIs but the web interface provided tags for all 4969 URIs. Maybe the API accesses a smaller index than the web interface? The recent tool however may pull data from the "live" index. Similar behavior was observed by &lt;a href="http://www.harding.edu/fmccown/"&gt;Frank McCown&lt;/a&gt; for &lt;a href="http://scholar.google.com/scholar?cluster=14964781822608638506&amp;hl=en&amp;as_sdt=0,47"&gt;search engine caches (JCDL 2007)&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;I got down to 78 URIs from originally 50% by distributing the queries over five different IP addresses and re-querying the API dozens of times stretched over an entire day. The API seems to be sensitive to high frequency requests or is simply not very powerful.&lt;br /&gt;&lt;br /&gt;For the 78 URIs I obtained a mean of 23.2 tags with a standard deviation of 7.8. The minimum number of tags was two (for one URI) and the maximum was 30 (for 38 URIs). 51 of the 78 URIs had 20 or more and 73 URIs had 10 or more tags through the web interface. This just underlines the point: &lt;span style="font-weight:bold;"&gt;the API is not reliable&lt;/span&gt;.&lt;br /&gt;&lt;br /&gt;I further found that in 465 cases the API returned less than five tags where the web interface returned more tags. This "under reporting" (meaning the API should have reported the top five) is another strong indicator for the API pulling from a smaller and possibly dated index.&lt;br /&gt;&lt;br /&gt;One can argue whether or not the order of tags matters. I found that out of the 4891 URIs with tags from the API 1759 had a different order compared to the web interface data. 191 times I observed a change at rank 1. These changes account for 718 times where terms were added or removed from the union of both tag sets (API vs web interface). 
On average 1.11 moved in or out of the intersection of both sets.&lt;br /&gt;&lt;br /&gt;The moral of all this? As much as you may appreciate an API, in the case of Delicious you can obtain more (better?) data by screen scraping the HTML page.&lt;br /&gt;&lt;br /&gt;--&lt;br /&gt;martin&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-2458541503254721280?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/2458541503254721280/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/03/2011-03-09-adventures-with-delicious.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/2458541503254721280'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/2458541503254721280'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/03/2011-03-09-adventures-with-delicious.html' title='2011-03-09: Adventures with the Delicious API'/><author><name>martin klein</name><uri>http://www.blogger.com/profile/13289299995516244353</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='http://4.bp.blogspot.com/_cJsqIApA0c0/SkAya34Wh5I/AAAAAAAAAAM/XvoXQjYUpzc/s1600-R/mk.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-1066829312898469008</id><published>2011-03-04T02:25:00.082-05:00</published><updated>2011-03-20T14:23:25.776-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='San Francisco'/><category scheme='http://www.blogger.com/atom/ns#' term='PDA2011'/><category scheme='http://www.blogger.com/atom/ns#' 
term='Conference'/><category scheme='http://www.blogger.com/atom/ns#' term='Internet archive'/><category scheme='http://www.blogger.com/atom/ns#' term='Personal Archiving'/><title type='text'>2011-03-04: Personal Digital Archiving Conference 2011</title><content type='html'>Last week, along with &lt;a href="http://www.cs.odu.edu/%7Emln/"&gt;Dr. Nelson&lt;/a&gt;, I attended the 2nd annual conference of &lt;a href="http://www.personalarchiving.com/"&gt;Personal Digital Archiving&lt;/a&gt; held at the Internet Archive in the heart of the &lt;a href="http://en.wikipedia.org/wiki/San_Francisco"&gt;foggy city&lt;/a&gt;, San Francisco. The weather was not on our side as the sunny state was facing the &lt;a href="http://www.sfgate.com/cgi-bin/article.cgi?f=/c/a/2011/02/23/MNHV1HT2T6.DTL"&gt;worst weather in quite a while&lt;/a&gt;. This didn't dampen my spirits as I was excited to be in a room with experts and passionate geniuses whose collective &lt;a href="http://en.wikipedia.org/wiki/Intelligence_quotient"&gt;IQ&lt;/a&gt; could cause an &lt;a href="http://en.wikipedia.org/wiki/Integer_overflow"&gt;integer overflow&lt;/a&gt;!&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/-nnHIVxY5BXY/TXWa7Nsx2aI/AAAAAAAAAFA/Bc9toFNyzdI/s1600/IMG_20110224_130456.jpg"&gt;&lt;img style="display: block; margin: 0px auto 10px; text-align: center; cursor: pointer; width: 450px; height: 300px;" src="http://1.bp.blogspot.com/-nnHIVxY5BXY/TXWa7Nsx2aI/AAAAAAAAAFA/Bc9toFNyzdI/s320/IMG_20110224_130456.jpg" alt="" id="BLOGGER_PHOTO_ID_5581537655276034466" border="0"&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The general atmosphere was really nice; participants were very friendly and eager to introduce themselves and get to know you. I got exposed to a ton of ideas, projects and insights over coffee sometimes while other times just going up and down the stairs. 
My only regret is that I don't have a contact card as I got a bunch of them; I got to get me some of these!&lt;br /&gt;&lt;br /&gt;So that the readers can relive this experience with me I have divided the conference into &lt;a href="http://www.personalarchiving.com/2011-schedule/"&gt;two days&lt;/a&gt; each in turn is divided into sessions. I will try to highlight a thing or two from each session and I will try to find the videos for the entire conference.&lt;br /&gt;&lt;br /&gt;Day 1:&lt;br /&gt;&lt;br /&gt;At 9am the conference started with &lt;a href="http://en.wikipedia.org/wiki/Brewster_Kahle"&gt;Brewster Kahle&lt;/a&gt; and &lt;a href="http://www.ubois.com/about-press-contact/"&gt;Jeff Ubois&lt;/a&gt; introducing &lt;a href="http://research.microsoft.com/en-us/people/cathymar/"&gt;Cathy Marshall&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-cathymarshall"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from Microsoft Research who gave an amazing keynote entitled: "People are People and things change". It was a really insightful speech summarizing problems we face in dealing with data and backups. She gave examples from her own experience with her computer and the process of "not" backing it up, &lt;a href="http://twitter.com/ccmarshall"&gt;her tweets&lt;/a&gt; which needed to be backed up as well. 
She came across the note that even when we backup stuff we tend to replicate the entire folders and make copies not maintain an organized list of the resources, people always think that archiving data should be done by someone else.&lt;br /&gt;&lt;br /&gt;Gary Wright&lt;a href="http://www.archive.org/details/PDA2011-garywright"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top"  border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from Family search gave an insight from his paper entitled "&lt;a href="https://wiki.familysearch.org/en/White_Paper:_Preserving_Your_Family_History_Records_Digitally"&gt;Preserving your family history records digitally&lt;/a&gt;" (&lt;a href="http://www.legacydox.com/"&gt;Legacy Dox&lt;/a&gt;) about best archiving practices. He also introduced the &lt;a href="http://www.millenniata.com/"&gt;Millenniata Disc&lt;/a&gt; for data preservation. &lt;a href="http://twitter.com/emsscurator"&gt;Jeremy Leighton John&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-jeremyleightonjohn"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from the British Library came up next and discussed ways of processing digital manuscripts. 
&lt;a href="http://www.evancarroll.net/"&gt;Evan Carroll&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-evancarroll"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;, the author and founder of &lt;a href="http://www.thedigitalbeyond.com/"&gt;TheDigitalBeyond&lt;/a&gt;, followed. He discussed a very interesting question: What happens to your digital assets when you die? Who gains access to them? Do you want them to be destroyed?  He also discussed why certain assets grow to have more importance according to the sentimental value behind them.&lt;br /&gt;&lt;br /&gt;After the break &lt;a href="http://www.personal.psu.edu/esc10/blogs/cahoy/"&gt;Ellysa Cahoy&lt;/a&gt; and &lt;a href="http://www.ed.psu.edu/education/default.asp?which=468"&gt;Scott McDonald&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-ellysacahoy-scottmcdonald"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt; from Penn State University proposed their ideas and a project through which faculty members can help further in the archiving process at their own level. 
&lt;a href="http://www.jzissman.com/"&gt;Judith Zissman&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-judithzissman"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt; used her software design expertise to discuss a very interesting idea about Agile Archiving. She discussed how to implement the &lt;a href="http://agilemanifesto.org/"&gt;Agile Manifesto&lt;/a&gt; in the personal archiving process. I wish she could have given further examples though. After that &lt;a href="http://wanderingstan.com/"&gt;Stan James&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-stanjames"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;, who later became my friend, introduced “&lt;a href="http://www.slideshare.net/wanderingstan/smallest-day-personal-digital-archiving"&gt;The Smallest Day&lt;/a&gt;”, a project Stan and his father set up together to collect, archive, arrange, tag and connect all their family photos, documents, letters, postcards and more. It is an awesome project that utilized lots of interesting technologies like &lt;a href="http://www.nuance.com/dragon/index.htm"&gt;Dragon software&lt;/a&gt; for &lt;a href="http://en.wikipedia.org/wiki/Speech_recognition"&gt;voice recognition&lt;/a&gt;, &lt;a href="https://www.mturk.com/mturk/welcome"&gt;Mechanical Turk&lt;/a&gt;, &lt;a href="http://www.ancestry.com/"&gt;Ancestry.com&lt;/a&gt;, etc. 
&lt;a href="http://www.lis.illinois.edu/people/faculty/loriken"&gt;Lori Kendall&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-lorikendall"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt; followed, discussing and defining the concept of “personal” with regard to archiving by giving an example from her ancestors’ photos. &lt;a href="http://ascii.textfiles.com/"&gt;Jason Scott&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-jasonscott"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt; followed with a wonderful speech, first presenting himself as a &lt;a href="http://www.textfiles.com/"&gt;collector&lt;/a&gt;, then arguing that it is not enough to keep things safe but we need to make those collections &lt;a href="http://www.archiveteam.org/index.php?title=Main_Page"&gt;accessible and available online&lt;/a&gt;. He discussed in agony the catastrophe of shutting down &lt;a href="http://en.wikipedia.org/wiki/GeoCities"&gt;GeoCities&lt;/a&gt; and its consequences. 
On a side note &lt;a href="http://twitter.com/sockington"&gt;Jason Scott’s cat&lt;/a&gt; has 1.4 million followers on twitter and ranked on the &lt;a href="http://twitaholic.com/sockington/"&gt;top 200 to be followed&lt;/a&gt;!&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-yjMFU_gA_MQ/TXWbL1qAB-I/AAAAAAAAAFI/BZVVxiPymgs/s1600/IMG_20110224_130819.jpg"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 239px; height: 320px;" src="http://3.bp.blogspot.com/-yjMFU_gA_MQ/TXWbL1qAB-I/AAAAAAAAAFI/BZVVxiPymgs/s320/IMG_20110224_130819.jpg" alt="" id="BLOGGER_PHOTO_ID_5581537940879706082" border="0"&gt;&lt;/a&gt;&lt;br /&gt;Lunch was next, it was quite refreshing to discuss with other people ideas and thoughts. We had a tour in the internet archive and I took some photos, we saw &lt;a href="http://en.wikipedia.org/wiki/Microform"&gt;microfilm readers&lt;/a&gt;, &lt;a href="http://www.diybookscanner.org/"&gt;Book scanners&lt;/a&gt; (not the first time to see one, I saw a whole &lt;a href="http://www.bibalex.org/isis/frontend/projects/ProjectDetails.aspx?id=//3r+dP2wp23h6ShocDoYg=="&gt;bunch of them&lt;/a&gt; in &lt;a href="http://www.bibalex.org/Home/Default_EN.aspx"&gt;Alexandria Library&lt;/a&gt;). After lunch &lt;a href="http://www.educause.edu/Community/MemDir/Profiles/BirkinJamesDiana/66781"&gt;Birkin Diana&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-birkindiana"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from Brown University talked about Metadata on archived items and ways to enhance this metadata. 
&lt;a href="http://library.ucar.edu/about/people/klegg.php"&gt;Kate Legg&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-katelegg"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from &lt;a href="http://www.ncar.ucar.edu/"&gt;NCAR&lt;/a&gt; (National Center for Atmospheric Research) presented the center’s project in enabling ease of access to archived content and stated &lt;a href="http://en.wikipedia.org/wiki/Warren_M._Washington"&gt;Warren Washington&lt;/a&gt;’s &lt;a href="http://library.ucar.edu/collections/washington/"&gt;collection&lt;/a&gt; they have as a model that could be adopted in subsequent collections.  From Bookism, &lt;a href="http://www.bookism.org/open/"&gt;Jay Datema&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-jaydatema"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;discussed the issue of compatibility and creating standards of archiving (suggested why not make archive.txt file same as &lt;a href="http://www.robotstxt.org/"&gt;robots.txt&lt;/a&gt; and &lt;a href="http://humanstxt.org/"&gt;humans.txt&lt;/a&gt; files). He also talked about the paper of Jeremy John “&lt;a href="http://www.nature.com/nature/journal/v459/n7248/full/459775a.html"&gt;The future of saving our past&lt;/a&gt;”. 
The next session was by &lt;a href="http://www.google.com/url?sa=t&amp;amp;source=web&amp;amp;cd=1&amp;amp;ved=0CBQQFjAA&amp;amp;url=http%3A%2F%2Fbengross.com%2F&amp;amp;ei=22x1Td_JH4i6sQOMo-nMBA&amp;amp;usg=AFQjCNEjqLztSBK6xL7B_qB66boETw_URQ"&gt;Ben Gross&lt;/a&gt; and &lt;a href="http://evan.status.net/"&gt;Evan Prodromou&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-evanprodromou"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;where they started by discussing the Social Data aspect added to archiving. They discussed &lt;a href="http://www.foaf-project.org/"&gt;FOAF&lt;/a&gt;, &lt;a href="http://sioc-project.org/"&gt;SIOC&lt;/a&gt;, &lt;a href="http://en.wikipedia.org/wiki/Atom_%28standard%29"&gt;ATOM&lt;/a&gt;, &lt;a href="http://en.wikipedia.org/wiki/OpenDD"&gt;OpenDD&lt;/a&gt;, &lt;a href="http://www.google.com/url?sa=t&amp;amp;source=web&amp;amp;cd=1&amp;amp;sqi=2&amp;amp;ved=0CBQQFjAA&amp;amp;url=http%3A%2F%2Fpocoproject.org%2Fblog%2F%3Fp%3D24&amp;amp;ei=kG11Tc_8FYXSsAPKovjRBA&amp;amp;usg=AFQjCNHiqhiB-xrUfU3SoOebn4dfESKPgA"&gt;POCO&lt;/a&gt; and &lt;a href="http://activitystrea.ms/"&gt;Activity Streams&lt;/a&gt;. &lt;a href="http://www.connectedaction.net/marc-smith/"&gt;Marc A. Smith&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-marcsmith"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;discussed in a very interesting way where in the social graph an individual is located. 
He utilized &lt;a href="http://www.mapxl.com/"&gt;MapXL&lt;/a&gt;, &lt;a href="http://nodexl.codeplex.com/"&gt;NodeXL &lt;/a&gt;to visualize the &lt;a href="http://www.senate.gov/general/contact_information/senators_cfm.cfm"&gt;US Senates&lt;/a&gt; and to my surprise (and my lack of political knowledge) clusters started to appear which showed the &lt;a href="http://en.wikipedia.org/wiki/Right-wing_politics"&gt;Right&lt;/a&gt; and &lt;a href="http://en.wikipedia.org/wiki/Left-wing_politics"&gt;Left &lt;/a&gt;wings. From Berkeley &lt;a href="http://people.ischool.berkeley.edu/%7Eray/"&gt;Ray Larson&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-raylarson"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;came up next and introduced the &lt;a href="http://socialarchive.iath.virginia.edu/"&gt;SNAC project&lt;/a&gt; discussing also the Authority control and show the two possibilities: Having several names for one person, and having several persons with same name.&lt;br /&gt;&lt;br /&gt;Financials and Economics were the theme of the next panel&lt;a href="http://www.archive.org/details/PDA2011-paneldiscussion"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;where Jeff Ubois&lt;a href="http://www.archive.org/details/PDA2011-jeffubois"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;, 
Brewster Kahle&lt;a href="http://www.archive.org/details/PDA2011-brewsterkahle"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;, &lt;a href="http://www.nsf.gov/staff/staff_bio.jsp?lan=sgriffin&amp;amp;org=IIS"&gt;Steve Griffin&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-stevegriffin"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;and &lt;a href="http://lockss.stanford.edu/lockss/David_S.H._Rosenthal"&gt;David Rosenthal&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-davidrosenthal"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;discussed the costs of archiving and showed that the bottleneck was in the scanning process as 80% of the cost goes in the human part of the process. 10 cents per page was the claimed number and discussed that a box of paper can cost from $200 to $680 to be archived. Those are the pay-once models like &lt;a href="http://www.prestoprime.org/"&gt;Presto Prime&lt;/a&gt; which costs $2000 per Terabyte to be preserved forever. You can notice that the hardware is the least cost as it is merely $50/TB. The LOCKSS system was introduced as well by Rosenthal. 
The closing keynote was made by &lt;a href="http://fitz.blogspot.com/"&gt;Brian Fitzpatrick&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-brianfitzpatrick"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from Google and &lt;a href="http://www.dataliberation.org/"&gt;DataLiberation.org&lt;/a&gt;. He showed statistics of the &lt;a href="http://www.nytimes.com/2011/01/29/technology/internet/29cutoff.html"&gt;Internet cut-off in Egypt&lt;/a&gt; last month as an example of control over the data. He argued that there is a necessity to make data free from the framework beneath and introduce the Import/Export button to &lt;a href="http://www.dataliberation.org/google"&gt;all products&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;After dinner and reception the demos session started by &lt;a href="http://www.wahm-solution.com/joanne-lang.html"&gt;Joanne Lang&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-joannelang"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;and her project “&lt;a href="http://www.aboutone.com/"&gt;About One&lt;/a&gt;” which is an amazing tool to gather information, data, documents and content for the family to help organize and manage life. The slogan was small pieces of information can build a connected life. 
&lt;a href="http://www.dlib.org/dlib/july09/authors/07authors.html"&gt;Michael Ashenfelder&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-mikeashenfelter"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from the Library of Congress talked next. &lt;a href="http://jwa.org/encyclopedia/author/weissman-deborah"&gt;Debbie Weissman&lt;/a&gt; discussed the possibility of claiming ownership of preserved content. &lt;a href="http://longnow.org/people/staff/laura/"&gt;Laura Welcher&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-laurawelcher"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from the &lt;a href="http://www.longnow.org/"&gt;Long Now Foundation&lt;/a&gt; came up next and introduced the &lt;a href="http://rosettaproject.org/"&gt;Rosetta Project&lt;/a&gt; aiming to archive 7000 languages for fear that some languages can go extinct. She also introduced a hierarchy of languages, language commons and sources in a wiki-like theme. 
&lt;a href="http://twitter.com/skostal"&gt;Susan Kostal&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-susankostal"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from &lt;a href="http://www.sanfranmag.com/story/hoarders-hit-delete"&gt;San Francisco magazine&lt;/a&gt; discussed the concept of digital &lt;a href="http://en.wikipedia.org/wiki/Compulsive_hoarding"&gt;Hoarding &lt;/a&gt;and its relation to physical hoarding. Then &lt;a href="http://www.linkedin.com/pub/jonathan-good/6/19a/794"&gt;Jonathan Good&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-jonathangood"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;(with whom I had a very nice chat earlier about the Egyptian revolution) gave a demo on his project &lt;a href="http://1000memories.com/"&gt;1000memories.com&lt;/a&gt; showing how friends and loved ones can remember a passed person by collectively gathering his/her photos, testimonials, or even starting a grant in his/her name. He also showed a dedicated page for the &lt;a href="http://1000memories.com/egypt"&gt;384 martyrs&lt;/a&gt; who died in the Egyptian revolution, each photo linked to a dedicated profile so that people don’t forget who those people were and get to know what their lives were. 
The day was concluded by &lt;a href="http://twitter.com/denimsmith"&gt;Denim Smith&lt;/a&gt;’s&lt;a href="http://www.archive.org/details/PDA2011-denimsmith"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;presentation on his project “My Internet Cooperation”.&lt;br /&gt;&lt;br /&gt;Day 2:&lt;br /&gt;&lt;br /&gt;The second day was also really interesting but definitely shorter. It started with a keynote speech by &lt;a href="http://www.cni.org/staff/clifford_index.html"&gt;Clifford Lynch&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-cliffordlynch"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;where he gave a very insightful talk about the different forms of exposure of the personal documents to the public. He discussed how the personal archiving concept evolved from just individual private shards accessed individually to shared content with the spread of social media till it finally reached the public domain. 
He also argued that we need an archive “button” in lots of the digital media.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://www.danreetz.com/"&gt;Daniel Reetz&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-danielreetz"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;with the &lt;a href="http://en.wikipedia.org/wiki/Do_it_yourself"&gt;DIY&lt;/a&gt; Book Scanner gave a very interesting talk with a different focus, it was on cameras and technologies. Initially it started when he made “&lt;a href="http://www.instructables.com/id/DIY-High-Speed-Book-Scanner-from-Trash-and-Cheap-C/"&gt;an instructable&lt;/a&gt;” on how to build a cheap book scanner. He discussed how cameras vary in power and how the production is affected by users’ requirements in enhancements. He argued that sometimes users wanted the best modified photo not the best “real” one, &lt;a href="http://www.hp.com/canada/portal/hho/digital_photography/tours/slimming/index.html"&gt;slimming cameras&lt;/a&gt;, &lt;a href="http://blogs.consumerreports.org/electronics/2011/03/panasonic-dmc-fx78-lets-you-do-in-camera-makeovers.html"&gt;face enhancement lenses&lt;/a&gt;…etc. He wondered if it would have been better to invest in adding “document capturing” capabilities to cameras, perhaps &lt;a href="http://en.wikipedia.org/wiki/Optical_character_recognition"&gt;OCR &lt;/a&gt;too. 
&lt;a href="http://twitter.com/DwightSwanson"&gt;Dwight Swanson&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-dwightswanson"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;followed up next discussing Home Movies, their evolution and archival. &lt;a href="http://www.gigapan.org/viewProfile.php?userid=353"&gt;Rich Gibson&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-richgibson"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from &lt;a href="http://www.gigapan.org/"&gt;Gigapan &lt;/a&gt;Project showed how Extreme close-up images can give more stories (like in the Italian fashion runway zooming to the designer on the tag).&lt;br /&gt;&lt;br /&gt;After the break 2 poets and writers, &lt;a href="http://en.wordpress.com/tag/devin-becker/"&gt;Devin Becker&lt;/a&gt; and &lt;a href="http://www.colliernogues.com/index.php?/sample/"&gt;Collier Nogues&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-devinbecker-colliernogues"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;, made a &lt;a href="http://www.surveymonkey.com/s/digitalarchiving"&gt;survey &lt;/a&gt;on a broad group of writers and their methods of saving their documents and writings, their archiving and organization. 
&lt;a href="http://www.google.com/url?sa=t&amp;amp;source=web&amp;amp;cd=7&amp;amp;ved=0CEcQFjAG&amp;amp;url=http%3A%2F%2Fiit.academia.edu%2FHongZhang%2FAbout&amp;amp;ei=VpJ1TcG3HNCC0QHZpdjUBg&amp;amp;usg=AFQjCNEjQTpAT__sHS0EOmbeuVnkO2Sshg"&gt;Hong Zhang&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-hongzhang"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;a PhD student from the &lt;a href="http://illinois.edu/"&gt;University of Illinois&lt;/a&gt; followed then &lt;a href="http://rpi.academia.edu/JasonZalinger"&gt;Jason Zalinger&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-jasonzalinger"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from Rensselaer Polytechnic Institute in New York. Jason presented his study on possible enhancements to Google’s Gmail by adding these concepts: Forget label for unwanted emails, Digital Regret for undoing the send, Sleep on It for postponing confirming the send, Word Cloud,…etc.  
&lt;a href="http://www.computing.dcu.ie/%7Eadoherty/"&gt;Aiden Doherty&lt;/a&gt; and &lt;a href="http://www.computing.dcu.ie/%7Ecgurrin/"&gt;Cathal Gurrin&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-cathalgurrin-aidendoherty"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from Dublin University presented a very interesting and intriguing concept which is &lt;a href="http://en.wikipedia.org/wiki/Lifelog"&gt;LifeLog&lt;/a&gt;. A small wearable device that logs, takes snapshots, &lt;a href="http://en.wikipedia.org/wiki/Global_Positioning_System"&gt;GPS &lt;/a&gt;coordinates, temperature sensors…etc and store them in a searchable memory platform. They have been wearing these devices for the last 4.5 years!&lt;br /&gt;&lt;br /&gt;&lt;a href="http://en.wikipedia.org/wiki/Ted_Nelson"&gt;Ted Nelson&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-tednelson"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;gave an awesome speech about how if things were designed differently from the beginning it would have been better. He argued that the documents on the computer are the biggest example. 
The slides he had didn’t work initially but later that day he showed an amazing demo for &lt;a href="http://www.xanadu.com/"&gt;Xanadu&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-tednelsonxanadu"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;, a project he is working on for a long time and it introduced a very new data structure which is the multidimensional cells which I found fascinating! &lt;a href="http://en.wikipedia.org/wiki/Edward_Feigenbaum"&gt;Ed Feigenbaum&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-edfeigenbaum"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from Stanford introduced &lt;a href="http://www.freebase.com/view/en/stanford_university_salt_project"&gt;SALT &lt;/a&gt;(Self Archiving Legacy Toolkit) and talked about the initiative they started at Stanford. 
Then &lt;a href="http://www.dougengelbart.org/about/ce-bio.html"&gt;Christina Engelbart&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-christinaengelbart"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;, daughter of &lt;a href="http://en.wikipedia.org/wiki/Douglas_Engelbart"&gt;Douglas Engelbart&lt;/a&gt; (the inventor of the mouse), gave a presentation about her institute's work in collecting the digital artifacts regarding &lt;a href="http://dougengelbart.org/library/engelbart-archives.html"&gt;her father’s legacy&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;Lunch was as the day before a good opportunity to mix, mingle and exchange ideas. It helped a lot that it was sunny so most of the people had lunch outside in the sun. When we came back &lt;a href="http://www.ils.unc.edu/callee/"&gt;Cal Lee&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-callee"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from the University of North Carolina introduced the &lt;a href="http://en.wikipedia.org/wiki/Forensic_science"&gt;Forensics &lt;/a&gt;aspect in Digital preservation. 
&lt;a href="http://www.sis.pitt.edu/%7Ercox/"&gt;Richard Cox&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-richardcox"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;from University of Pittsburgh followed up next then &lt;a href="http://matienzo.org/"&gt;Mark Matienzo&lt;/a&gt; from Yale University Library and &lt;a href="http://www.ameliaabreu.com/"&gt;Amelia Abreu&lt;/a&gt; from University of Washington&lt;a href="http://www.archive.org/details/PDA2011-markmatienzo-ameliaabreu"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;a href="http://research.microsoft.com/en-us/um/people/gbell/"&gt;Gordon Bell&lt;/a&gt; from Microsoft Research came up next with a talk about his life experience in the health aspect. He illustrated how he gathered all his records from the very first one from decades ago in order to have a better picture on his health situation after he had a heart attack. The project &lt;a href="http://research.microsoft.com/en-us/projects/mylifebits/"&gt;MyLifeBits &lt;/a&gt;shows this initiative. &lt;a href="http://twitter.com/khassounah"&gt;Khaled Hassounah&lt;/a&gt; came up next and introduced a very successful &lt;a href="http://en.wikipedia.org/wiki/Personal_health_record"&gt;PHR&lt;/a&gt; (Personal Health Record) service named &lt;a href="http://www.medhelp.org/"&gt;MedHelp&lt;/a&gt;. 
Then &lt;a href="http://twitter.com/lbranagan"&gt;Linda Branagan&lt;/a&gt; from &lt;a href="http://www.medweb.com/"&gt;Medweb &lt;/a&gt;argued the difference between &lt;a href="http://en.wikipedia.org/wiki/Electronic_medical_record"&gt;EMR &lt;/a&gt;(Electronic Medical Record) and PHR (Health Panel Video&lt;a href="http://www.archive.org/details/PDA2011-personalhealthdata"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/-krzQRBkV72U/TXWbdniFlBI/AAAAAAAAAFQ/ul1spnfF3AQ/s1600/IMG_20110224_130836.jpg"&gt;&lt;img style="float: right; margin: 0pt 0pt 10px 10px; cursor: pointer; width: 239px; height: 320px;" src="http://3.bp.blogspot.com/-krzQRBkV72U/TXWbdniFlBI/AAAAAAAAAFQ/ul1spnfF3AQ/s320/IMG_20110224_130836.jpg" alt="" id="BLOGGER_PHOTO_ID_5581538246326064146" border="0"&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;After this final break, &lt;a href="http://elizabethchurchill.com/"&gt;Elizabeth Churchill&lt;/a&gt; from Yahoo! Research led a panel discussing forensics in the digital world&lt;a href="http://www.archive.org/details/PDA2011-forensics"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;. 
&lt;a href="http://sils.unc.edu/people/faculty/post-doc-faculty"&gt;Kam Woods&lt;/a&gt; from the University of North Carolina presented the &lt;a href="http://accessdata.com/products/forensic-investigation/ftk"&gt;Forensic Toolkit Imager&lt;/a&gt; and Sam Miester from the University of Maryland discussed data from failed businesses like the &lt;a href="http://www.shrwood.com/About-Sherwood"&gt;Sherwood&lt;/a&gt; case.&lt;br /&gt;&lt;br /&gt;As a grand finale, the Author &lt;a href="http://en.wikipedia.org/wiki/Rudy_Rucker"&gt;Rudy Rucker&lt;/a&gt;&lt;a href="http://www.archive.org/details/PDA2011-rudyrucker"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" border="0" alt="" id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;gave a &lt;a href="http://www.rudyrucker.com/blog/2011/02/24/speakage/"&gt;very interesting talk&lt;/a&gt; filled with insightful thoughts, humor and sarcasm discussing Digital Immortality by creating a digital replica of thought and memories which he named &lt;a href="http://www.rudyrucker.com/lifebox/"&gt;LifeBox&lt;/a&gt;. LifeBox acts as a bot that can imitate your responses and be able to answer and give opinions based on your thoughts and memories; it can stay forever even after your death for your great grand children.&lt;br /&gt;&lt;br /&gt;As a summary it was an amazing conference, not just because I attended those 48 sessions but because it gave me a priceless opportunity to meet those bright individuals and broaden my scope of thoughts. 
As a matter of fact I was inspired to come up with several ideas for my thesis proposal!&lt;br /&gt;&lt;br /&gt;Also, on another note, I found out that the size of the internet is 20x8x8 ft and it is &lt;a href="http://www.metafilter.com/80299/Internet-Archives-new-data-center-in-a-box"&gt;located in a parking lot in Santa Clara&lt;/a&gt;, California.&lt;br /&gt;&lt;br /&gt;For more about the conference from another perspective please check out Collin Thorman's &lt;a href="http://litbrarian.wordpress.com/2011/02/"&gt;blog posts&lt;/a&gt;, the Library Of Congress's &lt;a href="http://www.digitalpreservation.gov/news/2011/20110303_news_pda_conference.html"&gt;news page&lt;/a&gt;, Dick Eastman's &lt;a href="http://blog.eogn.com/eastmans_online_genealogy/2011/03/personal-digital-archiving-conference.html"&gt;blog&lt;/a&gt;, Christina Engelbart's &lt;a href="http://collectiveiq.wordpress.com/2011/03/01/personal-digital-archiving-conference-2011/"&gt;collective IQ post&lt;/a&gt;, Ellysa Cahoy's &lt;a href="http://www.personal.psu.edu/esc10/blogs/E-Tech/2011/02/personal-digital-archiving-201.html"&gt;blog&lt;/a&gt;, Don Hawkins's &lt;a href="http://www.theconferencecircuit.com/2011/03/07/understanding-personal-digital-archives-cliff-lynchs-keynote-address/"&gt;article&lt;/a&gt;, collection of posts on The Waki Librarian's &lt;a href="http://thewakilibrarian.wordpress.com/2011/02/24/"&gt;day1&lt;/a&gt;, &lt;a href="http://thewakilibrarian.wordpress.com/2011/02/25/"&gt;day2&lt;/a&gt; and &lt;a href="http://twitter.com/#%21/search?q=%23pda2011"&gt;#PDA2011&lt;/a&gt; on twitter.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-weight:bold;"&gt;(2011-03-20 Update:)&lt;/span&gt; I have associated links to video recording of each of the sessions, press the&lt;a href="about:blank"&gt;&lt;img style="cursor:pointer; cursor:hand;width: 11px; height: 11px; margin-top: -2px;" src="http://2.bp.blogspot.com/-Na8bHQ-ypRw/TYWnuTr1LsI/AAAAAAAAAFs/ZOBmKl0aDjo/s320/video_icon.gif" align="top" 
border="0" alt=""id="BLOGGER_PHOTO_ID_5586055326822903490" /&gt;&lt;/a&gt;and it will display it, photos from the conference can be seen &lt;a href="http://www.archive.org/stream/pda2011photos#page/n0/mode/2up"&gt;here&lt;/a&gt;, courtesy of The Internet Archive and Jeff Ubois.&lt;br /&gt;&lt;br /&gt;-- Hany SalahEldeen&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-1066829312898469008?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/1066829312898469008/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/03/2011-03-04-personal-digital-archiving.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/1066829312898469008'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/1066829312898469008'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/03/2011-03-04-personal-digital-archiving.html' title='2011-03-04: Personal Digital Archiving Conference 2011'/><author><name>Hany SalahEldeen</name><uri>http://www.blogger.com/profile/06304841890215312435</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='http://1.bp.blogspot.com/-U-tAHHzl3Ok/TxNag880ahI/AAAAAAAABgc/-mvRIYmi_3Q/s220/339232_10150962841245323_533655322_21244974_36126378_o.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://1.bp.blogspot.com/-nnHIVxY5BXY/TXWa7Nsx2aI/AAAAAAAAAFA/Bc9toFNyzdI/s72-c/IMG_20110224_130456.jpg' height='72' 
width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-5453966474681768655</id><published>2011-02-08T20:29:00.000-05:00</published><updated>2011-02-08T20:29:55.195-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='publications'/><category scheme='http://www.blogger.com/atom/ns#' term='lexical signature'/><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><category scheme='http://www.blogger.com/atom/ns#' term='technical report'/><category scheme='http://www.blogger.com/atom/ns#' term='rediscover missing web pages'/><title type='text'>2011-02-08: An Evaluation of Link Neighborhood Lexical Signatures to Rediscover Missing Web Pages</title><content type='html'>&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: left;"&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif; white-space: pre-wrap;"&gt;The final project for my master's degree focused on the problem of “missing” web pages, those URIs that return an error result when retrieved. 
&amp;nbsp;When a web page is no longer available at a given URI, it may be available at a new URI, and this research proposes and demonstrates a new method for finding the new URI.&lt;/span&gt;&lt;/div&gt;&lt;div style="background-color: transparent; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif;"&gt;&lt;span style="background-color: transparent; background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;Prior research has proposed using the lexical signature of a page as a search query to find the same or similar content at a new URI. &amp;nbsp;A lexical signature (LS) is a few words that are used in that page much more often than they are used in other pages on the Web, and so are thought to describe what the page is about. &amp;nbsp;That LS is then used as a search query which will hopefully find the target page in its results.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;Previously-proposed methods for using an LS to find a new URI required either that the page be analyzed before being lost (ref: P&amp;amp;W) or that cached or archived versions of the page be available for analysis. 
&amp;nbsp;If the page had not previously been analyzed and no cached copies existed, then these methods could not hope to recover the missing page.&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: Times;"&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="background-color: transparent; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif;"&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://3.bp.blogspot.com/_6bX5Cg9lBLY/TVHsKeAIYgI/AAAAAAAAAZw/UDLq79RG8is/s1600/fig-link-neighborhood.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="254" src="http://3.bp.blogspot.com/_6bX5Cg9lBLY/TVHsKeAIYgI/AAAAAAAAAZw/UDLq79RG8is/s320/fig-link-neighborhood.png" width="320" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div style="background-color: transparent; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif;"&gt;&lt;span class="Apple-style-span" style="white-space: pre-wrap;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="background-color: transparent; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif;"&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;We propose a new method for calculating an LS of a page using only its backlinks, that is the pages that link to the missing page.  
The target page together with its backlinks make up the link neighborhood, a trivial example of which is shown above. &amp;nbsp;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="background-color: transparent; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="background-color: transparent; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif;"&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;Backlinks are retrieved from a search engine (we used Yahoo!).  We process each backlink page to find the text it uses to link to the target page. &amp;nbsp;&lt;/span&gt;&lt;span class="Apple-style-span" style="white-space: pre-wrap;"&gt;This text is known as ‘anchor text’.  We take all of the terms from the anchor texts of all of the backlinks, and calculate the Term Frequency - Inverse Document Frequency (TF-IDF) value of each.  That is, we find the terms that are used commonly to link to the target page and are less common on the rest of the Internet.  We take the terms with highest TFIDF to be the lexical signature.  We use the LS as a query back to the search engine, and if the method is successful, we find our target URI at the top of the results.  
An example of this process is shown below.&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="background-color: transparent; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif;"&gt;&lt;span class="Apple-style-span" style="white-space: pre-wrap;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="separator" style="clear: both; text-align: center;"&gt;&lt;a href="http://1.bp.blogspot.com/_6bX5Cg9lBLY/TVHsCgq5NRI/AAAAAAAAAZs/jwYz8nPsgT4/s1600/fig-method-example.png" imageanchor="1" style="margin-left: 1em; margin-right: 1em;"&gt;&lt;img border="0" height="320" src="http://1.bp.blogspot.com/_6bX5Cg9lBLY/TVHsCgq5NRI/AAAAAAAAAZs/jwYz8nPsgT4/s320/fig-method-example.png" width="286" /&gt;&lt;/a&gt;&lt;/div&gt;&lt;div style="background-color: transparent; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif; white-space: pre-wrap;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style="background-color: transparent; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif; white-space: pre-wrap;"&gt;We consider several variations of this method in order to draw conclusions about the most effective parameters. &amp;nbsp;First, we find that including second-level backlinks is not helpful; they add too much noise to the LS and decrease its effectiveness. 
&amp;nbsp;This confirms the intuition that second-level backlinks, those pages that don’t link directly to a target page but instead link to the target page’s backlinks, are not as closely related to the target page, and will therefore provide less relevant terms.&lt;/span&gt;&lt;/div&gt;&lt;div style="background-color: transparent; margin-bottom: 0px; margin-left: 0px; margin-right: 0px; margin-top: 0px;"&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif;"&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;Second, we show that only the anchor text provides useful terms to the LS. &amp;nbsp;We also experimented using the anchor text plus five words on either side of the link, or anchor +/-10 words, as well as using all words on the page. &amp;nbsp;We found that using only the anchor text provides the best-performing LS, and every step further away from the anchor text led to worse performance.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;Third, we show that a passable LS can be created using only the first ten backlinks retrieved from the search engine. 
&amp;nbsp;Using the first hundred or thousand backlinks yields a slightly-better performing LS in some cases, but we argue that due to the increased cost associated with retrieving and processing 90 or 990 extra pages, the increased performance isn’t worth it.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;Lastly, we recommend using a 4-term LS. &amp;nbsp;This is noteworthy in that most other LS research has concluded that a 5- or 7-term LS is ideal, depending on the desired performance characteristics. &amp;nbsp;We posit that fewer terms are preferable because we are drawing the terms from pages other than the page for which we are searching, therefore we run the risk of including terms which do not appear in the target page, which would likely exclude the target page from a search for the LS. 
&amp;nbsp;By using fewer terms, there is less risk that we include a word from a backlink page that doesn’t exist in the target page.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;Using our recommended method, the target URI appears as the first result for the LS in 56% of our test cases.&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;&lt;/span&gt;&lt;br /&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;The full report can be found at:&lt;/span&gt;&lt;/span&gt;&lt;blockquote&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif;"&gt;Jeb Ware, Martin Klein, Michael L. 
Nelson, &lt;a href="http://arxiv.org/abs/1102.0930"&gt;An Evaluation of Link Neighborhood Lexical Signatures to Rediscover Missing Web Pages&lt;/a&gt;, Technical Report arXiv:1102.0930, February 2011.&lt;/span&gt;&lt;/blockquote&gt;&lt;span class="Apple-style-span" style="font-family: Verdana, sans-serif;"&gt;&lt;span style="background-color: transparent; background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;&lt;/span&gt;&lt;span style="background-color: transparent; color: black; font-style: normal; font-weight: normal; text-decoration: none; vertical-align: baseline; white-space: pre-wrap;"&gt;-- Jeb Ware&lt;/span&gt;&lt;/span&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-5453966474681768655?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/5453966474681768655/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/02/2011-02-08-evaluation-of-link.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5453966474681768655'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5453966474681768655'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/02/2011-02-08-evaluation-of-link.html' title='2011-02-08: An Evaluation of Link Neighborhood Lexical Signatures to Rediscover Missing Web Pages'/><author><name>Jeb</name><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='16' height='16' src='http://img2.blogblog.com/img/b16-rounded.gif'/></author><media:thumbnail 
xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/_6bX5Cg9lBLY/TVHsKeAIYgI/AAAAAAAAAZw/UDLq79RG8is/s72-c/fig-link-neighborhood.png' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-437431583148551310</id><published>2011-01-12T18:29:00.014-05:00</published><updated>2011-01-12T21:02:14.955-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='2011'/><category scheme='http://www.blogger.com/atom/ns#' term='statistical computing'/><category scheme='http://www.blogger.com/atom/ns#' term='CS 796/896'/><category scheme='http://www.blogger.com/atom/ns#' term='R'/><category scheme='http://www.blogger.com/atom/ns#' term='plotting'/><title type='text'>R Tutorial</title><content type='html'>As part of Dr. Weigle's &lt;a href="http://www.cs.odu.edu/%7Emweigle/CS796-S11/Home" target="blank"&gt;CS 796/896 Visual Analytics Seminar&lt;/a&gt; I will offer a tutorial on the statistical computing software &lt;a href="http://www.r-project.org/" target="blank"&gt;R&lt;/a&gt;. I will give a hands-on introduction to data input/output, simple data manipulation, and (of course) plotting.  If, for example, you have always wondered how to fill data vectors, import data from MySQL databases, compute the mean and standard deviation, execute logic, ranking and sorting operations, compute correlations and linear regressions, use loops and write functions as well as create scatter-, bar-, box- and other plots and all that in R, you will enjoy this tutorial.  The tutorial is targeted towards course participants who have a natural interest in data visualization but it also has merit for other MS and Ph.D. 
students doing research and consequently dealing with and plotting data.&lt;br /&gt;&lt;div&gt;&lt;div&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://www.cs.odu.edu/%7Emklein/cs796/lecture/pics/ia_observations_ed_shin_2dec_inv_col3_png.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 200px;" src="http://www.cs.odu.edu/%7Emklein/cs796/lecture/pics/ia_observations_ed_shin_2dec_inv_col3_png.png" alt="" border="0" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;/div&gt;&lt;div&gt;While this introduction naturally can not cover all aspects of R and does not claim to be exhaustive, it will help students getting started with the software and the material will serve as an "almanac" in the future to generate pretty graphs such as the one shown here.&lt;/div&gt;&lt;/div&gt;&lt;div&gt;&lt;div&gt;CS796/896 is not open to the public but the material covered in the tutorial can be obtained &lt;a href="http://www.cs.odu.edu/%7Emklein/cs796/lecture/" target="blank"&gt;here&lt;/a&gt;.&lt;/div&gt;&lt;div&gt;If you have any questions, comments or would like to contribute to augment the materials do not hesitate to contact me.&lt;/div&gt;&lt;/div&gt;&lt;div&gt;--&lt;/div&gt;&lt;div&gt;martin&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-437431583148551310?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/437431583148551310/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/01/r-tutorial.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/437431583148551310'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/953024975153422094/posts/default/437431583148551310'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/01/r-tutorial.html' title='R Tutorial'/><author><name>martin klein</name><uri>http://www.blogger.com/profile/13289299995516244353</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='http://4.bp.blogspot.com/_cJsqIApA0c0/SkAya34Wh5I/AAAAAAAAAAM/XvoXQjYUpzc/s1600-R/mk.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-6059579723153026933</id><published>2011-01-11T00:39:00.002-05:00</published><updated>2011-01-11T00:46:24.676-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='CS 796/896'/><category scheme='http://www.blogger.com/atom/ns#' term='CS 751/851'/><title type='text'>2011-01-11: WS-DL Spring 2011 Classes</title><content type='html'>We are fortunate to have two classes of interest to WS-DL members this semester:  &lt;a href="http://www.cs.odu.edu/%7Emln/"&gt;Dr. Nelson&lt;/a&gt;'s CS 751/851 &lt;a href="http://www.cs.odu.edu/%7Emln/teaching/cs751-s11/"&gt;Introduction to Digital Libraries&lt;/a&gt; and &lt;a href="http://www.cs.odu.edu/%7Emweigle/"&gt;Dr. Weigle&lt;/a&gt;'s CS 796/896 &lt;a href="http://www.cs.odu.edu/%7Emweigle/CS796-S11/Home"&gt;Visual Analytics Seminar&lt;/a&gt;.  They'll run back-to-back on Tuesdays in ECSB r. 2120, with 796/896 beginning at 1:30pm and 751/851 beginning at 4:20pm. 
&lt;br /&gt;&lt;br /&gt;--Michael&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-6059579723153026933?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/6059579723153026933/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2011/01/2011-01-11-ws-dl-spring-2011-classes.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6059579723153026933'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6059579723153026933'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2011/01/2011-01-11-ws-dl-spring-2011-classes.html' title='2011-01-11: WS-DL Spring 2011 Classes'/><author><name>Michael L. 
Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-451531758673196764</id><published>2010-12-27T00:04:00.008-05:00</published><updated>2010-12-27T00:26:18.567-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Internships'/><category scheme='http://www.blogger.com/atom/ns#' term='Zurich'/><category scheme='http://www.blogger.com/atom/ns#' term='Google'/><category scheme='http://www.blogger.com/atom/ns#' term='Switzerland'/><title type='text'>2010-12-27: Google Summer Internship, Zürich Switzerland</title><content type='html'>"Hello Hany!...We are glad to inform you that you have been accepted in the summer internship program this year in Google Zürich GmBH!". Call me a geek but these were the best words I have ever heard! I now work for Google, well in one way or another!&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/_ajH_vbhB1sY/TRghfEQpfTI/AAAAAAAAADw/oVdWI7AyHvU/s1600/google1.jpg"&gt;&lt;img style="float:left; margin:0 10px 10px 0;cursor:pointer; cursor:hand;width: 320px; height: 240px;" src="http://4.bp.blogspot.com/_ajH_vbhB1sY/TRghfEQpfTI/AAAAAAAAADw/oVdWI7AyHvU/s320/google1.jpg" border="0" alt="" id="BLOGGER_PHOTO_ID_5555226957964737842" /&gt;&lt;/a&gt;After struggling with the visa issues I finally got my Swiss Schengen visa and the work permit. The Swiss people are very strict and precise, they thought I was two different people, one named Hany Khalil, and the other Hany SalahEldeen! Well I don't blame 
Well I don't blame &lt;br /&gt;them (fyi, in Egypt we don't have the concept of family name, your name is a concatenation of your ancestors names, my name then my father's, then his father's...etc). All my life I have been called Hany SalahEldeen but for some reason the American embassy in Cairo decided that my grandfather's name Khalil suits me better.&lt;br /&gt;&lt;br /&gt;"Ich spreche kein Deutsch!" or "I don't speak German" Was the sentence I was repeating to my self on the plane to Zürich, you will never know when it could become handy sometimes! I was brushing up my old French as well, which seemed useless after I arrived to Zürich to realize that French is the main language in Geneva not Zürich. But I didn't care...I was in Google!....I am a Googler!...I even got an email address with my first name @google.com!&lt;br /&gt;&lt;br /&gt;On the 6th of July I landed in Geneva, then I took the train to &lt;a href="http://en.wikipedia.org/wiki/Z%C3%BCrich_Hauptbahnhof"&gt;Zürich Hauptbahnhof&lt;/a&gt; (which means main station, try to keep up with the German words, or should I say Swiss-German words?). The Swiss really fascinate me, they know the real concept of time (well, they have the best clocks in the world). If you want to call something really punctual or accurate you say it's Swiss, or clock-work which also implies... Swiss. I was dragging my bag from the station, still can barely walk from my leg surgery I reached the tram station. When they say it will arrive 6:43 they actually mean it. 
I arrived at the student residence of &lt;a href="http://www.ethz.ch/index_EN"&gt;ETH University&lt;/a&gt; where I sublet a room for the next 3 months, settled my stuff and fell asleep.&lt;br /&gt;&lt;br /&gt;At 9 am the next morning I was in the &lt;a href="http://maps.google.com/maps/place?cid=7847033285701736122&amp;q=Google+Z%C3%BCrich+GmBH&amp;hl=en&amp;sll=47.36554,8.524864&amp;sspn=8.6286,1.407148&amp;ie=UTF8&amp;ll=52.522906,-4.174805&amp;spn=0,0&amp;z=6"&gt;Google Zürich GmBH&lt;/a&gt; lobby. I met other interns and after an introductory session we were taken on a tour through all 3 huge buildings (I used to lose my way for the first 3 days, but, well, maps were everywhere). There I met some fellow interns who became my great friends later on. The first two weeks were scheduled to be the training phase, including sessions and tutorials. I've got to say when you get access to all these foods, candies, games and entertainment facilities (foosball tables, ping-pong tables, xbox, ps3, rockband, pool, musical instruments, they even got a massage and meditation room!) you get really distracted at the beginning, but that was trivial the following weeks and I loved the idea, if you spoil your employees and make them happy they will feel ownership of the company and commitment, thus they will produce amazing work, that was the motto.&lt;br /&gt;&lt;br /&gt;My host and manager was very excited and eager to start, so was I. I was the first intern to work under his supervision. He was a mentor, always there to help and give good advice, give me room to work, create and think outside the box and above all he was a good friend. Mostly that's the theme among all employees there, lightweight, informal but respectful of course. Later that week I had a standup coffee meeting with a guy who, I later learned, invented the automated language detection in Google translate! 
I was working in the MENA (Middle East and North Africa) team on a project allied with the Google translate team. I wish I was able to describe my project but the NDA (Non-Disclosure Agreement) I signed with Google prevents me as it is a new cool project and by the end of the three months I had successfully built a huge portion of it. When it is released I will let you know!&lt;br /&gt;&lt;br /&gt;Transparency and trust, that's what I was thinking of when I was working. You have access to all the resources and individuals, all available to help you proceed in your project. You can mail anyone and say hey I wanna ask you something! He/She will answer immediately. If you are stuck with a certain program or library you can ask, there are experts in it on the mailing list. Maybe you can find the guy who actually invented it and wrote the whole thing! (Like the case in &lt;a href="http://www.vim.org/"&gt;Vim&lt;/a&gt;, also you can find &lt;a href="http://en.wikipedia.org/wiki/Sergey_Brin"&gt;Sergey&lt;/a&gt;, &lt;a href="http://en.wikipedia.org/wiki/Larry_Page"&gt;Page &lt;/a&gt;and &lt;a href="http://en.wikipedia.org/wiki/Vint_Cerf"&gt;Cerf &lt;/a&gt;on the mailing list too!). The development process is totally different at Google, yes it is &lt;a href="http://agilemanifesto.org/"&gt;Agile &lt;/a&gt;and standup meetings are more common than coffee in Italy but there are other considerations. You want to meet deadlines and race to be innovative but also you have to produce code that is extremely scalable, dependable, thoroughly tested, following style convention and very readable. Handover time to another engineer shouldn't take a long time. I had to throw away almost all of what I knew in C++ and adapt to the new framework of libraries, &lt;a href="http://labs.google.com/papers/bigtable.html"&gt;bigtables&lt;/a&gt;, &lt;a href="http://labs.google.com/papers/mapreduce.html"&gt;mapreduces &lt;/a&gt;...etc. 
If you require some functionality, someone has probably written it before, so go directly to Code search and access the code base. &lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://1.bp.blogspot.com/_ajH_vbhB1sY/TRgiLxgnMtI/AAAAAAAAAD4/FMEL2JyKVG4/s1600/google3.jpg"&gt;&lt;img style="float:right; margin:0 0 10px 10px;cursor:pointer; cursor:hand;width: 240px; height: 320px;" src="http://1.bp.blogspot.com/_ajH_vbhB1sY/TRgiLxgnMtI/AAAAAAAAAD4/FMEL2JyKVG4/s320/google3.jpg" border="0" alt="" id="BLOGGER_PHOTO_ID_5555227726025536210" /&gt;&lt;/a&gt;TGIF (Thank Google It's Friday!) are the best weekly gatherings ever! You meet people from different teams in a social manner, relax, laugh, have fun and even do karaoke, which was a bad idea for me to participate in! Every Friday night the other interns and I used to go discover the city and dine in a new place serving a new cuisine, ranging from &lt;a href="http://en.wikipedia.org/wiki/Fondue"&gt;Swiss cheese fondue&lt;/a&gt; to flaming &lt;a href="http://en.wikipedia.org/wiki/Pad_Thai"&gt;duck Phad Thai&lt;/a&gt;. It was delicious and enlightening!&lt;br /&gt;&lt;br /&gt;I have been to several parts of Switzerland, learned a little German, and one of my friends at Google actually taught me the &lt;a href="http://en.wikipedia.org/wiki/Richter-tuned_harmonica"&gt;Blues Harp&lt;/a&gt; (AKA. Harmonica) and we used to practice three times a week. I travelled back to Spain to see friends, did water skiing on the lake in Zürich and was scheduled to do a sky-dive on top of the &lt;a href="http://en.wikipedia.org/wiki/Alps"&gt;Alps &lt;/a&gt;but it was cancelled for bad weather, I was pissed!&lt;br /&gt;&lt;br /&gt;Walking through the city was a pleasure in itself. Enjoying a cup of coffee down one of the curling streets was amazing. Reading a book by the lake was quality time. 
The only bad thing about Zürich is its prices!...I saw a suit in a shop and I kept looking for its price tag because I thought the numbers on the tag in front of it were the serial number not price!&lt;br /&gt;&lt;br /&gt;The student residence I used to live in was amazing. Imagine living in a place where 100 different students live from more than 35 countries. We laughed together, we watched World-cup together and cheered for all teams! we cooked, watched movies and partied together too. It was friendly, brotherly and definitely educating. I met there people who definitely left a mark on my life.&lt;br /&gt;&lt;br /&gt;In conclusion it was an amazing summer, educating, life changing experience. Working for the best company, living in an amazing city and meeting great people, what more can one ask for?!&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-451531758673196764?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/451531758673196764/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2010/12/2010-12-26-google-summer-internship.html#comment-form' title='2 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/451531758673196764'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/451531758673196764'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2010/12/2010-12-26-google-summer-internship.html' title='2010-12-27: Google Summer Internship, Zürich Switzerland'/><author><name>Hany SalahEldeen</name><uri>http://www.blogger.com/profile/06304841890215312435</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' 
src='http://1.bp.blogspot.com/-U-tAHHzl3Ok/TxNag880ahI/AAAAAAAABgc/-mvRIYmi_3Q/s220/339232_10150962841245323_533655322_21244974_36126378_o.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/_ajH_vbhB1sY/TRghfEQpfTI/AAAAAAAAADw/oVdWI7AyHvU/s72-c/google1.jpg' height='72' width='72'/><thr:total>2</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-6209463171418534906</id><published>2010-12-06T18:33:00.008-05:00</published><updated>2011-02-19T11:26:08.787-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Digital Preservation Award'/><category scheme='http://www.blogger.com/atom/ns#' term='2010'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><title type='text'>2010-12-06: Memento Wins the 2010 Digital Preservation Award</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_xf_Yufxwils/TP1y9UOZNLI/AAAAAAAAAag/Wof2fKrqXKc/s1600/dpa-trophy.jpg"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 158px; height: 200px;" src="http://2.bp.blogspot.com/_xf_Yufxwils/TP1y9UOZNLI/AAAAAAAAAag/Wof2fKrqXKc/s200/dpa-trophy.jpg" alt="" id="BLOGGER_PHOTO_ID_5547716713716987058" border="0" /&gt;&lt;/a&gt;The &lt;a href="http://www.mementoweb.org/"&gt;Memento&lt;/a&gt; Project won the &lt;a href="http://www.dpconline.org/advocacy/awards/581-2010-digital-preservation-award"&gt;2010 Digital Preservation Award&lt;/a&gt; in London on December 1, 2010.  
The DPA is sponsored by the &lt;a href="http://www.dpconline.org/"&gt;Digital Preservation Coalition&lt;/a&gt;, and the Memento Project is sponsored by the &lt;a href="http://www.digitalpreservation.gov/"&gt;Library of Congress&lt;/a&gt; (see also: &lt;a href="http://www.digitalpreservation.gov/news/2009/20091228news_article_memento.html"&gt;LC's project page&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;Details about the DPA are provided in several press releases, including ones from the &lt;a href="http://www.dpconline.org/newsroom/latest-news/655-memento-project-wins-digital-preservation-award-2010"&gt;DPC&lt;/a&gt;, &lt;a href="http://www.odu.edu/ao/news/index.php?todo=details&amp;amp;id=25174"&gt;ODU&lt;/a&gt;, &lt;a href="http://www.lanl.gov/news/releases/research_library_teams_shares_2010_digital_preservation_award_nr.html"&gt;LANL&lt;/a&gt; and &lt;a href="http://www.digitalpreservation.gov/news/2010/20101222news_article_DPaward.html"&gt;LC&lt;/a&gt;.  DPC has also posted a short video of an &lt;a href="http://www.dpconline.org/advocacy/awards/dp-award-2010"&gt;interview with Herbert&lt;/a&gt;.  
And for posterity, the &lt;a href="http://twitter.com/WilliamKilbride/status/10053028979679232"&gt;original tweet&lt;/a&gt; from &lt;a href="http://twitter.com/WilliamKilbride"&gt;William Kilbride&lt;/a&gt; announcing the winner (more information from the award ceremony will be announced on &lt;a href="http://twitter.com/#search?q=%23dpa2010"&gt;#dpa2010&lt;/a&gt;).&lt;br /&gt;&lt;br /&gt;Thanks to the DPC, the DPA judges, the Library of Congress, and everyone on the Memento team!&lt;br /&gt;&lt;br /&gt;--Michael&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-6209463171418534906?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/6209463171418534906/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2010/12/2010-12-06-memento-wins-2010-digital.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6209463171418534906'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6209463171418534906'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2010/12/2010-12-06-memento-wins-2010-digital.html' title='2010-12-06: Memento Wins the 2010 Digital Preservation Award'/><author><name>Michael L. 
Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_xf_Yufxwils/TP1y9UOZNLI/AAAAAAAAAag/Wof2fKrqXKc/s72-c/dpa-trophy.jpg' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-5863849825790142517</id><published>2010-12-02T13:31:00.003-05:00</published><updated>2010-12-02T13:37:29.773-05:00</updated><title type='text'>2010-12-02:  NASA IPCC Data System Workshop</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_KwJot7AD4QA/TPfnQF2M77I/AAAAAAAAAB0/OtF9I4vbE-I/s1600/nasa-ipcc.png"&gt;&lt;img style="margin: 0pt 10px 10px 0pt; float: left; cursor: pointer; width: 175px; height: 82px;" src="http://2.bp.blogspot.com/_KwJot7AD4QA/TPfnQF2M77I/AAAAAAAAAB0/OtF9I4vbE-I/s400/nasa-ipcc.png" alt="" id="BLOGGER_PHOTO_ID_5546155729763495858" border="0" /&gt;&lt;/a&gt;I attended a &lt;a href="http://oodt.jpl.nasa.gov/wiki/display/CLIMATE/NASA-IPCC+Data+System+Workshop"&gt;NASA Intergovernmental Panel Climate Change&lt;/a&gt;(IPCC) Data System Workshop  in Greenbelt Maryland, November 9 - 10. The &lt;a href="http://www.ipcc.ch/"&gt;IPCC&lt;/a&gt; is an international committee overseeing the assessment of global climate change.&lt;br /&gt;&lt;br /&gt;The purpose of this workshop is to discuss technical plan to prepare, incorporate and share IPCC-relevant NASA satellite observational datasets to support the Coupled Model Intercomparison Project Phase 5 (&lt;a href="http://cmip-pcmdi.llnl.gov/cmip5/"&gt;CMIP5&lt;/a&gt;). 
&lt;a href="http://cmip-pcmdi.llnl.gov/"&gt;CMIP&lt;/a&gt; is a standard protocol and framework for evaluating climate model simulation (hindcast) and predictions/simulation of future climate change. CMIP5 is the 5th evaluation and is being organized and led by the Program for Climate Model Diagnosis and Intercomparison (&lt;a href="http://www-pcmdi.llnl.gov/"&gt;PCMDI&lt;/a&gt;) mission at Lawrence Livermore National Laboratory.  All of this activity will help contribute to the IPCC 5th Assessment Report (IPCC AR5) and beyond.  In prior assessments, NASA observational datasets were not used (or very little).  NASA HQ has recognized the richness and importance of NASA datasets and encouraged the satellite project teams to get involved and collaborate with the PCMDI on CMIP5.&lt;br /&gt;&lt;br /&gt;An interesting overview talk on Earth System Grid (ESG) was presented.  ESG is a distributed computational environment of grid services to support next generation climate modeling research.  More technical details of ESG can be found in this paper by &lt;a href="http://arxiv.org/pdf/0712.2262"&gt;Bernholdt et al. (2005)&lt;/a&gt;.  Technical talks from JPL, GSFC, NCAR, NOAA, and ORNL discussed progress from each group to support CMIP5.  While most groups are one or more years into the effort, we (at LaRC) are newbies.  Our group presented an overview of relevant CERES datasets and a new tool for ordering and retrieving CERES data. The biggest hurdle and question is how do we make satellite observations look like model output.  This is critical for intercomparison.  Lots of talk on CF NetCDF compliant formats, technical notes and metadata for each dataset, and selection of relevant observation datasets to include in CMIP5.  A couple of groups have gateways into the ESG while most have data nodes.  With a tight deadline of April 1, 2011, we agree to let ORNL host our CERES dataset on their ESG data node.  
We agree to set up a data node at Langley in the near future.&lt;br /&gt;&lt;br /&gt;-Louis&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-5863849825790142517?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/5863849825790142517/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2010/12/2010-12-02-nasa-ipcc-data-system.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5863849825790142517'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5863849825790142517'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2010/12/2010-12-02-nasa-ipcc-data-system.html' title='2010-12-02:  NASA IPCC Data System Workshop'/><author><name>Louis Nguyen</name><uri>http://www.blogger.com/profile/17194521261535148267</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='29' height='32' src='http://4.bp.blogspot.com/_KwJot7AD4QA/Skucdv2E5MI/AAAAAAAAAAM/GMp8yz-Yxh8/S220/Louis_mugshot.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_KwJot7AD4QA/TPfnQF2M77I/AAAAAAAAAB0/OtF9I4vbE-I/s72-c/nasa-ipcc.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-4780994529451033864</id><published>2010-11-15T21:48:00.005-05:00</published><updated>2010-11-15T22:13:14.252-05:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='HTTP'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='digital preservation'/><category 
scheme='http://www.blogger.com/atom/ns#' term='Internet Draft'/><title type='text'>2010-11-15: Memento Presentation at UNC; Memento ID</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_xf_Yufxwils/TOHxPVSNKbI/AAAAAAAAAaQ/vMQv-9IbnNU/s1600/mementologo.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 180px; height: 180px;" src="http://2.bp.blogspot.com/_xf_Yufxwils/TOHxPVSNKbI/AAAAAAAAAaQ/vMQv-9IbnNU/s200/mementologo.png" alt="" id="BLOGGER_PHOTO_ID_5539974262356257202" border="0" /&gt;&lt;/a&gt;I recently had a chance to return to the &lt;a href="http://sils.unc.edu/"&gt;School of Information and Library Science&lt;/a&gt;, UNC Chapel Hill, where I had a most enjoyable post-doc during the academic year 2000-2001.  &lt;a href="http://www.ils.unc.edu/%7Ejaneg/"&gt;Jane Greenberg&lt;/a&gt; was nice enough to invite me to speak about Memento in her &lt;a href="http://sils.unc.edu/courses#520"&gt;INLS 520&lt;/a&gt; "Organization of Information" class on Tuesday, November 9th as well as give an &lt;a href="http://sils.unc.edu/events/2010/memento"&gt;invited lecture&lt;/a&gt; about &lt;a href="http://www.mementoweb.org/"&gt;Memento&lt;/a&gt; to the &lt;a href="http://scholcom.web.unc.edu/"&gt;UNC Scholarly Communications Working Group&lt;/a&gt; on Wednesday, November 10th. &lt;br /&gt;&lt;br /&gt;When I first went to UNC I had the office next to Jane and she was just an assistant professor, now she's a full professor and director of the &lt;a href="http://ils.unc.edu/mrc/"&gt;Metadata Research Center&lt;/a&gt;.  I enjoyed catching up with her and my many other friends and colleagues at SILS. &lt;br /&gt;&lt;br /&gt;My slides are available on &lt;a href="http://www.slideshare.net/phonedude"&gt;slideshare.net&lt;/a&gt;; they are mostly a combination of slides I've posted before, but with some updates in the HTTP headers.  
Although the changes are very slight, the recently submitted (11/12/10) &lt;a href="https://datatracker.ietf.org/doc/draft-vandesompel-memento/?include_text=1"&gt;Memento Internet Draft&lt;/a&gt; takes precedence over all of our prior published papers and slides.  For those who don't know, &lt;a href="http://www.ietf.org/"&gt;IETF&lt;/a&gt; &lt;a href="http://en.wikipedia.org/wiki/Internet_Draft"&gt;Internet Drafts&lt;/a&gt; are the first step in the process of issuing an &lt;a href="http://en.wikipedia.org/wiki/Request_for_Comments"&gt;RFC&lt;/a&gt; (cf. "&lt;a href="http://www.youtube.com/watch?v=mEJL2Uuv-oQ"&gt;I'm Just a Bill...&lt;/a&gt;").&lt;br /&gt;&lt;br /&gt;&lt;div style="width:425px" id="__ss_5789895"&gt;&lt;strong style="display:block;margin:12px 0 4px"&gt;&lt;a href="http://www.slideshare.net/phonedude/memento-time-travel-for-the-web-5789895" title="Memento: Time Travel for the Web"&gt;Memento: Time Travel for the Web&lt;/a&gt;&lt;/strong&gt;&lt;object id="__sse5789895" width="425" height="355"&gt;&lt;param name="movie" value="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=unc-nelson-2010-101115180245-phpapp02&amp;amp;stripped_title=memento-time-travel-for-the-web-5789895&amp;amp;userName=phonedude"&gt;&lt;param name="allowFullScreen" value="true"&gt;&lt;param name="allowScriptAccess" value="always"&gt;&lt;embed name="__sse5789895" src="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=unc-nelson-2010-101115180245-phpapp02&amp;amp;stripped_title=memento-time-travel-for-the-web-5789895&amp;amp;userName=phonedude" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="355"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;div style="padding:5px 0 12px"&gt;View more &lt;a href="http://www.slideshare.net/"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/phonedude"&gt;Michael Nelson&lt;/a&gt;.&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;--Michael&lt;div 
class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-4780994529451033864?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/4780994529451033864/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2010/11/2010-11-15-memento-presentation-at-unc.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/4780994529451033864'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/4780994529451033864'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2010/11/2010-11-15-memento-presentation-at-unc.html' title='2010-11-15: Memento Presentation at UNC; Memento ID'/><author><name>Michael L. Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_xf_Yufxwils/TOHxPVSNKbI/AAAAAAAAAaQ/vMQv-9IbnNU/s72-c/mementologo.png' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-6091828049536493618</id><published>2010-11-05T17:16:00.020-04:00</published><updated>2010-11-06T11:46:37.283-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Memento-Datetime'/><category scheme='http://www.blogger.com/atom/ns#' term='HTTP'/><category scheme='http://www.blogger.com/atom/ns#' term='Memento'/><category scheme='http://www.blogger.com/atom/ns#' term='Last-Modified'/><title 
type='text'>2010-11-05: Memento-Datetime is not Last-Modified</title><content type='html'>&lt;span style="font-size:100%;"&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_xf_Yufxwils/TNR0Y_c4bbI/AAAAAAAAAZQ/TUtWkn857vw/s1600/inode.gif"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 116px;" src="http://2.bp.blogspot.com/_xf_Yufxwils/TNR0Y_c4bbI/AAAAAAAAAZQ/TUtWkn857vw/s200/inode%20.gif" alt="" id="BLOGGER_PHOTO_ID_5536177814643436978" border="0" /&gt;&lt;/a&gt;One of the key contributions of the &lt;a href="http://www.mementoweb.org/"&gt;Memento Framework&lt;/a&gt; is the HTTP response header "&lt;a href="http://www.iana.org/assignments/message-headers/prov/memento-datetime"&gt;Memento-Datetime&lt;/a&gt;" (previously called "Content-Datetime" in our earlier publications &amp;amp; slides).  Memento-Datetime is the sticky, intended datetime* for the representation returned when a URI is dereferenced.  The presence of the Memento-Datetime HTTP response header is how the client realizes it has reached a Memento.&lt;br /&gt;&lt;br /&gt;Rather than formally explain what we mean by "sticky, intended datetime", it is easier to explain how it is neither the value in the HTTP response header &lt;a href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.29"&gt;Last-Modified&lt;/a&gt;, nor is it the creation date of the resource (which has no corresponding HTTP header, for reasons that will become clear).  
For the examples below, we'll define the following abbreviations:&lt;br /&gt;&lt;ul&gt;&lt;li&gt;&lt;span style="font-size:100%;"&gt;CD (Creation-Datetime) = the datetime the resource was created&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span style="font-size:100%;"&gt;MD (Memento-Datetime) = the datetime the representation was observed on the web&lt;/span&gt;&lt;/li&gt;&lt;li&gt;&lt;span style="font-size:100%;"&gt;LM (Last-Modified) = the datetime the resource last changed state&lt;/span&gt;&lt;/li&gt;&lt;/ul&gt;&lt;span style="font-size:130%;"&gt;Case 1: CD == MD == LM&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://1.bp.blogspot.com/_xf_Yufxwils/TNSNL-ezhVI/AAAAAAAAAZg/QR1Y1RkZcrM/s1600/single-memento-ait.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; width: 200px; height: 168px;" src="http://1.bp.blogspot.com/_xf_Yufxwils/TNSNL-ezhVI/AAAAAAAAAZg/QR1Y1RkZcrM/s200/single-memento-ait.png" alt="" id="BLOGGER_PHOTO_ID_5536205078835463506" border="0" /&gt;&lt;/a&gt;We'll begin with a case in which all three datetime values could be the same.  Consider the case of this index page at Archive-It.org: &lt;/span&gt;&lt;span style="text-decoration: underline;font-size:100%;" &gt;&lt;br /&gt;&lt;/span&gt;&lt;span style="font-size:100%;"&gt;&lt;br /&gt;&lt;a href="http://wayback.archive-it.org/927/*/http://www.nyu.edu/fas/projects/vcb/case_911_FLASHcontent.html"&gt;http://wayback.archive-it.org/927/*/http://www.nyu.edu/fas/projects/vcb/case_911_FLASHcontent.html&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The index page has a link to a single Memento.  
For simplicity, we'll assume Archive-It.org created this index page and the Memento it references at the moment of the crawl, thus the various datetimes of the Memento would all be equal:&lt;br /&gt;&lt;br /&gt;Creation-Datetime:  Wed, 05 Mar 2008 20:16:49 GMT&lt;br /&gt;Memento-Datetime:  Wed, 05 Mar 2008 20:16:49 GMT&lt;br /&gt;Last-Modified:  Wed, 05 Mar 2008 20:16:49 GMT&lt;br /&gt;&lt;/span&gt;&lt;span style="font-size:100%;"&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt;Case 2: CD == MD &amp;lt; LM&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/_xf_Yufxwils/TNSRc4-GElI/AAAAAAAAAZo/7a-Y1rP5hi4/s1600/ait-archive-tag.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 168px;" src="http://4.bp.blogspot.com/_xf_Yufxwils/TNSRc4-GElI/AAAAAAAAAZo/7a-Y1rP5hi4/s200/ait-archive-tag.png" alt="" id="BLOGGER_PHOTO_ID_5536209767460377170" border="0" /&gt;&lt;/a&gt;If we click on the Memento (&lt;a href="http://wayback.archive-it.org/927/20080305201649/http://www.nyu.edu/fas/projects/vcb/case_911_FLASHcontent.html"&gt;http://wayback.archive-it.org/927/20080305201649/http://www.nyu.edu/fas/projects/vcb/case_911_FLASHcontent.html&lt;/a&gt;), we see that it has a disclaimer banner ("You are viewing an archived web page...") that many archives employ to inform the reader that they are looking at a Memento and not the original resource.  Although there are many techniques for inserting such a banner, the Archive-It example directly modifies the original HTML to insert this banner (as well as handle URI rewriting, etc.).&lt;br /&gt;&lt;br /&gt;Now pretend the wording of the banner needs to be changed (for example, to address a new legal requirement).  
The CD and MD of the Memento are unchanged, but the LM must reflect when the wording of the banner changed:&lt;br /&gt;&lt;br /&gt;Creation-Datetime:  Wed, 05 Mar 2008 20:16:49 GMT&lt;br /&gt;Memento-Datetime:  Wed, 05 Mar 2008 20:16:49 GMT&lt;br /&gt;Last-Modified: Fri, 05 Nov 2010 23:25:19 GMT&lt;br /&gt;&lt;br /&gt;Both your lawyer and your HTTP cache consider this an important change, so you have to update LM.  But it is also clear that the &lt;span style="font-style: italic;"&gt;essence&lt;/span&gt; of the March 2008 observation of the Memento by Archive-It.org is unchanged by the wording change of the archive banner, so MD is not updated.  And certainly the CD is unchanged by this modification.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt;Case 3: MD &amp;lt; CD &amp;lt;= LM &lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://4.bp.blogspot.com/_xf_Yufxwils/TNSZy9JqHMI/AAAAAAAAAZw/Q6VJmirvqCE/s1600/lynx-copy-memento.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; width: 200px; height: 146px;" src="http://4.bp.blogspot.com/_xf_Yufxwils/TNSZy9JqHMI/AAAAAAAAAZw/Q6VJmirvqCE/s200/lynx-copy-memento.png" alt="" id="BLOGGER_PHOTO_ID_5536218942632762562" border="0" /&gt;&lt;/a&gt;Now pretend you are making a new web archive, and you are populating it by crawling other web archives such as Archive-It.org (simulated with the &lt;a href="http://en.wikipedia.org/wiki/Lynx_%28web_browser%29"&gt;king of browsers&lt;/a&gt; in the image to the left).  
You are effectively copying:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://wayback.archive-it.org/927/20080305201649/http://www.nyu.edu/fas/projects/vcb/case_911_FLASHcontent.html"&gt;http://wayback.archive-it.org/927/20080305201649/http://www.nyu.edu/fas/projects/vcb/case_911_FLASHcontent.html&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;to:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://archive.example.org/20101105232519/http://www.nyu.edu/fas/projects/vcb/case_911_FLASHcontent.html"&gt;http://archive.example.org/20101105232519/http://www.nyu.edu/fas/projects/vcb/case_911_FLASHcontent.html &lt;/a&gt;&lt;br /&gt;&lt;br /&gt;The presence of the Memento-Datetime header from Archive-It.org indicates that the resource is an encapsulation of the state of another resource, at the MD datetime value.  The link between the Memento and the original resource is indicated with an HTTP Link response header:&lt;br /&gt;&lt;br /&gt;Link: &amp;lt;http://www.nyu.edu/fas/projects/vcb/case_911_FLASHcontent.html&amp;gt;; rel="original"&lt;br /&gt;&lt;br /&gt;Thus, MD is &lt;span style="font-style: italic;"&gt;sticky&lt;/span&gt; in that the new Memento at example.org retains the MD value it observed from Archive-It.org.  However, the CD and LM values reflect the datetime relative to example.org:&lt;br /&gt;&lt;br /&gt;Creation-Datetime: Fri, 05 Nov 2010 23:25:19 GMT&lt;br /&gt;Memento-Datetime:  Wed, 05 Mar 2008 20:16:49 GMT&lt;br /&gt;Last-Modified: Fri, 05 Nov 2010 23:25:19 GMT&lt;br /&gt;&lt;br /&gt;The MD and LM datetimes can also vary for the example.org Memento as described in Case 2.  
(In the unlikely case that the intent of example.org was to create an archive of how resources were archived, the MD could be reset to 05 Nov 2010 and the Link header would point to the Archive-It.org resource as the original resource instead of the nyu.edu resource; however, this is not the point of this discussion.)&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt;Case 4: CD &amp;lt; MD &amp;lt;= LM &lt;/span&gt;&lt;br /&gt;&lt;br /&gt;&lt;a href="http://2.bp.blogspot.com/_xf_Yufxwils/TNSqGsvNMvI/AAAAAAAAAZ4/ocRmcQBGdfk/s1600/apache-welcome.png"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 168px;" src="http://2.bp.blogspot.com/_xf_Yufxwils/TNSqGsvNMvI/AAAAAAAAAZ4/ocRmcQBGdfk/s200/apache-welcome.png" alt="" id="BLOGGER_PHOTO_ID_5536236874010276594" border="0" /&gt;&lt;/a&gt;This scenario is probably less common, but you could imagine situations in which CD is the earliest datetime value.  This might happen in situations in which the resource was created with something akin to &lt;a href="http://en.wikipedia.org/wiki/Fork-exec"&gt;fork() &amp;amp; exec()&lt;/a&gt; semantics: the resource was technically created at a certain datetime, but it did not acquire its own state until a later datetime, reflected in the MD &amp;amp; LM values.&lt;br /&gt;&lt;br /&gt;For example, a &lt;a href="http://en.wikipedia.org/wiki/Web_archiving#Transactional_archiving"&gt;transactional archive&lt;/a&gt; might record as CD the first datetime in which a resource returns a 200 response, but might choose to delay archiving Mementos until the resource's state is something other than "Welcome to Apache".  
In this scenario, you could have:&lt;br /&gt;&lt;br /&gt;Creation-Datetime: Wed, 05 Mar 2008 20:16:49 GMT&lt;br /&gt;Memento-Datetime: Fri, 05 Nov 2010 23:25:19 GMT&lt;br /&gt;Last-Modified: Fri, 05 Nov 2010 23:25:19 GMT&lt;br /&gt;&lt;br /&gt;The MD and LM datetimes could also vary as described in Case 2.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt;Creation Datetime Is Often Unavailable&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;To illustrate the differences between the various datetime concepts, the above examples have discussed Creation Datetime as if it is a commonly available value.  However, this is most often not the case -- in fact, there is no defined HTTP response header that corresponds to Creation Datetime.  This is due to the historical limitation of &lt;a href="http://en.wikipedia.org/wiki/Inode"&gt;Unix inodes&lt;/a&gt; (i.e., metadata for files), which track three notions of time: atime (access time of the file), mtime (modification time of the file), and ctime (modification time of the inode).  Modern &lt;a href="http://en.wikipedia.org/wiki/Web_content_management_system"&gt;content management systems&lt;/a&gt; might keep track of Creation Datetime, but it is not formally defined at the HTTP level.&lt;br /&gt;&lt;br /&gt;&lt;span style="font-size:130%;"&gt;Summary&lt;/span&gt;&lt;br /&gt;&lt;br /&gt;The above examples should provide illustrations of how the three notions of datetime, although obviously related, have slightly different semantics.  It should be clear that a Memento's Memento-Datetime is also not just Creation-Datetime or Last-Modified inherited from the original resource for which it is a Memento.  Rather than overload an existing HTTP response header (such as Last-Modified), we have introduced the Memento-Datetime (nee Content-Datetime) response header.   
Additional information about Memento headers, Link rel types, and HTTP interactions can be found at &lt;a href="http://www.mementoweb.org/"&gt;mementoweb.org&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;-- Michael&lt;br /&gt;&lt;br /&gt;&lt;br /&gt;* Datetime = neologism of  "date" &amp;amp; "time": the former is often understood to  have a granularity of days, and the latter a granularity of seconds.&lt;/span&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-6091828049536493618?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/6091828049536493618/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2010/11/2010-11-05-memento-datetime-is-not-last.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6091828049536493618'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/6091828049536493618'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2010/11/2010-11-05-memento-datetime-is-not-last.html' title='2010-11-05: Memento-Datetime is not Last-Modified'/><author><name>Michael L. 
Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://2.bp.blogspot.com/_xf_Yufxwils/TNR0Y_c4bbI/AAAAAAAAAZQ/TUtWkn857vw/s72-c/inode%20.gif' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-2263285900479741604</id><published>2010-10-21T11:40:00.005-04:00</published><updated>2010-10-22T15:45:48.899-04:00</updated><title type='text'>2010-10-21: RRAC Presentation</title><content type='html'>Tuesday, I gave a presentation introducing some of the research we are doing in our WSDL group to the Records and Archivists (RRAC) national meeting. This group is made of archivists at &lt;a href='http://en.wikipedia.org/wiki/List_of_federally_funded_research_and_development_centers'&gt;Federally Funded Research and Development Centers&lt;/a&gt; (like &lt;a href='http://mitre.org'&gt;MITRE&lt;/a&gt; and &lt;a href='http://www.aero.org/education/tai/index.html'&gt;Aerospace&lt;/a&gt;) and University Archivists.&lt;br /&gt;&lt;br /&gt;&lt;div style="width:425px" id="__ss_5533140"&gt;&lt;strong style="display:block;margin:12px 0 4px"&gt;&lt;a href="http://www.slideshare.net/jbrunelle008/digital-preservation-odu" title="Digital Preservation - ODU"&gt;Digital Preservation - ODU&lt;/a&gt;&lt;/strong&gt;&lt;object id="__sse5533140" width="425" height="355"&gt;&lt;param name="movie" value="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=dpmitre-101022144221-phpapp02&amp;stripped_title=digital-preservation-odu&amp;userName=jbrunelle008" /&gt;&lt;param name="allowFullScreen" value="true"/&gt;&lt;param name="allowScriptAccess" value="always"/&gt;&lt;embed name="__sse5533140" 
src="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=dpmitre-101022144221-phpapp02&amp;stripped_title=digital-preservation-odu&amp;userName=jbrunelle008" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="355"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;div style="padding:5px 0 12px"&gt;View more &lt;a href="http://www.slideshare.net/"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/jbrunelle008"&gt;Justin Brunelle&lt;/a&gt;.&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;I used slides from several of &lt;a href='http://www.cs.odu.edu/~mln'&gt;Dr. Nelson's&lt;/a&gt; and &lt;a href='http://www.cs.odu.edu/~mklein'&gt;Martin Klein's&lt;/a&gt; presentations (credits recently given in the last slide).&lt;br /&gt;&lt;br /&gt;I also gave the same presentation to the Agile Development department (of which Carlton is a member) on Tuesday. Both groups widely received the research and had very interesting ideas and comments. The RRAC folks (who were of non-technical backgrounds) questioned the projected lifespan and availability of archives like the &lt;a href='http://www.archive.org/'&gt;Internet Archive (IA)&lt;/a&gt;. We also discussed the possibility of the &lt;a href='http://www.msnbc.msn.com/id/39285873'&gt;Twitter virus&lt;/a&gt; being stored in the IA (and I have yet to investigate this possibility). The other interesting topic of discussion was how to use &lt;a href='http://www.robotstxt.org/'&gt;robots.txt&lt;/a&gt; files.&lt;br /&gt;&lt;br /&gt;I thought the presentation went well, and I can provide more information on the other, less interesting questions offline.&lt;br /&gt;&lt;br /&gt;--Justin F. 
Brunelle&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-2263285900479741604?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/2263285900479741604/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2010/10/rrac-presentation.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/2263285900479741604'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/2263285900479741604'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2010/10/rrac-presentation.html' title='2010-10-21: RRAC Presentation'/><author><name>Justin F Brunelle</name><uri>http://www.blogger.com/profile/00580381835470799911</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='32' height='24' src='http://3.bp.blogspot.com/_AmWWXD7g2JA/TG0DJ_mEneI/AAAAAAAAAAM/_AvbhphHU8I/S220/2010-05-24+17.06.03.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-7419226253112333413</id><published>2010-10-11T23:39:00.011-04:00</published><updated>2010-10-18T19:16:25.243-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='Internships'/><category scheme='http://www.blogger.com/atom/ns#' term='Hany'/><category scheme='http://www.blogger.com/atom/ns#' term='Google'/><category scheme='http://www.blogger.com/atom/ns#' term='Microsoft'/><title type='text'>2010-10-11: A Blast from the past: My road to Ws-Dl!</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" 
href="http://4.bp.blogspot.com/_ajH_vbhB1sY/TJjms2ny8oI/AAAAAAAAABk/3Jyk9Oo4e9o/s1600/201001221208.jpg"&gt;&lt;img style="float:left; margin:0 10px 10px 0;cursor:pointer; cursor:hand;width: 240px; height: 320px;" src="http://4.bp.blogspot.com/_ajH_vbhB1sY/TJjms2ny8oI/AAAAAAAAABk/3Jyk9Oo4e9o/s320/201001221208.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5519415001593606786" /&gt;&lt;/a&gt;Hello everyone, I am Hany SalahEldeen, a PhD student in my first year and I am honored to be a new member of the Ws-Dl group at Old Dominion University and supervised by Dr. Michael Nelson.&lt;br /&gt;&lt;br /&gt;I have been in the group for a couple of months now so I thought I should introduce myself and give a background summary on my career before Ws-Dl because I believe if you didn't know where you were, you will never know where you are going.&lt;br /&gt;&lt;br /&gt;I received my BSc. in Computer Systems Engineering at &lt;a href="http://www.alex.edu.eg/"&gt;Alexandria University, Egypt&lt;/a&gt; in 2008. My graduation project entitled "&lt;a href="https://void.dev.java.net/"&gt;VOID: The web-based integrated development environment&lt;/a&gt;" was selected to win the first prize in the graduation projects competition in the University for year 2008. For the last 2 years of my degree I was working in a software company back home called &lt;a href="http://www.espace.com.eg/"&gt;eSpace technologies&lt;/a&gt;, I worked in developing systems using &lt;a href="http://rubyonrails.org/"&gt;Ruby on Rails&lt;/a&gt;, and was one of the members who developed &lt;a href="http://www.espace.com.eg/neverblock/"&gt;Neverblock&lt;/a&gt; (an open source project to enable easy development of non-blocking concurrent code.) 
along with fellow student and friend &lt;a href="http://www.blogger.com/profile/00508125412427130664"&gt;Mostafa Aly&lt;/a&gt; who is also in the Ws-Dl group.&lt;br /&gt;&lt;br /&gt;I started my masters program in &lt;a href="http://www.uab.es/english/"&gt;Universitat Autonoma de Barcelona, Spain&lt;/a&gt;. I worked in &lt;a href="http://www.cvc.uab.es/index.asp?idioma=en"&gt;CVC (the Computer vision center)&lt;/a&gt; in the &lt;a href="http://www.cat.uab.cat/"&gt;colour group&lt;/a&gt; under the supervision of &lt;a href="http://www.cat.uab.cat/~robert/"&gt;Robert Benavente&lt;/a&gt;, &lt;a href="http://www.cat.uab.cat/~maria/"&gt;Maria Vanrell&lt;/a&gt; and &lt;a href="http://cat.uab.cat/~joost/"&gt;Joost Van de Weijer&lt;/a&gt;, and on July 2009 I defended my thesis entitled "Colour naming Using Context-Based learning through a Perceptual Model", published a paper and the second is still under development. In a nutshell we were able to create a parametric model for the &lt;a href="http://en.wikipedia.org/wiki/Lab_color_space"&gt;Lab color space&lt;/a&gt; based on &lt;a href="http://en.wikipedia.org/wiki/Psychophysics"&gt;psychophysical&lt;/a&gt; experiments using real life images in the machine learning process to reach a better model near to human perception of color in context. 
In August 2008 I participated in the CVC team competing in &lt;a href="http://pascallin.ecs.soton.ac.uk/challenges/VOC/voc2009/"&gt;PASCAL VOC2009&lt;/a&gt; image classification world challenge in &lt;a href="http://en.wikipedia.org/wiki/Kyoto"&gt;Kyoto, Japan&lt;/a&gt; and won &lt;a href="http://pascallin.ecs.soton.ac.uk/challenges/VOC/voc2009/workshop/index.html"&gt;2 gold medals&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/_ajH_vbhB1sY/TJjyrILZZcI/AAAAAAAAACk/j172WE8hB54/s1600/20091215838.jpg"&gt;&lt;img style="float:center; margin:0 10px 10px 0;cursor:pointer; cursor:hand;width: 320px; height: 260px;" src="http://4.bp.blogspot.com/_ajH_vbhB1sY/TJjyrILZZcI/AAAAAAAAACk/j172WE8hB54/s320/20091215838.jpg" border="0" alt=""id="BLOGGER_PHOTO_ID_5519428166086124994" /&gt;&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;In September 2009 I started my internship at &lt;a href="http://www.microsoft.com/middleeast/egypt/cmic/"&gt;Cairo Microsoft Innovation center "CMIC"&lt;/a&gt; working on creating recommendation systems based on social networks with &lt;a href="http://www.microsoft.com/middleeast/Egypt/CMIC/Research_Team.aspx"&gt;Nayer Wanas&lt;/a&gt; and wrote a paper which is under review. Also performed a study that the research center presented by CMIC's director &lt;a href="http://www.microsoft.com/middleeast/Egypt/CMIC/CMIC_Team.aspx"&gt;Tarek Alabady&lt;/a&gt; to the minister of communication and information technology &lt;a href="http://en.wikipedia.org/wiki/Tarek_Kamel"&gt;Tarek Kamel&lt;/a&gt; in December 2009.&lt;br /&gt;&lt;br /&gt;In January 2010 I arrived Norfolk and started my first semester at ODU. 
Later in the same month I was invited by Google to attend the &lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://2.bp.blogspot.com/_ajH_vbhB1sY/TJjy02i0KgI/AAAAAAAAACs/sLo5euT7-Uc/s1600/20091214823.jpg"&gt;&lt;img style="float:left; margin:0 10px 10px 0;cursor:pointer; cursor:hand;width: 280px; height: 240px;" src="http://2.bp.blogspot.com/_ajH_vbhB1sY/TJjy02i0KgI/AAAAAAAAACs/sLo5euT7-Uc/s200/20091214823.jpg" border="0" alt="" id="BLOGGER_PHOTO_ID_5519428333151201794" /&gt;&lt;/a&gt;&lt;a href="http://www.google.com/jobs/gradforum/"&gt;2010 Google Grad CS Forum&lt;/a&gt;. An all-paid trip from Norfolk to San Francisco including a two-day stay at &lt;a href="http://www1.hilton.com/en_US/hi/hotel/SFOFHHH-Hilton-San-Francisco-Union-Square-California/index.do"&gt;Hilton downtown&lt;/a&gt;, who can say no?! &lt;a href="http://www.google.com/profiles/hanah"&gt;Hanah Kim&lt;/a&gt; the University Programs Specialist contacted me giving me the details and the agenda. &lt;br /&gt;&lt;br /&gt;On the 21st I was with 82 other fellow PhD students from all over the states attending the opening reception hosted by &lt;a href="http://research.google.com/people/spector/"&gt;Alfred Spector&lt;/a&gt;, Google's VP of Research and Special Initiatives. He discussed with us several topics and answered all our questions. Surrounded by all these brilliant minds, I was so proud to represent Old Dominion University in this prestigious event. Early next day a shuttle came to take us to the GooglePlex. There, &lt;a href="http://en.wikipedia.org/wiki/Marissa_Mayer"&gt;Marissa Mayer&lt;/a&gt;, VP Search Products &amp; User Experience welcomed us and gave along with &lt;a href="http://research.google.com/pubs/author89.html"&gt;Kevin McCurley&lt;/a&gt;, Research Scientist an amazing keynote and answered all our questions in regards to research in Google, publishing, research in the industry in general. 
After that we were taken on a tour around the humongous &lt;a href="http://en.wikipedia.org/wiki/Googleplex"&gt;GooglePlex&lt;/a&gt; campus. The tour took more than an hour and yet we just skimmed some of the public areas (some areas are restricted to outsiders and guests). &lt;br /&gt;&lt;br /&gt;After lunch, some of the students in their final years of their PhD were selected to give presentations about their work, which was definitely enlightening. After that we had two Tech-Talks, the first was by &lt;a href="http://www.informatik.uni-trier.de/~ley/db/indices/a-tree/g/Gonzalez:Hector.html"&gt;Hector Gonzalez&lt;/a&gt;, Research Scientist, in which he briefly described to us new techniques in extremely Large Scale data collaboration and integration. The second was by &lt;a href="http://en.wikipedia.org/wiki/T._V._Raman"&gt;T.V. Raman&lt;/a&gt;, Research Scientist, which I think was the most amazing talk I attended in a long time. T.V. is blind, &lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://4.bp.blogspot.com/_ajH_vbhB1sY/TJjp_SWoTgI/AAAAAAAAACc/gbtf7UC9t0w/s1600/201001221179.jpg"&gt;&lt;img style="float:right; margin:0 10px 10px 0;cursor:pointer; cursor:hand;width: 240px; height: 200px;" src="http://4.bp.blogspot.com/_ajH_vbhB1sY/TJjp_SWoTgI/AAAAAAAAACc/gbtf7UC9t0w/s200/201001221179.jpg" border="0" alt="" id="BLOGGER_PHOTO_ID_5519418616810327554" /&gt;&lt;/a&gt; but he leads one of the biggest &lt;a href="http://www.nytimes.com/2009/01/04/business/04blind.html?_r=1"&gt;accessibility teams at Google&lt;/a&gt; and he specializes in auditory user interfaces and structured electronic documents. &lt;br /&gt;&lt;br /&gt;Finally there were some round tables with scientists from different fields who gladly answered our questions. 
&lt;a href="http://www.informatik.uni-trier.de/~ley/db/indices/a-tree/f/Frome:Andrea.html"&gt;Andrea Frome&lt;/a&gt;, who leads one of the teams in &lt;a href="http://google-latlong.blogspot.com/2008/05/street-view-revisits-manhattan.html"&gt;Google Maps specialized in Street View&lt;/a&gt;, described to us her work and answered all our questions. Later that day we had dinner in an amazing Italian restaurant in the heart of San Francisco named &lt;a href="http://www.paliodasti.com/"&gt;Palio d'Asti&lt;/a&gt;. The next day I flew back to Norfolk.&lt;br /&gt;&lt;br /&gt;That was a quick snapshot of the highlights in my career before Ws-Dl, which I joined in February of 2010. I hope this post wasn't too long!&lt;br /&gt;&lt;br /&gt;For more details check out my &lt;a href="http://hany-salaheldeen.blogspot.com/"&gt;Blog&lt;/a&gt; and &lt;a href="http://www.cs.odu.edu/~hany/"&gt;Website&lt;/a&gt;.&lt;br /&gt;&lt;br /&gt;--Hany&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-7419226253112333413?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/7419226253112333413/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2010/10/blast-from-past-my-road-to-ws-dl.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/7419226253112333413'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/7419226253112333413'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2010/10/blast-from-past-my-road-to-ws-dl.html' title='2010-10-11: A Blast from the past: My road to Ws-Dl!'/><author><name>Hany 
SalahEldeen</name><uri>http://www.blogger.com/profile/06304841890215312435</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='http://1.bp.blogspot.com/-U-tAHHzl3Ok/TxNag880ahI/AAAAAAAABgc/-mvRIYmi_3Q/s220/339232_10150962841245323_533655322_21244974_36126378_o.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://4.bp.blogspot.com/_ajH_vbhB1sY/TJjms2ny8oI/AAAAAAAAABk/3Jyk9Oo4e9o/s72-c/201001221208.jpg' height='72' width='72'/><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-7914968450791367629</id><published>2010-10-11T14:13:00.004-04:00</published><updated>2010-10-11T17:03:49.201-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='scrapbook'/><category scheme='http://www.blogger.com/atom/ns#' term='archive'/><category scheme='http://www.blogger.com/atom/ns#' term='data portability'/><category scheme='http://www.blogger.com/atom/ns#' term='facebook'/><category scheme='http://www.blogger.com/atom/ns#' term='digital preservation'/><title type='text'>2010-10-11: ArchiveFacebook Version 1.2 is released</title><content type='html'>Celebrating a year from the &lt;a href="http://ws-dl.blogspot.com/2009/09/archivefacebook.html"&gt;very first release of ArchiveFacebook&lt;/a&gt; the development team is releasing the new version 1.2. Throughout the last couple of months we have received feedback from the users asking for enhancements and resolving issues. We also received lots of compliments and thumbs up! This feedback was channeled and analyzed to give us an idea on how to enhance the user experience.&lt;br /&gt;&lt;br /&gt;We released version 1.2 3 days ago with lots of bug fixes and new features, among which the expansion of stories and posts on comments. 
Several users suggested that it would be useful to be able to archive all the posts and comments on a certain activity (status update, event attendance, photo...etc). Now V 1.2 can support this and any activity stream within your Facebook profile.&lt;br /&gt;&lt;br /&gt;The new version seems to be highly anticipated to an extent that the number of downloads within the first 3 days even before announcing the release reached 2000 according to Mozilla:&lt;br /&gt;&lt;br /&gt;&lt;a href="https://addons.mozilla.org/en-US/firefox/addon/13993/"&gt;https://addons.mozilla.org/en-US/firefox/addon/13993/&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;Try out the new version and let us know what you think. Development is a triangle and feedback is one of its edges!&lt;br /&gt;&lt;br /&gt;Please join the ArchiveFacebook group to post issues and stay tuned with the latest updates and future releases:&lt;br /&gt;&lt;br /&gt;&lt;a href="http://groups.google.com/group/archivefacebook"&gt;http://groups.google.com/group/archivefacebook&lt;/a&gt;&lt;br /&gt;&lt;br /&gt;--Hany&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-7914968450791367629?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/7914968450791367629/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2010/10/2010-10-11-archivefacebook-version-12.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/7914968450791367629'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/7914968450791367629'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2010/10/2010-10-11-archivefacebook-version-12.html' title='2010-10-11: 
ArchiveFacebook Version 1.2 is released'/><author><name>Hany SalahEldeen</name><uri>http://www.blogger.com/profile/06304841890215312435</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='21' height='32' src='http://1.bp.blogspot.com/-U-tAHHzl3Ok/TxNag880ahI/AAAAAAAABgc/-mvRIYmi_3Q/s220/339232_10150962841245323_533655322_21244974_36126378_o.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-5009392857419210481</id><published>2010-10-04T18:35:00.003-04:00</published><updated>2010-10-04T19:50:02.191-04:00</updated><category scheme='http://www.blogger.com/atom/ns#' term='WAC'/><category scheme='http://www.blogger.com/atom/ns#' term='award'/><category scheme='http://www.blogger.com/atom/ns#' term='Web Archiving'/><category scheme='http://www.blogger.com/atom/ns#' term='digital preservation'/><category scheme='http://www.blogger.com/atom/ns#' term='Library of Congress'/><title type='text'>2010-10-04: WAC Kickoff Meeting; LC Storage Architectures Meeting, DPC Award Shortlist</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://3.bp.blogspot.com/_xf_Yufxwils/TKpW8JL-FtI/AAAAAAAAAZA/-pvAycaqnwM/s1600/StanfordGatesCS.jpg"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 150px;" src="http://3.bp.blogspot.com/_xf_Yufxwils/TKpW8JL-FtI/AAAAAAAAAZA/-pvAycaqnwM/s200/StanfordGatesCS.jpg" alt="" id="BLOGGER_PHOTO_ID_5524323484181599954" border="0" /&gt;&lt;/a&gt;On September 24, I attended the kickoff meeting at Stanford for the Web Archiving Cooperative (WAC) Project, a joint NSF project (~$2.8M) between &lt;a href="http://www.nsf.gov/awardsearch/showAward.do?AwardNumber=1009916"&gt;Stanford&lt;/a&gt;, &lt;a href="http://www.nsf.gov/awardsearch/showAward.do?AwardNumber=1009392"&gt;Old Dominion&lt;/a&gt; and &lt;a 
href="http://www.nsf.gov/awardsearch/showAward.do?AwardNumber=1008492"&gt;Harding&lt;/a&gt;.  A summary of the meeting will be published at a later date, but it was attended by several members of our Advisory Board (from memory: &lt;a href="http://polaris.gseis.ucla.edu/cborgman/Chriss_Site/Welcome.html"&gt;Chris Borgman&lt;/a&gt; (UCLA), &lt;a href="http://www.cdlib.org/contact/staff_directory/pcruse.html"&gt;Trisha Cruse&lt;/a&gt; (CDL), &lt;a href="http://www.csdl.tamu.edu/%7Efuruta/"&gt;Rick Furuta &lt;/a&gt;(TAMU), &lt;a href="http://sites.google.com/site/alonhalevy/"&gt;Alon Halevy&lt;/a&gt; (Google), &lt;a href="http://www.cs.cornell.edu/lagoze/"&gt;Carl Lagoze&lt;/a&gt; (Cornell), &lt;a href="http://research.yahoo.com/Raghu_Ramakrishnan"&gt;Raghu Ramakrishnan&lt;/a&gt; (Yahoo), &lt;a href="http://public.lanl.gov/herbertv/"&gt;Herbert Van de Sompel&lt;/a&gt; (LANL)) and several members and friends of the &lt;a href="http://infolab.stanford.edu/"&gt;Stanford Infolab&lt;/a&gt;. &lt;br /&gt;&lt;br /&gt;I gave two presentations, the first was a quick review of the state of web preservation (with the obligatory heavy emphasis on &lt;a href="http://www.mementoweb.org/"&gt;Memento&lt;/a&gt;), and the second was some of my ruminations about future things that we should (or should not) explore in the context of WAC.&lt;br /&gt;&lt;br /&gt;&lt;div style="width:425px" id="__ss_5310546"&gt;&lt;strong style="display:block;margin:12px 0 4px"&gt;&lt;a href="http://www.slideshare.net/phonedude/review-of-web-archiving" title="Review of Web Archiving"&gt;Review of Web Archiving&lt;/a&gt;&lt;/strong&gt;&lt;object id="__sse5310546" width="425" height="355"&gt;&lt;param name="movie" value="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=review-archiving-nelson-100928190149-phpapp01&amp;amp;stripped_title=review-of-web-archiving&amp;amp;userName=phonedude"&gt;&lt;param name="allowFullScreen" value="true"&gt;&lt;param name="allowScriptAccess" value="always"&gt;&lt;embed 
name="__sse5310546" src="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=review-archiving-nelson-100928190149-phpapp01&amp;amp;stripped_title=review-of-web-archiving&amp;amp;userName=phonedude" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="355"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;div style="padding:5px 0 12px"&gt;View more &lt;a href="http://www.slideshare.net/"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/phonedude"&gt;Michael Nelson&lt;/a&gt;.&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;&lt;div style="width:425px" id="__ss_5310571"&gt;&lt;strong style="display:block;margin:12px 0 4px"&gt;&lt;a href="http://www.slideshare.net/phonedude/my-point-of-view-michael-l-nelson-web-archiving-cooperative" title="My Point of View: Michael L. Nelson  Web Archiving Cooperative"&gt;My Point of View: Michael L. Nelson  Web Archiving Cooperative&lt;/a&gt;&lt;/strong&gt;&lt;object id="__sse5310571" width="425" height="355"&gt;&lt;param name="movie" value="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=nelson-point-of-view-100928190630-phpapp02&amp;amp;stripped_title=my-point-of-view-michael-l-nelson-web-archiving-cooperative&amp;amp;userName=phonedude"&gt;&lt;param name="allowFullScreen" value="true"&gt;&lt;param name="allowScriptAccess" value="always"&gt;&lt;embed name="__sse5310571" src="http://static.slidesharecdn.com/swf/ssplayer2.swf?doc=nelson-point-of-view-100928190630-phpapp02&amp;amp;stripped_title=my-point-of-view-michael-l-nelson-web-archiving-cooperative&amp;amp;userName=phonedude" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="425" height="355"&gt;&lt;/embed&gt;&lt;/object&gt;&lt;div style="padding:5px 0 12px"&gt;View more &lt;a href="http://www.slideshare.net/"&gt;presentations&lt;/a&gt; from &lt;a href="http://www.slideshare.net/phonedude"&gt;Michael Nelson&lt;/a&gt;.&lt;/div&gt;&lt;/div&gt;&lt;br /&gt;&lt;br /&gt;That 
night I caught a redeye back to Norfolk so I could be in DC the following Monday for the Library of Congress &lt;a href="http://www.digitalpreservation.gov/news/events/other_meetings/storage10/"&gt;Designing Storage Architectures for Preservation Collections Meeting&lt;/a&gt;.  While I believe this is their fourth such meeting, it is the first one I attended and while (because?) I did not present or speak, I learned a great deal.  The meeting featured a good mix of academicians and storage industry leaders discussing very large scale storage architectures -- scales that we don't typically approach in our research at ODU.  The majority of the presentations were limited to 5 minutes each, so a good breadth of topics was covered and perusing the slides will be worth your time. &lt;br /&gt;&lt;br /&gt;Finally, Memento has been named one of five finalists for the &lt;a href="http://www.dpconline.org/"&gt;Digital Preservation Coalition &lt;/a&gt;&lt;a href="http://www.dpconline.org/advocacy/awards/dp-award-2010"&gt;2010 Digital Preservation Award&lt;/a&gt;.  It is an honor to be a finalist amongst the other projects (see the &lt;a href="http://www.dpconline.org/advocacy/awards/dp-award-2010/638-2010-digital-preservation-award-shortlists-press-release"&gt;DPC Press Release&lt;/a&gt; for descriptions of all the projects).  The &lt;a href="http://www.dpconline.org/advocacy/awards/dp-award-2010/638-2010-digital-preservation-award-shortlists-press-release"&gt;Library of Congress&lt;/a&gt; has also issued a press release, as has &lt;a href="http://www.odu.edu/ao/news/index.php?todo=details&amp;amp;id=24046"&gt;ODU&lt;/a&gt;.  
The final announcement will come in December -- here's hoping Memento can bring in the prize.&lt;br /&gt;&lt;br /&gt;--Michael&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-5009392857419210481?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/5009392857419210481/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2010/10/2010-10-04-wac-kickoff-meeting-lc.html#comment-form' title='1 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5009392857419210481'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5009392857419210481'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2010/10/2010-10-04-wac-kickoff-meeting-lc.html' title='2010-10-04: WAC Kickoff Meeting; LC Storage Architectures Meeting, DPC Award Shortlist'/><author><name>Michael L. 
Nelson</name><uri>http://www.blogger.com/profile/13202853768741690867</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='20' height='32' src='http://3.bp.blogspot.com/_xf_Yufxwils/SV_wwsUDovI/AAAAAAAAAAY/2KTAlEoNpN0/S220/mln-ad-small.jpg'/></author><media:thumbnail xmlns:media='http://search.yahoo.com/mrss/' url='http://3.bp.blogspot.com/_xf_Yufxwils/TKpW8JL-FtI/AAAAAAAAAZA/-pvAycaqnwM/s72-c/StanfordGatesCS.jpg' height='72' width='72'/><thr:total>1</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-5574125623670887771</id><published>2010-08-28T17:08:00.007-04:00</published><updated>2010-08-29T12:15:59.128-04:00</updated><title type='text'>2010-08-28: A Lookup for Nicknames and Diminutive Names</title><content type='html'>&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;I created a simple lookup&amp;nbsp;file that contains United States given names (first names) and their associated nicknames or diminutive names. For example&amp;nbsp;"gregory" -&amp;gt; "greg", or "geoffrey" -&amp;gt; "geoff". 
&amp;nbsp;The file can be downloaded and contributed to from here&amp;nbsp;&lt;/span&gt;&lt;a href="http://code.google.com/p/nickname-and-diminutive-names-lookup/"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;http://code.google.com/p/nickname-and-diminutive-names-lookup/&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;.&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;This lookup was started from&amp;nbsp;&lt;/span&gt;&lt;a href="http://www.tngenweb.org/franklin/frannick.htm" rel="nofollow"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;http://www.tngenweb.org/franklin/frannick.&lt;/span&gt;&lt;/a&gt;&lt;a href="http://www.tngenweb.org/franklin/frannick.htm" rel="nofollow"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;htm&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&amp;nbsp;which is&amp;nbsp;used for genealogy purposes. It  was a good source to&amp;nbsp;start from but because it is used for&amp;nbsp;genealogy&amp;nbsp;purposes there are some pretty old names in there. &amp;nbsp;There was also a significant effort to make it machine readable, i.e. separate names with commas, remove human readable conventions, like "rickie(y)", so that it would be made into two different names "rickie", and "ricky".&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;This is a large list with about 700 entries. Any help from people to clean this list up and add to it is greatly appreciated. 
Think of it as a wiki where you can contribute or change it as needed. &amp;nbsp;CSV was the easiest format to use. Maybe I'll release this in XML or something later, or maybe a kind soul who uses this library wants to contribute another format they converted it into?&lt;br /&gt;&lt;br /&gt;&lt;div style="max-width: 65em;"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;/span&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;I was rather surprised that I&amp;nbsp;couldn't&amp;nbsp;find anything like this on the web. &amp;nbsp;The best I could find was the&lt;/span&gt;&lt;a href="http://www.peacockdata2.com/products/pdnickname/"&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt; pdNickname database&lt;/span&gt;&lt;/a&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt; and it costs $500. &amp;nbsp;So, I created my own and released it as open source so that others could benefit from my work.&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;&lt;br /&gt;&lt;/span&gt;&lt;br /&gt;&lt;span class="Apple-style-span" style="font-family: inherit;"&gt;--Carlton&lt;/span&gt;&lt;/div&gt;&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-5574125623670887771?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/5574125623670887771/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2010/08/lookup-for-nicknames-and-diminutive.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/5574125623670887771'/><link rel='self' type='application/atom+xml' 
href='http://www.blogger.com/feeds/953024975153422094/posts/default/5574125623670887771'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2010/08/lookup-for-nicknames-and-diminutive.html' title='2010-08-28: A Lookup for Nicknames and Diminutive Names'/><author><name>Carlton Northern</name><uri>http://www.blogger.com/profile/07251369322162897601</uri><email>noreply@blogger.com</email><gd:image rel='http://schemas.google.com/g/2005#thumbnail' width='24' height='32' src='http://bp3.blogger.com/_2p-nbVtAey0/R9XKOh6q5XI/AAAAAAAAACI/ffhPDOG_gio/S220/600x400_3548673_600x400.jpg'/></author><thr:total>0</thr:total></entry><entry><id>tag:blogger.com,1999:blog-953024975153422094.post-609340104491727709</id><published>2010-08-18T17:33:00.007-04:00</published><updated>2010-09-08T22:00:36.582-04:00</updated><title type='text'>2010-08-18: Fall 2010 Classes</title><content type='html'>&lt;a onblur="try {parent.deselectBloggerImageGracefully();} catch(e) {}" href="http://www.djp3.net/codexperductum/archives/2008/01/the_9_types_of_college_teacher_1.html"&gt;&lt;img style="float: left; margin: 0pt 10px 10px 0pt; cursor: pointer; width: 200px; height: 197px;" src="http://3.bp.blogspot.com/_xf_Yufxwils/TGxXgDcQKuI/AAAAAAAAAYI/Ileqb04ek0s/s200/groening-college-teachers.jpg" alt="" id="BLOGGER_PHOTO_ID_5506872652558510818" border="0" /&gt;&lt;/a&gt;There will be two WS-DL classes offered for Fall 2010.  &lt;a href="https://www.leoonline.odu.edu/plsql/web/bwckschd.p_disp_detail_sched?term_in=201010&amp;amp;crn_in=15720#main_content"&gt;CS 418/518&lt;/a&gt; "Web Programming" will be taught by &lt;a href="http://www.cs.odu.edu/%7Emklein/"&gt;Martin Klein&lt;/a&gt;, but it will be similar in format and content to &lt;a href="http://www.cs.odu.edu/%7Emln/teaching/"&gt;prior offerings&lt;/a&gt;, especially in respect to the focus on &lt;a href="http://en.wikipedia.org/wiki/LAMP_%28software_bundle%29"&gt;LAMP&lt;/a&gt;.  
This class involves &lt;span style="font-style: italic;"&gt;significant&lt;/span&gt; programming, developing a single project throughout the semester.  It is a good complement to &lt;a href="http://www.cs.odu.edu/%7Emklein/teaching/cs595-s10/"&gt;CS 495/595&lt;/a&gt; "Web Server Development" which was last taught by Martin in Spring 2010.  2010-08-30 edit: The &lt;a href="http://www.cs.odu.edu/%7Emklein/teaching/cs518-f10/"&gt;class page for CS 418/518&lt;/a&gt; is now available.&lt;br /&gt;&lt;br /&gt;I will teach&lt;a href="https://www.leoonline.odu.edu/plsql/web/bwckschd.p_disp_detail_sched?term_in=201010&amp;amp;crn_in=21063#main_content"&gt; CS 895&lt;/a&gt; "Time on the Web", a new class that will explore the issues of Web resources evolving through time and how we interact with them.  Aside from the canonical background readings, we will focus on current and recent projects such as our own &lt;a href="http://www.mementoweb.org/"&gt;Memento&lt;/a&gt; &amp;amp; &lt;a href="http://arxiv.org/abs/0907.2268"&gt;Synchronicity&lt;/a&gt;, as well as &lt;a href="http://www.openannotation.org/"&gt;OAC&lt;/a&gt;,  &lt;a href="http://www.cond.org/zoetrope.html"&gt;Zoetrope&lt;/a&gt;, &lt;a href="http://people.csail.mit.edu/teevan/rse/"&gt;The Re:Search Engine&lt;/a&gt;, &lt;a href="https://wiki.umiacs.umd.edu/adapt/index.php/Main_Page"&gt;ADAPT&lt;/a&gt;, &lt;a href="http://www.dl.kuis.kyoto-u.ac.jp/%7Eadam/pastwebbrowser.html"&gt;Past Web Browser&lt;/a&gt;, and other projects and papers to be determined.  This class will be heavily oriented to research and will require the students to explore and investigate topics on their own, develop prototypes, and present the results to the rest of the class.&lt;br /&gt;&lt;br /&gt;I'll update this entry when class pages are available.&lt;br /&gt;&lt;br /&gt;2010-08-30 edit: CS 895 will begin on September 8th (not Sept 1), 4:20-7:00 PM, r. 
3316.&lt;br /&gt;2010-09-08 edit: the &lt;a href="http://www.cs.odu.edu/%7Emln/teaching/cs895-f10/"&gt;CS 895 class page&lt;/a&gt; is now available.&lt;br /&gt;&lt;br /&gt;--Michael&lt;div class="blogger-post-footer"&gt;&lt;img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/953024975153422094-609340104491727709?l=ws-dl.blogspot.com' alt='' /&gt;&lt;/div&gt;</content><link rel='replies' type='application/atom+xml' href='http://ws-dl.blogspot.com/feeds/609340104491727709/comments/default' title='Post Comments'/><link rel='replies' type='text/html' href='http://ws-dl.blogspot.com/2010/08/2010-08-18-fall-2010-classes.html#comment-form' title='0 Comments'/><link rel='edit' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/609340104491727709'/><link rel='self' type='application/atom+xml' href='http://www.blogger.com/feeds/953024975153422094/posts/default/609340104491727709'/><link rel='alternate' type='text/html' href='http://ws-dl.blogspot.com/2010/08/2010-08-18-fall-2010-classes.html' title='2010-08-18: Fall 2010 Classes'/><author><name>Michael L. Nelson</name><uri>http://www.b
