feedback
Sep 1 2006

Quick C# Vista Photo Tag Reader

by John Dyer

I've been reading that Vista adds its tagging data directly to the file (here and here). I tagged some JPEGs and then opened them up in a text editor to see what the XMP data would look like. Then I decided to write some quick and dirty code to read the metadata Windows Vista adds to photos. The code opens the file and reads it line-by-line until it reaches the XMP section. Then it pulls out the Title, Subject, Comments, Rating, and Tags that Vista Photo Gallery adds.

/// <summary>
/// Reads the XMP packet embedded in an image file and extracts the title, subject,
/// comments, ratings and tags stored in it.
/// </summary>
public class VistaMetaExtractor
{
    // Matches xmlns:prefixNN declarations bound to the reserved xmlns namespace URI.
    // NOTE(review): these are rewritten before parsing, presumably because the XML parser
    // refuses a prefix bound to that reserved URI - confirm against a real Vista-tagged file.
    private static readonly System.Text.RegularExpressions.Regex ReservedNamespaceDeclaration =
        new System.Text.RegularExpressions.Regex(@"xmlns:prefix(?:(\d{1,3}))=""http://www.w3.org/2000/xmlns/""");

    /// <summary>
    /// Extracts the metadata from the XMP packet of the given file.
    /// </summary>
    /// <param name="filename">Path of the image file to inspect.</param>
    /// <returns>
    /// The populated metadata, or <c>null</c> when the file contains no complete
    /// <c>&lt;xmp:xmpmeta&gt;...&lt;/xmp:xmpmeta&gt;</c> packet.
    /// </returns>
    /// <remarks>
    /// Exceptions raised while opening the file or while parsing the packet as XML are not
    /// caught here. Missing or non-numeric ratings are reported as 0; missing text values
    /// are reported as an empty string.
    /// </remarks>
    public static VistaMetaInfo GetMetaInfo(string filename)
    {
        // Find the XMP data in the file (only the part of the file up to the end of the packet is read).
        string xmpData = FindStringInFile(filename, "<xmp:xmpmeta", "</xmp:xmpmeta>");
        if (xmpData == string.Empty)
        {
            return null;
        }

        // Change namespace definitions (i.e. xmlns:prefix##="http://www.w3.org/2000/xmlns/").
        xmpData = ReservedNamespaceDeclaration.Replace(xmpData, @"xmlns:prefix$1=""http://randomurl.org""");

        XmlDocument xmlDocument = new XmlDocument();
        xmlDocument.LoadXml(@"<?xml version=""1.0""?>" + xmpData);

        // Register the prefixes used by the XPath queries below.
        XmlNamespaceManager nsMan = new XmlNamespaceManager(xmlDocument.NameTable);
        nsMan.AddNamespace("xmp", "http://ns.adobe.com/xap/1.0/");
        nsMan.AddNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
        nsMan.AddNamespace("MicrosoftPhoto", "http://ns.microsoft.com/photo/1.0");
        nsMan.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");
        nsMan.AddNamespace("tiff", "http://ns.adobe.com/tiff/1.0/");
        nsMan.AddNamespace("exif", "http://ns.adobe.com/exif/1.0/");

        // Fill in the details from the XMP data.
        VistaMetaInfo metaInfo = new VistaMetaInfo();
        metaInfo.FileInfo = new FileInfo(filename);

        // TAGS
        XmlNodeList tagNodes = xmlDocument.SelectNodes("//rdf:RDF/rdf:Description/dc:subject/rdf:Bag/rdf:li", nsMan);
        metaInfo.Tags = new string[tagNodes.Count];
        for (int i = 0; i < tagNodes.Count; i++)
        {
            metaInfo.Tags[i] = tagNodes[i].InnerText;
        }

        // TITLE
        metaInfo.Title = GetFirstText(xmlDocument, nsMan, "//rdf:RDF/rdf:Description/dc:title/rdf:Alt/rdf:li");
        // SUBJECT
        metaInfo.Subject = GetFirstText(xmlDocument, nsMan, "//rdf:RDF/rdf:Description/dc:description/rdf:Alt/rdf:li");
        // COMMENTS
        metaInfo.Comments = GetFirstText(xmlDocument, nsMan, "//rdf:RDF/rdf:Description/exif:UserComment/rdf:Alt/rdf:li");
        // RATING
        metaInfo.VistaRating = GetFirstInt(xmlDocument, nsMan, "//rdf:RDF/rdf:Description/MicrosoftPhoto:Rating");
        // STARS
        metaInfo.Rating = GetFirstInt(xmlDocument, nsMan, "//rdf:RDF/rdf:Description/xmp:Rating");

        return metaInfo;
    }

    /// <summary>Returns the inner text of the first node matching <paramref name="xpath"/>, or "" when none matches.</summary>
    private static string GetFirstText(XmlDocument xmlDocument, XmlNamespaceManager nsMan, string xpath)
    {
        XmlNode node = xmlDocument.SelectSingleNode(xpath, nsMan);
        return (node != null) ? node.InnerText : "";
    }

    /// <summary>Returns the first node matching <paramref name="xpath"/> parsed as an integer, or 0 when it is missing or not a number.</summary>
    private static int GetFirstInt(XmlDocument xmlDocument, XmlNamespaceManager nsMan, string xpath)
    {
        XmlNode node = xmlDocument.SelectSingleNode(xpath, nsMan);
        int value;
        // The value is file data, not user-facing text, so it is parsed culture-independently.
        if (node != null && int.TryParse(node.InnerText, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out value))
        {
            return value;
        }
        return 0;
    }

    /// <summary>
    /// Scans the file line by line and returns the text from the first occurrence of
    /// <paramref name="startString"/> through the next occurrence of <paramref name="endString"/>
    /// (both markers included).
    /// </summary>
    /// <param name="filename">Path of the file to scan.</param>
    /// <param name="startString">Marker that opens the section.</param>
    /// <param name="endString">Marker that closes the section.</param>
    /// <returns>
    /// The section text with lines joined by '\n', or <see cref="string.Empty"/> when the start
    /// marker is never found or the file ends before the end marker.
    /// </returns>
    private static string FindStringInFile(string filename, string startString, string endString)
    {
        System.Text.StringBuilder output = new System.Text.StringBuilder();
        bool inString = false;

        // The using block closes the reader even when reading throws.
        using (StreamReader sr = new StreamReader(filename))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                int searchFrom = 0;
                if (!inString)
                {
                    // Ordinal search: the markers are exact byte sequences, and most of the
                    // file is image data rather than text.
                    int startIndex = line.IndexOf(startString, StringComparison.Ordinal);
                    if (startIndex < 0)
                    {
                        continue;
                    }
                    inString = true;
                    line = line.Substring(startIndex);
                    // The end marker may sit on this same line, after the start marker.
                    searchFrom = startString.Length;
                }

                int endIndex = line.IndexOf(endString, searchFrom, StringComparison.Ordinal);
                if (endIndex > -1)
                {
                    output.Append(line, 0, endIndex + endString.Length);
                    return output.ToString();
                }

                // Keep the line break so tokens on adjacent lines are not fused together.
                output.Append(line);
                output.Append('\n');
            }
        }

        // Either the start marker was never seen or the section was never closed.
        return string.Empty;
    }
}
/// <summary>
/// Simple read/write container for the metadata values extracted from one image file.
/// </summary>
public class VistaMetaInfo
{
    /// <summary>The file the metadata was read from.</summary>
    public FileInfo FileInfo
    {
        get
        {
            return _fileInfo;
        }
        set
        {
            _fileInfo = value;
        }
    }
    private FileInfo _fileInfo;

    /// <summary>Rating taken from the <c>xmp:Rating</c> element.</summary>
    public int Rating
    {
        get
        {
            return _rating;
        }
        set
        {
            _rating = value;
        }
    }
    private int _rating;

    /// <summary>Rating taken from the <c>MicrosoftPhoto:Rating</c> element.</summary>
    public int VistaRating
    {
        get
        {
            return _vistaRating;
        }
        set
        {
            _vistaRating = value;
        }
    }
    private int _vistaRating;

    /// <summary>Title of the image.</summary>
    public string Title
    {
        get
        {
            return _title;
        }
        set
        {
            _title = value;
        }
    }
    private string _title;

    /// <summary>Subject (description) of the image.</summary>
    public string Subject
    {
        get
        {
            return _subject;
        }
        set
        {
            _subject = value;
        }
    }
    private string _subject;

    /// <summary>User comments attached to the image.</summary>
    public string Comments
    {
        get
        {
            return _comments;
        }
        set
        {
            _comments = value;
        }
    }
    private string _comments;

    /// <summary>Tags attached to the image.</summary>
    public string[] Tags
    {
        get
        {
            return _tags;
        }
        set
        {
            _tags = value;
        }
    }
    private string[] _tags;
}

It works pretty well and seems fast enough to make a simple gallery application.

Comments

Adam Shao September 7. 2006 23:49

You should use the Windows Imaging Component APIs to access the metadata. They are much easier to use. Here is a link to more information:

http://windowssdk.msdn.microsoft.com/en-us/library/ms735422.aspx

Adam Shao

John Dyer September 12. 2006 03:15

Adam, thanks for the tip. I was working on a quick web-based photo gallery in ASP.NET. I would like to use the WIC APIs, but it looks like it'd be harder to use them to do basic web tasks. I'll check it out though. Thanks!

John Dyer

Daniel January 15. 2008 08:45

Hello John,

there is a bug in your implementation. This code only works if there is no line feed between the start / end tag.

It's better to load the whole file into a buffer and walk through it.

Best regards...

Daniel

John Dyer January 15. 2008 08:51

@Daniel, thanks for the tip about the line feed. I don't however agree that you should load the entire file. It's better to only load what you need. The XMP data is in the first few kilobytes and images these days are many megabytes. Loading all that image data would be a massive memory and resource drain. Still though, reading the file as text and relying on line feeds was probably a bad idea. Using a buffer and converting to text on the fly would be better. Thanks again for the tip!
John Dyer's last post: Wii + Flash + Papervision3D + C# = Alumni World Map

John Dyer

Steve Lawrence May 16. 2008 21:50

I have used your code and have a small fix so it works whether the xmp is on one line or formatted:

[code]
/// <summary>
/// Scans the file line by line and returns the text from the first occurrence of
/// <paramref name="startString"/> through the next occurrence of <paramref name="endString"/>
/// (both markers included). Works whether the section sits on one line or spans several.
/// </summary>
/// <param name="filename">Path of the file to scan.</param>
/// <param name="startString">Marker that opens the section.</param>
/// <param name="endString">Marker that closes the section.</param>
/// <returns>
/// The section text with lines joined by '\n', or <see cref="string.Empty"/> when the start
/// marker is never found or the file ends before the end marker.
/// </returns>
private static string FindStringInFile(string filename, string startString, string endString)
{
    System.Text.StringBuilder output = new System.Text.StringBuilder();
    bool start = false;

    // The using block closes the reader even when reading throws.
    using (StreamReader sr = new StreamReader(filename))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            int searchFrom = 0;
            if (!start)
            {
                // Only look for the start marker until it has been found once; a later
                // repeat of it inside the section must not restart the capture.
                int startIndex = line.IndexOf(startString, StringComparison.Ordinal);
                if (startIndex < 0)
                {
                    continue;
                }
                start = true;
                line = line.Substring(startIndex);
                // Look for the end marker only after the start marker, so an end marker
                // that appears earlier on the same line is ignored.
                searchFrom = startString.Length;
            }

            int endIndex = line.IndexOf(endString, searchFrom, StringComparison.Ordinal);
            if (endIndex > -1)
            {
                // Since we have the whole section we do not need to read more.
                output.Append(line, 0, endIndex + endString.Length);
                return output.ToString();
            }

            // End is not in this line: keep the line (and its line break) and move on.
            output.Append(line);
            output.Append('\n');
        }
    }

    // Either the start marker was never seen or the section was never closed.
    return string.Empty;
}
[/code]

Steve Lawrence

Add comment


(Will show your Gravatar icon)  

  Country flag

biuquote
  • Comment
  • Preview
Loading